In [1]:
# Clear any logs from previous runs
!rmdir ".\logs\hparam_tuning" /S /Q  #Para windows

%reload_ext tensorboard

El sistema no puede encontrar el archivo especificado.
El sistema no puede encontrar el archivo especificado.


In [2]:
import numpy as np
import pandas as pd
import time, os

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense
from tensorboard.plugins.hparams import api as hp

from keras.utils import to_categorical
from keras.losses import CategoricalCrossentropy
from keras.models import Model



from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split as split

Using TensorFlow backend.


In [3]:
scaler = StandardScaler()

### Conjunto de datos: SDSS DR17

In [4]:
data = pd.read_csv('./SDSS/star_classification.csv')
cols = ['alpha','delta','u','g','r','i','z','redshift','class']
data = data[cols]
data["class"]=[0 if i == "GALAXY" else 1 if i == "STAR" else 2 for i in data["class"]]
print(data.head())
data = data.to_numpy()

        alpha      delta         u         g         r         i         z  \
0  135.689107  32.494632  23.87882  22.27530  20.39501  19.16573  18.79371   
1  144.826101  31.274185  24.77759  22.83188  22.58444  21.16812  21.61427   
2  142.188790  35.582444  25.26307  22.66389  20.60976  19.34857  18.94827   
3  338.741038  -0.402828  22.13682  23.77656  21.61162  20.50454  19.25010   
4  345.282593  21.183866  19.43718  17.58028  16.49747  15.97711  15.54461   

   redshift  class  
0  0.634794      0  
1  0.779136      0  
2  0.644195      0  
3  0.932346      0  
4  0.116123      0  


In [5]:
def prepare_dataset(data):
    X, Y = np.empty((0)), np.empty((0))
    X = data[:, :8]
    Y = data[:, 8]
    Y = to_categorical(Y, num_classes=3)
    return X, Y

In [6]:
X, Y = prepare_dataset(data)
X_train, X_test, Y_train, Y_test = split(X, Y, test_size = 0.3, random_state = 0)
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

lenx, input_shape = np.shape(X)

### Hiperparámetros del modelo

In [7]:
HP_LAYERS =    hp.HParam('layers', hp.Discrete([8, 16]))
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([4,8,16,32]))
HP_LEARNING  = hp.HParam('learning_rate', hp.Discrete([4,8,16,32]))
# HP_BATCHSIZE = hp.HParam('batch_size', hp.Discrete([16, 32]))

callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_categorical_accuracy', mode='max',
                                   min_delta=0,
                                   patience=3,
                                   restore_best_weights=True)]
batch_size = 64
epochs = 10

In [8]:
# METRIC_ACCURACY = 'accuracy'
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
# with tf.summary.FileWriter('logs/hparam_tuning', sess.graph):
#     init = tf.initialize_all_variables()
#     sess.run(init)
    hp.hparams_config(
        hparams=[HP_LAYERS, HP_NUM_UNITS, HP_LEARNING],
        metrics=[hp.Metric('loss', display_name="Accuracy")])

In [9]:
def train_test_model(hparams):    
    
    # Train LSTM model and predict on validation set
    model = keras.Sequential()
    model.add(Input(shape=(int(X_train.shape[1]),)))
    model.add(Dense(hparams[HP_NUM_UNITS], input_shape=(int(X_train.shape[1]),)))
    
    for i in range(hparams[HP_LAYERS]):        
        model.add(Dense(hparams[HP_NUM_UNITS], activation='relu'))
    model.add(Dense(3, activation=tf.nn.softmax))
     
    optimizer = keras.optimizers.Adam(learning_rate=hparams[HP_LEARNING]*10**(-4), beta_1=0.9, beta_2=0.999, epsilon=1e-3)
    model.compile(
            optimizer=optimizer,
            loss=CategoricalCrossentropy(),
            metrics=["categorical_accuracy"])
    
    # Run with 1 epoch to speed things up for demo purposes

    model.fit(X_train, Y_train, epochs=epochs, validation_data=(X_test, Y_test),
              callbacks=callbacks, batch_size=batch_size, shuffle=True, verbose=0)

    _, loss = model.evaluate(X_test, Y_test)
    
    return loss

In [10]:
def run(run_dir, hparams):
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        loss = train_test_model(hparams)
        tf.summary.scalar("loss", loss, step=1)
        return loss

In [11]:
session_num = 0
datos = []

for deep_layers in HP_LAYERS.domain.values:
    for num_units in HP_NUM_UNITS.domain.values:
        for learning_rate in HP_LEARNING.domain.values:
#             for batch_size in HP_BATCHSIZE.domain.values:
            t = time.time()
            hparams = {

                HP_LAYERS: deep_layers,
                HP_NUM_UNITS: num_units,
                HP_LEARNING: learning_rate,
#                     HP_BATCHSIZE: batch_size
            }
            run_name = "run-%d" % session_num
            print('\n--- Starting trial: %s' % run_name)
            print({h.name: hparams[h] for h in hparams})
            score = run('logs/hparam_tuning/' + run_name, hparams)
            t = time.time()-t
            session_num += 1
            print("Accuracy:", score, "Tiempo transcurrido:", t)
            
            datos.append([deep_layers, num_units, learning_rate, score, t])

print(session_num)


--- Starting trial: run-0
{'layers': 8, 'num_units': 4, 'learning_rate': 4}
Accuracy: 0.9578 Tiempo transcurrido: 89.89737129211426

--- Starting trial: run-1
{'layers': 8, 'num_units': 4, 'learning_rate': 8}
Accuracy: 0.9647 Tiempo transcurrido: 99.82058095932007

--- Starting trial: run-2
{'layers': 8, 'num_units': 4, 'learning_rate': 16}
Accuracy: 0.9683333 Tiempo transcurrido: 90.81737971305847

--- Starting trial: run-3
{'layers': 8, 'num_units': 4, 'learning_rate': 32}
Accuracy: 0.59363335 Tiempo transcurrido: 40.76187705993652

--- Starting trial: run-4
{'layers': 8, 'num_units': 8, 'learning_rate': 4}
Accuracy: 0.9643667 Tiempo transcurrido: 91.63001346588135

--- Starting trial: run-5
{'layers': 8, 'num_units': 8, 'learning_rate': 8}
Accuracy: 0.9651333 Tiempo transcurrido: 68.54588437080383

--- Starting trial: run-6
{'layers': 8, 'num_units': 8, 'learning_rate': 16}
Accuracy: 0.96646667 Tiempo transcurrido: 86.8001480102539

--- Starting trial: run-7
{'layers': 8, 'num_unit

### Guardar datos

In [12]:
filename = "historial_sdss_tunning.txt"
df = pd.DataFrame(datos, columns = ["Deep size", "Num units", "Learning rate", "Accuracy", "Tiempo de ejecución"])

df.sort_values(by=["Accuracy", "Tiempo de ejecución"], ascending=[0,0], ignore_index=True, inplace=True)

df.to_csv(filename, header=True, index=False, sep='\t', mode='w') # a=append, w=overwrite

In [15]:
df

Unnamed: 0,Deep size,Num units,Learning rate,Accuracy,Tiempo de ejecución
0,8,32,16,0.970767,101.786902
1,16,32,8,0.968767,140.648438
2,16,16,32,0.968633,118.574814
3,8,4,16,0.968333,90.81738
4,16,32,16,0.968133,112.127909
5,8,16,4,0.968067,90.934768
6,16,8,32,0.967733,113.535275
7,8,32,4,0.9677,102.355333
8,8,32,8,0.967467,81.492269
9,16,32,4,0.967,138.207336


In [16]:
np.sum(df[["Tiempo de ejecución"]])/60/60

Tiempo de ejecución    0.770105
dtype: float64

In [13]:
# rm -rf /tmp/tb_logs/

In [14]:
%!kill 13652

UsageError: Line magic function `%!kill` not found.


In [None]:
%reload_ext tensorboard

In [None]:
%tensorboard --logdir logs/hparam_tuning