In [1]:
# Clear any logs from previous runs
!rmdir ".\logs\hparam_tuning" /S /Q  #Para windows

%reload_ext tensorboard

El sistema no puede encontrar el archivo especificado.
El sistema no puede encontrar el archivo especificado.


In [2]:
import numpy as np
import pandas as pd
import time, os

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense
from tensorboard.plugins.hparams import api as hp

from keras.utils import to_categorical
from keras.losses import CategoricalCrossentropy
from keras.models import Model

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split as split

Using TensorFlow backend.


In [3]:
scaler = StandardScaler()

### Conjunto de datos: SDSS DR17

In [None]:
datas = pd.read_csv('./SDSS/star_classification.csv')

In [4]:
data = pd.read_csv('./SDSS/star_classification.csv')
cols = ['alpha','delta','u','g','r','i','z','redshift','class']
data = data[cols]
data["class"]=[0 if i == "GALAXY" else 1 if i == "STAR" else 2 for i in data["class"]]
print(data.head())
data = data.to_numpy()

        alpha      delta         u         g         r         i         z  \
0  135.689107  32.494632  23.87882  22.27530  20.39501  19.16573  18.79371   
1  144.826101  31.274185  24.77759  22.83188  22.58444  21.16812  21.61427   
2  142.188790  35.582444  25.26307  22.66389  20.60976  19.34857  18.94827   
3  338.741038  -0.402828  22.13682  23.77656  21.61162  20.50454  19.25010   
4  345.282593  21.183866  19.43718  17.58028  16.49747  15.97711  15.54461   

   redshift  class  
0  0.634794      0  
1  0.779136      0  
2  0.644195      0  
3  0.932346      0  
4  0.116123      0  


In [5]:
def prepare_dataset(data):
    X, Y = np.empty((0)), np.empty((0))
    X = data[:, :8]
    Y = data[:, 8]
    Y = to_categorical(Y, num_classes=3)
    return X, Y

In [6]:
X, Y = prepare_dataset(data)
X_train, X_test, Y_train, Y_test = split(X, Y, test_size = 0.3, random_state = 0)
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

lenx, input_shape = np.shape(X)

### Hiperparámetros del modelo

In [7]:
HP_LAYERS =    hp.HParam('layers', hp.Discrete([5,10,20,30,40]))
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([5,10,20,30,40]))
HP_LEARNING  = hp.HParam('learning_rate', hp.Discrete([4,8,16,32]))
# HP_BATCHSIZE = hp.HParam('batch_size', hp.Discrete([16, 32]))

callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_categorical_accuracy', mode='max',
                                   min_delta=0,
                                   patience=6,
                                   restore_best_weights=True)]
batch_size = 128
epochs = 15

In [8]:
# METRIC_ACCURACY = 'accuracy'
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
# with tf.summary.FileWriter('logs/hparam_tuning', sess.graph):
#     init = tf.initialize_all_variables()
#     sess.run(init)
    hp.hparams_config(
        hparams=[HP_LAYERS, HP_NUM_UNITS, HP_LEARNING],
        metrics=[hp.Metric('loss', display_name="Accuracy")])

In [9]:
def train_test_model(hparams):    
    
    # Train LSTM model and predict on validation set
    model = keras.Sequential()
    model.add(Input(shape=(int(X_train.shape[1]),)))
    model.add(Dense(hparams[HP_NUM_UNITS], input_shape=(int(X_train.shape[1]),)))
    
    for i in range(hparams[HP_LAYERS]):        
        model.add(Dense(hparams[HP_NUM_UNITS], activation='relu'))
    model.add(Dense(3, activation=tf.nn.softmax))
     
    optimizer = keras.optimizers.Adam(learning_rate=hparams[HP_LEARNING]*10**(-4), beta_1=0.9, beta_2=0.999, epsilon=1e-3)
    model.compile(
            optimizer=optimizer,
            loss=CategoricalCrossentropy(),
            metrics=["categorical_accuracy"])
    
    # Run with 1 epoch to speed things up for demo purposes

    model.fit(X_train, Y_train, epochs=epochs, validation_data=(X_test, Y_test),
              callbacks=callbacks, batch_size=batch_size, shuffle=True, verbose=0)

    _, loss = model.evaluate(X_test, Y_test)
    
    return loss

In [10]:
def run(run_dir, hparams):
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        loss = train_test_model(hparams)
        tf.summary.scalar("loss", loss, step=1)
        return loss

In [11]:
session_num = 0
datos = []

for deep_layers in HP_LAYERS.domain.values:
    for num_units in HP_NUM_UNITS.domain.values:
        for learning_rate in HP_LEARNING.domain.values:
#             for batch_size in HP_BATCHSIZE.domain.values:
            t = time.time()
            hparams = {

                HP_LAYERS: deep_layers,
                HP_NUM_UNITS: num_units,
                HP_LEARNING: learning_rate,
#                     HP_BATCHSIZE: batch_size
            }
            run_name = "run-%d" % session_num
            print('\n--- Starting trial: %s' % run_name)
            print({h.name: hparams[h] for h in hparams})
            score = run('logs/hparam_tuning/' + run_name, hparams)
            t = time.time()-t
            session_num += 1
            print("Accuracy:", score, "Tiempo transcurrido:", t)
            
            datos.append([deep_layers, num_units, learning_rate, score, t])

print(session_num)


--- Starting trial: run-0
{'layers': 5, 'num_units': 5, 'learning_rate': 4}
Accuracy: 0.9622333 Tiempo transcurrido: 63.71489977836609

--- Starting trial: run-1
{'layers': 5, 'num_units': 5, 'learning_rate': 8}
Accuracy: 0.9644333 Tiempo transcurrido: 76.927969455719

--- Starting trial: run-2
{'layers': 5, 'num_units': 5, 'learning_rate': 16}
Accuracy: 0.9694 Tiempo transcurrido: 81.68807935714722

--- Starting trial: run-3
{'layers': 5, 'num_units': 5, 'learning_rate': 32}
Accuracy: 0.9687333 Tiempo transcurrido: 71.85704350471497

--- Starting trial: run-4
{'layers': 5, 'num_units': 10, 'learning_rate': 4}
Accuracy: 0.9671 Tiempo transcurrido: 83.73688459396362

--- Starting trial: run-5
{'layers': 5, 'num_units': 10, 'learning_rate': 8}
Accuracy: 0.9673667 Tiempo transcurrido: 85.2121639251709

--- Starting trial: run-6
{'layers': 5, 'num_units': 10, 'learning_rate': 16}
Accuracy: 0.9663 Tiempo transcurrido: 66.70876121520996

--- Starting trial: run-7
{'layers': 5, 'num_units': 

Accuracy: 0.9684333 Tiempo transcurrido: 112.58196067810059

--- Starting trial: run-33
{'layers': 10, 'num_units': 30, 'learning_rate': 8}
Accuracy: 0.96893334 Tiempo transcurrido: 104.847975730896

--- Starting trial: run-34
{'layers': 10, 'num_units': 30, 'learning_rate': 16}
Accuracy: 0.96893334 Tiempo transcurrido: 117.40698719024658

--- Starting trial: run-35
{'layers': 10, 'num_units': 30, 'learning_rate': 32}
Accuracy: 0.96933335 Tiempo transcurrido: 100.11485528945923

--- Starting trial: run-36
{'layers': 10, 'num_units': 40, 'learning_rate': 4}
Accuracy: 0.9696 Tiempo transcurrido: 121.40403604507446

--- Starting trial: run-37
{'layers': 10, 'num_units': 40, 'learning_rate': 8}
Accuracy: 0.9694333 Tiempo transcurrido: 112.28766131401062

--- Starting trial: run-38
{'layers': 10, 'num_units': 40, 'learning_rate': 16}
Accuracy: 0.9694 Tiempo transcurrido: 80.31105041503906

--- Starting trial: run-39
{'layers': 10, 'num_units': 40, 'learning_rate': 32}
Accuracy: 0.9678 Tiemp

Accuracy: 0.59363335 Tiempo transcurrido: 72.14480948448181

--- Starting trial: run-66
{'layers': 30, 'num_units': 10, 'learning_rate': 16}
Accuracy: 0.9598333 Tiempo transcurrido: 147.36121129989624

--- Starting trial: run-67
{'layers': 30, 'num_units': 10, 'learning_rate': 32}
Accuracy: 0.59363335 Tiempo transcurrido: 75.17692136764526

--- Starting trial: run-68
{'layers': 30, 'num_units': 20, 'learning_rate': 4}
Accuracy: 0.9666333 Tiempo transcurrido: 171.72831296920776

--- Starting trial: run-69
{'layers': 30, 'num_units': 20, 'learning_rate': 8}
Accuracy: 0.7521667 Tiempo transcurrido: 178.69517755508423

--- Starting trial: run-70
{'layers': 30, 'num_units': 20, 'learning_rate': 16}
Accuracy: 0.96896666 Tiempo transcurrido: 163.03982543945312

--- Starting trial: run-71
{'layers': 30, 'num_units': 20, 'learning_rate': 32}
Accuracy: 0.965 Tiempo transcurrido: 170.45292925834656

--- Starting trial: run-72
{'layers': 30, 'num_units': 30, 'learning_rate': 4}
Accuracy: 0.9636 Ti

Accuracy: 0.59363335 Tiempo transcurrido: 125.78302764892578

--- Starting trial: run-99
{'layers': 40, 'num_units': 40, 'learning_rate': 32}
Accuracy: 0.59363335 Tiempo transcurrido: 126.45973467826843
100


### Guardar datos

In [12]:
filename = "historial_sdss_tunning.txt"
df = pd.DataFrame(datos, columns = ["Deep size", "Num units", "Learning rate", "Accuracy", "Tiempo de ejecución"])

df.sort_values(by=["Accuracy", "Tiempo de ejecución"], ascending=[0,0], ignore_index=True, inplace=True)

df.to_csv(filename, header=True, index=False, sep='\t', mode='w') # a=append, w=overwrite

In [13]:
df

Unnamed: 0,Deep size,Num units,Learning rate,Accuracy,Tiempo de ejecución
0,10,20,16,0.971733,95.412978
1,5,30,32,0.970600,86.703002
2,5,40,8,0.970467,93.795267
3,10,5,16,0.969633,71.592510
4,20,40,8,0.969600,152.674356
...,...,...,...,...,...
95,30,5,8,0.593633,66.433003
96,30,5,16,0.593633,64.768998
97,30,5,32,0.593633,63.366240
98,20,5,16,0.593633,56.559066


In [14]:
np.sum(df[["Tiempo de ejecución"]])/60/60

Tiempo de ejecución    3.134891
dtype: float64

In [15]:
# rm -rf /tmp/tb_logs/

In [16]:
%!kill 13652

UsageError: Line magic function `%!kill` not found.


In [None]:
%reload_ext tensorboard

In [None]:
%tensorboard --logdir logs/hparam_tuning