# Hyperparameter tuning (of pretrained model using simulated data) with Keras Tuner
### The objective is the validation loss and the best model is saved. Tunable parameters: number of hidden LSTM layers, number of units in each layer, and batch size (dropout tuning is currently commented out in the code).

In [4]:
import math

import keras_tuner as kt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras

from LSTMutils import LSTMnetwork
from LSTMutils import MeanVarianceLogLikelyhoodLoss

# Seed NumPy and TensorFlow so that repeated runs start from the same state.
np.random.seed(42)
tf.random.set_seed(42)

# Configuration shared by the data preparation and the tuner below.
SequenceLength = 250      # number of leading columns used as the input window
validation_split = 0.1    # fraction of the rows held out for validation
NumEpochs = 1000          # upper bound on epochs per trial (passed to tuner.search)

df = pd.read_csv(r"../TrainingData/SimulatedTrainingSet10000.csv", sep=',', header=0)

# Column 0 is used only to stratify the split; the remaining columns are the series.
labels = df.iloc[:, 0]
df_data = df.iloc[:, 1:]

df_train, df_val = train_test_split(
    df_data,
    test_size=validation_split,
    train_size=1 - validation_split,
    random_state=42,
    shuffle=True,
    stratify=labels,
)

# Min-max normalise with statistics taken from the training rows only, so the
# validation rows do not influence the scaling.
# Only train/validation sets exist in this cell; no test split is created here.
min_value, max_value = df_train.min().min(), df_train.max().max()
df_norm_train = (df_train - min_value) / (max_value - min_value)
df_norm_val = (df_val - min_value) / (max_value - min_value)


def make_sequences(df_norm, sequence_length=SequenceLength):
    """Turn a normalised frame into network inputs and per-time-step targets.

    Parameters
    ----------
    df_norm : pandas.DataFrame
        One series per row.
    sequence_length : int
        Number of leading columns used as the input window.

    Returns
    -------
    X : numpy.ndarray, shape (rows, sequence_length, 1)
        The first ``sequence_length`` columns with a trailing channel axis.
    y : numpy.ndarray, shape (rows, sequence_length, 1)
        The value in column ``sequence_length - 1`` repeated along the time
        axis (a read-only broadcast view).
    """
    # NOTE(review): the target is the last value of the input window itself
    # (column sequence_length - 1), not a later column - confirm this is intended.
    X = np.expand_dims(df_norm.iloc[:, :sequence_length].values, 2)
    target = df_norm.iloc[:, sequence_length - 1].values
    y = np.broadcast_to(target[:, None], (target.shape[0], sequence_length))
    y = np.expand_dims(y, 2)
    return X, y


X_train, y_train = make_sequences(df_norm_train)
X_val, y_val = make_sequences(df_norm_val)


#Implementation informed by https://github.com/keras-team/keras-tuner/issues/122

class LSTMHyperModel(kt.HyperModel):
    """Keras Tuner search space for a stacked LSTM network.

    Hyperparameters registered by this class:

    * ``layer_1_units``   - units of the first LSTM layer.
    * ``LSTM Layers``     - number of additional hidden LSTM layers (1 to 5).
    * ``layer_{k}_units`` - units of additional hidden layer ``k`` (k = 2..6).
    * ``batch_size``      - training batch size, chosen in :meth:`fit`.

    Dropout, L2 regularisation, learning rate and optimizer choice are not
    tuned; the optimizer is fixed to ``"adam"``.
    """

    def build(self, hp):
        """Build and compile a model for the hyperparameter values in ``hp``.

        Parameters
        ----------
        hp : keras_tuner.HyperParameters
            Container from which the layer count and layer sizes are drawn.

        Returns
        -------
        keras.Model
            Compiled model taking input of shape ``(SequenceLength, 1)`` and
            returning a sequence with 2 values per time step.
        """
        # Sequential expects a list of layers, so the first layer is wrapped
        # in a list rather than passed bare.
        model = keras.models.Sequential([
            keras.layers.LSTM(
                hp.Choice("layer_1_units", [10, 20, 50, 100, 200, 500]),
                input_shape=(SequenceLength, 1),
                return_sequences=True,
            )
        ])

        # Additional hidden layers are named layer_2, layer_3, ... because
        # layer_1 is the input layer above.
        for i in range(hp.Int("LSTM Layers", min_value=1, max_value=5)):
            model.add(
                keras.layers.LSTM(
                    hp.Choice(f"layer_{i+2}_units", [5, 10, 20, 50, 100, 200, 500]),
                    return_sequences=True,
                )
            )

        # Output layer: two softplus-activated (hence positive) values per step.
        # Presumably the mean and variance consumed by
        # MeanVarianceLogLikelyhoodLoss - confirm against LSTMutils.
        model.add(keras.layers.LSTM(2, activation='softplus', return_sequences=True))

        model.compile(optimizer="adam", loss=MeanVarianceLogLikelyhoodLoss)

        return model

    def fit(self, hp, model, *args, **kwargs):
        """Train ``model`` with a tunable batch size.

        All positional and keyword arguments supplied by the tuner are
        forwarded unchanged to ``model.fit``; only ``batch_size`` is added.

        Returns
        -------
        The return value of ``model.fit``.
        """
        batch_size = hp.Choice("batch_size", [64, 128, 256, 512, 1024])
        return model.fit(*args, batch_size=batch_size, **kwargs)

# Bayesian optimisation over the LSTMHyperModel search space, minimising val_loss.
# overwrite=False keeps any results already stored under `directory`.
tuner = kt.BayesianOptimization(
    LSTMHyperModel(),
    objective='val_loss',
    overwrite=False,
    max_trials=300,
    seed=42,
    directory="../Models/SimulatedPretrainedModelTuner",
)

# Each trial stops early after 50 epochs without val_loss improvement, or
# immediately when the training loss becomes NaN.
search_callbacks = [
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=50),
    keras.callbacks.TerminateOnNaN(),
]
tuner.search(
    X_train,
    y_train,
    epochs=NumEpochs,
    validation_data=(X_val, y_val),
    callbacks=search_callbacks,
)

# A diverged trial can be recorded with a NaN score and NaN does not order
# reliably, so such a trial may be ranked as "best". Select the lowest finite
# val_loss explicitly instead of relying on the tuner's ranking.
finite_trials = [
    trial
    for trial in tuner.oracle.trials.values()
    if trial.score is not None and math.isfinite(trial.score)
]
if not finite_trials:
    raise RuntimeError("No trial finished with a finite val_loss; nothing to save.")
best_trial = min(finite_trials, key=lambda trial: trial.score)

best_model = tuner.load_model(best_trial)
best_model.summary()
# NOTE(review): the model is saved into the same directory the tuner uses for
# its trial data - consider a dedicated path.
best_model.save("../Models/SimulatedPretrainedModelTuner")

Trial 64 Complete [00h 02m 10s]
val_loss: -0.9200577735900879

Best val_loss So Far: nan
Total elapsed time: 1d 13h 28m 42s

Search: Running Trial #65

Value             |Best Value So Far |Hyperparameter
20                |500               |layer_1_units
1                 |1                 |LSTM Layers
5                 |50                |layer_2_units
64                |64                |batch_size
200               |None              |layer_3_units
500               |None              |layer_4_units
100               |None              |layer_5_units
5                 |None              |layer_6_units

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000

KeyboardInterrupt: 

In [5]:
# Print the hyperparameters and score of the 20 top-ranked trials.
# Trials with a NaN score can appear first in this ranking.
tuner.results_summary(num_trials=20)

Results summary
Results in ../Models/SimulatedPretrainedModelTuner/untitled_project
Showing 20 best trials
<keras_tuner.engine.objective.Objective object at 0x7f9fb9c924f0>
Trial summary
Hyperparameters:
layer_1_units: 500
LSTM Layers: 1
layer_2_units: 50
batch_size: 64
Score: nan
Trial summary
Hyperparameters:
layer_1_units: 10
LSTM Layers: 4
layer_2_units: 200
batch_size: 64
layer_3_units: 100
layer_4_units: 50
layer_5_units: 100
layer_6_units: 500
Score: -5.104083061218262
Trial summary
Hyperparameters:
layer_1_units: 50
LSTM Layers: 3
layer_2_units: 200
batch_size: 128
layer_3_units: 50
layer_4_units: 10
layer_5_units: 500
layer_6_units: 5
Score: -5.094405651092529
Trial summary
Hyperparameters:
layer_1_units: 200
LSTM Layers: 2
layer_2_units: 100
batch_size: 64
layer_3_units: 200
layer_4_units: 50
layer_5_units: 20
layer_6_units: 100
Score: -5.0400238037109375
Trial summary
Hyperparameters:
layer_1_units: 100
LSTM Layers: 4
layer_2_units: 10
batch_size: 64
layer_3_units: 5
layer_4