# Hyperparameter tuning with Keras Tuner
### The target is validation loss and the best model is saved. Tunable parameters are the number of hidden layers, the number of units in each layer, dropout, and batch size.

In [2]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import math
from LSTMutils import MeanVarianceLogLikelyhoodLoss
from LSTMutils import LSTMnetwork
from sklearn.model_selection import train_test_split
import keras_tuner as kt

# Seed NumPy and TensorFlow so repeated runs start from the same random state.
np.random.seed(42)
tf.random.set_seed(42)

SequenceLength = 250      # number of leading columns of each row fed to the network
validation_split = 0.25   # fraction of the non-test rows held out for validation
# batch_size is not fixed here: it is tuned in LSTMHyperModel.fit
NumEpochs = 2000          # passed as `epochs` to tuner.search
test_split = 0.2          # fraction of all rows held out as the test set

df = pd.read_csv(r"../TrainingData/ExperimentalTrainingSet.csv", sep=',', header=0)

# Column 0 is used as the stratification label; the remaining columns are the data.
labels = df.iloc[:, 0]
df_data = df.iloc[:, 1:]

df_train, df_test = train_test_split(
    df_data,
    test_size=test_split,
    train_size=1 - test_split,
    random_state=42,
    shuffle=True,
    stratify=labels,
)

# df_train no longer contains the label column (it was dropped when building
# df_data), so its first column is a data value, not a label. Look the labels
# up by row index instead so the validation split is stratified on the labels.
train_labels = labels.loc[df_train.index]

df_train, df_val = train_test_split(
    df_train,
    test_size=validation_split,
    train_size=1 - validation_split,
    random_state=42,
    shuffle=True,
    stratify=train_labels,
)

# Min-max normalise using statistics from the training rows only, then apply
# the same scaling to the validation and test rows so nothing leaks from them.
min_value, max_value = df_train.min().min(), df_train.max().max()
df_norm_train = (df_train - min_value) / (max_value - min_value)
df_norm_test = (df_test - min_value) / (max_value - min_value)
df_norm_val = (df_val - min_value) / (max_value - min_value)

def _to_lstm_arrays(frame):
    """Turn a normalised frame into the (inputs, targets) pair used for training.

    Inputs are the first ``SequenceLength`` columns with a trailing feature
    axis added. Targets repeat the value found in column
    ``SequenceLength - 1`` at every time step and carry the same trailing
    axis, giving shape (rows, SequenceLength, 1).
    """
    window = frame.iloc[:, :SequenceLength].values
    final_value = frame.iloc[:, SequenceLength - 1].values
    inputs = np.expand_dims(window, 2)
    repeated = np.broadcast_to(
        final_value[:, None], (final_value.shape[0], SequenceLength)
    )
    targets = np.expand_dims(repeated, 2)
    return inputs, targets


X_train, y_train = _to_lstm_arrays(df_norm_train)
X_val, y_val = _to_lstm_arrays(df_norm_val)


#Implementation informed by https://github.com/keras-team/keras-tuner/issues/122

class LSTMHyperModel(kt.HyperModel):
    """Keras Tuner hypermodel: a stack of LSTM layers with a tunable batch size.

    Tuned hyperparameters:
      * ``layer_1_units`` - width of the first LSTM layer
      * ``LSTM Layers`` - number of additional LSTM layers (1 to 8)
      * ``layer_{k}_units`` - width of additional layer k (k starts at 2)
      * ``batch_size`` - batch size used by ``fit``

    Dropout, L2 kernel regularisation, learning rate and optimizer choice are
    not tuned here; the optimizer is fixed to "adam".
    """

    # Candidate widths shared by every tunable LSTM layer.
    UNIT_CHOICES = [5, 10, 20, 50, 100, 200, 500, 750, 1000]

    def build(self, hp):
        """Build and compile the model for one trial.

        Args:
            hp: the ``HyperParameters`` object supplied by the tuner.

        Returns:
            A compiled ``Sequential`` model taking input of shape
            (SequenceLength, 1) and emitting 2 values per time step.
        """
        # Sequential expects a *list* of layers; passing a bare layer is only
        # tolerated by some Keras versions, so wrap it explicitly.
        model = keras.models.Sequential([
            keras.layers.LSTM(
                hp.Choice("layer_1_units", self.UNIT_CHOICES),
                input_shape=(SequenceLength, 1),
                return_sequences=True,
            )
        ])

        for i in range(hp.Int("LSTM Layers", min_value=1, max_value=8)):
            model.add(
                keras.layers.LSTM(
                    hp.Choice(f"layer_{i+2}_units", self.UNIT_CHOICES),
                    return_sequences=True,
                )
            )

        # Two softplus outputs per time step, consumed by the loss below.
        # NOTE(review): presumably these are the predicted mean and variance -
        # confirm against MeanVarianceLogLikelyhoodLoss in LSTMutils.
        model.add(keras.layers.LSTM(2, activation='softplus', return_sequences=True))

        model.compile(optimizer="adam", loss=MeanVarianceLogLikelyhoodLoss)

        return model

    def fit(self, hp, model, *args, **kwargs):
        """Train ``model`` with a tuned batch size.

        All positional and keyword arguments are forwarded to ``model.fit``.
        Callers must not pass ``batch_size`` themselves: it would collide with
        the tuned value supplied here and raise ``TypeError``.

        Returns:
            Whatever ``model.fit`` returns.
        """
        return model.fit(
            *args,
            batch_size=hp.Choice("batch_size", [2, 4, 8, 16, 32, 64, 128]),
            **kwargs,
        )

# Random search over LSTMHyperModel, scored on validation loss.
# overwrite=False makes the tuner reload any earlier search state stored
# under `directory` instead of starting from scratch.
tuner = kt.RandomSearch(
    LSTMHyperModel(),
    objective='val_loss',
    overwrite=False,
    max_trials=300,
    seed=42,
    directory="../Models/ExperimentalDataModelTuner",
)

# Stop a trial once val_loss has not improved for 500 epochs, and abort
# immediately if the loss becomes NaN.
search_callbacks = [
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=500),
    keras.callbacks.TerminateOnNaN(),
]

tuner.search(
    X_train,
    y_train,
    epochs=NumEpochs,
    validation_data=(X_val, y_val),
    callbacks=search_callbacks,
)

# Retrieve the best model found, print its architecture and save it.
best_model = tuner.get_best_models()[0]
best_model.summary()
best_model.save("../Models/ExperimentalDataModelTuner")

INFO:tensorflow:Reloading Oracle from existing project ../Models/ExperimentalDataModelTuner/untitled_project/oracle.json


2023-11-06 13:41:13.536300: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: forward compatibility was attempted on non supported HW
2023-11-06 13:41:13.536342: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: simonjward-MS-7C91
2023-11-06 13:41:13.536349: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: simonjward-MS-7C91
2023-11-06 13:41:13.536549: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 525.147.5
2023-11-06 13:41:13.536573: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 525.125.6
2023-11-06 13:41:13.536579: E tensorflow/stream_executor/cuda/cuda_diagnostics.cc:313] kernel version 525.125.6 does not match DSO version 525.147.5 -- cannot find working devices in this configuration
2023-11-06 13:41:13.536863: I tensorflow/core/platform/cpu_feature_guard

INFO:tensorflow:Reloading Tuner from ../Models/ExperimentalDataModelTuner/untitled_project/tuner0.json

Search: Running Trial #81

Value             |Best Value So Far |Hyperparameter
50                |50                |layer_1_units
4                 |1                 |LSTM Layers
100               |500               |layer_2_units
2                 |4                 |batch_size
100               |1000              |layer_3_units
200               |750               |layer_4_units
5                 |5                 |layer_5_units
500               |50                |layer_6_units
5                 |5                 |layer_7_units
50                |500               |layer_8_units
750               |20                |layer_9_units

Epoch 1/2000

KeyboardInterrupt: 

In [3]:
# Print the hyperparameters and score of the 30 best trials recorded so far.
tuner.results_summary(num_trials=30)

Results summary
Results in ../Models/ExperimentalDataModelTuner/untitled_project
Showing 30 best trials
<keras_tuner.engine.objective.Objective object at 0x7f6298f2f520>
Trial summary
Hyperparameters:
layer_1_units: 50
LSTM Layers: 1
layer_2_units: 500
batch_size: 4
layer_3_units: 1000
layer_4_units: 750
layer_5_units: 5
layer_6_units: 50
layer_7_units: 5
layer_8_units: 500
layer_9_units: 20
Score: -3.926276922225952
Trial summary
Hyperparameters:
layer_1_units: 10
LSTM Layers: 1
layer_2_units: 200
batch_size: 4
layer_3_units: 500
layer_4_units: 50
layer_5_units: 5
layer_6_units: 5
layer_7_units: 20
layer_8_units: 750
layer_9_units: 5
Score: -3.880645275115967
Trial summary
Hyperparameters:
layer_1_units: 50
LSTM Layers: 2
layer_2_units: 5
batch_size: 8
layer_3_units: 50
layer_4_units: 100
layer_5_units: 5
layer_6_units: 750
layer_7_units: 20
layer_8_units: 10
layer_9_units: 20
Score: -3.876002311706543
Trial summary
Hyperparameters:
layer_1_units: 20
LSTM Layers: 3
layer_2_units: 10
b

In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import math
from LSTMutils import MeanVarianceLogLikelyhoodLoss
from LSTMutils import LSTMnetwork
from sklearn.model_selection import train_test_split
import keras_tuner as kt

# Seed NumPy and TensorFlow. set_seed must actually be *called*; referencing
# the function without parentheses leaves TensorFlow unseeded.
np.random.seed(42)
tf.random.set_seed(42)

SequenceLength = 250      # number of leading columns of each row fed to the network
validation_split = 0.25   # passed to tuner.search as validation_split
batch_size = 32           # fixed batch size for this search
NumEpochs = 5             # passed as `epochs` to tuner.search

df = pd.read_csv(r"../TrainingData/ExperimentalTrainingSet.csv", sep=',', header=0)

# Column 0 is used as the stratification label; the remaining columns are the data.
labels = df.iloc[:, 0]
dftrain = df.iloc[:, 1:]

# Split before normalising so the held-out test rows do not influence the
# scaling constants.
df_train, df_test = train_test_split(
    dftrain,
    test_size=0.2,
    train_size=0.8,
    random_state=42,
    shuffle=True,
    stratify=labels,
)

# Min-max scaling constants come from the training rows only.
# NOTE: the validation rows are carved out of df_train later via
# validation_split, so they still contribute to these constants.
min_value, max_value = df_train.min().min(), df_train.max().max()
df_train = (df_train - min_value) / (max_value - min_value)
df_test = (df_test - min_value) / (max_value - min_value)

# Inputs: first SequenceLength columns, with a trailing feature axis.
# Targets: the value in column SequenceLength - 1, repeated at every time step.
X_train = df_train.iloc[:, :SequenceLength].values
y_train = df_train.iloc[:, SequenceLength - 1].values
X_train = np.expand_dims(X_train, 2)
y_train = np.broadcast_to(y_train[:, None], (y_train.shape[0], SequenceLength))
y_train = np.expand_dims(y_train, 2)

print(X_train.shape)
print(y_train.shape)

def build_model(hp):
    """Build and compile a stacked-LSTM model for one tuner trial.

    Tuned hyperparameters: ``layer_1_units`` (width of the first layer),
    ``LSTM Layers`` (0 to 8 additional layers) and ``layer_{k}_units`` for
    each additional layer k (k starts at 2). Dropout, L2 regularisation and
    learning rate are not tuned here.

    Args:
        hp: the ``HyperParameters`` object supplied by the tuner.

    Returns:
        A compiled ``Sequential`` model taking input of shape
        (SequenceLength, 1) and emitting 2 values per time step.
    """
    unit_choices = [10, 20, 50, 100, 200, 300, 500, 1000]

    # Sequential expects a *list* of layers; passing a bare layer is only
    # tolerated by some Keras versions, so wrap it explicitly.
    model = keras.models.Sequential([
        keras.layers.LSTM(
            hp.Choice("layer_1_units", unit_choices),
            input_shape=(SequenceLength, 1),
            return_sequences=True,
        )
    ])

    for i in range(hp.Int("LSTM Layers", min_value=0, max_value=8)):
        model.add(
            keras.layers.LSTM(
                hp.Choice(f"layer_{i+2}_units", unit_choices),
                return_sequences=True,
            )
        )

    # Two softplus outputs per time step, consumed by the loss below.
    # NOTE(review): presumably predicted mean and variance - confirm against
    # MeanVarianceLogLikelyhoodLoss in LSTMutils.
    model.add(keras.layers.LSTM(2, activation='softplus', return_sequences=True))

    model.compile(optimizer="adam", loss=MeanVarianceLogLikelyhoodLoss)

    return model

# Random search over build_model, scored on validation loss.
# NOTE(review): this uses the same `directory` (and default project name) as
# the LSTMHyperModel search earlier in this file; with overwrite=False any
# existing search state there is reloaded rather than replaced - confirm
# that sharing the directory is intended.
tuner = kt.RandomSearch(
    build_model,
    objective='val_loss',
    overwrite=False,
    max_trials=300,
    seed=42,
    directory="../Models/ExperimentalDataModelTuner",
)  # the original call was missing this closing parenthesis (SyntaxError)

# NOTE: with NumEpochs = 5, a patience of 10 can never trigger early stopping.
tuner.search(
    X_train,
    y_train,
    epochs=NumEpochs,
    validation_split=validation_split,
    batch_size=batch_size,
    callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)],
)

# Retrieve the best model found, print its architecture and save it.
best_model = tuner.get_best_models()[0]
best_model.summary()
best_model.save("../Models/ExperimentalDataModelTuner")