<a id='ExperimentalDataModelTunerTop'></a>
# Hyperparameter Tuning for Single LSTM Model Trained on Experimental Data using Keras Tuner

Hyperparameter tuning of single LSTM model on the experimental dataset using keras tuner.

- Tunable parameters are:
    - number of nodes in each layer (5-1000)
    - number of hidden layers (2-9)
    - batch size (2-128)
- Optional tunable parameters are:
    - regularization
    - dropout
    - custom learning rate

Hyperparameter tuning parameters
- max epoch = 2000
- random search
- early stopping after 500 epochs with no improvement in validation loss, or immediately if the loss becomes NaN

The hyperparameters and validation loss from the epoch and model that gave the best validation loss are saved, and inform the hyperparameters of the [model](ExperimentalDataEnsembleModels.ipynb#ExperimentalDataEnsembleModelsTop)

Minimum loss is ln(minimum_variance)/2 = ln(1e-6)/2 ≈ -6.91 (for a minimum variance chosen to be 1e-6)

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from LSTMutils import MeanVarianceLogLikelyhoodLoss
import LSTMutils
from sklearn.model_selection import train_test_split
import keras_tuner as kt

# ---- run configuration ----
SequenceLength = 250          # passed to LSTMutils.ExperimentalData; also the LSTM input length
NumEpochs = 2000              # maximum number of epochs per tuner trial
EarlyStoppingPatience = 500   # epochs without val_loss improvement before a trial is stopped
NumTopResults = 30            # number of trials listed in the final results summary
test_split = 0.2              # fraction of the full dataset held out as the test set
validation_split = 0.25       # fraction of the remaining training data used for validation

# switches for the optional hyperparameters
AddDropout = False
AddRegularization = False
CustomLearningRate = False

# seed NumPy and TensorFlow so that runs are repeatable
np.random.seed(42)
tf.random.set_seed(42)

# load the experimental dataset
ExperimentalData = LSTMutils.ExperimentalData(SequenceLength=SequenceLength)
time, concentrations, df_data, unused = ExperimentalData.ReadData()

# Stratified train/test split, sized by test_split.
# The split is reproducible as long as the data order, the random_state and
# (oddly) the type of the stratification labels stay the same (str is used here).
df_train, df_test = train_test_split(
    df_data,
    train_size=1-test_split,
    test_size=test_split,
    stratify=concentrations,
    shuffle=True,
    random_state=42,
)

# Stratified train/validation split of the remaining training data, sized by
# validation_split; the stratification labels are the first column of df_train.
train_concentrations = df_train.iloc[:,0]
df_train, df_val = train_test_split(
    df_train,
    train_size=1-validation_split,
    test_size=validation_split,
    stratify=train_concentrations,
    shuffle=True,
    random_state=42,
)

# normalise the time series of all three subsets
df_norm_train, df_norm_test, df_norm_val = ExperimentalData.NormalizeData(df_train,df_test,df_val)

# build correctly shaped (X, y) pairs for training and validation;
# y is defined as the last element in X
X_train, y_train = ExperimentalData.Shape(df_norm_train)
X_val, y_val = ExperimentalData.Shape(df_norm_val)

# implementation informed by https://github.com/keras-team/keras-tuner/issues/122
# set up class to enable batch size to be searched with the rest of the hyperparameters
class LSTMHyperModel(kt.HyperModel):
    """Keras Tuner hypermodel describing a stack of LSTM layers with a 2-unit output.

    The search space covers the number of units in every hidden layer, the
    number of hidden layers and the batch size. Depending on the module-level
    switches ``AddRegularization``, ``AddDropout`` and ``CustomLearningRate``,
    it also covers an L2 kernel regularizer, per-layer dropout and the Adam
    learning rate.
    """

    def build(self, hp):
        """Build and compile one candidate model.

        Args:
            hp: the ``keras_tuner.HyperParameters`` object for the current trial.

        Returns:
            A compiled ``keras.models.Sequential`` model.
        """
        # candidate layer widths shared by every hidden LSTM layer
        unit_choices = [5,10,20,50,100,200,500,750,1000]

        # if needed, add L2 regularizer (the same instance is shared by all hidden layers)
        if AddRegularization:
            Regularizer = keras.regularizers.L2(hp.Choice("L2Regularizer1", [0., 1e-4]))
        else:
            Regularizer = None

        # define network architecture of input and first hidden layer;
        # the layer is passed as a list because Sequential expects a list of layers
        model = keras.models.Sequential([
            keras.layers.LSTM(hp.Choice("layer_1_units", unit_choices)
                              , input_shape=(SequenceLength,1), return_sequences=True
                              , kernel_regularizer=Regularizer)
        ])

        # if needed add dropout
        if AddDropout:
            if hp.Boolean("layer_1_dropout"):
                model.add(keras.layers.Dropout(rate=0.25))

        # define network architecture of subsequent hidden layers
        # (1-8 extra layers, i.e. 2-9 hidden layers in total)
        for i in range(hp.Int("LSTM Layers", min_value = 1, max_value = 8)):
            model.add(keras.layers.LSTM(hp.Choice(f"layer_{i+2}_units", unit_choices)
                                        , return_sequences=True, kernel_regularizer=Regularizer))

            # if needed add dropout
            if AddDropout:
                if hp.Boolean(f"layer_{i+2}_dropout"):
                    model.add(keras.layers.Dropout(rate=0.25))

        # if needed, tune the Adam learning rate; otherwise keep Keras' default Adam.
        # The sampled value has to be handed to the optimizer, otherwise the "lr"
        # hyperparameter is recorded by the tuner but has no effect on training.
        if CustomLearningRate:
            learning_rate = hp.Float("lr", min_value=1e-5, max_value=1e-2, sampling="log")
            optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
        else:
            optimizer = "adam"

        # add output layer with softplus activation to ensure variance predictions are positive
        # NOTE(review): softplus is applied to both output units, so the other
        # output is constrained to be positive as well - confirm this is intended.
        model.add(keras.layers.LSTM(2, activation='softplus',return_sequences=True))

        # adam optimizer and custom negative log likelyhood loss
        model.compile(optimizer=optimizer,loss = MeanVarianceLogLikelyhoodLoss)

        return model

    # define custom fit function called by keras tuner that includes batch size as a hyperparameter
    def fit(self, hp, model, *args, **kwargs):
        """Train ``model`` with a batch size chosen by the tuner.

        Args:
            hp: the ``keras_tuner.HyperParameters`` object for the current trial.
            model: the model returned by ``build``.
            *args: positional arguments forwarded unchanged to ``model.fit``.
            **kwargs: keyword arguments forwarded unchanged to ``model.fit``.

        Returns:
            Whatever ``model.fit`` returns.
        """
        return model.fit(*args,batch_size=hp.Choice("batch_size",[2,4,8,16,32,64,128]),**kwargs)

# configure a random search over LSTMHyperModel, ranking trials by validation loss
tuner = kt.RandomSearch(
    LSTMHyperModel(),
    objective='val_loss',
    max_trials=300,
    seed=42,
    overwrite=False,
    directory="../Models/ExperimentalDataModelTuner",
)

# callbacks applied to every trial: stop when val_loss has not improved for
# 'EarlyStoppingPatience' epochs, or when the loss becomes nan
stop_callbacks = [
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=EarlyStoppingPatience),
    keras.callbacks.TerminateOnNaN(),
]

# run the random hyperparameter search
tuner.search(
    X_train,
    y_train,
    epochs=NumEpochs,
    validation_data=(X_val, y_val),
    callbacks=stop_callbacks,
)

# retrieve the best model found by the search, print its architecture and save it
best_model = tuner.get_best_models()[0]
best_model.summary()
best_model.save("../Models/ExperimentalDataModelTuner")

# print the top 'NumTopResults' sets of hyperparameters, ordered by lowest validation loss
tuner.results_summary(NumTopResults)