# Artificial Neural Network

### Importing the libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras

In [None]:
pip install -q -U keras-tuner

In [None]:
import kerastuner as kt

In [None]:
# Show the installed TensorFlow version
tf.__version__

'2.4.1'

# Data Preprocessing

### Importing the dataset

In [None]:
# Load the Excel workbook into a DataFrame
styleSilentShot = pd.read_excel('1.2 SilentSoundScale_asl_count30+.xlsx')
# NOTE(review): presumably the first column is "Title" and the last is "Sound" — confirm against the preview
X = styleSilentShot.iloc[: , 1:-1].values # Predictors (independent variables): every column except the first and the last
y = styleSilentShot.iloc[: , -1].values # Target (the class to predict): the last column
# Preview the first rows of the raw data
styleSilentShot.head()

Unnamed: 0,Title,COUNTRY,ASL,RA,POV,INS,BCU,CU,MCU,MS,MLS,LS,VLS,Sound
0,10 Things I Hate About You,USA,6.7,58,4.0,2.0,64,224,82,37,36,53,3,1
1,"Adventures of Robin Hood, The",USA,5.0,0,0.0,0.0,3,53,71,77,109,111,76,1
2,"Affairs of Anatole, The",USA,8.0,23,16.0,9.0,33,26,89,141,136,70,5,0
3,Alley Cat,BRI,6.0,10,0.0,0.0,17,68,84,82,101,142,5,0
4,"Almost Perfect Affair, An",USA,4.2,64,12.0,7.0,70,199,93,51,26,51,9,1


In [None]:
# print(X)

In [None]:
# print(y)

### Encoding categorical data

One Hot Encoding the "COUNTRY" column

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
# One-hot encode column 0 of X; every other column is passed through unchanged
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0])], remainder='passthrough')
# NOTE: X is overwritten with its encoded version, so this cell is not idempotent —
# re-running it without reloading the data would encode column 0 of the already-encoded matrix
X = np.array(ct.fit_transform(X))

In [None]:
# Inspect the encoded feature matrix
print(X)

[[0.0 0.0 0.0 ... 36 53 3]
 [0.0 0.0 0.0 ... 109 111 76]
 [0.0 0.0 0.0 ... 136 70 5]
 ...
 [0.0 0.0 1.0 ... 99 66 40]
 [0.0 0.0 1.0 ... 123 243 26]
 [0.0 0.0 1.0 ... 83 396 21]]


### Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples as the test set; the remaining 70% form the training set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)
# random_state: Pass an int for reproducible output across multiple function calls

### Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise the features with a scaler that is fitted on the training set only
sc = StandardScaler()
X_train = sc.fit_transform(X_train) # Fit on the training data; the one-hot-encoded variables are scaled too for deep learning
X_test = sc.transform(X_test) # Not fitted to the test set to avoid information leakage:
                              # the scaler fitted on the training data (its mean and std dev) is reused
                              # because the test data is treated as "unavailable" during training

In [None]:
# print(X_test)

# Keras Tuner ANN model with layer hypertuning

Adapted from: https://keras-team.github.io/keras-tuner/

## Define the model builder function

In [None]:
# Reset the global state kept by Keras before the tuner starts building models
tf.keras.backend.clear_session() 

In [None]:
def build_model(hp):
    """Build and compile a binary-classification ANN for Keras Tuner.

    Search space sampled from ``hp``:
      * ``num_layers``: number of hidden Dense layers, 1 to 5.
      * ``units_<i>``: width of hidden layer ``i``, 2 to 28 in steps of 2.
      * ``learning_rate``: Adam learning rate, 0.01 or 0.001.

    Args:
        hp: Keras Tuner hyperparameter container used to sample the values above.

    Returns:
        A compiled ``keras.Sequential`` model with one sigmoid output unit,
        binary cross-entropy loss and an AUC metric named ``'auc'``.
    """
    model = keras.Sequential()

    # Dropout (e.g. keras.layers.Dropout(0.3)) is not used; the hidden layers are
    # regularised with an L2 kernel penalty instead.

    # Hyperparameter specifying the number of hidden layers
    for i in range(hp.Int('num_layers', 1, 5)):
        model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=2,
                                            max_value=28,
                                            step=2),
                                        kernel_regularizer='l2', # string alias: uses the default factor l2=0.01
                                        activation='relu'))

    # Define the output layer with units=1 for a binary output
    model.add(tf.keras.layers.Dense(units=1, activation='sigmoid')) # activation = 'softmax' for non-binary

    # Tune the learning rate for the optimizer
    # Choose an optimal value from 0.01, 0.001
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3])

    # The output layer already applies a sigmoid, so the model emits probabilities and the
    # loss must be built with from_logits=False; from_logits=True would treat those
    # probabilities as raw logits.
    # The AUC metric is named explicitly so that the history keys are always 'auc' and
    # 'val_auc', which the tuner objective and the best-epoch lookup rely on.
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss=keras.losses.BinaryCrossentropy(from_logits=False),
                  metrics=[keras.metrics.AUC(name='auc')])

    # The metric here is AUC, while the tuner's objective is val_auc: during tuning the training
    # data is split to obtain validation data. When training the final model, the data should not
    # be split further (i.e. no validation_split=0.3) --- the model with the best hyperparameters
    # identified during tuning is trained on the FULL training data set and thereafter evaluated
    # on the test data.

    return model

## Instantiate the tuner

In [None]:
from kerastuner.tuners import BayesianOptimization
from kerastuner.tuners import RandomSearch

# Search budget: up to 50 hyperparameter combinations (trials), each executed
# 30 times (executions per trial)
MAX_TRIALS = 50
EXECUTION_PER_TRIAL = 30
MAX_EPOCHS = 100 # Passed as `epochs` to tuner.search
BATCH_SIZE = 32 # Passed as `batch_size` to tuner.search and to the later fit calls

# Bayesian-optimisation tuner that maximises the validation AUC ('val_auc') of the
# models produced by build_model
tuner = BayesianOptimization (
    build_model,
    objective=kt.Objective('val_auc', direction='max'),
    max_trials=MAX_TRIALS,
    executions_per_trial=EXECUTION_PER_TRIAL,
    directory='my_dir',
    project_name='silent_sound_ann_bayesian_search_1.1')

# Alternative tuner (random search), kept for reference:
# tuner = RandomSearch(
#     build_model,
#     objective=kt.Objective('val_auc', direction='max'),
#     max_trials=MAX_TRIALS,
#     executions_per_trial=EXECUTION_PER_TRIAL,
#     directory='my_dir',
#     project_name='silent_sound_ann_random_search_1.1')

# Note from Keras: the purpose of having multiple executions per trial is to reduce
# results variance and therefore be able to more accurately assess the performance of a model. 

Print a summary of the search space

In [None]:
# List the hyperparameters registered with the tuner so far.
# NOTE(review): only num_layers, units_0 and learning_rate are listed at this point, presumably
# because units_1 .. units_4 are only registered once a trial builds more than one hidden
# layer — confirm
tuner.search_space_summary()

Search space summary
Default search space size: 3
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 5, 'step': 1, 'sampling': None}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 28, 'step': 2, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001], 'ordered': True}


Implement an early stopping mechanism

In [None]:
# NOTE(review): the callback monitors val_loss whereas the tuner's objective is val_auc —
# confirm this difference is intended
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# This callback will stop the training when there is no improvement in
# the validation loss for five consecutive epochs.

## Search for the best hyperparameter configuration

In [None]:
# Run the search: MAX_TRIALS hyperparameter combinations, EXECUTION_PER_TRIAL executions per
# combination and at most MAX_EPOCHS epochs per execution (the stop_early callback may end a run sooner).

tuner.search(X_train, y_train,
             batch_size = BATCH_SIZE,
             epochs=MAX_EPOCHS, # The best_epoch value in subsequent sections is usually between 30 and 100
             validation_split=0.3, # The last 30% of the training data is held out as validation data
             callbacks=[stop_early]) 

# The validation performance is the criterion by which the best hyperparameter configuration is chosen

Trial 50 Complete [00h 02m 05s]
val_auc: 0.9425679087638855

Best val_auc So Far: 0.9457654456297556
Total elapsed time: 01h 57m 20s
INFO:tensorflow:Oracle triggered exit


Summary of the results

In [None]:
# Show the 10 best trials, ranked by the objective (val_auc).
# Note: a trial also lists units_<i> values for layers beyond its num_layers; those values are
# not used by that trial's model, because build_model only reads units_0 .. units_<num_layers - 1>.
tuner.results_summary(num_trials=10)

Results summary
Results in my_dir/silent_sound_ann_bayesian_search_1.1
Showing 10 best trials
Objective(name='val_auc', direction='max')
Trial summary
Hyperparameters:
num_layers: 1
units_0: 28
learning_rate: 0.01
units_1: 28
units_2: 2
units_3: 2
units_4: 12
Score: 0.9457654456297556
Trial summary
Hyperparameters:
num_layers: 1
units_0: 28
learning_rate: 0.01
units_1: 28
units_2: 28
units_3: 2
units_4: 28
Score: 0.9449382861455281
Trial summary
Hyperparameters:
num_layers: 1
units_0: 28
learning_rate: 0.01
units_1: 2
units_2: 2
units_3: 2
units_4: 14
Score: 0.9448395133018493
Trial summary
Hyperparameters:
num_layers: 1
units_0: 28
learning_rate: 0.01
units_1: 28
units_2: 28
units_3: 28
units_4: 28
Score: 0.9445185323556264
Trial summary
Hyperparameters:
num_layers: 1
units_0: 28
learning_rate: 0.01
units_1: 28
units_2: 2
units_3: 2
units_4: 28
Score: 0.9441728552182516
Trial summary
Hyperparameters:
num_layers: 1
units_0: 28
learning_rate: 0.01
units_1: 28
units_2: 2
units_3: 6
units

## Retrain the model with the best hyperparameters

Save the best hyperparameters

In [None]:
# Get the optimal hyperparameters found by the search
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0] # [0] selects the best trial; num_trials=2 will return the hp's of the best 2 trials

Create a log directory for TensorBoard

In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

import datetime, os

# Each run logs to its own timestamped sub-directory of "history_logs_random_search"
# NOTE(review): the directory name says "random_search" although the active tuner is
# BayesianOptimization — confirm the name is intended
logdir = os.path.join("history_logs_random_search", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
# Callback that writes the training logs of a fit() call to logdir
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

Over-fitting: Retrain the model and identify the best epoch (epoch at which the largest val_auc is achieved)

In [None]:
# Build the model with the optimal hyperparameters and train it on the data for MAX_EPOCHS
# epochs (the same epoch budget the tuner used), without early stopping, so that the whole
# validation curve is available for choosing the best epoch
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train, y_train,
                    batch_size = BATCH_SIZE,
                    epochs = MAX_EPOCHS,
                    validation_split=0.3,
                    callbacks=[tensorboard_callback])

# Overfitting: After about 20 epochs, the model fits the data with an AUC of 1.00, but the val_AUC declines from about 0.92 to 0.82
# Determine the optimal number of epochs based on the validation data

# Validation Split (validation_split=0.3)
# Fraction of the training data to be used as validation data.
# The model will set apart this fraction of the training data, will not train on it, and will evaluate
# the loss and any model metrics on this data at the end of each epoch.
# The validation data is selected from the last samples in the x and y data provided, before shuffling.

# Best epoch = 1-based position of the (first) highest validation AUC
val_auc_per_epoch = history.history['val_auc']
best_epoch = val_auc_per_epoch.index(max(val_auc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

## Inspect the training run in TensorBoard

In [None]:
# Open TensorBoard on the root log directory that the TensorBoard callback writes into
%tensorboard --logdir history_logs_random_search 

## Train the final hypermodel

This section trains the final model so that its performance can be evaluated on the test set.

Re-instantiate the hypermodel and train it with the optimal number of epochs from above.

In [None]:
# Build a fresh model from the best hyperparameters
hypermodel = tuner.hypermodel.build(best_hps)

# Retrain the model on the full training set (no validation_split is passed here)
hypermodel.fit(X_train, y_train, batch_size = BATCH_SIZE, epochs=best_epoch) # best_epoch: epoch with the highest val_auc in the earlier run

Epoch 1/78
Epoch 2/78
Epoch 3/78
Epoch 4/78
Epoch 5/78
Epoch 6/78
Epoch 7/78
Epoch 8/78
Epoch 9/78
Epoch 10/78
Epoch 11/78
Epoch 12/78
Epoch 13/78
Epoch 14/78
Epoch 15/78
Epoch 16/78
Epoch 17/78
Epoch 18/78
Epoch 19/78
Epoch 20/78
Epoch 21/78
Epoch 22/78
Epoch 23/78
Epoch 24/78
Epoch 25/78
Epoch 26/78
Epoch 27/78
Epoch 28/78
Epoch 29/78
Epoch 30/78
Epoch 31/78
Epoch 32/78
Epoch 33/78
Epoch 34/78
Epoch 35/78
Epoch 36/78
Epoch 37/78
Epoch 38/78
Epoch 39/78
Epoch 40/78
Epoch 41/78
Epoch 42/78
Epoch 43/78
Epoch 44/78
Epoch 45/78
Epoch 46/78
Epoch 47/78
Epoch 48/78
Epoch 49/78
Epoch 50/78
Epoch 51/78
Epoch 52/78
Epoch 53/78
Epoch 54/78
Epoch 55/78
Epoch 56/78
Epoch 57/78
Epoch 58/78
Epoch 59/78
Epoch 60/78
Epoch 61/78
Epoch 62/78
Epoch 63/78
Epoch 64/78
Epoch 65/78
Epoch 66/78
Epoch 67/78
Epoch 68/78
Epoch 69/78
Epoch 70/78
Epoch 71/78
Epoch 72/78
Epoch 73/78
Epoch 74/78
Epoch 75/78
Epoch 76/78
Epoch 77/78
Epoch 78/78


<tensorflow.python.keras.callbacks.History at 0x7fee84b4ed10>

## Evaluate the hypermodel on the test data

In [None]:
# Evaluate the final model on the held-out test set
eval_result = hypermodel.evaluate(X_test, y_test)
# The printed label expects the result in the order [loss, AUC]
print("[test loss, test auc]:", eval_result)