# Notebook 6: Neural Networks 2

**Model 5**: Neural network with 2 hidden layers

**Reason for model**: neural networks are a powerful and highly expressive class of models.

**Metric**: RMSE

**Reason for metric**: the focus is on penalising large errors more heavily than small errors, so RMSE is the better choice.

In [1]:
# Path components of the dataset file, relative to this notebook's folder
ROOT_PATH_FROM_NOTEBOOK = ".."
DATA_PATH = "data"
PROCESSED_DATA_PATH = "processed"
SAMPLE_DATASET_NAME = "data_sample.parquet"

# Join the components into the single path used to load the dataset
df_path = "/".join(
    [ROOT_PATH_FROM_NOTEBOOK, DATA_PATH, PROCESSED_DATA_PATH, SAMPLE_DATASET_NAME]
)

In [2]:
import sys
import os

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from scikeras.wrappers import KerasRegressor

from assignment2_pkg_11919925.metrics.regression import print_regressor_scores_from_gridsearchcv

In [3]:
# Prerequisite (install once into the kernel's environment): %pip install -q -U keras-tuner

In [4]:
# Prerequisite (install once into the kernel's environment): %pip install scikeras

In [5]:
import keras_tuner as kt

In [6]:
# Never truncate the column list when a DataFrame is displayed
pd.options.display.max_columns = None

In [7]:
# Make the project's ../src directory importable so custom functions can be used
current_dir = os.getcwd()
src_dir = os.path.abspath(os.path.join(current_dir, '..', 'src'))

# Guard the append so that re-running this cell does not add duplicate entries
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [8]:
# Load the dataset from the parquet file located at df_path
df = pd.read_parquet(df_path)

In [9]:
# Preview the first rows of the loaded frame
df.head()

Unnamed: 0,flightDayOfWeekSin,flightDayOfWeekCos,flightMonthSin,flightMonthCos,flightHourSin,flightHourCos,flightMinuteSin,flightMinuteCos,timeDeltaDays,travelDurationDay,totalTravelDistance,totalFare,isBasicEconomy,isRefundable,isNonStop,numLegs,business,coach,first,premium coach
0,-0.974928,-0.222521,0.5,-0.866025,-0.5,-0.8660254,-0.951057,-0.309017,15,0.195139,1191.0,294.6,-1,-1,-1,2,-1,1,-1,-1
1,-0.433884,-0.900969,0.5,-0.866025,0.258819,0.9659258,-0.5,-0.866025,37,0.095139,762.0,262.6,-1,-1,1,1,-1,1,-1,-1
2,0.781831,0.62349,0.866025,-0.5,-1.0,-1.83697e-16,0.5,0.866025,1,0.127083,1235.0,234.59,-1,-1,1,1,-1,1,-1,-1
3,0.974928,-0.222521,0.5,-0.866025,-0.5,-0.8660254,-0.104528,0.994522,34,0.101389,762.0,118.6,-1,-1,1,1,-1,1,-1,-1
4,0.433884,-0.900969,0.5,-0.866025,-0.965926,-0.258819,-0.669131,0.743145,17,0.333333,2618.0,446.6,-1,-1,-1,2,-1,1,-1,-1


In [10]:
# Split the frame into the regression target (y) and the feature matrix (X).
# Selecting/dropping instead of df.pop() leaves df untouched, so this cell can
# be re-run without a KeyError and earlier displays of df stay accurate.
y = df['totalFare']
X = df.drop(columns='totalFare')

## Neural Network with 2 hidden layers

In [11]:
def model_builder_2_hidden(hp):
    """Build and compile a regression network with two hidden layers for KerasTuner.

    Tuned hyperparameters:
        units: width shared by BOTH hidden layers (4 to 18, step 2).
        learning_rate: Adam learning rate, one of 0.01, 0.001 or 0.0001.

    Args:
        hp: hyperparameter container supplied by the tuner; this function
            calls its ``Int`` and ``Choice`` methods.

    Returns:
        A compiled ``keras.Sequential`` model with a single linear output
        unit, optimised on mean squared error and reporting RMSE as a metric.

    Note:
        The input width is read from the notebook-level feature frame ``X``,
        so ``X`` must be defined before the tuner calls this function.
    """
    model = keras.Sequential()
    # Declare the input with an explicit Input object: passing `input_shape`
    # to the first Dense layer is discouraged by Keras and emits a UserWarning.
    model.add(keras.Input(shape=(X.shape[1],)))
    # Tune the number of units (one value, reused by both hidden layers)
    hp_units = hp.Int('units', min_value=4, max_value=18, step=2)
    # Hidden layer 1
    model.add(keras.layers.Dense(units=hp_units, activation='relu'))
    # Hidden layer 2
    model.add(keras.layers.Dense(units=hp_units, activation='relu'))
    # Output layer: one unit with no activation, as required for regression
    model.add(keras.layers.Dense(1))

    # Tune the learning rate for the optimizer
    # Choose an optimal value from 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
        loss=keras.losses.MeanSquaredError(
            reduction="sum_over_batch_size",
            name="mean_squared_error"),
        metrics=[keras.metrics.RootMeanSquaredError(
            name="root_mean_squared_error")])

    return model

In [15]:
# Stop a training run once val_loss has gone 5 epochs without improving
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
)

In [17]:
# Hyperband search over model_builder_2_hidden, ranking trials by validation loss.
# NOTE(review): no seed is passed, so the sampled trials are presumably not
# reproducible between runs — confirm whether that is intended.
tuner_2l = kt.Hyperband(
    hypermodel=model_builder_2_hidden,
    objective='val_loss',
    max_epochs=2000,
    factor=3,
    directory='nicholas_keras_neuralnetwork',
    project_name='tuner_2_layer',
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
# Run the hyperparameter search with a 20% validation split, passing the
# early_stop callback to every trial.
# NOTE(review): Hyperband schedules the per-trial epoch budget itself, so
# epochs=1000 may be overridden here — confirm against the KerasTuner docs.
tuner_2l.search(X, y, epochs=1000, validation_split=0.2, callbacks=[early_stop])

Trial 20 Complete [00h 00m 02s]
val_loss: 33871.78515625

Best val_loss So Far: 26833.169921875
Total elapsed time: 00h 00m 38s


In [19]:
# Ask the tuner for its single best hyperparameter set and unwrap it
top_hps_2l = tuner_2l.get_best_hyperparameters(num_trials=1)
best_hps_2l = top_hps_2l[0]

In [20]:
# Rebuild a fresh model from the best hyperparameters, then train it for
# 1000 epochs with a 20% validation split
model_2l = tuner_2l.hypermodel.build(best_hps_2l)
history_2l = model_2l.fit(
    X,
    y,
    epochs=1000,
    validation_split=0.2,
)

Epoch 1/1000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 444us/step - loss: 41087.9023 - root_mean_squared_error: 199.8203 - val_loss: 31356.8984 - val_root_mean_squared_error: 177.0788
Epoch 2/1000
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 353us/step - loss: 29640.1230 - root_mean_squared_error: 172.1290 - val_loss: 28252.5918 - val_root_mean_squared_error: 168.0851
Epoch 3/1000
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 333us/step - loss: 27463.8379 - root_mean_squared_error: 165.6469 - val_loss: 26870.6328 - val_root_mean_squared_error: 163.9226
Epoch 4/1000
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 366us/step - loss: 27392.4082 - root_mean_squared_error: 165.4442 - val_loss: 27495.5781 - val_root_mean_squared_error: 165.8179
Epoch 5/1000
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 328us/step - loss: 27578.7227 - root_mean_squared_error: 166.0248 - val_loss: 26526.6328 - v

In [21]:
# Find the epoch (1-based) with the lowest validation RMSE; ties resolve to
# the earliest such epoch
val_rmse_per_epoch_2l = history_2l.history['val_root_mean_squared_error']
best_index_2l = min(
    range(len(val_rmse_per_epoch_2l)),
    key=val_rmse_per_epoch_2l.__getitem__,
)
best_epoch_2l = best_index_2l + 1
print('Best epoch: %d' % (best_epoch_2l,))

Best epoch: 934


For the 2-layer network, best statistics are from epoch 934:

`loss: 21279.9570 - root_mean_squared_error: 145.8587 - val_loss: 21378.3320 - val_root_mean_squared_error: 146.2133`

This is not better than the 1-layer network, so the 1-layer will be the final model.

In [22]:
# Print the layer-by-layer summary of the trained 2-hidden-layer model
model_2l.summary()

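The 2-layer model is summarised as follows:
- Input → hidden 1: 33 → 24
- Hidden 1 → hidden 2: 24 → 24
- Hidden 2 → output: 24 → 1

**TODO**: verify these sizes against the `model_2l.summary()` output. The data previewed above has 19 feature columns once `totalFare` is removed, and the `units` search range in `model_builder_2_hidden` tops out at 18, so the figures listed here may come from an earlier run.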