# Notebook 5: Neural Networks

**Model 4**: Neural Network with 1 layer

**Reason for model**: powerful class of model with expressiveness. 

**Metric**: RMSE

**Reason for metric**: focus on penalising large errors over small errors, RMSE is the better choice.

**Metrics of last best model**: Random Forest

***RMSE Train:*** 145.38896174058732

***RMSE Val:*** 162.8335526958362

In [1]:
ROOT_PATH_FROM_NOTEBOOK = ".."
DATA_PATH = "data"
PROCESSED_DATA_PATH = "processed"
SAMPLE_DATASET_NAME = "data_sample.parquet"

df_path = f"{ROOT_PATH_FROM_NOTEBOOK}/{DATA_PATH}/{PROCESSED_DATA_PATH}/{SAMPLE_DATASET_NAME}"

In [2]:
import sys
import os

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from scikeras.wrappers import KerasRegressor

from assignment2_pkg_11919925.metrics.regression import print_regressor_scores_from_gridsearchcv

In [3]:
# ! pip install -q -U keras-tuner

In [4]:
# ! pip install scikeras

In [3]:
import keras_tuner as kt

In [4]:
# Set Pandas option to show all columns in prints
pd.set_option('display.max_columns', None)

In [5]:
# Get the current working directory
current_dir = os.getcwd()

# Add the src directory to sys.path to use custom functions
sys.path.append(os.path.abspath(os.path.join(current_dir, '..', 'src')))

In [6]:
df = pd.read_parquet(df_path)

In [7]:
df.head()

Unnamed: 0,flightDayOfWeekSin,flightDayOfWeekCos,flightMonthSin,flightMonthCos,flightHourSin,flightHourCos,flightMinuteSin,flightMinuteCos,timeDeltaDays,travelDurationDay,totalTravelDistance,totalFare,isBasicEconomy,isRefundable,isNonStop,numLegs,business,coach,first,premium coach
0,-0.974928,-0.222521,0.5,-0.866025,-0.5,-0.8660254,-0.951057,-0.309017,15,0.195139,1191.0,294.6,-1,-1,-1,2,-1,1,-1,-1
1,-0.433884,-0.900969,0.5,-0.866025,0.258819,0.9659258,-0.5,-0.866025,37,0.095139,762.0,262.6,-1,-1,1,1,-1,1,-1,-1
2,0.781831,0.62349,0.866025,-0.5,-1.0,-1.83697e-16,0.5,0.866025,1,0.127083,1235.0,234.59,-1,-1,1,1,-1,1,-1,-1
3,0.974928,-0.222521,0.5,-0.866025,-0.5,-0.8660254,-0.104528,0.994522,34,0.101389,762.0,118.6,-1,-1,1,1,-1,1,-1,-1
4,0.433884,-0.900969,0.5,-0.866025,-0.965926,-0.258819,-0.669131,0.743145,17,0.333333,2618.0,446.6,-1,-1,-1,2,-1,1,-1,-1


In [8]:
y = df.pop('totalFare')
X = df

In [9]:
X.shape

(50000, 19)

## Neural Network with 1 hidden layer

In [10]:
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

In [13]:
def model_builder_1_hidden(hp):
    model = keras.Sequential()
    # Tune the number of units
    
    hp_units = hp.Int('units', min_value=4, max_value=18, step=2)
    # Hidden layer
    model.add(keras.layers.Dense(units=hp_units, input_shape=[X.shape[1]], activation='relu'))
    # Output layer
    model.add(keras.layers.Dense(1))

    # Tune the learning rate for the optimizer
    # Choose an optimal value from 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
        loss=keras.losses.MeanSquaredError(
        reduction="sum_over_batch_size", 
        name="mean_squared_error"),
        metrics=[keras.metrics.RootMeanSquaredError(
        name="root_mean_squared_error")])
    
    return model

In [None]:
tuner_1l = kt.Hyperband(
    model_builder_1_hidden,
    objective='val_loss',
    max_epochs=1000,
    factor=3,
    directory='nicholas_keras_neuralnetwork',
    project_name='tuner_1_layer')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
tuner_1l.search(X, y, epochs=1000, validation_split=0.2, callbacks=[early_stop])

Trial 24 Complete [00h 00m 02s]
val_loss: 30565.107421875

Best val_loss So Far: 26685.83984375
Total elapsed time: 00h 00m 38s


In [16]:
# Get the optimal hyperparameters
best_hps=tuner_1l.get_best_hyperparameters(num_trials=1)[0]

In [17]:
# Build the model with the optimal hyperparameters and train it on the data
model_1l = tuner_1l.hypermodel.build(best_hps)
history = model_1l.fit(X, y, epochs=1000, validation_split=0.2)

Epoch 1/1000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 360us/step - loss: 36562.2344 - root_mean_squared_error: 190.0397 - val_loss: 28677.2090 - val_root_mean_squared_error: 169.3435
Epoch 2/1000
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 311us/step - loss: 28714.7383 - root_mean_squared_error: 169.4073 - val_loss: 27135.2676 - val_root_mean_squared_error: 164.7279
Epoch 3/1000
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 303us/step - loss: 28130.6152 - root_mean_squared_error: 167.6902 - val_loss: 26747.1543 - val_root_mean_squared_error: 163.5456
Epoch 4/1000
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 307us/step - loss: 27902.6367 - root_mean_squared_error: 166.9863 - val_loss: 28614.5156 - val_root_mean_squared_error: 169.1582
Epoch 5/1000
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 311us/step - loss: 27518.1270 - root_mean_squared_error: 165.8488 - val_loss: 27482.5723 - v

In [18]:
val_rmse_per_epoch = history.history['val_root_mean_squared_error']
best_epoch = val_rmse_per_epoch.index(min(val_rmse_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

Best epoch: 965


For the 1-layer network, best statistics are from epoch 965:

`loss: 20903.2871 - root_mean_squared_error: 144.5450 - val_loss: 20853.8203 - val_root_mean_squared_error: 144.4085`

In [19]:
model_1l.summary()

The 1-layer model is summarised as follows:
- Input - hidden: 19 -> 14
- Hidden - output: 14 -> 1

**Next model**: Artificial Neural Network with 2 layers.

## Retrain the best model on the complete dataset

Notebook 6 shows that the 2-layer model is not better than the 1-layer model, so I will retrain the 1-layer model on the full dataset.

In [20]:
ALL_DATASET_NAME = "all_data_transformed.parquet"
df_all_path = f"{ROOT_PATH_FROM_NOTEBOOK}/{DATA_PATH}/{PROCESSED_DATA_PATH}/{ALL_DATASET_NAME}"

df_all = pd.read_parquet(df_all_path)

In [21]:
y_all = df_all.pop('totalFare')
X_all = df_all

In [22]:
X_all.head()

Unnamed: 0,flightDayOfWeekSin,flightDayOfWeekCos,flightMonthSin,flightMonthCos,flightHourSin,flightHourCos,flightMinuteSin,flightMinuteCos,timeDeltaDays,travelDurationDay,totalTravelDistance,isBasicEconomy,isRefundable,isNonStop,numLegs,business,coach,first,premium coach
0,-0.433884,-0.900969,0.5,-0.866025,0.0,1.0,-0.2079117,0.978148,34,0.40625,1931.0,-1,-1,-1,2,-1,1,-1,-1
1,-0.433884,-0.900969,0.5,-0.866025,1.0,6.123234000000001e-17,0.9510565,-0.309017,34,0.181944,1947.0,-1,-1,1,1,-1,1,-1,-1
2,0.433884,-0.900969,0.5,-0.866025,-0.9659258,-0.258819,-0.9510565,-0.309017,33,0.182639,1947.0,-1,-1,1,1,-1,1,-1,-1
3,0.433884,-0.900969,0.5,-0.866025,1.224647e-16,-1.0,5.665539e-16,-1.0,33,0.253472,1947.0,-1,-1,-1,2,-1,1,-1,-1
4,0.433884,-0.900969,0.5,-0.866025,-0.258819,-0.9659258,0.0,1.0,33,0.255556,1947.0,-1,-1,-1,2,-1,1,-1,-1


In [23]:
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42)

In [24]:
hypermodel = tuner_1l.hypermodel.build(best_hps)

# Retrain the model
hypermodel.fit(X_train, y_train, epochs=best_epoch, validation_split=0.2, callbacks=[early_stop])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/965
[1m263331/263331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 329us/step - loss: 24728.2148 - root_mean_squared_error: 157.1797 - val_loss: 21834.5801 - val_root_mean_squared_error: 147.7653
Epoch 2/965
[1m263331/263331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 315us/step - loss: 22136.3223 - root_mean_squared_error: 148.7822 - val_loss: 21668.0098 - val_root_mean_squared_error: 147.2006
Epoch 3/965
[1m263331/263331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 314us/step - loss: 21969.5781 - root_mean_squared_error: 148.2210 - val_loss: 21736.7148 - val_root_mean_squared_error: 147.4338
Epoch 4/965
[1m263331/263331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 316us/step - loss: 21897.4160 - root_mean_squared_error: 147.9775 - val_loss: 21603.8613 - val_root_mean_squared_error: 146.9825
Epoch 5/965
[1m263331/263331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 318us/step - loss: 21849.3809 - root_mean_squared_error: 14

<keras.src.callbacks.history.History at 0x169da1650>

In [25]:
eval_result = hypermodel.evaluate(X_test, y_test)
print("[test loss, test rmse]:", eval_result)

[1m82291/82291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 238us/step - loss: 21589.5000 - root_mean_squared_error: 146.9327
[test loss, test rmse]: [21599.3828125, 146.96728515625]


Best model: 

`loss: 20903.2871 - root_mean_squared_error: 144.5450 - val_loss: 20853.8203 - val_root_mean_squared_error: 144.4085`

`test_loss: 21599.3828125 - test_root_mean_squared_error: 146.96728515625`

In [26]:
hypermodel.save('../models/nicholas_neuralnetwork_best.keras')