## 10. Introduction to Artificial Neural Networks with Keras

In [1]:
from tensorflow import keras
from tensorflow.keras.datasets import boston_housing

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from scipy.stats import reciprocal

from sklearn.model_selection import train_test_split, RandomizedSearchCV

In [2]:
# Ingestion
###########
(train_data, y_train), (test_data, y_test) = boston_housing.load_data()

# Preprocessing
###############
sc = StandardScaler()
x_train = sc.fit_transform(train_data)
x_test = sc.transform(test_data)

x_train__train, x_train__val, y_train__train, y_train__val = train_test_split(x_train, y_train, test_size=0.15,
                                                                             random_state=0)
NUM_FEATURES = x_train.shape[1:]

### Fine-Tuning Neural Network Hyperparameters

Some of the things to consider when using neural networks are:
1. Architecture
2. For an MLP, the no. of layers, size of layers / no. of neurons, type of activation function, weight inisialisation logic etc.

How do you know what combinations of hyperparameters is the best fo the problem?

One way is to simply try many combinations of hyperparameters and see which ones work the best during k-fold CV. For this, we can wrap the model around a parameter search algorithm like `GridSearchCV` or `RandomizedSearchCV`.

Using a function call, let's build a way to initialise models with keyword arguments.

In [3]:
def build_model(n_hidden_layers=1, n_neurons=64, learning_rate=3e-3, input_shape=(13,),
                dropout=0.0, kernel_regularizer=None):
    m = keras.models.Sequential()
    
    for l in range(n_hidden_layers):
        m.add(keras.layers.Dense(n_neurons, activation='relu', input_shape=input_shape, 
                              kernel_regularizer=kernel_regularizer))
        if 0.0 < dropout:
            m.add(keras.layers.Dropout(0.5))
    m.add(keras.layers.Dense(1))

    optimizer = keras.optimizers.RMSprop(learning_rate=learning_rate)
    m.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
    return m

Here we execute a simple workflow on the baseline model.

In [4]:
# BASELINE MODEL 
model0 = build_model()
history0 = model0.fit(x_train__train, y_train__train, 
                      epochs=20, batch_size=32, 
                      validation_data=(x_train__val, y_train__val), verbose=0) # Train

In [5]:
model0.predict(x_test[:10]) # Predict

array([[10.033567],
       [17.042376],
       [20.761349],
       [33.363026],
       [26.3914  ],
       [15.98591 ],
       [25.909231],
       [23.377611],
       [21.331947],
       [18.341013]], dtype=float32)

Let's now build a `keras_reg` that wraps the Keras model, and now can be treated like a classifier like one in `sklearn`. This allows us to implement sklearn functions like `RandomizedSearch`.

In [6]:
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_fn=build_model)

In [7]:
# Training baseline model using keras_clf (treating it like a model from sklearn)
stop_early_checkpoint = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
keras_reg.fit(x_train__train, y_train__train, 
              epochs=20, batch_size=32, callbacks=[stop_early_checkpoint],
              validation_data=(x_train__val, y_train__val), verbose=0) # Train
keras_reg.predict(x_test[:10]) # Predict

array([10.701542, 18.000772, 20.44781 , 34.402363, 26.31796 , 16.30954 ,
       25.831837, 22.999666, 21.384874, 17.611002], dtype=float32)

In [8]:
r = keras.regularizers.l2(l=0.01)
# Training model with regularisation
stop_early_checkpoint = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

keras_reg2 = keras.wrappers.scikit_learn.KerasRegressor(build_fn=build_model, 
                                                         kernel_regularizer=r)
keras_reg2.fit(x_train__train, y_train__train, 
              epochs=20, batch_size=32, callbacks=[stop_early_checkpoint],
              validation_data=(x_train__val, y_train__val), verbose=0) # Train
keras_reg2.predict(x_test[:10]) # Predict

array([11.008568, 17.386282, 21.598707, 33.99249 , 26.74907 , 15.577572,
       27.03369 , 23.962278, 20.918337, 16.829638], dtype=float32)

In [12]:
# Impementing RandomizedSearch on a Keras model.
param_dist = {
    'n_hidden_layers' : (1,2,3,4,5),
    'n_neurons' : (6,7,8,9,10,11),
    'learning_rate' : reciprocal(3e-4, 3e-2),
    'kernel_regularizer' : (None, 
                     keras.regularizers.l2(l=0.01),
                     keras.regularizers.l1(l=0.001),),
    'dropout' : (0.0, 0.1, 0.2, 0.3, 0.4, 0.5),
}

rnd_search_cv = RandomizedSearchCV(keras_reg, param_dist, n_iter=10, cv=4)
rnd_search_cv.fit(x_train__train, y_train__train, 
              epochs=10, batch_size=512, callbacks=[stop_early_checkpoint],
              validation_data=(x_train__val, y_train__val), verbose=0)





RandomizedSearchCV(cv=4, error_score='raise-deprecating',
                   estimator=<tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor object at 0x138fef208>,
                   iid='warn', n_iter=10, n_jobs=None,
                   param_distributions={'dropout': (0.0, 0.1, 0.2, 0.3, 0.4,
                                                    0.5),
                                        'kernel_regularizer': (None,
                                                               <tensorflow.python.keras.regularizers.L1L2 object at 0x13a644278>,
                                                               <tensorflow.python.keras.regularizers.L1L2 object at 0x13a644630>),
                                        'learning_rate': <scipy.stats._distn_infrastructure.rv_frozen object at 0x13a644048>,
                                        'n_hidden_layers': (1, 2, 3, 4, 5),
                                        'n_neurons': (6, 7, 8, 9, 10, 11)},
                   pre_dispat

In [13]:
# Obtaining the best model params
print(rnd_search_cv.best_params_)

{'dropout': 0.0, 'kernel_regularizer': <tensorflow.python.keras.regularizers.L1L2 object at 0x13a644278>, 'learning_rate': 0.02343330549173664, 'n_hidden_layers': 5, 'n_neurons': 6}


In [14]:
print(rnd_search_cv.best_score_)

-63.79938958615673


And from here, you can save the model, evalauate on test set and if happy, deploy it to production. 