## 10. Introduction to Artificial Neural Networks with Keras

In [1]:
from tensorflow import keras
from tensorflow.keras.datasets import imdb

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.stats import reciprocal

from sklearn.model_selection import train_test_split, RandomizedSearchCV

In [2]:
##########
# Ingestion
##########
VOCABULARY_SIZE = 10000
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=VOCABULARY_SIZE)

##########
# Preprocessing
##########
def vectorize_sequences(sequences, dimension=VOCABULARY_SIZE):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
          results[i, sequence] = 1.
    return results
x_train = vectorize_sequences(train_data)
print(x_train.shape)
x_test = vectorize_sequences(test_data)
y_train = np.asarray(train_labels).astype('float32')
print(y_train.shape)
y_test = np.asarray(test_labels).astype('float32')

# Train-Validation Split
x_train__train, x_train__val, y_train__train, y_train__val = train_test_split(x_train, y_train, test_size=0.4,
                                                                             random_state=0)

(25000, 10000)
(25000,)


### Fine-Tuning Neural Network Hyperparameters

Some of the things to consider when using neural networks are:
1. Architecture
2. For an MLP, the no. of layers, size of layers / no. of neurons, type of activation function, weight inisialisation logic etc.

How do you know what combinations of hyperparameters is the best fo the problem?

One way is to simply try many combinations of hyperparameters and see which ones work the best during k-fold CV. For this, we can wrap the model around a parameter search algorithm like `GridSearchCV` or `RandomizedSearchCV`.

Using a function call, let's build a way to initialise models with keyword arguments.

In [3]:
def build_model(n_hidden_layers=1, n_neurons=30, learning_rate=3e-3, input_shape=(10000,),
                dropout=0.0, kernel_regularizer=None):
    m = keras.models.Sequential()
    for l in range(n_hidden_layers):
        m.add(keras.layers.Dense(n_neurons, activation='relu', input_shape=input_shape,
                                kernel_regularizer=kernel_regularizer))
        if 0.0 < dropout:
            m.add(keras.layers.Dropout(0.5))
    m.add(keras.layers.Dense(1, activation='sigmoid'))
    optimizer = keras.optimizers.RMSprop(learning_rate=learning_rate)
    m.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return m

Here we execute a simple workflow on the baseline model.

In [4]:
# BASELINE MODEL 
model0 = build_model()
history0 = model0.fit(x_train__train, y_train__train, 
                      epochs=20, batch_size=512, 
                      validation_data=(x_train__val, y_train__val), verbose=0) # Train

In [5]:
(model0.predict(x_test[:3])>0.5).astype('int32').reshape(-1,1) # Predict

array([[0],
       [1],
       [1]], dtype=int32)

Let's now build a `keras_clf` that wraps the Keras model, and now can be treated like a classifier like one in `sklearn`. This allows us to implement sklearn functions like `RandomizedSearch`.

In [6]:
keras_clf = keras.wrappers.scikit_learn.KerasClassifier(build_fn=build_model)


In [7]:
# Training baseline model using keras_clf (treating it like a model from sklearn)
stop_early_checkpoint = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
keras_clf.fit(x_train__train, y_train__train, 
              epochs=20, batch_size=512, callbacks=[stop_early_checkpoint],
              validation_data=(x_train__val, y_train__val), verbose=0) # Train
print((keras_clf.predict(x_test[:3])>0.5).astype('int32').reshape(-1,1)) # Predict

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
[[0]
 [1]
 [1]]


In [8]:
r = keras.regularizers.l2(l=0.01)
# Training model with regularisation
stop_early_checkpoint = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
keras_clf2 = keras.wrappers.scikit_learn.KerasClassifier(build_fn=build_model, 
                                                         kernel_regularizer=r)
keras_clf2.fit(x_train__train, y_train__train, 
              epochs=20, batch_size=512, callbacks=[stop_early_checkpoint],
              validation_data=(x_train__val, y_train__val), verbose=0) # Train
print(np.argmax(keras_clf2.predict(x_test[:3]), axis=-1)) # Predict



[0 0 0]


In [9]:
# Training model with regularisation
stop_early_checkpoint = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
keras_clf3 = keras.wrappers.scikit_learn.KerasClassifier(build_fn=build_model, 
                                                         learning_rate=3e-4)
keras_clf3.fit(x_train__train, y_train__train, 
              epochs=20, batch_size=512, callbacks=[stop_early_checkpoint],
              validation_data=(x_train__val, y_train__val), verbose=0) # Train
print(np.argmax(keras_clf3.predict(x_test[:3]), axis=-1)) # Predict

[0 0 0]


In [10]:
# Impementing RandomizedSearch on a Keras model.
param_dist = {
    'n_hidden_layers' : (1,3,5,7),
    'n_neurons' : np.arange(20,100),
    'learning_rate' : reciprocal(3e-4, 3e-2),
    'kernel_regularizer' : (None, 
                            keras.regularizers.l2(l=0.01),
                            keras.regularizers.l1(l=0.001),),
    'dropout' : (0.0, 0.1, 0.2, 0.3, 0.4, 0.5),
}

rnd_search_cv = RandomizedSearchCV(keras_clf, param_dist, n_iter=10, cv=3)
rnd_search_cv.fit(x_train__train, y_train__train, 
              epochs=10, batch_size=512, callbacks=[stop_early_checkpoint],
              validation_data=(x_train__val, y_train__val), verbose=0)



RandomizedSearchCV(cv=3, error_score='raise-deprecating',
                   estimator=<tensorflow.python.keras.wrappers.scikit_learn.KerasClassifier object at 0x14de91f28>,
                   iid='warn', n_iter=10, n_jobs=None,
                   param_distributions={'dropout': (0.0, 0.1, 0.2, 0.3, 0.4,
                                                    0.5),
                                        'kernel_regularizer': (None,
                                                               <tensorflow.python.keras.regularizers.L1L2 object at 0x143810668>,
                                                               <tensorflow.python.ke...
                                        'n_neurons': array([20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
       37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
       54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
       71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 

In [11]:
# Obtaining the best model params
print(rnd_search_cv.best_params_)

{'dropout': 0.0, 'kernel_regularizer': None, 'learning_rate': 0.0005693098669592988, 'n_hidden_layers': 3, 'n_neurons': 65}


In [12]:
print(rnd_search_cv.best_score_)

0.8809333443641663


And from here, you can save the model, evalauate on test set and if happy, deploy it to production. 