## Hyper-Parameter Tuning with Keras Tuner

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
import tensorflow as tf
import tensorflow.keras as K
import keras_tuner as kt

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pathlib import Path
from time import strftime

In [None]:
tf.config.list_physical_devices('GPU')

# Dataset setup

In [4]:
fashion_mnist = tf.keras.datasets.fashion_mnist.load_data()

In [5]:
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist

In [6]:
X_train, y_train = X_train_full[:-5000], y_train_full[:-5000]
X_valid, y_valid = X_train_full[-5000:], y_train_full[-5000:]

In [7]:
tf.random.set_seed(42)

# Define model builder

In [10]:
# This accepts Hyperparameters object

def build_model(hp):
    n_hidden = hp.Int("n_hidden", min_value=0, max_value=8, default=2)
    n_neurons = hp.Int("n_neurons", min_value=16, max_value=256)
    learning_rate = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2, sampling="log")
    optimizer = hp.Choice("optimizer", values=["sgd", "adam"])
    
    if optimizer =='sgd':
        optimizer = K.optimizers.SGD(learning_rate=learning_rate)
    else:
        optimizer = K.optimizers.Adam(learning_rate=learning_rate)
        
    model = K.models.Sequential()
    model.add(K.layers.Flatten())
    for _ in range(n_hidden):
        model.add(K.layers.Dense(n_neurons, activation='relu'))
    model.add(K.layers.Dense(10, activation='softmax'))
    
    model.compile(optimizer=optimizer,
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'])

    return model
    

In [None]:
random_search_tuner=kt.RandomSearch(build_model,
                                  objective='val_accuracy',
                                  max_trials=5,
                                  overwrite=True,
                                  directory='my_fashion_mnist',
                                  project_name='my_rnd_search',
                                  seed=42)


In [10]:
random_search_tuner.search(X_train, y_train, epochs=10,
                          validation_data=(X_valid, y_valid))

Trial 5 Complete [00h 00m 19s]
val_accuracy: 0.8385999798774719

Best val_accuracy So Far: 0.8632000088691711
Total elapsed time: 00h 01m 39s
INFO:tensorflow:Oracle triggered exit


In [11]:
top3_models = random_search_tuner.get_best_models(num_models=3)

In [12]:
best_model = top3_models[0]

In [13]:
top3_params = random_search_tuner.get_best_hyperparameters(num_trials=3)

In [15]:
top3_params[0].values

{'n_hidden': 7,
 'n_neurons': 100,
 'learning_rate': 0.0012482904754698163,
 'optimizer': 'sgd'}

In [16]:
best_trial = random_search_tuner.oracle.get_best_trials(num_trials=1)[0]

In [17]:
best_trial.summary()

Trial 1 summary
Hyperparameters:
n_hidden: 7
n_neurons: 100
learning_rate: 0.0012482904754698163
optimizer: sgd
Score: 0.8632000088691711


In [20]:
best_trial.metrics.get_last_value('val_accuracy')

0.8632000088691711

# Best model can be trained on the full dataset (train+val) and later evaluated on the test set

In [21]:
best_model.fit(X_train_full, y_train_full, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f53cbe87850>

In [22]:
test_loss, test_accuracy = best_model.evaluate(X_test, y_test)



# Tuning of the model.fit() arguments instead of  architecture parameters, like the batch size, requires subclassing kt.HyperModel class and implementing a wrapper of the .fit() method

In [11]:
class MyClassificationHyperModel(kt.HyperModel):
    def build(self, hp):
        return build_model(hp)
    
    def fit(self, hp, model, X, y, **kwargs):
        if hp.Boolean('normalize'):
            norm_layer = K.layers.Normalization()
            X = norm_layer(X)
        return model.fit(X, y, **kwargs)

In [None]:
hyperband_tuner = kt.Hyperband(MyClassificationHyperModel(),
                              objective='val_accuracy',
                              seed=42,
                              max_epochs=8,
                              factor=3,
                              hyperband_iterations=2,
                              overwrite=True,
                              directory='my_fashion_mnist',
                              project_name='hyperband')

In [13]:
root_logdir = Path(hyperband_tuner.project_dir) / 'tensorboard'
tensorboard_cb = K.callbacks.TensorBoard(root_logdir)
earlystopping_cb = K.callbacks.EarlyStopping(patience=2)
hyperband_tuner.search(X_train, y_train,
                      epochs=10,
                      validation_data=(X_valid, y_valid),
                      callbacks=[earlystopping_cb, tensorboard_cb])

Trial 20 Complete [00h 00m 23s]
val_accuracy: 0.8460000157356262

Best val_accuracy So Far: 0.8600000143051147
Total elapsed time: 00h 04m 14s
INFO:tensorflow:Oracle triggered exit


In [14]:
%load_ext tensorboard

In [15]:
root_logdir

PosixPath('my_fashion_mnist/hyperband/tensorboard')

## Observing HPT with TensorBoard

In [16]:
%tensorboard --logdir='my_fashion_mnist/hyperband/tensorboard'

# Bayesian HP search

In [17]:
bayesian_opt_tuner = kt.BayesianOptimization(
    MyClassificationHyperModel(),
    objective="val_accuracy",
    seed=42,
    max_trials=10,
    alpha=1e-4,
    beta=2.6,
    overwrite=True,
    directory="my_fashion_mnist",
    project_name="bayesian_opt")

In [18]:
bayesian_opt_tuner.search(X_train, y_train,
                      epochs=10,
                      validation_data=(X_valid, y_valid),
                      callbacks=[earlystopping_cb, tensorboard_cb])

Trial 10 Complete [00h 00m 34s]
val_accuracy: 0.8464000225067139

Best val_accuracy So Far: 0.8604000210762024
Total elapsed time: 00h 04m 26s
INFO:tensorflow:Oracle triggered exit
