# Fine Tuning Neural Network Hyperparameters

In [1]:
# We can use GridSearchCV or RandomizedSearchCV to explore the hyperparameter space.
# We'll need to wrap our Keras models in objects that mimic regular Scikit-learn Regressors.

In [2]:
import tensorflow as tf
from tensorflow import keras 

In [3]:
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full),(X_test, y_test) = fashion_mnist.load_data()

In [4]:
X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

In [5]:
# First we create a function that will build and compile a keras model, given a set of hyperparameters.

import keras_tuner as kt

def build_model(hp):
    n_hidden = hp.Int("n_hidden", min_value=0, max_value=8, default=2)
    n_neurons = hp.Int("n_neurons", min_value=16, max_value=256)
    learning_rate = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2, sampling="log")
    optimizer = hp.Choice("optimizer", values=["sgd", "adam"])
    if optimizer == "sgd":
        optimizer = tf.keras.optimizers.SGD(learning_rate = learning_rate)
    else:
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten())
    for _ in range(n_hidden):
        model.add(tf.keras.layers.Dense(10, activation="softmax"))
        model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
        return model

In [6]:
# doing a basic random search:

random_search_tuner = kt.RandomSearch(
    build_model, objective="val_accuracy", max_trials=5, overwrite=True, directory="my_fashion_mnist", project_name="my_rnd_search", seed=42)

random_search_tuner.search(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))

Trial 5 Complete [00h 00m 35s]
val_accuracy: 0.7570000290870667

Best val_accuracy So Far: 0.8468000292778015
Total elapsed time: 00h 03m 17s


In [7]:
top3_models = random_search_tuner.get_best_models(num_models=3)
best_model = top3_models[0]
best_model

  saveable.load_own_variables(weights_store.get(inner_path))


<Sequential name=sequential, built=True>

In [8]:
top3_models

[<Sequential name=sequential, built=True>,
 <Sequential name=sequential, built=True>,
 <Sequential name=sequential, built=True>]

In [9]:
top3_params = random_search_tuner.get_best_hyperparameters(num_trials=3)
top3_params[0].values

{'n_hidden': 8,
 'n_neurons': 37,
 'learning_rate': 0.008547485565344062,
 'optimizer': 'sgd'}

In [10]:
# Getting the best trial from the RandomSearchOracle.

best_trial = random_search_tuner.oracle.get_best_trials(num_trials=1)[0]
best_trial.summary()

Trial 3 summary
Hyperparameters:
n_hidden: 8
n_neurons: 37
learning_rate: 0.008547485565344062
optimizer: sgd
Score: 0.8468000292778015


In [11]:
# More training and evaluations.

best_model.fit(X_train_full, y_train_full, epochs=10)
test_loss, test_accuracy = best_model.evaluate(X_test, y_test)

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.6984 - loss: 2385.3774
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.7737 - loss: 1391.2427
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7866 - loss: 1306.9091
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7866 - loss: 1331.8324
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.7957 - loss: 1241.5813
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7938 - loss: 1268.6241
Epoch 7/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.7969 - loss: 1238.6377
Epoch 8/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.7911 - loss: 1333.6084


In [22]:
# If we may want to fine-tune the data preprocessing hyperparameters or model.fit() args such as the batch size.
# The following buils the same model with the same hyperparameters but uses the Boolean "normalize" hyperparameter to control wether or not to 
# standardize the training data before fitting the model.

class MyClassificationHyperModel(kt.HyperModel):
    def build(self, hp):
        model =  build_model(hp)
        if not isinstance(model, keras.models.Model):
            raise ValueError("build_model function did not return a valid Keras model instance.")
        return model
    def fit(self, hp, model, X, y, **kwargs):
        if hp.Boolean("normalize"):
            norm_layer = tf.keras.layers.Normalization()
            X = norm_layer(X)
        return model.fit(X, y, **kwargs)

In [23]:
# we can then pass an instance of this class to the tuner of our choice, instead of passing the build_model function.

hyperband_tuner = kt.Hyperband(
    MyClassificationHyperModel(), objective="val_accuracy", seed=42, max_epochs=10, factor=3, 
    hyperband_iterations=2, overwrite=True, directory="my_fashion_mnist", project_name="hyperband")

In [24]:
from time import strftime

def get_run_logdir(root_logdir="my_logs"):
    return Path(root_logdir) / strftime("run_%Y_%m_%d_%H_%M_%S")
run_logdir = get_run_logdir()
run_logdir

WindowsPath('my_logs/run_2024_06_17_16_05_15')

In [25]:
# we'll run the hyperband tuner using the TensorBoard callback.
from pathlib import Path

root_logdir = Path(hyperband_tuner.project_dir) / "tensorboard"
tensorboard_cb = tf.keras.callbacks.TensorBoard(root_logdir)
early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=2)
hyperband_tuner.search(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid), callbacks=[early_stopping_cb, tensorboard_cb])

Trial 50 Complete [00h 01m 00s]
val_accuracy: 0.8222000002861023

Best val_accuracy So Far: 0.8626000285148621
Total elapsed time: 00h 17m 14s
