In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from functools import partial
from tensorflow.python.keras.layers import Dense
from tensorboard.plugins.hparams import api as hp
# run in colab
# !pip install tensorboard
# !rm -rvf logs

In [None]:
# Load Fashion-MNIST and unpack the (train, test) pairs.
fashion_mnist = tf.keras.datasets.fashion_mnist.load_data()
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist

# The last 5000 training samples are held out as the validation set.
X_train, X_valid = X_train_full[:-5000], X_train_full[-5000:]
y_train, y_valid = y_train_full[:-5000], y_train_full[-5000:]

# Divide every pixel value by 255 (floating-point division).
X_train = X_train / 255.
X_valid = X_valid / 255.
X_test = X_test / 255.

m = X_train.shape[0]  # size of the first axis of the training images
print(X_train[0])

# Names for the ten classes (assumed to be listed in label-index order — confirm).
class_names = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
]

NN

In [None]:
# Notebook-level early-stopping callback (patience of 10 epochs, best weights restored).
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True,
)

# Search space for the dense network, one HParam per tunable setting.
HP_ACTIVATION = hp.HParam('activation', hp.Discrete(['relu', 'tanh']))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0., 0.2))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['SGD', 'Adam']))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001, 0.01]))
HP_INITIALIZER = hp.HParam('initializer', hp.Discrete(['uniform', 'random_normal']))

# Tag under which each run's accuracy is logged.
METRIC_ACCURACY = 'accuracy'

# Write the experiment-level HParams configuration to the root log directory.
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[
            HP_ACTIVATION,
            HP_DROPOUT,
            HP_OPTIMIZER,
            HP_LEARNING_RATE,
            HP_INITIALIZER,
        ],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
    )

# Reset Keras state and fix the TensorFlow seed before any model is built.
tf.keras.backend.clear_session()
tf.random.set_seed(42)
def create_model(hparams, nodes=[300,100], batch_normalization=True, epochs_num=20, batch_size=32):
    """Build, train and evaluate a dense classifier for one hyperparameter set.

    The model is trained on the notebook-level ``X_train``/``y_train`` with
    ``X_valid``/``y_valid`` as validation data, then evaluated on
    ``X_test``/``y_test``.

    Args:
        hparams: Mapping keyed by the notebook-level ``HP_*`` objects. Must
            provide ``HP_ACTIVATION``, ``HP_INITIALIZER``, ``HP_DROPOUT``,
            ``HP_OPTIMIZER`` and ``HP_LEARNING_RATE``.
        nodes: Unit count of each hidden Dense layer, in order. Only read,
            never mutated.
        batch_normalization: When true, a BatchNormalization layer is added
            in front of every hidden Dense layer.
        epochs_num: Maximum number of epochs passed to ``fit``.
        batch_size: Mini-batch size passed to ``fit``.

    Returns:
        The accuracy value returned by ``model.evaluate`` on the test data.

    Raises:
        ValueError: If ``hparams[HP_OPTIMIZER]`` is neither ``'Adam'`` nor
            ``'SGD'``.
    """
    # Resolve the optimizer first so an unsupported name fails before any
    # layers are built (previously this surfaced as an unbound local).
    # The non-legacy optimizer classes are used, matching the other models in
    # this notebook; the ``legacy`` namespace is not available under Keras 3.
    optimizer_name = hparams[HP_OPTIMIZER]
    learning_rate = hparams[HP_LEARNING_RATE]
    if optimizer_name == 'Adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    elif optimizer_name == 'SGD':
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    else:
        raise ValueError(f'Unsupported optimizer: {optimizer_name!r}')

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))

    for node in nodes:
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Dense(node,
                                        activation=hparams[HP_ACTIVATION],
                                        kernel_initializer=hparams[HP_INITIALIZER]))
        model.add(tf.keras.layers.Dropout(hparams[HP_DROPOUT]))

    model.add(tf.keras.layers.Dense(10, activation='softmax'))

    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # A fresh callback per run so early-stopping state is never shared
    # between experiments.
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=10,
                                                         restore_best_weights=True)
    model.fit(X_train,
              y_train,
              batch_size=batch_size,
              epochs=epochs_num,
              validation_data=(X_valid, y_valid),
              callbacks=[early_stopping_cb])

    # NOTE(review): the returned score comes from the test set and is what the
    # hyperparameter comparison is based on — consider scoring on the
    # validation set instead so the test set stays untouched.
    _, accuracy = model.evaluate(X_test, y_test)

    return accuracy


def experiment(experiment_dir, hparams):
    """Train one dense model and log its hyperparameters and accuracy.

    Args:
        experiment_dir: Directory the summary file writer for this run
            writes to.
        hparams: Hyperparameter mapping forwarded to ``hp.hparams`` and
            ``create_model``.
    """
    run_writer = tf.summary.create_file_writer(experiment_dir)
    with run_writer.as_default():
        # Record the hyperparameter values used for this run.
        hp.hparams(hparams)
        # Train the model and log the resulting accuracy as a single point.
        tf.summary.scalar(METRIC_ACCURACY, create_model(hparams), step=1)

experiment_no = 0

# Exhaustive grid over every discrete value; dropout is sampled at the two
# ends of its interval.
for initializer in HP_INITIALIZER.domain.values:
    for activation in HP_ACTIVATION.domain.values:
        for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
            for optimizer in HP_OPTIMIZER.domain.values:
                for learning_rate in HP_LEARNING_RATE.domain.values:
                    hparams = {
                        HP_ACTIVATION: activation,
                        HP_DROPOUT: dropout_rate,
                        HP_OPTIMIZER: optimizer,
                        HP_LEARNING_RATE: learning_rate,
                        HP_INITIALIZER: initializer
                    }

                    # The run must stay inside the innermost loop: one level
                    # out, only the last learning rate of each optimizer
                    # would ever be trained.
                    experiment_name = f'Experiment {experiment_no}'
                    print(f'Starting Experiment: {experiment_name}')
                    print({h.name: hparams[h] for h in hparams})
                    experiment('logs/hparam_tuning/' + experiment_name, hparams)
                    experiment_no += 1

In [None]:
%load_ext tensorboard

# %tensorboard --logdir logs/hparam_tuning --reload_interval 15
# Launches tensorboard to read the logs from ./logs directory that was specified earlier when creating the callback
%tensorboard --logdir my_logs --reload_interval 5

Fine-Tuning NN Hyperparameters with keras_tuner

In [None]:
# run in colab
%pip install -q -U keras-tuner
import keras_tuner as kt

def build_model(hp):
    """Build and compile a dense classifier from tuner-supplied hyperparameters.

    Args:
        hp: Hyperparameter container supplied by the tuner; its ``Int``,
            ``Float`` and ``Choice`` methods are used to declare the search
            space. Shadows the notebook-level ``hp`` module inside this
            function.

    Returns:
        A compiled ``tf.keras.Sequential`` model with ``n_hidden`` ReLU Dense
        layers of ``n_neurons`` units and a 10-unit softmax output.
    """
    # Declare the search space; the declaration order is kept stable.
    depth = hp.Int("n_hidden", min_value=0, max_value=8, default=2)
    width = hp.Int("n_neurons", min_value=16, max_value=256)
    lr = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2,
                  sampling="log")
    optimizer_name = hp.Choice("optimizer", values=["sgd", "adam"])

    # Anything other than "sgd" falls through to Adam.
    optimizer_cls = (tf.keras.optimizers.SGD if optimizer_name == "sgd"
                     else tf.keras.optimizers.Adam)
    optimizer = optimizer_cls(learning_rate=lr)

    layers = [tf.keras.layers.Flatten()]
    layers.extend(tf.keras.layers.Dense(width, activation="relu")
                  for _ in range(depth))
    layers.append(tf.keras.layers.Dense(10, activation="softmax"))

    model = tf.keras.Sequential(layers)
    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer,
                  metrics=["accuracy"])
    return model

# Random search over the space declared in build_model, ranked by validation
# accuracy; earlier results in the same project directory are overwritten.
random_search_tuner = kt.RandomSearch(
    build_model,
    objective="val_accuracy",
    max_trials=5,
    overwrite=True,
    directory="my_fashion_mnist",
    project_name="my_rnd_search",
    seed=42,
)
random_search_tuner.search(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
)

# Keep the three best models; the first entry is used as the best one.
top3_models = random_search_tuner.get_best_models(num_models=3)
best_model = top3_models[0]

# Show the hyperparameters and score of the best trial.
best_trial = random_search_tuner.oracle.get_best_trials(num_trials=1)[0]
best_trial.summary()

CNN

In [4]:
# Reload Fashion-MNIST for the convolutional models.
mnist = tf.keras.datasets.fashion_mnist.load_data()
(X_train_full, y_train_full), (X_test, y_test) = mnist

# Add a trailing channel axis, cast to float32 and divide by 255.
X_train_full = X_train_full[..., np.newaxis].astype(np.float32) / 255
X_test = X_test.astype(np.float32)[..., np.newaxis] / 255

# The last 5000 training samples form the validation set.
X_train, y_train = X_train_full[:-5000], y_train_full[:-5000]
X_valid, y_valid = X_train_full[-5000:], y_train_full[-5000:]

In [None]:
from functools import partial

# Notebook-level early-stopping callback (patience of 10 epochs, best weights restored).
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True,
)

# Search space for the convolutional network, one HParam per tunable setting.
HP_ACTIVATION = hp.HParam('activation', hp.Discrete(['relu','tanh']))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0., 0.5))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['SGD','Adam']))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001, 0.01]))
HP_INITIALIZER = hp.HParam('initializer', hp.Discrete(['he_normal','random_normal', 'uniform']))
HP_KERNEL_SIZE = hp.HParam('kernel_size', hp.Discrete([3]))
HP_PADDING = hp.HParam('padding', hp.Discrete(['same', 'valid']))

# Tag under which each run's accuracy is logged.
METRIC_ACCURACY = 'accuracy'

# Write the experiment-level HParams configuration to the CNN log root.
with tf.summary.create_file_writer('logsCnn/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[
            HP_ACTIVATION,
            HP_DROPOUT,
            HP_OPTIMIZER,
            HP_LEARNING_RATE,
            HP_INITIALIZER,
            HP_KERNEL_SIZE,
            HP_PADDING,
        ],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
    )

# Reset Keras state and fix the TensorFlow seed before any model is built.
tf.keras.backend.clear_session()
tf.random.set_seed(42)

def create_cnn_model(hparams, filtersList=[64,128, 256], nodes=[300,100], batch_normalization=True, epochs_num=20, batch_size=32):
    """Build, train and evaluate a CNN classifier for one hyperparameter set.

    The model is trained on the notebook-level ``X_train``/``y_train`` with
    ``X_valid``/``y_valid`` as validation data, then evaluated on
    ``X_test``/``y_test``.

    Args:
        hparams: Dict keyed by the notebook-level ``HP_*`` objects. Must
            provide ``HP_ACTIVATION``, ``HP_INITIALIZER``, ``HP_DROPOUT``,
            ``HP_OPTIMIZER`` and ``HP_LEARNING_RATE``. ``HP_KERNEL_SIZE`` and
            ``HP_PADDING`` are optional and default to ``3`` and ``'same'``.
        filtersList: Filter count of each Conv2D + MaxPool2D stage, in order.
            Only read, never mutated.
        nodes: Unit count of each hidden Dense layer, in order. Only read,
            never mutated.
        batch_normalization: When true, a BatchNormalization layer follows
            the Dropout of every hidden Dense layer.
        epochs_num: Maximum number of epochs passed to ``fit``.
        batch_size: Mini-batch size passed to ``fit``.

    Returns:
        The accuracy value returned by ``model.evaluate`` on the test data.

    Raises:
        ValueError: If ``hparams[HP_OPTIMIZER]`` is neither ``'Adam'`` nor
            ``'SGD'``.
    """
    # Resolve the optimizer first so an unsupported name fails before any
    # layers are built (previously this surfaced as an unbound local).
    optimizer_name = hparams[HP_OPTIMIZER]
    learning_rate = hparams[HP_LEARNING_RATE]
    if optimizer_name == 'Adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    elif optimizer_name == 'SGD':
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    else:
        raise ValueError(f'Unsupported optimizer: {optimizer_name!r}')

    # Kernel size and padding are part of the declared search space; honour
    # them when supplied and keep the former fixed values otherwise.
    kernel_size = hparams.get(HP_KERNEL_SIZE, 3)
    padding = hparams.get(HP_PADDING, 'same')
    DefaultConv2D = partial(tf.keras.layers.Conv2D,
                            kernel_size=kernel_size,
                            padding=padding,
                            activation=hparams[HP_ACTIVATION],
                            kernel_initializer='he_normal')

    model = tf.keras.models.Sequential()
    # Declare the input once instead of passing input_shape to every conv layer.
    model.add(tf.keras.layers.Input(shape=[28, 28, 1]))

    for filters in filtersList:
        model.add(DefaultConv2D(filters=filters))
        model.add(tf.keras.layers.MaxPool2D())

    model.add(tf.keras.layers.Flatten())
    for node in nodes:
        model.add(tf.keras.layers.Dense(node,
                                        activation=hparams[HP_ACTIVATION],
                                        kernel_initializer=hparams[HP_INITIALIZER]))
        model.add(tf.keras.layers.Dropout(hparams[HP_DROPOUT]))
        if batch_normalization:
            model.add(tf.keras.layers.BatchNormalization())

    model.add(tf.keras.layers.Dense(10, activation='softmax'))

    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # A fresh callback per run so early-stopping state is never shared
    # between experiments.
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=10,
                                                         restore_best_weights=True)
    model.fit(X_train,
              y_train,
              batch_size=batch_size,
              epochs=epochs_num,
              validation_data=(X_valid, y_valid),
              callbacks=[early_stopping_cb])

    # NOTE(review): the returned score comes from the test set and is what the
    # hyperparameter comparison is based on — consider scoring on the
    # validation set instead so the test set stays untouched.
    _, accuracy = model.evaluate(X_test, y_test)

    return accuracy


def experiment(experiment_dir, hparams):
    """Train one CNN and log its hyperparameters and accuracy.

    Args:
        experiment_dir: Directory the summary file writer for this run
            writes to.
        hparams: Hyperparameter mapping forwarded to ``hp.hparams`` and
            ``create_cnn_model``.
    """
    summary_writer = tf.summary.create_file_writer(experiment_dir)
    with summary_writer.as_default():
        # Record the hyperparameter values used for this run.
        hp.hparams(hparams)
        # Train the CNN, then log the accuracy it returns as a single point.
        run_accuracy = create_cnn_model(hparams)
        tf.summary.scalar(METRIC_ACCURACY, run_accuracy, step=1)

experiment_no = 0

# Exhaustive grid over every discrete value; dropout is sampled at the two
# ends of its interval.
for initializer in HP_INITIALIZER.domain.values:
    for activation in HP_ACTIVATION.domain.values:
        for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
            for optimizer in HP_OPTIMIZER.domain.values:
                for learning_rate in HP_LEARNING_RATE.domain.values:
                    hparams = {
                        HP_ACTIVATION: activation,
                        HP_DROPOUT: dropout_rate,
                        HP_OPTIMIZER: optimizer,
                        HP_LEARNING_RATE: learning_rate,
                        HP_INITIALIZER: initializer
                    }

                    # The run must stay inside the innermost loop: one level
                    # out, only the last learning rate of each optimizer
                    # would ever be trained.
                    experiment_name = f'Experiment {experiment_no}'
                    print(f'Starting Experiment: {experiment_name}')
                    print({h.name: hparams[h] for h in hparams})
                    # Runs go under the CNN log root ('logsCnn/hparam_tuning'),
                    # where the HParams config was written, so they do not mix
                    # with the dense-network runs in 'logs/hparam_tuning'.
                    experiment('logsCnn/hparam_tuning/' + experiment_name, hparams)
                    experiment_no += 1

Fine-Tuning CNN Hyperparameters with keras_tuner

In [None]:
%pip install -q -U keras-tuner
import keras_tuner as kt

def build_model(hp):
    """Build and compile a two-convolution CNN from tuner-supplied hyperparameters.

    Args:
        hp: Hyperparameter container supplied by the tuner; its ``Int`` and
            ``Choice`` methods are used to declare the search space. Shadows
            the notebook-level ``hp`` module inside this function.

    Returns:
        A compiled ``tf.keras.Sequential`` model: two ReLU Conv2D layers, a
        Flatten, one ReLU Dense layer and a 10-unit softmax output.
    """
    # Declare the search space; the declaration order is kept stable.
    conv_1_filters = hp.Int('conv_1_filter', min_value=32, max_value=128, step=16)
    conv_1_kernel = hp.Choice('conv_1_kernel', values = [3,5])
    conv_2_filters = hp.Int('conv_2_filter', min_value=32, max_value=64, step=16)
    conv_2_kernel = hp.Choice('conv_2_kernel', values = [3,5])
    dense_units = hp.Int('dense_1_units', min_value=32, max_value=128, step=16)
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3])

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv2D(filters=conv_1_filters,
                                     kernel_size=conv_1_kernel,
                                     activation='relu',
                                     input_shape=(28,28,1)))
    model.add(tf.keras.layers.Conv2D(filters=conv_2_filters,
                                     kernel_size=conv_2_kernel,
                                     activation='relu'))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(units=dense_units, activation='relu'))
    model.add(tf.keras.layers.Dense(10, activation='softmax'))

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Random search over the CNN space declared in build_model, ranked by
# validation accuracy.
random_search_tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    directory='output',
    project_name="Mnist Fashion",
)

random_search_tuner.search(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
)

# Keep the three best models; the first entry is used as the best one.
top3_models = random_search_tuner.get_best_models(num_models=3)
best_model = top3_models[0]

# Show the hyperparameters and score of the best trial.
best_trial = random_search_tuner.oracle.get_best_trials(num_trials=1)[0]
best_trial.summary()

In [None]:
# Show the architecture of each of the three best models.
for rank, candidate in enumerate(top3_models, start=1):
    print(f'Model {rank}:')
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit a stray "None" line).
    candidate.summary()

In [None]:
# Best accuracy: final convolutional network, trained with early stopping.
tf.keras.backend.clear_session()
tf.random.set_seed(42)
early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=10,
                                                     restore_best_weights=True)

conv_net = tf.keras.Sequential()
conv_net.add(tf.keras.layers.Input(shape=[28, 28, 1]))

# Three BatchNorm -> Conv2D -> MaxPool stages.
conv_net.add(tf.keras.layers.BatchNormalization())
conv_net.add(tf.keras.layers.Conv2D(filters=32, kernel_size=5, activation='relu'))
conv_net.add(tf.keras.layers.MaxPool2D())

conv_net.add(tf.keras.layers.BatchNormalization())
conv_net.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'))
conv_net.add(tf.keras.layers.MaxPool2D())

conv_net.add(tf.keras.layers.BatchNormalization())
conv_net.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
conv_net.add(tf.keras.layers.MaxPool2D())

# Classifier head.
conv_net.add(tf.keras.layers.Flatten())
conv_net.add(tf.keras.layers.BatchNormalization())
conv_net.add(tf.keras.layers.Dense(128, activation='relu'))

conv_net.add(tf.keras.layers.Dense(10, activation='softmax'))

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
loss = tf.keras.losses.sparse_categorical_crossentropy
conv_net.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

# The early-stopping callback created above has to be passed to fit();
# otherwise all 50 epochs run and the best weights are never restored.
conv_net.fit(X_train, y_train, epochs=50,
             validation_data=(X_valid, y_valid),
             callbacks=[early_stopping_cb])

conv_net.evaluate(X_test, y_test)