In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from keras import Input
import utils
import metrics

# Dataset dimensions: ten digit classes (0-9), 28 * 28 = 784 pixels per image.
num_classes = 10
num_features = 28 * 28

# Fetch the raw MNIST train/test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Run the project's preprocessing helper over both splits. The exact
# transformations live in utils.preprocess and are not visible in this notebook.
X_train, y_train, X_test, y_test = utils.preprocess(
    X_train,
    y_train,
    X_test,
    y_test,
    num_classes,
    num_features,
    print_summary=False,
)

In [None]:
# Loss the network is compiled with.
# NOTE(review): the sparse variant expects integer class labels -- confirm that
# utils.preprocess does not one-hot encode y_train / y_test.
loss = keras.losses.SparseCategoricalCrossentropy()

# Metrics tracked alongside the loss; f1 comes from the imported `metrics` module.
eval_metrics = [metrics.f1]

# Batch size handed to the training call.
batch_size = 256

# Maximum number of epochs per training run.
epochs = 1000

# Verbosity level handed to the training call.
verbose = 1

# Early stopping callback.
# Monitor the validation loss -- the same quantity the tuner optimizes
# (objective="val_loss") -- instead of the training loss, so that a run is cut
# short when generalization stops improving rather than only when the training
# loss plateaus. Any fit() call using this callback must supply validation data,
# otherwise "val_loss" is not available to monitor.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=200)

# **$1$. Hyperparameter Fine-Tuning**

In [None]:
import keras_tuner

def build_model(hp):
    """Build and compile the MLP for a single hyperparameter-tuning trial.

    Sampled hyperparameters (names are what the tuner reports back):
        units_1:       width of the first hidden layer.
        units_2:       width of the second hidden layer.
        a_reg:         L2 penalty applied to every Dense kernel.
        learning_rate: RMSprop learning rate.

    Args:
        hp: hyperparameter container supplied by the tuner; values are drawn
            through ``hp.Choice``.

    Returns:
        A compiled ``keras.Sequential`` model named "MLP_RMSProp_FT". It is
        compiled with the notebook-level ``loss`` and ``eval_metrics``.
    """
    # hyperparams
    n_hidden_1 = hp.Choice("units_1", [64, 128])
    n_hidden_2 = hp.Choice("units_2", [256, 512])
    a_reg = hp.Choice("a_reg", [0.1, 0.001, 0.000001])
    learning_rate = hp.Choice("learning_rate", [0.1, 0.01, 0.001])

    # mlp model
    mlp_rmsprop_ft = keras.Sequential(name="MLP_RMSProp_FT")

    # Take Input from the same tensorflow.keras namespace as the layers, model
    # and optimizer below, rather than from the standalone `keras` package, so
    # every object in the graph comes from one Keras implementation.
    mlp_rmsprop_ft.add(keras.Input(shape=(num_features,)))

    # (layer name, units, activation) for each Dense layer, in stacking order.
    layer_specs = [
        ("hidden_layer_1", n_hidden_1, "relu"),
        ("hidden_layer_2", n_hidden_2, "relu"),
        ("output_layer", num_classes, "softmax"),
    ]
    for layer_name, units, activation in layer_specs:
        # Each layer gets its own regularizer and initializer instance.
        mlp_rmsprop_ft.add(
            keras.layers.Dense(
                name=layer_name,
                units=units,
                activation=activation,
                kernel_regularizer=regularizers.l2(a_reg),
                kernel_initializer=initializers.HeNormal(),
            )
        )

    mlp_rmsprop_ft.compile(
        optimizer=tf.keras.optimizers.RMSprop(learning_rate=learning_rate),
        loss=loss,
        metrics=eval_metrics,
    )

    return mlp_rmsprop_ft

# Bayesian-optimization search over the hyperparameters declared in build_model,
# ranked by validation loss. Results are written under tuning/mlp_tuning;
# overwrite=True starts a fresh project instead of resuming a previous one.
#
# keras_tuner.RandomSearch accepts the same keyword arguments and was the
# earlier alternative here (with max_trials=12).
#
# NOTE(review): build_model only uses hp.Choice, giving 2 * 2 * 3 * 3 = 36
# distinct combinations, fewer than max_trials=50 -- confirm this is intended.
tuner = keras_tuner.BayesianOptimization(
    hypermodel=build_model,
    objective="val_loss",
    max_trials=50,
    directory="tuning",
    project_name="mlp_tuning",
    overwrite=True,
)

# Print the search space that will be explored.
tuner.search_space_summary()

In [None]:
# Run the hyperparameter search.
# Validation uses a 20% hold-out taken from the training data (validation_split)
# instead of the test split: selecting hyperparameters on (X_test, y_test) would
# leak test information into model selection and bias any later test score.
# NOTE(review): validation_split requires X_train / y_train to be arrays or
# tensors (not a tf.data.Dataset or generator) -- confirm utils.preprocess output.
tuner.search(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.2,
    verbose=verbose,
    callbacks=[callback],
)

In [None]:
# Retrieve the top-ranked model and the hyperparameter set of the top-ranked trial.
best_model = tuner.get_best_models(num_models=1)[0]
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Show the winning architecture, then each tuned value in declaration order.
best_model.summary()
for hp_name in ("units_1", "units_2", "a_reg", "learning_rate"):
    print(best_hyperparameters.get(hp_name))
