In [None]:
# Widen the notebook container to 75% of the page so wide outputs are easier to read.
# `display` is imported from the public `IPython.display` module; importing it
# from `IPython.core.display` is deprecated.
from IPython.display import HTML, display

display(HTML("<style>.container { width:75% !important; }</style>"))

# Hyperparameter Tuning

This notebook gives a brief introduction to hyperparameter tuning and compares the most basic tuning algorithms.

In [None]:
# Install dependencies - Google Colab
#!pip install keras-tuner==1.0.2 aisaratuners==1.4.3

In [None]:
import time
import tensorflow as tf
import kerastuner as kt
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Preprocessing

In [None]:
# Load CIFAR-10 through the Keras dataset helper; it comes back as a train
# pair and a test pair.
# MNIST alternative: load `tensorflow.keras.datasets.mnist` instead and add a
# channel axis with `np.expand_dims(..., axis=3)` before scaling.
from tensorflow.keras.datasets import cifar10

(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Cast both image arrays to float32 and divide by 255.0.
x_train, x_test = (
    images.astype("float32") / 255.0 for images in (x_train, x_test)
)

In [None]:
# Show one randomly chosen training image together with its label.
# `np.random.randint` excludes its upper bound, so `len(x_train)` is passed
# directly; `len(x_train) - 1` could never select the last image.
random_sample_idx = np.random.randint(0, len(x_train))
plt.imshow(x_train[random_sample_idx])
plt.show()
print(f'label: {y_train[random_sample_idx]}')
# Height and width are read from the first training image.
print(f'input shape: {len(x_train[0])} by {len(x_train[0][0])}')

## Model Definition
A simple classification model based on Convolutional Neural Networks will be used. It consists of three convolutional layers with ReLU activations, max pooling and dropout regularization for encoding the image, followed by a two-layer fully connected network for classification.

In [None]:
# Setting Environment
from tensorflow import keras
from tensorflow.keras.layers import Conv2D,Dense, Dropout, Flatten, MaxPooling2D

# Turn on memory growth for every GPU TensorFlow reports. When no GPU is
# listed the loop body simply never runs.
physical_devices = tf.config.experimental.list_physical_devices("GPU")
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

# Settings shared by the cells below.
SEED = 37
EPOCHS = 30
NUM_CLASSES = 10
INPUT_SHAPE = x_train[0].shape

# Seed NumPy's global random generator.
np.random.seed(SEED)

## Baseline Performance

In [None]:
# Baseline CNN with fixed hyperparameters: three convolutions with pooling and
# dropout, then a dense layer and a softmax output.
tf.random.set_seed(SEED)
model = keras.Sequential(
    [
        Conv2D(
            filters=8,
            kernel_size=3,
            activation='relu',
            input_shape=INPUT_SHAPE,
        ),
        Conv2D(16, 3, activation='relu'),
        MaxPooling2D(pool_size=2),
        Dropout(rate=0.25),
        Conv2D(32, 3, activation='relu'),
        MaxPooling2D(pool_size=2),
        Dropout(rate=0.25),
        Flatten(),
        Dense(units=128, activation='relu'),
        Dropout(rate=0.25),
        Dense(NUM_CLASSES, activation='softmax'),
    ]
)
model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

### Training Baseline

In [None]:
# Fit the baseline model for EPOCHS epochs with a 10% validation split, and
# take timestamps right before and after the call so the elapsed training
# time can be computed in the next cell.
start_t = time.time()
model.fit(x_train, y_train, epochs=EPOCHS, validation_split=0.1)
end_t = time.time()

### Baseline Performance

In [None]:
# Evaluate the trained baseline on the test set and report loss, accuracy and
# the training time measured in the previous cell.
base_loss, base_accuracy = model.evaluate(x_test, y_test)
base_elapsed_time = end_t - start_t
print(f"Elapsed time (s): {base_elapsed_time:0.2f} (s)")
# The accuracy metric is a fraction, so the `%` format spec is used to scale
# it by 100; appending a literal "%" to the raw fraction printed e.g. "0.712%".
print(f'Baseline loss: {base_loss:0.3f}, accuracy: {base_accuracy:.1%}')

## Classical Hyperparameter Tuning tools

### Model Definition

In [None]:
from kerastuner import HyperModel

class CNNHyperModel(HyperModel):
    """Tunable CNN classifier used by the Keras Tuner searches.

    The search space covers the filter count of the third convolution
    (``num_filters``), the width and activation of the hidden dense layer
    (``units``, ``dense_activation``) and the Adam learning rate
    (``learning_rate``). Everything else is fixed.
    """

    def __init__(self, input_shape, num_classes):
        """Store the model dimensions.

        Args:
            input_shape: Shape of a single input image, passed to the first
                convolution as ``input_shape``.
            num_classes: Number of units in the softmax output layer.
        """
        # Initialise the HyperModel base class so its own attributes are set up
        # before the subclass state is added.
        super().__init__()
        self.input_shape = input_shape
        self.num_classes = num_classes

    def build(self, hp):
        """Build and compile one model for the hyperparameters sampled in ``hp``.

        Args:
            hp: Hyperparameter container supplied by the tuner; the tunable
                values are registered on it via ``hp.Choice``, ``hp.Int`` and
                ``hp.Float``.

        Returns:
            The compiled ``keras.Sequential`` model.
        """
        model = keras.Sequential()
        # NOTE(review): the baseline and AiSara models in this notebook use
        # 8 filters in their first convolution; confirm that 16 is intended.
        model.add(
            Conv2D(
                filters=16,
                kernel_size=3,
                activation="relu",
                input_shape=self.input_shape,
            )
        )
        model.add(Conv2D(16, 3, activation='relu'))
        model.add(MaxPooling2D(pool_size=2))
        model.add(Dropout(rate=0.25))
        # Tunable: number of filters in the third convolution.
        model.add(
            Conv2D(
                filters=hp.Choice("num_filters", values=[16, 32, 64, 128], default=32,),
                activation="relu",
                kernel_size=3,
            )
        )
        model.add(MaxPooling2D(pool_size=2))
        model.add(Dropout(rate=0.25))
        model.add(Flatten())
        # Tunable: width and activation of the hidden dense layer.
        model.add(
            Dense(
                units=hp.Int(
                    "units", min_value=32, max_value=512, step=16, default=128
                ),
                activation=hp.Choice(
                    "dense_activation",
                    values=["relu", "tanh", "sigmoid"],
                    default="relu",
                ),
            )
        )
        model.add(Dropout(rate=0.25))
        model.add(Dense(self.num_classes, activation="softmax"))

        # Tunable: Adam learning rate, sampled on a log scale.
        model.compile(
            optimizer=keras.optimizers.Adam(
                hp.Float(
                    "learning_rate",
                    min_value=1e-4,
                    max_value=1e-2,
                    sampling="LOG",
                    default=1e-3,
                )
            ),
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"],
        )
        return model

In [None]:
# Tuning configuration
from pathlib import Path
from kerastuner.tuners import BayesianOptimization, Hyperband, RandomSearch

# Search budget shared by the tuner definitions.
MAX_TRIALS = 30
EXECUTION_PER_TRIAL = 2
HYPERBAND_MAX_EPOCHS = 30
BAYESIAN_NUM_INITIAL_POINTS = 1

# Base output location and project name handed to each tuner.
project_name = "simple_cnn_model_tuning"
output_dir = Path("./output/cifar10/")

# NOTE: this rebinds `model`, which until now referred to the trained baseline network.
model = CNNHyperModel(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES)

### Tuner definitions

In [None]:
# One tuner per search strategy, all optimising validation accuracy with the
# same seed. Each tuner writes to its own directory derived from `output_dir`.
# `executions_per_trial` is passed to all three tuners so every strategy trains
# each trial the same number of times; without it on BayesianOptimization the
# time/accuracy comparison against the other two tuners is skewed.
tuners = [
    RandomSearch(
        model,
        objective="val_accuracy",
        seed=SEED,
        max_trials=MAX_TRIALS,
        executions_per_trial=EXECUTION_PER_TRIAL,
        directory=f"{output_dir}_random_search",
        project_name=project_name,
    ),
    Hyperband(
        model,
        max_epochs=HYPERBAND_MAX_EPOCHS,
        objective="val_accuracy",
        seed=SEED,
        executions_per_trial=EXECUTION_PER_TRIAL,
        directory=f"{output_dir}_hyperband",
        project_name=project_name,
    ),
    BayesianOptimization(
        model,
        objective='val_accuracy',
        seed=SEED,
        num_initial_points=BAYESIAN_NUM_INITIAL_POINTS,
        max_trials=MAX_TRIALS,
        executions_per_trial=EXECUTION_PER_TRIAL,
        directory=f"{output_dir}_bayesian",
        project_name=project_name,
    ),
]

### Tuner Workflow

In [None]:
def tuner_workflow(tuner, x_train, y_train, x_test, y_test,
                   epochs=None, validation_split=0.1):
    """Run one tuner's search and evaluate its best model on the test set.

    Args:
        tuner: Tuner object exposing ``search_space_summary``, ``search``,
            ``results_summary`` and ``get_best_models``.
        x_train, y_train: Training data forwarded to ``tuner.search``.
        x_test, y_test: Test data used to evaluate the best model.
        epochs: Epochs forwarded to ``tuner.search``. ``None`` (the default)
            falls back to the notebook-level ``EPOCHS`` constant.
        validation_split: Validation split forwarded to ``tuner.search``.

    Returns:
        Tuple ``(elapsed_time, loss, accuracy)``: the wall-clock seconds spent
        in ``tuner.search`` and the two values returned by ``evaluate``.
    """
    if epochs is None:
        # Resolved at call time so a later change to EPOCHS is picked up.
        epochs = EPOCHS

    tuner.search_space_summary()

    # Time only the search itself.
    search_start = time.time()
    tuner.search(
        x_train, y_train, epochs=epochs, validation_split=validation_split
    )
    elapsed_time = time.time() - search_start

    # Show a summary of the search.
    tuner.results_summary()

    # Retrieve the best model and evaluate it on the test data.
    best_model = tuner.get_best_models(num_models=1)[0]
    loss, accuracy = best_model.evaluate(x_test, y_test)
    return elapsed_time, loss, accuracy

In [None]:
# Row 0 is the baseline; one [time, loss, accuracy] row is then added per
# Keras Tuner, in the order of the `tuners` list.
results = [[base_elapsed_time, base_loss, base_accuracy]]
for tuner in tuners:
    elapsed_time, loss, accuracy = tuner_workflow(tuner, x_train, y_train, x_test, y_test)
    results += [[elapsed_time, loss, accuracy]]

In [None]:
# Persist the list of Keras Tuner objects with pickle.
import pickle

with open('./output/tuners.pk', 'wb') as tuners_file:
    pickle.dump(tuners, tuners_file)

## AI Driven Hyperparameter Tuning tools
### Model Definition

In [None]:
def hypermodel_func(hyperparams, trial):
    """Build, compile and train the CNN for one AiSara trial.

    Args:
        hyperparams: Mapping indexed as ``hyperparams[name][trial]`` for the
            names ``'num_filters'``, ``'units'`` and ``'learning_rate'``.
        trial: Index selecting which trial's values to read.

    Returns:
        Tuple ``(model, history)``: the trained model and the object returned
        by ``model.fit``.
    """
    tf.random.set_seed(SEED)

    # Values for this trial.
    conv_filters = hyperparams['num_filters'][trial]
    dense_units = hyperparams['units'][trial]
    learning_rate = hyperparams['learning_rate'][trial]

    layers = [
        Conv2D(
            filters=8,
            kernel_size=3,
            activation='relu',
            input_shape=INPUT_SHAPE,
        ),
        Conv2D(16, 3, activation='relu'),
        MaxPooling2D(pool_size=2),
        Dropout(rate=0.25),
        Conv2D(filters=conv_filters, kernel_size=3, activation='relu'),
        MaxPooling2D(pool_size=2),
        Dropout(rate=0.25),
        Flatten(),
        Dense(units=dense_units, activation='relu'),
        Dropout(rate=0.25),
        Dense(NUM_CLASSES, activation='softmax'),
    ]
    model = keras.Sequential(layers)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )

    # Trains on the notebook-level x_train / y_train with a 10% validation split.
    history = model.fit(x_train, y_train, epochs=EPOCHS, validation_split=0.1)
    return model, history

### Hyperparameter ranges

In [None]:
from aisaratuners import aisara_keras_tuner as akt

# Register the three tunable ranges on an AiSara hyperparameter container.
# The names match the keys read inside `hypermodel_func`.
hyperparams = akt.Hp()
for range_spec in (
    dict(name='num_filters', min=16, max=128),
    dict(name='units', min=32, max=512),
    dict(name='learning_rate', min=1e-4, max=1e-2, type='log'),
):
    hyperparams.numrange(**range_spec)

### Tuner configuration

In [None]:
# Metrics handed to the AiSara optimiser, paired position-by-position with a
# direction string for each.
optimization_metrics = ['val_accuracy', 'val_loss']
optimization_directions = ['max', 'min']

# NOTE: this rebinds `tuner`, previously the loop variable over the Keras tuners.
tuner = akt.HpOptimization(
    hyperparams,
    hypermodel_func,
    optimization_metrics,
    optimization_directions,
    num_trials=5,
    rounds=3,
    mode='p',
    aisara_seed='fixed',
)

### Tuning job

In [None]:
# Run the AiSara optimisation, taking timestamps right before and after the
# call. This reuses (and overwrites) the `start_t` / `end_t` names from the
# baseline training cell.
start_t = time.time()
tuner.run_opti()
end_t = time.time()

### AI Tuner performance

In [None]:
# Evaluate the best model found by the AiSara tuner on the test set and add
# its [time, loss, accuracy] row to the comparison table.
ai_best_model = tuner.best_model
ai_loss, ai_accuracy = ai_best_model.evaluate(x_test, y_test)
ai_elapsed_time = end_t - start_t
print(f"Elapsed time (s): {ai_elapsed_time:0.2f} (s)")
# The accuracy metric is a fraction, so the `%` format spec is used to scale
# it by 100; appending a literal "%" to the raw fraction printed e.g. "0.712%".
print(f'AI Tuner Best model loss: {ai_loss:0.3f}, accuracy: {ai_accuracy:.1%}')
# NOTE: re-running this cell appends another row each time.
results.append([ai_elapsed_time, ai_loss, ai_accuracy])

### AI Tuner Optimization Results

In [None]:
# Plot the optimisation results recorded by the AiSara tuner (requires the
# tuning job above to have run).
tuner.plot_opti_results()

### Hyperparameter solution space

In [None]:
# Plot the hyperparameter search space of the AiSara tuner (requires the
# tuning job above to have run).
tuner.plot_search_space()

## Comparison Results

In [None]:
# One summary line per approach, in the order the rows were added to `results`.
# The trailing tabs are part of each label.
row_labels = (
    'Baseline\t\t',
    'Random Search\t\t',
    'Hyperband\t\t',
    'Bayesian Optimization\t',
    'AiSara\t\t\t',
)
for row_idx, row_label in enumerate(row_labels):
    row = results[row_idx]
    # Elapsed time is stored in seconds and shown in minutes.
    print(f'{row_label}time: {row[0]/60:6.2f}m, loss:{row[1]:5.3f}, accuracy:{row[2]:5.3f}')

In [None]:
# Save the AiSara tuner. The original dumped `tuners` (the Keras Tuner list,
# already written to tuners.pk) instead of the AiSara `tuner` object.
with open('./output/aisaraTuner.pk', 'wb') as f:
    pickle.dump(tuner, f)