In [None]:
import tensorflow as tf
import keras
import random
from pathlib import Path 
from time import strftime 

In [None]:
# Load the CIFAR-10 train/test splits through Keras.
(X_train_all, y_train_all), (X_test, y_test) = keras.datasets.cifar10.load_data()

# Hold out the last 5000 training samples as the validation set; everything
# before them is used for training.
X_train, X_valid = X_train_all[:-5000], X_train_all[-5000:]
y_train, y_valid = y_train_all[:-5000], y_train_all[-5000:]


In [None]:
# Divide every split by 255 (presumably mapping 0-255 pixel values into
# [0, 1] — confirm the dtype/range of the loaded arrays).
# Each name is rebound to its scaled version, so re-running this cell on its
# own divides by 255 a second time.
X_train, X_valid, X_test = (
    pixels / 255. for pixels in (X_train, X_valid, X_test)
)

In [None]:
# Search space for the manual random search: every trial draws one value per
# key from the candidate lists below.

param_dict = dict(
    learning_rate=[0.0001, 0.0005, 0.001, 0.005, 0.01],  # initial rate handed to the LR schedule
    batch_size=[16, 32, 64, 128],
    epochs=[50, 100, 150, 200],  # upper bound per trial
    num_layers=[2, 4, 6, 8, 10],  # hidden Dense layers
    neurons=[200, 400, 600],  # units per hidden layer
    optimizer=['adam', 'sgd'],
    lr_sched=['exp', 'poly'],  # which learning-rate schedule to build
)

In [None]:
def random_model(neurons=128, num_layers=2):
    """Build the fully connected classifier used by the random search.

    The network flattens a 32x32x3 input, applies `num_layers` ReLU Dense
    layers of `neurons` units each and ends in a 10-unit softmax layer.

    Args:
        neurons: Number of units in every hidden Dense layer.
        num_layers: Number of hidden Dense layers.

    Returns:
        An uncompiled `tf.keras.Sequential` model.
    """
    layers = tf.keras.layers
    stack = [layers.Input(shape=[32, 32, 3]), layers.Flatten()]
    stack.extend(
        layers.Dense(neurons, activation="relu") for _ in range(num_layers)
    )
    stack.append(layers.Dense(10, activation="softmax"))
    return tf.keras.Sequential(stack)

In [None]:
def other_model(neurons, num_layers=8):
    """Build the test MLP that was used to plot TensorBoard learning curves.

    The network flattens a 32x32x3 input, applies `num_layers` ReLU Dense
    layers of `neurons` units each and ends in a 10-unit softmax layer.

    Args:
        neurons: Number of units in every hidden Dense layer.
        num_layers: Number of hidden Dense layers. Defaults to 8, the depth
            that was previously hard-coded, so existing calls are unchanged.

    Returns:
        An uncompiled `tf.keras.Sequential` model.
    """
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=[32, 32, 3]))
    model.add(tf.keras.layers.Flatten())
    # A loop replaces the eight copy-pasted Dense lines.
    for _ in range(num_layers):
        model.add(tf.keras.layers.Dense(neurons, activation="relu"))
    model.add(tf.keras.layers.Dense(10, activation="softmax"))
    return model

In [None]:
# This is NOT the random-search loop: it trains one fixed configuration of
# other_model several times so its learning curves can be plotted in
# TensorBoard.

results2 = []
num_trials = 10
for trial in range(num_trials):
    # Reset Keras' global state so the models built by earlier trials do not
    # keep accumulating in memory.
    tf.keras.backend.clear_session()

    lr_og = 0.0005  # initial learning rate of the schedule

    # The end rate has to be below the initial rate for the schedule to decay.
    # It used to be a fixed 0.01, i.e. 20x *above* lr_og, which made the
    # "decay" ramp the learning rate up instead. Keep a 10x decay.
    end_learning_rate = lr_og * 0.1
    decay_steps = 10000
    # Built from tf.keras (not the standalone keras import) so the schedule
    # and the optimizer below come from the same Keras implementation.
    lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
        lr_og,
        decay_steps,
        end_learning_rate,
        power=0.5)

    model = other_model(neurons=600)  # creating the model

    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    # One TensorBoard sub-directory per trial.
    run_logdir = Path("my_logs/manual2") / f"trial_{trial}"
    tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=100)

    history = model.fit(
        X_train, y_train,
        epochs=200,
        validation_data=(X_valid, y_valid),
        callbacks=[tensorboard_cb, early_stopping_cb],
        batch_size=128
    )

    # Despite the "final_" prefix, these are the best (maximum) accuracies
    # seen over all epochs of the trial.
    results2.append({
        'final_val_acc' : max(history.history['val_accuracy']),
        'final_train_acc' : max(history.history['accuracy']),
        'run_id' : trial
    })

In [None]:
results2 # display the per-trial best train/validation accuracies collected by the loop above

In [None]:
def get_hyperparams(param_dict):
    """Sample one random configuration from a hyperparameter search space.

    Args:
        param_dict: Mapping from hyperparameter name to a non-empty sequence
            of candidate values.

    Returns:
        A new dict with the same keys as `param_dict`, each mapped to one
        value picked with `random.choice` from its candidates. An empty
        search space yields an empty dict.

    Raises:
        IndexError: If any candidate sequence is empty (from `random.choice`).
    """
    # One draw per hyperparameter, in the mapping's iteration order.
    return {name: random.choice(choices) for name, choices in param_dict.items()}

In [None]:
def get_run_logdir(root_logdir="my_logs"):
    """Return a timestamped log directory path below `root_logdir`.

    The final path component has the form run_YYYY_MM_DD_HH_MM_SS and is
    built from the current time via `strftime`. Nothing is created on disk.
    """
    stamp = strftime("run_%Y_%m_%d_%H_%M_%S")
    return Path(root_logdir).joinpath(stamp)

run_logdir = get_run_logdir()

In [None]:
# Random hyperparameter search: every trial samples one configuration from
# param_dict, trains random_model with it and records the best accuracies.
results = []
num_trials = 10
for trial in range(num_trials):
    # Reset Keras' global state so the models built by earlier trials do not
    # keep accumulating in memory.
    tf.keras.backend.clear_session()

    hps = get_hyperparams(param_dict)
    print(f"Hyperparams for trial {trial}: {hps}")

    lr_og = hps["learning_rate"] # original learning rate for lr schedulers
    lr_sched = hps["lr_sched"] # picking a random lr scheduler

    # Schedules are built from tf.keras (not the standalone keras import) so
    # they come from the same Keras implementation as the optimizers below.
    if lr_sched == "exp":
        # With staircase=True the rate only drops once every decay_steps
        # optimizer steps.
        lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            lr_og,
            decay_steps=100000,
            decay_rate=0.96,
            staircase=True)
    elif lr_sched == "poly":
        # The end rate has to be below the sampled initial rate. It used to
        # be a fixed 0.01, which is >= every candidate learning rate, so the
        # schedule climbed to 0.01 and overrode the sampled value.
        # Keep a 10x decay relative to the sampled rate instead.
        end_learning_rate = lr_og * 0.1
        decay_steps = 10000
        lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
            lr_og,
            decay_steps,
            end_learning_rate,
            power=0.5)
    else:
        # Fail loudly instead of silently treating a typo as "poly".
        raise ValueError(f"Unknown lr_sched: {lr_sched!r}")

    model = random_model(neurons=hps['neurons'], num_layers=hps["num_layers"]) # creating the random model

    opt = hps["optimizer"] # picking the random optimizer
    if opt == "adam":
        optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
    elif opt == "sgd":
        optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
    else:
        # Fail loudly instead of silently treating a typo as "sgd".
        raise ValueError(f"Unknown optimizer: {opt!r}")

    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    # One TensorBoard sub-directory per trial.
    run_logdir = Path("my_logs/manual3") / f"trial_{trial}"
    tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=30)

    history = model.fit(
        X_train, y_train,
        epochs=hps['epochs'],
        validation_data=(X_valid, y_valid),
        callbacks=[tensorboard_cb, early_stopping_cb],
        batch_size=hps['batch_size'],
    )

    # Despite the "final_" prefix, these are the best (maximum) accuracies
    # seen over all epochs of the trial.
    results.append({
        'hyperparams' : hps,
        'final_val_acc' : max(history.history['val_accuracy']),
        'final_train_acc' : max(history.history['accuracy']),
        'run_id' : trial
    })

In [None]:
# Load the TensorBoard notebook extension and open the dashboard on the logs
# written under my_logs/manual2 (the fixed-configuration other_model runs).
# The random-search trials log to my_logs/manual3, which this path does not cover.
%load_ext tensorboard
%tensorboard --logdir=./my_logs/manual2

In [None]:
# Display the random-search summary: one dict per trial with the sampled
# hyperparameters and the best train/validation accuracy reached.
results