<a href="https://colab.research.google.com/github/rahulroynit/Bayesian-vs-Hyperband-Optimization/blob/master/Hyperparameter_Tuning_(Hyperband_vs_Bayesian).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# create set up for installing kerastuner
# installation needed only once
# links lib_path to a Google Drive folder and puts it first on the import path
# safe to re-run: os.symlink raises FileExistsError when the link already exists,
# so the link and the sys.path entry are only added when they are missing
import os, sys
lib_path = '/content/libraries'
# lexists (not exists) also detects a link whose Drive target is not mounted yet
if not os.path.lexists(lib_path):
  os.symlink('/content/drive/My Drive/Colab Notebooks', lib_path)
if lib_path not in sys.path:
  sys.path.insert(0, lib_path)

In [None]:
# install keras-tuner into lib_path (the folder symlinked to Google Drive in the set-up cell)
# needs to be run once for permanent installation in colab
# $lib_path is expanded from the Python variable of the same name before pip runs
!pip install --target=$lib_path keras-tuner

In [None]:
# import dependencies
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
from tensorflow.keras.optimizers import Adam
from kerastuner.tuners import Hyperband, BayesianOptimization
from kerastuner import HyperModel
from keras.datasets import boston_housing 
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
import datetime
# register the %tensorboard magic used in the last cell to open the dashboard
%load_ext tensorboard
# never truncate columns when DataFrames are printed
pd.set_option('display.max_columns', None)

In [None]:
# load dataset into train and evaluation data
# evaluation data will be split into test and valid data
# load_data() returns two (predictors, target) pairs, unpacked here as train and evaluation sets
(X_train, y_train), (X_eval, y_eval) = boston_housing.load_data()

In [None]:
# show which container types hold the train predictors and the train target
print(*(type(arr) for arr in (X_train, y_train)))

In [None]:
# report the shapes of the train and eval predictors under a header line
print('dims of train and eval data', f'{X_train.shape} {X_eval.shape}', sep = '\n')

In [None]:
# check train data info
# DataFrame.info() prints its report itself and returns None, so it is called
# directly instead of being wrapped in print() (which appended a stray "None")
pd.DataFrame(X_train).info()

In [None]:
# query train data samples
# first rows of the predictors, then of the target
for block in (X_train, y_train):
  print(pd.DataFrame(block).head())

In [None]:
# split evaluation data into validation and test samples
# half of the evaluation rows go to each part
# random_state fixes the shuffle so the split (and the test metrics computed from it)
# is the same on every run; 2020 matches the seed passed to the tuners
from sklearn.model_selection import train_test_split
X_test, X_valid, y_test, y_valid = train_test_split(X_eval, y_eval, test_size = 0.5, random_state = 2020)

In [None]:
# normalize data using min-max scaling
# predictors (train, valid, test) are scaled with parameters fitted on the train predictors
# targets (train, valid) are scaled with parameters fitted on the train target
# test target is left unscaled: predictions are unscaled before the performance comparison
# targets are reshaped to a single column before being passed to the target scaler
y_train, y_valid = (target.reshape((-1, 1)) for target in (y_train, y_valid))
scaler_X = MinMaxScaler().fit(X_train)
scaler_y = MinMaxScaler().fit(y_train)
X_train_array, X_valid_array, X_test_array = (
    np.array(scaler_X.transform(split)) for split in (X_train, X_valid, X_test))
y_train_array, y_valid_array = (
    np.array(scaler_y.transform(target)) for target in (y_train, y_valid))

In [None]:
# shapes of the scaled validation predictors and target
print(*(arr.shape for arr in (X_valid_array, y_valid_array)))

In [None]:
# check test data types and shapes
# test target should be rank-1 array, i.e., of the form: (N, )
print(*map(type, (X_test, y_test)))
print(*(arr.shape for arr in (X_test, y_test)))

In [None]:
# model-building function handed to the tuner
# tunable hyperparameters in model: layers, neurons, dropout, learning rate
def build_model(hp):
  """Build and compile the regression network for one set of hyperparameters.

  The network is an input block followed by between 1 and 3 intermediate
  blocks and a single-unit output layer. Every block is
  Dense -> BatchNormalization -> ReLU -> Dropout, with the Dense width
  (32 to 128 in steps of 32) and the dropout rate (0 to 0.3 in steps of 0.1)
  requested from `hp` under a per-block name.

  Args:
    hp: kerastuner hyperparameters object; its Int / Float methods are used
      to request each tunable value by name.

  Returns:
    The compiled Sequential model (Adam optimizer, "mse" loss, "mae" metric).
  """
  net = Sequential()

  def add_block(units_name, drop_name, **dense_kwargs):
    # hyperparameters are requested in the same order as the layers are added
    width = hp.Int(units_name, min_value = 32, max_value = 128, step = 32)
    net.add(Dense(units = width, **dense_kwargs))
    net.add(BatchNormalization())
    net.add(Activation("relu"))
    rate = hp.Float(drop_name, min_value = 0, max_value = 0.3, step = 0.1)
    net.add(Dropout(rate))

  # input block: input width is the number of columns of the scaled train predictors
  add_block("dense_input", "drop_input", input_shape = (X_train_array.shape[1],))

  depth = hp.Int("num_intermediate_layers", 1, 3)
  for i in range(depth):
    add_block(f'dense_{i}', f'drop_{i}')

  # NOTE(review): relu on the output layer clamps scaled predictions at 0 - confirm this is intended
  net.add(Dense(1, activation = "relu"))

  # learning rate is requested with log sampling between 0.0001 and 0.01
  step_size = hp.Float('lr', min_value = 0.0001, max_value = 0.01, sampling = "LOG",
                       default = 0.01)
  net.compile(optimizer = Adam(learning_rate = step_size), loss = "mse", metrics = ["mae"])
  return net

In [None]:
## set up tensorboard
# clear logs from previous runs
!rm -rf ./logs/
# set up path where logs of tensorboard will be saved
# datetime suffix makes the logs unique
path = "logs/hp_tuning/" + datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
tensorboard = TensorBoard(log_dir = path, histogram_freq = 1) 

In [None]:
# tuner used for hyperparameter tuning: "BO" (BayesianOptimization) or "HB" (Hyperband)
tuner_choice = "HB"
# trials -> max_trials for BO
trials = 10
# ep -> max_epochs for HB, also passed as epochs to tuner.search
ep = 50
# batch size
bs = 32

In [None]:
# set up tuner
# both tuners share build_model, the "val_mae" objective, 2 executions per trial and seed 2020
# overwrite = True starts a fresh project instead of reloading one stored under the same project_name
if tuner_choice == "BO":
  tuner = BayesianOptimization(build_model, objective = "val_mae", max_trials = trials, executions_per_trial = 2, 
                               seed = 2020, project_name = "bo_tuning", overwrite = True)
elif tuner_choice == "HB":
  tuner = Hyperband(build_model, objective = "val_mae", max_epochs = ep, executions_per_trial = 2,
                    seed = 2020, project_name = "hb_tuning", overwrite = True)
else:
  # an unrecognised value (e.g. a typo) used to fall through to Hyperband silently
  raise ValueError(f'tuner_choice must be "BO" or "HB", got {tuner_choice!r}')

In [None]:
# tuner search space summary
# search_space_summary() prints the summary itself and returns None,
# so it is not wrapped in print() (which appended a stray "None")
tuner.search_space_summary()

In [None]:
# optimize hyperparameters
# a trial's training is stopped early once validation loss has not improved for 5 epochs
early_stop = EarlyStopping(monitor = 'val_loss', patience = 5)
tuner.search(
    x = X_train_array,
    y = y_train_array,
    batch_size = bs,
    epochs = ep,
    verbose = 1,
    validation_data = (X_valid_array, y_valid_array),
    callbacks = [early_stop, tensorboard],
)

In [None]:
# tuning summary
# results_summary() prints the summary itself and returns None,
# so it is not wrapped in print() (which appended a stray "None")
tuner.results_summary()

In [None]:
# best hyperparameter values
# the values dict of the top-ranked hyperparameter set becomes a one-row frame
top_hp_values = tuner.get_best_hyperparameters()[0].values
best_hp = pd.DataFrame(top_hp_values, index = [0])
print(best_hp)

In [None]:
# best model
best_model = tuner.get_best_models()[0]
# summary() prints the layer table itself and returns None,
# so it is not wrapped in print() (which appended a stray "None")
best_model.summary()

In [None]:
# save best model
# the file name carries the tuner choice, so a BO run and an HB run write separate files
model_file = f'reg_model_{tuner_choice}.h5'
best_model.save(model_file)

In [None]:
# load best model (if not loaded)
# the file name follows tuner_choice so it is the same file the save cell wrote;
# the previous hard-coded 'reg_model_HB.h5' loaded the wrong file after a BO run
best_model = keras.models.load_model(f'reg_model_{tuner_choice}.h5')

In [None]:
# predictions on test data
# predictions are not rank-1 arrays
# predictions to be flattened after inverse transformation
preds = best_model.predict(X_test_array)
preds_frame = pd.DataFrame(preds)
print(type(preds), preds.shape)
print(preds_frame.head())

In [None]:
# unscale predictions and convert into rank-1 array
# step 1: undo the target scaling, step 2: flatten
preds_rescaled = scaler_y.inverse_transform(preds)
preds_unscaled = np.array(preds_rescaled).flatten()
print(type(preds_unscaled), preds_unscaled.shape)

In [None]:
# compare unscaled predictions and actual values
# first rows of each series, predictions first
for label, series in (('predictions', preds_unscaled), ('actual_values', y_test)):
  print(pd.DataFrame(series, columns = [label]).head())

In [None]:
# performance on test data
# mape: mean absolute error relative to the actual value, in percent
# mae: mean absolute error in the unscaled target's units
actual = np.array(y_test)
errors = actual - np.array(preds_unscaled)
mape = np.mean(np.abs(errors / actual)) * 100
mae = np.mean(np.abs(errors))
print(f'mape = {mape} \n', f'mae = {mae}')

In [None]:
# run tensorboard
# points at the parent folder of the timestamped log directory created in the tensorboard set-up cell
%tensorboard --logdir logs/hp_tuning