# Three layers tuning search.

We want to learn the best possible architecture for a DNN with 3 hidden layers.
The results show that, as expected, the training process is longer and harder, and the outcome is still far from optimal. The search space might be poorly chosen or, as the size of the input space suggests, the classification problem may simply be too hard to solve with such a small number of hidden units.

In [1]:
import os

# Select Torch as the Keras backend through the KERAS_BACKEND environment
# variable. The reasons for choosing Torch are explained in the .md files.
os.environ.update(KERAS_BACKEND="torch")

In [2]:
# Dataset loading
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
from torch.utils.data import DataLoader
from dataset.dataset_loader import dataset_loader

# Batch size shared by the training and the validation loader.
BATCH_SIZE = 16

# NOTE(review): (128, 128) is presumably the target image size and
# is_grayscale=False presumably keeps the colour channels - confirm against
# dataset_loader.
train, test = dataset_loader((128, 128), is_grayscale=False)

# NOTE(review): 5 is presumably the number of folds - confirm against
# KFoldDatasetWrapper.
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

# Only fold 0 is used in this notebook.
local_train, validation = dataset_split_controller.get_data_for_fold(0)
train_dataloader = DataLoader(dataset=local_train, batch_size=BATCH_SIZE, shuffle=True)
# Validation data is only evaluated, never trained on, so it is iterated in a
# fixed order: shuffling it brings no benefit and only adds nondeterminism.
validation_dataloader = DataLoader(dataset=validation, batch_size=BATCH_SIZE, shuffle=False)

In [3]:
import keras_tuner
from utils.my_tuner import HistoryDeletingBayesianOptimization
from models.naive_dnn_gen.naive_dnn import NaiveDnnTunableWrapper
from models.structure.tunable_model_family_hypermodel import TunableModelFamilyHypermodel

# Reload the tuner from the "dnn-search/three-layers" project (overwrite=False)
# so that the top results of the search can be inspected below.
hypermodel = TunableModelFamilyHypermodel((3, 128, 128), NaiveDnnTunableWrapper())
tuner = HistoryDeletingBayesianOptimization(
    hypermodel,
    hyperparameters=keras_tuner.HyperParameters(),
    objective='val_loss',
    tune_new_entries=True,
    overwrite=False,
    directory="dnn-search",
    max_trials=15,
    project_name="three-layers",
)


In [4]:
# Print a summary of the top 5 trials found by the search.
tuner.results_summary(5)

In [5]:
import pandas

# Number of consecutive rows of search.log attributed to one tuner trial.
# NOTE(review): presumably one row per training epoch of a trial - confirm.
ROWS_PER_TRIAL = 22

# Prepare the pandas dataframe so we can see which trial each logged row belongs to.
csv = pandas.read_csv("./dnn-search/three-layers/search.log")

# Row i belongs to trial i // ROWS_PER_TRIAL. The floor division is applied to
# the whole (default, 0-based) index at once instead of looping row by row.
csv['tuner_iteration'] = csv.index // ROWS_PER_TRIAL

# Rows logged for trial 12, the one analysed as the best model below.
best_dataframe = csv.query("tuner_iteration in [12]")

In [6]:
# Display the whole search log, including the 'tuner_iteration' column.
csv

In [42]:
# Display the log rows selected for tuner iteration 12.
best_dataframe

In [55]:
# The best model (trial 12) was hardly overfitting. It has no dropout layers, and adding some might improve performance.
# As the learning was steadily following the train accuracy, we are probably missing units to better learn the structure.
# We could try to tune it with different combinations of dropout layers and maybe a few more neurons.
plotted_metrics = ["loss", "val_loss", "accuracy", "val_accuracy"]
best_dataframe[plotted_metrics].plot()
# This model is of no use unless we change some of its structure.

# Best configuration with dropout

In [44]:
# Trial 14 had some dropout layers. Could it still have been learning well?
# Remember that the search used no early stopping.
is_trial_14 = csv["tuner_iteration"].isin([14])
best_dataframe_with_dropout = csv[is_trial_14]

In [45]:
# Display the log rows selected for tuner iteration 14.
best_dataframe_with_dropout

In [54]:
# This model might be worth training further without touching its parameters.
# We could still try to search for better parameters. (The neurons are probably too few.)
dropout_metrics = best_dataframe_with_dropout[["loss", "val_loss", "accuracy", "val_accuracy"]]
dropout_metrics.plot()

In [7]:
import keras.optimizers

# Retrain the third-ranked configuration as it is, this time with early
# stopping. (K-fold will follow.)
dropout_best_family = NaiveDnnTunableWrapper()
dropout_best_family.load_parameters(tuner.get_best_hyperparameters(5)[2])
model = dropout_best_family.make_model((3, 128, 128))

sgd = keras.optimizers.SGD(learning_rate=1e-4, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Stop once val_loss has not improved by at least 1e-4 for 10 epochs and
# restore the weights of the best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=1e-4,
    patience=10,
    verbose=1,
    mode='min',
    restore_best_weights=True,
)
history = model.fit(
    train_dataloader,
    validation_data=validation_dataloader,
    epochs=150,
    callbacks=[early_stopping],
)

In [11]:
# Turn the per-epoch metrics recorded by fit() into a table and display it.
dataframe = pandas.DataFrame(data=history.history)
dataframe

In [13]:
# Plot the training and validation curves of the retrained model.
history_metrics = ["loss", "val_loss", "accuracy", "val_accuracy"]
dataframe[history_metrics].plot()

In [25]:
from pprint import pprint

# The model doesn't look good. The gap in loss is noticeable and the room for improvement was low.
# We could try a pre-processing procedure on the data to increase the sample size, and also
# increase the number of neurons of the network.
selected_hyperparameters = tuner.get_best_hyperparameters(5)[2]
print(selected_hyperparameters.values)

# The values are edited in place on the object handed out by the tuner.
# NOTE(review): later cells fetch tuner.get_best_hyperparameters(5)[2] again and
# rely on getting this same, modified object back - confirm.
# NOTE(review): 248 may be a typo for 256 - confirm.
selected_hyperparameters.values.update({
    'units_1': 1024,
    'dropout_1': True,
    'units_2': 248,
})

In [27]:
import keras.optimizers

# Rebuild and compile the model from the third-ranked hyperparameters held by
# the tuner, then print its architecture. (K-fold will follow.)
dropout_best_family = NaiveDnnTunableWrapper()
dropout_best_family.load_parameters(tuner.get_best_hyperparameters(5)[2])
model = dropout_best_family.make_model((3, 128, 128))

sgd = keras.optimizers.SGD(learning_rate=1e-4, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.summary()

In [28]:
# Train for at most 150 epochs; stop once val_loss has not improved by at
# least 1e-4 for 10 epochs and restore the weights of the best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=1e-4,
    patience=10,
    verbose=1,
    mode='min',
    restore_best_weights=True,
)
history = model.fit(
    train_dataloader,
    validation_data=validation_dataloader,
    epochs=150,
    callbacks=[early_stopping],
)

In [29]:
# Tabulate the per-epoch metrics of the latest training run and display them.
dataframe = pandas.DataFrame(data=history.history)
dataframe

In [35]:
import gc
import torch

# We learn slowly, but we learn. We could try with augmentation. 80% on validation isn't that bad.
# Yet the previous model performs in a fairly similar way, so where did the complexity of our model go?

# Run the garbage collector first: objects it frees hand their GPU memory back
# to PyTorch's caching allocator, and only then can empty_cache() release those
# now-unoccupied cached blocks. In the opposite order that memory stays cached.
gc.collect()
torch.cuda.empty_cache()


In [36]:
# Just for the fun of it, we make the network huge.
import keras.optimizers

oversized_hyperparameters = tuner.get_best_hyperparameters(5)[2]
print(oversized_hyperparameters.values)
# Edited in place on the object handed out by the tuner.
oversized_hyperparameters.values.update({
    'units_0': 3072,
    'units_1': 1536,
    'dropout_1': True,
    'units_2': 256,
})

# Rebuild and compile the model with the enlarged layers, then print its
# architecture. (K-fold will follow.)
dropout_best_family = NaiveDnnTunableWrapper()
dropout_best_family.load_parameters(oversized_hyperparameters)
model = dropout_best_family.make_model((3, 128, 128))

sgd = keras.optimizers.SGD(learning_rate=1e-4, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.summary()

In [37]:
# Train for at most 150 epochs. Patience is raised to 20 epochs here: stop once
# val_loss has not improved by at least 1e-4 for that long and restore the
# weights of the best epoch.
patient_early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=1e-4,
    patience=20,
    verbose=1,
    mode='min',
    restore_best_weights=True,
)
history = model.fit(
    train_dataloader,
    validation_data=validation_dataloader,
    epochs=150,
    callbacks=[patient_early_stopping],
)