In [2]:
import os

# Keras chooses its backend when it is first imported, so KERAS_BACKEND has to be
# exported *before* `import keras`. Doing it here keeps "Restart Kernel -> Run All"
# working no matter in which order the setup cells are executed.
os.environ["KERAS_BACKEND"] = "torch"

import keras
from torch.utils.data import DataLoader

from dataset.dataset_loader import dataset_loader
import keras_tuner

from utils.my_tuner import HistoryDeletingBayesianOptimization

from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
from models.naive_dnn_gen.two_layers_dnn import TwoHiddenLayersTunableAugmentedDNN, TwoHiddenLayersDNNAugModelFamily
from models.structure.base_model_family import HiddenLayerStructure

In [1]:
import os
import sys
import warnings

# Why Torch? You'll find the answer in the .md files!
os.environ["KERAS_BACKEND"] = "torch"

# Keras only reads KERAS_BACKEND while it is being imported. If it is already loaded
# with a different backend, the assignment above has no effect: say so loudly instead
# of silently training on the wrong backend.
_imported_keras = sys.modules.get("keras")
if _imported_keras is not None and _imported_keras.backend.backend() != "torch":
    warnings.warn(
        "keras was imported before KERAS_BACKEND was set and is running on the "
        f"'{_imported_keras.backend.backend()}' backend; restart the kernel and run "
        "this cell before importing keras.",
        RuntimeWarning,
    )

In [3]:
import torch

# Ask PyTorch whether a CUDA device is usable; the value is shown as the cell output.
gpu_available = torch.cuda.is_available()
gpu_available

In [4]:
# Initial steps: the shared hyperparameter container, the dataset and the train/validation
# split used by every cell below.
hyperparameters = keras_tuner.HyperParameters()
# NOTE(review): (192, 192) is presumably the image size; it matches the (3, 192, 192)
# input shape passed to the models further down.
train, test = dataset_loader((192, 192), is_grayscale=False)
dataset_split_controller = KFoldDatasetWrapper(5)  # presumably 5 folds - confirm in KFoldDatasetWrapper
dataset_split_controller.load_data(train)

# Only fold 0 is used in this notebook.
local_train, validation = dataset_split_controller.get_data_for_fold(0)
train_dataloader = DataLoader(dataset=local_train, batch_size=32, shuffle=True)
# Validation is a plain evaluation pass over the whole set, so the sample order is irrelevant
# to the aggregated metrics; not shuffling avoids needless work and extra RNG draws.
validation_dataloader = DataLoader(dataset=validation, batch_size=32, shuffle=False)

# Steps:

- 1: Find the best inner structure of the network by searching over the number of hidden nodes in each layer.
- 2: Study the best SGD hyperparameters for our top 4 models overall, plus the best overfitting one.
- 3: Take the top results and try to increase performance by attaching an augmentation procedure.
- 4: Check whether adding dropout increases the performance of the models.

# Step 1:  Study the best network inner structure by searching for best hidden nodes structure

  We need to find the structure of the two-layer network. We already saw that 3-layer networks are harder to train and did not yield great performance.
  
  We will still try the top 3 models of the 3-layer study with augmentation and dropout, to see whether their performance increases.
```py  
TwoHiddenLayersTunableAugmentedDNN() # Family we will be tuning
```

In [5]:
from models.naive_dnn_gen.two_layers_dnn import TwoHiddenLayersTunableAugmentedDNN
from models.structure.tunable_model_family_hypermodel import TunableModelFamilyHypermodel

# Entries pinned for this search, registered in this order:
#  - the optimizer is fixed to SGD with these `lr` / `momentum` values for now;
#  - both dropout layers are frozen to "disabled" for now.
for fixed_name, fixed_value in (
        ("lr", 1e-4),
        ("momentum", 0.9),
        ("dropout_0", False),
        ("dropout_1", False),
):
    hyperparameters.Fixed(fixed_name, fixed_value)

# Where the tuner stores its state; the logging cells reuse these two names to build paths.
project_directory = "dnn-search"
project_name = "two-layers-192-192"

# Hypermodel wrapping the two-hidden-layer family for (3, 192, 192) inputs.
searched_hypermodel = TunableModelFamilyHypermodel((3, 192, 192), TwoHiddenLayersTunableAugmentedDNN())

# Bayesian search minimising the validation loss. `tune_new_entries=True` leaves every
# hyperparameter that was not fixed above open to the search; `overwrite=False` keeps
# whatever is already stored under `directory/project_name`.
tuner = HistoryDeletingBayesianOptimization(
    searched_hypermodel,
    objective='val_loss',
    max_trials=15,
    executions_per_trial=1,
    hyperparameters=hyperparameters,
    tune_new_entries=True,
    directory=project_directory,
    project_name=project_name,
    overwrite=False,
)

In [6]:
# Manual guard: the search has already been run once, so it is skipped to avoid
# overriding the stored results. Flip the flag to False to launch it again.
search_has_been_done = True
if not search_has_been_done:
    # Per-epoch metrics of every trial are appended to one CSV file.
    search_logger = keras.callbacks.CSVLogger(
        f"{project_directory}/{project_name}/search.log", separator=",", append=True)
    tuner.search(
        train_dataloader,
        validation_data=validation_dataloader,
        epochs=20,
        callbacks=[search_logger],
    )

In [54]:
# Show the tuner's results summary, limited to 5 trials.
tuner.results_summary(5)

In [6]:
import pandas

# Each trial of the search was run with `epochs=20` and logged to the same CSV file,
# so every consecutive group of 20 rows belongs to one tuner iteration.
EPOCHS_PER_TRIAL = 20

csv = pandas.read_csv("./dnn-search/two-layers-192-192/search.log")
# Label every row with the (0-based) tuner iteration it belongs to, by row position.
csv['tuner_iteration'] = [row_position // EPOCHS_PER_TRIAL for row_position in range(len(csv))]

# Iterations inspected in the plots below.
best_dataframe = csv.query("tuner_iteration in [10, 12, 14, 7, 8]")
# The column is `tuner_iteration` (with an underscore): a space in the name does not parse.
best_overfitting = csv.query("tuner_iteration in [0]")

In [7]:
import plotly.express as px
import plotly.graph_objects as go

# Training and validation loss per epoch for the selected tuner iterations, in one chart.
combined_loss_figure = px.line(
    best_dataframe,
    x="epoch",
    y=["loss", "val_loss"],
    color="tuner_iteration",
    markers=True,
    template="plotly_white",
)
combined_loss_figure

In [8]:
# Training loss per epoch, one line per selected tuner iteration.
loss_figure = px.line(
    best_dataframe,
    x="epoch",
    y=["loss"],
    color="tuner_iteration",
    markers=True,
    template="plotly_white",
)
loss_layout = dict(title="Loss in tuner search", xaxis_title="Epoch", yaxis_title="Loss")
loss_figure.update_layout(**loss_layout)

In [9]:
# Validation loss per epoch, one line per selected tuner iteration.
figure = px.line(
    best_dataframe,
    x="epoch",
    y=["val_loss"],
    color="tuner_iteration",
    markers=True,
    template="plotly_white",
)
validation_layout = dict(title="Validation Loss in tuner search", xaxis_title="Epoch", yaxis_title="Loss")
figure.update_layout(**validation_layout)

How can we increase the performance of the 5 best models? We select 3 models and apply the following improvements:
- 1 - Augmentation of the dataset
- 2 - Dropout layers inside the network
- 3 - Better tuning of the hyperparameters related to the learning process (SGD) rather than to the network structure

### What models should we pick?
Should we only look at the loss? As I see it, a nice idea could be to take the best overall model we generated, the one with the lowest variance in the validation loss (fewest spikes) (10), and the best overfitting model (which might become a good model with augmentation). Therefore we pick:
- 10 (Compared to 8 and 12 it has fewer spikes, and it generally performs better than 8 and 14; it is also the best of the search)
- 12 (As it is the best in training loss, our procedure might make it a good model)
- 7 (It performs well enough compared to the others and is the least complex network)

In [6]:
# Models selected for the next steps. `iteration` is the tuner iteration used as a label
# (it also ends up in the log file names); `hyperparameters_index` is the position of
# that trial in `tuner.get_best_hyperparameters(5)`.
# Position 0 is the best trial of the search, which the analysis above identifies as
# iteration 10 (it was previously mislabelled as 14).
best_hyperparameters_references = [
    dict(iteration=10, hyperparameters_index=0),
    dict(iteration=12, hyperparameters_index=1),
    dict(iteration=7, hyperparameters_index=3),
]

# Query the tuner once and print the hyperparameter values of every selected model.
top_hyperparameters = tuner.get_best_hyperparameters(5)
for reference in best_hyperparameters_references:
    print(f"iteration:{reference['iteration']}, hp: {top_hyperparameters[reference['hyperparameters_index']].values}")

# 1 - Augmentation of the dataset

In [18]:
# First selected model: entry 0 of `best_hyperparameters_references`.
selected_reference = best_hyperparameters_references[0]
current_hyperparameters = tuner.get_best_hyperparameters(5)[selected_reference['hyperparameters_index']]

# NOTE: this build step is repeated for every selected model and could become a function.
# Copy the searched layer widths into the augmented family; dropout is handled in a
# later step, so it is left as None here.
aug_model_family = TwoHiddenLayersDNNAugModelFamily()
aug_model_family.hidden_layer_0 = HiddenLayerStructure(
    units=current_hyperparameters['units_0'],
    following_dropout=None,
)
aug_model_family.hidden_layer_1 = HiddenLayerStructure(
    units=current_hyperparameters['units_1'],
    following_dropout=None,
)

current_model = aug_model_family.make_model((3, 192, 192))
sgd_optimizer = keras.optimizers.SGD(learning_rate=1e-4, momentum=0.9, nesterov=True)
current_model.compile(
    optimizer=sgd_optimizer,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
current_model.summary(expand_nested=True)

In [20]:
# Should I already use K-fold CV? It would take forever
# Stop once `val_loss` has not improved by at least 1e-4 for 10 epochs in a row, and
# restore the best weights seen.
stop_on_plateau = keras.callbacks.EarlyStopping(
    monitor='val_loss', mode='min', min_delta=1e-4, patience=10,
    restore_best_weights=True, verbose=1,
)
# Persist the per-epoch history; the file is appended to, not replaced.
history_logger = keras.callbacks.CSVLogger(
    f"{project_directory}/{project_name}/model{best_hyperparameters_references[0]['iteration']}-aug.log",
    separator=",", append=True,
)
history = current_model.fit(
    train_dataloader,
    validation_data=validation_dataloader,
    epochs=150,
    callbacks=[stop_on_plateau, history_logger],
)

In [None]:
# Observation: the validation loss has not decreased. Is augmentation alone not enough?

In [8]:
# Second selected model: entry 1 of `best_hyperparameters_references` (labelled iteration 12).
selected_reference = best_hyperparameters_references[1]
current_hyperparameters = tuner.get_best_hyperparameters(5)[selected_reference['hyperparameters_index']]

# NOTE: this build step is repeated for every selected model and could become a function.
# Copy the searched layer widths into the augmented family; dropout is handled in a
# later step, so it is left as None here.
aug_model_family = TwoHiddenLayersDNNAugModelFamily()
aug_model_family.hidden_layer_0 = HiddenLayerStructure(
    units=current_hyperparameters['units_0'],
    following_dropout=None,
)
aug_model_family.hidden_layer_1 = HiddenLayerStructure(
    units=current_hyperparameters['units_1'],
    following_dropout=None,
)

current_model = aug_model_family.make_model((3, 192, 192))
sgd_optimizer = keras.optimizers.SGD(learning_rate=1e-4, momentum=0.9, nesterov=True)
current_model.compile(
    optimizer=sgd_optimizer,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
current_model.summary(expand_nested=True)

In [9]:
# Should I already use K-fold CV? It would take forever
# Stop once `val_loss` has not improved by at least 1e-4 for 10 epochs in a row, and
# restore the best weights seen.
stop_on_plateau = keras.callbacks.EarlyStopping(
    monitor='val_loss', mode='min', min_delta=1e-4, patience=10,
    restore_best_weights=True, verbose=1,
)
# Persist the per-epoch history; the file is appended to, not replaced.
history_logger = keras.callbacks.CSVLogger(
    f"{project_directory}/{project_name}/model{best_hyperparameters_references[1]['iteration']}-aug.log",
    separator=",", append=True,
)
history = current_model.fit(
    train_dataloader,
    validation_data=validation_dataloader,
    epochs=150,
    callbacks=[stop_on_plateau, history_logger],
)