In [49]:
import os

# Why Torch? You'll find the answer in the .md files! 
os.environ["KERAS_BACKEND"] = "torch"

In [50]:
import keras
from torch.utils.data import DataLoader

from dataset.dataset_loader import dataset_loader
import keras_tuner

from utils.my_tuner import HistoryDeletingBayesianOptimization

from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper

In [51]:
import torch

torch.cuda.is_available()

True

In [52]:
# Initial steps
hyperparameters = keras_tuner.HyperParameters()
train, test = dataset_loader((192, 192), is_grayscale=False)
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

local_train, validation = dataset_split_controller.get_data_for_fold(0)
train_dataloader = DataLoader(dataset=local_train, batch_size=32, shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=32, shuffle=True)

# Steps:

- 1: Study the best network inner structure by searching for best hidden nodes structure.
- 2: Study the best Hyperparameters for SGD of our top 4 models in general and also the best overfitting one
- 3: See top results and try to increase performance by attaching an Augmentation procedure.
- 4: See if adding dropout increases the performance of model

# Step 1:  Study the best network inner structure by searching for best hidden nodes structure

  As we need to find the structure of the two layers network. We already saw that 3 layers are harder to train and didn't yield a great performance result.
  
  We still will try the top 3 models of the 3 layer study with augmentation and dropout to see if performance increases.
```py  
TwoHiddenLayersTunableAugmentedDNN() # Family we will be tuning
```

In [53]:
from models.structure.tunable_model_family_hypermodel import TunableModelFamilyHypermodel
from models.naive_dnn_gen.two_layers_dnn import TwoHiddenLayersTunableAugmentedDNN

# For now the optimizer is also fixed to SGD with these parameters:
hyperparameters.Fixed("lr", 1e-4)
hyperparameters.Fixed("momentum", 0.9)

# For now dropout layers are frozen to be disabled.
hyperparameters.Fixed("dropout_0", False)
hyperparameters.Fixed("dropout_1", False)

project_name = "two-layers-192-192"
project_directory = "dnn-search"

tuner = HistoryDeletingBayesianOptimization(
    TunableModelFamilyHypermodel((3, 192, 192), TwoHiddenLayersTunableAugmentedDNN()),
    hyperparameters=hyperparameters,
    objective='val_loss',
    tune_new_entries=True,
    executions_per_trial=1,
    overwrite=False,
    directory=project_directory,
    max_trials=15,
    project_name=project_name
)

Reloading Tuner from dnn-search/two-layers-192-192/tuner0.json


In [6]:
search_has_been_done = True # To avoid overriding my stuff.
if not search_has_been_done:
    tuner.search(train_dataloader, epochs=20, validation_data=validation_dataloader, callbacks=[keras.callbacks.CSVLogger(
        f"{project_directory}/{project_name}/search.log", separator=",", append=True)
    ])

Trial 15 Complete [00h 14m 47s]
val_loss: 0.42832475900650024

Best val_loss So Far: 0.42472130060195923
Total elapsed time: 2d 01h 26m 16s


In [54]:
tuner.results_summary(5)

Results summary
Results in dnn-search/two-layers-192-192
Showing 5 best trials
Objective(name="val_loss", direction="min")

Trial 10 summary
Hyperparameters:
lr: 0.0001
momentum: 0.9
dropout_0: False
dropout_1: False
units_0: 1536
units_1: 1152
Score: 0.42472130060195923

Trial 12 summary
Hyperparameters:
lr: 0.0001
momentum: 0.9
dropout_0: False
dropout_1: False
units_0: 4096
units_1: 128
Score: 0.4257681667804718

Trial 14 summary
Hyperparameters:
lr: 0.0001
momentum: 0.9
dropout_0: False
dropout_1: False
units_0: 2048
units_1: 1152
Score: 0.42832475900650024

Trial 07 summary
Hyperparameters:
lr: 0.0001
momentum: 0.9
dropout_0: False
dropout_1: False
units_0: 1536
units_1: 640
Score: 0.42843180894851685

Trial 08 summary
Hyperparameters:
lr: 0.0001
momentum: 0.9
dropout_0: False
dropout_1: False
units_0: 2048
units_1: 384
Score: 0.4294884502887726


In [55]:
import pandas
csv = pandas.read_csv("./dnn-search/two-layers-192-192/search.log")
csv['tuner_iteration'] = 0

current_iteration = 0
for index, row in enumerate(csv.itertuples()):
    csv.at[index, 'tuner_iteration'] = int(index / 20)
    
best_dataframe = csv.query("tuner_iteration in [10, 12, 14, 7, 8]")
best_overfitting = csv.query("tuner iteration in [0]")

SyntaxError: Python keyword not valid identifier in numexpr query (<unknown>, line 1)

In [56]:
import plotly.express as px
import plotly.graph_objects as go
px.line(best_dataframe, x="epoch", y=["loss", "val_loss"], color="tuner_iteration", template="plotly_white",markers=True)

In [57]:
loss_figure = px.line(best_dataframe, x="epoch", y=["loss"], color="tuner_iteration", template="plotly_white",markers=True)
loss_figure.update_layout(title="Loss in tuner search", xaxis_title="Epoch", yaxis_title="Loss")

In [59]:
figure = px.line(best_dataframe, x="epoch", y=["val_loss"], color="tuner_iteration", template="plotly_white",markers=True)
figure.update_layout(title="Validation Loss in tuner search", xaxis_title="Epoch", yaxis_title="Loss")

How can we increase the performance of the 5 best models? We select 2 models to perform the following improvements:
 - 1 -Augmentation of the dataset
- 2 - Dropout layers inside the network
 - 3 -Better tune hyperparameters related to the learning process and not network structure (SGD)

### What models should we pick?
Should we only look the loss? As I see it a nice idea could to take the best overall model we generated, the best that has lowest variance in the val loss (least amount of spikes) (10) (riformula come si deve) and the best overfitting model (which might become a good model with augmentation). Therefore we pick:
- 10 (Compared to 8 and 12 it has less spikes and generally performs beter than 8 and 14) (Which also is best of search)
- 12 (As it is the best in training loss, our procedure might make it a good model)
- 7 (It performs well enough compared to the others and is the least complex network)

In [68]:
best_hyperparameters = [
    dict(iteration=10, hyperparameters=tuner.get_best_hyperparameters(5)[0]),
    dict(iteration=12, hyperparameters=tuner.get_best_hyperparameters(5)[1]),
    dict(iteration=7, hyperparameters=tuner.get_best_hyperparameters(5)[3]),
]

[print(f"iteration:{i['iteration']}, hp: {i['hyperparameters'].values}") for i in best_hyperparameters]

iteration:10, hp: {'lr': 0.0001, 'momentum': 0.9, 'dropout_0': False, 'dropout_1': False, 'units_0': 1536, 'units_1': 1152}
iteration:12, hp: {'lr': 0.0001, 'momentum': 0.9, 'dropout_0': False, 'dropout_1': False, 'units_0': 4096, 'units_1': 128}
iteration:7, hp: {'lr': 0.0001, 'momentum': 0.9, 'dropout_0': False, 'dropout_1': False, 'units_0': 1536, 'units_1': 640}


[None, None, None]

In [9]:
from models.structure.base_model_family import HiddenLayerStructure
from models.naive_dnn_gen.two_layers_dnn import TwoHiddenLayersDNNAugModelFamily

family_gen = TwoHiddenLayersDNNAugModelFamily()
family_gen.hidden_layer_0 = HiddenLayerStructure(2718, None)
family_gen.hidden_layer_1 = HiddenLayerStructure(728, None)

model = family_gen.make_model((3, 192, 192))

model.compile(loss='binary_crossentropy',
              optimizer=keras.optimizers.SGD(learning_rate=1e-4, momentum=0.9, nesterov=True), metrics=['accuracy'])

model.summary(expand_nested=True)

In [10]:
history = model.fit(train_dataloader, validation_data=validation_dataloader, epochs=150,
                    callbacks=[keras.callbacks.EarlyStopping(
                        monitor='val_loss', min_delta=1e-4, patience=20, verbose=1, mode='min',
                        restore_best_weights=True
                    )])

Epoch 1/150
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 394ms/step - accuracy: 0.5857 - loss: 0.6649 - val_accuracy: 0.7159 - val_loss: 0.5835
Epoch 2/150
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 384ms/step - accuracy: 0.7110 - loss: 0.5601 - val_accuracy: 0.6874 - val_loss: 0.5902
Epoch 3/150
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 385ms/step - accuracy: 0.7229 - loss: 0.5400 - val_accuracy: 0.6737 - val_loss: 0.6034
Epoch 4/150
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 389ms/step - accuracy: 0.7474 - loss: 0.5288 - val_accuracy: 0.7497 - val_loss: 0.5147
Epoch 5/150
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 391ms/step - accuracy: 0.7579 - loss: 0.5108 - val_accuracy: 0.7878 - val_loss: 0.4904
Epoch 6/150
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 395ms/step - accuracy: 0.7683 - loss: 0.4891 - val_accuracy: 0.8025 - val_loss: 0.4724
Epoc