In [3]:
# Baseline training settings reused by the data loaders, the tuner search and the final fit.
default_values = dict(
    batch_size=32,
    epochs=15,
    learning_rate=1e-2,
)

The goal of this procedure is to find a good structure for the network that we might not find by hand,
as the process is anything but trivial. We begin by searching for good networks, select one or two of them, and then tune the learning hyperparameters.

This kind of approach seems to be very popular.

# 1- Load data

In [4]:
from typing import Any

# Project settings. "name" is used as the tuner project name and as the prefix
# of the CSV files written and read by this notebook.
# The value type is `typing.Any`; the builtin `any` is a function, not a type.
project_definition: dict[str, Any] = {"name": "complete_search"}

In [5]:
import models.structure.base_model_wrapper
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
from torch.utils.data import DataLoader
from dataset.dataset_loader import dataset_loader, dataset_information

# Load the train/test splits; (224, 224) is presumably the target image size
# (the models below are built for a (3, 224, 224) input) — TODO confirm.
train, test = dataset_loader((224, 224))

# We take 20% of train as validation: 5 folds, fold 0 is used as the split.
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

local_train, validation = dataset_split_controller.get_data_for_fold(0)

# Only the training loader is shuffled. The validation and test loaders keep a
# fixed order so that evaluation runs iterate over the data deterministically.
train_dataloader = DataLoader(dataset=local_train, batch_size=default_values["batch_size"], shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=default_values["batch_size"], shuffle=False)
test_dataloader = DataLoader(dataset=test, batch_size=default_values["batch_size"], shuffle=False)

In [6]:
# Statistics are computed on the training fold only (not on validation/test).
# The displayed result has three entries per statistic, presumably one per colour channel — TODO confirm.
mean, variance = dataset_information(local_train, (224, 224))
# Bundled in a dict purely for display in the next cell.
measures = {"mean": mean, "variance": variance}

In [7]:
# Display the mean and variance computed on the training fold.
measures

{'mean': tensor([0.6507, 0.5938, 0.5402]),
 'variance': tensor([0.0868, 0.0899, 0.1023])}

# 2 - Model family definition
The model family is defined in the file ```conv_network_structure.py``` as ```ConvNetworkStructure```. To make it work with my custom implementation of the Keras Tuner hypermodel,
I extended it as ```TunableConvNetworkStructure```.

In [8]:
from models.structure.augmentation_wrapper import TorchAugmentationModel
from conv_network_structure import TunableConvNetworkStructure


class TunableConvNetworkFamilyWithPreprocess(TunableConvNetworkStructure, TorchAugmentationModel):
    """Tunable convolutional model family combined with ``TorchAugmentationModel``.

    The class adds no members of its own; all behaviour is inherited from the
    two base classes, in this order.
    """


tunableConvNetworkFamily = TunableConvNetworkFamilyWithPreprocess()

# This step is required: the model family needs the dataset mean and variance
# computed earlier in the notebook.
tunableConvNetworkFamily.load_dataset_mean_and_variance(mean, variance)

# 3 - Tuning process

## 3.1 - Search space definition

In [9]:
from keras_tuner import HyperParameters

hyperparameters = HyperParameters()

# Every per-layer hyperparameter is registered up front. This works around the
# keras tuner not registering these variables correctly on its own: without the
# explicit registration some iterations would be lost, because some parameters
# would not be known to the tuner.
log_scale = {"step": 2, "sampling": "log"}

for layer_index in range(4):
    hyperparameters.Int(name=f"filters_{layer_index}", min_value=16, max_value=256, **log_scale)
    hyperparameters.Choice(name=f"kernel_{layer_index}", values=[3, 5], default=3)

# Sizes of the two hidden layers.
hyperparameters.Int(name="units_0", min_value=32, max_value=256, **log_scale)
hyperparameters.Int(name="units_1", min_value=32, max_value=256, **log_scale)

32

## 3.2 - Tuning procedure

In [10]:
from utils.my_tuner import HistoryDeletingBayesianOptimization
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters
from models.structure.tunable_hypermodel import TunableHyperModel

# Learning parameters for the search, using the notebook's default learning rate.
sgd_learning_parameters = SgdLearningParameters(learning_rate=default_values["learning_rate"])
# Hypermodel built from the tunable family; (3, 224, 224) is presumably the
# channels-first input shape — TODO confirm.
hypermodel = TunableHyperModel(tunableConvNetworkFamily, sgd_learning_parameters, (3, 224, 224))

# Tuner that minimises/maximises 'val_loss' according to the tuner's own rules,
# starting from the pre-registered search space and allowed to add new entries.
# overwrite=False together with a fixed directory/project name is what lets the
# search be resumed in a later session.
tuner = HistoryDeletingBayesianOptimization(
    hypermodel,
    hyperparameters=hyperparameters,
    objective='val_loss',
    tune_new_entries=True,
    overwrite=False,
    directory="tuned_models",
    project_name=project_definition["name"],
    max_trials=40  # Total trial budget. The search is run in several sessions and resumed later. NOTE(review): an earlier note said 15 + 15 + 15 trials, which does not add up to 40 — confirm the intended budget.
)

Given model is tunable


To avoid going on forever we use two kinds of callbacks:
- Early Stopping
- ThresholdStopCallback (custom)

In [None]:
import keras
import callbacks.threshold_stop_cb

# Callbacks handed to the tuner search.
search_callbacks = [
    # Per-epoch metrics are appended to a single CSV file named after the project.
    keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True),
    # Stop when val_loss has not improved for 5 epochs.
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min'),
    # Custom threshold-based stops; the meaning of the two arguments is defined
    # in callbacks/threshold_stop_cb.py.
    callbacks.threshold_stop_cb.ThresholdStopCallback(1, 2),
    callbacks.threshold_stop_cb.ThresholdStopCallback(0.6, 4),
]

tuner.search(
    train_dataloader,
    epochs=default_values["epochs"],
    validation_data=validation_dataloader,
    callbacks=search_callbacks,
)

Trial 17 Complete [00h 12m 29s]
val_loss: 0.29904496669769287

Best val_loss So Far: 0.24560382962226868
Total elapsed time: 02h 32m 45s

Search: Running Trial #18

Value             |Best Value So Far |Hyperparameter
16                |32                |filters_0
5                 |5                 |kernel_0
128               |256               |filters_1
5                 |5                 |kernel_1
128               |128               |filters_2
5                 |5                 |kernel_2
64                |128               |filters_3
5                 |5                 |kernel_3
128               |64                |units_0
64                |32                |units_1
4                 |4                 |convolution_layers
2                 |2                 |hidden_layers

Given model is tunable


Epoch 1/15
[1m 82/119[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m13s[0m 353ms/step - accuracy: 0.5600 - loss: 0.6821

In [39]:
import pandas as pd

trials = tuner.oracle.get_best_trials(num_trials=40)

# One record per trial: its hyperparameter values plus the trial score and id,
# kept in the order returned by the oracle.
trial_records = []
for trial in trials:
    record = dict(trial.hyperparameters.get_config()["values"])
    record["score"] = trial.score
    record["trial"] = trial.trial_id
    trial_records.append(record)

hyperparameters_frame = pd.DataFrame(trial_records)

# Persist the table so the later sections can reload it without the tuner.
hyperparameters_frame.to_csv(f"{project_definition['name']}_results.csv", index=False)

In [40]:
# Display the per-trial hyperparameter table.
hyperparameters_frame

In [61]:
import plotly.express as px

# Heatmap of trial scores against the number of convolution layers.
# The frame stores the trial score under the lowercase "score" column, so the
# y key must be "score" ("Score" is not a column of the frame).
fig = px.density_heatmap(hyperparameters_frame, x="convolution_layers", y="score",
                         nbinsx=4, nbinsy=20, color_continuous_scale="Viridis", text_auto=True)
fig.show()

In [63]:
# Heatmap of trial scores against the number of hidden layers.
# The frame stores the trial score under the lowercase "score" column, so the
# y key must be "score" ("Score" is not a column of the frame).
fig = px.density_heatmap(hyperparameters_frame, x="hidden_layers", y="score",
                         nbinsx=2, nbinsy=20, color_continuous_scale="Viridis", text_auto=True)
fig.show()

## 3.3 - Results

In [14]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Reload the per-epoch log written by the CSVLogger callback during the search.
# NOTE(review): the name `csv` shadows the standard-library module of the same name.
csv = pandas.read_csv(f"{project_definition['name']}_train.csv")
# The return value is not used, so this presumably adds the tuner-iteration
# information to `csv` in place — TODO confirm in utils/data_processing.py.
add_tuner_iteration_to_data(csv)

# Build the two figures from the full search log.
loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

loss_graph.update_layout(title="Loss vs Val_loss in tuner search per epoch (Val dashed)").show()
acc_graph.update_layout(title="Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)").show()

In [15]:
# Display the training log loaded from the CSV file.
csv

In [64]:
# Keep only the rows of five hand-picked tuner iterations.
# NOTE(review): the ids are hard-coded; presumably they are the best trials of
# the search — confirm against the results table before re-running.
best_tuner_results = csv.query("tuner_iteration in [2, 1, 25, 16, 32]")
# Same two figures as for the full log, restricted to the selected iterations.
loss_graph = make_loss_graphs(best_tuner_results)
acc_graph = make_loss_accuracy_graphs(best_tuner_results)

loss_graph.update_layout(title="Loss vs Val_loss in tuner search per epoch (Val dashed)").show()
acc_graph.update_layout(title="Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)").show()

In [None]:
# The best model is 1. So we retrain that one. We now need to estimate lr, epochs and batch size.
# We avoid doing epochs as we will use the early stopping technique.

# 4 - Best found model

## 4.1 - Model definition

In [3]:
import pandas

# Reload the per-trial results table saved as "<project name>_results.csv".
parameters = pandas.read_csv(f"{project_definition['name']}_results.csv")

In [72]:
# Show the first row of the saved results; presumably the best trial, assuming
# the file keeps the best-first order in which it was written — TODO confirm.
parameters.head(1)

We build the best model from the given hp. <br />
To make things more readable we define the model directly in the following cell:

In [3]:
from conv_network_structure import ConvNetworkStructure
from models.structure.layer_structure_data import ConvLayerStructure, PoolLayerStructure, HiddenLayerStructure


class BestFoundModel(ConvNetworkStructure, TorchAugmentationModel):
    """Fixed network structure chosen after the tuner search.

    Four convolution layers, each followed by a default pooling layer, and two
    hidden layers of 128 units. The layer lists are class attributes read by
    the base classes.
    """

    # Annotated as `list` to match the assigned values (the previous `tuple`
    # annotation did not match the list literals).
    convolutional_layers: list = [
        ConvLayerStructure((3, 3), 32),
        PoolLayerStructure.default(),
        ConvLayerStructure((3, 3), 256),
        PoolLayerStructure.default(),
        ConvLayerStructure((5, 5), 32),
        PoolLayerStructure.default(),
        ConvLayerStructure((5, 5), 32),
        PoolLayerStructure.default(),
    ]

    dense_layers: list = [
        HiddenLayerStructure(128),
        HiddenLayerStructure(128)
    ]

## 4.2 - Learning parameters tuning


In [9]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParametersTunable

# Rebinds `sgd_learning_parameters` (previously a SgdLearningParameters used for
# the structure search) to the tunable variant, again with the default learning rate.
sgd_learning_parameters = SgdLearningParametersTunable(learning_rate=default_values["learning_rate"])

In [12]:
# Build the fixed best-found structure.
model_generator = BestFoundModel()
# NOTE(review): `variance` is passed to a method named `load_dataset_means_and_stds`,
# while the tunable family in this notebook is fed through
# `load_dataset_mean_and_variance`. If this method expects standard deviations,
# the square root of the variance should be passed instead — confirm.
model_generator.load_dataset_means_and_stds(mean, variance)
# (3, 224, 224) is the same input shape given to the hypermodel during the search.
model = model_generator.make_model((3, 224, 224))

# Compile with the learning parameters defined in the previous cell, then print the layer summary.
sgd_learning_parameters.compile_model(model)
model.summary()

In [14]:
import keras

# Train the best-found model on the training fold, validating on the held-out
# fold; training stops early when val_loss has not improved for 5 epochs.
model.fit(train_dataloader, validation_data=validation_dataloader, epochs=default_values["epochs"], callbacks=[
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min'),
])

In [7]:
import keras

# NOTE(review): this cell repeats the fit call of the preceding cell on the same
# `model` object, so running both trains the model for a second round. The
# execution counts are also out of order — confirm whether this cell is a
# leftover and should be removed.
model.fit(train_dataloader, validation_data=validation_dataloader, epochs=default_values["epochs"], callbacks=[
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min'),
])

In [8]:
# Evaluate the trained model on the held-out test set.
model.evaluate(test_dataloader)

## 4.3 - k-fold CV for estimating the expected model