In [1]:
default_values = {"batch_size": 32, "epochs": 20, "learning_rate": 1e-2}

The goal of this procedure is to find a good structure for the network that by hand we might not
find as the process is all but trivial. We begin by finding good networks, we select one or two and then we tune the learning hyperparameters.

This kind of approach seems to be very popular

# 1- Load data

In [2]:
project_definition: dict[str, any] = {"name": "complete_search"}

In [3]:
import models.structure.base_model_wrapper
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
from torch.utils.data import DataLoader
from dataset.dataset_loader import dataset_loader

train, test = dataset_loader((224, 224), is_grayscale=False)

# We take 20% of train as validation. 
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

local_train, validation = dataset_split_controller.get_data_for_fold(0)

train_dataloader = DataLoader(dataset=local_train, batch_size=default_values["batch_size"], shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=default_values["batch_size"], shuffle=True)
test_dataloader = DataLoader(dataset=test, batch_size=default_values["batch_size"], shuffle=True)

# 2 - Model family definition
The model family is defined in the file ```conv_network_structure.py``` as ```ConvNetworkStructure```. To work with my custom implementation of the keras hypermodel
I extended it to be ```TunableConvNetworkStructure```

In [4]:
from models.structure.augmentation_wrapper import CustomInvertedAugmentationWrapper
from conv_network_structure import TunableConvNetworkStructure


# Cannot use augmentation as time is CPU then?
class TunableConvNetworkFamilyWithPreprocess(TunableConvNetworkStructure, CustomInvertedAugmentationWrapper):
    pass


tunableConvNetworkFamily = TunableConvNetworkFamilyWithPreprocess()

# 3 - Tuning process

## 3.1 - Search space definition

In [5]:
from keras_tuner import HyperParameters

hyperparameters = HyperParameters()

# To avoid a bug with the keras tuner generation that doesn't correctly register variables
# If not done like this some iteration would be lost as we have no way of knowing some parameters.
for i in range(4):
    hyperparameters.Int(f"filters_{i}", min_value=16, max_value=256, step=2, sampling='log')
    hyperparameters.Choice(f"kernel_{i}", values=[3, 5], default=3)

hyperparameters.Int(name=f"units_0", min_value=32, max_value=256, step=2, sampling='log')
hyperparameters.Int(name=f"units_1", min_value=32, max_value=256, step=2, sampling='log')

32

## 3.2 - Tuning procedure

In [6]:
from utils.my_tuner import HistoryDeletingBayesianOptimization
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters
from models.structure.tunable_hypermodel import TunableHyperModel

sgd_learning_parameters = SgdLearningParameters(learning_rate=default_values["learning_rate"])
hypermodel = TunableHyperModel(tunableConvNetworkFamily, sgd_learning_parameters, (3, 224, 224))

tuner = HistoryDeletingBayesianOptimization(
    hypermodel,
    hyperparameters=hyperparameters,
    objective='val_loss',
    tune_new_entries=True,
    overwrite=False,
    directory="tuned_models",
    project_name=project_definition["name"],
    max_trials=40  # We do 15 now, 15 later and 15 later again. I split it so i can resume later
)

Reloading Tuner from tuned_models/complete_search/tuner0.json


To avoid going on forever we use we callbacks:
- Early Stopping
- ThresholdStopCallback (custom)

In [7]:
import keras
import callbacks.threshold_stop_cb

tuner.search(train_dataloader, epochs=default_values["epochs"], validation_data=validation_dataloader, callbacks=[
    keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True),
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min'),

    callbacks.threshold_stop_cb.ThresholdStopCallback(1, 2),
    callbacks.threshold_stop_cb.ThresholdStopCallback(0.6, 4),
])

Trial 40 Complete [00h 15m 02s]
val_loss: 0.3339422345161438

Best val_loss So Far: 0.23140113055706024
Total elapsed time: 18h 28m 04s


In [39]:
import pandas as pd

trials = tuner.oracle.get_best_trials(num_trials=40)
hyperparameters_frame = pd.DataFrame(
    [trial.hyperparameters.get_config()["values"] | {"Score": trial.score} | {"Trial": trial.trial_id} for trial in
     trials]
)

hyperparameters_frame.to_csv(f"{project_definition['name']}_results.csv", index=False)

In [40]:
hyperparameters_frame

Unnamed: 0,filters_0,kernel_0,filters_1,kernel_1,filters_2,kernel_2,filters_3,kernel_3,units_0,units_1,convolution_layers,hidden_layers,Score,Trial
0,32,3,256,3,32,5,32,5,128,128,4,2,0.231401,2
1,32,3,64,5,256,3,256,5,32,32,4,1,0.276838,1
2,32,3,128,3,64,3,256,5,32,64,3,1,0.297501,26
3,64,3,64,5,16,3,128,3,256,64,3,1,0.299843,17
4,16,3,64,5,128,3,16,3,128,128,4,1,0.321418,35
5,128,5,32,3,64,5,32,3,128,128,4,2,0.32605,24
6,16,3,128,3,256,3,16,3,64,128,3,1,0.326418,32
7,128,3,64,5,32,3,16,5,256,64,4,2,0.326729,12
8,32,3,128,5,128,3,16,5,128,128,2,1,0.333739,18
9,16,5,16,3,32,5,32,3,256,64,4,1,0.333942,39


In [61]:
import plotly.express as px

fig = px.density_heatmap(hyperparameters_frame, x="convolution_layers", y="Score",
                         nbinsx=4, nbinsy=20, color_continuous_scale="Viridis", text_auto=True)
fig.show()

In [63]:
fig = px.density_heatmap(hyperparameters_frame, x="hidden_layers", y="Score",
                         nbinsx=2, nbinsy=20, color_continuous_scale="Viridis", text_auto=True)
fig.show()

## 3.3 - Results

In [14]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

csv = pandas.read_csv(f"{project_definition['name']}_train.csv")
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

loss_graph.update_layout(title="Loss vs Val_loss in tuner search per epoch (Val dashed)").show()
acc_graph.update_layout(title="Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)").show()

In [15]:
csv

Unnamed: 0,epoch,accuracy,loss,val_accuracy,val_loss,tuner_iteration
0,0,0.644216,0.677861,0.614572,0.645177,0
1,1,0.716852,0.572052,0.672650,0.605910,0
2,2,0.753038,0.532180,0.755016,0.520638,0
3,3,0.766772,0.519008,0.738120,0.559115,0
4,4,0.783677,0.496404,0.737064,0.553764,0
...,...,...,...,...,...,...
627,15,0.846540,0.361780,0.825766,0.390710,35
628,16,0.853143,0.338630,0.839493,0.346454,35
629,17,0.853671,0.336488,0.851109,0.333942,35
630,18,0.861595,0.329186,0.827878,0.401572,35


In [64]:
best_tuner_results = csv.query("tuner_iteration in [2, 1, 25, 16, 32]")
loss_graph = make_loss_graphs(best_tuner_results)
acc_graph = make_loss_accuracy_graphs(best_tuner_results)

loss_graph.update_layout(title="Loss vs Val_loss in tuner search per epoch (Val dashed)").show()
acc_graph.update_layout(title="Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)").show()

In [None]:
# The best model is 1. So we retrain that one. We now need to estimate lr, epochs and batch size.
# We avoid doing epochs as we will use the early stopping technique.

# 4 - Best found model

In [1]:
default_values = {"batch_size": 32, "epochs": 20, "learning_rate": 1e-2}

In [2]:
import models.structure.base_model_wrapper
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
from torch.utils.data import DataLoader
from dataset.dataset_loader import dataset_loader

train, test = dataset_loader((224, 224))

# We take 20% of train as validation. 
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

local_train, validation = dataset_split_controller.get_data_for_fold(0)

train_dataloader = DataLoader(dataset=local_train, batch_size=default_values["batch_size"], shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=default_values["batch_size"], shuffle=True)
test_dataloader = DataLoader(dataset=test, batch_size=default_values["batch_size"], shuffle=True)

## 4.1 - Model definition

In [3]:
import pandas

parameters = pandas.read_csv(f"{project_definition['name']}_results.csv")

NameError: name 'project_definition' is not defined

In [72]:
parameters.head(1)

Unnamed: 0,filters_0,kernel_0,filters_1,kernel_1,filters_2,kernel_2,filters_3,kernel_3,units_0,units_1,convolution_layers,hidden_layers,Score,Trial
0,32,3,256,3,32,5,32,5,128,128,4,2,0.231401,2


In [3]:
from conv_network_structure import ConvNetworkStructure
from models.structure.augmentation_wrapper import CustomInvertedAugmentationWrapper, NormalizedInvertedAugmentation

from models.structure.layer_structure_data import ConvLayerStructure, PoolLayerStructure, HiddenLayerStructure, \
    DropoutLayerStructure


class BestFoundModel(ConvNetworkStructure, CustomInvertedAugmentationWrapper):
    convolutional_layers: tuple = [
        ConvLayerStructure((3, 3), 32),
        PoolLayerStructure.default(),
        ConvLayerStructure((3, 3), 256),
        PoolLayerStructure.default(),
        ConvLayerStructure((5, 5), 32),
        PoolLayerStructure.default(),
        ConvLayerStructure((5, 5), 32),
        PoolLayerStructure.default(),
    ]

    dense_layers: tuple = [
        HiddenLayerStructure(128),
        HiddenLayerStructure(128)
    ]

## 4.2 - Learning parameters tuning


In [9]:
import torchvision
from dataset.dataset_loader import mean_calculator
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters
sgd_learning_parameters = SgdLearningParameters(learning_rate=default_values["learning_rate"])

model_generator = BestFoundModel()
means, variances = mean_calculator(local_train)



AttributeError: 'list' object has no attribute 'transform'

In [11]:

local_train.datasets[0].transform = torchvision.transforms.Compose([
    torchvision.transforms.Normalize(mean=means, std=variances)
])

In [26]:
local_train.datasets[2].dataset.transform = torchvision.transforms.Compose([local_train.datasets[2].dataset.transform,     torchvision.transforms.Normalize(mean=means, std=variances)])

In [27]:
local_train.datasets[2].dataset.transform

Compose(
    Compose(
)
    Normalize(mean=(tensor(0.6518), tensor(0.5949), tensor(0.5412)), std=(tensor(0.1753), tensor(0.1763), tensor(0.1940)))
)

In [12]:
train_dataloader = DataLoader(dataset=local_train, batch_size=default_values["batch_size"], shuffle=True)

model_generator.load_dataset_means_and_stds(means, variances)
model = model_generator.make_model((3, 224, 224))

sgd_learning_parameters.compile_model(model)

model.summary()

In [13]:
variances

(tensor(0.1753), tensor(0.1763), tensor(0.1940))

In [14]:
import keras
model.fit(train_dataloader, validation_data=validation_dataloader, epochs=default_values["epochs"], callbacks=[
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min'),
])

Epoch 1/20
[1m 30/119[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m31s[0m 352ms/step - accuracy: 0.5371 - loss: 0.6913

KeyboardInterrupt: 

In [7]:
import keras
model.fit(train_dataloader, validation_data=validation_dataloader, epochs=default_values["epochs"], callbacks=[
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min'),
])

Epoch 1/20
[1m  1/119[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:06[0m 560ms/step - accuracy: 0.5000 - loss: 0.6942

  outputs = tnn.conv2d(


[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 435ms/step - accuracy: 0.6247 - loss: 0.6480 - val_accuracy: 0.7867 - val_loss: 0.5017
Epoch 2/20
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 441ms/step - accuracy: 0.7648 - loss: 0.4931 - val_accuracy: 0.7297 - val_loss: 0.5786
Epoch 3/20
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 442ms/step - accuracy: 0.7901 - loss: 0.4753 - val_accuracy: 0.8184 - val_loss: 0.4278
Epoch 4/20
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 424ms/step - accuracy: 0.8198 - loss: 0.4159 - val_accuracy: 0.8532 - val_loss: 0.3926
Epoch 5/20
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 426ms/step - accuracy: 0.8474 - loss: 0.3738 - val_accuracy: 0.8131 - val_loss: 0.4648
Epoch 6/20
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 424ms/step - accuracy: 0.8641 - loss: 0.3377 - val_accuracy: 0.8501 - val_loss: 0.3782
Epoch 7/20
[1m119/11

<keras.src.callbacks.history.History at 0x72623e46a960>

In [8]:
model.evaluate(test_dataloader)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 303ms/step - accuracy: 0.9224 - loss: 0.2342


[0.21820950508117676, 0.9197635054588318]

## 4.3 - k-fold CV for estimating the expected model