In [1]:
# Baseline training settings reused throughout the notebook: the batch size feeds the
# data loaders, the learning rate feeds the optimizer settings, and the epoch count
# bounds each tuner trial.
default_values = dict(
    batch_size=32,
    epochs=20,
    learning_rate=1e-2,
)

The goal of this procedure is to find a good structure for the network that we might not
find by hand, as the process is anything but trivial. We begin by finding good networks, then we select one or two of them and tune their learning hyperparameters.

This kind of approach seems to be very popular.

# 1- Load data

In [2]:
from typing import Any

# Settings identifying this tuning run. The "name" entry is reused as the tuner's
# project name and as the prefix of the CSV files written by later cells.
# The value type is typing.Any: the builtin function `any` is not a type.
project_definition: dict[str, Any] = {"name": "complete_search"}

In [3]:
import models.structure.base_model_wrapper
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
from torch.utils.data import DataLoader
from dataset.dataset_loader import dataset_loader

# Load the train and test datasets, requesting a (224, 224) size and colour (non-grayscale) data.
train, test = dataset_loader((224, 224), is_grayscale=False)

# Split train into 5 folds and hold out fold 0 as validation,
# i.e. about 20% of train (assuming the wrapper produces equally sized folds — TODO confirm).
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

local_train, validation = dataset_split_controller.get_data_for_fold(0)

# Only the training loader reshuffles its samples every epoch. The validation and test
# loaders keep a fixed order: shuffling brings no benefit when evaluating, and a stable
# order keeps evaluation repeatable and predictions aligned with the dataset.
train_dataloader = DataLoader(dataset=local_train, batch_size=default_values["batch_size"], shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=default_values["batch_size"], shuffle=False)
test_dataloader = DataLoader(dataset=test, batch_size=default_values["batch_size"], shuffle=False)

# 2 - Model family definition
The model family is defined in the file `conv_network_structure.py` as `ConvNetworkStructure`. To work with my custom implementation of the Keras hypermodel,
I extended it into `TunableConvNetworkStructure`.

In [4]:
from models.structure.augmentation_wrapper import CustomInvertedAugmentationWrapper
from conv_network_structure import TunableConvNetworkStructure


# Open question kept from the original author: can augmentation not be used here
# because its time would be spent on the CPU? — TODO confirm.
class TunableConvNetworkFamilyWithPreprocess(TunableConvNetworkStructure, CustomInvertedAugmentationWrapper):
    """Tunable convolutional network family combined with the augmentation wrapper.

    Defines no members of its own; everything is inherited from the two base
    classes, which are resolved in the order they are listed.
    """


# Single shared instance of the model family used by the tuning cells.
tunableConvNetworkFamily = TunableConvNetworkFamilyWithPreprocess()


# 3 - Tuning process

## 3.1 - Search space definition

In [5]:
from keras_tuner import HyperParameters

hyperparameters = HyperParameters()

# Settings shared by every integer range below: log sampling with a step of 2.
log_scale = {"step": 2, "sampling": "log"}

# Register every hyperparameter explicitly up front. According to the original author
# this works around the keras tuner not registering variables correctly, which would
# otherwise cause some iterations to be lost because some parameters are unknown.
for layer_index in range(4):
    hyperparameters.Int(f"filters_{layer_index}", min_value=16, max_value=256, **log_scale)
    hyperparameters.Choice(f"kernel_{layer_index}", values=[3, 5], default=3)

# Two "units_*" ranges, registered after the per-layer filter/kernel entries.
hyperparameters.Int(name="units_0", min_value=32, max_value=256, **log_scale)
hyperparameters.Int(name="units_1", min_value=32, max_value=256, **log_scale)

32

In [6]:
from utils.my_tuner import HistoryDeletingBayesianOptimization
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters
from models.structure.tunable_hypermodel import TunableHyperModel

# Learning parameters built from the notebook-wide default learning rate.
sgd_learning_parameters = SgdLearningParameters(learning_rate=default_values["learning_rate"])

# NOTE(review): (3, 224, 224) is presumably a channels-first input shape
# (channels, height, width) matching the 224x224 colour data — confirm.
hypermodel = TunableHyperModel(tunableConvNetworkFamily, sgd_learning_parameters, (3, 224, 224))

# Tuner options, collected in one place and unpacked into the constructor below.
tuner_settings = {
    "hyperparameters": hyperparameters,
    "objective": 'val_loss',
    "tune_new_entries": True,
    # Not overwriting, so that the search can be resumed in a later session.
    "overwrite": False,
    "directory": "tuned_models",
    "project_name": project_definition["name"],
    # 15 trials now, 15 later and 15 later again; split so the search can be resumed.
    "max_trials": 15,
}

tuner = HistoryDeletingBayesianOptimization(hypermodel, **tuner_settings)

Given model is tunable


To avoid going on forever we use two kinds of callbacks:
- Early Stopping
- ThresholdStopCallback (custom)

In [7]:
import keras
import callbacks.threshold_stop_cb

# Callbacks attached to every trial of the search.
search_callbacks = [
    # Append the training log of each run to one CSV file named after the project.
    keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True),
    # Stop a trial once val_loss has not improved for 5 epochs.
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min'),

    # NOTE(review): custom callback; the arguments are presumably (threshold, epoch),
    # i.e. stop trials that are still above the threshold at that epoch — confirm
    # against callbacks/threshold_stop_cb.py.
    callbacks.threshold_stop_cb.ThresholdStopCallback(1, 2),
    callbacks.threshold_stop_cb.ThresholdStopCallback(0.6, 4),
]

# Run the hyperparameter search on the training loader, validating on the held-out fold.
tuner.search(
    train_dataloader,
    epochs=default_values["epochs"],
    validation_data=validation_dataloader,
    callbacks=search_callbacks,
)

Trial 15 Complete [00h 08m 44s]
val_loss: 0.49977484345436096

Best val_loss So Far: 0.23140113055706024
Total elapsed time: 03h 42m 27s


In [9]:
import pandas as pd

def _trial_to_row(trial):
    """Return one table row: the trial's hyperparameter values plus its score under "Score"."""
    row = dict(trial.hyperparameters.get_config()["values"])
    row["Score"] = trial.score
    return row


# Collect the 10 best trials found so far into one table, one row per trial.
trials = tuner.oracle.get_best_trials(num_trials=10)
hyperparameters_frame = pd.DataFrame([_trial_to_row(trial) for trial in trials])

# Persist the table without the index column.
# NOTE(review): the output path has no file extension although the content is CSV.
hyperparameters_frame.to_csv(f"{project_definition['name']}_results", index=False)

In [10]:
# Display the table of best trials (hyperparameter values plus the "Score" column).
hyperparameters_frame

Unnamed: 0,filters_0,kernel_0,filters_1,kernel_1,filters_2,kernel_2,filters_3,kernel_3,units_0,units_1,convolution_layers,hidden_layers,Score
0,32,3,256,3,32,5,32,5,128,128,4,2,0.231401
1,32,3,64,5,256,3,256,5,32,32,4,1,0.276838
2,128,3,64,5,32,3,16,5,256,64,4,2,0.326729
3,64,3,128,5,256,5,16,5,128,256,3,1,0.340135
4,64,3,64,5,32,5,16,3,256,32,3,1,0.363358
5,128,3,32,3,32,5,32,3,128,64,3,1,0.374502
6,128,3,16,3,256,5,64,3,128,64,3,2,0.391342
7,128,3,32,5,16,5,256,3,128,64,3,1,0.394137
8,64,5,64,5,16,3,256,3,256,32,2,1,0.40009
9,128,5,256,3,16,3,256,3,32,32,2,2,0.416033


In [12]:
# Print the tuner's own summary of the best trials and their hyperparameters.
tuner.results_summary()

Results summary
Results in tuned_models/complete_search
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 02 summary
Hyperparameters:
filters_0: 32
kernel_0: 3
filters_1: 256
kernel_1: 3
filters_2: 32
kernel_2: 5
filters_3: 32
kernel_3: 5
units_0: 128
units_1: 128
convolution_layers: 4
hidden_layers: 2
Score: 0.23140113055706024

Trial 01 summary
Hyperparameters:
filters_0: 32
kernel_0: 3
filters_1: 64
kernel_1: 5
filters_2: 256
kernel_2: 3
filters_3: 256
kernel_3: 5
units_0: 32
units_1: 32
convolution_layers: 4
hidden_layers: 1
Score: 0.2768384516239166

Trial 12 summary
Hyperparameters:
filters_0: 128
kernel_0: 3
filters_1: 64
kernel_1: 5
filters_2: 32
kernel_2: 3
filters_3: 16
kernel_3: 5
units_0: 256
units_1: 64
convolution_layers: 4
hidden_layers: 2
Score: 0.3267289698123932

Trial 05 summary
Hyperparameters:
filters_0: 64
kernel_0: 3
filters_1: 128
kernel_1: 5
filters_2: 256
kernel_2: 5
filters_3: 16
kernel_3: 5
units_0: 128
units_1: 256
convolution_layers

In [None]:
## TODO GO ON WITH RESULTS