# Conv Net Family: Model 8 of keras tuner

This notebook starts from hyperparameter iteration 8 of the earlier Keras Tuner search, which was the second-best trial.

## 1 - Model Ad Hoc Definition

In [1]:
from __future__ import annotations

from torch.utils.data import DataLoader

from utils.my_tuner import HistoryDeletingRandomSearch

# Location of the earlier architecture search: <directory>/<project_name>.
directory = "cnn_search"
project_name = "random-search-rich-structure"

# Re-open the earlier tuner. No hypermodel is passed (None) and overwrite=False,
# so the tuner state already stored on disk is reloaded instead of discarded.
previous_tuner = HistoryDeletingRandomSearch(
    None,
    directory=directory,
    project_name=project_name,
    overwrite=False,
)

# Request the two best hyperparameter sets and display the second one (index 1).
previous_tuner.get_best_hyperparameters(2)[1].values

Reloading Tuner from cnn_search/random-search-rich-structure/tuner0.json


{'lr': 0.0001,
 'momentum': 0.9,
 'filters_0': 64,
 'filters_1': 64,
 'filters_2': 128,
 'filters_3': 32,
 'kernel_0': 5,
 'kernel_1': 3,
 'kernel_2': 3,
 'kernel_3': 3,
 'units_0': 256,
 'units_1': 32,
 'units_2': 64,
 'dropout_0': False,
 'dropout_1': False,
 'dropout_2': False,
 'convolution_layers': 1,
 'hidden_layers': 1}

The tuner yielded a model with a single convolutional layer (64 filters, 5x5 kernel) and a single hidden dense layer with 256 units.

To avoid loading the parameters from the tuner history every time, we simply redefine the model here so that its structure is fixed.

In [2]:
from typing import Final
from models.structure.layer_structure_data import ConvLayerStructure, PoolLayerStructure, HiddenLayerStructure
from models.simple_cnn.conv_net_family import TunableConvNetFamily


# Hard-coded version of the structure selected by the tuner (see the values above).
class ConvNetFamilyModel8(TunableConvNetFamily):
    """Conv-net family whose layer structure is pinned to tuner iteration 8.

    The structure mirrors the hyperparameters printed by the previous tuner:
    one convolutional layer (kernel 5, 64 filters) and one hidden layer of
    256 units without dropout.
    """

    # Single (convolution, pooling) pair.
    # NOTE(review): argument meaning is inferred from the tuner values above
    # ((5, 5) kernel, 64 filters; (2, 2) pool with stride 2) - confirm against
    # ConvLayerStructure / PoolLayerStructure.
    convolution_layers: list[tuple[ConvLayerStructure, PoolLayerStructure | None]] = [
        (
            ConvLayerStructure((5, 5), 64),
            PoolLayerStructure((2, 2), 2),
        ),
    ]

    # Single hidden layer with 256 units.
    # NOTE(review): the second argument (None) presumably disables dropout - confirm.
    dense_layers: list[HiddenLayerStructure] = [HiddenLayerStructure(256, None)]

    # The structure above must not be overridden by loaded/tuned parameters.
    parameters_fixed: Final[bool] = True

## 2 - Learning parameters tuning via Keras Tuner

In [3]:
# Instantiate the fixed-structure family; it is handed to the hypermodel further below.
model_wrapper = ConvNetFamilyModel8()

### 2.1 - Setting parameters

In [4]:
from keras_tuner import HyperParameters

# Search space restricted to the learning hyperparameters (the structure is fixed).
learning_parameters = HyperParameters()

# Mini-batch size: one of four fixed values.
learning_parameters.Choice(
    name="batch_size",
    values=[8, 16, 32, 64],
    default=16,
)

# Learning rate in [1e-5, 1e-3], sampled on a log scale with step=2.
learning_parameters.Float(
    name="lr",
    min_value=1e-5,
    max_value=1e-3,
    step=2,
    sampling='log',
)

# SGD momentum in [0.5, 1] with a 0.05 step. Kept as the last expression of the
# cell, so the value returned by this call is what the cell displays.
learning_parameters.Float(
    name="momentum",
    min_value=0.5,
    max_value=1,
    step=0.05,
    default=0.5,
)

0.5

### 2.2 - Loading data

In [5]:
from dataset.dataset_loader import prepare_dataloaders, dataset_loader
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper

# Load the whole dataset in colour (non-grayscale).
# NOTE(review): (224, 224) is presumably the target image size - confirm in dataset_loader.
train, test = dataset_loader((224, 224), is_grayscale=False)

# Create a validation split out of the training data, because the test data
# must not be used while tuning.
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

# Only fold 0 is used for the tuner search.
# NOTE(review): the meaning of the trailing None passed to prepare_dataloaders
# is not visible here - confirm against its definition.
first_fold = dataset_split_controller.get_data_for_fold(0)
train_dataloader, validation_dataloader = prepare_dataloaders(first_fold, None)

### 2.3 - Setting up the tuner
Before setting up the tuner, we choose where its results are stored and which metrics to track:

In [6]:
# Where this tuner run stores and reloads its results: <directory>/<project_name>.
# NOTE(review): the name says "random-search" although the tuner created below is a
# Bayesian-optimization one; renaming would point the tuner at a different folder.
project_name = "random-search-best-8-hp"
directory = "cnn_search"

In [7]:
from models.zero_one_validation_loss import ZeroOneLoss, iter_0_1_loss

# Metrics to track: built-in accuracy plus the two project-defined 0-1 loss metrics.
metrics = ['accuracy', iter_0_1_loss, ZeroOneLoss()]

In [8]:
from utils.my_tuner import HistoryDeletingBayesianOptimization
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParametersTunable
from models.structure.tunable_hypermodel import TunableHyperModel

# SGD is the optimizer of choice; 1e-4 is passed as its learning-rate argument.
tunable_learning_parameters = SgdLearningParametersTunable(learning_rate=1e-4)

# Hypermodel combining the fixed-structure family with the tunable SGD settings.
# tune_batch=True asks the hypermodel to tune the batch size as well.
hypermodel = TunableHyperModel(
    model_wrapper,
    tunable_learning_parameters,
    (3, 224, 224),
    tune_batch=True,
)

batch_tuner = HistoryDeletingBayesianOptimization(
    hypermodel,
    # Search only the space declared in `learning_parameters`; hyperparameters
    # requested by the hypermodel but not declared there are not tuned.
    hyperparameters=learning_parameters,
    tune_new_entries=False,
    # Minimise the validation loss over at most 40 trials.
    objective='val_loss',
    max_trials=40,
    metrics=['accuracy', iter_0_1_loss, ZeroOneLoss()],
    # Results live in <directory>/<project_name>; overwrite=False keeps and
    # reloads whatever is already stored there.
    directory=directory,
    project_name=project_name,
    overwrite=False,
)

Reloading Tuner from cnn_search/random-search-best-8-hp/tuner0.json


In [9]:
import keras
import callbacks.threshold_stop_cb

# Append the per-epoch metrics of every trial to one CSV log inside the project folder.
search_logger = keras.callbacks.CSVLogger(f"{directory}/{project_name}/search.log", separator=",", append=True)

# Project-defined stopping callback.
# NOTE(review): the meaning of (0.6, 4) is not visible here - presumably a
# threshold and a number of epochs; confirm in callbacks.threshold_stop_cb.
threshold_stop = callbacks.threshold_stop_cb.ThresholdStopCallback(0.6, 4)

batch_tuner.search(
    train_dataloader,
    epochs=15,
    validation_data=validation_dataloader,
    callbacks=[search_logger, threshold_stop],
)

Trial 40 Complete [00h 15m 53s]
val_loss: 0.3601981997489929

Best val_loss So Far: 0.3450351655483246
Total elapsed time: 09h 38m 27s

Search: Running Trial #41

Value             |Best Value So Far |Hyperparameter
8                 |8                 |batch_size
4e-05             |4e-05             |lr
0.95              |0.95              |momentum



Epoch 1/15
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 134ms/step - accuracy: 0.6041 - iter_0_1_loss: 3.1673 - loss: 0.6505 - total_0-1_loss: 710.4505 - val_accuracy: 0.7328 - val_iter_0_1_loss: 8.4333 - val_loss: 0.5453 - val_total_0-1_loss: 253.0000
Epoch 2/15
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 136ms/step - accuracy: 0.7527 - iter_0_1_loss: 1.9786 - loss: 0.5268 - total_0-1_loss: 472.3390 - val_accuracy: 0.7730 - val_iter_0_1_loss: 7.1667 - val_loss: 0.4785 - val_total_0-1_loss: 215.0000
Epoch 3/15
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 127ms/step - accuracy: 0.7951 - iter_0_1_loss: 1.6390 - loss: 0.4611 - total_0-1_loss: 384.4568 - val_accuracy: 0.7846 - val_iter_0_1_loss: 6.8000 - val_loss: 0.4908 - val_total_0-1_loss: 204.0000
Epoch 4/15
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 136ms/step - accuracy: 0.8256 - iter_0_1_loss: 1.3950 - loss: 0.4095 - total_0-1_loss: 332.4632 -

KeyboardInterrupt: 

## 3 - Check the results

In [12]:
# Print the tuner's summary of its best trials (hyperparameters and score).
batch_tuner.results_summary()

Results summary
Results in cnn_search/random-search-best-8-hp
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 16 summary
Hyperparameters:
batch_size: 8
lr: 4e-05
momentum: 0.95
Score: 0.3450351655483246

Trial 36 summary
Hyperparameters:
batch_size: 8
lr: 4e-05
momentum: 0.95
Score: 0.347702294588089

Trial 21 summary
Hyperparameters:
batch_size: 8
lr: 4e-05
momentum: 0.95
Score: 0.34815552830696106

Trial 31 summary
Hyperparameters:
batch_size: 8
lr: 4e-05
momentum: 0.95
Score: 0.3501090109348297

Trial 22 summary
Hyperparameters:
batch_size: 8
lr: 4e-05
momentum: 0.95
Score: 0.3506608009338379

Trial 30 summary
Hyperparameters:
batch_size: 8
lr: 4e-05
momentum: 0.95
Score: 0.35252252221107483

Trial 34 summary
Hyperparameters:
batch_size: 8
lr: 4e-05
momentum: 0.95
Score: 0.35348737239837646

Trial 35 summary
Hyperparameters:
batch_size: 8
lr: 4e-05
momentum: 0.95
Score: 0.35406792163848877

Trial 26 summary
Hyperparameters:
batch_size: 8
lr: 4e-05
momentum:

In [13]:
import pandas
from utils.data_processing import add_tuner_iteration_to_data

# Per-epoch log written by the CSVLogger callback during the tuner search.
search_log_path = f"./cnn_search/{project_name}/search.log"
csv = pandas.read_csv(search_log_path)

# NOTE(review): the return value is discarded, so this presumably adds the
# `tuner_iteration` column to `csv` in place - confirm in utils.data_processing.
add_tuner_iteration_to_data(csv)

# Two tuner iterations are missing from the CSV. The reason is unknown; all we
# know is that iterations 42 and 47 are mapped to 40 and 45.
# Keep only the rows of the five best trials reported by the results summary.
best_dataframe = csv.query("tuner_iteration in [16, 36, 21, 31, 22]")

In [14]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs

# Training vs. validation loss per epoch for the selected trials.
loss_graph = make_loss_graphs(best_dataframe)
loss_graph.update_layout(title="Loss vs Val_loss in tuner search per epoch (Val dashed)")

# Training vs. validation accuracy per epoch for the same trials.
acc_graph = make_loss_accuracy_graphs(best_dataframe)
acc_graph.update_layout(title="Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)")

# Render both figures, loss first.
for figure in (loss_graph, acc_graph):
    figure.show()

## Should I use k-fold CV at this point?

## 4 - K-fold on the resulting model

In [None]:
# NOTE: for some unknown reason, trial 1 was skipped

In [None]:
# Display the values of the top-ranked hyperparameter set (index 0 of the 5 requested).
batch_tuner.get_best_hyperparameters(5)[0].values

In [None]:
# We take the best hyperparameters (the top 3 trials are all identical).
best_hyperparameters = batch_tuner.get_best_hyperparameters(5)[0]
# Read the tuned batch size once instead of re-querying the tuner per dataloader.
best_batch_size = best_hyperparameters['batch_size']

# Reload the full dataset: the final model is trained on the whole training
# set and evaluated on the held-out test set.
train, test = dataset_loader((224, 224), is_grayscale=False)

train_dataloader = DataLoader(dataset=train, batch_size=best_batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=test, batch_size=best_batch_size, shuffle=True)

# SGD settings: start from the 1e-4 default, then overwrite with the tuned values.
sgd_learning_parameters = SgdLearningParametersTunable(1e-4, metrics=['accuracy', iter_0_1_loss, ZeroOneLoss()])
sgd_learning_parameters.load_parameters(best_hyperparameters)

# Build the network from `model_wrapper`, the ConvNetFamilyModel8 instance
# created earlier. The previous `model_family` name is never defined in this
# notebook and raised a NameError on a fresh kernel.
model = model_wrapper.make_model((3, 224, 224))

sgd_learning_parameters.compile_model(model)
model.summary()

In [None]:
# Display the compiled optimizer's configuration (e.g. to check the loaded hyperparameters).
model.optimizer.get_config()

In [None]:
# Append the per-epoch metrics of this training run to a dedicated CSV log.
best_params_logger = keras.callbacks.CSVLogger(
    f"{directory}/{project_name}/best_params_search.log",
    separator=",",
    append=True,
)

# Train for 10 epochs on the whole training set (no validation data is passed).
model.fit(train_dataloader, epochs=10, callbacks=[best_params_logger])

In [None]:
# Evaluate the trained model on the test dataloader.
model.evaluate(test_dataloader)

After only 10 epochs the test loss is about 0.35, which is in line with the validation losses found during the tuner search.

In [None]:
# The model is not rebuilt, so this continues training the same model for 10
# more epochs; metrics are appended to the same CSV log as the previous run.
continued_logger = keras.callbacks.CSVLogger(
    f"{directory}/{project_name}/best_params_search.log",
    separator=",",
    append=True,
)

history = model.fit(train_dataloader, epochs=10, callbacks=[continued_logger])

In [None]:
# Evaluate again on the test dataloader after the additional epochs.
model.evaluate(test_dataloader)

# We are overfitting: the test accuracy decreased slightly (it stayed more or less the same) while the model fit the training data better

In [None]:
# Display again the second-best hyperparameter set (index 1 of 2) of the previous search.
previous_tuner.get_best_hyperparameters(2)[1].values

In [None]:
from models.structure.augmentation_wrapper import InvertedAugmentationWrapper

# Learning hyperparameters: the best set found by the batch tuner.
best_hyperparameters = batch_tuner.get_best_hyperparameters(5)[0]


class AugmentedConvNetFamily(TunableConvNetFamily, InvertedAugmentationWrapper):
    """Tunable conv-net family combined with the augmentation wrapper.

    Augmentation is used to try to increase the performance of the model.
    """


# Give the augmented family the structure of the second-best entry of the
# previous search, loaded from the tuner rather than hard-coded.
augmented_family = AugmentedConvNetFamily()
augmented_family.load_parameters(previous_tuner.get_best_hyperparameters(2)[1])
model = augmented_family.make_model((3, 224, 224))

# Both dataloaders use the tuned batch size.
tuned_batch_size = best_hyperparameters['batch_size']
train_dataloader = DataLoader(dataset=train, shuffle=True, batch_size=tuned_batch_size)
test_dataloader = DataLoader(dataset=test, shuffle=True, batch_size=tuned_batch_size)

# SGD settings: start from the 1e-4 default, overwrite with the tuned values,
# then compile the augmented model with them.
sgd_learning_parameters = SgdLearningParametersTunable(1e-4, metrics=['accuracy', iter_0_1_loss, ZeroOneLoss()])
sgd_learning_parameters.load_parameters(best_hyperparameters)
sgd_learning_parameters.compile_model(model)

model.summary(expand_nested=True)

In [None]:
# Reference: https://www.deeplearningbook.org/contents/optimization.html
# Metrics of the augmented run are appended to their own CSV log.
augmented_logger = keras.callbacks.CSVLogger(
    f"{directory}/{project_name}/best_params_search_aug.log",
    separator=",",
    append=True,
)

# Train the augmented model for 12 epochs on the whole training set.
model.fit(train_dataloader, epochs=12, callbacks=[augmented_logger])

In [None]:
# Evaluate the augmented model on the test dataloader.
model.evaluate(test_dataloader)

# Use nested k-fold CV to tune the number of epochs, then train with the best value

In [None]:
# Use k-fold CV to tune the number of epochs and avoid early stopping

# Use early stopping with a validation split