# Conv Net Family: Model 40 of keras tuner

We reference hyperparameter iteration 8 from the search, which ranked second best.

## 1 - Model Ad Hoc Definition

In [1]:
from __future__ import annotations

from utils.my_tuner import HistoryDeletingRandomSearch

# Where the earlier architecture search was persisted.
directory = "cnn_search"
project_name = "random-search-rich-structure"

# Re-open that search instead of starting a new one: no hypermodel is supplied and
# overwrite=False, so the tuner state is reloaded from directory/project_name.
previous_tuner = HistoryDeletingRandomSearch(
    None,
    overwrite=False,
    project_name=project_name,
    directory=directory,
)

# Request the two best hyperparameter sets and display the first entry of that list.
previous_tuner.get_best_hyperparameters(2)[0].values

Reloading Tuner from cnn_search/random-search-rich-structure/tuner0.json


{'lr': 0.0001,
 'momentum': 0.9,
 'filters_0': 64,
 'filters_1': 64,
 'filters_2': 64,
 'filters_3': 64,
 'kernel_0': 3,
 'kernel_1': 5,
 'kernel_2': 5,
 'kernel_3': 3,
 'units_0': 128,
 'units_1': 64,
 'units_2': 64,
 'dropout_0': False,
 'dropout_1': False,
 'dropout_2': False,
 'convolution_layers': 2,
 'hidden_layers': 2}

In [2]:
from typing import Final
from models.structure.layer_structure_data import ConvLayerStructure, PoolLayerStructure, HiddenLayerStructure
from models.simple_cnn.conv_net_family import TunableConvNetFamily


# Definition on the tuner parameters.
class ConvNetFamilyModel40(TunableConvNetFamily):
    """Fixed network structure copied from the tuner result displayed in the previous cell.

    The structure is declared through class attributes only; this class adds no
    behaviour of its own on top of TunableConvNetFamily.
    """

    # Two convolution stages, mirroring convolution_layers=2 in the tuner output:
    # a 3x3 and a 5x5 entry, both with 64 (kernel_0/kernel_1 and filters_0/filters_1).
    # Each one is paired with PoolLayerStructure((2, 2), 2) — presumably pool size and
    # stride; TODO confirm against PoolLayerStructure.
    convolution_layers: list[tuple[ConvLayerStructure, PoolLayerStructure | None]] = [
        (ConvLayerStructure((3, 3), 64), PoolLayerStructure((2, 2), 2)),
        (ConvLayerStructure((5, 5), 64), PoolLayerStructure((2, 2), 2))
    ]

    # Two hidden layers with 128 and 64 (units_0/units_1, hidden_layers=2 in the tuner output).
    # The second argument is None — presumably "no dropout", consistent with
    # dropout_0/dropout_1 being False; TODO confirm against HiddenLayerStructure.
    dense_layers: list[HiddenLayerStructure] = [
        HiddenLayerStructure(128, None),
        HiddenLayerStructure(64, None)
    ]

    # We won't allow to override the configuration of this structure
    parameters_fixed: Final[bool] = True

## 2 - Learning parameters tuning via Keras Tuner

In [3]:
# Instantiate the fixed Model 40 structure; it is passed to TunableHyperModel when the tuner is set up.
model_wrapper = ConvNetFamilyModel40()

### 2.1 - Setting parameters

In [4]:
from keras_tuner import HyperParameters

# Search space restricted to the training settings; the network structure is not part of it.
learning_parameters = HyperParameters()

# Candidate mini-batch sizes.
learning_parameters.Choice(
    name="batch_size",
    values=[8, 16, 32, 64],
    default=16,
)

# Learning rate between 1e-5 and 1e-3 on a log scale; with log sampling, step=2 is the
# multiplier between neighbouring candidates (1e-5, 2e-5, 4e-5, ...).
learning_parameters.Float(
    name="lr",
    min_value=1e-5,
    max_value=1e-3,
    sampling="log",
    step=2,
)

# Momentum in increments of 0.05 starting at 0.5.
# NOTE(review): the range appears to include 1.0 itself — confirm that is intended.
# Kept as the cell's last expression, so its return value is what the cell displays.
learning_parameters.Float(
    name="momentum",
    min_value=0.5,
    max_value=1,
    step=0.05,
    default=0.5,
)

0.5

### 2.2 - Loading data

In [5]:
from dataset.dataset_loader import prepare_dataloaders, dataset_loader
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper

# Load all the data
train, test = dataset_loader((224, 224), is_grayscale=False)

# Split it to create a validation split, as we don't want to use the test data
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

# Only fold 0 is used for this search. The second argument is None — presumably the batch size
# is left unset here because it is tuned as a hyperparameter; TODO confirm against prepare_dataloaders.
train_dataloader, validation_dataloader = prepare_dataloaders(
    dataset_split_controller.get_data_for_fold(0), None)

### 2.3 - Setting up the tuner
Before setting up the tuner, we decide on our metrics:

In [6]:
# Rebind the names used for the earlier structure search: this tuning run is stored in the
# same directory but under its own project name.
project_name = "random-search-best-40-hp"
directory = "cnn_search"

In [7]:
from models.zero_one_validation_loss import ZeroOneLoss, iter_0_1_loss

# Metrics to report during training: accuracy plus the two project-specific 0-1 loss metrics
# (presumably the iter_0_1_loss and total_0-1_loss columns of the training log; confirm).
metrics = ['accuracy', iter_0_1_loss, ZeroOneLoss()]

In [8]:
from utils.my_tuner import HistoryDeletingBayesianOptimization
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParametersTunable
from models.structure.tunable_hypermodel import TunableHyperModel

# Learning parameters. We use SGD as reported for various reasons
tunable_learning_parameters = SgdLearningParametersTunable(learning_rate=1e-4)

# Wrap the fixed structure for the tuner. (3, 224, 224) matches the 224x224 non-grayscale
# images loaded earlier; tune_batch=True presumably lets the hypermodel apply the
# "batch_size" hyperparameter itself — TODO confirm against TunableHyperModel.
hypermodel = TunableHyperModel(model_wrapper, tunable_learning_parameters, (3, 224, 224), tune_batch=True)
batch_tuner = HistoryDeletingBayesianOptimization(
    hypermodel,
    hyperparameters=learning_parameters,

    objective='val_loss',
    # Only the entries declared in learning_parameters are searched; any other
    # hyperparameter the hypermodel requests keeps its default value.
    tune_new_entries=False,

    executions_per_trial=1,
    # Resume an existing search stored under directory/project_name instead of discarding it.
    overwrite=False,

    # Reuse the metric list decided in the previous cell rather than repeating it here,
    # so there is a single place to change what gets reported.
    metrics=metrics,
    max_trials=20,

    directory=directory,
    project_name=project_name
)

In [9]:
import keras
import callbacks.threshold_stop_cb

# Run the search on the fold-0 loaders, training each trial for up to 15 epochs.
batch_tuner.search(train_dataloader, epochs=15, validation_data=validation_dataloader, callbacks=[
    # Append the metrics of every trial to one CSV log inside the tuner's project folder.
    keras.callbacks.CSVLogger(f"{directory}/{project_name}/search.log", separator=",", append=True),
    # Project-specific stopping callback. NOTE(review): the meaning of (0.6, 4) is not visible
    # here — presumably a threshold and an epoch/patience count; confirm against ThresholdStopCallback.
    callbacks.threshold_stop_cb.ThresholdStopCallback(0.6, 4),
])

Trial 7 Complete [00h 13m 00s]
val_loss: 0.4859161376953125

Best val_loss So Far: 0.36094075441360474
Total elapsed time: 01h 27m 08s

Search: Running Trial #8

Value             |Best Value So Far |Hyperparameter
8                 |16                |batch_size
0.00032           |0.00064           |lr
0.85              |0.85              |momentum



Epoch 1/15
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 111ms/step - accuracy: 0.6099 - iter_0_1_loss: 3.1210 - loss: 0.6526 - total_0-1_loss: 700.0168 - val_accuracy: 0.7086 - val_iter_0_1_loss: 9.2000 - val_loss: 0.5600 - val_total_0-1_loss: 276.0000
Epoch 2/15
[1m413/474[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m5s[0m 85ms/step - accuracy: 0.7514 - iter_0_1_loss: 1.9889 - loss: 0.5127 - total_0-1_loss: 410.8111

KeyboardInterrupt: 

## 3 - Check the results

In [None]:
import torch
from torch.utils.data import DataLoader
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
from dataset.dataset_loader import dataset_loader
import keras_tuner

# Search for best hyperparameters for model 4 and 8: Batch size, and SGD params
# NOTE(review): this notebook is about Model 40 — the line above looks carried over from
# another notebook; confirm and update.

# Initial steps
hyperparameters = keras_tuner.HyperParameters()
train, test = dataset_loader((224, 224), is_grayscale=False)
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

# No batch size is fixed
# NOTE(review): without an explicit batch_size the DataLoader falls back to its default of 1;
# confirm this is what "not fixed" is meant to achieve.
local_train, validation = dataset_split_controller.get_data_for_fold(0)
train_dataloader = DataLoader(dataset=local_train, shuffle=True)

# The validation split is only evaluated, never trained on, so it is read in a fixed
# order: shuffling it adds nothing and makes runs harder to compare.
validation_dataloader = DataLoader(dataset=validation, shuffle=False, batch_size=32)

## 4 - K-fold on the resulting model