In [None]:
# todo rerun all of this and end the k-fold-cv.

In [23]:
# Training defaults shared by every experiment in this notebook.
default_values = dict(batch_size=32, epochs=15, learning_rate=1e-2)

We want to study a good structure for convolutional networks. We will therefore follow common rules of thumb such as:
- The number of filters should increase with depth, so that deeper layers can match more complex patterns in the images
- A (3x3) kernel generally performs well
- CNNs benefit from stacking several convolutional layers in succession

For starters the number of epochs is fixed to 15 (see `default_values`); later it will be studied as a hyperparameter or bounded by Early Stopping.


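We will also be using BatchNormalization as proposed by the paper *Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift* (Ioffe and Szegedy, 2015): https://arxiv.org/abs/1502.03167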


# 1 - Load data
The overall data split will be [70%, 10%, 20%] for train, validation and test respectively (percentages refer to the full dataset, i.e. the provided train and test sets together). <br />

In [1]:
import torch

from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
from torch.utils.data import DataLoader
from dataset.dataset_loader import dataset_loader, dataset_information

# (224, 224) is the image size handed to the project loader.
train, test = dataset_loader((224, 224))

# Hold out 12.5% of the provided training set for validation (10% of the full dataset).
# The split is seeded so that "Restart & Run All" always yields the same partition and
# the metrics reported below stay comparable across runs.
SPLIT_SEED: int = 42
train, validation = torch.utils.data.random_split(
    train, [0.875, 0.125], generator=torch.Generator().manual_seed(SPLIT_SEED)
)

# Only the training loader needs shuffling; evaluation metrics do not depend on sample
# order, so the validation and test loaders iterate deterministically.
train_dataloader = DataLoader(dataset=train, batch_size=default_values["batch_size"], shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=default_values["batch_size"], shuffle=False)
test_dataloader = DataLoader(dataset=test, batch_size=default_values["batch_size"], shuffle=False)

NameError: name 'default_values' is not defined

In [3]:
# Statistics of the training split: kept as separate names (they are passed to the
# model wrappers later) and bundled in a dict for display.
mean, variance = dataset_information(train, (224, 224))
measures = dict(mean=mean, variance=variance)

In [4]:
# Display the mean and variance computed on the training split.
measures

{'mean': tensor([0.6515, 0.5948, 0.5407]),
 'variance': tensor([0.0868, 0.0900, 0.1029])}

# 2 - First model
Our first model is a simple CNN. <br><br />


## 2.1 - Model definition 

In [5]:
# Name used as prefix for the files written by this experiment (training CSV, saved model).
# Annotated as dict[str, str]: the previous `any` is the builtin function, not a type.
project_definition: dict[str, str] = {"name": "hand_tailored_v1"}

Here we report the first hand-built model, which can also be found in ```models.conv_network.hand_tailored_conv_net.py```

In [24]:
from models.structure.augmentation_wrapper import NormalizedModelWrapper
from keras.layers import Conv2D, MaxPool2D, Input, Flatten, Dense, Layer


class HandTailoredConvNet(NormalizedModelWrapper):
    """First hand-built CNN: two Conv/MaxPool stages followed by a dense binary head."""

    def make_layers(self, input_shape: tuple[int, int, int]) -> tuple[Layer, Layer]:
        """Build the layer graph of the network.

        :param input_shape: shape of a single sample, without the batch dimension
            (the notebook passes ``(3, 224, 224)``).
        :return: the ``(input_layer, output_layer)`` ends of the functional graph.
        """
        chan = self.data_format.value
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)

        # Two stages of 3x3 convolutions with an increasing number of filters,
        # each followed by a 2x2 max pooling.
        x = input_layer
        for filters in (64, 128):
            x = Conv2D(filters=filters, kernel_size=(3, 3), padding='same', data_format=chan, activation="relu")(x)
            x = MaxPool2D(pool_size=(2, 2), data_format=chan)(x)

        # Classifier head: flatten, one hidden dense layer, one sigmoid unit (binary output).
        x = Flatten(data_format=chan)(x)
        x = Dense(units=128, activation="relu")(x)

        output_layer = Dense(units=1, activation="sigmoid")(x)
        return input_layer, output_layer

## 2.2 - Model instance and learning

In [7]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

# Instantiate the wrapper and give it the training-split statistics computed in section 1.
model_generator = HandTailoredConvNet()
model_generator.load_dataset_mean_and_variance(mean, variance)

# (3, 224, 224): channels-first shape of a single sample.
model = model_generator.make_model((3, 224, 224))
# Default Keras learning-rate Value (0.01)
SgdLearningParameters(learning_rate=default_values["learning_rate"]).compile_model(model)

model.summary()

The model is large

In [8]:
import keras

# We fix the number of epochs for now. Later we will add early stopping.
model.fit(x=train_dataloader, validation_data=validation_dataloader, epochs=default_values["epochs"], callbacks=[
    # To persist the history.
    # NOTE(review): append=True adds rows to an already existing CSV, so re-running this
    # cell accumulates several runs in the same file; delete the file for a clean history.
    keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True)
])

# Set to False to skip writing the trained model to disk.
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

Epoch 1/15


  outputs = tnn.conv2d(


[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 422ms/step - accuracy: 0.6532 - loss: 0.7563 - val_accuracy: 0.7582 - val_loss: 0.4942
Epoch 2/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 409ms/step - accuracy: 0.8377 - loss: 0.3721 - val_accuracy: 0.8522 - val_loss: 0.3544
Epoch 3/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 404ms/step - accuracy: 0.9114 - loss: 0.2438 - val_accuracy: 0.8110 - val_loss: 0.4838
Epoch 4/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 420ms/step - accuracy: 0.9476 - loss: 0.1265 - val_accuracy: 0.8870 - val_loss: 0.3297
Epoch 5/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 433ms/step - accuracy: 0.9773 - loss: 0.0599 - val_accuracy: 0.8775 - val_loss: 0.5366
Epoch 6/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 425ms/step - accuracy: 0.9858 - loss: 0.0445 - val_accuracy: 0.8659 - val_loss: 0.5196
Epoch 7/15
[1m119/11

2024-05-23 15:04:17.223745: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-23 15:04:17.424289: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
# Evaluate on the held-out test set; res[0] is the loss and res[1] the accuracy.
res = model.evaluate(test_dataloader)
print(f"Test accuracy is {res[1] * 100:.2f}% while loss is {res[0]}")

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 314ms/step - accuracy: 0.8891 - loss: 0.6539
Test accuracy is 88.18% while loss is 0.6591373085975647


## 2.4 - Results summary
> Test accuracy is 88.18% while loss is 0.6591373085975647

The model is clearly overfitting

In [9]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Reload the history written by the CSVLogger callback during training.
csv = pandas.read_csv(f"{project_definition['name']}_train.csv")
# The return value is not used: presumably the helper adds the "tuner_iteration"
# column to `csv` in place — confirm in utils.data_processing.
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# NOTE(review): the titles mention a "tuner search", but this history comes from a plain fit.
loss_graph.update_layout(title="Loss vs Val_loss in tuner search per epoch (Val dashed)").show()
acc_graph.update_layout(title="Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)").show()

## 2.5 Comparison with the first DNN experiment

# 3 - Dataset Augmentation
To do data augmentation and work around some Keras bugs (if those really are bugs) we have to permute
the axes of the data loaded by the ```torch.DataLoader``` so that the channels come last. Therefore we use the 
```AbstractAugmentationModel``` reported here. The reported class is stripped down to show the augmentation in action; the real extended class used in our experiments is ```TorchAugmentationModel```.

In [10]:
from abc import ABC  # So that inverted augmentation wrapper stays an abstract class
from models.structure.augmentation_wrapper import AugmentationWrapperBase
import keras


class AbstractAugmentationModel(AugmentationWrapperBase, ABC):
    """Stripped-down augmentation wrapper, used only to preview the augmentation pipeline."""

    def make_augmentation(self, input_shape: tuple[int, int, int]) -> tuple[keras.Layer, keras.Layer]:
        """Build the augmentation sub-graph and return its ``(input, output)`` ends.

        The output is left channels-last on purpose, so the augmented images can be plotted.
        """
        input_layer = keras.Input(shape=input_shape, name=self.__class__.__name__)
        x = keras.layers.Permute(dims=(2, 3, 1))(input_layer)  # Channels Last

        # Random flip, rotation and brightness change, applied in this order.
        x = keras.layers.RandomFlip(mode="horizontal_and_vertical")(x)
        x = keras.layers.RandomRotation(0.3)(x)
        x = keras.layers.RandomBrightness(0.4, value_range=(0., 1.))(x)

        # Reset the shape so that the channels are first.
        # We don't do it for this test as we need permuted images to see the augmentation in action
        # NOTE(review): the inverse of Permute((2, 3, 1)) is Permute((3, 1, 2)); the (3, 2, 1)
        # previously written here would also swap height and width.
        # x = keras.layers.Permute(dims=(3, 1, 2))(x)
        return input_layer, x

## 3.1 - See augmentation in action
On some samples

In [11]:
import torch
import plotly.express as px
import numpy as np

# Standalone Keras model containing only the augmentation layers.
aug_in, aug_out = AbstractAugmentationModel().make_augmentation((3, 224, 224))
aug_procedure = keras.Model(inputs=aug_in, outputs=aug_out)

VISUALIZE_SAMPLES: int = 8

image_list: list = []
for i in torch.rand(VISUALIZE_SAMPLES):
    # The samples are read from `train`, so the uniform draw must be scaled by len(train).
    # Scaling by len(test), as done before, restricted the draw to the first len(test) items.
    image = train[int(i * len(train))][0]
    local_image = np.expand_dims(image, 0)  # add the batch dimension

    # We add the original followed by the altered one
    image_list.append(torch.permute(image, (1, 2, 0)))
    image_list.append(np.squeeze(aug_procedure(local_image), 0))

image_list = [i.cpu() for i in image_list]
fig = px.imshow(np.array(image_list), facet_col=0, facet_col_wrap=4)

# A single layout call: the second, identical size/margin call was redundant.
fig.update_layout(coloraxis_showscale=False, width=720, height=720, margin=dict(l=10, r=10, b=10, t=10))
fig.update_xaxes(showticklabels=False).update_yaxes(showticklabels=False)

fig.show()

In [12]:
# Visualize one image 8 times: the original followed by 7 augmented versions
VISUALIZE_SAMPLES: int = 7

image_list: list = []
# The sample is read from `train`, so the random index is scaled by len(train)
# (it was previously scaled by len(test)).
image = train[int(torch.rand(1)[0] * len(train))][0]
image_list.append(torch.permute(image, (1, 2, 0)))
for i in range(VISUALIZE_SAMPLES):
    # Every call draws new random transformations for the same input image.
    image_list.append(np.squeeze(aug_procedure(np.expand_dims(image, 0)), 0))

image_list = [i.cpu() for i in image_list]
fig = px.imshow(np.array(image_list), facet_col=0, facet_col_wrap=4)

# Final size is 720x520: the earlier height=720 was immediately overridden by a second call.
fig.update_layout(coloraxis_showscale=False, width=720, height=520, margin=dict(l=10, r=10, b=10, t=10))
fig.update_xaxes(showticklabels=False).update_yaxes(showticklabels=False)

fig.show()

The augmentation procedure is therefore now handled by Keras and is actually part of the model.

> From now on we apply the image augmentation procedure directly in all models.

# 4 - First model with image augmentation


## 4.1 - Model definition

In [13]:
# Name used as prefix for the files written by this experiment (training CSV, saved model).
# Annotated as dict[str, str]: the previous `any` is the builtin function, not a type.
project_definition: dict[str, str] = {"name": "hand_tailored_v1_augmented"}

In [9]:
from models.structure.augmentation_wrapper import TorchAugmentationModel


class AugmentedHandTailoredConvNet(TorchAugmentationModel):
    """Same layer stack as the first hand-built CNN, built on ``TorchAugmentationModel``."""

    def make_layers(self, input_shape: tuple[int, int, int]) -> tuple[Layer, Layer]:
        """Build the layer graph of the network.

        :param input_shape: shape of a single sample, without the batch dimension
            (the notebook passes ``(3, 224, 224)``).
        :return: the ``(input_layer, output_layer)`` ends of the functional graph.
        """
        chan = self.data_format.value
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)

        # Two stages of 3x3 convolutions with an increasing number of filters,
        # each followed by a 2x2 max pooling.
        x = input_layer
        for filters in (64, 128):
            x = Conv2D(filters=filters, kernel_size=(3, 3), padding='same', data_format=chan, activation="relu")(x)
            x = MaxPool2D(pool_size=(2, 2), data_format=chan)(x)

        # Classifier head: flatten, one hidden dense layer, one sigmoid unit (binary output).
        x = Flatten(data_format=chan)(x)
        x = Dense(units=128, activation="relu")(x)

        output_layer = Dense(units=1, activation="sigmoid")(x)
        return input_layer, output_layer

## 4.2 - Model instance and learning

In [32]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

# Instantiate the augmented wrapper and give it the training-split statistics from section 1.
model_generator = AugmentedHandTailoredConvNet()
model_generator.load_dataset_mean_and_variance(mean, variance)

# (3, 224, 224): channels-first shape of a single sample.
model = model_generator.make_model((3, 224, 224))
# Default Keras learning-rate Value (0.01)
SgdLearningParameters(learning_rate=default_values["learning_rate"]).compile_model(model)
# expand_nested=True also lists the layers of nested sub-models in the summary.
model.summary(expand_nested=True)

In [33]:
import keras

# We fix the number of epochs for now. Later we will add early stopping.
model.fit(x=train_dataloader, validation_data=validation_dataloader, epochs=default_values["epochs"], callbacks=[
    # To persist the history.
    # NOTE(review): append=True adds rows to an already existing CSV, so re-running this
    # cell accumulates several runs in the same file; delete the file for a clean history.
    keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True)
])

# Set to False to skip writing the trained model to disk.
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

Epoch 1/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 439ms/step - accuracy: 0.6351 - loss: 0.6966 - val_accuracy: 0.7804 - val_loss: 0.4987
Epoch 2/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 450ms/step - accuracy: 0.7770 - loss: 0.4961 - val_accuracy: 0.7920 - val_loss: 0.4443
Epoch 3/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 456ms/step - accuracy: 0.8071 - loss: 0.4515 - val_accuracy: 0.8004 - val_loss: 0.4542
Epoch 4/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 453ms/step - accuracy: 0.8213 - loss: 0.4138 - val_accuracy: 0.8226 - val_loss: 0.4177
Epoch 5/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 453ms/step - accuracy: 0.8317 - loss: 0.3910 - val_accuracy: 0.8279 - val_loss: 0.4324
Epoch 6/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 429ms/step - accuracy: 0.8450 - loss: 0.3780 - val_accuracy: 0.8479 - val_loss: 0.4008
Epoch 7/15

Convergence is much slower (in fact the model has not stopped improving on either the validation or the training set). <br/>
More epochs could further improve the model, so we train for another 5 epochs.

In [36]:
# Continue training the same model for 5 more epochs.
# NOTE(review): this runs after the model was saved in the previous cell, so the
# .keras file on disk does not contain these extra epochs.
model.fit(x=train_dataloader, validation_data=validation_dataloader, epochs=5, callbacks=[
    # To persist the history: append=True extends the CSV written by the previous fit.
    keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True)
])

Epoch 1/5
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 429ms/step - accuracy: 0.8824 - loss: 0.2841 - val_accuracy: 0.8522 - val_loss: 0.3718
Epoch 2/5
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 425ms/step - accuracy: 0.8872 - loss: 0.2641 - val_accuracy: 0.9018 - val_loss: 0.2337
Epoch 3/5
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 426ms/step - accuracy: 0.8958 - loss: 0.2508 - val_accuracy: 0.8659 - val_loss: 0.3244
Epoch 4/5
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 423ms/step - accuracy: 0.8890 - loss: 0.2684 - val_accuracy: 0.9166 - val_loss: 0.2163
Epoch 5/5
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 420ms/step - accuracy: 0.9062 - loss: 0.2447 - val_accuracy: 0.8574 - val_loss: 0.3366


<keras.src.callbacks.history.History at 0x7c36883c2ae0>

In [37]:
# Evaluate on the held-out test set; res[0] is the loss and res[1] the accuracy.
res = model.evaluate(test_dataloader)
print(f"Test accuracy is {res[1] * 100:.2f}% while loss is {res[0]}")

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 328ms/step - accuracy: 0.8568 - loss: 0.3590
Test accuracy is 85.90% while loss is 0.3554505407810211


Convergence was very slow and has not been reached yet.

## 4.3 - Results summary

In [45]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Reload the history written by the CSVLogger callback during training.
csv = pandas.read_csv(f"{project_definition['name']}_train.csv")
# The return value is not used: presumably the helper adds the "tuner_iteration"
# column to `csv` in place — confirm in utils.data_processing.
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# NOTE(review): the titles mention a "tuner search", but this history comes from a plain fit.
loss_graph.update_layout(title="Loss vs Val_loss in tuner search per epoch (Val dashed)").show()
acc_graph.update_layout(title="Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)").show()

The model now seems to be slightly underfitting. The augmentation procedure could be a little too strong.

### 4.3.1 - Comparison with non augmented model
To better make a comparison we should watch K-fold CV means but nothing stops us from making some graphs:

In [49]:
# Show the training history currently held in `csv` (one row per logged epoch).
csv

Unnamed: 0,epoch,accuracy,loss,val_accuracy,val_loss,tuner_iteration
0,0,0.736397,0.561055,0.758184,0.494196,0
1,1,0.852351,0.354081,0.852165,0.354391,0
2,2,0.913629,0.236874,0.810982,0.483826,0
3,3,0.950607,0.123949,0.887012,0.329699,0
4,4,0.977285,0.060433,0.877508,0.536639,0
5,5,0.987322,0.03894,0.865892,0.519587,0
6,6,0.988378,0.036287,0.856389,0.522463,0
7,7,0.989435,0.029062,0.865892,0.593255,0
8,8,0.994189,0.016204,0.873284,0.535449,0
9,9,1.0,0.002647,0.880676,0.643556,0


In [54]:
# Load both training histories and label every row with the run it belongs to.
# The label goes in "tuner_iteration" — presumably the column the plotting helpers
# use to separate the curves (confirm in utils.data_processing).
non_augmented_dataframe = pandas.read_csv("hand_tailored_v1_train.csv").assign(
    tuner_iteration="non-augmented"
)
augmented_dataframe = pandas.read_csv(f"{project_definition['name']}_train.csv").assign(
    tuner_iteration="augmented"
)

# Stack the two histories so both runs end up in the same figures.
csv = pandas.concat([non_augmented_dataframe, augmented_dataframe])
loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

loss_title = "Loss vs Val_loss in tuner search per epoch (Val dashed)"
loss_graph.update_layout(title=loss_title).show()
acc_title = "Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)"
acc_graph.update_layout(title=acc_title).show()

# 5 - Second model: fewer parameters and simpler

## 5.1 - Model definition

In [39]:
# Name used as prefix for the files written by this experiment (training CSV, saved model).
# Annotated as dict[str, str]: the previous `any` is the builtin function, not a type.
project_definition: dict[str, str] = {"name": "hand_tailored_v2"}

In [1]:
from keras.layers import Conv2D, MaxPool2D, Input, Flatten, Dense, Layer
from models.structure.augmentation_wrapper import TorchAugmentationModel


# todo move to a class 

class SmallerHandTailoredConvNet(TorchAugmentationModel):
    """Smaller hand-built CNN: both convolutional stages use 64 filters (5x5, then 3x3)."""

    def make_layers(self, input_shape: tuple[int, int, int]) -> tuple[Layer, Layer]:
        """Build the layer graph of the network.

        :param input_shape: shape of a single sample, without the batch dimension
            (the notebook passes ``(3, 224, 224)``).
        :return: the ``(input_layer, output_layer)`` ends of the functional graph.
        """
        chan = self.data_format.value
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)

        # Two convolutional stages with the same number of filters; only the kernel
        # size changes. Each stage is followed by a 2x2 max pooling.
        x = input_layer
        for filters, kernel_size in ((64, (5, 5)), (64, (3, 3))):
            x = Conv2D(filters=filters, kernel_size=kernel_size, padding='same', data_format=chan,
                       activation="relu")(x)
            x = MaxPool2D(pool_size=(2, 2), data_format=chan)(x)

        # Classifier head: flatten, one hidden dense layer, one sigmoid unit (binary output).
        x = Flatten(data_format=chan)(x)
        x = Dense(units=128, activation="relu")(x)

        output_layer = Dense(units=1, activation="sigmoid")(x)
        return input_layer, output_layer

## 5.2 - Model instance and learning

In [41]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

# Instantiate the smaller wrapper and give it the training-split statistics from section 1.
model_generator = SmallerHandTailoredConvNet()
model_generator.load_dataset_mean_and_variance(mean, variance)

# (3, 224, 224): channels-first shape of a single sample.
model = model_generator.make_model((3, 224, 224))

# Default Keras learning-rate Value (0.01)
SgdLearningParameters(learning_rate=default_values["learning_rate"]).compile_model(model)

model.summary()

It has fewer parameters, so it should have a harder time memorizing the samples of the training set.<br />
For starters we won't be using the augmentation procedure, then we will apply one that is much simpler than the previous one.

In [42]:
import keras

# We fix the number of epochs for now. Later we will add early stopping.
model.fit(x=train_dataloader, validation_data=validation_dataloader, epochs=default_values["epochs"], callbacks=[
    # To persist the history.
    # NOTE(review): append=True adds rows to an already existing CSV, so re-running this
    # cell accumulates several runs in the same file; delete the file for a clean history.
    keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True)
])

# Set to False to skip writing the trained model to disk.
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

Epoch 1/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 422ms/step - accuracy: 0.6449 - loss: 0.6525 - val_accuracy: 0.7381 - val_loss: 0.5641
Epoch 2/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 413ms/step - accuracy: 0.7553 - loss: 0.5256 - val_accuracy: 0.7994 - val_loss: 0.4688
Epoch 3/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 410ms/step - accuracy: 0.7721 - loss: 0.4969 - val_accuracy: 0.8163 - val_loss: 0.4370
Epoch 4/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 411ms/step - accuracy: 0.8049 - loss: 0.4443 - val_accuracy: 0.8046 - val_loss: 0.5375
Epoch 5/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 412ms/step - accuracy: 0.8039 - loss: 0.4530 - val_accuracy: 0.8490 - val_loss: 0.3755
Epoch 6/15
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 411ms/step - accuracy: 0.8349 - loss: 0.4011 - val_accuracy: 0.8226 - val_loss: 0.4339
Epoch 7/15

In [43]:
# Evaluate on the held-out test set; res[0] is the loss and res[1] the accuracy.
res = model.evaluate(test_dataloader)
print(f"Test accuracy is {res[1] * 100:.2f}% while loss is {res[0]}")

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 310ms/step - accuracy: 0.8902 - loss: 0.2512
Test accuracy is 89.61% while loss is 0.24825835227966309


## 5.3 - Results summary

In [44]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Reload the history written by the CSVLogger callback during training.
csv = pandas.read_csv(f"{project_definition['name']}_train.csv")
# The return value is not used: presumably the helper adds the "tuner_iteration"
# column to `csv` in place — confirm in utils.data_processing.
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# NOTE(review): the titles mention a "tuner search", but this history comes from a plain fit.
loss_graph.update_layout(title="Loss vs Val_loss in tuner search per epoch (Val dashed)").show()
acc_graph.update_layout(title="Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)").show()

# 6 - Regularization
Regularization is the process of constraining training in order to avoid overfitting (which is what happens with our base model).
Image augmentation is also considered a regularization technique.

## 6.1 - Early Stopping
Early stopping halts training when there is no notable improvement in performance on a given validation set.
This of course requires a further partition of the data, so that a validation fold is available at all times (not only during hyperparameter tuning).

We already kept a small part of the data outside of the training set to measure the learning of our models, to apply
early-stopping we simply have to add a callback to the fit method (```EarlyStopping```)

> More can be found here: https://scikit-learn.org/stable/auto_examples/linear_model/plot_sgd_early_stopping.html

In [None]:
# From now on we use Early stopping to avoid overfitting and better selecting a model.
# We already had a validation split, to better evaluate the learning process, so we better make use of it

### 6.1.1 - First model via Early Stopping

In [None]:
# TODO


## 6.2 - Dropout

Simple yet effective way of reducing overfitting, these are simply layers of the neural network that put some random inputs to 0 during training.
> This technique was found
to improve the performance of neural nets in a wide variety of application domains includ-
ing object classification, digit recognition, speech recognition, document classification and
analysis of computational biology data. This suggests that dropout is a general technique
and is not specific to any domain.
> 
> ~ https://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf 

In [None]:
# We could try to improve model 1 via dropout. Do we want to? yes todo

In [None]:
# todo rewrite

# 7 - Learning parameters fine tuning
While the structure of the network can also be learnt (as in the dedicated notebook) we also try to fine tune
the learning parameters for the best of our "handcrafted" models yet. The hyperparameters we are interested in are:
- SGD related:
    - learning_rate
    - momentum
- epochs -> We use early stopping therefore we can avoid tuning it.
- batch size

In [2]:
# Name used as tuner project name and as prefix for the files written in this section.
# Annotated as dict[str, str]: the previous `any` is the builtin function, not a type.
project_definition: dict[str, str] = {"name": "hand_tailored_v1_hp_tune"}

## 7.1 - Hyperparameters space definition

In [3]:
from keras_tuner import HyperParameters

hyperparameters = HyperParameters()

# There is no real need to define them here as the Hypermodel defines them directly.
# The real definition is hardwired in the TunableLearningParameters / TunableModelWrapper

# Learning rate: with sampling='log' the step is a multiplicative factor, so the
# candidates are 1e-5, 2e-5, 4e-5, ... up to 1e-2.
hyperparameters.Float(name="lr", min_value=1e-5, max_value=1e-2, sampling='log', step=2)
# Plain string literal: the previous f-string had no placeholders.
hyperparameters.Choice(name="batch_size", values=[8, 16, 32, 64])
# NOTE(review): the upper bound lets the tuner try momentum=1 — confirm this is intended.
hyperparameters.Float(name="momentum", min_value=0.5, max_value=1, step=0.05)

0.5

## 7.2 - Tuning Process

In [4]:
# Define the model wrapper
# `mean` and `variance` come from the dataset-statistics cell in section 1, which must
# have been run in the current kernel before this cell.
model_family = SmallerHandTailoredConvNet()
model_family.load_dataset_mean_and_variance(mean, variance)

NameError: name 'mean' is not defined

In [7]:
from utils.my_tuner import HistoryDeletingBayesianOptimization
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParametersTunable
from models.structure.tunable_hypermodel import TunableHyperModel

# Tunable SGD parameters and hypermodel; tune_batch=True presumably makes the batch
# size part of the search — confirm in models.structure.tunable_hypermodel.
sgd_learning_parameters = SgdLearningParametersTunable(learning_rate=default_values["learning_rate"])
hypermodel = TunableHyperModel(model_family, sgd_learning_parameters, (3, 224, 224), tune_batch=True)

# Bayesian optimisation minimising the validation loss. overwrite=False reloads any
# tuner state already stored under directory/project_name instead of starting over.
tuner = HistoryDeletingBayesianOptimization(
    hypermodel,
    hyperparameters=hyperparameters,
    objective='val_loss',
    tune_new_entries=True,
    overwrite=False,
    directory="tuned_models",
    project_name=project_definition["name"],
    max_trials=15  # 15 trials now, 15 more later and 15 again: split so the search can be resumed
)

Reloading Tuner from tuned_models/hand_tailored_v1_hp_tune/tuner0.json


In [14]:
import keras
import callbacks.threshold_stop_cb

# Run the search; every trial trains for at most default_values["epochs"] epochs.
tuner.search(train_dataloader, epochs=default_values["epochs"], validation_data=validation_dataloader, callbacks=[
    # Append the history of every trial to the same CSV file.
    keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True),
    # Stop a trial after 5 epochs without improvement of the validation loss.
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min'),

    # NOTE(review): presumably these abort a trial whose loss is still above the given
    # threshold (1, then 0.6) after the given number of epochs (2, then 4) —
    # confirm against callbacks/threshold_stop_cb.py.
    callbacks.threshold_stop_cb.ThresholdStopCallback(1, 2),
    callbacks.threshold_stop_cb.ThresholdStopCallback(0.6, 4),
])

Trial 15 Complete [00h 13m 56s]
val_loss: 0.29568028450012207

Best val_loss So Far: 0.26821333169937134
Total elapsed time: 02h 51m 49s


## 7.3 - Results

In [8]:
# Print the 5 best trials found so far.
tuner.results_summary(5)

Results summary
Results in tuned_models/hand_tailored_v1_hp_tune
Showing 5 best trials
Objective(name="val_loss", direction="min")

Trial 06 summary
Hyperparameters:
lr: 0.00512
batch_size: 32
momentum: 0.9
Score: 0.26821333169937134

Trial 13 summary
Hyperparameters:
lr: 0.00512
batch_size: 64
momentum: 0.95
Score: 0.2830537259578705

Trial 12 summary
Hyperparameters:
lr: 0.00512
batch_size: 8
momentum: 0.5
Score: 0.2952437698841095

Trial 14 summary
Hyperparameters:
lr: 0.00512
batch_size: 8
momentum: 0.65
Score: 0.29568028450012207

Trial 11 summary
Hyperparameters:
lr: 0.00512
batch_size: 8
momentum: 0.95
Score: 0.319545179605484


# 8 - K-fold CV to evaluate final model

In [2]:
# Number of folds used by the K-fold cross-validation below.
k = 5

In [3]:
# Hyperparameters of the best tuner trial (Trial 06 in the results summary of section 7.3).
best_learning_parameters = dict(lr=0.00512, momentum=0.9, batch_size=32)

## 8.1 - Dataset loading

In [27]:
from torch.utils.data import DataLoader, ConcatDataset
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper

from dataset.dataset_loader import dataset_loader

# NOTE: this rebinds `train` and `test`, replacing the split created in section 1.
train, test = dataset_loader((224, 224))
# K-fold CV runs on the whole dataset, so the provided train and test sets are merged.
full_dataset = ConcatDataset([train, test])

k_fold_manager = KFoldDatasetWrapper(k)
k_fold_manager.load_data(full_dataset)

## 8.2 - K-fold CV procedure

In [28]:
# TODO: for the first model, should the algorithm be reported here so that it does not have to be read from the class?

In [29]:
import dataset
import importlib

# Development helper: re-import the module after editing it on disk.
# NOTE(review): reloading does not update `k_fold_manager`, which was created before the
# reload and keeps the old class; re-run the dataset-loading cell to pick up the changes.
importlib.reload(dataset.k_fold_dataset_wrapper)

<module 'dataset.k_fold_dataset_wrapper' from '/home/jacopo/PycharmProjects/muffin-stat-project/dataset/k_fold_dataset_wrapper.py'>

In [30]:
from models.zero_one_validation_loss import ZeroOneLoss
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

# Model evaluated by the K-fold procedure.
model_generator = SmallerHandTailoredConvNet()

# SGD configured with the best learning rate found by the tuner; ZeroOneLoss is
# tracked as an additional metric next to accuracy.
learning_parameters = SgdLearningParameters(learning_rate=best_learning_parameters["lr"],
                                            metrics=["accuracy", ZeroOneLoss()])
# Momentum is set as an attribute after construction.
learning_parameters.momentum = best_learning_parameters["momentum"]

In [31]:
# Run the K-fold procedure. `results` holds one row of metrics per fold and
# `test_fold_sizes` the number of samples of each test fold.
results, test_fold_sizes = k_fold_manager.run_k_fold_cv(learning_parameters, model_generator, (3, 224, 224),
                                                  batch_size=best_learning_parameters["batch_size"])

Starting procedure for fold 0
I am calculating mean and variance of train dataset (without split 0!
Epoch 1/80
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 403ms/step - accuracy: 0.6274 - loss: 0.6531 - total_0-1_loss: 712.0381 - val_accuracy: 0.7225 - val_loss: 0.5518 - val_total_0-1_loss: 164.0000
Epoch 2/80
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 401ms/step - accuracy: 0.7888 - loss: 0.4744 - total_0-1_loss: 454.8702 - val_accuracy: 0.6582 - val_loss: 0.6228 - val_total_0-1_loss: 202.0000
Epoch 3/80
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 401ms/step - accuracy: 0.7903 - loss: 0.4551 - total_0-1_loss: 432.1145 - val_accuracy: 0.7800 - val_loss: 0.4934 - val_total_0-1_loss: 130.0000
Epoch 4/80
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 410ms/step - accuracy: 0.8189 - loss: 0.4139 - total_0-1_loss: 370.4656 - val_accuracy: 0.8156 - val_loss: 0.4516 - val_total_0-1_loss: 109.0000
Epoch 5/

In [47]:
import numpy as np
import pandas as pd

# One row per fold, with the metrics named in the order they are returned.
dataframe = pd.DataFrame(results)
dataframe.columns = ["loss", "accuracy", "0-1-loss"]

# The 0-1 loss is a count of misclassified samples: divide it by the size of the
# corresponding test fold so that folds of different sizes are comparable.
dataframe = dataframe.assign(test_size=test_fold_sizes)
dataframe = dataframe.assign(**{"0-1-scaled-loss": dataframe["0-1-loss"].div(dataframe["test_size"])})

In [48]:
# Show the per-fold metrics.
dataframe

Unnamed: 0,loss,accuracy,0-1-loss,test_size,0-1-scaled-loss
0,0.271387,0.896959,122.0,1184,0.103041
1,0.238102,0.901182,117.0,1184,0.098818
2,0.272938,0.886729,134.0,1183,0.113271
3,0.308198,0.876585,146.0,1183,0.123415
4,0.226818,0.903635,114.0,1183,0.096365


In [51]:
# Unweighted mean over the folds of loss, accuracy and size-normalised 0-1 loss.
np.average(dataframe[["loss", "accuracy", "0-1-scaled-loss"]], axis=0)

array([0.26348876, 0.89301807, 0.10698193])

In [None]:
# Persist the per-fold metrics. Single quotes are used inside the f-string: reusing the
# outer double quotes is a SyntaxError on Python versions older than 3.12.
# NOTE(review): project_definition still holds the name set in section 7
# ("hand_tailored_v1_hp_tune") at this point — confirm this is the intended file name.
dataframe.to_csv(f"{project_definition['name']}_k_fold_cv.csv", index=False)

## 8.3 - K fold on first Model


In [13]:
# Name used as prefix for the K-fold results file of the first (augmented) model.
# Annotated as dict[str, str]: the previous `any` is the builtin function, not a type.
project_definition: dict[str, str] = {"name": "hand_tailored_v1_augmented"}

In [7]:
from torch.utils.data import DataLoader, ConcatDataset
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper

from dataset.dataset_loader import dataset_loader

# NOTE: this rebinds `train` and `test`, replacing any split created earlier.
train, test = dataset_loader((224, 224))
# K-fold CV runs on the whole dataset, so the provided train and test sets are merged.
full_dataset = ConcatDataset([train, test])

k_fold_manager = KFoldDatasetWrapper(k)
k_fold_manager.load_data(full_dataset)

In [8]:
from models.zero_one_validation_loss import ZeroOneLoss
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters
from models.conv_network.hand_tailored_conv_net import AugmentedHandTailoredConvNet

# Model evaluated by the K-fold procedure. The class is the one imported just above
# from models.conv_network.hand_tailored_conv_net, which shadows the notebook definition.
model_generator = AugmentedHandTailoredConvNet()

# Same learning parameters as for the smaller model, so the two K-fold runs are comparable.
learning_parameters = SgdLearningParameters(learning_rate=best_learning_parameters["lr"],
                                            metrics=["accuracy", ZeroOneLoss()])
# Momentum is set as an attribute after construction.
learning_parameters.momentum = best_learning_parameters["momentum"]

In [9]:
# Run the K-fold procedure. `results` holds one row of metrics per fold and
# `test_fold_sizes` the number of samples of each test fold.
results, test_fold_sizes = k_fold_manager.run_k_fold_cv(learning_parameters, model_generator, (3, 224, 224),
                                                        batch_size=best_learning_parameters["batch_size"])

Starting procedure for fold 0
I am calculating mean and variance of train dataset (without split 0!
Epoch 1/80
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 420ms/step - accuracy: 0.6254 - loss: 0.6758 - total_0-1_loss: 724.3740 - val_accuracy: 0.7902 - val_loss: 0.4941 - val_total_0-1_loss: 124.0000
Epoch 2/80
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 417ms/step - accuracy: 0.7817 - loss: 0.4899 - total_0-1_loss: 459.8549 - val_accuracy: 0.8122 - val_loss: 0.4177 - val_total_0-1_loss: 111.0000
Epoch 3/80
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 406ms/step - accuracy: 0.8027 - loss: 0.4455 - total_0-1_loss: 413.0382 - val_accuracy: 0.8037 - val_loss: 0.4775 - val_total_0-1_loss: 116.0000
Epoch 4/80
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 398ms/step - accuracy: 0.8310 - loss: 0.4004 - total_0-1_loss: 360.5725 - val_accuracy: 0.7817 - val_loss: 0.5199 - val_total_0-1_loss: 129.0000
Epoch 5/

In [10]:
import numpy as np
import pandas as pd

# One row per fold, with the metrics named in the order they are returned.
dataframe = pd.DataFrame(results)
dataframe.columns = ["loss", "accuracy", "0-1-loss"]

# The 0-1 loss is a count of misclassified samples: divide it by the size of the
# corresponding test fold so that folds of different sizes are comparable.
dataframe = dataframe.assign(test_size=test_fold_sizes)
dataframe = dataframe.assign(**{"0-1-scaled-loss": dataframe["0-1-loss"].div(dataframe["test_size"])})

In [11]:
# Show the per-fold metrics.
dataframe

Unnamed: 0,loss,accuracy,0-1-loss,test_size,0-1-scaled-loss
0,0.296302,0.866554,158.0,1184,0.133446
1,0.238041,0.902872,115.0,1184,0.097128
2,0.299641,0.870668,153.0,1183,0.129332
3,0.286732,0.893491,126.0,1183,0.106509
4,0.268716,0.880812,141.0,1183,0.119189


In [12]:
# Unweighted mean over the folds of loss, accuracy and size-normalised 0-1 loss.
np.average(dataframe[["loss", "accuracy", "0-1-scaled-loss"]], axis=0)

array([0.2778863 , 0.88287923, 0.11712078])

In [14]:
# Persist the per-fold metrics. Single quotes are used inside the f-string: reusing the
# outer double quotes is a SyntaxError on Python versions older than 3.12.
dataframe.to_csv(f"{project_definition['name']}_k_fold_cv.csv", index=False)