In [1]:
# Shared training defaults referenced by the data loaders and the fit calls below
default_values = dict(batch_size=32, epochs=10, learning_rate=1e-2)

We want to study a good structure for convolutional networks. We will therefore follow common rules of thumb, such as:
- The number of filters should increase with depth, so that deeper layers can match more complex patterns in the images
- A (3x3) kernel generally performs well
- CNNs benefit from stacking several convolutional layers in succession

For starters, the number of epochs is fixed to 10 (see `default_values`); later it will be studied as a hyperparameter or controlled through early stopping.


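We will also be using BatchNormalization, as proposed by Ioffe & Szegedy (2015) in *Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift*.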


# 1 - Load data
The overall split is [64%, 16%, 20%] for train/validation/test (taking train and test together as the full dataset): 20% of the train portion is held out as validation. <br />
Best practices suggest a [70%, 15%, 15%] split, but we will keep it this way.

In [2]:
import models.structure.base_model_wrapper
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
from torch.utils.data import DataLoader
from dataset.dataset_loader import dataset_loader, dataset_information

train, test = dataset_loader((224, 224))

# We take 20% of train as validation (fold 0 of a 5-fold split).
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

local_train, validation = dataset_split_controller.get_data_for_fold(0)

# Only the training loader is shuffled. Aggregated evaluation metrics do not depend on the
# sample order, so shuffling validation/test data adds overhead and makes per-batch logs
# non-repeatable without any benefit.
train_dataloader = DataLoader(dataset=local_train, batch_size=default_values["batch_size"], shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=default_values["batch_size"], shuffle=False)
test_dataloader = DataLoader(dataset=test, batch_size=default_values["batch_size"], shuffle=False)

In [3]:
# Statistics are computed on the training fold only (validation and test data are not used)
mean, variance = dataset_information(local_train, (224, 224))
measures = dict(mean=mean, variance=variance)

In [4]:
# Display the mean and variance computed on the training fold
# (three values each in the recorded output, presumably one per colour channel).
measures

{'mean': tensor([0.6508, 0.5944, 0.5403]),
 'variance': tensor([0.0874, 0.0903, 0.1033])}

# 2 - First model
Our first model is a simple CNN: two convolution + max-pooling stages followed by a small dense head.


## 2.1 - Model definition 

In [5]:
# Name used as prefix for the training-history CSV and the saved model file.
# The annotation uses `str` values: the previous `any` is the builtin function, not a type.
project_definition: dict[str, str] = {"name": "hand_tailored_v1_normalized"}

In [5]:
from models.structure.augmentation_wrapper import NormalizedModelWrapper
from keras.layers import Conv2D, MaxPool2D, Input, Flatten, Dense, Layer
from models.structure.base_model_wrapper import BaseModelWrapper


class HandTailoredConvNet(NormalizedModelWrapper):
    """First hand-designed CNN: two conv/max-pool stages followed by a dense head."""

    def make_layers(self, input_shape: (int, int, int)) -> tuple[Layer, Layer]:
        """
        Build the layer graph and return its two ends.

        :param input_shape: shape handed to the Keras ``Input`` layer.
        :return: ``(input_layer, output_layer)``; the output is a single sigmoid unit.
        """
        data_format = self.data_format.value
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)

        # Two identical stages (3x3 'same' convolution + 2x2 max-pool); only the filter count changes.
        features = input_layer
        for n_filters in (64, 128):
            features = Conv2D(filters=n_filters, kernel_size=(3, 3), padding='same',
                              data_format=data_format, activation="relu")(features)
            features = MaxPool2D(pool_size=(2, 2), data_format=data_format)(features)

        # Dense head
        features = Flatten(data_format=data_format)(features)
        features = Dense(units=128, activation="relu")(features)

        output_layer = Dense(units=1, activation="sigmoid")(features)
        return input_layer, output_layer

## 2.2 - Model instance and learning

In [7]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

model_generator = HandTailoredConvNet()
# NOTE(review): the method name mentions stds but the second argument is a variance —
# confirm which of the two the wrapper expects.
model_generator.load_dataset_means_and_stds(mean, variance)

model = model_generator.make_model((3, 224, 224))
# Learning rate comes from the shared defaults (0.01, the default Keras value)
# instead of a literal repeated in every section.
SgdLearningParameters(learning_rate=default_values["learning_rate"]).compile_model(model)

model.summary()

In [8]:
import keras

# The epoch count is fixed for now; early stopping is added later.
# The CSVLogger persists the history (append mode, so repeated runs extend the same file).
history_logger = keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True)
model.fit(
    x=train_dataloader,
    validation_data=validation_dataloader,
    epochs=default_values["epochs"],
    callbacks=[history_logger],
)

# Toggle to skip writing the trained model to disk
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

Epoch 1/10
[1m  1/119[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:12[0m 618ms/step - accuracy: 0.4688 - loss: 0.6937

  outputs = tnn.conv2d(


[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 399ms/step - accuracy: 0.6525 - loss: 0.7357 - val_accuracy: 0.8427 - val_loss: 0.3654
Epoch 2/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 396ms/step - accuracy: 0.8612 - loss: 0.3440 - val_accuracy: 0.8363 - val_loss: 0.3736
Epoch 3/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 405ms/step - accuracy: 0.9185 - loss: 0.2179 - val_accuracy: 0.8490 - val_loss: 0.3724
Epoch 4/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 392ms/step - accuracy: 0.9630 - loss: 0.1087 - val_accuracy: 0.8574 - val_loss: 0.3936
Epoch 5/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 390ms/step - accuracy: 0.9770 - loss: 0.0594 - val_accuracy: 0.8458 - val_loss: 0.5161
Epoch 6/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 395ms/step - accuracy: 0.9822 - loss: 0.0500 - val_accuracy: 0.8543 - val_loss: 0.4497
Epoch 7/10
[1m119/11

2024-05-22 21:33:51.828262: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-22 21:33:52.026037: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
# evaluate() is read as [loss, accuracy], matching the indices used in the report line
res = model.evaluate(test_dataloader)
test_loss, test_accuracy = res[0], res[1]
print(f"Test accuracy is {test_accuracy * 100:.2f}% while loss is {test_loss}")

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 314ms/step - accuracy: 0.8891 - loss: 0.6539
Test accuracy is 88.18% while loss is 0.6591373085975647


## 2.4 - Results summary
> Test accuracy is 88.18% while loss is 0.6591373085975647

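The model is clearly overfitting: training accuracy climbs above 98% while validation accuracy plateaus around 85% and the validation loss starts increasing after the first epochs.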

In [10]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Reload the training history written by the CSVLogger callback
history_path = f"{project_definition['name']}_train.csv"
csv = pandas.read_csv(history_path)
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# Title each figure and render it (loss first, then accuracy)
for figure, title in (
    (loss_graph, "Loss vs Val_loss in tuner search per epoch (Val dashed)"),
    (acc_graph, "Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)"),
):
    figure.update_layout(title=title).show()

## 2.5 Comparison with the first DNN experiment

# 3 - Dataset Augmentation
To apply data augmentation and work around some apparent Keras bugs (if they really are bugs), we have to move
the channel axis of the data loaded by the ```torch.utils.data.DataLoader``` from channels-first to channels-last. Therefore we use the
```InvertedChannelsAugmentationWrapper```.

In [12]:
from abc import ABC  # So that inverted augmentation wrapper stays an abstract class
from models.structure.augmentation_wrapper import InvertedChannelsAugmentationWrapper
import keras

# todo rewrite
# We report the Augmentation Class
class CustomInvertedAugmentationWrapper(InvertedChannelsAugmentationWrapper, ABC):
    """Augmentation wrapper: permutes the input to channels-last, then applies random flip, rotation and brightness."""

    def make_augmentation(self, input_shape: (int, int, int)) -> tuple[keras.Layer, keras.Layer]:
        """
        Build the augmentation sub-graph.

        :param input_shape: shape handed to ``keras.Input``.
        :return: ``(input_layer, augmented_output)``.
        """
        input_layer = keras.Input(shape=input_shape, name=self.__class__.__name__)

        pipeline = (
            keras.layers.Permute(dims=(2, 3, 1)),  # Channels Last
            keras.layers.RandomFlip(mode="horizontal_and_vertical"),
            keras.layers.RandomRotation(0.3),
            # Input is already normalized in [0,1]
            keras.layers.RandomBrightness(0.4, value_range=(0., 1.)),
        )

        # Chain the layers in the order listed above
        augmented = input_layer
        for layer in pipeline:
            augmented = layer(augmented)

        return input_layer, augmented

## 3.1 - See augmentation in action
On some samples

In [13]:
import torch
import plotly.express as px
import numpy as np

# Stand-alone Keras model that only runs the augmentation pipeline
aug_in, aug_out = CustomInvertedAugmentationWrapper().make_augmentation((3, 224, 224))
aug_procedure = keras.Model(inputs=aug_in, outputs=aug_out)

VISUALIZE_SAMPLES: int = 8

image_list: list = []
for fraction in torch.rand(VISUALIZE_SAMPLES):
    # Scale the uniform [0, 1) draw by the size of the dataset actually being indexed (train);
    # scaling by len(test) restricted the preview to the first len(test) training samples.
    image = train[int(fraction * len(train))][0]
    local_image = np.expand_dims(image, 0)  # add the batch axis expected by the model

    # We add the original followed by the altered one
    image_list.append(torch.permute(image, (1, 2, 0)))
    image_list.append(np.squeeze(aug_procedure(local_image), 0))

image_list = [picture.cpu() for picture in image_list]
fig = px.imshow(np.array(image_list), facet_col=0, facet_col_wrap=4)

# A single layout call is enough; the repeated call set the same size and margins.
fig.update_layout(coloraxis_showscale=False, width=720, height=720, margin=dict(l=10, r=10, b=10, t=10))
fig.update_xaxes(showticklabels=False).update_yaxes(showticklabels=False)

fig.show()

In [14]:
# Visualize one original image followed by 7 augmented versions of it (8 panels in total)
VISUALIZE_SAMPLES: int = 7

image_list: list = []
# Pick a random training sample; the index is scaled by len(train), the dataset being indexed
# (scaling by len(test) only ever reached the first len(test) training samples).
image = train[int(torch.rand(1)[0] * len(train))][0]
image_list.append(torch.permute(image, (1, 2, 0)))
for _ in range(VISUALIZE_SAMPLES):
    image_list.append(np.squeeze(aug_procedure(np.expand_dims(image, 0)), 0))

image_list = [picture.cpu() for picture in image_list]
fig = px.imshow(np.array(image_list), facet_col=0, facet_col_wrap=4)

# Single layout call: 520 is the height the figure previously ended up with after a second, overriding call.
fig.update_layout(coloraxis_showscale=False, width=720, height=520, margin=dict(l=10, r=10, b=10, t=10))
fig.update_xaxes(showticklabels=False).update_yaxes(showticklabels=False)

fig.show()

The augmentation procedure is therefore now handled by Keras and is actually part of the model.

# 4 - First model with image augmentation


## 4.1 - Model definition

In [6]:
# Name used as prefix for the training-history CSV and the saved model file.
# The annotation uses `str` values: the previous `any` is the builtin function, not a type.
project_definition: dict[str, str] = {"name": "hand_tailored_v1_aug_normalized"}

In [7]:
from models.structure.augmentation_wrapper import NormalizedInvertedAugmentation, TorchAugmentationModel


class AugmentedHandTailoredConvNet(TorchAugmentationModel):
    """Same layer stack as the first hand-tailored CNN, built on top of ``TorchAugmentationModel``."""

    def make_layers(self, input_shape: (int, int, int)) -> tuple[Layer, Layer]:
        """
        Build the layer graph and return its two ends.

        :param input_shape: shape handed to the Keras ``Input`` layer.
        :return: ``(input_layer, output_layer)``; the output is a single sigmoid unit.
        """
        data_format = self.data_format.value
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)

        # Two identical stages (3x3 'same' convolution + 2x2 max-pool); only the filter count changes.
        features = input_layer
        for n_filters in (64, 128):
            features = Conv2D(filters=n_filters, kernel_size=(3, 3), padding='same',
                              data_format=data_format, activation="relu")(features)
            features = MaxPool2D(pool_size=(2, 2), data_format=data_format)(features)

        # Dense head
        features = Flatten(data_format=data_format)(features)
        features = Dense(units=128, activation="relu")(features)

        output_layer = Dense(units=1, activation="sigmoid")(features)
        return input_layer, output_layer

## 4.2 - Model instance and learning

In [8]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

model_generator = AugmentedHandTailoredConvNet()
# NOTE(review): the method name mentions stds but the second argument is a variance —
# confirm which of the two the wrapper expects.
model_generator.load_dataset_means_and_stds(mean, variance)

model = model_generator.make_model((3, 224, 224))
# Learning rate comes from the shared defaults (0.01, the default Keras value)
# instead of a literal repeated in every section.
SgdLearningParameters(learning_rate=default_values["learning_rate"]).compile_model(model)
model.summary(expand_nested=True)

In [9]:
import keras

# The epoch count is fixed for now; early stopping is added later.
# The CSVLogger persists the history (append mode, so repeated runs extend the same file).
history_logger = keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True)
model.fit(
    x=train_dataloader,
    validation_data=validation_dataloader,
    epochs=default_values["epochs"],
    callbacks=[history_logger],
)

# Toggle to skip writing the trained model to disk
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

Epoch 1/10
[1m  1/119[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:16[0m 650ms/step - accuracy: 0.5312 - loss: 0.6982

  outputs = tnn.conv2d(


[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 428ms/step - accuracy: 0.6187 - loss: 0.7589 - val_accuracy: 0.7360 - val_loss: 0.5625
Epoch 2/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 437ms/step - accuracy: 0.7636 - loss: 0.5234 - val_accuracy: 0.8004 - val_loss: 0.4583
Epoch 3/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 432ms/step - accuracy: 0.7699 - loss: 0.4958 - val_accuracy: 0.8226 - val_loss: 0.4332
Epoch 4/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 425ms/step - accuracy: 0.7983 - loss: 0.4491 - val_accuracy: 0.8068 - val_loss: 0.4425
Epoch 5/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 435ms/step - accuracy: 0.8263 - loss: 0.4153 - val_accuracy: 0.8384 - val_loss: 0.3928
Epoch 6/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 429ms/step - accuracy: 0.8335 - loss: 0.3817 - val_accuracy: 0.8606 - val_loss: 0.3463
Epoch 7/10
[1m119/11

2024-05-22 22:44:14.152101: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-22 22:44:14.173260: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Convergence is much slower (in fact, the model has not stopped improving on either the validation or the training set). <br/>
More epochs could further improve the model, so we continue training for another 10 epochs.

In [None]:
# Continue training the same model for 10 more epochs; the history is appended to the same CSV file
extra_history_logger = keras.callbacks.CSVLogger(
    f"{project_definition['name']}_train.csv", separator=",", append=True
)
model.fit(
    x=train_dataloader,
    validation_data=validation_dataloader,
    epochs=10,
    callbacks=[extra_history_logger],
)

In [10]:
# evaluate() is read as [loss, accuracy], matching the indices used in the report line
res = model.evaluate(test_dataloader)
test_loss, test_accuracy = res[0], res[1]
print(f"Test accuracy is {test_accuracy * 100:.2f}% while loss is {test_loss}")

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 326ms/step - accuracy: 0.8646 - loss: 0.3417
Test accuracy is 85.98% while loss is 0.33995598554611206


Convergence is very slow and training has not converged yet.

## 4.3 - Results summary

In [None]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Reload the training history written by the CSVLogger callback
history_path = f"{project_definition['name']}_train.csv"
csv = pandas.read_csv(history_path)
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# Title each figure and render it (loss first, then accuracy)
for figure, title in (
    (loss_graph, "Loss vs Val_loss in tuner search per epoch (Val dashed)"),
    (acc_graph, "Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)"),
):
    figure.update_layout(title=title).show()

The model now seems to be slightly underfitting. The augmentation procedure could be a little too strong.

# 5 - Second model: fewer parameters and simpler

## 5.1 - Model definition

In [None]:
# Name used as prefix for the training-history CSV and the saved model file.
# The annotation uses `str` values: the previous `any` is the builtin function, not a type.
project_definition: dict[str, str] = {"name": "hand_tailored_v2"}

In [None]:
from keras.src.layers import BatchNormalization
from keras.layers import Conv2D, MaxPool2D, Input, Flatten, Dense, Layer
from models.structure.base_model_wrapper import BaseModelWrapper


class SmallerHandTailoredConvNet(BaseModelWrapper):
    """Smaller hand-designed CNN: a 5x5 and a 3x3 convolution stage (64 filters each) plus a dense head."""

    def make_layers(self, input_shape: (int, int, int)) -> tuple[Layer, Layer]:
        """
        Build the layer graph and return its two ends.

        :param input_shape: shape handed to the Keras ``Input`` layer.
        :return: ``(input_layer, output_layer)``; the output is a single sigmoid unit.
        """
        data_format = self.data_format.value
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)

        # (filters, kernel size) of each convolution stage; every stage ends with a 2x2 max-pool.
        stages = ((64, (5, 5)), (64, (3, 3)))

        features = input_layer
        for n_filters, kernel in stages:
            features = Conv2D(filters=n_filters, kernel_size=kernel, padding='same',
                              data_format=data_format, activation="relu")(features)
            features = MaxPool2D(pool_size=(2, 2), data_format=data_format)(features)

        # Dense head
        features = Flatten(data_format=data_format)(features)
        features = Dense(units=128, activation="relu")(features)

        output_layer = Dense(units=1, activation="sigmoid")(features)
        return input_layer, output_layer

## 5.2 - Model instance and learning

In [None]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

model = SmallerHandTailoredConvNet().make_model((3, 224, 224))
# Learning rate comes from the shared defaults (0.01, the default Keras value)
# instead of a literal repeated in every section.
SgdLearningParameters(learning_rate=default_values["learning_rate"]).compile_model(model)

model.summary()

It has fewer parameters, so it should have a harder time memorizing the samples of the training set.<br />
For starters we won't be using the augmentation procedure; then we will apply one, but much simpler than the one before.

In [None]:
import keras

# The epoch count is fixed for now; early stopping is added later.
# The CSVLogger persists the history (append mode, so repeated runs extend the same file).
history_logger = keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True)
model.fit(
    x=train_dataloader,
    validation_data=validation_dataloader,
    epochs=default_values["epochs"],
    callbacks=[history_logger],
)

# Toggle to skip writing the trained model to disk
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

In [None]:
# evaluate() is read as [loss, accuracy], matching the indices used in the report line
res = model.evaluate(test_dataloader)
test_loss, test_accuracy = res[0], res[1]
print(f"Test accuracy is {test_accuracy * 100:.2f}% while loss is {test_loss}")

## 5.3 - Results summary

In [None]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Reload the training history written by the CSVLogger callback
history_path = f"{project_definition['name']}_train.csv"
csv = pandas.read_csv(history_path)
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# Title each figure and render it (loss first, then accuracy)
for figure, title in (
    (loss_graph, "Loss vs Val_loss in tuner search per epoch (Val dashed)"),
    (acc_graph, "Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)"),
):
    figure.update_layout(title=title).show()

# 6 - Second model with image augmentation
## 6.1 - Model definition

In [None]:
# Name used as prefix for the training-history CSV and the saved model file.
# The annotation uses `str` values: the previous `any` is the builtin function, not a type.
project_definition: dict[str, str] = {"name": "hand_tailored_v2_aug"}

In [None]:
# NOTE(review): this reuses the name AugmentedHandTailoredConvNet, which an earlier cell already
# bound to a TorchAugmentationModel subclass; once this cell runs, the earlier class is shadowed.
# NOTE(review): the layer stack presumably comes from SmallerHandTailoredConvNet and the augmentation
# from CustomInvertedAugmentationWrapper — confirm how the base wrappers combine the two.
class AugmentedHandTailoredConvNet(SmallerHandTailoredConvNet, CustomInvertedAugmentationWrapper):
    """Smaller hand-tailored CNN combined with the custom augmentation wrapper through multiple inheritance."""
    pass

## 6.2 - Model instance and learning

In [None]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

model = AugmentedHandTailoredConvNet().make_model((3, 224, 224))
# Learning rate comes from the shared defaults (0.01, the default Keras value)
# instead of a literal repeated in every section.
SgdLearningParameters(learning_rate=default_values["learning_rate"]).compile_model(model)

model.summary()

In [None]:
import keras

# The epoch count is fixed for now; early stopping is added later.
# The CSVLogger persists the history (append mode, so repeated runs extend the same file).
history_logger = keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True)
model.fit(
    x=train_dataloader,
    validation_data=validation_dataloader,
    epochs=default_values["epochs"],
    callbacks=[history_logger],
)

# Toggle to skip writing the trained model to disk
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

In [None]:
# evaluate() is read as [loss, accuracy], matching the indices used in the report line
res = model.evaluate(test_dataloader)
test_loss, test_accuracy = res[0], res[1]
print(f"Test accuracy is {test_accuracy * 100:.2f}% while loss is {test_loss}")

## 6.3 - Results summary

In [None]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Reload the training history written by the CSVLogger callback
history_path = f"{project_definition['name']}_train.csv"
csv = pandas.read_csv(history_path)
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# Title each figure and render it (loss first, then accuracy)
for figure, title in (
    (loss_graph, "Loss vs Val_loss in tuner search per epoch (Val dashed)"),
    (acc_graph, "Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)"),
):
    figure.update_layout(title=title).show()

# 7 - Learning parameters fine tuning
While the structure of the network can also be learnt (as in the dedicated notebook) we also try to fine tune
the learning parameters for the best of our "handcrafted" models yet. The hyperparameters we are interested in are:
- SGD related:
    - learning_rate
    - momentum
- epochs
- batch size

# 8 - K-fold CV to evaluate final model