In [19]:
# Hyper-parameter defaults looked up by the cells below.
default_values = {
    "batch_size": 32,
    "epochs": 15,
    "learning_rate": 1e-2,
}

We start by creating a simple custom DNN.

# 1 - Load data
The overall data split will be [70%, 10%, 20%] for train, validation and test: the test set is 20% of the full data, and the remaining 80% is split 87.5% / 12.5% into train and validation. <br />
Best practices suggest a [70%, 15%, 15%] split, but we will keep it this way.

In [20]:
import torch

from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
from torch.utils.data import DataLoader
from dataset.dataset_loader import dataset_loader, dataset_information

# Load the train and test sets; (224, 224) is presumably the target image size.
train, test = dataset_loader((224, 224))

# Carve a validation set out of the training data (87.5% / 12.5%).
# A seeded generator keeps the split identical across kernel restarts, so the
# statistics and metrics computed downstream can be reproduced.
split_generator = torch.Generator().manual_seed(42)
train, validation = torch.utils.data.random_split(train, [0.875, 0.125], generator=split_generator)

# Only the training loader is shuffled: evaluation metrics do not depend on
# sample order, so validation and test are iterated in a fixed order.
train_dataloader = DataLoader(dataset=train, batch_size=default_values["batch_size"], shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=default_values["batch_size"], shuffle=False)
test_dataloader = DataLoader(dataset=test, batch_size=default_values["batch_size"], shuffle=False)

In [21]:
# Compute the dataset statistics on the training split only, then keep them
# both as separate names and grouped in one dictionary.
dataset_statistics = dataset_information(train, (224, 224))
mean, variance = dataset_statistics
measures = {"mean": mean, "variance": variance}

In [22]:
# Display the dataset statistics dictionary built in the previous cell.
measures

{'mean': tensor([0.6489, 0.5926, 0.5387]),
 'variance': tensor([0.0871, 0.0903, 0.1029])}

# 2 - First model
Our first model is a simple fully connected network (no convolutional layers). <br />


## 2.1 - Model definition 

In [3]:
from typing import Any

# Settings of the current experiment; "name" prefixes the history CSV and the saved model file.
# `Any` (the typing construct) replaces the builtin function `any`, which is not a type.
project_definition: dict[str, Any] = {"name": "hand_tailored_v1"}

In [8]:
from models.structure.augmentation_wrapper import NormalizedModelWrapper
from keras.layers import Conv2D, MaxPool2D, Input, Flatten, Dense, Layer


class HandTailoredDeepNet(NormalizedModelWrapper):
    """Fully connected network: Flatten -> Dense(1024) -> Dense(256) -> Dense(1, sigmoid)."""

    def make_layers(self, input_shape: tuple[int, int, int]) -> tuple[Layer, Layer]:
        """Build the layer graph of the network.

        Args:
            input_shape: Shape handed to the Keras ``Input`` layer
                (this notebook passes ``(3, 224, 224)``).

        Returns:
            The ``(input_layer, output_layer)`` pair delimiting the graph.
        """
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)
        # `data_format` is not defined in this class; presumably provided by NormalizedModelWrapper.
        x = Flatten(data_format=self.data_format.value)(input_layer)

        # The layer sizes below are arbitrary choices.
        x = Dense(units=1024, activation='relu')(x)
        x = Dense(units=256, activation="relu")(x)
        # Single sigmoid unit as the output.
        output_layer = Dense(units=1, activation="sigmoid")(x)

        return input_layer, output_layer

## 2.2 - Model instance and learning

In [10]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

# Instantiate the wrapper and hand it the dataset statistics computed earlier.
model_generator = HandTailoredDeepNet()
model_generator.load_dataset_mean_and_variance(measures["mean"], measures["variance"])

input_shape = (3, 224, 224)
model = model_generator.make_model(input_shape)

# The default Keras learning rate (0.01) does not work here: the loss stays
# huge, so a smaller value is used instead.
sgd_parameters = SgdLearningParameters(learning_rate=1e-3)
sgd_parameters.compile_model(model)

model.summary()

In [11]:
import keras

# The epoch budget is fixed for now; early stopping will be added later.
# The CSV logger persists the training history; with append=True an existing
# file is extended rather than overwritten.
history_logger = keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True)
model.fit(
    x=train_dataloader,
    validation_data=validation_dataloader,
    epochs=default_values["epochs"],
    callbacks=[history_logger],
)

# Switch off to skip writing the trained model to disk.
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

In [4]:
# Evaluate on the held-out test set; res[0] is reported as the loss and res[1] as the accuracy.
res = model.evaluate(x=test_dataloader)
report = f"Test accuracy is {res[1] * 100:.2f}% while loss is {res[0]}"
print(report)

NameError: name 'model' is not defined

## 2.3 - Results summary

In [14]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Reload the training history from the CSV file named after the current project.
csv = pandas.read_csv(f"{project_definition['name']}_train.csv")
# The return value is unused, so this presumably annotates the frame in place.
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# Apply the same layout to both figures and render them in order.
figure_titles = (
    (loss_graph, "Loss vs Val_loss in tuner search per epoch (Val dashed)"),
    (acc_graph, "Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)"),
)
for figure, title in figure_titles:
    figure.update_layout(title=title, width=700, height=360).show()

We are very clearly overfitting. The model is extremely large right now, so reducing its size might help it generalize better.

Considering the previous results it would be interesting to see if it is possible to make a smaller model able to generalize the function.

# 3 - Smaller model
As we will see, this attempt to make the model smaller failed: it memorizes the dataset like before.<br/>
This won't be reported in the final document, as we try another, even smaller, model.

## 3.1 - Model definition


In [2]:
from typing import Any

# Settings of the current experiment; "name" prefixes the history CSV and the saved model file.
# `Any` (the typing construct) replaces the builtin function `any`, which is not a type.
project_definition: dict[str, Any] = {"name": "hand_tailored_small"}

In [15]:
from keras.layers import Conv2D, MaxPool2D, Input, Flatten, Dense, Layer


class SmallHandTailoredDeepNet(NormalizedModelWrapper):
    """Smaller fully connected network: Flatten -> Dense(512) -> Dense(64) -> Dense(1, sigmoid)."""

    def make_layers(self, input_shape: tuple[int, int, int]) -> tuple[Layer, Layer]:
        """Build the layer graph of the network.

        Args:
            input_shape: Shape handed to the Keras ``Input`` layer
                (this notebook passes ``(3, 224, 224)``).

        Returns:
            The ``(input_layer, output_layer)`` pair delimiting the graph.
        """
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)
        # `data_format` is not defined in this class; presumably provided by NormalizedModelWrapper.
        x = Flatten(data_format=self.data_format.value)(input_layer)

        # The layer sizes below are arbitrary choices.
        x = Dense(units=512, activation='relu')(x)
        x = Dense(units=64, activation="relu")(x)

        # Single sigmoid unit as the output.
        output_layer = Dense(units=1, activation="sigmoid")(x)

        return input_layer, output_layer

## 3.2 - Model instance and learning

In [17]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

# Instantiate the wrapper and hand it the dataset statistics computed earlier.
model_generator = SmallHandTailoredDeepNet()
model_generator.load_dataset_mean_and_variance(mean, variance)

input_shape = (3, 224, 224)
model = model_generator.make_model(input_shape)

# The default Keras learning rate (0.01) does not work here: the loss stays
# huge, so a smaller value is used instead.
sgd_parameters = SgdLearningParameters(learning_rate=1e-3)
sgd_parameters.compile_model(model)

model.summary()

In [18]:
import keras

# The epoch budget is fixed for now; early stopping will be added later.
# The CSV logger persists the training history; with append=True an existing
# file is extended rather than overwritten.
history_logger = keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True)
model.fit(
    x=train_dataloader,
    validation_data=validation_dataloader,
    epochs=default_values["epochs"],
    callbacks=[history_logger],
)

# Switch off to skip writing the trained model to disk.
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

In [19]:
# Evaluate on the held-out test set; res[0] is reported as the loss and res[1] as the accuracy.
res = model.evaluate(x=test_dataloader)
report = f"Test accuracy is {res[1] * 100:.2f}% while loss is {res[0]}"
print(report)

## 3.3 - Results summary

In [1]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Reload the training history from the CSV file named after the current project.
csv = pandas.read_csv(f"{project_definition['name']}_train.csv")
# The return value is unused, so this presumably annotates the frame in place.
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# Title both figures and render them in order.
figure_titles = (
    (loss_graph, "Loss vs Val_loss in tuner search per epoch (Val dashed)"),
    (acc_graph, "Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)"),
)
for figure, title in figure_titles:
    figure.update_layout(title=title).show()

NameError: name 'project_definition' is not defined

The model is still heavily overfitting.

# 4 - Even smaller model

## 4.1 - Model definition

In [15]:
from typing import Any

# Settings of the current experiment; "name" prefixes the history CSV and the saved model file.
# `Any` (the typing construct) replaces the builtin function `any`, which is not a type.
project_definition: dict[str, Any] = {"name": "hand_tailored_xs"}

In [16]:
from models.structure.augmentation_wrapper import NormalizedModelWrapper
from keras.layers import Conv2D, MaxPool2D, Input, Flatten, Dense, Layer


class VerySmallHandTailoredDeepNet(NormalizedModelWrapper):
    """Smallest fully connected network: Flatten -> Dense(128) -> Dense(1, sigmoid)."""

    def make_layers(self, input_shape: tuple[int, int, int]) -> tuple[Layer, Layer]:
        """Build the layer graph of the network.

        Args:
            input_shape: Shape handed to the Keras ``Input`` layer
                (this notebook passes ``(3, 224, 224)``).

        Returns:
            The ``(input_layer, output_layer)`` pair delimiting the graph.
        """
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)
        # `data_format` is not defined in this class; presumably provided by NormalizedModelWrapper.
        x = Flatten(data_format=self.data_format.value)(input_layer)

        # The layer size below is an arbitrary choice.
        x = Dense(units=128, activation='relu')(x)
        # Single sigmoid unit as the output.
        output_layer = Dense(units=1, activation="sigmoid")(x)

        return input_layer, output_layer

## 4.2 - Model instance and learning

In [25]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

# Instantiate the wrapper and hand it the dataset statistics computed earlier.
model_generator = VerySmallHandTailoredDeepNet()
model_generator.load_dataset_mean_and_variance(mean, variance)

input_shape = (3, 224, 224)
model = model_generator.make_model(input_shape)

# The default Keras learning rate (0.01) does not work here: the loss stays
# huge (overshooting), so it is manually decreased.
sgd_parameters = SgdLearningParameters(learning_rate=1e-4)
sgd_parameters.compile_model(model)

model.summary()

In [26]:
import keras

# The epoch budget is fixed for now; early stopping will be added later.
# The CSV logger persists the training history; with append=True an existing
# file is extended rather than overwritten.
history_logger = keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True)
model.fit(
    x=train_dataloader,
    validation_data=validation_dataloader,
    epochs=default_values["epochs"],
    callbacks=[history_logger],
)

# Switch off to skip writing the trained model to disk.
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

Epoch 1/15
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 337ms/step - accuracy: 0.6915 - loss: 0.6095 - val_accuracy: 0.7800 - val_loss: 0.5228
Epoch 2/15
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 339ms/step - accuracy: 0.8491 - loss: 0.3519 - val_accuracy: 0.7817 - val_loss: 0.5755
Epoch 3/15
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 332ms/step - accuracy: 0.9038 - loss: 0.2618 - val_accuracy: 0.7800 - val_loss: 0.4928
Epoch 4/15
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 338ms/step - accuracy: 0.9416 - loss: 0.2048 - val_accuracy: 0.7783 - val_loss: 0.5153
Epoch 5/15
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 334ms/step - accuracy: 0.9598 - loss: 0.1585 - val_accuracy: 0.7936 - val_loss: 0.5159
Epoch 6/15
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 337ms/step - accuracy: 0.9721 - loss: 0.1296 - val_accuracy: 0.8054 - val_loss: 0.4943
Epoch 7/15

2024-05-31 01:31:16.730629: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-31 01:31:16.751951: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [27]:
# Evaluate on the held-out test set; res[0] is reported as the loss and res[1] as the accuracy.
res = model.evaluate(x=test_dataloader)
report = f"Test accuracy is {res[1] * 100:.2f}% while loss is {res[0]}"
print(report)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 308ms/step - accuracy: 0.7873 - loss: 0.6059
Test accuracy is 79.56% while loss is 0.5600281953811646


The model fails to generalize well: it only reaches 79.56% accuracy on the test set, while the training accuracy climbs above 97%.

In [29]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Reload the training history from the CSV file named after the current project.
csv = pandas.read_csv(f"{project_definition['name']}_train.csv")
# The return value is unused, so this presumably annotates the frame in place.
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# Title both figures and render them in order.
figure_titles = (
    (loss_graph, "Loss vs Val_loss in tuner search per epoch (Val dashed)"),
    (acc_graph, "Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)"),
)
for figure, title in figure_titles:
    figure.update_layout(title=title).show()