In [4]:
# Shared training defaults read by the data-loading and training cells below.
default_values = dict(batch_size=32, epochs=15, learning_rate=1e-3)

We start by creating a simple custom DNN.

# 1 - Load data
The overall train/validation/test split will be [64%, 16%, 20%] (taking train and test together as the full set). <br />
Best practices suggest a [70%, 15%, 15%] split, but we will keep it this way.

In [5]:
import models.structure.base_model_wrapper
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
from torch.utils.data import DataLoader
from dataset.dataset_loader import dataset_loader, dataset_information

# Load the train and test sets; (224, 224) is presumably the target image size.
train, test = dataset_loader((224, 224))
# TODO: rewrite parts of this cell so the data handling is consistent.
# We take 20% of train as validation (fold 0 of the 5-way split is held out).
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

local_train, validation = dataset_split_controller.get_data_for_fold(0)

# Only the training loader is shuffled. Shuffling the validation/test loaders does not
# change the aggregate metrics; it only makes the batch order irreproducible.
train_dataloader = DataLoader(dataset=local_train, batch_size=default_values["batch_size"], shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=default_values["batch_size"], shuffle=False)
test_dataloader = DataLoader(dataset=test, batch_size=default_values["batch_size"], shuffle=False)

In [8]:
# Statistics are measured on the training fold only (not on validation or test).
mean, variance = dataset_information(local_train, (224, 224))
measures = dict(mean=mean, variance=variance)

In [5]:
# Show the computed mean/variance as this cell's output.
measures

# 2 - First model
Our first model is a simple fully connected DNN (a Flatten layer followed by Dense layers), not a CNN. <br><br />


## 2.1 - Model definition 

In [7]:
from typing import Any

# Identifies this experiment: the name is the prefix of the training-log CSV
# and of the saved model file written by the cells below.
project_definition: dict[str, Any] = {"name": "hand_tailored_v1"}

In [8]:
from models.structure.augmentation_wrapper import NormalizedModelWrapper
from keras.layers import Conv2D, MaxPool2D, Input, Flatten, Dense, Layer


class HandTailoredDeepNet(NormalizedModelWrapper):
    """Fully connected baseline: Flatten -> Dense(1024) -> Dense(256) -> Dense(1, sigmoid)."""

    def make_layers(self, input_shape: tuple[int, int, int]) -> tuple[Layer, Layer]:
        """Build the layer graph of the network.

        :param input_shape: shape of a single input sample, forwarded to ``Input``.
        :return: the ``(input_layer, output_layer)`` pair delimiting the graph.
        """
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)
        # data_format is not defined in this class; presumably it comes from NormalizedModelWrapper.
        x = Flatten(data_format=self.data_format.value)(input_layer)

        # The layer widths are arbitrary choices.
        x = Dense(units=1024, activation='relu')(x)
        x = Dense(units=256, activation="relu")(x)
        # A single sigmoid unit produces the output.
        output_layer = Dense(units=1, activation="sigmoid")(x)

        return input_layer, output_layer

## 2.2 - Model instance and learning

In [10]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

model_generator = HandTailoredDeepNet()
# Hand the training-set statistics to the wrapper (presumably used to normalize the inputs).
model_generator.load_dataset_mean_and_variance(measures["mean"], measures["variance"])

model = model_generator.make_model((3, 224, 224))
# The default Keras SGD learning rate (0.01) does not work here: the loss stays huge.
# We therefore use the smaller shared default (1e-3) rather than repeating the literal.
SgdLearningParameters(learning_rate=default_values["learning_rate"]).compile_model(model)

model.summary()

In [11]:
import keras

# We fix the number of epochs for now. Later we will add early stopping.
# The epoch count is fixed for now; early stopping will be added later.
# The CSV logger persists the per-epoch history to "<name>_train.csv".
history_logger = keras.callbacks.CSVLogger(
    f"{project_definition['name']}_train.csv", separator=",", append=True
)
model.fit(
    x=train_dataloader,
    validation_data=validation_dataloader,
    epochs=default_values["epochs"],
    callbacks=[history_logger],
)

# Flip to False to skip writing the trained model to disk.
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

In [12]:
# evaluate() returns a sequence: index 0 is reported as the loss, index 1 as the accuracy.
res = model.evaluate(test_dataloader)
test_loss, test_accuracy = res[0], res[1]
print(f"Test accuracy is {test_accuracy * 100:.2f}% while loss is {test_loss}")

## 2.3 - Results summary

In [13]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Read back the training history written by the CSV logger.
csv = pandas.read_csv(f"{project_definition['name']}_train.csv")
# Return value is discarded, so this presumably annotates `csv` in place - TODO confirm.
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# Title each figure and render it, loss first.
for figure, title in (
    (loss_graph, "Loss vs Val_loss in tuner search per epoch (Val dashed)"),
    (acc_graph, "Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)"),
):
    figure.update_layout(title=title).show()

We are very clearly overfitting. Since the model is extremely large right now, reducing its size might help it generalize better.

Considering the previous results, it would be interesting to see whether a smaller model is able to generalize.
# 3 - Smaller model

## 3.1 - Model definition


In [14]:
from typing import Any

# Identifies this experiment: the name is the prefix of the training-log CSV
# and of the saved model file written by the cells below.
project_definition: dict[str, Any] = {"name": "hand_tailored_small"}

In [15]:
from keras.layers import Conv2D, MaxPool2D, Input, Flatten, Dense, Layer


class SmallHandTailoredDeepNet(NormalizedModelWrapper):
    """Smaller fully connected network: Flatten -> Dense(512) -> Dense(64) -> Dense(1, sigmoid)."""

    def make_layers(self, input_shape: tuple[int, int, int]) -> tuple[Layer, Layer]:
        """Build the layer graph of the network.

        :param input_shape: shape of a single input sample, forwarded to ``Input``.
        :return: the ``(input_layer, output_layer)`` pair delimiting the graph.
        """
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)
        # data_format is not defined in this class; presumably it comes from NormalizedModelWrapper.
        x = Flatten(data_format=self.data_format.value)(input_layer)

        # The layer widths are arbitrary choices.
        x = Dense(units=512, activation='relu')(x)
        x = Dense(units=64, activation="relu")(x)

        # A single sigmoid unit produces the output.
        output_layer = Dense(units=1, activation="sigmoid")(x)

        return input_layer, output_layer

## 3.2 - Model instance and learning

In [17]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

model_generator = SmallHandTailoredDeepNet()
# Hand the training-set statistics to the wrapper (presumably used to normalize the inputs).
model_generator.load_dataset_mean_and_variance(mean, variance)

model = model_generator.make_model((3, 224, 224))
# The default Keras SGD learning rate (0.01) does not work here: the loss stays huge.
# We therefore use the smaller shared default (1e-3) rather than repeating the literal.
SgdLearningParameters(learning_rate=default_values["learning_rate"]).compile_model(model)

model.summary()

In [18]:
import keras

# We fix the number of epochs for now. Later we will add early stopping.
# The epoch count is fixed for now; early stopping will be added later.
# The CSV logger persists the per-epoch history to "<name>_train.csv".
history_logger = keras.callbacks.CSVLogger(
    f"{project_definition['name']}_train.csv", separator=",", append=True
)
model.fit(
    x=train_dataloader,
    validation_data=validation_dataloader,
    epochs=default_values["epochs"],
    callbacks=[history_logger],
)

# Flip to False to skip writing the trained model to disk.
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

In [19]:
# evaluate() returns a sequence: index 0 is reported as the loss, index 1 as the accuracy.
res = model.evaluate(test_dataloader)
test_loss, test_accuracy = res[0], res[1]
print(f"Test accuracy is {test_accuracy * 100:.2f}% while loss is {test_loss}")

## 3.3 - Results summary

In [20]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Read back the training history written by the CSV logger.
csv = pandas.read_csv(f"{project_definition['name']}_train.csv")
# Return value is discarded, so this presumably annotates `csv` in place - TODO confirm.
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# Title each figure and render it, loss first.
for figure, title in (
    (loss_graph, "Loss vs Val_loss in tuner search per epoch (Val dashed)"),
    (acc_graph, "Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)"),
):
    figure.update_layout(title=title).show()

The model is still overfitting heavily.

# 4 - Even smaller model

## 4.1 - Model definition

In [1]:
from typing import Any

# Identifies this experiment: the name is the prefix of the training-log CSV
# and of the saved model file written by the cells below.
project_definition: dict[str, Any] = {"name": "hand_tailored_xs"}

In [6]:
from models.structure.augmentation_wrapper import NormalizedModelWrapper
from keras.layers import Conv2D, MaxPool2D, Input, Flatten, Dense, Layer


class VerySmallHandTailoredDeepNet(NormalizedModelWrapper):
    """Smallest fully connected network: Flatten -> Dense(128) -> Dense(1, sigmoid)."""

    def make_layers(self, input_shape: tuple[int, int, int]) -> tuple[Layer, Layer]:
        """Build the layer graph of the network.

        :param input_shape: shape of a single input sample, forwarded to ``Input``.
        :return: the ``(input_layer, output_layer)`` pair delimiting the graph.
        """
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)
        # data_format is not defined in this class; presumably it comes from NormalizedModelWrapper.
        x = Flatten(data_format=self.data_format.value)(input_layer)

        # The layer width is an arbitrary choice.
        x = Dense(units=128, activation='relu')(x)
        # A single sigmoid unit produces the output.
        output_layer = Dense(units=1, activation="sigmoid")(x)

        return input_layer, output_layer

## 4.2 - Model instance and learning

In [9]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

model_generator = VerySmallHandTailoredDeepNet()
# Hand the training-set statistics to the wrapper (presumably used to normalize the inputs).
model_generator.load_dataset_mean_and_variance(mean, variance)
model = model_generator.make_model((3, 224, 224))

# The default Keras SGD learning rate (0.01) does not work: the loss stays huge,
# so a smaller value is used (1e-4 for this model).
learning_parameters = SgdLearningParameters(learning_rate=1e-4)
learning_parameters.compile_model(model)

model.summary()

In [10]:
import keras

# We fix the number of epochs for now. Later we will add early stopping.
# The epoch count is fixed for now; early stopping will be added later.
# The CSV logger persists the per-epoch history to "<name>_train.csv".
history_logger = keras.callbacks.CSVLogger(
    f"{project_definition['name']}_train.csv", separator=",", append=True
)
model.fit(
    x=train_dataloader,
    validation_data=validation_dataloader,
    epochs=default_values["epochs"],
    callbacks=[history_logger],
)

# Flip to False to skip writing the trained model to disk.
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

In [26]:
# evaluate() returns a sequence: index 0 is reported as the loss, index 1 as the accuracy.
res = model.evaluate(test_dataloader)
test_loss, test_accuracy = res[0], res[1]
print(f"Test accuracy is {test_accuracy * 100:.2f}% while loss is {test_loss}")

In [27]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Read back the training history written by the CSV logger.
csv = pandas.read_csv(f"{project_definition['name']}_train.csv")
# Return value is discarded, so this presumably annotates `csv` in place - TODO confirm.
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# Title each figure and render it, loss first.
for figure, title in (
    (loss_graph, "Loss vs Val_loss in tuner search per epoch (Val dashed)"),
    (acc_graph, "Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)"),
):
    figure.update_layout(title=title).show()

In [None]:
# Reference: strategies to prevent overfitting
# https://www.tensorflow.org/tutorials/keras/overfit_and_underfit#strategies_to_prevent_overfitting