In [1]:
# Shared hyper-parameter defaults for the experiments in this notebook.
default_values = dict(
    batch_size=32,
    epochs=20,
    learning_rate=0.001,
)

We start by creating a simple custom DNN.

# 1 - Load data
The overall data split is [64%, 16%, 20%] for train/validation/test (taking train and test together as the full set). <br />
Best practice suggests a [70%, 15%, 15%] split, but we keep it this way.

In [2]:
import models.structure.base_model_wrapper
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
from torch.utils.data import DataLoader
from dataset.dataset_loader import dataset_loader

# Load the train and test datasets through the project loader.
train, test = dataset_loader((160, 160), is_grayscale=False)

# We take 20% of train as validation
# (presumably 5 folds, of which fold 0 is used -- confirm in KFoldDatasetWrapper).
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

local_train, validation = dataset_split_controller.get_data_for_fold(0)

# Only the training loader is shuffled. Validation and test loaders keep a fixed
# order so that evaluation passes iterate the samples identically on every run.
train_dataloader = DataLoader(dataset=local_train, batch_size=default_values["batch_size"], shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=default_values["batch_size"], shuffle=False)
test_dataloader = DataLoader(dataset=test, batch_size=default_values["batch_size"], shuffle=False)

# 2 - First model
Our first model is a simple fully connected DNN. <br><br />


## 2.1 - Model definition 

In [3]:
from typing import Any

# Identifies the current experiment: its "name" is used as the prefix of the
# training-history CSV and of the saved model file in the cells below.
project_definition: dict[str, Any] = {"name": "hand_tailored_v1"}

In [4]:
from keras.layers import Conv2D, MaxPool2D, Input, Flatten, Dense, Layer
from models.structure.base_model_wrapper import BaseModelWrapper


class HandTailoredDeepNet(BaseModelWrapper):
    """Fully connected network working on the flattened input.

    The input is flattened, passed through two ReLU dense layers
    (2048 then 720 units) and ends with a single sigmoid output unit.
    """

    def make_layers(self, input_shape: tuple[int, int, int]) -> tuple[Layer, Layer]:
        """Build the layer graph of the network.

        :param input_shape: shape forwarded to the Keras ``Input`` layer.
        :return: the ``(input_layer, output_layer)`` pair.
        """
        # The input layer is named after the concrete class.
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)
        # `data_format` is not defined in this cell; presumably provided by BaseModelWrapper.
        x = Flatten(data_format=self.data_format.value)(input_layer)

        # The layer widths are arbitrary choices.
        for units in (2048, 720):
            x = Dense(units=units, activation="relu")(x)
        output_layer = Dense(units=1, activation="sigmoid")(x)

        return input_layer, output_layer

## 2.2 - Model instance and learning

In [5]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

model = HandTailoredDeepNet().make_model((3, 160, 160))
# The default Keras learning rate (0.01) doesn't work: the loss always stays huge,
# so we use the smaller shared default (1e-3) from `default_values` instead of
# repeating the number here.
SgdLearningParameters(learning_rate=default_values["learning_rate"]).compile_model(model)

model.summary()

In [6]:
import keras

# The number of epochs is fixed for now; early stopping will be added later.
# The CSV logger persists the training history to disk.
# NOTE(review): append=True adds rows to an existing file instead of overwriting
# it, so re-running this cell accumulates several runs in one CSV -- confirm intended.
history_logger = keras.callbacks.CSVLogger(
    f"{project_definition['name']}_train.csv", separator=",", append=True
)
model.fit(
    x=train_dataloader,
    validation_data=validation_dataloader,
    epochs=default_values["epochs"],
    callbacks=[history_logger],
)

# Toggle to skip writing the trained model to disk.
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

In [7]:
# Evaluate on the test loader; index 0 is read as the loss, index 1 as the accuracy.
res = model.evaluate(test_dataloader)
test_loss = res[0]
test_accuracy_percent = res[1] * 100
print(f"Test accuracy is {test_accuracy_percent:.2f}% while loss is {test_loss}")

## 2.3 - Results summary

In [9]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Reload the training history written by the CSV logger.
history_path = f"{project_definition['name']}_train.csv"
csv = pandas.read_csv(history_path)
# NOTE(review): the return value is unused, so this presumably annotates `csv`
# in place -- confirm in utils.data_processing.
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# Title and render each figure, loss first and accuracy second.
for graph, graph_title in (
    (loss_graph, "Loss vs Val_loss in tuner search per epoch (Val dashed)"),
    (acc_graph, "Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)"),
):
    graph.update_layout(title=graph_title).show()

As the learning hyperparameters are not fine-tuned, we can expect the learning process to improve by tuning them (learning is very slow right now). 
Yet the network is extremely large
compared to a better-performing (even if heavily overfitting) CNN, so unless its memory footprint can be reduced, the CNN remains the way to go.

(It seems like we are underfitting)

Considering the previous results, it would be interesting to see whether a smaller model is still able to generalize.
# 3 - Smaller model

## 3.1 - Model definition


In [10]:
from typing import Any

# Identifies the current experiment: its "name" is used as the prefix of the
# training-history CSV and of the saved model file in the cells below.
project_definition: dict[str, Any] = {"name": "hand_tailored_small"}

In [13]:
from keras.layers import Conv2D, MaxPool2D, Input, Flatten, Dense, Layer
from models.structure.base_model_wrapper import BaseModelWrapper

# This model is still 100MB bigger than the biggest CNN we have
class SmallHandTailoredDeepNet(BaseModelWrapper):
    """Smaller fully connected network working on the flattened input.

    The input is flattened, passed through two ReLU dense layers
    (960 then 128 units) and ends with a single sigmoid output unit.
    """

    def make_layers(self, input_shape: tuple[int, int, int]) -> tuple[Layer, Layer]:
        """Build the layer graph of the network.

        :param input_shape: shape forwarded to the Keras ``Input`` layer.
        :return: the ``(input_layer, output_layer)`` pair.
        """
        # The input layer is named after the concrete class.
        input_layer = Input(shape=input_shape, name=self.__class__.__name__)
        # `data_format` is not defined in this cell; presumably provided by BaseModelWrapper.
        x = Flatten(data_format=self.data_format.value)(input_layer)

        # The layer widths are arbitrary choices.
        for units in (960, 128):
            x = Dense(units=units, activation="relu")(x)

        output_layer = Dense(units=1, activation="sigmoid")(x)

        return input_layer, output_layer

## 3.2 - Model instance and learning

In [18]:
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

# Build the smaller network for the (3, 160, 160) input shape.
small_net_input_shape = (3, 160, 160)
model = SmallHandTailoredDeepNet().make_model(small_net_input_shape)

# The default Keras learning rate (0.01) doesn't work: the loss always stays huge,
# so a lower value is used.
sgd_parameters = SgdLearningParameters(learning_rate=0.005)
sgd_parameters.compile_model(model)

model.summary()

In [19]:
import keras

# The number of epochs is fixed for now; early stopping will be added later.
# The CSV logger persists the training history to disk.
# NOTE(review): append=True adds rows to an existing file instead of overwriting
# it, so re-running this cell accumulates several runs in one CSV -- confirm intended.
history_logger = keras.callbacks.CSVLogger(
    f"{project_definition['name']}_train.csv", separator=",", append=True
)
model.fit(
    x=train_dataloader,
    validation_data=validation_dataloader,
    epochs=default_values["epochs"],
    callbacks=[history_logger],
)

# Toggle to skip writing the trained model to disk.
persist_model: bool = True
if persist_model:
    model.save(f'{project_definition["name"]}.keras')

In [20]:
# Evaluate on the test loader; index 0 is read as the loss, index 1 as the accuracy.
res = model.evaluate(test_dataloader)
test_loss = res[0]
test_accuracy_percent = res[1] * 100
print(f"Test accuracy is {test_accuracy_percent:.2f}% while loss is {test_loss}")

## 3.3 - Results summary

In [None]:
from utils.data_processing import make_loss_graphs, make_loss_accuracy_graphs, add_tuner_iteration_to_data
import pandas

# Reload the training history written by the CSV logger.
history_path = f"{project_definition['name']}_train.csv"
csv = pandas.read_csv(history_path)
# NOTE(review): the return value is unused, so this presumably annotates `csv`
# in place -- confirm in utils.data_processing.
add_tuner_iteration_to_data(csv)

loss_graph = make_loss_graphs(csv)
acc_graph = make_loss_accuracy_graphs(csv)

# Title and render each figure, loss first and accuracy second.
for graph, graph_title in (
    (loss_graph, "Loss vs Val_loss in tuner search per epoch (Val dashed)"),
    (acc_graph, "Accuracy vs Val_Accuracy in tuner search per epoch (Val dashed)"),
):
    graph.update_layout(title=graph_title).show()

The model is underfitting. We require more data and/or a richer network.