In [48]:
# To make it easier
label_mappings = {0: "chihuahua", 1: "muffin"}

Case Studies we take a look at:
- Xception
- VGG-16 


> https://www.topbots.com/important-cnn-architectures/

In [2]:
# https://keras.io/guides/transfer_learning/

## Utility functions


In [60]:
from torchvision.datasets import ImageFolder
from keras.src import Functional
from typing import Callable
import plotly.express as px
import torch
import numpy as np


def make_evaluations_list(evaluate_model: Callable[[any, Functional], any], model: Functional,
                          reference_dataset: ImageFolder, samples: int = 8) -> list[tuple[any, any, any]]:
    """
    
    :param evaluate_model: 
    :param model: 
    :param reference_dataset: 
    :param samples: 
    :return: A tuple containing: [the image ready for plotting, the predicted label from the model, the true label]
    """
    for i in torch.rand(samples):
        # Random draw
        random_index = int(i * (len(reference_dataset)))

        img = reference_dataset[random_index][0]
        label = reference_dataset[random_index][1]
        yield torch.permute(img, (1, 2, 0)), evaluate_model(img, model), label


def print_evaluation_lists_information(image_evaluations_list: list[tuple[any, any, any]]) -> None:
    current_index = 0
    images = []
    for img, pred, y in image_evaluations_list:
        images.append(img)
        print(f"facet_{current_index}: y ={label_mappings[y]}, y_pred={[pred_entry[1] for pred_entry in pred]}")
        current_index += 1

    image_show = px.imshow(np.array(images), binary_string=True, facet_col=0, facet_col_wrap=4, width=600, height=400, template="plotly")
    image_show.update_layout(margin=dict(l=20, r=20, t=20, b=20))
    image_show.update_xaxes(showticklabels=False)
    image_show.update_yaxes(showticklabels=False)
    image_show.show()

# 1 - Xception
Before training and fine tuning lets see the performance of the pretrained model on some samples.

In [77]:
project_definition: dict[str, any] = {"name": "xception"}
# Our Xception default values. We had to reduce batch size or else we weren't able to work.
default_values = {"batch_size": 16, "epochs": 15, "learning_rate": 1e-2}

In [20]:
import keras

# Setting weights of course requires downloading them from the source (Keras is our mirror as we work with their libraries)
untouched_xception = keras.applications.Xception(weights='imagenet')

In [4]:
import numpy


def evaluate_xception(image, xception, verbose: bool = False):
    local_image = torch.permute(image, (1, 2, 0))
    local_image = numpy.expand_dims(local_image, 0)

    return keras.applications.xception.decode_predictions(xception.predict(local_image, verbose=verbose), top=3)[0]

## 1.2 - Data loading for Xception

In [114]:
from torch.utils.data import DataLoader
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
import torch

from dataset.dataset_loader import dataset_loader

train, test = dataset_loader((299, 299))
train, validation = torch.utils.data.random_split(train, [0.875, 0.125])

train_dataloader = DataLoader(dataset=train, batch_size=default_values["batch_size"], shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=default_values["batch_size"], shuffle=True)
test_dataloader = DataLoader(dataset=test, batch_size=default_values["batch_size"], shuffle=True)

We don't need to calculate the mean and variance of the dataset as we use a pre-trained model.

It is required to use mean/variance of the data xception used (as in the pre-process pipe).

## 1.1 -  Model Evaluation without training

In [57]:
print_evaluation_lists_information(make_evaluations_list(evaluate_xception, untouched_xception, train))

facet_0: y =chihuahua, y_pred=['Chihuahua', 'toy_terrier', 'miniature_pinscher']
facet_1: y =chihuahua, y_pred=['bull_mastiff', 'bloodhound', 'Great_Dane']
facet_2: y =chihuahua, y_pred=['Chihuahua', 'toy_terrier', 'Mexican_hairless']
facet_3: y =muffin, y_pred=['French_loaf', 'tray', 'hamper']
facet_4: y =chihuahua, y_pred=['Chihuahua', 'Pomeranian', 'papillon']
facet_5: y =chihuahua, y_pred=['envelope', 'packet', 'handkerchief']
facet_6: y =chihuahua, y_pred=['Pomeranian', 'keeshond', 'Pekinese']
facet_7: y =chihuahua, y_pred=['standard_poodle', 'toy_poodle', 'miniature_poodle']


In [9]:
# The model does not recognize Muffins as being muffins just as bakery (Which makes sense)
# As the label Muffin is missing in the decoding of the Xception we just map it to bakery

In [115]:
predictions: list[tuple[list, int]] = [(evaluate_xception(i[0], untouched_xception), i[1]) for i in test]

In [116]:
# Redefine the labels to fit the observations on Xception
label_mappings = {0: ["chihuahua", "dog"], 1: ["muffin", "bakery", "bagel"]}

TP = 0  # True positives
for i in range(len(predictions)):
    predicted_values = [j[1].lower() for j in predictions[i][0]]
    true_label = label_mappings[predictions[i][1]]

    TP += 1 if set(predicted_values) & set(true_label) else 0

precision = TP / len(predictions)
precision  # On the top 3 considering the fact that many miss classifications happen for the fact that the labels are more accurate for the dogs (some samples in the training set are not chihuahuas) and that Muffins do not have a real label.

0.6765202702702703

\sim\sim## 1.2 - Fine tuning the model
We follow the following guide: https://keras.io/guides/transfer_learning/

### 1.2.1 - Model definition

> Note: each Keras Application expects a specific kind of input preprocessing. For Xception, call keras.applications.xception.preprocess_input on your inputs before passing them to the model. xception.preprocess_input will scale input pixels between -1 and 1

#### Note:
While Keras documents to use ```keras.applications.xception.preprocess_input(x, mode='torch')```
we cannot use it with our loading functions as it requires input to not be scaled to tensor!

So we simply workaround it by applying the ```tf``` procedure.

In [63]:
from models.structure.base_model_wrapper import BaseModelWrapper
import keras
from keras.src import Functional


# https://keras.io/guides/transfer_learning/#the-typical-transferlearning-workflow 
# With augmentation just to permute here
class XceptionAugmented(BaseModelWrapper):
    latest_xception_model: Functional

    def make_layers(self, input_shape: (int, int, int)) -> tuple[keras.Layer, keras.Layer]:
        C, W, H = input_shape

        inputs = keras.Input(input_shape)

        x = keras.layers.Permute((2, 3, 1))(inputs)

        # Augmentation process.
        x = keras.layers.RandomFlip(mode="horizontal_and_vertical")(x)
        x = keras.layers.RandomRotation(0.3)(x)
        x = keras.layers.RandomBrightness(0.4, value_range=(0., 1.))(x)

        x = keras.layers.Rescaling(255)(x)
        x = keras.applications.xception.preprocess_input(x)
        self.latest_xception_model = keras.applications.Xception(
            weights='imagenet', include_top=False, input_shape=(W, H, C)
        )

        self.latest_xception_model.trainable = False
        x = self.latest_xception_model(x, training=False)
        x = keras.layers.GlobalAveragePooling2D()(x)

        outputs = keras.layers.Dense(1, activation='sigmoid')(x)
        return inputs, outputs

### 1.2.2 - Training and evaluation only on appended structure

In [64]:
from models.zero_one_validation_loss import ZeroOneLoss
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

xception_aug = XceptionAugmented()
model = xception_aug.make_model((3, 299, 299))

learning_parameters = SgdLearningParameters(learning_rate=0.01, momentum=0.9, metrics=["accuracy", ZeroOneLoss()])
learning_parameters.compile_model(model)

model.summary()

In [65]:
history = model.fit(train_dataloader, epochs=5, validation_data=validation_dataloader, callbacks=[
    # To persist the history
    keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True),
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, verbose=1, mode='min')
])

Epoch 1/5
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 262ms/step - accuracy: 0.9286 - loss: 0.1870 - total_0-1_loss: 86.0346 - val_accuracy: 0.9831 - val_loss: 0.0397 - val_total_0-1_loss: 10.0000
Epoch 2/5
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 254ms/step - accuracy: 0.9853 - loss: 0.0499 - total_0-1_loss: 29.9769 - val_accuracy: 0.9882 - val_loss: 0.0328 - val_total_0-1_loss: 7.0000
Epoch 3/5
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 255ms/step - accuracy: 0.9902 - loss: 0.0376 - total_0-1_loss: 22.6500 - val_accuracy: 0.9898 - val_loss: 0.0311 - val_total_0-1_loss: 6.0000
Epoch 4/5
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 248ms/step - accuracy: 0.9885 - loss: 0.0366 - total_0-1_loss: 23.4000 - val_accuracy: 0.9915 - val_loss: 0.0273 - val_total_0-1_loss: 5.0000
Epoch 5/5
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 251ms/step - accuracy: 0.9908 - loss: 0.0306 -

In [12]:
res = model.evaluate(test_dataloader)
print(f"Test accuracy is {res[1] * 100:.2f}% while loss is {res[0]}")

### 1.2.3 - Free the network and final training 
I'd really like to do this step but we run OOM.
The performance of the network still is good enough (we have a loss of only 0.02 on test) so
we can stop there with an acceptable model.

## 1.3 - K Fold Cross Validation

In [66]:
k = 5

### 1.3.1 - Data Loading

In [67]:
from torch.utils.data import DataLoader, ConcatDataset
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper

from dataset.dataset_loader import dataset_loader

train, test = dataset_loader((299, 299))
full_dataset = ConcatDataset([train, test])

k_fold_manager = KFoldDatasetWrapper(k)
k_fold_manager.load_data(full_dataset)

### Procedure

In [68]:
from models.zero_one_validation_loss import ZeroOneLoss
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

learning_parameters = SgdLearningParameters(learning_rate=0.01, momentum=0.9, metrics=["accuracy", ZeroOneLoss()])

In [69]:
results, test_fold_sizes = k_fold_manager.run_k_fold_cv(learning_parameters, XceptionAugmented(), (3, 299, 299),
                                                        batch_size=16)

Starting procedure for fold 0
Epoch 1/80
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 256ms/step - accuracy: 0.9403 - loss: 0.1798 - total_0-1_loss: 74.5462 - val_accuracy: 0.9949 - val_loss: 0.0337 - val_total_0-1_loss: 3.0000
Epoch 2/80
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 259ms/step - accuracy: 0.9844 - loss: 0.0497 - total_0-1_loss: 34.3769 - val_accuracy: 0.9932 - val_loss: 0.0247 - val_total_0-1_loss: 4.0000
Epoch 3/80
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 255ms/step - accuracy: 0.9862 - loss: 0.0434 - total_0-1_loss: 25.8385 - val_accuracy: 0.9966 - val_loss: 0.0211 - val_total_0-1_loss: 2.0000
Epoch 4/80
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 257ms/step - accuracy: 0.9927 - loss: 0.0308 - total_0-1_loss: 18.8115 - val_accuracy: 0.9966 - val_loss: 0.0192 - val_total_0-1_loss: 2.0000
Epoch 5/80
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 259ms/step -

In [70]:
import numpy as np
import pandas as pd

dataframe = pd.DataFrame(results)
dataframe.columns = ["loss", "accuracy", "0-1-loss"]

# To measure 0-1 loss correctly
dataframe["test_size"] = test_fold_sizes
dataframe["0-1-scaled-loss"] = dataframe["0-1-loss"] / dataframe["test_size"]

In [71]:
dataframe

Unnamed: 0,loss,accuracy,0-1-loss,test_size,0-1-scaled-loss
0,0.018905,0.998311,2.0,1184,0.001689
1,0.025221,0.991554,10.0,1184,0.008446
2,0.023068,0.993238,8.0,1183,0.006762
3,0.016118,0.997464,3.0,1183,0.002536
4,0.022453,0.994083,7.0,1183,0.005917


In [72]:
np.average(dataframe[["loss", "accuracy", "0-1-scaled-loss"]], axis=0)

array([0.02115294, 0.99492987, 0.00507014])

In [73]:
dataframe.to_csv(f"{project_definition["name"]}_k_fold_cv.csv", index=False)

## Retrain model 

In [80]:
from models.zero_one_validation_loss import ZeroOneLoss
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

xception_aug = XceptionAugmented()
model = xception_aug.make_model((3, 299, 299))

learning_parameters = SgdLearningParameters(learning_rate=0.01, momentum=0.9, metrics=["accuracy", ZeroOneLoss()])
learning_parameters.compile_model(model)

model.summary()

In [81]:
history = model.fit(train_dataloader, epochs= 80, validation_data=validation_dataloader, callbacks=[
    # To persist the history
    keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True),
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, min_delta=1e-2, verbose=1, mode='min')
])

Epoch 1/80
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 255ms/step - accuracy: 0.9506 - loss: 0.1788 - total_0-1_loss: 65.5192 - val_accuracy: 0.9865 - val_loss: 0.0365 - val_total_0-1_loss: 8.0000
Epoch 2/80
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 257ms/step - accuracy: 0.9839 - loss: 0.0493 - total_0-1_loss: 32.4192 - val_accuracy: 0.9898 - val_loss: 0.0318 - val_total_0-1_loss: 6.0000
Epoch 3/80
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 263ms/step - accuracy: 0.9906 - loss: 0.0412 - total_0-1_loss: 20.8077 - val_accuracy: 0.9882 - val_loss: 0.0297 - val_total_0-1_loss: 7.0000
Epoch 4/80
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 254ms/step - accuracy: 0.9883 - loss: 0.0401 - total_0-1_loss: 23.8615 - val_accuracy: 0.9865 - val_loss: 0.0274 - val_total_0-1_loss: 8.0000
Epoch 5/80
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 257ms/step - accuracy: 0.9886 - loss: 0.03

In [86]:
res = model.evaluate(test_dataloader)
print(f"Test accuracy is {res[1] * 100:.2f}% while loss is {res[0]}")
print(f"The 0-1 loss is: {res[2]/len(test)}")

[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 233ms/step - accuracy: 0.9942 - loss: 0.0187 - total_0-1_loss: 4.2933
Test accuracy is 99.24% while loss is 0.021568473428487778
The 0-1 loss is: 0.007601351351351352


In [85]:
res[2] / len(test)

0.007601351351351352

# 2- VGG-16
https://arxiv.org/abs/1409.1556

In [87]:
project_definition: dict[str, any] = {"name": "vgg16"}
default_values = {"batch_size": 32, "epochs": 15, "learning_rate": 1e-2}

In [88]:
import keras

# Setting weights of course requires downloading them from the source (Keras is our mirror as we work with their libraries)
untouched_vgg16 = keras.applications.vgg16.VGG16(weights='imagenet')

In [99]:
import numpy


def evaluate_vgg16(image, vgg16, verbose: bool = False):
    local_image = torch.permute(image, (1, 2, 0))
    local_image = numpy.expand_dims(local_image, 0)

    local_image = local_image * 255  # Workaround for the torch env and our loader
    local_image = keras.applications.vgg16.preprocess_input(local_image)
    
    return keras.applications.vgg16.decode_predictions(vgg16.predict(local_image, verbose=verbose), top=3)[0]

## 2.1 - Data loading for VGG-16

In [91]:
from torch.utils.data import DataLoader
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper
import torch

from dataset.dataset_loader import dataset_loader

train, test = dataset_loader((224, 224))
train, validation = torch.utils.data.random_split(train, [0.875, 0.125])

train_dataloader = DataLoader(dataset=train, batch_size=default_values["batch_size"], shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=default_values["batch_size"], shuffle=True)
test_dataloader = DataLoader(dataset=test, batch_size=default_values["batch_size"], shuffle=True)

## 2.1 -Model evaulation without training

In [100]:
print_evaluation_lists_information(make_evaluations_list(evaluate_vgg16, untouched_vgg16, test))

facet_0: y =['chihuahua', 'dog'], y_pred=['redbone', 'Rhodesian_ridgeback', 'basset']
facet_1: y =['chihuahua', 'dog'], y_pred=['Chihuahua', 'Boston_bull', 'sombrero']
facet_2: y =['muffin', 'bakery'], y_pred=['pretzel', 'bagel', 'bakery']
facet_3: y =['muffin', 'bakery'], y_pred=['thimble', 'perfume', 'ski_mask']
facet_4: y =['muffin', 'bakery'], y_pred=['bagel', 'French_loaf', 'plate']
facet_5: y =['muffin', 'bakery'], y_pred=['trifle', 'ice_cream', 'bakery']
facet_6: y =['chihuahua', 'dog'], y_pred=['Chihuahua', 'running_shoe', 'Yorkshire_terrier']
facet_7: y =['chihuahua', 'dog'], y_pred=['Chihuahua', 'toy_terrier', 'French_bulldog']


In [96]:
def evaluate_vgg16(image, vgg16, verbose: bool = False):
    local_image = torch.permute(image, (1, 2, 0))
    local_image = numpy.expand_dims(local_image, 0)

    return keras.applications.vgg16.decode_predictions(vgg16.predict(local_image, verbose=verbose), top=3)[0]

In [104]:
predictions: list[tuple[list, int]] = [(evaluate_vgg16(i[0], untouched_vgg16), i[1]) for i in test]

In [112]:
label_mappings = {0: ["chihuahua", "dog"], 1: ["muffin", "bakery", "bagel"]}
TP = 0  # True positives
for i in range(len(predictions)):
    predicted_values = [j[1].lower() for j in predictions[i][0]]
    true_label = label_mappings[predictions[i][1]]

    TP += 1 if set(predicted_values) & set(true_label) else 0

precision = TP / len(predictions)
precision  # On the top 3 considering the fact that many miss classifications happen for the fact that the labels are more accurate for the dogs (some samples in the training set are not chihuahuas) and that Muffins do not have a real label.

0.5701013513513513

## 2.2 - Fine tuning the model

### 2.2.1 -  Model definition

In [20]:
from models.structure.base_model_wrapper import BaseModelWrapper

import keras


# https://keras.io/guides/transfer_learning/#the-typical-transferlearning-workflow 
class VGG16Custom(BaseModelWrapper):
    latest_model: Functional

    def make_layers(self, input_shape: (int, int, int)) -> tuple[keras.Layer, keras.Layer]:
        C, W, H = input_shape

        inputs = keras.Input(input_shape)
        x = keras.layers.Permute((2, 3, 1))(inputs)

        # Augmentation process.
        x = keras.layers.RandomFlip(mode="horizontal_and_vertical")(x)
        x = keras.layers.RandomRotation(0.3)(x)
        x = keras.layers.RandomBrightness(0.4, value_range=(0., 1.))(x)

        x = keras.layers.Rescaling(255)(x)  # Avoid torch problem
        x = keras.applications.vgg16.preprocess_input(x)

        self.latest_model = keras.applications.vgg16.VGG16(
            weights='imagenet', include_top=False, input_shape=(W, H, C)
        )

        self.latest_model.trainable = False
        x = self.latest_model(x, training=False)
        x = keras.layers.GlobalAveragePooling2D()(x)

        outputs = keras.layers.Dense(1, activation='sigmoid')(x)
        return inputs, outputs

### 2.2.2. - Training and evaluation on frozen base model

In [31]:
from models.zero_one_validation_loss import ZeroOneLoss
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

vgg_custom_wrapper = VGG16Custom()
model = vgg_custom_wrapper.make_model((3, 224, 224))

learning_parameters = SgdLearningParameters(learning_rate=0.01, momentum=0.9, metrics=["accuracy", ZeroOneLoss()])
learning_parameters.compile_model(model)

model.summary(line_length=110)

In [48]:
history = model.fit(train_dataloader, epochs=5, validation_data=validation_dataloader, callbacks=[])

In [49]:
res = model.evaluate(test_dataloader)
print(f"Test accuracy is {res[1] * 100:.2f}% while loss is {res[0]}")

### 2.2.3 - Fine tuning on the whole structure 

In [50]:
vgg_custom_wrapper.latest_model.trainable = True
model.compile(optimizer=keras.optimizers.Adam(1e-5), loss="binary_crossentropy", metrics=["accuracy"])

model.fit(train_dataloader, epochs=10, validation_data=validation_dataloader, callbacks=[
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, verbose=0, mode="min"),
])

In [51]:
res = model.evaluate(test_dataloader)
print(f"Test accuracy is {res[1] * 100:.2f}% while loss is {res[0]}")

In [None]:
# TODO FInish
# But well done! Vgg16 is op for now in our problem domani!

## 2.3 - K fold CV

In [52]:
k = 5

### 1.3.1 - Data Loading

In [53]:
from torch.utils.data import DataLoader, ConcatDataset
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper

from dataset.dataset_loader import dataset_loader

train, test = dataset_loader((224, 224))
full_dataset = ConcatDataset([train, test])

k_fold_manager = KFoldDatasetWrapper(k)
k_fold_manager.load_data(full_dataset)

### Procedure

In [54]:
from models.zero_one_validation_loss import ZeroOneLoss
from models.structure.learning_parameters.sgd_learning_parameters import SgdLearningParameters

learning_parameters = SgdLearningParameters(learning_rate=0.01, momentum=0.9, metrics=["accuracy", ZeroOneLoss()])

In [55]:
results, test_fold_sizes = k_fold_manager.run_k_fold_cv(learning_parameters, VGG16Custom(), (3, 224, 224),
                                                        batch_size=16)

In [56]:
import numpy as np
import pandas as pd

dataframe = pd.DataFrame(results)
dataframe.columns = ["loss", "accuracy", "0-1-loss"]

# To measure 0-1 loss correctly
dataframe["test_size"] = test_fold_sizes
dataframe["0-1-scaled-loss"] = dataframe["0-1-loss"] / dataframe["test_size"]

In [59]:
dataframe

In [60]:
np.average(dataframe[["loss", "accuracy", "0-1-scaled-loss"]], axis=0)

In [58]:
dataframe.to_csv(f"{project_definition["name"]}_k_fold_cv.csv", index=False)