In [1]:
label_mappings = {0: "chihuahua", 1: "muffin"}

Case Studies we take a look at:
- VGG-16 (from Keras models repository, fine tuned)
- XCeption

> https://www.topbots.com/important-cnn-architectures/

In [50]:
# https://keras.io/guides/transfer_learning/

## Utility functions


In [2]:
from torchvision.datasets import ImageFolder
from keras.src import Functional
from typing import Callable
import plotly.express as px
import torch
import numpy as np


def make_evaluations_list(evaluate_model: Callable[[any, Functional], any], model: Functional,
                          reference_dataset: ImageFolder, samples: int = 8) -> list[tuple[any, any, any]]:
    """
    
    :param evaluate_model: 
    :param model: 
    :param reference_dataset: 
    :param samples: 
    :return: A tuple containing: [the image ready for plotting, the predicted label from the model, the true label]
    """
    for i in torch.rand(samples):
        # Random draw
        random_index = int(i * (len(reference_dataset)))

        img = reference_dataset[random_index][0]
        label = reference_dataset[random_index][1]
        yield torch.permute(img, (1, 2, 0)), evaluate_model(img, model), label


def print_evaluation_lists_information(image_evaluations_list: list[tuple[any, any, any]]) -> None:
    current_index = 0
    images = []

    for img, pred, y in image_evaluations_list:
        images.append(img)
        
        print(f"For facet {current_index} model has predicted: {[pred_entry[1] for pred_entry in pred]}. "
              f"The correct label is {y}")

        current_index += 1
        
    image_show = px.imshow(np.array(images), binary_string=True, facet_col=0, facet_col_wrap=4)
    image_show.show()

# 1 - Xception
Before training and fine tuning lets see the performance of the pretrained model on some samples.

In [1]:
project_definition: dict[str, any] = {"name": "xception"}

In [85]:
import keras

untouched_xception = keras.applications.Xception(weights='imagenet')

In [2]:
import numpy


def evaluate_xception(image, xception, verbose: bool = False):
    local_image = torch.permute(image, (1, 2, 0))
    local_image = numpy.expand_dims(local_image, 0)

    return keras.applications.xception.decode_predictions(xception.predict(local_image, verbose=verbose), top=3)[0]

## 1.2 - Data loading for Xception

In [3]:
from torch.utils.data import DataLoader
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper

from dataset.dataset_loader import dataset_loader

train, test = dataset_loader((299, 299), is_grayscale=False)
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

local_train, validation = dataset_split_controller.get_data_for_fold(0)

train_dataloader = DataLoader(dataset=local_train, batch_size=16, shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=16, shuffle=True)
test_dataloader = DataLoader(dataset=test, batch_size=16, shuffle=True)

## 1.1 -  Model Evaluation without training

In [88]:
print_evaluation_lists_information(make_evaluations_list(evaluate_xception, untouched_xception, local_train))

For facet 0 model has predicted: ['pick', 'comic_book', 'jersey']. The correct label is 0
For facet 1 model has predicted: ['Chihuahua', 'Boston_bull', 'toy_terrier']. The correct label is 0
For facet 2 model has predicted: ['plate', 'acorn', 'mushroom']. The correct label is 1
For facet 3 model has predicted: ['papillon', 'Chihuahua', 'Brabancon_griffon']. The correct label is 0
For facet 4 model has predicted: ['Chihuahua', 'toy_terrier', 'Mexican_hairless']. The correct label is 0
For facet 5 model has predicted: ['Chihuahua', 'toy_terrier', 'miniature_pinscher']. The correct label is 0
For facet 6 model has predicted: ['bakery', 'tray', 'plate']. The correct label is 1
For facet 7 model has predicted: ['bakery', 'plate', 'ice_cream']. The correct label is 1


In [None]:
# The model does not recognize Muffins as being muffins just as bakery (Which makes sense)
# As the label Muffin is missing in the decoding of the Xception we just map it to bakery

In [None]:
predictions: list[tuple[list, int]] = [(evaluate_xception(i[0], untouched_xception), i[1]) for i in test]

In [None]:
# Redefine the labels to fit the observations on Xception
label_mappings = {0: ["chihuahua", "dog"], 1: ["muffin", "bakery"]}

TP = 0  # True positives
for i in range(len(predictions)):
    predicted_values = [j[1].lower() for j in predictions[i][0]]
    true_label = label_mappings[predictions[i][1]]

    TP += 1 if set(predicted_values) & set(true_label) else 0

precision = TP / len(predictions)
precision  # On the top 3 considering the fact that many miss classifications happen for the fact that the labels are more accurate for the dogs (some samples in the training set are not chihuahuas) and that Muffins do not have a real label.

## 1.2 - Fine tuning the model
We follow the following guide: https://keras.io/guides/transfer_learning/

### 1.2.1 - Model definition

In [4]:
from models.structure.base_model_wrapper import BaseModelWrapper
from models.structure.augmentation_wrapper import InvertedChannelsAugmentationWrapper
import keras
from keras.src import Functional

# https://keras.io/guides/transfer_learning/#the-typical-transferlearning-workflow 
# With augmentation just to permute here
class XceptionAugmented(BaseModelWrapper):
    latest_xception_model: Functional

    def make_layers(self, input_shape: (int, int, int)) -> tuple[keras.Layer, keras.Layer]:
        C, W, H = input_shape

        inputs = keras.Input(input_shape)

        x = keras.layers.Permute((2, 3, 1))(inputs)
        x = keras.layers.Rescaling(scale=1 / 2, offset=-1)(x)

        self.latest_xception_model = keras.applications.Xception(
            weights='imagenet', include_top=False, input_shape=(W, H, C)
        )

        self.latest_xception_model.trainable = False
        x = self.latest_xception_model(x, training=False)
        x = keras.layers.GlobalAveragePooling2D()(x)

        outputs = keras.layers.Dense(1, activation='sigmoid')(x)
        return inputs, outputs

### 1.2.2 - Training and evaluation only on appended structure

In [5]:
xception_aug = XceptionAugmented()
model = xception_aug.make_model((3, 299, 299))
# Default configuration for the Xception model learning
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.summary()

In [6]:
history = model.fit(train_dataloader, epochs=5, validation_data=validation_dataloader, callbacks=[
    # To persist the history
    keras.callbacks.CSVLogger(f"{project_definition['name']}_train.csv", separator=",", append=True),
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, verbose=1, mode='min')
])

Epoch 1/5
[1m  1/237[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:39[0m 422ms/step - accuracy: 0.5625 - loss: 0.7136

  outputs = tnn.conv2d(


[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 257ms/step - accuracy: 0.9629 - loss: 0.2153 - val_accuracy: 0.9926 - val_loss: 0.0439
Epoch 2/5
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 263ms/step - accuracy: 0.9934 - loss: 0.0367 - val_accuracy: 0.9968 - val_loss: 0.0326
Epoch 3/5
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 264ms/step - accuracy: 0.9942 - loss: 0.0245 - val_accuracy: 0.9947 - val_loss: 0.0262
Epoch 4/5
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 257ms/step - accuracy: 0.9952 - loss: 0.0190 - val_accuracy: 0.9968 - val_loss: 0.0254
Epoch 5/5
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 256ms/step - accuracy: 0.9986 - loss: 0.0104 - val_accuracy: 0.9958 - val_loss: 0.0229


In [7]:
res = model.evaluate(test_dataloader)
print(f"Test accuracy is {res[1] * 100:.2f}% while loss is {res[0]}")

[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 220ms/step - accuracy: 0.9975 - loss: 0.0141
Test accuracy is 99.49% while loss is 0.01716923713684082


### 1.2.3 - Free the network and final training 

In [8]:
xception_aug.latest_xception_model.trainable = True
model.compile(optimizer=keras.optimizers.Adam(1e-5), loss="binary_crossentropy", metrics=["accuracy"])

history = model.fit(train_dataloader, epochs=5, validation_data=validation_dataloader)

Epoch 1/5


OutOfMemoryError: Exception encountered when calling SeparableConv2D.call().

[1mCUDA out of memory. Tried to allocate 18.00 MiB. GPU 0 has a total capacity of 7.75 GiB of which 51.25 MiB is free. Process 6502 has 962.00 MiB memory in use. Including non-PyTorch memory, this process has 5.41 GiB memory in use. Of the allocated memory 4.98 GiB is allocated by PyTorch, and 225.60 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)[0m

Arguments received by SeparableConv2D.call():
  • inputs=torch.Tensor(shape=torch.Size([16, 19, 19, 728]), dtype=float32)

## 1.3 - K Fold Cross Validation

# 2- VGG-16
https://arxiv.org/abs/1409.1556

## 2.1 - Data loading for VGG-16

In [None]:
from torch.utils.data import DataLoader
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper

from dataset.dataset_loader import dataset_loader

train, test = dataset_loader((224, 224), is_grayscale=False)
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

local_train, validation = dataset_split_controller.get_data_for_fold(0)

train_dataloader = DataLoader(dataset=local_train, batch_size=16, shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=16, shuffle=True)
test_dataloader = DataLoader(dataset=test, batch_size=16, shuffle=True)

## 2.1 -Model evaulation without training

In [None]:
untouched_vgg16 = keras.applications.VGG16(weights='imagenet')

In [None]:
def evaluate_vgg16(image, vgg16, verbose: bool = False):
    local_image = torch.permute(image, (1, 2, 0))
    local_image = numpy.expand_dims(local_image, 0)

    return keras.applications.vgg16.decode_predictions(vgg16.predict(local_image, verbose=verbose), top=3)[0]

In [None]:
import torch
import plotly.express as px
import numpy as np

# todo make this a method. We repeat it 3 times like idiots
# We look at 8 samples directly
VISUALIZE_SAMPLES: int = 8
image_list: list = []

evaluations: list[tuple] = []

for i in torch.rand(VISUALIZE_SAMPLES):
    image = test[int(i * len(test))][0]
    evaluations.append((evaluate_vgg16(image, untouched_vgg16), test[int(i * len(test))][1]))
    image_list.append(torch.permute(image, (1, 2, 0)))

fig = px.imshow(np.array(image_list), binary_string=True, facet_col=0, facet_col_wrap=4)
fig.show()

for i in range(len(evaluations)):
    print(
        f"For facet {i} VGG16 has prediceted: {[i[1] for i in evaluations[i][0]]} while the true label is {label_mappings[evaluations[i][1]]}")

In [None]:
import torch
import keras

predictions: list[tuple[list, int]] = [(evaluate_vgg16(i[0], untouched_vgg16), i[1]) for i in test]

In [None]:
label_mappings = {0: ["chihuahua", "dog"], 1: ["muffin", "bakery"]}
TP = 0  # True positives
for i in range(len(predictions)):
    predicted_values = [j[1].lower() for j in predictions[i][0]]
    true_label = label_mappings[predictions[i][1]]

    TP += 1 if set(predicted_values) & set(true_label) else 0

precision = TP / len(predictions)
precision  # On the top 3 considering the fact that many miss classifications happen for the fact that the labels are more accurate for the dogs (some samples in the training set are not chihuahuas) and that Muffins do not have a real label.

## 2.2 - Fine tuning the model

### 2.2.1 -  Model definition

In [None]:
from models.structure.base_model_wrapper import BaseModelWrapper
from models.structure.augmentation_wrapper import InvertedChannelsAugmentationWrapper
import keras


# https://keras.io/guides/transfer_learning/#the-typical-transferlearning-workflow 
# With augmentation just to permute here
class VGG16Custom(BaseModelWrapper):
    latest_model: Functional

    def make_layers(self, input_shape: (int, int, int)) -> tuple[keras.Layer, keras.Layer]:
        C, W, H = input_shape
        inputs = keras.Input(input_shape)

        x = keras.applications.mobilenet.preprocess_input(inputs, data_format=self.data_format.value)
        x = keras.layers.Permute((2, 3, 1))(x)

        self.latest_model = keras.applications.VGG16(
            weights='imagenet', include_top=False, input_shape=(W, H, C)
        )

        self.latest_model.trainable = False
        x = self.latest_model(x, training=False)
        x = keras.layers.GlobalAveragePooling2D()(x)

        outputs = keras.layers.Dense(1, activation='sigmoid')(x)
        return inputs, outputs

### 2.2.2. - Training and evaluation on frozen base model

In [None]:
vgg_custom_wrapper = VGG16Custom()
model = vgg_custom_wrapper.make_model((3, 224, 224))
# Default configuration for the Xception model learning
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.summary()

In [None]:
history = model.fit(train_dataloader, epochs=20, validation_data=validation_dataloader, callbacks=[])

In [None]:
res = model.evaluate(test_dataloader)
print(f"Test accuracy is {res[1] * 100:.2f}% while loss is {res[0]}")

### 2.2.3 - Fine tuning on the whole structure 

In [None]:
vgg_custom_wrapper.latest_model.trainable = True
model.compile(optimizer=keras.optimizers.Adam(1e-5), loss="binary_crossentropy", metrics=["accuracy"])

history = model.fit(train_dataloader, epochs=10, validation_data=validation_dataloader)

## 2.3 - K fold CV

In [None]:
k_fold_controller = dataset_split_controller
results = []
for i in range(k_fold_controller.k):
    # Delete previous model in memory todo

    local_train, local_test = dataset_split_controller.get_data_for_fold(i)

    train_dataloader = DataLoader(dataset=local_train, batch_size=16, shuffle=True)
    test_dataloader = DataLoader(dataset=local_test, batch_size=16, shuffle=True)

    # Do we want to early stop? If so we need to split train further to have a validation split.
    model = vgg_custom_wrapper.make_model((3, 224, 224))
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

    model.fit(train_dataloader, epochs=20, callbacks=[])
    results.append(model.evaluate(test_dataloader))

In [None]:
from torch.utils.data import DataLoader
from dataset.k_fold_dataset_wrapper import KFoldDatasetWrapper

from dataset.dataset_loader import dataset_loader

train, test = dataset_loader((224, 224), is_grayscale=False)
dataset_split_controller = KFoldDatasetWrapper(5)
dataset_split_controller.load_data(train)

local_train, validation = dataset_split_controller.get_data_for_fold(0)

train_dataloader = DataLoader(dataset=local_train, batch_size=16, shuffle=True)
validation_dataloader = DataLoader(dataset=validation, batch_size=16, shuffle=True)
test_dataloader = DataLoader(dataset=test, batch_size=16, shuffle=True)