In [None]:
from torchvision import datasets, transforms
import torchvision
import torch
import torch.optim as optim
from skorch import NeuralNetClassifier

import seaborn as sns
from itertools import islice
import numpy as np
import matplotlib.pyplot as plt


In [None]:
# Add the project root to sys.path so project-local imports resolve; this is only needed when running from a notebook.
import os, sys
from pathlib import Path

def get_project_path(project_name: str) -> Path:
    """Return the closest ancestor of the working directory named ``project_name``.

    The search starts at the current working directory itself and then walks
    upwards through every parent, up to and including the filesystem root.

    Args:
        project_name: Directory name to look for (compared against ``Path.name``).

    Returns:
        The first matching directory found while walking upwards.

    Raises:
        ValueError: If no directory in the hierarchy carries that name.
    """
    start = Path(os.path.abspath(''))
    # `parents` ends at the filesystem root, so the walk is always finite.
    for candidate in (start, *start.parents):
        if candidate.name == project_name:
            return candidate
    raise ValueError(f"Project '{project_name}' not found in path hierarchy.")

# Try each candidate project directory name in order and expose the first one
# found on sys.path so that project-local packages can be imported below.
for project_name in ("AIProjects", "machine-learning-portfolio"):
    try:
        project_path = get_project_path(project_name)
    except ValueError:
        # Best effort: report the miss and fall through to the next candidate.
        print(f"Project '{project_name}' not found in path hierarchy.")
        continue
    # Re-running this cell must not stack duplicate entries on sys.path.
    if str(project_path) not in sys.path:
        sys.path.append(str(project_path))
    break


In [None]:
# Image height and width in pixels; the model constructors below receive these.
IMG_HEIGHT, IMG_WIDTH = 32, 32

# Folder for figures written by this notebook. It is created up front,
# including missing parents, and an already existing folder is not an error.
FIGURES_DIR = Path("figures/cifar10/")
FIGURES_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# Basic (mandatory) transformations: convert each image to a tensor, then
# normalise every channel with a fixed mean and standard deviation.
# NOTE(review): the mean/std triples look like the commonly used CIFAR-10
# per-channel statistics — confirm their origin.
normalize = transforms.Normalize(
    mean=(0.4914, 0.4822, 0.4465),
    std=(0.2470, 0.2435, 0.2616),
)
transform = transforms.Compose([transforms.ToTensor(), normalize])

# Both splits share the root folder, the transform and the download flag;
# only the `train` flag differs.
shared_dataset_kwargs = {"root": "../data", "transform": transform, "download": True}

train_dataset = datasets.CIFAR10(train=True, **shared_dataset_kwargs)
test_dataset = datasets.CIFAR10(train=False, **shared_dataset_kwargs)

# Visualize some images

In [None]:
# Take the first seven (image, label) pairs of the training set and regroup
# them into one tuple of images and one tuple of labels.
first_examples = islice(train_dataset, 7)
X_example, y_example = zip(*first_examples)

In [None]:
from computer_vision.src.utils import plot_example

# Stack the sampled image tensors into a single batch tensor and hand it to the
# project's plotting helper together with the labels and the class names.
example_batch = torch.stack(X_example)
plot_example(example_batch, y_example, train_dataset.classes, n=7);

# Distribution of classes in the training set

In [None]:
from computer_vision.src.utils import plot_label_distribution

# File path inside the figures folder that is passed to the plotting helper.
# NOTE(review): presumably the helper saves the figure there — confirm in utils.
figures_name = FIGURES_DIR.joinpath("class_distribution.png")

plot_label_distribution(train_dataset, figures_name)

# Training a baseline model

In [None]:
# Label arrays handed to skorch / scikit-learn alongside the datasets.
# They are read straight from the datasets' `targets` lists: iterating the
# datasets would load and transform every image only to discard it.
# This matches the iterated labels as long as no `target_transform` is set
# (none is passed when the datasets are created).
y_train = np.array(train_dataset.targets)
y_test = np.array(test_dataset.targets)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
from computer_vision.src.baseline import BaselineModel

# Fix torch's global RNG seed before the estimator is built.
torch.manual_seed(0)

# Size passed to the module as `input_dim`: width * height * 3.
# NOTE(review): the 3 presumably stands for the colour channels — confirm
# against BaselineModel.
flat_input_dim = IMG_WIDTH * IMG_HEIGHT * 3

baseline = NeuralNetClassifier(
    BaselineModel,
    max_epochs=10,
    # Two DataLoader worker processes for the training and validation iterators.
    iterator_train__num_workers=2,
    iterator_valid__num_workers=2,
    lr=0.1,
    device=device,
    # `module__*` arguments are forwarded to the BaselineModel constructor.
    module__input_dim=flat_input_dim,
)

In [None]:
# Train the baseline on the training split; the trailing semicolon keeps the
# notebook from echoing the value returned by fit().
baseline.fit(train_dataset, y=y_train);

## Plot training and validation loss

In [None]:
# Per-epoch losses recorded in the baseline's skorch history.
train_loss_history = baseline.history[:, 'train_loss']
val_loss_history = baseline.history[:, 'valid_loss']

# Draw both curves on an explicit Axes rather than on whatever figure pyplot
# currently considers active; epochs are numbered from 1.
fig, ax = plt.subplots()
for curve_label, losses in (
    ('Train Loss', train_loss_history),
    ('Validation Loss', val_loss_history),
):
    sns.lineplot(x=range(1, len(losses) + 1), y=losses, label=curve_label, ax=ax)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training Loss over Epochs")
plt.show()

## Evaluating the model

In [None]:
# Score the baseline on the test split and report it as a percentage with two
# decimals.
baseline_test_accuracy = baseline.score(test_dataset, y_test)
print(f"Test set Accuracy: {100 * baseline_test_accuracy:.2f}%")

# Use BaseCNN with custom parameters

In [None]:
# NOTE(review): this import goes through a top-level `utils` package, while the
# other project imports in this notebook use `computer_vision.src...` — confirm
# it resolves from the project root that was added to sys.path.
from utils.BaseCNN import BaseCNN
from sklearn.model_selection import GridSearchCV
from skorch.helper import SliceDataset

# Seed torch here as well, as the baseline cell does, so the CNN starts from
# the same random state whether or not the baseline cells were run before it.
torch.manual_seed(0)

cnn = NeuralNetClassifier(
    BaseCNN,
    max_epochs=10,
    lr=0.001,
    optimizer=optim.Adam,
    device=device,
    # `module__*` arguments are forwarded to the BaseCNN constructor.
    module__num_classes=10,
    module__img_height=IMG_HEIGHT,
    module__img_width=IMG_WIDTH,
    module__nb_conv_layers=2,
    module__nb_layers=2,
    module__net_width=256,
    # NOTE(review): presumably one dropout rate per layer group — confirm
    # against BaseCNN.
    module__dropout_rates=[0.25, 0.5],
)


In [None]:
# Train the CNN on the training split. This is the last expression of the cell
# and has no trailing semicolon, so the notebook echoes the value fit() returns.
cnn.fit(train_dataset, y_train)

In [None]:
# Per-epoch losses recorded in the CNN's skorch history.
train_loss_history = cnn.history[:, 'train_loss']
valid_loss_history = cnn.history[:, 'valid_loss']

# Draw both curves on an explicit Axes rather than on whatever figure pyplot
# currently considers active; epochs are numbered from 1.
fig, ax = plt.subplots()
for curve_label, losses in (
    ('Train Loss', train_loss_history),
    ('Validation Loss', valid_loss_history),
):
    sns.lineplot(x=range(1, len(losses) + 1), y=losses, label=curve_label, ax=ax)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training Loss over Epochs")
plt.show()

In [None]:
# Score the CNN on the test split and report it as a percentage with two
# decimals.
cnn_test_accuracy = cnn.score(test_dataset, y_test)
print(f"Test set Accuracy: {100 * cnn_test_accuracy:.2f}%")

# Hyperparameter Tuning with Grid Search

In [None]:
# Template estimator for the grid search; the searched hyperparameters are
# overridden per candidate.
# NOTE(review): unlike `baseline` and `cnn`, no `device` is passed here, so
# skorch's default device is used — confirm this is intended.
fixed_module_kwargs = dict(
    module__num_classes=10,
    module__img_height=IMG_HEIGHT,
    module__img_width=IMG_WIDTH,
)

net = NeuralNetClassifier(
    BaseCNN,
    max_epochs=10,
    lr=0.01,
    # skorch holds out no internal validation split for this estimator.
    train_split=False,
    **fixed_module_kwargs,
)

In [None]:
# Hyperparameter grid: two values per entry, six entries.
# The learning rate is not searched; add 'lr': [0.01, 0.02] to include it.
params = dict(
    max_epochs=[10, 20],
    optimizer=[optim.Adam, optim.SGD],
    # `module__*` entries are forwarded to the module constructor.
    module__nb_conv_layers=[2, 3],
    module__nb_layers=[2, 3],
    module__net_width=[128, 256],
    module__dropout_rates=[[0], [0.25, 0.5]],
)

In [None]:
# Exhaustive search over `params` with 3-fold cross-validation, scored by
# accuracy. n_jobs=-1 uses all available processors; verbose=2 makes
# scikit-learn log progress during the search.
grid = GridSearchCV(
    estimator=net,
    param_grid=params,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2,
)

In [None]:
# Wrap the training set in skorch's SliceDataset before handing it to the grid
# search. idx=0 is the helper's default, spelled out here: it selects the first
# element of each sample (the image).
train_dataset_sliceable = SliceDataset(train_dataset, idx=0)

In [None]:
# Run the grid search on the sliceable training set. The grid has 2**6 = 64
# combinations and cv=3, so this performs 192 cross-validation fits.
# As the last expression of the cell, the returned value is echoed.
grid.fit(train_dataset_sliceable, y_train)

In [None]:
# Report the winning hyperparameter combination and its cross-validation score.
best_params = grid.best_params_
best_cv_accuracy = grid.best_score_
print("Best parameters found: ", best_params)
print("Best cross-validation accuracy: ", best_cv_accuracy)

In [None]:
# Score the fitted grid search on the test split, wrapped in SliceDataset in
# the same way as the training set.
test_dataset_sliceable = SliceDataset(test_dataset)
grid_test_accuracy = grid.score(test_dataset_sliceable, y_test)
print("Test set accuracy: ", grid_test_accuracy)