In [None]:
from torchvision import datasets, transforms
import torchvision
import torch

In [None]:
# Add the project root to sys.path so project-local imports resolve; this is only needed when running inside a notebook.
import os, sys
from pathlib import Path

def get_project_path(project_name: str) -> Path:
    """Locate the closest directory named ``project_name`` at or above the cwd.

    Args:
        project_name: Directory name to look for among the current working
            directory and its ancestors.

    Returns:
        The matching directory as an absolute ``Path``.

    Raises:
        ValueError: If the filesystem root is reached without finding a match.
    """
    start = Path(os.path.abspath(''))
    # Check the working directory first, then every ancestor up to the root.
    for candidate in (start, *start.parents):
        if candidate.name == project_name:
            return candidate
    raise ValueError(f"Project '{project_name}' not found in path hierarchy.")

# Try each known project-root directory name in order and expose the first one
# found on sys.path so that project-local packages can be imported.
for project_name in ("AIProjects", "machine-learning-portfolio"):
    try:
        project_path = get_project_path(project_name)
    except ValueError:
        print(f"Project '{project_name}' not found in path hierarchy.")
        continue
    # Re-running this cell must not pile up duplicate sys.path entries.
    if str(project_path) not in sys.path:
        sys.path.append(str(project_path))
    break


In [None]:
# Basic (required) transformations: convert each image to a tensor, then
# normalize every RGB channel with the mean/std below
# (presumably the CIFAR-10 training-set statistics -- TODO confirm).
channel_mean = (0.4914, 0.4822, 0.4465)
channel_std = (0.2470, 0.2435, 0.2616)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

# Build the training split first, then the test split; both share the same
# root directory and transform, and are downloaded if not already present.
train_dataset, test_dataset = (
    datasets.CIFAR10(
        root="../data",
        train=is_train_split,
        transform=transform,
        download=True,
    )
    for is_train_split in (True, False)
)

In [None]:
from torch.utils.data import DataLoader

# Options shared by both loaders; only shuffling differs between the splits.
loader_options = dict(batch_size=64, num_workers=2)

# Shuffle the training data; keep the test data in its stored order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Sanity check: pull a single training batch and print its shapes.
first_batch = iter(train_loader)
images, labels = next(first_batch)
print(images.shape)   # (64, 3, 32, 32)
print(labels.shape)   # (64,)


# Visualize some images

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import random as rnd

def imshow(img, title, mean=(0.4914, 0.4822, 0.4465), std=(0.2470, 0.2435, 0.2616)):
    """Display a normalized, channel-first image tensor with a title.

    The image is un-normalized with ``img * std + mean`` per channel before
    plotting. The defaults are the statistics passed to ``transforms.Normalize``
    in this notebook's ``transform``; the previous ``img / 2 + 0.5`` only
    inverts a mean=0.5 / std=0.5 normalization and therefore produced wrong
    colours and out-of-range values.

    Args:
        img: Image tensor laid out as (channels, height, width).
        title: Text shown above the image.
        mean: Per-channel mean that was subtracted during normalization.
        std: Per-channel standard deviation the image was divided by.
    """
    # detach()/cpu() make this work for tensors that track gradients or live on a GPU.
    channels_first = img.detach().cpu().numpy()
    # matplotlib expects (height, width, channels).
    channels_last = np.transpose(channels_first, (1, 2, 0))
    restored = (
        channels_last * np.asarray(std, dtype=channels_last.dtype)
        + np.asarray(mean, dtype=channels_last.dtype)
    )
    # Guard against tiny float overshoots so matplotlib does not warn about clipping.
    restored = np.clip(restored, 0.0, 1.0)
    plt.title(title)
    plt.imshow(restored)
    plt.show()

# Draw a reproducible random sample of four training examples.
rnd.seed(42)
ids = rnd.sample(range(len(train_loader.dataset)), 4)

# Fetch each sampled item once so the transform pipeline is not run twice per image.
samples = [train_loader.dataset[i] for i in ids]
images = [image for image, _ in samples]
labels = [train_loader.dataset.classes[target] for _, target in samples]

# Show every sampled image with its class name as the title.
for image, class_name in zip(images, labels):
    imshow(image, class_name)

# Distribution of classes in the training set

In [None]:
import seaborn as sns

from collections import Counter

# Count labels directly from the dataset's stored targets. Iterating the
# dataset itself would run the transform pipeline on every image just to read
# its label. Counter keeps first-seen order, so the bar order is unchanged.
class_names = train_loader.dataset.classes
label_counts = dict(
    Counter(class_names[target] for target in train_loader.dataset.targets)
)

sns.barplot(x=list(label_counts.keys()), y=list(label_counts.values()))
plt.xticks(rotation=45)
plt.title("Distribution of classes in the training set")
plt.show()

# Training a simple CNN

In [None]:
from computer_vision.src.baseline import BaselineModel
import torch.optim as optim

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the baseline model on the chosen device and an Adam optimizer over its parameters.
model = BaselineModel().to(device)
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
# Training loop: run a fixed number of epochs over the training loader and
# record, for each epoch, the sum of batch losses divided by the number of batches.
num_epochs = 10
loss_history = []
for epoch in range(num_epochs):
    model.train()
    batch_losses = []
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss = model.compute_loss(model(batch_images), batch_labels)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    epoch_loss = sum(batch_losses) / len(train_loader)
    loss_history.append(epoch_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

## Plot training loss

In [None]:
# Plot the loss recorded for each completed epoch. The x-axis is derived from
# loss_history itself so both series always have the same length, even if
# training stopped before num_epochs epochs were finished.
completed_epochs = list(range(1, len(loss_history) + 1))
fig, ax = plt.subplots()
sns.lineplot(x=completed_epochs, y=loss_history, ax=ax)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training Loss over Epochs")
plt.show()

# Evaluating the model

In [None]:
# Measure accuracy on the test loader: the share of samples whose
# highest-scoring output index equals the label.
model.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed while evaluating
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # argmax returns the winning index directly; this avoids the legacy
        # `.data` attribute and assigning to `_`, which IPython uses for the
        # last cell output.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

# Use BaseCNN with custom parameters

In [None]:
from utils.BaseCNN import BaseCNN

# Hyperparameters passed to BaseCNN, collected in one place for readability.
cnn_options = dict(
    num_classes=10,
    img_height=32,
    img_width=32,
    nb_conv_layers=2,
    nb_layers=2,
    net_width=512,
    dropout_rates=[0.25, 0.5],
)

# Replace the previous model and optimizer with the customised CNN.
model = BaseCNN(**cnn_options).to(device)
optimizer = optim.Adam(params=model.parameters(), lr=0.0005)

In [None]:
# Training loop for the customised CNN: run a fixed number of epochs and
# record, for each epoch, the sum of batch losses divided by the number of batches.
num_epochs = 10
loss_history = []
for epoch in range(num_epochs):
    model.train()
    batch_losses = []
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss = model.compute_loss(model(batch_images), batch_labels)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    epoch_loss = sum(batch_losses) / len(train_loader)
    loss_history.append(epoch_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

In [None]:
# Plot the loss recorded for each completed epoch. The x-axis is derived from
# loss_history itself so both series always have the same length, even if
# training stopped before num_epochs epochs were finished.
completed_epochs = list(range(1, len(loss_history) + 1))
fig, ax = plt.subplots()
sns.lineplot(x=completed_epochs, y=loss_history, ax=ax)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training Loss over Epochs")
plt.show()

In [None]:
# Measure accuracy on the test loader: the share of samples whose
# highest-scoring output index equals the label.
model.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed while evaluating
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # argmax returns the winning index directly; this avoids the legacy
        # `.data` attribute and assigning to `_`, which IPython uses for the
        # last cell output.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

# Finetuning and hyperparameter tuning

In [None]:
# TODO