# **Uncertainty Project -- Deep Learning**

---

_Fabio TOCCO, Antoine GUIDON, Yelman YAHI, Anis OUEDGHIRI, Ram NADER_


## Imports


In [None]:
import os
import random
from typing import Literal
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt

import tools

import torch
import torch.nn as nn
from torch.utils.data import random_split

from torchvision import transforms, datasets
from torchvision.transforms import functional as TF

## Setup


In [None]:
# Project directories: both live one level above the current working directory.
DATA_ROOT = os.path.join(os.path.pardir, "data")
MODELS_ROOT = os.path.join(os.path.pardir, "models")

# Make sure both directories exist before any cell reads from or writes to them.
for project_dir in (DATA_ROOT, MODELS_ROOT):
    os.makedirs(project_dir, exist_ok=True)

# Device configuration: use CUDA when PyTorch reports it as available, else the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(device_name)
print(f"Selected device: {DEVICE}")

## Hyperparameters (DO NOT CHANGE)


In [None]:
# Training
EPOCHS: int = 10  # passed as `epochs` to every training call in this notebook
LEARNING_RATE: float = 1e-4  # learning rate of the Adam optimizers
WEIGHT_DECAY: float = 1e-4  # weight decay of the Adam optimizers
CRITERION: nn.Module = nn.CrossEntropyLoss()  # loss used for training and evaluation
BATCH_SIZE: int = 256

# Performance
# Half of the CPU count reported by the OS; `os.cpu_count()` may return None, in which
# case the expression evaluates to 0.
NUM_WORKERS: int = (os.cpu_count() or 0) // 2
print(f"NUM_WORKERS: {NUM_WORKERS}")

## Parameters (change for different training)


In [None]:
RESIZE_VALUE: int = 32  # resize dataset images to RESIZE_VALUE x RESIZE_VALUE

# Forwarded as `force_retrain` to tools.load_or_train_ensemble in the ensemble experiments.
FORCE_RETRAIN: bool = False

# Name of the normalization scheme, forwarded to tools.get_data_transforms,
# tools.get_model_name and tools.load_or_train_ensemble.
NORMALIZATION: Literal["MNIST", "ImageNet"] = "MNIST"
SEED: int = 0
# Forwarded as `shuffle` to tools.get_loaders and tools.load_or_train_ensemble.
SHUFFLE: bool = False

# Presumably seeds the Python / NumPy / PyTorch random generators — confirm against
# tools.seed_everything. Called once here, before any dataset or model is created.
tools.seed_everything(seed=SEED)

## Datasets & DataLoaders


In [None]:
# Preprocessing pipeline shared by the training and test datasets.
data_transforms = tools.get_data_transforms(
    data_root=DATA_ROOT, resize_value=RESIZE_VALUE, normalization=NORMALIZATION
)

train_data = datasets.MNIST(
    DATA_ROOT,
    train=True,
    download=True,
    transform=data_transforms,
)
print(f"Number of train samples: {len(train_data)}")

test_data = datasets.MNIST(
    DATA_ROOT,
    train=False,
    download=True,
    transform=data_transforms,
)
print(f"Number of test samples: {len(test_data)}")

# Read before the split below: the Subset returned by random_split has no `.classes`.
NUM_CLASSES: int = len(train_data.classes)

# Define the validation set by splitting the training data into 2 subsets
# (80% training and 20% validation).
# The split uses its own generator seeded with SEED, so re-running this cell always
# produces the same subsets regardless of how much of the global RNG was consumed.
# NOTE: this assigns samples differently from the previous unseeded call; retrain any
# cached checkpoints if their validation metrics must stay comparable.
n_train_samples = int(len(train_data) * 0.8)
n_validation_samples = len(train_data) - n_train_samples
train_data, validation_data = random_split(
    train_data,
    [n_train_samples, n_validation_samples],
    generator=torch.Generator().manual_seed(SEED),
)

# NOTE(review): `drop_last=True` is handed to tools.get_loaders for all three datasets;
# confirm whether it also applies to the test loader (dropping the last partial batch
# would exclude some test samples from the reported metrics).
train_loader, validation_loader, test_loader = tools.get_loaders(
    train_data,
    validation_data,
    test_data,
    shuffle=SHUFFLE,
    batch_size=BATCH_SIZE,
    drop_last=True,
    num_workers=NUM_WORKERS,
)

## Random MNIST samples


In [None]:
# Draw 20 samples from the test set; later cells reuse them (Experiments #2 and #5).
# NOTE(review): presumably returns (image, label) pairs plus their dataset indices —
# the later cells unpack RANDOM_SAMPLES that way; confirm against tools.get_random_samples.
RANDOM_SAMPLES, RANDOM_SELECTED_INDEXES = tools.get_random_samples(
    test_data, set_size=len(test_data), seed=SEED, num_samples=20
)

## Experiment #1


### Version 1 - Random weights


In [None]:
# Version 1 starts from random weights (no pre-training).
PRETRAINED: bool = False

# The model name is also used as the name of its folder under MODELS_ROOT.
model_name = tools.get_model_name(
    pretrained=PRETRAINED, seed=SEED, normalization=NORMALIZATION
)
model_dir = os.path.join(MODELS_ROOT, model_name)
os.makedirs(model_dir, exist_ok=True)

In [None]:
# If the model files already exist on disk, you can simply load the model, or load its
# saved metrics to plot them in the other cells.
# NOTE(review): whether tools.train_model itself skips training when the checkpoint
# already exists is not visible from this notebook — confirm before relying on it.

v1_model = tools.make_resnet18(NUM_CLASSES, pretrained=PRETRAINED)

# Adam over all parameters of the Version 1 model.
OPTIMIZER = torch.optim.Adam(
    v1_model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# train_model returns five values; only the first (the model) is kept here.
v1_model, _, _, _, _ = tools.train_model(
    model=v1_model,
    train_loader=train_loader,
    validation_loader=validation_loader,
    criterion=CRITERION,
    optimizer=OPTIMIZER,
    epochs=EPOCHS,
    device=DEVICE,
    file_path=os.path.join(model_dir, model_name + ".pt"),
    verbose=True,
    save_plots=True,
)

# Final evaluation on the held-out test loader.
test_loss, test_accuracy = tools.evaluate(
    v1_model, test_loader, criterion=CRITERION, device=DEVICE
)

# test_accuracy is scaled by 100 for display, i.e. it is treated as a fraction.
print(
    f"{model_name} -- Loss on test set: {test_loss:.4f} | Accuracy on test set: {100 * test_accuracy:.2f}%",
)

In [None]:
# Per-epoch metrics of the Version 1 model, loaded from its model directory.
df_metrics = tools.load_metrics(model_dir)

# One entry per figure: (y-axis label, title, [(series, legend label), ...]).
# Accuracies are multiplied by 100 so they are displayed as percentages.
figure_specs = [
    (
        "Accuracy (%)",
        "Version #1 - Evolution of accuracy over epochs",
        [
            (df_metrics["train_acc"] * 100, "Train Accuracy"),
            (df_metrics["val_acc"] * 100, "Validation Accuracy"),
        ],
    ),
    (
        "Loss",
        "Version #1 - Evolution of loss over epochs",
        [
            (df_metrics["train_loss"], "Train Loss"),
            (df_metrics["val_loss"], "Validation Loss"),
        ],
    ),
]

for y_axis_label, figure_title, curves in figure_specs:
    plt.figure()
    for series, curve_label in curves:
        plt.plot(df_metrics["epoch"], series, label=curve_label, linewidth=2)
    plt.xlabel("Epoch", fontsize=12)
    plt.ylabel(y_axis_label, fontsize=12)
    plt.title(figure_title, fontsize=14, fontweight="bold")
    plt.xticks(df_metrics["epoch"])  # one tick per recorded epoch
    plt.legend()
    plt.grid()
    plt.tight_layout()
    plt.show()

### Version 2 - Pre-trained weights on ImageNet


In [None]:
# Version 2 starts from weights pre-trained on ImageNet.
# NOTE(review): PRETRAINED stays True for every later cell that reads it (the ensemble
# calls of Experiments #3 and #4) until Experiment #5 sets it back to False — confirm
# that this is the intended ensemble for those experiments.
PRETRAINED: bool = True

# The model name is also used as the name of its folder under MODELS_ROOT.
model_name = tools.get_model_name(
    pretrained=PRETRAINED, seed=SEED, normalization=NORMALIZATION
)
model_dir = os.path.join(MODELS_ROOT, model_name)
os.makedirs(model_dir, exist_ok=True)

In [None]:
# If the model files already exist on disk, you can simply load the model, or load its
# saved metrics to plot them in the other cells.
# NOTE(review): whether tools.train_model itself skips training when the checkpoint
# already exists is not visible from this notebook — confirm before relying on it.

v2_model = tools.make_resnet18(NUM_CLASSES, pretrained=PRETRAINED)

# Adam over all parameters of the Version 2 model (replaces the Version 1 optimizer).
OPTIMIZER = torch.optim.Adam(
    v2_model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# train_model returns five values; only the first (the model) is kept here.
v2_model, _, _, _, _ = tools.train_model(
    model=v2_model,
    train_loader=train_loader,
    validation_loader=validation_loader,
    criterion=CRITERION,
    optimizer=OPTIMIZER,
    epochs=EPOCHS,
    device=DEVICE,
    file_path=os.path.join(model_dir, model_name + ".pt"),
    verbose=True,
    save_plots=True,
)

# Final evaluation on the held-out test loader.
test_loss, test_accuracy = tools.evaluate(
    v2_model, test_loader, criterion=CRITERION, device=DEVICE
)

# test_accuracy is scaled by 100 for display, i.e. it is treated as a fraction.
print(
    f"{model_name} -- Loss on test set: {test_loss:.4f} | Accuracy on test set: {100 * test_accuracy:.2f}%",
)

In [None]:
# Per-epoch metrics of the Version 2 model, loaded from its model directory.
df_metrics = tools.load_metrics(model_dir)

# --- Accuracy curves (values are scaled by 100 to display percentages) ---
plt.figure()
plt.plot(
    df_metrics["epoch"],
    df_metrics["train_acc"] * 100,
    label="Train Accuracy",
    linewidth=2,
)
plt.plot(
    df_metrics["epoch"],
    df_metrics["val_acc"] * 100,
    label="Validation Accuracy",
    linewidth=2,
)
plt.xlabel("Epoch", fontsize=12)
plt.ylabel("Accuracy (%)", fontsize=12)
# Title fixed: this cell plots Version 2, the copied title still said "Version #1".
plt.title(
    "Version #2 - Evolution of accuracy over epochs", fontsize=14, fontweight="bold"
)
plt.xticks(df_metrics["epoch"])  # one tick per recorded epoch
plt.legend()
plt.grid()
plt.tight_layout()
plt.show()


# --- Loss curves ---
plt.figure()
plt.plot(
    df_metrics["epoch"],
    df_metrics["train_loss"],
    label="Train Loss",
    linewidth=2,
)
plt.plot(
    df_metrics["epoch"],
    df_metrics["val_loss"],
    label="Validation Loss",
    linewidth=2,
)
plt.xlabel("Epoch", fontsize=12)
plt.ylabel("Loss", fontsize=12)
# Title fixed for the same reason as above.
plt.title("Version #2 - Evolution of loss over epochs", fontsize=14, fontweight="bold")
plt.xticks(df_metrics["epoch"])
plt.legend()
plt.grid()
plt.tight_layout()
plt.show()

## Experiment #2


In [None]:
# Baseline for the noise study: predictions of the Version 2 model on the clean
# RANDOM_SAMPLES, before any noise is added.
fig2 = tools.visualize_predictions(
    model=v2_model,
    samples=RANDOM_SAMPLES,
    device=DEVICE,
    title="Model Version 2 - Pretrained Weights (ImageNet)",
)
plt.show()

In [None]:
# Split RANDOM_SAMPLES into one stacked image tensor and the matching list of labels.
# Expected image shape: (20, 3, RESIZE_VALUE, RESIZE_VALUE) — assumes 3-channel inputs;
# TODO confirm against tools.get_data_transforms.
imgs = torch.stack([image for image, _label in RANDOM_SAMPLES])
labels = [target for _image, target in RANDOM_SAMPLES]


NOISE_LEVELS: list[float] = [0.1, 0.2, 0.3, 0.4, 0.5]

# Visualise the Version 2 predictions at each Gaussian noise level.
for level in NOISE_LEVELS:
    print(f"\n{'=' * 70}")
    print(f"Gaussian Noise Level: {level}")
    print(f"{'=' * 70}\n")

    # A level of exactly 0.0 means "no noise": reuse the clean batch untouched.
    # gray=True: grayscale noise, according to the original author's note.
    noisy_imgs = (
        imgs
        if level == 0.0
        else tools.add_pixel_noise(imgs, level=level, gray=True)
    )

    # Rebuild the (image, label) samples expected by the visualisation helper.
    noisy_samples = list(zip(noisy_imgs, labels))

    # Show the predictions on the noisy samples.
    fig = tools.visualize_predictions(
        v2_model,
        noisy_samples,
        device=DEVICE,
        figsize=(20, 8),
        title=f"Bruit Gaussien (level={level})",
    )
    plt.show()

## Experiment #3


In [None]:
# ================================================================
# EXPERIMENT 3 — White noise on MNIST (single best model vs. ensemble)
#
# Prerequisites BEFORE running this cell:
#   - `tools` is imported
#   - the notebook globals DATA_ROOT, RESIZE_VALUE, NORMALIZATION, SEED, DEVICE are defined,
#     together with everything passed to tools.load_or_train_ensemble below
#     (loaders, hyperparameters, MODELS_ROOT, PRETRAINED, ...)
#   - models: the 7 models trained on MNIST; they do NOT have to be defined by hand,
#     this cell gets them (and their .pt paths, model_paths) from tools.load_or_train_ensemble
#   - tools.seed_everything(SEED) has already been called in the notebook
#
# What this cell does:
#   1) Loads the 7 models and selects the best one on the full MNIST test set
#   2) Selects 20 images from the MNIST test set (reproducible)
#   3) Applies Gaussian white noise at levels: [0.0, 0.2, 0.3, 0.4, 0.5]
#   4) For each level: builds a grid (20 images) annotated with S:pred_single(conf) and E:pred_ensemble(conf)
#      -> Saved files: exp3/noise_grid_level_0.0.png ... 0.5.png
#   5) Plots the accuracy as a function of the noise level (single vs. ensemble)
#      -> File: exp3/noise_accuracy_curve.png
# ================================================================

import os, random
from pathlib import Path

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import functional as TF

import numpy as np
import matplotlib.pyplot as plt
import tools

# Obtain the 7-model ensemble and the paths of its checkpoints.
# NOTE(review): PRETRAINED holds whatever the last executed cell assigned (True after
# the Version 2 cell, False after Experiment #5) — confirm which ensemble is intended.
models, model_paths = tools.load_or_train_ensemble(
    num_models=7,
    num_classes=NUM_CLASSES,
    train_loader=train_loader,
    validation_loader=validation_loader,
    test_loader=test_loader,
    criterion=CRITERION,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
    batch_size=BATCH_SIZE,
    device=DEVICE,
    models_root=MODELS_ROOT,
    pretrained=PRETRAINED,
    shuffle=SHUFFLE,
    normalization=NORMALIZATION,
    force_retrain=FORCE_RETRAIN,  # True to force retraining
    partial_load=True,  # True to load existing models
    verbose=True,
)

# --------- Output directory ---------
EXP3_DIR = Path("exp3")
EXP3_DIR.mkdir(exist_ok=True)

# --------- Sanity checks ---------
# The former `try: DEVICE / except NameError` fallback was removed: DEVICE is already
# read by the call above, so a missing DEVICE fails there and the fallback never ran.
# The `"models" in globals()` test was dropped too, since `models` was just assigned.
assert len(models) == 7, (
    f"Expected 7 models from tools.load_or_train_ensemble, got {len(models)}"
)

# --------- Data & transforms ---------
tf = tools.get_data_transforms(DATA_ROOT, RESIZE_VALUE, NORMALIZATION)
to_tensor = transforms.ToTensor()  # PIL -> [0,1] tensor
to_pil = transforms.ToPILImage()  # tensor -> PIL

# Datasets
# mnist_raw has no transform, so noise can be added to the raw samples before `tf` is
# applied; mnist_test applies `tf` directly and is used to rank the models.
mnist_raw = datasets.MNIST(DATA_ROOT, train=False, download=True, transform=None)
mnist_test = datasets.MNIST(DATA_ROOT, train=False, download=True, transform=tf)

# Loader used to evaluate each model on the full test set
test_loader_mnist = DataLoader(mnist_test, batch_size=256, shuffle=False, num_workers=0)

# Pick 20 indices with a dedicated random.Random seeded with SEED (reproducible)
rng = random.Random(SEED)
mnist_idx = sorted(rng.sample(range(len(mnist_raw)), 20))
print(f"[Exp3] Indices MNIST (20) : {mnist_idx}")

# --------- Load the ensemble members from their checkpoints ---------
nets = []
for p in model_paths:
    # NUM_CLASSES replaces the hard-coded 10 so the classifier head is built with the
    # same class count that was passed to tools.load_or_train_ensemble.
    # pretrained=False: presumably the checkpoint supplies the weights — confirm
    # against tools.load_model.
    net = tools.make_resnet18(num_classes=NUM_CLASSES, pretrained=False)
    net = tools.load_model(net, file_path=p, device=DEVICE).eval()
    nets.append(net)
print(f"[Exp3] {len(nets)} modèles chargés.")

# --------- Select the best model (accuracy on the full MNIST test set) ---------
crit = nn.CrossEntropyLoss()
accs = []
for i, m in enumerate(nets, 1):
    loss, acc = tools.evaluate(m, test_loader_mnist, criterion=crit, device=DEVICE)
    accs.append(acc)
    print(f"Model {i}: Loss={loss:.4f}, Acc={acc * 100:.2f}%")
# np.argmax returns the first maximum, so ties go to the earliest model.
best_i = int(np.argmax(accs))
best_model = nets[best_i]
print(f"[Exp3] Meilleur modèle : model_{best_i + 1} (Acc={accs[best_i] * 100:.2f}%)")


# --------- Helpers ---------
@torch.no_grad()
def softmax_single(model, x):
    """Return the class probabilities predicted by ``model`` for the batch ``x``.

    ``x`` is moved to the notebook-level ``DEVICE`` before the forward pass and the
    softmax is taken over dimension 1 of the model output. Gradient tracking is
    disabled for the whole call.
    """
    logits = model(x.to(DEVICE))
    return torch.softmax(logits, dim=1)


@torch.no_grad()
def softmax_ensemble(model_list, x):
    """Average the softmax outputs of every model in ``model_list`` for the batch ``x``.

    Each model is evaluated with ``softmax_single``; the probabilities are summed and
    divided by the number of models.

    Raises:
        ValueError: if ``model_list`` is empty (this used to surface as an opaque
            ``TypeError`` caused by dividing ``None``).
    """
    if len(model_list) == 0:
        raise ValueError("softmax_ensemble requires at least one model")

    total = None
    for member in model_list:
        probs = softmax_single(member, x)
        total = probs if total is None else (total + probs)
    return total / len(model_list)


def add_gaussian_noise_pil(pil_img, sigma):
    """Return a PIL copy of ``pil_img`` with additive Gaussian noise of scale ``sigma``.

    The image is converted with the notebook-level ``to_tensor``, standard-normal noise
    multiplied by ``sigma`` is added element-wise, the result is clamped to [0, 1] and
    converted back with ``to_pil``. When ``sigma`` is not strictly positive no noise is
    drawn, but the image still goes through the tensor round trip.
    """
    tensor = to_tensor(pil_img)
    if not sigma > 0:
        return to_pil(tensor)
    noisy = tensor + torch.randn_like(tensor) * sigma
    return to_pil(noisy.clamp(0.0, 1.0))


def build_noisy_batch_from_indices(raw_ds, idx_list, sigma):
    """Build a noisy, transformed batch from the samples of ``raw_ds`` at ``idx_list``.

    Noise is added to each raw image with ``add_gaussian_noise_pil`` first; the
    notebook-level transform ``tf`` is applied afterwards.

    Returns:
        A tuple ``(pil_list, x, y)``: the noisy images, the stacked transformed batch,
        and the labels as an int64 tensor.
    """
    pil_list, labels = [], []
    # The generator keeps "fetch sample, then add noise" interleaved, one sample at a time.
    for clean_img, target in (raw_ds[k] for k in idx_list):
        pil_list.append(add_gaussian_noise_pil(clean_img, sigma))
        labels.append(int(target))

    batch = torch.stack(list(map(tf, pil_list)), dim=0)
    targets = torch.tensor(labels, dtype=torch.long)
    return pil_list, batch, targets


def save_grid(pil_imgs, y_true, pred_s, conf_s, pred_e, conf_e, title, out_path):
    """Save a grid of images annotated with single-model and ensemble predictions.

    Images are laid out 10 per row. Each title shows the true label ``y``, the single
    model prediction and confidence (``S:``) and the ensemble ones (``E:``).

    Args:
        pil_imgs: images to display (anything ``imshow`` accepts).
        y_true, pred_s, conf_s, pred_e, conf_e: per-image sequences indexed like
            ``pil_imgs``.
        title: figure-level title.
        out_path: destination of the saved figure.

    Raises:
        ValueError: if ``pil_imgs`` is empty.
    """
    n = len(pil_imgs)
    if n == 0:
        # Fail early with a clear message instead of an error from deep inside matplotlib.
        raise ValueError("save_grid needs at least one image")

    cols = 10
    rows = (n + cols - 1) // cols  # ceiling division
    fig, axes = plt.subplots(rows, cols, figsize=(1.6 * cols, 2.2 * rows))
    try:
        flat_axes = axes.flatten()
        for i in range(n):
            ax = flat_axes[i]
            ax.imshow(pil_imgs[i], cmap="gray")
            ax.set_title(
                f"y={y_true[i]}\nS:{pred_s[i]}({conf_s[i]:.2f})  E:{pred_e[i]}({conf_e[i]:.2f})",
                fontsize=7,
            )
            ax.axis("off")
        # Hide the unused cells of the last row.
        for unused_ax in flat_axes[n:]:
            unused_ax.axis("off")
        fig.suptitle(title, fontsize=14, y=1.02)
        fig.tight_layout()
        fig.savefig(out_path, dpi=150, bbox_inches="tight")
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)


# --------- Experiment: sweep over the noise levels ---------
NOISE_LEVELS = [0.0, 0.2, 0.3, 0.4, 0.5]
acc_single, acc_ens = [], []

for sigma in NOISE_LEVELS:
    # Noisy batch for this level (same 20 indices at every level).
    pil_list, x, y = build_noisy_batch_from_indices(mnist_raw, mnist_idx, sigma)
    y_np = y.numpy()

    # Class probabilities from the best single model and from the ensemble average.
    p_single = softmax_single(best_model, x)
    p_ensemble = softmax_ensemble(nets, x)

    # Top-1 class and its probability (used as the confidence) for both predictors.
    pred_s, pred_e = (p.argmax(1).cpu().numpy() for p in (p_single, p_ensemble))
    conf_s, conf_e = (p.max(1).values.cpu().numpy() for p in (p_single, p_ensemble))

    # Accuracy on this batch.
    a_s = (pred_s == y_np).mean()
    a_e = (pred_e == y_np).mean()
    acc_single.append(a_s)
    acc_ens.append(a_e)

    # Save the annotated grid for this level.
    out_name = EXP3_DIR / f"noise_grid_level_{sigma:.1f}.png"
    save_grid(
        pil_list,
        y_np,
        pred_s,
        conf_s,
        pred_e,
        conf_e,
        title=f"Exp3 — MNIST (20 images) • Bruit σ={sigma:.1f}",
        out_path=out_name,
    )
    print(
        f"[Exp3] Grid sauvegardée → {out_name} | Acc Single={a_s * 100:.1f}%  Ens={a_e * 100:.1f}%"
    )

# --------- Accuracy vs. noise level curve ---------
plt.figure(figsize=(6, 4))
# One line per predictor; accuracies are scaled by 100 to display percentages.
for series, series_label in (
    (acc_single, "Single (meilleur modèle)"),
    (acc_ens, "Ensemble (moy. softmax)"),
):
    plt.plot(NOISE_LEVELS, np.array(series) * 100, marker="o", label=series_label)
plt.xlabel("Niveau de bruit σ")
plt.ylabel("Accuracy (%)")
plt.title("Exp3 — Accuracy vs. niveau de bruit (MNIST, 20 images)")
plt.ylim(0, 100)
plt.grid(True, alpha=0.3)
plt.legend()
plt.tight_layout()

# Save the figure to disk, then close it.
acc_curve_path = EXP3_DIR / "noise_accuracy_curve.png"
plt.savefig(acc_curve_path, dpi=150)
plt.close()
print(f"[Exp3] Courbe sauvegardée → {acc_curve_path}")

print("\n[Exp3] Terminé. Fichiers générés dans le dossier: exp3/")

## Experiment #4


In [None]:
# ================================================================
# EXPERIMENT 4 – MNIST vs KMNIST (single model vs. ensemble)
#
# For this cell to work, BEFORE running it you must:
#    1. Have imported the `tools` module (which provides load_model, make_resnet18, get_data_transforms, evaluate, seed_everything, etc.)
#    2. Have defined in the notebook:
#         - DATA_ROOT : path of the MNIST/KMNIST datasets
#         - RESIZE_VALUE : input size (e.g. 32 or 224)
#         - NORMALIZATION : "MNIST" or "ImageNet"
#         - SEED : integer used for reproducibility
#         - DEVICE : torch.device("cuda") or torch.device("cpu")
#         - everything passed to tools.load_or_train_ensemble below
#           (loaders, hyperparameters, MODELS_ROOT, PRETRAINED, ...)
#         - models : the 7 models; they do NOT have to be defined by hand, this cell
#           gets them (and their full .pt paths, model_paths) from tools.load_or_train_ensemble
#
# Minimal example BEFORE this cell:
#   import tools
#   DATA_ROOT = "./data"
#   RESIZE_VALUE = 32
#   NORMALIZATION = "MNIST"
#   SEED = 0
#   DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#   tools.seed_everything(SEED)
#   NOTE: a hand-written list such as
#   models = [
#       r"C:\...\resnet18_normMNIST_no-shuffle_seed0_1\resnet18_normMNIST_no-shuffle_seed0_1.pt",
#       ...
#   ]  # 7 .pt files
#   is not required, because this cell overwrites `models` itself.
#
# This cell:
#    • Loads the 7 models
#    • Selects 20 MNIST images and 20 KMNIST images
#    • Identifies the best (single) model on the full MNIST test set
#    • Compares single vs. ensemble predictions (mean softmax)
#    • Prints the accuracies and saves 2 image grids (MNIST & KMNIST)
#    • Stores the results in the "exp4/" folder
# ================================================================

import os, random, csv
from pathlib import Path

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms.functional import to_pil_image

import numpy as np
import matplotlib.pyplot as plt
import tools

# Obtain the 7-model ensemble and the paths of its checkpoints.
# NOTE(review): PRETRAINED holds whatever the last executed cell assigned (True after
# the Version 2 cell, False after Experiment #5) — confirm which ensemble is intended.
models, model_paths = tools.load_or_train_ensemble(
    num_models=7,
    num_classes=NUM_CLASSES,
    train_loader=train_loader,
    validation_loader=validation_loader,
    test_loader=test_loader,
    criterion=CRITERION,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
    batch_size=BATCH_SIZE,
    device=DEVICE,
    models_root=MODELS_ROOT,
    pretrained=PRETRAINED,
    shuffle=SHUFFLE,
    normalization=NORMALIZATION,
    force_retrain=FORCE_RETRAIN,  # True to force retraining
    partial_load=True,  # True to load existing models
    verbose=True,
)

# --------- Output directory ---------
EXP_DIR = Path("exp4")
EXP_DIR.mkdir(exist_ok=True)

# --------- Sanity checks ---------
# The former `try: DEVICE / except NameError` fallback was removed: DEVICE is already
# read by the call above, so a missing DEVICE fails there and the fallback never ran.
# The `"models" in globals()` test was dropped too, since `models` was just assigned.
assert len(models) == 7, (
    f"Expected 7 models from tools.load_or_train_ensemble, got {len(models)}"
)

# --------- Transforms & datasets ---------
tf = tools.get_data_transforms(DATA_ROOT, RESIZE_VALUE, NORMALIZATION)

# Raw (untransformed) test sets: the originals are kept for display and `tf` is
# applied separately when the batches are built.
mnist_raw = datasets.MNIST(DATA_ROOT, train=False, download=True, transform=None)
kmnist_raw = datasets.KMNIST(DATA_ROOT, train=False, download=True, transform=None)

# Transformed MNIST test set, used to rank the models.
mnist_test = datasets.MNIST(DATA_ROOT, train=False, download=True, transform=tf)

test_loader_mnist = DataLoader(mnist_test, batch_size=256, shuffle=False)

# --------- Pick 20 indices for MNIST & KMNIST ---------
# Both samples are drawn from the same random.Random(SEED) stream, MNIST first.
rng = random.Random(SEED)
mnist_idx = sorted(rng.sample(range(len(mnist_raw)), 20))
kmnist_idx = sorted(rng.sample(range(len(kmnist_raw)), 20))

print(f"[Exp4] Indices MNIST (20)  : {mnist_idx}")
print(f"[Exp4] Indices KMNIST (20) : {kmnist_idx}")

# --------- Load the ensemble members from their checkpoints ---------
nets = []
for p in model_paths:
    # NUM_CLASSES replaces the hard-coded 10 so the classifier head is built with the
    # same class count that was passed to tools.load_or_train_ensemble.
    # pretrained=False: presumably the checkpoint supplies the weights — confirm
    # against tools.load_model.
    net = tools.make_resnet18(num_classes=NUM_CLASSES, pretrained=False)
    net = tools.load_model(net, file_path=p, device=DEVICE).eval()
    nets.append(net)

print(f"[Exp4] {len(nets)} modèles chargés.")

# --------- Find the best model (on the MNIST test set) ---------
crit = nn.CrossEntropyLoss()
accs = []
for i, m in enumerate(nets, 1):
    loss, acc = tools.evaluate(m, test_loader_mnist, criterion=crit, device=DEVICE)
    accs.append(acc)
    print(f"Model {i}: Loss={loss:.4f}, Acc={acc * 100:.2f}%")

# np.argmax returns the first maximum, so ties go to the earliest model.
best_i = int(np.argmax(accs))
best_model = nets[best_i]
print(f"[Exp4] Meilleur modèle : model_{best_i + 1} (Acc={accs[best_i] * 100:.2f}%)")


# --------- Fonctions de prédiction ---------
@torch.no_grad()
def softmax_single(model, x):
    """Return the class probabilities predicted by ``model`` for the batch ``x``.

    ``x`` is moved to the notebook-level ``DEVICE`` before the forward pass and the
    softmax is taken over dimension 1 of the model output. Gradient tracking is
    disabled for the whole call.
    """
    logits = model(x.to(DEVICE))
    return torch.softmax(logits, dim=1)


@torch.no_grad()
def softmax_ensemble(model_list, x):
    """Average the softmax outputs of every model in ``model_list`` for the batch ``x``.

    Each model is evaluated with ``softmax_single``; the probabilities are summed and
    divided by the number of models.

    Raises:
        ValueError: if ``model_list`` is empty (this used to surface as an opaque
            ``TypeError`` caused by dividing ``None``).
    """
    if len(model_list) == 0:
        raise ValueError("softmax_ensemble requires at least one model")

    total = None
    for member in model_list:
        probs = softmax_single(member, x)
        total = probs if total is None else total + probs
    return total / len(model_list)


def pil_batch(raw_ds, idx_list):
    """Collect the samples of ``raw_ds`` located at the positions in ``idx_list``.

    Args:
        raw_ds: indexable dataset whose items are ``(image, label)`` pairs.
        idx_list: indices to fetch, in the order they should appear in the output.

    Returns:
        A tuple ``(imgs, labels)``: the images as a list and the labels as an int64
        tensor, both in ``idx_list`` order.
    """
    samples = [raw_ds[idx] for idx in idx_list]
    imgs = [img for img, _ in samples]
    labels = [int(y) for _, y in samples]
    # Pin the dtype: with an inferred dtype an empty selection would yield a float32
    # tensor, unlike the int64 labels produced for every non-empty selection.
    return imgs, torch.tensor(labels, dtype=torch.long)


def pil_to_tensor_batch(pil_list):
    """Apply the notebook-level transform ``tf`` to every image and stack the results.

    The transformed images are stacked along a new leading dimension (``dim=0``).
    """
    transformed = list(map(tf, pil_list))
    return torch.stack(transformed, dim=0)


# --------- Prepare the 20 MNIST and 20 KMNIST images ---------
mnist_pil, mnist_y = pil_batch(mnist_raw, mnist_idx)
kmnist_pil, kmnist_y = pil_batch(kmnist_raw, kmnist_idx)

mnist_x, kmnist_x = pil_to_tensor_batch(mnist_pil), pil_to_tensor_batch(kmnist_pil)

# --------- Single-model and ensemble probabilities ---------
mnist_p_single = softmax_single(best_model, mnist_x)
mnist_p_ensemble = softmax_ensemble(nets, mnist_x)

kmnist_p_single = softmax_single(best_model, kmnist_x)
kmnist_p_ensemble = softmax_ensemble(nets, kmnist_x)

# Top-1 predictions, moved to the CPU so they can be compared with the labels.
mnist_pred_s, mnist_pred_e = (
    p.argmax(1).cpu() for p in (mnist_p_single, mnist_p_ensemble)
)
kmnist_pred_s, kmnist_pred_e = (
    p.argmax(1).cpu() for p in (kmnist_p_single, kmnist_p_ensemble)
)


# --------- Accuracy comparison ---------
def _fraction_correct(predicted, target):
    """Return the fraction of positions where ``predicted`` equals ``target``."""
    return (predicted == target).float().mean().item()


mnist_acc_s = _fraction_correct(mnist_pred_s, mnist_y)
mnist_acc_e = _fraction_correct(mnist_pred_e, mnist_y)
kmnist_acc_s = _fraction_correct(kmnist_pred_s, kmnist_y)
kmnist_acc_e = _fraction_correct(kmnist_pred_e, kmnist_y)

print(
    f"\n[Exp4] Accuracy sur 20 MNIST  : Single={mnist_acc_s * 100:.2f}% | Ensemble={mnist_acc_e * 100:.2f}%"
)
print(
    f"[Exp4] Accuracy sur 20 KMNIST : Single={kmnist_acc_s * 100:.2f}% | Ensemble={kmnist_acc_e * 100:.2f}%"
)


# --------- Image grids (same helper as in Experiment 3) ---------
def save_grid(pil_imgs, y_true, pred_s, conf_s, pred_e, conf_e, title, out_path):
    """Save a grid of images annotated with single-model and ensemble predictions.

    Images are laid out 10 per row. Each title shows the true label ``y``, the single
    model prediction and confidence (``S:``) and the ensemble ones (``E:``).

    Args:
        pil_imgs: images to display (anything ``imshow`` accepts).
        y_true, pred_s, conf_s, pred_e, conf_e: per-image sequences indexed like
            ``pil_imgs``.
        title: figure-level title.
        out_path: destination of the saved figure.

    Raises:
        ValueError: if ``pil_imgs`` is empty.
    """
    n = len(pil_imgs)
    if n == 0:
        # Fail early with a clear message instead of an error from deep inside matplotlib.
        raise ValueError("save_grid needs at least one image")

    cols = 10
    rows = (n + cols - 1) // cols  # ceiling division
    fig, axes = plt.subplots(rows, cols, figsize=(1.6 * cols, 2.2 * rows))
    try:
        flat_axes = axes.flatten()
        for i in range(n):
            ax = flat_axes[i]
            ax.imshow(pil_imgs[i], cmap="gray")
            ax.set_title(
                f"y={y_true[i]}\nS:{pred_s[i]}({conf_s[i]:.2f})  E:{pred_e[i]}({conf_e[i]:.2f})",
                fontsize=7,
            )
            ax.axis("off")
        # Hide the unused cells of the last row.
        for unused_ax in flat_axes[n:]:
            unused_ax.axis("off")
        fig.suptitle(title, fontsize=14, y=1.02)
        fig.tight_layout()
        fig.savefig(out_path, dpi=150, bbox_inches="tight")
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)


# Confidence = probability of the predicted class, for both predictors and datasets.
mnist_conf_s, mnist_conf_e = (
    p.max(1).values.cpu().numpy() for p in (mnist_p_single, mnist_p_ensemble)
)
kmnist_conf_s, kmnist_conf_e = (
    p.max(1).values.cpu().numpy() for p in (kmnist_p_single, kmnist_p_ensemble)
)

mnist_grid = EXP_DIR / "mnist_grid_20.png"
kmnist_grid = EXP_DIR / "kmnist_grid_20.png"

# One annotated grid per dataset, MNIST first.
for grid_args in (
    (
        mnist_pil,
        mnist_y.numpy(),
        mnist_pred_s.numpy(),
        mnist_conf_s,
        mnist_pred_e.numpy(),
        mnist_conf_e,
        "Exp4 — MNIST (20 images)",
        mnist_grid,
    ),
    (
        kmnist_pil,
        kmnist_y.numpy(),
        kmnist_pred_s.numpy(),
        kmnist_conf_s,
        kmnist_pred_e.numpy(),
        kmnist_conf_e,
        "Exp4 — KMNIST (20 images)",
        kmnist_grid,
    ),
):
    save_grid(*grid_args)

print(f"[Exp4] Terminé — images sauvegardées dans: {EXP_DIR}/")

## Experiment #5


In [None]:
NUM_MODELS: int = 7
# Experiment #5 resets PRETRAINED to False for its ensemble.
PRETRAINED: bool = False

# Obtain the ensemble and the paths of its checkpoints.
models, model_paths = tools.load_or_train_ensemble(
    num_models=NUM_MODELS,
    num_classes=NUM_CLASSES,
    train_loader=train_loader,
    validation_loader=validation_loader,
    test_loader=test_loader,
    criterion=CRITERION,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
    batch_size=BATCH_SIZE,
    device=DEVICE,
    models_root=MODELS_ROOT,
    pretrained=PRETRAINED,
    shuffle=SHUFFLE,
    normalization=NORMALIZATION,
    force_retrain=FORCE_RETRAIN,
    partial_load=True,
    verbose=True,
)

# Summary of what was obtained (numbering starts at 1).
print(f"\nEnsemble prêt avec {len(models)} modèles!")
print(f"Chemins des modèles:")
for i, path in enumerate(model_paths, 1):
    print(f"  {i}. {path}")


# Rotation angles in degrees; both 0 and 360 are included.
ANGLES: list[int] = list(range(0, 361, 10))  # 0, 10, ..., 360
# Number of evaluated samples (20 were requested from tools.get_random_samples).
NUM_SAMPLES: int = 20


# Extract images and labels from RANDOM_SAMPLES
# Expected shapes (20, 3, 32, 32) and (20,) — assumes 3-channel 32x32 inputs; TODO confirm.
imgs = torch.stack([img for img, _ in RANDOM_SAMPLES])  # Shape: (20, 3, 32, 32)
labels = torch.tensor([y for _, y in RANDOM_SAMPLES])  # Shape: (20,)

print(f"Indices sélectionnés: {RANDOM_SELECTED_INDEXES}")
print(f"Batch shape: {imgs.shape}")
print(f"Labels shape: {labels.shape}")
print(f"Labels: {labels.tolist()}\n")


# ----------------------------------------------------------------------------
# 3. FOLLOW 4 SPECIFIC IMAGES FOR DETAILED ANALYSIS
# ----------------------------------------------------------------------------

# The first 4 selected dataset indices are tracked; position i in track_indices is
# paired with labels[i].
# NOTE(review): assumes RANDOM_SELECTED_INDEXES is in the same order as RANDOM_SAMPLES —
# confirm against tools.get_random_samples.
track_indices = RANDOM_SELECTED_INDEXES[:4]
per_image_data = {
    idx: {
        "label": int(labels[i]),  # True label
        "angle_probs": {},  # Mean probabilities per angle
        "predictions": {},  # Prediction per angle
    }
    for i, idx in enumerate(track_indices)
}

print(f"Images suivies pour analyse détaillée: {track_indices}")
print(f"Labels vrais: {[per_image_data[idx]['label'] for idx in track_indices]}\n")


print("Angle | Accuracy (moyenne softmax sur 7 modèles)")
print("-" * 45)

accuracies = []  # one ensemble accuracy per rotation angle, reused for the plot

for angle in ANGLES:
    # Rotate the whole batch at once; the result has the same shape as `imgs`.
    rot_imgs = TF.rotate(
        imgs,
        angle,
        interpolation=transforms.InterpolationMode.BILINEAR,
    )

    # Mean predictions over the ensemble models.
    # Presumably shaped (num_images, num_classes) — confirm against tools.get_mean_probs_fast.
    mean_probs = tools.get_mean_probs_fast(rot_imgs, models, device=DEVICE)

    preds = mean_probs.argmax(dim=1)  # predicted class per image

    correct = (preds.cpu() == labels).sum().item()
    # Divide by the real batch size rather than the separate NUM_SAMPLES constant, so
    # the accuracy stays correct if the number of drawn samples ever changes.
    acc = correct / len(labels)
    accuracies.append(acc)

    print(f"{angle:5d}° | {acc:.2%}")

    # Record probabilities and prediction of the tracked images (the first ones of the batch).
    for i, idx in enumerate(track_indices):
        per_image_data[idx]["angle_probs"][angle] = mean_probs[i].cpu().clone()
        per_image_data[idx]["predictions"][angle] = preds[i].item()

print(f"\nÉvaluation terminée sur {len(ANGLES)} angles de rotation.\n")


# Subset of angles listed in the per-image table.
show_angles = [0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360]

print("\n" + "=" * 70)
print("APERÇU DÉTAILLÉ DES PROBABILITÉS MOYENNES (4 images suivies)")
print("=" * 70)

for idx in track_indices:
    tracked = per_image_data[idx]
    true_y = tracked["label"]
    print(f"\nImage idx={idx} | Vrai label: {true_y}")
    print("-" * 60)
    print(" Angle | Pred | P(pred) | P(vrai) | Correct?")
    print("-" * 60)

    for angle in show_angles:
        # Skip angles that were not evaluated in the rotation loop.
        probs = tracked["angle_probs"].get(angle)
        if probs is None:
            continue

        pred = tracked["predictions"][angle]
        p_pred = probs[pred].item()
        p_true = probs[true_y].item()
        is_correct = "✓" if pred == true_y else "✗"

        print(f"{angle:6d}° | {pred:4d} | {p_pred:7.3f} | {p_true:7.3f} | {is_correct}")


# Ensemble accuracy as a function of the rotation angle.
plt.figure(figsize=(12, 6))
plt.plot(ANGLES, accuracies, marker="o", linewidth=2, markersize=6, color="steelblue")
# Reference line at 100% accuracy; drawn before plt.legend() so it gets a legend entry.
plt.axhline(y=1.0, color="green", linestyle="--", alpha=0.3, label="Perfect accuracy")
plt.xlabel("Angle de rotation (°)", fontsize=12)
plt.ylabel("Accuracy", fontsize=12)
plt.title(
    "Robustesse de l'ensemble de modèles aux rotations", fontsize=14, fontweight="bold"
)
plt.grid(True, alpha=0.3)
plt.legend()
plt.ylim(0, 1.05)
plt.tight_layout()
plt.show()

# Summary statistics over all evaluated angles.
print(f"\nAccuracy min: {min(accuracies):.2%} | Accuracy max: {max(accuracies):.2%}")
print(f"Accuracy moyenne: {np.mean(accuracies):.2%}\n")

In [None]:
# Ensemble probability of the true class vs. rotation angle, one curve per tracked image.
plt.figure(figsize=(14, 7))

for idx in track_indices:
    label = per_image_data[idx]["label"]
    angles = sorted(per_image_data[idx]["angle_probs"].keys())

    # Probability of the true class at each angle
    probs_true = [per_image_data[idx]["angle_probs"][a][label].item() for a in angles]

    plt.plot(
        angles,
        probs_true,
        marker="o",
        label=f"Chiffre {label} (idx={idx})",
        linewidth=2,
    )

# The 50% threshold line must exist before plt.legend() is called: the legend only
# lists the labelled artists present at that moment, so the previous order (legend
# first, line afterwards) silently dropped the "Seuil 50%" entry.
plt.axhline(y=0.5, color="red", linestyle="--", alpha=0.3, label="Seuil 50%")

plt.xlabel("Angle de rotation (°)", fontsize=12)
plt.ylabel("P(vraie classe)", fontsize=12)
plt.title(
    "Confiance du modèle sur la vraie classe vs angle de rotation",
    fontsize=14,
    fontweight="bold",
)
plt.legend(title="Vrai label", fontsize=10)
plt.grid(True, alpha=0.3)
plt.ylim(0, 1.05)
plt.tight_layout()
plt.show()

# ----------------------------------------------------------------------------
# 8. HEATMAP OF CLASS PROBABILITIES VS ROTATION ANGLE (4 TRACKED IMAGES)
# ----------------------------------------------------------------------------

# 2x2 grid: one heatmap per tracked image.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
axes = axes.flatten()

for i, idx in enumerate(track_indices):
    ax = axes[i]
    tracked = per_image_data[idx]
    label = tracked["label"]

    # Rows of the matrix follow the sorted angles, columns are the classes.
    angles = sorted(tracked["angle_probs"].keys())
    prob_matrix = np.array([tracked["angle_probs"][a].numpy() for a in angles])

    # Transposed for display: classes on the y-axis, angles on the x-axis.
    im = ax.imshow(prob_matrix.T, aspect="auto", cmap="viridis", vmin=0, vmax=1)

    ax.set_xlabel("Angle (index)", fontsize=10)
    ax.set_ylabel("Classe", fontsize=10)
    ax.set_title(f"Image {idx} | Vrai label: {label}", fontsize=12, fontweight="bold")
    ax.set_yticks(range(10))

    # One x tick every 5 angles, labelled with the angle value itself.
    tick_positions = range(0, len(angles), 5)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(angles[::5])

    # Highlight the row of the true class.
    ax.axhline(y=label, color="red", linestyle="--", linewidth=2, alpha=0.5)

    plt.colorbar(im, ax=ax, label="Probabilité")

plt.suptitle(
    "Heatmap des probabilités par classe et angle de rotation",
    fontsize=14,
    fontweight="bold",
    y=0.995,
)
plt.tight_layout()
plt.show()

print("\nAnalyse complète terminée!")