In [6]:
import os
import struct
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor  # Import fixé
from torchvision.models import vgg16, alexnet
from sklearn.metrics import accuracy_score, f1_score
import time
import matplotlib.pyplot as plt

# Dataset path: directory that load_mnist() reads the IDX label/image files from
data_path = '/kaggle/input/dataset-atelier2'

def load_mnist(path, kind='train'):
    """Load an IDX-formatted image/label pair (MNIST layout) from ``path``.

    The files ``{kind}-labels-idx1-ubyte`` and ``{kind}-images-idx3-ubyte``
    are read from ``path``. Image dimensions are taken from the image file
    header rather than being assumed to be 28x28.

    Args:
        path: Directory containing the two IDX files.
        kind: File-name prefix, e.g. ``'train'`` or ``'t10k'``.

    Returns:
        ``(images, labels)`` where ``images`` is a ``uint8`` array of shape
        ``[N, rows, cols]`` and ``labels`` is a ``uint8`` array of shape ``[N]``.

    Raises:
        ValueError: If a magic number is wrong, a file is truncated, or the
            image and label counts disagree.
    """
    labels_path = os.path.join(path, f'{kind}-labels-idx1-ubyte')
    images_path = os.path.join(path, f'{kind}-images-idx3-ubyte')

    with open(labels_path, 'rb') as lbpath:
        # Header: big-endian magic number (2049 for idx1 ubyte) and item count.
        magic, n_labels = struct.unpack('>II', lbpath.read(8))
        if magic != 2049:
            raise ValueError(f"{labels_path}: unexpected magic number {magic} (expected 2049)")
        labels = np.fromfile(lbpath, dtype=np.uint8, count=n_labels)
    if labels.size != n_labels:
        raise ValueError(f"{labels_path}: header announces {n_labels} labels, found {labels.size}")

    with open(images_path, 'rb') as imgpath:
        # Header: big-endian magic number (2051 for idx3 ubyte), count, rows, cols.
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        if magic != 2051:
            raise ValueError(f"{images_path}: unexpected magic number {magic} (expected 2051)")
        pixels = np.fromfile(imgpath, dtype=np.uint8, count=num * rows * cols)
    if pixels.size != num * rows * cols:
        raise ValueError(
            f"{images_path}: header announces {num}x{rows}x{cols} pixels, found {pixels.size}"
        )
    if num != n_labels:
        raise ValueError(f"image count ({num}) does not match label count ({n_labels})")

    # [N, H, W] without a channel dimension.
    images = pixels.reshape(num, rows, cols)
    return images, labels

# Read the training split ('train-*' files) and the test split ('t10k-*' files) from data_path
train_images, train_labels = load_mnist(data_path, 'train')
test_images, test_labels = load_mnist(data_path, 't10k')

class MNISTDataset(Dataset):
    """Classification dataset over in-memory grayscale images.

    Images are scaled from ``[0, 255]`` to ``[0, 1]`` as ``float32``. Labels are
    stored as ``int64`` so that batches collate to ``torch.long`` targets, the
    class-index dtype expected by ``nn.CrossEntropyLoss``.

    Args:
        images: Array of shape ``[N, H, W]`` with pixel values in ``[0, 255]``.
        labels: Sequence of ``N`` integer class indices.
        transform: Optional callable applied to each ``[H, W]`` image.

    Raises:
        ValueError: If ``images`` and ``labels`` have different lengths.
    """

    def __init__(self, images, labels, transform=None):
        self.images = images.astype(np.float32) / 255.0  # [N, H, W]
        # uint8 labels would collate to uint8 tensors; int64 is the portable target dtype.
        self.labels = np.asarray(labels, dtype=np.int64)
        if len(self.images) != len(self.labels):
            raise ValueError(
                f"images ({len(self.images)}) and labels ({len(self.labels)}) differ in length"
            )
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; ``transform`` is applied when set."""
        img = self.images[idx]  # [H, W]
        if self.transform:
            img = self.transform(img)  # e.g. ToTensor adds the channel dim -> [1, H, W]
        return img, self.labels[idx]

# Shared preprocessing: convert to a tensor, then normalize the single channel.
# (0.1307, 0.3081) are presumably the MNIST mean/std — TODO confirm against the data.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

# Train and test datasets share the same transform
train_set = MNISTDataset(train_images, train_labels, transform)
test_set = MNISTDataset(test_images, test_labels, transform)

# Only the training loader is shuffled; the test loader keeps dataset order
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Global compute device used by the cells below: CUDA when available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [7]:
class SimpleCNN(nn.Module):
    """Two-stage convolutional classifier for 1-channel images with 10 output logits.

    Each stage is a 3x3 convolution, ReLU and 2x2 max-pool. The first linear
    layer expects ``64 * 7 * 7`` features, which is what two 2x2 poolings of a
    28x28 input produce.
    """

    def __init__(self):
        super().__init__()
        same_conv = dict(kernel_size=3, stride=1, padding=1)  # size-preserving 3x3 conv
        self.conv1 = nn.Conv2d(1, 32, **same_conv)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, **same_conv)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)  # regularization

    def forward(self, x):
        """Return class logits of shape ``[batch, 10]``."""
        # Two identical conv -> ReLU -> pool stages.
        for conv in (self.conv1, self.conv2):
            x = self.pool(self.relu(conv(x)))
        flat = x.view(-1, self.fc1.in_features)
        hidden = self.dropout(self.relu(self.fc1(flat)))
        return self.fc2(hidden)

# Instantiate the baseline CNN on the selected device, with its Adam optimizer.
cnn_model = SimpleCNN().to(device)
optimizer_cnn = optim.Adam(cnn_model.parameters(), lr=0.001)
# Cross-entropy criterion; the later train_model(...) calls for VGG16, AlexNet and ViT reuse it
criterion = nn.CrossEntropyLoss()

In [8]:
def train_model(model, train_loader, optimizer, criterion, epochs=5, device=None):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    Args:
        model: Module called as ``model(data)``.
        train_loader: Iterable yielding ``(data, targets)`` tensor batches.
        optimizer: Optimizer built over ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to. When ``None`` it is taken
            from the model's first parameter (CPU if the model has none), so
            the function does not depend on a notebook-level global.

    Returns:
        List with the mean batch loss of each epoch.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    losses = []
    for epoch in range(epochs):
        running_loss = 0.0
        n_batches = 0
        for data, targets in train_loader:
            data, targets = data.to(device), targets.to(device)
            optimizer.zero_grad()
            loss = criterion(model(data), targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            n_batches += 1
        if n_batches == 0:
            # Avoid an opaque ZeroDivisionError when the loader is empty.
            raise ValueError("train_loader yielded no batches; cannot compute an average loss")
        losses.append(running_loss / n_batches)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {losses[-1]:.4f}")
    return losses

# Train the baseline CNN (default number of epochs) and record the wall-clock training time
start_time = time.time()
cnn_losses = train_model(cnn_model, train_loader, optimizer_cnn, criterion)
cnn_time = time.time() - start_time

Epoch 1/5, Loss: 0.2358
Epoch 2/5, Loss: 0.0883
Epoch 3/5, Loss: 0.0643
Epoch 4/5, Loss: 0.0546
Epoch 5/5, Loss: 0.0453


In [9]:
def evaluate_model(model, loader, device=None):
    """Compute accuracy and macro-averaged F1 of ``model`` over ``loader``.

    The model is switched to eval mode and run without gradient tracking.
    The predicted class is the arg-max over dimension 1 of the model output.

    Args:
        model: Module called as ``model(data)``.
        loader: Iterable yielding ``(data, targets)`` tensor batches.
        device: Device the inputs are moved to. When ``None`` it is taken
            from the model's first parameter (CPU if the model has none), so
            the function does not depend on a notebook-level global.

    Returns:
        Tuple ``(accuracy, macro_f1)``.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    preds = []
    labels = []
    with torch.no_grad():
        for data, targets in loader:
            outputs = model(data.to(device))
            preds.extend(torch.argmax(outputs, dim=1).cpu().numpy())
            # .cpu() is a no-op for CPU targets and keeps this working if they are not.
            labels.extend(targets.cpu().numpy())
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average='macro')
    return acc, f1

# Score the trained CNN on the test loader and report it with the measured training time
cnn_acc, cnn_f1 = evaluate_model(cnn_model, test_loader)
print(f"CNN - Acc: {cnn_acc:.4f}, F1: {cnn_f1:.4f}, Time: {cnn_time:.2f}s")

CNN - Acc: 0.9927, F1: 0.9927, Time: 33.91s


In [10]:
class MNISTDetection(Dataset):
    """Detection-style view of a digit dataset: one full-image box per sample.

    Each item is ``(image, target)`` where ``image`` is a ``float32`` tensor of
    shape ``[3, H, W]`` with values in ``[0, 1]`` and ``target`` holds one box
    covering the whole image and one label equal to ``digit + 1`` (index 0 is
    left free for the detector's background class).

    No mean/std normalization is applied here: torchvision's Faster R-CNN
    documents its inputs as 0-1 range tensors and normalizes them itself.

    Args:
        images: Array of shape ``[N, H, W]`` with pixel values in ``[0, 255]``.
        labels: Sequence of ``N`` integer digit labels.
    """

    def __init__(self, images, labels):
        self.images = images.astype(np.float32) / 255.0
        self.labels = labels
        # Only tensor conversion; the detector performs its own normalization.
        self.transform = transforms.Compose([transforms.ToTensor()])

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, {'boxes': [1, 4], 'labels': [1]})`` for sample ``idx``."""
        img = self.transform(self.images[idx])  # [1, H, W]
        if img.shape[0] == 1:
            # Replicate the gray channel explicitly instead of relying on broadcasting downstream.
            img = img.expand(3, -1, -1).contiguous()
        height, width = img.shape[-2:]
        # Single ground-truth box spanning the full image, as (x1, y1, x2, y2).
        boxes = torch.tensor([[0, 0, width, height]], dtype=torch.float32)
        labels = torch.tensor([int(self.labels[idx]) + 1], dtype=torch.int64)
        target = {'boxes': boxes, 'labels': labels}
        return img, target

# Detection-style datasets built from the same raw arrays used for classification
train_det = MNISTDetection(train_images, train_labels)
test_det = MNISTDetection(test_images, test_labels)

def collate_fn(batch):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples.

    A batch ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; items are grouped, not stacked.
    An empty batch yields an empty tuple.
    """
    columns = zip(*batch)
    return tuple(columns)

# Detection loaders use collate_fn so each batch is (tuple of images, tuple of target dicts)
train_loader_det = DataLoader(train_det, batch_size=4, shuffle=True, collate_fn=collate_fn)
test_loader_det = DataLoader(test_det, batch_size=4, shuffle=False, collate_fn=collate_fn)

In [None]:
# Faster R-CNN (ResNet-50 FPN) requested with pretrained weights.
# NOTE(review): newer torchvision releases prefer a `weights=` argument over `pretrained=` — confirm installed version.
rcnn_model = fasterrcnn_resnet50_fpn(pretrained=True)
# Input width of the existing classification head, so the replacement head matches it
in_features = rcnn_model.roi_heads.box_predictor.cls_score.in_features
# Replace the box predictor with an 11-class head
# (presumably 10 digits + background at index 0, since MNISTDetection shifts labels by +1)
rcnn_model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 11)
rcnn_model = rcnn_model.to(device)
optimizer_rcnn = optim.Adam(rcnn_model.parameters(), lr=0.001)

def train_rcnn(model, loader, optimizer, epochs=5, device=None):
    """Train a detection model that returns a dict of losses in train mode.

    Args:
        model: Module called as ``model(images, targets)`` and returning a
            mapping of loss name to scalar loss tensor.
        loader: Iterable yielding ``(images, targets)`` where ``images`` is a
            sequence of tensors and ``targets`` a sequence of dicts of tensors.
        optimizer: Optimizer built over ``model``'s parameters.
        epochs: Number of passes over ``loader``.
        device: Device the batches are moved to. When ``None`` it is taken
            from the model's first parameter (CPU if the model has none), so
            the function does not depend on a notebook-level global.

    Returns:
        List with the mean summed loss per batch for each epoch.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    losses = []
    for epoch in range(epochs):
        running_loss = 0.0
        n_batches = 0
        for images, targets in loader:
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            # Total objective is the sum of every loss component the model reports.
            total_loss = sum(loss_dict.values())
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()
            running_loss += total_loss.item()
            n_batches += 1
        if n_batches == 0:
            # Avoid an opaque ZeroDivisionError when the loader is empty.
            raise ValueError("loader yielded no batches; cannot compute an average loss")
        losses.append(running_loss / n_batches)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {losses[-1]:.4f}")
    return losses

# Train Faster R-CNN on the detection loader (default number of epochs) and record wall-clock time
start_time = time.time()
rcnn_losses = train_rcnn(rcnn_model, train_loader_det, optimizer_rcnn)
rcnn_time = time.time() - start_time

In [None]:
def evaluate_rcnn(model, loader, device=None):
    """Score a detection model as a classifier using its top-scoring detection.

    For each image the predicted digit is the label of the highest-scoring
    detection minus one (undoing the ``+1`` shift applied to the targets).
    An image with no detection is counted as a miss (prediction ``-1``)
    instead of being dropped, so the metrics cover every sample.

    Args:
        model: Module called as ``model(images)`` in eval mode and returning
            one dict per image with ``'labels'`` and ``'scores'`` tensors.
        loader: Iterable yielding ``(images, targets)`` where each target dict
            has a ``'labels'`` tensor whose first entry is the true class + 1.
        device: Device the images are moved to. When ``None`` it is taken
            from the model's first parameter (CPU if the model has none).

    Returns:
        Tuple ``(accuracy, macro_f1)``; the F1 average runs over the classes
        present in the ground truth only.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    no_detection = -1  # sentinel that can never equal a true class
    model.eval()
    preds = []
    labels = []
    with torch.no_grad():
        for images, targets in loader:
            images = [img.to(device) for img in images]
            outputs = model(images)
            for out, tgt in zip(outputs, targets):
                if len(out['labels']) > 0:
                    pred = out['labels'][torch.argmax(out['scores'])].item() - 1
                else:
                    pred = no_detection
                preds.append(pred)
                labels.append(tgt['labels'][0].item() - 1)
    acc = accuracy_score(labels, preds)
    # Restrict the macro average to true classes so the sentinel is not scored as a class.
    f1 = f1_score(labels, preds, average='macro', labels=sorted(set(labels)))
    return acc, f1

# Score Faster R-CNN on the detection test loader and report it with its training time
rcnn_acc, rcnn_f1 = evaluate_rcnn(rcnn_model, test_loader_det)
print(f"Faster R-CNN - Acc: {rcnn_acc:.4f}, F1: {rcnn_f1:.4f}, Time: {rcnn_time:.2f}s")

In [None]:
# Side-by-side summary of the CNN and Faster R-CNN metrics computed in earlier cells
print("Comparaison CNN vs Faster R-CNN:")
print(f"CNN: Acc {cnn_acc:.4f}, F1 {cnn_f1:.4f}, Time {cnn_time:.2f}s")
print(f"Faster R-CNN: Acc {rcnn_acc:.4f}, F1 {rcnn_f1:.4f}, Time {rcnn_time:.2f}s")

In [None]:
# VGG16 fine-tuning on the classification loaders.
# VGG16's feature extractor halves the spatial size five times, so a 28x28 input
# collapses to 0x0 at the last max-pool and raises at runtime. Inputs are
# therefore upsampled inside the model before reaching the backbone.
VGG_INPUT_SIZE = 224  # side length fed to the backbone; must be at least 32

vgg_backbone = vgg16(pretrained=True)

# Swap the RGB stem for a 1-channel conv, keeping the pretrained filters by
# summing them over the colour axis instead of starting from random weights.
vgg_rgb_conv = vgg_backbone.features[0]
vgg_gray_conv = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)
with torch.no_grad():
    vgg_gray_conv.weight.copy_(vgg_rgb_conv.weight.sum(dim=1, keepdim=True))
    vgg_gray_conv.bias.copy_(vgg_rgb_conv.bias)
vgg_backbone.features[0] = vgg_gray_conv

# New 10-way output layer on top of the existing classifier.
vgg_backbone.classifier[6] = nn.Linear(vgg_backbone.classifier[6].in_features, 10)

vgg_model = nn.Sequential(
    nn.Upsample(size=(VGG_INPUT_SIZE, VGG_INPUT_SIZE), mode='bilinear', align_corners=False),
    vgg_backbone,
)
vgg_model = vgg_model.to(device)
optimizer_vgg = optim.Adam(vgg_model.parameters(), lr=0.001)

# Train and record wall-clock training time.
start_time = time.time()
vgg_losses = train_model(vgg_model, train_loader, optimizer_vgg, criterion)
vgg_time = time.time() - start_time

vgg_acc, vgg_f1 = evaluate_model(vgg_model, test_loader)
print(f"VGG16 - Acc: {vgg_acc:.4f}, F1: {vgg_f1:.4f}, Time: {vgg_time:.2f}s")

In [None]:
# AlexNet fine-tuning on the classification loaders.
# With a 28x28 input the stride-4 stem and the 3x3/stride-2 max-pools shrink the
# feature map to 0x0 at the second pool, which raises at runtime. Inputs are
# therefore upsampled inside the model before reaching the backbone.
ALEX_INPUT_SIZE = 224  # side length fed to the backbone

alex_backbone = alexnet(pretrained=True)

# Swap the RGB stem for a 1-channel conv, keeping the pretrained filters by
# summing them over the colour axis instead of starting from random weights.
alex_rgb_conv = alex_backbone.features[0]
alex_gray_conv = nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2)
with torch.no_grad():
    alex_gray_conv.weight.copy_(alex_rgb_conv.weight.sum(dim=1, keepdim=True))
    alex_gray_conv.bias.copy_(alex_rgb_conv.bias)
alex_backbone.features[0] = alex_gray_conv

# New 10-way output layer on top of the existing classifier.
alex_backbone.classifier[6] = nn.Linear(alex_backbone.classifier[6].in_features, 10)

alex_model = nn.Sequential(
    nn.Upsample(size=(ALEX_INPUT_SIZE, ALEX_INPUT_SIZE), mode='bilinear', align_corners=False),
    alex_backbone,
)
alex_model = alex_model.to(device)
optimizer_alex = optim.Adam(alex_model.parameters(), lr=0.001)

# Train and record wall-clock training time.
start_time = time.time()
alex_losses = train_model(alex_model, train_loader, optimizer_alex, criterion)
alex_time = time.time() - start_time

alex_acc, alex_f1 = evaluate_model(alex_model, test_loader)
print(f"AlexNet - Acc: {alex_acc:.4f}, F1: {alex_f1:.4f}, Time: {alex_time:.2f}s")

In [None]:
# Summary of all Part 1 models, using the metrics computed in earlier cells
print("Comparaison Tous Modèles Part 1:")
print(f"CNN: Acc {cnn_acc:.4f}, F1 {cnn_f1:.4f}, Time {cnn_time:.2f}s")
print(f"Faster R-CNN: Acc {rcnn_acc:.4f}, F1 {rcnn_f1:.4f}, Time {rcnn_time:.2f}s")
print(f"VGG16: Acc {vgg_acc:.4f}, F1 {vgg_f1:.4f}, Time {vgg_time:.2f}s")
print(f"AlexNet: Acc {alex_acc:.4f}, F1 {alex_f1:.4f}, Time {alex_time:.2f}s")

# Conclusion
# NOTE(review): the statement below is a fixed string; it is not derived from the metrics printed above.
print("Conclusion: Fine-tuned VGG16/AlexNet surpassent CNN ; Faster R-CNN overkill pour classification.")

In [None]:
def patchify(images, n_patches):
    """Split each image into an ``n_patches x n_patches`` grid of flattened patches.

    The patch side is ``H // n_patches``; rows and columns beyond
    ``n_patches * patch_size`` are ignored. Patches are ordered row-major over
    the grid, and each patch is flattened in ``(channel, row, col)`` order.

    Args:
        images: Tensor of shape ``[N, C, H, W]``.
        n_patches: Number of patches along each spatial axis.

    Returns:
        Tensor of shape ``[N, n_patches ** 2, C * patch_size ** 2]`` in the
        default floating dtype, on the same device as ``images``.

    Raises:
        ValueError: If ``W`` is smaller than ``n_patches * patch_size``.
    """
    n, c, h, w = images.shape
    patch_size = h // n_patches
    side = n_patches * patch_size
    if w < side:
        raise ValueError(
            f"image width {w} is too small for {n_patches} patches of size {patch_size}"
        )

    patches = torch.zeros(n, n_patches ** 2, c * patch_size ** 2, device=images.device)
    # View the covered region as [N, C, grid_row, row, grid_col, col] and move the
    # grid axes in front of the channel axis; one vectorized copy replaces the
    # per-image / per-patch Python loops.
    grid = images[:, :, :side, :side].reshape(
        n, c, n_patches, patch_size, n_patches, patch_size
    ).permute(0, 2, 4, 1, 3, 5)
    patches.view(n, n_patches, n_patches, c, patch_size, patch_size).copy_(grid)
    return patches

def get_positional_embeddings(sequence_length, d):
    """Build a ``[sequence_length, d]`` table of sinusoidal position encodings.

    For position ``pos`` and column ``dim``: even columns hold
    ``sin(pos / 10000 ** (dim / d))`` and odd columns hold
    ``cos(pos / 10000 ** ((dim - 1) / d))``.
    """
    def _entry(pos, dim):
        # Even columns use sine; odd columns use cosine at the preceding even column's frequency.
        if dim % 2 == 0:
            return np.sin(pos / (10000 ** (dim / d)))
        return np.cos(pos / (10000 ** ((dim - 1) / d)))

    table = torch.ones(sequence_length, d)
    for pos in range(sequence_length):
        row = [_entry(pos, dim) for dim in range(d)]
        table[pos] = torch.tensor(row, dtype=table.dtype)
    return table

class MyViT(nn.Module):
    """Small Vision Transformer classifier for 1-channel 28x28 images.

    Images are cut into ``n_patches x n_patches`` patches, each patch is
    linearly projected to ``hidden_d``, a class token is prepended, positional
    embeddings are added, and the sequence goes through ``n_blocks`` encoder
    blocks. The class-token output feeds a linear head with ``out_d`` logits.

    Args:
        n_patches: Patches per spatial axis (patch side is ``28 // n_patches``).
        n_blocks: Number of ``MyViTBlock`` encoder blocks.
        hidden_d: Token embedding width.
        n_heads: Attention heads per block.
        out_d: Number of output classes.
    """

    def __init__(self, n_patches=7, n_blocks=2, hidden_d=8, n_heads=2, out_d=10):
        super().__init__()
        self.n_patches = n_patches
        self.hidden_d = hidden_d
        # Flattened patch (1 channel, side 28 // n_patches) -> hidden_d
        self.linear_mapper = nn.Linear(1 * (28 // n_patches) ** 2, hidden_d)
        self.class_token = nn.Parameter(torch.rand(1, hidden_d))
        # Sinusoidal initialization, registered as a parameter (one row per token incl. class token)
        self.pos_embed = nn.Parameter(get_positional_embeddings(n_patches ** 2 + 1, hidden_d))
        self.blocks = nn.ModuleList([MyViTBlock(hidden_d, n_heads) for _ in range(n_blocks)])
        self.mlp = nn.Linear(hidden_d, out_d)

    def forward(self, images):
        """Return class logits of shape ``[N, out_d]`` for ``images`` of shape ``[N, 1, 28, 28]``.

        Defined so the model can be called as ``model(images)``, which is how
        ``train_model`` and ``evaluate_model`` invoke it.
        """
        patches = patchify(images, self.n_patches)           # [N, P*P, patch_dim]
        tokens = self.linear_mapper(patches)                 # [N, P*P, hidden_d]
        # Prepend the shared class token to every sequence in the batch.
        cls_tokens = self.class_token.unsqueeze(0).expand(tokens.shape[0], -1, -1)
        tokens = torch.cat((cls_tokens, tokens), dim=1)      # [N, P*P + 1, hidden_d]
        out = tokens + self.pos_embed                        # broadcast over the batch
        for block in self.blocks:
            out = block(out)
        # Classify from the class-token position only.
        return self.mlp(out[:, 0])

class MyViTBlock(nn.Module):
    """Pre-norm transformer encoder block: self-attention then MLP, each with a residual.

    Inputs and outputs have shape ``[batch, tokens, hidden_d]``. The attention
    layer is built with ``batch_first=True`` so that attention runs over the
    token axis of each sample; with the default layout the first axis would be
    treated as the sequence and samples in a batch would attend to each other.

    Args:
        hidden_d: Token embedding width.
        n_heads: Number of attention heads (must divide ``hidden_d``).
    """

    def __init__(self, hidden_d, n_heads):
        super().__init__()
        self.norm1 = nn.LayerNorm(hidden_d)
        self.attn = nn.MultiheadAttention(hidden_d, n_heads, batch_first=True)
        self.norm2 = nn.LayerNorm(hidden_d)
        self.mlp = nn.Sequential(nn.Linear(hidden_d, hidden_d * 4), nn.GELU(), nn.Linear(hidden_d * 4, hidden_d))

    def forward(self, x):
        """Apply the block to ``x`` of shape ``[batch, tokens, hidden_d]``."""
        # Normalize once and reuse as query, key and value.
        normed = self.norm1(x)
        attn_out, _ = self.attn(normed, normed, normed, need_weights=False)
        x = x + attn_out
        x = x + self.mlp(self.norm2(x))
        return x

# Instantiate the ViT with its default hyper-parameters on the selected device, with its Adam optimizer
vit_model = MyViT().to(device)
optimizer_vit = optim.Adam(vit_model.parameters(), lr=0.001)

def forward_vit(model, images):
    """Run the ViT forward pass functionally, using the sub-modules of ``model``.

    Args:
        model: Object exposing ``n_patches``, ``linear_mapper``, ``class_token``,
            ``pos_embed``, ``blocks`` and ``mlp`` (as ``MyViT`` does).
        images: Tensor of shape ``[N, C, H, W]`` on the same device as ``model``.

    Returns:
        Logits of shape ``[N, out_d]`` computed from the class-token position.
    """
    patches = patchify(images, model.n_patches)
    tokens = model.linear_mapper(patches)
    # Prepend the shared class token to every sequence without a per-sample Python loop.
    cls_tokens = model.class_token.unsqueeze(0).expand(tokens.shape[0], -1, -1)
    tokens = torch.cat((cls_tokens, tokens), dim=1)
    # pos_embed is [tokens, hidden_d] and broadcasts over the batch; it already lives
    # with the model's parameters, so no global device lookup is needed.
    out = tokens + model.pos_embed
    for block in model.blocks:
        out = block(out)
    out = out[:, 0]
    return model.mlp(out)

# Train the ViT and record wall-clock training time.
# train_model and evaluate_model invoke the model as `model(data)`, so vit_model
# must implement forward() for this cell to run.
start_time = time.time()
vit_losses = train_model(vit_model, train_loader, optimizer_vit, criterion)
vit_time = time.time() - start_time

vit_acc, vit_f1 = evaluate_model(vit_model, test_loader)
print(f"ViT - Acc: {vit_acc:.4f}, F1: {vit_f1:.4f}, Time: {vit_time:.2f}s")

In [None]:
# Summary of the ViT metrics for comparison with the Part 1 models
print("Comparaison ViT vs Part 1:")
print(f"ViT: Acc {vit_acc:.4f}, F1 {vit_f1:.4f}, Time {vit_time:.2f}s")
# TODO: add the other models' metrics here

# Interpretation
# NOTE(review): the statement below is a fixed string; it is not derived from the metrics printed above.
print("ViT capture global features via attention, compétitif avec CNN mais plus compute-intensive sur MNIST.")