In [26]:

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.utils.tensorboard import SummaryWriter
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import torch.onnx

In [27]:
from torchvision import transforms

# Normalisation constants applied to both splits (the same values are reused
# by the drawn-image preprocessing further down in this notebook).
MNIST_MEAN, MNIST_STD = (0.1307,), (0.3081,)

# Training pipeline: random affine augmentation (rotation, translation, scale)
# followed by tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.RandomAffine(degrees=15, translate=(0.15, 0.15), scale=(0.9, 1.1)),
    transforms.ToTensor(),
    transforms.Normalize(MNIST_MEAN, MNIST_STD),
])

# Evaluation pipeline: no augmentation, only tensor conversion and normalisation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(MNIST_MEAN, MNIST_STD),
])

# Arguments shared by both MNIST splits.
mnist_kwargs = dict(root="data", download=True)

# The augmented transform is used for the training split only.
training_data = datasets.MNIST(train=True, transform=train_transform, **mnist_kwargs)
test_data = datasets.MNIST(train=False, transform=test_transform, **mnist_kwargs)

In [28]:
batch_size = 64

# Create data loaders.
# The training loader reshuffles the samples at every epoch so that successive
# epochs do not replay the exact same sequence of mini-batches. The test loader
# keeps a fixed order, which makes evaluation (and the "first batch" image demos
# later in the notebook) deterministic.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

# Sanity check: show the shape / dtype of a single test batch.
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [29]:
# Select the compute device: an available accelerator if there is one, else CPU.
# `torch.accelerator` is only present in recent PyTorch releases; when it is
# missing, fall back to the explicit CUDA / MPS availability checks instead of
# failing with an AttributeError.
if hasattr(torch, "accelerator"):
    device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
elif torch.cuda.is_available():
    device = "cuda"
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

# Define MLP model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two 512-unit ReLU layers, 10 logits."""

    def __init__(self):
        super().__init__()
        in_features, hidden_units, num_classes = 28 * 28, 512, 10
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, num_classes),
        )

    def forward(self, x):
        """Flatten each sample of ``x`` and return the raw class logits."""
        return self.linear_relu_stack(self.flatten(x))

# Define CNN model
class CnnNetwork(nn.Module):
    """Small CNN: two conv/ReLU/max-pool stages followed by a two-layer head.

    The head expects ``64 * 7 * 7`` flattened features, i.e. the result of the
    two 2x2 poolings applied to a 28x28 single-channel input.
    """

    def __init__(self):
        super().__init__()
        feature_layers = [
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        head_layers = [
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        ]
        self.conv_stack = nn.Sequential(*feature_layers)
        self.flatten = nn.Flatten()
        self.fc_stack = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class logits for a batch of single-channel images ``x``."""
        features = self.flatten(self.conv_stack(x))
        return self.fc_stack(features)

# Instantiate both models on the selected device (MLP first, then CNN).
mlp_model = NeuralNetwork().to(device)
cnn_model = CnnNetwork().to(device)

# Print each architecture under its heading.
for title, net in (('MLP Model:', mlp_model), ('CNN Model:', cnn_model)):
    print(title)
    print(net)

Using mps device
MLP Model:
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
CNN Model:
CnnNetwork(
  (conv_stack): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc_stack): Sequential(
    (0): Linear(in_features=3136, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=10, bias=True)
  )
)


In [30]:
# Cross-entropy loss, passed to both train() and test() for every model.
loss_fn = nn.CrossEntropyLoss()
# NOTE(review): this SGD optimizer only covers cnn_model's parameters, and the
# training cell rebinds `optimizer` to a per-model Adam instance before any
# training step runs, so this object appears to be unused — confirm and remove.
optimizer = torch.optim.SGD(cnn_model.parameters(), lr=1e-3)

In [31]:
# One TensorBoard writer per model, keyed by the model's display name.
# NOTE(review): the log directory says "fashion_mnist" although the data loaded
# in this notebook is MNIST — kept as is so existing runs stay comparable.
writers = dict(
    MLP=SummaryWriter("runs/fashion_mnist_experiment/MLP"),
    CNN=SummaryWriter("runs/fashion_mnist_experiment/CNN"),
)

def train(dataloader, model, loss_fn, optimizer, writer, epoch=None):
    """Run one training epoch of ``model`` over ``dataloader``.

    Args:
        dataloader: Iterable of ``(X, y)`` batches that exposes ``.dataset``
            and supports ``len()``.
        model: Module to optimise; it is switched to training mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimizer that updates the parameters of ``model``.
        writer: TensorBoard-style writer providing ``add_images`` and
            ``add_scalar``. It is only used when ``epoch`` is given.
        epoch: Zero-based epoch index used to compute the TensorBoard steps.
            With the default ``None`` nothing is written to ``writer`` (the
            same convention ``test`` follows); previously this default raised
            a ``TypeError`` when computing the global step.

    Returns:
        None. Progress is printed every 100 batches.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    log_to_writer = epoch is not None
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Log up to 16 input images from the first batch of each epoch.
        if log_to_writer and batch == 0:
            writer.add_images('train_images', X[:16], global_step=epoch)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        loss_val = loss.item()
        if log_to_writer:
            # Global step = number of batches seen since the first epoch.
            writer.add_scalar('training loss', loss_val, batch + epoch * num_batches)

        if batch % 100 == 0:
            current = (batch + 1) * len(X)
            print(f"loss: {loss_val:>7f}  [{current:>5d}/{size:>5d}]")
        

def test(dataloader, model, loss_fn, writer, epoch=None):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    if epoch is not None:
        writer.add_scalar('test loss', test_loss, epoch)
        writer.add_scalar('accuracy', correct, epoch)
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [32]:
epochs = 5  # Number of passes over the training set for each model
# Train and evaluate both models in turn; loop variables (model, optimizer, writer) stay bound after the cell
models = [(mlp_model, 'MLP'), (cnn_model, 'CNN')]
for model, name in models:
    print(f"\n===== Entraînement et test du modèle {name} =====")
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # Fresh Adam optimizer for each model
    writer = writers[name]  # TensorBoard writer dedicated to this model
    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        train(train_dataloader, model, loss_fn, optimizer, writer, epoch=t)
        test(test_dataloader, model, loss_fn, writer, epoch=t)
    print(f"Done with {name}!")
    writer.flush()
    writer.close()


===== Entraînement et test du modèle MLP =====
Epoch 1
-------------------------------
loss: 2.308396  [   64/60000]
loss: 2.308396  [   64/60000]
loss: 0.715185  [ 6464/60000]
loss: 0.715185  [ 6464/60000]
loss: 0.530478  [12864/60000]
loss: 0.530478  [12864/60000]
loss: 0.358542  [19264/60000]
loss: 0.358542  [19264/60000]
loss: 0.331555  [25664/60000]
loss: 0.331555  [25664/60000]
loss: 0.264621  [32064/60000]
loss: 0.264621  [32064/60000]
loss: 0.316998  [38464/60000]
loss: 0.316998  [38464/60000]
loss: 0.309012  [44864/60000]
loss: 0.309012  [44864/60000]
loss: 0.366680  [51264/60000]
loss: 0.366680  [51264/60000]
loss: 0.456481  [57664/60000]
loss: 0.456481  [57664/60000]
Test Error: 
 Accuracy: 93.1%, Avg loss: 0.203356 

Epoch 2
-------------------------------
loss: 0.385536  [   64/60000]
Test Error: 
 Accuracy: 93.1%, Avg loss: 0.203356 

Epoch 2
-------------------------------
loss: 0.385536  [   64/60000]
loss: 0.291534  [ 6464/60000]
loss: 0.291534  [ 6464/60000]
loss: 0.

In [33]:
# Save the CNN weights
torch.save(cnn_model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

# Export the CNN to ONNX for in-browser inference
onnx_path = "model.onnx"
# The dummy input must live on the same device as the model's parameters
cnn_device = next(cnn_model.parameters()).device
dummy_input = torch.randn(1, 1, 28, 28, device=cnn_device)
# Make sure the model is on that device
cnn_model = cnn_model.to(cnn_device)

# Axis 0 of both the input and the output is exported as a dynamic batch size.
export_options = dict(
    input_names=['input'],
    output_names=['output'],
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}},
    opset_version=11,
)
torch.onnx.export(cnn_model, dummy_input, onnx_path, **export_options)
print(f"Modèle CNN exporté en ONNX : {onnx_path}")

Saved PyTorch Model State to model.pth
Modèle CNN exporté en ONNX : model.onnx


In [34]:
# Build a fresh CNN and restore the weights saved in "model.pth" before inference.
model = CnnNetwork().to(device)
# map_location remaps the stored tensors onto the current device, so the
# checkpoint also loads when it was written from a different device.
model.load_state_dict(torch.load("model.pth", map_location=device))
model.eval()

# Class index -> label shown to the user.
classes = [
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"
]

# Predict on the first sample of the test set. The dataset is indexed once so
# the transform pipeline runs a single time for this sample.
x, y = test_data[0]
with torch.no_grad():
    x = x.to(device).unsqueeze(0)  # Add batch dimension
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "7", Actual: "7"


In [35]:
def log_image_batch(log_dir, images, tag='MNIST_images', max_images=16, global_step=0):
    """Write the first ``max_images`` entries of ``images`` to TensorBoard.

    Args:
        log_dir: Directory of the TensorBoard run to write into.
        images: Image batch accepted by ``SummaryWriter.add_images``.
        tag: Name under which the images appear in the Images tab.
        max_images: Maximum number of images taken from the start of the batch.
        global_step: Step value recorded with the images.
    """
    # The context manager closes the writer even if add_images raises.
    with SummaryWriter(log_dir) as image_writer:
        image_writer.add_images(tag, images[:max_images], global_step=global_step)


# The test loader is not shuffled, so its first batch is always the same:
# fetch it once and reuse it for the three runs below.
example_data, example_targets = next(iter(test_dataloader))
# First 16 images of the batch (what each run below receives).
img_grid = example_data[:16]

# Send a batch of MNIST images to TensorBoard (Images tab)
log_image_batch("runs/mnist_images_demo", example_data)
print("Batch d'images MNIST envoyé à TensorBoard (onglet Image)")

# Same batch, logged under the MLP run
log_image_batch("runs/mnist_images_demo/MLP", example_data)
print("Batch d'images MNIST (MLP) envoyé à TensorBoard (onglet Image)")

# Same batch, logged under the CNN run
log_image_batch("runs/mnist_images_demo/CNN", example_data)
print("Batch d'images MNIST (CNN) envoyé à TensorBoard (onglet Image)")

Batch d'images MNIST envoyé à TensorBoard (onglet Image)
Batch d'images MNIST (MLP) envoyé à TensorBoard (onglet Image)
Batch d'images MNIST (CNN) envoyé à TensorBoard (onglet Image)


# Modèles MLP et CNN pour la classification des chiffres MNIST

Les deux modèles suivants sont adaptés pour la classification des chiffres manuscrits du dataset MNIST.

In [36]:
# MLP model for MNIST
class MNISTMLP(nn.Module):
    """Multi-layer perceptron: 784 -> 256 -> 128 -> 10 with ReLU in between."""

    def __init__(self):
        super().__init__()
        widths = (28 * 28, 256, 128, 10)
        modules = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            modules.append(nn.Linear(fan_in, fan_out))
            modules.append(nn.ReLU())
        modules.pop()  # the output layer produces raw logits, no activation
        self.flatten = nn.Flatten()
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Flatten each sample of ``x`` and return the class logits."""
        flat = self.flatten(x)
        return self.layers(flat)

# NOTE(review): this rebinds `mlp_model`, which earlier in the notebook refers
# to the NeuralNetwork instance used by the training loop, to a new MNISTMLP
# instance; no training of the new instance is visible in this notebook.
mlp_model = MNISTMLP().to(device)
print(mlp_model)

MNISTMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (layers): Sequential(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=10, bias=True)
  )
)


In [37]:
# CNN model for MNIST
class MNISTCNN(nn.Module):
    """CNN with two conv/ReLU/max-pool stages (1->32->64 channels) and a dense head."""

    def __init__(self):
        super().__init__()
        stages = []
        for in_channels, out_channels in ((1, 32), (32, 64)):
            stages += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_stack = nn.Sequential(*stages)
        self.flatten = nn.Flatten()
        # 64 channels * 7 * 7 spatial positions feed the first dense layer.
        self.fc_stack = nn.Sequential(
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        """Return class logits for a batch of single-channel images ``x``."""
        feature_maps = self.conv_stack(x)
        return self.fc_stack(self.flatten(feature_maps))

# NOTE(review): this rebinds `cnn_model`, which earlier in the notebook refers
# to the CnnNetwork instance that was trained, saved and exported, to a new
# MNISTCNN instance; no training of the new instance is visible in this notebook.
cnn_model = MNISTCNN().to(device)
print(cnn_model)

MNISTCNN(
  (conv_stack): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc_stack): Sequential(
    (0): Linear(in_features=3136, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=10, bias=True)
  )
)


# Modèle CNN optimisé pour MNIST avec MaxPool2d, Dropout, BatchNorm

Ce modèle CNN utilise MaxPool2d, Dropout et BatchNorm pour améliorer la robustesse et la généralisation sur MNIST.

In [38]:
class OptimizedMNISTCNN(nn.Module):
    """CNN with batch normalisation and dropout in every stage.

    Each convolutional stage is Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d ->
    Dropout(0.25); the head is Linear -> BatchNorm1d -> ReLU -> Dropout(0.5) ->
    Linear, producing 10 logits.
    """

    def __init__(self):
        super().__init__()
        conv_layers = []
        for in_channels, out_channels in ((1, 32), (32, 64)):
            conv_layers.extend([
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Dropout(0.25),
            ])
        self.conv_stack = nn.Sequential(*conv_layers)
        self.flatten = nn.Flatten()
        hidden_units = 128
        self.fc_stack = nn.Sequential(
            nn.Linear(64 * 7 * 7, hidden_units),
            nn.BatchNorm1d(hidden_units),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden_units, 10),
        )

    def forward(self, x):
        """Return class logits for a batch of single-channel images ``x``."""
        flat_features = self.flatten(self.conv_stack(x))
        return self.fc_stack(flat_features)

# Instantiate the batch-norm / dropout CNN on the selected device and print its layers.
optimized_cnn_model = OptimizedMNISTCNN().to(device)
print(optimized_cnn_model)

OptimizedMNISTCNN(
  (conv_stack): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout(p=0.25, inplace=False)
    (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Dropout(p=0.25, inplace=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc_stack): Sequential(
    (0): Linear(in_features=3136, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=128, out_features=10, bias=True)
 

# Modèle MLP optimisé pour MNIST avec Dropout et BatchNorm

Ce modèle MLP utilise Dropout et BatchNorm pour améliorer la robustesse et la généralisation sur MNIST.

In [39]:
class OptimizedMNISTMLP(nn.Module):
    """MLP with batch normalisation and dropout after each hidden layer.

    Hidden blocks are Linear -> BatchNorm1d -> ReLU -> Dropout(0.3) with widths
    256 and 128, followed by a final Linear layer producing 10 logits.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        blocks = []
        for fan_in, fan_out in ((28 * 28, 256), (256, 128)):
            blocks += [
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                nn.ReLU(),
                nn.Dropout(0.3),
            ]
        blocks.append(nn.Linear(128, 10))
        self.layers = nn.Sequential(*blocks)

    def forward(self, x):
        """Flatten each sample of ``x`` and return the class logits."""
        flat = self.flatten(x)
        return self.layers(flat)

# Instantiate the batch-norm / dropout MLP on the selected device and print its layers.
optimized_mlp_model = OptimizedMNISTMLP().to(device)
print(optimized_mlp_model)

OptimizedMNISTMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (layers): Sequential(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.3, inplace=False)
    (8): Linear(in_features=128, out_features=10, bias=True)
  )
)


In [42]:
from PIL import Image, ImageOps
import numpy as np
import os

def preprocess_drawn_image(img_path, invert=True):
    """Load an image file and turn it into a normalised ``(1, 1, 28, 28)`` tensor.

    Args:
        img_path: Path of the image file to read.
        invert: When True (default, the previous behaviour) the grey levels are
            inverted, which suits a dark digit drawn on a light background.
            Pass False for images that are already light-on-dark.

    Returns:
        A ``torch.float32`` tensor of shape ``(1, 1, 28, 28)`` normalised with
        the same mean / std (0.1307, 0.3081) as the dataset transforms.
    """
    # Open inside a context manager so the file handle is released promptly;
    # convert()/resize() return new in-memory images that outlive the file.
    with Image.open(img_path) as source:
        # Greyscale, then resize to the 28x28 input size of the models.
        img = source.convert('L').resize((28, 28), Image.LANCZOS)
    if invert:
        # Invert the colours so the digit becomes light on a dark background.
        img = ImageOps.invert(img)
    # Scale to [0, 1], then apply the dataset normalisation.
    pixels = np.array(img) / 255.0
    pixels = (pixels - 0.1307) / 0.3081
    # Add the batch and channel dimensions.
    return torch.tensor(pixels, dtype=torch.float32).unsqueeze(0).unsqueeze(0)

# Example with a hand-drawn image: point `img_path` at your own file.
img_path = 'trois.png'
if not os.path.exists(img_path):
    print(f"Le fichier '{img_path}' est introuvable. Veuillez fournir un chemin d'image valide.")
else:
    input_tensor = preprocess_drawn_image(img_path).to(device)

    # Prediction with the model loaded earlier in the notebook
    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
        pred = output.argmax(1).item()
        print(f"Chiffre prédit pour le dessin : {pred}")


Chiffre prédit pour le dessin : 3


## Prétraitement côté web pour de bonnes prédictions MNIST

Pour que le modèle fonctionne aussi bien sur le site web que dans le notebook, appliquez ce prétraitement à l'image du canvas avant de l'envoyer au modèle ONNX :

```javascript
/**
 * Converts a drawing canvas into the input tensor expected by the ONNX model.
 *
 * Assumes a dark digit drawn on a light (or transparent) background.
 *
 * @param {HTMLCanvasElement} canvas - Canvas holding the drawing.
 * @returns {ort.Tensor} float32 tensor of shape [1, 1, 28, 28].
 */
function preprocessCanvasImage(canvas) {
    const SIZE = 28;
    const MEAN = 0.1307;
    const STD = 0.3081;

    // Resize to 28x28 on an off-screen canvas
    const tmp = document.createElement('canvas');
    tmp.width = SIZE;
    tmp.height = SIZE;
    const ctx = tmp.getContext('2d');
    // Paint an opaque white background first: transparent pixels read back as
    // RGBA (0, 0, 0, 0) and would otherwise be mistaken for black ink.
    ctx.fillStyle = '#ffffff';
    ctx.fillRect(0, 0, SIZE, SIZE);
    ctx.drawImage(canvas, 0, 0, SIZE, SIZE);

    // Read the RGBA data (4 bytes per pixel)
    const rgba = ctx.getImageData(0, 0, SIZE, SIZE).data;
    const values = new Float32Array(SIZE * SIZE);
    for (let i = 0; i < values.length; i++) {
        const r = rgba[i * 4];
        const g = rgba[i * 4 + 1];
        const b = rgba[i * 4 + 2];
        // Grey level in [0, 1] (plain channel average)
        const gray = (r + g + b) / 3 / 255;
        // Invert (white background, dark digit), then normalise like the training data
        values[i] = ((1 - gray) - MEAN) / STD;
    }
    // Wrap the Float32Array in an ort.Tensor of shape [1, 1, 28, 28]
    return new ort.Tensor('float32', values, [1, 1, SIZE, SIZE]);
}
```

- Redimensionnez à 28x28
- Convertissez en niveaux de gris
- Inversez les couleurs (si besoin)
- Appliquez la normalisation MNIST
- Envoyez le tenseur au modèle ONNX avec la forme `[1, 1, 28, 28]`

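Cela permet d'obtenir des résultats proches entre le site web et le notebook, à condition que le chiffre soit dessiné en foncé sur un fond clair. De légers écarts restent possibles, car le redimensionnement du canvas et la conversion en niveaux de gris (moyenne des canaux) ne sont pas strictement identiques à ceux de PIL.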