In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from sklearn.metrics import accuracy_score, f1_score
import time

# Configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Transformation des données
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Chargement MNIST
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Architecture CNN
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64*7*7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64*7*7)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Entraînement et évaluation
def train_cnn(epochs=10):
    model = CNN().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    train_times = []
    for epoch in range(epochs):
        start_time = time.time()
        model.train()
        total_loss = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        train_time = time.time() - start_time
        train_times.append(train_time)

        # Évaluation
        model.eval()
        all_preds, all_targets = [], []
        with torch.no_grad():
            for images, labels in test_loader:
                outputs = model(images.to(device))
                preds = outputs.argmax(dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_targets.extend(labels.cpu().numpy())

        accuracy = accuracy_score(all_targets, all_preds)
        f1 = f1_score(all_targets, all_preds, average='macro')
        print(f'Epoch {epoch+1}: Train Loss: {total_loss/len(train_loader):.4f}, Accuracy: {accuracy:.4f}, F1: {f1:.4f}, Time: {train_time:.2f}s')

    avg_time = sum(train_times)/len(train_times)
    print(f'Average epoch time: {avg_time:.2f}s')
    return model

# Exécution
cnn_model = train_cnn()

100%|██████████| 9.91M/9.91M [00:01<00:00, 5.00MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 130kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.25MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.27MB/s]


Epoch 1: Train Loss: 0.2062, Accuracy: 0.9848, F1: 0.9847, Time: 16.50s
Epoch 2: Train Loss: 0.0786, Accuracy: 0.9870, F1: 0.9868, Time: 15.95s
Epoch 3: Train Loss: 0.0589, Accuracy: 0.9911, F1: 0.9910, Time: 16.31s
Epoch 4: Train Loss: 0.0478, Accuracy: 0.9898, F1: 0.9897, Time: 16.40s
Epoch 5: Train Loss: 0.0402, Accuracy: 0.9918, F1: 0.9917, Time: 15.23s
Epoch 6: Train Loss: 0.0337, Accuracy: 0.9909, F1: 0.9908, Time: 15.27s
Epoch 7: Train Loss: 0.0306, Accuracy: 0.9923, F1: 0.9922, Time: 15.36s
Epoch 8: Train Loss: 0.0261, Accuracy: 0.9923, F1: 0.9922, Time: 15.28s
Epoch 9: Train Loss: 0.0240, Accuracy: 0.9905, F1: 0.9904, Time: 15.46s
Epoch 10: Train Loss: 0.0229, Accuracy: 0.9927, F1: 0.9926, Time: 15.05s
Average epoch time: 15.68s


In [3]:
# Architecture MiniFasterRCNN
class MiniFasterRCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(32*7*7, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        return self.classifier(x)

# Entraînement spécifique RCNN
def train_frcnn(epochs=5):
    model = MiniFasterRCNN().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Adaptation des données pour RCNN
    def collate_fn(batch):
        images = torch.stack([item[0] for item in batch])
        targets = [{'labels': torch.tensor(item[1], dtype=torch.int64)} for item in batch]
        return images, targets

    frcnn_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

    for epoch in range(epochs):
        start_time = time.time()
        model.train()
        total_loss = 0

        for images, targets in frcnn_loader:
            images = images.to(device)
            targets = [{k: v.to(device) for k,v in t.items()} for t in targets]

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, torch.stack([t['labels'] for t in targets]))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Évaluation simplifiée
        model.eval()
        correct = 0
        with torch.no_grad():
            for images, labels in test_loader:
                outputs = model(images.to(device))
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels.to(device)).sum().item()

        acc = correct / len(test_dataset)
        print(f'Epoch {epoch+1}: Loss {total_loss/len(frcnn_loader):.4f}, Acc: {acc:.4f}, Time: {time.time()-start_time:.2f}s')

    return model

# Exécution
frcnn_model = train_frcnn()

Epoch 1: Loss 0.1305, Acc: 0.9805, Time: 20.27s
Epoch 2: Loss 0.0454, Acc: 0.9874, Time: 20.70s
Epoch 3: Loss 0.0299, Acc: 0.9891, Time: 20.71s
Epoch 4: Loss 0.0231, Acc: 0.9884, Time: 20.57s
Epoch 5: Loss 0.0173, Acc: 0.9897, Time: 20.27s


In [8]:
def compare_models(cnn_model, frcnn_model):
    # Fonction d'évaluation commune
    def evaluate(model, loader):
        model.eval()
        all_preds, all_targets = [], []
        with torch.no_grad():
            for images, labels in loader:
                outputs = model(images.to(device))
                preds = outputs.argmax(dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_targets.extend(labels.cpu().numpy())
        return {
            'accuracy': accuracy_score(all_targets, all_preds),
            'f1': f1_score(all_targets, all_preds, average='macro')
        }

    # Mesure du temps d'inférence
    def benchmark(model, loader):
        model.eval()
        start = time.time()
        with torch.no_grad():
            for images, _ in loader:
                _ = model(images.to(device))
        return (time.time() - start) / len(loader.dataset)

    # Évaluation
    cnn_metrics = evaluate(cnn_model, test_loader)
    frcnn_metrics = evaluate(frcnn_model, test_loader)

    # Benchmark
    cnn_time = benchmark(cnn_model, test_loader)
    frcnn_time = benchmark(frcnn_model, test_loader)

    # Affichage
    print("\nComparaison détaillée:")
    print(f"{'Metric':<15}{'CNN':<15}{'MiniFasterRCNN':<15}")
    print(f"{'Accuracy':<15}{cnn_metrics['accuracy']:<15.4f}{frcnn_metrics['accuracy']:<15.4f}")
    print(f"{'F1 Score':<15}{cnn_metrics['f1']:<15.4f}{frcnn_metrics['f1']:<15.4f}")
    print(f"{'Inference Time':<15}{cnn_time:<15.6f}{frcnn_time:<15.6f}")

# Exécution
compare_models(cnn_model, frcnn_model)


Comparaison détaillée:
Metric         CNN            MiniFasterRCNN 
Accuracy       0.9927         0.9897         
F1 Score       0.9926         0.9896         
Inference Time 0.000356       0.000226       


In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time


In [16]:
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),  # Convertir MNIST en RGB (3 canaux)
    transforms.Resize((224, 224)),  # Adapter aux modèles pré-entraînés
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalisation ImageNet
])

# Chargement des datasets
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Chargement des données
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Vérification du device (GPU si dispo)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Utilisation de :", device)


Utilisation de : cuda


In [17]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 56 * 56, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialisation du modèle
cnn_model = SimpleCNN().to(device)


In [18]:
def train_model(model, train_loader, test_loader, num_epochs=3):
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    start_time = time.time()

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_acc = correct / total
        print(f'Epoch {epoch+1}/{num_epochs} - Loss: {running_loss/len(train_loader):.4f} - Acc: {epoch_acc:.4f}')

    train_time = time.time() - start_time
    print(f'Training Time: {train_time:.2f}s')

    return train_time


In [19]:

cnn_time = train_model(cnn_model, train_loader, test_loader)


Epoch 1/3 - Loss: 0.2026 - Acc: 0.9472
Epoch 2/3 - Loss: 0.0460 - Acc: 0.9858
Epoch 3/3 - Loss: 0.0253 - Acc: 0.9912
Training Time: 548.61s


In [20]:
def fine_tune_model(model_name, num_epochs=3):
    if model_name == 'vgg16':
        model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
        model.classifier[6] = nn.Linear(4096, 10)
    elif model_name == 'alexnet':
        model = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)
        model.classifier[6] = nn.Linear(4096, 10)

    model = model.to(device)

    optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=0.01)
    criterion = nn.CrossEntropyLoss()

    train_loader_p = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader_p = DataLoader(test_dataset, batch_size=64, shuffle=False)

    start_time = time.time()

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader_p:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_acc = correct / total
        print(f'{model_name} Epoch {epoch+1}/{num_epochs} - Loss: {running_loss/len(train_loader_p):.4f} - Acc: {epoch_acc:.4f}')

    train_time = time.time() - start_time
    print(f'{model_name} Training Time: {train_time:.2f}s')

    return train_time

# Entraînement de VGG16 et AlexNet
vgg_time = fine_tune_model('vgg16')
alex_time = fine_tune_model('alexnet')


vgg16 Epoch 1/3 - Loss: 0.0772 - Acc: 0.9776
vgg16 Epoch 2/3 - Loss: 0.0321 - Acc: 0.9909
vgg16 Epoch 3/3 - Loss: 0.0233 - Acc: 0.9932
vgg16 Training Time: 2939.66s
alexnet Epoch 1/3 - Loss: 0.0802 - Acc: 0.9762
alexnet Epoch 2/3 - Loss: 0.0374 - Acc: 0.9898
alexnet Epoch 3/3 - Loss: 0.0283 - Acc: 0.9919
alexnet Training Time: 507.70s


In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import time

# Configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Transformation des données
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Chargement MNIST
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

class EfficientViT(nn.Module):
    def __init__(self, image_size=28, patch_size=7, num_classes=10, dim=48, depth=2, heads=3):
        super().__init__()
        num_patches = (image_size // patch_size) ** 2
        self.patch_size = patch_size

        # Patch embedding plus efficace
        self.patch_embed = nn.Conv2d(1, dim, kernel_size=patch_size, stride=patch_size)

        # Token de classe et position embedding
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, dim))

        # Transformer optimisé
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=dim,
                nhead=heads,
                dim_feedforward=dim*2,
                dropout=0.1,
                activation='gelu',
                batch_first=True,
                norm_first=True
            ),
            num_layers=depth
        )

        # Tête de classification
        self.head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, num_classes)
        )

    def forward(self, x):
        # Embedding des patches
        x = self.patch_embed(x)  # [B, dim, H', W']
        x = x.flatten(2).transpose(1, 2)  # [B, num_patches, dim]

        # Ajout du token [CLS]
        cls_tokens = self.cls_token.expand(x.size(0), -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)

        # Ajout position embedding
        x += self.pos_embed

        # Transformer
        x = self.transformer(x)

        # Classification
        return self.head(x[:, 0])

def train_efficient_vit(epochs=5, batch_size=32):
    # Initialisation
    model = EfficientViT().to(device)
    optimizer = optim.AdamW(model.parameters(), lr=3e-4, weight_decay=0.01)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
    criterion = nn.CrossEntropyLoss()

    # DataLoader
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # Entraînement
    print(f"\nTraining EfficientViT for {epochs} epochs...")
    start_time = time.time()

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        correct = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad(set_to_none=True)

            # Forward + backward
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Métriques
            total_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()

        scheduler.step()
        epoch_acc = correct / len(train_dataset)
        print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss/len(train_loader):.4f} - Acc: {epoch_acc:.4f}")

    # Évaluation
    model.eval()
    correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images.to(device))
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels.to(device)).sum().item()

    final_acc = correct / len(test_dataset)
    train_time = time.time() - start_time
    params = sum(p.numel() for p in model.parameters()) / 1e6

    print(f"\nTraining Complete!")
    print(f"- Final Accuracy: {final_acc:.4f}")
    print(f"- Training Time: {train_time:.2f}s")
    print(f"- Parameters: {params:.2f}M")

    return model, final_acc, train_time

# Exécution
vit_model, vit_acc, vit_time = train_efficient_vit()





Training EfficientViT for 5 epochs...
Epoch 1/5 - Loss: 0.6868 - Acc: 0.7847
Epoch 2/5 - Loss: 0.2952 - Acc: 0.9107
Epoch 3/5 - Loss: 0.2315 - Acc: 0.9291
Epoch 4/5 - Loss: 0.2053 - Acc: 0.9369
Epoch 5/5 - Loss: 0.1899 - Acc: 0.9411

Training Complete!
- Final Accuracy: 0.9567
- Training Time: 131.03s
- Parameters: 0.04M
