In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from sklearn.metrics import accuracy_score, f1_score
import time

# Configuration: use the GPU when CUDA is available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Preprocessing: convert each image to a tensor, then normalise it with
# mean 0.1307 and standard deviation 0.3081 (one value per channel)
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# MNIST train and test splits, rooted at ./data with download enabled
train_dataset = datasets.MNIST('./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST('./data', train=False, transform=transform, download=True)

# Shuffled batches of 64 for training; fixed-order batches of 1000 for testing
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)

# CNN architecture
class CNN(nn.Module):
    """Two-convolution image classifier that outputs 10 logits per sample.

    Each 3x3 convolution (padding 1) is followed by ReLU and a 2x2 max-pool.
    ``fc1`` expects 64*7*7 flattened features, which is what the two poolings
    leave for a 28x28 single-channel input.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64*7*7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, 10)``.

        Args:
            x: image batch of shape ``(batch, 1, H, W)``; H = W = 28 yields the
                64*7*7 features that ``fc1`` expects.

        Raises:
            RuntimeError: if the flattened feature size does not match ``fc1``.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample and keep the batch dimension fixed. Reshaping with
        # view(-1, 64*7*7) would let the batch dimension absorb a size mismatch
        # (e.g. 49 images of 32x32 silently become 64 rows); here a wrong input
        # size makes fc1 raise instead.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Training and evaluation
def train_cnn(epochs=10):
    """Train a fresh ``CNN`` and score it on the test set after every epoch.

    Uses Adam (lr=0.001) with cross-entropy loss on the module-level
    ``train_loader``; evaluation runs on the module-level ``test_loader``.
    Each epoch prints the mean training loss, test accuracy, macro-F1 and the
    wall-clock time of the training pass only (evaluation is not timed).
    The mean epoch time is printed at the end.

    Args:
        epochs: number of passes over ``train_loader``. With 0 the final
            average divides by an empty list's length and raises
            ZeroDivisionError.

    Returns:
        The trained ``CNN`` instance.
    """
    model = CNN().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    def run_training_pass():
        """Do one optimisation pass over train_loader; return the summed batch losses."""
        model.train()
        loss_sum = 0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            optimizer.zero_grad()
            batch_loss = criterion(model(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()
            loss_sum += batch_loss.item()
        return loss_sum

    def predict_test_set():
        """Collect predicted and true labels over test_loader without gradients."""
        model.eval()
        predicted, expected = [], []
        with torch.no_grad():
            for batch_x, batch_y in test_loader:
                logits = model(batch_x.to(device))
                predicted.extend(logits.argmax(dim=1).cpu().numpy())
                expected.extend(batch_y.cpu().numpy())
        return predicted, expected

    train_times = []
    for epoch in range(epochs):
        # Only the training pass is inside the timed region
        start_time = time.time()
        total_loss = run_training_pass()
        train_time = time.time() - start_time
        train_times.append(train_time)

        # Evaluation
        all_preds, all_targets = predict_test_set()
        accuracy = accuracy_score(all_targets, all_preds)
        f1 = f1_score(all_targets, all_preds, average='macro')
        print(f'Epoch {epoch+1}: Train Loss: {total_loss/len(train_loader):.4f}, Accuracy: {accuracy:.4f}, F1: {f1:.4f}, Time: {train_time:.2f}s')

    avg_time = sum(train_times)/len(train_times)
    print(f'Average epoch time: {avg_time:.2f}s')
    return model

# Run: train the CNN with the default epoch count and keep the returned model
cnn_model = train_cnn()

Epoch 1: Train Loss: 0.2017, Accuracy: 0.9860, F1: 0.9859, Time: 16.70s
Epoch 2: Train Loss: 0.0790, Accuracy: 0.9885, F1: 0.9885, Time: 17.43s
Epoch 3: Train Loss: 0.0564, Accuracy: 0.9904, F1: 0.9903, Time: 17.14s
Epoch 4: Train Loss: 0.0467, Accuracy: 0.9917, F1: 0.9916, Time: 15.27s
Epoch 5: Train Loss: 0.0400, Accuracy: 0.9917, F1: 0.9916, Time: 15.34s
Epoch 6: Train Loss: 0.0345, Accuracy: 0.9927, F1: 0.9926, Time: 15.81s
Epoch 7: Train Loss: 0.0292, Accuracy: 0.9926, F1: 0.9925, Time: 15.35s
Epoch 8: Train Loss: 0.0285, Accuracy: 0.9923, F1: 0.9922, Time: 15.55s
Epoch 9: Train Loss: 0.0236, Accuracy: 0.9931, F1: 0.9930, Time: 15.25s
Epoch 10: Train Loss: 0.0216, Accuracy: 0.9920, F1: 0.9919, Time: 15.92s
Average epoch time: 15.98s


In [12]:
# MiniFasterRCNN architecture
class MiniFasterRCNN(nn.Module):
    """Small convolutional classifier split into ``features`` and ``classifier``.

    NOTE(review): despite the name, this class defines no region-proposal or
    box-regression components; ``forward`` returns one 10-way logit vector per
    image.
    """

    def __init__(self):
        super().__init__()
        # Two conv -> ReLU -> 2x2 max-pool stages (1 -> 16 -> 32 channels)
        conv_stack = [
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Dense head: 32*7*7 flattened features -> 128 -> 10 logits
        dense_stack = [
            nn.Linear(in_features=32*7*7, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=10),
        ]
        self.classifier = nn.Sequential(*dense_stack)

    def forward(self, x):
        """Run the conv stack, flatten each sample, and return the class logits."""
        feature_maps = self.features(x)
        flat = feature_maps.view(feature_maps.size(0), -1)
        logits = self.classifier(flat)
        return logits

# RCNN-specific training
def train_frcnn(epochs=5):
    """Train a fresh ``MiniFasterRCNN`` and report test accuracy after every epoch.

    Builds its own training loader over the module-level ``train_dataset``
    (batch size 32, shuffled) whose collate function wraps each label in a
    ``{'labels': tensor}`` dict. Optimises with Adam (lr=0.001) and
    cross-entropy loss. Each epoch prints the mean training loss, accuracy on
    the module-level ``test_loader`` and the elapsed time, which here covers
    both the training pass and the evaluation pass.

    Args:
        epochs: number of passes over the training data.

    Returns:
        The trained ``MiniFasterRCNN`` instance.
    """
    model = MiniFasterRCNN().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Adapt the samples to an RCNN-style (images, list-of-target-dicts) batch
    def collate_fn(batch):
        """Stack the images and wrap every label as ``{'labels': int64 tensor}``."""
        pixel_batch, label_dicts = [], []
        for sample in batch:
            pixel_batch.append(sample[0])
            label_dicts.append({'labels': torch.tensor(sample[1], dtype=torch.int64)})
        return torch.stack(pixel_batch), label_dicts

    frcnn_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

    for epoch in range(epochs):
        start_time = time.time()
        model.train()
        total_loss = 0

        for images, targets in frcnn_loader:
            images = images.to(device)
            # Move each label to the device, then rebuild one label vector
            label_batch = torch.stack([target['labels'].to(device) for target in targets])

            optimizer.zero_grad()
            loss = criterion(model(images), label_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Simplified evaluation: count exact matches on the test set
        model.eval()
        hits_per_batch = []
        with torch.no_grad():
            for images, labels in test_loader:
                preds = model(images.to(device)).argmax(dim=1)
                hits_per_batch.append((preds == labels.to(device)).sum().item())

        acc = sum(hits_per_batch) / len(test_dataset)
        print(f'Epoch {epoch+1}: Loss {total_loss/len(frcnn_loader):.4f}, Acc: {acc:.4f}, Time: {time.time()-start_time:.2f}s')

    return model

# Run: train MiniFasterRCNN with the default epoch count and keep the returned model
frcnn_model = train_frcnn()

Epoch 1: Loss 0.1319, Acc: 0.9840, Time: 20.90s
Epoch 2: Loss 0.0435, Acc: 0.9876, Time: 21.12s
Epoch 3: Loss 0.0312, Acc: 0.9885, Time: 20.09s
Epoch 4: Loss 0.0235, Acc: 0.9845, Time: 20.86s
Epoch 5: Loss 0.0187, Acc: 0.9916, Time: 20.85s


In [13]:
def compare_models(cnn_model, frcnn_model):
    """Print test accuracy, macro-F1 and per-sample inference time for two models.

    Both models are run on the module-level ``test_loader`` with inputs moved
    to the module-level ``device``.

    Args:
        cnn_model: first model to evaluate (shown in the "CNN" column).
        frcnn_model: second model to evaluate (shown in the "MiniFasterRCNN" column).

    Returns:
        None; the comparison table is printed.
    """
    # Shared evaluation routine
    def evaluate(model, loader):
        """Return accuracy and macro-F1 of ``model`` over ``loader``."""
        model.eval()
        all_preds, all_targets = [], []
        with torch.no_grad():
            for images, labels in loader:
                outputs = model(images.to(device))
                preds = outputs.argmax(dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_targets.extend(labels.cpu().numpy())
        return {
            'accuracy': accuracy_score(all_targets, all_preds),
            'f1': f1_score(all_targets, all_preds, average='macro')
        }

    # Inference-time measurement
    def benchmark(model, loader):
        """Return mean wall-clock seconds per sample for one pass over ``loader``.

        The timed region includes data loading and host-to-device copies.
        """
        model.eval()
        on_cuda = device.type == "cuda"
        with torch.no_grad():
            # CUDA kernels are launched asynchronously: drain pending work
            # before starting the clock and again before stopping it, so the
            # measurement covers the actual computation.
            if on_cuda:
                torch.cuda.synchronize()
            start = time.perf_counter()
            for images, _ in loader:
                _ = model(images.to(device))
            if on_cuda:
                torch.cuda.synchronize()
            elapsed = time.perf_counter() - start
        return elapsed / len(loader.dataset)

    # Evaluation
    cnn_metrics = evaluate(cnn_model, test_loader)
    frcnn_metrics = evaluate(frcnn_model, test_loader)

    # Benchmark
    cnn_time = benchmark(cnn_model, test_loader)
    frcnn_time = benchmark(frcnn_model, test_loader)

    # Display
    print("\nComparaison détaillée:")
    print(f"{'Metric':<15}{'CNN':<15}{'MiniFasterRCNN':<15}")
    print(f"{'Accuracy':<15}{cnn_metrics['accuracy']:<15.4f}{frcnn_metrics['accuracy']:<15.4f}")
    print(f"{'F1 Score':<15}{cnn_metrics['f1']:<15.4f}{frcnn_metrics['f1']:<15.4f}")
    print(f"{'Inference Time':<15}{cnn_time:<15.6f}{frcnn_time:<15.6f}")

# Run: compare the two trained models on the test set
compare_models(cnn_model, frcnn_model)


Comparaison détaillée:
Metric         CNN            MiniFasterRCNN 
Accuracy       0.9920         0.9916         
F1 Score       0.9919         0.9915         
Inference Time 0.000192       0.000196       


In [16]:
# Fine-tune an ImageNet-pretrained torchvision classifier on the 10-class task
def fine_tune_model(model_name, num_epochs=3):
    """Fine-tune a pretrained model and report its test accuracy.

    The last classifier layer is replaced by a 10-way linear layer, then the
    whole network is trained with AdamW (lr=1e-4, weight_decay=0.01) and a
    StepLR schedule that multiplies the learning rate by 0.1 after each epoch.
    The forward pass and loss run under autocast on CUDA; no gradient scaler
    is used.

    Relies on names defined outside this block: ``models``, ``device``,
    ``train_pretrain`` and ``test_pretrain``.
    NOTE(review): presumably ``torchvision.models`` and datasets already
    adapted to the pretrained networks' input format; confirm against the
    cell that defines them.

    Args:
        model_name: one of 'vgg16', 'vgg11' or 'alexnet'.
        num_epochs: number of training epochs.

    Returns:
        ``(final_acc, train_time)``: test accuracy and the seconds elapsed from
        just before the first epoch until after the test evaluation. Returns
        ``(0.0, 0.0)`` if a RuntimeError is raised along the way; the error is
        printed, not re-raised.

    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
    """
    try:
        # Load pretrained weights with the torchvision >= 0.13 ``weights=`` API
        if model_name == 'vgg16':
            model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
        elif model_name == 'vgg11':
            model = models.vgg11(weights=models.VGG11_Weights.IMAGENET1K_V1)
        elif model_name == 'alexnet':
            model = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)
        else:
            # Fail early with a clear message instead of hitting an unbound
            # ``model`` variable further down.
            raise ValueError(
                f"Unsupported model_name {model_name!r}; expected 'vgg16', 'vgg11' or 'alexnet'"
            )
        # Swap the final classifier layer for a 10-class output layer
        model.classifier[6] = nn.Linear(4096, 10)

        model = model.to(device)

        # Smaller batches to reduce memory use
        train_loader_p = DataLoader(train_pretrain, batch_size=32, shuffle=True)
        test_loader_p = DataLoader(test_pretrain, batch_size=64, shuffle=False)

        optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=0.01)
        criterion = nn.CrossEntropyLoss()
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

        # Mixed precision is enabled only on CUDA; elsewhere the context is a no-op
        use_amp = device.type == 'cuda'

        print(f"\nFine-tuning {model_name} (batch_size=32)...")
        start_time = time.time()

        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for i, (images, labels) in enumerate(train_loader_p):
                # Periodically release cached GPU memory
                if i % 100 == 0:
                    torch.cuda.empty_cache()

                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad(set_to_none=True)  # drop gradients rather than zero-fill them

                # torch.autocast replaces the deprecated torch.cuda.amp.autocast
                with torch.autocast(device_type=device.type, enabled=use_amp):
                    outputs = model(images)
                    loss = criterion(outputs, labels)

                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                predicted = outputs.detach().argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            scheduler.step()
            epoch_acc = correct / total
            print(f'Epoch {epoch+1}/{num_epochs} - Loss: {running_loss/len(train_loader_p):.4f} - Acc: {epoch_acc:.4f}')
            torch.cuda.empty_cache()

        # Evaluation, freeing per-batch tensors and cached memory as it goes
        model.eval()
        correct = 0
        with torch.no_grad():
            for images, labels in test_loader_p:
                images = images.to(device)
                outputs = model(images)
                predicted = outputs.argmax(dim=1)
                correct += (predicted == labels.to(device)).sum().item()
                del images, outputs, predicted
                torch.cuda.empty_cache()

        final_acc = correct / len(test_pretrain)
        # Measured after evaluation, so the reported time includes the test pass
        train_time = time.time() - start_time
        print(f'{model_name} - Final Test Accuracy: {final_acc:.4f} - Training Time: {train_time:.2f}s')
        return final_acc, train_time

    except RuntimeError as e:
        # Best-effort: report the failure and return the (0.0, 0.0) sentinel
        print(f"Erreur avec {model_name}: {str(e)}")
        return 0.0, 0.0

# cuDNN autotuner: selects convolution algorithms by benchmarking them
# (a speed setting, not a memory-saving one)
torch.backends.cudnn.benchmark = True
# Release cached, currently unused GPU memory before starting
torch.cuda.empty_cache()

# Sequential execution to save memory
print("Début du fine-tuning...")
vgg_acc, vgg_time = fine_tune_model('vgg16')
alex_acc, alex_time = fine_tune_model('alexnet')

# fine_tune_model returns an accuracy of exactly 0.0 when it caught a
# RuntimeError; in that case retry with the smaller VGG11.
if vgg_acc == 0.0:
    print("\nEssai avec VGG11...")
    # fine_tune_model builds its network from the name alone, so no model is
    # constructed here (one built at this point would never be used).
    # NOTE: fine_tune_model must recognise 'vgg11' for this fallback to run.
    vgg_acc, vgg_time = fine_tune_model('vgg11')

Début du fine-tuning...

Fine-tuning vgg16 (batch_size=32)...


  with torch.cuda.amp.autocast():  # Mixed precision


Epoch 1/3 - Loss: 0.0782 - Acc: 0.9775
Epoch 2/3 - Loss: 0.0145 - Acc: 0.9960
Epoch 3/3 - Loss: 0.0075 - Acc: 0.9981
vgg16 - Final Test Accuracy: 0.9963 - Training Time: 1778.17s


Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:05<00:00, 44.7MB/s]



Fine-tuning alexnet (batch_size=32)...
Epoch 1/3 - Loss: 0.0820 - Acc: 0.9751
Epoch 2/3 - Loss: 0.0178 - Acc: 0.9945
Epoch 3/3 - Loss: 0.0108 - Acc: 0.9970
alexnet - Final Test Accuracy: 0.9948 - Training Time: 528.87s


In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import time

# Configuration: use the GPU when CUDA is available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Preprocessing: tensor conversion followed by normalisation with
# mean 0.1307 and standard deviation 0.3081 (one value per channel)
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# MNIST train and test splits, rooted at ./data with download enabled
train_dataset = datasets.MNIST('./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST('./data', train=False, transform=transform, download=True)

class EfficientViT(nn.Module):
    """Compact Vision-Transformer classifier for single-channel images.

    The image is cut into non-overlapping ``patch_size`` x ``patch_size``
    patches by a strided convolution, a learnable class token is prepended,
    learnable position embeddings are added, and the sequence goes through a
    pre-norm Transformer encoder. The class-token output feeds a
    LayerNorm + Linear head.

    Args:
        image_size: side length used to derive the number of patches.
        patch_size: side length of each square patch (also the conv stride).
        num_classes: number of output logits.
        dim: embedding width of every token.
        depth: number of encoder layers.
        heads: attention heads per encoder layer.
    """

    def __init__(self, image_size=28, patch_size=7, num_classes=10, dim=48, depth=2, heads=3):
        super().__init__()
        patches_per_side = image_size // patch_size
        num_patches = patches_per_side * patches_per_side
        self.patch_size = patch_size

        # Patch embedding: one conv step per patch, 1 input channel -> dim
        self.patch_embed = nn.Conv2d(
            in_channels=1, out_channels=dim, kernel_size=patch_size, stride=patch_size
        )

        # Learnable class token and position embeddings (patches + class token)
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, dim))

        # Pre-norm GELU encoder stack with a feed-forward width of 2 * dim
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=dim,
            nhead=heads,
            dim_feedforward=dim*2,
            dropout=0.1,
            activation='gelu',
            batch_first=True,
            norm_first=True,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=depth)

        # Classification head applied to the class-token output
        self.head = nn.Sequential(nn.LayerNorm(dim), nn.Linear(dim, num_classes))

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        # [B, dim, H', W'] -> [B, num_patches, dim]
        patch_grid = self.patch_embed(x)
        tokens = patch_grid.flatten(start_dim=2).permute(0, 2, 1)

        # Prepend one class token per sample
        batch = tokens.shape[0]
        cls_tokens = self.cls_token.expand(batch, -1, -1)
        tokens = torch.cat([cls_tokens, tokens], dim=1)

        # Add the position embeddings (broadcast over the batch)
        tokens = tokens + self.pos_embed

        encoded = self.transformer(tokens)

        # Classify from the class-token position
        return self.head(encoded[:, 0])

def train_efficient_vit(epochs=5, batch_size=32):
    """Train a default ``EfficientViT`` and evaluate it once on the test set.

    Uses AdamW (lr=3e-4, weight_decay=0.01), cosine learning-rate annealing
    over ``epochs`` and cross-entropy loss. Builds its own loaders from the
    module-level ``train_dataset`` and ``test_dataset``. Prints the loss and
    training accuracy for each epoch, then the final test accuracy, elapsed
    time and parameter count.

    Args:
        epochs: number of training epochs (also the cosine schedule length).
        batch_size: training batch size; the test loader always uses 64.

    Returns:
        ``(model, final_acc, train_time)``, where ``train_time`` is the seconds
        elapsed from the start of training until after the test evaluation.
    """
    # Model, optimiser, schedule and loss
    model = EfficientViT().to(device)
    optimizer = optim.AdamW(model.parameters(), lr=3e-4, weight_decay=0.01)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    criterion = nn.CrossEntropyLoss()

    # Local loaders (these shadow any module-level loaders of the same name)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # Training
    print(f"\nTraining EfficientViT for {epochs} epochs...")
    start_time = time.time()

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        correct = 0

        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad(set_to_none=True)

            logits = model(batch_x)
            loss = criterion(logits, batch_y)
            loss.backward()
            optimizer.step()

            # Running metrics
            total_loss += loss.item()
            correct += (logits.detach().argmax(dim=1) == batch_y).sum().item()

        scheduler.step()
        epoch_acc = correct / len(train_dataset)
        print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss/len(train_loader):.4f} - Acc: {epoch_acc:.4f}")

    # Evaluation: count correct predictions over the whole test set
    model.eval()
    with torch.no_grad():
        correct = sum(
            (model(batch_x.to(device)).argmax(dim=1) == batch_y.to(device)).sum().item()
            for batch_x, batch_y in test_loader
        )

    final_acc = correct / len(test_dataset)
    # Taken after evaluation, so it includes the test pass
    train_time = time.time() - start_time
    params = sum(p.numel() for p in model.parameters()) / 1e6

    print(f"\nTraining Complete!")
    print(f"- Final Accuracy: {final_acc:.4f}")
    print(f"- Training Time: {train_time:.2f}s")
    print(f"- Parameters: {params:.2f}M")

    return model, final_acc, train_time

# Run: train the ViT with the default settings and keep model, accuracy and time
vit_model, vit_acc, vit_time = train_efficient_vit()

# Load the CNN results if needed
# cnn_model, cnn_acc, cnn_time = ...
# NOTE(review): cnn_acc and cnn_time are not assigned in any visible cell
# (train_cnn returns only the model); presumably they come from a cell not
# shown here. Confirm, otherwise the CNN row below raises NameError on a fresh
# kernel.

# Comparison table: accuracy, time in seconds and parameter count in millions
print("\nModel Comparison:")
print(f"{'Model':<15}{'Accuracy':<12}{'Time (s)':<12}{'Params (M)':<12}")
print(f"{'CNN':<15}{cnn_acc:<12.4f}{cnn_time:<12.2f}{sum(p.numel() for p in cnn_model.parameters())/1e6:<12.2f}")
print(f"{'EfficientViT':<15}{vit_acc:<12.4f}{vit_time:<12.2f}{sum(p.numel() for p in vit_model.parameters())/1e6:<12.2f}")




Training EfficientViT for 5 epochs...
Epoch 1/5 - Loss: 0.6906 - Acc: 0.7800
Epoch 2/5 - Loss: 0.3133 - Acc: 0.9042
Epoch 3/5 - Loss: 0.2506 - Acc: 0.9224
Epoch 4/5 - Loss: 0.2192 - Acc: 0.9324
Epoch 5/5 - Loss: 0.2041 - Acc: 0.9379

Training Complete!
- Final Accuracy: 0.9554
- Training Time: 133.88s
- Parameters: 0.04M

Model Comparison:
Model          Accuracy    Time (s)    Params (M)  
CNN            0.9901      53.24       0.42        
EfficientViT   0.9554      133.88      0.04        
