# Kaggle Project

Deep learning models are widely used for image classification. The goal of this project is to compare state-of-the-art models on a subset of ImageNet dataset.

The link of the project is : https://www.kaggle.com/competitions/modia-ml-2024/overview

**Notice** : You should upload a report, together with your code to Moodle, for the validation of the course. Do not copy-paste other people’s report or code.

**Due** : 27 Juin 2024.

## 1 Learn how to use Pytorch

Refer to the website https://pytorch.org/tutorials/beginner/basics/intro.html.

## 2 Train state-of-the-art CNN models

The goal is to achieve a good classification accuracy on the test dataset, using Pytorch.

 - Implement one or two models from the following list :
    - LeNet Model [1]
    - AlexNet Model [2]
    - ResNet Model [3]

 - In your report, you should give a precise definition of the model which you use, e.g. the number of layers and the type of each layer in CNN.

 - Pre-process your images in black and white

 - Pre-process your images so that they have the same input size to your model, e.g. use data augmentation.

 - Train your model using mini-batch SGD. Specify the optimization method which you use and report the total training time. To reduce the training time, you may use a GPU card.

 - Perform your parameter turning on a validation set to avoid over-fitting. Summarize your results in table/figure.

In [1]:
# Imports
import torch
import torch.nn as nn
import torchvision
import pandas as pd
import numpy as np
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split, Subset
from PIL import Image
import os
from tqdm import tqdm


# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device: {device}')

Device: cuda


In [2]:
#Loading the dataset and preprocessing
class CustomImageDataset(torch.utils.data.Dataset):
    def __init__(self, labels_file, im_dir, transform=None):
        self.img_labels = pd.read_csv(labels_file)
        self.img_dir = im_dir
        self.transform = transform
        
    def __len__(self):
        return len(self.img_labels)
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, str(self.img_labels.iloc[idx, 0]) + ".jpg")
        image = Image.open(img_path).convert("RGB")
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        return image, label

# Image transformations
IMAGE_SIZE = 256
transform = transforms.Compose([
    # Preprocess in black and white, and same size
    transforms.Grayscale(num_output_channels=1),
    transforms.CenterCrop(IMAGE_SIZE),
    
    # Data augmentation
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(10),
    
    transforms.ToTensor(),
    transforms.Normalize(mean=0.5,std=0.5)
])

# Data paths
label_path = './train.csv'
train_dir = './train/'
test_dir = './test/'

# Dataset
train_dataset = CustomImageDataset(labels_file=label_path, im_dir=train_dir, transform=transform)
test_dataset = CustomImageDataset(labels_file=label_path, im_dir=test_dir, transform=transform)

In [3]:
# Création du modèle LeNet
class LeNet(nn.Module):
    def __init__(self, num_channels=1, num_classes=4):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(num_channels, 6, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1   = nn.Linear(62*62*16, 120)
        self.fc2   = nn.Linear(120, 84)
        self.fc3   = nn.Linear(84, num_classes)
    
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=(2, 2), stride=2)
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=(2, 2), stride=2)
        x = nn.Flatten()(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [4]:
# Hyperparameters
num_classes = 4
num_epochs = 10
batch_size = 128
learning_rate = 0.01
train_size = len(train_dataset)*0.8

model = LeNet(num_classes=num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.001, momentum = 0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5)

# Data loader
train_dataset, val_dataset = random_split(train_dataset, [int(train_size), len(train_dataset)-int(train_size)])
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True)


In [5]:
# Training function
def train(model, criterion, optimizer):
    model.to(device)
    model.train()
    
    train_losses = []
    train_acc = []
    
    with tqdm(total=len(train_loader), desc="Training", unit="batch") as pbar:
        for i, (image, label) in enumerate(train_loader):
            image = image.to(device)
            label = label.to(device)
            
            # Forward pass
            output = model(image)
            loss_value = criterion(output, label)
            train_losses.append(loss_value)
            
            # Calculate softmax and accuracy
            soft_output = torch.nn.functional.softmax(output, dim=1)
            _, predicted = torch.max(soft_output, 1)
            correct = (predicted == label).sum().item()
            accuracy = correct / label.size(0)
            train_acc.append(accuracy)

            # Update progress bar
            loss_cur = loss_value.item()
            current = i * len(image) + len(image)
            acc_cur = np.mean(train_acc)
            pbar.set_postfix(loss=f"{loss_cur:.7f}", current=f"{current}/{len(train_loader.dataset)}", accuracy=f"{acc_cur:.7f}")

            # Backward and optimize
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            
            pbar.update(1)

    return np.mean(train_losses), np.mean(train_acc)


    for images, labels in tqdm(train_loader):

        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        train_losses.append(loss.item())
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
            
        # Compute accuracy
        _, argmax = torch.max(outputs, 1)
        accuracy = (labels == argmax).float().mean()
        train_acc.append(accuracy)
            
    print(f"Train Loss: {np.mean(train_losses):.4f}, Train Acc: {np.mean(train_acc):.4f}")
    return np.mean(train_losses), np.mean(train_acc)

# Validation function
def validate(model, criterion):
    model.eval()
    
    val_losses = []
    val_acc = []
    
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_losses.append(loss.item())
            
            # Compute accuracy
            _, argmax = torch.max(outputs, 1)
            accuracy = (labels == argmax).float().mean()
            val_acc.append(accuracy)
                
                
    print(f"Val Loss: {np.mean(val_losses):.4f}, Val Acc: {np.mean(val_acc):.4f}")
    return np.mean(val_losses), np.mean(val_acc)


In [6]:
# Training loop
train_losses = []
val_losses = []
train_acc = []
val_acc = []

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1} of {num_epochs}")
    train_loss, train_accuracy = train(model, criterion, optimizer)
    val_loss, val_accuracy = validate(model, criterion)
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_acc.append(train_accuracy)
    val_acc.append(val_accuracy)
    scheduler.step()
   
torch.save(model.state_dict(), "model")

Epoch 1 of 10


Training:   0%|          | 0/25 [00:00<?, ?batch/s]
/opt/conda/conda-bld/pytorch_1702400410390/work/aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [1,0,0] Assertion `t >= 0 && t < n_classes` failed.
/opt/conda/conda-bld/pytorch_1702400410390/work/aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [4,0,0] Assertion `t >= 0 && t < n_classes` failed.
/opt/conda/conda-bld/pytorch_1702400410390/work/aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [12,0,0] Assertion `t >= 0 && t < n_classes` failed.
/opt/conda/conda-bld/pytorch_1702400410390/work/aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [13,0,0] Assertion `t >= 0 && t < n_classes` failed.
/opt/conda/conda-bld/pytorch_1702400410390/work/aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: 

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
import pandas as pd
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split, Subset
from torchvision import transforms, models
import torch.optim as optim
import tqdm
from tqdm.auto import tqdm

from LeNet2 import *

class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = f"{self.img_dir}/{self.img_labels.iloc[idx, 0]}.jpg"
        image = Image.open(img_path)
        #image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1] - 1
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label 

# Lire le fichier CSV
csv_file = 'train.csv'
labels_df = pd.read_csv(csv_file)

# Dataset personnalisé
class CustomDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.labels_df = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.labels_df)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.labels_df.iloc[idx, 0])
        image = Image.open(img_name)
        label = self.labels_df.iloc[idx, 1]

        if self.transform:
            image = self.transform(image)

        return image, label

# Définir les transformations
data_transforms = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    # MODIF : J'ai modifié 224 en 256 dans le Resize
    # MODIF : Et j'ai finalement changé le Resize en CenterCrop
    # transforms.Resize((256, 256)),
    transforms.CenterCrop(256),
    # AJOUT DATA AUGMENTATION (Pour AlexNet, pas testé sur LeNet)
    transforms.RandomRotation(degrees=90),
    transforms.RandomVerticalFlip(),
    transforms.RandomHorizontalFlip(),
    # FIN DATA AUGMENTATION
    transforms.ToTensor(),
    transforms.Normalize(0.5,0.5)
])

# Charger le dataset complet
# MODIF : J'ai commenté la ligne ci-dessous pour mettre un CustomImageDataset 
# dataset = CustomDataset(csv_file=csv_file, root_dir='train/train', transform=data_transforms)
dataset = CustomImageDataset(annotations_file='train.csv', img_dir='train', transform=data_transforms)

# Séparer le dataset en train et validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
# MODIF : J'ai commenté la ligne ci-dessous pour mettre un Subset
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
#train_dataset = Subset(dataset, range(train_size))
#val_dataset = Subset(dataset, range(train_size, train_size+val_size))

# Créer des DataLoader pour les ensembles d'entraînement et de validation
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)

# Définir le modèle (utilisation d'un modèle pré-entraîné pour un transfert d'apprentissage)
# MODIF : 3 lignes ci-dessous commentées pour mettre mon propre modèle
#model = models.resnet18(pretrained=True)
#num_ftrs = model.fc.in_features
#model.fc = nn.Linear(num_ftrs, 4)  # Adapter la dernière couche au nombre de classes (ici 4 classes)
#model = LeNet()
model = LeNet2()

# Définir la fonction de perte et l'optimiseur
criterion = nn.CrossEntropyLoss()
#optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.001)


# Entraîner le modèle
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0 # AJOUT
    total = 0   # AJOUT
    for inputs, labels in tqdm(train_loader):
        #print(inputs[0])
        #print(inputs[1])
        optimizer.zero_grad()
        outputs = model(inputs)
        # MODIF : J'avais ajouté les deux lignes suivantes pour voir les sorties (qui étaient toutes identiques pour un batch donnée avec
        # la fonction sigmoïde, j'ai donc remplacé par des ReLu)
        #print(outputs[0]) 
        #print(outputs[16])
        # AJOUT : calcul de la train accuracy
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        # FIN AJOUT
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    
    
    epoch_loss = running_loss / train_size
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')

    print(f'Train Accuracy: {100 * correct / total:.2f}%')

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Validation Accuracy: {100 * correct / total:.2f}%')

print('Training complete.')

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch 1/10, Loss: 1.3930
Train Accuracy: 28.56%
Validation Accuracy: 32.12%


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch 2/10, Loss: 1.3172
Train Accuracy: 35.34%
Validation Accuracy: 39.50%


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch 3/10, Loss: 1.2788
Train Accuracy: 39.88%
Validation Accuracy: 41.50%


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch 4/10, Loss: 1.2750
Train Accuracy: 40.78%
Validation Accuracy: 45.00%


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch 5/10, Loss: 1.2421
Train Accuracy: 42.78%
Validation Accuracy: 46.88%


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch 6/10, Loss: 1.2194
Train Accuracy: 44.00%
Validation Accuracy: 50.50%


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch 7/10, Loss: 1.2052
Train Accuracy: 45.62%
Validation Accuracy: 48.38%


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch 8/10, Loss: 1.1977
Train Accuracy: 45.81%
Validation Accuracy: 49.00%


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch 9/10, Loss: 1.1712
Train Accuracy: 46.91%
Validation Accuracy: 54.25%


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch 10/10, Loss: 1.1616
Train Accuracy: 48.28%
Validation Accuracy: 53.62%
Training complete.
