__DAT341 - Assignment 5 - Group 69__

This notebook is formatted as such :

- First, all the packages are imported and the data is loaded (first cell)
- Then, the utility functions are defined such as :
    - Training function for our models
    - Evaluation function
    - Blind test set prediction function
- Finally, differents models and techniques will be tried in the following cells.

**1 - Preparation**

In [1]:
# Basic packages
import os
import sys
import time
import numpy as np
from tqdm import tqdm

# PyTorch packages
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

# Torchvision packages
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torchvision.utils import make_grid
from torchvision import models

# Importing data
train_dir = 'a5_data/train'
val_dir = 'a5_data/val'
test_dir = 'a5_data/test_blind'

# Transformations, normalization and augmentation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = ImageFolder(train_dir, transform=transform)
val_dataset = ImageFolder(val_dir, transform=transform)
test_dataset = ImageFolder(test_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

**2 - Utility functions**

In [2]:
class Classifier():
    def __init__(self,model,criterion,optimizer,features_location=None):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.features_location = features_location

        if self.features_location is not None:
            self.train_features = torch.load(f'{self.features_location}/train.pt')
            self.val_features = torch.load(f'{self.features_location}/val.pt')
            self.test_features = torch.load(f'{self.features_location}/test.pt')
            
        
    # To evaluate our model on the validation set
    def validate(self):
        self.model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for i,(images, labels) in tqdm(enumerate(val_loader)):
                images, labels = images.to(device), labels.to(device)
                if self.features_location is not None:
                    images = self.val_features[i*32:(i*32 + labels.size(0))].to(device)
                outputs = self.model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f'Accuracy on validation set: {100 * correct / total}%')
    
    def predict(self,name):
        if not os.path.exists('predictions'):
            os.makedirs('predictions')
        predictions = []
        with torch.no_grad():
            for i,(images, labels) in tqdm(enumerate(test_loader)):
                images, labels = images.to(device), labels.to(device)
                if self.features_location is not None:
                    images = self.test_features[i*32:(i*32 + labels.size(0))].to(device)
                outputs = self.model(images)
                _, predicted = torch.max(outputs.data, 1)
                predictions += ['MEL' if label == 0 else 'NV' for label in predicted]

            # Save the predictions to a file
            with open(f'predictions/{name}.txt', 'w') as f:
                for item in predictions:
                    f.write("%s\n" % item)
                    
    def train(self):
        start_time = time.time()
        # Training loop
        num_epochs = 10
        for epoch in range(num_epochs):
            self.model.train()
            running_loss = 0.0
            for i,(images, labels) in tqdm(enumerate(train_loader), total=len(train_loader)):
                images, labels = images.to(device), labels.to(device)
                if self.features_location is not None:
                    images = self.train_features[i*32:(i*32 + labels.size(0))].to(device)
                self.optimizer.zero_grad()
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()
                
                running_loss += loss.item()
            print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

        print(f'Training finished - Elapsed time: {time.time() - start_time} s.')

**3 - Models**

1) Simple CNN - No fancy feature extractor or anything

In [3]:
class SimpleCNN(nn.Module):
  def __init__(self):
    super(SimpleCNN, self).__init__()
    self.conv1 = nn.Conv2d(3, 6, 5)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.fc1 = nn.Linear(16 * 53 * 53, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 2)

  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = x.view(-1, 16 * 53 * 53)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x
    
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

Classifier1 = Classifier(model, criterion, optimizer)
Classifier1.train()
Classifier1.validate()
Classifier1.predict('simpleCNN')

Epoch 1, Loss: 0.51190748173206
Epoch 2, Loss: 0.4360037534242839
Epoch 3, Loss: 0.41872741860240253
Epoch 4, Loss: 0.40243225526157306
Epoch 5, Loss: 0.39853007787495703
Epoch 6, Loss: 0.3915621171988065
Epoch 7, Loss: 0.3823899925496448
Epoch 8, Loss: 0.3733556007419653
Epoch 9, Loss: 0.3721242454366304
Epoch 10, Loss: 0.36166907209365523
Training finished - Elapsed time: 166.8477909564972 s.
Accuracy on validation set: 80.03194888178913%


2 - ResNet

In [4]:
# Load a pre-trained resnet18 model
resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Freeze it
for param in resnet.parameters():
    param.requires_grad = False
    
# Modify the last layer of ResNet to match the input size of the first linear layer
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Identity()

# Define the sequential model with ResNet and additional linear layers
model = nn.Sequential(
    resnet,
    nn.Linear(num_ftrs, 512),   # First linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Linear(512, 256),        # Second linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Linear(256, 2)           # Output layer with output size of 2
)
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

Classifier2 = Classifier(model, criterion, optimizer)
Classifier2.train()
Classifier2.validate()
Classifier2.predict('Resnet18')


Epoch 1, Loss: 0.5383621391965382
Epoch 2, Loss: 0.3876084038125935
Epoch 3, Loss: 0.36049412776581685
Epoch 4, Loss: 0.33995049339325273
Epoch 5, Loss: 0.32777240957638515
Epoch 6, Loss: 0.32433458347225663
Epoch 7, Loss: 0.3182627019004442
Epoch 8, Loss: 0.3093501931769931
Epoch 9, Loss: 0.30165900697755577
Epoch 10, Loss: 0.30813395250495984
Training finished - Elapsed time: 156.12875604629517 s.
Accuracy on validation set: 86.26198083067092%


3 - VGG19

In [5]:
# get the "features" portion of VGG19 (we will not need the "classifier" portion)
vgg = models.vgg19(weights=models.VGG19_Weights.DEFAULT).features

# freeze all parameters
for param in vgg.parameters():
    param.requires_grad_(False)
    
# Define the sequential model with VGG and additional linear layers
model = nn.Sequential(
    vgg,
    nn.Flatten(),               # Flatten the output of VGG
    nn.Linear(25088, 4096),     # First linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Dropout(0.5),            # Dropout with p=0.5
    nn.Linear(4096, 4096),      # Second linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Dropout(0.5),            # Dropout with p=0.5
    nn.Linear(4096, 2)          # Output layer with output size of 2
)
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

Classifier3 = Classifier(model, criterion, optimizer)
Classifier3.train()
Classifier3.validate()
Classifier3.predict('VGG19')

Epoch 1, Loss: 0.42754049599170685


KeyboardInterrupt: 

4 - State of the art : DINOv2

In [None]:
dinov2 = torch.hub.load("facebookresearch/dinov2", "dinov2_vits14")

# freeze all parameters
for param in vgg.parameters():
    param.requires_grad_(False)
    
# Define the sequential model with VGG and additional linear layers
model = nn.Sequential(
    dinov2,
    nn.Linear(384, 512),   # First linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Linear(512, 256),        # Second linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Linear(256, 2)           # Output layer with output size of 2
)
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

Classifier4 = Classifier(model, criterion, optimizer)
Classifier4.train()
Classifier4.validate()
Classifier4.predict('VGG19')

Using cache found in /home/quoniam/.cache/torch/hub/facebookresearch_dinov2_main
Epoch 1: 100%|██████████| 201/201 [01:05<00:00,  3.05it/s]


Epoch 1, Loss: 0.45611428554674877


Epoch 2: 100%|██████████| 201/201 [01:09<00:00,  2.91it/s]


Epoch 2, Loss: 0.3879722756755293


Epoch 3: 100%|██████████| 201/201 [01:11<00:00,  2.83it/s]


Epoch 3, Loss: 0.37896293735326225


Epoch 4: 100%|██████████| 201/201 [01:05<00:00,  3.06it/s]


Epoch 4, Loss: 0.35757238729231394


Epoch 5: 100%|██████████| 201/201 [01:06<00:00,  3.04it/s]


Epoch 5, Loss: 0.34731337984106436


Epoch 6: 100%|██████████| 201/201 [01:05<00:00,  3.06it/s]


Epoch 6, Loss: 0.344371156312933


Epoch 7: 100%|██████████| 201/201 [01:05<00:00,  3.06it/s]


Epoch 7, Loss: 0.32975243639886676


Epoch 8: 100%|██████████| 201/201 [01:06<00:00,  3.03it/s]


Epoch 8, Loss: 0.3149603682741597


Epoch 9: 100%|██████████| 201/201 [01:06<00:00,  3.03it/s]


Epoch 9, Loss: 0.3091469077180274


Epoch 10: 100%|██████████| 201/201 [01:05<00:00,  3.05it/s]


Epoch 10, Loss: 0.30815220894801676
Training finished - Elapsed time: 667.7542994022369 s.
Accuracy on validation set: 84.42492012779553%


As we can see, this takes time. Let's try to save vgg19 features and then loading them in the training process

In [None]:
def get_features(loader,name,model,model_name):
    if os.path.exists(f'.features/{model_name}/{name}.pt'):
        return torch.load(f'.features/{model_name}/{name}.pt')
    
    features = []
    for batch, _ in tqdm(loader, desc=f'Extracting features from {name}'):
        batch = batch.to(device)
        # Extract the features
        feature_batch = model(batch)
        features.append(feature_batch)
    features = torch.cat(features)
    if not os.path.exists('.features'):
        os.makedirs('.features')
    if not os.path.exists(f'.features/{model_name}'):
        os.makedirs(f'.features/{model_name}')
    torch.save(features,f'.features/{model_name}/{name}.pt')
    return features

train_features = get_features(train_loader,'train',vgg,'vgg')
val_features = get_features(val_loader,'val',vgg,'vgg')
test_features = get_features(test_loader,'test',vgg,'vgg')

# Define the sequential model with VGG and additional linear layers
model = nn.Sequential(
    nn.Flatten(),               # Flatten the output of VGG
    nn.Linear(25088, 4096),     # First linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Dropout(0.5),            # Dropout with p=0.5
    nn.Linear(4096, 4096),      # Second linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Dropout(0.5),            # Dropout with p=0.5
    nn.Linear(4096, 2)          # Output layer with output size of 2
)
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

Classifier2 = Classifier(model, criterion, optimizer,'.features/vgg')
Classifier2.train()
Classifier2.validate()
Classifier2.predict('VGG19_saved_features')

KeyboardInterrupt: 