# Special Project

Group 42 

Quang Long Ho NGO, 310781

In [None]:
# Imports 


import os
import copy
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn

from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from sklearn.metrics import accuracy_score, f1_score
import json

from torch.utils.tensorboard import SummaryWriter # For Tensorboard
from torchvision.transforms import v2

torch.manual_seed(0)

# Image segmentation with DLIB

# Classification and Feature extraction with ResNet and a custom classification layer

We are using a pre-trained ResNet, a CNN that extracts features from the images. Since we are using a version that was trained on IMAGENET-1K, its classification layer is not usable as-is for our task: the original classification layer was designed to distinguish between 1000 different classes, whereas we only have 16 classes in our dataset.

The motivation for using a pre-trained model is that our dataset is too small to train a network from scratch so that it picks up all the required details about a coin. It is easier to use a CNN that was trained to distinguish between objects in general and then fine-tune it for coin detection.

We will be using PyTorch as our machine learning framework. The first step is to create a dataset that is usable by PyTorch's DataLoaders. When loading the dataset, it is important to resize the images to the right dimensions (224x224) and to normalize them with the values that were used during pre-training (mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]). We also add transforms such as random rotation, color jitter and random erasing to improve the performance on the validation set by fighting overfitting on the training set.

In [None]:
class CoinDataset(Dataset):
    """Coin image dataset described by a JSON label file.

    Every image listed in the label file is decoded once, at construction
    time, and kept in memory as an RGB PIL image. The torchvision transform
    pipeline is applied each time an item is requested.
    """

    def __init__(self, features_path : str, label_path, transform : bool = False) -> None:
        """
        Attributes:
            raw_data (list of dict): (M) One entry per image, with the keys
                "image_features" (RGB PIL image) and "label" (int).
            transform: torchvision v2 pipeline applied to an image on access.

        Args:
            features_path (str): Directory that contains the image files
            label_path: Path to a JSON file holding a list of objects, each
                with a "filename" and a "value" (class label) entry
            transform (bool): True to build the augmenting pipeline (used for
                training), False to build the deterministic one (evaluation)
        """

        super().__init__()

        with open(label_path, 'r') as f:
            data_json = json.load(f)

        self.raw_data = []
        for x in data_json:
            filename = x["filename"]
            img_path = f"{features_path}/{filename}"
            # Image.open is lazy and keeps the file handle open until the
            # pixels are read. Decoding inside a `with` block releases the
            # handle immediately, so large datasets cannot exhaust the
            # process' file descriptors.
            with Image.open(img_path) as img:
                # Normalize below assumes exactly 3 channels; convert() also
                # returns an in-memory copy that outlives the closed file.
                rgb_img = img.convert("RGB")
            self.raw_data.append({"image_features": rgb_img, "label": int(x["value"])})

        # NOTE(review): v2.ToTensor is flagged as deprecated by recent
        # torchvision releases (ToImage + ToDtype is the suggested
        # replacement) - kept here to stay compatible with the installed
        # version; confirm before upgrading.
        if transform:
            self.transform = v2.Compose([
                v2.Resize(224),
                # Resize(224) only fixes the shorter side. The crop guarantees
                # 224x224 outputs so non-square images can still be stacked
                # into a batch (same as the evaluation pipeline).
                v2.CenterCrop(224),
                v2.RandomRotation(degrees=(0, 300)),
                v2.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.4, hue=0.3),
                v2.ToTensor(),
                v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                v2.RandomErasing(p=0.5)
                ])
        else:
            self.transform = v2.Compose([
                v2.Resize(224),
                v2.CenterCrop(224),
                v2.ToTensor(),
                v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

    def __len__(self) -> int:
        """Returns the length of the dataset

        Returns:
            int: The number M of images in the dataset
        """

        return len(self.raw_data)

    def __getitem__(self, index : int):
        """Returns the entry at index from the dataset

        Args:
            index (int): the requested entry index of the dataset

        Returns:
            features: The stored image after the transform pipeline; with the
                pipelines built in __init__ this is a (3, 224, 224) tensor
            label (torch.Tensor): Scalar tensor holding the ground truth label
        """

        entry = self.raw_data[index]
        features = self.transform(entry["image_features"])
        label = torch.tensor(entry["label"])
        return features, label


We only apply the augmentation transforms to the training dataset; the validation dataset is only resized, center-cropped and normalized. The train loader uses a batch size of 32, which helps stabilize the training. We also shuffle the training data so that the learning is not biased by the order of the images.

In [None]:
# Build one dataset per split from the images stored in ./output/.
# Only the training split requests the augmenting pipeline (transform=True).
train_coinDataset = CoinDataset(
    features_path='./output/',
    label_path="train_data_split.json",
    transform=True,
)
val_coinDataset = CoinDataset(
    features_path='./output/',
    label_path="val_data_split.json",
    transform=False,
)

# Training: mini-batches of 32, reshuffled at every epoch.
# NOTE(review): the last batch is not dropped; a final batch holding a single
# sample would make BatchNorm1d fail in training mode - confirm the split size.
train_loader = DataLoader(dataset=train_coinDataset, shuffle=True, batch_size=32)

# Validation: the whole split is served as one batch, in file order.
val_loader = DataLoader(
    dataset=val_coinDataset,
    shuffle=False,
    batch_size=len(val_coinDataset),
)

We then load the ResNet-152 model with pre-trained weights from IMAGENET1K. Dropout and batch normalization were added to the custom classification layer to reduce overfitting.

In [None]:
from torchvision.models import resnet152, ResNet152_Weights

# ImageNet-pretrained backbone; only its final classification layer is replaced.
model = resnet152(weights=ResNet152_Weights.IMAGENET1K_V1)

# Custom 16-class head. Its input width is read from the layer it replaces
# rather than hard-coded, so the head stays valid if the backbone is swapped
# for another ResNet variant.
model.fc = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(model.fc.in_features, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(),
    nn.Linear(256, 16),
)

The model is trained with an Adam optimizer with a learning rate of $10^{-4}$, and we use a scheduler to reduce the learning rate if the validation loss has not decreased for 10 consecutive epochs. Cross-entropy loss is used, which is standard for multi-class classification tasks. The model is trained for 25 epochs.

In [None]:

def train(model, train_loader, val_loader, optimizer, scheduler, criterion, epochs=10,
          device=None, writer=None):
    """Train ``model`` and evaluate it on the validation data after every epoch.

    Per training step the batch loss and accuracy are logged; per epoch the
    validation loss, accuracy and micro/macro F1 are logged and the validation
    loss is handed to ``scheduler.step``.

    Args:
        model: Network to optimize; it is moved to ``device``.
        train_loader: Iterable of (inputs, targets) batches exposing
            ``dataset`` and ``__len__`` (used for the progress print-out).
        val_loader: Iterable of (inputs, targets) validation batches.
        optimizer: Optimizer already bound to the model parameters.
        scheduler: Scheduler whose ``step`` accepts the validation loss.
        criterion: Loss called as ``criterion(outputs, targets)``.
        epochs (int): Number of passes over ``train_loader``.
        device: Device to train on. When omitted, a module-level ``device``
            is used if one exists, otherwise CUDA when available, else CPU.
        writer: Object with an ``add_scalar(tag, value, step)`` method. When
            omitted, a module-level ``writer`` is used if one exists;
            without any writer, logging is skipped.

    Returns:
        None
    """

    # Keep existing callers working: they rely on notebook-level globals
    # rather than passing these two explicitly.
    if device is None:
        device = globals().get("device")
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if writer is None:
        writer = globals().get("writer")

    def log_scalar(tag, value, step):
        if writer is not None:
            writer.add_scalar(tag, value, step)

    model.to(device)
    steps = 0
    for epoch in range(epochs):
        model.train()
        for i, (data, target) in enumerate(train_loader):

            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)

            loss.backward()
            optimizer.step()

            # Computed with torch so it also works when the batch lives on a
            # GPU (scikit-learn cannot read CUDA tensors).
            acc = (output.argmax(dim=1) == target).float().mean().item()
            loss_value = loss.item()
            log_scalar("Acc/train", acc, steps)
            log_scalar("Loss/train", loss_value, steps)
            steps += 1
            if i % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Acc: {:.3f}'.format(
                    epoch, i * len(data), len(train_loader.dataset),
                    100. * i / len(train_loader), loss_value, acc*100))

        running_vloss = []
        all_preds = []
        all_labels = []
        model.eval()
        with torch.no_grad():
            for vinputs, vlabels in val_loader:
                # The model lives on `device`, so its inputs must as well.
                vinputs, vlabels = vinputs.to(device), vlabels.to(device)
                voutputs = model(vinputs)
                # .item() stores plain floats instead of (possibly CUDA) tensors.
                running_vloss.append(criterion(voutputs, vlabels).item())
                all_preds.append(voutputs.argmax(dim=1).cpu())
                all_labels.append(vlabels.cpu())

        if not running_vloss:
            # No validation batch: there is no loss to report or to feed to
            # the scheduler for this epoch.
            continue

        preds = torch.cat(all_preds)
        labels = torch.cat(all_labels)

        avg_vloss = float(np.mean(running_vloss))
        val_accuracy = 100. * (preds == labels).sum().item() / len(labels)
        # F1 is computed once over the whole validation set (averaging
        # per-batch scores is biased when batches miss some classes), with
        # the arguments in scikit-learn's (y_true, y_pred) order.
        avg_vf1micro = f1_score(labels.numpy(), preds.numpy(), average='micro')
        avg_vf1macro = f1_score(labels.numpy(), preds.numpy(), average='macro')

        print('Val Epoch: {}\tLoss: {:.6f}, Acc: {:.3f}, F1 micro: {:.3f}'.format(
            epoch, avg_vloss, val_accuracy, avg_vf1micro
        ))

        log_scalar("Loss/val", avg_vloss, (epoch + 1))
        log_scalar("Acc/val", val_accuracy, (epoch + 1))
        log_scalar("F1_micro/val", avg_vf1micro, epoch + 1)
        log_scalar("F1_macro/val", avg_vf1macro, epoch + 1)
        scheduler.step(avg_vloss)
                
# Define the optimizer: Adam over all model parameters
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Define the scheduler: lowers the learning rate when the monitored value
# (the validation loss passed to scheduler.step) stops decreasing
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

# Log in Tensorboard
# NOTE(review): the run directory is named after EfficientNet although the
# model built in this notebook is a ResNet-152 - consider renaming the run.
writer = SummaryWriter('runs/efficientnet_v2_s_baseline')

try:
    train(model, train_loader, val_loader, optimizer, scheduler, nn.CrossEntropyLoss(), epochs=25)
finally:
    # Flush and close even if training is interrupted or raises, so the
    # events logged so far are not lost.
    writer.flush()
    writer.close()

# Performances analysis

# Submit