In [None]:
import copy
import os
import random
import sys

import kagglehub
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from PIL import Image
from sklearn.metrics import cohen_kappa_score, precision_score, recall_score, accuracy_score
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from torchvision.transforms.functional import to_pil_image
from tqdm import tqdm


In [None]:
# Mount Google Drive at /content/drive/ (requires the Colab-only `google.colab` package).
# NOTE(review): none of the visible cells read from or write to /content/drive —
# checkpoints are saved relative to the working directory — so confirm whether
# this mount is still needed.
from google.colab import drive

drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
# Download the "mariaherrerot/aptos2019" dataset through kagglehub.
# `path` is the local directory returned by the download call; the dataset and
# image paths used by the training cells are built relative to it.
path = kagglehub.dataset_download("mariaherrerot/aptos2019")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/mariaherrerot/aptos2019?dataset_version_number=3...


100%|██████████| 8.01G/8.01G [06:50<00:00, 20.9MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/mariaherrerot/aptos2019/versions/3


In [None]:
# Training hyperparameters shared by every model trained in this notebook.
batch_size = 24         # samples per mini-batch for the DataLoaders
num_classes = 5  # 5 DR levels
learning_rate = 0.0001  # initial learning rate handed to the optimizer
num_epochs = 4          # full passes over the training set

# Seed every RNG involved so that shuffling and weight initialisation are
# repeatable across "Restart & Run All" executions.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

In [None]:
# Image pipeline applied to every sample before it reaches a model:
# resize to 224x224, convert the PIL image to a tensor, then normalise each
# channel with the given mean/std.
# NOTE(review): these mean/std values are the standard ImageNet statistics,
# presumably chosen to match the pretrained torchvision weights — confirm.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [None]:
class APTOSDataset(Dataset):
    """Single-image dataset backed by an annotation CSV and a directory of PNG files.

    Args:
        ann_file: Path to a CSV with an ``id_code`` column and, for labelled
            splits, a ``diagnosis`` column.
        image_dir: Directory containing ``<id_code>.png`` files.
        transform: Optional callable applied to each loaded RGB PIL image.
        test: When True, ``__getitem__`` returns only the image (no label) and
            the ``diagnosis`` column is optional in the CSV.

    Attributes:
        data: List of dicts, one per CSV row, holding ``img_path`` and (when
            the CSV provides it) ``diagnosis``.
    """

    def __init__(self, ann_file, image_dir, transform=None, test=False):
        self.ann_file = ann_file
        self.image_dir = image_dir
        self.transform = transform

        self.test = test

        self.data = self.load_data()

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(image, label)`` or, in test mode, just ``image``."""
        return self.get_item(index)

    def load_data(self):
        """Read the annotation CSV into a list of per-sample dicts.

        Raises:
            KeyError: If the dataset is not in test mode and the CSV has no
                ``diagnosis`` column.
        """
        df = pd.read_csv(self.ann_file)

        # Labels are only consumed when not in test mode, so an unlabeled CSV
        # is accepted for test datasets instead of failing on a missing column.
        if 'diagnosis' in df.columns:
            labels = df['diagnosis'].tolist()
        elif self.test:
            labels = None
        else:
            raise KeyError(f"'diagnosis' column not found in {self.ann_file}")

        data = []
        for position, id_code in enumerate(df['id_code'].tolist()):
            file_info = {'img_path': os.path.join(self.image_dir, f'{id_code}.png')}
            if labels is not None:
                file_info['diagnosis'] = labels[position]
            data.append(file_info)
        return data

    def get_item(self, index):
        """Load the image at ``index`` as RGB, apply the transform, attach the label if any."""
        data = self.data[index]
        img = Image.open(data['img_path']).convert('RGB')
        if self.transform:
            img = self.transform(img)

        if self.test:
            return img

        label = torch.tensor(data['diagnosis'], dtype=torch.int64)
        return img, label

In [None]:
def compute_metrics(preds, labels, per_class=False):
    """Score predictions against ground-truth labels.

    Args:
        preds: Predicted class indices.
        labels: Ground-truth class indices, aligned with ``preds``.
        per_class: When True, also return per-class precision and recall.

    Returns:
        ``(kappa, accuracy, precision, recall)`` where kappa is the
        quadratic-weighted Cohen's kappa and precision/recall are
        support-weighted averages. With ``per_class=True`` the tuple is
        extended with ``(precision_per_class, recall_per_class)``.
    """
    summary = (
        cohen_kappa_score(labels, preds, weights='quadratic'),
        accuracy_score(labels, preds),
        precision_score(labels, preds, average='weighted', zero_division=0),
        recall_score(labels, preds, average='weighted', zero_division=0),
    )

    if not per_class:
        return summary

    # average=None yields one score per class instead of a single aggregate.
    class_precision = precision_score(labels, preds, average=None, zero_division=0)
    class_recall = recall_score(labels, preds, average=None, zero_division=0)
    return summary + (class_precision, class_recall)

In [None]:
def evaluate_model(model, test_loader, device, test_only=False, prediction_path='./test_predictions.csv'):
    """Run ``model`` over ``test_loader`` and either score it or dump predictions.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        test_loader: DataLoader whose dataset exposes a ``data`` list of dicts
            holding ``img_path`` (single image) or ``img_path1``/``img_path2``
            (image pair). File names are looked up by position
            (batch index x batch size), which presumes the loader is not shuffled.
        device: Device the input batches are moved to.
        test_only: When True the batches carry no labels and the predictions
            are written to ``prediction_path`` instead of being scored.
        prediction_path: Destination CSV (columns ``ID`` and ``TARGET``).

    Returns:
        The ``compute_metrics`` tuple when ``test_only`` is False, otherwise None.
    """
    model.eval()

    predictions, targets, file_names = [], [], []

    def batch_file_names(batch_idx, count, path_key):
        # Map a batch position back to the dataset rows it was built from.
        first = batch_idx * test_loader.batch_size
        return [
            os.path.basename(test_loader.dataset.data[row][path_key])
            for row in range(first, first + count)
        ]

    with tqdm(total=len(test_loader), desc='Evaluating', unit=' batch', file=sys.stdout) as progress:
        for batch_idx, batch in enumerate(test_loader):
            if test_only:
                images = batch
            else:
                images, labels = batch

            # A list of tensors means the dataset yields image pairs.
            paired = isinstance(images, list)
            if paired:
                images = [view.to(device) for view in images]
            else:
                images = images.to(device)

            with torch.no_grad():
                preds = torch.argmax(model(images), 1)

            if paired:
                # The same prediction (and label) is recorded once per image of the pair.
                for k in range(2):
                    predictions.extend(preds.cpu().numpy())
                    file_names.extend(batch_file_names(batch_idx, len(images[k]), f'img_path{k + 1}'))
                    if not test_only:
                        targets.extend(labels.numpy())
            else:
                predictions.extend(preds.cpu().numpy())
                file_names.extend(batch_file_names(batch_idx, len(images), 'img_path'))
                if not test_only:
                    targets.extend(labels.numpy())

            progress.update(1)

    if not test_only:
        return compute_metrics(predictions, targets)

    # Save predictions to csv file for Kaggle online evaluation
    submission = pd.DataFrame({
        'ID': file_names,
        'TARGET': predictions
    })
    submission.to_csv(prediction_path, index=False)
    print(f'[Test] Save predictions to {os.path.abspath(prediction_path)}')

In [None]:
def train_model(model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=25,
                checkpoint_path='model.pth'):
    """Train ``model``, validate after every epoch and keep the best-kappa weights.

    Args:
        model: Network to optimise (already on ``device``).
        train_loader: DataLoader yielding ``(images, labels)``; ``images`` may be
            a tensor or a list of tensors (image pair).
        val_loader: DataLoader passed to ``evaluate_model`` after each epoch.
        device: Device the batches are moved to.
        criterion: Loss called as ``criterion(outputs, labels.long())``.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Scheduler stepped once per epoch.
        num_epochs: Number of epochs to run.
        checkpoint_path: File the best state dict is saved to whenever the
            validation kappa improves.

    Returns:
        ``(model, kappas)`` where ``model`` carries the weights of the epoch
        with the highest validation kappa (if any epoch improved on the
        initial -1.0) and ``kappas`` is an array of the validation kappa per epoch.
    """
    # state_dict() hands back references to the live parameter tensors, so a
    # deep copy is needed for the snapshot to survive later optimizer steps.
    best_model = copy.deepcopy(model.state_dict())
    best_epoch = None
    best_val_kappa = -1.0  # Initialize the best kappa score
    kappas = np.zeros(num_epochs)

    for epoch in range(1, num_epochs + 1):
        print(f'\nEpoch {epoch}/{num_epochs}')
        running_loss = []
        all_preds = []
        all_labels = []

        # evaluate_model() leaves the network in eval mode, so re-enable training mode.
        model.train()

        with tqdm(total=len(train_loader), desc=f'Training', unit=' batch', file=sys.stdout) as pbar:
            for images, labels in train_loader:
                if not isinstance(images, list):
                    images = images.to(device)  # single image case
                else:
                    images = [x.to(device) for x in images]  # dual images case

                labels = labels.to(device)

                optimizer.zero_grad()

                outputs = model(images)
                loss = criterion(outputs, labels.long())

                loss.backward()
                optimizer.step()

                preds = torch.argmax(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

                running_loss.append(loss.item())

                pbar.set_postfix({'lr': f'{optimizer.param_groups[0]["lr"]:.1e}', 'Loss': f'{loss.item():.4f}'})
                pbar.update(1)

        lr_scheduler.step()

        epoch_loss = sum(running_loss) / len(running_loss)

        train_metrics = compute_metrics(all_preds, all_labels, per_class=True)
        kappa, accuracy, precision, recall = train_metrics[:4]

        print(f'[Train] Kappa: {kappa:.4f} Accuracy: {accuracy:.4f} '
              f'Precision: {precision:.4f} Recall: {recall:.4f} Loss: {epoch_loss:.4f}')

        if len(train_metrics) > 4:
            precision_per_class, recall_per_class = train_metrics[4:]
            # Separate loop names so the aggregate precision/recall are not shadowed.
            for class_idx, (class_precision, class_recall) in enumerate(
                    zip(precision_per_class, recall_per_class)):
                print(f'[Train] Class {class_idx}: Precision: {class_precision:.4f}, Recall: {class_recall:.4f}')

        # Evaluation on the validation set at the end of each epoch
        val_metrics = evaluate_model(model, val_loader, device)
        val_kappa, val_accuracy, val_precision, val_recall = val_metrics[:4]
        print(f'[Val] Kappa: {val_kappa:.4f} Accuracy: {val_accuracy:.4f} '
              f'Precision: {val_precision:.4f} Recall: {val_recall:.4f}')
        kappas[epoch - 1] = val_kappa

        if val_kappa > best_val_kappa:
            best_val_kappa = val_kappa
            best_epoch = epoch
            best_model = copy.deepcopy(model.state_dict())
            torch.save(best_model, checkpoint_path)

    print(f'[Val] Best kappa: {best_val_kappa:.4f}, Epoch {best_epoch}')

    # Hand back the best-scoring weights rather than whatever the last epoch left;
    # skipped when no epoch improved so training progress is never rolled back.
    if best_epoch is not None:
        model.load_state_dict(best_model)

    return model, kappas

In [None]:
def train_and_save(model_tuple):
    """Fine-tune one named model on the train split and checkpoint its best epoch.

    Args:
        model_tuple: ``(name, model)`` pair; ``name`` is embedded in the
            checkpoint file name.

    Returns:
        ``(model, kappas)`` as returned by ``train_model``.

    Reads the module-level ``path``, ``preprocess``, ``batch_size``,
    ``learning_rate`` and ``num_epochs``. No test-set predictions are produced
    here; use ``evaluate_model(..., test_only=True)`` for that.
    """
    name, model = model_tuple

    train_dataset = APTOSDataset(
        os.path.join(path, 'train_1.csv'),
        os.path.join(path, 'train_images', 'train_images'),
        preprocess,
    )
    val_dataset = APTOSDataset(
        os.path.join(path, 'valid.csv'),
        os.path.join(path, 'val_images', 'val_images'),
        preprocess,
    )

    # Create dataloaders (only the training data is shuffled)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Plain (unweighted) cross-entropy loss
    criterion = nn.CrossEntropyLoss()

    # Use the GPU if one is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device:', device)

    # Move the model to the device
    model = model.to(device)

    # Optimizer and learning rate scheduler.
    # StepLR multiplies the LR by 0.1 every 10 epochs, so it has no effect on
    # runs shorter than 10 epochs.
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    # Train and evaluate the model with the training and validation set
    model, kappas = train_model(
        model, train_loader, val_loader, device, criterion, optimizer,
        lr_scheduler=lr_scheduler, num_epochs=num_epochs,
        checkpoint_path='./model_aptos-2019-pretrained_{}.pth'.format(name)
    )
    return model, kappas

In [None]:
# Create the pretrained models as (name, model) pairs for train_and_save().
# The torchvision weights ship with a 1000-way ImageNet classifier, so the final
# linear layer of each network is replaced by a fresh `num_classes`-way layer.
# Without this, argmax and cross-entropy run over 1000 logits and predictions
# can land outside the 5 DR levels.
_resnet18_net = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
_resnet18_net.fc = nn.Linear(_resnet18_net.fc.in_features, num_classes)
resnet18 = ("ResNET", _resnet18_net)

_vgg16_net = models.vgg16(weights=models.VGG16_Weights.DEFAULT)
# VGG keeps its head in a Sequential; only the last Linear layer is swapped.
_vgg16_net.classifier[-1] = nn.Linear(_vgg16_net.classifier[-1].in_features, num_classes)
vgg16 = ("VGG", _vgg16_net)

_densenet161_net = models.densenet161(weights=models.DenseNet161_Weights.DEFAULT)
_densenet161_net.classifier = nn.Linear(_densenet161_net.classifier.in_features, num_classes)
densenet161 = ("DenseNET", _densenet161_net)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 224MB/s]
Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 211MB/s]
Downloading: "https://download.pytorch.org/models/densenet161-8d451a50.pth" to /root/.cache/torch/hub/checkpoints/densenet161-8d451a50.pth
100%|██████████| 110M/110M [00:20<00:00, 5.61MB/s]


In [None]:
# Keep the returned (model, kappas) instead of letting the notebook echo the
# tuple, which dumps the entire module repr into the cell output.
resnet18_model, resnet18_kappas = train_and_save(resnet18)

Device: cuda

Epoch 1/10
Training: 100%|██████████| 123/123 [05:54<00:00,  2.88s/ batch, lr=1.0e-04, Loss=1.0650]
[Train] Kappa: 0.0201 Accuracy: 0.7328 Precision: 0.7522 Recall: 0.7328 Loss: 1.2734
[Train] Class 0: Precision: 0.9465, Recall: 0.9372
[Train] Class 1: Precision: 0.5616, Recall: 0.4100
[Train] Class 2: Precision: 0.6397, Recall: 0.7339
[Train] Class 3: Precision: 0.4205, Recall: 0.2403
[Train] Class 4: Precision: 0.4132, Recall: 0.2137
[Train] Class 5: Precision: 0.0000, Recall: 0.0000
[Train] Class 6: Precision: 0.0000, Recall: 0.0000
[Train] Class 7: Precision: 0.0000, Recall: 0.0000
[Train] Class 8: Precision: 0.0000, Recall: 0.0000
[Train] Class 9: Precision: 0.0000, Recall: 0.0000
[Train] Class 10: Precision: 0.0000, Recall: 0.0000
[Train] Class 11: Precision: 0.0000, Recall: 0.0000
[Train] Class 12: Precision: 0.0000, Recall: 0.0000
[Train] Class 13: Precision: 0.0000, Recall: 0.0000
[Train] Class 14: Precision: 0.0000, Recall: 0.0000
[Train] Class 15: Precision: 0.

(ResNet(
   (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
   (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   (relu): ReLU(inplace=True)
   (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
   (layer1): Sequential(
     (0): BasicBlock(
       (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (relu): ReLU(inplace=True)
       (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     )
     (1): BasicBlock(
       (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (relu): ReLU

In [None]:
# Keep the returned (model, kappas) instead of letting the notebook echo the
# tuple, which dumps the entire module repr into the cell output.
vgg16_model, vgg16_kappas = train_and_save(vgg16)

Device: cuda

Epoch 1/4
Training: 100%|██████████| 123/123 [05:49<00:00,  2.84s/ batch, lr=1.0e-04, Loss=0.1651]
[Train] Kappa: 0.8085 Accuracy: 0.7587 Precision: 0.7402 Recall: 0.7587 Loss: 0.6529
[Train] Class 0: Precision: 0.9448, Recall: 0.9665
[Train] Class 1: Precision: 0.4843, Recall: 0.4100
[Train] Class 2: Precision: 0.6167, Recall: 0.7884
[Train] Class 3: Precision: 0.4848, Recall: 0.2078
[Train] Class 4: Precision: 0.4091, Recall: 0.1923
Evaluating: 100%|██████████| 16/16 [00:47<00:00,  3.00s/ batch]
[Val] Kappa: 0.7960 Accuracy: 0.7650 Precision: 0.7132 Recall: 0.7650

Epoch 2/4
Training: 100%|██████████| 123/123 [05:48<00:00,  2.84s/ batch, lr=1.0e-04, Loss=0.4721]
[Train] Kappa: 0.8632 Accuracy: 0.8007 Precision: 0.7880 Recall: 0.8007 Loss: 0.5117
[Train] Class 0: Precision: 0.9757, Recall: 0.9784
[Train] Class 1: Precision: 0.5869, Recall: 0.5067
[Train] Class 2: Precision: 0.6821, Recall: 0.8391
[Train] Class 3: Precision: 0.4545, Recall: 0.2273
[Train] Class 4: Precisi

(VGG(
   (features): Sequential(
     (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (1): ReLU(inplace=True)
     (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (3): ReLU(inplace=True)
     (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
     (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (6): ReLU(inplace=True)
     (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (8): ReLU(inplace=True)
     (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
     (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (11): ReLU(inplace=True)
     (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (13): ReLU(inplace=True)
     (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (15): ReLU(inplace=True)
     (16): MaxPool2d(kernel_size=2, stride=2, pa

In [None]:
# Keep the returned (model, kappas) instead of letting the notebook echo the
# tuple, which dumps the entire module repr into the cell output.
densenet161_model, densenet161_kappas = train_and_save(densenet161)

Device: cuda

Epoch 1/4
Training: 100%|██████████| 123/123 [06:08<00:00,  3.00s/ batch, lr=1.0e-04, Loss=3.1172]
[Train] Kappa: 0.0293 Accuracy: 0.7481 Precision: 0.7620 Recall: 0.7481 Loss: 1.1436
[Train] Class 0: Precision: 0.9519, Recall: 0.9379
[Train] Class 1: Precision: 0.5524, Recall: 0.4567
[Train] Class 2: Precision: 0.6561, Recall: 0.7413
[Train] Class 3: Precision: 0.4535, Recall: 0.2532
[Train] Class 4: Precision: 0.4364, Recall: 0.3077
[Train] Class 5: Precision: 0.0000, Recall: 0.0000
[Train] Class 6: Precision: 0.0000, Recall: 0.0000
[Train] Class 7: Precision: 0.0000, Recall: 0.0000
[Train] Class 8: Precision: 0.0000, Recall: 0.0000
[Train] Class 9: Precision: 0.0000, Recall: 0.0000
[Train] Class 10: Precision: 0.0000, Recall: 0.0000
[Train] Class 11: Precision: 0.0000, Recall: 0.0000
[Train] Class 12: Precision: 0.0000, Recall: 0.0000
[Train] Class 13: Precision: 0.0000, Recall: 0.0000
[Train] Class 14: Precision: 0.0000, Recall: 0.0000
[Train] Class 15: Precision: 0.0

(DenseNet(
   (features): Sequential(
     (conv0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
     (norm0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (relu0): ReLU(inplace=True)
     (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
     (denseblock1): _DenseBlock(
       (denselayer1): _DenseLayer(
         (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
         (relu1): ReLU(inplace=True)
         (conv1): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
         (norm2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
         (relu2): ReLU(inplace=True)
         (conv2): Conv2d(192, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       )
       (denselayer2): _DenseLayer(
         (norm1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=T