<a href="https://colab.research.google.com/github/adam-lw/Glaucoma-Diagnosis/blob/main/ViT_Base.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch

# Select the compute device once: CUDA when PyTorch can see a GPU, otherwise CPU.
# The chosen device is reported so a silent fallback to CPU is easy to spot.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using cuda" if device.type == "cuda" else "Using CPU")

Using CPU


In [3]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m105.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m81.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.4 tokenizers-0.13.3 transformers-4.28.1


In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# The dataset loader later in this notebook reads images from a path under /content/drive/MyDrive.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Import dependencies

import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import datasets, transforms
import os
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler
import math
from torch.utils.data import random_split
from tqdm import tqdm
from transformers import ViTModel, ViTConfig, ViTForImageClassification, AutoModel, ViTImageProcessor
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter

# ViTExperiments
Class for executing various Vision Transformer tests

In [None]:
class ViTExperiments:
    """Holds the dataset splits used when running Vision Transformer experiments."""

    def __init__(self, train_data, test_data, val_data):
        """Store the three dataset splits on the instance.

        Note the positional order: train, then test, then validation.
        """
        self.train_data, self.test_data, self.val_data = train_data, test_data, val_data



## Experiment Running Parameters
### Model Configuration Parameters:

*vitPretrainedModel:* PyTorch nn.Module object containing the architecture and weights of the pre-trained ViT

*vitNumHeads:* Number of Self-Attention Heads. Should be equal to the pre-trained configuration unless we are testing head removal.

*classifierHead:* Classification head to use on top of the ViT feature extractor.

*weightFreezeProfile:* Configuration for various levels of weight freezing to reduce overfitting and increase inference speed.

*embedding_dim:* Number of dimensions in the patch and position embedding, and thus the output size of the ViT before the MLP head.

*num_classes:* Number of classes in the given dataset. Used for the length of the MLP classification head to provide a classification.


### Data Augmentation & Normalisation Parameters:

*train_Layer1Transform:* torchvision transform (e.g. `transforms.Compose`) responsible for image processing and data augmentation on the overall image.

*train_Layer2Transform:* torchvision transform for processing and augmentation of the 1st-iteration "windowed" images from the attention mechanism.

*train_Layer3Transform:* torchvision transform for processing and augmentation of the 2nd-iteration "windowed" images from the attention mechanism.

*test_transform:* torchvision transform for image processing on the validation and test datasets.

### Training Config:

*lossFunction:* Which loss function to use.

*optimiser:* Which optimiser algorithm to use

*earlyStopThreshold:* The number of consecutive epochs without an improvement (i.e. a decrease) in validation loss before training is stopped.


### Training Hyperparameters:

*batchSize:* How many images are processed before model weights are updated via backpropagation?

*learningRate:* How quickly/slowly are weights updated during backpropagation?

*dropoutRate:* Probability with which activations are randomly set to 0 during training to reduce overfitting.

### Model Saving/Loading:

*savePath:* Path to file in which the model and optimiser's state_dicts are saved

*loadPath:* (optional) Path to file in which model is loaded. This is for if we'd like to load a pre-existing model and continue training

In [1]:
    def runExperiment(self, vitPretrainedModel, vitNumHeads, classifierHead, weightFreezeProfile, embedding_dim, num_classes,
                      train_Layer1Transform, train_Layer2Transform, train_Layer3Transform, test_transform,
                      lossFunction, optimiser, earlyStopThreshold,
                      batchSize, learningRate, dropoutRate,
                      savePath=None, loadPath=None):
        """Fine-tune a pre-trained ViT on ``self.train_data`` with early stopping on validation loss.

        The model is ``vitPretrainedModel -> pooled output -> LayerNorm -> classifierHead`` and
        produces raw logits (no softmax), which is what ``nn.CrossEntropyLoss`` expects.
        Training runs whole epochs until the validation loss has failed to improve for
        ``earlyStopThreshold`` consecutive epochs. Training and validation loss are logged to
        TensorBoard once per epoch.

        Args:
            vitPretrainedModel: nn.Module whose output exposes ``pooler_output``.
            vitNumHeads: Currently unused (attention head removal is still a TODO).
            classifierHead: nn.Module mapping ``embedding_dim`` features to class logits.
            weightFreezeProfile: Currently unused (weight freezing is still a TODO).
            embedding_dim: Feature size fed to the LayerNorm before the classifier head.
            num_classes: Currently unused inside this method.
            train_Layer1Transform, train_Layer2Transform, train_Layer3Transform, test_transform:
                Currently unused inside this method (the datasets already carry their transforms).
            lossFunction: Loss callable ``(outputs, labels) -> scalar tensor``. A loss *class*
                (e.g. ``nn.CrossEntropyLoss``) is also accepted and is instantiated with defaults.
            optimiser: Optimiser class, called as ``optimiser(params, lr=learningRate)``.
            earlyStopThreshold: Consecutive non-improving epochs tolerated before stopping.
            batchSize: Training batch size.
            learningRate: Learning rate handed to the optimiser.
            dropoutRate: Currently unused (dropout is still a TODO).
            savePath: Optional file path. When given, the model and optimiser state_dicts are
                saved there every time the validation loss improves.
            loadPath: Optional file path of a checkpoint written by this method; when given,
                model and optimiser state are restored from it before training starts.

        Raises:
            ValueError: If the validation dataset is empty (early stopping needs it).
        """

        if len(self.val_data) == 0:
            raise ValueError("Validation dataset is empty; early stopping requires validation data.")

        # Accept a loss class as well as a ready-made loss instance/callable.
        if isinstance(lossFunction, type):
            lossFunction = lossFunction()

        # Create dataloaders from dataset and provided batchSize hyperparameter
        train_dataloader = DataLoader(self.train_data, batch_size=batchSize, shuffle=True, pin_memory=True)
        # The validation batch size only affects evaluation speed, not the result
        val_dataloader = DataLoader(self.val_data, batch_size=128, shuffle=False, pin_memory=True)

        # TODO: attention head removal

        # TODO: Weight freezing

        # TODO: dropout rates

        # Create model. No Softmax layer: the loss receives raw logits, and argmax over
        # logits yields the same predicted class as argmax over probabilities.
        vitModel = nn.Sequential(
            vitPretrainedModel,
            BaseModelOutputWithPoolingToTensor(),
            nn.LayerNorm(normalized_shape=embedding_dim),
            classifierHead,
        )

        # Load model onto the selected device
        vitModel.to(device)

        # Initialise our optimiser function based on provided experiment parameters
        modelOptimiser = optimiser(vitModel.parameters(), lr=learningRate)

        # Optionally resume from a previously saved checkpoint
        if loadPath is not None:
            checkpoint = torch.load(loadPath, map_location=device)
            vitModel.load_state_dict(checkpoint['model_state_dict'])
            modelOptimiser.load_state_dict(checkpoint['optimizer_state_dict'])

        bestLoss = None  # Best loss score on validation dataset
        lastImprovement = 0  # Epochs since last improvement
        epochNum = 0  # Used for TensorBoard tracking

        # Initialise TensorBoard tracking; closed in the finally block so events are flushed
        writer = SummaryWriter()

        try:
            # Training loop
            while lastImprovement < earlyStopThreshold:

                # Set model to training mode
                vitModel.train()

                runningTrainLoss = 0.0
                trainSamples = 0

                # Loop through entire dataset, one batch at a time
                for batchTensor, labelTensor in tqdm(train_dataloader):

                    # Ensure data is loaded on the same device as the model
                    batchTensor = batchTensor.to(device)
                    labelTensor = labelTensor.to(device)

                    # Reset previously calculated gradients
                    modelOptimiser.zero_grad()

                    # Calculate predicted logits based on training images
                    trainOutput = vitModel(batchTensor)

                    # Calculate loss based on difference between predicted and actual labels
                    loss = lossFunction(trainOutput, labelTensor)

                    # Backward pass: compute gradient of the loss with respect to model parameters
                    loss.backward()

                    # Perform a single optimization step (parameter update)
                    modelOptimiser.step()

                    # Accumulate a sample-weighted sum so the logged value is the epoch mean
                    runningTrainLoss += loss.item() * batchTensor.size(0)
                    trainSamples += batchTensor.size(0)

                # Update TensorBoard (skipped when the training loader produced no batches)
                if trainSamples:
                    writer.add_scalar("Training Loss", runningTrainLoss / trainSamples, global_step=epochNum)

                # Evaluate model performance on Validation set
                vitModel.eval()

                combinedValidationLoss = 0.0

                # Disable gradient calculations when evaluating
                with torch.no_grad():
                    for batchTensor, labelTensor in tqdm(val_dataloader):

                        # Calculate predicted logits on validation set
                        validationOutputs = vitModel(batchTensor.to(device))

                        # Labels must live on the same device as the outputs
                        validationLoss = lossFunction(validationOutputs, labelTensor.to(device))

                        # Add this to the combined total, weighted by the number of images in batch
                        combinedValidationLoss += validationLoss.item() * batchTensor.size(0)

                combinedValidationLoss = combinedValidationLoss / len(self.val_data)

                writer.add_scalar("Validation Loss", combinedValidationLoss, global_step=epochNum)

                epochNum += 1

                if bestLoss is None or combinedValidationLoss < bestLoss:
                    bestLoss = combinedValidationLoss
                    lastImprovement = 0

                    # Checkpoint every improvement so the file always holds the best model so far
                    if savePath is not None:
                        torch.save({
                            'model_state_dict': vitModel.state_dict(),
                            'optimizer_state_dict': modelOptimiser.state_dict()
                            }, savePath)
                else:
                    lastImprovement += 1
        finally:
            writer.close()

        # Extract attention weights here
        # Average them
        # Calculate windows
        # Optimise on windows, on copy of ViT

        return


    def evaluateModel(self, vitModel, dataset):
        """Run ``vitModel`` over ``dataset`` and print classification metrics.

        Args:
            vitModel: Model returning one score per class for each image in a batch.
            dataset: Either a Dataset yielding ``(image, label)`` pairs, which is wrapped in a
                non-shuffling DataLoader, or an already-built DataLoader, which is used as-is.

        Prints the raw predictions and labels followed by accuracy, precision, recall and F1.
        Precision, recall and F1 are computed with scikit-learn's default averaging
        (NOTE(review): that default is intended for two-class problems; confirm before use
        on datasets with more classes).
        """
        if isinstance(dataset, DataLoader):
            eval_dataloader = dataset
        else:
            eval_dataloader = DataLoader(dataset, batch_size=128, shuffle=False)

        # Evaluation mode
        vitModel.eval()

        predictions = []
        actuals = []

        # Gradients are not needed for inference
        with torch.no_grad():
            for batchTensor, labelTensor in tqdm(eval_dataloader):
                outputs = vitModel(batchTensor.to(device))
                # Predicted class is the index of the highest score along the class dimension
                predictions.extend(torch.argmax(outputs, dim=1).tolist())
                actuals.extend(labelTensor.tolist())
                # Calculate windows here, etc

        print("Preds " + str(predictions))
        print("Actuals " + str(actuals))

        accuracy = accuracy_score(actuals, predictions)
        precision = precision_score(actuals, predictions)
        recall = recall_score(actuals, predictions)
        f1 = f1_score(actuals, predictions)

        print("Results:")
        print("Accuracy: " + str(accuracy))
        print("Precision: " + str(precision))
        print("Recall: " + str(recall))
        print("F1: " + str(f1))

    def calculateAUROC(self, resultsTensor):
        """Placeholder for the AUROC metric; not implemented yet.

        Raises:
            NotImplementedError: Always, so an accidental call fails loudly instead of
                silently returning None in place of a metric.
        """
        raise NotImplementedError("calculateAUROC is not implemented yet")



SyntaxError: ignored

In [None]:
class BaseModelOutputWithPoolingToTensor(nn.Module):
    """Adapter that lets a Hugging Face model sit inside an ``nn.Sequential``.

    It takes the structured output object of the preceding module and forwards only its
    ``pooler_output`` attribute to the next layer.
    """

    def forward(self, input):
        # Hand the pooled representation on; every other field of the output is dropped.
        pooled = input.pooler_output
        return pooled

In [None]:
class SubsetWithTransform(Dataset):
    """Dataset wrapper that applies a transform to the sample of each ``(sample, label)`` pair.

    This allows different splits of one underlying dataset to use different transforms.
    """

    def __init__(self, subset, transform):
        self.subset = subset
        self.transform = transform

    def __len__(self):
        return len(self.subset)

    def __getitem__(self, index):
        sample, label = self.subset[index]
        # A falsy transform (e.g. None) means the sample is passed through untouched.
        return (self.transform(sample) if self.transform else sample), label

In [None]:
class customDataLoaders():
    """Builds the train/validation/test dataset splits for the supported image datasets.

    Only the GOALS loader is implemented; the remaining loaders are placeholders.
    """

    def __init__(self):
        pass

    def mendeleyDataLoader(self):
        # Placeholder: not implemented yet.
        pass

    def goalsDataLoader(self, train_transform, test_transform, train_size, val_size, test_size,
                        data_root="/content/drive/MyDrive/GOALS-Dataset/train/Train/Image", seed=42):
        """Split the labelled GOALS image folder into train, validation and test datasets.

        Args:
            train_transform: Transform applied to training samples.
            test_transform: Transform applied to validation and test samples.
            train_size, val_size, test_size: Split sizes handed to ``random_split``
                (the notebook passes fractions such as 0.6 / 0.2 / 0.2).
            data_root: Directory read by ``datasets.ImageFolder``. Defaults to the
                mounted Google Drive location used by this notebook.
            seed: Seed for the split generator, so the split is reproducible.

        Returns:
            Tuple ``(train_data, val_data, test_data)`` of ``SubsetWithTransform`` datasets.
        """
        labelledSet = datasets.ImageFolder(data_root)

        train_data, val_data, test_data = random_split(
            labelledSet,
            [train_size, val_size, test_size],
            generator=torch.Generator().manual_seed(seed),
        )

        # Transforms are attached per split so only the training split is augmented.
        train_data = SubsetWithTransform(train_data, train_transform)
        val_data = SubsetWithTransform(val_data, test_transform)
        test_data = SubsetWithTransform(test_data, test_transform)

        return train_data, val_data, test_data

    def dukeDataLoader(self):
        # Placeholder: not implemented yet.
        pass

    def combinedOCTDataLoader(self):
        # Placeholder: not implemented yet.
        pass

In [None]:
# Initialise training and testing transforms for image normalisation, resizing and data augmentation

def make_base_transform():
    """Return a fresh resize -> tensor -> normalise pipeline shared by all stages for now."""
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ])

# Each stage gets its own pipeline object so augmentations can later be tuned independently.
train_Layer1_transform = make_base_transform()
train_Layer2_transform = make_base_transform()
train_Layer3_transform = make_base_transform()
test_transform = make_base_transform()

# Load dataset with transforms
dl = customDataLoaders()

# 60% train, 20% validation, 20% test
train, val, test = dl.goalsDataLoader(train_Layer1_transform, test_transform, 0.6, 0.2, 0.2)

# Initialise our experiment manager with the train, validation and test data.
# Keyword arguments are used because the constructor's positional order is (train, test, val).
experimentManager = ViTExperiments(train_data=train, val_data=val, test_data=test)

# Specify embedding dimensionality
embedding_dim = 768

# Class names live on the underlying ImageFolder: SubsetWithTransform -> Subset -> ImageFolder
num_classes = len(train.subset.dataset.classes)

# Run an experiment with specified parameters
experimentManager.runExperiment(
    vitPretrainedModel = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k'),
    vitNumHeads = 12,
    classifierHead = nn.Linear(in_features=embedding_dim, out_features=num_classes),
    weightFreezeProfile = None,
    embedding_dim = embedding_dim,
    num_classes = num_classes,
    train_Layer1Transform = train_Layer1_transform,
    train_Layer2Transform = train_Layer2_transform,
    train_Layer3Transform = train_Layer3_transform,
    test_transform = test_transform,
    # Pass a loss instance: the training loop calls it as lossFunction(outputs, labels)
    lossFunction = nn.CrossEntropyLoss(),
    optimiser = torch.optim.AdamW,
    earlyStopThreshold = 5,
    batchSize = 1024,
    learningRate = 0.0001,
    dropoutRate = None
)




# Remember to start saving torch checkpoints outside of colab!