# Quark Gluon Training Tutorial (CoAt)
This notebook gives the reader a basic idea of how to train the CoAtNet architecture and obtain results broadly similar to those described in the article.

Framework: PyTorch

## Setup

In [10]:
!git clone https://github.com/dc250601/GSOC.git
%mv GSOC Transformers
%mkdir /content/Checkpoints

In [2]:
!pip install wandb
!pip install sklearn
!pip install tqdm
!pip install timm
!pip install tensorflow_addons
!pip install einops

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.13.4-py2.py3-none-any.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 35.3 MB/s 
Collecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.9.10-py2.py3-none-any.whl (162 kB)
[K     |████████████████████████████████| 162 kB 51.6 MB/s 
Collecting setproctitle
  Downloading setproctitle-1.3.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)
Collecting shortuuid>=0.5.0
  Downloading shortuuid-1.0.9-py3-none-any.whl (9.4 kB)
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting GitPython>=1.0.0
  Downloading GitPython-3.1.28-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 68.0 MB/s 
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.9-py3-n

## Downloading the prepared dataset.

In [3]:
#Only a small portion of the entire dataset is used here due to computational 
#limitations
#Feel free to contact me for the entire dataset.
!gdown 1VIJQM4iUvWUW7KEkBSnzrnuw_6U1BRWQ
!unzip data.zip
%mv Data_small_50 Data

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Data_small_50/Train/1/67077.png  
  inflating: Data_small_50/Train/1/6708.png  
  inflating: Data_small_50/Train/1/67080.png  
  inflating: Data_small_50/Train/1/67081.png  
  inflating: Data_small_50/Train/1/67082.png  
  inflating: Data_small_50/Train/1/67083.png  
  inflating: Data_small_50/Train/1/67084.png  
  inflating: Data_small_50/Train/1/67085.png  
  inflating: Data_small_50/Train/1/67086.png  
  inflating: Data_small_50/Train/1/67087.png  
  inflating: Data_small_50/Train/1/67090.png  
  inflating: Data_small_50/Train/1/67091.png  
  inflating: Data_small_50/Train/1/67092.png  
  inflating: Data_small_50/Train/1/67093.png  
  inflating: Data_small_50/Train/1/67094.png  
  inflating: Data_small_50/Train/1/67095.png  
  inflating: Data_small_50/Train/1/67096.png  
  inflating: Data_small_50/Train/1/67097.png  
  inflating: Data_small_50/Train/1/67098.png  
  inflating: Data_small_50/Train/1/67099.pn

## Importing Libraries

In [4]:
%cd Transformers
import numpy as np
import torch.nn as nn
import torch
from torchvision import datasets, models, transforms

from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import numpy as np
from torchvision.utils import make_grid
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve
from sklearn import metrics
import gc
import wandb
import Model.Quark_Gluon.coat as coat

/content/Transformers


## Setting up the helper functions

In [5]:
def metric(y_true, y_pred):
    """Return the area under the ROC curve for the given labels and scores.

    Args:
        y_true: ground-truth labels, forwarded to ``roc_curve``.
        y_pred: predicted scores, forwarded to ``roc_curve``.

    Returns:
        The value of ``metrics.auc`` evaluated on the (fpr, tpr) ROC points.
    """
    # The thresholds returned by roc_curve are not needed for the area.
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_pred)
    return metrics.auc(false_pos_rate, true_pos_rate)

def straightner(a):
    """Concatenate a list of per-batch arrays into one flat float64 vector.

    Unlike a fixed-stride copy, this does not require every batch to have the
    same length as the first one, so a shorter final batch (``drop_last=False``)
    is handled correctly. Each batch is flattened first, so column-shaped
    ``(n, 1)`` batches are accepted as well.

    Args:
        a: sequence of array-likes, one per batch.

    Returns:
        1-D ``numpy.ndarray`` of dtype float64 holding all values in batch order;
        an empty array when ``a`` is empty.
    """
    if len(a) == 0:
        # np.concatenate refuses an empty sequence; return an empty vector instead.
        return np.zeros(0)
    flat_batches = [np.ravel(batch) for batch in a]
    # Keep the float64 result type regardless of the input dtype.
    return np.concatenate(flat_batches).astype(np.float64, copy=False)

def predictor(outputs):
    """Return, for each row of ``outputs``, the index of its largest entry.

    Args:
        outputs: 2-D array-like; the argmax is taken along axis 1.

    Returns:
        Array of indices, one per row.
    """
    class_indices = np.argmax(outputs, axis=1)
    return class_indices


## Building the model

In [6]:
  # Hyper-parameters handed to coat.CoAtNet below.
  # image_size matches the 128x128 Resize used in the data transforms;
  # num_classes = 1 gives one output per image, which is fed to BCEWithLogitsLoss.
  image_size, in_channels, num_classes = (128, 128), 3, 1
  num_blocks = [2, 2, 3, 5, 2]
  channels = [64, 96, 192, 384, 768]

  # Instantiate the network from the repository's `coat` module.
  model = coat.CoAtNet(
      image_size=image_size,
      in_channels=in_channels,
      num_blocks=num_blocks,
      channels=channels,
      num_classes=num_classes,
  )

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


## Setting up the dataloaders

In [7]:
  # Training images: resize, random flips and a random rotation of up to 60 degrees,
  # then conversion to a tensor.
  train_transform = transforms.Compose([
      transforms.Resize((128,128)),
      transforms.RandomHorizontalFlip(),
      transforms.RandomVerticalFlip(),
      transforms.RandomRotation(60),
      transforms.ToTensor(),
  ])
  # Test images: resize and tensor conversion only, no augmentation.
  test_transform = transforms.Compose([
      transforms.Resize((128,128)),
      transforms.ToTensor(),
  ])

  # ImageFolder derives the class label from the sub-directory each image sits in.
  dataset_Train = datasets.ImageFolder('/content/Data/Train/', transform=train_transform)
  dataset_Test = datasets.ImageFolder('/content/Data/Test/', transform=test_transform)

  # Both loaders use the same settings; drop_last=True keeps every batch at exactly
  # 256 samples.
  # NOTE(review): the test loader also shuffles and drops the final partial batch, so a
  # few test images are left out of each evaluation pass — confirm this is intended.
  loader_options = dict(batch_size=256, shuffle=True, drop_last=True, num_workers=2, pin_memory=True)
  dataloader_train = torch.utils.data.DataLoader(dataset_Train, **loader_options)
  dataloader_test = torch.utils.data.DataLoader(dataset_Test, **loader_options)

## Setting up PyTorch loss and optimisers 

In [8]:
# Move the model to the GPU before building the optimizer: the torch.optim documentation
# advises constructing optimizers only once the parameters live on their final device.
model = model.to("cuda")
optimizer = torch.optim.AdamW(model.parameters(), lr = 0.001, weight_decay=0.05)
# Binary cross-entropy computed directly on the raw model outputs (logits).
criterion = nn.BCEWithLogitsLoss()
# mode 'max': the scheduler is stepped with the validation AUC in the training cell and
# halves the learning rate (factor = 0.5) once that metric stops improving.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', verbose = True,threshold = 0.001,patience = 3, factor = 0.5)

### Setting up Weights And Biases for easier visualization

In [9]:
# Authenticate with Weights & Biases (prompts for an API key when none is stored).
wandb.login()
# Start the run that the training cell logs its metrics to.
wandb.init(project="Tutorial_Transformers", name="CoAt")

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 

··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdc250601[0m. Use [1m`wandb login --relogin`[0m to force relogin


## Training script next

In [14]:

# Mixed-precision training loop.
# Each epoch: one pass over dataloader_train (autocast + GradScaler), one pass over
# dataloader_test without gradients, ROC-AUC for both passes, a scheduler step on the
# validation AUC, a checkpoint, and a W&B log entry. Training stops early once the
# learning rate has decayed below MIN_LR.

NUM_EPOCHS = 50      # upper bound on the number of epochs
MIN_LR = 0.000001    # stop once the scheduler has reduced the learning rate below this
w_intr = 50          # log the current batch loss to W&B every `w_intr` steps

scaler = torch.cuda.amp.GradScaler()
#--------------------------
wandb.watch(model, log_freq=50)
#---------------------------

for epoch in range(NUM_EPOCHS):
    train_loss = 0
    val_loss = 0
    train_steps = 0
    test_steps = 0
    label_list = []
    outputs_list = []

    # ---------------------------- training pass ----------------------------
    model.train()
    for image, label in tqdm(dataloader_train):
        image = image.to("cuda")
        label = label.to("cuda")
        # Clear gradients by setting them to None rather than filling them with zeros.
        optimizer.zero_grad(set_to_none=True)

        with torch.cuda.amp.autocast():
            outputs = model(image)
            loss = criterion(outputs, label.float())

        # Keep labels and raw outputs on the CPU for the epoch-level AUC.
        label_list.append(label.detach().cpu().numpy())
        outputs_list.append(outputs.detach().cpu().numpy())
        # The loss is scaled before backward; the scaler then performs the optimizer
        # step and updates its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        train_loss += loss.item()
        train_steps += 1
        if train_steps%w_intr == 0:
            wandb.log({"loss": loss.item()})
    # Plain numpy work from here on, so no torch.no_grad() context is required.
    train_auc = metric(straightner(label_list), straightner(outputs_list))

    # --------------------------- validation pass ---------------------------
    model.eval()
    label_list = []
    outputs_list = []
    with torch.no_grad():
        for image, label in tqdm(dataloader_test):
            image = image.to("cuda")
            label = label.to("cuda")
            outputs = model(image)
            loss = criterion(outputs, label.float())
            label_list.append(label.detach().cpu().numpy())
            outputs_list.append(outputs.detach().cpu().numpy())
            val_loss += loss.item()
            test_steps +=1
            if test_steps%w_intr == 0:
                wandb.log({"val_loss": loss.item()})
    test_auc = metric(straightner(label_list), straightner(outputs_list))

    # Mean per-batch losses for the epoch.
    train_loss = train_loss/train_steps
    val_loss = val_loss/ test_steps

    print("----------------------------------------------------")
    print("Epoch No" , epoch)
    print("The Training loss of the epoch, ",train_loss)
    print("The Training AUC of the epoch,  %.3f"%train_auc)
    print("The validation loss of the epoch, ",val_loss)
    print("The validation AUC of the epoch, %.3f"%test_auc)
    print("----------------------------------------------------")

    # Step the scheduler before checkpointing so that the saved scheduler state already
    # accounts for this epoch's validation AUC.
    scheduler.step(test_auc)
    # Read the learning rate through the optimizer's public param_groups instead of the
    # scheduler's private _last_lr attribute.
    curr_lr = optimizer.param_groups[0]["lr"]

    PATH = f"/content/Checkpoints/model_epoch_{epoch}.pt"
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()
            }, PATH)

    wandb.log({"Train_auc_epoch": train_auc,
              "Epoch": epoch,
              "Val_auc_epoch": test_auc,
              "Train_loss_epoch": train_loss,
              "Val_loss_epoch": val_loss,
              "Lr": curr_lr}
              )
    gc.collect()

    if curr_lr < MIN_LR:
        break
wandb.finish()


### Great, you have just trained a state-of-the-art Transformer architecture
The results might be a bit off due to the small dataset size. Use the entire dataset to get reproducible results.