In [None]:
# Install the third-party packages this notebook needs.
# NOTE(review): versions are not pinned, so each run installs whatever is current.
!pip install pytorch_lightning
!pip install transformers
!pip install pretrainedmodels
!pip install timm

In [None]:
import os
import numpy as np
import pandas as pd
import random
import math
import cv2
import timm

import torch
torch.cuda.empty_cache()
from torchvision import transforms, models
from torch.cuda.amp import GradScaler
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.nn.functional as F
#from transformers import BertTokenizer, BertModel
from transformers import AutoTokenizer, AutoModel
from sklearn.model_selection import train_test_split
from nltk.translate.bleu_score import sentence_bleu
from tqdm import tqdm
from PIL import Image
from random import choice
import matplotlib.pyplot as plt
import itertools
from UtilDataset import Label2Arr, MultiLabel
from ModelsEncoder import Resnext50, SwinT

import pretrainedmodels

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; this is a no-op
    # when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick a different convolution algorithm from run
    # to run, so it must be off for the deterministic flag to be meaningful.
    torch.backends.cudnn.benchmark = False

In [None]:
def load_all_data(args, remove = None):
    """Read the ImageCLEF 2022 concept-detection tables and re-split them.

    The official train and valid CSVs (tab-separated) are concatenated in
    that order and cut, without shuffling, into 80% train / 10% valid /
    10% test. The ``ID`` column of each row is rewritten to the full path
    of the corresponding ``.jpg`` image.

    Args:
        args: Parsed notebook arguments. Not used by this function.
        remove: Not used by this function.

    Returns:
        Tuple ``(train_df, valid_df, test_df, cuis)`` where the three frames
        have fresh 0-based indices and ``cuis`` is the ``concept`` column of
        ``concepts.csv`` as a list.
    """
    clef2022_train_path = '/hdd2/datasets/ImageClef2022medCaption/clef2022/Train'
    clef2022_valid_path = '/hdd2/datasets/ImageClef2022medCaption/clef2022/Valid'

    def _read_split(split_dir, csv_name, image_subdir):
        # Load one split and turn its bare image IDs into image file paths.
        frame = pd.read_csv(os.path.join(split_dir, csv_name), sep = '\t')
        frame['ID'] = frame['ID'].apply(
            lambda x: os.path.join(split_dir, image_subdir, x + '.jpg')
        )
        return frame

    train_frame = _read_split(clef2022_train_path, 'concept_detection_train.csv', 'train')
    valid_frame = _read_split(clef2022_valid_path, 'concept_detection_valid.csv', 'valid')

    concept_table = pd.read_csv(
        os.path.join('/hdd2/datasets/ImageClef2022medCaption/clef2022/concepts.csv'),
        sep = '\t',
    )
    cuis = concept_table['concept'].to_list()

    # First cut: leading 80% for training, trailing 20% held out.
    combined = pd.concat([train_frame, valid_frame])
    train_df, holdout = train_test_split(combined, train_size=0.8, shuffle=False)
    train_df = train_df.reset_index(drop=True)
    holdout = holdout.reset_index(drop=True)

    # Second cut: held-out part halved into validation and test.
    valid_df, test_df = train_test_split(holdout, train_size=0.5, shuffle=False)
    valid_df = valid_df.reset_index(drop=True)
    test_df = test_df.reset_index(drop=True)

    return train_df, valid_df, test_df, cuis

In [None]:
def Label2Arr(cap_list, cuis):
    """Encode a collection of concept IDs as a multi-hot list aligned with ``cuis``.

    NOTE: this definition shadows the ``Label2Arr`` imported from
    ``UtilDataset`` earlier in the notebook.

    Args:
        cap_list: Iterable of concept identifiers present for one sample.
        cuis: Ordered list of all known concept identifiers.

    Returns:
        A list of ``len(cuis)`` ints; position ``k`` is 1 when ``cuis[k]``
        occurs in ``cap_list`` and 0 otherwise.
    """
    present = list(cap_list)
    try:
        # Set membership makes the encoding a single pass over ``cuis``
        # instead of a full scan of ``cuis`` for every label.
        present = set(present)
    except TypeError:
        # Unhashable items: keep the list and fall back to equality scans.
        pass
    return [1 if cui in present else 0 for cui in cuis]
    

In [None]:
import argparse
import sys
#from utils import seed_everything, Model, VQAMed, train_one_epoch, validate, test, load_data, LabelSmoothing, train_img_only, val_img_only, test_img_only
#import wandb
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torchvision import transforms, models
from torch.cuda.amp import GradScaler
import os
import warnings
import albumentations as A
import pretrainedmodels
from albumentations.core.composition import OneOf
#from albumentations.pytorch.transforms import ToTensorV2
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
warnings.simplefilter("ignore", UserWarning)

In [None]:
# argparse reads sys.argv[1:]; overwrite sys.argv so parse_args() sees no
# command-line options inside the notebook and every option takes its default.
sys.argv = ['-f']

parser = argparse.ArgumentParser(description = "args for 2022 ImageCap")

# --- run bookkeeping -------------------------------------------------------
parser.add_argument('--run_name', type = str, required = False, default = "resnet52FFNN-05-1e-3", help = "run name for wandb")
parser.add_argument('--use_pretrained', action = 'store_true', default = False, help = "use pretrained weights or not")
parser.add_argument('--mixed_precision', action = 'store_true', default = False, help = "use mixed precision or not")
parser.add_argument('--clip', action = 'store_true', default = False, help = "clip the gradients or not")
# `--resume` defaults to True, so a store_true flag alone could never turn it
# off; `--no_resume` writes False into the same destination.
parser.add_argument('--resume', action='store_true', required = False, default = True,  help='resume training or train from scratch')
parser.add_argument('--no_resume', dest = 'resume', action = 'store_false', help = 'train from scratch instead of resuming')

parser.add_argument('--seed', type = int, required = False, default = 42, help = "set seed for reproducibility")
parser.add_argument('--num_workers', type = int, required = False, default = 1, help = "number of workers")
parser.add_argument('--epochs', type = int, required = False, default = 100, help = "num epochs to train")
parser.add_argument('--train_pct', type = float, required = False, default = 1.0, help = "fraction of train samples to select")
parser.add_argument('--valid_pct', type = float, required = False, default = 1.0, help = "fraction of validation samples to select")
parser.add_argument('--test_pct', type = float, required = False, default = 1.0, help = "fraction of test samples to select")

# --- optimisation ----------------------------------------------------------
parser.add_argument('--batch_size', type = int, required = False, default = 4, help = "batch size")
parser.add_argument('--lr', type = float, required = False, default = 1e-3, help = "learning rate")
# parser.add_argument('--weight_decay', type = float, required = False, default = 1e-2, help = " weight decay for gradients")
parser.add_argument('--factor', type = float, required = False, default = 0.1, help = "factor for rlp")
parser.add_argument('--patience', type = int, required = False, default = 10, help = "patience for rlp")
# parser.add_argument('--lr_min', type = float, required = False, default = 1e-6, help = "minimum lr for Cosine Annealing")
parser.add_argument('--hidden_dropout_prob', type = float, required = False, default = 0.3, help = "hidden dropout probability")
parser.add_argument('--smoothing', type = float, required = False, default = None, help = "label smoothing")

# --- model / input shape ---------------------------------------------------
parser.add_argument('--image_size', type = int, required = False, default = 224, help = "image size")
parser.add_argument('--threshold', type = float, required = False, default = 0.5 , help = "threshold for turning predicted scores into binary labels")
parser.add_argument('--hidden_size', type = int, required = False, default = 768, help = "hidden size") #og 312
parser.add_argument('--vocab_size', type = int, required = False, default = 30522, help = "vocab size")
parser.add_argument('--type_vocab_size', type = int, required = False, default = 2, help = "type vocab size")
parser.add_argument('--heads', type = int, required = False, default = 12, help = "heads")
parser.add_argument('--n_layers', type = int, required = False, default = 4, help = "num of layers")
parser.add_argument('--num_vis', type = int, required = False , default = 5, help = "num of visual embeddings") #num of conv2d Layers in the transformer, can be: 5, 3 or 1

args = parser.parse_args()

#wandb.init(project='medvqa', name = args.run_name, config = args)

# Seed all RNGs before any dataset or model is created.
seed_everything(args.seed)

Train

In [None]:
def train_one_epoch(loader, model, optimizer, criterion, device, scaler, args):
    """Run one optimisation pass over ``loader``.

    Args:
        loader: Iterable yielding ``(img, target)`` tensor pairs.
        model: Network called as ``model(img)``; put into train mode here.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(logits, target)``.
        device: Device the batches are moved to.
        scaler: Accepted for signature parity with the other loops; unused.
        args: Parsed notebook arguments; unused here.

    Returns:
        Tuple ``(mean_loss, preds, acc)``: the mean of the per-batch losses,
        the raw model outputs for every batch concatenated into one NumPy
        array, and ``acc`` which is always 0 (accuracy is not computed).
    """
    model.train()
    train_loss = []
    PREDS = []
    bar = tqdm(loader, leave = False)
    for (img, target) in bar:
        img, target = img.to(device), target.to(device)

        optimizer.zero_grad()
        logits = model(img)
        loss = criterion(logits, target)
        loss.backward()
        optimizer.step()

        # Move each batch of outputs to the CPU straight away so a whole
        # epoch of predictions never piles up in GPU memory.
        PREDS.append(logits.detach().cpu())

        loss_np = loss.detach().cpu().numpy()
        train_loss.append(loss_np)
        bar.set_description('train_loss: %.5f' % (loss_np))

    acc = 0
    PREDS = torch.cat(PREDS).numpy()

    return np.mean(train_loss), PREDS, acc

Validation

In [None]:
def validate(loader, model, criterion, device, scaler, args):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        loader: Iterable yielding ``(img, target)`` tensor pairs.
        model: Network called as ``model(img)``; put into eval mode here.
        criterion: Loss called as ``criterion(logits, target)``.
        device: Device the batches are moved to.
        scaler: Accepted for signature parity with the other loops; unused.
        args: Parsed notebook arguments; ``args.mixed_precision`` selects
            whether the forward pass runs under CUDA autocast.

    Returns:
        Tuple ``(val_loss, preds, acc)``: the mean of the per-batch losses,
        the raw model outputs concatenated into one NumPy array, and ``acc``
        which is always 0 (accuracy is not computed).
    """
    model.eval()
    val_loss = []
    PREDS = []
    bar = tqdm(loader, leave=False)
    # Evaluation needs no gradients; without no_grad every forward pass
    # would build and retain an autograd graph.
    with torch.no_grad():
        for (img, target) in bar:
            img, target = img.to(device), target.to(device)
            if args.mixed_precision:
                with torch.cuda.amp.autocast():
                    logits = model(img)
                    loss = criterion(logits, target)
            else:
                logits = model(img)
                loss = criterion(logits, target)

            loss_np = loss.detach().cpu().numpy()

            # Keep accumulated outputs on the CPU rather than the GPU.
            PREDS.append(logits.detach().cpu())
            val_loss.append(loss_np)
            bar.set_description('val_loss: %.5f' % (loss_np))

    val_loss = np.mean(val_loss)
    acc = 0

    PREDS = torch.cat(PREDS).numpy()

    return val_loss, PREDS, acc

Test

In [None]:
def test(loader, model, criterion, device, scaler, args):
    
    model.eval()
    TARGETS = []
    PREDS = []
    test_loss = []
    for (img, target) in tqdm(loader, leave=False):
        img, target = img.to(device), target.to(device)   
        if args.mixed_precision:
            with torch.cuda.amp.autocast(): 
                logits = model(img)
                loss = criterion(logits, target)
        else:
            logits = model(img)
            loss = criterion(logits, target)


        loss_np = loss.detach().cpu().numpy()

        test_loss.append(loss_np)

        pred = logits.detach()
            
        PREDS.append(pred)
        TARGETS.append(target)
                

    test_loss = np.mean(test_loss)
    acc =0
    PREDS = torch.cat(PREDS).cpu().numpy()
    TARGETS = torch.cat(TARGETS).cpu().numpy()
    #PREDS = np.array(PREDS > args.threshold, dtype=float)

    #acc = (PREDS == TARGETS).mean() * 100.
    return test_loss, PREDS, acc, TARGETS

Change this cell to switch between datasets.


In [None]:
# Load the re-split dataframes and the ordered list of concept IDs (labels).
train_df, val_df, test_df, cuis = load_all_data(args)


def _eval_transform(image_size):
    """Build the deterministic resize + normalise pipeline used for val and test."""
    return transforms.Compose([transforms.ToPILImage(),
                               transforms.Resize((image_size, image_size)),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])


# Training pipeline: random crop / rotation / colour jitter, then normalise.
# The output size follows args.image_size (default 224) so it stays in sync
# with the `imgsize` handed to the datasets below.
# NOTE(review): RandomResizedCrop's `scale` is documented as a fraction of the
# source image area, so the 1.25 upper bound looks unintended -- confirm.
train_tfm = transforms.Compose([transforms.ToPILImage(),
                                transforms.RandomResizedCrop(args.image_size,scale=(0.75,1.25),ratio=(0.75,1.25)),
                                transforms.RandomRotation(10),
                                # Cutout(),
                                transforms.ColorJitter(brightness=0.4,contrast=0.4,saturation=0.4,hue=0.4),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

val_tfm = _eval_transform(args.image_size)
test_tfm = _eval_transform(args.image_size)

traindataset = MultiLabel(train_df, cuis, imgsize = args.image_size, tfm = train_tfm, args = args)
valdataset = MultiLabel(val_df, cuis, imgsize = args.image_size, tfm = val_tfm, args = args)
testdataset = MultiLabel(test_df, cuis, imgsize = args.image_size, tfm = test_tfm, args = args)

In [None]:
# One loader per split; only the training loader reshuffles every epoch.
trainloader = DataLoader(
    traindataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.num_workers,
)
validloader = DataLoader(
    valdataset,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=args.num_workers,
)
testloader = DataLoader(
    testdataset,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=args.num_workers,
)

Model Architecture


In [None]:
# Build the SwinT encoder from ModelsEncoder, passing the number of concept IDs.
# NOTE(review): presumably this argument sets the output width (one unit per concept) -- confirm in ModelsEncoder.
model = SwinT(len(cuis))

Training Parameters

In [None]:
# Number of passes of the training loop, plus best-so-far trackers it updates.
# NOTE(review): `epochs` is fixed here and does not read args.epochs.
epochs, counter = 10, 0
best_acc, best_loss = 0, float("inf")

In [None]:
# Mirror the parsed arguments instead of repeating their values by hand, so
# these names cannot drift away from what the loaders and optimizer really use
# (the argparse defaults are the same 4 and 1e-3).
batch_size = args.batch_size
# NOTE(review): no argparse option corresponds to this value and nothing in
# the visible cells reads it -- confirm whether it is still needed.
max_epoch_number = 35
learning_rate = args.lr
optimizer = optim.Adam(model.parameters(),lr=learning_rate)

In [None]:
# Adam over all model parameters at the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=args.lr)

# Multiply the learning rate by `factor` once the monitored value has not
# improved for `patience` scheduler steps.
scheduler = lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=args.factor,
    patience=args.patience,
    verbose=True,
)

# Binary cross-entropy on probabilities.
# NOTE(review): assumes the model already applies a sigmoid -- confirm.
criterion = nn.BCELoss()

# Gradient scaler handed to the train / validate / test functions.
scaler = GradScaler()


Compute device (GPU if available, otherwise CPU)

In [None]:
# Use the GPU when one is visible to PyTorch; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Move the model's parameters and buffers onto the chosen device.
model.to(device)


Training loop

In [None]:
# Prefix for the log file (`<run_name>.txt`) and checkpoints (`<run_name>.pt`, `<run_name>_bestacc.pt`).
# NOTE(review): the name says "resnet52FFNN" but the model built above is SwinT -- confirm the intended run name.
args.run_name = "resnet52FFNN-05-1e-3"

In [None]:
# Train / validate / test once per epoch, append the metrics to
# `<run_name>.txt`, checkpoint on improvement, and stop as soon as the
# validation loss is worse than the best seen so far.
for epoch in range(epochs):

    print(f'Epoch {epoch+1}/{epochs}')

    train_loss, _, train_acc = train_one_epoch(trainloader, model, optimizer, criterion, device, scaler, args)
    val_loss, val_predictions, val_acc = validate(validloader, model, criterion, device, scaler, args)
    test_loss, test_predictions, test_acc, test_targets = test(testloader, model, criterion, device, scaler, args)

    scheduler.step(val_loss)

    print("val_loss: " ,val_loss)
    print("val_acc: " ,val_acc)
    print("test_acc: " ,test_acc)

    # The context manager closes (and flushes) the log even when the
    # early-stopping `break` below fires; a bare open()/close() pair leaked
    # the handle on that path.
    with open(f'{args.run_name}.txt', "a") as f:
        f.write('\n\nepoch ' + str(epoch))
        f.write('\nAccuracy and Loss')
        f.write('\ntrain_acc: ' + str(train_acc) + '   train_loss: ' + str(train_loss) + ',')
        f.write('\nval_acc: ' + str(val_acc) + '   val_loss: ' + str(val_loss) + ',')
        f.write('\ntest_acc: ' + str(test_acc) + '   test_loss: ' + str(test_loss) + ',')
        f.write('\nlearning_rate: ' + str(optimizer.param_groups[0]["lr"]))

        # NOTE(review): this selects a checkpoint on the *test* split, and
        # test() currently always returns acc == 0, so the branch never fires.
        if test_acc > best_acc:
            print('Saving model best acc')
            f.write('\nnew best test total acc')
            torch.save(model.state_dict(), f'{args.run_name}_bestacc.pt')
            best_acc=test_acc

        if val_loss < best_loss:
            print('Saving model best val loss')
            f.write('\nnew best val_loss')
            torch.save(model.state_dict(), f'{args.run_name}.pt')
            best_loss=val_loss
        elif val_loss > best_loss:
            # NOTE(review): stopping on the first non-improving epoch means the
            # scheduler's patience can never elapse -- confirm this is intended.
            print("Val_loss stopped decreasing")
            break