## prep

In [1]:
import argparse
import os, pdb, sys, glob, time
import numpy as np
import pandas as pd
from tqdm import tqdm
import cv2

import torch
import torch.nn as nn
import torchvision.models as models 

from datasets import XRaysTrainDataset  
from datasets import XRaysTestDataset

# import necessary libraries for defining the optimizers
import torch.optim as optim

from trainer import fit
import config



In [2]:
# helper functions

def q(text = ''):
    """Print ``text`` after a '> ' marker, then stop execution via ``sys.exit()``.

    Debugging convenience for bailing out of a run early.
    """
    # '>' followed by a two-space separator yields the same '>  <text>' line.
    print('>', text, sep = '  ')
    sys.exit()
    
def count_parameters(model):
    """Return the number of trainable parameters of ``model``, in millions.

    Only parameters whose ``requires_grad`` flag is set are counted.
    """
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable / 1e6 # in terms of millions

In [3]:
# configurations

# --- locations ---
data_path = ""            # joined onto the 'data' directory when loading the dataset
ckpt = ""                 # path to checkpoint

# --- optimisation settings ---
bs = 32                   # batch size
lr = 1e-5                 # learning rate
loss_func = "FocalLoss"   # the training/testing helpers recognise "FocalLoss" and "BCE"

# --- run-mode switches ---
stage = 1
resume = False
test = False

In [4]:
# device diagnose
# Prefer Apple's MPS backend, then CUDA; fall back to the CPU so that `device`
# is always defined (previously it stayed unbound on machines without an
# accelerator and the print below raised NameError).
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

mps


In [5]:
# get data

# Data_Entry_2017.csv should be present in the mentioned path
data_dir = os.path.join('data', data_path)

# make the datasets: the training dataset is split 80/20 into train and validation parts
XRayTrain_dataset = XRaysTrainDataset(data_dir, transform = config.transform)
train_percentage = 0.8
num_total = len(XRayTrain_dataset)
num_train = int(num_total * train_percentage)
train_dataset, val_dataset = torch.utils.data.random_split(
    XRayTrain_dataset,
    [num_train, num_total - num_train],
)

XRayTest_dataset = XRaysTestDataset(data_dir, transform = config.transform)

print('\n-----Initial Dataset Information-----')
print(f'num images in train_dataset   : {len(train_dataset)}')
print(f'num images in val_dataset     : {len(val_dataset)}')
print(f'num images in XRayTest_dataset: {len(XRayTest_dataset)}')
print('-------------------------------------')


data/Data_Entry_2017.csv found: True
self.df.shape: (112120, 2)

train_val_df.pickle: loaded
self.train_val_df.shape: (86524, 2)

Sampling the huuuge training dataset


100%|██████████████████████████████████| 86524/86524 [00:02<00:00, 32998.65it/s]



disease_classes.pickle: already exists

self.all_classes_dict: {'Atelectasis': 7702, 'Infiltration': 10004, 'Cardiomegaly': 1633, 'Mass': 3828, 'No Finding': 10000, 'Effusion': 7978, 'Nodule': 4423, 'Pneumothorax': 2523, 'Edema': 1221, 'Fibrosis': 1187, 'Emphysema': 1368, 'Consolidation': 2657, 'Pneumonia': 797, 'Pleural_Thickening': 2129, 'Hernia': 141}

self.df.shape: (112120, 2)

test_df.pickle: loaded
self.test_df.shape: (25596, 2)

-----Initial Dataset Information-----
num images in train_dataset   : 33790
num images in val_dataset     : 8448
num images in XRayTest_dataset: 25596
-------------------------------------


In [6]:
# make the dataloaders
# Only the training loader shuffles; validation and test keep dataset order.
batch_size = bs
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size = batch_size, shuffle = False)
test_loader = torch.utils.data.DataLoader(XRayTest_dataset, batch_size = batch_size, shuffle = False)

print('\n-----Initial Batchloaders Information -----')
print(f'num batches in train_loader: {len(train_loader)}')
print(f'num batches in val_loader  : {len(val_loader)}')
print(f'num batches in test_loader : {len(test_loader)}')
print('-------------------------------------------')


-----Initial Batchloaders Information -----
num batches in train_loader: 1056
num batches in val_loader  : 264
num batches in test_loader : 800
-------------------------------------------


In [16]:
def train_model(model, model_name="unnmaed", loss="FocalLoss", epochs = 5, lr = 1e-5):
    """Train every parameter of ``model`` on the X-ray data by delegating to ``fit``.

    Besides its arguments the function reads these notebook globals:
    ``XRayTrain_dataset``, ``train_dataset``, ``train_loader``, ``val_loader``,
    ``test_loader``, ``device``, ``batch_size`` and ``stage``.

    Args:
        model: network to train; it is moved to ``device`` and all of its
            parameters are set to ``requires_grad = True``.
        model_name: label forwarded to ``fit`` (presumably used to name saved
            checkpoints — confirm in ``trainer.fit``). The misspelled default
            is kept as-is so existing callers are unaffected.
        loss: ``'FocalLoss'`` or ``'BCE'``; selects the loss function.
        epochs: number of epochs forwarded to ``fit``.
        lr: learning rate used for Adam and forwarded to ``fit``.

    Raises:
        ValueError: if ``loss`` is not one of the supported names.
    """
    print('TRAINING THE MODEL FROM SCRATCH')

    script_start_time = time.time() # tells the total run time of this script

    # sanity check
    if len(XRayTrain_dataset.all_classes) != 15: # 15 is the unique number of diseases in this dataset
        q('\nnumber of classes not equal to 15 !')

    a,b = train_dataset[0]
    print(f'\nwe are working with \nImages shape: {a.shape} and \nTarget shape: {b.shape}')

    # make models directory, where the models and the loss plots will be saved
    # (makedirs also creates missing parents and tolerates an existing directory)
    os.makedirs(config.models_dir, exist_ok = True)

    # define the loss function from the `loss` argument (previously the global
    # `loss_func` was consulted and the argument was silently ignored)
    if loss == 'FocalLoss': # by default
        from losses import FocalLoss
        loss_fn = FocalLoss(device = device, gamma = 2.).to(device)
    elif loss == 'BCE':
        loss_fn = nn.BCEWithLogitsLoss().to(device)
    else:
        # fail here instead of hitting an unbound `loss_fn` further down
        raise ValueError(f"unsupported loss {loss!r}; expected 'FocalLoss' or 'BCE'")

    print('\ntraining from scratch')

    model.to(device)

    # since we are not resuming the training of the model
    epochs_till_now = 0

    # making empty lists to collect all the losses
    losses_dict = {'epoch_train_loss': [], 'epoch_val_loss': [], 'total_train_loss_list': [], 'total_val_loss_list': []}

    # printing some hyperparameters
    print('\n> loss_fn: {}'.format(loss_fn))
    print('> epochs_till_now: {}'.format(epochs_till_now))
    print('> batch_size: {}'.format(batch_size))
    print('> stage: {}'.format(stage))
    print('> lr: {}'.format(lr))

    # unfreeze everything: the whole network is trained
    for param in model.parameters():
        param.requires_grad = True

    # every parameter is trainable at this point, so no filtering is needed
    optimizer = optim.Adam(model.parameters(), lr = lr)

    # make changes in the parameters of the following 'fit' function
    fit(device, XRayTrain_dataset, train_loader, val_loader,
                                            test_loader, model, model_name, loss_fn,
                                            optimizer, losses_dict,
                                            epochs_till_now = epochs_till_now, epochs = epochs,
                                            log_interval = 25, save_interval = 1,
                                            lr = lr, bs = batch_size,
                                            test_only = False, )

    # report the elapsed wall-clock time in whole hours and minutes
    script_time = time.time() - script_start_time
    m, _ = divmod(script_time, 60)
    h, m = divmod(m, 60)
    print(f'total running time: {int(h)} h {int(m)}m')

def test_model(model_name, ckpt): # test = True, resume = False
    """Evaluate a saved checkpoint by calling ``fit`` with ``test_only = True``.

    Besides its arguments the function reads these notebook globals:
    ``XRayTrain_dataset``, ``train_dataset``, ``train_loader``, ``val_loader``,
    ``test_loader``, ``device``, ``batch_size``, ``loss_func`` and ``lr``.

    Args:
        model_name: label forwarded to ``fit``.
        ckpt: checkpoint file name, resolved relative to ``config.models_dir``.
            The loaded object must provide the keys ``'epochs'``, ``'model'``
            and ``'losses_dict'``.

    Raises:
        ValueError: if the global ``loss_func`` is not ``'FocalLoss'`` or ``'BCE'``.
    """
    script_start_time = time.time() # tells the total run time of this script

    # sanity check
    if len(XRayTrain_dataset.all_classes) != 15: # 15 is the unique number of diseases in this dataset
        q('\nnumber of classes not equal to 15 !')

    a,b = train_dataset[0]
    print('\nwe are working with \nImages shape: {} and \nTarget shape: {}'.format( a.shape, b.shape))

    # make models directory, where the models and the loss plots will be saved
    # (makedirs also creates missing parents and tolerates an existing directory)
    os.makedirs(config.models_dir, exist_ok = True)

    # define the loss function
    if loss_func == 'FocalLoss': # by default
        from losses import FocalLoss
        loss_fn = FocalLoss(device = device, gamma = 2.).to(device)
    elif loss_func == 'BCE':
        loss_fn = nn.BCEWithLogitsLoss().to(device)
    else:
        # fail here instead of hitting an unbound `loss_fn` further down
        raise ValueError(f"unsupported loss_func {loss_func!r}; expected 'FocalLoss' or 'BCE'")

    # testing: reject both None and the empty-string default from the config cell
    if not ckpt:
        q('ERROR: Please select a checkpoint to load the testing model from')

    print('\ncheckpoint loaded: {}'.format(ckpt))
    # SECURITY: the checkpoint holds a fully pickled model and torch.load
    # unpickles arbitrary objects — only load checkpoints you trust.
    # NOTE(review): recent PyTorch releases default to weights_only=True, which
    # rejects fully pickled models — confirm against the installed version.
    # map_location lets a checkpoint saved on another device load on this one.
    checkpoint = torch.load(os.path.join(config.models_dir, ckpt), map_location = device)

    # restore the state stored in the checkpoint
    epochs_till_now = checkpoint['epochs']
    model = checkpoint['model']
    model.to(device)

    # loading previous loss lists to collect future losses
    losses_dict = checkpoint['losses_dict']

    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr = lr)

    # make changes in the parameters of the following 'fit' function
    fit(device, XRayTrain_dataset, train_loader, val_loader,
                                            test_loader, model, model_name, loss_fn,
                                            optimizer, losses_dict,
                                            epochs_till_now = epochs_till_now, epochs = 3,
                                            log_interval = 25, save_interval = 1,
                                            lr = lr, bs = batch_size,
                                            test_only = True)

    # report the elapsed wall-clock time in whole hours and minutes
    script_time = time.time() - script_start_time
    m, _ = divmod(script_time, 60)
    h, m = divmod(m, 60)
    print(f'total running time: {int(h)} h {int(m)}m')

    

## resnet50

In [8]:
# ResNet-50 with torchvision's default pretrained weights; the final fully
# connected layer is swapped for one with an output per disease class.
resnet50 = models.resnet50(weights = models.ResNet50_Weights.DEFAULT)
num_ftrs = resnet50.fc.in_features
resnet50.fc = nn.Linear(in_features = num_ftrs, out_features = len(XRayTrain_dataset.all_classes))

In [None]:
# train ResNet-50 with the default loss, epoch count and learning rate
train_model(model = resnet50, model_name = "resnet50")

In [None]:
# test_model requires the model name as its first argument; calling it with
# only `ckpt=` raised TypeError.
test_model("resnet50", ckpt="resnet50_1e-05_05.pth")


# we are working with 
# Images shape: torch.Size([3, 224, 224]) and 
# Target shape: torch.Size([15])

# checkpoint loaded: resnet50_1e-05_05.pth

# ======= Testing... =======

# 400/400 (100.00 %)
# NoFindingIndex:  10
# y_true.shape, y_probs.shape  (25596, 15) (25596, 15)

# class_roc_auc_list:  [0.715068159733321, 0.8317763860243287, 0.6807723553643712, 0.8192090286549367, 0.7875569096206302, 0.8586051173075546, 0.7716490627942258, 0.8370880548439736, 0.6483991160420525, 0.7230309983825862, 0.7076632816099293, 0.6843221061566516, 0.7107974986134953, 0.6627152371012439, 0.8176789116022511]

# useful_classes_roc_auc_list [0.715068159733321, 0.8317763860243287, 0.6807723553643712, 0.8192090286549367, 0.7875569096206302, 0.8586051173075546, 0.7716490627942258, 0.8370880548439736, 0.6483991160420525, 0.7230309983825862, 0.6843221061566516, 0.7107974986134953, 0.6627152371012439, 0.8176789116022511]
# test_roc_auc: 0.7534763530172588 in 9 mins 8 secs
# total running time: 0 h 9m

## ViT

In [8]:
# ViT-B/16 with the IMAGENET1K_V1 weights; the classification head is swapped
# for a linear layer with an output per disease class.
vit_base = models.vit_b_16(weights = models.ViT_B_16_Weights.IMAGENET1K_V1)
num_ftrs = vit_base.heads.head.in_features
vit_base.heads.head = nn.Linear(in_features = num_ftrs, out_features = len(XRayTrain_dataset.all_classes))

In [9]:
# train the ViT with the default loss, epoch count and learning rate
train_model(model = vit_base, model_name = "vit_base")

TRAINING THE MODEL FROM SCRATCH

we are working with 
Images shape: torch.Size([3, 224, 224]) and 
Target shape: torch.Size([15])

training from scratch

> loss_fn: FocalLoss()
> epochs_till_now: 0
> batch_size: 32
> stage: 1
> lr: 1e-05


TRAINING
Train Loss for batch 025/1056 @epoch1/5: 0.06955 in 0 mins 2.11 secs
Train Loss for batch 050/1056 @epoch1/5: 0.07365 in 0 mins 2.11 secs
Train Loss for batch 075/1056 @epoch1/5: 0.06291 in 0 mins 2.16 secs
Train Loss for batch 100/1056 @epoch1/5: 0.07791 in 0 mins 2.15 secs
Train Loss for batch 125/1056 @epoch1/5: 0.08227 in 0 mins 2.14 secs
Train Loss for batch 150/1056 @epoch1/5: 0.0734 in 0 mins 2.25 secs
Train Loss for batch 175/1056 @epoch1/5: 0.06724 in 0 mins 2.11 secs
Train Loss for batch 200/1056 @epoch1/5: 0.0689 in 0 mins 2.09 secs
Train Loss for batch 225/1056 @epoch1/5: 0.05949 in 0 mins 2.14 secs
Train Loss for batch 250/1056 @epoch1/5: 0.07222 in 0 mins 2.14 secs
Train Loss for batch 275/1056 @epoch1/5: 0.06407 in 0 mins 2.12



Train Loss for batch 750/1056 @epoch1/5: 0.0556 in 0 mins 2.1 secs
Train Loss for batch 775/1056 @epoch1/5: 0.06176 in 0 mins 2.31 secs
Train Loss for batch 800/1056 @epoch1/5: 0.05982 in 0 mins 2.1 secs
Train Loss for batch 825/1056 @epoch1/5: 0.06278 in 0 mins 2.1 secs
Train Loss for batch 850/1056 @epoch1/5: 0.06544 in 0 mins 2.1 secs
Train Loss for batch 875/1056 @epoch1/5: 0.05748 in 0 mins 2.1 secs
Train Loss for batch 900/1056 @epoch1/5: 0.06402 in 0 mins 2.15 secs
Train Loss for batch 925/1056 @epoch1/5: 0.06282 in 0 mins 2.1 secs
Train Loss for batch 950/1056 @epoch1/5: 0.05648 in 0 mins 2.17 secs
Train Loss for batch 975/1056 @epoch1/5: 0.05841 in 0 mins 2.12 secs
Train Loss for batch 1000/1056 @epoch1/5: 0.06655 in 0 mins 2.16 secs
Train Loss for batch 1025/1056 @epoch1/5: 0.06637 in 0 mins 2.16 secs
Train Loss for batch 1050/1056 @epoch1/5: 0.05704 in 0 mins 2.27 secs
VALIDATION




Val Loss   for batch 025/264 @epoch1/5: 0.06762 in 0 mins 1.1 secs
Val Loss   for batch 050/264 @epoch1/5: 0.05624 in 0 mins 1.14 secs
Val Loss   for batch 075/264 @epoch1/5: 0.0571 in 0 mins 1.12 secs
Val Loss   for batch 100/264 @epoch1/5: 0.05797 in 0 mins 1.13 secs
Val Loss   for batch 125/264 @epoch1/5: 0.06125 in 0 mins 1.13 secs
Val Loss   for batch 150/264 @epoch1/5: 0.06351 in 0 mins 1.12 secs
Val Loss   for batch 175/264 @epoch1/5: 0.07386 in 0 mins 1.09 secs
Val Loss   for batch 200/264 @epoch1/5: 0.05963 in 0 mins 1.12 secs
Val Loss   for batch 225/264 @epoch1/5: 0.07046 in 0 mins 1.1 secs
Val Loss   for batch 250/264 @epoch1/5: 0.0702 in 0 mins 1.16 secs

NoFindingIndex:  10
y_true.shape, y_probs.shape  (8448, 15) (8448, 15)

class_roc_auc_list:  [0.7612283103469777, 0.8778066084620451, 0.7126378197362818, 0.8491325212255444, 0.8277837148594757, 0.7930051156406652, 0.7417160685062604, 0.7879520360916539, 0.6442228712433438, 0.732507276849407, 0.7466348074004436, 0.68489470

100%|██████████████████████████████████| 86524/86524 [00:02<00:00, 32012.79it/s]



self.all_classes_dict: {'Effusion': 7996, 'No Finding': 10000, 'Atelectasis': 7700, 'Infiltration': 10007, 'Edema': 1213, 'Consolidation': 2685, 'Nodule': 4422, 'Mass': 3846, 'Pneumothorax': 2518, 'Pneumonia': 779, 'Pleural_Thickening': 2125, 'Fibrosis': 1182, 'Cardiomegaly': 1627, 'Emphysema': 1354, 'Hernia': 141}

-----Resampled Dataset Information-----
num images in train_dataset   : 33794
num images in val_dataset     : 8449
---------------------------------------

-----Resampled Batchloaders Information -----
num batches in train_loader: 1057
num batches in val_loader  : 265
---------------------------------------------

TRAINING
Train Loss for batch 025/1057 @epoch2/5: 0.05298 in 0 mins 6.3 secs
Train Loss for batch 050/1057 @epoch2/5: 0.06296 in 0 mins 2.28 secs




Train Loss for batch 075/1057 @epoch2/5: 0.06147 in 0 mins 2.17 secs
Train Loss for batch 100/1057 @epoch2/5: 0.06346 in 0 mins 2.25 secs
Train Loss for batch 125/1057 @epoch2/5: 0.07509 in 0 mins 2.11 secs
Train Loss for batch 150/1057 @epoch2/5: 0.06877 in 0 mins 2.24 secs
Train Loss for batch 175/1057 @epoch2/5: 0.06881 in 0 mins 2.12 secs
Train Loss for batch 200/1057 @epoch2/5: 0.06832 in 0 mins 2.11 secs
Train Loss for batch 225/1057 @epoch2/5: 0.07287 in 0 mins 2.08 secs
Train Loss for batch 250/1057 @epoch2/5: 0.05551 in 0 mins 2.36 secs
Train Loss for batch 275/1057 @epoch2/5: 0.07896 in 0 mins 2.24 secs
Train Loss for batch 300/1057 @epoch2/5: 0.07051 in 0 mins 2.24 secs
Train Loss for batch 325/1057 @epoch2/5: 0.07286 in 0 mins 2.15 secs




Train Loss for batch 350/1057 @epoch2/5: 0.07099 in 0 mins 2.2 secs
Train Loss for batch 375/1057 @epoch2/5: 0.05838 in 0 mins 2.29 secs
Train Loss for batch 400/1057 @epoch2/5: 0.05197 in 0 mins 2.13 secs
Train Loss for batch 425/1057 @epoch2/5: 0.05993 in 0 mins 2.1 secs
Train Loss for batch 450/1057 @epoch2/5: 0.05634 in 0 mins 2.19 secs
Train Loss for batch 475/1057 @epoch2/5: 0.06981 in 0 mins 2.11 secs
Train Loss for batch 500/1057 @epoch2/5: 0.05464 in 0 mins 2.15 secs
Train Loss for batch 525/1057 @epoch2/5: 0.06736 in 0 mins 2.1 secs
Train Loss for batch 550/1057 @epoch2/5: 0.05671 in 0 mins 2.2 secs
Train Loss for batch 575/1057 @epoch2/5: 0.06992 in 0 mins 2.11 secs
Train Loss for batch 600/1057 @epoch2/5: 0.05992 in 0 mins 2.13 secs
Train Loss for batch 625/1057 @epoch2/5: 0.05246 in 0 mins 2.12 secs
Train Loss for batch 650/1057 @epoch2/5: 0.06864 in 0 mins 2.12 secs
Train Loss for batch 675/1057 @epoch2/5: 0.06475 in 0 mins 2.21 secs
Train Loss for batch 700/1057 @epoch2/

100%|██████████████████████████████████| 86524/86524 [00:02<00:00, 33754.56it/s]



self.all_classes_dict: {'Infiltration': 10007, 'Mass': 3835, 'No Finding': 10000, 'Atelectasis': 7702, 'Nodule': 4431, 'Cardiomegaly': 1626, 'Consolidation': 2663, 'Fibrosis': 1170, 'Effusion': 7991, 'Pneumothorax': 2523, 'Emphysema': 1356, 'Edema': 1211, 'Pneumonia': 794, 'Pleural_Thickening': 2115, 'Hernia': 141}

-----Resampled Dataset Information-----
num images in train_dataset   : 33792
num images in val_dataset     : 8449
---------------------------------------

-----Resampled Batchloaders Information -----
num batches in train_loader: 1056
num batches in val_loader  : 265
---------------------------------------------

TRAINING
Train Loss for batch 025/1056 @epoch3/5: 0.06015 in 0 mins 2.15 secs
Train Loss for batch 050/1056 @epoch3/5: 0.06859 in 0 mins 2.2 secs
Train Loss for batch 075/1056 @epoch3/5: 0.0583 in 0 mins 3.24 secs
Train Loss for batch 100/1056 @epoch3/5: 0.06603 in 0 mins 2.15 secs
Train Loss for batch 125/1056 @epoch3/5: 0.06668 in 0 mins 3.0 secs
Train Loss for



Train Loss for batch 625/1056 @epoch3/5: 0.05355 in 0 mins 2.14 secs
Train Loss for batch 650/1056 @epoch3/5: 0.0569 in 0 mins 2.11 secs
Train Loss for batch 675/1056 @epoch3/5: 0.06011 in 0 mins 2.16 secs
Train Loss for batch 700/1056 @epoch3/5: 0.0719 in 0 mins 2.1 secs
Train Loss for batch 725/1056 @epoch3/5: 0.05848 in 0 mins 2.13 secs
Train Loss for batch 750/1056 @epoch3/5: 0.05761 in 0 mins 2.18 secs
Train Loss for batch 775/1056 @epoch3/5: 0.05976 in 0 mins 2.43 secs
Train Loss for batch 800/1056 @epoch3/5: 0.06143 in 0 mins 2.17 secs
Train Loss for batch 825/1056 @epoch3/5: 0.05618 in 0 mins 2.1 secs
Train Loss for batch 850/1056 @epoch3/5: 0.05081 in 0 mins 2.12 secs
Train Loss for batch 875/1056 @epoch3/5: 0.06083 in 0 mins 2.11 secs
Train Loss for batch 900/1056 @epoch3/5: 0.06088 in 0 mins 2.11 secs
Train Loss for batch 925/1056 @epoch3/5: 0.06876 in 0 mins 2.16 secs
Train Loss for batch 950/1056 @epoch3/5: 0.04972 in 0 mins 2.13 secs
Train Loss for batch 975/1056 @epoch3/



Val Loss   for batch 125/265 @epoch3/5: 0.06772 in 0 mins 1.11 secs
Val Loss   for batch 150/265 @epoch3/5: 0.06052 in 0 mins 1.1 secs
Val Loss   for batch 175/265 @epoch3/5: 0.06136 in 0 mins 1.12 secs
Val Loss   for batch 200/265 @epoch3/5: 0.06545 in 0 mins 1.1 secs
Val Loss   for batch 225/265 @epoch3/5: 0.06015 in 0 mins 1.11 secs
Val Loss   for batch 250/265 @epoch3/5: 0.05794 in 0 mins 1.14 secs

NoFindingIndex:  10
y_true.shape, y_probs.shape  (8449, 15) (8449, 15)

class_roc_auc_list:  [0.7774063462644777, 0.9125281255559055, 0.7515237217724512, 0.8835762514891622, 0.8495399319780697, 0.8456499213786458, 0.7866543369131064, 0.7585428782046032, 0.667442430793746, 0.794789530403258, 0.754283905654369, 0.7270713477248603, 0.736474807239941, 0.6996622310630083, 0.8011934684367993]

useful_classes_roc_auc_list [0.7774063462644777, 0.9125281255559055, 0.7515237217724512, 0.8835762514891622, 0.8495399319780697, 0.8456499213786458, 0.7866543369131064, 0.7585428782046032, 0.66744243079

100%|██████████████████████████████████| 86524/86524 [00:02<00:00, 33764.61it/s]



self.all_classes_dict: {'No Finding': 10000, 'Nodule': 4399, 'Infiltration': 10004, 'Mass': 3820, 'Pleural_Thickening': 2125, 'Cardiomegaly': 1639, 'Atelectasis': 7702, 'Pneumothorax': 2514, 'Effusion': 7997, 'Consolidation': 2665, 'Hernia': 141, 'Emphysema': 1360, 'Pneumonia': 779, 'Fibrosis': 1199, 'Edema': 1211}

-----Resampled Dataset Information-----
num images in train_dataset   : 33790
num images in val_dataset     : 8448
---------------------------------------

-----Resampled Batchloaders Information -----
num batches in train_loader: 1056
num batches in val_loader  : 264
---------------------------------------------

TRAINING
Train Loss for batch 025/1056 @epoch4/5: 0.05756 in 0 mins 2.22 secs
Train Loss for batch 050/1056 @epoch4/5: 0.06506 in 0 mins 2.1 secs
Train Loss for batch 075/1056 @epoch4/5: 0.05234 in 0 mins 2.13 secs
Train Loss for batch 100/1056 @epoch4/5: 0.05795 in 0 mins 2.13 secs




Train Loss for batch 125/1056 @epoch4/5: 0.05906 in 0 mins 2.1 secs
Train Loss for batch 150/1056 @epoch4/5: 0.04793 in 0 mins 2.14 secs
Train Loss for batch 175/1056 @epoch4/5: 0.05667 in 0 mins 2.1 secs
Train Loss for batch 200/1056 @epoch4/5: 0.06226 in 0 mins 2.13 secs
Train Loss for batch 225/1056 @epoch4/5: 0.06428 in 0 mins 2.19 secs
Train Loss for batch 250/1056 @epoch4/5: 0.05358 in 0 mins 2.11 secs
Train Loss for batch 275/1056 @epoch4/5: 0.07202 in 0 mins 2.15 secs
Train Loss for batch 300/1056 @epoch4/5: 0.06527 in 0 mins 2.11 secs
Train Loss for batch 325/1056 @epoch4/5: 0.06785 in 0 mins 2.11 secs
Train Loss for batch 350/1056 @epoch4/5: 0.05737 in 0 mins 2.13 secs
Train Loss for batch 375/1056 @epoch4/5: 0.06216 in 0 mins 2.13 secs
Train Loss for batch 400/1056 @epoch4/5: 0.06055 in 0 mins 2.13 secs
Train Loss for batch 425/1056 @epoch4/5: 0.06776 in 0 mins 2.11 secs
Train Loss for batch 450/1056 @epoch4/5: 0.06096 in 0 mins 2.11 secs
Train Loss for batch 475/1056 @epoch



Train Loss for batch 975/1056 @epoch4/5: 0.06673 in 8 mins 33.67 secs
Train Loss for batch 1000/1056 @epoch4/5: 0.061 in 0 mins 2.13 secs
Train Loss for batch 1025/1056 @epoch4/5: 0.06035 in 0 mins 2.45 secs
Train Loss for batch 1050/1056 @epoch4/5: 0.06674 in 0 mins 3.15 secs
VALIDATION
Val Loss   for batch 025/264 @epoch4/5: 0.06183 in 16 mins 47.26 secs
Val Loss   for batch 050/264 @epoch4/5: 0.05778 in 0 mins 1.12 secs
Val Loss   for batch 075/264 @epoch4/5: 0.05825 in 0 mins 1.12 secs
Val Loss   for batch 100/264 @epoch4/5: 0.0587 in 0 mins 1.15 secs
Val Loss   for batch 125/264 @epoch4/5: 0.05946 in 0 mins 1.31 secs
Val Loss   for batch 150/264 @epoch4/5: 0.0558 in 0 mins 1.38 secs
Val Loss   for batch 175/264 @epoch4/5: 0.0617 in 0 mins 1.47 secs
Val Loss   for batch 200/264 @epoch4/5: 0.05486 in 0 mins 1.57 secs
Val Loss   for batch 225/264 @epoch4/5: 0.05517 in 0 mins 1.64 secs
Val Loss   for batch 250/264 @epoch4/5: 0.06374 in 0 mins 1.68 secs

NoFindingIndex:  10
y_true.shap

100%|██████████████████████████████████| 86524/86524 [00:02<00:00, 33115.97it/s]



self.all_classes_dict: {'Consolidation': 2658, 'No Finding': 10000, 'Effusion': 8014, 'Emphysema': 1367, 'Infiltration': 10008, 'Mass': 3813, 'Pneumothorax': 2529, 'Edema': 1189, 'Atelectasis': 7700, 'Pneumonia': 779, 'Pleural_Thickening': 2122, 'Cardiomegaly': 1632, 'Fibrosis': 1168, 'Nodule': 4436, 'Hernia': 141}

-----Resampled Dataset Information-----
num images in train_dataset   : 33796
num images in val_dataset     : 8449
---------------------------------------

-----Resampled Batchloaders Information -----
num batches in train_loader: 1057
num batches in val_loader  : 265
---------------------------------------------

TRAINING
Train Loss for batch 025/1057 @epoch5/5: 0.0593 in 0 mins 3.68 secs
Train Loss for batch 050/1057 @epoch5/5: 0.05565 in 0 mins 3.65 secs
Train Loss for batch 075/1057 @epoch5/5: 0.07191 in 0 mins 4.21 secs
Train Loss for batch 100/1057 @epoch5/5: 0.0596 in 0 mins 4.65 secs
Train Loss for batch 125/1057 @epoch5/5: 0.05753 in 0 mins 4.75 secs
Train Loss fo



Train Loss for batch 525/1057 @epoch5/5: 0.05788 in 0 mins 2.22 secs
Train Loss for batch 550/1057 @epoch5/5: 0.0577 in 0 mins 2.11 secs
Train Loss for batch 575/1057 @epoch5/5: 0.05351 in 0 mins 2.11 secs
Train Loss for batch 600/1057 @epoch5/5: 0.05978 in 0 mins 2.13 secs
Train Loss for batch 625/1057 @epoch5/5: 0.06678 in 0 mins 2.14 secs
Train Loss for batch 650/1057 @epoch5/5: 0.06493 in 0 mins 2.3 secs
Train Loss for batch 675/1057 @epoch5/5: 0.05934 in 0 mins 2.27 secs
Train Loss for batch 700/1057 @epoch5/5: 0.06079 in 0 mins 2.21 secs
Train Loss for batch 725/1057 @epoch5/5: 0.07162 in 0 mins 2.15 secs
Train Loss for batch 750/1057 @epoch5/5: 0.06451 in 15 mins 50.34 secs
Train Loss for batch 775/1057 @epoch5/5: 0.06387 in 0 mins 2.23 secs
Train Loss for batch 800/1057 @epoch5/5: 0.06163 in 0 mins 2.24 secs
Train Loss for batch 825/1057 @epoch5/5: 0.058 in 0 mins 2.09 secs
Train Loss for batch 850/1057 @epoch5/5: 0.05768 in 0 mins 2.18 secs




Train Loss for batch 875/1057 @epoch5/5: 0.05909 in 0 mins 2.11 secs
Train Loss for batch 900/1057 @epoch5/5: 0.05182 in 0 mins 2.14 secs
Train Loss for batch 925/1057 @epoch5/5: 0.05776 in 0 mins 2.1 secs
Train Loss for batch 950/1057 @epoch5/5: 0.0565 in 0 mins 2.11 secs
Train Loss for batch 975/1057 @epoch5/5: 0.0653 in 0 mins 2.11 secs
Train Loss for batch 1000/1057 @epoch5/5: 0.05407 in 0 mins 2.09 secs
Train Loss for batch 1025/1057 @epoch5/5: 0.05745 in 0 mins 2.14 secs
Train Loss for batch 1050/1057 @epoch5/5: 0.05607 in 0 mins 2.11 secs
VALIDATION
Val Loss   for batch 025/265 @epoch5/5: 0.05935 in 0 mins 1.11 secs
Val Loss   for batch 050/265 @epoch5/5: 0.06765 in 0 mins 1.11 secs
Val Loss   for batch 075/265 @epoch5/5: 0.05564 in 0 mins 1.1 secs
Val Loss   for batch 100/265 @epoch5/5: 0.05608 in 0 mins 1.2 secs
Val Loss   for batch 125/265 @epoch5/5: 0.05975 in 0 mins 1.13 secs
Val Loss   for batch 150/265 @epoch5/5: 0.05578 in 0 mins 1.11 secs
Val Loss   for batch 175/265 @e

In [19]:
# evaluate the ViT checkpoint saved after epoch 5
test_model(model_name = "vit_base", ckpt = "vit_base_1e-05_05.pth")


we are working with 
Images shape: torch.Size([3, 224, 224]) and 
Target shape: torch.Size([15])

checkpoint loaded: vit_base_1e-05_05.pth


800/800 (100.00 %)
NoFindingIndex:  10
y_true.shape, y_probs.shape  (25596, 15) (25596, 15)

class_roc_auc_list:  [0.7283401320814121, 0.8589345591653008, 0.7091878103025275, 0.8303291642337485, 0.794394348794234, 0.8324456340260294, 0.7813288856545456, 0.8506167212128395, 0.6256159109541888, 0.7491304578117551, 0.7092328939465433, 0.6817418389074441, 0.7367450204167982, 0.6614958674980239, 0.805954415984719]

useful_classes_roc_auc_list [0.7283401320814121, 0.8589345591653008, 0.7091878103025275, 0.8303291642337485, 0.794394348794234, 0.8324456340260294, 0.7813288856545456, 0.8506167212128395, 0.6256159109541888, 0.7491304578117551, 0.6817418389074441, 0.7367450204167982, 0.6614958674980239, 0.805954415984719]
test_roc_auc: 0.760447197645969 in 15 mins 10 secs
total running time: 0 h 15m


## VGG

In [20]:
# VGG-16 with torchvision's default pretrained weights; the last classifier
# layer is swapped for one with an output per disease class.
vgg16 = models.vgg16(weights = "DEFAULT")
num_ftrs = vgg16.classifier[-1].in_features
vgg16.classifier[-1] = nn.Linear(num_ftrs, len(XRayTrain_dataset.all_classes))

In [21]:
# train VGG-16 with the default loss, epoch count and learning rate
train_model(model = vgg16, model_name = "vgg16")

TRAINING THE MODEL FROM SCRATCH

we are working with 
Images shape: torch.Size([3, 224, 224]) and 
Target shape: torch.Size([15])

training from scratch

> loss_fn: FocalLoss()
> epochs_till_now: 0
> batch_size: 32
> stage: 1
> lr: 1e-05


TRAINING
Train Loss for batch 025/1056 @epoch1/5: 0.08661 in 0 mins 44.9 secs
Train Loss for batch 050/1056 @epoch1/5: 0.07364 in 0 mins 51.36 secs
Train Loss for batch 075/1056 @epoch1/5: 0.06935 in 0 mins 42.2 secs
Train Loss for batch 100/1056 @epoch1/5: 0.08475 in 0 mins 41.41 secs
Train Loss for batch 125/1056 @epoch1/5: 0.07611 in 0 mins 56.07 secs
Train Loss for batch 150/1056 @epoch1/5: 0.05866 in 0 mins 52.96 secs
Train Loss for batch 175/1056 @epoch1/5: 0.0771 in 0 mins 38.01 secs
Train Loss for batch 200/1056 @epoch1/5: 0.07359 in 0 mins 44.79 secs
Train Loss for batch 225/1056 @epoch1/5: 0.07062 in 1 mins 6.65 secs
Train Loss for batch 250/1056 @epoch1/5: 0.07582 in 0 mins 40.28 secs
Train Loss for batch 275/1056 @epoch1/5: 0.07885 in 1 m



Train Loss for batch 425/1056 @epoch1/5: 0.06686 in 0 mins 44.73 secs
Train Loss for batch 450/1056 @epoch1/5: 0.06819 in 0 mins 50.05 secs
Train Loss for batch 475/1056 @epoch1/5: 0.0666 in 0 mins 44.39 secs
Train Loss for batch 500/1056 @epoch1/5: 0.06896 in 0 mins 43.43 secs
Train Loss for batch 525/1056 @epoch1/5: 0.06345 in 1 mins 4.15 secs
Train Loss for batch 550/1056 @epoch1/5: 0.07325 in 0 mins 32.13 secs
Train Loss for batch 575/1056 @epoch1/5: 0.07377 in 0 mins 41.53 secs
Train Loss for batch 600/1056 @epoch1/5: 0.06116 in 0 mins 50.95 secs
Train Loss for batch 625/1056 @epoch1/5: 0.07141 in 0 mins 41.13 secs
Train Loss for batch 650/1056 @epoch1/5: 0.07108 in 0 mins 36.46 secs
Train Loss for batch 675/1056 @epoch1/5: 0.07433 in 0 mins 45.96 secs




Train Loss for batch 700/1056 @epoch1/5: 0.0698 in 0 mins 37.77 secs
Train Loss for batch 725/1056 @epoch1/5: 0.06767 in 0 mins 38.09 secs
Train Loss for batch 750/1056 @epoch1/5: 0.06568 in 1 mins 4.07 secs
Train Loss for batch 775/1056 @epoch1/5: 0.0765 in 0 mins 39.82 secs
Train Loss for batch 800/1056 @epoch1/5: 0.07042 in 0 mins 38.84 secs
Train Loss for batch 825/1056 @epoch1/5: 0.08157 in 0 mins 37.14 secs
Train Loss for batch 850/1056 @epoch1/5: 0.07345 in 0 mins 36.58 secs
Train Loss for batch 875/1056 @epoch1/5: 0.07357 in 0 mins 54.28 secs
Train Loss for batch 900/1056 @epoch1/5: 0.06414 in 0 mins 46.34 secs
Train Loss for batch 925/1056 @epoch1/5: 0.0604 in 0 mins 39.12 secs
Train Loss for batch 950/1056 @epoch1/5: 0.06012 in 0 mins 32.1 secs
Train Loss for batch 975/1056 @epoch1/5: 0.06689 in 0 mins 31.46 secs
Train Loss for batch 1000/1056 @epoch1/5: 0.06236 in 0 mins 51.86 secs
Train Loss for batch 1025/1056 @epoch1/5: 0.07011 in 0 mins 42.22 secs
Train Loss for batch 10

100%|█████████████████████████████████| 86524/86524 [00:02<00:00, 33475.08it/s]



self.all_classes_dict: {'No Finding': 10000, 'Atelectasis': 7709, 'Effusion': 8040, 'Nodule': 4416, 'Pleural_Thickening': 2131, 'Consolidation': 2655, 'Mass': 3838, 'Infiltration': 10005, 'Cardiomegaly': 1634, 'Pneumonia': 786, 'Edema': 1202, 'Emphysema': 1366, 'Pneumothorax': 2528, 'Fibrosis': 1171, 'Hernia': 141}

-----Resampled Dataset Information-----
num images in train_dataset   : 33795
num images in val_dataset     : 8449
---------------------------------------

-----Resampled Batchloaders Information -----
num batches in train_loader: 1057
num batches in val_loader  : 265
---------------------------------------------

TRAINING
Train Loss for batch 025/1057 @epoch2/5: 0.06106 in 0 mins 29.19 secs
Train Loss for batch 050/1057 @epoch2/5: 0.08219 in 0 mins 47.61 secs
Train Loss for batch 075/1057 @epoch2/5: 0.07099 in 0 mins 34.68 secs
Train Loss for batch 100/1057 @epoch2/5: 0.05893 in 0 mins 39.33 secs
Train Loss for batch 125/1057 @epoch2/5: 0.06663 in 0 mins 39.3 secs
Train L



Train Loss for batch 600/1057 @epoch2/5: 0.06514 in 0 mins 28.76 secs
Train Loss for batch 625/1057 @epoch2/5: 0.05438 in 0 mins 24.63 secs
Train Loss for batch 650/1057 @epoch2/5: 0.06547 in 0 mins 32.02 secs
Train Loss for batch 675/1057 @epoch2/5: 0.06805 in 0 mins 32.25 secs
Train Loss for batch 700/1057 @epoch2/5: 0.07123 in 0 mins 33.01 secs
Train Loss for batch 725/1057 @epoch2/5: 0.06575 in 0 mins 51.18 secs
Train Loss for batch 750/1057 @epoch2/5: 0.05689 in 0 mins 31.89 secs
Train Loss for batch 775/1057 @epoch2/5: 0.08006 in 0 mins 23.77 secs
Train Loss for batch 800/1057 @epoch2/5: 0.06445 in 0 mins 31.1 secs
Train Loss for batch 825/1057 @epoch2/5: 0.06137 in 0 mins 37.38 secs
Train Loss for batch 850/1057 @epoch2/5: 0.06416 in 0 mins 45.08 secs
Train Loss for batch 875/1057 @epoch2/5: 0.06393 in 0 mins 33.2 secs
Train Loss for batch 900/1057 @epoch2/5: 0.06435 in 0 mins 23.95 secs
Train Loss for batch 925/1057 @epoch2/5: 0.0714 in 0 mins 33.07 secs
Train Loss for batch 95



Train Loss for batch 1050/1057 @epoch2/5: 0.0732 in 0 mins 51.45 secs
VALIDATION
Val Loss   for batch 025/265 @epoch2/5: 0.06998 in 0 mins 4.02 secs
Val Loss   for batch 050/265 @epoch2/5: 0.06666 in 0 mins 7.03 secs
Val Loss   for batch 075/265 @epoch2/5: 0.06333 in 0 mins 3.74 secs
Val Loss   for batch 100/265 @epoch2/5: 0.05464 in 0 mins 3.94 secs
Val Loss   for batch 125/265 @epoch2/5: 0.0531 in 0 mins 4.01 secs
Val Loss   for batch 150/265 @epoch2/5: 0.06771 in 0 mins 0.82 secs
Val Loss   for batch 175/265 @epoch2/5: 0.0641 in 0 mins 0.82 secs
Val Loss   for batch 200/265 @epoch2/5: 0.05712 in 0 mins 0.8 secs
Val Loss   for batch 225/265 @epoch2/5: 0.06583 in 0 mins 0.83 secs
Val Loss   for batch 250/265 @epoch2/5: 0.06285 in 0 mins 0.83 secs

NoFindingIndex:  10
y_true.shape, y_probs.shape  (8449, 15) (8449, 15)

class_roc_auc_list:  [0.7549917665389188, 0.8594325991585189, 0.7199712123804529, 0.8778144314666194, 0.8386663278271917, 0.8380570436059346, 0.7805653186819107, 0.73926

100%|█████████████████████████████████| 86524/86524 [00:02<00:00, 33593.31it/s]



self.all_classes_dict: {'No Finding': 10000, 'Infiltration': 10006, 'Fibrosis': 1181, 'Consolidation': 2654, 'Pneumothorax': 2513, 'Effusion': 8015, 'Edema': 1218, 'Nodule': 4418, 'Pleural_Thickening': 2122, 'Atelectasis': 7688, 'Mass': 3818, 'Cardiomegaly': 1628, 'Emphysema': 1357, 'Pneumonia': 785, 'Hernia': 141}

-----Resampled Dataset Information-----
num images in train_dataset   : 33795
num images in val_dataset     : 8449
---------------------------------------

-----Resampled Batchloaders Information -----
num batches in train_loader: 1057
num batches in val_loader  : 265
---------------------------------------------

TRAINING


KeyboardInterrupt: 

In [24]:
# Run the test pass once per ResNet-50 checkpoint file, numbered 01 through 05.
# A banner is printed before each run so the per-checkpoint output can be told apart.
for i in range(1, 6):
    resnet50_checkpoint_file = f"resnet50_1e-05_0{i}.pth"
    print(f"------test: epoch {i}------")
    test_model("resnet50", resnet50_checkpoint_file)

------test: epoch 1------

we are working with 
Images shape: torch.Size([3, 224, 224]) and 
Target shape: torch.Size([15])

checkpoint loaded: resnet50_1e-05_01.pth


800/800 (100.00 %)
NoFindingIndex:  10
y_true.shape, y_probs.shape  (25596, 15) (25596, 15)

class_roc_auc_list:  [0.7189187178896098, 0.762261119768623, 0.6890372931234429, 0.7953925990357427, 0.7796063423218341, 0.7746592001972684, 0.7548162705738264, 0.6546769164850992, 0.6504116244930731, 0.6760494645915358, 0.6984763393888749, 0.6668943565824553, 0.6864999182998907, 0.610570915950094, 0.7961622202442222]

useful_classes_roc_auc_list [0.7189187178896098, 0.762261119768623, 0.6890372931234429, 0.7953925990357427, 0.7796063423218341, 0.7746592001972684, 0.7548162705738264, 0.6546769164850992, 0.6504116244930731, 0.6760494645915358, 0.6668943565824553, 0.6864999182998907, 0.610570915950094, 0.7961622202442222]
test_roc_auc: 0.715425497111194 in 9 mins 29 secs
total running time: 0 h 9m
------test: epoch 2------

we are 

In [25]:
# Run the test pass once per ViT-base checkpoint file, numbered 01 through 05.
# A banner is printed before each run so the per-checkpoint output can be told apart.
for i in range(1, 6):
    vit_checkpoint_file = f"vit_base_1e-05_0{i}.pth"
    print(f"------test: epoch {i}------")
    test_model("vit", vit_checkpoint_file)

------test: epoch 1------

we are working with 
Images shape: torch.Size([3, 224, 224]) and 
Target shape: torch.Size([15])

checkpoint loaded: vit_base_1e-05_01.pth


800/800 (100.00 %)
NoFindingIndex:  10
y_true.shape, y_probs.shape  (25596, 15) (25596, 15)

class_roc_auc_list:  [0.7315606805228217, 0.830098389499394, 0.7029371434912909, 0.8136356834318004, 0.7838216335693665, 0.7832738258350949, 0.7690119766633912, 0.8444504207196448, 0.6513337967735104, 0.7176907912008702, 0.6973975855751797, 0.6754647782410994, 0.7159947490103589, 0.6594201005845908, 0.7979335346769569]

useful_classes_roc_auc_list [0.7315606805228217, 0.830098389499394, 0.7029371434912909, 0.8136356834318004, 0.7838216335693665, 0.7832738258350949, 0.7690119766633912, 0.8444504207196448, 0.6513337967735104, 0.7176907912008702, 0.6754647782410994, 0.7159947490103589, 0.6594201005845908, 0.7979335346769569]
test_roc_auc: 0.7483305360157279 in 14 mins 24 secs
total running time: 0 h 14m
------test: epoch 2------

we

In [27]:
# Run the test pass once per VGG-16 checkpoint file; only files 01 and 02 are covered here.
# A banner is printed before each run so the per-checkpoint output can be told apart.
for i in range(1, 3):
    vgg16_checkpoint_file = f"vgg16_1e-05_0{i}.pth"
    print(f"------test: epoch {i}------")
    test_model("vgg16", vgg16_checkpoint_file)

------test: epoch 1------

we are working with 
Images shape: torch.Size([3, 224, 224]) and 
Target shape: torch.Size([15])

checkpoint loaded: vgg16_1e-05_01.pth


800/800 (100.00 %)
NoFindingIndex:  10
y_true.shape, y_probs.shape  (25596, 15) (25596, 15)

class_roc_auc_list:  [0.704740004375392, 0.7726988828828527, 0.6882666707442789, 0.7711994715318455, 0.7669718087722729, 0.7048018169368062, 0.7722097736553606, 0.7190365383388184, 0.6362165213959958, 0.6516461017988067, 0.6966797332621566, 0.6788396856095477, 0.7073846988199799, 0.590438059960044, 0.7629467405397529]

useful_classes_roc_auc_list [0.704740004375392, 0.7726988828828527, 0.6882666707442789, 0.7711994715318455, 0.7669718087722729, 0.7048018169368062, 0.7722097736553606, 0.7190365383388184, 0.6362165213959958, 0.6516461017988067, 0.6788396856095477, 0.7073846988199799, 0.590438059960044, 0.7629467405397529]
test_roc_auc: 0.7090997696686968 in 14 mins 16 secs
total running time: 0 h 14m
------test: epoch 2------

we are 

In [16]:
# Trainable-parameter counts in millions, in the order (resnet50, vgg16, vit_base).
# Left as the cell's final expression so the resulting tuple is displayed as output.
tuple(count_parameters(network) for network in (resnet50, vgg16, vit_base))

(23.538767, 138.357544, 85.810191)