In [1]:
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-0ec2a0cd-c6f2-5dd9-4fd4-fa206df1caed)


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!unzip /content/drive/MyDrive/Graduation_Project/CheXpert-v1.0-small.zip > /dev/null

In [4]:
!pip install timm > /dev/null

In [1]:
import cv2
import numpy as np 
import pandas as pd

import matplotlib.pyplot as plt

import torch 
from torch import nn
from torchsummary import summary

import timm
from torch.optim import Adam, lr_scheduler

from myGen import MyGen

from sklearn.metrics import roc_auc_score

# Zeros

In [2]:
train = pd.read_csv('/content/CheXpert-v1.0-small/train.csv')

In [3]:
train.loc[:, train.columns[5:]] = train.loc[:, train.columns[5:]].fillna(0)

zeros = ['Cardiomegaly', 'Consolidation', 'Pleural Effusion']
train.loc[:, zeros] = train.loc[:, zeros].replace({-1:0})
ones = ['Edema', 'Atelectasis']
train.loc[:, ones] = train.loc[:, ones].replace({-1:0})

train = train[['Path', 'Frontal/Lateral', 'Cardiomegaly', 'Edema', 'Consolidation', 'Atelectasis', 'Pleural Effusion']].copy() 

In [4]:
train.shape

(223414, 7)

In [5]:
train.to_csv('/content/CheXpert-v1.0-small/train_edited.csv', index=False)

In [6]:
save_weights_path = '/content/drive/MyDrive/Graduation_Project/xception_zeros_2.pth' #edit
load_weights_path = '/content/drive/MyDrive/Graduation_Project/xception_zeros_2.pth' #edit

In [7]:
timm.list_models(pretrained=True)

['adv_inception_v3',
 'bat_resnext26ts',
 'beit_base_patch16_224',
 'beit_base_patch16_224_in22k',
 'beit_base_patch16_384',
 'beit_large_patch16_224',
 'beit_large_patch16_224_in22k',
 'beit_large_patch16_384',
 'beit_large_patch16_512',
 'botnet26t_256',
 'cait_m36_384',
 'cait_m48_448',
 'cait_s24_224',
 'cait_s24_384',
 'cait_s36_384',
 'cait_xs24_384',
 'cait_xxs24_224',
 'cait_xxs24_384',
 'cait_xxs36_224',
 'cait_xxs36_384',
 'coat_lite_mini',
 'coat_lite_small',
 'coat_lite_tiny',
 'coat_mini',
 'coat_tiny',
 'convit_base',
 'convit_small',
 'convit_tiny',
 'convmixer_768_32',
 'convmixer_1024_20_ks9_p14',
 'convmixer_1536_20',
 'convnext_base',
 'convnext_base_384_in22ft1k',
 'convnext_base_in22ft1k',
 'convnext_base_in22k',
 'convnext_large',
 'convnext_large_384_in22ft1k',
 'convnext_large_in22ft1k',
 'convnext_large_in22k',
 'convnext_small',
 'convnext_tiny',
 'convnext_xlarge_384_in22ft1k',
 'convnext_xlarge_in22ft1k',
 'convnext_xlarge_in22k',
 'crossvit_9_240',
 'crossv

In [8]:
model = timm.create_model('xception', num_classes=5, pretrained=False) #edit
model = model.cuda()

summary(model, (3, 224, 224), batch_size=64, device='cuda')

model.load_state_dict(torch.load(load_weights_path))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [64, 32, 111, 111]             864
       BatchNorm2d-2         [64, 32, 111, 111]              64
              ReLU-3         [64, 32, 111, 111]               0
            Conv2d-4         [64, 64, 109, 109]          18,432
       BatchNorm2d-5         [64, 64, 109, 109]             128
              ReLU-6         [64, 64, 109, 109]               0
            Conv2d-7         [64, 64, 109, 109]             576
            Conv2d-8        [64, 128, 109, 109]           8,192
   SeparableConv2d-9        [64, 128, 109, 109]               0
      BatchNorm2d-10        [64, 128, 109, 109]             256
             ReLU-11        [64, 128, 109, 109]               0
           Conv2d-12        [64, 128, 109, 109]           1,152
           Conv2d-13        [64, 128, 109, 109]          16,384
  SeparableConv2d-14        [64, 128, 1

<All keys matched successfully>

In [9]:
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

In [10]:
root = '/content/'

trainGen = MyGen(image_root_path=root, 
                 csv_path='CheXpert-v1.0-small/train_edited.csv', 
                 image_size=224, #edit
                 frontal=True)

valGen =  MyGen(image_root_path=root, 
                 csv_path='CheXpert-v1.0-small/valid.csv', 
                 image_size=224,  #edit
                 frontal=True)

trainloader =  torch.utils.data.DataLoader(trainGen, batch_size=64, shuffle=True)
testloader =  torch.utils.data.DataLoader(valGen, batch_size=64, shuffle=False)

# define loss & optimizer
Loss = nn.BCELoss()

optimizer = Adam(model.parameters(), lr=0.000125)

scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # Decay LR by a factor of 0.5 every 5 epochs

#model.load_state_dict(torch.load(load_weights_path))

# training
const_threshold = 5

val_auc_mean = 0
best_val_auc = 0.8885
const_cnt = 0

for epoch in range(2):

    for idx, data in enumerate(trainloader):
      train_data, train_labels = data
      train_data, train_labels  = train_data.cuda(), train_labels.cuda()
      y_pred = model(train_data)
      y_pred = torch.sigmoid(y_pred)
      loss = Loss(y_pred, train_labels)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
        
      # validation  
      if idx % 400 == 0:
         model.eval()
         with torch.no_grad():    
              test_pred = []
              test_true = [] 
              for data in testloader:
                  test_data, test_labels = data
                  test_data = test_data.cuda()
                  y_pred = model(test_data)
                  y_pred = torch.sigmoid(y_pred)
                  test_pred.append(y_pred.cpu().detach().numpy())            
                  test_true.append(test_labels.numpy())
            
              test_true = np.concatenate(test_true)
              test_pred = np.concatenate(test_pred)
              val_auc_mean =  roc_auc_score(test_true, test_pred) 
              model.train()

              if best_val_auc < val_auc_mean:
                  const_cnt = 0
                  scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
                  best_val_auc = val_auc_mean
                  torch.save(model.state_dict(), save_weights_path)
              else:
                  const_cnt += 1
                  scheduler.step()            

              if (const_cnt > const_threshold):
                  const_cnt = 0 
                  scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
                  model.load_state_dict(torch.load(save_weights_path))
                               
              lr = get_lr(optimizer)

              print ('Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f, lr=%f' %(epoch, idx, val_auc_mean, best_val_auc, lr))

Epoch=0, BatchID=0, Val_AUC=0.8871, Best_Val_AUC=0.8885, lr=0.000125
Epoch=0, BatchID=400, Val_AUC=0.8685, Best_Val_AUC=0.8885, lr=0.000125
Epoch=0, BatchID=800, Val_AUC=0.8648, Best_Val_AUC=0.8885, lr=0.000125
Epoch=0, BatchID=1200, Val_AUC=0.8713, Best_Val_AUC=0.8885, lr=0.000125
Epoch=0, BatchID=1600, Val_AUC=0.8817, Best_Val_AUC=0.8885, lr=0.000063
Epoch=0, BatchID=2000, Val_AUC=0.8762, Best_Val_AUC=0.8885, lr=0.000063
Epoch=0, BatchID=2400, Val_AUC=0.8698, Best_Val_AUC=0.8885, lr=0.000063
Epoch=0, BatchID=2800, Val_AUC=0.8747, Best_Val_AUC=0.8885, lr=0.000063
Epoch=1, BatchID=0, Val_AUC=0.8701, Best_Val_AUC=0.8885, lr=0.000063
Epoch=1, BatchID=400, Val_AUC=0.8711, Best_Val_AUC=0.8885, lr=0.000063
Epoch=1, BatchID=800, Val_AUC=0.8793, Best_Val_AUC=0.8885, lr=0.000031
Epoch=1, BatchID=1200, Val_AUC=0.8731, Best_Val_AUC=0.8885, lr=0.000031
Epoch=1, BatchID=1600, Val_AUC=0.8766, Best_Val_AUC=0.8885, lr=0.000031
Epoch=1, BatchID=2000, Val_AUC=0.8803, Best_Val_AUC=0.8885, lr=0.000031
Ep

In [11]:
root = '/content/'
testGen =  MyGen(image_root_path=root, 
                 csv_path='CheXpert-v1.0-small/valid.csv', 
                 image_size=224, 
                 frontal=True)

testloader =  torch.utils.data.DataLoader(testGen, batch_size=1, shuffle=False)

In [12]:
model.load_state_dict(torch.load(save_weights_path))

<All keys matched successfully>

In [13]:
model.eval()
with torch.no_grad():    
    test_pred = []
    test_true = [] 
    for data in testloader:
        test_data, test_labels = data
        test_data = test_data.cuda()
        y_pred = model(test_data)
        y_pred = torch.sigmoid(y_pred)
        test_pred.append(y_pred.cpu().detach().numpy())
        test_true.append(test_labels.numpy())
  
    test_true = np.concatenate(test_true)
    test_pred = np.concatenate(test_pred)

In [14]:
labels = ['Cardiomegaly', 'Edema', 'Consolidation', 'Atelectasis', 'Pleural Effusion']

results = pd.DataFrame(index=labels)


scores = []
for i in range(5):
    score = roc_auc_score(test_true[:, i], test_pred[:, i])
    scores.append(score)
    
results['AUC'] = scores

In [15]:
results

Unnamed: 0,AUC
Cardiomegaly,0.833111
Edema,0.930655
Consolidation,0.929963
Atelectasis,0.813753
Pleural Effusion,0.935009


In [16]:
results['AUC'].mean()

0.888498170588767

# Ones

In [17]:
train = pd.read_csv('/content/CheXpert-v1.0-small/train.csv')

In [18]:
train.loc[:, train.columns[5:]] = train.loc[:, train.columns[5:]].fillna(0)

zeros = ['Cardiomegaly', 'Consolidation', 'Pleural Effusion']
train.loc[:, zeros] = train.loc[:, zeros].replace({-1:1})
ones = ['Edema', 'Atelectasis']
train.loc[:, ones] = train.loc[:, ones].replace({-1:1})

train = train[['Path', 'Frontal/Lateral', 'Cardiomegaly', 'Edema', 'Consolidation', 'Atelectasis', 'Pleural Effusion']].copy() 

In [19]:
train.shape

(223414, 7)

In [20]:
train.to_csv('/content/CheXpert-v1.0-small/train_edited.csv', index=False)

In [21]:
save_weights_path = '/content/drive/MyDrive/Graduation_Project/xception_ones_2.pth' #edit
load_weights_path = '/content/drive/MyDrive/Graduation_Project/xception_ones_2.pth' #edit

In [22]:
timm.list_models(pretrained=True)

['adv_inception_v3',
 'bat_resnext26ts',
 'beit_base_patch16_224',
 'beit_base_patch16_224_in22k',
 'beit_base_patch16_384',
 'beit_large_patch16_224',
 'beit_large_patch16_224_in22k',
 'beit_large_patch16_384',
 'beit_large_patch16_512',
 'botnet26t_256',
 'cait_m36_384',
 'cait_m48_448',
 'cait_s24_224',
 'cait_s24_384',
 'cait_s36_384',
 'cait_xs24_384',
 'cait_xxs24_224',
 'cait_xxs24_384',
 'cait_xxs36_224',
 'cait_xxs36_384',
 'coat_lite_mini',
 'coat_lite_small',
 'coat_lite_tiny',
 'coat_mini',
 'coat_tiny',
 'convit_base',
 'convit_small',
 'convit_tiny',
 'convmixer_768_32',
 'convmixer_1024_20_ks9_p14',
 'convmixer_1536_20',
 'convnext_base',
 'convnext_base_384_in22ft1k',
 'convnext_base_in22ft1k',
 'convnext_base_in22k',
 'convnext_large',
 'convnext_large_384_in22ft1k',
 'convnext_large_in22ft1k',
 'convnext_large_in22k',
 'convnext_small',
 'convnext_tiny',
 'convnext_xlarge_384_in22ft1k',
 'convnext_xlarge_in22ft1k',
 'convnext_xlarge_in22k',
 'crossvit_9_240',
 'crossv

In [23]:
model = timm.create_model('xception', num_classes=5, pretrained=False) #edit
model = model.cuda()

summary(model, (3, 224, 224), batch_size=64, device='cuda')

model.load_state_dict(torch.load(load_weights_path))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [64, 32, 111, 111]             864
       BatchNorm2d-2         [64, 32, 111, 111]              64
              ReLU-3         [64, 32, 111, 111]               0
            Conv2d-4         [64, 64, 109, 109]          18,432
       BatchNorm2d-5         [64, 64, 109, 109]             128
              ReLU-6         [64, 64, 109, 109]               0
            Conv2d-7         [64, 64, 109, 109]             576
            Conv2d-8        [64, 128, 109, 109]           8,192
   SeparableConv2d-9        [64, 128, 109, 109]               0
      BatchNorm2d-10        [64, 128, 109, 109]             256
             ReLU-11        [64, 128, 109, 109]               0
           Conv2d-12        [64, 128, 109, 109]           1,152
           Conv2d-13        [64, 128, 109, 109]          16,384
  SeparableConv2d-14        [64, 128, 1

<All keys matched successfully>

In [24]:
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

In [25]:
root = '/content/'

trainGen = MyGen(image_root_path=root, 
                 csv_path='CheXpert-v1.0-small/train_edited.csv', 
                 image_size=224, #edit
                 frontal=True)

valGen =  MyGen(image_root_path=root, 
                 csv_path='CheXpert-v1.0-small/valid.csv', 
                 image_size=224,  #edit
                 frontal=True)

trainloader =  torch.utils.data.DataLoader(trainGen, batch_size=64, shuffle=True)
testloader =  torch.utils.data.DataLoader(valGen, batch_size=64, shuffle=False)

# define loss & optimizer
Loss = nn.BCELoss()

optimizer = Adam(model.parameters(), lr=0.00025)

scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # Decay LR by a factor of 0.5 every 5 epochs

#model.load_state_dict(torch.load(load_weights_path))

# training
const_threshold = 5

val_auc_mean = 0
best_val_auc = 0.8950
const_cnt = 0

for epoch in range(2):

    for idx, data in enumerate(trainloader):
      train_data, train_labels = data
      train_data, train_labels  = train_data.cuda(), train_labels.cuda()
      y_pred = model(train_data)
      y_pred = torch.sigmoid(y_pred)
      loss = Loss(y_pred, train_labels)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
        
      # validation  
      if idx % 400 == 0:
         model.eval()
         with torch.no_grad():    
              test_pred = []
              test_true = [] 
              for data in testloader:
                  test_data, test_labels = data
                  test_data = test_data.cuda()
                  y_pred = model(test_data)
                  y_pred = torch.sigmoid(y_pred)
                  test_pred.append(y_pred.cpu().detach().numpy())            
                  test_true.append(test_labels.numpy())
            
              test_true = np.concatenate(test_true)
              test_pred = np.concatenate(test_pred)
              val_auc_mean =  roc_auc_score(test_true, test_pred) 
              model.train()

              if best_val_auc < val_auc_mean:
                  const_cnt = 0
                  scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
                  best_val_auc = val_auc_mean
                  torch.save(model.state_dict(), save_weights_path)
              else:
                  const_cnt += 1
                  scheduler.step()            

              if (const_cnt > const_threshold):
                  const_cnt = 0 
                  scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
                  model.load_state_dict(torch.load(save_weights_path))
                               
              lr = get_lr(optimizer)

              print ('Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f, lr=%f' %(epoch, idx, val_auc_mean, best_val_auc, lr))

Epoch=0, BatchID=0, Val_AUC=0.8935, Best_Val_AUC=0.8950, lr=0.000250
Epoch=0, BatchID=400, Val_AUC=0.8591, Best_Val_AUC=0.8950, lr=0.000250
Epoch=0, BatchID=800, Val_AUC=0.8722, Best_Val_AUC=0.8950, lr=0.000250
Epoch=0, BatchID=1200, Val_AUC=0.8771, Best_Val_AUC=0.8950, lr=0.000250
Epoch=0, BatchID=1600, Val_AUC=0.8690, Best_Val_AUC=0.8950, lr=0.000125
Epoch=0, BatchID=2000, Val_AUC=0.8754, Best_Val_AUC=0.8950, lr=0.000125
Epoch=0, BatchID=2400, Val_AUC=0.8693, Best_Val_AUC=0.8950, lr=0.000125
Epoch=0, BatchID=2800, Val_AUC=0.8761, Best_Val_AUC=0.8950, lr=0.000125
Epoch=1, BatchID=0, Val_AUC=0.8682, Best_Val_AUC=0.8950, lr=0.000125
Epoch=1, BatchID=400, Val_AUC=0.8716, Best_Val_AUC=0.8950, lr=0.000125
Epoch=1, BatchID=800, Val_AUC=0.8704, Best_Val_AUC=0.8950, lr=0.000063
Epoch=1, BatchID=1200, Val_AUC=0.8740, Best_Val_AUC=0.8950, lr=0.000063
Epoch=1, BatchID=1600, Val_AUC=0.8768, Best_Val_AUC=0.8950, lr=0.000063
Epoch=1, BatchID=2000, Val_AUC=0.8682, Best_Val_AUC=0.8950, lr=0.000063
Ep

In [26]:
root = '/content/'
testGen =  MyGen(image_root_path=root, 
                 csv_path='CheXpert-v1.0-small/valid.csv', 
                 image_size=224, 
                 frontal=True)

testloader =  torch.utils.data.DataLoader(testGen, batch_size=1, shuffle=False)

In [27]:
model.load_state_dict(torch.load(save_weights_path))

<All keys matched successfully>

In [28]:
model.eval()
with torch.no_grad():    
    test_pred = []
    test_true = [] 
    for data in testloader:
        test_data, test_labels = data
        test_data = test_data.cuda()
        y_pred = model(test_data)
        y_pred = torch.sigmoid(y_pred)
        test_pred.append(y_pred.cpu().detach().numpy())
        test_true.append(test_labels.numpy())
  
    test_true = np.concatenate(test_true)
    test_pred = np.concatenate(test_pred)

In [29]:
labels = ['Cardiomegaly', 'Edema', 'Consolidation', 'Atelectasis', 'Pleural Effusion']

results = pd.DataFrame(index=labels)


scores = []
for i in range(5):
    score = roc_auc_score(test_true[:, i], test_pred[:, i])
    scores.append(score)
    
results['AUC'] = scores

In [30]:
results

Unnamed: 0,AUC
Cardiomegaly,0.831328
Edema,0.940923
Consolidation,0.90864
Atelectasis,0.851129
Pleural Effusion,0.943048


In [31]:
results['AUC'].mean()

0.8950133853679969