In [1]:
# Mount Google Drive at /content/drive; later cells read the dataset archive
# and read/write model checkpoints under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Extract the CheXpert-v1.0-small archive from Drive into the current working
# directory; unzip's stdout (the per-file listing) is discarded.
!unzip /content/drive/MyDrive/Graduation_Project/CheXpert-v1.0-small.zip > /dev/null

In [3]:
!pip install libauc > /dev/null

In [4]:
pip install timm

Collecting timm
  Downloading timm-0.4.12-py3-none-any.whl (376 kB)
[?25l[K     |▉                               | 10 kB 18.2 MB/s eta 0:00:01[K     |█▊                              | 20 kB 19.5 MB/s eta 0:00:01[K     |██▋                             | 30 kB 8.6 MB/s eta 0:00:01[K     |███▌                            | 40 kB 5.5 MB/s eta 0:00:01[K     |████▍                           | 51 kB 4.3 MB/s eta 0:00:01[K     |█████▏                          | 61 kB 5.0 MB/s eta 0:00:01[K     |██████                          | 71 kB 5.8 MB/s eta 0:00:01[K     |███████                         | 81 kB 5.7 MB/s eta 0:00:01[K     |███████▉                        | 92 kB 6.3 MB/s eta 0:00:01[K     |████████▊                       | 102 kB 5.8 MB/s eta 0:00:01[K     |█████████▋                      | 112 kB 5.8 MB/s eta 0:00:01[K     |██████████▍                     | 122 kB 5.8 MB/s eta 0:00:01[K     |███████████▎                    | 133 kB 5.8 MB/s eta 0:00:01[K     |

In [5]:
import cv2
import numpy as np 
import pandas as pd

import matplotlib.pyplot as plt

import torch
from torchsummary import summary

import timm
from chexpert import CheXpert
from libauc.optimizers import PESG
from libauc.losses import AUCM_MultiLabel

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

# Handle for the first CUDA device.
# NOTE(review): no visible cell references `cuda`; they call `.cuda()` directly.
cuda = torch.device('cuda:0')

In [None]:
# Load the cleaned label table and split it row-wise into train / val / test.
all_data = pd.read_csv('clean_4_5.csv')

# Step 1 holds out 15% of the rows; step 2 halves that hold-out into the
# validation and test sets. Both steps share one seed so the split is repeatable.
# NOTE(review): rows are split independently; if the table holds several images
# per patient, the same patient could land in more than one split -- confirm.
split_seed = 42
data_train, val_test = train_test_split(
    all_data,
    random_state=split_seed,
    test_size=0.15,
)
data_val, data_test = train_test_split(
    val_test,
    random_state=split_seed,
    test_size=0.5,
)

In [None]:
# Sanity check: (rows, columns) of the train, validation and test frames.
data_train.shape, data_val.shape, data_test.shape

In [None]:
# Persist each split as a CSV in the working directory (row index omitted).
split_outputs = {
    'data_train.csv': data_train,
    'data_val.csv': data_val,
    'data_test.csv': data_test,
}
for csv_name, split_frame in split_outputs.items():
    split_frame.to_csv(csv_name, index=False)

In [None]:
# Move the three split CSVs next to the extracted images.
# NOTE(review): the training cell reads 'clean_4_5_new_2.csv' and 'valid_5.csv',
# not these files -- confirm which label files are the intended ones.
!mv /content/data_train.csv /content/CheXpert-v1.0-small
!mv /content/data_val.csv /content/CheXpert-v1.0-small
!mv /content/data_test.csv /content/CheXpert-v1.0-small

In [6]:
# Checkpoint written by the training loop whenever validation AUC improves,
# and loaded again by the evaluation cell.
save_weights_path = '/content/drive/MyDrive/Graduation_Project/pretrained_model_2.pth'
# Earlier checkpoint; only referenced by a commented-out load_state_dict call
# in the training cell (i.e. an optional warm start).
load_weights_path = '/content/drive/MyDrive/Graduation_Project/pretrained_model.pth'

In [7]:
# Names of all timm architectures that have pretrained weights available.
# The whole list is displayed as the cell output (it is long).
avail_pretrained_models = timm.list_models(pretrained=True)
avail_pretrained_models

['adv_inception_v3',
 'cait_m36_384',
 'cait_m48_448',
 'cait_s24_224',
 'cait_s24_384',
 'cait_s36_384',
 'cait_xs24_384',
 'cait_xxs24_224',
 'cait_xxs24_384',
 'cait_xxs36_224',
 'cait_xxs36_384',
 'coat_lite_mini',
 'coat_lite_small',
 'coat_lite_tiny',
 'coat_mini',
 'coat_tiny',
 'convit_base',
 'convit_small',
 'convit_tiny',
 'cspdarknet53',
 'cspresnet50',
 'cspresnext50',
 'deit_base_distilled_patch16_224',
 'deit_base_distilled_patch16_384',
 'deit_base_patch16_224',
 'deit_base_patch16_384',
 'deit_small_distilled_patch16_224',
 'deit_small_patch16_224',
 'deit_tiny_distilled_patch16_224',
 'deit_tiny_patch16_224',
 'densenet121',
 'densenet161',
 'densenet169',
 'densenet201',
 'densenetblur121d',
 'dla34',
 'dla46_c',
 'dla46x_c',
 'dla60',
 'dla60_res2net',
 'dla60_res2next',
 'dla60x',
 'dla60x_c',
 'dla102',
 'dla102x',
 'dla102x2',
 'dla169',
 'dm_nfnet_f0',
 'dm_nfnet_f1',
 'dm_nfnet_f2',
 'dm_nfnet_f3',
 'dm_nfnet_f4',
 'dm_nfnet_f5',
 'dm_nfnet_f6',
 'dpn68',
 'dpn

In [None]:
# ---------------------------------------------------------------------------
# Fine-tune a timm MobileNetV3 on CheXpert with LibAUC's multi-label AUC-margin
# loss and the PESG optimizer. The model is validated every EVAL_EVERY batches;
# the best-scoring weights are checkpointed to `save_weights_path`.
# ---------------------------------------------------------------------------

# Reproducibility: seed NumPy and PyTorch before the loaders and model are
# built, so shuffling and any randomly initialised layers are repeatable.
# (GPU kernels may still introduce small run-to-run differences.)
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Run configuration (previously inlined as magic numbers).
NUM_CLASSES = 5
IMAGE_SIZE = 224
BATCH_SIZE = 32
NUM_WORKERS = 2
NUM_EPOCHS = 5
EVAL_EVERY = 400  # run validation whenever the batch index is a multiple of this
MODEL_NAME = 'tf_mobilenetv3_large_100'


def _collect_predictions(net, loader):
    """Run `net` over `loader` and return ``(y_true, y_prob)`` as NumPy arrays.

    The network is put in eval mode and gradients are disabled for the pass.
    The mode it had on entry is restored afterwards, even if the pass raises.
    Probabilities are the sigmoid of the network output; batches are
    concatenated along the first axis.
    """
    was_training = net.training
    net.eval()
    try:
        prob_batches, label_batches = [], []
        with torch.no_grad():
            for images, targets in loader:
                probs = torch.sigmoid(net(images.cuda()))
                prob_batches.append(probs.cpu().numpy())
                label_batches.append(targets.numpy())
        return np.concatenate(label_batches), np.concatenate(prob_batches)
    finally:
        net.train(was_training)


# dataloader
root = '/content/'

trainGen = CheXpert(image_root_path=root,
                    csv_path='clean_4_5_new_2.csv',
                    image_size=IMAGE_SIZE,
                    use_frontal=True,
                    transforms=None,
                    mode='train')

valGen = CheXpert(image_root_path=root,
                  csv_path='valid_5.csv',
                  image_size=IMAGE_SIZE,
                  use_frontal=True,
                  transforms=None,
                  mode='valid')

trainloader = torch.utils.data.DataLoader(trainGen, batch_size=BATCH_SIZE,
                                          num_workers=NUM_WORKERS, shuffle=True)
# NOTE: despite its name, `testloader` wraps the *validation* set (valGen).
testloader = torch.utils.data.DataLoader(valGen, batch_size=BATCH_SIZE,
                                         num_workers=NUM_WORKERS, shuffle=False)

# Optimiser / loss hyper-parameters, passed straight to PESG / AUCM_MultiLabel.
lr = 0.1
gamma = 1000
# presumably one imbalance ratio per class -- confirm against chexpert.CheXpert
imratio = trainGen.imratio_list
weight_decay = 1e-4
margin = 1.0

model = timm.create_model(MODEL_NAME, num_classes=NUM_CLASSES, pretrained=True)
model = model.cuda()

# define loss & optimizer
Loss = AUCM_MultiLabel(imratio=imratio, num_classes=NUM_CLASSES)
optimizer = PESG(model,
                 a=Loss.a,
                 b=Loss.b,
                 alpha=Loss.alpha,
                 lr=lr,
                 gamma=gamma,
                 margin=margin,
                 weight_decay=weight_decay, device='cuda')

# Optional warm start from an earlier checkpoint instead of the timm weights:
#model.load_state_dict(torch.load(load_weights_path))

# training
# Number of consecutive non-improving validations tolerated before the learning
# rate is decayed and the best checkpoint is restored.
const_threshold = 5

val_auc_mean = 0
best_val_auc = 0
const_cnt = 0
for epoch in range(NUM_EPOCHS):

    for idx, (train_data, train_labels) in enumerate(trainloader):
        train_data, train_labels = train_data.cuda(), train_labels.cuda()
        # The loss is fed sigmoid probabilities, not raw logits.
        y_pred = torch.sigmoid(model(train_data))
        loss = Loss(y_pred, train_labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation (also fires at idx == 0, i.e. right after the first step
        # of every epoch)
        if idx % EVAL_EVERY != 0:
            continue

        test_true, test_pred = _collect_predictions(model, testloader)
        val_auc_mean = roc_auc_score(test_true, test_pred)

        if best_val_auc < val_auc_mean:
            # New best: reset the patience counter and checkpoint the weights.
            const_cnt = 0
            best_val_auc = val_auc_mean
            torch.save(model.state_dict(), save_weights_path)
        else:
            const_cnt += 1

        if const_cnt >= const_threshold:
            # Plateau: decay the learning rate by decay_factor (2 => halved),
            # then continue from the best checkpoint seen so far.
            optimizer.update_regularizer(decay_factor=2)
            model.load_state_dict(torch.load(save_weights_path))
            const_cnt = 0

        print ('Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f'%(epoch, idx, val_auc_mean, best_val_auc))

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_100-427764d5.pth" to /root/.cache/torch/hub/checkpoints/tf_mobilenetv3_large_100-427764d5.pth


Epoch=0, BatchID=0, Val_AUC=0.5476, Best_Val_AUC=0.5476
Epoch=0, BatchID=400, Val_AUC=0.6871, Best_Val_AUC=0.6871
Epoch=0, BatchID=800, Val_AUC=0.7801, Best_Val_AUC=0.7801
Epoch=0, BatchID=1200, Val_AUC=0.8153, Best_Val_AUC=0.8153
Epoch=0, BatchID=1600, Val_AUC=0.8226, Best_Val_AUC=0.8226
Epoch=0, BatchID=2000, Val_AUC=0.8125, Best_Val_AUC=0.8226
Epoch=0, BatchID=2400, Val_AUC=0.8399, Best_Val_AUC=0.8399
Epoch=0, BatchID=2800, Val_AUC=0.8353, Best_Val_AUC=0.8399
Epoch=0, BatchID=3200, Val_AUC=0.8267, Best_Val_AUC=0.8399
Epoch=0, BatchID=3600, Val_AUC=0.8335, Best_Val_AUC=0.8399
Epoch=0, BatchID=4000, Val_AUC=0.8492, Best_Val_AUC=0.8492
Epoch=0, BatchID=4400, Val_AUC=0.8463, Best_Val_AUC=0.8492
Epoch=0, BatchID=4800, Val_AUC=0.8472, Best_Val_AUC=0.8492
Epoch=1, BatchID=0, Val_AUC=0.8154, Best_Val_AUC=0.8492
Epoch=1, BatchID=400, Val_AUC=0.8382, Best_Val_AUC=0.8492
Reducing learning rate to 0.05000 @ T=5961!
Updating regularizer @ T=5961!
Epoch=1, BatchID=800, Val_AUC=0.8392, Best_Val_AU

# Evaluation on the validation set

In [None]:
# Build the evaluation dataset from the validation CSV and wrap it in a
# sequential (unshuffled) loader so predictions stay aligned with the labels.
root = '/content/'
eval_dataset_args = dict(
    image_root_path=root,
    csv_path='valid_5.csv',
    image_size=224,
    use_frontal=True,
    transforms=None,
    mode='test',
)
testGen = CheXpert(**eval_dataset_args)

testloader = torch.utils.data.DataLoader(
    testGen,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)

In [None]:
# Recreate the architecture without pretrained weights, move it to the GPU,
# and restore the checkpoint saved during training.
model = timm.create_model(
    'tf_mobilenetv3_large_100',
    pretrained=False,
    num_classes=5,
).cuda()
best_state = torch.load(save_weights_path)
model.load_state_dict(best_state)

In [None]:
# Run the model over `testloader` with gradients disabled, gathering the
# sigmoid probabilities and the ground-truth labels batch by batch, then
# stack the batches into two arrays (`test_true`, `test_pred`).
model.eval()
pred_batches = []
true_batches = []
with torch.no_grad():
    for test_data, test_labels in testloader:
        test_data = test_data.cuda()
        y_pred = torch.sigmoid(model(test_data))
        pred_batches.append(y_pred.cpu().detach().numpy())
        true_batches.append(test_labels.numpy())

test_true = np.concatenate(true_batches)
test_pred = np.concatenate(pred_batches)

In [None]:
labels = ['Cardiomegaly', 'Edema', 'Consolidation', 'Atelectasis', 'Pleural Effusion']

# One ROC-AUC per label: column k of the label/prediction arrays is scored and
# reported under labels[k].
# NOTE(review): assumes the dataset emits its label columns in this order -- confirm.
scores = [
    roc_auc_score(test_true[:, col], test_pred[:, col])
    for col, _ in enumerate(labels)
]

results = pd.DataFrame({'AUC': scores}, index=labels)

In [None]:
# Display the per-label AUC table.
results

In [None]:
# Unweighted mean of the per-label AUCs.
results['AUC'].mean()