In [15]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import random
import timm
import torch
from albumentations import (
    Compose,
    Normalize,
    ShiftScaleRotate,
    RandomBrightnessContrast,
    MotionBlur,
    CLAHE,
    HorizontalFlip
)
from copy import deepcopy
from torch.utils.data import Dataset
from tqdm.auto import tqdm
from sklearn.metrics import roc_auc_score, accuracy_score

In [16]:
# from google.colab import drive
# drive.mount('/content/drive')

In [17]:
# %cd drive/MyDrive/'BIOMEDIN220-F2022'/

In [18]:
# Root folder of the resized VinBigData PNG dataset, and the folder that
# receives model checkpoints and plots.
dataset_path = "vinbigdata-chest-xray-resized-png-256x256"
model_path = "vinbigdata-chest-xray-resized-png-256x256/save_models"

# Label CSVs for the two splits.
train_csv_path, test_csv_path = (
    os.path.join(dataset_path, csv_name)
    for csv_name in ('vindrcxr_train.csv', 'vindrcxr_test.csv')
)

# Image folders for the two splits.
train_image_path, test_image_path = (
    os.path.join(dataset_path, split_dir)
    for split_dir in ('train', 'test')
)

# Joining with '' appends a trailing separator, so file names can simply be
# concatenated onto save_path.
save_path = os.path.join(model_path, '')

print(train_image_path, test_image_path, sep='\n')

vinbigdata-chest-xray-resized-png-256x256/train
vinbigdata-chest-xray-resized-png-256x256/test


In [19]:
!ls vinbigdata-chest-xray-resized-png-256x256

save_models  train	     vinbigdata-chest-xray-resized-png-256x256.zip
test	     train.csv	     vindrcxr_test.csv
test.csv     train_meta.csv  vindrcxr_train.csv


In [20]:
### Code from https://github.com/Scu-sen/VinBigData-Chest-X-ray-Abnormalities-Detection

class Dataset(torch.utils.data.Dataset):
    """Multi-label image dataset backed by a DataFrame and a folder of PNG files.

    Every row of ``df`` must provide an ``image_id`` column (the PNG file stem
    inside ``image_path``) and the label columns ``'0'`` .. ``'14'``.

    The class keeps the name ``Dataset`` so existing callers keep working, which
    shadows the ``Dataset`` imported from ``torch.utils.data``. The base class is
    therefore spelled out in full: re-running this notebook cell must not make
    the class inherit from its own previous definition.

    Parameters:
        df (pandas.DataFrame): One row per image.
        image_path (str): Folder containing ``<image_id>.png`` files.
        transform (callable, optional): Called as ``transform(image=img)`` and
            expected to return a mapping with an ``'image'`` entry.
    """

    # Names of the label columns read from each row.
    LABEL_COLUMNS = [str(i) for i in range(15)]

    def __init__(self, df, image_path, transform=None):
        self.df = df
        self.image_path = image_path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def _labels_at(self, idx):
        """Return the label vector of the row at *position* ``idx`` as a float32 tensor."""
        # Positional lookup: samplers hand out 0..len-1, which only coincides
        # with index labels when the frame has a default RangeIndex.
        row = self.df.iloc[idx]
        return torch.from_numpy(
            row[self.LABEL_COLUMNS].values.astype(float)
        ).float()

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            (img, labels): ``img`` is a float tensor in channel-first layout,
            ``labels`` is the float tensor produced by ``_labels_at``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        labels = self._labels_at(idx)

        file_name = os.path.join(
            self.image_path, str(self.df['image_id'].iloc[idx]) + '.png'
        )
        img = cv2.imread(file_name)
        if img is None:
            # cv2.imread signals a missing/corrupt file by returning None;
            # fail here with the path instead of deep inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {file_name}')

        # OpenCV loads images as BGR; convert to RGB channel order.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            img = self.transform(image=img)['image']
        # Height x width x channel -> channel x height x width.
        img = torch.from_numpy(img.transpose((2, 0, 1))).float()

        return img, labels

In [21]:
# Mini-batch size passed to both DataLoaders built in main().
bs = 2
# Initial learning rate of the SGD optimizer created in main().
lr = 1e-3
# Upper bound on the number of training epochs run by main().
N_EPOCHS = 10

In [22]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU and all CUDA devices), and configures cuDNN for repeatable runs.

    Parameters:
        seed (int): The seed value.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. subprocesses);
    # the hash seed of the running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe to call without a GPU; covers every CUDA device, not just the current one.
    torch.cuda.manual_seed_all(seed)
    # benchmark=True lets cuDNN auto-tune and possibly pick different kernels
    # from run to run, which defeats deterministic=True. Keep it off.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [23]:
def train_model(model, data_loader, optimizer, criterion): # train 1 epoch
    """
    Trains the model for 1 epoch and reports loss, per-class AUC and accuracy.

    Batches are moved to the device the model's parameters live on, so the
    function works for a model on GPU as well as on CPU.

    Parameters:
        model (torch.nn.Module): The model to be trained.
        data_loader (torch.utils.data.DataLoader): Yields ``(imgs, labels)`` batches.
        optimizer (A torch.optim class): The optimizer.
        criterion (A function in torch.nn.modules.loss): The loss function,
            called as ``criterion(logits, labels)``.

    Return:
        avg_loss (float): Sum of ``loss.item() * batch_size`` over all batches,
            divided by the number of samples seen.
        aucs (np.ndarray): ROC AUC per class; NaN for a class whose targets
            contain a single value only.
        overall_auc (float): ``np.nanmean`` of ``aucs``.
        accs (np.ndarray): Per-class accuracy of the rounded sigmoid outputs;
            NaN under the same single-value condition as ``aucs``.
        overall_acc (float): ``np.nanmean`` of ``accs``.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """

    model.train()

    # Follow the model's device instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    running_loss = 0.0
    running_n = 0
    preds_list, targets_list = [], []

    optimizer.zero_grad()

    tk = tqdm(data_loader, total=len(data_loader), position=0, leave=True)
    for imgs, labels in tk:
        imgs, labels = imgs.to(device), labels.to(device)
        output = model(imgs)

        loss = criterion(output, labels)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        batch_size = imgs.size(0)
        running_loss += loss.item() * batch_size
        running_n += batch_size
        tk.set_postfix(loss=running_loss / running_n)

        # Keep probabilities and integer targets on the CPU for the metrics below.
        preds_list.append(torch.sigmoid(output).detach().cpu().numpy())
        targets_list.append(labels.detach().cpu().numpy().round().astype(int))
    print(running_n, len(data_loader))

    if running_n == 0:
        raise ValueError('data_loader yielded no samples')
    avg_loss = running_loss / running_n

    # Transpose to (classes, samples) so that zip() walks class by class.
    preds_list = np.concatenate(preds_list, axis=0).T
    targets_list = np.concatenate(targets_list, axis=0).T

    # roc_auc_score is undefined when only one target value is present.
    aucs = np.array(
        [roc_auc_score(i, j) if len(set(i)) > 1 else np.nan for i, j in zip(targets_list, preds_list)]
    )
    overall_auc = np.nanmean(aucs)

    thresholded_preds_list = np.round(preds_list)
    # The single-value guard is applied to accuracy as well, so both metric
    # arrays are NaN for exactly the same classes.
    accs = np.array(
        [accuracy_score(i, j) if len(set(i)) > 1 else np.nan for i, j in zip(targets_list, thresholded_preds_list)]
    )
    overall_acc = np.nanmean(accs)

    return avg_loss, aucs, overall_auc, accs, overall_acc

In [24]:
def val_model(model, data_loader, criterion):
    """
    Evaluates the model on a data loader without updating its weights.

    Batches are moved to the device the model's parameters live on, so the
    function works for a model on GPU as well as on CPU.

    Parameters:
        model (torch.nn.Module): The model to be evaluated.
        data_loader (torch.utils.data.DataLoader): Yields ``(imgs, labels)`` batches.
        criterion (A torch.nn.modules.loss class): The loss function,
            called as ``criterion(logits, labels)``.

    Return:
        avg_loss (float): Sum of ``loss.item() * batch_size`` over all batches,
            divided by the number of samples seen.
        aucs (np.ndarray): ROC AUC per class; NaN for a class whose targets
            contain a single value only.
        overall_auc (float): ``np.nanmean`` of ``aucs``.
        accs (np.ndarray): Per-class accuracy of the rounded sigmoid outputs;
            NaN under the same single-value condition as ``aucs``.
        overall_acc (float): ``np.nanmean`` of ``accs``.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    model.eval()

    # Follow the model's device instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    running_loss = 0.0
    running_n = 0
    preds_list, targets_list = [], []

    # Gradients are only needed for training; skip building the graph.
    with torch.no_grad():
        tk = tqdm(data_loader, total=len(data_loader), position=0, leave=True)

        for imgs, labels in tk:
            imgs, labels = imgs.to(device), labels.to(device)
            output = model(imgs)

            loss = criterion(output, labels)
            batch_size = imgs.size(0)
            running_loss += loss.item() * batch_size
            running_n += batch_size
            tk.set_postfix(loss=running_loss / running_n)

            # Keep probabilities and integer targets on the CPU for the metrics below.
            preds_list.append(torch.sigmoid(output).detach().cpu().numpy())
            targets_list.append(labels.detach().cpu().numpy().round().astype(int))
    print(running_n, len(data_loader))

    if running_n == 0:
        raise ValueError('data_loader yielded no samples')
    avg_loss = running_loss / running_n

    # Transpose to (classes, samples) so that zip() walks class by class.
    preds_list = np.concatenate(preds_list, axis=0).T
    targets_list = np.concatenate(targets_list, axis=0).T

    # roc_auc_score is undefined when only one target value is present.
    aucs = np.array(
        [roc_auc_score(i, j) if len(set(i)) > 1 else np.nan for i, j in zip(targets_list, preds_list)]
    )
    overall_auc = np.nanmean(aucs)

    thresholded_preds_list = np.round(preds_list)
    # The single-value guard is applied to accuracy as well, so both metric
    # arrays are NaN for exactly the same classes.
    accs = np.array(
        [accuracy_score(i, j) if len(set(i)) > 1 else np.nan for i, j in zip(targets_list, thresholded_preds_list)]
    )
    overall_acc = np.nanmean(accs)

    return avg_loss, aucs, overall_auc, accs, overall_acc

In [25]:
def _cpu_state_dict(model):
    """Return a detached CPU copy of ``model.state_dict()`` that later training cannot modify."""
    # copy=True forces a real copy even when the tensor is already on the CPU.
    return {
        name: tensor.detach().to('cpu', copy=True)
        for name, tensor in model.state_dict().items()
    }


def main():
    """Fine-tune a pretrained EfficientNet-B4 as a 15-class multi-label classifier.

    Steps:
        1. Seed the RNGs, read the train/test CSVs and build datasets/loaders.
        2. Train with SGD + BCEWithLogitsLoss for at most ``N_EPOCHS`` epochs,
           reducing the learning rate when the mean validation AUC plateaus.
        3. After every epoch: print metrics, checkpoint the best overall
           weights, checkpoint the model for every class whose AUC improved,
           and apply early stopping.
        4. Plot and save the training/validation loss curves.

    Reads the module-level settings ``train_csv_path``, ``test_csv_path``,
    ``train_image_path``, ``test_image_path``, ``save_path``, ``bs``, ``lr``
    and ``N_EPOCHS``. Writes checkpoints and the loss plot under ``save_path``.
    Returns nothing.
    """
    seed_everything(42)

    # Create the output folder up front so a missing directory is not
    # discovered only after the first (long) epoch has finished.
    os.makedirs(save_path, exist_ok=True)

    train = pd.read_csv(train_csv_path)
    test = pd.read_csv(test_csv_path)

    # Shared normalisation settings (the usual ImageNet channel statistics).
    norm_kwargs = dict(
        mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0
    )
    train_transform = Compose([
        HorizontalFlip(p=0.5),
        ShiftScaleRotate(scale_limit = 0.15, rotate_limit = 10, p = 0.5),
        RandomBrightnessContrast(p=0.5),
        Normalize(**norm_kwargs)
    ])
    test_transform = Compose([
        Normalize(**norm_kwargs)
    ])

    trainset = Dataset(
        train,
        image_path=train_image_path,
        transform=train_transform
    )
    train_loader = torch.utils.data.DataLoader(
        trainset, batch_size=bs, num_workers=1,
        shuffle=True
    )

    # NOTE(review): the "validation" data is the test split. Checkpoint
    # selection, LR scheduling and early stopping all use its metrics, so the
    # numbers reported on it are optimistically biased. Consider holding out
    # part of the training split instead.
    valset = Dataset(
        test,
        image_path=test_image_path,
        transform=test_transform
    )
    val_loader = torch.utils.data.DataLoader(
        valset, batch_size=bs, shuffle=False, num_workers=1
    )

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = timm.create_model('tf_efficientnet_b4_ns',pretrained=True,num_classes=15).to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    criterion = torch.nn.BCEWithLogitsLoss()
    # mode='max' because the scheduler is stepped with an AUC (higher is better).
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1, factor=0.1, mode='max')

    best_weights = _cpu_state_dict(model)
    best_auc = 0
    best_aucs = [0]*15
    best_val_loss = 100
    es = 0  # consecutive epochs without improvement of the mean validation AUC
    early_stop_patience = 10

    train_loss_history, val_loss_history = [], []
    acc_train_history, acc_val_history = [], []
    for epoch in range(N_EPOCHS):
        # Read the learning rate actually in effect for this epoch; the
        # scheduler may have lowered it since the optimizer was created.
        current_lr = optimizer.param_groups[0]['lr']

        avg_train_loss, aucs_train, auc_train, accs_train, acc_train = train_model(model, train_loader, optimizer, criterion)
        avg_val_loss, aucs_val, auc_val, accs_val, acc_val = val_model(model, val_loader, criterion)

        train_loss_history.append(avg_train_loss)
        val_loss_history.append(avg_val_loss)
        acc_train_history.append(acc_train)
        acc_val_history.append(acc_val)

        # NOTE: the labels below say "weighted avg", but train_model/val_model
        # compute these values as an unweighted np.nanmean over the classes.
        print('epoch:', epoch)
        print("Training Metrics")
        print('lr:', current_lr, 'train_loss:', avg_train_loss, 'weighted avg auc:',auc_train, 'weighted avg acc:', acc_train)
        print('aucs:',aucs_train)
        print('accs:', accs_train)
        print("Validation Metrics")
        print('lr:', current_lr, 'val_loss:',avg_val_loss, 'weighted avg auc:',auc_val, 'weighted avg acc:', acc_val)
        print('aucs:',aucs_val)
        print('accs:', accs_val)

        # Record the best weights if either of AUC or val_loss improved, and
        # write them to disk right away so they survive an interrupted run.
        if auc_val > best_auc or avg_val_loss < best_val_loss:
            print('saving best weight...')
            best_weights = _cpu_state_dict(model)
            torch.save(
                {'weight': best_weights, 'auc': auc_val, 'val_loss': avg_val_loss, 'epoch': epoch},
                save_path + 'multilabel_efnb4_v1_best.pth'
            )

        # Save the model weight if the AUC of any class is improved.
        # A NaN AUC never compares greater, so such classes are skipped.
        for i in range(len(best_aucs)):
            if aucs_val[i] > best_aucs[i]:
                best_aucs[i] = aucs_val[i]
                d = {'weight':model.state_dict(), 'auc':aucs_val[i], 'epoch':epoch}
                torch.save(d, save_path + f'multilabel_efnb4_v1_cls{i}.pth')

        # Update best avg_val_loss
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss

        # Update the best mean validation AUC and stop early once it has
        # failed to improve for more than `early_stop_patience` epochs in a row.
        if auc_val > best_auc:
            es = 0
            best_auc = auc_val
        else:
            es += 1
            if es > early_stop_patience:
                break

        scheduler.step(auc_val)
    print(train_loss_history)
    print(val_loss_history)

    # Use the number of epochs actually run: after an early stop the
    # histories are shorter than N_EPOCHS and plotting would fail.
    epochs_run = range(len(train_loss_history))
    fig, ax = plt.subplots()
    ax.plot(epochs_run, train_loss_history, label='Training Loss')
    ax.plot(epochs_run, val_loss_history, label='Validation Loss')

    # Add in a title, axes labels and the legend for the two curves
    ax.set_title('Training and Validation Loss')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.legend()
    fig.savefig(save_path + 'multilabel_efnb4_v1_loss_history_fold.png')
    plt.show()

In [26]:
!nvidia-smi

Tue Dec  6 06:34:38 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   77C    P0    33W /  70W |   3720MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Run the full training pipeline defined in main() above.
main()

  0%|          | 0/7500 [00:00<?, ?it/s]

15000 7500


  0%|          | 0/1500 [00:00<?, ?it/s]

3000 1500
epoch: 0
Training Metrics
lr: 0.001 train_loss: 0.18043110528712472 weighted avg auc: 0.7260837853647407 weighted avg acc: 0.9437111111111111
aucs: [0.79088733 0.69854107 0.64785524 0.80491409 0.74950496 0.68106683
 0.72846119 0.75840634 0.66173173 0.72259139 0.7641829  0.73468793
 0.63213737 0.71302971 0.80325869]
accs: [0.85446667 0.9916     0.98106667 0.8816     0.99046667 0.98133333
 0.98206667 0.9546     0.97126667 0.97386667 0.95333333 0.94
 0.99173333 0.92713333 0.78113333]
Validation Metrics
lr: 0.001 val_loss: 0.2120949574370558 weighted avg auc: 0.7815185410977358 weighted avg acc: 0.9268222222222222
aucs: [0.84179039 0.72265806 0.72998766 0.88027333 0.73328455 0.78179103
 0.78924729 0.77595695 0.70340265 0.78880453 0.83593562 0.78143504
 0.74463447 0.76579662 0.84777993]
accs: [0.83533333 0.97133333 0.93533333 0.88666667 0.96766667 0.92633333
 0.97966667 0.96       0.941      0.96466667 0.91933333 0.922
 0.994      0.91333333 0.78566667]
saving best weight...


  0%|          | 0/7500 [00:00<?, ?it/s]