In [10]:
import os
import json
import time
import random
import numpy as np
import pandas as pd
import pydicom
import pickle
from PIL import Image
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score
import matplotlib.pyplot as plt
import sklearn.metrics as metrics

In [3]:
class Config():
    base_path = "../../../dataset/pediatric_dataset/physionet.org/files/vindr-pcxr/1.0.0/"
    train_path = base_path + "train/"
    test_path = base_path + "test/"
    train_csv_path = base_path + "annotations_train.csv"
    test_csv_path = base_path + "annotations_test.csv"
    image_size = (224,224)
    BATCH_SIZE = 32
    pin_memory = True
    num_workers = 3
    lr=0.001
    EPOCHS=30
    gpu_id=5
    device = torch.device(f'cuda:{gpu_id}' if torch.cuda.is_available() else 'cpu')
    val_split = 0.1
    SEED=42
    return_logs=False
    load = False
    model_name = "shufflenet"
    roc_title = f'roc_{model_name}'
    checkpoint = f"../saved_models/{model_name}_checkpoint.pt"
    saved_path = f'../saved_models/{model_name}_v1.pt'
    loss_acc_path = f'../loss_acc_roc/loss-acc-{model_name}.svg'
    roc_path = f'../loss_acc_roc/roc-{model_name}.svg'
    fta_path = f'../roc_pickle_files/fta_{model_name}.pkl'

config = Config()

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  
print(config.device)

random.seed(config.SEED)
np.random.seed(config.SEED)
torch.manual_seed(config.SEED)
torch.cuda.manual_seed(config.SEED)
torch.backends.cudnn.benchmarks = True
torch.backends.cudnn.deterministic = True
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True


class PCXRDataset():
    def __init__(self,csv_file,dir_path):
        self.dir_path = dir_path
        csv_ = pd.read_csv(csv_file)

        csv_ = csv_.drop_duplicates(subset=['image_id'])
        csv_.class_name = csv_.class_name.apply(lambda x:0 if x=='No finding' else 1)
        csv_ = np.array(csv_.loc[:,['image_id','class_name']])
        self.images = csv_

        self.transformations = torchvision.transforms.Compose([
            torchvision.transforms.Resize(config.image_size),
            torchvision.transforms.ToTensor()
        ])
            
    def __len__(self):
        return len(self.images)

    def __getitem__(self,idx):
        img_id, label = self.images[idx]
        img_path = os.path.join(self.dir_path,f'{img_id}.dicom')
        ds = pydicom.dcmread(img_path)
        new_img = ds.pixel_array.astype('float')
        new_img = np.maximum(new_img,0) / new_img.max()
        new_img = (new_img * 255).astype(np.uint8)
        final_img = Image.fromarray(new_img)
        final_img = final_img.convert('RGB')
        final_img = self.transformations(final_img)
        return final_img, label

def GetDataloader():
    train_data = PCXRDataset(config.train_csv_path,config.train_path)
    test_data = PCXRDataset(config.test_csv_path,config.test_path)
    total_len = len(train_data)
    val_len = int(config.val_split * total_len)
    train_len = total_len - val_len
    training_data,val_data = torch.utils.data.dataset.random_split(train_data,[train_len,val_len])

    train_loader = torch.utils.data.DataLoader(
        training_data,
        shuffle=True,
        batch_size=config.BATCH_SIZE,
        pin_memory = config.pin_memory,
        num_workers = config.num_workers
        )
    
    val_loader = torch.utils.data.DataLoader(
        val_data,
        shuffle=True,
        batch_size=config.BATCH_SIZE,
        pin_memory = config.pin_memory,
        num_workers = config.num_workers
        )

    test_loader = torch.utils.data.DataLoader(
        test_data,
        shuffle=True,
        batch_size=config.BATCH_SIZE,
        pin_memory = config.pin_memory,
        num_workers = config.num_workers
        )

    return train_loader, test_loader, val_loader, training_data, test_data, val_data

cuda:5


In [4]:
train_dl,test_dl,valid_dl, train_data,test_data, valid_data = GetDataloader()
criterion = nn.CrossEntropyLoss()

In [7]:
def evaluation(model,test_dl):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    total = 0
    preds = []
    pred_labels = []
    labels = []

            # Disable gradient calculation for validation or inference using torch.no_rad()
    with torch.no_grad():
                for x,y in test_dl:
                    x = x.to(config.device)
                    y = y.to(config.device) #CHW --> #HWC
                    valid_logits = model(x)
                    predict_prob = F.softmax(valid_logits)
                    _,predictions = predict_prob.max(1)
                    predictions = predictions.to('cpu')

                    _, valid_preds = torch.max(valid_logits, 1)
                    valid_loss = criterion(valid_logits,y)
                    running_loss += valid_loss.item() * x.size(0)
                    running_corrects += torch.sum(valid_preds == y.data)
                    total += y.size(0)
                    predict_prob = predict_prob.to('cpu')

                    pred_labels.extend(list(predictions.numpy()))
                    preds.extend(list(predict_prob.numpy()))
                    y = y.to('cpu')
                    labels.extend(list(y.numpy()))

    epoch_loss = running_loss / len(test_data)
    epoch_acc = running_corrects.double() / len(test_data)
    print("Test Loss is {}".format(epoch_loss))
    print("Test Accuracy is {}".format(epoch_acc.cpu()))
    return np.array(labels),np.array(pred_labels),np.array(preds)

In [16]:
efficientnet = models.efficientnet_b4(pretrained = True)
efficientnet.classifier[1] = nn.Linear(in_features = 1792, out_features = 2, bias = True)
model = efficientnet
model.load_state_dict(torch.load('Child/efficientnet_best.pt'))
model = model.to(config.device)
labels, pred_labels, preds = evaluation(model,test_dl)

Test Loss is 0.6257334280500433
Test Accuracy is 0.6943450250536864


In [17]:
print(metrics.classification_report(labels,pred_labels,target_names = ['abnormal','normal']))
cm = metrics.confusion_matrix(labels,pred_labels)
print('\n classwise accuracy')
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print(cm.diagonal())

print(roc_auc_score(1-np.array(labels), np.array(preds)[:,0]))

              precision    recall  f1-score   support

    abnormal       0.70      0.93      0.80       907
      normal       0.67      0.25      0.37       490

    accuracy                           0.69      1397
   macro avg       0.68      0.59      0.58      1397
weighted avg       0.69      0.69      0.65      1397


 classwise accuracy
[0.93384785 0.25102041]
0.6828724433544091


In [18]:
fpr_tpr_auc = {}
fpr,tpr,_ = metrics.roc_curve(labels,preds[:,1])
aucc = metrics.auc(fpr,tpr)
fpr_tpr_auc[1] = [fpr,tpr,aucc]
model.train()
with open('eff_b4.pkl','wb') as f:
    pickle.dump(fpr_tpr_auc,f)

# ShuffleNet

In [20]:
shufflenet = models.shufflenet_v2_x1_0(pretrained = True)
shufflenet.fc = nn.Linear(in_features = 1024, out_features = 2, bias=True)
model = shufflenet
model.load_state_dict(torch.load('Child/shufflenet_best.pt'))
model = model.to(config.device)
labels, pred_labels, preds = evaluation(model,test_dl)

Test Loss is 1.2722560653707002
Test Accuracy is 0.6993557623478883


In [21]:
print(metrics.classification_report(labels,pred_labels,target_names = ['abnormal','normal']))
cm = metrics.confusion_matrix(labels,pred_labels)
print('\n classwise accuracy')
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print(cm.diagonal())

print(roc_auc_score(1-np.array(labels), np.array(preds)[:,0]))

              precision    recall  f1-score   support

    abnormal       0.73      0.85      0.79       907
      normal       0.60      0.43      0.50       490

    accuracy                           0.70      1397
   macro avg       0.67      0.64      0.64      1397
weighted avg       0.69      0.70      0.68      1397


 classwise accuracy
[0.84674752 0.42653061]
0.6976182975946719


In [22]:
fpr_tpr_auc = {}
fpr,tpr,_ = metrics.roc_curve(labels,preds[:,1])
aucc = metrics.auc(fpr,tpr)
fpr_tpr_auc[1] = [fpr,tpr,aucc]
model.train()
with open('shuffle.pkl','wb') as f:
    pickle.dump(fpr_tpr_auc,f)

# ResNet

In [23]:
resnet50 = torchvision.models.resnet50(pretrained=True)
resnet50.fc = nn.Sequential(
    nn.Dropout(0.4),
    nn.Linear(in_features = 2048, out_features = 2, bias = True))
model = resnet50
model.load_state_dict(torch.load('Child/resnet_best.pt'))
model = model.to(config.device)
labels, pred_labels, preds = evaluation(model,test_dl)

Test Loss is 0.5901733895199761
Test Accuracy is 0.6986399427344309


In [24]:
print(metrics.classification_report(labels,pred_labels,target_names = ['abnormal','normal']))
cm = metrics.confusion_matrix(labels,pred_labels)
print('\n classwise accuracy')
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print(cm.diagonal())

print(roc_auc_score(1-np.array(labels), np.array(preds)[:,0]))

              precision    recall  f1-score   support

    abnormal       0.72      0.88      0.79       907
      normal       0.62      0.37      0.46       490

    accuracy                           0.70      1397
   macro avg       0.67      0.62      0.63      1397
weighted avg       0.68      0.70      0.68      1397


 classwise accuracy
[0.87541345 0.37142857]
0.6998312445154468


In [25]:
fpr_tpr_auc = {}
fpr,tpr,_ = metrics.roc_curve(labels,preds[:,1])
aucc = metrics.auc(fpr,tpr)
fpr_tpr_auc[1] = [fpr,tpr,aucc]
model.train()
with open('resnet.pkl','wb') as f:
    pickle.dump(fpr_tpr_auc,f)

# Squeezenet

In [26]:
squeezenet = torchvision.models.squeezenet1_0(pretrained=True)
squeezenet.classifier[1] = nn.Conv2d(512, 2, kernel_size=(1, 1), stride=(1, 1))
model = squeezenet
model.load_state_dict(torch.load('Child/squeezenet_best.pt'))
model = model.to(config.device)
labels, pred_labels, preds = evaluation(model,test_dl)

Test Loss is 0.6931474786958441
Test Accuracy is 0.6492483894058697


In [27]:
print(metrics.classification_report(labels,pred_labels,target_names = ['abnormal','normal']))
cm = metrics.confusion_matrix(labels,pred_labels)
print('\n classwise accuracy')
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print(cm.diagonal())

print(roc_auc_score(1-np.array(labels), np.array(preds)[:,0]))

              precision    recall  f1-score   support

    abnormal       0.65      1.00      0.79       907
      normal       0.00      0.00      0.00       490

    accuracy                           0.65      1397
   macro avg       0.32      0.50      0.39      1397
weighted avg       0.42      0.65      0.51      1397


 classwise accuracy
[1. 0.]
0.5


In [28]:
fpr_tpr_auc = {}
fpr,tpr,_ = metrics.roc_curve(labels,preds[:,1])
aucc = metrics.auc(fpr,tpr)
fpr_tpr_auc[1] = [fpr,tpr,aucc]
model.train()
with open('squeeze.pkl','wb') as f:
    pickle.dump(fpr_tpr_auc,f)