In [1]:
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from src.dataset import ChestXrayDataSet, CLASS_NAMES
from src.model import DenseNet121
from src.utils import compute_AUCs, compute_score_with_logits, tile
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

# Auto-reload imported modules so that edits under src/ take effect without restarting the kernel
%load_ext autoreload
%autoreload 2

In [2]:
# Read the label table and keep only its first `size` rows
# (10k out of the entire ~100k dataset).
size = 10000
labels = pd.read_csv("data/labels/labels.csv")

# Column 0 is handed to ChestXrayDataSet as X and column 1 as Y further down;
# presumably these are image identifiers and label strings -- TODO confirm
# against the CSV header.
X = labels.head(size).iloc[:, 0]
Y = labels.head(size).iloc[:, 1]

# Reproducible 60/40 shuffled split (fixed seed).
# NOTE(review): the split is done per row; if the CSV holds several rows per
# patient, the same patient may land in both partitions -- confirm whether a
# grouped split is needed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.4,
    random_state=42,
    shuffle=True,
)

In [3]:
# Evaluation configuration shared by the cells below.
DATA_DIR = "data/images"       # passed to ChestXrayDataSet as data_dir
BATCH_SIZE = 16                # batch size used by both DataLoaders
N_CLASSES = len(CLASS_NAMES)   # one model output per entry in CLASS_NAMES

In [4]:
# Per-channel mean / std applied to every crop after conversion to a tensor.
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])


def _crops_to_normalized_tensor(crops):
    """Convert a sequence of image crops into one stacked, normalized tensor.

    Each crop is turned into a tensor with ``ToTensor`` and then normalized
    with the module-level ``normalize`` transform; the results are stacked
    along a new leading "crop" dimension.
    """
    to_tensor = transforms.ToTensor()
    return torch.stack([normalize(to_tensor(crop)) for crop in crops])


def _make_ten_crop_transform():
    """Build the resize -> ten-crop -> tensor/normalize pipeline.

    Shared by the train and test datasets so the two cannot drift apart.
    """
    return transforms.Compose([
        transforms.Resize(256),
        transforms.TenCrop(224),
        transforms.Lambda(_crops_to_normalized_tensor),
    ])


# Pinned host memory only helps (and only avoids a warning) when a CUDA
# device is actually present.
_PIN_MEMORY = torch.cuda.is_available()

train_dataset = ChestXrayDataSet(data_dir=DATA_DIR, X=X_train, Y=Y_train,
                                 transform=_make_ten_crop_transform())

# TODO: enable async data loading (num_workers > 0)
# NOTE(review): shuffle=False is kept as in the original; this is fine while the
# loader is only used for evaluation, but actual training would normally want
# shuffle=True -- confirm intent.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=0, pin_memory=_PIN_MEMORY)

test_dataset = ChestXrayDataSet(data_dir=DATA_DIR, X=X_test, Y=Y_test,
                                transform=_make_ten_crop_transform())

test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False,
                         num_workers=0, pin_memory=_PIN_MEMORY)

In [5]:
# Checkpoint file holding the trained weights. It is loaded straight into the
# wrapped module below, so it is expected to be a plain state dict whose keys
# carry no "module." prefix.
CKPT_TRAINED_PATH = "model-trained.pth"
cudnn.benchmark = True  # Fixed input size, enables tuning for optimal use

# Run on the GPU when one is available, otherwise fall back to the CPU so the
# notebook still executes on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model directly on the target device. DataParallel keeps the
# wrapped module where it is, so no second device transfer is needed.
model = DenseNet121(N_CLASSES).to(device)
model = torch.nn.DataParallel(model)

if os.path.isfile(CKPT_TRAINED_PATH):
    print("=> loading checkpoint")
    # map_location lets a checkpoint saved on a GPU be restored on a CPU-only host.
    # NOTE(review): torch.load unpickles the file -- only load checkpoints from
    # a trusted source.
    checkpoint = torch.load(CKPT_TRAINED_PATH, map_location=device)
    # Load directly into the wrapped module: the DataParallel wrapper prefixes
    # its parameter names with "module.", which would not match the saved keys.
    # https://discuss.pytorch.org/t/solved-keyerror-unexpected-key-module-encoder-embedding-weight-in-state-dict/1686/15
    model.module.load_state_dict(checkpoint, strict=True)
    print("=> loaded checkpoint")
else:
    print("=> no checkpoint found")

=> loading checkpoint
=> loaded checkpoint


In [6]:
# Run the model over the whole test loader and collect, per image, the ground
# truth vector (`gt`) and the prediction averaged over its crops (`pred`).

# Use whatever device the model's parameters already live on, so inputs and
# targets always end up next to the weights.
eval_device = next(model.parameters()).device

# switch to evaluate mode
model.eval()

# Collect per-batch results and concatenate once at the end; concatenating onto
# a growing tensor inside the loop re-copies all earlier results every batch.
gt_batches = []
pred_batches = []

with torch.no_grad():  # inference only: no autograd graph is needed
    for inp, target in test_loader:
        gt_batches.append(target.to(eval_device))
        # Each sample carries n_crops crops; fold them into the batch dimension
        # for the forward pass, then average the outputs back per sample.
        bs, n_crops, c, h, w = inp.size()
        output = model(inp.view(-1, c, h, w).to(eval_device))
        pred_batches.append(output.view(bs, n_crops, -1).mean(1))

# An empty loader yields empty float tensors, matching the previous behavior.
gt = torch.cat(gt_batches, 0) if gt_batches else torch.empty(0, device=eval_device)
pred = torch.cat(pred_batches, 0) if pred_batches else torch.empty(0, device=eval_device)

In [7]:
# Score the collected predictions against the ground truth. The result is
# indexed per class below, so it is expected to hold (at least) N_CLASSES values.
AUROCs = compute_AUCs(gt, pred, N_CLASSES)
AUROC_avg = np.mean(AUROCs)

summary_template = 'The average AUROC is {AUROC_avg:.3f}'
per_class_template = 'The AUROC of {} is {}'

# Report the unweighted mean first, then one line per class.
print(summary_template.format(AUROC_avg=AUROC_avg))
for class_idx in range(N_CLASSES):
    print(per_class_template.format(CLASS_NAMES[class_idx], AUROCs[class_idx]))

The average AUROC is 0.757
The AUROC of Atelectasis is 0.7432586132624253
The AUROC of Cardiomegaly is 0.8725316190627218
The AUROC of Effusion is 0.8372770326327436
The AUROC of Infiltration is 0.6569252232142857
The AUROC of Mass is 0.7753299402260756
The AUROC of Nodule is 0.5887260034904014
The AUROC of Pneumonia is 0.5739551703778678
The AUROC of Pneumothorax is 0.8176415399145044
The AUROC of Consolidation is 0.7404053527138116
The AUROC of Edema is 0.8851814307972443
The AUROC of Emphysema is 0.7911216287281194
The AUROC of Fibrosis is 0.7338391191808459
The AUROC of Pleural_Thickening is 0.6689639116169039
The AUROC of Hernia is 0.9092731829573936
