In [53]:
import torch
from PIL import Image
from torch.utils.data import Dataset
import torchvision
from torchvision import datasets, transforms
import os
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from random import random
import numpy as np
from sklearn.metrics import roc_curve, auc

In [54]:
class XrayDataset(Dataset):
    def __init__(self, data_dir, train=True, convertRGB=False):
        sample_file = "valid.csv"
        sample_folder = "valid"
        self.train = train
        if train:
            sample_file = "train.csv"
            sample_folder = "train"
        self.sample_file = os.path.join(data_dir, sample_file)
        self.sample_folder = os.path.join(data_dir, sample_folder)
        self.convertRGB = convertRGB

        self.labels = []
        self.paths = []
        self.transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(240),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            
        ])

        with open(self.sample_file) as f:
            f = [line for line in f if random() <= .01]
            for line in f:
                if line.startswith("Path"):
                    continue
                parts = line.split(",")
                self.paths.append(os.path.join(data_dir, "..", parts[0]))
                multi_label = []
                for i in parts[5:]:
                    if i.strip() == "" or int(float(i.strip())) == -1:
                        multi_label.append(0)
                    else:
                        multi_label.append((int(float(i.strip()))))
                self.labels.append(multi_label)

    def __len__(self):
        if self.train:
            return int(len(self.paths) / 8)
        else:
            return int(len(self.paths))

    def __getitem__(self, index):

        #print(self.paths[index])
        if self.convertRGB:
            image = Image.open(self.paths[index]).convert('RGB')
        else:
            image = Image.open(self.paths[index])
        image = self.transform(image)

        X = image #torch.load(self.paths[index])
        #y = torch.LongTensor(self.labels[index])
        y = torch.FloatTensor([self.labels[index][6],self.labels[index][8],self.labels[index][10]]) # 10 = Pleural Effusion target AUC of (0.97)

        return X, y

In [55]:
# Training settings
batch_size=32 
epochs=10
log_interval=10
lr=0.001
momentum=0.5
no_cuda=False 
save_model=False
seed=1
test_batch_size=1000
number_of_classes = 3 # all classes is 14
model_to_use = "densenet"
freeze_pretrained = True
 

In [56]:
# load data
use_cuda = not no_cuda and torch.cuda.is_available()
if use_cuda:
    print("Using " + torch.cuda.get_device_name(0))
    torch.cuda.set_device(0)
device = torch.device("cuda" if use_cuda else "cpu")

# lets fix the random seeds for reproducibility.
torch.manual_seed(6250)
if torch.cuda.is_available():
    torch.cuda.manual_seed(6250)

kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

if model_to_use == "densenet":
    rgb = True
else:
    rgb = False

train_dataset = XrayDataset("C:/Users/chait/Downloads/CheXpert-v1.0-small/CheXpert-v1.0-small", True,convertRGB=rgb)
test_dataset = XrayDataset("C:/Users/chait/Downloads/CheXpert-v1.0-small/CheXpert-v1.0-small", False,convertRGB=rgb)

train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size, shuffle=True, **kwargs)

In [37]:
# Define NN
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(57*57*50, 500)# in_features=64*9*9
        #self.fc1 = nn.Linear(224*224, 500)
        self.fc2 = nn.Linear(500, number_of_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 57*57*50)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

def train(model, device, train_loader, criterion, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                       100. * batch_idx / len(train_loader), loss.item()))

def test(model, device, criterion, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    outputs = []
    targets = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target)
            pred = (output > 0.5).float()
            outputs.extend(output.tolist())
            targets.extend(target.tolist())
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    aucs = calc_auc_roc(np.asarray(outputs), np.asarray(targets))
    print("AUC: " + " ".join(str(x) for x in aucs.values()))
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, (len(test_loader.dataset) * number_of_classes),
        100. * correct / (len(test_loader.dataset) * number_of_classes)))
    return aucs.values()

def calc_auc_roc(outputs, targets):
    aucs = {}
    for i in range(number_of_classes):
        t1 = targets[:, i]
        o1 = outputs[:, i]
        fpr, tpr, thresholds = roc_curve(t1, o1)
        aucs[i] = auc(fpr, tpr)
    return aucs


In [38]:
if model_to_use == "densenet":
        model = torchvision.models.densenet121(pretrained=True).to(device)
        if freeze_pretrained:
            for param in model.parameters():
                param.requires_grad = False
        #model = torchvision.models.resnet152(pretrained=True).to(device)
        #model = torchvision.models.densenet121(pretrained='imagenet').to(device)
        num_ftrs = model.classifier.in_features # model.fc for resnet, model.classifier for densenet
        model.classifier = nn.Linear(num_ftrs, number_of_classes)
else:
        model = Net().to(device)
if use_cuda:
    model.cuda()
    
#optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))

In [39]:
best_aucs = [0] * number_of_classes
best_avg_auc = 0
decreasing_auc_epoch_count = 0

for epoch in range(1, epochs + 1):
        train(model, device, train_loader, criterion, optimizer, epoch)
        aucs = test(model, device, criterion, test_loader)
        avg_auc = sum(aucs) / len(aucs)
        if avg_auc > best_avg_auc:
            best_avg_auc = avg_auc
            best_aucs = aucs
            decreasing_auc_epoch_count = 0
        else:
            decreasing_auc_epoch_count += 1
            if decreasing_auc_epoch_count > 3:
                print("Exiting training early, AUC not increasing. Exited on epoch " + str(epoch))
                break
if (save_model):
    torch.save(model.state_dict(),"model.pt")
    
best_aucs





AUC: nan 0.0 nan

Test set: Average loss: 0.3419, Accuracy: 5/6 (83%)

AUC: nan 0.0 nan

Test set: Average loss: 0.2889, Accuracy: 5/6 (83%)

AUC: nan 0.0 nan

Test set: Average loss: 0.2600, Accuracy: 5/6 (83%)

AUC: nan 0.0 nan

Test set: Average loss: 0.2953, Accuracy: 5/6 (83%)

Exiting training early, AUC not increasing. Exited on epoch 4


[0, 0, 0]

In [52]:
sample_file = os.path.join("C:/Users/chait/Downloads/CheXpert-v1.0-small/CheXpert-v1.0-small/train.csv")
sample_folder = os.path.join("C:/Users/chait/Downloads/CheXpert-v1.0-small/CheXpert-v1.0-small/train")
import pandas as pd
sample_table = pd.read_csv(sample_file, keep_default_na=True)
sample_table['patient_id'] =  sample_table['Path'].str.split('/').str[2]
sample_table['study_id'] =  sample_table['Path'].str.split('/').str[3]

sample_table.head(5)

Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,...,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices,patient_id,study_id
0,CheXpert-v1.0-small/train/patient00001/study1/...,Female,68,Frontal,AP,1.0,,,,,...,,,,0.0,,,,1.0,patient00001,study1
1,CheXpert-v1.0-small/train/patient00002/study2/...,Female,87,Frontal,AP,,,-1.0,1.0,,...,-1.0,,-1.0,,-1.0,,1.0,,patient00002,study2
2,CheXpert-v1.0-small/train/patient00002/study1/...,Female,83,Frontal,AP,,,,1.0,,...,-1.0,,,,,,1.0,,patient00002,study1
3,CheXpert-v1.0-small/train/patient00002/study1/...,Female,83,Lateral,,,,,1.0,,...,-1.0,,,,,,1.0,,patient00002,study1
4,CheXpert-v1.0-small/train/patient00003/study1/...,Male,41,Frontal,AP,,,,,,...,,,,0.0,,,,,patient00003,study1


In [49]:


paths = []

with open(sample_file) as f:
            f = [line for line in f if random() <= .01]
            for line in f:
                if line.startswith("Path"):
                    continue
                parts = line.split(",")
                paths.append(os.path.join("C:/Users/chait/Downloads/CheXpert-v1.0-small/CheXpert-v1.0-small", "..", parts[0]))
                print(parts)
print(paths[0]) 
print(len(paths))

['CheXpert-v1.0-small/train/patient00011/study13/view2_lateral.jpg', 'Female', '22', 'Lateral', '', '', '0.0', '', '', '', '', '0.0', '', '0.0', '0.0', '0.0', '', '', '\n']
['CheXpert-v1.0-small/train/patient00023/study2/view2_lateral.jpg', 'Male', '62', 'Lateral', '', '', '', '', '', '', '', '', '', '', '', '1.0', '', '', '\n']
['CheXpert-v1.0-small/train/patient00038/study3/view1_frontal.jpg', 'Male', '78', 'Frontal', 'AP', '', '', '', '1.0', '', '', '', '', '', '', '1.0', '', '', '1.0\n']
['CheXpert-v1.0-small/train/patient00043/study1/view1_frontal.jpg', 'Male', '38', 'Frontal', 'PA', '1.0', '0.0', '0.0', '', '', '', '0.0', '', '', '', '0.0', '', '', '\n']
['CheXpert-v1.0-small/train/patient00044/study5/view1_frontal.jpg', 'Female', '49', 'Frontal', 'AP', '', '', '', '', '', '', '-1.0', '', '-1.0', '', '', '', '', '\n']
['CheXpert-v1.0-small/train/patient00098/study3/view1_frontal.jpg', 'Female', '53', 'Frontal', 'AP', '', '1.0', '', '1.0', '', '', '', '', '1.0', '1.0', '', '', '',

['CheXpert-v1.0-small/train/patient42719/study1/view1_frontal.jpg', 'Male', '73', 'Frontal', 'AP', '', '', '1.0', '', '', '1.0', '-1.0', '', '-1.0', '', '1.0', '', '', '\n']
['CheXpert-v1.0-small/train/patient42829/study6/view1_frontal.jpg', 'Male', '55', 'Frontal', 'AP', '', '', '-1.0', '1.0', '', '1.0', '', '', '', '', '1.0', '', '', '1.0\n']
['CheXpert-v1.0-small/train/patient42835/study3/view1_frontal.jpg', 'Male', '58', 'Frontal', 'AP', '', '', '', '1.0', '', '1.0', '', '', '', '', '', '', '', '1.0\n']
['CheXpert-v1.0-small/train/patient42847/study2/view1_frontal.jpg', 'Male', '19', 'Frontal', 'AP', '', '', '', '1.0', '', '', '', '', '', '1.0', '1.0', '', '', '0.0\n']
['CheXpert-v1.0-small/train/patient42907/study3/view1_frontal.jpg', 'Male', '58', 'Frontal', 'AP', '', '', '1.0', '', '', '', '-1.0', '', '-1.0', '', '1.0', '', '', '1.0\n']
['CheXpert-v1.0-small/train/patient42908/study9/view1_frontal.jpg', 'Male', '90', 'Frontal', 'AP', '', '', '1.0', '', '', '', '-1.0', '', '-1.0'

In [None]:
class XrayDataset(Dataset):
    def __init__(self, data_dir, train=True, convertRGB=False):
        sample_file = "valid.csv"
        sample_folder = "valid"
        self.train = train
        if train:
            sample_file = "train.csv"
            sample_folder = "train"
        self.sample_file = os.path.join(data_dir, sample_file)
        self.sample_folder = os.path.join(data_dir, sample_folder)
        self.convertRGB = convertRGB

        self.labels = []
        self.paths = []
        self.transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(240),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            
        ])

        with open(self.sample_file) as f:
            f = [line for line in f if random() <= .01]
            for line in f:
                if line.startswith("Path"):
                    continue
                parts = line.split(",")
                self.paths.append(os.path.join(data_dir, "..", parts[0]))
                multi_label = []
                for i in parts[5:]:
                    if i.strip() == "" or int(float(i.strip())) == -1:
                        multi_label.append(0)
                    else:
                        multi_label.append((int(float(i.strip()))))
                self.labels.append(multi_label)

    def __len__(self):
        if self.train:
            return int(len(self.paths) / 8)
        else:
            return int(len(self.paths))

    def __getitem__(self, index):

        #print(self.paths[index])
        if self.convertRGB:
            image = Image.open(self.paths[index]).convert('RGB')
        else:
            image = Image.open(self.paths[index])
        image = self.transform(image)

        X = image #torch.load(self.paths[index])
        #y = torch.LongTensor(self.labels[index])
        y = torch.FloatTensor([self.labels[index][6],self.labels[index][8],self.labels[index][10]]) # 10 = Pleural Effusion target AUC of (0.97)

        return X, y

In [None]:
dataiter = iter(train_loader)
X_samples, y_samples = dataiter.next()

print(X_samples)
print(y_samples)