In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import imageio
from os import listdir
import skimage.transform
import pickle
import sys, os
from sklearn.preprocessing import MultiLabelBinarizer
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from collections import defaultdict
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
from sklearn.metrics import roc_auc_score
import torch.optim as optim
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [None]:
# Read the dataset's per-image metadata CSV into a DataFrame.
data_entry_path = '../input/data/Data_Entry_2017.csv'
meta_data = pd.read_csv(filepath_or_buffer=data_entry_path)

In [None]:
data_path = '../input/dataset/'


def _load_pickle(name):
    """Unpickle the file called ``name`` inside ``data_path`` and return its content."""
    with open(data_path + name, "rb") as handle:
        return pickle.load(handle)


# One-hot label matrices for each split.
val_y = _load_pickle("val_y_onehot.pkl")
train_y = _load_pickle("train_y_onehot.pkl")
test_y = _load_pickle("test_y_onehot.pkl")

# Image file names for each split.
file_name_train = _load_pickle("train_filename.pkl")
file_name_test = _load_pickle("test_filename.pkl")
file_name_val = _load_pickle("val_filename.pkl")

In [None]:
# Label index -> pickle file name of the single-label model for that index.
# Index 10 is intentionally absent: class_list[10] is 'No Finding' and every
# loop that reads this mapping skips i == 10.
label_map = {
    0: 'Atelectasis.pkl',
    1: 'Cardiomegaly.pkl',
    2: 'Consolidation.pkl',
    3: 'Edema.pkl',
    4: 'Effusion.pkl',
    5: 'Emphysema.pkl',
    6: 'Fibrosis.pkl',
    7: 'Hernia.pkl',
    8: 'infiltration.pkl',
    9: 'mass.pkl',
    11: 'nodule.pkl',
    12: 'pluralThickening.pkl',
    13: 'pnuemonia.pkl',
    14: 'pnuemotharax.pkl',
}

In [None]:
# Display names of the 15 label columns, in column order.
class_list = [
    'Atelectasis',
    'Cardiomegaly',
    'Consolidation',
    'Edema',
    'Effusion',
    'Emphysema',
    'Fibrosis',
    'Hernia',
    'Infiltration',
    'Mass',
    'No Finding',
    'Nodule',
    'Pleural_Thickening',
    'Pneumonia',
    'Pnuemothorax',
]
# Same names with the 'No Finding' entry removed (14 labels).
class_list_1 = [name for name in class_list if name != 'No Finding']

In [None]:
# Stack the label matrices of all three splits row-wise (train, then test, then val).
y = np.concatenate([train_y, test_y, val_y], axis=0)

In [None]:
# Column sums of the combined label matrix: one count per label index 0..14.
samples_list = [sum(y.T[col]) for col in range(15)]

In [None]:
# Column sums of the training label matrix: one count per label index 0..14.
samples_list_train = [sum(train_y.T[col]) for col in range(15)]

In [None]:
# Column sums of the TEST label matrix: one count per label index 0..14.
# (Previously this summed `y`, the concatenation of all splits, so the "test"
# counts were just a copy of `samples_list`.)
samples_list_test = [sum(test_y.T[col]) for col in range(15)]

In [None]:
# Per-label count over all splits, as a percentage of the total number of rows in `y`.
for name, count in zip(class_list, samples_list):
    print(name, (count/len(y))*100)

In [None]:
# Per-label count in the training split, as a percentage of the total number of
# rows in `y` (all splits combined, not just the training rows).
for name, count in zip(class_list, samples_list_train):
    print(name, (count/len(y))*100)

In [None]:
# Values of `samples_list_test`, as a percentage of the total number of rows in
# `y` (all splits combined).
for name, count in zip(class_list, samples_list_test):
    print(name, (count/len(y))*100)

In [None]:
from prettytable import PrettyTable
# Tabulate the share of each disease label in the combined dataset,
# leaving out index 10 ('No Finding').
newTable = PrettyTable(["Disease_name/label", "percent_in_dataset"])
for idx, (name, count) in enumerate(zip(class_list, samples_list)):
    if idx == 10:
        continue
    newTable.add_row([name, (count/len(y))*100])
print(newTable)

In [None]:
class ChestXrayDataSet(Dataset):
    """Multi-label chest X-ray dataset that also yields per-label loss weights.

    Images are read from a cached ``.npy`` file in the working directory and
    labels from a pickled one-hot matrix under ``../input/dataset/``.

    Args:
        train_or_valid: ``"train"`` or ``"valid"`` select the corresponding
            files; any other value selects the test files.
        transform: optional callable applied to the 3-channel image array.
            When ``None`` the raw array is returned.
    """

    def __init__(self, train_or_valid = "train", transform=None):

        data_path = './'
        self.train_or_valid = train_or_valid
        if train_or_valid == "train":
            image_file = "train_X_small.npy"
            label_file = "../input/dataset/train_y_onehot.pkl"
        elif train_or_valid == "valid":
            image_file = "valid_X_small.npy"
            label_file = "../input/dataset/val_y_onehot.pkl"
        else:
            image_file = "test_X.npy"
            label_file = "../input/dataset/test_y_onehot.pkl"

        # The cached arrays are scaled back up by 255*255 before the uint8 cast.
        # NOTE(review): presumably they were stored as resize-output / 255 (as
        # test_util does), which makes this land in [0, 255] — confirm for the
        # train/valid caches.
        self.X = np.uint8(np.load(data_path + image_file)*255*255)

        # Load the label matrix once (it was previously unpickled twice).
        with open(label_file, "rb") as f:
            labels = pickle.load(f)

        # Inverse-frequency weights, computed on the full label matrix:
        # N / (#positives) and N / (#negatives) for every label column.
        positives = labels.sum(axis=0)
        self.label_weight_pos = len(labels)/positives
        self.label_weight_neg = len(labels)/(len(labels)-positives)

        # The image cache may hold fewer samples than the label file (test_util
        # only caches a limited number of test images). Keep only labels that
        # have a matching image so iterating the dataset cannot index past X.
        self.y = labels[:len(self.X)]
        self.transform = transform

    def __getitem__(self, index):
        """
        Args:
            index: the index of item
        Returns:
            image, its labels and the per-label weights (labels and weights as
            float tensors)
        """
        # Repeat the array three times along its last axis (np.tile with a
        # scalar repetition count) to obtain a 3-channel image.
        current_X = np.tile(self.X[index],3)
        label = self.y[index]
        label_inverse = 1- label
        # Positive entries get the positive weight, negative entries the negative one.
        weight = np.add((label_inverse * self.label_weight_neg),(label * self.label_weight_pos))
        # Fall back to the untransformed array instead of leaving `image` unbound.
        image = current_X
        if self.transform is not None:
            image = self.transform(current_X)
        return image, torch.from_numpy(label).type(torch.FloatTensor), torch.from_numpy(weight).type(torch.FloatTensor)

    def __len__(self):
        return len(self.y)

In [None]:
class ChestXrayDataSet_individual(Dataset):
    """Chest X-ray dataset yielding ``(image, labels)`` pairs without loss weights.

    Images are read from a cached ``.npy`` file in the working directory and
    labels from a pickled one-hot matrix under ``../input/dataset/``.

    Args:
        train_or_valid: ``"train"`` or ``"valid"`` select the corresponding
            files; any other value selects the test files.
        transform: optional callable applied to the 3-channel image array.
            When ``None`` the raw array is returned.
    """

    def __init__(self, train_or_valid = "train", transform=None):

        data_path = './'
        self.train_or_valid = train_or_valid
        if train_or_valid == "train":
            image_file = "train_X_small.npy"
            label_file = "../input/dataset/train_y_onehot.pkl"
        elif train_or_valid == "valid":
            image_file = "valid_X_small.npy"
            label_file = "../input/dataset/val_y_onehot.pkl"
        else:
            image_file = "test_X.npy"
            label_file = "../input/dataset/test_y_onehot.pkl"

        # The cached arrays are scaled back up by 255*255 before the uint8 cast.
        # NOTE(review): presumably they were stored as resize-output / 255 (as
        # test_util does) — confirm for the train/valid caches.
        self.X = np.uint8(np.load(data_path + image_file)*255*255)
        with open(label_file, "rb") as f:
            labels = pickle.load(f)

        # The image cache may hold fewer samples than the label file (test_util
        # only caches a limited number of test images). Keep only labels that
        # have a matching image so iterating the dataset cannot index past X.
        self.y = labels[:len(self.X)]
        self.transform = transform

    def __getitem__(self, index):
        """Return the image at ``index`` and its label row as a float tensor."""
        # Repeat the array three times along its last axis to get 3 channels.
        current_X = np.tile(self.X[index],3)
        label = self.y[index]
        # Fall back to the untransformed array instead of leaving `image` unbound.
        image = current_X
        if self.transform is not None:
            image = self.transform(current_X)

        return image, torch.from_numpy(np.asarray(label)).type(torch.FloatTensor)

    def __len__(self):
        return len(self.y)

In [None]:
class DenseNet121(nn.Module):
    """torchvision DenseNet-121 with a replaced classification head.

    The backbone is created with ``pretrained=True``; its ``classifier`` is
    swapped for a linear layer producing ``out_size`` values followed by a
    sigmoid.
    """

    def __init__(self, out_size):
        super().__init__()
        backbone = torchvision.models.densenet121(pretrained=True)
        # Reuse the input width of the stock classifier for the new head.
        in_features = backbone.classifier.in_features
        backbone.classifier = nn.Sequential(
            nn.Linear(in_features, out_size),
            nn.Sigmoid(),
        )
        self.densenet121 = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped network and return its output."""
        return self.densenet121(x)

In [None]:
def test_util(max_images=1000):
    """Build and save the cached test image array ``./test_X.npy``.

    Reads up to ``max_images`` files listed in the global ``file_name_test``,
    reduces each to a single channel, resizes it to 256x256, divides by 255 and
    stores the stack with shape ``(n, 256, 256, 1)``.

    Args:
        max_images: upper bound on the number of test images to cache
            (defaults to 1000, the previously hard-coded limit).
    """
    test_X = []
    # Slicing handles both "fewer files than the limit" and an empty file list.
    for image_path in file_name_test[:max_images]:
        img = imageio.imread(image_path)
        # Some images carry extra channels, e.g. shape (1024,1024,4); keep the
        # first channel. Checking ndim (rather than the exact 1024x1024 shape)
        # avoids indexing a third axis on 2-D images of another size.
        if img.ndim > 2:
            img = img[:,:,0]
        img_resized = skimage.transform.resize(img,(256,256)) # or use img[::4] here
        test_X.append((np.array(img_resized)/255).reshape(256,256,1))
    print(len(test_X))
    test_X = np.array(test_X)
    np.save(os.path.join('./',"test_X.npy"), test_X)

In [None]:
test_util()

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
def test():
    """Evaluate the global joint ``model`` on the test split.

    Runs the model over ``ChestXrayDataSet(train_or_valid="test")`` on the GPU,
    then prints and collects per-label AUROC and accuracy for every label
    except index 10 ('No Finding').

    Returns:
        (AUROC_avg, AUROC_dict): the mean AUROC over the 14 reported labels and
        a dict mapping label name -> AUROC.
    """
    test_dataset = ChestXrayDataSet(train_or_valid="test",
                                    transform=transforms.Compose([
                                    transforms.ToPILImage(),
                                    transforms.CenterCrop(224),
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
                                    ]))
    test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False, num_workers=2)
    gt = torch.FloatTensor().cuda()
    pred = torch.FloatTensor().cuda()

    # Put the network in inference mode so layers such as batch norm/dropout
    # do not behave as in training, whatever mode the model was pickled in.
    model.eval()
    # `Variable(..., volatile=True)` no longer disables autograd in current
    # PyTorch; torch.no_grad() is the supported way to skip graph building.
    with torch.no_grad():
        for inp, target, _weight in test_loader:
            target = target.cuda()
            gt = torch.cat((gt, target), 0)
            output = model(inp.view(-1, 3, 224, 224).cuda())
            pred = torch.cat((pred, output.data), 0)

    AUROCs= compute_AUCs(gt, pred)
    AUROCs = np.array(AUROCs)
    # Average over the 14 disease labels, skipping index 10.
    AUROC_avg =0.0
    for i in range(15):
        if i!=10:
            AUROC_avg+=AUROCs[i]
    AUROC_avg = AUROC_avg/14
    print('The average AUROC is {AUROC_avg:.3f}'.format(AUROC_avg=AUROC_avg))
    accuracy = compute_accuracy(gt,pred)
    AUROC_dict = {}
    for i in range(15):
        if i!=10:
            print('The AUROC of {} is {}'.format(class_list[i], AUROCs[i]))
            print('The Accuracy of {} is {}'.format(class_list[i],accuracy[i]))
            AUROC_dict[class_list[i]] = AUROCs[i]

    return AUROC_avg,AUROC_dict

In [None]:
def compute_AUCs(gt, pred):
    """Return a list with the ROC AUC of each of the 15 label columns.

    Args:
        gt: tensor of ground-truth labels; column ``i`` is label ``i``.
        pred: tensor of predicted scores with the same column layout.
    """
    targets = gt.cpu().numpy()
    scores = pred.cpu().numpy()
    return [roc_auc_score(targets[:, col], scores[:, col]) for col in range(15)]

In [None]:
def compute_accuracy(gt,pred):
    """Return a list with the accuracy of each of the 15 label columns.

    Scores strictly greater than 0.5 count as a positive prediction (1),
    everything else as negative (0).
    """
    targets = gt.cpu().numpy()
    scores = pred.cpu().numpy()

    per_label = []
    for col in range(15):
        hard_preds = [1 if score > 0.5 else 0 for score in scores[:, col]]
        per_label.append(accuracy_score(targets[:, col], hard_preds))
    return per_label

In [None]:
# Load the pickled joint model and evaluate it on the test split.
# NOTE: pickle.load can execute arbitrary code; only load trusted model files.
with open("../input/model-joint/model_epoch2.pkl", "rb") as model_file:
    model = pickle.load(model_file)
cudnn.benchmark = True
Auroc_avg, Auroc_dict = test()

In [None]:
def test_individual(index):
    """Evaluate the global single-label ``model`` against test label ``index``.

    Runs the model over ``ChestXrayDataSet_individual(train_or_valid="test")``
    on the GPU, compares its output with column ``index`` of the labels, prints
    the AUROC and accuracy, and returns the AUROC.

    Args:
        index: column of the one-hot label matrix to evaluate.
    """
    test_dataset = ChestXrayDataSet_individual(train_or_valid="test",
                                    transform=transforms.Compose([
                                    transforms.ToPILImage(),
                                    transforms.CenterCrop(224),
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
                                ]))
    test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False, num_workers=2)
    gt = torch.FloatTensor().cuda()
    pred = torch.FloatTensor().cuda()

    # Put the network in inference mode so layers such as batch norm/dropout
    # do not behave as in training, whatever mode the model was pickled in.
    model.eval()
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for inp, target in test_loader:
            # Keep only the label column this model is evaluated on.
            target = target[:, index].cuda()
            gt = torch.cat((gt, target), 0)
            output = model(inp.view(-1, 3, 224, 224).cuda())
            pred = torch.cat((pred, output.data), 0)
    AUROC = compute_AUCs_individual(gt, pred)
    accuracy = compute_accuracy_individual(gt,pred)
    print('The AUROC of {} is {}'.format(class_list[index], AUROC))
    print(accuracy)
    return AUROC

In [None]:
def compute_AUCs_individual(gt, pred):
    """Return the ROC AUC of ``pred`` against ``gt`` for a single label.

    ``gt`` is reshaped to a column vector of shape ``(n, 1)`` before scoring.
    """
    scores = pred.cpu().numpy()
    targets = gt.cpu().numpy()
    targets = targets.reshape((targets.shape[0], 1))
    return roc_auc_score(targets, scores)

In [None]:
def compute_accuracy_individual(gt, pred):
    """Return the accuracy of ``pred`` thresholded at 0.5 against ``gt``.

    Each row of ``pred`` becomes 1 when it is strictly greater than 0.5 and 0
    otherwise; ``gt`` is reshaped to a column vector of shape ``(n, 1)``.
    """
    scores = pred.cpu().numpy()
    targets = gt.cpu().numpy()
    targets = targets.reshape((targets.shape[0], 1))
    hard_preds = [1 if score > 0.5 else 0 for score in scores]
    return accuracy_score(targets, hard_preds)

In [None]:
# Evaluate every single-label model stored under `models_path`, one per label
# index, skipping index 10 (which has no entry in label_map).
# NOTE: pickle.load can execute arbitrary code; only load trusted model files.
models_path = '../input/model-labelweights'
aurocs_list = []
for label_idx in range(15):
    if label_idx == 10:
        continue
    model_file_path = os.path.join(models_path, label_map[label_idx])
    with open(model_file_path, "rb") as model_file:
        # test_individual reads the global `model`, so rebind it per label.
        model = pickle.load(model_file)
    aurocs_list.append(test_individual(label_idx))

In [None]:
# Mean AUROC over the 14 evaluated labels.
print(sum(aurocs_list) / 14)

In [None]:
# Evaluate every single-label model stored under `models_path_1`, one per label
# index, skipping index 10 (which has no entry in label_map).
# NOTE: pickle.load can execute arbitrary code; only load trusted model files.
models_path_1 = '../input/models-withoutweights'
aurocs_list_1 = []
for label_idx in range(15):
    if label_idx == 10:
        continue
    model_file_path = os.path.join(models_path_1, label_map[label_idx])
    with open(model_file_path, "rb") as model_file:
        # test_individual reads the global `model`, so rebind it per label.
        model = pickle.load(model_file)
    aurocs_list_1.append(test_individual(label_idx))

In [None]:
# Mean AUROC over the 14 evaluated labels.
print(sum(aurocs_list_1) / 14)

In [None]:
# Evaluate every single-label model stored under `models_path_2`, one per label
# index, skipping index 10 (which has no entry in label_map).
# NOTE(review): this rebinds `aurocs_list_1`, discarding the results collected
# from '../input/models-withoutweights'; later cells that read `aurocs_list_1`
# therefore see the results of this directory — confirm that is intended.
# NOTE: pickle.load can execute arbitrary code; only load trusted model files.
models_path_2 = '../input/modeles'
aurocs_list_1 = []
for label_idx in range(15):
    if label_idx == 10:
        continue
    model_file_path = os.path.join(models_path_2, label_map[label_idx])
    with open(model_file_path, "rb") as model_file:
        # test_individual reads the global `model`, so rebind it per label.
        model = pickle.load(model_file)
    aurocs_list_1.append(test_individual(label_idx))

In [None]:
# Mean AUROC over the 14 evaluated labels.
print(sum(aurocs_list_1) / 14)

In [None]:
import matplotlib.pyplot as plt

# Bar chart comparing the average AUROC of the three modelling approaches.
modellings = ['joint','single-withweights','single-withoutweights']
avg_auroc = [
    Auroc_avg,
    sum(aurocs_list)/14,
    sum(aurocs_list_1)/14,
]

fig, ax = plt.subplots()
ax.bar(modellings, avg_auroc)
ax.set_xlabel('modelling of labels')
ax.set_ylabel('average auroc across all labels')
plt.show()

In [None]:

# Per-label AUROC for the first seven labels: joint model vs. single-label
# models from `aurocs_list`.
first_labels = class_list_1[0:7]
joint_scores = list(Auroc_dict.values())[0:7]

fig, ax = plt.subplots()
ax.plot(first_labels, joint_scores, label="joint modelling", marker='o')
ax.plot(first_labels, aurocs_list[0:7], label="independent modelling with label weights", marker='o')
ax.set_xlabel('labels')
ax.set_ylabel('auroc_score')
ax.legend()
plt.show()

In [None]:
# Per-label AUROC for the last seven labels: joint model vs. single-label
# models from `aurocs_list`.
last_labels = class_list_1[7:14]
joint_scores = list(Auroc_dict.values())[7:14]

fig, ax = plt.subplots()
ax.plot(last_labels, joint_scores, label="joint modelling", marker='o')
ax.plot(last_labels, aurocs_list[7:14], label="independent modelling with label weights", marker='o')
ax.set_xlabel('labels')
ax.set_ylabel('auroc_score')
ax.legend()
plt.show()

In [None]:
# Per-label AUROC for the first seven labels: joint model vs. single-label
# models from `aurocs_list_1`.
first_labels = class_list_1[0:7]
joint_scores = list(Auroc_dict.values())[0:7]

fig, ax = plt.subplots()
ax.plot(first_labels, joint_scores, label="joint modelling", marker='o')
ax.plot(first_labels, aurocs_list_1[0:7], label="independent modelling without label weights", marker='o')
ax.set_xlabel('labels')
ax.set_ylabel('auroc_score')
ax.legend()
plt.show()

In [None]:
# Per-label AUROC for the last seven labels: joint model vs. single-label
# models from `aurocs_list_1`.
last_labels = class_list_1[7:14]
joint_scores = list(Auroc_dict.values())[7:14]

fig, ax = plt.subplots()
ax.plot(last_labels, joint_scores, label="joint modelling", marker='o')
ax.plot(last_labels, aurocs_list_1[7:14], label="independent modelling without label weights", marker='o')
ax.set_xlabel('labels')
ax.set_ylabel('auroc_score')
ax.legend()
plt.show()