In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import imageio
from os import listdir
import skimage.transform
import pickle
import sys, os
from sklearn.preprocessing import MultiLabelBinarizer
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from collections import defaultdict
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
from sklearn.metrics import roc_auc_score
import torch.optim as optim
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [None]:
# Metadata table for the image set; its "Image Index" and "Finding Labels"
# columns are what get_labels() reads.
data_entry_path = '../input/data/Data_Entry_2017.csv'
meta_data = pd.read_csv(filepath_or_buffer=data_entry_path)

In [None]:
def get_labels(pic_id, metadata=None):
    """Return the finding labels recorded for one image file.

    Args:
        pic_id: File name to look up in the "Image Index" column.
        metadata: Optional DataFrame with "Image Index" and "Finding Labels"
            columns. Defaults to the notebook-level ``meta_data`` table.

    Returns:
        The "Finding Labels" value of the first matching row, split on "|".
        An empty list when no row matches ``pic_id`` or the stored value is
        not a string (e.g. a missing value).

    Raises:
        KeyError: if the table lacks one of the two required columns. The
            previous bare ``except`` silently turned this into "no labels".
    """
    table = meta_data if metadata is None else metadata
    matches = table.loc[table["Image Index"] == pic_id, "Finding Labels"]
    if matches.empty:
        return []
    first = matches.iloc[0]
    # Missing values are not strings and cannot be split; report "no labels".
    if not isinstance(first, str):
        return []
    return first.split("|")

In [None]:
# Split thresholds, all measured against the running file counter `y`:
# labelled files seen while y < N_TEST_IMAGES go to test, labelled files seen
# while y < N_TRAIN_CUTOFF go to train, everything else goes to validation.
N_TEST_IMAGES = 420
N_TRAIN_CUTOFF = 100000
PROGRESS_EVERY = 3000

test_y = []
file_name_test = []
train_y = []
file_name_train = []
val_y = []
file_name_val = []

# ../input/data/images_001/images ... ../input/data/images_012/images
directory = ['../input/data/images_{:03d}/images'.format(part) for part in range(1, 13)]

y = 0  # number of files visited so far, across all splits
for image_dir in directory:
    # os.listdir returns entries in arbitrary order; sorting makes the split
    # reproducible between runs and machines.
    for filename in sorted(os.listdir(image_dir)):
        image_path = os.path.join(image_dir, filename)
        # Look the labels up once per file: every call scans the metadata table.
        labels = get_labels(filename)
        if labels and y < N_TEST_IMAGES:
            test_y.append(labels)
            file_name_test.append(image_path)
        elif labels and y < N_TRAIN_CUTOFF:
            train_y.append(labels)
            file_name_train.append(image_path)
        else:
            # NOTE(review): files without any labels also end up here, with an
            # empty label list - confirm that this is intended.
            val_y.append(labels)
            file_name_val.append(image_path)
        y += 1
        if y % PROGRESS_EVERY == 0:
            print(y)

In [None]:
# Fit a single binarizer on the labels of all three splits, then encode each
# split with that same fitted encoder.
encoder = MultiLabelBinarizer()
encoder.fit(train_y + test_y + val_y)
train_y_onehot, test_y_onehot, val_y_onehot = (
    encoder.transform(split_labels) for split_labels in (train_y, test_y, val_y)
)

In [None]:
# Display the classes learned by the fitted encoder.
encoder.classes_

In [None]:
# Per-label weights for every split. "pos" is the fraction of samples that do
# NOT carry the label, "neg" is the fraction of samples that do.
n_train = len(train_y_onehot)
train_label_counts = train_y_onehot.sum(axis=0)
label_weight_pos_train = (n_train - train_label_counts) / n_train
label_weight_neg_train = train_label_counts / n_train

n_test = len(test_y_onehot)
test_label_counts = test_y_onehot.sum(axis=0)
label_weight_pos_test = (n_test - test_label_counts) / n_test
label_weight_neg_test = test_label_counts / n_test

n_val = len(val_y_onehot)
val_label_counts = val_y_onehot.sum(axis=0)
label_weight_pos_val = (n_val - val_label_counts) / n_val
label_weight_neg_val = val_label_counts / n_val

In [None]:
# Persist the one-hot labels and the file lists of every split in the working
# directory, one pickle per object.
_split_artifacts = {
    "train_y_onehot.pkl": train_y_onehot,
    "test_y_onehot.pkl": test_y_onehot,
    "val_y_onehot.pkl": val_y_onehot,
    "train_filename.pkl": file_name_train,
    "test_filename.pkl": file_name_test,
    "val_filename.pkl": file_name_val,
}
for _pickle_name, _payload in _split_artifacts.items():
    # os.path.join avoids the ".//name" double separator that the previous
    # string concatenation produced; the target file is the same.
    with open(os.path.join('.', _pickle_name), "wb") as f:
        pickle.dump(_payload, f)

In [None]:
data_path = '../input/pickles/'
data_path1 = '../input/onehot/'


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``."""
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only point this at files you produced yourself.
    with open(path, "rb") as f:
        return pickle.load(f)


# NOTE: train_y / val_y / test_y are rebound here. Going by the file names they
# now hold the one-hot label arrays rather than the label lists built earlier
# - TODO confirm against the files in ../input/onehot/.
val_y = _load_pickle(data_path1 + "val_y_onehot.pkl")
train_y = _load_pickle(data_path1 + "train_y_onehot.pkl")
test_y = _load_pickle(data_path1 + "test_y_onehot.pkl")
file_name_train = _load_pickle(data_path + "train_filename.pkl")
file_name_test = _load_pickle(data_path + "test_filename.pkl")
file_name_val = _load_pickle(data_path + "val_filename.pkl")

In [None]:
class ChestXrayDataSet(Dataset):
    """Dataset over the image chunk currently cached in the working directory.

    The constructor loads ``<split>_X_small.npy`` and the matching chunk label
    pickle from ``./``, and derives two scalar class weights from column 1 of
    the full one-hot label file of the split under ``../input/onehot/``.

    Each item is a tuple ``(image, label, weight)`` where ``label`` and
    ``weight`` are float tensors.
    """

    # split name -> (image array file, chunk label file, full one-hot label file)
    _SPLIT_FILES = {
        "train": ("train_X_small.npy", "train_y_onehot_.pkl", "../input/onehot/train_y_onehot.pkl"),
        "valid": ("valid_X_small.npy", "val_y_onehot_.pkl", "../input/onehot/val_y_onehot.pkl"),
        "test": ("test_X_small.npy", "test_y_onehot_.pkl", "../input/onehot/test_y_onehot.pkl"),
    }

    def __init__(self, train_or_valid = "train", transform=None):
        """Load the cached chunk for one split.

        Args:
            train_or_valid: "train" or "valid"; any other value selects the
                test files.
            transform: Optional callable applied to each (H, W, 3) uint8 image.
        """
        data_path = './'
        self.train_or_valid = train_or_valid
        image_file, label_file, onehot_file = self._SPLIT_FILES.get(
            train_or_valid, self._SPLIT_FILES["test"])

        # Rescale the stored floats to bytes. The 255*255 factor presumably
        # mirrors the scaling applied when the chunk was saved - TODO confirm.
        self.X = np.uint8(np.load(data_path + image_file)*255*255)
        # SECURITY: pickle.load executes code from the file; trusted files only.
        with open(data_path + label_file, "rb") as f:
            self.y = pickle.load(f)
        with open(onehot_file, "rb") as f:
            all_labels = pickle.load(f)

        # Inverse-frequency weights for the target class (column 1 of the
        # one-hot matrix), computed over the whole split.
        target_column = all_labels.T[1]
        n_samples = len(target_column)
        n_positive = target_column.sum(axis=0)
        self.label_weight_pos = n_samples / n_positive
        self.label_weight_neg = n_samples / (n_samples - n_positive)
        self.transform = transform

    def __getitem__(self, index):
        """
        Args:
            index: the index of item
        Returns:
            image, its label and its loss weight. The image is the transformed
            image, or the raw (H, W, 3) uint8 array when no transform is set.
        """
        # Repeat the single grey channel three times along the last axis.
        current_X = np.tile(self.X[index],3)
        label = self.y[index]
        label_inverse = 1 - label
        weight = np.add(label_inverse * self.label_weight_neg, label * self.label_weight_pos)
        # Previously `image` was only assigned when a transform existed, so
        # transform=None ended in an UnboundLocalError.
        image = current_X if self.transform is None else self.transform(current_X)
        label_tensor = torch.from_numpy(np.asarray(label)).type(torch.FloatTensor)
        weight_tensor = torch.from_numpy(np.asarray(weight)).type(torch.FloatTensor)
        return image, label_tensor, weight_tensor

    def __len__(self):
        """Number of samples in the cached chunk."""
        return len(self.y)

In [None]:
class DenseNet121(nn.Module):
    """torchvision DenseNet-121 with a sigmoid-activated linear classifier.

    The pretrained network is kept as is, except that its classifier is
    replaced by ``Linear(in_features, out_size)`` followed by ``Sigmoid``.
    """

    def __init__(self, out_size):
        """Build the network with ``out_size`` output units."""
        super().__init__()
        backbone = torchvision.models.densenet121(pretrained=True)
        head = nn.Sequential(
            nn.Linear(backbone.classifier.in_features, out_size),
            nn.Sigmoid(),
        )
        backbone.classifier = head
        self.densenet121 = backbone

    def forward(self, x):
        """Return the sigmoid outputs of the wrapped network for ``x``."""
        return self.densenet121(x)

In [None]:
# Restore a previously pickled model object.
# NOTE(review): pickle.load executes code from the file - trusted files only.
# NOTE(review): the model-initialisation cell further down rebinds `model` to a
# freshly built DenseNet121, so this object is discarded if both cells run.
with open("../input/model23/model_epoch (3).pkl", "rb") as checkpoint_file:
    model = pickle.load(checkpoint_file)

In [None]:
# Training configuration.
N_CLASSES = 1
BATCH_SIZE = 16
cudnn.benchmark = True

# Build the network on the GPU, wrap it in DataParallel and create its optimizer.
_single_gpu_model = DenseNet121(out_size=N_CLASSES).cuda()
model = nn.DataParallel(_single_gpu_model).cuda()
optimizer = optim.Adam(model.parameters(), lr=2e-4, betas=(0.9, 0.999))

In [None]:
def train_util(index, chunk_size=1000):
    """Cache one chunk of training images and labels in the working directory.

    Reads up to ``chunk_size`` images starting at ``file_name_train[index]``,
    resizes each to 256x256 and writes them to "train_X_small.npy". The
    matching slice of column 1 of ``train_y`` is pickled to
    "train_y_onehot_.pkl". Both files are overwritten on every call.

    Args:
        index: Position in ``file_name_train`` of the first image of the chunk.
        chunk_size: Maximum number of images in the chunk (default 1000).

    Raises:
        IndexError: if ``index`` lies past the end of ``file_name_train``.
    """
    if index >= len(file_name_train):
        raise IndexError("chunk start {} is out of range".format(index))
    # Explicit end bound instead of breaking out of the loop and reusing the
    # leaked loop variable afterwards.
    stop = min(index + chunk_size, len(file_name_train))

    train_X = []
    for image_path in file_name_train[index:stop]:
        img = imageio.imread(image_path)
        # Some files carry extra channels (e.g. shape (1024, 1024, 4)); keep
        # the first one. Testing ndim instead of an exact (1024, 1024) shape
        # also lets 2-D images of other sizes through without an IndexError.
        if img.ndim > 2:
            img = img[:, :, 0]
        img_resized = skimage.transform.resize(img, (256, 256))  # or use img[::4] here
        train_X.append((np.array(img_resized) / 255).reshape(256, 256, 1))
    train_X = np.array(train_X)
    np.save(os.path.join('./', "train_X_small.npy"), train_X)

    train_y_ = train_y.T[1][index:stop]
    with open(os.path.join('./', "train_y_onehot_.pkl"), "wb") as f:
        pickle.dump(train_y_, f)

In [None]:
def val_util(index, chunk_size=1000):
    """Cache one chunk of validation images and labels in the working directory.

    Reads up to ``chunk_size`` images starting at ``file_name_val[index]``,
    resizes each to 256x256 and writes them to "valid_X_small.npy". The
    matching slice of column 1 of ``val_y`` is pickled to "val_y_onehot_.pkl".
    Both files are overwritten on every call.

    Args:
        index: Position in ``file_name_val`` of the first image of the chunk.
        chunk_size: Maximum number of images in the chunk (default 1000).

    Raises:
        IndexError: if ``index`` lies past the end of ``file_name_val``.
    """
    if index >= len(file_name_val):
        raise IndexError("chunk start {} is out of range".format(index))
    # Explicit end bound instead of breaking out of the loop and reusing the
    # leaked loop variable afterwards.
    stop = min(index + chunk_size, len(file_name_val))

    val_X = []
    for image_path in file_name_val[index:stop]:
        img = imageio.imread(image_path)
        # Some files carry extra channels (e.g. shape (1024, 1024, 4)); keep
        # the first one. Testing ndim instead of an exact (1024, 1024) shape
        # also lets 2-D images of other sizes through without an IndexError.
        if img.ndim > 2:
            img = img[:, :, 0]
        img_resized = skimage.transform.resize(img, (256, 256))  # or use img[::4] here
        val_X.append((np.array(img_resized) / 255).reshape(256, 256, 1))
    val_X = np.array(val_X)
    np.save(os.path.join('./', "valid_X_small.npy"), val_X)

    val_y_ = val_y.T[1][index:stop]
    with open(os.path.join('./', "val_y_onehot_.pkl"), "wb") as f:
        pickle.dump(val_y_, f)

In [None]:
def test_util(index, chunk_size=1000):
    """Cache one chunk of test images and labels in the working directory.

    Reads up to ``chunk_size`` images starting at ``file_name_test[index]``,
    resizes each to 256x256, prints how many were read and writes them to
    "test_X_small.npy". The matching slice of column 1 of ``test_y`` is
    pickled to "test_y_onehot_.pkl". Both files are overwritten on every call.

    Args:
        index: Position in ``file_name_test`` of the first image of the chunk.
        chunk_size: Maximum number of images in the chunk (default 1000).

    Raises:
        IndexError: if ``index`` lies past the end of ``file_name_test``.
    """
    if index >= len(file_name_test):
        raise IndexError("chunk start {} is out of range".format(index))
    # Explicit end bound instead of breaking out of the loop and reusing the
    # leaked loop variable afterwards.
    stop = min(index + chunk_size, len(file_name_test))

    test_X = []
    for image_path in file_name_test[index:stop]:
        img = imageio.imread(image_path)
        # Some files carry extra channels (e.g. shape (1024, 1024, 4)); keep
        # the first one. Testing ndim instead of an exact (1024, 1024) shape
        # also lets 2-D images of other sizes through without an IndexError.
        if img.ndim > 2:
            img = img[:, :, 0]
        img_resized = skimage.transform.resize(img, (256, 256))  # or use img[::4] here
        test_X.append((np.array(img_resized) / 255).reshape(256, 256, 1))
    print(len(test_X))
    test_X = np.array(test_X)
    np.save(os.path.join('./', "test_X_small.npy"), test_X)

    test_y_ = test_y.T[1][index:stop]
    with open(os.path.join('./', "test_y_onehot_.pkl"), "wb") as f:
        pickle.dump(test_y_, f)

In [None]:
def train():
    """Run one pass over all training files, one cached chunk at a time.

    For every chunk of 1000 files: cache it with ``train_util``, augment each
    image once (random 224 crop + horizontal flip), shuffle, and optimise the
    global ``model`` with unweighted binary cross-entropy in mini-batches of
    ``BATCH_SIZE``. A trailing partial batch is skipped. Prints the chunk
    start index after each chunk.
    """
    # Built once: none of these depend on the chunk.
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
    ])
    criterion = nn.BCELoss()
    # Created once per call so that Adam's running moments carry over from
    # chunk to chunk instead of being reset every 1000 images.
    optimizer = optim.Adam(model.parameters(),lr=0.0002, betas=(0.9, 0.999))

    for k in range(0,len(file_name_train),1000):
        train_util(k)
        train_dataset = ChestXrayDataSet(train_or_valid="train", transform=train_transform)

        images = []
        labels = []
        for j in range(len(train_dataset)):
            # The per-sample weight is not used by the unweighted loss below.
            single_img, single_label, _ = train_dataset[j]
            images.append(single_img)
            labels.append(single_label)

        # Shuffle the chunk; a single random permutation is already uniform.
        perm_index = torch.randperm(len(labels))
        augment_img = torch.stack(images)[perm_index]
        augment_label = torch.stack(labels)[perm_index]

        total_length = len(augment_img)
        # Visit full batches only. The previous `index+BATCH_SIZE >= total`
        # test also skipped the last batch when it was exactly full.
        for start in range(0, total_length - BATCH_SIZE + 1, BATCH_SIZE):
            optimizer.zero_grad()
            inputs_sub = augment_img[start:start+BATCH_SIZE].cuda()
            labels_sub = augment_label[start:start+BATCH_SIZE].cuda()

            # forward + backward + optimize
            outputs = model(inputs_sub)
            # Match the target to the output shape instead of a hard-coded
            # (16, 1), so that changing BATCH_SIZE keeps working.
            loss = criterion(outputs, torch.reshape(labels_sub, outputs.shape))
            loss.backward()
            optimizer.step()
        print(k)

In [None]:
def val():
    """Evaluate the global ``model`` on the validation files.

    Processes the validation files in cached chunks of 1000 (``val_util``),
    collects targets and predictions for all of them, prints the per-class and
    average AUROC and returns the average.

    The model is put in eval mode for the duration of the call and its
    previous mode is restored afterwards.

    Returns:
        Mean AUROC over the ``N_CLASSES`` outputs.
    """
    val_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
    ])
    # Collect batches and concatenate once at the end instead of growing a
    # tensor with torch.cat on every batch.
    gt_batches = []
    pred_batches = []

    was_training = model.training
    model.eval()
    try:
        for k in range(0,len(file_name_val),1000):
            val_util(k)
            valid_dataset = ChestXrayDataSet(train_or_valid="valid", transform=val_transform)
            valid_loader = DataLoader(dataset=valid_dataset, batch_size=16, shuffle=False, num_workers=16)
            # `Variable(..., volatile=True)` has no effect on torch >= 0.4;
            # torch.no_grad() is what actually disables autograd here.
            with torch.no_grad():
                for inp, target, _ in valid_loader:
                    gt_batches.append(target.cuda())
                    output = model(inp.view(-1, 3, 224, 224).cuda())
                    pred_batches.append(output)
            print(k)
    finally:
        model.train(was_training)

    gt_all = torch.cat(gt_batches, 0)
    pred_all = torch.cat(pred_batches, 0)

    CLASS_NAMES = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema',
       'Effusion', 'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration',
       'Mass', 'No Finding', 'Nodule', 'Pleural_Thickening', 'Pneumonia',
       'Pneumothorax']

    AUROCs = compute_AUCs(gt_all, pred_all)
    AUROC_avg = np.array(AUROCs).mean()
    print('The average AUROC is {AUROC_avg:.3f}'.format(AUROC_avg=AUROC_avg))
    # NOTE(review): with N_CLASSES == 1 this prints CLASS_NAMES[0], while the
    # cached labels come from column 1 of the one-hot matrix - the printed
    # class name may not be the class that was evaluated. Confirm.
    for i in range(N_CLASSES):
        print('The AUROC of {} is {}'.format(CLASS_NAMES[i], AUROCs[i]))
    return AUROC_avg

In [None]:
# Display how many test image paths are available.
len(file_name_test)

In [None]:
def test():
    """Evaluate the global ``model`` on the test files.

    Processes the test files in cached chunks of 1000 (``test_util``),
    collects targets and predictions for all of them, prints the per-class and
    average AUROC and returns the average.

    The model is put in eval mode for the duration of the call and its
    previous mode is restored afterwards; previously the mode was whatever the
    last executed cell had left behind.

    Returns:
        Mean AUROC over the ``N_CLASSES`` outputs.
    """
    test_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
    ])
    # Collect batches and concatenate once at the end instead of growing a
    # tensor with torch.cat on every batch.
    gt_batches = []
    pred_batches = []

    was_training = model.training
    model.eval()
    try:
        for k in range(0,len(file_name_test),1000):
            test_util(k)
            test_dataset = ChestXrayDataSet(train_or_valid="test", transform=test_transform)
            test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False, num_workers=2)
            # `Variable(..., volatile=True)` has no effect on torch >= 0.4;
            # torch.no_grad() is what actually disables autograd here.
            with torch.no_grad():
                for inp, target, _ in test_loader:
                    gt_batches.append(target.cuda())
                    output = model(inp.view(-1, 3, 224, 224).cuda())
                    pred_batches.append(output)
            print(k)
    finally:
        model.train(was_training)

    gt_all = torch.cat(gt_batches, 0)
    pred_all = torch.cat(pred_batches, 0)

    CLASS_NAMES = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema',
       'Effusion', 'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration',
       'Mass', 'No Finding', 'Nodule', 'Pleural_Thickening', 'Pneumonia',
       'Pneumothorax']
    AUROCs = compute_AUCs(gt_all, pred_all)
    AUROC_avg = np.array(AUROCs).mean()
    print('The average AUROC is {AUROC_avg:.3f}'.format(AUROC_avg=AUROC_avg))
    # NOTE(review): with N_CLASSES == 1 this prints CLASS_NAMES[0], while the
    # cached labels come from column 1 of the one-hot matrix - the printed
    # class name may not be the class that was evaluated. Confirm.
    for i in range(N_CLASSES):
        print('The AUROC of {} is {}'.format(CLASS_NAMES[i], AUROCs[i]))

    return AUROC_avg

In [None]:
def compute_AUCs(gt, pred):
    """Compute the AUROC of every output column.

    Args:
        gt: Tensor of ground-truth labels, shape (n,) or (n, N_CLASSES).
        pred: Tensor of predicted scores, shape (n,) or (n, N_CLASSES).

    Returns:
        List with one ``roc_auc_score`` value per class, ``N_CLASSES`` long.
    """
    gt_np = gt.cpu().numpy()
    pred_np = pred.cpu().numpy()
    # Promote 1-D inputs to a single column. 2-D inputs are left alone, so
    # multi-label ground truth no longer fails on a hard-coded (n, 1) reshape.
    if gt_np.ndim == 1:
        gt_np = gt_np[:, np.newaxis]
    if pred_np.ndim == 1:
        pred_np = pred_np[:, np.newaxis]
    print(gt_np.shape, pred_np.shape)
    return [roc_auc_score(gt_np[:, i], pred_np[:, i]) for i in range(N_CLASSES)]


In [None]:
epochs=2
for epoch in range(epochs):
    train()
    model.eval()
    avg_roc = val()
    # Tag the checkpoint with the 1-based epoch number; the name previously
    # carried a hard-coded "2" regardless of the epoch being saved.
    torch.save(model.state_dict(),'DenseNet121_aug4_pretrain_noWeight_'+str(epoch + 1)+'_'+str(avg_roc)+'.pkl')
    model.train()
  

In [None]:
#torch.save(model.state_dict(),'DenseNet121_aug4_pretrain_noWeight_'+str(1)+'_'+str(AUROC_avg)+'.pkl')

In [None]:
# Display the model object.
model

In [None]:
import pickle

# Pickle the whole model object (not just its state_dict) into the working directory.
model_pickle_path = './' + "/model_epoch_Cardiomegaly2.pkl"
with open(model_pickle_path, "wb") as model_file:
    pickle.dump(model, model_file)

In [None]:
# Evaluate on the test files; the returned average AUROC is shown as the cell output.
test()