In [1]:
import torch.nn as nn            # containing various building blocks for your neural networks
import torch.optim as optim      # implementing various optimization algorithms
import torch.nn.functional as F  # a lower level (compared to torch.nn) interface

# torchvision: popular datasets, model architectures, and common image transformations for computer vision.
import torchvision
# transforms: transformations useful for image processing
import torchvision.transforms as transforms

from torch.utils.data import Dataset, DataLoader

import glob
import os.path as osp
import numpy as np
from PIL import Image
import csv
import pandas as pd

In [None]:
class CheXpertData(Dataset):
    """
    A customized dataset for CheXpert.

    Each sample is an ``(image, label)`` pair: ``image`` is an RGB PIL image
    (or whatever ``transform`` returns for it) and ``label`` is a list of 0/1
    integers, one per label column read from the CSV.
    """
    def __init__(self,
                 root,
                 transform=None,
                 preload=False,
                 policy="ones",
                 image_root='datasets/chexpert-small/'):
        """ Initialize the CheXpert dataset

        Args:
            - root: path of the CSV file listing the samples; column 0 is
              the image path and columns 5 onwards are the labels
            - transform: a custom transform function applied to each image
            - preload: if True, load every image into memory up front
            - policy: how uncertain labels (-1) are mapped: "ones" maps them
              to 1; "zeroes" (and any other value) maps them to 0
            - image_root: directory that the image paths in the CSV are
              relative to
        """
        self.images = None
        self.labels = None
        self.filenames = []
        self.root = root
        self.transform = transform
        self.policy = policy
        self.image_root = image_root

        # read filenames; newline='' leaves line-ending handling to the csv module
        with open(self.root, 'r', newline='') as f:
            csv_reader = csv.reader(f)
            next(csv_reader, None)  # skip the header row
            for row in csv_reader:
                if not row:
                    # csv.reader yields [] for blank lines (e.g. a trailing newline)
                    continue
                self.filenames.append((row[0], row[5:]))

        self.len = len(self.filenames)

        # if preload dataset into memory
        if preload:
            self._preload()

    def _applypolicy(self, label):
        """
        Convert raw label entries into a new list of 0/1 integers.

        An empty entry becomes 0, a value equal to 1 becomes 1, a value equal
        to -1 (uncertain) becomes 1 under the "ones" policy and 0 otherwise,
        and every other value becomes 0. The input list is left untouched so
        the raw labels kept in ``self.filenames`` are never overwritten.

        Raises:
            ValueError: if a non-empty entry cannot be parsed as a float.
        """
        uncertain_value = 1 if self.policy == "ones" else 0
        converted = []
        for entry in label:
            if not entry:
                # missing annotation (empty CSV field) counts as negative
                converted.append(0)
                continue
            value = float(entry)
            if value == 1:
                converted.append(1)
            elif value == -1:
                converted.append(uncertain_value)
            else:
                converted.append(0)
        return converted

    def _load_image(self, image_fn):
        """
        Open ``image_fn`` (relative to ``self.image_root``) as an RGB image.

        The file-backed image is closed as soon as the converted copy exists,
        so no file handle stays open per sample.
        """
        path = osp.join(self.image_root, image_fn)
        with Image.open(path) as source:
            return source.convert('RGB')

    def _preload(self):
        """
        Preload dataset to memory
        """
        self.labels = []
        self.images = []
        for image_fn, raw_label in self.filenames:
            self.images.append(self._load_image(image_fn))
            self.labels.append(self._applypolicy(raw_label))

    # probably the most important to customize.
    def __getitem__(self, index):
        """ Get a sample from the dataset

        Returns:
            An ``(image, label)`` tuple; ``image`` has been passed through
            ``self.transform`` when one was given.
        """
        if self.images is not None:
            # If dataset is preloaded
            image = self.images[index]
            label = self.labels[index]
        else:
            # If on-demand data loading
            image_fn, raw_label = self.filenames[index]
            label = self._applypolicy(raw_label)
            image = self._load_image(image_fn)

        # May use transform function to transform samples
        # e.g., random crop, whitening
        if self.transform is not None:
            image = self.transform(image)
        # return image and label
        return image, label

    def __len__(self):
        """
        Total number of samples in the dataset
        """
        return self.len

In [3]:
# Collect (image path, raw label strings) pairs from the CheXpert validation CSV.
filenames_chexpert = []
# newline='' leaves line-ending handling to the csv module, as its docs require.
with open('datasets/chexpert-small/CheXpert-v1.0-small/valid.csv', 'r', newline='') as f:
    csv_reader = csv.reader(f)
    next(csv_reader, None)  # skip the header row
    for row in csv_reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline)
            continue
        # column 0 is the image path; columns 5 onwards are the label columns
        filenames_chexpert.append((row[0], row[5:]))

In [2]:
# Load the CheXpert validation CSV into a DataFrame for inspection.
chexpert_tb = pd.read_csv('datasets/chexpert-small/CheXpert-v1.0-small/valid.csv')

In [14]:
# Display the rows at positions 100-119 of the validation table.
chexpert_tb.iloc[100:120]

Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
100,CheXpert-v1.0-small/valid/patient64615/study1/...,Male,66,Frontal,PA,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
101,CheXpert-v1.0-small/valid/patient64615/study1/...,Male,66,Lateral,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
102,CheXpert-v1.0-small/valid/patient64616/study1/...,Male,90,Frontal,PA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
103,CheXpert-v1.0-small/valid/patient64616/study1/...,Male,90,Lateral,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
104,CheXpert-v1.0-small/valid/patient64616/study1/...,Male,90,Lateral,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
105,CheXpert-v1.0-small/valid/patient64617/study1/...,Female,56,Frontal,AP,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
106,CheXpert-v1.0-small/valid/patient64618/study1/...,Female,55,Frontal,PA,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
107,CheXpert-v1.0-small/valid/patient64618/study1/...,Female,55,Lateral,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
108,CheXpert-v1.0-small/valid/patient64619/study1/...,Female,83,Frontal,PA,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
109,CheXpert-v1.0-small/valid/patient64620/study1/...,Female,77,Frontal,AP,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0


In [25]:
# Print the full image path of row 200; the Series display truncates it.
# NOTE(review): the trailing "# 116" is unexplained — confirm its meaning or remove it.
print(chexpert_tb.iloc[200]['Path']) # 116
# Show every column of row 200.
chexpert_tb.iloc[200]

CheXpert-v1.0-small/valid/patient64707/study1/view1_frontal.jpg


Path                          CheXpert-v1.0-small/valid/patient64707/study1/...
Sex                                                                        Male
Age                                                                          47
Frontal/Lateral                                                         Frontal
AP/PA                                                                        AP
No Finding                                                                    0
Enlarged Cardiomediastinum                                                    0
Cardiomegaly                                                                  0
Lung Opacity                                                                  0
Lung Lesion                                                                   0
Edema                                                                         0
Consolidation                                                                 0
Pneumonia                               

In [4]:
# Open the first image listed in the CheXpert validation CSV.
image = Image.open('datasets/chexpert-small/' + filenames_chexpert[0][0])

In [5]:
# PIL reports size as (width, height) in pixels.
image.size

(390, 320)

In [5]:
# Load the NIH sample label CSV into a DataFrame for inspection.
nih_table = pd.read_csv('datasets/nih-small/sample_labels.csv')

In [27]:
# Display the rows at positions 350-369 of the NIH label table.
nih_table.iloc[350:370]

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImageWidth,OriginalImageHeight,OriginalImagePixelSpacing_x,OriginalImagePixelSpacing_y
350,00001855_023.png,Atelectasis|Infiltration|Pleural_Thickening,23,1855,013Y,M,AP,2500,2048,0.168,0.168
351,00001855_030.png,Atelectasis|Infiltration,30,1855,013Y,M,AP,2048,2500,0.168,0.168
352,00001872_014.png,No Finding,14,1872,041Y,M,PA,2992,2991,0.143,0.143
353,00001900_038.png,Effusion|Mass|Pleural_Thickening,38,1900,052Y,M,PA,2992,2991,0.143,0.143
354,00001906_002.png,No Finding,2,1906,043Y,M,PA,3056,2544,0.139,0.139
355,00001907_000.png,No Finding,0,1907,043Y,F,PA,2048,2500,0.168,0.168
356,00001912_002.png,No Finding,2,1912,055Y,F,PA,2992,2991,0.143,0.143
357,00001913_000.png,No Finding,0,1913,059Y,M,PA,2048,2500,0.171,0.171
358,00001931_001.png,No Finding,1,1931,022Y,M,AP,2500,2048,0.171,0.171
359,00001931_022.png,No Finding,22,1931,023Y,M,AP,2500,2048,0.168,0.168


In [11]:
# Collect (image file name, finding-label string) pairs from the NIH sample CSV.
filenames_nih = []
# newline='' leaves line-ending handling to the csv module, as its docs require.
with open('datasets/nih-small/sample_labels.csv', 'r', newline='') as f:
    csv_reader = csv.reader(f)
    next(csv_reader, None)  # skip the header row
    for row in csv_reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline)
            continue
        # column 0 is the image file name; column 1 holds the '|'-separated findings
        filenames_nih.append((row[0], row[1]))

In [12]:
# Inspect the first (image file name, findings) pair.
filenames_nih[0]

('00000013_005.png', 'Emphysema|Infiltration|Pleural_Thickening|Pneumothorax')

In [16]:
# Open the first image listed in the NIH sample CSV.
image_nih = Image.open('datasets/nih-small/images/' + filenames_nih[0][0])

In [17]:
# PIL reports size as (width, height) in pixels.
image_nih.size

(1024, 1024)