In [8]:
import torch.nn as nn            # containing various building blocks for your neural networks
import torch.optim as optim      # implementing various optimization algorithms
import torch.nn.functional as F  # a lower level (compared to torch.nn) interface

# torchvision: popular datasets, model architectures, and common image transformations for computer vision.
import torchvision
# transforms: transformations useful for image processing
import torchvision.transforms as transforms

from torch.utils.data import Dataset, DataLoader

import glob
import os.path as osp
import numpy as np
from PIL import Image
import csv
import pandas as pd

In [None]:
class CheXpertData(Dataset):
    """
    A customized dataset for CheXpert.

    Samples are listed in a CSV file: column 0 holds the image path and
    columns 5 onwards hold the raw label values. Each item is returned as an
    ``(image, label)`` pair where ``image`` is an RGB PIL image (or whatever
    ``transform`` returns) and ``label`` is a list of 0/1 integers.
    """
    def __init__(self,
                 root,
                 transform=None,
                 preload=False,
                 policy="ones",
                 image_root='datasets/chexpert-small/'):
        """ Initialize the CheXpert dataset

        Args:
            - root: path of the CSV file that lists the samples (it is opened
              directly with ``open``; the first row is skipped as a header)
            - transform: a custom transform function applied to every image
            - preload: if True, load every image and label into memory now
            - policy: how uncertain labels (-1) are mapped: "ones" maps them
              to 1; "zeroes" (and any other value) maps them to 0
            - image_root: directory that the image paths in the CSV are
              relative to
        """
        self.images = None
        self.labels = None
        self.filenames = []
        self.root = root
        self.transform = transform
        self.policy = policy
        self.image_root = image_root

        # read filenames; newline='' is what the csv module recommends for
        # files handed to csv.reader
        with open(self.root, 'r', newline='') as f:
            csvReader = csv.reader(f)
            next(csvReader, None)  # skip the header row
            for line in csvReader:
                # keep the raw label strings; the policy is applied on access
                self.filenames.append((line[0], line[5:]))

        # if preload dataset into memory
        if preload:
            self._preload()

        self.len = len(self.filenames)

    def _applypolicy(self, label):
        """
        Convert raw label values into a new list of 0/1 integers.

        Blank (falsy) entries and every value other than 1 / -1 become 0,
        1 stays 1, and -1 (uncertain) follows ``self.policy``. The input
        sequence is left untouched so the raw CSV values stay available;
        all entries are converted, however many there are.

        Raises:
            ValueError: if a non-blank entry cannot be parsed as a float.
        """
        uncertain_as = 1 if self.policy == "ones" else 0
        converted = []
        for raw in label:
            if not raw:
                # blank cell (or an already-converted 0)
                converted.append(0)
                continue
            value = float(raw)
            if value == 1:
                converted.append(1)
            elif value == -1:
                converted.append(uncertain_as)
            else:
                converted.append(0)
        return converted

    def _load_image(self, image_fn):
        """
        Open the image stored at ``image_fn`` (relative to ``image_root``)
        and return it converted to RGB.
        """
        # The converted image is a separate object holding the pixel data,
        # so the file-backed image can be closed as soon as the block exits.
        with Image.open(osp.join(self.image_root, image_fn)) as img:
            return img.convert('RGB')

    def _preload(self):
        """
        Preload dataset to memory
        """
        self.labels = []
        self.images = []
        for image_fn, label in self.filenames:
            self.images.append(self._load_image(image_fn))
            self.labels.append(self._applypolicy(label))

    # probably the most important to customize.
    def __getitem__(self, index):
        """ Get a sample from the dataset

        Returns:
            (image, label): the (optionally transformed) image and its list
            of 0/1 labels.
        """
        if self.images is not None:
            # If dataset is preloaded
            image = self.images[index]
            label = self.labels[index]
        else:
            # If on-demand data loading
            image_fn, raw_label = self.filenames[index]
            label = self._applypolicy(raw_label)
            image = self._load_image(image_fn)

        # May use transform function to transform samples
        # e.g., random crop, whitening
        if self.transform is not None:
            image = self.transform(image)
        # return image and label
        return image, label

    def __len__(self):
        """
        Total number of samples in the dataset
        """
        return self.len

In [3]:
# Collect (image path, raw label columns) pairs from the CheXpert validation CSV.
filenames_chexpert = []
with open('datasets/chexpert-small/CheXpert-v1.0-small/valid.csv', 'r') as f:
    csvReader = csv.reader(f)
    next(csvReader, None)  # skip the header row
    k = 0  # ends up holding the number of data rows read
    for k, line in enumerate(csvReader, start=1):
        # column 0 is the image path; columns 5 onwards are the label values
        image_name, label = line[0], line[5:]
        filenames_chexpert.append((image_name, label))

In [4]:
# Open the first image listed in the CheXpert validation CSV; the path stored
# in the CSV is appended to the datasets/chexpert-small/ directory.
image = Image.open('datasets/chexpert-small/' + filenames_chexpert[0][0])

In [5]:
# Display the image dimensions (PIL documents `size` as a (width, height) tuple).
image.size

(390, 320)

In [9]:
# Load the NIH sample label CSV into a DataFrame for inspection.
nih_table = pd.read_csv('datasets/nih-small/sample_labels.csv')

In [10]:
# Preview the first rows to see which columns the NIH table provides.
nih_table.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImageWidth,OriginalImageHeight,OriginalImagePixelSpacing_x,OriginalImagePixelSpacing_y
0,00000013_005.png,Emphysema|Infiltration|Pleural_Thickening|Pneu...,5,13,060Y,M,AP,3056,2544,0.139,0.139
1,00000013_026.png,Cardiomegaly|Emphysema,26,13,057Y,M,AP,2500,2048,0.168,0.168
2,00000017_001.png,No Finding,1,17,077Y,M,AP,2500,2048,0.168,0.168
3,00000030_001.png,Atelectasis,1,30,079Y,M,PA,2992,2991,0.143,0.143
4,00000032_001.png,Cardiomegaly|Edema|Effusion,1,32,055Y,F,AP,2500,2048,0.168,0.168


In [11]:
# Collect (image file name, label string) pairs from the NIH sample CSV.
filenames_nih = []
with open('datasets/nih-small/sample_labels.csv', 'r') as f:
    csvReader = csv.reader(f)
    next(csvReader, None)  # skip the header row
    k = 0  # ends up holding the number of data rows read
    for k, line in enumerate(csvReader, start=1):
        # column 0 is the image file name; column 1 is the label string
        image_name, label = line[0], line[1]
        filenames_nih.append((image_name, label))

In [12]:
# Sanity-check the first (image name, label string) pair; several findings
# are packed into one string separated by '|'.
filenames_nih[0]

('00000013_005.png', 'Emphysema|Infiltration|Pleural_Thickening|Pneumothorax')

In [16]:
# Open the first NIH sample image; the file name from the CSV is appended to
# the datasets/nih-small/images/ directory.
image_nih = Image.open('datasets/nih-small/images/' + filenames_nih[0][0])

In [17]:
# Display the NIH image dimensions (PIL documents `size` as (width, height)).
image_nih.size

(1024, 1024)