In [52]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

import time
import os
from skimage import io, color

In [13]:
# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

No GPU available, using the CPU instead.


# Baseline

In [5]:
resnet152 = models.resnet152(pretrained=True)
resnet152 = models.resnet152(num_classes=2)

# Data loading

In [69]:
train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize([364, 364]),
        transforms.RandomResizedCrop(320),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

validation_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize([364,364]),
        transforms.CenterCrop(320),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

class ChestXray(Dataset):

    def __init__(self, csv_file, image_root_dir, transform=None):

        self.data_frame = pd.read_csv(csv_file)
        self.image_root_dir = image_root_dir
        self.image_path = self.data_frame['image_index']
        self.transform = transform
        
    def __len__(self):
        return len(self.data_frame)
    
    def __getitem__(self, index):
        image_filename = self.image_root_dir + self.image_path[index]
        image = io.imread(image_filename, as_gray=True)
        
        # sample is a dictionary which includes the image and 2 labels
        sample = {}
        
        # since the input to pre-trained network should have 3 channels
        # we need to pad it with two repeatition
        # Note that we need to transpose it since the input size for ToPILImage() is 
        # H*W*C instead of C*H*W!!
        image = np.repeat(image[None,...], 3, axis=0).transpose(1, 2, 0)
        
        # transform the image if transform is not None
        if self.transform:
            image = self.transform(image)
            
        # add image into the sample dictionary
        sample["image"] = image
        
        # get the label for the image
        label_col_names = ["No Finding", "Enlarged Cardiomediastinum", "Cardiomegaly", "Lung Opacity", 
                           "Lung Lesion", "Edema", "Consolidation", "Pneumonia", "Atelectasis",
                           "Pneumothorax", "Pleural Effusion", "Pleural Other", "Fracture", "Support Devices"]
        
        # to get the label for each column
        # 0 --> negative
        # 1 --> positive
        # 2 --> uncertainty (No Finding has no 2)
        for label in label_col_names:
            if self.data_frame[label][index] == 0.0:
                sample[label] = torch.LongTensor([0])
            elif self.data_frame[label][index] == 1.0:
                sample[label] = torch.LongTensor([1])
            else:
                sample[label] = torch.LongTensor([2])
        
        return sample


if __name__ == "__main__":
    
    # use the real dataloader to test
    train_loader = DataLoader(CheXpertDataset(csv_file="./data/CheXpert-v1.0-small/train_preprocessed.csv", image_root_dir="./data/", transform=train_transform), batch_size=5, shuffle=True)
    
    # to get a sample data
    for batch_idx, data in enumerate(train_loader):
        if batch_idx == 1:
            break
        batched_samples = data    

    # to print the shape for each item in batched_samples
    for key, value in batched_samples.items():
        print(key, value.shape)
© 2020 GitHub, Inc.

In [61]:
train_df_path = '../../chest_xray_origin/train.csv'
val_df_path = '../../chest_xray_origin/val.csv'
root_dir = '../../chest_xray_origin/all/'

transformed_dataset = {'train': ChestXray(train_df_path, root_dir), \
                       'validate':ChestXray(val_df_path, root_dir)}
bs = 10
dataloader = {x: DataLoader(transformed_dataset[x], batch_size=bs, shuffle=True, num_workers=0) \
              for x in ['train', 'validate']}
data_sizes ={x: len(transformed_dataset[x]) for x in ['train', 'validate']}

# resnet152.to(devicex)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet152.parameters())

In [62]:
def train_model(model, dataloader, optimizer, loss_fn, num_epochs=50, verbose=False, scheduler=None):
    acc_dict = {'train':[], 'validate':[]}
    loss_dict = {'train':[], 'validate':[]}
    best_acc = 0
    phases = ['train', 'validate']
    since = time.time()
    for i in range(num_epochs):
        print('Epoch: {}/{}'.format(i, num_epochs-1))
        print('-'*10)
        for p in phases:
            running_correct = 0
            running_loss = 0
            running_total = 0
            if p == 'train':
                model.train()
            else:
                model.eval()
            for data in dataloader[p]:
                optimizer.zero_grad()
                image = data['x'].to(device,dtype=torch.float)
                label = data['y'].to(device,dtype=torch.long)
                output = model(image)
                loss = loss_fn(output, label)
                _, preds = torch.max(output, dim = 1)
                num_imgs = image.size()[0]
                running_correct += torch.sum(preds == label).item()
                running_loss += loss.item()*num_imgs
                running_total += num_imgs
                if p == 'train':
                    loss.backward()
                    optimizer.step()
            epoch_acc = float(running_correct/running_total)
            epoch_loss = float(running_loss/running_total)
            if verbose or (i%10 == 0):
                print('Phase:{}, epoch loss: {:.4f} Acc: {:.4f}'.format(p, epoch_loss, epoch_acc))
            
            acc_dict[p].append(epoch_acc)
            loss_dict[p].append(epoch_loss)
            if p == 'validate':
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = model.state_dict()
            else:
                if scheduler:
                    scheduler.step()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val acc: {:4f}'.format(best_acc))
    
    model.load_state_dict(best_model_wts)
    
    return model, acc_dict, loss_dict

In [63]:
m1, a1, l1 = train_model(resnet152, dataloader, optimizer, criterion, verbose=True, scheduler=None)

Epoch: 0/49
----------


RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 696 and 1295 in dimension 2 at /Users/distiller/project/conda/conda-bld/pytorch_1579022061893/work/aten/src/TH/generic/THTensor.cpp:612