In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os

# Any results you write to the current directory are saved as output.

In [2]:
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import torch
from torch import nn, optim
import torch.nn.functional as F
import torch.utils.data as utils
from torchvision import transforms, models

In [3]:
height = 224
width = height * 1.5
dataset_path = "./preprocess"

In [4]:
dataset_path = '/media/ryan/Ryan 1TB/data/fundus-caothang'
file_paths = []
for filename in os.listdir(dataset_path):
    if filename.endswith('jpg') and filename[0] != '.':
        file_paths.append(os.path.join(dataset_path, filename))

In [5]:
len(file_paths)

2098

In [4]:
file_paths = []
for filename in os.listdir(dataset_path):
    if 'left' in filename or 'right' in filename:
        file_paths.append(os.path.join(dataset_path, filename))

In [5]:
import random
from random import shuffle
random.seed(2019)

In [6]:
shuffle(file_paths)

In [7]:
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1
train_files = file_paths[:int(len(file_paths)*train_ratio)]
val_files = file_paths[int(len(file_paths)*train_ratio):int(len(file_paths)*(train_ratio + val_ratio))]
test_files = file_paths[int(len(file_paths)*(train_ratio + val_ratio)):]

In [6]:
class FundusDataset(utils.Dataset):   
    def __init__(self, image_paths, transform=None):
        self.image_paths_list = image_paths 
        # List of image paths      
        self.labels_list = [] 
        # List of labels correlated      
        self.transform = transform 
        # Transformation applying to each data piece            
        # Run through the folder and get the label of each image inside  
        for filename in image_paths:
            self.labels_list.append(0 if 'left' in filename else 1)
        
    def __getitem__(self, index):      
        '''      Is called when get DataLoader iterated      '''      
        # Get image path with index      
        image_path = self.image_paths_list[index]      
        # Read image with Pillow library      
        image = Image.open(image_path).convert('RGB')      
        # Get label      
        image_label = self.labels_list[index]      
        # Post-transformation apply for image      
        if self.transform != None:          
            image = self.transform(image)            
        return image, image_label, image_path      
    def __len__(self):      
        return len(self.image_paths_list)

In [7]:
transform = transforms.Compose([transforms.Resize((int(width), int(height))),                                
                                transforms.ToTensor(),                                
                                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 

In [8]:
BATCH_SIZE = 64

In [9]:
test_dataset = FundusDataset(file_paths, transform)
testloader = utils.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [11]:
train_dataset = FundusDataset(train_files, transform)
trainloader = utils.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [12]:
val_dataset = FundusDataset(val_files, transform)
valloader = utils.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [32]:
test_dataset = FundusDataset(test_files, transform)
testloader = utils.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [10]:
class FundusNet(nn.Module):
    def __init__(self, is_trained):
        super().__init__()
        self.resnet = models.resnet18(pretrained=is_trained)
        kernel_count = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(nn.Linear(2560, 2),nn.LogSoftmax(dim=1))
    def forward(self, x):
        x = self.resnet(x)
        return x

In [11]:
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:    
    print('CUDA is not available.  Training on CPU ...')
else:    
    print('CUDA is available!  Training on GPU ...')

CUDA is available!  Training on GPU ...


In [12]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
model = FundusNet(True)
if train_on_gpu:
    model = torch.nn.DataParallel(model).cuda()
state_dict = torch.load('best_model.pth')
model.load_state_dict(state_dict)
loss = nn.NLLLoss()
optimizer = optim.Adam (model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, factor = 0.1, patience = 5, mode = 'min', verbose=True)

In [None]:
import sys

epochs = 100
train_losses, val_losses = [], []
best_loss = 999999999
for e in range(epochs):
    running_loss = 0
    for step, (images, labels) in enumerate(trainloader):
        
        if train_on_gpu:               
            images, labels = images.cuda(), labels.cuda()
        optimizer.zero_grad()
        ps = model(images)            
        loss_val = loss(ps, labels)
        loss_val.backward()            
        optimizer.step()
        running_loss += loss_val.item()
        sys.stdout.write(f"\rEpoch {e+1}/{epochs}... Training step {step+1}/{len(trainloader)}... Loss {running_loss/(step+1)}")
    else:
        val_loss = 0            
        accuracy = 0
        with torch.no_grad():                
            for step, (images, labels) in enumerate(valloader):                    
                if train_on_gpu:                       
                    images, labels = images.cuda(), labels.cuda()                    
                log_ps = model(images)
                val_loss += loss(log_ps, labels)
                ps = torch.exp(log_ps)                    
                top_p, top_class = ps.topk(1, dim=1)                    
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor))
                sys.stdout.write(f"\rEpoch {e+1}/{epochs}... Validating step {step+1}/{len(valloader)}... Loss {val_loss/(step+1)}")
        train_losses.append(running_loss/len(trainloader))
        val_losses.append(val_loss/len(valloader))
        scheduler.step(val_loss/len(valloader))
        print("\nEpoch: {}/{}.. ".format(e+1, epochs),                  
              "Training Loss: {:.3f}.. ".format(running_loss/len(trainloader)),                  
              "Val Loss: {:.3f}.. ".format(val_loss/len(valloader)),                  
              "Val Accuracy: {:.3f}".format(accuracy/len(valloader)))
        if best_loss > val_loss/len(valloader):
            print("Improve loss of model from {} to {}".format(best_loss, val_loss/len(valloader)))
            best_loss = val_loss/len(valloader)
            torch.save(model.state_dict(), 'best_model.pth')

Epoch 1/100... Validating step 139/139... Loss 0.08697611838579178
Epoch: 1/100..  Training Loss: 0.082..  Val Loss: 0.087..  Val Accuracy: 0.980
Improve loss of model from 999999999 to 0.08697611838579178
Epoch 2/100... Validating step 139/139... Loss 0.05596523731946945
Epoch: 2/100..  Training Loss: 0.059..  Val Loss: 0.056..  Val Accuracy: 0.985
Improve loss of model from 0.08697611838579178 to 0.05596523731946945
Epoch 3/100... Validating step 139/139... Loss 0.050387900322675705
Epoch: 3/100..  Training Loss: 0.052..  Val Loss: 0.050..  Val Accuracy: 0.986
Improve loss of model from 0.05596523731946945 to 0.050387900322675705
Epoch 4/100... Validating step 139/139... Loss 0.04691079631447792
Epoch: 4/100..  Training Loss: 0.048..  Val Loss: 0.047..  Val Accuracy: 0.988
Improve loss of model from 0.050387900322675705 to 0.04691079631447792
Epoch 5/100... Validating step 139/139... Loss 0.045561980456113815
Epoch: 5/100..  Training Loss: 0.046..  Val Loss: 0.046..  Val Accuracy: 0.

In [None]:
torch.save(model.state_dict(), 'best_model.pth')

In [13]:
groundtruths = []
predictions = []
probabilities = []

In [14]:
model.eval()

DataParallel(
  (module): FundusNet(
    (resnet): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (1): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64

In [15]:
import sys
import pandas as pd
accuracy = 0
predictions = {}
with torch.no_grad():
    for step, (images, labels, paths) in enumerate(testloader):
        images, labels = images.cuda(), labels.cuda()
        path_lists = list(paths)
        for path in path_lists:
            predictions[path] = {}
        #for idx,label in enumerate(labels.cpu().view(-1).numpy().tolist()):
            #predictions[path_lists[idx]]['gt'] = label
        output = model.forward(images)
        ps = torch.exp(output)
        for idx, prob in enumerate(ps.cpu().view(-1,2).numpy().tolist()):
            predictions[path_lists[idx]]['prob'] = prob
        top_p, top_class = ps.topk(1, dim=1)
        for idx,pred in enumerate(top_class.cpu().view(-1).numpy().tolist()):
            predictions[path_lists[idx]]['pred'] = pred
        #equals = top_class == labels.view(*top_class.shape)
        #accuracy += torch.mean(equals.type(torch.FloatTensor))
        sys.stdout.write(f"\rStep {step+1}/{len(testloader)}")
#print(f"\nTest accuracy: {accuracy/len(testloader)}")

Step 33/33

In [18]:
import pandas as pd
#data = pd.DataFrame({'image': [], 'prob_left':[], 'prob_right': [], 'prediction': [],'groundtruth': []})
data = pd.DataFrame({'image': [], 'prob_left':[], 'prob_right': [], 'prediction': []})#,'groundtruth': []})

In [17]:
for file, val in predictions.items():
    row = {'image': file, 'prob_left': val['prob'][0], 'prob_right': val['prob'][1], 'prediction': val['pred'],'groundtruth': val['gt']}
    data = data.append(row, ignore_index=True)

KeyError: 'gt'

In [19]:
for file, val in predictions.items():
    row = {'image': file, 'prob_left': val['prob'][0], 'prob_right': val['prob'][1], 'prediction': val['pred']}
    data = data.append(row, ignore_index=True)
data.head()

Unnamed: 0,image,prob_left,prob_right,prediction
0,/media/ryan/Ryan 1TB/data/fundus-caothang/2016...,0.091106,0.908894,1.0
1,/media/ryan/Ryan 1TB/data/fundus-caothang/2016...,0.356699,0.643301,1.0
2,/media/ryan/Ryan 1TB/data/fundus-caothang/2016...,0.775021,0.224979,0.0
3,/media/ryan/Ryan 1TB/data/fundus-caothang/2016...,0.618606,0.381394,0.0
4,/media/ryan/Ryan 1TB/data/fundus-caothang/2016...,0.998531,0.001468,0.0


In [20]:
data.to_csv("caothang-result.csv", index=False)

In [44]:
data.head()

Unnamed: 0,image,prob_left,prob_right,prediction,groundtruth
0,./preprocess/33747_left.jpeg,0.99998,2e-05,0.0,0.0
1,./preprocess/37756_left.jpeg,0.981119,0.018881,0.0,0.0
2,./preprocess/17342_left.jpeg,0.999998,3e-06,0.0,0.0
3,./preprocess/28308_right.jpeg,0.005368,0.994632,1.0,1.0
4,./preprocess/14190_right.jpeg,0.002089,0.997911,1.0,1.0


In [46]:
data.to_csv("result.csv", index=False)