In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os

# Any results you write to the current directory are saved as output.

In [4]:
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import torch
from torch import nn, optim
import torch.nn.functional as F
import torch.utils.data as utils
from torchvision import transforms, models

In [5]:
height = 224
width = height * 1.5
dataset_path = "./preprocess"

In [4]:
file_paths = []
for filename in os.listdir(dataset_path):
    if 'left' in filename or 'right' in filename:
        file_paths.append(os.path.join(dataset_path, filename))

In [5]:
import random
from random import shuffle
random.seed(2019)

In [6]:
shuffle(file_paths)

In [7]:
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1
train_files = file_paths[:int(len(file_paths)*train_ratio)]
val_files = file_paths[int(len(file_paths)*train_ratio):int(len(file_paths)*(train_ratio + val_ratio))]
test_files = file_paths[int(len(file_paths)*(train_ratio + val_ratio)):]

In [8]:
class FundusDataset(utils.Dataset):   
    def __init__(self, image_paths, transform=None):
        self.image_paths_list = image_paths 
        # List of image paths      
        self.labels_list = [] 
        # List of labels correlated      
        self.transform = transform 
        # Transformation applying to each data piece            
        # Run through the folder and get the label of each image inside  
        for filename in image_paths:
            self.labels_list.append([1,0] if 'left' in filename else [0,1])
        
    def __getitem__(self, index):      
        '''      Is called when get DataLoader iterated      '''      
        # Get image path with index      
        image_path = self.image_paths_list[index]      
        # Read image with Pillow library      
        image = Image.open(image_path).convert('RGB')      
        # Get label      
        image_label = torch.FloatTensor(self.labels_list[index])
        # Post-transformation apply for image      
        if self.transform != None:          
            image = self.transform(image)            
        return image, image_label      
    def __len__(self):      
        return len(self.image_paths_list)

In [9]:
transform = transforms.Compose([transforms.Resize((int(width), int(height))),                                
                                transforms.ToTensor(),                                
                                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 

In [10]:
BATCH_SIZE = 128

In [11]:
train_dataset = FundusDataset(train_files, transform)
trainloader = utils.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [12]:
val_dataset = FundusDataset(val_files, transform)
valloader = utils.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [13]:
test_dataset = FundusDataset(test_files, transform)
testloader = utils.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [14]:
class FundusNet(nn.Module):
    def __init__(self, is_trained):
        super().__init__()
        self.resnet = models.resnet18(pretrained=is_trained)
        kernel_count = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(nn.Linear(2560, 2),nn.Sigmoid())
    def forward(self, x):
        x = self.resnet(x)
        return x

In [15]:
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:    
    print('CUDA is not available.  Training on CPU ...')
else:    
    print('CUDA is available!  Training on GPU ...')

CUDA is available!  Training on GPU ...


In [16]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
model = FundusNet(True)
if train_on_gpu:
    model = nn.DataParallel(model).cuda()
#state_dict = torch.load('best_model.pth')
#model.load_state_dict(state_dict)
loss = nn.BCELoss(size_average = True)
optimizer = optim.Adam (model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, factor = 0.1, patience = 5, mode = 'min', verbose=True)



In [19]:
import sys

epochs = 100
train_losses, val_losses = [], []
best_loss = 999999999
for e in range(epochs):
    running_loss = 0
    for step, (images, labels) in enumerate(trainloader):
        
        if train_on_gpu:               
            images, labels = images.cuda(), labels.cuda()
        optimizer.zero_grad()
        ps = model(images)            
        loss_val = loss(ps, labels)
        loss_val.backward()            
        optimizer.step()
        running_loss += loss_val.item()
        sys.stdout.write(f"\rEpoch {e+1}/{epochs}... Training step {step+1}/{len(trainloader)}... Loss {running_loss/(step+1)}")
    else:
        val_loss = 0            
        accuracy = 0
        with torch.no_grad():                
            for step, (images, labels) in enumerate(valloader):                    
                if train_on_gpu:                       
                    images, labels = images.cuda(), labels.cuda()                    
                log_ps = model(images)
                val_loss += loss(log_ps, labels)
                ps = torch.exp(log_ps)                    
                top_p, top_class = ps.topk(1, dim=1)                    
                equals = top_class == torch.argmax(labels, dim=1).view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor))
                sys.stdout.write(f"\rEpoch {e+1}/{epochs}... Validating step {step+1}/{len(valloader)}... Loss {val_loss/(step+1)}")
        train_losses.append(running_loss/len(trainloader))
        val_losses.append(val_loss/len(valloader))
        scheduler.step(val_loss/len(valloader))
        print("\nEpoch: {}/{}.. ".format(e+1, epochs),                  
              "Training Loss: {:.3f}.. ".format(running_loss/len(trainloader)),                  
              "Val Loss: {:.3f}.. ".format(val_loss/len(valloader)),                  
              "Val Accuracy: {:.3f}".format(accuracy/len(valloader)))
        if best_loss > val_loss/len(valloader):
            print("Improve loss of model from {} to {}".format(best_loss, val_loss/len(valloader)))
            best_loss = val_loss/len(valloader)
            torch.save(model.state_dict(), 'best_model.pth')

Epoch 1/100... Validating step 70/70... Loss 0.050665594637393956
Epoch: 1/100..  Training Loss: 0.049..  Val Loss: 0.051..  Val Accuracy: 0.986
Improve loss of model from 999999999 to 0.05066559463739395
Epoch 2/100... Validating step 70/70... Loss 0.048595219850540165
Epoch: 2/100..  Training Loss: 0.045..  Val Loss: 0.049..  Val Accuracy: 0.985
Improve loss of model from 0.05066559463739395 to 0.04859521985054016
Epoch 3/100... Validating step 70/70... Loss 0.045802999287843704
Epoch: 3/100..  Training Loss: 0.042..  Val Loss: 0.046..  Val Accuracy: 0.988
Improve loss of model from 0.04859521985054016 to 0.045802999287843704
Epoch 4/100... Validating step 70/70... Loss 0.047558862715959554
Epoch: 4/100..  Training Loss: 0.042..  Val Loss: 0.048..  Val Accuracy: 0.986
Epoch 5/100... Validating step 70/70... Loss 0.049132466316223145
Epoch: 5/100..  Training Loss: 0.040..  Val Loss: 0.049..  Val Accuracy: 0.986
Epoch 6/100... Validating step 70/70... Loss 0.048398818820714954
Epoch: 6

KeyboardInterrupt: 

In [None]:
torch.save(model.state_dict(), 'best_model.pth')

In [None]:
print("a")