In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os

# Any results you write to the current directory are saved as output.

In [2]:
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import torch
from torch import nn, optim
import torch.nn.functional as F
import torch.utils.data as utils
from torchvision import transforms, models

In [3]:
height = 224
width = height * 1.5
dataset_path = "./preprocess"

In [4]:
dataset_path = '/media/ryan/Ryan 1TB/data/fundus-caothang'
file_paths = []
for filename in os.listdir(dataset_path):
    if filename.endswith('jpg') and filename[0] != '.':
        file_paths.append(os.path.join(dataset_path, filename))

In [5]:
class FundusDataset(utils.Dataset):   
    def __init__(self, image_paths, transform=None):
        self.image_paths_list = image_paths 
        # List of image paths      
        self.labels_list = [] 
        # List of labels correlated      
        self.transform = transform 
        # Transformation applying to each data piece            
        # Run through the folder and get the label of each image inside  
        for filename in image_paths:
            self.labels_list.append(0 if 'left' in filename else 1)
        
    def __getitem__(self, index):      
        '''      Is called when get DataLoader iterated      '''      
        # Get image path with index      
        image_path = self.image_paths_list[index]      
        # Read image with Pillow library      
        image = Image.open(image_path).convert('RGB')      
        # Get label      
        image_label = self.labels_list[index]      
        # Post-transformation apply for image      
        if self.transform != None:          
            image = self.transform(image)            
        return image, image_label, image_path      
    def __len__(self):      
        return len(self.image_paths_list)

In [6]:
transform = transforms.Compose([transforms.Resize((int(width), int(height))),                                
                                transforms.ToTensor(),                                
                                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 

In [7]:
BATCH_SIZE = 64

In [8]:
test_dataset = FundusDataset(file_paths, transform)
testloader = utils.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [9]:
class FundusNet(nn.Module):
    def __init__(self, is_trained):
        super().__init__()
        self.resnet = models.resnet18(pretrained=is_trained)
        kernel_count = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(nn.Linear(2560, 2),nn.Sigmoid())
    def forward(self, x):
        x = self.resnet(x)
        return x

In [10]:
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:    
    print('CUDA is not available.  Training on CPU ...')
else:    
    print('CUDA is available!  Training on GPU ...')

CUDA is available!  Training on GPU ...


In [11]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
model = FundusNet(True)
if train_on_gpu:
    model = torch.nn.DataParallel(model).cuda()
state_dict = torch.load('best_model.pth')
model.load_state_dict(state_dict)
loss = nn.BCELoss(size_average = True)
optimizer = optim.Adam (model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, factor = 0.1, patience = 5, mode = 'min', verbose=True)



In [12]:
groundtruths = []
predictions = []
probabilities = []

In [13]:
_ = model.eval()

In [14]:
import sys
import pandas as pd
accuracy = 0
predictions = {}
with torch.no_grad():
    for step, (images, labels, paths) in enumerate(testloader):
        images, labels = images.cuda(), labels.cuda()
        path_lists = list(paths)
        for path in path_lists:
            predictions[path] = {}
        #for idx,label in enumerate(labels.cpu().view(-1).numpy().tolist()):
            #predictions[path_lists[idx]]['gt'] = label
        output = model.forward(images)
        ps = output
        for idx, prob in enumerate(ps.cpu().view(-1,2).numpy().tolist()):
            predictions[path_lists[idx]]['prob'] = prob
        top_p, top_class = ps.topk(1, dim=1)
        for idx,pred in enumerate(top_class.cpu().view(-1).numpy().tolist()):
            predictions[path_lists[idx]]['pred'] = pred
        #equals = top_class == labels.view(*top_class.shape)
        #accuracy += torch.mean(equals.type(torch.FloatTensor))
        sys.stdout.write(f"\rStep {step+1}/{len(testloader)}")
#print(f"\nTest accuracy: {accuracy/len(testloader)}")

Step 33/33

In [15]:
import pandas as pd
#data = pd.DataFrame({'image': [], 'prob_left':[], 'prob_right': [], 'prediction': [],'groundtruth': []})
data = pd.DataFrame({'image': [], 'prob_left':[], 'prob_right': [], 'prediction': []})#,'groundtruth': []})

In [16]:
for file, val in predictions.items():
    row = {'image': file, 'prob_left': val['prob'][0], 'prob_right': val['prob'][1], 'prediction': val['pred']}
    data = data.append(row, ignore_index=True)
data.head()

Unnamed: 0,image,prob_left,prob_right,prediction
0,/media/ryan/Ryan 1TB/data/fundus-caothang/2018...,0.008113,0.992014,1.0
1,/media/ryan/Ryan 1TB/data/fundus-caothang/2018...,0.041668,0.958,1.0
2,/media/ryan/Ryan 1TB/data/fundus-caothang/2016...,0.901916,0.096034,0.0
3,/media/ryan/Ryan 1TB/data/fundus-caothang/2016...,0.064806,0.933829,1.0
4,/media/ryan/Ryan 1TB/data/fundus-caothang/2016...,0.644123,0.352224,0.0


In [17]:
data.to_csv("caothang-result-bce.csv", index=False)

In [18]:
data.head()

Unnamed: 0,image,prob_left,prob_right,prediction
0,/media/ryan/Ryan 1TB/data/fundus-caothang/2018...,0.008113,0.992014,1.0
1,/media/ryan/Ryan 1TB/data/fundus-caothang/2018...,0.041668,0.958,1.0
2,/media/ryan/Ryan 1TB/data/fundus-caothang/2016...,0.901916,0.096034,0.0
3,/media/ryan/Ryan 1TB/data/fundus-caothang/2016...,0.064806,0.933829,1.0
4,/media/ryan/Ryan 1TB/data/fundus-caothang/2016...,0.644123,0.352224,0.0


In [19]:
def get_label(prob_left, prob_right):
    thresholds = [0.1, 0.3, 0.5, 0.7, 0.9]
    label = ""
    for idx, threshold in enumerate(thresholds):
        if prob_left < threshold:
            label = str(idx)
            break
    if label == "":
        label = str(len(thresholds))
    old_len = len(label)
    for idx, threshold in enumerate(thresholds):
        if prob_right < threshold:
            label += str(idx)
            break
    if len(label) == old_len:
        label += str(len(thresholds))
    return label

In [21]:
import sys, os
for file, val in predictions.items():
    label = get_label(val['prob'][0], val['prob'][1])
    #image_name = file.split('/')[-1]
    !mkdir -p caothang_result/{label}
    !cp "{file}" caothang_result/{label}