In [69]:
import os
import sys
import gc
import time
import random
import cv2
import glob

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from PIL import Image
from multiprocessing import cpu_count
from tqdm import tqdm_notebook as tqdm

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms

from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score

import albumentations as A
from albumentations import torch as AT

In [35]:
# Central experiment configuration. Everything a reader might want to tune
# lives here and is looked up by key from the cells below.
config = dict(
    # --- general settings ---
    dir_path=os.path.join('..', 'input', 'aptos2019-blindness-detection'),
    seed=43,
    num_workers=cpu_count(),

    # --- optimizer ---
    optimizer_name='Adam',
    lr=0.0001,

    # --- model ---
    model_name='resnet18',
    weight_path=os.path.join('..', 'input', 'resnet18', 'resnet18.pth'),

    # --- loss ---
    loss_name='CrossEntropy',

    # --- transform ---
    # The dataset classes read 'library' to decide how the transform
    # pipeline is invoked ('pytorch' or 'albumentations').
    library='albumentations',

    # Per-library switches; presumably meant to drive pipeline construction
    # (no reader of these flags is visible in this notebook -- TODO confirm).
    pytorch=dict(
        resize=True,
        randomHorizontalFlip=True,
        randomRotation=True,
        toTensor=True,
        normalize=True,
    ),

    # NOTE(review): 'randomBrightnrssContrast' looks like a misspelling of
    # 'randomBrightnessContrast'; kept as-is because the key is runtime text.
    albumentations=dict(
        resize=True,
        horizontalFlip=True,
        rotate=True,
        randomBrightness=True,
        randomContrast=True,
        randomBrightnrssContrast=True,
        hueSaturationValue=True,
        toTensor=True,
        normalize=True,
    ),

    # --- train & validation dataset ---
    image_size=256,

    # --- train settings ---
    epochs=100,
    patience=10,
    batch_size=32,
    valid_size=0.1,
)

In [36]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    all CUDA devices), exports ``PYTHONHASHSEED`` and puts cuDNN into
    deterministic mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker processes).
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Cover multi-GPU hosts as well; this is a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may select different kernels from run to run,
    # which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

def get_model(model_name='resnet34', pretrained=False):
    """Build a torchvision backbone by name.

    Args:
        model_name: One of 'resnet18', 'resnet34', 'resnet50', 'resnet101',
            'resnet152', 'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn'.
        pretrained: Forwarded to the torchvision constructor.

    Returns:
        The model instance created by ``torchvision.models``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names
            (previously this surfaced as an ``UnboundLocalError``).
    """
    supported = (
        'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152',
        'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn',
    )
    if model_name not in supported:
        raise ValueError(
            f'Unsupported model_name {model_name!r}; expected one of {supported}')

    # Every supported name is also the name of its torchvision constructor.
    builder = getattr(models, model_name)
    return builder(pretrained=pretrained)
    
def get_optimizer(optimizer_name='Adam', params=None, lr=0.0001):
    """Create a ``torch.optim`` optimizer by name.

    Args:
        optimizer_name: One of 'SGD', 'Adam', 'Adagrad', 'Adadelta', 'RMSprop'.
        params: Parameters (or parameter groups) to optimise.
        lr: Learning rate passed to the optimizer.

    Returns:
        The constructed optimizer.

    Raises:
        ValueError: If ``optimizer_name`` is not supported (previously this
            surfaced as an ``UnboundLocalError``).
    """
    optimizers = {
        'SGD': optim.SGD,
        'Adam': optim.Adam,
        'Adagrad': optim.Adagrad,
        'Adadelta': optim.Adadelta,
        'RMSprop': optim.RMSprop,
    }
    if optimizer_name not in optimizers:
        raise ValueError(
            f'Unsupported optimizer_name {optimizer_name!r}; '
            f'expected one of {sorted(optimizers)}')

    return optimizers[optimizer_name](params=params, lr=lr)

def get_loss(loss_name='CrossEntropy'):
    """Create a loss module by name.

    Args:
        loss_name: One of 'MSE', 'CrossEntropy', 'BCE', 'BCEWithLogits'.

    Returns:
        A freshly constructed ``torch.nn`` loss module.

    Raises:
        ValueError: If ``loss_name`` is not supported (previously this
            surfaced as an ``UnboundLocalError``).
    """
    losses = {
        'MSE': nn.MSELoss,
        'CrossEntropy': nn.CrossEntropyLoss,
        'BCE': nn.BCELoss,
        'BCEWithLogits': nn.BCEWithLogitsLoss,
    }
    if loss_name not in losses:
        raise ValueError(
            f'Unsupported loss_name {loss_name!r}; expected one of {sorted(losses)}')

    return losses[loss_name]()

def get_label_data(train):
    """Return the ``diagnosis`` column of the training frame as a NumPy array.

    Args:
        train: DataFrame with a ``diagnosis`` column.

    Returns:
        ``numpy.ndarray`` holding one label per row, in row order.
    """
    # The column was previously misspelled 'duagnosis', which raised KeyError.
    return train['diagnosis'].values
        
def get_train_data(dir_path='../input', transform=None, batch_size=32, num_workers=4, valid_size=0.1):
    """Build the training and validation loaders from ``train.csv``.

    A single ``TrainDataset`` is created and shared by both loaders; each
    loader draws from its own stratified subset of row positions.

    Args:
        dir_path: Directory containing ``train.csv``.
        transform: Transform pipeline handed to ``TrainDataset``.
        batch_size: Batch size for both loaders.
        num_workers: Worker process count for both loaders.
        valid_size: Fraction of rows held out for validation (default 0.1,
            the value that used to be hard-coded).

    Returns:
        Tuple ``(train_loader, valid_loader)``.
    """
    train = pd.read_csv(os.path.join(dir_path, 'train.csv'))
    y = get_label_data(train)

    train_dataset = TrainDataset(train=train, transform=transform, y=y)

    # Split row *positions* rather than index labels: the samplers feed these
    # numbers straight into Dataset.__getitem__, which is positional.
    positions = np.arange(len(train))
    train_idx, valid_idx = train_test_split(positions, stratify=y, test_size=valid_size)

    train_sampler = SubsetRandomSampler(train_idx.tolist())
    valid_sampler = SubsetRandomSampler(valid_idx.tolist())

    # NOTE(review): both loaders share one dataset, so validation samples go
    # through the same (possibly random) transform as training samples.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers)
    valid_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=valid_sampler, num_workers=num_workers)

    return train_loader, valid_loader
    
def get_test_data(dir_path='../input', transform=None, batch_size=32, num_workers=4):
    """Build the inference loader from ``test.csv``.

    Args:
        dir_path: Directory containing ``test.csv``.
        transform: Transform pipeline handed to ``TestDataset``.
        batch_size: Batch size of the returned loader.
        num_workers: Worker process count of the returned loader.

    Returns:
        A ``DataLoader`` over a ``TestDataset``, iterated in file order.
    """
    csv_path = os.path.join(dir_path, 'test.csv')
    test_frame = pd.read_csv(csv_path)
    dataset = TestDataset(test=test_frame, transform=transform)

    return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)

In [67]:
class DRModel(nn.Module):
    """Wraps a backbone, loads its weights from disk and emits class probabilities."""

    def __init__(self, model, weight_path):
        """
        Args:
            model: Backbone module whose ``state_dict`` layout matches the
                checkpoint stored at ``weight_path``.
            weight_path: Path to a file written with ``torch.save(state_dict)``.
        """
        super(DRModel, self).__init__()
        self.model = model
        # Map to CPU so a checkpoint saved on a GPU still loads on a CPU-only
        # host; callers move the wrapper to the target device afterwards.
        self.model.load_state_dict(torch.load(weight_path, map_location='cpu'))

    def forward(self, x):
        """Run the backbone and apply softmax over the class dimension (dim 1)."""
        x = self.model(x)
        # Explicit dim: the implicit-dim form is deprecated and emits a warning.
        # NOTE(review): nn.CrossEntropyLoss applies log-softmax itself, so
        # feeding it these probabilities applies softmax twice -- confirm intent.
        x = F.softmax(x, dim=1)

        return x
    

class DRResnetModel(DRModel):
    """``DRModel`` variant whose final ``fc`` layer is swapped for a 5-way classifier."""

    def __init__(self, model, weight_path):
        """Load the checkpoint into ``model``, then replace its ``fc`` head.

        The replacement happens after the weights are loaded, so the new
        layer starts from its default initialisation.
        """
        super().__init__(model, weight_path)
        head_inputs = self.model.fc.in_features
        self.model.fc = nn.Linear(in_features=head_inputs, out_features=5, bias=True)
        
        
class DRVggModel(DRModel):
    """``DRModel`` variant whose last ``classifier`` layer is swapped for a 5-way head."""

    def __init__(self, model, weight_path):
        """Load the checkpoint into ``model``, then replace the final classifier layer.

        Mirrors ``DRResnetModel``: without the replacement the network keeps
        the output width of the loaded checkpoint instead of 5 classes.

        Assumes ``model.classifier`` is an ``nn.Sequential`` ending in an
        ``nn.Linear``, as in torchvision's VGG models.
        """
        super(DRVggModel, self).__init__(model, weight_path)
        last = len(self.model.classifier) - 1
        head_inputs = self.model.classifier[last].in_features
        self.model.classifier[last] = nn.Linear(in_features=head_inputs, out_features=5, bias=True)

In [68]:
def crop_image_from_gray(img, tol=7):
    """Crop away the dark border of an image.

    Rows and columns are kept only if they contain at least one pixel whose
    grey level exceeds ``tol``.

    Args:
        img: 2-D greyscale array, or 3-D array that ``cv2.cvtColor`` can
            convert with ``COLOR_RGB2GRAY``.
        tol: Grey-level threshold; pixels at or below it count as background.

    Returns:
        The cropped image. If no pixel exceeds ``tol`` the input is returned
        unchanged (this now also holds for 2-D input, which used to come back
        as an empty array). For 3-D input only the first three channels are
        kept, as before.

    Raises:
        ValueError: If ``img`` is neither 2-D nor 3-D (previously ``None``
            was returned silently).
    """
    if img.ndim == 2:
        mask = img > tol
    elif img.ndim == 3:
        mask = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) > tol
    else:
        raise ValueError(f'Expected a 2-D or 3-D image, got {img.ndim} dimension(s)')

    # Nothing brighter than the tolerance: cropping would leave an empty image.
    if not mask.any():
        return img

    # np.ix_ builds an open mesh from the row/column selectors, so one
    # indexing operation crops every channel at once.
    cropped = img[np.ix_(mask.any(1), mask.any(0))]
    if img.ndim == 3:
        cropped = np.ascontiguousarray(cropped[:, :, :3])

    return cropped
    
def preprocess(image_name, image_size=None):
    """Load an image from disk and prepare it for the model.

    Steps: read with OpenCV, convert BGR to RGB, crop the dark border,
    resize to a square, then blend the image with a blurred copy of itself.

    Args:
        image_name: Path of the image file.
        image_size: Edge length of the square output. Defaults to
            ``config['image_size']`` (the body used to reference an undefined
            ``IMAGE_SIZE`` name).

    Returns:
        The processed image as a NumPy array.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_name``.
    """
    if image_size is None:
        image_size = config['image_size']

    image = cv2.imread(image_name)
    # cv2.imread signals failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_name}')

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)
    image = cv2.resize(image, (image_size, image_size))
    # 4 * image - 4 * blurred + 128; the (0, 0) kernel size lets OpenCV derive
    # the kernel from sigma=30.
    image = cv2.addWeighted(image, 4, cv2.GaussianBlur(image, (0, 0), 30), -4, 128)

    return image

In [37]:
class TrainDataset(Dataset):
    """Training dataset that preprocesses every image once, up front, and keeps it in memory."""

    def __init__(self, train, path='aptos2019-blindness-detection', transform=None, y=None):
        """
        Args:
            train: DataFrame with an ``id_code`` column naming the image files.
            path: Competition folder name under ``../input``.
            transform: Transform pipeline applied in ``__getitem__``; how it
                is called depends on ``config['library']``.
            y: Labels indexable by sample position.
        """
        self.image_name_list = [os.path.join('..', 'input', f'{path}', 'train_images', f'{i}.png') for i in tqdm(train['id_code'].values)]
        # TODO: multiprocessing
        self.image_list = [preprocess(image_name) for image_name in tqdm(self.image_name_list)]
        self.labels = y
        self.transform = transform

    def __len__(self):
        """Number of samples."""
        return len(self.image_name_list)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        image = self.image_list[idx]

        if self.transform is not None:
            library = config['library']
            if library == 'albumentations':
                # albumentations pipelines only accept named arguments and
                # return a dict; the positional call used before raised.
                image = self.transform(image=image)['image']
            elif library == 'pytorch':
                # The preprocessed NumPy array is converted to a PIL image first.
                image = self.transform(transforms.ToPILImage()(image))
            else:
                image = self.transform(image)

        label = self.labels[idx]

        return image, label
    
    
class TestDataset(Dataset):
    """Inference dataset that loads and preprocesses each image on access."""

    def __init__(self, test, path='aptos2019-blindness-detection', transform=None, y=None):
        """
        Args:
            test: DataFrame with an ``id_code`` column naming the image files.
            path: Competition folder name under ``../input``.
            transform: Transform pipeline applied in ``__getitem__``; how it
                is called depends on ``config['library']``.
            y: Unused; placeholder zero labels are created instead.
        """
        self.image_name_list = [os.path.join('..', 'input', f'{path}', 'test_images', f'{i}.png') for i in tqdm(test['id_code'].values)]
        # Dummy labels (one row of five zeros per image) so each item still
        # carries a label slot.
        self.labels = np.zeros((len(self.image_name_list), 5))
        self.transform = transform

    def __len__(self):
        """Number of samples."""
        return len(self.image_name_list)

    def __getitem__(self, idx):
        """Return ``(image, label, image_name)`` for the sample at position ``idx``."""
        image_name = self.image_name_list[idx]
        image = preprocess(image_name)

        if self.transform is not None:
            library = config['library']
            if library == 'albumentations':
                # albumentations pipelines only accept named arguments and
                # return a dict; the positional call used before raised.
                image = self.transform(image=image)['image']
            elif library == 'pytorch':
                # The preprocessed NumPy array is converted to a PIL image first.
                image = self.transform(transforms.ToPILImage()(image))
            else:
                image = self.transform(image)

        label = self.labels[idx]

        return image, label, image_name

In [70]:
def train(model, criterion, optimizer, train_loader, device):
    """Run one training epoch.

    Args:
        model: Network to optimise; switched to training mode.
        criterion: Loss callable taking ``(output, target)``.
        optimizer: Optimizer stepping ``model``'s parameters.
        train_loader: Iterable yielding ``(data, target)`` batches.
        device: Device the batches are moved to (e.g. ``'cuda'`` or ``'cpu'``).

    Returns:
        Mean batch loss over the epoch as a Python float.
    """
    model.train()

    running_loss = 0.0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output.float(), target)
        # .item() yields a plain float, so the running total is not a tensor
        # living on the training device.
        running_loss += loss.item()

        loss.backward()
        optimizer.step()

    return running_loss / len(train_loader)

In [71]:
def valid(model, criterion, optimizer, valid_loader, device):
    """Compute the mean loss over the validation loader.

    Args:
        model: Network to evaluate; switched to eval mode.
        criterion: Loss callable taking ``(output, target)``.
        optimizer: Unused; kept so the signature mirrors ``train``.
        valid_loader: Iterable yielding ``(data, target)`` batches.
        device: Device the batches are moved to (e.g. ``'cuda'`` or ``'cpu'``).

    Returns:
        Mean batch loss as a Python float.
    """
    model.eval()

    running_loss = 0.0
    # No gradients are needed for evaluation; skipping them avoids building
    # an autograd graph for every batch.
    with torch.no_grad():
        for data, target in valid_loader:
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss = criterion(output.float(), target)
            running_loss += loss.item()

    # TODO: also report a quadratic-weighted kappa (cohen_kappa_score) on the
    # argmax predictions.
    return running_loss / len(valid_loader)

In [72]:
def test(model, test_loader, device, sub):
    model.eval()
    
    for (data, _, name) in test_loader:
        if device == 'cuda':
            data = data.cuda()
            
        output = model(data)
        output = output.cpu().detach().numpy()
        
        for i, (e, n) in enumerate(list(zip(output, name))):
#             sub.loc[sub['id_code'] == n.split('/')[-1].split('.')[0], 'diagnosis'] = le.inverse_transform([np.argmax(e)])
            sub.loc[sub['id_code'] == n.split('/')[-1].split('.')[0], 'diagnosis'] = np.argmax(e)
    
    return sub

In [73]:
class EarlyStopping:
    """Stop training once the validation loss has not improved for ``patience`` calls.

    Each call that does not worsen the best loss saves the model's
    ``state_dict`` to ``path`` and resets the counter; each call with a worse
    loss increments the counter.
    """

    def __init__(self, patience=10, verbose=False, path='best.pth'):
        """
        Args:
            patience: Number of consecutive non-improving calls tolerated
                before ``early_stop`` is set.
            verbose: If True, print a message whenever a checkpoint is saved.
            path: Checkpoint file; defaults to the previously hard-coded
                ``'best.pth'``.
        """
        self.patience = patience
        self.verbose = verbose
        self.path = path
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
        self.val_loss_min = np.inf

    def __call__(self, val_loss, model):
        """Record ``val_loss`` for this epoch and checkpoint ``model`` if it is the best so far."""
        # Negate so that "higher is better" when comparing scores.
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Write ``model.state_dict()`` to ``self.path`` and remember ``val_loss``."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [None]:
def main():
    """Train with early stopping, then write ``submission.csv`` from the best checkpoint."""
    seed_everything(config['seed'])
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # TODO: config
    # NOTE(review): these are torchvision transforms, yet config['library'] is
    # 'albumentations' and the dataset classes pick their calling convention
    # from that setting -- set config['library'] to 'pytorch' or build an
    # albumentations pipeline here; confirm which is intended.
    train_transforms = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation((-180, 180)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    train_loader, valid_loader = get_train_data(dir_path=config['dir_path'], transform=train_transforms, batch_size=config['batch_size'], num_workers=config['num_workers'])

    if config['model_name'].startswith('resnet'):
        model = DRResnetModel(get_model(model_name=config['model_name']), config['weight_path'])
    else:
        # TODO: the VGG path has not been verified end-to-end
        model = DRVggModel(get_model(model_name=config['model_name']), config['weight_path'])

    # The batches are moved to `device` inside train/valid/test, so the model
    # has to live there too (it was previously left on the CPU).
    model = model.to(device)

    criterion = get_loss(loss_name=config['loss_name'])
    # Was `get_optim`, a name that is not defined anywhere in this notebook.
    optimizer = get_optimizer(optimizer_name=config['optimizer_name'], params=model.parameters(), lr=config['lr'])

    early_stopping = EarlyStopping(patience=config['patience'], verbose=True)
    for epoch in tqdm(range(config['epochs'])):
        train_loss = train(model, criterion, optimizer, train_loader, device)
        val_loss = valid(model, criterion, optimizer, valid_loader, device)

        print('epoch {:d}, loss: {:.4f} val_loss: {:.4f}'.format(epoch, train_loss, val_loss))

        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    del train_loader, valid_loader
    gc.collect()

    # Predict with the best checkpoint saved by EarlyStopping rather than with
    # whatever weights the final epoch happened to leave behind.
    if os.path.exists('best.pth'):
        model.load_state_dict(torch.load('best.pth', map_location=device))

    test_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    test_loader = get_test_data(dir_path=config['dir_path'], transform=test_transforms, batch_size=config['batch_size'], num_workers=config['num_workers'])

    sub = pd.read_csv(os.path.join(config['dir_path'], 'sample_submission.csv'))
    sub = test(model, test_loader, device, sub)
    sub.to_csv('submission.csv', index=False)