In [None]:
import copy
import glob
import os
import random
import sys
import time

import cv2
import pandas as pd
import numpy as np
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm_notebook as tqdm

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import TensorDataset, DataLoader, Dataset
import torchvision
import torchvision.transforms as transforms

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score

import albumentations
from albumentations import torch as at

In [None]:
import warnings

# Suppress every warning category for the remainder of the session.
warnings.filterwarnings('ignore')

# Root directory that the CSV files are read from.
PATH = os.path.join('..', 'input')

# Choose the compute device once and announce the choice; `device` is kept
# as a plain string ('cuda' or 'cpu') for the cells that follow.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print({
    'cuda': 'Cuda is available. GPU MODE!',
    'cpu': 'Cuda is not available. CPU MODE!',
}[device])
    
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), exports ``PYTHONHASHSEED``, and configures
    cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; this call is a
    # no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different kernels from run to run, so it
    # has to be off for `deterministic` to actually give repeatable results.
    torch.backends.cudnn.benchmark = False
    
# Fix all RNG seeds up front, before the data split and model creation below.
seed_everything(43)

# Side length in pixels that preprocess() resizes every image to (square).
IMG_SIZE = 256

In [None]:
# Load the training and test tables. Later cells read the 'id_code' column of
# both frames and the 'diagnosis' column of the training frame.
# NOTE: the names `train` and `test` are rebound further down by functions of
# the same name, so these DataFrames are only reachable until those cells run.
train = pd.read_csv(os.path.join(PATH, 'train.csv'))
test = pd.read_csv(os.path.join(PATH, 'test.csv'))

In [None]:
# Number of rows in the test table (in the code shown here it is referenced
# only from a commented-out branch of the training cell).
TEST_SIZE = test.shape[0]

In [None]:
# def prepare_labels(y):
#     label_encoder = LabelEncoder()
#     integer_encoded = label_encoder.fit_transform(y)

#     onehot_encoder = OneHotEncoder(sparse=False)
#     integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
#     onehot_encoded = onehot_encoder.fit_transform(integer_encoded)

#     y = onehot_encoded
#     return y, label_encoder

# y, le = prepare_labels(train['diagnosis'].values)

In [None]:
# y = pd.get_dummies(train['diagnosis']).values

# for i in tqdm(range(len(y))):
#     idx = np.argmax(y)
#     y[i][idx] += 1
#     if idx == 0:
#         y[i][idx + 1] += 1
#     elif idx == 4:
#         y[i][idx - 1] += 1
#     else:
#         y[i][idx + 1] += 1
#         y[i][idx - 1] += 1

In [None]:
# Training targets as a NumPy array, handed to DRDataset as its labels.
# Presumably integer grades in 0..4 (the model has 5 outputs) — confirm
# against train.csv.
y = train['diagnosis'].values

In [None]:
def crop_image_from_gray(img, tol=7):
    """Drop the rows and columns in which no pixel is brighter than ``tol``.

    Args:
        img: 2-D (grayscale) or 3-D (RGB channel order) NumPy image array.
        tol: Brightness threshold; a row/column is kept when at least one of
            its (grayscale) pixels is strictly greater than this value.

    Returns:
        The cropped array. For 3-D input the first three channels are cropped
        with a mask derived from the grayscale conversion; if that crop would
        have zero rows the input is returned unchanged. Arrays of any other
        rank fall through and yield ``None``.
    """
    if img.ndim == 2:
        bright = img > tol
        return img[np.ix_(bright.any(1), bright.any(0))]

    if img.ndim == 3:
        bright = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) > tol
        # Build the row/column index grid once and reuse it for each channel.
        keep = np.ix_(bright.any(1), bright.any(0))
        channels = [img[:, :, c][keep] for c in range(3)]
        if channels[0].shape[0] == 0:
            # Nothing exceeds the threshold: hand back the original image.
            return img
        return np.stack(channels, axis=-1)

In [None]:
class DRDataset(Dataset):
    """In-memory image dataset built from a table of ``id_code`` values.

    All images are read and preprocessed eagerly in ``__init__`` (via the
    module-level ``preprocess`` function), so construction is slow but
    item access is cheap.
    """

    def __init__(self, df, datatype='train', transform=None, y=None):
        """
        Args:
            df: DataFrame with an ``id_code`` column naming the image files.
            datatype: ``'train'`` or another tag such as ``'test'``; selects
                the image directory and the shape of returned items.
            transform: Optional callable applied to each preprocessed image
                on access. When ``None`` the preprocessed image is returned
                as-is.
            y: Labels aligned with ``df`` rows; used only when
                ``datatype == 'train'``.
        """
        self.df = df
        self.datatype = datatype
        self.image_files_list = [f'../input/aptos2019-blindness-detection/{self.datatype}_images/{i}.png' for i in df['id_code'].values]
        self.image_list = [preprocess(img_name) for img_name in self.image_files_list]

        if self.datatype == 'train':
            self.labels = y
        else:
            # Placeholder labels so that non-train items have the same layout.
            self.labels = np.zeros((df.shape[0], 5))

        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_files_list)

    def __getitem__(self, idx):
        """Return ``(img, label)`` for train data, else ``(img, label, img_name)``."""
        img_name = self.image_files_list[idx]
        img = self.image_list[idx]
        # The constructor default is transform=None; calling it blindly would
        # raise "'NoneType' object is not callable".
        if self.transform is not None:
            img = self.transform(img)
        label = self.labels[idx]
        if self.datatype == 'train':
            return img, label
        return img, label, img_name
        
def preprocess(img_name):
    """Load one image from disk and return it preprocessed as a PIL image.

    Steps: read with OpenCV, convert BGR to RGB, crop dark borders with
    ``crop_image_from_gray``, resize to ``IMG_SIZE`` x ``IMG_SIZE``, then
    blend the image with a heavily blurred copy of itself.

    Args:
        img_name: Path of the image file.

    Returns:
        A PIL image of size ``IMG_SIZE`` x ``IMG_SIZE``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    img = cv2.imread(img_name)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising; fail here with the offending path rather than
        # with an opaque cvtColor assertion.
        raise FileNotFoundError(f'Could not read image: {img_name}')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = crop_image_from_gray(img)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    # 4 * img - 4 * blur(img) + 128: subtracts the blurred copy (sigma 30)
    # and re-centres the result around mid-grey.
    img = cv2.addWeighted(img, 4, cv2.GaussianBlur(img, (0, 0), 30), -4, 128)

    img = transforms.ToPILImage()(img)

    return img

In [None]:
def _tensor_and_normalize():
    """Return the tensor-conversion + normalisation tail shared by both pipelines."""
    # Per-channel mean / std (presumably the ImageNet statistics matching the
    # pretrained ResNet weights — confirm).
    return [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]


# Training pipeline: random flip and arbitrary rotation, then normalisation.
train_transforms = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation((-180, 180)),
    ]
    + _tensor_and_normalize()
)

# Evaluation pipeline: no augmentation, only tensor conversion + normalisation.
test_transforms = transforms.Compose(_tensor_and_normalize())

# train_transforms = albumentations.Compose([
#     albumentations.Resize(256, 256),
#     albumentations.HorizontalFlip(),
#     albumentations.ShiftScaleRotate(rotate_limit=[0, 1], scale_limit=[0.10, 0.10]),
#     at.ToTensor()
#     ])

# test_transforms = albumentations.Compose([
#     albumentations.Resize(256, 256),
#     at.ToTensor()
#     ])

In [None]:
# Build the in-memory datasets (every image is preprocessed here, once).
train_dataset = DRDataset(df=train, datatype='train', transform=train_transforms, y=y)
test_dataset = DRDataset(df=test, datatype='test', transform=test_transforms)

# Validation view of the training data: a shallow copy shares the already
# preprocessed images and labels but uses the deterministic evaluation
# transform. Validating through `train_dataset` would apply random
# flips/rotations and make the early-stopping signal noisy.
valid_dataset = copy.copy(train_dataset)
valid_dataset.transform = test_transforms

# Stratified 90/10 split. The index labels of the split are used as dataset
# positions below, which relies on `train` having the default 0..n-1 index.
tr, val = train_test_split(train.diagnosis, stratify=train.diagnosis, test_size=0.1)

train_sampler = SubsetRandomSampler(list(tr.index))
valid_sampler = SubsetRandomSampler(list(val.index))

batch_size = 32
num_workers = 0

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, sampler=valid_sampler, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, num_workers=num_workers)

In [None]:
# Width of the feature vector that the ResNet-18 backbone feeds into the head.
FEATURES = 512


class DRDetect(nn.Module):
    """ResNet-18 backbone with a custom 5-way classification head.

    ``forward`` returns raw class scores (logits). The notebook trains with
    ``nn.CrossEntropyLoss``, which applies log-softmax itself; applying a
    softmax here as well would squash the gradients. ``argmax`` over the
    logits gives the same predicted class as ``argmax`` over probabilities.
    """

    def __init__(self):
        """Load the ResNet-18 weights from disk and replace the final layer."""
        super(DRDetect, self).__init__()
        self.model = torchvision.models.resnet18(pretrained=False)
        self.model.load_state_dict(torch.load(os.path.join('..', 'input', 'resnet18', 'resnet18.pth')))
        # Replacement head: BN -> Dropout -> Linear -> ReLU -> BN -> Dropout -> Linear(5).
        self.model.fc = nn.Sequential(
            nn.BatchNorm1d(FEATURES, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.Dropout(p=0.25),
            nn.Linear(in_features=FEATURES, out_features=512, bias=True),
            nn.ReLU(),
            nn.BatchNorm1d(FEATURES, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=FEATURES, out_features=5, bias=True),
        )

    def forward(self, x):
        """Return the unnormalised class scores for a batch of images.

        Args:
            x: Batch of image tensors accepted by the ResNet backbone.

        Returns:
            Tensor with one row of 5 logits per input image.
        """
        return self.model(x)

In [None]:
# Instantiate the network and move its parameters to the selected device.
model = DRDetect().to(device)

In [None]:
# Show the module tree, including the replaced classification head.
print(model)

In [None]:
# Learning rate passed to the Adam optimizer created in the next cell.
lr = 0.0001
# Settings for the commented-out SGD optimizer / ReduceLROnPlateau scheduler:
# momentum = 0.99
# factor = 0.5
# patience = 5

# Upper bound on the number of epochs; early stopping may end training sooner.
n_epochs = 100

In [None]:
# criterion = nn.BCELoss()
# nn.CrossEntropyLoss applies log-softmax internally, so it is meant to be fed
# raw class scores together with integer class indices as targets.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters (backbone and head) with the learning rate above.
optimizer = optim.Adam(model.parameters(), lr=lr)
# optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
# scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=factor, patience=patience)

In [None]:
def train(model, criterion, optimizer, train_loader, device):
    """Run one training epoch and return the mean per-batch loss.

    NOTE: defining this function rebinds the module-level name ``train``,
    which earlier in the notebook refers to the training DataFrame.

    Args:
        model: Network to optimise; switched to training mode.
        criterion: Loss callable taking ``(output, target)``.
        optimizer: Optimizer that owns the model parameters.
        train_loader: Iterable yielding ``(data, target)`` batches.
        device: Device (string or ``torch.device``) the batches are moved to.

    Returns:
        Mean of the batch losses as a Python float.
    """
    model.train()

    running_loss = 0.0
    for data, target in train_loader:
        # .to() works for 'cpu', 'cuda' and torch.device objects alike.
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output.float(), target)
        # .item() yields a plain float detached from the autograd graph.
        running_loss += loss.item()

        loss.backward()
        optimizer.step()

    return running_loss / len(train_loader)

In [None]:
def valid(model, criterion, optimizer, valid_loader, device):
    """Evaluate the model on the validation batches and return the mean loss.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        criterion: Loss callable taking ``(output, target)``.
        optimizer: Unused; kept so the signature mirrors ``train``.
        valid_loader: Iterable yielding ``(data, target)`` batches.
        device: Device (string or ``torch.device``) the batches are moved to.

    Returns:
        Mean of the batch losses as a Python float.
    """
    model.eval()

    running_loss = 0.0
    # No gradients are needed for evaluation; disabling autograd avoids
    # building (and holding in memory) a graph for every batch.
    with torch.no_grad():
        for data, target in valid_loader:
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss = criterion(output.float(), target)
            running_loss += loss.item()

    return running_loss / len(valid_loader)

In [None]:
def test(model, test_loader, device, sub):
    model.eval()
    
    for (data, _, name) in test_loader:
        if device == 'cuda':
            data = data.cuda()
            
        output = model(data)
        output = output.cpu().detach().numpy()
        
        for i, (e, n) in enumerate(list(zip(output, name))):
#             sub.loc[sub['id_code'] == n.split('/')[-1].split('.')[0], 'diagnosis'] = le.inverse_transform([np.argmax(e)])
            sub.loc[sub['id_code'] == n.split('/')[-1].split('.')[0], 'diagnosis'] = np.argmax(e)
    
    return sub

In [None]:
class EarlyStopping:
    """Stop training when the validation loss has not improved for a while.

    Call the instance once per epoch with the validation loss and the model.
    Whenever the loss is at least as good as the best seen so far, the model's
    ``state_dict`` is written to ``path`` and the patience counter resets.
    After ``patience`` consecutive non-improving epochs ``early_stop`` is set.
    """

    def __init__(self, patience=10, verbose=False, path='best.pth'):
        """
        Args:
            patience: Number of consecutive epochs without improvement that
                triggers ``early_stop``.
            verbose: If true, print a message each time a checkpoint is saved.
            path: File the best ``state_dict`` is saved to.
        """
        self.patience = patience
        self.verbose = verbose
        self.path = path
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss and checkpoint on improvement.

        Args:
            val_loss: Validation loss of the current epoch (lower is better).
            model: Model whose ``state_dict`` is saved on improvement.
        """
        # Work with a score where higher is better.
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # Equal or better than the best so far counts as an improvement.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Save the model's ``state_dict`` to ``self.path`` and remember the loss."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [None]:
# Train for up to n_epochs, checkpointing the best weights to 'best.pth' and
# stopping once the validation loss has not improved for 15 epochs in a row.
early_stopping = EarlyStopping(patience=15, verbose=True)
for epoch in tqdm(range(n_epochs)):
    train_loss = train(model, criterion, optimizer, train_loader, device)
    val_loss = valid(model, criterion, optimizer, valid_loader, device)

    print('epoch {:d}, loss: {:.4f} val_loss: {:.4f}'.format(epoch, train_loss, val_loss))

    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("Early stopping")
        break

# The model in memory holds the weights of the LAST epoch, which can be up to
# `patience` epochs past the best one. Restore the best checkpoint so that
# inference uses it. best_score stays None only if no epoch ran, in which
# case no checkpoint was written by this run.
if early_stopping.best_score is not None:
    model.load_state_dict(torch.load('best.pth'))

# if TEST_SIZE == 1928:
#     early_stopping = EarlyStopping(patience=15, verbose=True)
#     for epoch in tqdm(range(n_epochs)):
#         train_loss = train(model, criterion, optimizer, train_loader, device)
#         val_loss = valid(model, criterion, optimizer, valid_loader, device)

#         print('epoch {:d}, loss: {:.4f} val_loss: {:.4f}'.format(epoch, train_loss, val_loss))

#         early_stopping(val_loss, model)
#         if early_stopping.early_stop:
#             print("Early stopping")
#             break
# else:
#     model.load_state_dict(torch.load('best.pth'))
#     model.cuda()

In [None]:
# Start from the sample submission, fill its 'diagnosis' column with the
# model's predicted class for each id_code, and write the result to disk.
sub = pd.read_csv(os.path.join(PATH, 'sample_submission.csv'))
sub = test(model, test_loader, device, sub)
sub.to_csv('submission.csv', index=False)