This Kernel was inspired by https://www.kaggle.com/demonplus/fast-ai-starter-with-resnet-50


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
import sys
print(os.listdir('../input'))

# Any results you write to the current directory are saved as output.

In [None]:
import torch
from tqdm import tqdm, tqdm_notebook
import random, torch, os, numpy as np
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torch.nn import CrossEntropyLoss
import scipy as sp

def seed_everything(seed):
    """Seed every random number generator this notebook draws from.

    Seeding only the CUDA generator leaves ``random``, NumPy (used by
    ``DataFrame.sample`` and ``np.random.choice`` further down) and the torch CPU
    generator unseeded, so shuffles and splits would differ from run to run.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    # NOTE: presumably only affects Python processes started after this point;
    # the hash seed of the running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


seed_everything(999)

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from zipfile import ZipFile
from fastai.vision import *
from fastai.metrics import error_rate
from fastai.vision import *
from fastai.callbacks import *
from sklearn.metrics import confusion_matrix, cohen_kappa_score

In [None]:
import PIL
import cv2

# NOTE(review): the databunch() calls visible in this notebook pass bs=192 directly
# and never read this variable — confirm whether it is still needed.
bs = 96

!ls

In [None]:
def crop_image1(img,tol=7):
    """Drop every row and column of a 2-D image that has no pixel above ``tol``.

    Args:
        img: 2-D NumPy array of pixel values.
        tol: brightness tolerance; only pixels strictly greater than it count as content.

    Returns:
        A new array made of the rows and columns that contain at least one pixel
        above ``tol`` (empty when no pixel qualifies).
    """
    bright = img > tol
    keep_rows = bright.any(axis=1)
    keep_cols = bright.any(axis=0)
    return img[np.ix_(keep_rows, keep_cols)]

def crop_image_from_gray(img,tol=7):
    """Drop the rows and columns of ``img`` that hold no pixel brighter than ``tol``.

    For a 2-D array the brightness mask is computed on the array itself. For a 3-D
    array it is computed on ``cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)`` and the same
    rows/columns are then taken from channels 0, 1 and 2.

    Args:
        img: 2-D or 3-D NumPy image array.
        tol: brightness tolerance; only pixels strictly greater than it count as content.

    Returns:
        The cropped array. When no pixel exceeds ``tol`` the input is returned
        unchanged, for both 2-D and 3-D input, so callers never receive an empty
        image. For any other rank ``None`` is returned, as before.
    """
    if img.ndim == 2:
        gray_img = img
    elif img.ndim == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        return None

    mask = gray_img > tol
    if not mask.any():
        # Image is too dark: cropping would remove everything, keep the original.
        return img

    # Build the row/column index grid once and reuse it for every channel.
    keep = np.ix_(mask.any(1), mask.any(0))
    if img.ndim == 2:
        return img[keep]
    return np.stack([img[:, :, channel][keep] for channel in range(3)], axis=-1)

def load_ben_color(path, sigmaX=10):
    """Load an image file and apply the crop / resize / blur-subtraction pipeline.

    Steps: read with OpenCV, convert BGR to RGB, crop with
    ``crop_image_from_gray``, resize to ``IMG_SIZE`` x ``IMG_SIZE``, then combine
    the image with its Gaussian blur as ``4*image - 4*blur + 128``.

    Args:
        path: location of the image file, passed to ``cv2.imread``.
        sigmaX: Gaussian sigma passed to ``cv2.GaussianBlur``.

    Returns:
        The processed image as returned by ``cv2.addWeighted``.

    Raises:
        OSError: if ``cv2.imread`` could not read ``path``.
    """
    image = cv2.imread(path)
    if image is None:
        # imread signals failure by returning None; fail here with the offending
        # path instead of an opaque error inside cvtColor.
        raise OSError(f'cv2.imread could not read image: {path!r}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)
    image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
    blurred = cv2.GaussianBlur(image, (0, 0), sigmaX)
    return cv2.addWeighted(image, 4, blurred, -4, 128)

IMG_SIZE = 512

def _load_format(path, convert_mode, after_open)->Image:
    """Open ``path`` through ``load_ben_color`` and wrap the result as a fastai ``Image``.

    ``convert_mode`` and ``after_open`` are accepted but not used by this function.

    Args:
        path: image file location.
        convert_mode: ignored.
        after_open: ignored.

    Returns:
        ``Image`` built from the processed pixels converted to a float32 tensor
        and divided by 255.
    """
    # Same steps the inline code performed: read, BGR->RGB, crop, resize, blur mix with sigma 10.
    pixels = load_ben_color(path, sigmaX=10)
    tensor = pil2tensor(pixels, np.float32)
    tensor.div_(255)
    return Image(tensor) #return fastai Image format

# Monkey-patch: rebind vision.data.open_image to _load_format.
# NOTE(review): presumably this makes fastai ImageList items load through the
# crop/resize/blur pipeline above — confirm against the installed fastai version.
vision.data.open_image = _load_format

In [None]:
# # copy pretrained weights for resnet50 to the folder fastai will search by default
Path('/tmp/.cache/torch/checkpoints/').mkdir(exist_ok=True, parents=True)
# !cp '../input/resnet50/resnet50.pth' '/tmp/.cache/torch/checkpoints/resnet50-19c8e357.pth'
#resnet50-19c8e357
#resnet34-333f7ec4

In [None]:
print(os.listdir('../input/resized-2015-2019-blindness-detection-images/'))

In [None]:
def get_df():
    """Read the 2015 training labels and the APTOS 2019 sample submission.

    Returns:
        A ``(df, test_df)`` pair. ``df`` is the label table with the ``image``
        column replaced by a ``path`` column pointing at the matching ``.jpg``
        file, rows in random order with a fresh 0..n-1 index. ``test_df`` is the
        sample submission CSV as read.
    """
    data_root = os.path.join('..', 'input/resized-2015-2019-blindness-detection-images/')
    image_dir = os.path.join(data_root,'resized train 15/')
    labels_csv = os.path.join(data_root, 'labels/trainLabels15.csv')

    def to_image_path(image_name):
        return os.path.join(image_dir, '{}.jpg'.format(image_name))

    labels = pd.read_csv(labels_csv)
    labels['path'] = labels['image'].map(to_image_path)
    labels = labels.drop(columns=['image'])
    # frac=1 samples every row, i.e. a random permutation; the old index is discarded.
    shuffled = labels.sample(frac=1).reset_index(drop=True)

    submission = pd.read_csv('../input/aptos2019-blindness-detection/sample_submission.csv')
    return shuffled, submission

df, test_df = get_df()

In [None]:
print(df.head())
df.hist()

In [None]:
df = df.rename(columns={"level": "diagnosis"})

In [None]:
# Undersample class 0: pick remove_n distinct index labels among the rows with
# diagnosis == 0 and drop them.
remove_n = 1000
drop_indices = np.random.choice(df[df.diagnosis==0].index, remove_n, replace=False)
# df_sub keeps df's index labels (no reset_index here), so its index has gaps
# after the drop; position and label no longer coincide.
df_sub = df.drop(drop_indices)

In [None]:
# Create a stratified validation split: with n_splits=5 the first fold holds out about 20% of the rows.
# fastai's own train/validation split has no stratify option. According to the lecturer,
# deep learning copes with class imbalance quite well, but it is not clear that this holds here.
from sklearn.model_selection import StratifiedKFold
# random_state has no effect unless shuffle=True (and recent scikit-learn rejects
# that combination), so it is omitted; the resulting folds are the same. The rows
# were already shuffled when the dataframe was built.
cv = StratifiedKFold(n_splits=5)
# Only the first of the five folds is used: 4/5 of the rows train, 1/5 validate.
tr_ids, val_ids = next(cv.split(df_sub.path, df_sub.diagnosis))
print(len(tr_ids), len(val_ids))
# split() yields positional indices, while df_sub's index labels have gaps after
# the undersampling drop, so the validation rows must be selected by position.
_ = df_sub.iloc[val_ids].hist()

In [None]:
# print(val_ids)
# print(tr_ids)

In [None]:
# import zipfile
# with zipfile.ZipFile('./train_images.zip', 'r') as zip_ref:
#     zip_ref.extractall('./train_images')

In [None]:
# with zipfile.ZipFile('./test_images.zip', 'r') as zip_ref:
#     zip_ref.extractall('./test_images')

In [None]:
# Augmentation spec handed to .transform() below: a pair of transform lists.
# NOTE(review): presumably (training transforms, validation transforms) as fastai
# expects — confirm. The first list applies a dihedral flip always, and rotation,
# zoom, brightness and contrast each with p=0.75; the second list only crop_pad.
tfms = ([
#     RandTransform(tfm=TfmCrop (crop_pad), kwargs={'row_pct': (0.3, 1), 'col_pct': (0.1, 0.9),\
#                                                        'padding_mode': 'reflection'}, p=1.0, resolved={}, do_run=True, is_random=True, use_on_y=True),
#   RandTransform(tfm=TfmPixel (rgb_randomize), kwargs={'channel':0, 'thresh':0.1}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True),
#   RandTransform(tfm=TfmPixel (rgb_randomize), kwargs={'channel':2, 'thresh':0.1}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmAffine (dihedral_affine), kwargs={}, p=1.0, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmAffine (rotate), kwargs={'degrees': (-10.0, 10.0)}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmAffine (zoom), kwargs={'scale': (1.0, 1.01), 'row_pct': (0, 1), 'col_pct': (0, 1)}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmLighting (brightness), kwargs={'change': (0.4, 0.6)}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmLighting (contrast), kwargs={'scale': (0.8, 1.25)}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True)],
  [RandTransform(tfm=TfmCrop (crop_pad), kwargs={}, p=1.0, resolved={}, do_run=True, is_random=True, use_on_y=True)])


In [None]:
# Create the data with fastai's data block API, built from an image list with labels.
# Simple data augmentation with flip and rotate, since this is an eyeball image. The image is normalized using the default imagenet_stats; another possible
# option would be to use aptos19_stats, which I am not yet sure how to derive.
# create image data bunch
# 
# create image data bunch
# data = ImageDataBunch.from_df('./', 
#                               df=df, 
#                               valid_pct=0.2,
#                               folder="../input/diabetic-retinopathy-resized/resized_train",
#                               suffix=".jpeg",
#                               ds_tfms=tfms,
#                               size=224,
#                               resize_method=ResizeMethod.SQUISH,
#                               bs=96, 
#                               num_workers=0,
#                              label_col='level', label_delim=',').normalize(imagenet_stats)

In [None]:
# data.show_batch(rows=3, figsize=(7,6))

In [None]:
# DataBunch for the undersampled 2015 data: items come from df_sub's 'path' column,
# val_ids (from the stratified split) mark the validation items, labels come from
# the 'diagnosis' column as FloatList, images are transformed with tfms at size 224,
# batched 192 at a time with 16 workers and normalised with imagenet_stats.
# NOTE(review): FloatList presumably makes this a regression target — confirm.
data = (ImageList.from_df(df=df_sub,path='./', cols='path') 
        .split_by_idx(val_ids) 
        .label_from_df(cols='diagnosis',label_cls=FloatList) 
        .transform(tfms,size=224) 
        .databunch(bs=192,num_workers=16) 
        .normalize(imagenet_stats)  
       )

In [None]:
import torchvision.models as models
# Backbone is created without pretrained weights (pretrained=False); a checkpoint
# is loaded into the learner in a later cell.
wide_resnet50_2 = models.wide_resnet50_2(pretrained=False)
# Head stacked on the backbone output: BatchNorm1d(1000) -> ReLU -> Flatten ->
# Linear(1000, 1), i.e. a single scalar per image.
# NOTE(review): 1000 presumably matches the backbone's output width — confirm.
model = nn.Sequential(
#     nn.Conv2d(1, 8, 3, padding=1, bias=False),           # conv1
#     *wresgroup(8, 8, k=2, N=6, dropout=0.3),             # conv2
#     *wresgroup(16, 16, k=2, N=6, stride=2, dropout=0.3), # conv3
#     *wresgroup(32, 32, k=2, N=6, stride=2, dropout=0.3), # conv4
    wide_resnet50_2,
    nn.BatchNorm1d(1000),
    nn.ReLU(inplace=True),
#     nn.AvgPool2d(1),                                     # the output of the last wresgroup is 7x7
    Flatten(),
    nn.Linear(1000, 1)
)

In [None]:
import math
import torch
from torch.optim.optimizer import Optimizer, required
import itertools as it
#from torch.optim import Optimizer
#credit - Lookahead implementation from LonePatient - https://github.com/lonePatient/lookahead_pytorch/blob/master/optimizer.py
#credit2 - RAdam code by https://github.com/LiyuanLucasLiu/RAdam/blob/master/radam.py


class Ranger(Optimizer):
    """RAdam parameter update followed by a Lookahead interpolation every ``k`` steps.

    Each call to ``step`` first applies the RAdam update to every parameter that
    has a gradient. Then, for each param group whose step counter reaches a
    multiple of ``k``, the slow copy of every parameter is moved a fraction
    ``alpha`` towards the current value and the parameter is overwritten with it.

    Args:
        params: iterable of parameters or param-group dicts.
        lr: learning rate (must be > 0).
        alpha: Lookahead interpolation factor, in [0, 1].
        k: number of steps between Lookahead synchronisations (>= 1).
        betas: coefficients for the running averages of the gradient and its square.
        eps: term added to the denominator (must be > 0).
        weight_decay: decay factor; each update shrinks the parameter by
            ``weight_decay * lr`` times its value when non-zero.

    Raises:
        ValueError: if ``alpha``, ``k``, ``lr`` or ``eps`` is out of range.
    """

    def __init__(self, params, lr=1e-2, alpha=0.5, k=10, betas=(.9,0.999), eps=1e-8, weight_decay=1e-6):
        #parameter checks
        if not 0.0 <= alpha <= 1.0:
            raise ValueError(f'Invalid slow update rate: {alpha}')
        if not 1 <= k:
            raise ValueError(f'Invalid lookahead steps: {k}')
        if not lr > 0:
            raise ValueError(f'Invalid Learning Rate: {lr}')
        if not eps > 0:
            raise ValueError(f'Invalid eps: {eps}')

        #prep defaults and init torch.optim base
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        super().__init__(params,defaults)

        #per-group counter that drives the lookahead synchronisation
        for group in self.param_groups:
            group["step_counter"] = 0

        #look ahead params
        self.alpha = alpha
        self.k = k

        #lookahead (slow) weights: detached copies of the parameters at construction time
        self.slow_weights = [[p.clone().detach() for p in group['params']]
                                for group in self.param_groups]

        #don't use grad for lookahead weights
        for w in it.chain(*self.slow_weights):
            w.requires_grad = False

    def __setstate__(self, state):
        """Restore optimizer state; delegates to the base class."""
        super(Ranger, self).__setstate__(state)

    def step(self, closure=None):
        """Apply one RAdam update and, every ``k``-th call, the Lookahead update.

        Args:
            closure: accepted for API compatibility but never invoked. Some
                callers pass the loss as a float rather than a callable, so it
                is deliberately ignored.

        Returns:
            Always ``None``.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None

        #------------ radam
        for group in self.param_groups:
            beta1, beta2 = group['betas']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                # Work in fp32 and copy back, so half-precision parameters are supported.
                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']

                # Keyword forms replace the deprecated positional-scalar overloads.
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                state['step'] += 1
                step = state['step']

                # Computed per parameter from this group's own lr and betas. A cache
                # shared across groups would hand one group's lr-scaled step size to
                # another group that is at the same step count.
                beta2_t = beta2 ** step
                N_sma_max = 2 / (1 - beta2) - 1
                N_sma = N_sma_max - 2 * step * beta2_t / (1 - beta2_t)
                if N_sma > 5:
                    step_size = group['lr'] * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** step)
                else:
                    step_size = group['lr'] / (1 - beta1 ** step)

                if group['weight_decay'] != 0:
                    p_data_fp32.add_(p_data_fp32, alpha=-group['weight_decay'] * group['lr'])

                if N_sma > 5:
                    # Adaptive update using the second-moment estimate.
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(exp_avg, denom, value=-step_size)
                else:
                    # Early steps: plain momentum update without the adaptive denominator.
                    p_data_fp32.add_(exp_avg, alpha=-step_size)

                p.data.copy_(p_data_fp32)

        #---------------- end radam step

        #look ahead tracking and updating if latest batch = k
        for group,slow_weights in zip(self.param_groups,self.slow_weights):
            group['step_counter'] += 1
            if group['step_counter'] % self.k != 0:
                continue
            for p,q in zip(group['params'],slow_weights):
                if p.grad is None:
                    continue
                # slow <- slow + alpha * (fast - slow); fast <- slow
                q.data.add_(p.data - q.data, alpha=self.alpha)
                p.data.copy_(q.data)

        return loss

# partial with no bound arguments: calling optar(...) forwards everything to
# Ranger(...), so Ranger's own defaults apply.
optar = partial(Ranger)

In [None]:
def quad_kappa(y_pred, y):
    """Quadratic-weighted Cohen's kappa between rounded predictions and targets.

    Both tensors are detached, moved to the CPU and flattened before being handed
    to scikit-learn, so the metric works for CPU and GPU tensors alike. The result
    is placed on ``y_pred``'s device rather than a hard-coded ``cuda:0``.

    Args:
        y_pred: tensor of continuous predictions; rounded to the nearest integer.
        y: tensor of target values.

    Returns:
        A 0-d tensor holding the kappa score.
    """
    rounded = torch.round(y_pred.detach().float()).cpu().numpy().ravel()
    target = y.detach().cpu().numpy().ravel()
    score = cohen_kappa_score(rounded, target, weights='quadratic')
    return torch.tensor(score, device=y_pred.device)

In [None]:
# Learner for the 2015 data, using the Ranger optimizer factory and quad_kappa as metric.
# loss_func is left commented out, so the Learner's default applies.
# NOTE(review): presumably that default comes from the data's label type — confirm.
# Callbacks: early stopping on quad_kappa (patience 2, min_delta 0.001),
# ReduceLROnPlateau with its defaults, and saving the best-quad_kappa model as 'best2015'.
learn = Learner(data, model, metrics=quad_kappa,
#                     loss_func = MSELossFlat(),
                    opt_func = optar,
                    callback_fns=[
                              partial(EarlyStoppingCallback, monitor='quad_kappa', min_delta=0.001, patience=2),
                              partial(ReduceLROnPlateauCallback),
                              partial(SaveModelCallback, every = 'improvement', monitor='quad_kappa', name='best2015')],
                    model_dir="/tmp/")

In [None]:
!ls ../input/wide-resnet-50-2-bottleneck-linear

In [None]:
!cp '../input/wide-resnet-50-2-bottleneck-linear/wideresnet-50-2-bottleneck-linearhead.pth' '/tmp/'

In [None]:
# Load the checkpoint copied into /tmp/ by the previous cell (model_dir is "/tmp/").
learn.load('wideresnet-50-2-bottleneck-linearhead');
learn.to_fp16()
# NOTE(review): `learn.summary` is only referenced here, not called, so this line
# produces no summary — confirm whether `learn.summary()` was intended.
learn.summary;

In [None]:
# learn.lr_find()

In [None]:
# learn.recorder.plot(suggestion=True)

In [None]:
# lrs = learn.recorder.lrs
# losses = learn.recorder.losses
# mg = (np.gradient(np.array(losses))).argmin()
# ml = np.argmin(losses)
# min_grad_lr = lrs[mg]
# print(min_grad_lr)
# min_loss_lr0 = lrs[ml]/10
# print(min_loss_lr0)
min_loss_lr0 = 3e-3

In [None]:
# Train for up to 8 epochs at min_loss_lr0, then reload the 'best2015' checkpoint
# written by the SaveModelCallback configured on this learner.
learn.fit_one_cycle(8,min_loss_lr0)
learn.load('best2015')
# learn.destroy()

In [None]:
# Attach the APTOS 2019 test images (listed in the sample submission) as the test
# set and predict with the model trained on the 2015 data. preds1 is blended with
# the second model's predictions further down.
sample_df = pd.read_csv('../input/aptos2019-blindness-detection/sample_submission.csv')
# The value of head() is discarded here because it is not the last expression of the cell.
sample_df.head()
learn.data.add_test(ImageList.from_df(sample_df,'../input/aptos2019-blindness-detection',folder='test_images',suffix='.png'))
preds1,y = learn.get_preds(DatasetType.Test)

In [None]:
# from sklearn.model_selection import StratifiedKFold
# cv = StratifiedKFold(n_splits=4, random_state=888)
# tr_ids2, val_ids2 = next(cv.split(df_sub.path, df_sub.diagnosis))
# print(len(tr_ids2), len(val_ids2))
# _ = df_sub.loc[val_ids2].hist()
# data = (ImageList.from_df(df=df_sub,path='./', cols='path') 
#         .split_by_idx(tr_ids) 
#         .label_from_df(cols='diagnosis',label_cls=FloatList) 
#         .transform(tfms,size=224) 
#         .databunch(bs=176,num_workers=16) 
#         .normalize(imagenet_stats)  
#        )
# learn.data = data
# learn.to_fp16()

In [None]:
# learn.lr_find()
# lrs = learn.recorder.lrs
# losses = learn.recorder.losses
# learn.recorder.plot(suggestion=True)
# # mg = (np.gradient(np.array(losses))).argmin()
# ml = np.argmin(losses)
# # min_grad_lr = lrs[mg]
# # print(min_grad_lr)
# min_loss_lr = lrs[ml]/10
# min_loss_lr

In [None]:
# learn.fit_one_cycle(6,5e-5)

In [None]:
# Build the APTOS 2019 training table the same way get_df() builds the 2015 one:
# read train.csv, turn each id_code into the path of its .png, drop id_code, shuffle.
# NOTE(review): only the CUDA generator is reseeded here; the shuffle below uses
# pandas/NumPy randomness — confirm whether it should be seeded as well.
torch.cuda.manual_seed(888)
base_image_dir2 = os.path.join('..', 'input/aptos2019-blindness-detection/')
train_dir2 = os.path.join(base_image_dir2,'train_images/')
df2 = pd.read_csv(os.path.join(base_image_dir2, 'train.csv'))
df2['path'] = df2['id_code'].map(lambda x: os.path.join(train_dir2,'{}.png'.format(x)))
df2 = df2.drop(columns=['id_code'])
df2 = df2.sample(frac=1).reset_index(drop=True) #shuffle dataframe

In [None]:
# DataBunch for the 2019 data: same pipeline as the 2015 one, except the
# validation set is a random 20% (split_by_rand_pct) instead of a stratified fold.
data = (ImageList.from_df(df=df2,path='./',cols='path') 
        .split_by_rand_pct(0.2) 
        .label_from_df(cols='diagnosis',label_cls=FloatList) 
        .transform(tfms,size=224) 
        .databunch(bs=192,num_workers=16) 
        .normalize(imagenet_stats)  
       )

In [None]:
learn.destroy()

In [None]:
# Learner for the 2019 data, reusing the same `model` object. opt_func is commented
# out here, so the Learner's default optimizer is used instead of Ranger.
# Callbacks match the 2015 learner plus ShowGraph; the best model is saved as 'best2019'.
learn = Learner(data, model, metrics=quad_kappa,
#                     loss_func = MSELossFlat(),
#                     opt_func = optar,
                    callback_fns=[
                              partial(EarlyStoppingCallback, monitor='quad_kappa', min_delta=0.001, patience=2),
                              partial(ReduceLROnPlateauCallback),
                              ShowGraph,
                              partial(SaveModelCallback, every = 'improvement', monitor='quad_kappa', name='best2019')],
                    model_dir="/tmp/")

In [None]:
# Start the 2019 fine-tuning from the best checkpoint of the 2015 run.
learn.load('best2015');
# learn.data = data
learn.to_fp16()
# learn.lr_find()

In [None]:
# # learn.recorder.plot(suggestion=True)
# lrs = learn.recorder.lrs
# losses = learn.recorder.losses
# learn.recorder.plot(suggestion=True)
# mg = (np.gradient(np.array(losses))).argmin()
# ml = np.argmin(losses)
# min_grad_lr = lrs[mg]
# print(min_grad_lr)
# min_loss_lr = lrs[ml]/10
# min_loss_lr

In [None]:
# !cp '../input/newcrop/bestmodel3.pth' '/tmp/'
# !ls /tmp

In [None]:
# learn.save('stage-1');
# learn.unfreeze()
# learn.lr_find(start_lr = 1e-10)

In [None]:
# learn.recorder.plot(suggestion=True)

In [None]:
# Fine-tune on the 2019 data for up to 7 epochs at a learning rate of 1e-4.
learn.fit_one_cycle(7, 1e-4)
# learn.freeze_to(-3)
# learn.fit_one_cycle(10,slice(1e-6,1e-4))

In [None]:
# Reload the checkpoint saved under 'best2019' by SaveModelCallback during fine-tuning.
learn.load('best2019');
# valid_preds = learn.get_preds(ds_type=DatasetType.Valid)
# train_preds = learn.get_preds(ds_type=DatasetType.Train)

In [None]:
#https://www.kaggle.com/abhishek/optimizer-for-quadratic-weighted-kappa
class OptimizedRounder(object):
    """Map continuous predictions to the classes 0-4 using four cut points.

    ``fit`` searches for the cut points that maximise quadratic-weighted kappa;
    ``predict`` applies any given cut points.
    """

    def __init__(self):
        # Placeholder until fit() stores the optimisation result here.
        self.coef_ = 0

    @staticmethod
    def _apply_thresholds(X, coef):
        """Return a copy of ``X`` with every value replaced by its class 0-4.

        A value gets the first matching label in this order: 0 if it is below
        ``coef[0]``, 1 if in ``[coef[0], coef[1])``, 2 if in ``[coef[1], coef[2])``,
        3 if in ``[coef[2], coef[3])``, otherwise 4. The output keeps the shape
        and dtype of ``np.copy(X)``.
        """
        preds = np.copy(X)
        labels = np.copy(preds)
        labels[...] = 4
        # Assigned from lowest to highest priority so an earlier rule overrides a
        # later one, which reproduces the if/elif order even when the cut points
        # are not sorted.
        labels[(preds >= coef[2]) & (preds < coef[3])] = 3
        labels[(preds >= coef[1]) & (preds < coef[2])] = 2
        labels[(preds >= coef[0]) & (preds < coef[1])] = 1
        labels[preds < coef[0]] = 0
        return labels

    def _kappa_loss(self, coef, X, y):
        """Negative quadratic-weighted kappa of ``X`` cut at ``coef`` against ``y``."""
        X_p = self._apply_thresholds(X, coef)
        # Uses the cohen_kappa_score imported at the top of the notebook.
        ll = cohen_kappa_score(y, X_p, weights='quadratic')
        return -ll

    def fit(self, X, y):
        """Optimise the four cut points with Nelder-Mead and print the kappa reached.

        Args:
            X: continuous predictions.
            y: true classes.
        """
        # Imported explicitly: `import scipy as sp` alone does not guarantee that
        # the `sp.optimize` submodule has been loaded.
        from scipy.optimize import minimize

        initial_coef = [0.5, 1.5, 2.5, 3.5]
        self.coef_ = minimize(self._kappa_loss, initial_coef, args=(X, y), method='nelder-mead')
        print(-self._kappa_loss(self.coef_['x'], X, y))

    def predict(self, X, coef):
        """Return ``X`` converted to classes 0-4 using the cut points ``coef``."""
        return self._apply_thresholds(X, coef)

    def coefficients(self):
        """Return the cut points found by ``fit`` (only valid after ``fit`` has run)."""
        return self.coef_['x']

In [None]:
sample_df = pd.read_csv('../input/aptos2019-blindness-detection/sample_submission.csv')
sample_df.head()

In [None]:
learn.data.add_test(ImageList.from_df(sample_df,'../input/aptos2019-blindness-detection',folder='test_images',suffix='.png'))

In [None]:
# def run_subm(learn=learn, coefficients=[0.57, 1.57, 2.57, 3.57])
#     opt = OptimizedRounder()
#     preds,y = learn.get_preds(DatasetType.Test)
#     tst_pred = opt.predict(preds, coefficients)
#     sample_df.diagnosis = tst_pred.astype(int)
#     sample_df.to_csv('submission.csv',index=False)
#     print ('done')

In [None]:
# Fixed cut points handed to opt.predict() below; opt.fit() is not called anywhere
# in the visible notebook, so these are not optimised values.
coefficients=[0.57, 1.57, 2.57, 3.57]
opt = OptimizedRounder()

In [None]:
# Predict with the 2019 fine-tuned model and blend: 70% of the earlier preds1
# (model trained on 2015 data) plus 30% of preds2.
preds2,y = learn.get_preds(DatasetType.Test)
preds = (0.7*preds1.numpy() + 0.3*preds2.numpy())

In [None]:
# Convert the blended predictions to classes with the fixed cut points and write
# the submission file.
tst_pred = opt.predict(preds, coefficients)
# NOTE(review): tst_pred has the shape of preds; if that is (n, 1) this assignment
# relies on pandas accepting a single-column 2-D array — confirm, or flatten first.
sample_df.diagnosis = tst_pred.astype(int)
sample_df.to_csv('submission.csv',index=False)
sample_df.diagnosis.value_counts()

In [None]:
# run_subm()

In [None]:
!mv {learn.model_dir}/*.pth .
os.listdir()