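I initially forked this notebook from this [kernel](https://www.kaggle.com/khursani8/fast-ai-starter-resnet34), changed the architecture to ResNet 50, added augmentation, and did some initial tuning of parameters such as the learning rate. Note that the training cell below currently builds the learner with DenseNet-201 rather than ResNet 50.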

# Library imports

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Show which datasets are mounted under the input directory.
print(os.listdir(path="../input/"))

# Any results you write to the current directory are saved as output.

['aptos2019-blindness-detection', 'fastai-pretrained-models']


In [2]:
import fastai
from fastai.vision import *
fastai.__version__
import random
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), exports ``PYTHONHASHSEED``, and configures
    cuDNN for repeatable runs.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so this export only
    # influences Python processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current device; this call is a
    # no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The benchmark autotuner may select different convolution algorithms
    # from run to run, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

# One fixed seed for the whole notebook; it is also passed to the
# StratifiedKFold splitter further down.
SEED = 999
seed_everything(seed=SEED)

# Report whether the cuDNN backend is switched on, then show the fastai
# version as the cell's output.
print('Make sure cudnn is enabled:', torch.backends.cudnn.enabled)
fastai.__version__

Make sure cudnn is enabled: True


'1.0.54'

In [3]:
# Copy every pretrained weight file shipped in the `fastai-pretrained-models`
# dataset (not only resnet50) into /tmp/.cache/torch/checkpoints/.
# NOTE(review): this is presumably the checkpoint cache directory that
# torch/fastai consult in this Kaggle image before attempting a download -
# confirm, since the kernel may run without internet access.
Path('/tmp/.cache/torch/checkpoints/').mkdir(exist_ok=True, parents=True)
!cp /kaggle/input/fastai-pretrained-models/* /tmp/.cache/torch/checkpoints/

# Data

In [4]:
# Root folder of the competition data (CSV files and image folders).
PATH = Path('../input') / 'aptos2019-blindness-detection'

In [5]:
# Training labels: one row per image with its `id_code` and `diagnosis` grade.
df_train = pd.read_csv(filepath_or_buffer=PATH / 'train.csv')
df_train.head()

Unnamed: 0,id_code,diagnosis
0,000c1434d8d7,2
1,001639a390f0,4
2,0024cdab0c1e,1
3,002c21358ce6,0
4,005b95c28852,0


In [6]:
# List the weight files available in the attached pretrained-models dataset.
!ls /kaggle/input/fastai-pretrained-models/

densenet121-a639ec97.pth  resnet50-19c8e357.pth
densenet169-b2777c0a.pth  squeezenet1_0-a815701f.pth
densenet201-c1103571.pth  squeezenet1_1-f364aa15.pth
resnet101-5d3b4d8f.pth	  vgg16_bn-6c64b313.pth


In [7]:
# Number of training images per diagnosis grade, most frequent first.
df_train['diagnosis'].value_counts()

0    1805
2     999
1     370
4     295
3     193
Name: diagnosis, dtype: int64

So our training set is clearly imbalanced: the largest share of images (1805 of 3662, about 49%) is labelled 0, i.e. normal (without illness).

# Model

In [8]:
class FocalLoss(nn.Module):
    """Focal loss for multi-class classification on raw logits.

    Each sample's cross-entropy ``ce`` is scaled by
    ``alpha * (1 - exp(-ce)) ** gamma`` and the scaled values are averaged
    over the batch.

    Args:
        alpha: Constant multiplier applied to every sample's loss.
        gamma: Exponent of the ``(1 - exp(-ce))`` modulating factor.
    """

    def __init__(self, alpha=1., gamma=1.):
        super().__init__()
        self.alpha, self.gamma = alpha, gamma

    def forward(self, inputs, targets, **kwargs):
        """Return the mean focal loss; extra keyword arguments are ignored."""
        per_sample_ce = nn.functional.cross_entropy(inputs, targets, reduction='none')
        # exp(-ce) recovers the softmax probability assigned to the target class.
        target_prob = torch.exp(-per_sample_ce)
        modulating = (1 - target_prob) ** self.gamma
        return (self.alpha * modulating * per_sample_ce).mean()

    
def roc_score(inp, target):
    """ROC AUC of the arg-max class predictions against ``target``.

    Args:
        inp: Tensor of per-class scores; the index of the largest score along
            dimension 1 is used as the predicted label.
        target: Tensor of ground-truth labels.

    Returns:
        A 0-dim float tensor holding the value returned by scikit-learn's
        ``roc_auc_score``.
    """
    # Imported locally: no cell visible in this notebook imports the name.
    from sklearn.metrics import roc_auc_score

    _, indices = inp.max(1)
    # scikit-learn operates on host NumPy arrays, so detach and move the
    # tensors off the GPU before handing them over.
    y_true = target.detach().cpu().numpy()
    y_pred = indices.detach().cpu().numpy()
    # NOTE(review): hard labels (not probabilities) are scored here, and the
    # labels in this notebook have five classes - roc_auc_score presumably
    # rejects that input; confirm before adding this to a learner's metrics.
    return torch.Tensor([roc_auc_score(y_true, y_pred)])[0]

In [9]:
from sklearn.model_selection import StratifiedKFold
# 5-fold stratified splitter, shuffled with the notebook-wide seed.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

# Augmentation pipeline handed to every fold's DataBunch.
# NOTE(review): fastai documents max_rotate in degrees, so 0.10 is close to
# no rotation at all - confirm whether 10 was intended.
# The extra symmetric_warp is built with p=0, so it presumably never fires.
tfms = get_transforms(
    do_flip=True,
    flip_vert=True,
    max_rotate=0.10,
    max_zoom=1.2,
    max_warp=0.2,
    max_lighting=0.2,
    xtra_tfms=[symmetric_warp(magnitude=(-0, 0), p=0)],
)

# Loss and metric objects reused by every fold.
loss_func = FocalLoss()
kappa = KappaScore()
kappa.weights = "quadratic"

# Validation kappa of each fold is appended here by the training loop.
kp_score = []

# Placeholders so the training loop can `del` both names on its first pass.
learn = data_fold = ""

In [10]:
# Stratified cross-validation: for every split produced by `skf`, build a fresh
# DataBunch and learner, train for 7 one-cycle epochs and record the fold's
# validation kappa in `kp_score`.
# Only the learner from the final iteration survives the loop; it is the one
# used by the prediction cells further down.
for train_index, val_index in skf.split(df_train["id_code"], df_train["diagnosis"]):
    # Drop the previous fold's objects (the "" placeholders on the first pass)
    # and collect garbage before the next fold is built.
    del learn, data_fold
    gc.collect()
    print("TRAIN:", train_index, "VALIDATE:", val_index)
    # Images are read from the train_images folder using `id_code` + '.png',
    # split by the fold's positional indices, labelled from `diagnosis`,
    # augmented/resized to 224 px (squish) and normalised with imagenet_stats.
    data_fold = (ImageList.from_df(df_train, PATH, folder='/train_images/', cols="id_code",suffix='.png')
        .split_by_idxs(train_index, val_index)
        .label_from_df(cols='diagnosis')
        .transform(tfms,size=224,resize_method=ResizeMethod.SQUISH,padding_mode='zeros') #Data transform
        .databunch(bs=64)).normalize(imagenet_stats)
    # DenseNet-201 backbone with the shared focal loss and kappa metric;
    # checkpoints are written under /kaggle/working.
    learn = cnn_learner(data_fold, base_arch=models.densenet201 , metrics=[error_rate, kappa], loss_func=loss_func, model_dir="/kaggle/working")
    learn.fit_one_cycle(7, max_lr=slice(1e-06,1e-03))
    # validate() is unpacked as loss followed by the metrics in the order they
    # were passed above (error_rate, kappa).
    loss, err, kp = learn.validate()
    # NOTE(review): no training follows this unfreeze - the learner is replaced
    # on the next iteration, so this only changes the state of the last fold's
    # learner. Confirm whether a fine-tuning step was intended here.
    learn.unfreeze()
    kp_score.append(kp.numpy())

# Predictions

In [11]:
# Submission template: one row per test image (`id_code`) with a placeholder
# `diagnosis` column.
sample_df = pd.read_csv(filepath_or_buffer=PATH / 'sample_submission.csv')
sample_df.head()

Unnamed: 0,id_code,diagnosis
0,0005cfc8afb6,0
1,003f0afdcd15,0
2,006efc72b638,0
3,00836aaacf06,0
4,009245722fa4,0


In [12]:
# Attach the test images (looked up as `<id_code>.png` in test_images) to the
# DataBunch of the learner left over from the last fold.
test_items = ImageList.from_df(sample_df, PATH, folder='test_images', suffix='.png')
learn.data.add_test(test_items)

In [13]:
# Test-time-augmentation predictions on the attached test set.
# `y` is whatever TTA returns as targets for the test set; it is not used by
# the cells below.
preds,y = learn.TTA(ds_type=DatasetType.Test)

In [14]:
# Turn the per-class scores into hard labels (index of the highest score).
# The tensor is converted to a host NumPy array first so pandas stores plain
# integers rather than tensor objects, and the column is set with bracket
# syntax, which is the documented way to assign a DataFrame column.
sample_df['diagnosis'] = preds.argmax(dim=1).cpu().numpy()
sample_df.head()

Unnamed: 0,id_code,diagnosis
0,0005cfc8afb6,1
1,003f0afdcd15,2
2,006efc72b638,2
3,00836aaacf06,2
4,009245722fa4,2


In [15]:
# Write the submission file without the DataFrame index column.
sample_df.to_csv(path_or_buf='submission.csv', index=False)

In [16]:
# Persist the learner that remains after the cross-validation loop (the one
# trained on the last fold) under /kaggle/working.
learn.save('/kaggle/working/aptos201')