In [1]:
import os
import copy
import shutil
import random
import logging
from pprint import pformat
from datetime import datetime
from contextlib import contextmanager
from pathlib import Path
from time import time, sleep
from math import cos, pi

import cv2
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from tqdm import tqdm
from PIL import Image
import torchvision
import sklearn.metrics
import torch.nn.functional as F
from sklearn.model_selection import KFold
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.optim.lr_scheduler import _LRScheduler
from albumentations import CLAHE, HorizontalFlip, Compose, HueSaturationValue, RandomBrightness, RandomContrast, Normalize, Resize, ShiftScaleRotate, VerticalFlip, Cutout

In [2]:
class Config():
    """Hyper-parameters and file locations for one training run.

    Args:
        home: selects the number of data-loader workers; a truthy value
            gives 16 workers, a falsy value gives 4.
    """

    def __init__(self, home=True):
        # --- reproducibility / hardware ---
        self.seed = 71
        self.device_name = 'cuda:0'
        self.num_workers = 4 if not home else 16

        # --- data locations ---
        self.train_dir = '../input/aptos2019-blindness-detection/train_images/'
        self.train_csv = '../input/aptos2019-blindness-detection/train.csv'
        self.test_dir = '../input/aptos2019-blindness-detection/test_images/'
        self.test_csv = '../input/aptos2019-blindness-detection/sample_submission.csv'

        # --- data / model shape ---
        self.image_size = 256
        self.classes_num = 5

        # --- cross-validation ---
        self.n_splits = 5
        self.fold = 0

        # --- optimisation ---
        self.batch_size = 16
        self.accum_time = 4  # batches accumulated per optimiser step
        self.num_epoch = 128
        self.lr_step_epoch = 64
        self.init_lr = 1e-3
        self.eta_min = 1e-6

        # --- mixup ---
        self.mixup = True
        self.alpha = 1


# Module-wide configuration instance read by the rest of the notebook.
conf = Config(home=False)

In [3]:
def now():
    """Return the current local time as a ``YYYY_MM_DD_HH_MM_SS`` string."""
    stamp = datetime.now()
    return f"{stamp:%Y_%m_%d_%H_%M_%S}"


def set_seed(seed):
    """Seed the Python, NumPy and PyTorch global RNGs with ``seed``.

    Also sets ``torch.backends.cudnn.deterministic`` to ``True``.
    """
    torch.backends.cudnn.deterministic = True
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    
def load_csv(path):
    """Read the CSV at ``path`` into a DataFrame using pandas' default options."""
    frame = pd.read_csv(path)
    return frame


def count_parameter(model):
    """Return the total number of scalar elements over ``model.parameters()``."""
    total = 0
    for param in model.parameters():
        total += param.numel()
    return total


def get_lr(optimizer):
    """Return the learning rate(s) currently stored in ``optimizer.param_groups``.

    Returns:
        The bare value when there is exactly one parameter group, otherwise
        a list with one entry per group (empty when there are no groups).
    """
    rates = [group['lr'] for group in optimizer.param_groups]
    return rates[0] if len(rates) == 1 else rates

In [4]:
# setup for kernel
def setup(exp_name, config):
    """Initialise an experiment: result directory, RNG seeds, device and logger.

    Args:
        exp_name: experiment name; used as the result sub-directory and as
            the logger name.
        config: object exposing ``seed`` and ``device_name``; its ``__dict__``
            is written to the log.

    Returns:
        Tuple ``(device, log, result_dir)``.

    Raises:
        FileExistsError: if the result directory already exists (``mkdir`` is
            called without ``exist_ok``).
    """
    result_dir = Path(f'../result/{exp_name}/')
    result_dir.mkdir(parents=True)
    # shutil.copy("Train.ipynb", result_dir)

    set_seed(config.seed)
    device = torch.device(config.device_name)

    # The experiment log lives next to the other result artefacts.
    logger = Logger(exp_name, result_dir / 'exp.log')
    logger.info("configuration is following...")
    logger.info(pformat(config.__dict__))

    return device, logger, result_dir

In [5]:
class Logger:
    """Logging utility class for monitoring and debugging.

    Wraps a named ``logging.Logger`` that writes to a file and to a stream
    handler. Any attribute not defined here (``info``, ``warning``, ...) is
    forwarded to the wrapped logger.
    """

    def __init__(self,
                 name,
                 log_fname,
                 log_level=logging.INFO,
                 custom_log_handler=None):
        """Create (or fetch) the named logger and attach handlers to it.

        Args:
            name: name passed to ``logging.getLogger``.
            log_fname: path of the log file given to ``logging.FileHandler``.
            log_level: level set on the wrapped logger.
            custom_log_handler: optional extra handler, or list of handlers,
                to attach in addition to the file and stream handlers.
        """
        self.name = name
        self.logger = logging.getLogger(name)
        self.logger.setLevel(log_level)
        self.logger.addHandler(logging.FileHandler(log_fname))
        self.logger.addHandler(logging.StreamHandler())

        if custom_log_handler:
            # Accept either a single handler or a list of handlers.
            if isinstance(custom_log_handler, list):
                extra_handlers = custom_log_handler
            else:
                extra_handlers = [custom_log_handler]
            for handler in extra_handlers:
                self.logger.addHandler(handler)

    def kiritori(self):
        """Log a separator line of 80 dashes."""
        self.logger.info('-'*80)

    def double_kiritori(self):
        """Log a separator line of 80 equals signs."""
        self.logger.info('='*80)

    def space(self):
        """Log a newline to visually separate sections."""
        self.logger.info('\n')

    @contextmanager
    def interval_timer(self, name):
        """Context manager that logs start time, end time and duration of a block.

        The end/duration lines are logged even if the block raises.
        """
        start_time = datetime.now()
        self.logger.info("\n")
        self.logger.info(f"Execution {name} start at {start_time}")
        try:
            yield
        finally:
            end_time = datetime.now()
            td = end_time - start_time
            self.logger.info(f"Execution {name} end at {end_time}")
            self.logger.info(f"Execution Time : {td}")
            self.logger.info("\n")

    def __getattr__(self, attr):
        """Forward unknown attributes to the wrapped ``logging.Logger``.

        Raises:
            AttributeError: if the wrapped logger has no such attribute, or if
                ``self.logger`` itself has not been set yet.
        """
        # __getattr__ only runs when normal lookup fails. If 'logger' is the
        # missing name (e.g. an instance built without __init__), delegating
        # to self.logger would recurse forever.
        if attr == 'logger':
            raise AttributeError(attr)
        return getattr(self.logger, attr)

In [6]:
# Maximum Cutout hole edge (one tenth of the image edge). Only referenced by
# the Cutout transform that is currently commented out below.
max_hole_size = conf.image_size // 10

# Training-time augmentation pipeline. Transforms are applied in list order:
# flips, shift/scale/rotate, brightness/contrast jitter, then resize and
# normalisation.
train_transform = Compose([
    HorizontalFlip(),
    VerticalFlip(),
    ShiftScaleRotate(rotate_limit=120),
    RandomBrightness(limit=0.2),
    RandomContrast(limit=0.2),
    Resize(conf.image_size, conf.image_size),
#     Cutout(max_h_size=max_hole_size, max_w_size=max_hole_size, num_holes=8, p=0.2),
    # Normalize() is called without arguments, so the library defaults apply
    # (presumably ImageNet mean/std -- confirm against the albumentations docs).
    Normalize(),
])

# Validation/test pipeline: only resize and normalisation, no random augmentation.
valid_transform = Compose([
    Resize(conf.image_size, conf.image_size),
    Normalize(),
])

In [7]:
# https://www.kaggle.com/ratthachat/aptos-updated-preprocessing-ben-s-cropping
def crop_image1(img, tol=7):
    """Crop an image to the rows and columns containing a pixel above ``tol``.

    Args:
        img: image array; ``np.ix_`` needs 1-D row/column masks, so this is
            intended for 2-D (single-channel) arrays.
        tol: brightness tolerance; pixels ``<= tol`` count as background.

    Returns:
        The sub-array made of every row and column that has at least one
        pixel greater than ``tol``.
    """
    bright = img > tol
    keep_rows = bright.any(axis=1)
    keep_cols = bright.any(axis=0)
    return img[np.ix_(keep_rows, keep_cols)]

def crop_image_from_gray(img, tol=7):
    """Crop dark borders from an image using a grayscale brightness mask.

    Args:
        img: 2-D grayscale array, or 3-D array whose first three channels are
            RGB (it is passed to ``cv2.cvtColor`` with ``COLOR_RGB2GRAY``).
        tol: brightness tolerance; pixels ``<= tol`` count as background.

    Returns:
        The cropped image. A 3-D input whose mask is empty (image too dark)
        is returned unchanged. Arrays that are neither 2-D nor 3-D yield
        ``None``.
    """
    if img.ndim == 2:
        bright = img > tol
        return img[np.ix_(bright.any(1), bright.any(0))]

    if img.ndim == 3:
        bright = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) > tol
        window = np.ix_(bright.any(1), bright.any(0))

        # Nothing above the tolerance: cropping would remove everything,
        # so hand back the original image instead.
        if img[:, :, 0][window].shape[0] == 0:
            return img

        # Crop each of the three colour channels with the same window.
        cropped_channels = [img[:, :, channel][window] for channel in range(3)]
        return np.stack(cropped_channels, axis=-1)


def load_ben_color(path, sigmaX=30):
    """Load an image from disk and apply crop / resize / blur-subtraction preprocessing.

    Steps: read with OpenCV, convert BGR to RGB, crop dark borders with
    ``crop_image_from_gray``, resize to ``conf.image_size`` squared, then
    compute ``4 * image - 4 * GaussianBlur(image, sigmaX) + 128``.

    Args:
        path: image file path (string).
        sigmaX: Gaussian blur sigma passed to ``cv2.GaussianBlur``.

    Returns:
        The preprocessed RGB image array.

    Raises:
        FileNotFoundError: if OpenCV cannot read an image from ``path``.
    """
    image = cv2.imread(path)
    # cv2.imread signals a missing or undecodable file by returning None
    # rather than raising, which would otherwise surface as an opaque
    # cvtColor error.
    if image is None:
        raise FileNotFoundError(f"cv2.imread could not read an image from {path!r}")

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)
    image = cv2.resize(image, (conf.image_size, conf.image_size))
    # Subtract the local average colour (the blurred image) and re-centre at 128.
    image = cv2.addWeighted(image, 4, cv2.GaussianBlur(image, (0, 0), sigmaX), -4, 128)

    return image

In [8]:
def convert_num(target):
    """Encode an integer grade as a cumulative (ordinal) multi-hot vector.

    The first ``target + 1`` of ``conf.classes_num`` entries are 1 and the
    rest are 0, e.g. ``2 -> [1, 1, 1, 0, 0]`` with five classes.

    Returns:
        ``float32`` array of length ``conf.classes_num``.
    """
    encoded = np.zeros(conf.classes_num, dtype=np.float32)
    encoded[:target + 1] = 1
    return encoded


class APTOSDataset(Dataset):
    """Dataset yielding preprocessed images (and ordinal labels) from a DataFrame.

    Each item is a dict with key ``'data'`` (the image) and, unless the
    dataset is in test mode, key ``'label'`` (output of ``convert_num``).

    Args:
        root_dir: directory containing ``<id_code>.png`` files.
        data_csv: DataFrame with an ``id_code`` column and, outside test
            mode, a ``diagnosis`` column. Rows are addressed by position.
        augment: optional callable used as ``augment(image=img)['image']``;
            when given, its output is moved from channels-last to
            channels-first. When omitted, the image is returned exactly as
            ``load_ben_color`` produced it.
        test: ``"test"`` (or ``True``) selects test mode, in which no label is
            produced; any other value produces labels.
        mixup: when true, each labelled sample is mixed with a random second
            sample with probability 0.5.
    """

    def __init__(self,
                 root_dir,
                 data_csv,
                 augment=None,
                 test=False,
                 mixup=False):
        super().__init__()
        self.root_dir = Path(root_dir)
        self.data_csv = data_csv
        self.augment = augment
        self.test = test
        self.mixup = mixup

    def _is_test(self):
        """Return True when no labels should be produced."""
        # Callers in this file pass the strings "train"/"valid"/"test", while
        # the constructor default is the boolean False; accept both spellings.
        return self.test is True or self.test == "test"

    def _load_image(self, row):
        """Load, preprocess and (optionally) augment the image for one row."""
        fpath = self.root_dir / f"{row.id_code}.png"
        image = load_ben_color(str(fpath))
        if self.augment:
            image = self.augment(image=image)['image']
            # channels-last -> channels-first for the model
            image = np.moveaxis(image, -1, 0)
        return image

    def do_mixup(self, img, label, alpha=1.):
        """Blend ``img``/``label`` with a randomly drawn second sample.

        The mixing rate is drawn from ``Beta(alpha, alpha)``; the result is
        ``rate * first + (1 - rate) * second`` for both image and label.
        """
        index = np.random.randint(0, len(self.data_csv))
        # Positional lookup: the drawn index is a position, not an index label.
        row = self.data_csv.iloc[index]
        img2 = self._load_image(row)
        label2 = convert_num(row.diagnosis)

        rate = np.random.beta(alpha, alpha)
        img = img * rate + img2 * (1 - rate)
        label = label * rate + label2 * (1 - rate)
        return img, label

    def __len__(self):
        return len(self.data_csv)

    def __getitem__(self, index):
        sample = dict()
        # Positional lookup so frames without a 0..n-1 index also work.
        row = self.data_csv.iloc[index]
        image = self._load_image(row)

        if not self._is_test():
            label = convert_num(row.diagnosis)
            if self.mixup and np.random.random() < 0.5:
                image, label = self.do_mixup(image, label)
            sample['label'] = label

        sample['data'] = np.array(image)

        return sample

In [9]:
def worker_init_fn(worker_id):
    """Seed NumPy's global RNG inside a DataLoader worker process.

    The seed is derived from ``torch.initial_seed()``, which inside a worker
    is that worker's own seed (base seed + worker id, see the PyTorch data
    loading docs). It therefore differs between workers and between epochs,
    while staying reproducible for a fixed ``torch.manual_seed``. A constant
    ``conf.seed + worker_id`` would replay the same NumPy random stream (mixup
    partners and rates) every time the loader is rebuilt.

    Args:
        worker_id: worker index supplied by the DataLoader (already folded
            into ``torch.initial_seed()``, so not used directly).
    """
    np.random.seed(torch.initial_seed() % (2 ** 32))


def make_loader(df,
                root_dir,
                batch_size=conf.batch_size,
                shuffle=True,
                test="train",
                image_dataset=False,
                worker_init_fn=worker_init_fn,
                **kwargs):
    """Build an ``APTOSDataset`` and wrap it in a ``DataLoader``.

    Args:
        df: DataFrame describing the samples.
        root_dir: directory containing the image files.
        batch_size: mini-batch size.
        shuffle: whether the loader shuffles samples.
        test: mode string, ``"train"``, ``"valid"`` or ``"test"``; forwarded to
            the dataset.
        image_dataset: unused; kept for interface compatibility.
        worker_init_fn: callable run in every worker process at start-up.
        **kwargs: forwarded to ``APTOSDataset`` (e.g. ``augment``, ``mixup``).

    Returns:
        Tuple ``(loader, dataset_size)``.
    """
    ds = APTOSDataset(
        root_dir,
        df,
        test=test,
        **kwargs)

    # Drop the ragged last batch only while training. Validation and test
    # must see every sample, otherwise predictions and metrics silently miss
    # the tail of the data.
    drop_last = test == "train"
    loader = DataLoader(
        ds, batch_size=batch_size, shuffle=shuffle,
        num_workers=conf.num_workers,
        drop_last=drop_last,
        # Previously accepted but never forwarded, so workers were not seeded.
        worker_init_fn=worker_init_fn)
    return loader, len(ds)

In [10]:
class ResNet(nn.Module):
    """torchvision ResNet backbone with a replaced pooling layer and classifier head.

    Args:
        arch_name: key into ``torchvision.models.__dict__`` naming the
            architecture constructor.
        input_channel: channel count for the (currently unused) ``bn0`` layer.
        input_size: input edge length, or ``(height, width)`` tuple; the
            average-pool kernel is ``input_size // 16``.
        num_classes: output size of the final linear layer.
    """

    def __init__(self,
                 arch_name='resnet18',
                 input_channel=3,
                 input_size=224,
                 num_classes=28):
        super(ResNet, self).__init__()
        # NOTE(review): `pretrained` receives the truthy string "imagenet";
        # presumably torchvision treats it like pretrained=True -- confirm for
        # the installed torchvision version.
        self.base_model = torchvision.models.__dict__[arch_name](pretrained="imagenet")
        # Pooling kernel assumes the layer4 feature map is input_size / 16 --
        # TODO confirm against the strides of the chosen architecture
        # (forward() below does not call base_model.maxpool).
        if isinstance(input_size, tuple):
            ksize = (input_size[0] // 16, input_size[1] // 16)
        else:
            ksize = input_size // 16

        # bn0 is registered (so it appears in the state_dict) but its use in
        # forward() is commented out.
        self.base_model.bn0 = nn.BatchNorm2d(input_channel)
        self.base_model.avgpool = nn.AvgPool2d(kernel_size=ksize)

        # Feature width is read from the backbone's original fc layer, so it
        # depends on arch_name rather than being a fixed constant.
        self.dim_feats = self.base_model.fc.in_features
        self.base_model.fc = nn.Linear(self.dim_feats, num_classes)
        self.out_size = ksize

    def forward(self, data):
        """Run the backbone layer by layer and return raw logits.

        ``base_model.maxpool`` and ``bn0`` are not applied here.
        """
        # x = self.base_model.bn0(data)
        x = self.base_model.conv1(data)
        x = self.base_model.bn1(x)
        x = self.base_model.relu(x)

        x = self.base_model.layer1(x)
        x = self.base_model.layer2(x)
        x = self.base_model.layer3(x)
        x = self.base_model.layer4(x)
        x = self.base_model.avgpool(x)
        # Flatten to (N, dim_feats); this only matches the batch size when the
        # pooled map is 1x1.
        x = x.view(-1, self.dim_feats)
        x = self.base_model.fc(x)

        return x

In [11]:
class CosineLR(_LRScheduler):
    """Cosine-annealed learning rate with restarts.

    The rate follows half a cosine from each base learning rate down to
    ``step_size_min`` over ``t0`` calls, then restarts with the period
    multiplied by ``tmult``.

    Args:
        optimizer: wrapped optimizer.
        step_size_min: lower bound of the learning rate.
        t0: length of the first period, counted in ``get_lr`` calls.
        tmult: factor applied to the period at every restart.
        curr_epoch: initial value of the within-period counter.
        last_epoch: forwarded to ``_LRScheduler``.
    """

    def __init__(self, optimizer, step_size_min=1e-5, t0=100, tmult=2, curr_epoch=-1, last_epoch=-1):
        # State must exist before the base constructor runs, because it is
        # read by get_lr().
        self.step_size_min = step_size_min
        self.t0 = t0
        self.tmult = tmult
        self.epochs_since_restart = curr_epoch
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Advance the within-period counter and return one rate per base lr.

        Note: this method mutates scheduler state on every call.
        """
        self.epochs_since_restart += 1
        if self.epochs_since_restart > self.t0:
            # Period finished: lengthen it and start over.
            self.t0 *= self.tmult
            self.epochs_since_restart = 0

        floor = self.step_size_min
        cosine_term = 1 + cos(self.epochs_since_restart * pi / self.t0)

        annealed = []
        for base_lr in self.base_lrs:
            annealed.append(floor + (0.5 * (base_lr - floor) * cosine_term))

        return annealed

In [12]:
# https://inclass.kaggle.com/gennadylaptev/qwk-loss-for-pytorch/data
# Switching to this loss partway through training, after starting with
# categorical cross-entropy, is reportedly a good idea
# (https://arxiv.org/pdf/1612.00775.pdf)
def kappa_loss(p, y, n_classes=5, eps=1e-10):
    """
    QWK loss function as described in https://arxiv.org/pdf/1612.00775.pdf

    Arguments:
        p: a tensor of raw scores (logits), [batch_size, n_classes]; a sigmoid
           is applied inside this function,
        y: a tensor with encoded class labels, [batch_size, n_classes],
        n_classes: number of classes (size of the weight matrix),
        eps: small constant added to the denominator for numerical stability
    Returns:
        QWK loss (scalar tensor): weighted observed disagreement divided by
        weighted expected disagreement
    """
    # Quadratic weights W[i, j] = (i - j) ** 2, built directly on the device
    # and dtype of the predictions so the loss works for CPU tensors and any
    # GPU index, not only the globally configured device.
    grades = torch.arange(n_classes, dtype=p.dtype, device=p.device)
    W = (grades.view(-1, 1) - grades.view(1, -1)) ** 2

    p = p.sigmoid()
    # Observed co-occurrence of label columns and predicted scores.
    O = torch.matmul(y.t(), p)
    # Expected co-occurrence from the outer product of the marginals.
    E = torch.matmul(y.sum(dim=0).view(-1, 1), p.sum(dim=0).view(1, -1)) / O.sum()

    return (W * O).sum() / ((W * E).sum() + eps)


def calc_loss(pred, labels, criterion):
    """Compute the loss and sigmoid probabilities for one or several outputs.

    Args:
        pred: a logits tensor, or a list of logits tensors (e.g. several
            heads); for a list, loss and probabilities are averaged.
        labels: target tensor passed to ``criterion``.
        criterion: callable ``criterion(logits, labels) -> loss``.

    Returns:
        Tuple ``(loss, probabilities)`` where ``probabilities`` is a NumPy
        array on the CPU.
    """
    if not isinstance(pred, list):
        probs = pred.sigmoid().cpu().data.numpy()
        return criterion(pred, labels), probs

    total_probs, total_loss = 0, 0
    for head_logits in pred:
        total_probs += head_logits.sigmoid().cpu().data.numpy()
        total_loss += criterion(head_logits, labels)

    n_heads = len(pred)
    return total_loss / n_heads, total_probs / n_heads

In [13]:
# https://www.kaggle.com/lextoumbourou/blindness-detection-resnet34-ordinal-targets
def get_preds(arr):
    """Decode thresholded ordinal outputs into integer grades 0..4.

    For each row the grade is the position of the first zero entry minus one;
    rows without any zero decode to 4. Results are clipped to ``[0, 4]``.
    """
    is_zero = (arr == 0)
    first_zero = is_zero.argmax(axis=1)
    # Rows with no zero at all are treated as if the first zero sat at index 5.
    first_zero = np.where(is_zero.any(axis=1), first_zero, 5)
    return np.clip(first_zero - 1, 0, 4)

def calc_qwk(pred, true):
    """Quadratic weighted kappa between predicted and true ordinal encodings.

    Args:
        pred: array of per-threshold probabilities; thresholded at 0.5 and
            decoded with ``get_preds``.
        true: array of ordinal label encodings; truncated to int and summed
            per row (minus one) to recover the grade.

    Returns:
        ``sklearn.metrics.cohen_kappa_score`` with quadratic weights over the
        labels 0..4.
    """
    pred_grades = get_preds(pred > 0.5)
    true_grades = true.astype(int).sum(axis=1) - 1
    return sklearn.metrics.cohen_kappa_score(pred_grades, true_grades,
                                             labels=[0,1,2,3,4],
                                             weights='quadratic')


def train(model,
          optimizer,
          scheduler,
          train_df,
          aug,
          device,
          criterion,
          data_dir=conf.train_dir,
          undersampling=False):
    """Train ``model`` for one epoch with gradient accumulation.

    Gradients are accumulated over ``conf.accum_time`` batches before each
    optimiser step; the scheduler is stepped once per optimiser step.

    Args:
        model: network to train (switched to train mode here).
        optimizer: optimiser updating ``model``.
        scheduler: learning-rate scheduler, stepped after every optimiser step.
        train_df: DataFrame of training samples.
        aug: augmentation callable forwarded to the dataset.
        device: device the batches are moved to.
        criterion: loss callable ``criterion(logits, labels)``.
        data_dir: directory containing the training images.
        undersampling: unused; kept for interface compatibility.

    Returns:
        Tuple ``(all_preds, result)`` where ``all_preds`` are the sigmoid
        probabilities of every processed sample and ``result`` is
        ``{'loss': ..., 'qwk': ...}`` for the epoch.
    """
    model.train()
    dataloader, _ = make_loader(
        train_df,
        data_dir,
        shuffle=True,
        test="train",
        mixup=conf.mixup,
        augment=aug)

    running_loss = 0.0
    n_seen = 0      # samples actually processed (the loader may drop the last batch)
    n_pending = 0   # batches whose gradients have not been applied yet
    all_trues = list()
    all_preds = list()

    # Iterate over data.
    optimizer.zero_grad()
    for sample in dataloader:
        inputs = sample['data'].to(device)
        labels = sample['label'].to(device)
        all_trues.append(labels.cpu().data.numpy())

        outputs = model(inputs)

        loss, pred_probs = calc_loss(outputs, labels, criterion)
        all_preds.append(pred_probs)
        loss.backward()
        n_pending += 1

        # statistics
        running_loss += loss.item() * inputs.size(0)
        n_seen += inputs.size(0)

        if n_pending == conf.accum_time:
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
            n_pending = 0

    # Apply gradients left over from an incomplete accumulation window;
    # otherwise the last (n_batches % accum_time) batches of every epoch
    # would never contribute to an update.
    if n_pending:
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    all_preds = np.concatenate(all_preds)
    all_trues = np.concatenate(all_trues)
    # Average over the samples actually seen, not the dataset size, because
    # the training loader drops its ragged last batch.
    epoch_loss = running_loss / max(n_seen, 1)
    epoch_qwk = calc_qwk(all_preds, all_trues)

    result = {'loss': epoch_loss, 'qwk': epoch_qwk}
    return all_preds, result


def validate(model, train_df,
             aug,
             device,
             criterion,
             data_dir='input/train'):
    """Evaluate ``model`` on a labelled DataFrame without gradient tracking.

    Args:
        model: network to evaluate (switched to eval mode here).
        train_df: DataFrame of labelled validation samples.
        aug: transform callable forwarded to the dataset.
        device: device the batches are moved to.
        criterion: loss callable ``criterion(logits, labels)``.
        data_dir: directory containing the images.

    Returns:
        Tuple ``(all_preds, result)`` where ``all_preds`` are the sigmoid
        probabilities of every evaluated sample and ``result`` is
        ``{'loss': ..., 'qwk': ...}``.
    """
    model.eval()

    dataloader, _ = make_loader(
        train_df,
        data_dir,
        conf.batch_size,
        shuffle=False,
        test="valid",
        augment=aug)

    all_preds = []
    all_trues = []

    running_loss = 0.0
    n_seen = 0  # samples actually evaluated

    # Iterate over data.
    with torch.no_grad():
        for samples in dataloader:
            inputs = samples['data'].to(device)
            outputs = model(inputs)
            labels = samples['label'].to(device)

            loss, pred_probs = calc_loss(outputs, labels, criterion)
            all_preds.append(pred_probs)

            all_trues.append(labels.cpu().data.numpy())
            running_loss += loss.item() * inputs.size(0)
            n_seen += inputs.size(0)

    all_preds = np.concatenate(all_preds)
    all_trues = np.concatenate(all_trues)
    epoch_qwk = calc_qwk(all_preds, all_trues)

    # Normalise by the number of evaluated samples so the mean loss stays
    # correct even when the loader does not yield every row of the frame.
    epoch_loss = running_loss / max(n_seen, 1)
    result = {'loss': epoch_loss, 'qwk': epoch_qwk}

    return all_preds, result

def predict(model, test_df,
            aug,
            device,
            data_dir='input/train'):
    """Run ``model`` over an unlabelled DataFrame and collect its raw outputs.

    Note that the returned values are the model outputs as-is (no sigmoid is
    applied here).

    Args:
        model: network to run (switched to eval mode here).
        test_df: DataFrame of test samples.
        aug: transform callable forwarded to the dataset.
        device: device the batches are moved to.
        data_dir: directory containing the images.

    Returns:
        NumPy array with the concatenated outputs of every batch.
    """
    model.eval()

    loader, _ = make_loader(
        test_df,
        data_dir,
        conf.batch_size,
        shuffle=False,
        test="test",
        augment=aug)

    batch_outputs = []
    for samples in loader:
        with torch.set_grad_enabled(False):
            logits = model(samples['data'].to(device))
            batch_outputs.append(logits.cpu().data.numpy())

    return np.concatenate(batch_outputs)

In [14]:
def val_split(df, val_size=0.2, fold=0):
    """Split ``df`` into train/validation frames using un-shuffled K-fold.

    Args:
        df: DataFrame with ``id_code`` and ``diagnosis`` columns.
        val_size: validation fraction; the number of folds is
            ``int(1 / val_size)``.
        fold: zero-based index of the fold used for validation.

    Returns:
        ``{'train': train_df, 'val': val_df}``, both with a fresh 0..n-1 index.

    Raises:
        ValueError: if ``fold`` is outside ``[0, n_splits)``.
    """
    n_splits = int(1 / val_size)
    if not 0 <= fold < n_splits:
        raise ValueError(
            f"fold must be in [0, {n_splits}) for val_size={val_size}, got {fold}")

    # No random_state: the folds are not shuffled, so a seed has no effect,
    # and scikit-learn >= 0.24 raises ValueError when random_state is
    # combined with shuffle=False. Leaving it out keeps the same contiguous
    # folds as before.
    kfold = KFold(n_splits=n_splits)
    tr_ind, te_ind = list(kfold.split(df.id_code, df.diagnosis))[fold]

    train_df = df.iloc[tr_ind].reset_index(drop=True)
    val_df = df.iloc[te_ind].reset_index(drop=True)
    return {'train': train_df, 'val': val_df}

In [15]:
def train_model(train_df,
                test_df,
                base_model,
                criterion,
                log,
                device,
                fold=0,
                num_epoch=1,
                mask_epoch=1):
    """Train on one fold, keep the best-QWK weights and predict the test set.

    Args:
        train_df: full labelled DataFrame; split by ``val_split`` on ``fold``.
        test_df: DataFrame of test samples.
        base_model: network to train; moved to ``device``.
        criterion: loss callable ``criterion(logits, labels)``.
        log: logger exposing ``info``.
        device: training device.
        fold: validation fold index.
        num_epoch: maximum number of epochs (Ctrl-C stops early).
        mask_epoch: unused; kept for interface compatibility.

    Returns:
        Tuple ``(model, best_val_preds, test_preds)``. ``model`` carries the
        weights with the best validation QWK. ``best_val_preds`` is ``None``
        if no epoch finished with a comparable QWK.
    """
    ds = val_split(train_df, fold=fold)
    train_ds, val_ds = ds['train'], ds['val']
    learn_start = time()

    log.info('classification learning start')
    log.info("-" * 20)
    model = base_model.to(device)
    log.info(f'parameters {count_parameter(model)}')
    best_model_wts = copy.deepcopy(model.state_dict())
    # QWK can be negative, so start below every possible score, and make sure
    # best_val_preds is always bound (e.g. when interrupted in epoch 0).
    best_qwk = -float('inf')
    best_val_preds = None

    # Observe that all parameters are being optimized
    log.info('Optimizer: Adam')
    optimizer = optim.Adam(model.parameters(), lr=conf.init_lr)

    # train() steps the scheduler once per optimiser step (every
    # conf.accum_time batches), so the half-period has to be given in
    # optimiser steps for the annealing to span conf.lr_step_epoch epochs.
    t_max = max(1, len(train_ds) * conf.lr_step_epoch
                // (conf.batch_size * conf.accum_time))
    log.info(
        f"Scheduler: CosineAnnealingLR, T_max={t_max} steps "
        f"({conf.lr_step_epoch} epochs), eta_min={conf.eta_min}")
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                     T_max=t_max,
                                                     eta_min=conf.eta_min)

    for epoch in range(num_epoch):
        try:
            # No epoch-level scheduler.step(): stepping happens inside train().
            start = time()

            _, train_res = train(model, optimizer, scheduler, train_ds, train_transform,
                                 device, criterion,
                                 data_dir=conf.train_dir)
            clf_loss = train_res['loss']
            train_qwk = train_res['qwk']

            val_preds, val_res = validate(model, val_ds, valid_transform,
                                          device, criterion,
                                          data_dir=conf.train_dir)
            val_clf = val_res['loss']
            val_qwk = val_res['qwk']

            calc_time = time() - start
            accum_time = time() - learn_start
            lr = get_lr(optimizer)

            log_msg = f"{epoch}\t{calc_time:.2f}\t{accum_time:.1f}\t{lr:.4f}\t"
            log_msg += f"{clf_loss:.4f}\t{train_qwk}\t"
            log_msg += f"{val_clf:.4f}\t{val_qwk}\t"
            log.info(log_msg)

            if val_qwk > best_qwk:
                best_model_wts = copy.deepcopy(model.state_dict())
                best_qwk = val_qwk
                best_val_preds = val_preds

        except KeyboardInterrupt:
            # Allow a manual stop; fall through to test-time prediction with
            # the best weights seen so far.
            break

    log.info("-" * 20)
    log.info('Best val QWK: {:4f}'.format(best_qwk))

    # load best model weights
    model.load_state_dict(best_model_wts)
    test_preds = predict(model, test_df, valid_transform,
                         device, data_dir=conf.test_dir)

    return model, best_val_preds, test_preds

In [16]:
def main():
    """Run the experiment for the fold selected by ``conf.fold``.

    Sets up the result directory and logger, loads the CSVs, fine-tunes a
    ResNet-18 initialised from a local checkpoint, and writes the model
    weights plus validation/test predictions to the working directory.
    """
    exp_name = f'{now()}'
    device, log, result_dir = setup(exp_name, conf)

    train_df = load_csv(conf.train_csv)
    test_df = load_csv(conf.test_csv)

    log.info('done')
    for i in range(conf.n_splits):
        # Only the configured fold is trained in this run.
        if i != conf.fold:
            continue
        model_arch = 'resnet18'
        model_ft = ResNet(arch_name=model_arch,
                          input_size=conf.image_size,
                          num_classes=conf.classes_num)
        # Load onto the CPU first so the checkpoint does not depend on the
        # device it was saved from; train_model() moves the model to `device`.
        state_dict = torch.load(
            "../result/for_pretrained_2019_07_28_18_18_17/model_0.pkl",
            map_location="cpu")
        model_ft.load_state_dict(state_dict)

        criterion = nn.BCEWithLogitsLoss()
        # criterion = kappa_loss
        criterion = criterion.to(device)

        model_ft, val_preds, test_preds = train_model(
            train_df,
            test_df,
            model_ft,
            criterion,
            log,
            device,
            fold=i,
            num_epoch=conf.num_epoch)
        torch.save(model_ft.state_dict(),  f'model_{i}.pkl')
        np.save( f'val_preds_{i}.npy', val_preds)
        np.save( f'test_preds_{i}.npy', test_preds)

In [17]:
# Run the experiment end to end (setup, training of fold `conf.fold`, saving outputs).
main()

configuration is following...
{'accum_time': 4,
 'alpha': 1,
 'batch_size': 16,
 'classes_num': 5,
 'device_name': 'cuda:0',
 'eta_min': 1e-06,
 'fold': 0,
 'image_size': 256,
 'init_lr': 0.001,
 'lr_step_epoch': 64,
 'mixup': True,
 'n_splits': 5,
 'num_epoch': 128,
 'num_workers': 4,
 'seed': 71,
 'test_csv': '../input/aptos2019-blindness-detection/sample_submission.csv',
 'test_dir': '../input/aptos2019-blindness-detection/test_images/',
 'train_csv': '../input/aptos2019-blindness-detection/train.csv',
 'train_dir': '../input/aptos2019-blindness-detection/train_images/'}


FileNotFoundError: [Errno 2] File b'../input/aptos2019-blindness-detection/train.csv' does not exist: b'../input/aptos2019-blindness-detection/train.csv'