In [1]:
import os
import copy
import shutil
import random
import logging
from pprint import pformat
from datetime import datetime
from contextlib import contextmanager
from pathlib import Path
from time import time, sleep
from math import cos, pi

import cv2
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from tqdm import tqdm
from PIL import Image
import torchvision
import pretrainedmodels
import sklearn.metrics
import torch.nn.functional as F
from sklearn.model_selection import KFold
from sklearn.metrics import recall_score
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms
from efficientnet_pytorch import EfficientNet
from torch.optim.lr_scheduler import _LRScheduler
from albumentations import HorizontalFlip, Compose, RandomCrop, RandomContrast, Normalize, Resize, ShiftScaleRotate, VerticalFlip, Cutout, IAASharpen, CLAHE

In [2]:
class Config():
    def __init__(self, home=True):
        self.seed = 71
        self.batch_size = 64
        self.accum_time = 1
        self.train_csv = '../input/train.csv'
        self.train_images = '../input/train.parquet'
        self.test_csv = '../input/sample_submission.csv'
        self.test_images = '../input/train.parquet'
        
        self.device_name = 'cuda:0'
        self.weighted_sample = False
        self.mixup_train = False
        self.image_size = (137, 236)
        self.n_splits = 5
        self.fold = 0
        self.num_epoch = 128
        self.lr_step_epoch = 32
        
        self.gr_size = 168
        self.vd_size = 11
        self.cd_size = 7
        
        self.alpha = 1
        self.mixup = False
        self.init_lr = 1e-3
        self.eta_min = 1e-6
        self.num_workers = 16 if home else 4
        self.classes_num = 1
    
conf = Config(home=True)
assert((conf.mixup_train and conf.mixup) == False)

In [3]:
def now():
    return datetime.now().strftime("%Y_%m_%d_%H_%M_%S")


def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.manual_seed(seed)

    
def load_csv(path):
    return pd.read_csv(path)


def count_parameter(model):
    return sum(p.numel() for p in model.parameters())


def get_lr(optimizer):
    lr = list()
    for param_group in optimizer.param_groups:
        lr.append(param_group['lr'])
    if len(lr) == 1:
        return lr[0]
    else:
        return lr

In [4]:
# setup for kernel
def setup(exp_name, config):
    """init experiment (directory setup etc...)"""

    result_dir = Path(f'../result/{exp_name}/')
    result_dir.mkdir(parents=True)
    shutil.copy("main.ipynb", result_dir)

    set_seed(config.seed)

    device = torch.device(config.device_name)

    log = Logger(exp_name, result_dir / 'exp.log')

    log.info("configuration is following...")
    log.info(pformat(config.__dict__))

    return device, log, result_dir

In [5]:
class Logger:
    """Logging Uitlity Class for monitoring and debugging
    """

    def __init__(self,
                 name,
                 log_fname,
                 log_level=logging.INFO,
                 custom_log_handler=None):

        self.name = name
        self.logger = logging.getLogger(name)
        self.logger.setLevel(log_level)
        ch = logging.FileHandler(log_fname)
        self.logger.addHandler(ch)
        self.logger.addHandler(logging.StreamHandler())

        if custom_log_handler:
            if isinstance(custom_log_handler, list):
                for handler in custom_log_handler:
                    self.logger.addHandler(handler)
            else:
                self.logger.addHandler(handler)

    def kiritori(self):
        self.logger.info('-'*80)

    def double_kiritori(self):
        self.logger.info('='*80)

    def space(self):
        self.logger.info('\n')

    @contextmanager
    def interval_timer(self, name):
        start_time = datetime.now()
        self.logger.info("\n")
        self.logger.info(f"Execution {name} start at {start_time}")
        try:
            yield
        finally:
            end_time = datetime.now()
            td = end_time - start_time
            self.logger.info(f"Execution {name} end at {end_time}")
            self.logger.info(f"Execution Time : {td}")
            self.logger.info("\n")

    def __getattr__(self, attr):
        """
        for calling logging class attribute
        if you call attributes of other class, raise AttributeError
        """
        # self.logger.info(f"{datetime.now()}")
        return getattr(self.logger, attr)

In [6]:
# max_hole_size = conf.image_size // 10
train_transform = Compose([
#     HorizontalFlip(),
#     VerticalFlip(),
    ShiftScaleRotate(rotate_limit=30),
    Normalize(),
])

valid_transform = Compose([
    Normalize(),
])

In [7]:
class BengalDataset(Dataset):
    def __init__(self,
                 metadata,
                 images,
                 augment=None,
                 test=False,
                mixup=False):
        super().__init__()
        self.metadata = metadata
        self.images = images
        self.augment = augment
        self.test = test
        self.mixup = mixup
        
    def do_mixup(self, img, label, alpha=1.):
        index = np.random.randint(0,len(self.data_csv))
        row = self.data_csv.loc[index]
        fname = f"{row.id_code}.npy"
        fpath = self.root_dir / fname
        img2 = np.load(fpath)
        if self.augment:
            img2 = self.augment(image=img2)['image']
            img2 = np.moveaxis(img2, -1, 0)
        
        label2 = row.diagnosis
        label2 = convert_num(label2)
        
        rate = np.random.beta(alpha, alpha)
        img = img*rate + img2*(1-rate)
        label = label*rate + label2*(1-rate)
        return img, label.astype(np.float32)

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, index):
        sample = dict()
        row_image = self.images.iloc[index]
        row_label = self.metadata.iloc[index]
        image = row_image.drop('image_id').values.astype(np.uint8).reshape(137, 236)
        image = np.repeat(image[:, :, np.newaxis], 3, axis=2)
        
        if self.augment:
            image = self.augment(image=image)['image']
            image = np.moveaxis(image, -1, 0)
        
        if self.test != "test":
            label = [
                row_label.grapheme_root,
                row_label.vowel_diacritic,
                row_label.consonant_diacritic
            ]
            if self.mixup and np.random.random() < 0.5:
                image, label = self.do_mixup(image, label)
            sample['label'] = np.array(label)

        sample['data'] = np.array(image)

        return sample

In [8]:
def worker_init_fn(worker_id):                                                          
    np.random.seed(conf.seed + worker_id)

def make_loader(df,
                images,
                batch_size=conf.batch_size,
                shuffle=True,
                test="train",
                worker_init_fn=worker_init_fn,
                **kwargs):

    ds = BengalDataset(
        df,
        images,
        test=test,
        **kwargs)

    sampler = None
    if test == "train":
        drop_last = True
        if conf.weighted_sample:
            class_count = df.diagnosis.value_counts()
            class_count = 1 / class_count
            df['weight'] = df.diagnosis.map(class_count)
            sampler = WeightedRandomSampler(df.weight, len(df))
    else:
        drop_last = False
    loader = DataLoader(
        ds, batch_size=batch_size, # shuffle=shuffle,
        num_workers=conf.num_workers,
        sampler=sampler,
        drop_last=drop_last)
    return loader, len(ds)

In [9]:
class ResNet(nn.Module):
    def __init__(self,
                 arch_name='resnet18',
                 input_channel=3,
                 input_size=224,
                 se=False):
        super(ResNet, self).__init__()
        if se:
            self.base_model = pretrainedmodels.__dict__[arch_name](pretrained="imagenet")
        else:
            self.base_model = torchvision.models.__dict__[arch_name](pretrained="imagenet")
        if isinstance(input_size, tuple):
            ksize = (input_size[0] // 16, input_size[1] // 16)
        else:
            ksize = input_size // 16

        self.base_model.bn0 = nn.BatchNorm2d(input_channel)
        self.base_model.avgpool = nn.AvgPool2d(kernel_size=ksize)
        if se:
            self.dim_feats = self.base_model.last_linear.in_features  # = 2048
        else:
            self.dim_feats = self.base_model.fc.in_features  # = 2048
        self.fc_gr = nn.Linear(self.dim_feats, conf.gr_size)
        self.fc_vd = nn.Linear(self.dim_feats, conf.vd_size)
        self.fc_cd = nn.Linear(self.dim_feats, conf.cd_size)            
        self.out_size = ksize
        self.se = se

    def forward(self, data):
        # x = self.base_model.bn0(data)
        if self.se:
            x = self.base_model.layer0(data)
        else:
            x = self.base_model.conv1(data)
            x = self.base_model.bn1(x)
            x = self.base_model.relu(x)

        x = self.base_model.layer1(x)
        x = self.base_model.layer2(x)
        x = self.base_model.layer3(x)
        x = self.base_model.layer4(x)
        x = self.base_model.avgpool(x)
        x = x.view(-1, self.dim_feats)

        gr = self.fc_gr(x)
        vd = self.fc_vd(x)
        cd = self.fc_cd(x)

        return np.array([gr, vd, cd])

In [10]:
class DenseNet(nn.Module):
    def __init__(self,
                 arch_name='densenet121',
                 input_channel=3,
                 input_size=224,
                 num_classes=28):
        super(DenseNet, self).__init__()
        self.base_model = torchvision.models.__dict__[arch_name](pretrained="imagenet")
        
        if isinstance(input_size, tuple):
            ksize = (input_size[0] // 32, input_size[1] // 32)
        else:
            ksize = input_size // 32
        
        self.avgpool = nn.AvgPool2d(kernel_size=ksize)

        self.dim_feats = self.base_model.classifier.in_features  # = 1024
        self.base_model.classifier = nn.Linear(self.dim_feats, num_classes)
        self.out_size = ksize

    def forward(self, data):
        x = self.base_model.features(data)
        x = self.avgpool(x)
        x = x.view(-1, self.dim_feats)
        x = self.base_model.classifier(x)
        return x

In [11]:
class CosineLR(_LRScheduler):
    """SGD with cosine annealing.
    """

    def __init__(self, optimizer, step_size_min=1e-5, t0=100, tmult=2, curr_epoch=-1, last_epoch=-1):
        self.step_size_min = step_size_min
        self.t0 = t0
        self.tmult = tmult
        self.epochs_since_restart = curr_epoch
        super(CosineLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        self.epochs_since_restart += 1

        if self.epochs_since_restart > self.t0:
            self.t0 *= self.tmult
            self.epochs_since_restart = 0

        lrs = [self.step_size_min + (
                    0.5 * (base_lr - self.step_size_min) * (1 + cos(self.epochs_since_restart * pi / self.t0)))
               for base_lr in self.base_lrs]

        # print(lrs)

        return lrs

In [12]:
def calc_loss(pred, labels, criterion):
    pred_len = len(labels)
    pred_probs = list()
    each_loss = list()
    clf_loss = list()
    pred_class = list()
    loss = 0
    for i, px in enumerate(pred):
        pred_probs.append(px.softmax(dim=1).cpu().data.numpy() / pred_len)
        pred_class.append(px.argmax(dim=1).cpu().data.numpy())
        loss_i = criterion(px, labels[:, i]) / pred_len
        each_loss.append(loss_i)
        loss += loss_i
    return loss, each_loss, pred_probs, np.stack(pred_class, axis=1)

In [13]:
def weighted_macro_recall(trues, preds):
    scores = list()
    for i in range(3):
        s = recall_score(trues[:, i], preds[:, i], average='macro')
        scores.append(s)
    return np.average(scores, weights=[2,1,1])


def train(model,
          optimizer,
          scheduler,
          train_df,
          train_images,
          aug,
          device,
          criterion,
          undersampling=False):

    model.train()
    dataloader, ds_size = make_loader(
        train_df,
        train_images,
        shuffle=True,
        test="train",
        mixup=conf.mixup,
        augment=aug)

    running_loss = 0.0
    all_trues = list()
    all_preds = list()
    sum_loss = 0

    # Iterate over data.
    optimizer.zero_grad()
    for i, sample in enumerate(tqdm(dataloader)):
        inputs = sample['data'].to(device)
        labels = sample['label'].to(device)
        all_trues.append(labels.cpu().data.numpy())

        outputs = model(inputs)

        # loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        loss, _, _, pred_class = calc_loss(outputs, labels, criterion)
        all_preds.append(pred_class)
        loss.backward()

        # statistics
        running_loss += loss.item() * inputs.size(0)
        
        if (i + 1) % conf.accum_time == 0:
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

    all_preds = np.concatenate(all_preds)
    all_trues = np.concatenate(all_trues)
    epoch_loss = running_loss / ds_size
    epoch_recall = weighted_macro_recall(all_trues, all_preds)

    result = {'loss': epoch_loss, 'recall': epoch_recall}
    return all_preds, result


def validate(model, val_df, val_images,
             aug,
             device,
             criterion):

    model.eval()

    dataloader, ds_size = make_loader(
        val_df,
        val_images,
        conf.batch_size,
        shuffle=False,
        test="valid",
        augment=aug)

    all_preds = []
    all_trues = []

    running_loss = 0.0

    # Iterate over data.
    for i, samples in enumerate(dataloader):
        with torch.set_grad_enabled(False):
            inputs = samples['data'].to(device)
            outputs = model(inputs)
            labels = samples['label'].to(device)

            loss, _, _, pred_class = calc_loss(outputs, labels, criterion)
            all_preds.append(pred_class)            
            all_trues.append(labels.cpu().data.numpy())
            running_loss += loss.item() * inputs.size(0)

    all_preds = np.concatenate(all_preds)
    all_trues = np.concatenate(all_trues)
    epoch_loss = running_loss / ds_size
    epoch_recall = weighted_macro_recall(all_trues, all_preds)

    result = {'loss': epoch_loss, 'recall': epoch_recall}
    return all_preds, result

def predict(model, test_df,
            test_images,
            aug,
            device,
            data_dir='input/train'):

    model.eval()

    dataloader, ds_size = make_loader(
        test_df,
        test_images,
        conf.batch_size,
        shuffle=False,
        test="test",
        augment=aug)

    all_preds = []
    # Iterate over data.
    t = dataloader
    for i, samples in enumerate(t):
        with torch.set_grad_enabled(False):
            inputs = samples['data'].to(device)
            outputs = model(inputs)
            for px in outputs:
                all_preds.append(px.argmax(dim=1).cpu().data.numpy())

    all_preds = np.concatenate(all_preds)
    return all_preds

In [14]:
def mixup_data(x, y, alpha=1.0, use_cuda=True):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

def mixup_train(model,
          optimizer,
          scheduler,
          train_df,
          aug,
          device,
          criterion,
          data_dir='../',
          undersampling=False):

    model.train()
    dataloader, ds_size = make_loader(
        train_df,
        data_dir,
        shuffle=True,
        test="train",
        mixup=False,
        augment=aug)

    running_loss = 0.0

    # Iterate over data.
    optimizer.zero_grad()
    for i, sample in enumerate(tqdm(dataloader)):
        inputs = sample['data'].to(device)
        labels = sample['label'].to(device)
        
        inputs, targets_a, targets_b, lam = mixup_data(inputs, labels, conf.alpha)
        inputs, targets_a, targets_b = map(torch.autograd.Variable, (inputs, targets_a, targets_b))
        outputs = model(inputs)
        loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        loss.backward()

        # statistics
        running_loss += loss.item() * inputs.size(0)
        
        if (i + 1) % conf.accum_time == 0:
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

    epoch_loss = running_loss / ds_size
#     epoch_qwk = calc_qwk(all_preds, all_trues)

    result = {'loss': epoch_loss} #, 'qwk': epoch_qwk}
    return result

In [15]:
def val_split(df, images, val_size=0.2, fold=0):
    mskf = KFold(n_splits=int(1 / val_size), shuffle=True, random_state=conf.seed)
    splitter = mskf.split(df.index)
    for _ in range(fold + 1):
        tr_ind, te_ind = next(splitter)
    train_df = df.iloc[tr_ind].reset_index(drop=True)
    val_df = df.iloc[te_ind].reset_index(drop=True)
    train_images = images.iloc[tr_ind].reset_index(drop=True)
    val_images = images.iloc[te_ind].reset_index(drop=True)
    return {'train': train_df, 'val': val_df, 'train_images': train_images, 'val_images': val_images}

In [16]:
def train_model(train_df,
                train_images,
                test_df,
                test_images,
                base_model,
                criterion,
                log,
                device,
                fold=0,
                num_epoch=1,
                mask_epoch=1):

    ds = val_split(train_df, train_images, fold=fold)
    learn_start = time()

    log.info('classification learning start')
    log.info("-" * 20)
    model = base_model.to(device)
    # log.info(model)
    log.info(f'parameters {count_parameter(model)}')
    best_model_wts = copy.deepcopy(model.state_dict())
    best_recall = 0
    best_clf = 100

    # Observe that all parameters are being optimized
    log.info('Optimizer: Adam')
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=conf.init_lr, weight_decay=1e-5)

    log.info(
        f"Scheduler: CosineLR, period={conf.lr_step_epoch}")
    train_ds, val_ds, train_images, val_images = ds['train'], ds['val'], ds['train_images'], ds['val_images']
    
    scheduler = CosineLR(optimizer, step_size_min=conf.eta_min, 
                         t0=len(train_ds) * conf.lr_step_epoch // (conf.batch_size * conf.accum_time), 
                         tmult=1)

    for epoch in range(num_epoch):
        try:
            start = time()

            if conf.mixup_train:
                train_res = mixup_train(model, optimizer, scheduler, train_ds, train_transform,
                                     device, criterion,
                                     data_dir=conf.train_dir)
            else:
                _, train_res = train(model, optimizer, scheduler, 
                                     train_ds, train_images,
                                     train_transform,
                                     device, criterion)

            clf_loss = train_res['loss']
            train_recall = train_res['recall']
            val_preds, val_res = validate(model, val_ds, val_images,
                                          valid_transform,
                                          device, criterion)
            val_clf = val_res['loss']
            val_recall = val_res['recall']

            calc_time = time() - start
            accum_time = time() - learn_start
            lr = get_lr(optimizer)

            log_msg = f"{epoch}\t{calc_time:.2f}\t{accum_time:.1f}\t{lr:.4f}\t"
            log_msg += f"{clf_loss:.4f}\t{train_recall:.4f}\t"
            log_msg += f"{val_clf:.4f}\t{val_recall:.4f}\t"
            log.info(log_msg)

            if val_recall > best_recall:
                best_model_wts = copy.deepcopy(model.state_dict())
                best_recall = val_recall
                best_val_preds = val_preds

        except KeyboardInterrupt:
            break

    log.info("-" * 20)
    log.info('Best val Recall: {:4f}'.format(best_recall))

    # load best model weights
    model.load_state_dict(best_model_wts)
    test_preds = predict(model, test_df, test_images, valid_transform,
                         device)

    return model, best_val_preds, test_preds

In [17]:
exp_name = f'baseline_{now()}'
device, log, result_dir = setup(exp_name, conf)

train_df = load_csv(conf.train_csv)
train_images = pd.read_parquet(conf.train_images)
test_df = load_csv(conf.test_csv)
test_images = pd.read_parquet(conf.test_images)

log.info('done')
for i in range(5):
    if i != conf.fold:
        continue
    model_arch = 'resnet18'
    model_ft = ResNet(arch_name=model_arch, se=False,
                      input_size=conf.image_size)
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    model_ft, val_preds, test_preds = train_model(
        train_df,
        train_images,
        test_df,
        test_images,
        model_ft,
        criterion,
        log,
        device,
        fold=i,
        num_epoch=conf.num_epoch)

    torch.save(model_ft.state_dict(),  result_dir/f'model_{i}.pkl')
    np.save( result_dir/f'val_preds_{i}.npy', val_preds)
    np.save( result_dir/f'test_preds_{i}.npy', test_preds)

configuration is following...
{'accum_time': 1,
 'alpha': 1,
 'batch_size': 64,
 'cd_size': 7,
 'classes_num': 1,
 'device_name': 'cuda:0',
 'eta_min': 1e-06,
 'fold': 0,
 'gr_size': 168,
 'image_size': (137, 236),
 'init_lr': 0.001,
 'lr_step_epoch': 32,
 'mixup': False,
 'mixup_train': False,
 'n_splits': 5,
 'num_epoch': 128,
 'num_workers': 16,
 'seed': 71,
 'test_csv': '../input/sample_submission.csv',
 'test_images': '../input/train.parquet',
 'train_csv': '../input/train.csv',
 'train_images': '../input/train.parquet',
 'vd_size': 11,
 'weighted_sample': False}
done
classification learning start
--------------------
parameters 11784936
Optimizer: Adam
Scheduler: CosineLR, period=32
100%|██████████| 2510/2510 [12:00<00:00,  3.48it/s]
0	840.88	842.3	0.0010	0.0434	0.5688	0.0229	0.7746	
100%|██████████| 2510/2510 [11:57<00:00,  3.50it/s]
1	841.36	1683.6	0.0010	0.0231	0.7537	0.0208	0.7809	
100%|██████████| 2510/2510 [11:54<00:00,  3.51it/s]
2	838.37	2522.0	0.0010	0.0203	0.7844	0.0191

In [18]:
%debug

ERROR:root:No traceback has been produced, nothing to debug.
