## APTOS2019 train kernel (PyTorch)

### Flags for training

In [1]:
FOLD = 4
IMG_SIZE = 320
MODEL = 'efficientnet-b5'
APTOS2015_pretrained_path = '../input/aptosweights2/efficientnet-b5_fold'+str(FOLD)+'_v6.pth'

### train params

In [2]:
train_params = {
    'n_splits': 5,
    'n_epochs': 10,
    'lr': 1e-3,
    'base_lr': 1e-4,
    'max_lr': 3e-3,
    'step_factor': 5,
    'train_batch_size': 32,
    'test_batch_size': 32,
    'accumulation_steps': 10,
}

### packages

In [3]:
#! pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ../input/nvidia-apex/repository/NVIDIA-apex-665b2dd

In [4]:
! pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" git+https://github.com/NVIDIA/apex

  cmdoptions.check_install_build_global(options)
Created temporary directory: /tmp/pip-ephem-wheel-cache-a5ofq3ro
Created temporary directory: /tmp/pip-req-tracker-c6ewaq93
Created requirements tracker '/tmp/pip-req-tracker-c6ewaq93'
Created temporary directory: /tmp/pip-install-5yq8zi5o
Collecting git+https://github.com/NVIDIA/apex
  Created temporary directory: /tmp/pip-req-build-_5e0zopk
  Cloning https://github.com/NVIDIA/apex to /tmp/pip-req-build-_5e0zopk
  Running command git clone -q https://github.com/NVIDIA/apex /tmp/pip-req-build-_5e0zopk
  Added git+https://github.com/NVIDIA/apex to build tracker '/tmp/pip-req-tracker-c6ewaq93'
    Running setup.py (path:/tmp/pip-req-build-_5e0zopk/setup.py) egg_info for package from git+https://github.com/NVIDIA/apex
    Running command python setup.py egg_info
    torch.__version__  =  1.2.0
    running egg_info
    creating pip-egg-info/apex.egg-info
    writing pip-egg-info/apex.egg-info/PKG-INFO
    writing dependency_l

In [5]:
import sys
sys.path.append('../input/efficientnet-pytorch-repository/repository/lukemelas-EfficientNet-PyTorch-e5c8726')

In [6]:
import gc
import os
import random
import time
from contextlib import contextmanager
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import scipy as sp
from fastprogress import master_bar, progress_bar
from functools import partial
from sklearn.metrics import cohen_kappa_score

import torch
import torch.nn as nn
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader, Dataset

from efficientnet_pytorch import EfficientNet

from albumentations import (
    Compose, HorizontalFlip, IAAAdditiveGaussianNoise, Normalize, OneOf,
    RandomBrightness, RandomContrast, Resize, VerticalFlip, Rotate, ShiftScaleRotate,
    RandomBrightnessContrast, OpticalDistortion, GridDistortion, ElasticTransform, Cutout
)
from albumentations.pytorch import ToTensor

from apex import amp

from fastai.layers import Flatten, AdaptiveConcatPool2d

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

### utils

In [8]:
@contextmanager
def timer(name):
    t0 = time.time()
    LOGGER.info(f'[{name}] start')
    yield
    LOGGER.info(f'[{name}] done in {time.time() - t0:.0f} s.')

In [9]:
def init_logger(log_file='train.log'):
    from logging import getLogger, DEBUG, FileHandler,  Formatter,  StreamHandler
    
    log_format = '%(asctime)s %(levelname)s %(message)s'
    
    stream_handler = StreamHandler()
    stream_handler.setLevel(DEBUG)
    stream_handler.setFormatter(Formatter(log_format))
    
    file_handler = FileHandler(log_file)
    file_handler.setFormatter(Formatter(log_format))
    
    logger = getLogger('APTOS')
    logger.setLevel(DEBUG)
    logger.addHandler(stream_handler)
    logger.addHandler(file_handler)
    
    return logger

LOG_FILE = 'aptos-train.log'
LOGGER = init_logger(LOG_FILE)

In [10]:
def seed_torch(seed=777):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

SEED = 777
seed_torch(SEED)

In [11]:
def quadratic_weighted_kappa(y_hat, y):
    return cohen_kappa_score(y_hat, y, weights='quadratic')

In [12]:
class OptimizedRounder():
    def __init__(self):
        self.coef_ = 0

    def _kappa_loss(self, coef, X, y):
        X_p = np.copy(X)
        for i, pred in enumerate(X_p):
            if pred < coef[0]:
                X_p[i] = 0
            elif pred >= coef[0] and pred < coef[1]:
                X_p[i] = 1
            elif pred >= coef[1] and pred < coef[2]:
                X_p[i] = 2
            elif pred >= coef[2] and pred < coef[3]:
                X_p[i] = 3
            else:
                X_p[i] = 4

        ll = quadratic_weighted_kappa(y, X_p)
        return -ll

    def fit(self, X, y):
        loss_partial = partial(self._kappa_loss, X=X, y=y)
        initial_coef = [0.5, 1.5, 2.5, 3.5]
        self.coef_ = sp.optimize.minimize(loss_partial, initial_coef, method='nelder-mead')

    def predict(self, X, coef):
        X_p = np.copy(X)
        for i, pred in enumerate(X_p):
            if pred < coef[0]:
                X_p[i] = 0
            elif pred >= coef[0] and pred < coef[1]:
                X_p[i] = 1
            elif pred >= coef[1] and pred < coef[2]:
                X_p[i] = 2
            elif pred >= coef[2] and pred < coef[3]:
                X_p[i] = 3
            else:
                X_p[i] = 4
        return X_p

    def coefficients(self):
        return self.coef_['x']

In [13]:
# NOTE: official CyclicLR implementation doesn't work now

from torch.optim.lr_scheduler import _LRScheduler

class CyclicLR(_LRScheduler):
    def __init__(self, optimizer, base_lr, max_lr, step_size, gamma=0.99, mode='triangular', last_epoch=-1):
        self.optimizer = optimizer
        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.gamma = gamma
        self.mode = mode
        assert mode in ['triangular', 'triangular2', 'exp_range']
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        new_lr = []
        # make sure that the length of base_lrs doesn't change. Dont care about the actual value
        for base_lr in self.base_lrs:
            cycle = np.floor(1 + self.last_epoch / (2 * self.step_size))
            x = np.abs(float(self.last_epoch) / self.step_size - 2 * cycle + 1)
            if self.mode == 'triangular':
                lr = self.base_lr + (self.max_lr - self.base_lr) * np.maximum(0, (1 - x))
            elif self.mode == 'triangular2':
                lr = self.base_lr + (self.max_lr - self.base_lr) * np.maximum(0, (1 - x)) / float(2 ** (cycle - 1))
            elif self.mode == 'exp_range':
                lr = self.base_lr + (self.max_lr - self.base_lr) * np.maximum(0, (1 - x)) * (self.gamma ** (self.last_epoch))
            new_lr.append(lr)
        return new_lr

### dataset

In [14]:
APTOS_DIR = Path('../input/aptos2019-blindness-detection')
APTOS_TRAIN_DIR = Path('../input/aptos-train-dataset')

APTOS_TRAIN_IMAGES = APTOS_TRAIN_DIR / 'aptos-train-images/aptos-train-images'

#APTOS_FOLDS = Path('../input/aptos-folds/folds.csv')
#APTOS_FOLDS = Path('../input/aptos-folds/jpeg_folds.csv')
#APTOS_FOLDS = Path('../input/aptos-folds/png_folds.csv')
APTOS_2015_FOLDS = Path('../input/aptos-folds/2015_5folds.csv')
APTOS_2019_FOLDS = Path('../input/aptos-folds/2019_5folds.csv')
PSEUDO_LABEL = Path('../input/aptos-folds/0842_pseudo_label.csv')

ID_COLUMN = 'id_code'
TARGET_COLUMN = 'diagnosis'

In [15]:
PRETRAINED_DIR = Path('../input/pytorch-pretrained-models')
EFFICIENTNET_PRETRAINED_DIR = Path('../input/efficientnet-pytorch')

PRETRAINED_MAPPING = {
    # ResNet
    'resnet18': PRETRAINED_DIR / 'resnet18-5c106cde.pth', 
    'resnet34': PRETRAINED_DIR / 'resnet34-333f7ec4.pth',
    'resnet50': PRETRAINED_DIR / 'resnet50-19c8e357.pth',
    'resnet101': PRETRAINED_DIR / 'resnet101-5d3b4d8f.pth',
    'resnet152': PRETRAINED_DIR / 'resnet152-b121ed2d.pth',

    # ResNeXt
    'resnext101_32x4d': PRETRAINED_DIR / 'resnext101_32x4d-29e315fa.pth',
    'resnext101_64x4d': PRETRAINED_DIR / 'resnext101_64x4d-e77a0586.pth',

    # WideResNet
    #'wideresnet50'

    # DenseNet
    'densenet121': PRETRAINED_DIR / 'densenet121-fbdb23505.pth',
    'densenet169': PRETRAINED_DIR / 'densenet169-f470b90a4.pth',
    'densenet201': PRETRAINED_DIR / 'densenet201-5750cbb1e.pth',
    'densenet161': PRETRAINED_DIR / 'densenet161-347e6b360.pth',

    # SE-ResNet
    'se_resnet50': PRETRAINED_DIR / 'se_resnet50-ce0d4300.pth',
    'se_resnet101': PRETRAINED_DIR / 'se_resnet101-7e38fcc6.pth',
    'se_resnet152': PRETRAINED_DIR / 'se_resnet152-d17c99b7.pth',

    # SE-ResNeXt
    'se_resnext50_32x4d': PRETRAINED_DIR / 'se_resnext50_32x4d-a260b3a4.pth',
    'se_resnext101_32x4d': PRETRAINED_DIR / 'se_resnext101_32x4d-3b2fe3d8.pth',

    # SE-Net
    'senet154': PRETRAINED_DIR / 'senet154-c7b49a05.pth',

    # InceptionV3
    'inceptionv3': PRETRAINED_DIR / 'inception_v3_google-1a9a5a14.pth',

    # InceptionV4
    'inceptionv4': PRETRAINED_DIR / 'inceptionv4-8e4777a0.pth',

    # BNInception
    'bninception': PRETRAINED_DIR / 'bn_inception-52deb4733.pth',

    # InceptionResNetV2
    'inceptionresnetv2': PRETRAINED_DIR / 'inceptionresnetv2-520b38e4.pth',

    # Xception
    'xception': PRETRAINED_DIR / 'xception-43020ad28.pth',

    # DualPathNet
    'dpn68': PRETRAINED_DIR / 'dpn68-4af7d88d2.pth',
    'dpn98': PRETRAINED_DIR / 'dpn98-722954780.pth',
    'dpn131': PRETRAINED_DIR / 'dpn131-7af84be88.pth',
    'dpn68b': PRETRAINED_DIR / 'dpn68b_extra-363ab9c19.pth',
    'dpn92': PRETRAINED_DIR / 'dpn92_extra-fda993c95.pth',
    'dpn107': PRETRAINED_DIR / 'dpn107_extra-b7f9f4cc9.pth',

    # PolyNet
    'polynet': PRETRAINED_DIR / 'polynet-f71d82a5.pth',

    # NasNet-A-Large
    'nasnetalarge': PRETRAINED_DIR / 'nasnetalarge-a1897284.pth',

    # PNasNet-5-Large
    'pnasnet5large': PRETRAINED_DIR / 'pnasnet5large-bf079911.pth',

    # EfficientNet
    'efficientnet-b0': EFFICIENTNET_PRETRAINED_DIR / 'efficientnet-b0-08094119.pth',
    'efficientnet-b1': EFFICIENTNET_PRETRAINED_DIR / 'efficientnet-b1-dbc7070a.pth',
    'efficientnet-b2': EFFICIENTNET_PRETRAINED_DIR / 'efficientnet-b2-27687264.pth',
    'efficientnet-b3': EFFICIENTNET_PRETRAINED_DIR / 'efficientnet-b3-c8376fa2.pth',
    'efficientnet-b4': EFFICIENTNET_PRETRAINED_DIR / 'efficientnet-b4-e116e8b3.pth',
    'efficientnet-b5': EFFICIENTNET_PRETRAINED_DIR / 'efficientnet-b5-586e6cc6.pth',
}

In [16]:
class APTOSTrainDataset(Dataset):
    def __init__(self, image_dir, file_paths, labels, transform=None):
        self.image_dir = image_dir
        self.file_paths = file_paths
        self.labels = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        #file_path = f'{self.image_dir}/{self.file_paths[idx]}'
        file_path = f'{self.file_paths[idx]}'
        label = torch.tensor(self.labels[idx]).float()
        
        image = cv2.imread(file_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        
        return image, label

### transforms

In [17]:
def get_transforms(*, data):
    assert data in ('train', 'valid')
    
    if data == 'train':
        return Compose([
            Resize(IMG_SIZE, IMG_SIZE),
            HorizontalFlip(p=0.5),
            VerticalFlip(p=0.5),
            Rotate(p=0.5), 
            #ShiftScaleRotate(p=0.5),
            #RandomBrightnessContrast(brightness_limit=0.5, contrast_limit=0.5, p=0.5),
            #OpticalDistortion(distort_limit=(0.9,1.0), shift_limit=0.05, interpolation=1, border_mode=4, 
            #                  value=None, always_apply=False, p=0.5),
            #GridDistortion(num_steps=5, distort_limit=0.3, interpolation=1, border_mode=4,
            #               value=None, always_apply=False, p=0.5),
            #ElasticTransform(alpha=1, sigma=50, alpha_affine=50, interpolation=1, border_mode=4,
            #                 value=None, always_apply=True, approximate=False, p=0.5),
            #Cutout(p=0.25, max_h_size=25, max_w_size=25, num_holes=8),
            #OneOf([
            #    RandomBrightness(0.1, p=1),
            #    RandomContrast(0.1, p=1),
            #], p=0.25),
            RandomContrast(0.5, p=0.5),
            IAAAdditiveGaussianNoise(p=0.25),
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensor(),
        ])
    
    elif data == 'valid':
        return Compose([
            Resize(IMG_SIZE, IMG_SIZE),
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensor(),
        ])

### model

In [18]:
class ClassifierModule(nn.Sequential):
    def __init__(self, n_features):
        super().__init__(
            nn.BatchNorm1d(n_features),
            nn.Dropout(0.5),
            nn.Linear(n_features, n_features),
            nn.PReLU(),
            nn.BatchNorm1d(n_features),
            nn.Dropout(0.2),
            nn.Linear(n_features, 1),
        )

In [19]:
class CustomResNet(nn.Module):
    def __init__(self, model_name='resnet50', weights_path=None):
        assert model_name in ('resnet50', 'resnet101', 'resnet152')
        super().__init__()
        
        self.net = pretrainedmodels.__dict__[model_name](pretrained=None)
        self.net.load_state_dict(torch.load(weights_path))
        
        n_features = self.net.last_linear.in_features
        
        self.net.avgpool = nn.AdaptiveAvgPool2d(1)
        # self.net.avgpool = AdaptiveConcatPool2d(1)
        self.net.last_linear = ClassifierModule(n_features)
        
    def forward(self, x):
        return self.net(x)

In [20]:
class CustomResNeXt(nn.Module):
    def __init__(self, model_name='resnext101_32x4d', weights_path=None):
        assert model_name in ('resnext101_32x4d', 'resnext101_64x4d')
        super().__init__()
        
        self.net = pretrainedmodels.__dict__[model_name](pretrained=None)
        self.net.load_state_dict(torch.load(weights_path))
        
        n_features = self.net.last_linear.in_features
        
        self.net.avg_pool = nn.AdaptiveAvgPool2d(1)
        # self.net.avg_pool = AdaptiveConcatPool2d(1)
        self.net.last_linear = ClassifierModule(n_features)
        
    def forward(self, x):
        return self.net(x)

In [21]:
class CustomSENet(nn.Module):
    def __init__(self, model_name='se_resnet50', weights_path=None):
        assert model_name in ('senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d')
        super().__init__()
        
        self.net = pretrainedmodels.__dict__[model_name](pretrained=None)
        self.net.load_state_dict(torch.load(weights_path))
        
        n_features = self.net.last_linear.in_features
        
        self.net.avg_pool = nn.AdaptiveAvgPool2d(1)
        # self.net.avg_pool = AdaptiveConcatPool2d(1)
        self.net.last_linear = ClassifierModule(n_features)
        
    def forward(self, x):
        return self.net(x)

In [22]:
class CustomEfficientNet(nn.Module):
    def __init__(self, model_name='efficientnet-b0', weights_path=None):
        assert model_name in ('efficientnet-b0', 'efficientnet-b1', 'efficientnet-b2', 'efficientnet-b3', 'efficientnet-b4', 'efficientnet-b5')
        super().__init__()
        
        self.net = EfficientNet.from_name(model_name)
        self.net.load_state_dict(torch.load(weights_path))
        
        n_features = self.net._fc.in_features
        
        self.net._fc = ClassifierModule(n_features)
        
    def forward(self, x):
        return self.net(x)

### entry point

In [23]:
LOGGER.debug(f'Fold: {FOLD}')
LOGGER.debug(f'Model: {MODEL}')
LOGGER.debug(f'Train params: {train_params}')

2019-08-31 03:20:48,664 DEBUG Fold: 4
2019-08-31 03:20:48,666 DEBUG Model: efficientnet-b5
2019-08-31 03:20:48,667 DEBUG Train params: {'n_splits': 5, 'n_epochs': 10, 'lr': 0.001, 'base_lr': 0.0001, 'max_lr': 0.003, 'step_factor': 5, 'train_batch_size': 32, 'test_batch_size': 32, 'accumulation_steps': 10}


In [24]:
with timer('Prepare train and valid sets'):
    with timer('  * load folds csv'):
        folds2019 = pd.read_csv(APTOS_2019_FOLDS)
        folds2019['id_code'] = folds2019['id_code'].apply(lambda x: \
                                                  '../input/aptos-train-dataset/aptos-train-images/aptos-train-images/'+x)
        train_fold2019 = folds2019[folds2019['fold'] != FOLD].reset_index(drop=True)
        valid_fold = folds2019[folds2019['fold'] == FOLD].reset_index(drop=True)
        folds2015 = pd.read_csv(APTOS_2015_FOLDS)
        folds2015['id_code'] = folds2015['id_code'].apply(lambda x: \
                                                  '../input/aptos-train-dataset/aptos-train-images/aptos-train-images/'+x)
        train_fold2015 = folds2015[folds2015['fold'] == 0].reset_index(drop=True)
        train_pseudo = pd.read_csv(PSEUDO_LABEL)
        train_pseudo['fold'] = FOLD
        train_pseudo['id_code'] = train_pseudo['id_code'].apply(lambda x: \
                              '../input/aptos2019-blindness-detection/test_images/'+x+'.png')
        #if FOLD==4:
            #train_fold = pd.concat([train_fold2019, train_fold2015]).reset_index(drop=True)[:-1]
        #else:
            #train_fold = pd.concat([train_fold2019, train_fold2015]).reset_index(drop=True)
        train_fold = pd.concat([train_fold2019, train_fold2015]).reset_index(drop=True)
        train_fold = pd.concat([train_fold, train_pseudo]).reset_index(drop=True)
    
    with timer('  * define dataset'):
        APTOSTrainDataset = partial(APTOSTrainDataset, image_dir=APTOS_TRAIN_IMAGES)
        train_dataset = APTOSTrainDataset(file_paths=train_fold.id_code.values,
                                          labels=train_fold.diagnosis.values[:, np.newaxis],
                                          transform=get_transforms(data='train'))
        valid_dataset = APTOSTrainDataset(file_paths=valid_fold.id_code.values,
                                          labels=valid_fold.diagnosis.values[:, np.newaxis],
                                          transform=get_transforms(data='valid'))
        
    with timer('  * define dataloader'):
        train_loader = DataLoader(train_dataset,
                                  batch_size=train_params['train_batch_size'],
                                  shuffle=True)
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=train_params['test_batch_size'],
                                  shuffle=False)
        
LOGGER.debug(f'train size: {len(train_dataset)}, valid size: {len(valid_dataset)}')

2019-08-31 03:20:48,685 INFO [Prepare train and valid sets] start
2019-08-31 03:20:48,686 INFO [  * load folds csv] start
2019-08-31 03:20:48,753 INFO [  * load folds csv] done in 0 s.
2019-08-31 03:20:48,754 INFO [  * define dataset] start
2019-08-31 03:20:48,756 INFO [  * define dataset] done in 0 s.
2019-08-31 03:20:48,756 INFO [  * define dataloader] start
2019-08-31 03:20:48,757 INFO [  * define dataloader] done in 0 s.
2019-08-31 03:20:48,758 INFO [Prepare train and valid sets] done in 0 s.
2019-08-31 03:20:48,758 DEBUG train size: 11785, valid size: 704


In [25]:
with timer('Train model'):
    n_epochs = train_params['n_epochs']
    lr = train_params['lr']
    base_lr = train_params['base_lr']
    max_lr = train_params['max_lr']
    step_factor = train_params['step_factor']
    test_batch_size = train_params['test_batch_size']
    accumulation_steps = train_params['accumulation_steps']
    
    model = CustomEfficientNet(model_name=MODEL, weights_path=PRETRAINED_MAPPING[MODEL])
    model.load_state_dict(torch.load(APTOS2015_pretrained_path))
    model.to(device)
    
    optimizer = Adam(model.parameters(), lr=lr, amsgrad=False)
    #optimizer = SGD(model.parameters(), lr=lr, weight_decay=4e-5, momentum=0.9, nesterov=True)
    scheduler = CyclicLR(optimizer,
                         base_lr=base_lr,
                         max_lr=max_lr,
                         step_size=len(train_loader) * step_factor)

    model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
    
    criterion = nn.MSELoss()
    #criterion = nn.SmoothL1Loss()
    
    optimized_rounder = OptimizedRounder()
    y_true = valid_fold.diagnosis.values
    
    for epoch in range(n_epochs):
        start_time = time.time()

        model.train()
        avg_loss = 0.

        optimizer.zero_grad()

        for i, (images, labels) in enumerate(train_loader):
            if isinstance(scheduler, CyclicLR):
                scheduler.step()

            y_preds = model(images.to(device))
            loss = criterion(y_preds, labels.to(device))

            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()

            if (i+1) % accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()

            avg_loss += loss.item() / accumulation_steps / len(train_loader)

        if not isinstance(scheduler, CyclicLR):
            scheduler.step()

        model.eval()
        valid_preds = np.zeros((len(valid_dataset)))
        avg_val_loss = 0.

        for i, (images, labels) in enumerate(valid_loader):
            with torch.no_grad():
                y_preds = model(images.to(device)).detach()

            loss = criterion(y_preds, labels.to(device))
            valid_preds[i * test_batch_size: (i+1) * test_batch_size] = y_preds[:, 0].to('cpu').numpy()

            avg_val_loss += loss.item() / len(valid_loader)

        optimized_rounder.fit(valid_preds, y_true)
        coefficients = optimized_rounder.coefficients()
        final_preds = optimized_rounder.predict(valid_preds, coefficients)
        qwk = quadratic_weighted_kappa(y_true, final_preds)

        elapsed = time.time() - start_time

        LOGGER.debug(f'  Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.debug(f'          - qwk: {qwk:.6f}  coefficients: {coefficients}')

        # FIXME: save all epochs for debug
        torch.save(model.state_dict(), f'{MODEL}_fold{FOLD}_epoch{epoch+1}.pth')

2019-08-31 03:20:48,782 INFO [Train model] start
2019-08-31 03:33:27,906 DEBUG   Epoch 1 - avg_train_loss: 0.0260  avg_val_loss: 0.2574  time: 754s
2019-08-31 03:33:27,909 DEBUG           - qwk: 0.920715  coefficients: [0.54362  1.568374 2.368077 3.12557 ]
2019-08-31 03:45:24,492 DEBUG   Epoch 2 - avg_train_loss: 0.0263  avg_val_loss: 0.2699  time: 716s
2019-08-31 03:45:24,493 DEBUG           - qwk: 0.921703  coefficients: [0.50625 1.5     2.375   3.71875]
2019-08-31 03:57:24,023 DEBUG   Epoch 3 - avg_train_loss: 0.0284  avg_val_loss: 0.2676  time: 719s
2019-08-31 03:57:24,024 DEBUG           - qwk: 0.919194  coefficients: [0.57144  1.372908 2.205801 3.607848]
2019-08-31 04:09:24,277 DEBUG   Epoch 4 - avg_train_loss: 0.0305  avg_val_loss: 0.2785  time: 720s
2019-08-31 04:09:24,278 DEBUG           - qwk: 0.921806  coefficients: [0.537131 1.647616 2.662807 3.35864 ]
2019-08-31 04:21:25,944 DEBUG   Epoch 5 - avg_train_loss: 0.0353  avg_val_loss: 0.2740  time: 721s
2019-08-31 04:21:25,945 