In [None]:
import matplotlib.pyplot as plt
import sys
sys.path.append('../')
from panda_challenge import ClassifcationDatasetMultiCropOneImage
from panda_challenge.train_utils import QWKCallback, get_optimizer, get_scheduler
from panda_challenge import ClassifcationModel
from panda_challenge.models import AdaptiveConcatPool2d
from panda_challenge.utils import freeze, unfreeze

import pandas as pd

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

import albumentations as A
import numpy as np
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.optim.lr_scheduler import OneCycleLR, CosineAnnealingLR
import torch
from catalyst.contrib.nn.schedulers.onecycle import OneCycleLRWithWarmup
from catalyst.contrib.nn.optimizers import RAdam, Lookahead
from warmup_scheduler import GradualWarmupScheduler
import collections

from sklearn.metrics import cohen_kappa_score

from catalyst.dl.runner import SupervisedRunner
from catalyst.dl.callbacks import CriterionCallback
from catalyst.core.callbacks import EarlyStoppingCallback
from catalyst.core.callbacks import MetricAggregationCallback
from catalyst.core.callbacks import CheckpointCallback

from pytorch_toolbelt.losses import BinaryFocalLoss

import os 

from collections import OrderedDict
from typing import List

from tqdm.auto import tqdm

%matplotlib inline

In [None]:
# Run configuration: every tunable value used by the cells below lives here.
N_SPLITS = 5
N_TILES = 36
IMAGE_SIZE = 256
LEVEL = 1
BATCH_SIZE = 6
NUM_WORKERS = 64
N_EPOCHS = 100           # epochs of the second training stage (after unfreeze)
N_FROZEN_ENCODER = 7     # epochs of the first training stage (after freeze)
WARMUP_EPOCHS = 5        # total_epoch of the GradualWarmupScheduler
INIT_LR = 3e-4
WARMUP_FACTOR = 10       # stage two starts at INIT_LR / WARMUP_FACTOR; also the warm-up multiplier
NUM_CLASSES = 5          # output width of the model head
IMAGE_FOLDER = '/data/personal_folders/skolchenko/panda/train_images/'
N_TILES_ROW = int(np.sqrt(N_TILES))  # grid side passed to RandomGridShuffle
MODEL_NAME = 'resnet50'
# Two-step template: the f-string fills in model/size/tiles right now, while the
# escaped `{{}}` leaves a literal `{}` behind for the later `LOG_DIR.format(fold_idx)`.
# (A bare `{}` inside an f-string is a SyntaxError.)
LOG_DIR = f'/data/personal_folders/skolchenko/panda/logs/{MODEL_NAME}_size{IMAGE_SIZE}_tiles{N_TILES}_heavyHead_fold{{}}'
PATIENCE = 10            # patience of the EarlyStoppingCallback

In [None]:
def _flip_or_rotate():
    """Build a fresh ``OneOf`` choosing among transpose, flips, a 90-degree rotation, or a no-op."""
    return A.OneOf(
        [
            A.Transpose(p=1.0),
            A.VerticalFlip(p=1.0),
            A.HorizontalFlip(p=1.0),
            A.RandomRotate90(p=1.0),
            A.NoOp(),
        ],
        p=1.0,
    )


# Grid used by RandomGridShuffle in the two "global" training pipelines.
_SHUFFLE_GRID = (N_TILES_ROW, N_TILES_ROW)

# Handed to the training dataset as `transform_individual`:
# one flip/rotation choice, one distortion choice, one noise/dropout choice.
_distortion_choice = A.OneOf(
    [
        A.ElasticTransform(p=1.0),
        A.GridDistortion(p=1.0),
        A.OpticalDistortion(p=1.0),
        A.NoOp(),
    ],
    p=1.0,
)
_noise_choice = A.OneOf(
    [
        A.GaussNoise(p=1.0),
        A.GaussianBlur(p=1.0),
        A.ISONoise(p=1.0),
        A.CoarseDropout(p=1.0, max_holes=16, max_height=16, max_width=16),
        A.NoOp(),
    ],
    p=1.0,
)
transforms_train_individual = A.Compose([
    _flip_or_rotate(),
    _distortion_choice,
    _noise_choice,
])

# Handed to the training dataset as `transform_global`: normalise first, then
# flip/rotate and shuffle the grid cells.
transforms_train_global = A.Compose([
    A.Normalize(),
    _flip_or_rotate(),
    A.RandomGridShuffle(grid=_SHUFFLE_GRID),
])

# Same operations for test-time augmentation, but the grid shuffle is forced
# (p=1.0) and normalisation is applied last.
transforms_train_global_tta = A.Compose([
    _flip_or_rotate(),
    A.RandomGridShuffle(grid=_SHUFFLE_GRID, p=1.0),
    A.Normalize(),
])

# Validation / holdout: normalisation only.
transforms_valid_global = A.Compose([
    A.Normalize(),
])

Split the data into a training set and a stratified holdout set, then assign cross-validation folds within the training set

In [None]:
# Load the label table and set aside a 15% holdout, stratified on ISUP grade.
data = pd.read_csv('/data/personal_folders/skolchenko/panda/train_cleaned.csv')
data_train, data_holdout = train_test_split(
    data,
    test_size=0.15,
    random_state=42,
    shuffle=True,
    stratify=data.isup_grade,
)

# Reset to a 0..n-1 index so the positional indices yielded by skf.split()
# can be used directly as .loc labels in the loop below.
data_train = data_train.reset_index(drop=True)
data_holdout = data_holdout.reset_index(drop=True)

# -1 means "no fold assigned yet"; the loop overwrites it for every row.
data_train['fold_idx'] = -1

# The fold count comes from the config cell instead of a second hard-coded 5.
# No `groups` argument: StratifiedKFold.split ignores it.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
for fold_idx, (train_index, test_index) in enumerate(skf.split(data_train, data_train.isup_grade.values)):
    data_train.loc[test_index, 'fold_idx'] = fold_idx

assert (data_train['fold_idx'] >= 0).all(), 'some training rows were not assigned to a fold'

In [None]:
def train_fold(fold_idx):
    """Train the model for one cross-validation fold in two stages.

    Stage one calls ``freeze(model)`` and trains for ``N_FROZEN_ENCODER`` epochs.
    The ``best.pth`` checkpoint written by that stage is then reloaded,
    ``unfreeze(model)`` is called, and stage two trains for ``N_EPOCHS`` epochs
    with a warm-up scheduler followed by cosine annealing. Both stages log to
    ``LOG_DIR.format(fold_idx)``.

    Args:
        fold_idx: value of ``data_train['fold_idx']`` held out as the validation
            split; all other rows of ``data_train`` are used for training.
    """
    fold_logdir = LOG_DIR.format(fold_idx)
    tiles_dir = '/data/personal_folders/skolchenko/panda/pickled_tiled_images_{}_{}_{}'.format(
        LEVEL, N_TILES, IMAGE_SIZE)

    # ---- model: backbone plus a replacement two-layer head ----
    net = ClassifcationModel(model_name=MODEL_NAME, num_classes=NUM_CLASSES, pretrained=True)
    net.head = nn.Sequential(
        AdaptiveConcatPool2d((1, 1)),
        nn.Flatten(),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(2*net.nc, net.nc//2),
        nn.ReLU(),
        nn.Dropout(0.15),
        nn.Linear(net.nc//2, NUM_CLASSES))
    net.cuda()

    # ---- data ----
    def make_dataset(frame, **transform_kwargs):
        # All three datasets share everything except the frame and the transforms.
        return ClassifcationDatasetMultiCropOneImage(
            frame,
            IMAGE_SIZE,
            IMAGE_FOLDER,
            LEVEL,
            N_TILES,
            normalize=False,
            load_pickled_tiles=True,
            pickled_tiles_folder=tiles_dir,
            output_type='ordinal',
            **transform_kwargs)

    train_frame = data_train.loc[data_train['fold_idx'] != fold_idx]
    val_frame = data_train.loc[data_train['fold_idx'] == fold_idx]

    train_set = make_dataset(
        train_frame,
        transform_individual=transforms_train_individual,
        transform_global=transforms_train_global)
    val_set = make_dataset(val_frame, transform_global=transforms_valid_global)
    holdout_set = make_dataset(data_holdout, transform_global=transforms_valid_global)

    # Only the training loader shuffles.
    loaders = collections.OrderedDict()
    for loader_name, dataset, do_shuffle in (
            ("train", train_set, True),
            ("valid", val_set, False),
            ("holdout", holdout_set, False)):
        loaders[loader_name] = DataLoader(
            dataset, batch_size=BATCH_SIZE, shuffle=do_shuffle, num_workers=NUM_WORKERS)

    # ---- loss, runner, callbacks ----
    # (nn.MSELoss() with qwk_name='simple' was the commented-out alternative.)
    losses = {'loss_isup': nn.BCEWithLogitsLoss()}
    runner = SupervisedRunner(
        input_key='features',
        input_target_key="targets_isup",
        output_key="logits_isup")

    # NOTE(review): the same callback instances are handed to both runner.train()
    # calls below. Whether early-stopping / checkpoint state carries over between
    # the two stages depends on the catalyst version - confirm this is intended.
    callbacks = [
        CriterionCallback(
            input_key="targets_isup",
            output_key="logits_isup",
            prefix="loss_isup",
            criterion_key='loss_isup',
            multiplier=1.0),
        QWKCallback(
            input_key="targets_isup",
            output_key='logits_isup',
            qwk_name='ordinal'),
        MetricAggregationCallback(
            prefix="loss",
            mode="weighted_sum",
            metrics={"loss_isup": 1.0}),
        EarlyStoppingCallback(patience=PATIENCE, min_delta=1e-4),
        CheckpointCallback(save_n_best=5),
    ]

    def make_optimizer(lr):
        # RAdam wrapped in Lookahead, built over all of the model's parameters.
        return Lookahead(RAdam(net.parameters(), lr=lr))

    # Arguments that are identical for both training stages.
    shared_train_kwargs = dict(
        model=net,
        criterion=losses,
        callbacks=callbacks,
        loaders=loaders,
        logdir=fold_logdir,
        main_metric='loss',
        verbose=True,
        minimize_metric=True,
        fp16=True)

    # ---- stage one: train after freeze(), no scheduler ----
    freeze(net)
    runner.train(
        optimizer=make_optimizer(INIT_LR),
        num_epochs=N_FROZEN_ENCODER,
        **shared_train_kwargs)

    # Continue from the best stage-one checkpoint.
    best_checkpoint = torch.load(fold_logdir+'/checkpoints/best.pth')
    net.load_state_dict(best_checkpoint['model_state_dict'])

    # ---- stage two: unfreeze, warm up from INIT_LR / WARMUP_FACTOR, then cosine ----
    unfreeze(net)
    optimizer = make_optimizer(INIT_LR/WARMUP_FACTOR)
    cosine_after_warmup = CosineAnnealingLR(
        optimizer,
        N_EPOCHS-WARMUP_EPOCHS)
    warmup_then_cosine = GradualWarmupScheduler(
        optimizer,
        multiplier=WARMUP_FACTOR,
        total_epoch=WARMUP_EPOCHS,
        after_scheduler=cosine_after_warmup)

    runner.train(
        scheduler=warmup_then_cosine,
        optimizer=optimizer,
        num_epochs=N_EPOCHS,
        **shared_train_kwargs)

In [None]:
# Train every cross-validation fold in turn (replaces one copy-pasted cell per
# fold). Each call logs and checkpoints under LOG_DIR.format(fold_idx).
for fold_idx in range(N_SPLITS):
    train_fold(fold_idx)

In [None]:
def average_weights(state_dicts: List[dict]):
    """Return the key-wise mean of several state dicts.

    Adapted from https://gist.github.com/qubvel/70c3d5e4cddcde731408f478e12ef87b

    Args:
        state_dicts: non-empty list of mappings. The keys of the first entry
            define the result; each value must support ``+`` and division by
            an ``int``.

    Returns:
        An ``OrderedDict`` with the keys of ``state_dicts[0]`` (in that order),
        each mapped to the sum of that key's values over all entries divided by
        the number of entries.

    Raises:
        ValueError: if ``state_dicts`` is empty.
        KeyError: if a later entry lacks a key that the first entry has.
    """
    if not state_dicts:
        # Previously surfaced as an opaque IndexError on state_dicts[0].
        raise ValueError('average_weights() needs at least one state dict')

    n_dicts = len(state_dicts)
    average_dict = OrderedDict()
    for key in state_dicts[0].keys():
        average_dict[key] = sum(state_dict[key] for state_dict in state_dicts) / n_dicts
    return average_dict

def evaluate_model(model, val_loader):
    """Score ``model`` on ``val_loader`` with quadratic-weighted Cohen's kappa.

    Adapted from https://gist.github.com/qubvel/70c3d5e4cddcde731408f478e12ef87b

    The model is switched to eval mode and run without gradients. For each
    batch the sigmoid of the model output is summed over dim 1 and rounded to
    give the predicted class; the ground-truth class is ``batch['targets_isup']``
    summed over dim 1.

    Args:
        model: module called on ``batch['features']`` after moving it to CUDA.
        val_loader: iterable with a length, yielding dict batches that contain
            ``'features'`` and ``'targets_isup'``.

    Returns:
        ``cohen_kappa_score(predicted, ground_truth, weights='quadratic')``.
    """
    model.eval()
    predicted_class, gt_class = [], []
    with torch.no_grad():
        for batch in tqdm(val_loader, total=len(val_loader)):
            logits = model(batch['features'].cuda())
            probabilities = torch.sigmoid(logits)
            # Predicted grade: rounded sum of the per-output probabilities.
            batch_predicted = probabilities.sum(dim=1).cpu().round().numpy()
            batch_gt = batch['targets_isup'].sum(dim=1).cpu().numpy()
            predicted_class.extend(batch_predicted)
            gt_class.extend(batch_gt)

    predicted_class = np.array(predicted_class).astype(int)
    gt_class = np.array(gt_class).astype(int)
    return cohen_kappa_score(predicted_class, gt_class, weights='quadratic')

In [None]:
# One entry per fold, indexed by fold_idx: the epoch numbers whose
# `checkpoints/train.{epoch}.pth` files are candidates for weight averaging.
# Left empty here; fill it in before running the averaging loop.
fold_epochs = []

In [None]:
# Greedy checkpoint averaging: for each fold, walk the candidate checkpoints in
# the order given by fold_epochs[fold_idx] and keep a checkpoint only if adding
# it to the running average improves the validation kappa.
# Only fold 0 is processed for now; widen to range(N_SPLITS) once fold_epochs
# has an entry for every fold.
for fold_idx in range(1):
    print(f'====== Fold {fold_idx} =====')

    # Fail fast with a clear message instead of a bare IndexError further down.
    if fold_idx >= len(fold_epochs) or not fold_epochs[fold_idx]:
        raise ValueError(f'fold_epochs has no checkpoint epochs for fold {fold_idx}')

    model = ClassifcationModel(model_name=MODEL_NAME, num_classes=NUM_CLASSES, pretrained=True)
    # Must match the head built in train_fold(), otherwise the checkpoints will not load.
    model.head = nn.Sequential(
                AdaptiveConcatPool2d((1, 1)),
                nn.Flatten(),
                nn.ReLU(),
                nn.Dropout(0.5),
                nn.Linear(2*model.nc, model.nc//2),
                nn.ReLU(),
                nn.Dropout(0.15),
                nn.Linear(model.nc//2, NUM_CLASSES))
    model.cuda()

    fold_data_train = data_train.loc[data_train['fold_idx']!=fold_idx]
    fold_data_val = data_train.loc[data_train['fold_idx']==fold_idx]

    val_dataset = ClassifcationDatasetMultiCropOneImage(
        fold_data_val,
        IMAGE_SIZE,
        IMAGE_FOLDER,
        LEVEL,
        N_TILES,
        transform_global=transforms_valid_global,
        normalize=False,
        # evaluate_model() recovers the grade by summing targets over dim 1, so
        # the dataset has to emit the same 'ordinal' targets used in train_fold().
        output_type='ordinal',
        #load_pickled_tiles=True,
        #pickled_tiles_folder='/data/personal_folders/skolchenko/panda/pickled_tiled_images_{}_{}_{}'.format(LEVEL,
        #                                                                                                    N_TILES,
        #                                                                                                    IMAGE_SIZE)
        )
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

    weights_path = [os.path.join(LOG_DIR.format(fold_idx), f'checkpoints/train.{epoch}.pth') for epoch in fold_epochs[fold_idx]]
    all_weights = [torch.load(path)['model_state_dict'] for path in weights_path]

    # Start below any reachable kappa so the first checkpoint is always accepted;
    # starting at 0 could reject every checkpoint and leave nothing to average.
    best_score = float('-inf')
    best_weights = []

    for w in all_weights:
        current_weights = best_weights + [w]
        average_dict = average_weights(current_weights)
        model.load_state_dict(average_dict)
        score = evaluate_model(model, val_loader)
        print(f'Score: {score}')
        if score > best_score:
            print(f'New best score {score}')
            best_score = score
            best_weights.append(w)

    best_dict = average_weights(best_weights)
    model.load_state_dict(best_dict)
    torch.save(model.state_dict(), os.path.join(LOG_DIR.format(fold_idx), 'checkpoints/averaged_best.pth'))