In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import torch
from torch.utils.data import DataLoader, Subset

import timm
import albumentations as A
import argus
from argus.callbacks import MonitorCheckpoint, LoggingToFile, CosineAnnealingLR

from src.commons import config, get_best_model_path, seed_everything
from src.dataset import get_train_data, get_train_folds, transforms_soft, RANZCRDataset, filter_train_annotated_folds
from src.metrics import MultiAUC
from src.models import RANZCRStageZero, RANZCRStageOne

In [None]:
# Seed before building the data and folds so the steps below are reproducible
# (assumes seed_everything seeds every RNG in use — confirm in src.commons).
seed_everything(config.seed)

# `train` feeds every dataset built below; `folds` drives the stage 2 loop and
# `folds_annotated` (derived from `train` and `folds`) drives stages 0 and 1.
train = get_train_data()
folds = get_train_folds(train)
folds_annotated = filter_train_annotated_folds(train, folds)

## Stage 0 — train on the `'annotated'` dataset view (these checkpoints are loaded as the stage 1 teacher)

In [None]:
# Stage 0 dataset: the 'annotated' view of `train`, passed through the `transforms_soft` pipeline.
soft_transform = A.Compose(transforms_soft)
dataset = RANZCRDataset(train, 'annotated', transform=soft_transform)

# Visual sanity check on sample 2: element 0 of the sample is the image; its first
# axis is moved last before plotting (presumably CHW -> HWC — confirm against RANZCRDataset).
sample = dataset[2]
plt.imshow(sample[0].transpose(1, 2, 0))

In [None]:
# Stage 0 configuration consumed by RANZCRStageZero(params).
# Network constructor arguments (presumably forwarded to timm — confirm in src.models).
nn_module_params = {
    'model_name': config.model_name,
    'pretrained': True,
    'num_classes': config.num_classes,
    'in_chans': 1,
}
optimizer_params = {'lr': config.lr}

params = {
    'nn_module': nn_module_params,
    'optimizer': optimizer_params,
    'device': config.device,
}

In [None]:
# Stage 0: train one RANZCRStageZero model per annotated fold.
# Checkpoints go to '<experiment>_stage0_fold_<i>'; stage 1 later picks the best
# one of each fold via get_best_model_path.
# The epoch count is defined once so the cosine schedule length (T_max) and the
# training length cannot drift apart.
num_epochs = 5

for i, (train_index, test_index) in enumerate(folds_annotated):
    model = RANZCRStageZero(params)
    model.set_device(config.devices)

    train_dataset, val_dataset = Subset(dataset, train_index), Subset(dataset, test_index)

    # shuffle=True was missing here although the stage 1 and stage 2 loaders set it.
    # DataLoader defaults to shuffle=False, so every epoch saw the samples in the same
    # order and, combined with drop_last=True, always discarded the same trailing samples.
    train_loader = DataLoader(
        train_dataset,
        batch_size=config.train_batch_size,
        num_workers=config.n_workers,
        shuffle=True,
        drop_last=True,
    )
    # Validation keeps a fixed order and keeps the last partial batch.
    val_loader = DataLoader(val_dataset, batch_size=config.valid_batch_size, num_workers=config.n_workers)

    callbacks = [
        # Keeps up to 3 checkpoints, ranked by the monitored validation multi-AUC.
        MonitorCheckpoint(dir_path=f'{config.experiment_name}_stage0_fold_{i}', monitor='val_multi_auc', max_saves=3),
        CosineAnnealingLR(T_max=num_epochs, eta_min=config.min_lr),
        LoggingToFile(f'{config.experiment_name}_stage0_fold_{i}.log'),
    ]

    model.fit(
        train_loader,
        val_loader=val_loader,
        num_epochs=num_epochs,
        metrics=['loss', 'multi_auc'],
        callbacks=callbacks,
        metrics_on_train=False
    )

## Stage 1 — train on the `'both'` dataset view, using the stage 0 checkpoint of the same fold as teacher

In [None]:
# Stage 1 dataset: the 'both' view of `train`. `additional_targets` registers 'orig'
# as a second image target of the albumentations pipeline.
paired_transform = A.Compose(transforms_soft, additional_targets={'orig': 'image'})
dataset = RANZCRDataset(train, 'both', transform=paired_transform)

# Preview sample 2: each item unpacks as ((orig, img), label); show both images side by side.
(orig, img), label = dataset[2]
for panel, picture in enumerate((orig, img), start=1):
    plt.subplot(1, 2, panel)
    plt.imshow(picture.transpose(1, 2, 0))
# The title is set after the loop, so it lands on the right-hand (current) panel.
plt.title(f'label: {label}')
plt.show()

In [None]:
# Stage 1 configuration consumed by RANZCRStageOne(params). Compared with stage 0 it
# adds 'drop_rate' and 'drop_path_rate'; the training loop later adds
# 'teacher_model_path' to the 'nn_module' entry for each fold.
nn_module_params = {
    'model_name': config.model_name,
    'pretrained': True,
    'num_classes': config.num_classes,
    'in_chans': 1,
    'drop_rate': 0.3,
    'drop_path_rate': 0.2,
}
optimizer_params = {'lr': config.lr}

params = {
    'nn_module': nn_module_params,
    'optimizer': optimizer_params,
    'device': config.device,
}

In [None]:
# Stage 1: train one RANZCRStageOne model per annotated fold, each pointed at the
# best stage 0 checkpoint of the same fold.
num_epochs = 10  # shared by the cosine schedule (T_max) and model.fit

for i, (train_index, test_index) in enumerate(folds_annotated):
    teacher_dir = f'{config.experiment_name}_stage0_fold_{i}/'
    checkpoint_dir = f'{config.experiment_name}_stage1_fold_{i}'
    log_path = f'{config.experiment_name}_stage1_fold_{i}.log'

    # The shared `params` dict is updated in place with this fold's teacher checkpoint
    # before the model is built from it.
    params['nn_module']['teacher_model_path'] = get_best_model_path(teacher_dir)
    model = RANZCRStageOne(params)
    model.set_device(config.devices)

    train_dataset = Subset(dataset, train_index)
    val_dataset = Subset(dataset, test_index)

    train_loader = DataLoader(
        train_dataset,
        batch_size=config.train_batch_size,
        num_workers=config.n_workers,
        shuffle=True,
        drop_last=True,
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=config.valid_batch_size,
        num_workers=config.n_workers,
    )

    callbacks = [
        # Keeps up to 3 checkpoints, ranked by the monitored validation multi-AUC.
        MonitorCheckpoint(dir_path=checkpoint_dir, monitor='val_multi_auc', max_saves=3),
        CosineAnnealingLR(T_max=num_epochs, eta_min=config.min_lr),
        LoggingToFile(log_path),
    ]

    model.fit(
        train_loader,
        val_loader=val_loader,
        num_epochs=num_epochs,
        metrics=['loss', 'multi_auc'],
        callbacks=callbacks,
        metrics_on_train=False,
    )

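## Stage 2 — train on the `'orig'` dataset view over all folds, starting from the stage 1 weights of the same fold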

In [None]:
# Stage 2 dataset: the 'orig' view of `train`, passed through the `transforms_soft` pipeline.
orig_transform = A.Compose(transforms_soft)
dataset = RANZCRDataset(train, 'orig', transform=orig_transform)

In [None]:
# Stage 2 configuration consumed by RANZCRStageZero(params). 'pretrained' is False
# because the training loop loads stage 1 weights right after building the model,
# and the learning rate is half of config.lr.
nn_module_params = {
    'model_name': config.model_name,
    'pretrained': False,
    'num_classes': config.num_classes,
    'in_chans': 1,
    'drop_rate': 0.3,
    'drop_path_rate': 0.2,
}
optimizer_params = {'lr': config.lr / 2}

params = {
    'nn_module': nn_module_params,
    'optimizer': optimizer_params,
    'device': config.device,
}

In [None]:
# Preview samples 0 and 2 side by side, each panel titled with its own label.
for panel, sample_index in enumerate((0, 2), start=1):
    orig, label = dataset[sample_index]
    plt.subplot(1, 2, panel)
    plt.imshow(orig.transpose(1, 2, 0))
    plt.title(f'label: {label}')
plt.show()

In [None]:
# Stage 2: train one RANZCRStageZero model per fold of the full `folds` list, each
# initialised from the best stage 1 checkpoint of the same fold.
num_epochs = 10  # shared by the cosine schedule (T_max) and model.fit

for i, (train_index, test_index) in enumerate(folds):
    stage_one_dir = f'{config.experiment_name}_stage1_fold_{i}/'
    checkpoint_dir = f'{config.experiment_name}_stage2_fold_{i}'
    log_path = f'{config.experiment_name}_stage2_fold_{i}.log'

    model = RANZCRStageZero(params)
    # Load the stage 1 result before moving the model to the training device(s).
    model.load_from_stage_one(get_best_model_path(stage_one_dir))
    model.set_device(config.devices)

    train_dataset = Subset(dataset, train_index)
    val_dataset = Subset(dataset, test_index)

    train_loader = DataLoader(
        train_dataset,
        batch_size=config.train_batch_size,
        num_workers=config.n_workers,
        shuffle=True,
        drop_last=True,
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=config.valid_batch_size,
        num_workers=config.n_workers,
    )

    callbacks = [
        # Keeps up to 3 checkpoints, ranked by the monitored validation multi-AUC.
        MonitorCheckpoint(dir_path=checkpoint_dir, monitor='val_multi_auc', max_saves=3),
        CosineAnnealingLR(T_max=num_epochs, eta_min=config.min_lr),
        LoggingToFile(log_path),
    ]

    model.fit(
        train_loader,
        val_loader=val_loader,
        num_epochs=num_epochs,
        metrics=['loss', 'multi_auc'],
        callbacks=callbacks,
        metrics_on_train=False,
    )