In [None]:
import os
import sys
import time
import math
import numpy as np
import pandas as pd
from shutil import copyfile
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

import cv2
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensor

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold, StratifiedKFold

from apex import amp
import pretrainedmodels
from torchcontrib.optim import SWA

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.swa_utils import AveragedModel, SWALR
from tqdm import tqdm
from torch.optim import Adam, SGD, AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau, CyclicLR
from torch.utils.data.sampler import SubsetRandomSampler, RandomSampler, SequentialSampler
from torch.utils.data import DataLoader, Dataset

In [None]:
# Base directory of the input data and the training label table.
root = './input/'
df = pd.read_csv('./input/train.csv')

# The columns at positions 1..11 are used as the prediction targets.
target_cols = df.columns[1:12].tolist()

df.head()

In [None]:
# Training pipeline: resize to 256x256, then ShiftScaleRotate, Flip and
# RandomBrightnessContrast augmentations.
_train_steps = [
    A.Resize(height=256, width=256, p=1.0),
    A.ShiftScaleRotate(p=0.5),
    A.Flip(),
    A.RandomBrightnessContrast(),
]
transforms_train = A.Compose(_train_steps)

# Validation pipeline: resize only, no augmentation.
_valid_steps = [A.Resize(height=256, width=256, p=1.0)]
transforms_valid = A.Compose(_valid_steps)

In [None]:
class config:
    """Paths and hyper-parameters shared by the cells of this notebook."""

    # --- data ---
    TRAIN_IMG_PATH = './input/train/'   # prefix prepended to every image file name
    NUM_WORKERS = 4                     # DataLoader worker processes

    # --- model / checkpoint naming ---
    KERNEL_TYPE = 'resnet34'

    # --- optimisation ---
    LR = 3e-4
    NUM_EPOCHS = 10
    TRAIN_BS = 64
    VALID_BS = 64

    # --- cross-validation ---
    N_FOLDS = 3
    SEED = 42

In [None]:
class RANZCRDataset(Dataset):
    """Dataset yielding ``(image, label)`` tensor pairs read from JPEG files.

    Args:
        df: DataFrame with a ``StudyInstanceUID`` column; each value is the
            file stem of an image located under ``config.TRAIN_IMG_PATH``.
        labels: Pandas object aligned row-for-row with ``df``; the row at
            position ``index`` is returned as the label of image ``index``.
        transform: Optional callable invoked as ``transform(image=...)`` that
            returns a mapping with an ``'image'`` entry (albumentations style).
    """

    def __init__(self, df, labels, transform=None):
        self.df = df
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Number of rows in ``df``."""
        return len(self.df)

    def __getitem__(self, index):
        """Load, augment and normalise the image at position ``index``.

        Returns:
            Tuple ``(image, label)``: ``image`` is a float32 tensor laid out
            channels-first with values scaled by 1/255, ``label`` is the
            corresponding row of ``labels`` as a tensor.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        fname = self.df['StudyInstanceUID'].values[index]
        fpath = f'{config.TRAIN_IMG_PATH}{fname}.jpg'

        image = cv2.imread(fpath, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Image is missing or unreadable: {fpath}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image = self.transform(image=image)['image']

        label = self.labels.values[index]

        # Scale to [0, 1] as float32 and move channels first (H, W, C) -> (C, H, W).
        image = image.astype(np.float32) / 255.0
        image = image.transpose(2, 0, 1)

        return torch.tensor(image), torch.tensor(label)

In [None]:
# Visual sanity check of the training augmentations on the first 1000 rows.
preview_df = df[:1000].reset_index(drop=True)
train_image = RANZCRDataset(preview_df, preview_df[target_cols], transform=transforms_train)

# matplotlib.pyplot is already imported as `plt` in the import cell.
plt.rcParams['figure.figsize'] = 20, 10
for row in range(2):
    f, axarr = plt.subplots(1, 5)
    for p in range(5):
        idx = np.random.randint(0, len(train_image))
        img, label = train_image[idx]
        # The dataset yields channels-first tensors; imshow wants (H, W, C).
        axarr[p].imshow(img.permute(1, 2, 0))
        axarr[p].set_title(label)

In [None]:
# Take the architecture name from config.KERNEL_TYPE so the network that is
# loaded always matches the name used for the checkpoint files.
# NOTE(review): the classification head defined in this notebook hard-codes
# in_features=512, so switching to a different backbone will probably require
# changing that value as well — verify before changing KERNEL_TYPE.
model_name = config.KERNEL_TYPE
model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')

In [None]:
class resnet34(nn.Module):
    """Convolutional backbone followed by global average pooling and an 11-way linear head.

    Args:
        model: Source network whose children, minus the last two, form the
            backbone. The default is the module-level ``model`` object as it
            exists when this class definition is executed.

    ``forward`` returns the raw output of the linear layer (no sigmoid is applied).
    """

    def __init__(self, model=model):
        import copy  # local import: only needed here

        super(resnet34, self).__init__()

        # Keep every child except the last two
        # (presumably the original pooling and classifier layers — verify).
        backbone = nn.Sequential(*list(model.children())[:-2])

        # The default `model` argument is a single shared object. Without a
        # copy, every instance would train the very same backbone parameters,
        # so a network created for a later cross-validation fold would start
        # from weights already tuned on the earlier folds.
        self.base_model = copy.deepcopy(backbone)
        self.adaptivepooling = nn.AdaptiveAvgPool2d(1)
        self.flatten = nn.Flatten()

        self.fc1 = nn.Linear(in_features=512, out_features=11)

    def forward(self, x):
        """Run backbone -> global average pool -> flatten -> linear head."""
        x = self.base_model(x)
        x = self.adaptivepooling(x)
        x = self.flatten(x)

        x1 = self.fc1(x)

        return x1

In [None]:
def train_loop_fn(model, loader, optimizer, loss_func, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Network to optimise; switched to training mode.
        loader: Iterable of ``(data, target)`` batches that also supports ``len()``.
        optimizer: Optimizer stepped once per batch (used with apex AMP loss scaling).
        loss_func: Callable applied to ``(model outputs, target)``.
        device: Device that inputs and targets are moved to.

    Returns:
        Mean of the per-batch losses, or NaN when ``loader`` yields no batches.
    """
    model.train()

    batch_losses = []

    bar = tqdm(enumerate(loader), total=len(loader))

    for step, (data, target) in bar:
        data = data.to(device, dtype=torch.float)
        target = target.to(device, dtype=torch.float)

        optimizer.zero_grad()
        outputs = model(data)
        loss = loss_func(outputs, target)

        # apex AMP: backpropagate through the scaled loss.
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())
        # Running mean over the last 30 batches, shown in the progress bar.
        smooth_loss = np.mean(batch_losses[-30:])
        bar.set_description(f'loss: {loss.item():.5f}, smth: {smooth_loss:.5f}')

    # Computed once after the loop (previously recomputed on every batch and
    # left undefined when the loader was empty).
    if not batch_losses:
        return float('nan')
    return np.mean(batch_losses)

def val_loop_fn(model, loader, optimizer, loss_func, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Args:
        model: Network to evaluate; switched to eval mode.
        loader: Iterable of ``(data, target)`` batches that also supports ``len()``.
        optimizer: Unused; kept so the signature mirrors ``train_loop_fn``.
        loss_func: Callable applied to ``(model outputs, target)``.
        device: Device that inputs and targets are moved to.

    Returns:
        Tuple ``(avg_val_loss, targets, probs)`` where ``targets`` is the
        concatenated ground truth and ``probs`` the concatenated sigmoid
        outputs, both as NumPy arrays. Ground truth comes FIRST: this is the
        order the function has always returned and it matches
        ``roc_auc_score(y_true, y_score)`` as called by the training cell.
    """
    model.eval()

    batch_losses = []
    all_targets = []
    all_probs = []

    bar = tqdm(enumerate(loader), total=len(loader))

    with torch.no_grad():
        for step, (data, target) in bar:
            data = data.to(device, dtype=torch.float)
            target = target.to(device, dtype=torch.float)

            outputs = model(data)
            probs = torch.sigmoid(outputs)
            loss = loss_func(outputs, target)

            batch_losses.append(loss.item())

            smooth_loss = np.mean(batch_losses[-30:])
            bar.set_description(f'loss: {loss.item():.5f}, smth: {smooth_loss:.5f}')

            # Move both to the CPU per batch so nothing accumulates on the
            # accelerator over the whole validation pass.
            all_targets.append(target.cpu())
            all_probs.append(probs.cpu())

    all_targets = torch.cat(all_targets).numpy()
    all_probs = torch.cat(all_probs).numpy()

    avg_val_loss = np.mean(batch_losses)

    return avg_val_loss, all_targets, all_probs

In [None]:
# K-fold cross-validation: one network per fold, keeping both the checkpoint
# with the best validation ROC AUC and the most recent weights.
folds = df.copy()
kf = KFold(n_splits=config.N_FOLDS, shuffle=True, random_state=config.SEED)

# Loop-invariant, so chosen once.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

for fold, (train_idx, valid_idx) in enumerate(kf.split(folds)):

    # reset_index returns a new frame, avoiding in-place mutation of an iloc slice.
    train_test = folds.iloc[train_idx].reset_index(drop=True)
    valid_test = folds.iloc[valid_idx].reset_index(drop=True)

    train_dataset = RANZCRDataset(
        train_test,
        train_test[target_cols],
        transforms_train
    )

    valid_dataset = RANZCRDataset(
        valid_test,
        valid_test[target_cols],
        transforms_valid
    )

    train_loader = DataLoader(train_dataset, batch_size=config.TRAIN_BS, num_workers=config.NUM_WORKERS, sampler=RandomSampler(train_dataset))
    valid_loader = DataLoader(valid_dataset, batch_size=config.VALID_BS, num_workers=config.NUM_WORKERS, sampler=SequentialSampler(valid_dataset))

    # NOTE: this rebinds the notebook-level name `model`, which until now held
    # the pretrained network loaded earlier in the notebook.
    model = resnet34().to(device)
    optimizer = Adam(model.parameters(), lr=config.LR)
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    scheduler = CosineAnnealingLR(optimizer, config.NUM_EPOCHS)
    loss_func = nn.BCEWithLogitsLoss().to(device)

    best_file = f'{config.KERNEL_TYPE}_best_fold{fold}.bin'
    # Includes the fold index so folds do not overwrite each other's weights.
    final_file = f'{config.KERNEL_TYPE}_final_fold{fold}.bin'
    roc_auc_max = 0

    for epoch in range(config.NUM_EPOCHS):

        avg_train_loss = train_loop_fn(model, train_loader, optimizer, loss_func, device)
        # val_loop_fn returns ground truth first, then predicted probabilities.
        avg_val_loss, val_targets, val_probs = val_loop_fn(model, valid_loader, optimizer, loss_func, device)

        roc_auc = roc_auc_score(val_targets, val_probs, average='macro')
        print(f"Epoch: {epoch+1} | lr: {optimizer.param_groups[0]['lr']:.7f} | train loss: {avg_train_loss:.4f} | val loss: {avg_val_loss:.4f} | roc auc score: {roc_auc:.4f}")

        if roc_auc > roc_auc_max:
            print('score2 ({:.6f} --> {:.6f}).  Saving model ...'.format(roc_auc_max, roc_auc))
            print('\n')
            torch.save(model.state_dict(), best_file)
            roc_auc_max = roc_auc

        # Latest weights of this fold, overwritten after every epoch.
        torch.save(model.state_dict(), final_file)

        # Advance the schedule after the epoch's optimizer steps. The first
        # epoch still runs at the base LR, so every epoch sees the same LR as
        # with the deprecated `scheduler.step(epoch)` call at the epoch start.
        scheduler.step()