## Summary
Hi, Kagglers. This is a public benchmark with resnet200d using `image_size = 512`. Since it's only the beginning of the competition, I want to give an idea of what can be achieved with images alone. Weights are available [here](https://www.kaggle.com/underwearfitting/resnet200d-baseline-benchmark-public).

Training details:
- batch_size = 64
- image_size = 512
- lr = 3e-5
- epochs = 30 with 1 epoch warmup at lr/10
- augmentations: 
```
transforms_train = albumentations.Compose([
     albumentations.RandomResizedCrop(image_size, image_size, scale=(0.9, 1), p=1), 
     albumentations.HorizontalFlip(p=0.5),
     albumentations.ShiftScaleRotate(p=0.5),
     albumentations.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.7),
     albumentations.RandomBrightnessContrast(brightness_limit=(-0.2,0.2), contrast_limit=(-0.2, 0.2), p=0.7),
     albumentations.CLAHE(clip_limit=(1,4), p=0.5),
     albumentations.OneOf([
         albumentations.OpticalDistortion(distort_limit=1.0),
         albumentations.GridDistortion(num_steps=5, distort_limit=1.),
         albumentations.ElasticTransform(alpha=3),
     ], p=0.2),
     albumentations.OneOf([
         albumentations.GaussNoise(var_limit=[10, 50]),
         albumentations.GaussianBlur(),
         albumentations.MotionBlur(),
         albumentations.MedianBlur(),
     ], p=0.2),
    albumentations.Resize(image_size, image_size),
    albumentations.OneOf([
    	JpegCompression(),
    	Downscale(scale_min=0.1, scale_max=0.15),
    ], p=0.2),
    IAAPiecewiseAffine(p=0.2),
    IAASharpen(p=0.2),
    albumentations.Cutout(max_h_size=int(image_size * 0.1), max_w_size=int(image_size * 0.1), num_holes=5, p=0.5),
    albumentations.Normalize(),
])
```
- hardware: RTX3090 x 2

## Configuration

In [1]:
import pandas as pd

# --- Inference settings -----------------------------------------------------
batch_size = 1      # images per DataLoader batch
image_size = 512    # square side length fed to the network
tta = True          # use the flip-based test-time-augmentation path
# submit = (len(pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv')) != 3582)
submit = True

# --- Ensemble members: one architecture name and one checkpoint per fold -----
enet_type = ['resnet200d' for _ in range(5)]
model_path = [
    f'../input/resnet200d-baseline-benchmark-public/resnet200d_fold{fold}_cv{cv}.pth'
    for fold, cv in enumerate((953, 955, 955, 957, 954))
]

# you can save GPU quota using fast sub attached in the last markdown file
fast_sub = True
fast_sub_path = '../input/xxxxxx/your_submission.csv'

## Imports

In [2]:
import os
import sys
# sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
# sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
sys.path.append('../input/pytorch-images-seresnet')
import numpy as np
DEBUG = False
import time
import cv2
import PIL.Image
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
import albumentations
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from pylab import rcParams
import timm
from albumentations import *
from albumentations.pytorch import ToTensorV2
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Model

In [3]:
class RANZCRResNet200D(nn.Module):
    """timm backbone followed by global average pooling and a linear head.

    The backbone's own pooling and classifier are replaced by ``nn.Identity``
    so that ``self.model(x)`` returns the raw feature map; pooling and the
    final projection are done by this wrapper.

    Args:
        model_name: timm architecture name passed to ``timm.create_model``.
        out_dim: number of output logits.
        pretrained: forwarded to ``timm.create_model``. Defaults to ``False``,
            which builds the network without fetching any weights.
    """

    def __init__(self, model_name='resnet200d', out_dim=11, pretrained=False):
        super().__init__()
        # `pretrained` used to be accepted but silently ignored (hard-coded
        # False); it is now honoured. The default keeps the old behaviour.
        self.model = timm.create_model(model_name, pretrained=pretrained)
        n_features = self.model.fc.in_features
        # Strip timm's pooling/classifier so the backbone emits a feature map.
        self.model.global_pool = nn.Identity()
        self.model.fc = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(n_features, out_dim)

    def forward(self, x):
        """Return logits of shape (batch, out_dim) for an image batch ``x``."""
        bs = x.size(0)
        features = self.model(x)
        pooled_features = self.pooling(features).view(bs, -1)
        output = self.fc(pooled_features)
        return output

## Transforms

In [4]:
# Deterministic test-time preprocessing: resize to a square, normalise each
# channel with the given mean/std (these look like the usual ImageNet
# statistics), then convert the array to a tensor.
transforms_test = albumentations.Compose(
    [
        albumentations.Resize(image_size, image_size),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ]
)

## Dataset

In [5]:
class RANZCRDataset(Dataset):
    """Image dataset driven by a DataFrame with a ``file_path`` column.

    Args:
        df: frame holding one row per image; must contain ``file_path`` and
            the columns listed in the module-level ``target_cols``.
        mode: ``'test'`` makes ``__getitem__`` return only the image; any
            other value returns ``(image, label)``.
        transform: optional callable invoked as ``transform(image=img)`` whose
            result exposes the transformed image under the ``'image'`` key.

    Raises:
        FileNotFoundError: from ``__getitem__`` when the image cannot be read.
    """

    def __init__(self, df, mode, transform=None):
        self.df = df.reset_index(drop=True)
        self.mode = mode
        self.transform = transform
        # Take labels from the re-indexed frame so rows and labels share the
        # same positional indexing used by __getitem__.
        self.labels = self.df[target_cols].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        row = self.df.loc[index]
        img = cv2.imread(row.file_path)
        if img is None:
            # cv2.imread signals failure by returning None; fail here with the
            # offending path instead of an opaque cvtColor assertion later.
            raise FileNotFoundError(f'Could not read image: {row.file_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            img = self.transform(image=img)['image']

        if self.mode == 'test':
            return img
        # Only build the label tensor when it is actually returned.
        label = torch.tensor(self.labels[index]).float()
        return img, label

In [6]:
# Load the sample submission as the list of test images; in fast-sub mode only
# the first five rows are read.
test = pd.read_csv(
    '../input/ranzcr-clip-catheter-line-classification/sample_submission.csv',
    nrows=5 if fast_sub else None,
)
# Resolve every study id to its JPEG on disk.
test['file_path'] = test.StudyInstanceUID.map(
    lambda uid: os.path.join('../input/ranzcr-clip-catheter-line-classification/test', f'{uid}.jpg')
)
# Columns 1..11 of the submission frame are the prediction targets.
target_cols = list(test.columns[1:12])

test_dataset = RANZCRDataset(test, 'test', transform=transforms_test)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=24,
)

## Utils

In [7]:
def inference_func(test_loader):
    """Run the module-level ``model`` over ``test_loader`` without TTA.

    The function reads the notebook globals ``model`` and ``device``; the
    loader must yield image batches only (no labels).

    Args:
        test_loader: iterable of image tensors.

    Returns:
        numpy array of sigmoid probabilities, one row per image, in loader
        order.
    """
    model.eval()
    preds = []

    with torch.no_grad():
        for images in tqdm(test_loader):
            logits = model(images.to(device))
            # The raw logits were previously accumulated in a second list that
            # was never returned; only the probabilities are kept now.
            preds.append(logits.sigmoid().cpu())
    return torch.cat(preds).numpy()

def tta_inference_func(test_loader):
    """Run the module-level ``model`` with 3-view test-time augmentation.

    Each batch is evaluated as-is, horizontally flipped (last axis) and
    vertically flipped (second-to-last axis). The logits of the three views
    are averaged per sample before the sigmoid.

    The function reads the notebook globals ``model`` and ``device``; the
    loader must yield image batches only (no labels).

    Args:
        test_loader: iterable of image tensors shaped (N, C, H, W).

    Returns:
        numpy array of sigmoid probabilities, one row per image, in loader
        order.
    """
    model.eval()
    preds = []

    with torch.no_grad():
        for images in tqdm(test_loader):
            x = images.to(device)
            n = x.size(0)
            # Concatenate the views along the batch axis: rows [0, n) are the
            # originals, [n, 2n) the h-flips and [2n, 3n) the v-flips.
            views = torch.cat([x, x.flip(-1), x.flip(-2)], dim=0)
            logits = model(views)
            # Regroup as (view, sample, class) and average over the views.
            # The previous `view(batch_size, 3, -1)` grouped three consecutive
            # rows, which mixes different samples whenever the batch holds
            # more than one image, and it relied on the global batch_size, so
            # a shorter final batch could not be reshaped.
            logits = logits.view(3, n, -1).mean(0)
            preds.append(logits.sigmoid().cpu())

    return torch.cat(preds).numpy()

## Submission

In [9]:
# Predict with every checkpoint listed in `model_path` and average the
# per-checkpoint probabilities into `predictions_ResNet200d_1`.
if submit:
    test_preds = []
    for i in range(len(enet_type)):
        # NOTE(review): a fresh network is only built for 'resnet200d'; any
        # other entry in `enet_type` would reuse whatever `model` is bound to.
        if enet_type[i] == 'resnet200d':
            print('resnet200d loaded')
            model = RANZCRResNet200D(enet_type[i], out_dim=len(target_cols))
            model = model.to(device)
        model.load_state_dict(torch.load(model_path[i], map_location=device))
        # The inference helpers pick up `model` from the notebook namespace.
        if tta:
            test_preds += [tta_inference_func(test_loader)]
        else:
            test_preds += [inference_func(test_loader)]

    # NOTE(review): this frame is read but not used below (the lines that
    # filled and saved it are commented out).
    submission_ResNet200d_1 = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv')
    # Mean over checkpoints -> one probability matrix for this model family.
    predictions_ResNet200d_1 = np.mean(test_preds, axis=0)
#     submission_ResNet200D_1[target_cols] = np.mean(test_preds, axis=0)
#     submission.to_csv('submission.csv', index=False)
else:
    # Not submitting: write the untouched sample submission as a placeholder.
    pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv').to_csv('submission.csv', index=False)

resnet200d loaded


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


resnet200d loaded


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


resnet200d loaded


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


resnet200d loaded


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


resnet200d loaded


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [10]:
##############################################################################

In [11]:
import os
import sys
sys.path.append('../input/pytorch-images-seresnet')

import os
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
import numpy as np
import pandas as pd

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image

from matplotlib import pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
import albumentations
from albumentations import *
from albumentations.pytorch import ToTensorV2


import timm

from torch.cuda.amp import autocast, GradScaler

import warnings 
warnings.filterwarnings('ignore')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [12]:
# Batch size for the second-stage DataLoader built further down.
BATCH_SIZE = 32
# Directory holding the test JPEGs; TestDataset appends '<StudyInstanceUID>.jpg'.
TEST_PATH = '../input/ranzcr-clip-catheter-line-classification/test'

In [13]:
# Rebind `test` to a fresh copy of the sample submission (first five rows only
# in fast-sub mode).
test = pd.read_csv(
    '../input/ranzcr-clip-catheter-line-classification/sample_submission.csv',
    nrows=5 if fast_sub else None,
)

In [14]:
class TestDataset(Dataset):
    """Test-image dataset keyed by the ``StudyInstanceUID`` column.

    Images are read from ``TEST_PATH/<StudyInstanceUID>.jpg`` and converted
    from OpenCV's BGR channel order to RGB.

    Args:
        df: frame with a ``StudyInstanceUID`` column.
        transform: optional callable invoked as ``transform(image=image)``
            whose result exposes the transformed image under ``'image'``.

    Raises:
        FileNotFoundError: from ``__getitem__`` when the image cannot be read.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['StudyInstanceUID'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        file_path = f'{TEST_PATH}/{file_name}.jpg'
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread returns None on failure; report the path instead of
            # letting cvtColor fail with an unrelated-looking assertion.
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        return image

In [15]:
def get_transforms(image_size=640):
    """Build the test-time pipeline: square resize, default Normalize, to tensor.

    Args:
        image_size: side length passed to ``Resize`` for both height and width.
    """
    steps = [
        Resize(image_size, image_size),
        Normalize(),
        ToTensorV2(),
    ]
    return Compose(steps)

In [20]:
class PAM_Module(nn.Module):
    """Position attention: every spatial location attends to all others.

    Queries and keys come from 1x1 convolutions with ``in_dim // 8`` output
    channels, values from a 1x1 convolution with ``in_dim`` channels. The
    attended map is scaled by the learnable ``gamma`` (initialised to zero)
    and added back to the input.
    """
    # Ref from SAGAN

    def __init__(self, in_dim):
        super().__init__()
        self.chanel_in = in_dim
        reduced_dim = in_dim // 8

        self.query_conv = nn.Conv2d(in_dim, reduced_dim, kernel_size=1)
        self.key_conv = nn.Conv2d(in_dim, reduced_dim, kernel_size=1)
        self.value_conv = nn.Conv2d(in_dim, in_dim, kernel_size=1)
        self.gamma = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """
            inputs :
                x : input feature maps (B x C x H x W)
            returns :
                out : gamma * attended features + input, same shape as x
        """
        batch, channels, height, width = x.size()
        n_pos = width * height

        # (B, HW, C') and (B, C', HW) -> pairwise energies (B, HW, HW)
        queries = self.query_conv(x).view(batch, -1, n_pos).transpose(1, 2)
        keys = self.key_conv(x).view(batch, -1, n_pos)
        attention = torch.bmm(queries, keys).softmax(dim=-1)

        # Mix the value vectors with the attention weights, back to (B, C, H, W).
        values = self.value_conv(x).view(batch, -1, n_pos)
        attended = torch.bmm(values, attention.transpose(1, 2))
        attended = attended.view(batch, channels, height, width)

        return self.gamma * attended + x
    
class CAM_Module(nn.Module):
    """Channel attention: every channel attends to all other channels.

    Has no learnable weights apart from the residual scale ``gamma``
    (initialised to zero).
    """

    def __init__(self, in_dim):
        super().__init__()
        self.chanel_in = in_dim
        self.gamma = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """
            inputs :
                x : input feature maps (B x C x H x W)
            returns :
                out : gamma * attended features + input, same shape as x
        """
        batch, channels, height, width = x.size()
        flat = x.view(batch, channels, -1)

        # Channel-by-channel similarity, (B, C, C).
        energy = torch.bmm(flat, flat.transpose(1, 2))
        # Softmax is taken over (row max - energy).
        row_max = energy.max(dim=-1, keepdim=True)[0]
        attention = (row_max.expand_as(energy) - energy).softmax(dim=-1)

        out = torch.bmm(attention, flat).view(batch, channels, height, width)
        return self.gamma * out + x
    
class CBAM(nn.Module):
    """Two parallel attention branches (channel and position) that are summed.

    Each branch reduces the input to ``in_channels // 4`` channels, applies
    its attention module, and projects back to ``in_channels``.
    """

    @staticmethod
    def _conv_bn_relu(n_in, n_out):
        """3x3 conv (padding 1, no bias) -> BatchNorm -> in-place ReLU."""
        return nn.Sequential(
            nn.Conv2d(n_in, n_out, 3, padding=1, bias=False),
            nn.BatchNorm2d(n_out),
            nn.ReLU(inplace=True),
        )

    def __init__(self, in_channels):
        super().__init__()
        inter_channels = in_channels // 4

        # Channel reduction in front of each attention branch.
        self.conv1_c = self._conv_bn_relu(in_channels, inter_channels)
        self.conv1_s = self._conv_bn_relu(in_channels, inter_channels)

        self.channel_gate = CAM_Module(inter_channels)
        self.spatial_gate = PAM_Module(inter_channels)

        # Projection back to the input width after each attention branch.
        self.conv2_c = self._conv_bn_relu(inter_channels, in_channels)
        self.conv2_a = self._conv_bn_relu(inter_channels, in_channels)

    def forward(self, x):
        """Return the sum of the channel-branch and position-branch outputs."""
        channel_branch = self.conv2_c(self.channel_gate(self.conv1_c(x)))
        spatial_branch = self.conv2_a(self.spatial_gate(self.conv1_s(x)))
        return channel_branch + spatial_branch

In [21]:
class CustomResNet200D(nn.Module):
    """timm ResNet200D backbone with average pooling and an 11-way linear head.

    Args:
        model_name: timm architecture name passed to ``timm.create_model``.
        pretrained: when true, backbone weights are loaded from the local
            checkpoint file referenced below (timm itself is always created
            with ``pretrained=False``).
    """

    def __init__(self, model_name='resnet200d_320', pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=False)
        if pretrained:
            pretrained_path = '../input/resnet200d-pretrained-weight/resnet200d_ra2-bdba9bf9.pth'
            # map_location='cpu' lets a checkpoint saved from a GPU load on a
            # CPU-only host, matching how the sibling model classes load
            # their weights.
            self.model.load_state_dict(torch.load(pretrained_path, map_location='cpu'))
            print(f'load {model_name} pretrained model')
        n_features = self.model.fc.in_features
        # Strip timm's pooling/classifier so the backbone emits a feature map.
        self.model.global_pool = nn.Identity()
        self.model.fc = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(n_features, 11)

    def forward(self, x):
        """Return logits of shape (batch, 11) for an image batch ``x``."""
        bs = x.size(0)
        features = self.model(x)
        pooled_features = self.pooling(features).view(bs, -1)
        output = self.fc(pooled_features)
        return output

class SeResNet152D(nn.Module):
    """timm SE-ResNet152D backbone with average pooling and an 11-way head."""

    def __init__(self, model_name='seresnet152d_320'):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=False)
        head_width = backbone.fc.in_features
        # Replace timm's pooling/classifier so the backbone returns a feature map.
        backbone.global_pool = nn.Identity()
        backbone.fc = nn.Identity()
        self.model = backbone
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(head_width, 11)

    def forward(self, x):
        """Return logits of shape (batch, 11) for an image batch ``x``."""
        feature_map = self.model(x)
        pooled = self.pooling(feature_map).view(x.size(0), -1)
        return self.fc(pooled)
    
class EfficientNetB5(nn.Module):
    """timm EfficientNet backbone with average pooling and an 11-way head."""

    def __init__(self, model_name='tf_efficientnet_b5_ns'):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=False)
        head_width = backbone.classifier.in_features
        # Replace timm's pooling/classifier so the backbone returns a feature map.
        backbone.global_pool = nn.Identity()
        backbone.classifier = nn.Identity()
        self.model = backbone
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(head_width, 11)

    def forward(self, x):
        """Return logits of shape (batch, 11) for an image batch ``x``."""
        feature_map = self.model(x)
        pooled = self.pooling(feature_map).view(x.size(0), -1)
        return self.classifier(pooled)
    
class CustomResNet200D_WLF(nn.Module):
    """ResNet200D with a global (pooled) branch and a local attention branch.

    The backbone feature map is (a) average-pooled and (b) passed through
    ``CBAM`` and summed over the spatial axes; both vectors are concatenated
    and classified into 11 logits.

    Args:
        model_name: timm architecture name passed to ``timm.create_model``.
        pretrained: when true, backbone weights (including an 11-way ``fc``)
            are loaded from the local checkpoint referenced below.
    """

    def __init__(self, model_name='resnet200d_320', pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=False)
        n_features = self.model.fc.in_features
        # Give the backbone an 11-way fc before loading the checkpoint; the fc
        # is discarded again below.
        self.model.fc = nn.Linear(n_features, 11)

        if pretrained:
            pretrained_path = '../input/startingpointschestx/resnet200d_320_chestx.pth'
            checkpoint = torch.load(pretrained_path, map_location='cpu')['model']
            # Strip only a leading 'model.' wrapper prefix. The previous
            # substring test plus str.replace would also rewrite keys that
            # merely contain 'model.' somewhere in the middle.
            prefix = 'model.'
            state_dict = {
                (key[len(prefix):] if key.startswith(prefix) else key): value
                for key, value in checkpoint.items()
            }
            self.model.load_state_dict(state_dict)

            print(f'load {model_name} pretrained model')
        # Strip timm's pooling/classifier so the backbone emits a feature map.
        self.model.global_pool = nn.Identity()
        self.model.fc = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)

        self.local_fe = CBAM(n_features)
        self.classifier = nn.Sequential(nn.Linear(n_features + n_features, n_features),
                                        nn.BatchNorm1d(n_features),
                                        nn.ReLU(inplace=True),
                                        nn.Linear(n_features, 11))

    def forward(self, x):
        """Return logits of shape (batch, 11) for an image batch ``x``."""
        bs = x.size(0)
        features = self.model(x)

        global_features = self.pooling(features).view(bs, -1)

        local_features = self.local_fe(features)
        # Open question kept from the original author: why a spatial sum here
        # rather than global average pooling?
        local_features = torch.sum(local_features, dim=[2, 3])

        all_features = torch.cat([global_features, local_features], dim=1)
        output = self.classifier(all_features)

        return output

In [22]:
def inference(models, test_loader, device):
    """Predict probabilities with an ensemble and horizontal-flip TTA.

    For every batch, each model is evaluated on the batch and on its
    horizontally flipped copy; the two sigmoid outputs are averaged, then the
    per-model results are averaged.

    Args:
        models: iterable of networks to ensemble.
        test_loader: loader yielding image batches only.
        device: device the batches are moved to before the forward pass.

    Returns:
        numpy array with one row of probabilities per image, in loader order.
    """
    batch_probs = []
    progress = tqdm(enumerate(test_loader), total=len(test_loader))
    for _, images in progress:
        images = images.to(device)
        per_model = []
        for net in models:
            with torch.no_grad():
                logits_plain = net(images)
                logits_flipped = net(images.flip(-1))
                p_plain = logits_plain.sigmoid().to('cpu').numpy()
                p_flipped = logits_flipped.sigmoid().to('cpu').numpy()
            per_model.append((p_plain + p_flipped) / 2)
        batch_probs.append(np.mean(per_model, axis=0))
    return np.concatenate(batch_probs)

In [24]:
def _load_checkpoint_model(net, weights_path):
    """Load ``weights_path['model']`` into ``net``, set eval mode, move to ``device``."""
    # Always deserialize onto the CPU first: two of the four loads used to
    # omit map_location, so a GPU-saved checkpoint could not be opened on a
    # CPU-only host. The weights are moved to `device` afterwards.
    state_dict = torch.load(weights_path, map_location='cpu')['model']
    net.load_state_dict(state_dict)
    net.eval()
    net.to(device)
    return net


# Each family is kept in its own single-element list so it can be handed to
# `inference` on its own.
model = _load_checkpoint_model(
    CustomResNet200D(),
    "../input/007-training-resnext-step3-data/resnet200d_320_fold0_best_loss_cpu.pth")
models200D_2 = [model]

model = _load_checkpoint_model(
    CustomResNet200D_WLF(),
    "../input/007-training-resnet200d-step3-ver-42/resnet200d_320_fold0_best_loss.pth")
models200D_3 = [model]

model = _load_checkpoint_model(
    SeResNet152D(),
    '../input/training-seresnet152d-step3-2/seresnet152d_320_fold0_best_loss.pth')
models152D = [model]

model = _load_checkpoint_model(
    EfficientNetB5(),
    '../input/efficientnetb5cv9621/tf_efficientnet_b5_ns_CV96.21.pth')
modelsEfficientNet = [model]

In [25]:
# Second-stage predictions: every model family is run separately on the same
# 640x640 test loader, producing one probability matrix per family.
if submit:
    test_dataset_640 = TestDataset(test, transform=get_transforms(image_size=640))
    test_loader_640 = DataLoader(test_dataset_640, batch_size=BATCH_SIZE, shuffle=False, num_workers=4 , pin_memory=True)

    # NOTE(review): these names are only defined when `submit` is true.
    predictions_ResNet200d_2 = inference(models200D_2, test_loader_640, device)
    predictions_ResNet200d_3 = inference(models200D_3, test_loader_640, device)
    predictions_SeResNet152d = inference(models152D, test_loader_640, device)
    predictions_EfficientNet = inference(modelsEfficientNet, test_loader_640, device)
    # predictions = (submission_ResNet200D_1 + predictions200d_2 + 0.50 * predictions152d) / 2.5

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




In [26]:
# target_cols = test.iloc[:, 1:12].columns.tolist()
# test[target_cols] = predictions
# test[['StudyInstanceUID'] + target_cols].to_csv('submission.csv', index=False)
# test.head()

In [27]:
###########################################################################################################

In [28]:
import gc
import os
import sys
import time
import copy
import random
import shutil
import typing as tp
from pathlib import Path
from argparse import ArgumentParser

import yaml
import numpy as np
import pandas as pd
from scipy.sparse import coo_matrix
from sklearn.metrics import roc_auc_score

from tqdm import tqdm
from joblib import Parallel, delayed

import cv2
import albumentations

from albumentations.core.transforms_interface import ImageOnlyTransform, DualTransform
from albumentations.pytorch import ToTensorV2

import torch
from torch import nn
from torch.utils import data
from torchvision import models as torchvision_models

sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
import timm

In [29]:
# Directory layout, resolved relative to the parent of the working directory.
ROOT = Path.cwd().parent
INPUT = ROOT / "input"
OUTPUT = ROOT / "output"
DATA = INPUT / "ranzcr-clip-catheter-line-classification"
TRAIN = DATA / "train"
TEST = DATA / "test"


# Dataset directory with the trained weights, and a scratch directory that is
# created on the spot (used below for the resized test images).
TRAINED_MODEL = INPUT / "ranzcr-clip-weights-for-multi-head-model-v2"
TMP = ROOT / "tmp"
TMP.mkdir(exist_ok=True)

# Seed passed to the fold split below (name spelled as in the original).
RANDAM_SEED = 1086
N_CLASSES = 11
FOLDS = [0, 1, 2, 3, 4]
N_FOLD = len(FOLDS)
# Target size handed to resize_images for the test images.
IMAGE_SIZE = (512, 512)

# NOTE(review): neither flag is read in the visible part of the notebook.
CONVERT_TO_RANK = False
FAST_COMMIT = True

# Label columns selected from train.csv, in this order.
CLASSES = [
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present'
]

In [30]:
# Show what the competition data directory contains.
for p in DATA.iterdir():
    print(p.name)

# Training labels and the sample submission (which lists the test study ids).
train = pd.read_csv(DATA / "train.csv")
smpl_sub =  pd.read_csv(DATA / "sample_submission.csv")

train_tfrecords
sample_submission.csv
train_annotations.csv
test_tfrecords
train.csv
test
train


In [31]:
smpl_sub.shape

(3582, 12)

In [32]:
# Fast-sub mode: when the sample submission has exactly 3582 rows (the size
# shown by `smpl_sub.shape` above), keep only its first five rows.
if fast_sub and len(smpl_sub) == 3582:
    smpl_sub = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv', nrows=5)

In [33]:
def multi_label_stratified_group_k_fold(label_arr: np.array, gid_arr: np.array, n_fold: int, seed: int=42):
    """
    Create multi-label stratified group k-fold indices.

    Whole groups are assigned greedily to folds so that, for every class, the
    share of its labels in each fold is as even as possible. Samples of one
    group never end up in different folds.

    Side effects: reseeds both ``np.random`` and ``random`` with ``seed`` and
    prints timing and per-fold statistics.

    reference: https://www.kaggle.com/jakubwasikowski/stratified-group-k-fold-cross-validation
    input:
        label_arr: numpy.ndarray, shape = (n_train, n_class)
            multi-label for each sample's index using multi-hot vectors
        gid_arr: numpy.array, shape = (n_train,)
            group id for each sample's index
        n_fold: int. number of fold.
        seed: random seed.
    output:
        yields, once per fold, a pair (train_indexs, val_indexs) of positional
        index arrays into the input arrays.
    """
    np.random.seed(seed)
    random.seed(seed)
    start_time = time.time()
    n_train, n_class = label_arr.shape
    gid_unique = sorted(set(gid_arr))
    n_group = len(gid_unique)

    # # aid_arr: (n_train,), indicates alternative id for group id.
    # # generally, group ids are not 0-index and continuous or not integer.
    gid2aid = dict(zip(gid_unique, range(n_group)))
#     aid2gid = dict(zip(range(n_group), gid_unique))
    aid_arr = np.vectorize(lambda x: gid2aid[x])(gid_arr)

    # # count labels by class
    cnts_by_class = label_arr.sum(axis=0)  # (n_class, )

    # # count labels by group id.
    # (sample index, aid) pairs sorted by aid: `col` holds sample indices and
    # `row` the matching aids. The sparse indicator matrix built from them,
    # multiplied by the label matrix, gives the label counts of each group.
    col, row = np.array(sorted(enumerate(aid_arr), key=lambda x: x[1])).T
    cnts_by_group = coo_matrix(
        (np.ones(len(label_arr)), (row, col))
    ).dot(coo_matrix(label_arr)).toarray().astype(int)
    del col
    del row
    # Running label counts of every fold, updated as groups are assigned.
    cnts_by_fold = np.zeros((n_fold, n_class), int)

    groups_by_fold = [[] for fid in range(n_fold)]
    group_and_cnts = list(enumerate(cnts_by_group))  # pair of aid and cnt by group
    # Shuffle first; the sort below is stable, so this randomises the order of
    # groups whose count vectors have the same standard deviation.
    np.random.shuffle(group_and_cnts)
    print("finished preparation", time.time() - start_time)
    # Greedy assignment, visiting groups in descending order of the standard
    # deviation of their label-count vector.
    for aid, cnt_by_g in sorted(group_and_cnts, key=lambda x: -np.std(x[1])):
        best_fold = None
        min_eval = None
        for fid in range(n_fold):
            # # eval assignment.
            # Tentatively add the group to this fold and score the result: the
            # per-class std across folds of the class fraction, averaged over
            # classes (lower is more balanced). Then undo the addition.
            cnts_by_fold[fid] += cnt_by_g
            fold_eval = (cnts_by_fold / cnts_by_class).std(axis=0).mean()
            cnts_by_fold[fid] -= cnt_by_g

            # Strict '<' keeps the lowest-numbered fold on ties.
            if min_eval is None or fold_eval < min_eval:
                min_eval = fold_eval
                best_fold = fid

        cnts_by_fold[best_fold] += cnt_by_g
        groups_by_fold[best_fold].append(aid)
    print("finished assignment.", time.time() - start_time)

    gc.collect()
    idx_arr = np.arange(n_train)
    for fid in range(n_fold):
        val_groups = groups_by_fold[fid]

        # Samples whose group landed in this fold form the validation split.
        val_indexs_bool = np.isin(aid_arr, val_groups)
        train_indexs = idx_arr[~val_indexs_bool]
        val_indexs = idx_arr[val_indexs_bool]

        print("[fold {}]".format(fid), end=" ")
        print("n_group: (train, val) = ({}, {})".format(n_group - len(val_groups), len(val_groups)), end=" ")
        print("n_sample: (train, val) = ({}, {})".format(len(train_indexs), len(val_indexs)))

        yield train_indexs, val_indexs

In [34]:
# Multi-hot label matrix and the patient id of each training row; patients
# are the groups that must not be split across folds.
label_arr = train[CLASSES].values
group_id = train.PatientID.values

# Materialise the generator so the folds can be iterated more than once.
train_val_indexs = list(
    multi_label_stratified_group_k_fold(label_arr, group_id, N_FOLD, RANDAM_SEED))

finished preparation 0.12199878692626953
finished assignment. 0.9460818767547607
[fold 0] n_group: (train, val) = (2591, 664) n_sample: (train, val) = (24062, 6021)
[fold 1] n_group: (train, val) = (2600, 655) n_sample: (train, val) = (24124, 5959)
[fold 2] n_group: (train, val) = (2613, 642) n_sample: (train, val) = (23966, 6117)
[fold 3] n_group: (train, val) = (2608, 647) n_sample: (train, val) = (24143, 5940)
[fold 4] n_group: (train, val) = (2608, 647) n_sample: (train, val) = (24037, 6046)


In [35]:
# Record, for every training row, the fold in which it is a validation sample.
train["fold"] = -1
for fold_id, (trn_idx, val_idx) in enumerate(train_val_indexs):
    # NOTE(review): val_idx holds positional indices; `.loc` matches them only
    # if `train` still has its default 0..n-1 index.
    train.loc[val_idx, "fold"] = fold_id

# Per-fold label totals, displayed to check how even the split is.
train.groupby("fold")[CLASSES].sum()

Unnamed: 0_level_0,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,15,228,1448,56,106,550,960,639,1692,4265,166
1,16,228,1448,56,106,550,960,639,1692,4265,166
2,16,227,1448,56,105,549,959,639,1692,4265,166
3,16,228,1448,55,106,550,959,639,1692,4265,166
4,16,227,1448,56,106,549,959,639,1692,4264,166


In [36]:
def resize_images(img_id, input_dir, output_dir, resize_to=(512, 512), ext="png"):
    """Read ``<input_dir>/<img_id>.jpg`` as grayscale, resize it and save it.

    Args:
        img_id: image file stem (without extension).
        input_dir: ``pathlib.Path`` of the directory holding the source JPEG.
        output_dir: ``pathlib.Path`` of the directory to write into.
        resize_to: target size handed to ``cv2.resize`` as ``dsize``, which
            OpenCV interprets as (width, height).
        ext: extension (and therefore format) of the written file.

    Raises:
        FileNotFoundError: the source image could not be read.
        OSError: the resized image could not be written.
    """
    img_path = input_dir / f"{img_id}.jpg"
    save_path = output_dir / f"{img_id}.{ext}"

    img = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = cv2.resize(img, resize_to)
    # cv2.imwrite returns False on failure; surface it instead of leaving a
    # missing file to be discovered later.
    if not cv2.imwrite(str(save_path), img):
        raise OSError(f"Could not write image: {save_path}")

# Scratch directory for the resized test images, named after the target size.
TEST_RESIZED = TMP / "test_{0}x{1}".format(*IMAGE_SIZE)
TEST_RESIZED.mkdir(exist_ok=True)
# Bare expression in the middle of the cell: it is evaluated but not displayed.
TEST_RESIZED

# Resize every image listed in the sample submission, two workers in parallel.
_ = Parallel(n_jobs=2, verbose=5)([
    delayed(resize_images)(img_id, TEST, TEST_RESIZED, IMAGE_SIZE, "png")
    for img_id in smpl_sub.StudyInstanceUID.values
])

[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   5 out of   5 | elapsed:    0.8s finished


In [37]:
def get_activation(activ_name: str="relu"):
    """Return a new activation module for ``activ_name``.

    Args:
        activ_name: one of ``"relu"`` (in-place), ``"tanh"``, ``"sigmoid"``
            or ``"identity"``.

    Returns:
        A freshly constructed ``nn.Module``.

    Raises:
        NotImplementedError: ``activ_name`` is not one of the supported names.
    """
    # Map names to constructors so that only the requested module is built
    # (all four used to be instantiated on every call).
    factories = {
        "relu": lambda: nn.ReLU(inplace=True),
        "tanh": nn.Tanh,
        "sigmoid": nn.Sigmoid,
        "identity": nn.Identity,
    }
    if activ_name not in factories:
        raise NotImplementedError(
            f"unsupported activation {activ_name!r}; expected one of {sorted(factories)}")
    return factories[activ_name]()
        

class Conv2dBNActiv(nn.Module):
    """Conv2d, optionally followed by BatchNorm2d, then an activation."""

    def __init__(
        self, in_channels: int, out_channels: int,
        kernel_size: int, stride: int=1, padding: int=0,
        bias: bool=False, use_bn: bool=True, activ: str="relu"
    ):
        """Assemble the sub-layers into ``self.layers`` in application order."""
        super().__init__()
        modules = [
            nn.Conv2d(
                in_channels, out_channels,
                kernel_size, stride, padding, bias=bias),
        ]
        if use_bn:
            modules += [nn.BatchNorm2d(out_channels)]
        # `activ` is resolved by name through get_activation.
        modules += [get_activation(activ)]
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Apply the stacked layers to ``x``."""
        out = self.layers(x)
        return out
        

class SSEBlock(nn.Module):
    """Channel-squeeze / spatial-excitation gate.

    A bias-free 1x1 convolution collapses the channels into a single map,
    which is squashed by a sigmoid and multiplied onto the input.
    """

    def __init__(self, in_channels: int):
        """Create the 1x1 squeeze convolution and the sigmoid."""
        super().__init__()
        self.channel_squeeze = nn.Conv2d(
            in_channels, 1, kernel_size=1, stride=1, padding=0, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Scale ``x`` (bs, ch, h, w) by its (bs, 1, h, w) spatial gate."""
        gate = self.channel_squeeze(x)
        gate = self.sigmoid(gate)
        # The single-channel gate broadcasts over all channels of x.
        return x * gate
    
    
class SpatialAttentionBlock(nn.Module):
    """Spatial attention mask computed by a stack of 3x3 conv blocks.

    The stack maps the input to a single-channel map (the last block ends in
    a sigmoid, the others in ReLU); the map is multiplied onto the input.
    """

    def __init__(
        self, in_channels: int,
        out_channels_list: tp.List[int],
    ):
        """Register ``conv1`` .. ``conv<n>``; the last output width must be 1."""
        super().__init__()
        self.n_layers = len(out_channels_list)
        channels_list = [in_channels] + out_channels_list
        assert self.n_layers > 0
        assert channels_list[-1] == 1

        # Walk over consecutive (input, output) channel pairs; only the final
        # block uses a sigmoid so the mask lies in (0, 1).
        channel_pairs = zip(channels_list[:-1], channels_list[1:])
        for position, (in_chs, out_chs) in enumerate(channel_pairs, start=1):
            activ = "sigmoid" if position == self.n_layers else "relu"
            block = Conv2dBNActiv(in_chs, out_chs, 3, 1, 1, activ=activ)
            setattr(self, f"conv{position}", block)

    def forward(self, x):
        """Return the computed mask multiplied with ``x``."""
        mask = x
        for position in range(1, self.n_layers + 1):
            mask = getattr(self, f"conv{position}")(mask)
        return mask * x

In [38]:
class MultiHeadResNet200D(nn.Module):
    """timm ``resnet200d_320`` backbone shared by several classifier heads.

    The backbone's global pooling and classifier are removed. Each head is
    registered as ``head_{i}`` and applies a ``SpatialAttentionBlock``,
    global average pooling and a two-layer MLP. ``forward`` concatenates the
    head outputs along dim 1.
    """

    def __init__(
        self, out_dims_head: tp.List[int]=[3, 4, 3, 1], pretrained=False
    ):
        """Build the backbone and one head per entry of ``out_dims_head``.

        Args:
            out_dims_head: output width of each head. The default list is
                only read here, never mutated.
            pretrained: when True, load backbone weights from the hard-coded
                checkpoint path below before the classifier is removed.
        """
        # nn.Module has to be initialised before attributes are assigned on it.
        super(MultiHeadResNet200D, self).__init__()
        self.base_name = "resnet200d_320"
        self.n_heads = len(out_dims_head)

        # # load base model
        base_model = timm.create_model(
            self.base_name, num_classes=sum(out_dims_head), pretrained=False)
        in_features = base_model.num_features

        if pretrained:
            pretrained_model_path = '../input/startingpointschestx/resnet200d_320_chestx.pth'
            checkpoint = torch.load(pretrained_model_path, map_location='cpu')
            # Keys may carry a leading "model." wrapper prefix; strip only that
            # prefix so inner occurrences of "model." stay intact.
            state_dict = {
                self._strip_model_prefix(k): v
                for k, v in checkpoint["model"].items()}
            base_model.load_state_dict(state_dict)

        # # remove global pooling and head classifier
        base_model.reset_classifier(0, '')

        # # Shared CNN Backbone
        self.backbone = base_model

        # # Multi Heads.
        for i, out_dim in enumerate(out_dims_head):
            layer_name = f"head_{i}"
            layer = nn.Sequential(
                SpatialAttentionBlock(in_features, [64, 32, 16, 1]),
                nn.AdaptiveAvgPool2d(output_size=1),
                nn.Flatten(start_dim=1),
                nn.Linear(in_features, in_features),
                nn.ReLU(inplace=True),
                nn.Dropout(0.5),
                nn.Linear(in_features, out_dim))
            setattr(self, layer_name, layer)

    @staticmethod
    def _strip_model_prefix(key: str, prefix: str = "model.") -> str:
        """Drop one leading ``prefix`` from ``key``; return other keys unchanged."""
        if key.startswith(prefix):
            return key[len(prefix):]
        return key

    def forward(self, x):
        """Run the shared backbone once and concatenate every head's output.

        Returns:
            Tensor whose dim 1 has ``sum(out_dims_head)`` entries, ordered
            head 0 first.
        """
        h = self.backbone(x)
        hs = [
            getattr(self, f"head_{i}")(h) for i in range(self.n_heads)]
        y = torch.cat(hs, dim=1)
        return y
    

## forward test
# Smoke test: push one random batch through a freshly built model and
# report the input/output shapes.
m = MultiHeadResNet200D([3, 4, 3, 1], False)
m.eval()

x = torch.rand(1, 3, 256, 256)
with torch.no_grad():
    y = m(x)

print("[forward test]")
print("input:\t{}\noutput:\t{}".format(x.shape, y.shape))

# drop the throwaway model and tensors, then ask the GC to reclaim them
del m, x, y
gc.collect()

[forward test]
input:	torch.Size([1, 3, 256, 256])
output:	torch.Size([1, 11])


0

In [39]:
class LabeledImageDataset(data.Dataset):
    """
    Dataset class for (image, label) pairs

    Reads each image from disk and applies the configured transforms to it.

    Attributes
    ----------
    file_list : List[Tuple[tp.Union[str, Path], tp.Union[int, float, np.ndarray]]]
        list of (image file, label) pair
    transform : ImageTransformForCls
        callable built from the ``transform_list`` given to ``__init__``
    """

    def __init__(
        self,
        file_list: tp.List[
            tp.Tuple[tp.Union[str, Path], tp.Union[int, float, np.ndarray]]],
        transform_list: tp.List[tp.Dict],
    ):
        """Store the file list and build the transform pipeline.

        Parameters
        ----------
        file_list :
            list of (image file, label) pair
        transform_list :
            transform specification forwarded to ``ImageTransformForCls``
        """
        self.file_list = file_list
        self.transform = ImageTransformForCls(transform_list)

    def __len__(self):
        """Return Num of Images."""
        return len(self.file_list)

    def __getitem__(self, index):
        """Return transformed image and its label for given index."""
        img_path, label = self.file_list[index]
        img = self._read_image_as_array(img_path)

        img, label = self.transform((img, label))
        return img, label

    def _read_image_as_array(self, path: str):
        """Read image file and convert it into an RGB numpy.ndarray.

        Raises
        ------
        OSError
            if OpenCV cannot load ``path``
        """
        img_arr = cv2.imread(str(path))
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if img_arr is None:
            raise OSError(f"cv2.imread could not load image: {path}")
        return cv2.cvtColor(img_arr, cv2.COLOR_BGR2RGB)

In [40]:
def get_dataloaders_for_inference(
    file_list: tp.List[tp.List], batch_size=32, num_workers=2,
):
    """Create the DataLoader used for inference.

    Images are normalised with a fixed mean/std and converted to tensors;
    no other augmentation is applied.

    Args:
        file_list: (image path, label) pairs handed to ``LabeledImageDataset``.
        batch_size: number of images per batch.
        num_workers: number of DataLoader worker processes.

    Returns:
        A single ``DataLoader`` that iterates ``file_list`` in order.
    """
    dataset = LabeledImageDataset(
        file_list,
        transform_list=[
          # mean/std are numeric literals so the transform does not depend on
          # implicit string-to-float coercion.
          ["Normalize", {
              "always_apply": True, "max_pixel_value": 255.0,
              "mean": [0.4887381077884414], "std": [0.23064819430546407]}],
          ["ToTensorV2", {"always_apply": True}],
        ])
    # shuffle=False / drop_last=False: every file is visited once, in
    # file_list order, so predictions line up with the input rows.
    loader = data.DataLoader(
        dataset,
        batch_size=batch_size, shuffle=False,
        num_workers=num_workers, pin_memory=True,
        drop_last=False)

    return loader

In [41]:
class ImageTransformBase:
    """
    Common base for transforms assembled from (name, params) specifications.

    Args:
        data_augmentations: List of tuple(method: str, params :dict); each
            entry is resolved to a callable, instantiated with ``**params``
            and chained with ``albumentations.Compose``.
    """

    def __init__(self, data_augmentations: tp.List[tp.Tuple[str, tp.Dict]]):
        """Instantiate every configured augmentation and compose them."""
        pipeline = []
        for aug_name, params in data_augmentations:
            aug_factory = self._get_augmentation(aug_name)
            pipeline.append(aug_factory(**params))
        self.data_aug = albumentations.Compose(pipeline)

    def __call__(self, pair: tp.Tuple[np.ndarray]) -> tp.Tuple[np.ndarray]:
        """Task-specific subclasses must override this."""
        raise NotImplementedError

    def _get_augmentation(self, aug_name: str) -> tp.Tuple[ImageOnlyTransform, DualTransform]:
        """Resolve ``aug_name`` to an augmentation callable.

        Attributes of the ``albumentations`` package take precedence; any
        other name is evaluated as a Python expression.
        """
        if not hasattr(albumentations, aug_name):
            # NOTE(review): eval runs arbitrary expressions; only pass
            # augmentation names that come from trusted configuration.
            return eval(aug_name)
        return getattr(albumentations, aug_name)


class ImageTransformForCls(ImageTransformBase):
    """Image transform for classification: the image is transformed, the label is not."""

    def __init__(self, data_augmentations: tp.List[tp.Tuple[str, tp.Dict]]):
        """Delegate pipeline construction to ``ImageTransformBase``."""
        super().__init__(data_augmentations)

    def __call__(self, in_arrs: tp.Tuple[np.ndarray]) -> tp.Tuple[np.ndarray]:
        """Transform the image of an (image, label) pair.

        Returns:
            (transformed image, label), with the label passed through as-is.
        """
        image, label = in_arrs
        transformed = self.data_aug(image=image)
        return transformed["image"], label

In [42]:
def load_setting_file(path: str):
    """Parse the YAML file at ``path`` with ``yaml.safe_load`` and return the result."""
    with open(path) as stream:
        return yaml.safe_load(stream)


def set_random_seed(seed: int = 42, deterministic: bool = False):
    """Seed the Python, NumPy and PyTorch RNGs and configure cuDNN determinism.

    Args:
        seed: value used for every random number generator.
        deterministic: value assigned to ``torch.backends.cudnn.deterministic``.
            When True, cuDNN benchmark mode is also switched off; when False,
            the benchmark flag is left as it was.
    """
    random.seed(seed)
    np.random.seed(seed)
    # NOTE: this only reaches processes started after this call; the hash
    # seed of the already-running interpreter is fixed at start-up.
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.backends.cudnn.deterministic = deterministic  # type: ignore
    if deterministic:
        # Benchmark mode may select a different convolution algorithm on each
        # run, which defeats the determinism the caller asked for.
        torch.backends.cudnn.benchmark = False  # type: ignore
    

def run_inference_loop(stgs, model, loader, device):
    """Predict sigmoid probabilities for every batch yielded by ``loader``.

    Args:
        stgs: settings object; not used by this function.
        model: network to run; it is moved to ``device`` and put in eval mode.
        loader: iterable of (input, target) pairs; the target is ignored.
        device: device on which the forward pass runs.

    Returns:
        numpy array of all batch predictions concatenated along axis 0.
    """
    model.to(device)
    model.eval()

    batch_preds = []
    with torch.no_grad():
        for inputs, _ in tqdm(loader):
            logits = model(inputs.to(device))
            probs = logits.sigmoid()
            batch_preds.append(probs.detach().cpu().numpy())

    return np.concatenate(batch_preds)

In [43]:
# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [44]:
if submit:
    model_dir = TRAINED_MODEL
    test_dir = TEST_RESIZED

    # Test labels are unknown; a dummy label keeps the (path, label) pair
    # shape that the dataset unpacks.
    test_file_list = [
        (test_dir / f"{img_id}.png", [-1] * 11)
        for img_id in smpl_sub["StudyInstanceUID"].values]
    test_loader = get_dataloaders_for_inference(test_file_list, batch_size=32)

    # One row per fold that is actually run, filled by position. Sizing the
    # buffer by N_FOLD and indexing by fold id would average in all-zero rows
    # whenever FOLDS is a subset of the trained folds.
    test_preds_arr = np.zeros((len(FOLDS), len(smpl_sub), N_CLASSES))
    for fold_idx, fold_id in enumerate(FOLDS):
        print(f"[fold {fold_id}]")
        stgs = load_setting_file(model_dir / f"fold{fold_id}" / "settings.yml")
        # # prepare
        # the fine-tuned weights are loaded below, so skip the pretrained checkpoint
        stgs["model"]["params"]["pretrained"] = False
        model = MultiHeadResNet200D(**stgs["model"]["params"])
        model_path = model_dir / f"best_model_fold{fold_id}.pth"
        model.load_state_dict(torch.load(model_path, map_location=device))

        # # inference test
        test_pred = run_inference_loop(stgs, model, test_loader, device)
        test_preds_arr[fold_idx] = test_pred

        # release this fold's model before the next one is built
        del model
        torch.cuda.empty_cache()
        gc.collect()
    # average the per-fold probabilities
    test_preds_arr = np.mean(test_preds_arr, axis=0)

[fold 0]


100%|██████████| 1/1 [00:13<00:00, 13.13s/it]


[fold 1]


100%|██████████| 1/1 [00:12<00:00, 12.34s/it]


[fold 2]


100%|██████████| 1/1 [00:12<00:00, 12.74s/it]


[fold 3]


100%|██████████| 1/1 [00:13<00:00, 13.15s/it]


[fold 4]


100%|██████████| 1/1 [00:12<00:00, 12.79s/it]


In [46]:
if submit:
    # Weighted blend of the per-model predictions (the predictions_* arrays
    # are defined outside this cell) with this cell group's fold-averaged
    # test_preds_arr. The divisor 4.1 is the sum of the weights:
    # 1 + 1 + 0.5 + 0.5 + 0.1 + 1.
    predictions = (predictions_ResNet200d_1 + predictions_ResNet200d_2 + 0.50 * predictions_ResNet200d_3 + 0.50 * predictions_SeResNet152d + 0.10 * predictions_EfficientNet + test_preds_arr) / 4.1
    # Work on a copy so the sample-submission frame itself is not modified.
    sub = smpl_sub.copy()
    # NOTE(review): assumes every prediction array is row-aligned with
    # smpl_sub and column-aligned with CLASSES — confirm.
    sub[CLASSES] = predictions
    sub.to_csv('submission.csv', index=False)
    display(sub)

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
0,1.2.826.0.1.3680043.8.498.46923145579096002617...,0.018368,0.31506,0.591304,0.003793,0.007428,0.02777,0.958853,0.04049,0.091036,0.820525,0.990823
1,1.2.826.0.1.3680043.8.498.84006870182611080091...,4.8e-05,0.000164,0.000475,0.000216,0.000212,0.000303,0.00042,0.006782,0.01034,0.981608,2.7e-05
2,1.2.826.0.1.3680043.8.498.12219033294413119947...,6.4e-05,0.00012,0.000204,0.000233,0.000244,0.000233,0.000299,0.008141,0.304478,0.613475,0.000113
3,1.2.826.0.1.3680043.8.498.84994474380235968109...,0.002121,0.014899,0.024498,0.025777,0.008953,0.945091,0.026044,0.027175,0.049916,0.844214,0.00196
4,1.2.826.0.1.3680043.8.498.35798987793805669662...,0.000147,0.000328,0.000619,0.001126,0.00076,0.000553,0.001395,0.012366,0.127769,0.851373,0.0001
