In [1]:
import os,re,gc,cv2,sys 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

from kaggle_datasets import KaggleDatasets 
from sklearn.model_selection import train_test_split 
import tensorflow as tf 
import tensorflow_addons as tfa 
import tensorflow.keras.applications.efficientnet as efn
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications import ResNet152

# timm 
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
from tqdm.notebook import tqdm
from tqdm.notebook import tqdm

# torch 
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
import albumentations
import timm
from albumentations import *
from albumentations.pytorch import ToTensorV2
# Run the torch models on the GPU when one is visible, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
# Target columns, listed in the same order as the submission header.
LABELS = np.array([
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present',
])

# One ResNet200D checkpoint per fold; the numeric suffix is part of each file name.
model_path = [
    f'../input/resnet200d-baseline-benchmark-public/resnet200d_fold{fold}_cv{tag}.pth'
    for fold, tag in enumerate((953, 955, 955, 957, 954))
]

# Number of target columns (11), kept in step with LABELS.
N_LABELS = len(LABELS)
# Passed as `num_parallel_calls` and as the prefetch size in the tf.data pipelines below.
AUTO = tf.data.experimental.AUTOTUNE

class CONFIG1:
    """Settings read by the 512x512 tf.data pipeline (decode_fn / make_clahe_dataset)."""
    imsize = (512,512)   # target (height, width) passed to tf.image.resize
    batchsize = 32       # images per batch
    tta = 5              # number of augmented passes over the test set

class CONFIG2:
    """Settings read by the 768x768 tf.data pipeline (decoder / make_test_dataset)."""
    imsize = (768,768)   # target (height, width) passed to tf.image.resize
    batchsize = 24       # images per batch
    tta = 1              # number of passes over the test set
    
class CONFIG3:
    """Settings read by the torch pipeline (transforms_test, DataLoader, TTA inference)."""
    imsize = 512     # side length of the square input image
    batchsize = 1    # images per DataLoader batch

# Dataset

In [3]:
## decoder 
def decoder(path):
    """Read one JPEG and return it as a float32 image resized to CONFIG2.imsize.

    The file is decoded with 3 channels, cast to float32 and divided by 255
    before the resize.
    """
    raw = tf.io.read_file(path)
    pixels = tf.io.decode_jpeg(raw, channels=3)
    scaled = tf.cast(pixels, tf.float32) / 255.0
    return tf.image.resize(scaled, CONFIG2.imsize)


## Augmentation for TTA 
def augmenter(img):
    """Apply a random left/right flip followed by a random up/down flip."""
    flipped_lr = tf.image.random_flip_left_right(img)
    return tf.image.random_flip_up_down(flipped_lr)

## Make test data 
def make_test_dataset(paths,cache_dir=False,augment=False,repeat=False):
    """Build the batched tf.data pipeline that feeds `decoder` output to a model.

    Args:
        paths: image paths handed to `tf.data.Dataset.from_tensor_slices`.
        cache_dir: falsy for no caching; otherwise a directory that is created
            if needed and passed to `Dataset.cache`.
        augment: when true, map `augmenter` (random flips) over the images.
        repeat: when true, repeat the dataset indefinitely.

    Returns:
        A dataset batched by CONFIG2.batchsize with prefetching enabled.
    """
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    pipeline = tf.data.Dataset.from_tensor_slices(paths)
    pipeline = pipeline.map(decoder, num_parallel_calls=AUTO)
    if cache_dir:
        # Cache decoded images before augmentation so each pass draws new flips.
        pipeline = pipeline.cache(cache_dir)
    if augment:
        pipeline = pipeline.map(augmenter, num_parallel_calls=AUTO)
    if repeat:
        pipeline = pipeline.repeat()
    return pipeline.batch(CONFIG2.batchsize).prefetch(AUTO)

#----------------------------------------------------

## decoder 
def decode_fn(path):
    """Read one JPEG and return it as a uint8 image resized to CONFIG1.imsize.

    The image is decoded with 3 channels, resized, and then cast back to uint8.
    """
    raw = tf.io.read_file(path)
    decoded = tf.io.decode_jpeg(raw, channels=3)
    resized = tf.image.resize(decoded, CONFIG1.imsize)
    return tf.cast(resized, tf.uint8)

## Augmentation 
# Each flip is applied independently with probability 0.5.
_flip_ops = [HorizontalFlip(p=0.5), VerticalFlip(p=0.5)]
transform = Compose(_flip_ops)

def aug_fn(image):
    """Flip `image` with the module-level `transform`, scale by 1/255 and resize.

    Called through `tf.numpy_function` (see `process_data`); returns a float32
    image resized to CONFIG1.imsize.
    """
    flipped = transform(image=image)["image"]
    scaled = tf.cast(flipped / 255, tf.float32)
    return tf.image.resize(scaled, CONFIG1.imsize)

def process_data(image):
    """Run `aug_fn` inside the tf.data graph and restore the static image shape."""
    augmented = tf.numpy_function(func=aug_fn, inp=[image], Tout=tf.float32)
    # The shape is re-declared explicitly after the numpy_function call.
    height, width = CONFIG1.imsize
    augmented.set_shape((height, width, 3))
    return augmented


## Make the 512px dataset (despite the function name, no CLAHE is applied)
def make_clahe_dataset(paths,cache_dir=False):
    """Build the repeating, augmented 512px tf.data pipeline.

    Args:
        paths: image paths handed to `tf.data.Dataset.from_tensor_slices`.
        cache_dir: falsy for no caching; otherwise a directory that is created
            if needed and passed to `Dataset.cache`.

    Returns:
        A dataset that repeats indefinitely, batched by CONFIG1.batchsize,
        with prefetching enabled.
    """
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    pipeline = tf.data.Dataset.from_tensor_slices(paths).map(
        decode_fn, num_parallel_calls=AUTO
    )
    if cache_dir:
        # Cache the decoded uint8 images; the flips are applied after the cache.
        pipeline = pipeline.cache(cache_dir)
    return (
        pipeline
        .map(process_data, num_parallel_calls=AUTO)
        .repeat()
        .batch(CONFIG1.batchsize)
        .prefetch(AUTO)
    )

In [4]:
# torch 
# model 
class RANZCRResNet200D(nn.Module):
    """timm backbone followed by global average pooling and a linear head.

    The backbone's own `global_pool` and `fc` are replaced by `nn.Identity`, and
    this wrapper adds `AdaptiveAvgPool2d(1)` plus a `Linear` layer of its own.

    The attribute names `model`, `pooling` and `fc` determine the state_dict
    keys, so they must stay unchanged for saved checkpoints to keep loading.

    Args:
        model_name: architecture name passed to `timm.create_model`.
        out_dim: number of output logits.
        pretrained: forwarded to `timm.create_model`. It was previously
            accepted but ignored. The default (False) builds the network
            without fetching pretrained weights.
    """

    def __init__(self, model_name='resnet200d', out_dim=11, pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        # Read the classifier width before the original head is removed.
        n_features = self.model.fc.in_features
        self.model.global_pool = nn.Identity()
        self.model.fc = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(n_features, out_dim)

    def forward(self, x):
        """Return raw logits of shape (batch, out_dim); no sigmoid is applied."""
        features = self.model(x)
        # Pool to 1x1 and flatten everything except the batch dimension.
        pooled_features = self.pooling(features).flatten(1)
        return self.fc(pooled_features)

# transform 
# Resize to the CONFIG3 square, normalise with the given per-channel mean/std,
# then convert to a torch tensor.
transforms_test = albumentations.Compose([
    albumentations.Resize(CONFIG3.imsize, CONFIG3.imsize),
    albumentations.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# dataset 
class RANZCRDataset(Dataset):
    """Torch dataset that loads images listed in a DataFrame's `file_path` column.

    Args:
        df: frame with a `file_path` column and the LABELS columns.
        mode: 'test' makes `__getitem__` return only the image; any other value
            makes it return `(image, label)`.
        transform: optional callable invoked as `transform(image=img)`; its
            'image' entry replaces the loaded image.
    """

    def __init__(self, df, mode, transform=None):
        # Positional row access below relies on a 0..n-1 index.
        self.df = df.reset_index(drop=True)
        self.mode = mode
        self.transform = transform
        self.labels = df[LABELS].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Load, colour-convert and transform the image at position `index`.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file (missing or
                corrupt). Without this check the failure would only show up as
                an opaque error inside `cv2.cvtColor`.
        """
        row = self.df.loc[index]
        img = cv2.imread(row.file_path)
        if img is None:
            raise FileNotFoundError(f"cv2.imread could not read image: {row.file_path}")
        # OpenCV loads BGR; convert to RGB before the transform runs.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            img = self.transform(image=img)['image']

        if self.mode == 'test':
            # No label tensor is built in test mode; it would be discarded anyway.
            return img
        label = torch.tensor(self.labels[index]).float()
        return img, label

# inference
def tta_inference_func(test_loader):
    """Predict probabilities with horizontal-flip test-time augmentation.

    Uses the module-level `model` and `device`. For every batch, the images and
    their mirrored copies (flipped along the last axis) go through the model in
    one forward pass. The two sets of logits are averaged per image and a
    sigmoid is applied to the average.

    Each image is now paired with its own flip for any batch size, including a
    short final batch. The previous reshape was only correct for batch size 1.

    Args:
        test_loader: iterable yielding image batches of shape (B, C, H, W).

    Returns:
        numpy array of shape (number of images, number of logits).
    """
    model.eval()
    batch_probs = []

    with torch.no_grad():
        for images in tqdm(test_loader):
            x = images.to(device)
            n_images = x.size(0)
            # Rows [0, B) are the originals, rows [B, 2B) their mirrored copies.
            x = torch.cat([x, x.flip(-1)], dim=0)
            logits = model(x)
            # (2B, L) -> (2, B, L): axis 0 separates original and flipped view.
            logits = logits.view(2, n_images, -1).mean(0)
            batch_probs.append(logits.sigmoid().cpu())

    return torch.cat(batch_probs).numpy()

In [5]:
# The sample submission provides the test IDs and the output column layout.
sub_df = pd.read_csv(
    filepath_or_buffer="../input/ranzcr-clip-catheter-line-classification/sample_submission.csv"
)
# One JPEG per StudyInstanceUID in the test folder.
test_paths = (
    "../input/ranzcr-clip-catheter-line-classification/test/"
    + sub_df.StudyInstanceUID
    + ".jpg"
)

In [6]:
#dset512 = make_clahe_dataset(test_paths)
# augment=True maps random flips over the images; repeat=True makes the dataset
# endless, so any consumer has to bound the number of batches it reads.
dset768 = make_test_dataset(paths=test_paths, augment=True, repeat=True)

In [7]:
# torch 
# The dataset gets a copy of sub_df extended with `file_path`; sub_df itself
# keeps only the submission columns.
test_dataset = RANZCRDataset(
    sub_df.assign(
        file_path=sub_df.StudyInstanceUID.apply(
            lambda x: os.path.join('../input/ranzcr-clip-catheter-line-classification/test', f'{x}.jpg')
        )
    ),
    'test',
    transform=transforms_test,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=CONFIG3.batchsize,
    shuffle=False,  # keep the row order of sub_df
    num_workers=24,
)

# Show Image

In [8]:
def view_image(ds,num=4):
    """Plot the first `num` images of the first batch drawn from `ds`.

    Images are laid out in a 4-column grid. The grid has at least 3 rows, which
    is the original layout, and grows when `num` exceeds 12. Previously a larger
    `num` raised inside `add_subplot`.

    Args:
        ds: iterable whose first element is a batch of images exposing `.numpy()`.
        num: maximum number of images to show.
    """
    print(ds)
    n_cols = 4
    # Ceiling division without importing math; never fewer than the original 3 rows.
    n_rows = max(3, -(-num // n_cols))
    fig = plt.figure(figsize=(22, 22))
    images = next(iter(ds))
    for i,img in enumerate(images):
        if i == num:
            break
        ax = fig.add_subplot(n_rows,n_cols,i+1,xticks=[],yticks=[])
        ax.imshow(img.numpy())
    plt.show()

In [9]:
#view_image(tta_dset)  
#view_image(dset512)

# Model

In [10]:
def create_model(CONFIG):
    """Build the Keras classifier: ResNet152 backbone, global pooling, sigmoid head.

    Args:
        CONFIG: object whose `imsize` is a (height, width) pair for the input.

    Returns:
        An uncompiled `tf.keras.Sequential` with randomly initialised weights
        (`weights=None`) and N_LABELS sigmoid outputs.
    """
    backbone = ResNet152(
        input_shape=(*CONFIG.imsize, 3),
        weights=None,
        include_top=False,
    )
    pooling = tf.keras.layers.GlobalAveragePooling2D()
    head = tf.keras.layers.Dense(N_LABELS, activation="sigmoid")
    return tf.keras.Sequential([backbone, pooling, head])

# Build Models and Run Inference

In [11]:
def rank_average(pred1,pred2,weight):
    """Blend two prediction matrices by a weighted sum of per-column ranks.

    Within each column, every value is replaced by its 0-based rank
    (`argsort` of `argsort`). Ties are therefore broken by sort order and are
    not averaged. The ranks are not normalised, so the output ranges up to
    `(n_rows - 1) * (weight[0] + weight[1])`.

    The number of columns is taken from the inputs, not from the global
    N_LABELS, so the function works for any label count.

    Args:
        pred1, pred2: 2-D arrays (or nested sequences) of identical shape
            (n_samples, n_labels).
        weight: indexable with two entries, the weights for `pred1` and `pred2`.

    Returns:
        float array of shape (n_samples, n_labels) with the blended ranks.

    Raises:
        ValueError: if the inputs are not 2-D or their shapes differ.
    """
    pred1 = np.asarray(pred1)
    pred2 = np.asarray(pred2)
    if pred1.ndim != 2 or pred1.shape != pred2.shape:
        raise ValueError(
            f"expected two 2-D arrays of equal shape, got {pred1.shape} and {pred2.shape}"
        )

    n_rows, n_cols = pred1.shape
    pred = np.zeros((n_rows, n_cols))
    for k in range(n_cols):
        # argsort twice turns values into their rank within the column.
        rank1 = np.argsort(np.argsort(pred1[:,k]))
        rank2 = np.argsort(np.argsort(pred2[:,k]))
        pred[:,k] = rank1*weight[0] + rank2*weight[1]
    return pred

In [12]:
TEST_NUM = sub_df.shape[0]
N_FOLDS = len(model_path)   # one checkpoint per fold, for both model families
RESNET200D_POWER = 4        # exponent on the ResNet200D average in the blend

# NOTE: an earlier 512px Keras ensemble (CONFIG1 / dset512, weights under
# ../input/model-nb13-5) is switched off in this version, so only the two
# model families below are blended.

# --- Keras ResNet152 at CONFIG2 resolution -----------------------------------
pred2 = np.zeros((TEST_NUM,N_LABELS))
# dset768 repeats forever, so predict() is bounded to
# ceil(tta * TEST_NUM / batchsize) batches and the surplus rows are cut off.
steps = (CONFIG2.tta*TEST_NUM + CONFIG2.batchsize - 1)//CONFIG2.batchsize
for fold in range(N_FOLDS):
    print("-"*50)
    print(f"MODEL {2}, FOLD : {fold+1}")
    model = create_model(CONFIG2)
    model.load_weights(f"../input/nb13-7/model_nb13_7_{fold}.h5")
    pred = model.predict(dset768,steps=steps,verbose=1)[:CONFIG2.tta*TEST_NUM]
    # Rows arrive pass after pass; the F-order reshape groups the tta passes of
    # each image along axis 1 so they can be averaged.
    pred = np.mean(pred.reshape((TEST_NUM,CONFIG2.tta,N_LABELS),order = "F"),axis = 1)
    pred2 += pred
    del model,pred
    gc.collect()

# --- timm ResNet200D with horizontal-flip TTA --------------------------------
pred3 = np.zeros((TEST_NUM,N_LABELS))
for fold in range(N_FOLDS):
    print("-"*50)
    # tta_inference_func reads the module-level `model`, so the name must stay.
    model = RANZCRResNet200D("resnet200d")
    model = model.to(device)
    # Map the checkpoint onto whichever device was selected; a hard-coded
    # 'cuda:0' fails on a CPU-only machine.
    state_dict = torch.load(model_path[fold], map_location=device)
    model.load_state_dict(state_dict)
    pred3 += tta_inference_func(test_loader)
    # Release this fold's weights before the next checkpoint is loaded.
    del model, state_dict
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

pred2 /= N_FOLDS
pred3 /= N_FOLDS
# Multiplicative blend that weights the ResNet200D average far more heavily.
# The result is a score, not a calibrated probability.
ensemble_pred = pred2*pred3**RESNET200D_POWER

--------------------------------------------------
MODEL 2, FOLD : 1
--------------------------------------------------
MODEL 2, FOLD : 2
--------------------------------------------------
MODEL 2, FOLD : 3
--------------------------------------------------
MODEL 2, FOLD : 4
--------------------------------------------------
MODEL 2, FOLD : 5
--------------------------------------------------


  0%|          | 0/3582 [00:00<?, ?it/s]

--------------------------------------------------


  0%|          | 0/3582 [00:00<?, ?it/s]

--------------------------------------------------


  0%|          | 0/3582 [00:00<?, ?it/s]

--------------------------------------------------


  0%|          | 0/3582 [00:00<?, ?it/s]

--------------------------------------------------


  0%|          | 0/3582 [00:00<?, ?it/s]

# Submission

In [13]:
# Fail before writing the file if the predictions do not line up with the
# submission rows and label columns.
assert ensemble_pred.shape == (len(sub_df), len(LABELS)), (
    f"ensemble_pred has shape {ensemble_pred.shape}, "
    f"expected {(len(sub_df), len(LABELS))}"
)
sub_df[LABELS] = ensemble_pred
sub_df.to_csv('submission.csv', index=False)
sub_df.head()

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
0,1.2.826.0.1.3680043.8.498.46923145579096002617...,5.884066e-07,0.01895323,0.05141511,1.405963e-11,5.139898e-10,4.033014e-08,0.8651645,1.188472e-07,3.674036e-05,0.503464,0.9967776
1,1.2.826.0.1.3680043.8.498.84006870182611080091...,1.135671e-21,2.250702e-19,2.0578960000000002e-17,7.933253e-19,2.0785630000000002e-18,5.307002e-19,8.823121999999999e-19,6.111553e-12,1.006926e-10,0.961492,1.219554e-24
2,1.2.826.0.1.3680043.8.498.12219033294413119947...,3.4052739999999997e-20,1.219806e-19,8.477084e-20,8.568234e-18,6.236193e-19,7.041866e-20,3.789236e-19,1.598525e-10,0.01202142,0.09175,2.035084e-21
3,1.2.826.0.1.3680043.8.498.84994474380235968109...,5.557185e-11,9.654368e-08,1.733545e-08,9.997571e-07,1.907477e-08,0.8881177,1.861253e-07,5.262657e-08,7.888288e-06,0.548658,2.761723e-11
4,1.2.826.0.1.3680043.8.498.35798987793805669662...,1.600543e-18,3.3078500000000005e-17,5.044279e-17,2.592894e-16,2.680356e-16,3.289711e-18,4.083596e-15,2.023171e-09,0.0004124004,0.292719,1.239704e-21
