In [None]:
import os
import random

import albumentations as albu
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import timm
import torch
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader

# Configuration

In [None]:
# Training metadata: one row per study, with one binary column per entry in LABELS.
df = pd.read_csv("../input/ranzcr-clip-catheter-line-classification/train.csv")

# Target columns, in the order the model's output logits are interpreted.
LABELS = [
    'ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
    'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal',
    'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
    'Swan Ganz Catheter Present'
]
N_LABELS = len(LABELS)  # referenced by the metric cell
OUTPUT_DIR = "./"

# When True, only a 1% sample of the data is used so the notebook runs quickly.
DEBUG = True


class CONFIG:
    """Tunable hyper-parameters shared by every cell of the notebook."""

    batchsize = 16
    imsize = (512, 512)  # unpacked positionally into albu.Resize
    model_name = "resnet200d"
    num_workers = 24

    seed = 42    # used for sampling/splitting here and read by cv_tuner
    n_folds = 5  # number of cross-validation folds read by cv_tuner

    min_lr = 1e-6
    max_lr = 5e-4
    epochs = 10


if DEBUG:
    # Seeded so that a kernel restart reproduces the same debug subset.
    df = df.sample(frac=0.01, random_state=CONFIG.seed).reset_index(drop=True)

# Seeded hold-out split; `valid` is a validation set, not a test set.
train, valid = train_test_split(df, test_size=0.2, random_state=CONFIG.seed)
print(f"TRAIN SHAPE {train.shape}")
print(f"VALID SHAPE {valid.shape}")

In [None]:
def seed_torch(seed, deterministic=True):
    """Seed every random number generator the training pipeline touches.

    Seeds Python's hash seed, the ``random`` module, NumPy and PyTorch
    (CPU and all CUDA devices), then configures cuDNN.

    Args:
        seed: Integer seed applied to all generators.
        deterministic: When True (default) cuDNN is restricted to
            deterministic kernels and auto-tuning is disabled, so repeated
            runs select the same algorithms. When False, cuDNN benchmarking
            is enabled for speed at the cost of run-to-run reproducibility.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds every visible GPU; a no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = deterministic
    # Benchmark mode may pick a different kernel on each run, which defeats
    # the purpose of seeding, so it is only enabled when determinism is off.
    torch.backends.cudnn.benchmark = not deterministic

# Dataset 

In [None]:
class RANZCRDataset(Dataset):
    """Dataset yielding chest X-ray images (and labels) keyed by StudyInstanceUID.

    Args:
        df: DataFrame with a ``StudyInstanceUID`` column and, unless
            ``mode == "test"``, one column per entry in ``LABELS``.
        mode: ``"test"`` returns only the image; any other value
            (``"train"`` / ``"valid"``) returns ``(image, label)``.
        transform: Optional albumentations transform, called as
            ``transform(image=img)["image"]`` on the uint8 RGB image.
        image_dir: Directory containing ``<StudyInstanceUID>.jpg`` files.
            Defaults to the competition's train folder.
    """

    def __init__(self, df, mode, transform=None,
                 image_dir="../input/ranzcr-clip-catheter-line-classification/train"):
        self.df = df
        self.mode = mode
        # Test frames carry no label columns, so only read them when needed.
        self.label = None if mode == "test" else df[LABELS].values
        self.StudyInstanceUID = df["StudyInstanceUID"].values
        self.transform = transform
        self.image_dir = image_dir

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        uid = self.StudyInstanceUID[idx]
        path = os.path.join(self.image_dir, uid + ".jpg")

        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Augmentations run on the uint8 image, before scaling to [0, 1].
        if self.transform is not None:
            img = self.transform(image=img)["image"]

        img = img.astype(np.float32)
        img /= 255.0
        img = ToTensorV2()(image=img)["image"]

        if self.mode == "test":
            return img
        # train or valid
        label = self.label[idx]
        return img, label.astype(np.float32)
        
# Training-time augmentation: random flip and colour/contrast jitter, then a
# final resize to CONFIG.imsize so every sample has the same spatial size.
train_transform = albu.Compose(
    transforms=[
        albu.HorizontalFlip(p=0.5),
        albu.HueSaturationValue(
            hue_shift_limit=10,
            sat_shift_limit=10,
            val_shift_limit=10,
            p=0.7,
        ),
        albu.RandomBrightnessContrast(
            brightness_limit=(-0.2, 0.2),
            contrast_limit=(-0.2, 0.2),
            p=0.7,
        ),
        albu.CLAHE(clip_limit=(1, 4), p=0.5),
        albu.Resize(*CONFIG.imsize),
    ]
)

# Test-time augmentation: the lighter subset (flip + CLAHE) followed by the
# same final resize.
tta_transform = albu.Compose(
    transforms=[
        albu.HorizontalFlip(p=0.5),
        albu.CLAHE(clip_limit=(1, 4), p=0.5),
        albu.Resize(*CONFIG.imsize),
    ]
)

# Models

In [None]:
class RANZCRResNet200D(nn.Module):
    """timm backbone with its classifier replaced by a custom linear head.

    The backbone's own global pooling and ``fc`` layer are swapped for
    ``nn.Identity`` so it returns a feature map; this module then applies
    adaptive average pooling and a single linear layer producing
    ``out_dim`` logits.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        out_dim: Number of output logits.
        pretrained: Whether timm should load pretrained weights for the
            backbone. Pass False when weights are restored from a checkpoint.
    """

    def __init__(self, model_name='resnet200d', out_dim=11, pretrained=False):
        super().__init__()
        # Honour the caller's choice; previously this was always True.
        self.model = timm.create_model(model_name, pretrained=pretrained)
        # Read the feature width before the original head is removed.
        n_features = self.model.fc.in_features
        self.model.global_pool = nn.Identity()
        self.model.fc = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(n_features, out_dim)

    def forward(self, x):
        """Return raw logits of shape ``(batch, out_dim)`` for image batch ``x``."""
        features = self.model(x)
        # (batch, C, 1, 1) -> (batch, C)
        pooled_features = self.pooling(features).flatten(1)
        return self.fc(pooled_features)

# Train & Valid function

In [None]:
def train_fn(train_loader,model,criterion,optimizer,scheduler,device):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        train_loader: Iterable yielding ``(img, labels)`` batches.
        model: Module to optimise; it is moved to ``device``.
        criterion: Loss called as ``criterion(pred, labels)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Accepted for interface compatibility; it is not stepped here.
        device: Target device for the model and every batch.

    Returns:
        Mean of the per-batch loss values (NaN if the loader is empty).
    """
    # Module.to moves parameters in place, so existing optimizer references
    # stay valid; doing it here keeps model and batches on the same device.
    model.to(device)
    model.train()
    losses = []
    for img, labels in train_loader:
        # Tensor.to returns a new tensor; the result must be kept.
        img = img.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        pred = model(img)
        loss = criterion(pred, labels)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    return np.mean(losses) if losses else float("nan")

def valid_fn(valid_loader,model,criterion,device):
    """Evaluate ``model`` over ``valid_loader`` without gradient tracking.

    Args:
        valid_loader: Iterable yielding ``(img, labels)`` batches.
        model: Module to evaluate; it is moved to ``device`` and put in eval mode.
        criterion: Loss called as ``criterion(pred, labels)`` on raw logits.
        device: Target device for the model and every batch.

    Returns:
        Tuple ``(mean_loss, preds, trues)`` where ``preds`` is a list of
        per-batch NumPy arrays of sigmoid probabilities and ``trues`` is the
        matching list of per-batch label arrays. ``mean_loss`` is NaN if the
        loader is empty.
    """
    # Keep model and batches on the same device.
    model.to(device)
    model.eval()
    losses = []
    trues = []
    preds = []
    for img, labels in valid_loader:
        # Tensor.to returns a new tensor; the result must be kept.
        img = img.to(device)
        labels = labels.to(device)
        with torch.no_grad():
            pred = model(img)
            loss = criterion(pred, labels)
        losses.append(loss.item())
        trues.append(labels.to('cpu').numpy())
        preds.append(pred.sigmoid().to('cpu').numpy())
    mean_loss = np.mean(losses) if losses else float("nan")
    return mean_loss, preds, trues

# Metric

In [None]:
def mean_roc_auc(targets,probabilities):
    """Average the per-label ROC AUC over the columns of ``targets``.

    Args:
        targets: 2-D array-like of binary ground truth, one column per label.
        probabilities: 2-D array-like of predicted scores, same layout.

    Returns:
        Mean AUC over the label columns that contain both classes. Columns
        holding a single class are skipped because ROC AUC is undefined for
        them (``roc_auc_score`` would raise). NaN if no column qualifies.
    """
    targets = np.asarray(targets)
    probabilities = np.asarray(probabilities)
    # Take the label count from the data rather than a module-level constant.
    roc_auc = [
        roc_auc_score(targets[:, k], probabilities[:, k])
        for k in range(targets.shape[1])
        if np.unique(targets[:, k]).size > 1
    ]
    return np.average(roc_auc) if roc_auc else float("nan")

# CV

In [None]:
def _mean_auc_score(targets, probabilities):
    """Return ``(mean_auc, per_label_auc)`` for 2-D targets and probabilities.

    Labels whose target column holds a single class get NaN (ROC AUC is
    undefined there) and are left out of the mean.
    """
    per_label = []
    for k in range(targets.shape[1]):
        column = targets[:, k]
        if np.unique(column).size < 2:
            per_label.append(float("nan"))
        else:
            per_label.append(roc_auc_score(column, probabilities[:, k]))
    defined = [s for s in per_label if not np.isnan(s)]
    mean_auc = float(np.mean(defined)) if defined else float("nan")
    return mean_auc, per_label


def cv_tuner():
    """Run K-fold cross-validation and checkpoint the best model of each fold.

    For every fold the model is trained for ``CONFIG.epochs`` epochs. After
    each epoch the validation mean AUC and loss are computed; the model is
    saved to ``OUTPUT_DIR`` whenever either one improves
    (``*_best_score.pth`` / ``*_best_loss.pth``). Each checkpoint stores the
    model state dict and the concatenated validation predictions.

    Returns:
        np.ndarray holding the best validation mean AUC of each fold.
    """
    # Fall back to defaults so this works even if CONFIG lacks these fields.
    n_folds = getattr(CONFIG, "n_folds", 5)
    seed = getattr(CONFIG, "seed", 42)

    # Use an existing fold assignment if present; otherwise derive a seeded,
    # balanced random one without mutating the global frame.
    # NOTE(review): this is a plain random split; if the CSV carries a patient
    # identifier, grouping folds by it would avoid leakage — confirm.
    if "fold" in df.columns:
        data = df
    else:
        fold_ids = np.random.RandomState(seed).permutation(len(df)) % n_folds
        data = df.assign(fold=fold_ids)

    # Validation images still need a common size to be batched.
    valid_transform = albu.Compose([albu.Resize(*CONFIG.imsize)])
    # The device is handed to train_fn / valid_fn; placement of the model and
    # batches is left to them so the two can never end up on different devices.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    fold_scores = []
    for fold in range(n_folds):
        print("-"*70)
        print(f"FOLD {fold+1}")
        seed_torch(seed)

        # Prepare Data
        print("Prepare Data...")
        train = data.query(f"fold != {fold}").reset_index(drop=True)
        valid = data.query(f"fold == {fold}").reset_index(drop=True)

        train_dset = RANZCRDataset(train,mode="train",transform=train_transform)
        valid_dset = RANZCRDataset(valid,mode="valid",transform=valid_transform)

        train_loader = DataLoader(train_dset,batch_size=CONFIG.batchsize,
                                  shuffle=True,num_workers=CONFIG.num_workers,
                                  pin_memory=True,drop_last=True)
        valid_loader = DataLoader(valid_dset,batch_size=CONFIG.batchsize*2,
                                  shuffle=False,num_workers=CONFIG.num_workers,
                                  pin_memory=True,drop_last=False)

        # Prepare Model and Utils
        print("Prepare Model and Utils...")
        model = RANZCRResNet200D(model_name=CONFIG.model_name,pretrained=True)
        criterion = nn.BCEWithLogitsLoss()
        # Start at max_lr and anneal down to min_lr over the epochs.
        optimizer = torch.optim.Adam(model.parameters(),lr=CONFIG.max_lr)
        scheduler = CosineAnnealingLR(optimizer, T_max=CONFIG.epochs, eta_min=CONFIG.min_lr, last_epoch=-1)

        # Training
        print("Training...")
        best_score = 0
        best_loss = np.inf
        for epoch in range(CONFIG.epochs):
            avg_loss = train_fn(train_loader,model,criterion,optimizer,scheduler,device)
            avg_val_loss, preds, trues = valid_fn(valid_loader,model,criterion,device)
            scheduler.step()

            # valid_fn returns per-batch lists; the metric needs 2-D arrays.
            val_preds = np.concatenate(preds)
            val_trues = np.concatenate(trues)
            score, label_scores = _mean_auc_score(val_trues, val_preds)
            print(f"[{fold+1}][{epoch}] LOSS : train {avg_loss} / valid {avg_val_loss}")
            print(f"[{fold+1}][{epoch}] MEAN AUC : {score}")
            print(f"[{fold+1}][{epoch}] AUC : {label_scores}")

            if best_score < score:
                best_score = score
                torch.save({'model': model.state_dict(), 'preds': val_preds},
                           OUTPUT_DIR+f'{CONFIG.model_name}_fold{fold}_best_score.pth')

            # Lower loss is better.
            if avg_val_loss < best_loss:
                best_loss = avg_val_loss
                torch.save({'model': model.state_dict(), 'preds': val_preds},
                           OUTPUT_DIR+f'{CONFIG.model_name}_fold{fold}_best_loss.pth')
        fold_scores.append(best_score)

    scores = np.array(fold_scores)
    print(f"AUC {scores.mean()}")
    return scores

In [None]:
#cv_tuner() 