In [1]:
import numpy as np 
from sklearn.metrics import roc_auc_score 
import pandas as pd 
from numba import njit 

from sklearn.linear_model import LinearRegression 

In [2]:
# Names of the label columns. The same names are used to select the target
# and prediction columns from the CSV files read in cv_ensemble.
LABELS = np.array([
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present',
])
# Derived from LABELS so the count can never drift from the list above.
N_LABELS = len(LABELS)
## Metrics
def mean_roc_auc(targets,probabilities):
    """Return the unweighted mean of the per-label ROC AUC scores.

    Column k of `targets` is scored against column k of `probabilities`
    with `roc_auc_score`, for k in 0 .. N_LABELS - 1; both arguments are
    indexed as 2-D arrays.
    """
    per_label_auc = []
    for label_idx in range(N_LABELS):
        y_true = targets[:, label_idx]
        y_score = probabilities[:, label_idx]
        per_label_auc.append(roc_auc_score(y_true, y_score))
    return np.average(per_label_auc)

In [3]:
def rank_average(pred1,pred2,weight):
    """Blend two prediction matrices by a weighted sum of per-column ranks.

    Each column of `pred1` and `pred2` is replaced by the 0-based rank of
    its values within that column (smallest value -> rank 0), and the two
    rank columns are combined as ``rank1 * weight[0] + rank2 * weight[1]``.

    Parameters
    ----------
    pred1, pred2 : 2-D arrays with the same number of rows. The number of
        columns processed is taken from `pred1`, so the function works for
        any label count rather than only the module-level N_LABELS.
    weight : indexable with two numeric entries, one per prediction matrix.

    Returns
    -------
    Float array of shape ``pred1.shape`` holding the blended ranks. The
    values are rank scores, not probabilities.
    """
    n_rows, n_cols = pred1.shape[0], pred1.shape[1]
    blended = np.zeros((n_rows, n_cols))
    for col in range(n_cols):
        # argsort of argsort turns values into ranks. Tied values still get
        # distinct ranks, in whatever order the inner argsort returns them.
        ranks1 = np.argsort(np.argsort(pred1[:, col]))
        ranks2 = np.argsort(np.argsort(pred2[:, col]))
        blended[:, col] = ranks1 * weight[0] + ranks2 * weight[1]
    return blended

In [4]:
def cv_ensemble(mode, power=4, rank_weights=(0.1, 0.9)):
    """Print the per-fold and mean ROC AUC of a two-model blend over 5 folds.

    For each fold 0..4 the targets are the LABELS columns of the rows in
    train_folds.csv whose `fold` equals that fold, and the two prediction
    sets are read from that fold's pred768 and pred_torch CSV files.

    Parameters
    ----------
    mode : 1 selects the multiplicative blend ``pred768 * pred_torch ** power``;
        any other value selects `rank_average` of the two prediction sets.
    power : exponent applied to the pred_torch predictions when mode is 1.
    rank_weights : the two weights passed to `rank_average` (pred768 first,
        pred_torch second) when mode is not 1.

    Returns
    -------
    None. The scores are only printed.
    """
    folds_df = pd.read_csv("../input/ranzcr-sgkf-data/train_folds.csv")
    scores = []
    for fold in range(5):
        print("-"*50)
        print(f"FOLD {fold}")
        # NOTE(review): assumes the rows of each prediction file are in the
        # same order as this fold's rows in train_folds.csv - TODO confirm.
        target = folds_df[folds_df.fold == fold][LABELS].values
        # Only the two prediction sets that enter the blend are loaded. The
        # pred512_1_{fold}.csv (ranzcrfoldensemble) and pred_clahe_1_{fold}.csv
        # (ranzcr-ensemble-clahe) files were read before but never used.
        pred_768 = pd.read_csv(f"../input/ranzcrfoldensemble/pred768_1_{fold}.csv")[LABELS].values
        pred_torch = pd.read_csv(f"../input/ranzcrensembletorch/pred_torch_1_{fold}.csv")[LABELS].values

        if mode == 1:
            pred = pred_768 * pred_torch**power
        else:
            pred = rank_average(pred_768, pred_torch, rank_weights)
        auc = mean_roc_auc(target, pred)
        print(f"AUC : {auc}")
        scores.append(auc)
    print(f"SCORE : {np.mean(scores)}")

In [5]:
# mode 1 takes the multiplicative-blend branch of cv_ensemble.
cv_ensemble(mode = 1)

--------------------------------------------------
FOLD 0
AUC : 0.9565153900748457
--------------------------------------------------
FOLD 1
AUC : 0.9591074042720277
--------------------------------------------------
FOLD 2
AUC : 0.9591848212524209
--------------------------------------------------
FOLD 3
AUC : 0.9606599834918133
--------------------------------------------------
FOLD 4
AUC : 0.9576358958537966
SCORE : 0.9586206989889808


In [6]:
# Any mode other than 1 (here 2) takes the rank-average branch of cv_ensemble.
cv_ensemble(mode = 2)

--------------------------------------------------
FOLD 0
AUC : 0.9564071651343173
--------------------------------------------------
FOLD 1
AUC : 0.9589473147374427
--------------------------------------------------
FOLD 2
AUC : 0.959221826709104
--------------------------------------------------
FOLD 3
AUC : 0.9607274116286956
--------------------------------------------------
FOLD 4
AUC : 0.95722404688108
SCORE : 0.9585055530181279
