# External Validation: Static vs Temporal (Experiment 2)

In [None]:
#Import necessary modules 

import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

import statistics
from statistics import mean
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score 
from sklearn.metrics import make_scorer
from sklearn.metrics import f1_score

from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.tree import plot_tree

from sklearn import metrics 
from sklearn.metrics import multilabel_confusion_matrix 
from sklearn.metrics import plot_confusion_matrix 
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV 

from sklearn.metrics import cohen_kappa_score 
from statsmodels.stats.inter_rater import fleiss_kappa 

In [None]:
#Connect to HiRID database

import os

import psycopg2
from psycopg2 import Error

#Connect to HiRID
# Each setting can be overridden through an environment variable so that
# credentials do not have to be edited into the notebook.
# NOTE(review): the fallbacks below are the values that used to be hard-coded
# here, kept only so existing set-ups keep working; the password should be
# rotated and its fallback removed.
conn = psycopg2.connect(user=os.environ.get("HIRID_DB_USER", "mimicuser"),
                        password=os.environ.get("HIRID_DB_PASSWORD", "knowlabMIMIC"),
                        host=os.environ.get("HIRID_DB_HOST", "172.17.0.1"),
                        port=os.environ.get("HIRID_DB_PORT", "5433"),
                        database=os.environ.get("HIRID_DB_NAME", "HiRID"))

#Cursor
cur = conn.cursor()

In [None]:
import warnings

# Silence all warnings for the rest of the notebook session.
warnings.filterwarnings('ignore')

# Show every column when a DataFrame is displayed. Uses the documented
# top-level pd.set_option rather than the incidental pd.pandas attribute.
pd.set_option('display.max_columns', None)

# 1. Import Training Datasets

In [None]:
#Define function to add numeric label columns to all 11 QEUH annotated datasets

def num_labels(df):
    """Add numeric versions of the 'Annotation' column to ``df`` in place.

    Columns written (all integer):
      * Annotation_Num - multiclass code: A=0, B=1, C=2, D=3, E=4
      * Ann_Bin_A      - A=0, B/C/D/E=1
      * Ann_Bin_B      - A/B=0, C/D/E=1
      * Ann_Bin_C      - A/B/C=0, D/E=1

    Any value that is not exactly one of 'A'..'E' (including missing values
    and lower-case letters) gets 0 in every column.

    Returns the same DataFrame object that was passed in.
    """
    multiclass_codes = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}

    # For each binary column, the labels that are coded as 1
    positive_labels = {
        'Ann_Bin_A': ('B', 'C', 'D', 'E'),
        'Ann_Bin_B': ('C', 'D', 'E'),
        'Ann_Bin_C': ('D', 'E'),
    }

    annotation = df['Annotation']

    # Unmapped labels come back as NaN from map(); they fall back to 0
    df['Annotation_Num'] = annotation.map(multiclass_codes).fillna(0).astype('int64')

    for column, positives in positive_labels.items():
        df[column] = annotation.isin(positives).astype('int64')

    return df

In [None]:
#Import Consultant no.1 dataset

# Columns dropped from the annotated sheet
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c1 = (
    pd.read_excel('./p01.xlsx')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})  #rename Mean to MAP
)

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
c1 = c1.assign(
    Adrenaline=c1['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c1['Noradrenaline'].replace(np.nan, 0),
)

# Add the numeric multiclass / binary label columns
c1 = num_labels(c1)

print(c1.shape)
c1.head()

In [None]:
#Import Consultant no.2 dataset

# Columns dropped from the annotated sheet
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c2 = (
    pd.read_csv('./p02.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})  #rename Mean to MAP
)

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
c2 = c2.assign(
    Adrenaline=c2['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c2['Noradrenaline'].replace(np.nan, 0),
)

# Add the numeric multiclass / binary label columns
c2 = num_labels(c2)

print(c2.shape)
c2.head()

In [None]:
#Import Consultant no.3 dataset

# Columns dropped from the annotated sheet
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c3 = (
    pd.read_csv('./p03.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})  #rename Mean to MAP
)

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
c3 = c3.assign(
    Adrenaline=c3['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c3['Noradrenaline'].replace(np.nan, 0),
)

# Add the numeric multiclass / binary label columns
c3 = num_labels(c3)

print(c3.shape)
c3.head()

In [None]:
#Import Consultant no.4 dataset

# Columns dropped from the annotated sheet
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c4 = (
    pd.read_excel('./p04.xlsx')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})  #rename Mean to MAP
)

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
c4 = c4.assign(
    Adrenaline=c4['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c4['Noradrenaline'].replace(np.nan, 0),
)

# Add the numeric multiclass / binary label columns
c4 = num_labels(c4)

print(c4.shape)
c4.head()

In [None]:
#Import Consultant no.5 dataset

# Columns dropped from the annotated sheet
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c5 = (
    pd.read_csv('./p05.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})  #rename Mean to MAP
)

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
c5 = c5.assign(
    Adrenaline=c5['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c5['Noradrenaline'].replace(np.nan, 0),
)

# Add the numeric multiclass / binary label columns
c5 = num_labels(c5)

print(c5.shape)
c5.head()

In [None]:
#Import Consultant no.6 dataset

# Columns dropped from the annotated sheet
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c6 = (
    pd.read_excel('./p06.xlsx')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})  #rename Mean to MAP
)

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
c6 = c6.assign(
    Adrenaline=c6['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c6['Noradrenaline'].replace(np.nan, 0),
)

# Add the numeric multiclass / binary label columns
c6 = num_labels(c6)

print(c6.shape)
c6.head()

In [None]:
#Import Consultant no.7 dataset

# Columns dropped from the annotated sheet
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c7 = (
    pd.read_csv('./p07.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})  #rename Mean to MAP
)

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
c7 = c7.assign(
    Adrenaline=c7['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c7['Noradrenaline'].replace(np.nan, 0),
)

# Add the numeric multiclass / binary label columns
c7 = num_labels(c7)

print(c7.shape)
c7.head()

In [None]:
#Import Consultant no.8 dataset

# Columns dropped from the annotated sheet
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c8 = (
    pd.read_csv('./p08.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})  #rename Mean to MAP
)

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
c8 = c8.assign(
    Adrenaline=c8['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c8['Noradrenaline'].replace(np.nan, 0),
)

# Add the numeric multiclass / binary label columns
c8 = num_labels(c8)

print(c8.shape)
c8.head()

In [None]:
#Import Consultant no.9 dataset

# Columns dropped from the annotated sheet
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c9 = (
    pd.read_csv('./p09.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})  #rename Mean to MAP
)

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
c9 = c9.assign(
    Adrenaline=c9['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c9['Noradrenaline'].replace(np.nan, 0),
)

# Add the numeric multiclass / binary label columns
c9 = num_labels(c9)

print(c9.shape)
c9.head()

In [None]:
#Import Consultant no.10 dataset

# Columns dropped from the annotated sheet
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c10 = (
    pd.read_csv('./p10.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})  #rename Mean to MAP
)

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
c10 = c10.assign(
    Adrenaline=c10['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c10['Noradrenaline'].replace(np.nan, 0),
)

# Add the numeric multiclass / binary label columns
c10 = num_labels(c10)

print(c10.shape)
c10.head()

In [None]:
#Import Consultant no.11 dataset

# Columns dropped from the annotated sheet
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c11 = (
    pd.read_excel('./p11.xlsx')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})  #rename Mean to MAP
)

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
# Annotations are upper-cased because num_labels only recognises 'A'..'E'.
c11 = c11.assign(
    Adrenaline=c11['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c11['Noradrenaline'].replace(np.nan, 0),
    Annotation=c11['Annotation'].str.upper(),
)

# Add the numeric multiclass / binary label columns
c11 = num_labels(c11)

print(c11.shape)
c11.head()

In [None]:
#Import Majority MV Consensus Dataset
##See jupyter notebook 'npjDM-MV_Consensus_Dataset' for steps to create this Majority MV Consensus Dataset

# 'Unnamed: 0' is discarded straight after loading
mv = pd.read_csv('MV-Consensus-Dataset.csv').drop(columns=['Unnamed: 0'])

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
mv = mv.assign(
    Adrenaline=mv['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=mv['Noradrenaline'].replace(np.nan, 0),
)

# Add the numeric multiclass / binary label columns
mv = num_labels(mv)

print(mv.shape)
mv.head()

In [None]:
#TMV
##Create a TMV dataset by taking the majority-vote labels across only the expert annotated datasets which generate models that have high internal validation performance (i.e., where internal F1 >= 0.7).
##Top performing models within internal validation: C2, C4, C8

# Columns dropped from each annotated sheet
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']

c2_ann = pd.read_csv('./p02.csv').sort_values(by = ['PseudoID'], ascending=[True])
c2_ann = c2_ann.drop(columns = cols).rename(columns={'Mean': 'MAP'}) #rename Mean to MAP

c4_ann = pd.read_excel('./p04.xlsx').sort_values(by = ['PseudoID'], ascending=[True])
c4_ann = c4_ann.drop(columns = cols).rename(columns={'Mean': 'MAP'}) #rename Mean to MAP

# Consultant 8's annotations live in p08 (the file the c8 dataset is loaded
# from); this previously read p09, i.e. consultant 9's labels.
c8_ann = pd.read_csv('./p08.csv').sort_values(by = ['PseudoID'], ascending=[True])
c8_ann = c8_ann.drop(columns = cols).rename(columns={'Mean': 'MAP'}) #rename Mean to MAP

# Line the three annotators up on the shared feature values. After the merges
# the three remaining columns are the annotators' labels, in merge order.
cols = ['Adrenaline','Noradrenaline','FiO2','SpO2','MAP','HR']
vote_cols = ['c2_ann', 'c4_ann', 'c8_ann']
merged = c2_ann.merge(c4_ann,on=cols).merge(c8_ann,on=cols)
merged.columns = cols + vote_cols

# Row-wise majority vote. mode() can return several values for a row when
# there is no majority; column 0 is the one that is kept.
ann_top = pd.DataFrame({'Annotation': merged[vote_cols].mode(axis=1)[0]})

# Features followed by the consensus label
tmv = pd.concat([merged[cols], ann_top],axis=1)

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
tmv['Adrenaline'] = tmv['Adrenaline'].replace(np.nan, 0)
tmv['Noradrenaline'] = tmv['Noradrenaline'].replace(np.nan, 0)

tmv = num_labels(tmv)

print(tmv.shape)
tmv.head()

# 2. Internal Validation (DT QEUH Models)

In [None]:
#Define Parameter Grid for hyperparameter optimisation
##Create a dictionary with all DT parameter options

# 'auto' is no longer listed under max_features: for DecisionTreeClassifier it
# was an alias of 'sqrt' (so it only duplicated that grid point) and it has
# been deprecated and then removed in newer scikit-learn releases.
parameters = {
    'max_depth': [1, 2, 3, 4, 5, 6, 7, 9, 11, 12, None],
    'max_features': ['sqrt', 'log2', None],
    'criterion': ['gini', 'entropy'],
}

In [None]:
#Define Function - DT Model Evaluation via 5-fold CV

def do_cv_learning_dt(X, y, verbose=False, do_scale=False, random_state=1):
    """Evaluate a grid-searched decision tree with stratified 5-fold CV.

    For every outer fold a GridSearchCV (inner 5-fold, scored with
    ``f1_micro``) over the module-level ``parameters`` grid is fitted on the
    training part, and the best estimator is scored on the held-out part with
    micro-averaged F1.

    Parameters
    ----------
    X : array-like, one row per sample
    y : array-like of class labels
    verbose : bool
        Print the best hyperparameters found in each outer fold.
    do_scale : bool
        Standardise the features. The scaler is fitted on each training fold
        only, so no statistics of the held-out fold leak into training.
    random_state : int
        Seed for the outer fold shuffling and for the decision trees
        (previously accepted but ignored; the default reproduces the old
        hard-coded value).

    Returns
    -------
    pandas.DataFrame
        One row with columns Annotator ('ann' placeholder), Model,
        Optimisation, F1_micro (mean over folds) and S.D.
    """
    # Positional fold indices are applied with X[train]; make sure that works
    # for any array-like input.
    X = np.asarray(X)
    y = np.asarray(y)

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
    f1s = []

    for i, (train, test) in enumerate(cv.split(X, y)):
        X_tr, X_te = X[train], X[test]
        if do_scale:
            sc = StandardScaler().fit(X_tr)
            X_tr = sc.transform(X_tr)
            X_te = sc.transform(X_te)

        gcsv = GridSearchCV(DecisionTreeClassifier(random_state=random_state),
                            param_grid=parameters,
                            cv=5,
                            scoring='f1_micro')
        grid_result = gcsv.fit(X_tr, y[train])
        if verbose:
            print('fold', i, 'best_params', grid_result.best_params_)
        clf = grid_result.best_estimator_
        f1s.append(metrics.f1_score(y[test], clf.predict(X_te), average='micro'))

    ##Performance metrics, returned as a one-row DF
    dfdt_multi_f1data = pd.DataFrame(
        data=[['ann', 'multi', 'F1_micro', np.mean(f1s), np.std(f1s)]])
    dfdt_multi_f1data.columns = ['Annotator','Model','Optimisation','F1_micro','S.D.']

    return dfdt_multi_f1data

In [None]:
#Define Function - Find highest performing model after 5-fold CV

def model_opt_dt(X, y, verbose=False, do_scale=False, random_state=1):
    """Return the grid-searched decision tree from the best outer CV fold.

    Runs stratified 5-fold CV; in every fold a GridSearchCV (inner 5-fold,
    scored with ``f1_micro``) over the module-level ``parameters`` grid is
    fitted on the training part and its best estimator is scored on the
    held-out part with micro-averaged F1. The estimator with the highest
    held-out F1 is returned (the earliest fold wins a tie).

    Parameters
    ----------
    X : array-like, one row per sample
    y : array-like of class labels
    verbose : bool
        Print the best hyperparameters found in each outer fold.
    do_scale : bool
        Standardise the features before splitting.
    random_state : int
        Seed for the outer fold shuffling and for the decision trees
        (previously accepted but ignored; the default reproduces the old
        hard-coded value).

    Returns
    -------
    DecisionTreeClassifier
        The fitted best estimator of the winning fold.
    """
    # Positional fold indices are applied with X[train]; make sure that works
    # for any array-like input.
    X = np.asarray(X)
    y = np.asarray(y)

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
    f1s = []
    models = []

    if do_scale:
        # NOTE(review): the scaler is fitted on all rows (so held-out folds
        # influence it) and is not returned; a caller using do_scale=True has
        # to standardise new data the same way before calling predict().
        X = StandardScaler().fit_transform(X)

    for i, (train, test) in enumerate(cv.split(X, y)):
        gcsv = GridSearchCV(DecisionTreeClassifier(random_state=random_state),
                            param_grid=parameters,
                            cv=5,
                            scoring='f1_micro')
        grid_result = gcsv.fit(X[train], y[train])
        if verbose:
            print('fold', i, 'best_params', grid_result.best_params_)
        clf = grid_result.best_estimator_
        f1s.append(metrics.f1_score(y[test], clf.predict(X[test]), average='micro'))
        models.append(clf)

    #find opt model - argmax returns the first maximum, like list.index(max(...))
    opt_model = models[int(np.argmax(f1s))]

    return opt_model

In [None]:
#C1 - IntVal

# First six columns are taken as features and column 7 as the label
# (presumably Annotation_Num - depends on the column order of the source file).
array = c1.to_numpy()
X = array[:, :6].astype(float)
y = array[:, 7].astype(int)

# NOTE(review): LabelEncoder re-indexes the labels to 0..k-1. If this annotator
# never used one of the classes, the encoded values no longer equal
# Annotation_Num - confirm before scoring against externally coded labels.
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c1dt_multi_f1data = do_cv_learning_dt(X, y).assign(Annotator='c1')

#Opt model
c1dt_multi_opt = model_opt_dt(X, y)

print(c1dt_multi_opt)
c1dt_multi_f1data

In [None]:
#C2 - IntVal

# First six columns are taken as features and column 7 as the label
# (presumably Annotation_Num - depends on the column order of the source file).
array = c2.to_numpy()
X = array[:, :6].astype(float)
y = array[:, 7].astype(int)

# NOTE(review): LabelEncoder re-indexes the labels to 0..k-1. If this annotator
# never used one of the classes, the encoded values no longer equal
# Annotation_Num - confirm before scoring against externally coded labels.
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c2dt_multi_f1data = do_cv_learning_dt(X, y).assign(Annotator='c2')

#Opt model
c2dt_multi_opt = model_opt_dt(X, y)

print(c2dt_multi_opt)
c2dt_multi_f1data

In [None]:
#C3 - IntVal

# First six columns are taken as features and column 7 as the label
# (presumably Annotation_Num - depends on the column order of the source file).
array = c3.to_numpy()
X = array[:, :6].astype(float)
y = array[:, 7].astype(int)

# NOTE(review): LabelEncoder re-indexes the labels to 0..k-1. If this annotator
# never used one of the classes, the encoded values no longer equal
# Annotation_Num - confirm before scoring against externally coded labels.
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c3dt_multi_f1data = do_cv_learning_dt(X, y).assign(Annotator='c3')

#Opt model
c3dt_multi_opt = model_opt_dt(X, y)

print(c3dt_multi_opt)
c3dt_multi_f1data

In [None]:
#C4 - IntVal

# First six columns are taken as features and column 7 as the label
# (presumably Annotation_Num - depends on the column order of the source file).
array = c4.to_numpy()
X = array[:, :6].astype(float)
y = array[:, 7].astype(int)

# NOTE(review): LabelEncoder re-indexes the labels to 0..k-1. If this annotator
# never used one of the classes, the encoded values no longer equal
# Annotation_Num - confirm before scoring against externally coded labels.
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c4dt_multi_f1data = do_cv_learning_dt(X, y).assign(Annotator='c4')

#Opt model
c4dt_multi_opt = model_opt_dt(X, y)

print(c4dt_multi_opt)
c4dt_multi_f1data

In [None]:
#C5 - IntVal

# First six columns are taken as features and column 7 as the label
# (presumably Annotation_Num - depends on the column order of the source file).
array = c5.to_numpy()
X = array[:, :6].astype(float)
y = array[:, 7].astype(int)

# NOTE(review): LabelEncoder re-indexes the labels to 0..k-1. If this annotator
# never used one of the classes, the encoded values no longer equal
# Annotation_Num - confirm before scoring against externally coded labels.
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c5dt_multi_f1data = do_cv_learning_dt(X, y).assign(Annotator='c5')

#Opt model
c5dt_multi_opt = model_opt_dt(X, y)

print(c5dt_multi_opt)
c5dt_multi_f1data

In [None]:
#C6 - IntVal

# First six columns are taken as features and column 7 as the label
# (presumably Annotation_Num - depends on the column order of the source file).
array = c6.to_numpy()
X = array[:, :6].astype(float)
y = array[:, 7].astype(int)

# NOTE(review): LabelEncoder re-indexes the labels to 0..k-1. If this annotator
# never used one of the classes, the encoded values no longer equal
# Annotation_Num - confirm before scoring against externally coded labels.
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c6dt_multi_f1data = do_cv_learning_dt(X, y).assign(Annotator='c6')

#Opt model
c6dt_multi_opt = model_opt_dt(X, y)

print(c6dt_multi_opt)
c6dt_multi_f1data

In [None]:
#C7 - IntVal

# First six columns are taken as features and column 7 as the label
# (presumably Annotation_Num - depends on the column order of the source file).
array = c7.to_numpy()
X = array[:, :6].astype(float)
y = array[:, 7].astype(int)

# NOTE(review): LabelEncoder re-indexes the labels to 0..k-1. If this annotator
# never used one of the classes, the encoded values no longer equal
# Annotation_Num - confirm before scoring against externally coded labels.
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c7dt_multi_f1data = do_cv_learning_dt(X, y).assign(Annotator='c7')

#Opt model
c7dt_multi_opt = model_opt_dt(X, y)

print(c7dt_multi_opt)
c7dt_multi_f1data

In [None]:
#C8 - IntVal

# First six columns are taken as features and column 7 as the label
# (presumably Annotation_Num - depends on the column order of the source file).
array = c8.to_numpy()
X = array[:, :6].astype(float)
y = array[:, 7].astype(int)

# NOTE(review): LabelEncoder re-indexes the labels to 0..k-1. If this annotator
# never used one of the classes, the encoded values no longer equal
# Annotation_Num - confirm before scoring against externally coded labels.
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c8dt_multi_f1data = do_cv_learning_dt(X, y).assign(Annotator='c8')

#Opt model
c8dt_multi_opt = model_opt_dt(X, y)

print(c8dt_multi_opt)
c8dt_multi_f1data

In [None]:
#C9 - IntVal

# First six columns are taken as features and column 7 as the label
# (presumably Annotation_Num - depends on the column order of the source file).
array = c9.to_numpy()
X = array[:, :6].astype(float)
y = array[:, 7].astype(int)

# NOTE(review): LabelEncoder re-indexes the labels to 0..k-1. If this annotator
# never used one of the classes, the encoded values no longer equal
# Annotation_Num - confirm before scoring against externally coded labels.
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c9dt_multi_f1data = do_cv_learning_dt(X, y).assign(Annotator='c9')

#Opt model
c9dt_multi_opt = model_opt_dt(X, y)

print(c9dt_multi_opt)
c9dt_multi_f1data

In [None]:
#c10 - IntVal

# First six columns are taken as features and column 7 as the label
# (presumably Annotation_Num - depends on the column order of the source file).
array = c10.to_numpy()
X = array[:, :6].astype(float)
y = array[:, 7].astype(int)

# NOTE(review): LabelEncoder re-indexes the labels to 0..k-1. If this annotator
# never used one of the classes, the encoded values no longer equal
# Annotation_Num - confirm before scoring against externally coded labels.
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c10dt_multi_f1data = do_cv_learning_dt(X, y).assign(Annotator='c10')

#Opt model
c10dt_multi_opt = model_opt_dt(X, y)

print(c10dt_multi_opt)
c10dt_multi_f1data

In [None]:
#c11 - IntVal

# First six columns are taken as features and column 7 as the label
# (presumably Annotation_Num - depends on the column order of the source file).
array = c11.to_numpy()
X = array[:, :6].astype(float)
y = array[:, 7].astype(int)

# NOTE(review): LabelEncoder re-indexes the labels to 0..k-1. If this annotator
# never used one of the classes, the encoded values no longer equal
# Annotation_Num - confirm before scoring against externally coded labels.
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c11dt_multi_f1data = do_cv_learning_dt(X, y).assign(Annotator='c11')

#Opt model
c11dt_multi_opt = model_opt_dt(X, y)

print(c11dt_multi_opt)
c11dt_multi_f1data

In [None]:
#MV - IntVal

# First six columns are taken as features and column 7 as the label
# (presumably Annotation_Num - depends on the column order of the source file).
array = mv.to_numpy()
X = array[:, :6].astype(float)
y = array[:, 7].astype(int)

# NOTE(review): LabelEncoder re-indexes the labels to 0..k-1. If one of the
# classes never occurs in this dataset, the encoded values no longer equal
# Annotation_Num - confirm before scoring against externally coded labels.
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
mvdt_multi_f1data = do_cv_learning_dt(X, y).assign(Annotator='MV')

#Opt model
mvdt_multi_opt = model_opt_dt(X, y)

print(mvdt_multi_opt)
mvdt_multi_f1data

In [None]:
#TMV - IntVal

# First six columns are the features and column 7 is Annotation_Num
# (the TMV frame is built as six features + Annotation before num_labels runs).
array = tmv.to_numpy()
X = array[:, :6].astype(float)
y = array[:, 7].astype(int)

# NOTE(review): LabelEncoder re-indexes the labels to 0..k-1. If one of the
# classes never occurs in this dataset, the encoded values no longer equal
# Annotation_Num - confirm before scoring against externally coded labels.
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
tmvdt_multi_f1data = do_cv_learning_dt(X, y).assign(Annotator='TMV')

#Opt model
tmvdt_multi_opt = model_opt_dt(X, y)

print(tmvdt_multi_opt)
tmvdt_multi_f1data

In [None]:
#Internal Validation Performance - Summary

# One single-row result frame per annotator, then the two consensus datasets
frames = [
    c1dt_multi_f1data,
    c2dt_multi_f1data,
    c3dt_multi_f1data,
    c4dt_multi_f1data,
    c5dt_multi_f1data,
    c6dt_multi_f1data,
    c7dt_multi_f1data,
    c8dt_multi_f1data,
    c9dt_multi_f1data,
    c10dt_multi_f1data,
    c11dt_multi_f1data,
    mvdt_multi_f1data,
    tmvdt_multi_f1data,
]

# Stack them into one table (the original row indices are kept)
multi_int = pd.concat(frames)

print(multi_int.shape)
multi_int

In [None]:
#Plot chart - Internal Validation

plt.style.use('ggplot')

#Define x and y data
x1, y1 = multi_int['Annotator'], multi_int['F1_micro']

#Plot chart data
plt.figure(figsize=(8, 2.5))
plt.plot(x1, y1, color='#1F57C8', marker='o', linestyle="solid", label='Multi')

# y axis runs from 0 to 1.1 with a tick every 0.2
plt.ylim(0.0, 1.1)
plt.yticks(np.arange(0.0, 1.01, 0.2), fontsize=14)

#Add title and labels
plt.title('Internal Validation: Multiclass - DT', fontsize=14)
plt.xlabel('Annotator', fontsize=14)
plt.ylabel('F1_micro', fontsize=14)
plt.xticks(fontsize=12)
plt.grid(True)
plt.tight_layout()

plt.show()

# 3. External Validation Experiments

### 3.1 Static HiRID Validation Dataset

In [None]:
#Create Static HiRID Validation Dataset, using Temporal HiRID Validation Dataset
#See jupyter notebook 'npjDM-HiRID_ExtVal_Dataset' to see steps on creating the Temporal HiRID Validation Dataset

hirid_stat = pd.read_csv("hirid_extval_temporal_cohort.csv").drop(columns=['Unnamed: 0'])

#generate binary discharge_status: 'alive' -> 0, anything else -> 4
hirid_stat['binary_discharge'] = np.where(hirid_stat['discharge_status'] == 'alive', 0, 4)

#order by patient, then only keep records for 1hr before discharge/death
hirid_stat = (
    hirid_stat
    .sort_values(by='patientid', ascending=True)
    .loc[lambda frame: frame['Hrs before d_time'] == 1]
)

print(hirid_stat.shape)
print('Number of patients:', hirid_stat.patientid.nunique())
hirid_stat.head(10)

In [None]:
#checks: number of distinct patients, and which 'Hrs before d_time' values remain
print(hirid_stat['patientid'].nunique())
print(hirid_stat['Hrs before d_time'].value_counts())

In [None]:
#Define hirid (static) test dataset

array = hirid_stat.to_numpy()

# Columns 2-7 are used as features and column 9 as the label
# (presumably the six vitals/drug fields and binary_discharge - this relies on
# the column order of the CSV; TODO confirm).
X_test = array[:, 2:8].astype(float)
y_test = array[:, 9].astype(int)

print(X_test.shape)
print(y_test.shape)

In [None]:
# Display the static HiRID feature matrix for a visual check
X_test

In [None]:
#C1 - Static Ext val

# Micro-averaged F1 of the C1 model's predictions on the static HiRID test set
f1 = metrics.f1_score(y_test, c1dt_multi_opt.predict(X_test), average='micro')

##print data as DF
c1dt_multi_ext = pd.DataFrame(
    [['c1', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c1dt_multi_ext

In [None]:
#C1 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c1dt_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues,
                             ax=ax)

# Pin the label set so the matrix is always 5x5 (class codes 0-4). Without it
# scikit-learn only includes labels that occur in y_test or the predictions,
# and the positional reads below would shift or raise IndexError.
cnf_matrix = confusion_matrix(list(y_test), c1dt_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])

# Column sums = number of test rows assigned to each predicted class
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c1_A_pred, c1_B_pred, c1_C_pred, c1_D_pred, c1_E_pred = pred_labels
print(c1_A_pred,c1_B_pred,c1_C_pred,c1_D_pred,c1_E_pred)

In [None]:
#C2 - Static Ext val

# Micro-averaged F1 of the C2 model's predictions on the static HiRID test set
f1 = metrics.f1_score(y_test, c2dt_multi_opt.predict(X_test), average='micro')

##print data as DF
c2dt_multi_ext = pd.DataFrame(
    [['c2', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c2dt_multi_ext

In [None]:
# Distinct class codes the C2 model predicts on the static HiRID test set
set(c2dt_multi_opt.predict(X_test))

In [None]:
#C2 - plot confusion matrix

# Draw the C2 model's confusion matrix on the static HiRID test set
fig, ax = plt.subplots(figsize=(5, 5))
plt.rcParams['font.size'] = 16
disp = plot_confusion_matrix(c2dt_multi_opt, X_test, list(y_test), ax=ax, cmap=plt.cm.Blues)

In [None]:
# Confusion matrix of the C2 model on the static HiRID test set
cnf_matrix = confusion_matrix(list(y_test), c2dt_multi_opt.predict(X_test))

# Column sums = number of test rows assigned to each predicted class
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)

# NOTE(review): the positions below are hard-coded (C forced to 0, D and E read
# from positions 2 and 3), which presumably reflects the label set seen when
# this cell was last run. confusion_matrix orders its columns by the sorted
# labels present in y_test or the predictions, so re-check this mapping
# whenever the model or the test data changes.
c2_A_pred = pred_labels[0]
c2_B_pred = pred_labels[1]
c2_C_pred = 0
c2_D_pred = pred_labels[2]
c2_E_pred = pred_labels[3]
print(c2_A_pred,c2_B_pred,c2_C_pred,c2_D_pred,c2_E_pred)

In [None]:
#C3 - Static Ext val

# Micro-averaged F1 of the C3 model's predictions on the static HiRID test set
f1 = metrics.f1_score(y_test, c3dt_multi_opt.predict(X_test), average='micro')

##print data as DF
c3dt_multi_ext = pd.DataFrame(
    [['c3', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c3dt_multi_ext

In [None]:
#C3 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c3dt_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues,
                             ax=ax)

# Pin the label set so the matrix is always 5x5 (class codes 0-4). Without it
# scikit-learn only includes labels that occur in y_test or the predictions,
# and the positional reads below would shift or raise IndexError.
cnf_matrix = confusion_matrix(list(y_test), c3dt_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])

# Column sums = number of test rows assigned to each predicted class
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c3_A_pred, c3_B_pred, c3_C_pred, c3_D_pred, c3_E_pred = pred_labels
print(c3_A_pred,c3_B_pred,c3_C_pred,c3_D_pred,c3_E_pred)

In [None]:
#C4 - Static Ext val

# Micro-averaged F1 of the C4 model's predictions on the static HiRID test set
f1 = metrics.f1_score(y_test, c4dt_multi_opt.predict(X_test), average='micro')

##print data as DF
c4dt_multi_ext = pd.DataFrame(
    [['c4', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c4dt_multi_ext

In [None]:
#C4 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c4dt_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues,
                             ax=ax)

# Pin the label set so the matrix is always 5x5 (class codes 0-4). Without it
# scikit-learn only includes labels that occur in y_test or the predictions,
# and the positional reads below would shift or raise IndexError.
cnf_matrix = confusion_matrix(list(y_test), c4dt_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])

# Column sums = number of test rows assigned to each predicted class
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c4_A_pred, c4_B_pred, c4_C_pred, c4_D_pred, c4_E_pred = pred_labels
print(c4_A_pred,c4_B_pred,c4_C_pred,c4_D_pred,c4_E_pred)

In [None]:
#C5 - Static Ext val

# Micro-averaged F1 of the C5 model's predictions on the static HiRID test set
f1 = metrics.f1_score(y_test, c5dt_multi_opt.predict(X_test), average='micro')

##print data as DF
c5dt_multi_ext = pd.DataFrame(
    [['c5', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c5dt_multi_ext

In [None]:
#C5 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c5dt_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues,
                             ax=ax)

# Pin the label set so the matrix is always 5x5 (class codes 0-4). Without it
# scikit-learn only includes labels that occur in y_test or the predictions,
# and the positional reads below would shift or raise IndexError.
cnf_matrix = confusion_matrix(list(y_test), c5dt_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])

# Column sums = number of test rows assigned to each predicted class
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c5_A_pred, c5_B_pred, c5_C_pred, c5_D_pred, c5_E_pred = pred_labels
print(c5_A_pred,c5_B_pred,c5_C_pred,c5_D_pred,c5_E_pred)

In [None]:
#C6 - Static Ext val

# Micro-averaged F1 of the C6 model's predictions on the static HiRID test set
f1 = metrics.f1_score(y_test, c6dt_multi_opt.predict(X_test), average='micro')

##print data as DF
c6dt_multi_ext = pd.DataFrame(
    [['c6', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c6dt_multi_ext

In [None]:
#C6 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c6dt_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues,
                             ax=ax)

# Pin the label set so the matrix is always 5x5 (class codes 0-4). Without it
# scikit-learn only includes labels that occur in y_test or the predictions,
# and the positional reads below would shift or raise IndexError.
cnf_matrix = confusion_matrix(list(y_test), c6dt_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])

# Column sums = number of test rows assigned to each predicted class
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c6_A_pred, c6_B_pred, c6_C_pred, c6_D_pred, c6_E_pred = pred_labels
print(c6_A_pred,c6_B_pred,c6_C_pred,c6_D_pred,c6_E_pred)

In [None]:
#C7 - Static Ext val

# Micro-averaged F1 of the C7 model's predictions on the static HiRID test set
f1 = metrics.f1_score(y_test, c7dt_multi_opt.predict(X_test), average='micro')

##print data as DF
c7dt_multi_ext = pd.DataFrame(
    [['c7', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c7dt_multi_ext

In [None]:
#C7 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c7dt_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues,
                             ax=ax)

# Pin the label set so the matrix is always 5x5 (class codes 0-4). Without it
# scikit-learn only includes labels that occur in y_test or the predictions,
# and the positional reads below would shift or raise IndexError.
cnf_matrix = confusion_matrix(list(y_test), c7dt_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])

# Column sums = number of test rows assigned to each predicted class
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c7_A_pred, c7_B_pred, c7_C_pred, c7_D_pred, c7_E_pred = pred_labels
print(c7_A_pred,c7_B_pred,c7_C_pred,c7_D_pred,c7_E_pred)

In [None]:
#C8 - Static Ext val

# Micro-averaged F1 of the C8 model's predictions on the static HiRID test set
f1 = metrics.f1_score(y_test, c8dt_multi_opt.predict(X_test), average='micro')

##print data as DF
c8dt_multi_ext = pd.DataFrame(
    [['c8', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c8dt_multi_ext

In [None]:
#C8 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c8dt_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues,
                             ax=ax)

# Pin the label set so the matrix is always 5x5 (class codes 0-4). Without it
# scikit-learn only includes labels that occur in y_test or the predictions,
# and the positional reads below would shift or raise IndexError.
cnf_matrix = confusion_matrix(list(y_test), c8dt_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])

# Column sums = number of test rows assigned to each predicted class
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c8_A_pred, c8_B_pred, c8_C_pred, c8_D_pred, c8_E_pred = pred_labels
print(c8_A_pred,c8_B_pred,c8_C_pred,c8_D_pred,c8_E_pred)

In [None]:
#C9 - Static Ext val

# Micro-averaged F1 of the C9 model's predictions on the static HiRID test set
f1 = metrics.f1_score(y_test, c9dt_multi_opt.predict(X_test), average='micro')

##print data as DF
c9dt_multi_ext = pd.DataFrame(
    [['c9', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c9dt_multi_ext

In [None]:
#C9 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c9dt_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues,
                             ax=ax)

# Pin the label set so the matrix is always 5x5 (class codes 0-4). Without it
# scikit-learn only includes labels that occur in y_test or the predictions,
# and the positional reads below would shift or raise IndexError.
cnf_matrix = confusion_matrix(list(y_test), c9dt_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])

# Column sums = number of test rows assigned to each predicted class
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c9_A_pred, c9_B_pred, c9_C_pred, c9_D_pred, c9_E_pred = pred_labels
print(c9_A_pred,c9_B_pred,c9_C_pred,c9_D_pred,c9_E_pred)

In [None]:
#C10 -Static Ext val

# Micro-averaged F1 of the C10 model's predictions on the static HiRID test set
f1 = metrics.f1_score(y_test, c10dt_multi_opt.predict(X_test), average='micro')

##print data as DF
c10dt_multi_ext = pd.DataFrame(
    [['c10', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c10dt_multi_ext

In [None]:
#C10 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c10dt_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues,
                             ax=ax)

# Pin the label set so the matrix is always 5x5 (class codes 0-4). Without it
# scikit-learn only includes labels that occur in y_test or the predictions,
# and the positional reads below would shift or raise IndexError.
cnf_matrix = confusion_matrix(list(y_test), c10dt_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])

# Column sums = number of test rows assigned to each predicted class
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c10_A_pred, c10_B_pred, c10_C_pred, c10_D_pred, c10_E_pred = pred_labels
print(c10_A_pred,c10_B_pred,c10_C_pred,c10_D_pred,c10_E_pred)

In [None]:
#c11 - Static Ext val

# Micro-averaged F1 of the C11 model's predictions on the static HiRID test set
f1 = metrics.f1_score(y_test, c11dt_multi_opt.predict(X_test), average='micro')

##print data as DF
c11dt_multi_ext = pd.DataFrame(
    [['c11', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c11dt_multi_ext

In [None]:
#C11 - confusion matrix on X_test/y_test, plus the number of samples predicted as each label

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams['font.size'] = 16
disp = plot_confusion_matrix(c11dt_multi_opt, X_test, list(y_test), ax=ax, cmap=plt.cm.Blues)

#column sums of the confusion matrix = how many samples were predicted as each label
cnf_matrix = confusion_matrix(y_true=list(y_test), y_pred=c11dt_multi_opt.predict(X_test))
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c11_A_pred, c11_B_pred, c11_C_pred, c11_D_pred, c11_E_pred = (pred_labels[k] for k in range(5))
print(c11_A_pred,c11_B_pred,c11_C_pred,c11_D_pred,c11_E_pred)

In [None]:
#MV - static external validation: micro-averaged F1 of the MV model on X_test/y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=mvdt_multi_opt.predict(X_test), average='micro')

##one-row results table, shown as the cell output
mvdt_multi_ext = pd.DataFrame(data=[['MV', 'multi', 'F1_micro', f1]],
                              columns=['Annotator','Model','Optimisation','F1_micro'])
mvdt_multi_ext

In [None]:
#MV - confusion matrix on X_test/y_test, plus the number of samples predicted as each label

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams['font.size'] = 16
disp = plot_confusion_matrix(mvdt_multi_opt, X_test, list(y_test), ax=ax, cmap=plt.cm.Blues)

#column sums of the confusion matrix = how many samples were predicted as each label
cnf_matrix = confusion_matrix(y_true=list(y_test), y_pred=mvdt_multi_opt.predict(X_test))
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
mv_A_pred, mv_B_pred, mv_C_pred, mv_D_pred, mv_E_pred = (pred_labels[k] for k in range(5))
print(mv_A_pred,mv_B_pred,mv_C_pred,mv_D_pred,mv_E_pred)

In [None]:
#TMV - static external validation: micro-averaged F1 of the TMV model on X_test/y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=tmvdt_multi_opt.predict(X_test), average='micro')

##one-row results table, shown as the cell output
tmvdt_multi_ext = pd.DataFrame(data=[['TMV', 'multi', 'F1_micro', f1]],
                               columns=['Annotator','Model','Optimisation','F1_micro'])
tmvdt_multi_ext

In [None]:
#TMV - distinct labels the TMV model predicts on X_test
set(tmvdt_multi_opt.predict(X_test))

In [None]:
#TMV - confusion matrix plot on X_test/y_test

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams['font.size'] = 16
disp = plot_confusion_matrix(tmvdt_multi_opt, X_test, list(y_test), ax=ax, cmap=plt.cm.Blues)

In [None]:
#TMV - number of samples predicted as each label (column sums of the confusion matrix)
#The label set is pinned to 0-4 (A-E) so every class keeps its own column even when it is absent
#from both y_test and the predictions; a class that is never predicted simply counts as 0.
#Without this, a missing class shifts the column positions and the counts land on the wrong label.
#NOTE(review): assumes y_test and the model output use the integer codes 0-4 - confirm
cnf_matrix = confusion_matrix(list(y_test), tmvdt_multi_opt.predict(X_test), labels=[0, 1, 2, 3, 4])

pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)

tmv_A_pred = pred_labels[0]
tmv_B_pred = pred_labels[1]
tmv_C_pred = pred_labels[2]
tmv_D_pred = pred_labels[3]
tmv_E_pred = pred_labels[4]
print(tmv_A_pred,tmv_B_pred,tmv_C_pred,tmv_D_pred,tmv_E_pred)

In [None]:
#External Validation Performances (Static) - Summary
##stack the one-row result tables of all 13 models into a single table

frames = [
    c1dt_multi_ext, c2dt_multi_ext, c3dt_multi_ext, c4dt_multi_ext, c5dt_multi_ext,
    c6dt_multi_ext, c7dt_multi_ext, c8dt_multi_ext, c9dt_multi_ext, c10dt_multi_ext,
    c11dt_multi_ext, mvdt_multi_ext, tmvdt_multi_ext,
]
multi_ext_stat = pd.concat(frames, axis=0)

print(multi_ext_stat.shape)
multi_ext_stat

### 3.2 Temporal HiRID Validation Dataset

In [None]:
#Load the Temporal HiRID Validation Dataset
##The jupyter notebook 'npjDM-HiRID_ExtVal_Dataset' shows the steps used to create this dataset

hirid_val = pd.read_csv("hirid_extval_temporal_cohort.csv").drop(columns=['Unnamed: 0'])

#binary discharge status: 0 where discharge_status is 'alive', 1 for anything else
hirid_val['binary_discharge'] = np.where(hirid_val['discharge_status'] == 'alive', 0, 1)
hirid_val = hirid_val.sort_values(by='patientid', ascending=True)

print(hirid_val.shape)
print('Number of patients:', hirid_val['patientid'].nunique())
hirid_val.head(20)

In [None]:
#Load the full HiRID patient table (holds the discharge status) over the open DB connection

pat = pd.read_sql_query(sql="SELECT * FROM hirid.patient", con=conn)

print(pat.shape)
pat.head()

In [None]:
#restrict the patient table to the patients in the validation cohort and count their discharge statuses
pat_list = set(hirid_val['patientid'])

pat_rel = pat.loc[pat['patientid'].isin(pat_list)]
pat_rel['discharge_status'].value_counts()

In [None]:
#row counts per discharge status, as text and as the derived binary flag
print(hirid_val['discharge_status'].value_counts())
print(hirid_val['binary_discharge'].value_counts())

In [None]:
#Define HiRID temporal validation data - X_test
##the columns at positions 2-7 of hirid_val are taken as model inputs and cast to float

array = hirid_val.to_numpy()
X_test = array[:, 2:8].astype(float)

In [None]:
#display the X_test feature array
X_test

In [None]:
#predict a label for every row of the temporal dataset with each of the 13 models,
#storing each model's output in its own '<annotator>_pred' column
pred_models = [
    ('c1_pred', c1dt_multi_opt), ('c2_pred', c2dt_multi_opt), ('c3_pred', c3dt_multi_opt),
    ('c4_pred', c4dt_multi_opt), ('c5_pred', c5dt_multi_opt), ('c6_pred', c6dt_multi_opt),
    ('c7_pred', c7dt_multi_opt), ('c8_pred', c8dt_multi_opt), ('c9_pred', c9dt_multi_opt),
    ('c10_pred', c10dt_multi_opt), ('c11_pred', c11dt_multi_opt),
    ('mv_pred', mvdt_multi_opt), ('tmv_pred', tmvdt_multi_opt),
]
for pred_col, annot_model in pred_models:
    hirid_val[pred_col] = annot_model.predict(X_test)

print(hirid_val.shape)
hirid_val.head()

In [None]:
#list the column names of hirid_val
hirid_val.columns

### 3.2.1 Approach 1: Weighted Sum

In [None]:
val_ws = hirid_val.copy(deep=True)

#weight for each hour before the event (discharge/death) - with higher bias towards hours nearer the event
#the column starts as float 0.0 so assigning the fractional weights never changes its dtype
#(writing floats into an int column is deprecated in recent pandas); hours outside 1-5 keep weight 0
val_ws['weight'] = 0.0
for hrs_before, hr_weight in ((1, 0.3), (2, 0.3), (3, 0.2), (4, 0.1), (5, 0.1)):
    val_ws.loc[val_ws['Hrs before d_time'] == hrs_before, 'weight'] = hr_weight

#prediction columns to be weighted
cols = ['c1_pred','c2_pred', 'c3_pred', 'c4_pred', 'c5_pred', 'c6_pred', 'c7_pred','c8_pred', 'c9_pred', 
        'c10_pred', 'c11_pred', 'mv_pred', 'tmv_pred']

#relabel 0-4 predicted labels to 1-5 (still representing A-E) so that label A is not multiplied away as 0
val_ws[cols] = val_ws[cols].replace({0:1, 1:2, 2:3, 3:4, 4:5})

#weight x prediction, stored as '<annotator>_ws_pred'
for pred_col in cols:
    val_ws[pred_col.replace('_pred', '_ws_pred')] = val_ws[pred_col]*val_ws['weight']

#drop original pred columns
val_ws = val_ws.drop(cols,axis=1)

print(val_ws.shape)
val_ws.head()

In [None]:
#weighted sum of each annotator model's predictions per patient
##every result is a two-column frame: patientid + the summed '<annotator>_ws_pred'

ws_by_patient = val_ws.groupby(by=['patientid'])

c1_ws = ws_by_patient['c1_ws_pred'].sum().reset_index()
c2_ws = ws_by_patient['c2_ws_pred'].sum().reset_index()
c3_ws = ws_by_patient['c3_ws_pred'].sum().reset_index()
c4_ws = ws_by_patient['c4_ws_pred'].sum().reset_index()
c5_ws = ws_by_patient['c5_ws_pred'].sum().reset_index()
c6_ws = ws_by_patient['c6_ws_pred'].sum().reset_index()
c7_ws = ws_by_patient['c7_ws_pred'].sum().reset_index()
c8_ws = ws_by_patient['c8_ws_pred'].sum().reset_index()
c9_ws = ws_by_patient['c9_ws_pred'].sum().reset_index()
c10_ws = ws_by_patient['c10_ws_pred'].sum().reset_index()
c11_ws = ws_by_patient['c11_ws_pred'].sum().reset_index()
mv_ws = ws_by_patient['mv_ws_pred'].sum().reset_index()
tmv_ws = ws_by_patient['tmv_ws_pred'].sum().reset_index()

c7_ws

In [None]:
#join the per-annotator weighted-sum frames into one table keyed on patientid

ann_pred = c1_ws
for ws_frame in (c2_ws, c3_ws, c4_ws, c5_ws, c6_ws, c7_ws, c8_ws,
                 c9_ws, c10_ws, c11_ws, mv_ws, tmv_ws):
    ann_pred = ann_pred.merge(ws_frame, on=['patientid'])
ann_pred = ann_pred.sort_values(by='patientid', ascending=True)
ann_pred
#range of all weighted sum pred labels: 1-5

In [None]:
#Check range of all weighted-sum columns
##min and max are both rounded to 3 decimals so floating-point noise in the sums does not clutter the output
for annot in ['c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10', 'c11', 'mv', 'tmv']:
    ws_scores = ann_pred[annot + '_ws_pred']
    print(annot + ' range:', round(ws_scores.min(),3), '-', round(ws_scores.max(),3) ) 

In [None]:
#Function defining extreme(1) cut-off 

def ex1_cat(row, col, ndigits=6):
    """Map a weighted-sum score to an outcome label using the extreme(1) cut-offs.

    The score is rounded before it is compared: it is a sum of products with
    fractional weights, so a value that is mathematically 1 (or 4) can be off
    by one floating-point step and would otherwise fail the exact comparison.

    Parameters
    ----------
    row : mapping or pandas.Series
        Record holding the score; only ``row[col]`` is read.
    col : str
        Key/column of the weighted-sum score.
    ndigits : int, default 6
        Decimal places the score is rounded to before applying the cut-offs.

    Returns
    -------
    int
        0 if the score equals 1 (discharged alive), 1 if it is greater than 4
        (died), 3 for every other score (neither cut-off met, including NaN).
    """
    score = round(row[col], ndigits)
    if score == 1:
        return 0
    if score > 4:
        return 1
    return 3

In [None]:
#convert weighted-sum scores to outcome labels using the extreme(1) rules: 1 = discharged alive, >4 = died

ann_ex1_pred = ann_pred.copy(deep=True)

#weighted-sum columns to convert; they are dropped again once the labels are derived
cols = ['c1_ws_pred', 'c2_ws_pred', 'c3_ws_pred','c4_ws_pred', 'c5_ws_pred', 'c6_ws_pred', 'c7_ws_pred',
       'c8_ws_pred', 'c9_ws_pred', 'c10_ws_pred', 'c11_ws_pred','mv_ws_pred', 'tmv_ws_pred']

#one '<annotator>_ex1_pred' label column per weighted-sum column, computed row by row
for ws_col in cols:
    ann_ex1_pred[ws_col.replace('_ws_pred', '_ex1_pred')] = ann_ex1_pred.apply(ex1_cat, axis=1, args=(ws_col,))

ann_ex1_pred = ann_ex1_pred.drop(columns=cols)

print(ann_ex1_pred.shape)
ann_ex1_pred

In [None]:
#Function defining extreme(2) cut-off 

def ex2_cat(row, col, ndigits=6):
    """Map a weighted-sum score to an outcome label using the extreme(2) cut-offs.

    The score is rounded before it is compared: it is a sum of products with
    fractional weights, so a value that is mathematically 2 (or 4) can be off
    by one floating-point step and would otherwise land on the wrong side of
    the cut-off.

    Parameters
    ----------
    row : mapping or pandas.Series
        Record holding the score; only ``row[col]`` is read.
    col : str
        Key/column of the weighted-sum score.
    ndigits : int, default 6
        Decimal places the score is rounded to before applying the cut-offs.

    Returns
    -------
    int
        0 if the score is at most 2 (discharged alive), 1 if it is greater
        than 4 (died), 3 for every other score (neither cut-off met,
        including NaN).
    """
    score = round(row[col], ndigits)
    if score <= 2:
        return 0
    if score > 4:
        return 1
    return 3

In [None]:
#convert weighted-sum scores to outcome labels using the extreme(2) rules: <=2 = discharged alive, >4 = died

ann_ex2_pred = ann_pred.copy(deep=True)

#weighted-sum columns to convert; they are dropped again once the labels are derived
cols = ['c1_ws_pred', 'c2_ws_pred', 'c3_ws_pred','c4_ws_pred', 'c5_ws_pred', 'c6_ws_pred', 'c7_ws_pred',
       'c8_ws_pred', 'c9_ws_pred', 'c10_ws_pred', 'c11_ws_pred','mv_ws_pred', 'tmv_ws_pred']

#one '<annotator>_ex2_pred' label column per weighted-sum column, computed row by row
for ws_col in cols:
    ann_ex2_pred[ws_col.replace('_ws_pred', '_ex2_pred')] = ann_ex2_pred.apply(ex2_cat, axis=1, args=(ws_col,))

ann_ex2_pred = ann_ex2_pred.drop(columns=cols)

print(ann_ex2_pred.shape)
ann_ex2_pred

In [None]:
#Function defining neutral cut-off 

def neut_cat(row, col, ndigits=6):
    """Map a weighted-sum score to an outcome label using the neutral cut-offs.

    The score is rounded before it is compared: it is a sum of products with
    fractional weights, so a value that is mathematically 3 (or 4) can be off
    by one floating-point step and would otherwise land on the wrong side of
    the cut-off.

    Parameters
    ----------
    row : mapping or pandas.Series
        Record holding the score; only ``row[col]`` is read.
    col : str
        Key/column of the weighted-sum score.
    ndigits : int, default 6
        Decimal places the score is rounded to before applying the cut-offs.

    Returns
    -------
    int
        0 if the score is at most 3 (discharged alive), 1 if it is at least 4
        (died), 3 for scores strictly between 3 and 4 (and for NaN).
    """
    score = round(row[col], ndigits)
    if score <= 3:
        return 0
    if score >= 4:
        return 1
    return 3

In [None]:
#convert weighted-sum scores to outcome labels using the neutral rules: <=3 = discharged alive, >=4 = died

ann_neut_pred = ann_pred.copy(deep=True)

#weighted-sum columns to convert; they are dropped again once the labels are derived
cols = ['c1_ws_pred', 'c2_ws_pred', 'c3_ws_pred','c4_ws_pred', 'c5_ws_pred', 'c6_ws_pred', 'c7_ws_pred',
       'c8_ws_pred', 'c9_ws_pred', 'c10_ws_pred', 'c11_ws_pred','mv_ws_pred', 'tmv_ws_pred']

#one '<annotator>_neut_pred' label column per weighted-sum column, computed row by row
for ws_col in cols:
    ann_neut_pred[ws_col.replace('_ws_pred', '_neut_pred')] = ann_neut_pred.apply(neut_cat, axis=1, args=(ws_col,))

ann_neut_pred = ann_neut_pred.drop(columns=cols)

print(ann_neut_pred.shape)
ann_neut_pred

In [None]:
#Define y_test - part1
##reload the cohort file, drop the hourly columns, then de-duplicate and order the rows by patientid

hirid_test = pd.read_csv("hirid_extval_temporal_cohort.csv")
hirid_test = hirid_test.drop(columns=['Unnamed: 0','Hrs before d_time','Adrenaline','Nordrenaline','FiO2','SpO2','MAP','HR'])
hirid_test = hirid_test.drop_duplicates().sort_values(by='patientid', ascending=True)

#binary discharge status: 0 where discharge_status is 'alive', 1 for anything else
hirid_test['binary_discharge'] = np.where(hirid_test['discharge_status'] == 'alive', 0, 1)

print(hirid_test.shape)
print('Number of patients:', hirid_test['patientid'].nunique())
hirid_test.head(20)

In [None]:
#Define y_test - part2
##the column at position 2 of hirid_test is taken as the true label and cast to int

array = hirid_test.to_numpy()
y_test = array[:, 2].astype(int)

#fit a label encoder on the test labels and show the classes it found
le = LabelEncoder()
y_train = le.fit_transform(y_test)
le.classes_

### WS - Extreme (1)

**Cut-offs: 1 = discharged alive, >4 = died**

In [None]:
#c1 - weighted-sum temporal validation, extreme(1) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex1_pred['c1_ex1_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c1dt_ex1_ws = pd.DataFrame(data=[['c1', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c1dt_ex1_ws

In [None]:
#C1 - extreme(1) true positives: join the predicted labels to each patient's true outcome

c1_ex1_pred = ann_ex1_pred[['patientid','c1_ex1_pred']].merge(hirid_test, on='patientid')
c1_ex1_pred = c1_ex1_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c1_ex1_TP_alive = int((c1_ex1_pred['c1_ex1_pred'].eq(0) & c1_ex1_pred['binary_discharge'].eq(0)).sum())
c1_ex1_TP_dead = int((c1_ex1_pred['c1_ex1_pred'].eq(1) & c1_ex1_pred['binary_discharge'].eq(1)).sum())

print(c1_ex1_pred['c1_ex1_pred'].value_counts())
print('C1 TP - Discharged Alive: ', c1_ex1_TP_alive)
print('C1 TP - Discharged Dead: ', c1_ex1_TP_dead)

In [None]:
#c2 - weighted-sum temporal validation, extreme(1) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex1_pred['c2_ex1_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c2dt_ex1_ws = pd.DataFrame(data=[['c2', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c2dt_ex1_ws

In [None]:
#c2 - extreme(1) true positives: join the predicted labels to each patient's true outcome

c2_ex1_pred = ann_ex1_pred[['patientid','c2_ex1_pred']].merge(hirid_test, on='patientid')
c2_ex1_pred = c2_ex1_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c2_ex1_TP_alive = int((c2_ex1_pred['c2_ex1_pred'].eq(0) & c2_ex1_pred['binary_discharge'].eq(0)).sum())
c2_ex1_TP_dead = int((c2_ex1_pred['c2_ex1_pred'].eq(1) & c2_ex1_pred['binary_discharge'].eq(1)).sum())

print(c2_ex1_pred['c2_ex1_pred'].value_counts())
print('c2 TP - Discharged Alive: ', c2_ex1_TP_alive)
print('c2 TP - Discharged Dead: ', c2_ex1_TP_dead)

In [None]:
#c3 - weighted-sum temporal validation, extreme(1) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex1_pred['c3_ex1_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c3dt_ex1_ws = pd.DataFrame(data=[['c3', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c3dt_ex1_ws

In [None]:
#c3 - extreme(1) true positives: join the predicted labels to each patient's true outcome

c3_ex1_pred = ann_ex1_pred[['patientid','c3_ex1_pred']].merge(hirid_test, on='patientid')
c3_ex1_pred = c3_ex1_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c3_ex1_TP_alive = int((c3_ex1_pred['c3_ex1_pred'].eq(0) & c3_ex1_pred['binary_discharge'].eq(0)).sum())
c3_ex1_TP_dead = int((c3_ex1_pred['c3_ex1_pred'].eq(1) & c3_ex1_pred['binary_discharge'].eq(1)).sum())

print(c3_ex1_pred['c3_ex1_pred'].value_counts())
print('c3 TP - Discharged Alive: ', c3_ex1_TP_alive)
print('c3 TP - Discharged Dead: ', c3_ex1_TP_dead)

In [None]:
#c4 - weighted-sum temporal validation, extreme(1) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex1_pred['c4_ex1_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c4dt_ex1_ws = pd.DataFrame(data=[['c4', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c4dt_ex1_ws

In [None]:
#c4 - extreme(1) true positives: join the predicted labels to each patient's true outcome

c4_ex1_pred = ann_ex1_pred[['patientid','c4_ex1_pred']].merge(hirid_test, on='patientid')
c4_ex1_pred = c4_ex1_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c4_ex1_TP_alive = int((c4_ex1_pred['c4_ex1_pred'].eq(0) & c4_ex1_pred['binary_discharge'].eq(0)).sum())
c4_ex1_TP_dead = int((c4_ex1_pred['c4_ex1_pred'].eq(1) & c4_ex1_pred['binary_discharge'].eq(1)).sum())

print(c4_ex1_pred['c4_ex1_pred'].value_counts())
print('c4 TP - Discharged Alive: ', c4_ex1_TP_alive)
print('c4 TP - Discharged Dead: ', c4_ex1_TP_dead)

In [None]:
#c5 - weighted-sum temporal validation, extreme(1) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex1_pred['c5_ex1_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c5dt_ex1_ws = pd.DataFrame(data=[['c5', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c5dt_ex1_ws

In [None]:
#c5 - extreme(1) true positives: join the predicted labels to each patient's true outcome

c5_ex1_pred = ann_ex1_pred[['patientid','c5_ex1_pred']].merge(hirid_test, on='patientid')
c5_ex1_pred = c5_ex1_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c5_ex1_TP_alive = int((c5_ex1_pred['c5_ex1_pred'].eq(0) & c5_ex1_pred['binary_discharge'].eq(0)).sum())
c5_ex1_TP_dead = int((c5_ex1_pred['c5_ex1_pred'].eq(1) & c5_ex1_pred['binary_discharge'].eq(1)).sum())

print(c5_ex1_pred['c5_ex1_pred'].value_counts())
print('c5 TP - Discharged Alive: ', c5_ex1_TP_alive)
print('c5 TP - Discharged Dead: ', c5_ex1_TP_dead)

In [None]:
#c6 - weighted-sum temporal validation, extreme(1) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex1_pred['c6_ex1_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c6dt_ex1_ws = pd.DataFrame(data=[['c6', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c6dt_ex1_ws

In [None]:
#c6 - extreme(1) true positives: join the predicted labels to each patient's true outcome

c6_ex1_pred = ann_ex1_pred[['patientid','c6_ex1_pred']].merge(hirid_test, on='patientid')
c6_ex1_pred = c6_ex1_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c6_ex1_TP_alive = int((c6_ex1_pred['c6_ex1_pred'].eq(0) & c6_ex1_pred['binary_discharge'].eq(0)).sum())
c6_ex1_TP_dead = int((c6_ex1_pred['c6_ex1_pred'].eq(1) & c6_ex1_pred['binary_discharge'].eq(1)).sum())

print(c6_ex1_pred['c6_ex1_pred'].value_counts())
print('c6 TP - Discharged Alive: ', c6_ex1_TP_alive)
print('c6 TP - Discharged Dead: ', c6_ex1_TP_dead)

In [None]:
#c7 - weighted-sum temporal validation, extreme(1) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex1_pred['c7_ex1_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c7dt_ex1_ws = pd.DataFrame(data=[['c7', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c7dt_ex1_ws

In [None]:
#c7 - extreme(1) true positives: join the predicted labels to each patient's true outcome

c7_ex1_pred = ann_ex1_pred[['patientid','c7_ex1_pred']].merge(hirid_test, on='patientid')
c7_ex1_pred = c7_ex1_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c7_ex1_TP_alive = int((c7_ex1_pred['c7_ex1_pred'].eq(0) & c7_ex1_pred['binary_discharge'].eq(0)).sum())
c7_ex1_TP_dead = int((c7_ex1_pred['c7_ex1_pred'].eq(1) & c7_ex1_pred['binary_discharge'].eq(1)).sum())

print(c7_ex1_pred['c7_ex1_pred'].value_counts())
print('c7 TP - Discharged Alive: ', c7_ex1_TP_alive)
print('c7 TP - Discharged Dead: ', c7_ex1_TP_dead)

In [None]:
#c8 - weighted-sum temporal validation, extreme(1) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex1_pred['c8_ex1_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c8dt_ex1_ws = pd.DataFrame(data=[['c8', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c8dt_ex1_ws

In [None]:
#c8 - extreme(1) true positives: join the predicted labels to each patient's true outcome

c8_ex1_pred = ann_ex1_pred[['patientid','c8_ex1_pred']].merge(hirid_test, on='patientid')
c8_ex1_pred = c8_ex1_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c8_ex1_TP_alive = int((c8_ex1_pred['c8_ex1_pred'].eq(0) & c8_ex1_pred['binary_discharge'].eq(0)).sum())
c8_ex1_TP_dead = int((c8_ex1_pred['c8_ex1_pred'].eq(1) & c8_ex1_pred['binary_discharge'].eq(1)).sum())

print(c8_ex1_pred['c8_ex1_pred'].value_counts())
print('c8 TP - Discharged Alive: ', c8_ex1_TP_alive)
print('c8 TP - Discharged Dead: ', c8_ex1_TP_dead)

In [None]:
#c9 - weighted-sum temporal validation, extreme(1) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex1_pred['c9_ex1_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c9dt_ex1_ws = pd.DataFrame(data=[['c9', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c9dt_ex1_ws

In [None]:
#c9 - extreme(1) true positives: join the predicted labels to each patient's true outcome

c9_ex1_pred = ann_ex1_pred[['patientid','c9_ex1_pred']].merge(hirid_test, on='patientid')
c9_ex1_pred = c9_ex1_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c9_ex1_TP_alive = int((c9_ex1_pred['c9_ex1_pred'].eq(0) & c9_ex1_pred['binary_discharge'].eq(0)).sum())
c9_ex1_TP_dead = int((c9_ex1_pred['c9_ex1_pred'].eq(1) & c9_ex1_pred['binary_discharge'].eq(1)).sum())

print(c9_ex1_pred['c9_ex1_pred'].value_counts())
print('c9 TP - Discharged Alive: ', c9_ex1_TP_alive)
print('c9 TP - Discharged Dead: ', c9_ex1_TP_dead)

In [None]:
#c10 - weighted-sum temporal validation, extreme(1) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex1_pred['c10_ex1_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c10dt_ex1_ws = pd.DataFrame(data=[['c10', 'multi', 'F1_micro', f1]],
                            columns=['Annotator','Model','Optimisation','F1_micro'])
c10dt_ex1_ws

In [None]:
#c10 - extreme(1) true positives: join the predicted labels to each patient's true outcome

c10_ex1_pred = ann_ex1_pred[['patientid','c10_ex1_pred']].merge(hirid_test, on='patientid')
c10_ex1_pred = c10_ex1_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c10_ex1_TP_alive = int((c10_ex1_pred['c10_ex1_pred'].eq(0) & c10_ex1_pred['binary_discharge'].eq(0)).sum())
c10_ex1_TP_dead = int((c10_ex1_pred['c10_ex1_pred'].eq(1) & c10_ex1_pred['binary_discharge'].eq(1)).sum())

print(c10_ex1_pred['c10_ex1_pred'].value_counts())
print('c10 TP - Discharged Alive: ', c10_ex1_TP_alive)
print('c10 TP - Discharged Dead: ', c10_ex1_TP_dead)

In [None]:
#c11 - weighted-sum temporal validation, extreme(1) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex1_pred['c11_ex1_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c11dt_ex1_ws = pd.DataFrame(data=[['c11', 'multi', 'F1_micro', f1]],
                            columns=['Annotator','Model','Optimisation','F1_micro'])
c11dt_ex1_ws

In [None]:
#c11 - extreme(1) true positives: join the predicted labels to each patient's true outcome

c11_ex1_pred = ann_ex1_pred[['patientid','c11_ex1_pred']].merge(hirid_test, on='patientid')
c11_ex1_pred = c11_ex1_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c11_ex1_TP_alive = int((c11_ex1_pred['c11_ex1_pred'].eq(0) & c11_ex1_pred['binary_discharge'].eq(0)).sum())
c11_ex1_TP_dead = int((c11_ex1_pred['c11_ex1_pred'].eq(1) & c11_ex1_pred['binary_discharge'].eq(1)).sum())

print(c11_ex1_pred['c11_ex1_pred'].value_counts())
print('c11 TP - Discharged Alive: ', c11_ex1_TP_alive)
print('c11 TP - Discharged Dead: ', c11_ex1_TP_dead)

In [None]:
#mv - weighted-sum temporal validation, extreme(1) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex1_pred['mv_ex1_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
mvdt_ex1_ws = pd.DataFrame(data=[['MV', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
mvdt_ex1_ws

In [None]:
#mv - extreme(1) true positives: join the predicted labels to each patient's true outcome

mv_ex1_pred = ann_ex1_pred[['patientid','mv_ex1_pred']].merge(hirid_test, on='patientid')
mv_ex1_pred = mv_ex1_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
mv_ex1_TP_alive = int((mv_ex1_pred['mv_ex1_pred'].eq(0) & mv_ex1_pred['binary_discharge'].eq(0)).sum())
mv_ex1_TP_dead = int((mv_ex1_pred['mv_ex1_pred'].eq(1) & mv_ex1_pred['binary_discharge'].eq(1)).sum())

print(mv_ex1_pred['mv_ex1_pred'].value_counts())
print('mv TP - Discharged Alive: ', mv_ex1_TP_alive)
print('mv TP - Discharged Dead: ', mv_ex1_TP_dead)

In [None]:
#tmv - weighted-sum temporal validation, extreme(1) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex1_pred['tmv_ex1_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
tmvdt_ex1_ws = pd.DataFrame(data=[['TMV', 'multi', 'F1_micro', f1]],
                            columns=['Annotator','Model','Optimisation','F1_micro'])
tmvdt_ex1_ws

In [None]:
#tmv - extreme(1) true positives: join the predicted labels to each patient's true outcome

tmv_ex1_pred = ann_ex1_pred[['patientid','tmv_ex1_pred']].merge(hirid_test, on='patientid')
tmv_ex1_pred = tmv_ex1_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
tmv_ex1_TP_alive = int((tmv_ex1_pred['tmv_ex1_pred'].eq(0) & tmv_ex1_pred['binary_discharge'].eq(0)).sum())
tmv_ex1_TP_dead = int((tmv_ex1_pred['tmv_ex1_pred'].eq(1) & tmv_ex1_pred['binary_discharge'].eq(1)).sum())

print(tmv_ex1_pred['tmv_ex1_pred'].value_counts())
print('tmv TP - Discharged Alive: ', tmv_ex1_TP_alive)
print('tmv TP - Discharged Dead: ', tmv_ex1_TP_dead)

### WS - Extreme (2)

**Cut-offs: <=2 = discharged alive, >4 = died**

In [None]:
#c1 - weighted-sum temporal validation, extreme(2) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex2_pred['c1_ex2_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c1dt_ex2_ws = pd.DataFrame(data=[['c1', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c1dt_ex2_ws

In [None]:
#C1 - extreme(2) true positives: join the predicted labels to each patient's true outcome

c1_ex2_pred = ann_ex2_pred[['patientid','c1_ex2_pred']].merge(hirid_test, on='patientid')
c1_ex2_pred = c1_ex2_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c1_ex2_TP_alive = int((c1_ex2_pred['c1_ex2_pred'].eq(0) & c1_ex2_pred['binary_discharge'].eq(0)).sum())
c1_ex2_TP_dead = int((c1_ex2_pred['c1_ex2_pred'].eq(1) & c1_ex2_pred['binary_discharge'].eq(1)).sum())

print(c1_ex2_pred['c1_ex2_pred'].value_counts())
print('C1 TP - Discharged Alive: ', c1_ex2_TP_alive)
print('C1 TP - Discharged Dead: ', c1_ex2_TP_dead)

In [None]:
#c2 - weighted-sum temporal validation, extreme(2) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex2_pred['c2_ex2_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c2dt_ex2_ws = pd.DataFrame(data=[['c2', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c2dt_ex2_ws

In [None]:
#c2 - extreme(2) true positives: join the predicted labels to each patient's true outcome

c2_ex2_pred = ann_ex2_pred[['patientid','c2_ex2_pred']].merge(hirid_test, on='patientid')
c2_ex2_pred = c2_ex2_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c2_ex2_TP_alive = int((c2_ex2_pred['c2_ex2_pred'].eq(0) & c2_ex2_pred['binary_discharge'].eq(0)).sum())
c2_ex2_TP_dead = int((c2_ex2_pred['c2_ex2_pred'].eq(1) & c2_ex2_pred['binary_discharge'].eq(1)).sum())

print(c2_ex2_pred['c2_ex2_pred'].value_counts())
print('c2 TP - Discharged Alive: ', c2_ex2_TP_alive)
print('c2 TP - Discharged Dead: ', c2_ex2_TP_dead)

In [None]:
#c3 - weighted-sum temporal validation, extreme(2) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex2_pred['c3_ex2_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c3dt_ex2_ws = pd.DataFrame(data=[['c3', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c3dt_ex2_ws

In [None]:
#c3 - extreme(2) true positives: join the predicted labels to each patient's true outcome

c3_ex2_pred = ann_ex2_pred[['patientid','c3_ex2_pred']].merge(hirid_test, on='patientid')
c3_ex2_pred = c3_ex2_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c3_ex2_TP_alive = int((c3_ex2_pred['c3_ex2_pred'].eq(0) & c3_ex2_pred['binary_discharge'].eq(0)).sum())
c3_ex2_TP_dead = int((c3_ex2_pred['c3_ex2_pred'].eq(1) & c3_ex2_pred['binary_discharge'].eq(1)).sum())

print(c3_ex2_pred['c3_ex2_pred'].value_counts())
print('c3 TP - Discharged Alive: ', c3_ex2_TP_alive)
print('c3 TP - Discharged Dead: ', c3_ex2_TP_dead)

In [None]:
#c4 - weighted-sum temporal validation, extreme(2) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex2_pred['c4_ex2_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c4dt_ex2_ws = pd.DataFrame(data=[['c4', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c4dt_ex2_ws

In [None]:
#c4 - extreme(2) true positives: join the predicted labels to each patient's true outcome

c4_ex2_pred = ann_ex2_pred[['patientid','c4_ex2_pred']].merge(hirid_test, on='patientid')
c4_ex2_pred = c4_ex2_pred.drop(columns='discharge_status')

#true positive = predicted label equals binary_discharge (0 = alive, 1 = dead)
c4_ex2_TP_alive = int((c4_ex2_pred['c4_ex2_pred'].eq(0) & c4_ex2_pred['binary_discharge'].eq(0)).sum())
c4_ex2_TP_dead = int((c4_ex2_pred['c4_ex2_pred'].eq(1) & c4_ex2_pred['binary_discharge'].eq(1)).sum())

print(c4_ex2_pred['c4_ex2_pred'].value_counts())
print('c4 TP - Discharged Alive: ', c4_ex2_TP_alive)
print('c4 TP - Discharged Dead: ', c4_ex2_TP_dead)

In [None]:
#c5 - weighted-sum temporal validation, extreme(2) cut-off: micro-averaged F1 against y_test

f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex2_pred['c5_ex2_pred'].to_numpy(), average='micro')

##one-row results table, shown as the cell output
c5dt_ex2_ws = pd.DataFrame(data=[['c5', 'multi', 'F1_micro', f1]],
                           columns=['Annotator','Model','Optimisation','F1_micro'])
c5dt_ex2_ws

In [None]:
#c5 - ex2: attach the recorded discharge outcome to each prediction
c5_ex2_pred = (
    ann_ex2_pred[['patientid', 'c5_ex2_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c5_ex2_TP_alive = int((c5_ex2_pred['c5_ex2_pred'].eq(0) & c5_ex2_pred['binary_discharge'].eq(0)).sum())
c5_ex2_TP_dead = int((c5_ex2_pred['c5_ex2_pred'].eq(1) & c5_ex2_pred['binary_discharge'].eq(1)).sum())

print(c5_ex2_pred['c5_ex2_pred'].value_counts())
print('c5 TP - Discharged Alive: ', c5_ex2_TP_alive)
print('c5 TP - Discharged Dead: ', c5_ex2_TP_dead)

In [None]:
#c6 - DT temporal external validation, weighted sum (ex2 labels)

#micro-averaged F1 of the c6 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex2_pred['c6_ex2_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c6dt_ex2_ws = pd.DataFrame(
    data=[['c6', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c6dt_ex2_ws

In [None]:
#c6 - ex2: attach the recorded discharge outcome to each prediction
c6_ex2_pred = (
    ann_ex2_pred[['patientid', 'c6_ex2_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c6_ex2_TP_alive = int((c6_ex2_pred['c6_ex2_pred'].eq(0) & c6_ex2_pred['binary_discharge'].eq(0)).sum())
c6_ex2_TP_dead = int((c6_ex2_pred['c6_ex2_pred'].eq(1) & c6_ex2_pred['binary_discharge'].eq(1)).sum())

print(c6_ex2_pred['c6_ex2_pred'].value_counts())
print('c6 TP - Discharged Alive: ', c6_ex2_TP_alive)
print('c6 TP - Discharged Dead: ', c6_ex2_TP_dead)

In [None]:
#c7 - DT temporal external validation, weighted sum (ex2 labels)

#micro-averaged F1 of the c7 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex2_pred['c7_ex2_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c7dt_ex2_ws = pd.DataFrame(
    data=[['c7', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c7dt_ex2_ws

In [None]:
#c7 - ex2: attach the recorded discharge outcome to each prediction
c7_ex2_pred = (
    ann_ex2_pred[['patientid', 'c7_ex2_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c7_ex2_TP_alive = int((c7_ex2_pred['c7_ex2_pred'].eq(0) & c7_ex2_pred['binary_discharge'].eq(0)).sum())
c7_ex2_TP_dead = int((c7_ex2_pred['c7_ex2_pred'].eq(1) & c7_ex2_pred['binary_discharge'].eq(1)).sum())

print(c7_ex2_pred['c7_ex2_pred'].value_counts())
print('c7 TP - Discharged Alive: ', c7_ex2_TP_alive)
print('c7 TP - Discharged Dead: ', c7_ex2_TP_dead)

In [None]:
#c8 - DT temporal external validation, weighted sum (ex2 labels)

#micro-averaged F1 of the c8 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex2_pred['c8_ex2_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c8dt_ex2_ws = pd.DataFrame(
    data=[['c8', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c8dt_ex2_ws

In [None]:
#c8 - ex2: attach the recorded discharge outcome to each prediction
c8_ex2_pred = (
    ann_ex2_pred[['patientid', 'c8_ex2_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c8_ex2_TP_alive = int((c8_ex2_pred['c8_ex2_pred'].eq(0) & c8_ex2_pred['binary_discharge'].eq(0)).sum())
c8_ex2_TP_dead = int((c8_ex2_pred['c8_ex2_pred'].eq(1) & c8_ex2_pred['binary_discharge'].eq(1)).sum())

print(c8_ex2_pred['c8_ex2_pred'].value_counts())
print('c8 TP - Discharged Alive: ', c8_ex2_TP_alive)
print('c8 TP - Discharged Dead: ', c8_ex2_TP_dead)

In [None]:
#c9 - DT temporal external validation, weighted sum (ex2 labels)

#micro-averaged F1 of the c9 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex2_pred['c9_ex2_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c9dt_ex2_ws = pd.DataFrame(
    data=[['c9', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c9dt_ex2_ws

In [None]:
#c9 - ex2: attach the recorded discharge outcome to each prediction
c9_ex2_pred = (
    ann_ex2_pred[['patientid', 'c9_ex2_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c9_ex2_TP_alive = int((c9_ex2_pred['c9_ex2_pred'].eq(0) & c9_ex2_pred['binary_discharge'].eq(0)).sum())
c9_ex2_TP_dead = int((c9_ex2_pred['c9_ex2_pred'].eq(1) & c9_ex2_pred['binary_discharge'].eq(1)).sum())

print(c9_ex2_pred['c9_ex2_pred'].value_counts())
print('c9 TP - Discharged Alive: ', c9_ex2_TP_alive)
print('c9 TP - Discharged Dead: ', c9_ex2_TP_dead)

In [None]:
#c10 - DT temporal external validation, weighted sum (ex2 labels)

#micro-averaged F1 of the c10 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex2_pred['c10_ex2_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c10dt_ex2_ws = pd.DataFrame(
    data=[['c10', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c10dt_ex2_ws

In [None]:
#c10 - ex2: attach the recorded discharge outcome to each prediction
c10_ex2_pred = (
    ann_ex2_pred[['patientid', 'c10_ex2_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c10_ex2_TP_alive = int((c10_ex2_pred['c10_ex2_pred'].eq(0) & c10_ex2_pred['binary_discharge'].eq(0)).sum())
c10_ex2_TP_dead = int((c10_ex2_pred['c10_ex2_pred'].eq(1) & c10_ex2_pred['binary_discharge'].eq(1)).sum())

print(c10_ex2_pred['c10_ex2_pred'].value_counts())
print('c10 TP - Discharged Alive: ', c10_ex2_TP_alive)
print('c10 TP - Discharged Dead: ', c10_ex2_TP_dead)

In [None]:
#c11 - DT temporal external validation, weighted sum (ex2 labels)

#micro-averaged F1 of the c11 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex2_pred['c11_ex2_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c11dt_ex2_ws = pd.DataFrame(
    data=[['c11', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c11dt_ex2_ws

In [None]:
#c11 - ex2: attach the recorded discharge outcome to each prediction
c11_ex2_pred = (
    ann_ex2_pred[['patientid', 'c11_ex2_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c11_ex2_TP_alive = int((c11_ex2_pred['c11_ex2_pred'].eq(0) & c11_ex2_pred['binary_discharge'].eq(0)).sum())
c11_ex2_TP_dead = int((c11_ex2_pred['c11_ex2_pred'].eq(1) & c11_ex2_pred['binary_discharge'].eq(1)).sum())

print(c11_ex2_pred['c11_ex2_pred'].value_counts())
print('c11 TP - Discharged Alive: ', c11_ex2_TP_alive)
print('c11 TP - Discharged Dead: ', c11_ex2_TP_dead)

In [None]:
#mv - DT temporal external validation, weighted sum (ex2 labels)

#micro-averaged F1 of the mv predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex2_pred['mv_ex2_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
mvdt_ex2_ws = pd.DataFrame(
    data=[['MV', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
mvdt_ex2_ws

In [None]:
#mv - ex2: attach the recorded discharge outcome to each prediction
mv_ex2_pred = (
    ann_ex2_pred[['patientid', 'mv_ex2_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
mv_ex2_TP_alive = int((mv_ex2_pred['mv_ex2_pred'].eq(0) & mv_ex2_pred['binary_discharge'].eq(0)).sum())
mv_ex2_TP_dead = int((mv_ex2_pred['mv_ex2_pred'].eq(1) & mv_ex2_pred['binary_discharge'].eq(1)).sum())

print(mv_ex2_pred['mv_ex2_pred'].value_counts())
print('mv TP - Discharged Alive: ', mv_ex2_TP_alive)
print('mv TP - Discharged Dead: ', mv_ex2_TP_dead)

In [None]:
#tmv - DT temporal external validation, weighted sum (ex2 labels)

#micro-averaged F1 of the tmv predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_ex2_pred['tmv_ex2_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
tmvdt_ex2_ws = pd.DataFrame(
    data=[['TMV', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
tmvdt_ex2_ws

In [None]:
#tmv - ex2: attach the recorded discharge outcome to each prediction
tmv_ex2_pred = (
    ann_ex2_pred[['patientid', 'tmv_ex2_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
tmv_ex2_TP_alive = int((tmv_ex2_pred['tmv_ex2_pred'].eq(0) & tmv_ex2_pred['binary_discharge'].eq(0)).sum())
tmv_ex2_TP_dead = int((tmv_ex2_pred['tmv_ex2_pred'].eq(1) & tmv_ex2_pred['binary_discharge'].eq(1)).sum())

print(tmv_ex2_pred['tmv_ex2_pred'].value_counts())
print('tmv TP - Discharged Alive: ', tmv_ex2_TP_alive)
print('tmv TP - Discharged Dead: ', tmv_ex2_TP_dead)

In [None]:
#Ext Val Extreme(2) - interim summary of the per-model one-row F1 tables

frames = [
    c1dt_ex2_ws, c2dt_ex2_ws, c3dt_ex2_ws, c4dt_ex2_ws, c5dt_ex2_ws,
    c6dt_ex2_ws, c7dt_ex2_ws, c8dt_ex2_ws, c9dt_ex2_ws, c10dt_ex2_ws,
    c11dt_ex2_ws, mvdt_ex2_ws, tmvdt_ex2_ws,
]

#stack the tables row-wise (each frame keeps its own index)
multi_ex2_pred = pd.concat(frames, axis=0)
print(multi_ex2_pred.shape)
multi_ex2_pred

### WS - Neutral

**Cut-offs: <=3 = discharged alive, >=4 = died**

In [None]:
#c1 - DT temporal external validation, weighted sum (neutral labels)

#micro-averaged F1 of the c1 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_neut_pred['c1_neut_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c1dt_neut_ws = pd.DataFrame(
    data=[['c1', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c1dt_neut_ws

In [None]:
#c1 - neut: attach the recorded discharge outcome to each prediction
c1_neut_pred = (
    ann_neut_pred[['patientid', 'c1_neut_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c1_neut_TP_alive = int((c1_neut_pred['c1_neut_pred'].eq(0) & c1_neut_pred['binary_discharge'].eq(0)).sum())
c1_neut_TP_dead = int((c1_neut_pred['c1_neut_pred'].eq(1) & c1_neut_pred['binary_discharge'].eq(1)).sum())

print(c1_neut_pred['c1_neut_pred'].value_counts())
print('c1 TP - Discharged Alive: ', c1_neut_TP_alive)
print('c1 TP - Discharged Dead: ', c1_neut_TP_dead)

In [None]:
#c2 - DT temporal external validation, weighted sum (neutral labels)

#micro-averaged F1 of the c2 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_neut_pred['c2_neut_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c2dt_neut_ws = pd.DataFrame(
    data=[['c2', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c2dt_neut_ws

In [None]:
#c2 - neut: attach the recorded discharge outcome to each prediction
c2_neut_pred = (
    ann_neut_pred[['patientid', 'c2_neut_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c2_neut_TP_alive = int((c2_neut_pred['c2_neut_pred'].eq(0) & c2_neut_pred['binary_discharge'].eq(0)).sum())
c2_neut_TP_dead = int((c2_neut_pred['c2_neut_pred'].eq(1) & c2_neut_pred['binary_discharge'].eq(1)).sum())

print(c2_neut_pred['c2_neut_pred'].value_counts())
print('c2 TP - Discharged Alive: ', c2_neut_TP_alive)
print('c2 TP - Discharged Dead: ', c2_neut_TP_dead)

In [None]:
#c3 - DT temporal external validation, weighted sum (neutral labels)

#micro-averaged F1 of the c3 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_neut_pred['c3_neut_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c3dt_neut_ws = pd.DataFrame(
    data=[['c3', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c3dt_neut_ws

In [None]:
#c3 - neut: attach the recorded discharge outcome to each prediction
c3_neut_pred = (
    ann_neut_pred[['patientid', 'c3_neut_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c3_neut_TP_alive = int((c3_neut_pred['c3_neut_pred'].eq(0) & c3_neut_pred['binary_discharge'].eq(0)).sum())
c3_neut_TP_dead = int((c3_neut_pred['c3_neut_pred'].eq(1) & c3_neut_pred['binary_discharge'].eq(1)).sum())

print(c3_neut_pred['c3_neut_pred'].value_counts())
print('c3 TP - Discharged Alive: ', c3_neut_TP_alive)
print('c3 TP - Discharged Dead: ', c3_neut_TP_dead)

In [None]:
#c4 - DT temporal external validation, weighted sum (neutral labels)

#micro-averaged F1 of the c4 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_neut_pred['c4_neut_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c4dt_neut_ws = pd.DataFrame(
    data=[['c4', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c4dt_neut_ws

In [None]:
#c4 - neut: attach the recorded discharge outcome to each prediction
c4_neut_pred = (
    ann_neut_pred[['patientid', 'c4_neut_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c4_neut_TP_alive = int((c4_neut_pred['c4_neut_pred'].eq(0) & c4_neut_pred['binary_discharge'].eq(0)).sum())
c4_neut_TP_dead = int((c4_neut_pred['c4_neut_pred'].eq(1) & c4_neut_pred['binary_discharge'].eq(1)).sum())

print(c4_neut_pred['c4_neut_pred'].value_counts())
print('c4 TP - Discharged Alive: ', c4_neut_TP_alive)
print('c4 TP - Discharged Dead: ', c4_neut_TP_dead)

In [None]:
#c5 - DT temporal external validation, weighted sum (neutral labels)

#micro-averaged F1 of the c5 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_neut_pred['c5_neut_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c5dt_neut_ws = pd.DataFrame(
    data=[['c5', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c5dt_neut_ws

In [None]:
#c5 - neut: attach the recorded discharge outcome to each prediction
c5_neut_pred = (
    ann_neut_pred[['patientid', 'c5_neut_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c5_neut_TP_alive = int((c5_neut_pred['c5_neut_pred'].eq(0) & c5_neut_pred['binary_discharge'].eq(0)).sum())
c5_neut_TP_dead = int((c5_neut_pred['c5_neut_pred'].eq(1) & c5_neut_pred['binary_discharge'].eq(1)).sum())

print(c5_neut_pred['c5_neut_pred'].value_counts())
print('c5 TP - Discharged Alive: ', c5_neut_TP_alive)
print('c5 TP - Discharged Dead: ', c5_neut_TP_dead)

In [None]:
#c6 - DT temporal external validation, weighted sum (neutral labels)

#micro-averaged F1 of the c6 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_neut_pred['c6_neut_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c6dt_neut_ws = pd.DataFrame(
    data=[['c6', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c6dt_neut_ws

In [None]:
#c6 - neut: attach the recorded discharge outcome to each prediction
c6_neut_pred = (
    ann_neut_pred[['patientid', 'c6_neut_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c6_neut_TP_alive = int((c6_neut_pred['c6_neut_pred'].eq(0) & c6_neut_pred['binary_discharge'].eq(0)).sum())
c6_neut_TP_dead = int((c6_neut_pred['c6_neut_pred'].eq(1) & c6_neut_pred['binary_discharge'].eq(1)).sum())

print(c6_neut_pred['c6_neut_pred'].value_counts())
print('c6 TP - Discharged Alive: ', c6_neut_TP_alive)
print('c6 TP - Discharged Dead: ', c6_neut_TP_dead)

In [None]:
#c7 - DT temporal external validation, weighted sum (neutral labels)

#micro-averaged F1 of the c7 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_neut_pred['c7_neut_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c7dt_neut_ws = pd.DataFrame(
    data=[['c7', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c7dt_neut_ws

In [None]:
#c7 - neut: attach the recorded discharge outcome to each prediction
c7_neut_pred = (
    ann_neut_pred[['patientid', 'c7_neut_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c7_neut_TP_alive = int((c7_neut_pred['c7_neut_pred'].eq(0) & c7_neut_pred['binary_discharge'].eq(0)).sum())
c7_neut_TP_dead = int((c7_neut_pred['c7_neut_pred'].eq(1) & c7_neut_pred['binary_discharge'].eq(1)).sum())

print(c7_neut_pred['c7_neut_pred'].value_counts())
print('c7 TP - Discharged Alive: ', c7_neut_TP_alive)
print('c7 TP - Discharged Dead: ', c7_neut_TP_dead)

In [None]:
#c8 - DT temporal external validation, weighted sum (neutral labels)

#micro-averaged F1 of the c8 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_neut_pred['c8_neut_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c8dt_neut_ws = pd.DataFrame(
    data=[['c8', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c8dt_neut_ws

In [None]:
#c8 - neut: attach the recorded discharge outcome to each prediction
c8_neut_pred = (
    ann_neut_pred[['patientid', 'c8_neut_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c8_neut_TP_alive = int((c8_neut_pred['c8_neut_pred'].eq(0) & c8_neut_pred['binary_discharge'].eq(0)).sum())
c8_neut_TP_dead = int((c8_neut_pred['c8_neut_pred'].eq(1) & c8_neut_pred['binary_discharge'].eq(1)).sum())

print(c8_neut_pred['c8_neut_pred'].value_counts())
print('c8 TP - Discharged Alive: ', c8_neut_TP_alive)
print('c8 TP - Discharged Dead: ', c8_neut_TP_dead)

In [None]:
#c9 - DT temporal external validation, weighted sum (neutral labels)

#micro-averaged F1 of the c9 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_neut_pred['c9_neut_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c9dt_neut_ws = pd.DataFrame(
    data=[['c9', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c9dt_neut_ws

In [None]:
#c9 - neut: attach the recorded discharge outcome to each prediction
c9_neut_pred = (
    ann_neut_pred[['patientid', 'c9_neut_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c9_neut_TP_alive = int((c9_neut_pred['c9_neut_pred'].eq(0) & c9_neut_pred['binary_discharge'].eq(0)).sum())
c9_neut_TP_dead = int((c9_neut_pred['c9_neut_pred'].eq(1) & c9_neut_pred['binary_discharge'].eq(1)).sum())

print(c9_neut_pred['c9_neut_pred'].value_counts())
print('c9 TP - Discharged Alive: ', c9_neut_TP_alive)
print('c9 TP - Discharged Dead: ', c9_neut_TP_dead)

In [None]:
#c10 - DT temporal external validation, weighted sum (neutral labels)

#micro-averaged F1 of the c10 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_neut_pred['c10_neut_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c10dt_neut_ws = pd.DataFrame(
    data=[['c10', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c10dt_neut_ws

In [None]:
#c10 - neut: attach the recorded discharge outcome to each prediction
c10_neut_pred = (
    ann_neut_pred[['patientid', 'c10_neut_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c10_neut_TP_alive = int((c10_neut_pred['c10_neut_pred'].eq(0) & c10_neut_pred['binary_discharge'].eq(0)).sum())
c10_neut_TP_dead = int((c10_neut_pred['c10_neut_pred'].eq(1) & c10_neut_pred['binary_discharge'].eq(1)).sum())

print(c10_neut_pred['c10_neut_pred'].value_counts())
print('c10 TP - Discharged Alive: ', c10_neut_TP_alive)
print('c10 TP - Discharged Dead: ', c10_neut_TP_dead)

In [None]:
#c11 - DT temporal external validation, weighted sum (neutral labels)

#micro-averaged F1 of the c11 predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_neut_pred['c11_neut_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
c11dt_neut_ws = pd.DataFrame(
    data=[['c11', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c11dt_neut_ws

In [None]:
#c11 - neut: attach the recorded discharge outcome to each prediction
c11_neut_pred = (
    ann_neut_pred[['patientid', 'c11_neut_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
c11_neut_TP_alive = int((c11_neut_pred['c11_neut_pred'].eq(0) & c11_neut_pred['binary_discharge'].eq(0)).sum())
c11_neut_TP_dead = int((c11_neut_pred['c11_neut_pred'].eq(1) & c11_neut_pred['binary_discharge'].eq(1)).sum())

print(c11_neut_pred['c11_neut_pred'].value_counts())
print('c11 TP - Discharged Alive: ', c11_neut_TP_alive)
print('c11 TP - Discharged Dead: ', c11_neut_TP_dead)

In [None]:
#mv - DT temporal external validation, weighted sum (neutral labels)

#micro-averaged F1 of the mv predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_neut_pred['mv_neut_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
mvdt_neut_ws = pd.DataFrame(
    data=[['MV', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
mvdt_neut_ws

In [None]:
#mv - neut: attach the recorded discharge outcome to each prediction
mv_neut_pred = (
    ann_neut_pred[['patientid', 'mv_neut_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
mv_neut_TP_alive = int((mv_neut_pred['mv_neut_pred'].eq(0) & mv_neut_pred['binary_discharge'].eq(0)).sum())
mv_neut_TP_dead = int((mv_neut_pred['mv_neut_pred'].eq(1) & mv_neut_pred['binary_discharge'].eq(1)).sum())

print(mv_neut_pred['mv_neut_pred'].value_counts())
print('mv TP - Discharged Alive: ', mv_neut_TP_alive)
print('mv TP - Discharged Dead: ', mv_neut_TP_dead)

In [None]:
#tmv - DT temporal external validation, weighted sum (neutral labels)

#micro-averaged F1 of the tmv predictions against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=ann_neut_pred['tmv_neut_pred'].to_numpy(), average='micro')

#keep the score as a one-row results table
tmvdt_neut_ws = pd.DataFrame(
    data=[['TMV', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
tmvdt_neut_ws

In [None]:
#tmv - neut: attach the recorded discharge outcome to each prediction
tmv_neut_pred = (
    ann_neut_pred[['patientid', 'tmv_neut_pred']]
    .merge(hirid_test, on='patientid')
    .drop(columns='discharge_status')
)

#true positives: predicted label equals binary_discharge (0 = alive, 1 = dead)
tmv_neut_TP_alive = int((tmv_neut_pred['tmv_neut_pred'].eq(0) & tmv_neut_pred['binary_discharge'].eq(0)).sum())
tmv_neut_TP_dead = int((tmv_neut_pred['tmv_neut_pred'].eq(1) & tmv_neut_pred['binary_discharge'].eq(1)).sum())

print(tmv_neut_pred['tmv_neut_pred'].value_counts())
print('tmv TP - Discharged Alive: ', tmv_neut_TP_alive)
print('tmv TP - Discharged Dead: ', tmv_neut_TP_dead)

### WS - Consensus Fuzzy Weighting

#### Fuzzy Consensus (FC) Model

In [None]:
#Inspect the validation frame (shape and first rows) before building the FC inputs
print(hirid_val.shape)
hirid_val.head()

In [None]:
#Temporal HiRID Validation Dataset
#Build the Fuzzy Consensus (FC) inputs: for every row, average the annotator
#models' predicted labels while leaving 'C' (uncertain) predictions out.
mod_hirid_val = hirid_val.copy(deep=True)

#drop mv_pred & tmv_pred (not needed)
mod_hirid_val = mod_hirid_val.drop(['mv_pred', 'tmv_pred'],axis=1)

cols = ['c1_pred','c2_pred', 'c3_pred', 'c4_pred', 'c5_pred', 'c6_pred', 'c7_pred','c8_pred', 'c9_pred',
        'c10_pred', 'c11_pred']

#relabel 0-4 predicted labels to 1-5 (still representing A-E)
mod_hirid_val[cols] = mod_hirid_val[cols].replace({0:1, 1:2, 2:3, 3:4, 4:5})

#within annotator predicted labels, change 3 to 0 (as defining 'C' label as uncertain, therefore not considering in fuzzy weighted calculation)
mod_hirid_val[cols] = mod_hirid_val[cols].replace({3:0})

#count how many predicted 'C' labels (i.e. '0') there are for each instance
mod_hirid_val['zero_count'] = mod_hirid_val[cols].eq(0).sum(axis=1)

#denominator for fuzzy weighted calculation: number of models that did not predict 'C'
#(derived from cols so it cannot drift from the list of prediction columns)
mod_hirid_val['denominator'] = len(cols) - mod_hirid_val['zero_count']

#NOTE(review): when every model predicts 'C' the denominator is 0 and fuzzy_wa
#becomes NaN (0/0) - confirm how such rows are meant to be handled downstream
mod_hirid_val['fuzzy_wa'] = mod_hirid_val[cols].sum(axis=1) / mod_hirid_val['denominator']

print(mod_hirid_val.shape)
print(mod_hirid_val.columns)
mod_hirid_val.head(10)

In [None]:
#Calculated Fuzzy Weighted Sum - Part 1

#define weights to each hour before event (discharge/death) - with higher bias towards hours nearer event
#The column is built in one step as floats (hours outside 1-5 get 0.0) instead of
#creating an integer column and writing floats into it through .loc, which relies
#on implicit dtype upcasting.
mod_hirid_val['weight'] = (
    mod_hirid_val['Hrs before d_time']
    .map({1: 0.3, 2: 0.3, 3: 0.2, 4: 0.1, 5: 0.1})
    .fillna(0.0)
)

#fuzzy weighted prediction: per-row fuzzy average scaled by that hour's weight
mod_hirid_val['fuzzy_ws'] = mod_hirid_val['fuzzy_wa']*mod_hirid_val['weight']

print(mod_hirid_val.shape)
mod_hirid_val.head()

In [None]:
#Calculated Fuzzy Weighted Sum - Part 2
#Add up the weighted scores of each patient to get one value per patientid

fuzzy_ws = (
    mod_hirid_val
    .groupby('patientid')['fuzzy_ws']
    .sum()
    .reset_index()
)

#report the spread of the per-patient sums (only the minimum is rounded)
print('fuzzy ws range:', round(fuzzy_ws['fuzzy_ws'].min(),3), '-', fuzzy_ws['fuzzy_ws'].max() )

print(fuzzy_ws.shape)
fuzzy_ws

In [None]:
#Turn each patient's fuzzy weighted sum into a final label under the three cut-off
#schemes (the *_cat helpers are defined elsewhere in the notebook)

#Extreme(1) logic: 1 = discharged alive, >4 = died
fuzzy_ws['Ext1_label'] = fuzzy_ws.apply(lambda r: ex1_cat(r, 'fuzzy_ws'), axis='columns')

#Extreme(2) logic: <=2 = discharged alive, >4 = died
fuzzy_ws['Ext2_label'] = fuzzy_ws.apply(lambda r: ex2_cat(r, 'fuzzy_ws'), axis='columns')

#Neutral logic: <=3 = discharged alive, >=4 = died
fuzzy_ws['Neut_label'] = fuzzy_ws.apply(lambda r: neut_cat(r, 'fuzzy_ws'), axis='columns')

print(fuzzy_ws.shape)
fuzzy_ws

In [None]:
#FC - Fuzzy Extreme(1) WS

#micro-averaged F1 of the FC Extreme(1) labels against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=fuzzy_ws['Ext1_label'].to_numpy(), average='micro')

#keep the score as a one-row results table
fcdt_ex1_ws = pd.DataFrame(
    data=[['FC', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
fcdt_ex1_ws

In [None]:
#FC - Fuzzy Extreme(2) WS

#micro-averaged F1 of the FC Extreme(2) labels against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=fuzzy_ws['Ext2_label'].to_numpy(), average='micro')

#keep the score as a one-row results table
fcdt_ex2_ws = pd.DataFrame(
    data=[['FC', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
fcdt_ex2_ws

In [None]:
#FC - Fuzzy Neutral WS

#micro-averaged F1 of the FC Neutral labels against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=fuzzy_ws['Neut_label'].to_numpy(), average='micro')

#keep the score as a one-row results table
fcdt_neut_ws = pd.DataFrame(
    data=[['FC', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
fcdt_neut_ws

#### Top Fuzzy Consensus (TFC) Model

In [None]:
#Preview the validation frame before building the TFC inputs
hirid_val.head()

In [None]:
#Temporal HiRID Validation Dataset
#Build the Top Fuzzy Consensus (TFC) inputs: same per-row fuzzy average as FC,
#restricted to the three top-performing annotator models.
tfc_hirid_val = hirid_val.copy(deep=True)

#only keep top performing model predictions (C2, C4, C8 models had highest int validation performance)
drop = ['c1_pred', 'c3_pred',  'c5_pred', 'c6_pred', 'c7_pred','c9_pred', 'c10_pred', 'c11_pred']
tfc_hirid_val = tfc_hirid_val.drop(drop,axis=1)

#relabel 0-4 predicted labels to 1-5 (still representing A-E)
cols = ['c2_pred', 'c4_pred', 'c8_pred']
tfc_hirid_val[cols] = tfc_hirid_val[cols].replace({0:1, 1:2, 2:3, 3:4, 4:5})

#within annotator predicted labels, change 3 to 0 (as defining 'C' label as uncertain, therefore not considering in fuzzy weighted calculation)
tfc_hirid_val[cols] = tfc_hirid_val[cols].replace({3:0})

#count how many predicted 'C' labels (i.e. '0') there are for each instance
tfc_hirid_val['zero_count'] = tfc_hirid_val[cols].eq(0).sum(axis=1)

#denominator for fuzzy weighted calculation: number of models that did not predict 'C'
#(derived from cols so it cannot drift from the list of prediction columns)
tfc_hirid_val['denominator'] = len(cols) - tfc_hirid_val['zero_count']

#NOTE(review): when all three models predict 'C' the denominator is 0 and fuzzy_wa
#becomes NaN (0/0) - confirm how such rows are meant to be handled downstream
tfc_hirid_val['fuzzy_wa'] = tfc_hirid_val[cols].sum(axis=1) / tfc_hirid_val['denominator']

print(tfc_hirid_val.shape)
print(tfc_hirid_val.columns)
tfc_hirid_val.head(10)

In [None]:
#Calculated Fuzzy Weighted Sum - Part 1

#define weights to each hour before event (discharge/death) - with higher bias towards hours nearer event
#The column is built in one step as floats (hours outside 1-5 get 0.0) instead of
#creating an integer column and writing floats into it through .loc, which relies
#on implicit dtype upcasting.
tfc_hirid_val['weight'] = (
    tfc_hirid_val['Hrs before d_time']
    .map({1: 0.3, 2: 0.3, 3: 0.2, 4: 0.1, 5: 0.1})
    .fillna(0.0)
)

#fuzzy weighted prediction: per-row fuzzy average scaled by that hour's weight
tfc_hirid_val['fuzzy_ws'] = tfc_hirid_val['fuzzy_wa']*tfc_hirid_val['weight']

print(tfc_hirid_val.shape)
tfc_hirid_val.head(10)

In [None]:
#Calculated Fuzzy Weighted Sum - Part 2
#Add up the weighted scores of each patient to get one value per patientid

tfc_fuzzy_ws = (
    tfc_hirid_val
    .groupby('patientid')['fuzzy_ws']
    .sum()
    .reset_index()
)

#report the spread of the per-patient sums (only the minimum is rounded)
print('fuzzy ws range:', round(tfc_fuzzy_ws['fuzzy_ws'].min(),3), '-', tfc_fuzzy_ws['fuzzy_ws'].max() )

print(tfc_fuzzy_ws.shape)
tfc_fuzzy_ws

In [None]:
#Turn each patient's TFC fuzzy weighted sum into a final label under the three
#cut-off schemes (the *_cat helpers are defined elsewhere in the notebook)

#Extreme(1) logic: 1 = discharged alive, >4 = died
tfc_fuzzy_ws['Ext1_label'] = tfc_fuzzy_ws.apply(lambda r: ex1_cat(r, 'fuzzy_ws'), axis='columns')

#Extreme(2) logic: <=2 = discharged alive, >4 = died
tfc_fuzzy_ws['Ext2_label'] = tfc_fuzzy_ws.apply(lambda r: ex2_cat(r, 'fuzzy_ws'), axis='columns')

#Neutral logic: <=3 = discharged alive, >=4 = died
tfc_fuzzy_ws['Neut_label'] = tfc_fuzzy_ws.apply(lambda r: neut_cat(r, 'fuzzy_ws'), axis='columns')

print(tfc_fuzzy_ws.shape)
tfc_fuzzy_ws

In [None]:
#TFC - Fuzzy Extreme(1) WS

#micro-averaged F1 of the TFC Extreme(1) labels against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=tfc_fuzzy_ws['Ext1_label'].to_numpy(), average='micro')

#keep the score as a one-row results table
tfcdt_ex1_ws = pd.DataFrame(
    data=[['TFC', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
tfcdt_ex1_ws

In [None]:
#TFC - Fuzzy Extreme(2) WS

#micro-averaged F1 of the TFC Extreme(2) labels against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=tfc_fuzzy_ws['Ext2_label'].to_numpy(), average='micro')

#keep the score as a one-row results table
tfcdt_ex2_ws = pd.DataFrame(
    data=[['TFC', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
tfcdt_ex2_ws

In [None]:
#TFC - Fuzzy Neutral WS

#micro-averaged F1 of the TFC Neutral labels against y_test
f1 = metrics.f1_score(y_true=y_test, y_pred=tfc_fuzzy_ws['Neut_label'].to_numpy(), average='micro')

#keep the score as a one-row results table
tfcdt_neut_ws = pd.DataFrame(
    data=[['TFC', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
tfcdt_neut_ws

In [None]:
#Ext Val (Extreme1): multi - Summary
#Stack the one-row F1 tables of every model, including the FC and TFC consensus models

frames = [
    c1dt_ex1_ws, c2dt_ex1_ws, c3dt_ex1_ws, c4dt_ex1_ws, c5dt_ex1_ws,
    c6dt_ex1_ws, c7dt_ex1_ws, c8dt_ex1_ws, c9dt_ex1_ws, c10dt_ex1_ws,
    c11dt_ex1_ws, mvdt_ex1_ws, tmvdt_ex1_ws, fcdt_ex1_ws, tfcdt_ex1_ws,
]

#upper-case the annotator names while concatenating
multi_ex1_pred = pd.concat(frames).assign(
    Annotator=lambda tbl: tbl['Annotator'].str.upper()
)
print(multi_ex1_pred.shape)
multi_ex1_pred

In [None]:
#Ext Val (Extreme2): multi - Summary
#Stack the one-row F1 tables of every model, including the FC and TFC consensus models

frames = [c1dt_ex2_ws, c2dt_ex2_ws, c3dt_ex2_ws, c4dt_ex2_ws, c5dt_ex2_ws, c6dt_ex2_ws, c7dt_ex2_ws, c8dt_ex2_ws,
          c9dt_ex2_ws, c10dt_ex2_ws, c11dt_ex2_ws, mvdt_ex2_ws, tmvdt_ex2_ws, fcdt_ex2_ws, tfcdt_ex2_ws]

multi_ex2_pred = pd.concat(frames)
#upper-case annotator names so this table matches the Extreme(1) and Neutral summaries
multi_ex2_pred['Annotator'] = multi_ex2_pred['Annotator'].str.upper()
print(multi_ex2_pred.shape)
multi_ex2_pred

In [None]:
#Ext Val: Temporal Neutral - Summary
#Stack the one-row F1 tables of every model, including the FC and TFC consensus models

frames = [
    c1dt_neut_ws, c2dt_neut_ws, c3dt_neut_ws, c4dt_neut_ws, c5dt_neut_ws,
    c6dt_neut_ws, c7dt_neut_ws, c8dt_neut_ws, c9dt_neut_ws, c10dt_neut_ws,
    c11dt_neut_ws, mvdt_neut_ws, tmvdt_neut_ws, fcdt_neut_ws, tfcdt_neut_ws,
]

#upper-case the annotator names while concatenating
multi_neut_pred = pd.concat(frames).assign(
    Annotator=lambda tbl: tbl['Annotator'].str.upper()
)

print(multi_neut_pred.shape)
multi_neut_pred

In [None]:
#Add placeholder FC / TFC rows (F1_micro = 0) to the static results table
#presumably so it has a row for every model in the temporal summaries - TODO confirm
new_row_fc = {'Annotator':'FC', 'Model':'NA', 'Optimisation':'NA', 'F1_micro':0}
new_row_tfc = {'Annotator':'TFC', 'Model':'NA', 'Optimisation':'NA', 'F1_micro':0}

#DataFrame.append was removed in pandas 2.0; pd.concat gives the same result on old and new versions
multi_ext_stat = pd.concat(
    [multi_ext_stat, pd.DataFrame([new_row_fc, new_row_tfc])],
    ignore_index=True,
)

multi_ext_stat

In [None]:
#ACTION - remove scatterplot point for FC Static
#Plot chart
#Line chart of F1 micro per annotator model for three result sets:
#temporal Extreme(1), temporal Neutral and static.

plt.style.use('ggplot')

#Define x and y data
#All three series are drawn against the Extreme(1) annotator labels, so the
#three tables are assumed to list the models in the same order - TODO confirm
x1 = multi_ex1_pred['Annotator']
y1 = multi_ex1_pred['F1_micro']
y2 = multi_neut_pred['F1_micro']
y3 = multi_ext_stat['F1_micro']


#Plot chart data
plt.figure(figsize=(10,3.5))
plt.plot(x1, y1, color='#1F57C8', marker='o', linestyle="solid", label='Temporal: Extreme')
plt.plot(x1, y2, color='#DA4802', marker='o', linestyle="solid", label='Temporal: Neutral')
plt.plot(x1, y3, color='#62B463', marker='o', linestyle="solid", label='Static')

#Fix the y-axis to 0-1.1 with ticks every 0.2
plt.ylim([0.0,1.1])
plt.yticks(np.arange(0.0,1.01, 0.2))

#Add title and labels
plt.title('HiRID External Validation Experiment 2: Static vs Temporal', fontsize=18)
plt.xlabel('Annotator', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=14)
plt.ylabel('F1 micro', fontsize=14)
plt.grid(True)
plt.legend(loc=1, fontsize=12.5)
plt.tight_layout()


plt.show()

In [None]:
#Plot chart
#Line chart of F1 micro per annotator model for four result sets:
#temporal Extreme(1), temporal Extreme(2), temporal Neutral and static.

plt.style.use('ggplot')

#Define x and y data
#All four series are drawn against the Extreme(1) annotator labels, so the
#four tables are assumed to list the models in the same order - TODO confirm
x1 = multi_ex1_pred['Annotator']
y1 = multi_ex1_pred['F1_micro']
y2 = multi_ex2_pred['F1_micro']
y3 = multi_neut_pred['F1_micro']
y4 = multi_ext_stat['F1_micro']


#Plot chart data
plt.figure(figsize=(10,3.5))
plt.plot(x1, y1, color='#1F57C8', marker='o', linestyle="solid", label='Temporal: Extreme')
plt.plot(x1, y2, color='#D4AF37', marker='o', linestyle="solid", label='Temporal: Extreme (2)')
plt.plot(x1, y3, color='#DA4802', marker='o', linestyle="solid", label='Temporal: Neutral')
plt.plot(x1, y4, color='#62B463', marker='o', linestyle="solid", label='Static')


#Fix the y-axis to 0-1.1 with ticks every 0.2
plt.ylim([0.0,1.1])
plt.yticks(np.arange(0.0,1.01, 0.2))

#Add title and labels
plt.title('HiRID External Validation: Static vs Temporal', fontsize=18)
plt.xlabel('Annotator', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=14)
plt.ylabel('F1 micro', fontsize=14)
plt.grid(True)
plt.legend(loc=1, fontsize=10)
plt.tight_layout()


plt.show()

### 3.2.2 Approach 2: Machine-Learning driven

In [None]:
#Import Temporal HiRID Validation Dataset
##See jupyter notebook 'npjDM-HiRID_ExtVal_Dataset' to see steps on creating the Temporal HiRID Validation Dataset

#read the cohort and discard the index column written out with the CSV
hirid_val = pd.read_csv("hirid_extval_temporal_cohort.csv").drop(columns=['Unnamed: 0'])

#generate binary discharge_status: 0 when 'alive', 1 otherwise, then order rows by patient
hirid_val['binary_discharge'] = np.where(hirid_val['discharge_status'] == 'alive', 0, 1)
hirid_val = hirid_val.sort_values('patientid')

print(hirid_val.shape)
print('Number of patients:', hirid_val['patientid'].nunique())
hirid_val.head(20)

#### Decision Tree

In [None]:
#Display the C1 model object that is used for prediction in the next cell
c1dt_multi_opt

In [None]:
#DT multi Ext val - Annotator predictions
##Each model scores the same X_test; results are stored as '<tag>_pred' columns on hirid_val

annotator_models = {
    'c1': c1dt_multi_opt,
    'c2': c2dt_multi_opt,
    'c3': c3dt_multi_opt,
    'c4': c4dt_multi_opt,
    'c5': c5dt_multi_opt,
    'c6': c6dt_multi_opt,
    'c7': c7dt_multi_opt,
    'c8': c8dt_multi_opt,
    'c9': c9dt_multi_opt,
    'c10': c10dt_multi_opt,
    'c11': c11dt_multi_opt,
    'mv': mvdt_multi_opt,
    'tmv': tmvdt_multi_opt,
}

for _tag, _model in annotator_models.items():
    hirid_val[_tag + '_pred'] = _model.predict(X_test)

print(hirid_val.shape)
hirid_val.head(10)

In [None]:
#List the columns of hirid_val, including the '<tag>_pred' columns added above
hirid_val.columns

In [None]:
#Import HiRID patient table (contains discharge_status info)
##Reduce it to patientid plus a binary outcome: 0 = 'alive', 1 = any other discharge_status
pat = pd.read_sql_query("SELECT * FROM hirid.patient", conn)
pat = pat.drop(['admissiontime','sex','age'],axis=1)
pat['binary_discharge'] = np.where(pat['discharge_status'] == 'alive', 0, 1)
pat = pat.drop('discharge_status',axis=1)


def _reshape_predictions(preds, prefix, outcomes):
    """Pivot one model's hourly predictions to one row per patient and attach the outcome.

    Parameters
    ----------
    preds : DataFrame
        Long-format table with 'patientid', 'Hrs before d_time' and
        '<prefix>_pred' columns.
    prefix : str
        Model/annotator tag, e.g. 'c1', 'mv' or 'tmv'.
    outcomes : DataFrame
        Table keyed on 'patientid'; left-joined onto the pivoted predictions.

    Returns
    -------
    DataFrame
        'patientid', then one column per hour (1-5 renamed to
        '<prefix>_1hr_before' ... '<prefix>_5hrs_before'), then the columns
        of ``outcomes``.
    """
    pred_col = prefix + '_pred'

    #One row per patient, one column per 'Hrs before d_time' value
    wide = (preds[['patientid', 'Hrs before d_time', pred_col]]
            .groupby(['patientid', 'Hrs before d_time'])[pred_col]
            .sum()
            .unstack('Hrs before d_time')
            .reset_index()
            .rename_axis(None, axis=1))

    #Hour 1 is singular ('1hr'); hours 2-5 are plural ('2hrs', ...)
    hour_names = {1: prefix + '_1hr_before'}
    for hrs in range(2, 6):
        hour_names[hrs] = prefix + '_' + str(hrs) + 'hrs_before'
    wide = wide.rename(columns=hour_names)

    return pd.merge(wide, outcomes, how='left', on='patientid')


#Reformat predictions for each annotator model and the two consensus models
##(the previous version stored each rename mapping in a variable called `dict`,
## which shadowed the builtin for the rest of the session)
c1_mlval = _reshape_predictions(hirid_val, 'c1', pat)
c2_mlval = _reshape_predictions(hirid_val, 'c2', pat)
c3_mlval = _reshape_predictions(hirid_val, 'c3', pat)
c4_mlval = _reshape_predictions(hirid_val, 'c4', pat)
c5_mlval = _reshape_predictions(hirid_val, 'c5', pat)
c6_mlval = _reshape_predictions(hirid_val, 'c6', pat)
c7_mlval = _reshape_predictions(hirid_val, 'c7', pat)
c8_mlval = _reshape_predictions(hirid_val, 'c8', pat)
c9_mlval = _reshape_predictions(hirid_val, 'c9', pat)
c10_mlval = _reshape_predictions(hirid_val, 'c10', pat)
c11_mlval = _reshape_predictions(hirid_val, 'c11', pat)
mv_mlval = _reshape_predictions(hirid_val, 'mv', pat)
tmv_mlval = _reshape_predictions(hirid_val, 'tmv', pat)

c1_mlval

In [None]:
#DT - C1
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c1_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#5-fold CV Model Eval, tagged with the annotator label
c1_dt = do_cv_learning_dt(X, y)
c1_dt['Annotator'] = 'c1'

#Opt model
c1_opt = model_opt_dt(X, y)
c1_dt

In [None]:
#Plot decision tree - C1
##pred_cols labels the five hourly features; the later tree plots reuse it

pred_cols = ['1hr_before', '2hrs_before', '3hrs_before', '4hrs_before', '5hrs_before']

plt.figure(figsize=(15, 7.5))
plot_tree(c1_opt, feature_names=pred_cols, filled=True, rounded=True, fontsize=10);

In [None]:
#DT - C2
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c2_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#5-fold CV Model Eval, tagged with the annotator label
c2_dt = do_cv_learning_dt(X, y)
c2_dt['Annotator'] = 'c2'

#Opt model
c2_opt = model_opt_dt(X, y)

c2_dt

In [None]:
#Plot decision tree - C2 (pred_cols comes from the C1 plot cell)

plt.figure(figsize=(15, 7.5))
plot_tree(c2_opt, feature_names=pred_cols, filled=True, rounded=True, fontsize=10);

In [None]:
#DT - C3
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c3_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#5-fold CV Model Eval, tagged with the annotator label
c3_dt = do_cv_learning_dt(X, y)
c3_dt['Annotator'] = 'c3'

#Opt model
c3_opt = model_opt_dt(X, y)

c3_dt

In [None]:
#Plot decision tree - C3 (pred_cols comes from the C1 plot cell)

plt.figure(figsize=(15, 7.5))
plot_tree(c3_opt, feature_names=pred_cols, filled=True, rounded=True, fontsize=10);

In [None]:
#DT - C4
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c4_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#5-fold CV Model Eval, tagged with the annotator label
c4_dt = do_cv_learning_dt(X, y)
c4_dt['Annotator'] = 'c4'

#Opt model
c4_opt = model_opt_dt(X, y)

c4_dt

In [None]:
#Plot decision tree - C4 (larger canvas than the other trees; pred_cols comes from the C1 plot cell)

plt.figure(figsize=(20, 10))
plot_tree(c4_opt, feature_names=pred_cols, filled=True, rounded=True, fontsize=10);

In [None]:
#DT - C5
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c5_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#5-fold CV Model Eval, tagged with the annotator label
c5_dt = do_cv_learning_dt(X, y)
c5_dt['Annotator'] = 'c5'

#Opt model
c5_opt = model_opt_dt(X, y)

c5_dt

In [None]:
#Plot decision tree - C5 (font size left to plot_tree's default; pred_cols comes from the C1 plot cell)

plt.figure(figsize=(15, 7.5))
plot_tree(c5_opt, feature_names=pred_cols, filled=True, rounded=True);

In [None]:
#DT - C6
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c6_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#5-fold CV Model Eval, tagged with the annotator label
c6_dt = do_cv_learning_dt(X, y)
c6_dt['Annotator'] = 'c6'

#Opt model
c6_opt = model_opt_dt(X, y)

c6_dt

In [None]:
#Plot decision tree - C6 (pred_cols comes from the C1 plot cell)

plt.figure(figsize=(15, 7.5))
plot_tree(c6_opt, feature_names=pred_cols, filled=True, rounded=True, fontsize=10);

In [None]:
#DT - C7
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c7_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#5-fold CV Model Eval, tagged with the annotator label
c7_dt = do_cv_learning_dt(X, y)
c7_dt['Annotator'] = 'c7'

#Opt model
c7_opt = model_opt_dt(X, y)

c7_dt

In [None]:
#Plot decision tree - C7 (font size left to plot_tree's default; pred_cols comes from the C1 plot cell)

plt.figure(figsize=(15, 7.5))
plot_tree(c7_opt, feature_names=pred_cols, filled=True, rounded=True);

In [None]:
#DT - C8
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c8_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#5-fold CV Model Eval, tagged with the annotator label
c8_dt = do_cv_learning_dt(X, y)
c8_dt['Annotator'] = 'c8'

#Opt model
c8_opt = model_opt_dt(X, y)

c8_dt

In [None]:
#Plot decision tree - C8 (pred_cols comes from the C1 plot cell)

plt.figure(figsize=(15, 7.5))
plot_tree(c8_opt, feature_names=pred_cols, filled=True, rounded=True, fontsize=10);

In [None]:
#DT - C9
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c9_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#5-fold CV Model Eval, tagged with the annotator label
c9_dt = do_cv_learning_dt(X, y)
c9_dt['Annotator'] = 'c9'

#Opt model
c9_opt = model_opt_dt(X, y)

c9_dt

In [None]:
#Plot decision tree - C9 (font size left to plot_tree's default; pred_cols comes from the C1 plot cell)

plt.figure(figsize=(15, 7.5))
plot_tree(c9_opt, feature_names=pred_cols, filled=True, rounded=True);

In [None]:
#DT - C10
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c10_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#5-fold CV Model Eval, tagged with the annotator label
c10_dt = do_cv_learning_dt(X, y)
c10_dt['Annotator'] = 'c10'

#Opt model
c10_opt = model_opt_dt(X, y)

c10_dt

In [None]:
#Plot decision tree - C10 (pred_cols comes from the C1 plot cell)

plt.figure(figsize=(15, 7.5))
plot_tree(c10_opt, feature_names=pred_cols, filled=True, rounded=True, fontsize=10);

In [None]:
#DT - C11
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c11_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#5-fold CV Model Eval, tagged with the annotator label
c11_dt = do_cv_learning_dt(X, y)
c11_dt['Annotator'] = 'c11'

#Opt model
c11_opt = model_opt_dt(X, y)

c11_dt

In [None]:
#Plot decision tree - C11 (pred_cols comes from the C1 plot cell)

plt.figure(figsize=(15, 7.5))
plot_tree(c11_opt, feature_names=pred_cols, filled=True, rounded=True, fontsize=10);

In [None]:
#DT - MV
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = mv_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#5-fold CV Model Eval, tagged with the model label
mv_dt = do_cv_learning_dt(X, y)
mv_dt['Annotator'] = 'MV'

#Opt model
mv_opt = model_opt_dt(X, y)

mv_dt

In [None]:
#Plot decision tree - MV (pred_cols comes from the C1 plot cell)

plt.figure(figsize=(15, 7.5))
plot_tree(mv_opt, feature_names=pred_cols, filled=True, rounded=True, fontsize=10);

In [None]:
#DT - TMV
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = tmv_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#5-fold CV Model Eval, tagged with the model label
tmv_dt = do_cv_learning_dt(X, y)
tmv_dt['Annotator'] = 'TMV'

#Opt model
tmv_opt = model_opt_dt(X, y)

tmv_dt

In [None]:
#Plot decision tree - TMV (pred_cols comes from the C1 plot cell)

plt.figure(figsize=(15, 7.5))
plot_tree(tmv_opt, feature_names=pred_cols, filled=True, rounded=True, fontsize=10);

In [None]:
#Ext Validation (DT) - Performance Summary
##Stack the per-model result frames in annotator order (c1-c11, then MV, TMV)

frames = [
    c1_dt, c2_dt, c3_dt, c4_dt, c5_dt, c6_dt,
    c7_dt, c8_dt, c9_dt, c10_dt, c11_dt,
    mv_dt, tmv_dt,
]
multi_dt = pd.concat(frames)

print(multi_dt.shape)
multi_dt

In [None]:
#Plot chart - Static vs Temporal Ext Val

plt.style.use('ggplot')

#Define x and y data
##NOTE(review): annotator labels come from multi_ext_stat; the other frames are assumed to share its row order
x1 = multi_ext_stat['Annotator']
y1 = multi_ex2_pred['F1_micro']
y2 = multi_neut_pred['F1_micro']   #defined but not drawn in this figure
y3 = multi_ext_stat['F1_micro']
y4 = multi_dt['F1_micro']

#Plot chart data: (scores, line colour, legend label), drawn in legend order
plt.figure(figsize=(8,2.5))
for _scores, _colour, _label in [(y1, '#1F57C8', 'Temporal WS'),
                                 (y4, '#9467BD', 'Temporal ML (DT)'),
                                 (y3, '#62B463', 'Static')]:
    plt.plot(x1, _scores, color=_colour, marker='o', linestyle="solid", label=_label)

plt.ylim(0.0, 1.1)
plt.yticks(np.arange(0.0, 1.01, 0.2), fontsize=14)
plt.xticks(fontsize=12)

#Add title and labels
plt.title('HiRID External Validation: Static vs Temporal', fontsize=14)
plt.xlabel('Annotator', fontsize=14)
plt.ylabel('F1_micro', fontsize=14)
plt.grid(True)
plt.legend(bbox_to_anchor=(1, 1), fontsize=10)
plt.tight_layout()

plt.show()

In [None]:
#Plot chart - Static vs Temporal Ext Val (both WS variants plus the DT approach)

plt.style.use('ggplot')

#Define x and y data
##NOTE(review): annotator labels come from multi_ext_stat; the other frames are assumed to share its row order
x1 = multi_ext_stat['Annotator']
y1 = multi_ex2_pred['F1_micro']
y2 = multi_neut_pred['F1_micro']
y3 = multi_ext_stat['F1_micro']
y4 = multi_dt['F1_micro']

#Plot chart data: (scores, line colour, legend label), drawn in legend order
plt.figure(figsize=(10,3))
for _scores, _colour, _label in [(y1, '#1F57C8', 'Temporal WS (Extreme)'),
                                 (y2, '#DA4802', 'Temporal WS (Neutral)'),
                                 (y4, '#9467BD', 'Temporal DT'),
                                 (y3, '#62B463', 'Static')]:
    plt.plot(x1, _scores, color=_colour, marker='o', linestyle="solid", label=_label)

plt.ylim(0.0, 1.1)
plt.yticks(np.arange(0.0, 1.01, 0.2), fontsize=14)
plt.xticks(fontsize=12)

#Add title and labels
plt.title('HiRID External Validation: Static vs Temporal', fontsize=14)
plt.xlabel('Annotator', fontsize=14)
plt.ylabel('F1_micro', fontsize=14)
plt.grid(True)
plt.legend(bbox_to_anchor=(1, 0.5), fontsize=10)
plt.tight_layout()

plt.show()

#### Logistic Regression

In [None]:
#Define Parameter Grid for hyperparameter optimisation
##Dictionary of LR parameter options searched by GridSearchCV:
##  penalty - L2 regularisation only
##  C       - five candidate values from 100 down to 0.01

params_lr = {
    'penalty': ['l2'],
    'C': [100, 10, 1.0, 0.1, 0.01],
}

In [None]:
#Define Function - LR Model Evaluation via 5-fold CV

def do_cv_learning_lr(X, y, verbose=False, do_scale=False, random_state=1):
    """Evaluate logistic regression with 5-fold stratified CV and an inner grid search.

    For every outer fold, a GridSearchCV over ``params_lr`` (5-fold, scored on
    F1 micro) is fitted on the training rows, and the best estimator is scored
    on the held-out rows.

    Parameters
    ----------
    X : ndarray
        Feature matrix, indexed by row with the fold index arrays.
    y : ndarray
        Target labels, one per row of X.
    verbose : bool
        If True, print the best hyperparameters chosen in each outer fold.
    do_scale : bool
        If True, standardise the features. The scaler is fitted on the
        training rows of each outer fold only, so held-out statistics do not
        leak into training (previously it was fitted on all rows up front).
    random_state : int
        Seed for the fold shuffling and the LogisticRegression estimator
        (previously accepted but ignored; the default of 1 reproduces the old
        behaviour).

    Returns
    -------
    DataFrame
        One row with columns 'Annotator', 'Model', 'Optimisation', 'F1_micro'
        (mean over folds) and 'S.D.' (standard deviation over folds). The
        'Annotator' value is the placeholder 'ann'; callers overwrite it.
    """
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
    f1s = []

    for i, (train, test) in enumerate(cv.split(X, y)):
        X_tr, X_te = X[train], X[test]

        if do_scale:
            #Fit on the training fold only, then apply the same transform to both parts
            sc = StandardScaler().fit(X_tr)
            X_tr, X_te = sc.transform(X_tr), sc.transform(X_te)

        gcsv = GridSearchCV(LogisticRegression(random_state=random_state),
                            param_grid=params_lr,
                            cv=5,
                            scoring='f1_micro')
        grid_result = gcsv.fit(X_tr, y[train])
        best_params = grid_result.best_params_
        if verbose:
            print('fold', i,'best_params', best_params)

        clf = grid_result.best_estimator_
        f1s.append(metrics.f1_score(y[test], clf.predict(X_te), average='micro'))

    ##Performance metrics, returned as a one-row DF
    dflr_multi_f1data = pd.DataFrame(
        data=[['ann', 'multi', 'F1_micro', np.mean(f1s), np.std(f1s)]],
        columns=['Annotator', 'Model', 'Optimisation', 'F1_micro', 'S.D.'])

    return dflr_multi_f1data

In [None]:
#Define Function - LR Odds Ratios

def or_lr(X, y, verbose=False, do_scale=False, random_state=1):
    """Return per-fold odds ratios from grid-searched logistic regression models.

    Uses the same 5-fold stratified split and inner GridSearchCV over
    ``params_lr`` as the LR evaluation function, but instead of scoring the
    held-out rows it exponentiates the coefficients of each fold's best model.

    Parameters
    ----------
    X : ndarray
        Feature matrix, indexed by row with the fold index arrays.
    y : ndarray
        Target labels, one per row of X.
    verbose : bool
        If True, print the best hyperparameters chosen in each fold.
    do_scale : bool
        If True, standardise the features using a scaler fitted on each
        fold's training rows (previously fitted on all rows up front).
    random_state : int
        Seed for the fold shuffling and the LogisticRegression estimator
        (previously accepted but ignored; the default of 1 reproduces the old
        behaviour).

    Returns
    -------
    list of list of float
        One inner list per fold, holding exp(coefficient) for each feature,
        taken from the first row of ``coef_``.
    """
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
    ORs = []

    for i, (train, test) in enumerate(cv.split(X, y)):
        X_tr = X[train]

        if do_scale:
            #Fit the scaler on the rows the model is trained on
            X_tr = StandardScaler().fit_transform(X_tr)

        gcsv = GridSearchCV(LogisticRegression(random_state=random_state),
                            param_grid=params_lr,
                            cv=5,
                            scoring='f1_micro')
        grid_result = gcsv.fit(X_tr, y[train])
        best_params = grid_result.best_params_
        if verbose:
            print('fold', i,'best_params', best_params)

        clf = grid_result.best_estimator_
        if hasattr(clf, 'coef_'):
            #np.exp avoids relying on `math`, which the notebook's import cell does not load
            ORs.append(np.exp(clf.coef_[0]).tolist())

    return ORs

In [None]:
#Preview the reshaped C1 table that feeds the LR models below
c1_mlval.head()

In [None]:
#LR - C1
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c1_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#Model Output, tagged with the annotator label
c1_lr = do_cv_learning_lr(X, y)
c1_lr['Annotator'] = 'C1'

#Odds ratios (one list per CV fold)
c1_or = or_lr(X, y)

c1_lr

In [None]:
#Display the raw per-fold odds ratios returned by or_lr for C1
c1_or

In [None]:
#Find avg ORs per hour - C1
##c1_or holds one list of odds ratios per CV fold; average across folds for each hourly feature
##(np.mean over axis 0 follows however many folds or_lr returned instead of hard-coding five)

c1_or_avg = np.mean(c1_or, axis=0).tolist()
c1_1hr, c1_2hr, c1_3hr, c1_4hr, c1_5hr = c1_or_avg

#Display as DF
feature_names = ['c1_1hr_before','c1_2hrs_before','c1_3hrs_before','c1_4hrs_before','c1_5hrs_before']

c1_or_table = pd.DataFrame({'Feature': feature_names, 'C1': c1_or_avg})
#Drop the 'c1_' prefix so the per-annotator tables share 'Feature' values when merged
c1_or_table['Feature'] = c1_or_table['Feature'].str[3:]
c1_or_table

In [None]:
#LR - C2
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c2_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#Model Output, tagged with the annotator label
c2_lr = do_cv_learning_lr(X, y)
c2_lr['Annotator'] = 'C2'

#Odds ratios (one list per CV fold)
c2_or = or_lr(X, y)

c2_lr

In [None]:
#Find avg ORs per hour - C2
##c2_or holds one list of odds ratios per CV fold; average across folds for each hourly feature
##(np.mean over axis 0 follows however many folds or_lr returned instead of hard-coding five)

c2_or_avg = np.mean(c2_or, axis=0).tolist()
c2_1hr, c2_2hr, c2_3hr, c2_4hr, c2_5hr = c2_or_avg

#Display as DF
feature_names = ['c2_1hr_before','c2_2hrs_before','c2_3hrs_before','c2_4hrs_before','c2_5hrs_before']

c2_or_table = pd.DataFrame({'Feature': feature_names, 'C2': c2_or_avg})
#Drop the 'c2_' prefix so the per-annotator tables share 'Feature' values when merged
c2_or_table['Feature'] = c2_or_table['Feature'].str[3:]
c2_or_table

In [None]:
#LR - C3
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c3_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#Model Output, tagged with the annotator label
c3_lr = do_cv_learning_lr(X, y)
c3_lr['Annotator'] = 'C3'

#Odds ratios (one list per CV fold)
c3_or = or_lr(X, y)

c3_lr

In [None]:
#Find avg ORs per hour - C3
##c3_or holds one list of odds ratios per CV fold; average across folds for each hourly feature
##(np.mean over axis 0 follows however many folds or_lr returned instead of hard-coding five)

c3_or_avg = np.mean(c3_or, axis=0).tolist()
c3_1hr, c3_2hr, c3_3hr, c3_4hr, c3_5hr = c3_or_avg

#Display as DF
feature_names = ['c3_1hr_before','c3_2hrs_before','c3_3hrs_before','c3_4hrs_before','c3_5hrs_before']

c3_or_table = pd.DataFrame({'Feature': feature_names, 'C3': c3_or_avg})
#Drop the 'c3_' prefix so the per-annotator tables share 'Feature' values when merged
c3_or_table['Feature'] = c3_or_table['Feature'].str[3:]
c3_or_table

In [None]:
#LR - C4
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c4_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#Model Output, tagged with the annotator label
c4_lr = do_cv_learning_lr(X, y)
c4_lr['Annotator'] = 'C4'

#Odds ratios (one list per CV fold)
c4_or = or_lr(X, y)

c4_lr

In [None]:
#Find avg ORs per hour - C4
##c4_or holds one list of odds ratios per CV fold; average across folds for each hourly feature
##(np.mean over axis 0 follows however many folds or_lr returned instead of hard-coding five)

c4_or_avg = np.mean(c4_or, axis=0).tolist()
c4_1hr, c4_2hr, c4_3hr, c4_4hr, c4_5hr = c4_or_avg

#Display as DF
feature_names = ['c4_1hr_before','c4_2hrs_before','c4_3hrs_before','c4_4hrs_before','c4_5hrs_before']

c4_or_table = pd.DataFrame({'Feature': feature_names, 'C4': c4_or_avg})
#Drop the 'c4_' prefix so the per-annotator tables share 'Feature' values when merged
c4_or_table['Feature'] = c4_or_table['Feature'].str[3:]
c4_or_table

In [None]:
#LR - C5
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c5_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#Model Output, tagged with the annotator label
c5_lr = do_cv_learning_lr(X, y)
c5_lr['Annotator'] = 'C5'

#Odds ratios (one list per CV fold)
c5_or = or_lr(X, y)

c5_lr

In [None]:
#Find avg ORs per hour - C5
##c5_or holds one list of odds ratios per CV fold; average across folds for each hourly feature
##(np.mean over axis 0 follows however many folds or_lr returned instead of hard-coding five)

c5_or_avg = np.mean(c5_or, axis=0).tolist()
c5_1hr, c5_2hr, c5_3hr, c5_4hr, c5_5hr = c5_or_avg

#Display as DF
feature_names = ['c5_1hr_before','c5_2hrs_before','c5_3hrs_before','c5_4hrs_before','c5_5hrs_before']

c5_or_table = pd.DataFrame({'Feature': feature_names, 'C5': c5_or_avg})
#Drop the 'c5_' prefix so the per-annotator tables share 'Feature' values when merged
c5_or_table['Feature'] = c5_or_table['Feature'].str[3:]
c5_or_table

In [None]:
#LR - C6
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c6_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#Model Output, tagged with the annotator label
c6_lr = do_cv_learning_lr(X, y)
c6_lr['Annotator'] = 'C6'

#Odds ratios (one list per CV fold)
c6_or = or_lr(X, y)

c6_lr

In [None]:
#Find avg ORs per hour - C6
##c6_or holds one list of odds ratios per CV fold; average across folds for each hourly feature
##(np.mean over axis 0 follows however many folds or_lr returned instead of hard-coding five)

c6_or_avg = np.mean(c6_or, axis=0).tolist()
c6_1hr, c6_2hr, c6_3hr, c6_4hr, c6_5hr = c6_or_avg

#Display as DF
feature_names = ['c6_1hr_before','c6_2hrs_before','c6_3hrs_before','c6_4hrs_before','c6_5hrs_before']

c6_or_table = pd.DataFrame({'Feature': feature_names, 'C6': c6_or_avg})
#Drop the 'c6_' prefix so the per-annotator tables share 'Feature' values when merged
c6_or_table['Feature'] = c6_or_table['Feature'].str[3:]
c6_or_table

In [None]:
#LR - C7
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c7_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#Model Output, tagged with the annotator label
c7_lr = do_cv_learning_lr(X, y)
c7_lr['Annotator'] = 'C7'

#Odds ratios (one list per CV fold)
c7_or = or_lr(X, y)

c7_lr

In [None]:
#Find avg ORs per hour - C7
##c7_or holds one list of odds ratios per CV fold; average across folds for each hourly feature
##(np.mean over axis 0 follows however many folds or_lr returned instead of hard-coding five)

c7_or_avg = np.mean(c7_or, axis=0).tolist()
c7_1hr, c7_2hr, c7_3hr, c7_4hr, c7_5hr = c7_or_avg

#Display as DF
feature_names = ['c7_1hr_before','c7_2hrs_before','c7_3hrs_before','c7_4hrs_before','c7_5hrs_before']

c7_or_table = pd.DataFrame({'Feature': feature_names, 'C7': c7_or_avg})
#Drop the 'c7_' prefix so the per-annotator tables share 'Feature' values when merged
c7_or_table['Feature'] = c7_or_table['Feature'].str[3:]
c7_or_table

In [None]:
#LR - C8
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c8_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#Model Output, tagged with the annotator label
c8_lr = do_cv_learning_lr(X, y)
c8_lr['Annotator'] = 'C8'

#Odds ratios (one list per CV fold)
c8_or = or_lr(X, y)

c8_lr

In [None]:
#Find avg ORs per hour - C8
##c8_or holds one list of odds ratios per CV fold; average across folds for each hourly feature
##(np.mean over axis 0 follows however many folds or_lr returned instead of hard-coding five)

c8_or_avg = np.mean(c8_or, axis=0).tolist()
c8_1hr, c8_2hr, c8_3hr, c8_4hr, c8_5hr = c8_or_avg

#Display as DF
feature_names = ['c8_1hr_before','c8_2hrs_before','c8_3hrs_before','c8_4hrs_before','c8_5hrs_before']

c8_or_table = pd.DataFrame({'Feature': feature_names, 'C8': c8_or_avg})
#Drop the 'c8_' prefix so the per-annotator tables share 'Feature' values when merged
c8_or_table['Feature'] = c8_or_table['Feature'].str[3:]
c8_or_table

In [None]:
#LR - C9
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c9_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#Model Output, tagged with the annotator label
c9_lr = do_cv_learning_lr(X, y)
c9_lr['Annotator'] = 'C9'

#Odds ratios (one list per CV fold)
c9_or = or_lr(X, y)

c9_lr

In [None]:
#Find avg ORs per hour - C9
##c9_or holds one list of odds ratios per CV fold; average across folds for each hourly feature
##(np.mean over axis 0 follows however many folds or_lr returned instead of hard-coding five)

c9_or_avg = np.mean(c9_or, axis=0).tolist()
c9_1hr, c9_2hr, c9_3hr, c9_4hr, c9_5hr = c9_or_avg

#Display as DF
feature_names = ['c9_1hr_before','c9_2hrs_before','c9_3hrs_before','c9_4hrs_before','c9_5hrs_before']

c9_or_table = pd.DataFrame({'Feature': feature_names, 'C9': c9_or_avg})
#Drop the 'c9_' prefix so the per-annotator tables share 'Feature' values when merged
c9_or_table['Feature'] = c9_or_table['Feature'].str[3:]
c9_or_table

In [None]:
#LR - C10
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c10_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#Model Output, tagged with the annotator label
c10_lr = do_cv_learning_lr(X, y)
c10_lr['Annotator'] = 'C10'

#Odds ratios (one list per CV fold)
c10_or = or_lr(X, y)

c10_lr

In [None]:
#Find avg ORs per hour - C10
##c10_or holds one list of odds ratios per CV fold; average across folds for each hourly feature
##(np.mean over axis 0 follows however many folds or_lr returned instead of hard-coding five)

c10_or_avg = np.mean(c10_or, axis=0).tolist()
c10_1hr, c10_2hr, c10_3hr, c10_4hr, c10_5hr = c10_or_avg

#Display as DF
feature_names = ['c10_1hr_before','c10_2hrs_before','c10_3hrs_before','c10_4hrs_before','c10_5hrs_before']

c10_or_table = pd.DataFrame({'Feature': feature_names, 'C10': c10_or_avg})
#Drop the 4-character 'c10_' prefix so the per-annotator tables share 'Feature' values when merged
c10_or_table['Feature'] = c10_or_table['Feature'].str[4:]
c10_or_table

In [None]:
#LR - C11
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = c11_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#Model Output, tagged with the annotator label
c11_lr = do_cv_learning_lr(X, y)
c11_lr['Annotator'] = 'C11'

#Odds ratios (one list per CV fold)
c11_or = or_lr(X, y)

c11_lr

In [None]:
#Find avg ORs per hour - C11
##c11_or holds one list of odds ratios per CV fold; average across folds for each hourly feature
##(np.mean over axis 0 follows however many folds or_lr returned instead of hard-coding five)

c11_or_avg = np.mean(c11_or, axis=0).tolist()
c11_1hr, c11_2hr, c11_3hr, c11_4hr, c11_5hr = c11_or_avg

#Display as DF
feature_names = ['c11_1hr_before','c11_2hrs_before','c11_3hrs_before','c11_4hrs_before','c11_5hrs_before']

c11_or_table = pd.DataFrame({'Feature': feature_names, 'C11': c11_or_avg})
#Drop the 4-character 'c11_' prefix so the per-annotator tables share 'Feature' values when merged
c11_or_table['Feature'] = c11_or_table['Feature'].str[4:]
c11_or_table

In [None]:
#LR - MV
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = mv_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#Model Output, tagged with the model label
mv_lr = do_cv_learning_lr(X, y)
mv_lr['Annotator'] = 'MV'

#Odds ratios (one list per CV fold)
mv_or = or_lr(X, y)

mv_lr

In [None]:
#Find avg ORs per hour - MV
##mv_or holds one list of odds ratios per CV fold; average across folds for each hourly feature
##(np.mean over axis 0 follows however many folds or_lr returned instead of hard-coding five)

mv_or_avg = np.mean(mv_or, axis=0).tolist()
mv_1hr, mv_2hr, mv_3hr, mv_4hr, mv_5hr = mv_or_avg

#Display as DF
feature_names = ['mv_1hr_before','mv_2hrs_before','mv_3hrs_before','mv_4hrs_before','mv_5hrs_before']

mv_or_table = pd.DataFrame({'Feature': feature_names, 'MV': mv_or_avg})
#Drop the 'mv_' prefix so the per-annotator tables share 'Feature' values when merged
mv_or_table['Feature'] = mv_or_table['Feature'].str[3:]
mv_or_table

In [None]:
#LR - TMV
##X = columns 1-5 (hourly predictions); y = column 6 (expected to be binary_discharge - confirm column order)

array = tmv_mlval.to_numpy()
X = array[:, 1:6].astype(float)
y = le.fit_transform(array[:, 6].astype(int))

#Model Output, tagged with the model label
tmv_lr = do_cv_learning_lr(X, y)
tmv_lr['Annotator'] = 'TMV'

#Odds ratios (one list per CV fold)
tmv_or = or_lr(X, y)

tmv_lr

In [None]:
#Find avg ORs per hour - TMV
#tmv_or is indexed as tmv_or[entry][hour]; each hour is averaged over its 5 entries
#(presumably one entry per CV fold - confirm against or_lr)

tmv_or_avg = [sum(tmv_or[entry][hour] for entry in range(5))/5 for hour in range(5)]

#Keep the per-hour values available under their individual names
tmv_1hr, tmv_2hr, tmv_3hr, tmv_4hr, tmv_5hr = tmv_or_avg

#Display as DF
feature_names = ['tmv_1hr_before','tmv_2hrs_before','tmv_3hrs_before','tmv_4hrs_before','tmv_5hrs_before']

#Drop the 4-character 'tmv_' prefix so the Feature values match across annotators
tmv_or_table = pd.DataFrame({'Feature': [name[4:] for name in feature_names],
                             'TMV': tmv_or_avg},
                            columns=['Feature', 'TMV'])
tmv_or_table

In [None]:
#Ext Validation: LR - Performance Summary

#Stack the per-annotator LR result tables row-wise (C1-C11, then MV and TMV)
frames = [
    c1_lr, c2_lr, c3_lr, c4_lr, c5_lr, c6_lr,
    c7_lr, c8_lr, c9_lr, c10_lr, c11_lr,
    mv_lr, tmv_lr,
]
multi_lr = pd.concat(frames, axis=0)

print(multi_lr.shape)
multi_lr

In [None]:
#External Validation: LR Odds Ratios - Summary

#Join the per-annotator odds-ratio tables on Feature, one table at a time
annotator_or_tables = [c1_or_table, c2_or_table, c3_or_table, c4_or_table,
                       c5_or_table, c6_or_table, c7_or_table, c8_or_table,
                       c9_or_table, c10_or_table, c11_or_table,
                       mv_or_table, tmv_or_table]

ann_ORs = annotator_or_tables[0]
for or_table in annotator_or_tables[1:]:
    ann_ORs = ann_ORs.merge(or_table, on=['Feature'])

#Flip to one row per annotator; after the transpose the first row holds the
#feature names, so promote it to the header and then drop it from the data
ann_ORs = ann_ORs.T.reset_index()
header_row = ann_ORs.iloc[0]
ann_ORs = ann_ORs.rename(columns=header_row).iloc[1:, :]
ann_ORs = ann_ORs.rename(columns={'Feature': 'Annotator'})
ann_ORs

In [None]:
#Plot chart - Static VS Temporal (F1 micro per annotator)

plt.style.use('ggplot')

#Define x and y data
x1 = multi_ext_stat['Annotator']
y1 = multi_ex2_pred['F1_micro']
y2 = multi_neut_pred['F1_micro']
y3 = multi_ext_stat['F1_micro']

#Plot chart data - one line per result set, all sharing the same marker/line style
plt.figure(figsize=(10,3.5))
line_specs = [(y1, '#1F57C8', 'Temporal WS (Extreme)'),
              (y2, '#DA4802', 'Temporal WS (Neutral)'),
              (y3, '#62B463', 'Static')]
for f1_series, line_colour, legend_label in line_specs:
    plt.plot(x1, f1_series, color=line_colour, marker='o', linestyle="solid", label=legend_label)

#y axis ticks every 0.2 from 0 to 1
plt.yticks(np.arange(0.0,1.01, 0.2))

#Add title and labels
plt.title('HiRID External Validation: Static vs Temporal', fontsize=14)
plt.xlabel('Annotator', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=14)
plt.ylabel('F1 micro', fontsize=14)
plt.grid(True)
plt.legend(bbox_to_anchor=(1, 1), fontsize=11)
plt.tight_layout()

plt.show()

In [None]:
#Plot chart - Model Calibration (DT & LR)
#Line chart of F1 micro per annotator for the decision tree results (multi_dt)
#and the logistic regression results (multi_lr).

plt.style.use('ggplot')

#Define x and y data
#NOTE(review): the x labels are taken from multi_ext_stat - this assumes its rows
#are in the same annotator order as multi_dt and multi_lr; confirm
x1 = multi_ext_stat['Annotator']
y4 = multi_dt['F1_micro']
y5 = multi_lr['F1_micro']

#Plot chart data
plt.figure(figsize=(10,3.5))
plt.plot(x1, y4, color='#9467BD', marker='o', linestyle="solid", label='Decision Tree')
plt.plot(x1, y5, color='#ffa500', marker='o', linestyle="solid", label='Logistic Regression')

#y axis ticks every 0.2 from 0 to 1
plt.yticks(np.arange(0.0,1.01, 0.2))

#Add title and labels (title typo 'Futher' corrected)
plt.title('HiRID Further Analysis: Model Calibration', fontsize=16)
plt.xlabel('Annotator', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=14)
plt.ylabel('F1 micro', fontsize=14)
plt.grid(True)
plt.legend(fontsize=13, loc=4)
plt.tight_layout()

plt.show()

In [None]:
#Display the per-annotator odds-ratio summary table
ann_ORs

In [None]:
#Plot Feature Importance

##create dataframe to plot: (ann_ORs column, legend label, bar colour) per hour
index = ann_ORs['Annotator'].to_list()

bar_specs = [('1hr_before', '1hr before discharge/death', '#FFA319FF'),
             ('2hrs_before', '2hrs before discharge/death', '#CD534CFF'),
             ('3hrs_before', '3hrs before discharge/death', '#1f77b4'),
             ('4hrs_before', '4hrs before discharge/death', '#62B463'),
             ('5hrs_before', '5hrs before discharge/death', '#9467BD')]

df = pd.DataFrame({legend_label: ann_ORs[or_column].to_list()
                   for or_column, legend_label, _colour in bar_specs},
                  index=index)

##plot grouped bar chart - one bar group per annotator, one bar per hour
bar_colours = {legend_label: colour for _column, legend_label, colour in bar_specs}
ax = df.plot.bar(rot=0, color=bar_colours, width=0.7)

##set Figure size
fig = ax.get_figure()
fig.set_size_inches(12, 3.5)

##set title and axis labels
ax.set_title("Odds Ratio distributions for HiRID predicted labels 1-5hrs before discharge/death", fontsize=15)
ax.set_xlabel("Annotator", fontsize=14)
ax.set_xticklabels(index,fontsize=14)
ax.set_ylabel("Odds Ratio", fontsize=14)

##horizontal reference line at an odds ratio of 1
ax.axhline(y=1, color='#000000', linestyle='-')
plt.legend(loc=(1.02,0), fontsize=12)
plt.tight_layout()


## 4. External Validation - IAA Metrics

In [None]:
#Independent copy of just the patient id and discharge status columns of hirid_stat
true_discharge = hirid_stat[['patientid','discharge_status']].copy(deep=True)
true_discharge

### 4.1 Extreme 1 cut-off - Discharged Alive

In [None]:
#Display the ex1 cut-off prediction table
ann_ex1_pred

In [None]:
#Select only Discharged alive - from ex1 cut-off

annotator_cols = ['C1', 'C2','C3','C4','C5','C6','C7','C8','C9','C10','C11']

#Attach the true discharge status and keep the patients discharged alive
true_alive_ex1 = ann_ex1_pred.merge(true_discharge, on='patientid')
true_alive_ex1 = true_alive_ex1[true_alive_ex1['discharge_status']=='alive']

#Relabel all 15 columns positionally, then keep only C1-C11 as strings
true_alive_ex1.columns = ['patientid'] + annotator_cols + ['MV','TMV','discharge_status']
true_alive_ex1 = true_alive_ex1[annotator_cols].applymap(str)
true_alive_ex1

In [None]:
#Calculate pairwise Cohen's kappa (Discharged Alive only)

#Prediction column of each annotator, taken by position (columns 0-10 -> C1-C11)
preds_by_annotator = {num: true_alive_ex1.iloc[:, num - 1] for num in range(1, 12)}

#The following cells read the results by name (cN_pred, cA_cB), so publish them
#as notebook-level variables rather than writing out every assignment by hand
for annotator_num in range(1, 12):
    globals()['c{}_pred'.format(annotator_num)] = preds_by_annotator[annotator_num]

#Kappa for every annotator pair (first < second), rounded to 2 decimal places
for first_num in range(1, 12):
    for second_num in range(first_num + 1, 12):
        globals()['c{}_c{}'.format(first_num, second_num)] = round(
            cohen_kappa_score(preds_by_annotator[first_num], preds_by_annotator[second_num]), 2)

In [None]:
#Pairwise Cohen's kappa (Discharged Alive only)

annotators = ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"]

#For each annotator's column: the kappas against the later annotators
kappas_below_diagonal = [
    [c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11],
    [c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11],
    [c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11],
    [c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11],
    [c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11],
    [c6_c7, c6_c8, c6_c9, c6_c10, c6_c11],
    [c7_c8, c7_c9, c7_c10, c7_c11],
    [c8_c9, c8_c10, c8_c11],
    [c9_c10, c9_c11],
    [c10_c11],
    [],
]

#First frame holds the row labels; each annotator column is blanks above the
#diagonal, 1.00 on the diagonal, then that annotator's pairwise kappas
frames = [pd.DataFrame({"": annotators})]
for diag_pos, (annotator_name, kappas_below) in enumerate(zip(annotators, kappas_below_diagonal)):
    frames.append(pd.DataFrame({annotator_name: [""] * diag_pos + [1.00] + kappas_below}))

#Keep the single-column frames available under their individual names
C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11 = frames

cohen_k_ex1_alive = pd.concat(frames, axis=1).set_index("")

cohen_k_ex1_alive

In [None]:
#Plot pairwise Cohen's kappa (Discharged Alive only)

import seaborn as sns

#Convert every annotator column to a numeric dtype before plotting
cols = ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"]
cohen_k_ex1_alive[cols] = cohen_k_ex1_alive[cols].apply(pd.to_numeric)

fig = plt.figure(figsize=(8, 5), dpi=80, facecolor='w', edgecolor='k')

#Annotated heatmap with the colour scale fixed to [0, 1]
heatmap_options = dict(annot=True, vmin=0, vmax=1, fmt='.2f',
                       cmap="YlGnBu", annot_kws={"fontsize":15})
res = sns.heatmap(cohen_k_ex1_alive, **heatmap_options)

#Re-apply the existing tick labels at a larger font size
res.set_xticklabels(res.get_xmajorticklabels(), fontsize = 15)
res.set_yticklabels(res.get_ymajorticklabels(), fontsize = 15)

plt.tight_layout()
plt.show()

#range cohen's k: -0.00 to 0.81

In [None]:
#Calculate average pairwise cohen's kappa values (Discharged alive)

#data - the 55 pairwise kappas, grouped by the first annotator of each pair
kappa_rows = [
    [c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11],
    [c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11],
    [c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11],
    [c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11],
    [c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11],
    [c6_c7, c6_c8, c6_c9, c6_c10, c6_c11],
    [c7_c8, c7_c9, c7_c10, c7_c11],
    [c8_c9, c8_c10, c8_c11],
    [c9_c10, c9_c11],
    [c10_c11],
]
sample = [kappa for row in kappa_rows for kappa in row]

#Mean and sample standard deviation, rounded to 3 decimal places
avg = round(mean(sample),3)
sd = round(statistics.stdev(sample),3)

# Prints average & standard deviation
for stat_name, stat_value in (("Average:", avg), ("Standard Deviation:", sd)):
    print(stat_name, stat_value)

In [None]:
#Check Fleiss' kappa for Discharged Alive

##annotator label columns to count over
cols = [ 'C1','C2','C3','C4','C5','C6','C7','C8','C9','C10','C11']
annotator_labels = true_alive_ex1[cols]

##per row, count how many annotator columns hold label '0', '1' and '3';
##only the three count columns are kept
all_alive_ex1 = pd.DataFrame({'count_alive': annotator_labels.eq('0').sum(axis=1),
                              'count_dead': annotator_labels.eq('1').sum(axis=1),
                              'count_other': annotator_labels.eq('3').sum(axis=1)},
                             columns=['count_alive', 'count_dead', 'count_other'])

all_alive_ex1

In [None]:
#Calculate Fleiss' kappa - Discharged Alive

#Fleiss' kappa over the per-row label counts, rounded to 3 decimal places
fleiss_k_ex1_alive = fleiss_kappa(all_alive_ex1, method='fleiss')
fleiss_k_ex1_alive = round(fleiss_k_ex1_alive, 3)

print("Fleiss' kappa: {:.3f}".format(fleiss_k_ex1_alive))

### 4.2 Extreme 1 cut-off - Died

In [None]:
#Select only Discharged died - from ex1 cut-off

annotator_cols = ['C1', 'C2','C3','C4','C5','C6','C7','C8','C9','C10','C11']

#Attach the true discharge status and keep the patients who died
true_died_ex1 = ann_ex1_pred.merge(true_discharge, on='patientid')
true_died_ex1 = true_died_ex1[true_died_ex1['discharge_status']=='dead']

#Relabel all 15 columns positionally, then keep only C1-C11 as strings
true_died_ex1.columns = ['patientid'] + annotator_cols + ['MV','TMV','discharge_status']
true_died_ex1 = true_died_ex1[annotator_cols].applymap(str)
true_died_ex1

In [None]:
#Calculate pairwise Cohen's kappa (Discharged died only)

#Prediction column of each annotator, taken by position (columns 0-10 -> C1-C11)
preds_by_annotator = {num: true_died_ex1.iloc[:, num - 1] for num in range(1, 12)}

#The following cells read the results by name (cN_pred, cA_cB), so publish them
#as notebook-level variables rather than writing out every assignment by hand
for annotator_num in range(1, 12):
    globals()['c{}_pred'.format(annotator_num)] = preds_by_annotator[annotator_num]

#Kappa for every annotator pair (first < second), rounded to 2 decimal places
for first_num in range(1, 12):
    for second_num in range(first_num + 1, 12):
        globals()['c{}_c{}'.format(first_num, second_num)] = round(
            cohen_kappa_score(preds_by_annotator[first_num], preds_by_annotator[second_num]), 2)

In [None]:
#Pairwise Cohen's kappa (Discharged died only)

annotators = ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"]

#For each annotator's column: the kappas against the later annotators
kappas_below_diagonal = [
    [c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11],
    [c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11],
    [c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11],
    [c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11],
    [c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11],
    [c6_c7, c6_c8, c6_c9, c6_c10, c6_c11],
    [c7_c8, c7_c9, c7_c10, c7_c11],
    [c8_c9, c8_c10, c8_c11],
    [c9_c10, c9_c11],
    [c10_c11],
    [],
]

#First frame holds the row labels; each annotator column is blanks above the
#diagonal, 1.00 on the diagonal, then that annotator's pairwise kappas
frames = [pd.DataFrame({"": annotators})]
for diag_pos, (annotator_name, kappas_below) in enumerate(zip(annotators, kappas_below_diagonal)):
    frames.append(pd.DataFrame({annotator_name: [""] * diag_pos + [1.00] + kappas_below}))

#Keep the single-column frames available under their individual names
C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11 = frames

cohen_k_ex1_died = pd.concat(frames, axis=1).set_index("")

cohen_k_ex1_died

In [None]:
#Plot pairwise Cohen's kappa (Discharged Died only)

import seaborn as sns

#Convert every annotator column to a numeric dtype before plotting
cols = ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"]
cohen_k_ex1_died[cols] = cohen_k_ex1_died[cols].apply(pd.to_numeric)

fig = plt.figure(figsize=(8, 5), dpi=80, facecolor='w', edgecolor='k')

#Annotated heatmap with the colour scale fixed to [0, 1]
heatmap_options = dict(annot=True, vmin=0, vmax=1, fmt='.2f',
                       cmap="YlGnBu", annot_kws={"fontsize":15})
res = sns.heatmap(cohen_k_ex1_died, **heatmap_options)

#Re-apply the existing tick labels at a larger font size
res.set_xticklabels(res.get_xmajorticklabels(), fontsize = 15)
res.set_yticklabels(res.get_ymajorticklabels(), fontsize = 15)

plt.tight_layout()
plt.show()

#range cohen's k: -0.01 to 0.79

In [None]:
#Calculate average pairwise cohen's kappa values (Died)

#data - the 55 pairwise kappas, grouped by the first annotator of each pair
kappa_rows = [
    [c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11],
    [c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11],
    [c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11],
    [c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11],
    [c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11],
    [c6_c7, c6_c8, c6_c9, c6_c10, c6_c11],
    [c7_c8, c7_c9, c7_c10, c7_c11],
    [c8_c9, c8_c10, c8_c11],
    [c9_c10, c9_c11],
    [c10_c11],
]
sample = [kappa for row in kappa_rows for kappa in row]

#Mean and sample standard deviation, rounded to 3 decimal places
avg = round(mean(sample),3)
sd = round(statistics.stdev(sample),3)

# Prints average & standard deviation
for stat_name, stat_value in (("Average:", avg), ("Standard Deviation:", sd)):
    print(stat_name, stat_value)

In [None]:
#Check Fleiss' kappa for Discharged died
#Builds the per-row label-count table that is passed to fleiss_kappa in the next cell.

##annotator label columns to count over
cols = [ 'C1','C2','C3','C4','C5','C6','C7','C8','C9','C10','C11']
annotator_labels = true_died_ex1[cols]

##per row, count how many annotator columns hold label '0', '1' and '3'.
##The '0' count was previously named 'count_died' (a find/replace artefact that
##sat next to 'count_dead'); it is the same label that the alive tables call
##'count_alive', so it uses that name here too.
##Counting over the annotator columns only also keeps the freshly added count
##columns out of the later comparisons.
all_died_ex1 = pd.DataFrame({'count_alive': annotator_labels.eq('0').sum(axis=1),
                             'count_dead': annotator_labels.eq('1').sum(axis=1),
                             'count_other': annotator_labels.eq('3').sum(axis=1)},
                            columns=['count_alive', 'count_dead', 'count_other'])

all_died_ex1

In [None]:
#Calculate Fleiss' kappa - Died

#Fleiss' kappa over the per-row label counts, rounded to 3 decimal places
fleiss_k_ex1_died = fleiss_kappa(all_died_ex1, method='fleiss')
fleiss_k_ex1_died = round(fleiss_k_ex1_died, 3)

print("Fleiss' kappa: {:.3f}".format(fleiss_k_ex1_died))

### 4.3 Extreme 2 cut-off - Discharged Alive

In [None]:
#Display the Ex2 cut-off prediction table
ann_ex2_pred

In [None]:
#Select only Discharged alive - from Ex2 cut-off

annotator_cols = ['C1', 'C2','C3','C4','C5','C6','C7','C8','C9','C10','C11']

#Attach the true discharge status and keep the patients discharged alive
true_alive_ex2 = ann_ex2_pred.merge(true_discharge, on='patientid')
true_alive_ex2 = true_alive_ex2[true_alive_ex2['discharge_status']=='alive']

#Relabel all 15 columns positionally, then keep only C1-C11 as strings
true_alive_ex2.columns = ['patientid'] + annotator_cols + ['MV','TMV','discharge_status']
true_alive_ex2 = true_alive_ex2[annotator_cols].applymap(str)
true_alive_ex2

In [None]:
#Calculate pairwise Cohen's kappa (Discharged Alive only)

#Prediction column of each annotator, taken by position (columns 0-10 -> C1-C11)
preds_by_annotator = {num: true_alive_ex2.iloc[:, num - 1] for num in range(1, 12)}

#The following cells read the results by name (cN_pred, cA_cB), so publish them
#as notebook-level variables rather than writing out every assignment by hand
for annotator_num in range(1, 12):
    globals()['c{}_pred'.format(annotator_num)] = preds_by_annotator[annotator_num]

#Kappa for every annotator pair (first < second), rounded to 2 decimal places
for first_num in range(1, 12):
    for second_num in range(first_num + 1, 12):
        globals()['c{}_c{}'.format(first_num, second_num)] = round(
            cohen_kappa_score(preds_by_annotator[first_num], preds_by_annotator[second_num]), 2)

In [None]:
#Pairwise Cohen's kappa (Discharged Alive only)

annotators = ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"]

#For each annotator's column: the kappas against the later annotators
kappas_below_diagonal = [
    [c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11],
    [c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11],
    [c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11],
    [c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11],
    [c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11],
    [c6_c7, c6_c8, c6_c9, c6_c10, c6_c11],
    [c7_c8, c7_c9, c7_c10, c7_c11],
    [c8_c9, c8_c10, c8_c11],
    [c9_c10, c9_c11],
    [c10_c11],
    [],
]

#First frame holds the row labels; each annotator column is blanks above the
#diagonal, 1.00 on the diagonal, then that annotator's pairwise kappas
frames = [pd.DataFrame({"": annotators})]
for diag_pos, (annotator_name, kappas_below) in enumerate(zip(annotators, kappas_below_diagonal)):
    frames.append(pd.DataFrame({annotator_name: [""] * diag_pos + [1.00] + kappas_below}))

#Keep the single-column frames available under their individual names
C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11 = frames

cohen_k_ex2_alive = pd.concat(frames, axis=1).set_index("")

cohen_k_ex2_alive

In [None]:
#Plot pairwise Cohen's kappa (Discharged Alive only)

import seaborn as sns

#Convert every annotator column to a numeric dtype before plotting
cols = ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"]
cohen_k_ex2_alive[cols] = cohen_k_ex2_alive[cols].apply(pd.to_numeric)

fig = plt.figure(figsize=(8, 5), dpi=80, facecolor='w', edgecolor='k')

#Annotated heatmap with the colour scale fixed to [0, 1]
heatmap_options = dict(annot=True, vmin=0, vmax=1, fmt='.2f',
                       cmap="YlGnBu", annot_kws={"fontsize":15})
res = sns.heatmap(cohen_k_ex2_alive, **heatmap_options)

#Re-apply the existing tick labels at a larger font size
res.set_xticklabels(res.get_xmajorticklabels(), fontsize = 15)
res.set_yticklabels(res.get_ymajorticklabels(), fontsize = 15)

plt.tight_layout()
plt.show()

#range cohen's k: 0.06 to 0.88

In [None]:
#Calculate average pairwise cohen's kappa values (Discharged alive)

#data - the 55 pairwise kappas, grouped by the first annotator of each pair
kappa_rows = [
    [c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11],
    [c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11],
    [c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11],
    [c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11],
    [c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11],
    [c6_c7, c6_c8, c6_c9, c6_c10, c6_c11],
    [c7_c8, c7_c9, c7_c10, c7_c11],
    [c8_c9, c8_c10, c8_c11],
    [c9_c10, c9_c11],
    [c10_c11],
]
sample = [kappa for row in kappa_rows for kappa in row]

#Mean and sample standard deviation, rounded to 3 decimal places
avg = round(mean(sample),3)
sd = round(statistics.stdev(sample),3)

# Prints average & standard deviation
for stat_name, stat_value in (("Average:", avg), ("Standard Deviation:", sd)):
    print(stat_name, stat_value)

In [None]:
#Check Fleiss' kappa for Discharged Alive

##annotator label columns to count over
cols = [ 'C1','C2','C3','C4','C5','C6','C7','C8','C9','C10','C11']
annotator_labels = true_alive_ex2[cols]

##per row, count how many annotator columns hold label '0', '1' and '3';
##only the three count columns are kept
all_alive_ex2 = pd.DataFrame({'count_alive': annotator_labels.eq('0').sum(axis=1),
                              'count_dead': annotator_labels.eq('1').sum(axis=1),
                              'count_other': annotator_labels.eq('3').sum(axis=1)},
                             columns=['count_alive', 'count_dead', 'count_other'])

all_alive_ex2

In [None]:
#Calculate Fleiss' kappa - Discharged Alive

#Fleiss' kappa over the per-row label counts, rounded to 3 decimal places
fleiss_k_ex2_alive = fleiss_kappa(all_alive_ex2, method='fleiss')
fleiss_k_ex2_alive = round(fleiss_k_ex2_alive, 3)

print("Fleiss' kappa: {:.3f}".format(fleiss_k_ex2_alive))

### 4.4 Extreme 2 cut-off - Died

In [None]:
#Select only Discharged died - from Ex2 cut-off

annotator_cols = ['C1', 'C2','C3','C4','C5','C6','C7','C8','C9','C10','C11']

#Attach the true discharge status and keep the patients who died
true_died_ex2 = ann_ex2_pred.merge(true_discharge, on='patientid')
true_died_ex2 = true_died_ex2[true_died_ex2['discharge_status']=='dead']

#Relabel all 15 columns positionally, then keep only C1-C11 as strings
true_died_ex2.columns = ['patientid'] + annotator_cols + ['MV','TMV','discharge_status']
true_died_ex2 = true_died_ex2[annotator_cols].applymap(str)
true_died_ex2

In [None]:
#Calculate pairwise Cohen's kappa (Died only)

#Prediction column of each annotator, taken by position (columns 0-10 -> C1-C11)
preds_by_annotator = {num: true_died_ex2.iloc[:, num - 1] for num in range(1, 12)}

#The following cells read the results by name (cN_pred, cA_cB), so publish them
#as notebook-level variables rather than writing out every assignment by hand
for annotator_num in range(1, 12):
    globals()['c{}_pred'.format(annotator_num)] = preds_by_annotator[annotator_num]

#Kappa for every annotator pair (first < second), rounded to 2 decimal places
for first_num in range(1, 12):
    for second_num in range(first_num + 1, 12):
        globals()['c{}_c{}'.format(first_num, second_num)] = round(
            cohen_kappa_score(preds_by_annotator[first_num], preds_by_annotator[second_num]), 2)

In [None]:
#Build the pairwise Cohen's kappa table (Died only)

#Each Ck is a one-column frame: 1.00 on Ck's own row, the kappa of Ck against every
#later model on the rows below it, and "" placeholders on the rows of earlier models.
C0 = pd.DataFrame({"": ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"]})
C1 = pd.DataFrame({'C1': [1.00, c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11]})
C2 = pd.DataFrame({'C2': [""] * 1 + [1.00, c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11]})
C3 = pd.DataFrame({'C3': [""] * 2 + [1.00, c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11]})
C4 = pd.DataFrame({'C4': [""] * 3 + [1.00, c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11]})
C5 = pd.DataFrame({'C5': [""] * 4 + [1.00, c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11]})
C6 = pd.DataFrame({'C6': [""] * 5 + [1.00, c6_c7, c6_c8, c6_c9, c6_c10, c6_c11]})
C7 = pd.DataFrame({'C7': [""] * 6 + [1.00, c7_c8, c7_c9, c7_c10, c7_c11]})
C8 = pd.DataFrame({'C8': [""] * 7 + [1.00, c8_c9, c8_c10, c8_c11]})
C9 = pd.DataFrame({'C9': [""] * 8 + [1.00, c9_c10, c9_c11]})
C10 = pd.DataFrame({'C10': [""] * 9 + [1.00, c10_c11]})
C11 = pd.DataFrame({'C11': [""] * 10 + [1.00]})

frames = [C0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11]

#Place the columns side by side, then use the label column (named "") as the row index
cohen_k_ex2_died = pd.concat(frames, axis=1).set_index("")

cohen_k_ex2_died

In [None]:
#Heatmap of the pairwise Cohen's kappa table (Died only)

import seaborn as sns

#Pass every model column through pd.to_numeric before plotting
#(the "" placeholders presumably become NaN - verify against the rendered heatmap)
cols = ["C" + str(k) for k in range(1, 12)]
cohen_k_ex2_died[cols] = cohen_k_ex2_died[cols].apply(pd.to_numeric)

fig = plt.figure(figsize=(8, 5), dpi=80, facecolor='w', edgecolor='k', num=None)

#Annotated heatmap with the colour scale pinned to 0-1
res = sns.heatmap(
    cohen_k_ex2_died,
    vmin=0,
    vmax=1,
    cmap="YlGnBu",
    annot=True,
    fmt='.2f',
    annot_kws={"fontsize":15},
)

#Re-apply the existing tick labels at font size 15
res.set_xticklabels(res.get_xmajorticklabels(), fontsize = 15)
res.set_yticklabels(res.get_ymajorticklabels(), fontsize = 15)

plt.tight_layout()
plt.show()

#range cohen's k:0.09 to 0.81

In [None]:
#Mean and standard deviation of the pairwise Cohen's kappa values (Died)

#All 55 unique model pairs, one sub-list per "first" model of the pair
sample = (
    [c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11]
    + [c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11]
    + [c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11]
    + [c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11]
    + [c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11]
    + [c6_c7, c6_c8, c6_c9, c6_c10, c6_c11]
    + [c7_c8, c7_c9, c7_c10, c7_c11]
    + [c8_c9, c8_c10, c8_c11]
    + [c9_c10, c9_c11]
    + [c10_c11]
)

#NOTE(review): the pairwise values were already rounded to 2 decimals when they were computed,
#so the 3-decimal figures below are statistics of the rounded values.
avg = round(mean(sample), 3)
#statistics.stdev is the sample standard deviation
sd = round(statistics.stdev(sample), 3)

#Report both figures
print("Average:", avg)
print("Standard Deviation:", sd)

In [None]:
#Build the per-row label count table used for Fleiss' kappa (Died)

#For every row, tally how many columns hold each label string. The callables run in order,
#so each tally sees the frame including the count columns added before it.
#NOTE(review): the '0' tally is labelled count_died here, while the neutral cut-off
#Discharged Alive cell labels the '0' tally count_alive - confirm which label is intended.
#NOTE(review): only the labels '0', '1' and '3' are tallied; a row containing any other
#label would not add up to the number of model columns - confirm no other labels occur.
all_died_ex2 = true_died_ex2.copy(deep=True).assign(
    count_died=lambda tbl: tbl.eq('0').sum(axis=1),
    count_dead=lambda tbl: tbl.eq('1').sum(axis=1),
    count_other=lambda tbl: tbl.eq('3').sum(axis=1),
)

#Remove the per-model columns so only the three count columns remain
cols = ['C' + str(k) for k in range(1, 12)]

all_died_ex2 = all_died_ex2.drop(columns=cols)

all_died_ex2

In [None]:
#Fleiss' kappa - Discharged Died

#Computed from the count table all_died_ex2 and kept to 3 decimals
fleiss_k_ex2_died = round(
    fleiss_kappa(table=all_died_ex2, method='fleiss'),
    3,
)

print("Fleiss' kappa: {:.3f}".format(fleiss_k_ex2_died))

### 4.5 Neutral cut-off - Discharged Alive

In [None]:
#Display ann_neut_pred, the table that the Discharged Alive / Died selections in this section are built from
ann_neut_pred

In [None]:
#Select only Discharged alive - from neut cut-off

#Attach true_discharge on patientid and keep the rows whose discharge_status is 'alive'
true_alive_neut = pd.merge(ann_neut_pred.copy(deep=True), true_discharge, on='patientid')
true_alive_neut = true_alive_neut.loc[true_alive_neut['discharge_status'] == 'alive']

#Rename all 15 columns by position: patientid, C1..C11, MV, TMV, discharge_status
true_alive_neut.columns = (
    ['patientid']
    + ['C' + str(k) for k in range(1, 12)]
    + ['MV', 'TMV', 'discharge_status']
)

#Keep only C1..C11 and convert every value to a string
#(the Fleiss count cell for this table matches labels as the strings '0', '1', '3')
true_alive_neut = true_alive_neut.drop(columns=['patientid', 'MV', 'TMV', 'discharge_status'])
true_alive_neut = true_alive_neut.applymap(str)
true_alive_neut

In [None]:
#Calculate pairwise Cohen's kappa (Discharged Alive only)

#One prediction Series per model, taken by column position 0-10 of true_alive_neut
(c1_pred, c2_pred, c3_pred, c4_pred, c5_pred, c6_pred,
 c7_pred, c8_pred, c9_pred, c10_pred, c11_pred) = (
    true_alive_neut.iloc[:, pos] for pos in range(11)
)

#Kappa for every unordered pair of models, rounded to 2 decimals.
#Each statement fixes one "first" model and unpacks its scores against all later models.
c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11 = (
    round(cohen_kappa_score(c1_pred, later), 2)
    for later in (c2_pred, c3_pred, c4_pred, c5_pred, c6_pred, c7_pred, c8_pred, c9_pred, c10_pred, c11_pred)
)

c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11 = (
    round(cohen_kappa_score(c2_pred, later), 2)
    for later in (c3_pred, c4_pred, c5_pred, c6_pred, c7_pred, c8_pred, c9_pred, c10_pred, c11_pred)
)

c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11 = (
    round(cohen_kappa_score(c3_pred, later), 2)
    for later in (c4_pred, c5_pred, c6_pred, c7_pred, c8_pred, c9_pred, c10_pred, c11_pred)
)

c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11 = (
    round(cohen_kappa_score(c4_pred, later), 2)
    for later in (c5_pred, c6_pred, c7_pred, c8_pred, c9_pred, c10_pred, c11_pred)
)

c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11 = (
    round(cohen_kappa_score(c5_pred, later), 2)
    for later in (c6_pred, c7_pred, c8_pred, c9_pred, c10_pred, c11_pred)
)

c6_c7, c6_c8, c6_c9, c6_c10, c6_c11 = (
    round(cohen_kappa_score(c6_pred, later), 2)
    for later in (c7_pred, c8_pred, c9_pred, c10_pred, c11_pred)
)

c7_c8, c7_c9, c7_c10, c7_c11 = (
    round(cohen_kappa_score(c7_pred, later), 2)
    for later in (c8_pred, c9_pred, c10_pred, c11_pred)
)

c8_c9, c8_c10, c8_c11 = (
    round(cohen_kappa_score(c8_pred, later), 2)
    for later in (c9_pred, c10_pred, c11_pred)
)

c9_c10, c9_c11 = (
    round(cohen_kappa_score(c9_pred, later), 2)
    for later in (c10_pred, c11_pred)
)

c10_c11 = round(cohen_kappa_score(c10_pred, c11_pred), 2)

In [None]:
#Build the pairwise Cohen's kappa table (Discharged Alive only)

#Each Ck is a one-column frame: 1.00 on Ck's own row, the kappa of Ck against every
#later model on the rows below it, and "" placeholders on the rows of earlier models.
C0 = pd.DataFrame({"": ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"]})
C1 = pd.DataFrame({'C1': [1.00, c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11]})
C2 = pd.DataFrame({'C2': [""] * 1 + [1.00, c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11]})
C3 = pd.DataFrame({'C3': [""] * 2 + [1.00, c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11]})
C4 = pd.DataFrame({'C4': [""] * 3 + [1.00, c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11]})
C5 = pd.DataFrame({'C5': [""] * 4 + [1.00, c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11]})
C6 = pd.DataFrame({'C6': [""] * 5 + [1.00, c6_c7, c6_c8, c6_c9, c6_c10, c6_c11]})
C7 = pd.DataFrame({'C7': [""] * 6 + [1.00, c7_c8, c7_c9, c7_c10, c7_c11]})
C8 = pd.DataFrame({'C8': [""] * 7 + [1.00, c8_c9, c8_c10, c8_c11]})
C9 = pd.DataFrame({'C9': [""] * 8 + [1.00, c9_c10, c9_c11]})
C10 = pd.DataFrame({'C10': [""] * 9 + [1.00, c10_c11]})
C11 = pd.DataFrame({'C11': [""] * 10 + [1.00]})

frames = [C0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11]

#Place the columns side by side, then use the label column (named "") as the row index
cohen_k_neut_alive = pd.concat(frames, axis=1).set_index("")

cohen_k_neut_alive

In [None]:
#Heatmap of the pairwise Cohen's kappa table (Discharged Alive only)

import seaborn as sns

#Pass every model column through pd.to_numeric before plotting
#(the "" placeholders presumably become NaN - verify against the rendered heatmap)
cols = ["C" + str(k) for k in range(1, 12)]
cohen_k_neut_alive[cols] = cohen_k_neut_alive[cols].apply(pd.to_numeric)

fig = plt.figure(figsize=(8, 5), dpi=80, facecolor='w', edgecolor='k', num=None)

#Annotated heatmap with the colour scale pinned to 0-1
res = sns.heatmap(
    cohen_k_neut_alive,
    vmin=0,
    vmax=1,
    cmap="YlGnBu",
    annot=True,
    fmt='.2f',
    annot_kws={"fontsize":15},
)

#Re-apply the existing tick labels at font size 15
res.set_xticklabels(res.get_xmajorticklabels(), fontsize = 15)
res.set_yticklabels(res.get_ymajorticklabels(), fontsize = 15)

plt.tight_layout()
plt.show()

#range cohen's k: 0.05 to 0.99

In [None]:
#Mean and standard deviation of the pairwise Cohen's kappa values (Discharged alive)

#All 55 unique model pairs, one sub-list per "first" model of the pair
sample = (
    [c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11]
    + [c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11]
    + [c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11]
    + [c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11]
    + [c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11]
    + [c6_c7, c6_c8, c6_c9, c6_c10, c6_c11]
    + [c7_c8, c7_c9, c7_c10, c7_c11]
    + [c8_c9, c8_c10, c8_c11]
    + [c9_c10, c9_c11]
    + [c10_c11]
)

#NOTE(review): the pairwise values were already rounded to 2 decimals when they were computed,
#so the 3-decimal figures below are statistics of the rounded values.
avg = round(mean(sample), 3)
#statistics.stdev is the sample standard deviation
sd = round(statistics.stdev(sample), 3)

#Report both figures
print("Average:", avg)
print("Standard Deviation:", sd)

In [None]:
#Build the per-row label count table used for Fleiss' kappa (Discharged Alive)

#For every row, tally how many columns hold each label string. The callables run in order,
#so each tally sees the frame including the count columns added before it.
#NOTE(review): only the labels '0', '1' and '3' are tallied; a row containing any other
#label would not add up to the number of model columns - confirm no other labels occur.
all_alive_neut = true_alive_neut.copy(deep=True).assign(
    count_alive=lambda tbl: tbl.eq('0').sum(axis=1),
    count_dead=lambda tbl: tbl.eq('1').sum(axis=1),
    count_other=lambda tbl: tbl.eq('3').sum(axis=1),
)

#Remove the per-model columns so only the three count columns remain
cols = ['C' + str(k) for k in range(1, 12)]

all_alive_neut = all_alive_neut.drop(columns=cols)

all_alive_neut

In [None]:
#Fleiss' kappa - Discharged Alive

#Computed from the count table all_alive_neut and kept to 3 decimals
fleiss_k_neut_alive = round(
    fleiss_kappa(table=all_alive_neut, method='fleiss'),
    3,
)

print("Fleiss' kappa: {:.3f}".format(fleiss_k_neut_alive))

### 4.6 Neutral cut-off - Died

In [None]:
#Select only Discharged died - from neut cut-off

#Attach true_discharge on patientid and keep the rows whose discharge_status is 'dead'
true_died_neut = pd.merge(ann_neut_pred.copy(deep=True), true_discharge, on='patientid')
true_died_neut = true_died_neut.loc[true_died_neut['discharge_status'] == 'dead']

#Rename all 15 columns by position: patientid, C1..C11, MV, TMV, discharge_status
true_died_neut.columns = (
    ['patientid']
    + ['C' + str(k) for k in range(1, 12)]
    + ['MV', 'TMV', 'discharge_status']
)

#Keep only C1..C11 and convert every value to a string
#(the Fleiss count cell for this table matches labels as the strings '0', '1', '3')
true_died_neut = true_died_neut.drop(columns=['patientid', 'MV', 'TMV', 'discharge_status'])
true_died_neut = true_died_neut.applymap(str)
true_died_neut

In [None]:
#Calculate pairwise Cohen's kappa (Discharged died only)

#One prediction Series per model, taken by column position 0-10 of true_died_neut
(c1_pred, c2_pred, c3_pred, c4_pred, c5_pred, c6_pred,
 c7_pred, c8_pred, c9_pred, c10_pred, c11_pred) = (
    true_died_neut.iloc[:, pos] for pos in range(11)
)

#Kappa for every unordered pair of models, rounded to 2 decimals.
#Each statement fixes one "first" model and unpacks its scores against all later models.
c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11 = (
    round(cohen_kappa_score(c1_pred, later), 2)
    for later in (c2_pred, c3_pred, c4_pred, c5_pred, c6_pred, c7_pred, c8_pred, c9_pred, c10_pred, c11_pred)
)

c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11 = (
    round(cohen_kappa_score(c2_pred, later), 2)
    for later in (c3_pred, c4_pred, c5_pred, c6_pred, c7_pred, c8_pred, c9_pred, c10_pred, c11_pred)
)

c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11 = (
    round(cohen_kappa_score(c3_pred, later), 2)
    for later in (c4_pred, c5_pred, c6_pred, c7_pred, c8_pred, c9_pred, c10_pred, c11_pred)
)

c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11 = (
    round(cohen_kappa_score(c4_pred, later), 2)
    for later in (c5_pred, c6_pred, c7_pred, c8_pred, c9_pred, c10_pred, c11_pred)
)

c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11 = (
    round(cohen_kappa_score(c5_pred, later), 2)
    for later in (c6_pred, c7_pred, c8_pred, c9_pred, c10_pred, c11_pred)
)

c6_c7, c6_c8, c6_c9, c6_c10, c6_c11 = (
    round(cohen_kappa_score(c6_pred, later), 2)
    for later in (c7_pred, c8_pred, c9_pred, c10_pred, c11_pred)
)

c7_c8, c7_c9, c7_c10, c7_c11 = (
    round(cohen_kappa_score(c7_pred, later), 2)
    for later in (c8_pred, c9_pred, c10_pred, c11_pred)
)

c8_c9, c8_c10, c8_c11 = (
    round(cohen_kappa_score(c8_pred, later), 2)
    for later in (c9_pred, c10_pred, c11_pred)
)

c9_c10, c9_c11 = (
    round(cohen_kappa_score(c9_pred, later), 2)
    for later in (c10_pred, c11_pred)
)

c10_c11 = round(cohen_kappa_score(c10_pred, c11_pred), 2)

In [None]:
#Build the pairwise Cohen's kappa table (Discharged died only)

#Each Ck is a one-column frame: 1.00 on Ck's own row, the kappa of Ck against every
#later model on the rows below it, and "" placeholders on the rows of earlier models.
C0 = pd.DataFrame({"": ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"]})
C1 = pd.DataFrame({'C1': [1.00, c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11]})
C2 = pd.DataFrame({'C2': [""] * 1 + [1.00, c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11]})
C3 = pd.DataFrame({'C3': [""] * 2 + [1.00, c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11]})
C4 = pd.DataFrame({'C4': [""] * 3 + [1.00, c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11]})
C5 = pd.DataFrame({'C5': [""] * 4 + [1.00, c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11]})
C6 = pd.DataFrame({'C6': [""] * 5 + [1.00, c6_c7, c6_c8, c6_c9, c6_c10, c6_c11]})
C7 = pd.DataFrame({'C7': [""] * 6 + [1.00, c7_c8, c7_c9, c7_c10, c7_c11]})
C8 = pd.DataFrame({'C8': [""] * 7 + [1.00, c8_c9, c8_c10, c8_c11]})
C9 = pd.DataFrame({'C9': [""] * 8 + [1.00, c9_c10, c9_c11]})
C10 = pd.DataFrame({'C10': [""] * 9 + [1.00, c10_c11]})
C11 = pd.DataFrame({'C11': [""] * 10 + [1.00]})

frames = [C0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11]

#Place the columns side by side, then use the label column (named "") as the row index
cohen_k_neut_died = pd.concat(frames, axis=1).set_index("")

cohen_k_neut_died

In [None]:
#Heatmap of the pairwise Cohen's kappa table (Discharged died only)

import seaborn as sns

#Pass every model column through pd.to_numeric before plotting
#(the "" placeholders presumably become NaN - verify against the rendered heatmap)
cols = ["C" + str(k) for k in range(1, 12)]
cohen_k_neut_died[cols] = cohen_k_neut_died[cols].apply(pd.to_numeric)

fig = plt.figure(figsize=(8, 5), dpi=80, facecolor='w', edgecolor='k', num=None)

#Annotated heatmap with the colour scale pinned to 0-1
res = sns.heatmap(
    cohen_k_neut_died,
    vmin=0,
    vmax=1,
    cmap="YlGnBu",
    annot=True,
    fmt='.2f',
    annot_kws={"fontsize":15},
)

#Re-apply the existing tick labels at font size 15
res.set_xticklabels(res.get_xmajorticklabels(), fontsize = 15)
res.set_yticklabels(res.get_ymajorticklabels(), fontsize = 15)

plt.tight_layout()
plt.show()

#range cohen's k: 0.30 to 0.96

In [None]:
#Mean and standard deviation of the pairwise Cohen's kappa values (Discharged died)

#All 55 unique model pairs, one sub-list per "first" model of the pair
sample = (
    [c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11]
    + [c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11]
    + [c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11]
    + [c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11]
    + [c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11]
    + [c6_c7, c6_c8, c6_c9, c6_c10, c6_c11]
    + [c7_c8, c7_c9, c7_c10, c7_c11]
    + [c8_c9, c8_c10, c8_c11]
    + [c9_c10, c9_c11]
    + [c10_c11]
)

#NOTE(review): the pairwise values were already rounded to 2 decimals when they were computed,
#so the 3-decimal figures below are statistics of the rounded values.
avg = round(mean(sample), 3)
#statistics.stdev is the sample standard deviation
sd = round(statistics.stdev(sample), 3)

#Report both figures
print("Average:", avg)
print("Standard Deviation:", sd)

In [None]:
#Build the per-row label count table used for Fleiss' kappa (Discharged died)

#For every row, tally how many columns hold each label string. The callables run in order,
#so each tally sees the frame including the count columns added before it.
#NOTE(review): the '0' tally is labelled count_died here, while the Discharged Alive
#cell labels the '0' tally count_alive - confirm which label is intended.
#NOTE(review): only the labels '0', '1' and '3' are tallied; a row containing any other
#label would not add up to the number of model columns - confirm no other labels occur.
all_died_neut = true_died_neut.copy(deep=True).assign(
    count_died=lambda tbl: tbl.eq('0').sum(axis=1),
    count_dead=lambda tbl: tbl.eq('1').sum(axis=1),
    count_other=lambda tbl: tbl.eq('3').sum(axis=1),
)

#Remove the per-model columns so only the three count columns remain
cols = ['C' + str(k) for k in range(1, 12)]

all_died_neut = all_died_neut.drop(columns=cols)

all_died_neut

In [None]:
#Fleiss' kappa - Discharged Died

#Computed from the count table all_died_neut and kept to 3 decimals
fleiss_k_neut_died = round(
    fleiss_kappa(table=all_died_neut, method='fleiss'),
    3,
)

print("Fleiss' kappa: {:.3f}".format(fleiss_k_neut_died))