# Research Study: The Impact of Inconsistent Human Annotations on AI-driven Clinical Decision Making

In [None]:
#Import necessary modules 

import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

import statistics
from statistics import mean
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score 
from sklearn.metrics import make_scorer
from sklearn.metrics import f1_score

from sklearn.ensemble import RandomForestClassifier 
from sklearn.tree import plot_tree

from sklearn import metrics 
from sklearn.metrics import multilabel_confusion_matrix 
from sklearn.metrics import plot_confusion_matrix 
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV 

from sklearn.metrics import cohen_kappa_score 
from statsmodels.stats.inter_rater import fleiss_kappa 

In [None]:
import warnings
# Suppress every warning for the rest of the session (no category or module
# filter is given, so library deprecation/convergence warnings are hidden too).
warnings.filterwarnings('ignore')

In [None]:
#Connect to HiRID database

import os

import psycopg2
from psycopg2 import Error

#Connect to HiRID
# Connection settings are read from environment variables when they are set, so
# credentials do not have to be stored in the notebook. The fallbacks reproduce
# the values that were previously hard-coded here.
# NOTE(review): the fallback password is still kept in source - rotate it and
# remove the default once every environment sets HIRID_DB_PASSWORD.
conn = psycopg2.connect(user=os.environ.get("HIRID_DB_USER", "mimicuser"),
                        password=os.environ.get("HIRID_DB_PASSWORD", "knowlabMIMIC"),
                        host=os.environ.get("HIRID_DB_HOST", "172.17.0.1"),
                        port=os.environ.get("HIRID_DB_PORT", "5433"),
                        database=os.environ.get("HIRID_DB_NAME", "HiRID"))

#Cursor for executing SQL statements on this connection
cur = conn.cursor()

## 1. Import Training Datasets

Note: All annotated datasets were provided by the data controller (Prof. Malcolm Sim) as Excel files. In this section, all datasets are imported in their raw format.

In [None]:
#Define function to add numeric label columns to all 11 QEUH annotated datasets

def num_labels(df):
    """Add numeric encodings of the 'Annotation' column to *df* in place.

    Four integer columns are appended, in this order:

    * ``Annotation_Num`` - multiclass code: A=0, B=1, C=2, D=3, E=4
    * ``Ann_Bin_A``      - binary: A=0, B/C/D/E=1
    * ``Ann_Bin_B``      - binary: A/B=0, C/D/E=1
    * ``Ann_Bin_C``      - binary: A/B/C=0, D/E=1

    Every column starts at 0, so a row whose annotation is not exactly one of
    'A'..'E' (e.g. lowercase or missing) keeps the value 0 in all four columns.

    Returns the same DataFrame object that was passed in.
    """
    label_codes = {
        'Annotation_Num': {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4},
        'Ann_Bin_A': {'A': 0, 'B': 1, 'C': 1, 'D': 1, 'E': 1},
        'Ann_Bin_B': {'A': 0, 'B': 0, 'C': 1, 'D': 1, 'E': 1},
        'Ann_Bin_C': {'A': 0, 'B': 0, 'C': 0, 'D': 1, 'E': 1},
    }

    for column, codes in label_codes.items():
        # Initialise with 0, then overwrite the rows matching each letter.
        df[column] = 0
        for letter, code in codes.items():
            df.loc[df['Annotation'] == letter, column] = code

    return df

In [None]:
#Import Consultant no.1 dataset

# Columns removed before modelling
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']

# Load, order by PseudoID, drop the unused columns and rename Mean to MAP
c1 = (
    pd.read_excel('./p01.xlsx')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# Blank drug fields mean value=0 (as confirmed by Prof Sim), so nulls become 0
c1 = c1.assign(
    Adrenaline=c1['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c1['Noradrenaline'].replace(np.nan, 0),
)

# Append the numeric label columns
c1 = num_labels(c1)

print(c1.shape)
c1.head()

In [None]:
#Import Consultant no.2 dataset

# Columns removed before modelling
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']

# Load, order by PseudoID, drop the unused columns and rename Mean to MAP
c2 = (
    pd.read_csv('./p02.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# Blank drug fields mean value=0 (as confirmed by Prof Sim), so nulls become 0
c2 = c2.assign(
    Adrenaline=c2['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c2['Noradrenaline'].replace(np.nan, 0),
)

# Append the numeric label columns
c2 = num_labels(c2)

print(c2.shape)
c2.head()

In [None]:
#Import Consultant no.3 dataset

# Columns removed before modelling
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']

# Load, order by PseudoID, drop the unused columns and rename Mean to MAP
c3 = (
    pd.read_csv('./p03.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# Blank drug fields mean value=0 (as confirmed by Prof Sim), so nulls become 0
c3 = c3.assign(
    Adrenaline=c3['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c3['Noradrenaline'].replace(np.nan, 0),
)

# Append the numeric label columns
c3 = num_labels(c3)

print(c3.shape)
c3.head()

In [None]:
#Import Consultant no.4 dataset

# Columns removed before modelling
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']

# Load, order by PseudoID, drop the unused columns and rename Mean to MAP
c4 = (
    pd.read_excel('./p04.xlsx')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# Blank drug fields mean value=0 (as confirmed by Prof Sim), so nulls become 0
c4 = c4.assign(
    Adrenaline=c4['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c4['Noradrenaline'].replace(np.nan, 0),
)

# Append the numeric label columns
c4 = num_labels(c4)

print(c4.shape)
c4.head()

In [None]:
#Import Consultant no.5 dataset

# Columns removed before modelling
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']

# Load, order by PseudoID, drop the unused columns and rename Mean to MAP
c5 = (
    pd.read_csv('./p05.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# Blank drug fields mean value=0 (as confirmed by Prof Sim), so nulls become 0
c5 = c5.assign(
    Adrenaline=c5['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c5['Noradrenaline'].replace(np.nan, 0),
)

# Append the numeric label columns
c5 = num_labels(c5)

print(c5.shape)
c5.head()

In [None]:
#Import Consultant no.6 dataset

# Columns removed before modelling
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']

# Load, order by PseudoID, drop the unused columns and rename Mean to MAP
c6 = (
    pd.read_excel('./p06.xlsx')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# Blank drug fields mean value=0 (as confirmed by Prof Sim), so nulls become 0
c6 = c6.assign(
    Adrenaline=c6['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c6['Noradrenaline'].replace(np.nan, 0),
)

# Append the numeric label columns
c6 = num_labels(c6)

print(c6.shape)
c6.head()

In [None]:
#Import Consultant no.7 dataset

# Columns removed before modelling
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']

# Load, order by PseudoID, drop the unused columns and rename Mean to MAP
c7 = (
    pd.read_csv('./p07.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# Blank drug fields mean value=0 (as confirmed by Prof Sim), so nulls become 0
c7 = c7.assign(
    Adrenaline=c7['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c7['Noradrenaline'].replace(np.nan, 0),
)

# Append the numeric label columns
c7 = num_labels(c7)

print(c7.shape)
c7.head()

In [None]:
#Import Consultant no.8 dataset

# Columns removed before modelling
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']

# Load, order by PseudoID, drop the unused columns and rename Mean to MAP
c8 = (
    pd.read_csv('./p08.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# Blank drug fields mean value=0 (as confirmed by Prof Sim), so nulls become 0
c8 = c8.assign(
    Adrenaline=c8['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c8['Noradrenaline'].replace(np.nan, 0),
)

# Append the numeric label columns
c8 = num_labels(c8)

print(c8.shape)
c8.head()

In [None]:
#Import Consultant no.9 dataset

# Columns removed before modelling
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']

# Load, order by PseudoID, drop the unused columns and rename Mean to MAP
c9 = (
    pd.read_csv('./p09.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# Blank drug fields mean value=0 (as confirmed by Prof Sim), so nulls become 0
c9 = c9.assign(
    Adrenaline=c9['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c9['Noradrenaline'].replace(np.nan, 0),
)

# Append the numeric label columns
c9 = num_labels(c9)

print(c9.shape)
c9.head()

In [None]:
#Import Consultant no.10 dataset

# Columns removed before modelling
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']

# Load, order by PseudoID, drop the unused columns and rename Mean to MAP
c10 = (
    pd.read_csv('./p10.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# Blank drug fields mean value=0 (as confirmed by Prof Sim), so nulls become 0
c10 = c10.assign(
    Adrenaline=c10['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c10['Noradrenaline'].replace(np.nan, 0),
)

# Append the numeric label columns
c10 = num_labels(c10)

print(c10.shape)
c10.head()

In [None]:
#Import Consultant no.11 dataset

# Columns removed before modelling
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']

# Load, order by PseudoID, drop the unused columns and rename Mean to MAP
c11 = (
    pd.read_excel('./p11.xlsx')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# Blank drug fields mean value=0 (as confirmed by Prof Sim), so nulls become 0.
# Annotation letters are upper-cased so they match the 'A'..'E' comparisons
# made in num_labels.
c11 = c11.assign(
    Adrenaline=c11['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=c11['Noradrenaline'].replace(np.nan, 0),
    Annotation=c11['Annotation'].str.upper(),
)

# Append the numeric label columns
c11 = num_labels(c11)

print(c11.shape)
c11.head()

In [None]:
#Import Majority MV Consensus Dataset
##See jupyter notebook 'npjDM-MV_Consensus_Dataset' for steps to create this Majority MV Consensus Dataset

# Load the consensus file, drop the 'Unnamed: 0' column and rename Mean to MAP
mv = (
    pd.read_csv('MV-Consensus-Dataset.csv')
    .drop(columns='Unnamed: 0')
    .rename(columns={'Mean': 'MAP'})
)

# Blank drug fields mean value=0 (as confirmed by Prof Sim), so nulls become 0
mv = mv.assign(
    Adrenaline=mv['Adrenaline'].replace(np.nan, 0),
    Noradrenaline=mv['Noradrenaline'].replace(np.nan, 0),
)

# Append the numeric label columns
mv = num_labels(mv)

print(mv.shape)
mv.head()

In [None]:
#TMV
##Create a TMV dataset by taking the majority-vote labels across only the expert annotated datasets which generate models that have high internal validation performance (i.e., where internal F1 >= 0.7).
##See jupyter notebook 'npjDM-IntVal-Top_Models' for steps to find top performing models
##Top performing models within internal validation: C2, C4, C8

# Re-read the three top annotators' raw files (no null replacement or numeric
# labels yet), dropping the unused columns and renaming Mean to MAP.
c2_ann = pd.read_csv('./p02.csv').sort_values(by = ['PseudoID'], ascending=[True])
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c2_ann = c2_ann.drop(columns = cols)
c2_ann = c2_ann.rename(columns={'Mean': 'MAP'}) #rename Mean to MAP

c4_ann = pd.read_excel('./p04.xlsx').sort_values(by = ['PseudoID'], ascending=[True])
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c4_ann = c4_ann.drop(columns = cols)
c4_ann = c4_ann.rename(columns={'Mean': 'MAP'}) #rename Mean to MAP

c8_ann = pd.read_csv('./p08.csv').sort_values(by = ['PseudoID'], ascending=[True])
cols = ['Dobutamine','Time','Bckgrnd','PseudoID','Line of Selected Timepoint']
c8_ann = c8_ann.drop(columns = cols)
c8_ann = c8_ann.rename(columns={'Mean': 'MAP'}) #rename Mean to MAP

# Join the three frames on the six feature columns so each row carries the
# three annotators' labels.
# NOTE(review): rows are matched on feature values only; this presumably relies
# on each feature combination being unique within a file - confirm.
cols = ['Adrenaline','Noradrenaline','FiO2','SpO2','MAP','HR']
ann_top = c2_ann.merge(c4_ann,on=cols).merge(c8_ann,on=cols)

# Positional renaming: assumes the merged frame is the six features followed by
# the three label columns in C2, C4, C8 order - TODO confirm against the files.
ann_top.columns = ['Adrenaline','Noradrenaline','FiO2','SpO2','MAP','HR', 'c2_ann', 'c4_ann', 'c8_ann']

# Keep only the three label columns for the vote
colsb = ['Adrenaline', 'Noradrenaline','FiO2','SpO2','MAP','HR']
ann_top.drop(colsb,axis=1,inplace=True)

# Row-wise mode = majority label; column 0 of the mode result is used, so when
# several labels tie the first one returned by DataFrame.mode is taken.
ann_top['Annotation']= ann_top.mode(axis=1)[0]
colsc = ['c2_ann', 'c4_ann','c8_ann']
ann_top.drop(colsc,axis=1,inplace=True)

# Repeat the same merge to recover the feature columns for the TMV frame
colsd = ['Adrenaline','Noradrenaline','FiO2','SpO2','MAP','HR']
tmv = c2_ann.merge(c4_ann,on=colsd).merge(c8_ann,on=colsd)
tmv.columns = ['Adrenaline','Noradrenaline','FiO2','SpO2','MAP','HR', 'c2_ann', 'c4_ann', 'c8_ann']

# Attach the majority-vote 'Annotation' column, then remove the individual
# annotator label columns.
tmv = pd.concat([tmv,ann_top],axis=1)
colse = ['c2_ann', 'c4_ann','c8_ann']
tmv.drop(colse,axis=1,inplace=True)

#Replace null with 0 in drug fields (as blank value indicates value=0, as confirmed by Prof Sim)
tmv['Adrenaline'] = tmv['Adrenaline'].replace(np.nan, 0)
tmv['Noradrenaline'] = tmv['Noradrenaline'].replace(np.nan, 0)

# Append the numeric label columns
tmv = num_labels(tmv)

print(tmv.shape)
tmv.head()

## 2. Investigate Inter-Annotator Agreement (IAA) - Cohen's kappa

In [None]:
#Create dataframe with compiled annotation labels across all 11 consultant annotators

# For each consultant keep the first seven columns and rename that consultant's
# 'Annotation' column to the consultant ID.
c1sub = c1.iloc[:, :7].rename(columns={'Annotation': 'C1'})
c2sub = c2.iloc[:, :7].rename(columns={'Annotation': 'C2'})
c3sub = c3.iloc[:, :7].rename(columns={'Annotation': 'C3'})
c4sub = c4.iloc[:, :7].rename(columns={'Annotation': 'C4'})
c5sub = c5.iloc[:, :7].rename(columns={'Annotation': 'C5'})
c6sub = c6.iloc[:, :7].rename(columns={'Annotation': 'C6'})
c7sub = c7.iloc[:, :7].rename(columns={'Annotation': 'C7'})
c8sub = c8.iloc[:, :7].rename(columns={'Annotation': 'C8'})
c9sub = c9.iloc[:, :7].rename(columns={'Annotation': 'C9'})
c10sub = c10.iloc[:, :7].rename(columns={'Annotation': 'C10'})
c11sub = c11.iloc[:, :7].rename(columns={'Annotation': 'C11'})

# Join all consultants on the six shared feature columns
cols = ['Adrenaline','Noradrenaline','FiO2','SpO2','MAP','HR']
all_ann = (
    c1sub
    .merge(c2sub, on=cols)
    .merge(c3sub, on=cols)
    .merge(c4sub, on=cols)
    .merge(c5sub, on=cols)
    .merge(c6sub, on=cols)
    .merge(c7sub, on=cols)
    .merge(c8sub, on=cols)
    .merge(c9sub, on=cols)
    .merge(c10sub, on=cols)
    .merge(c11sub, on=cols)
)

print(all_ann.shape)
all_ann.head()

In [None]:
#Calculate pairwise Cohen's kappa values

# Label series taken from positions 6..16 of all_ann
# (assumed to be the C1..C11 columns in that order - TODO confirm)
(c1_ann, c2_ann, c3_ann, c4_ann, c5_ann, c6_ann,
 c7_ann, c8_ann, c9_ann, c10_ann, c11_ann) = (
    all_ann.iloc[:, position] for position in range(6, 17)
)


def _kappa(first, second):
    """Return Cohen's kappa between two label series, rounded to 2 decimals."""
    return round(cohen_kappa_score(first, second), 2)


# One unpacking per reference annotator: that annotator against every
# higher-numbered annotator.
(c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11) = (
    _kappa(c1_ann, other)
    for other in (c2_ann, c3_ann, c4_ann, c5_ann, c6_ann,
                  c7_ann, c8_ann, c9_ann, c10_ann, c11_ann)
)

(c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11) = (
    _kappa(c2_ann, other)
    for other in (c3_ann, c4_ann, c5_ann, c6_ann,
                  c7_ann, c8_ann, c9_ann, c10_ann, c11_ann)
)

(c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11) = (
    _kappa(c3_ann, other)
    for other in (c4_ann, c5_ann, c6_ann, c7_ann,
                  c8_ann, c9_ann, c10_ann, c11_ann)
)

(c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11) = (
    _kappa(c4_ann, other)
    for other in (c5_ann, c6_ann, c7_ann, c8_ann, c9_ann, c10_ann, c11_ann)
)

(c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11) = (
    _kappa(c5_ann, other)
    for other in (c6_ann, c7_ann, c8_ann, c9_ann, c10_ann, c11_ann)
)

(c6_c7, c6_c8, c6_c9, c6_c10, c6_c11) = (
    _kappa(c6_ann, other)
    for other in (c7_ann, c8_ann, c9_ann, c10_ann, c11_ann)
)

(c7_c8, c7_c9, c7_c10, c7_c11) = (
    _kappa(c7_ann, other)
    for other in (c8_ann, c9_ann, c10_ann, c11_ann)
)

(c8_c9, c8_c10, c8_c11) = (
    _kappa(c8_ann, other)
    for other in (c9_ann, c10_ann, c11_ann)
)

(c9_c10, c9_c11) = (
    _kappa(c9_ann, other)
    for other in (c10_ann, c11_ann)
)

c10_c11 = _kappa(c10_ann, c11_ann)

In [None]:
# Row labels, stored under an empty column name so they can become the index
C0 = pd.DataFrame({"": ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"]})

# One single-column frame per annotator. Each column holds "" for the first
# entries, 1.00 on the diagonal, then that annotator's pairwise kappas.
C1 = pd.DataFrame({'C1': [1.00, c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11]})
C2 = pd.DataFrame({'C2': ["", 1.00, c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11]})
C3 = pd.DataFrame({'C3': ["", "", 1.00, c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11]})
C4 = pd.DataFrame({'C4': ["", "", "", 1.00, c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11]})
C5 = pd.DataFrame({'C5': ["", "", "", "", 1.00, c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11]})
C6 = pd.DataFrame({'C6': ["", "", "", "", "", 1.00, c6_c7, c6_c8, c6_c9, c6_c10, c6_c11]})
C7 = pd.DataFrame({'C7': ["", "", "", "", "", "", 1.00, c7_c8, c7_c9, c7_c10, c7_c11]})
C8 = pd.DataFrame({'C8': ["", "", "", "", "", "", "", 1.00, c8_c9, c8_c10, c8_c11]})
C9 = pd.DataFrame({'C9': ["", "", "", "", "", "", "", "", 1.00, c9_c10, c9_c11]})
C10 = pd.DataFrame({'C10': ["", "", "", "", "", "", "", "", "", 1.00, c10_c11]})
C11 = pd.DataFrame({'C11': ["", "", "", "", "", "" , "", "", "", "", 1.00]})

frames = [C0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11]

# Place the columns side by side and index the table by annotator label
cohen_k = pd.concat(frames, axis=1).set_index("")

# Convert every annotator column to a numeric dtype
cols = ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"]
cohen_k[cols] = cohen_k[cols].apply(pd.to_numeric)

cohen_k.dtypes

cohen_k

In [None]:
#Plot pairwise Cohen's kappa

import seaborn as sns

# 8x5 inch figure at 80 dpi with a white face colour and black edge colour
fig = plt.figure(num=None, figsize=(8, 5), dpi=80, facecolor='w', edgecolor='k')

# Annotated heatmap of the kappa table; colour scale fixed to [0, 1] and cell
# values printed with two decimals
res = sns.heatmap(cohen_k, annot=True, vmin=0, vmax=1, 
                  fmt='.2f', cmap="YlGnBu", annot_kws={"fontsize":15})

# Re-apply the existing tick labels with a larger font size
res.set_xticklabels(res.get_xmajorticklabels(), fontsize = 15)
res.set_yticklabels(res.get_ymajorticklabels(), fontsize = 15)

plt.tight_layout()

# Save to the working directory before showing the figure
plt.savefig("Multi-RF-Exp1-Pairwise-Cohen's.png")              
plt.show()

In [None]:
#Find average pairwise Cohen's kappa 

# All 55 unique annotator pairs. Each value was already rounded to two decimals
# when it was computed, so the statistics below are over the rounded kappas.
pair_iaa = [c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11, 
          c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11, 
          c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11, 
          c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11, 
          c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11, 
          c6_c7, c6_c8, c6_c9, c6_c10, c6_c11, 
          c7_c8, c7_c9, c7_c10, c7_c11,
          c8_c9, c8_c10, c8_c11, 
          c9_c10, c9_c11,  
          c10_c11]

# Sanity check on the number of pairs
print(len(pair_iaa))

# Mean and sample standard deviation (statistics.stdev), rounded to 3 decimals
avg = round(mean(pair_iaa),3)
sd = round(statistics.stdev(pair_iaa),3)
 
# Prints average & standard deviation
print("Average Cohen's kappa:", avg)
print("Standard Deviation:", sd)

In [None]:
#Compare C2, C4, C8 QEUH annotation distributions

# The label/count columns are named explicitly via rename_axis/reset_index(name=...)
# instead of relying on reset_index() producing an 'index' column and a column
# named after the series: from pandas 2.0 value_counts() names its result
# 'count', which made the previous column lookups fail.
# NOTE(review): percentages divide by a fixed 60 - assumes all_ann has 60 rows;
# TODO confirm.

#C2
c2_ann = all_ann['C2'].value_counts()
c2_ann_dist = c2_ann.rename_axis('QEUH Annotated Label').reset_index(name='C2')
c2_ann_dist['C2 %'] = ((c2_ann_dist['C2']/60)*100)

#C4
c4_ann = all_ann['C4'].value_counts()
c4_ann_dist = c4_ann.rename_axis('QEUH Annotated Label').reset_index(name='C4')
c4_ann_dist['C4 %'] = ((c4_ann_dist['C4']/60)*100)

#C8
c8_ann = all_ann['C8'].value_counts()
c8_ann_dist = c8_ann.rename_axis('QEUH Annotated Label').reset_index(name='C8')
c8_ann_dist['C8 %'] = ((c8_ann_dist['C8']/60)*100)

#merge
# Inner joins on the label: only labels used by all three annotators are kept
ann_dist = c2_ann_dist.merge(c4_ann_dist, on='QEUH Annotated Label').merge(c8_ann_dist, on='QEUH Annotated Label')
ann_dist = ann_dist.sort_values(by='QEUH Annotated Label', ascending=True)
# Keep only the percentage columns
ann_dist = ann_dist.drop(['C2','C4','C8'], axis=1)

ann_dist

# 3. Internal Validation Experiment

In [None]:
#Define Parameter Grid for hyperparameter optimisation
##Search space of Random Forest settings, passed to GridSearchCV as param_grid

parameters = dict(
    max_depth=[3, 4, 7, 9, 10, 20, 30, None],
    n_estimators=[10, 30, 50, 70, 100],
    criterion=['gini', 'entropy'],
)

In [None]:
#Define Function - RF Model Evaluation via 5-fold CV

def do_cv_learning_rf(X, y, verbose=False, do_scale=False, random_state=1):
    """Evaluate a grid-searched Random Forest with stratified 5-fold CV.

    For each outer fold a GridSearchCV (inner 5-fold, scored with 'f1_micro')
    is run over the module-level ``parameters`` grid on the training part, and
    the refitted best estimator is scored with micro-averaged F1 on the
    held-out part.

    Parameters
    ----------
    X : array-like of feature rows.
    y : array-like of class labels, one per row of X.
    verbose : when True, print the best grid-search parameters of every fold.
    do_scale : when True, standardise the features. The scaler is fitted on
        the training part of each fold only and then applied to the held-out
        part, so no test-fold statistics leak into training.
    random_state : seed used for both the fold shuffling and the Random
        Forest (previously accepted but ignored in favour of a fixed 1; the
        default of 1 keeps existing results unchanged).

    Returns
    -------
    One-row DataFrame with columns 'Annotator', 'Model', 'Optimisation',
    'F1_micro' (mean over folds) and 'S.D.' (np.std over folds). 'Annotator'
    holds the placeholder 'ann'.
    """
    # Folds are addressed with positional index arrays, so use plain arrays.
    X = np.asarray(X)
    y = np.asarray(y)

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
    f1s = []

    for i, (train, test) in enumerate(cv.split(X, y)):
        X_train, X_test = X[train], X[test]

        if do_scale:
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)

        gcsv = GridSearchCV(RandomForestClassifier(random_state=random_state),
                            param_grid=parameters,
                            cv=5,
                            scoring='f1_micro')
        grid_result = gcsv.fit(X_train, y[train])
        if verbose:
            print('fold', i, 'best_params', grid_result.best_params_)

        clf = grid_result.best_estimator_
        f1s.append(metrics.f1_score(y[test], clf.predict(X_test), average='micro'))

    ##Performance metrics as a one-row DataFrame
    dfrf_multi_f1data = pd.DataFrame(
        data=[['ann', 'multi', 'F1_micro', np.mean(f1s), np.std(f1s)]],
        columns=['Annotator', 'Model', 'Optimisation', 'F1_micro', 'S.D.'])

    return dfrf_multi_f1data

In [None]:
#Define Function - Find highest performing model after 5-fold CV

def model_opt_rf(X, y, verbose=False, do_scale=False, random_state=1):
    """Return the fold model with the highest held-out micro-F1.

    Runs the same procedure as the 5-fold evaluation: for each stratified
    outer fold a GridSearchCV (inner 5-fold, scored with 'f1_micro') is run
    over the module-level ``parameters`` grid on the training part, and the
    refitted best estimator is scored on the held-out part.

    Parameters
    ----------
    X : array-like of feature rows.
    y : array-like of class labels, one per row of X.
    verbose : when True, print the best grid-search parameters of every fold.
    do_scale : when True, standardise the features with a scaler fitted on
        the training part of each fold only. The scaler itself is not
        returned, so a caller using this option cannot reproduce the scaling
        for new data from the return value alone.
    random_state : seed used for both the fold shuffling and the Random
        Forest (previously accepted but ignored in favour of a fixed 1; the
        default of 1 keeps existing results unchanged).

    Returns
    -------
    The fitted estimator from the fold with the highest F1; the earliest
    fold wins ties.
    """
    # Folds are addressed with positional index arrays, so use plain arrays.
    X = np.asarray(X)
    y = np.asarray(y)

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
    f1s = []
    models = []

    for i, (train, test) in enumerate(cv.split(X, y)):
        X_train, X_test = X[train], X[test]

        if do_scale:
            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)

        gcsv = GridSearchCV(RandomForestClassifier(random_state=random_state),
                            param_grid=parameters,
                            cv=5,
                            scoring='f1_micro')
        grid_result = gcsv.fit(X_train, y[train])
        if verbose:
            print('fold', i, 'best_params', grid_result.best_params_)

        clf = grid_result.best_estimator_
        f1s.append(metrics.f1_score(y[test], clf.predict(X_test), average='micro'))
        models.append(clf)

    #find opt model: first fold reaching the maximum F1
    opt_model = models[f1s.index(max(f1s))]

    return opt_model

In [None]:
#Define Function - Find Feature Importances (FI)
##FI indicates relative importance of each feature when making the classification

def feat_imp(annrf_multi_opt):
    """Tabulate the first six feature importances of a fitted model.

    Reads ``annrf_multi_opt.feature_importances_`` and returns a one-row
    DataFrame with the placeholder values 'ann', 'multi' and 'F1' in the
    'Annotator', 'Model' and 'Optimisation' columns, followed by one column
    per feature. Importances are assigned to feature names by position.
    """
    importances = list(annrf_multi_opt.feature_importances_)
    feature_names = ['Adrenaline', 'Noradrenaline', 'FiO2', 'SpO2', 'Mean', 'HR']

    # Index each position explicitly so a model with fewer than six
    # importances raises IndexError.
    row = ['ann', 'multi', 'F1'] + [importances[k] for k in range(len(feature_names))]

    df_multi_imp = pd.DataFrame(
        data=[row],
        columns=['Annotator', 'Model', 'Optimisation'] + feature_names)
    return df_multi_imp

In [None]:
#C1 - IntVal

# Features are the first six columns; the target is the column at position 7
array = c1.to_numpy()
X = array[:, 0:6].astype(float)
y = array[:, 7].astype(int)

# Re-encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c1rf_multi_f1data = do_cv_learning_rf(X, y).assign(Annotator='C1')

#Find Opt model
c1rf_multi_opt = model_opt_rf(X, y)

#Feature Importances
c1rf_opt_fi = feat_imp(c1rf_multi_opt).assign(Annotator='C1')

print(c1rf_multi_opt)
c1rf_multi_f1data

In [None]:
#C2 - IntVal

# Features are the first six columns; the target is the column at position 7
array = c2.to_numpy()
X = array[:, 0:6].astype(float)
y = array[:, 7].astype(int)

# Re-encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c2rf_multi_f1data = do_cv_learning_rf(X, y).assign(Annotator='C2')

#Find Opt model
c2rf_multi_opt = model_opt_rf(X, y)

#Feature Importances
c2rf_opt_fi = feat_imp(c2rf_multi_opt).assign(Annotator='C2')

print(c2rf_multi_opt)
c2rf_multi_f1data

In [None]:
#C3 - IntVal

# Features are the first six columns; the target is the column at position 7
array = c3.to_numpy()
X = array[:, 0:6].astype(float)
y = array[:, 7].astype(int)

# Re-encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c3rf_multi_f1data = do_cv_learning_rf(X, y).assign(Annotator='C3')

#Find Opt model
c3rf_multi_opt = model_opt_rf(X, y)

#Feature Importances
c3rf_opt_fi = feat_imp(c3rf_multi_opt).assign(Annotator='C3')

print(c3rf_multi_opt)
c3rf_multi_f1data

In [None]:
#C4 - IntVal

# Features are the first six columns; the target is the column at position 7
array = c4.to_numpy()
X = array[:, 0:6].astype(float)
y = array[:, 7].astype(int)

# Re-encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c4rf_multi_f1data = do_cv_learning_rf(X, y).assign(Annotator='C4')

#Find Opt model
c4rf_multi_opt = model_opt_rf(X, y)

#Feature Importances
c4rf_opt_fi = feat_imp(c4rf_multi_opt).assign(Annotator='C4')

print(c4rf_multi_opt)
c4rf_multi_f1data

In [None]:
#C5 - IntVal

# Features are the first six columns; the target is the column at position 7
array = c5.to_numpy()
X = array[:, 0:6].astype(float)
y = array[:, 7].astype(int)

# Re-encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c5rf_multi_f1data = do_cv_learning_rf(X, y).assign(Annotator='C5')

#Find Opt model
c5rf_multi_opt = model_opt_rf(X, y)

#Feature Importances
c5rf_opt_fi = feat_imp(c5rf_multi_opt).assign(Annotator='C5')

print(c5rf_multi_opt)
c5rf_multi_f1data

In [None]:
#C6 - IntVal

# Features are the first six columns; the target is the column at position 7
array = c6.to_numpy()
X = array[:, 0:6].astype(float)
y = array[:, 7].astype(int)

# Re-encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c6rf_multi_f1data = do_cv_learning_rf(X, y).assign(Annotator='C6')

#Find Opt model
c6rf_multi_opt = model_opt_rf(X, y)

#Feature Importances
c6rf_opt_fi = feat_imp(c6rf_multi_opt).assign(Annotator='C6')

print(c6rf_multi_opt)
c6rf_multi_f1data

In [None]:
#C7 - IntVal

# Features are the first six columns; the target is the column at position 7
array = c7.to_numpy()
X = array[:, 0:6].astype(float)
y = array[:, 7].astype(int)

# Re-encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c7rf_multi_f1data = do_cv_learning_rf(X, y).assign(Annotator='C7')

#Find Opt model
c7rf_multi_opt = model_opt_rf(X, y)

#Feature Importances
c7rf_opt_fi = feat_imp(c7rf_multi_opt).assign(Annotator='C7')

print(c7rf_multi_opt)
c7rf_multi_f1data

In [None]:
#C8 - IntVal

# Features are the first six columns; the target is the column at position 7
array = c8.to_numpy()
X = array[:, 0:6].astype(float)
y = array[:, 7].astype(int)

# Re-encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c8rf_multi_f1data = do_cv_learning_rf(X, y).assign(Annotator='C8')

#Find Opt model
c8rf_multi_opt = model_opt_rf(X, y)

#Feature Importances
c8rf_opt_fi = feat_imp(c8rf_multi_opt).assign(Annotator='C8')

print(c8rf_multi_opt)
c8rf_multi_f1data

In [None]:
#C9 - IntVal

# Features are the first six columns; the target is the column at position 7
array = c9.to_numpy()
X = array[:, 0:6].astype(float)
y = array[:, 7].astype(int)

# Re-encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c9rf_multi_f1data = do_cv_learning_rf(X, y).assign(Annotator='C9')

#Find Opt model
c9rf_multi_opt = model_opt_rf(X, y)

#Feature Importances
c9rf_opt_fi = feat_imp(c9rf_multi_opt).assign(Annotator='C9')

print(c9rf_multi_opt)
c9rf_multi_f1data

In [None]:
#C10 - IntVal

# Features are the first six columns; the target is the column at position 7
array = c10.to_numpy()
X = array[:, 0:6].astype(float)
y = array[:, 7].astype(int)

# Re-encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c10rf_multi_f1data = do_cv_learning_rf(X, y).assign(Annotator='C10')

#Find Opt model
c10rf_multi_opt = model_opt_rf(X, y)

#Feature Importances
c10rf_opt_fi = feat_imp(c10rf_multi_opt).assign(Annotator='C10')

print(c10rf_multi_opt)
c10rf_multi_f1data

In [None]:
#C11 - IntVal

# Features are the first six columns; the target is the column at position 7
array = c11.to_numpy()
X = array[:, 0:6].astype(float)
y = array[:, 7].astype(int)

# Re-encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
c11rf_multi_f1data = do_cv_learning_rf(X, y).assign(Annotator='C11')

#Find Opt model
c11rf_multi_opt = model_opt_rf(X, y)

#Feature Importances
c11rf_opt_fi = feat_imp(c11rf_multi_opt).assign(Annotator='C11')

print(c11rf_multi_opt)
c11rf_multi_f1data

In [None]:
#MV - IntVal

# Features are the first six columns; the target is the column at position 7
array = mv.to_numpy()
X = array[:, 0:6].astype(float)
y = array[:, 7].astype(int)

# Re-encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
mvrf_multi_f1data = do_cv_learning_rf(X, y).assign(Annotator='MV')

#Find Opt model
mvrf_multi_opt = model_opt_rf(X, y)

#Feature Importances
mvrf_opt_fi = feat_imp(mvrf_multi_opt).assign(Annotator='MV')

print(mvrf_multi_opt)
mvrf_multi_f1data

In [None]:
#TMV - IntVal

# Features are the first six columns; the target is the column at position 7
array = tmv.to_numpy()
X = array[:, 0:6].astype(float)
y = array[:, 7].astype(int)

# Re-encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

print(X.shape, y.shape, le.classes_, sep='\n')

#5-fold CV Model Eval
tmvrf_multi_f1data = do_cv_learning_rf(X, y).assign(Annotator='TMV')

#Find Opt model
tmvrf_multi_opt = model_opt_rf(X, y)

#Feature Importances
tmvrf_opt_fi = feat_imp(tmvrf_multi_opt).assign(Annotator='TMV')

print(tmvrf_multi_opt)
tmvrf_multi_f1data

In [None]:
#Internal Validation Performances - Summary

# One single-row result frame per training dataset: C1..C11, MV, TMV
frames = [c1rf_multi_f1data, c2rf_multi_f1data, c3rf_multi_f1data, c4rf_multi_f1data, 
          c5rf_multi_f1data, c6rf_multi_f1data, c7rf_multi_f1data, c8rf_multi_f1data,
          c9rf_multi_f1data, c10rf_multi_f1data, c11rf_multi_f1data, mvrf_multi_f1data,
          tmvrf_multi_f1data]

# Stack the rows; the original row labels are kept (no ignore_index), so rows
# should be identified through the 'Annotator' column rather than the index.
multi_int = pd.concat(frames)
print(multi_int.shape)
multi_int

In [None]:
#Plot chart - Internal Validation F1 (micro)

# Switches the global matplotlib style; it stays active for later plots
plt.style.use('ggplot')

#Define x and y data
x1 = multi_int['Annotator']
y1 = multi_int['F1_micro']

#Plot chart data
plt.figure(figsize=(8,2.5))
plt.plot(x1, y1, color='#1F57C8', marker='o', linestyle="solid", label='Multi')

# y-axis runs slightly past 1.0, with ticks every 0.2 from 0.0 to 1.0
plt.ylim([0.0,1.1])
plt.yticks(np.arange(0.0,1.01, 0.2))

#Add title and labels
plt.title('Internal Validation: Multiclass - RF', fontsize=14)
plt.xlabel('Annotator', fontsize=14)
plt.xticks(fontsize=12)
# Second yticks call only changes the tick label font size
plt.yticks(fontsize=14)
plt.ylabel('F1_micro', fontsize=14)
plt.grid(True)
plt.tight_layout()

plt.show()

In [None]:
#Concatenate all Feature Importance data

# Only the 11 consultant models are listed; the MV and TMV feature-importance
# frames are not part of this table.
frames = [c1rf_opt_fi, c2rf_opt_fi, c3rf_opt_fi, c4rf_opt_fi, c5rf_opt_fi, c6rf_opt_fi,
          c7rf_opt_fi, c8rf_opt_fi, c9rf_opt_fi, c10rf_opt_fi, c11rf_opt_fi]

# NOTE(review): this rebinds the name `feat_imp`, which until now referred to
# the feature-importance helper function. After this cell has run, calling
# feat_imp(model) fails until the function definition cell is executed again.
feat_imp = pd.concat(frames)

print(feat_imp.shape)
feat_imp

In [None]:
#Plot Feature Importance
#Grouped bar chart: one group of six bars (one per feature) for each annotator model

##Set width of bars
barwidth = 0.1

labels = feat_imp['Annotator']
 
##Set heights of bars
Adrenaline = feat_imp['Adrenaline']
Noradrenaline = feat_imp['Noradrenaline']
FiO2 = feat_imp['FiO2']
SpO2 = feat_imp['SpO2']
Mean = feat_imp['Mean']
HR = feat_imp['HR']

##Set position of bar on X axis (each feature is shifted one bar width to the right)
f1 = np.arange(len(Adrenaline))
f2 = [x + barwidth for x in f1]
f3 = [x + barwidth for x in f2]
f4 = [x + barwidth for x in f3]
f5 = [x + barwidth for x in f4]
f6 = [x + barwidth for x in f5]
 
##Make the plot
plt.figure(figsize=(10,4))
plt.bar(f1, FiO2, color='#1f77b4', width=barwidth,label='FiO2')
plt.bar(f2, Noradrenaline, color='#CD534CFF', width=barwidth,label='Noradrenaline')
plt.bar(f3, Mean, color='#9467BD', width=barwidth,label='Mean')
plt.bar(f4, SpO2, color='#62B463', width=barwidth,label='SpO2')
plt.bar(f5, Adrenaline, color='#FFA319FF', width=barwidth, label='Adrenaline')
plt.bar(f6, HR, color='#8C564B', width=barwidth,label='HR')
 
##Add title and axis labels
plt.title('RF Classifiers - Feature Importances', fontsize=18)
plt.xlabel('Annotator', fontsize=14)
plt.ylabel('Feature Importance', fontsize=14)

##Add xticks on the middle of the group bars
##The six bar centres span f1 .. f1 + 5*barwidth, so the group centre is f1 + 2.5*barwidth
##(previously the tick sat under the first bar of each group)
plt.xticks(f1 + 2.5 * barwidth, labels, fontsize=16)
plt.yticks(fontsize=16)

##Set legend
plt.legend(bbox_to_anchor=(1, 1), fontsize=14)

##Save plot
plt.savefig('Multi-RF-IntVal-F1-feat_imp.png', dpi=100)   
    
##Show plot
plt.show()

# 4. External Validation Experiment

## 4.1 Define HiRID External Validation Dataset

In [None]:
#Import HiRID 'Patient' table (contains discharge_status info)
#Reads the whole table through the open database connection and keeps a CSV copy on disk

pat = pd.read_sql_query(sql="SELECT * FROM hirid.patient", con=conn)

pat.to_csv(path_or_buf='patient_table.csv')

print(pat.shape)
pat.head()

#33,905 records

In [None]:
#Import HiRID Validation Dataset - data for patients 1hr before discharge/death
##See jupyter notebook 'npjDM-HiRID_ExtVal_Dataset' to see steps on creating this HiRID External Validation Dataset

params1hr = pd.read_csv("HiRID_extval_params1hr.csv")

#Remove the index column that was written out with the CSV
params1hr.drop(columns='Unnamed: 0', inplace=True)

#Numeric outcome label: 0 when discharge_status is 'alive', 4 otherwise
params1hr['binary_status'] = np.where(params1hr['discharge_status'].eq('alive'), 0, 4)

print(params1hr.shape)
params1hr.head()

In [None]:
#Check discharge status classes are balanced
#A notebook cell only displays its last expression, so the first table is printed explicitly

print(params1hr.discharge_status.value_counts())
params1hr.binary_status.value_counts()

In [None]:
#Define hirid validation dataset
#X_test: columns 3-8 of params1hr cast to float; y_test: column 12
#NOTE(review): column 12 is presumably 'binary_status' - confirm against the CSV column order

array = params1hr.to_numpy()
X_test = array[:, 3:9].astype(float)
y_test = array[:, 12]

print(X_test.shape)
print(y_test.shape)

In [None]:
#Display the external validation feature matrix
X_test

In [None]:
#Display the external validation labels
y_test

## 4.2 Run QEUH models on HiRID External Validation Dataset

In [None]:
#C1 - HiRID Ext val
#Micro-averaged F1 of the C1 model's predictions on the HiRID validation set, as a one-row table

f1 = metrics.f1_score(y_true=list(y_test), y_pred=c1rf_multi_opt.predict(X_test), average='micro')

c1rf_multi_ext = pd.DataFrame(
    data=[['C1', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c1rf_multi_ext

In [None]:
#C1 - plot confusion matrix
#seaborn is otherwise only imported in the later IAA heatmap cells, so import it here
#before resetting matplotlib's rc settings
import seaborn as sns
sns.reset_orig() 

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c1rf_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues, colorbar=False,
                             ax=ax)

#Column sums of the confusion matrix = number of records assigned to each predicted class.
#The label set is fixed to 0-4 (A-E): by default confusion_matrix only keeps labels that occur,
#so a class that is never predicted would shift (or break) the positional indices below.
cnf_matrix = confusion_matrix(list(y_test), c1rf_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])
pred_labels = cnf_matrix.sum(axis=0)

print(pred_labels)
c1_A_pred, c1_B_pred, c1_C_pred, c1_D_pred, c1_E_pred = pred_labels

print(c1_A_pred,c1_B_pred,c1_C_pred,c1_D_pred,c1_E_pred)
fig.savefig('confusion_matrix_c1.eps',format='eps')

In [None]:
#C2 - HiRID Ext val
#Micro-averaged F1 of the C2 model's predictions on the HiRID validation set, as a one-row table

f1 = metrics.f1_score(y_true=list(y_test), y_pred=c2rf_multi_opt.predict(X_test), average='micro')

c2rf_multi_ext = pd.DataFrame(
    data=[['C2', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c2rf_multi_ext

In [None]:
#C2 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
#colorbar=False matches the confusion matrix plots of the other models
disp = plot_confusion_matrix(c2rf_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues, colorbar=False,
                             ax=ax)

#Column sums of the confusion matrix = number of records assigned to each predicted class.
#The label set is fixed to 0-4 (A-E): by default confusion_matrix only keeps labels that occur,
#so a class that is never predicted would shift (or break) the positional indices below.
cnf_matrix = confusion_matrix(list(y_test), c2rf_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c2_A_pred, c2_B_pred, c2_C_pred, c2_D_pred, c2_E_pred = pred_labels
print(c2_A_pred,c2_B_pred,c2_C_pred,c2_D_pred,c2_E_pred)

In [None]:
#C3 - HiRID Ext val
#Micro-averaged F1 of the C3 model's predictions on the HiRID validation set

f1 = metrics.f1_score(y_true=list(y_test), y_pred=c3rf_multi_opt.predict(X_test), average='micro')

##print data as DF
c3rf_multi_ext = pd.DataFrame(
    data=[['C3', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c3rf_multi_ext

In [None]:
#C3 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c3rf_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues, colorbar=False,
                             ax=ax)

#Column sums of the confusion matrix = number of records assigned to each predicted class.
#The label set is fixed to 0-4 (A-E): by default confusion_matrix only keeps labels that occur,
#so a class that is never predicted would shift (or break) the positional indices below.
cnf_matrix = confusion_matrix(list(y_test), c3rf_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c3_A_pred, c3_B_pred, c3_C_pred, c3_D_pred, c3_E_pred = pred_labels
print(c3_A_pred,c3_B_pred,c3_C_pred,c3_D_pred,c3_E_pred)

In [None]:
#C4 - HiRID Ext val
#Micro-averaged F1 of the C4 model's predictions on the HiRID validation set, as a one-row table

f1 = metrics.f1_score(y_true=list(y_test), y_pred=c4rf_multi_opt.predict(X_test), average='micro')

c4rf_multi_ext = pd.DataFrame(
    data=[['C4', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c4rf_multi_ext

In [None]:
#C4 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c4rf_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues, colorbar=False,
                             ax=ax)

#Column sums of the confusion matrix = number of records assigned to each predicted class.
#The label set is fixed to 0-4 (A-E): by default confusion_matrix only keeps labels that occur,
#so a class that is never predicted would shift (or break) the positional indices below.
cnf_matrix = confusion_matrix(list(y_test), c4rf_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c4_A_pred, c4_B_pred, c4_C_pred, c4_D_pred, c4_E_pred = pred_labels
print(c4_A_pred,c4_B_pred,c4_C_pred,c4_D_pred,c4_E_pred)

In [None]:
#C5 - HiRID Ext val
#Micro-averaged F1 of the C5 model's predictions on the HiRID validation set, as a one-row table

f1 = metrics.f1_score(y_true=list(y_test), y_pred=c5rf_multi_opt.predict(X_test), average='micro')

c5rf_multi_ext = pd.DataFrame(
    data=[['C5', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c5rf_multi_ext

In [None]:
#C5 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c5rf_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues, colorbar=False,
                             ax=ax)

#Column sums of the confusion matrix = number of records assigned to each predicted class.
#The label set is fixed to 0-4 (A-E): by default confusion_matrix only keeps labels that occur,
#so a class that is never predicted would shift (or break) the positional indices below.
cnf_matrix = confusion_matrix(list(y_test), c5rf_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c5_A_pred, c5_B_pred, c5_C_pred, c5_D_pred, c5_E_pred = pred_labels
print(c5_A_pred,c5_B_pred,c5_C_pred,c5_D_pred,c5_E_pred)

In [None]:
#C6 - HiRID Ext val
#Micro-averaged F1 of the C6 model's predictions on the HiRID validation set, as a one-row table

f1 = metrics.f1_score(y_true=list(y_test), y_pred=c6rf_multi_opt.predict(X_test), average='micro')

c6rf_multi_ext = pd.DataFrame(
    data=[['C6', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c6rf_multi_ext

In [None]:
#C6 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c6rf_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues, colorbar=False,
                             ax=ax)

#Column sums of the confusion matrix = number of records assigned to each predicted class.
#The label set is fixed to 0-4 (A-E): by default confusion_matrix only keeps labels that occur,
#so a class that is never predicted would shift (or break) the positional indices below.
cnf_matrix = confusion_matrix(list(y_test), c6rf_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c6_A_pred, c6_B_pred, c6_C_pred, c6_D_pred, c6_E_pred = pred_labels
print(c6_A_pred,c6_B_pred,c6_C_pred,c6_D_pred,c6_E_pred)

In [None]:
#C7 - HiRID Ext val
#Micro-averaged F1 of the C7 model's predictions on the HiRID validation set, as a one-row table

f1 = metrics.f1_score(y_true=list(y_test), y_pred=c7rf_multi_opt.predict(X_test), average='micro')

c7rf_multi_ext = pd.DataFrame(
    data=[['C7', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c7rf_multi_ext

In [None]:
#C7 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c7rf_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues, colorbar=False,
                             ax=ax)

#Column sums of the confusion matrix = number of records assigned to each predicted class.
#The label set is fixed to 0-4 (A-E): by default confusion_matrix only keeps labels that occur,
#so a class that is never predicted would shift (or break) the positional indices below.
cnf_matrix = confusion_matrix(list(y_test), c7rf_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c7_A_pred, c7_B_pred, c7_C_pred, c7_D_pred, c7_E_pred = pred_labels
print(c7_A_pred,c7_B_pred,c7_C_pred,c7_D_pred,c7_E_pred)

In [None]:
#C8 - HiRID Ext val
#Micro-averaged F1 of the C8 model's predictions on the HiRID validation set, as a one-row table

f1 = metrics.f1_score(y_true=list(y_test), y_pred=c8rf_multi_opt.predict(X_test), average='micro')

c8rf_multi_ext = pd.DataFrame(
    data=[['C8', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c8rf_multi_ext

In [None]:
#C8 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c8rf_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues, colorbar=False,
                             ax=ax)

#Column sums of the confusion matrix = number of records assigned to each predicted class.
#The label set is fixed to 0-4 (A-E): by default confusion_matrix only keeps labels that occur,
#so a class that is never predicted would shift (or break) the positional indices below.
cnf_matrix = confusion_matrix(list(y_test), c8rf_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c8_A_pred, c8_B_pred, c8_C_pred, c8_D_pred, c8_E_pred = pred_labels
print(c8_A_pred,c8_B_pred,c8_C_pred,c8_D_pred,c8_E_pred)

In [None]:
#C9 - HiRID Ext val
#Micro-averaged F1 of the C9 model's predictions on the HiRID validation set, as a one-row table

f1 = metrics.f1_score(y_true=list(y_test), y_pred=c9rf_multi_opt.predict(X_test), average='micro')

c9rf_multi_ext = pd.DataFrame(
    data=[['C9', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c9rf_multi_ext

In [None]:
#C9 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c9rf_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues, colorbar=False,
                             ax=ax)

#Column sums of the confusion matrix = number of records assigned to each predicted class.
#The label set is fixed to 0-4 (A-E): by default confusion_matrix only keeps labels that occur,
#so a class that is never predicted would shift (or break) the positional indices below.
cnf_matrix = confusion_matrix(list(y_test), c9rf_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c9_A_pred, c9_B_pred, c9_C_pred, c9_D_pred, c9_E_pred = pred_labels
print(c9_A_pred,c9_B_pred,c9_C_pred,c9_D_pred,c9_E_pred)

In [None]:
#C10 - HiRID Ext val
#Micro-averaged F1 of the C10 model's predictions on the HiRID validation set, as a one-row table

f1 = metrics.f1_score(y_true=list(y_test), y_pred=c10rf_multi_opt.predict(X_test), average='micro')

c10rf_multi_ext = pd.DataFrame(
    data=[['C10', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c10rf_multi_ext

In [None]:
#C10 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c10rf_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues, colorbar=False,
                             ax=ax)

#Column sums of the confusion matrix = number of records assigned to each predicted class.
#The label set is fixed to 0-4 (A-E): by default confusion_matrix only keeps labels that occur,
#so a class that is never predicted would shift (or break) the positional indices below.
cnf_matrix = confusion_matrix(list(y_test), c10rf_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c10_A_pred, c10_B_pred, c10_C_pred, c10_D_pred, c10_E_pred = pred_labels
print(c10_A_pred,c10_B_pred,c10_C_pred,c10_D_pred,c10_E_pred)

In [None]:
#C11 - HiRID Ext val
#Micro-averaged F1 of the C11 model's predictions on the HiRID validation set, as a one-row table

f1 = metrics.f1_score(y_true=list(y_test), y_pred=c11rf_multi_opt.predict(X_test), average='micro')

c11rf_multi_ext = pd.DataFrame(
    data=[['C11', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
c11rf_multi_ext

In [None]:
#C11 - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(c11rf_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues, colorbar=False,
                             ax=ax)

#Column sums of the confusion matrix = number of records assigned to each predicted class.
#The label set is fixed to 0-4 (A-E): by default confusion_matrix only keeps labels that occur,
#so a class that is never predicted would shift (or break) the positional indices below.
cnf_matrix = confusion_matrix(list(y_test), c11rf_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
c11_A_pred, c11_B_pred, c11_C_pred, c11_D_pred, c11_E_pred = pred_labels
print(c11_A_pred,c11_B_pred,c11_C_pred,c11_D_pred,c11_E_pred)

In [None]:
#MV - HiRID Ext val
#Micro-averaged F1 of the MV model's predictions on the HiRID validation set, as a one-row table

f1 = metrics.f1_score(y_true=list(y_test), y_pred=mvrf_multi_opt.predict(X_test), average='micro')

mvrf_multi_ext = pd.DataFrame(
    data=[['MV', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
mvrf_multi_ext

In [None]:
#MV - plot confusion matrix

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(mvrf_multi_opt, X_test, list(y_test),
                             cmap=plt.cm.Blues, colorbar=False,
                             ax=ax)

#Column sums of the confusion matrix = number of records assigned to each predicted class.
#The label set is fixed to 0-4 (A-E): by default confusion_matrix only keeps labels that occur,
#so a class that is never predicted would shift (or break) the positional indices below.
cnf_matrix = confusion_matrix(list(y_test), mvrf_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)
mv_A_pred, mv_B_pred, mv_C_pred, mv_D_pred, mv_E_pred = pred_labels
print(mv_A_pred,mv_B_pred,mv_C_pred,mv_D_pred,mv_E_pred)

In [None]:
#TMV - HiRID Ext val
#Micro-averaged F1 of the TMV model's predictions on the HiRID validation set, as a one-row table

f1 = metrics.f1_score(y_true=list(y_test), y_pred=tmvrf_multi_opt.predict(X_test), average='micro')

tmvrf_multi_ext = pd.DataFrame(
    data=[['TMV', 'multi', 'F1_micro', f1]],
    columns=['Annotator', 'Model', 'Optimisation', 'F1_micro'],
)
tmvrf_multi_ext

In [None]:
#TMV - plot confusion matrix
#This plot raised a ValueError; the TMV model never predicts class 3 ('D') on HiRID, so
#presumably the matrix had fewer rows/columns than display labels (confirm on the installed
#scikit-learn version). Passing the full label set 0-4 (A-E) keeps the matrix 5x5, with an
#all-zero row/column for any class that does not occur.

fig, ax = plt.subplots(figsize=(5,5))
plt.rcParams.update({'font.size': 16})
disp = plot_confusion_matrix(tmvrf_multi_opt, X_test, list(y_test),
                             labels=[0, 1, 2, 3, 4],
                             cmap=plt.cm.Blues, colorbar=False,
                             ax=ax)

In [None]:
#investigate above ValueError
#List the distinct class labels the TMV model predicts on the HiRID validation set

set(tmvrf_multi_opt.predict(X_test))

#TMV predicted labels contain no '3' labels (i.e. no 'D')

In [None]:
#TMV - predicted label counts
#Column sums of the confusion matrix = number of records assigned to each predicted class.
#The label set is fixed to 0-4 (A-E) so a class that is never predicted (here 'D') gets an
#explicit zero column, instead of relying on a hand-written index remap that silently breaks
#if the set of predicted classes changes.
cnf_matrix = confusion_matrix(list(y_test), tmvrf_multi_opt.predict(X_test),
                              labels=[0, 1, 2, 3, 4])
pred_labels = cnf_matrix.sum(axis=0)
print(pred_labels)

tmv_A_pred, tmv_B_pred, tmv_C_pred, tmv_D_pred, tmv_E_pred = pred_labels

print(tmv_A_pred,tmv_B_pred,tmv_C_pred,tmv_D_pred, tmv_E_pred)

In [None]:
#External Validation - Summary
#Stack the one-row external validation results of all 13 models (C1-C11, MV, TMV)

frames = [
    c1rf_multi_ext, c2rf_multi_ext, c3rf_multi_ext, c4rf_multi_ext,
    c5rf_multi_ext, c6rf_multi_ext, c7rf_multi_ext, c8rf_multi_ext,
    c9rf_multi_ext, c10rf_multi_ext, c11rf_multi_ext,
    mvrf_multi_ext, tmvrf_multi_ext,
]

multi_ext = pd.concat(frames, axis=0)

print(multi_ext.shape)
multi_ext

In [None]:
#External Validation Performance - Consultant Models
#Same as the full summary but restricted to the 11 consultant models (C1-C11)

frames = [
    c1rf_multi_ext, c2rf_multi_ext, c3rf_multi_ext, c4rf_multi_ext,
    c5rf_multi_ext, c6rf_multi_ext, c7rf_multi_ext, c8rf_multi_ext,
    c9rf_multi_ext, c10rf_multi_ext, c11rf_multi_ext,
]

multi_ext_ann = pd.concat(frames, axis=0)

print(multi_ext_ann.shape)
multi_ext_ann

In [None]:
#External Validation Performance - Majority Vote Models
#Two-row table: MV first, TMV second (the plotting cell reads them by position)

frames = [mvrf_multi_ext, tmvrf_multi_ext]

multi_ext_mvs = pd.concat(frames, axis=0)
print(multi_ext_mvs.shape)
multi_ext_mvs

In [None]:
#Plot chart - External Validation
#Line of F1 micro per consultant model, with the MV and TMV scores drawn as horizontal references

plt.style.use('seaborn')

#Define x and y data
x1 = multi_ext_ann['Annotator']
y1 = multi_ext_ann['F1_micro']

#F1 of the two majority-vote models (row 0 = MV, row 1 = TMV; column 3 = 'F1_micro').
#Stored under *_f1 names: `tmv` is the TMV training DataFrame read by the TMV internal
#validation cell (and `mv` presumably the MV one - confirm), so assigning the scores to
#those names would overwrite the datasets and break re-running the earlier cells.
mv_f1 = multi_ext_mvs.iloc[0,3]
tmv_f1 = multi_ext_mvs.iloc[1,3]

#Plot chart data
plt.figure(figsize=(8.5,3.5))
plt.plot(x1, y1, color='#1F57C8', marker='o', linestyle="solid")
plt.ylim([0.0,0.61])
plt.yticks(np.arange(0.0,0.61, 0.1))
plt.axhline(y=mv_f1, color='#DA4802', linestyle='-', label = 'Majority Vote (MV)')
plt.axhline(y=tmv_f1, color='#65C314', linestyle='-', label = 'Top Majority Vote (TMV)')

#Add title and labels
plt.xlabel('Annotator', fontsize=14, labelpad=10)
plt.xticks(fontsize=12)
plt.yticks(fontsize=14)
plt.ylabel('F1 micro', fontsize=14, labelpad=10)
plt.grid(True)
#Legend is placed below the axes, in two columns
plt.legend(fontsize=12, loc='upper center',  bbox_to_anchor=(0.5, -0.35), fancybox=False, shadow=False, ncol=2)
plt.tight_layout()

#Save plot
plt.savefig('Figure_6.eps', format='eps')

plt.show()

## 4.3 External Validation - IAA Metrics

In [None]:
#Consistency of predicted labels
#One single-column table of HiRID predictions per consultant model, plus the true labels,
#joined side by side into pred_lab (columns C1..C11, True_label)

c1_lab = pd.DataFrame(c1rf_multi_opt.predict(X_test), columns=['C1'])
c2_lab = pd.DataFrame(c2rf_multi_opt.predict(X_test), columns=['C2'])
c3_lab = pd.DataFrame(c3rf_multi_opt.predict(X_test), columns=['C3'])
c4_lab = pd.DataFrame(c4rf_multi_opt.predict(X_test), columns=['C4'])
c5_lab = pd.DataFrame(c5rf_multi_opt.predict(X_test), columns=['C5'])
c6_lab = pd.DataFrame(c6rf_multi_opt.predict(X_test), columns=['C6'])
c7_lab = pd.DataFrame(c7rf_multi_opt.predict(X_test), columns=['C7'])
c8_lab = pd.DataFrame(c8rf_multi_opt.predict(X_test), columns=['C8'])
c9_lab = pd.DataFrame(c9rf_multi_opt.predict(X_test), columns=['C9'])
c10_lab = pd.DataFrame(c10rf_multi_opt.predict(X_test), columns=['C10'])
c11_lab = pd.DataFrame(c11rf_multi_opt.predict(X_test), columns=['C11'])

true_lab = pd.DataFrame(y_test, columns=['True_label'])

frames = [
    c1_lab, c2_lab, c3_lab, c4_lab, c5_lab, c6_lab,
    c7_lab, c8_lab, c9_lab, c10_lab, c11_lab,
    true_lab,
]

pred_lab = pd.concat(frames, axis=1)
pred_lab

#### 4.3.1 Discharged Alive - IAA

In [None]:
#Select only Discharged alive
#Keep rows whose true label is 0, drop the true label column and turn every prediction into a string

true_alive = (
    pred_lab.loc[pred_lab['True_label'] == 0]
    .drop(columns='True_label')
    .applymap(str)
)
true_alive

In [None]:
#Calculate pairwise Cohen's kappa (Discharged Alive only)
#The variables are created in loops, but they are the same module-level names as before:
#c1_pred..c11_pred and c<i>_c<j> (i < j), which the summary-table and averaging cells read.

#Prediction column of each consultant model (positional columns 0-10 of true_alive)
for _i in range(1, 12):
    globals()['c{}_pred'.format(_i)] = true_alive.iloc[:, _i - 1]

#Cohen's kappa for every unordered pair of consultant models, rounded to 2 decimal places
for _i in range(1, 12):
    for _j in range(_i + 1, 12):
        globals()['c{}_c{}'.format(_i, _j)] = round(
            cohen_kappa_score(globals()['c{}_pred'.format(_i)],
                              globals()['c{}_pred'.format(_j)]), 2)

In [None]:
#Pairwise Cohen's kappa (Discharged Alive only)
#Triangular table of the pairwise kappas: one single-column frame per consultant
#(1.00 on the diagonal, "" for cells that are left blank), joined side by side

C0 = pd.DataFrame(data=["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"], columns=[""])
C1 = pd.DataFrame(data=[1.00, c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11], columns=['C1'])
C2 = pd.DataFrame(data=["", 1.00, c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11], columns=['C2'])
C3 = pd.DataFrame(data=["", "", 1.00, c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11], columns=['C3'])
C4 = pd.DataFrame(data=["", "", "", 1.00, c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11], columns=['C4'])
C5 = pd.DataFrame(data=["", "", "", "", 1.00, c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11], columns=['C5'])
C6 = pd.DataFrame(data=["", "", "", "", "", 1.00, c6_c7, c6_c8, c6_c9, c6_c10, c6_c11], columns=['C6'])
C7 = pd.DataFrame(data=["", "", "", "", "", "", 1.00, c7_c8, c7_c9, c7_c10, c7_c11], columns=['C7'])
C8 = pd.DataFrame(data=["", "", "", "", "", "", "", 1.00, c8_c9, c8_c10, c8_c11], columns=['C8'])
C9 = pd.DataFrame(data=["", "", "", "", "", "", "", "", 1.00, c9_c10, c9_c11], columns=['C9'])
C10 = pd.DataFrame(data=["", "", "", "", "", "", "", "", "", 1.00, c10_c11], columns=['C10'])
C11 = pd.DataFrame(data=["", "", "", "", "", "" , "", "", "", "", 1.00], columns=['C11'])

frames = [C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11]

#The unnamed first column (consultant names) becomes the row index
cohen_k = pd.concat(frames, axis=1).set_index("")

cohen_k

In [None]:
#Plot pairwise Cohen's kappa (Discharged Alive only)
#Annotated heatmap of the pairwise kappa table

import seaborn as sns

#Convert the table to numeric so the blank "" cells become NaN and are left empty in the heatmap
cols = ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"]
cohen_k[cols] = cohen_k[cols].apply(pd.to_numeric)

fig = plt.figure(num=None, figsize=(8, 5), dpi=80, facecolor='w', edgecolor='k')

res = sns.heatmap(cohen_k, annot=True, vmin=0, vmax=1,
                  fmt='.2f', cmap="YlGnBu", annot_kws={"fontsize":14})

res.set_xticklabels(res.get_xmajorticklabels(), fontsize = 15)
res.set_yticklabels(res.get_ymajorticklabels(), fontsize = 15)

plt.tight_layout()
#Saved under its own file name: this cell previously wrote to 'confusion_matrix_c1.eps',
#overwriting the C1 confusion matrix figure saved earlier in the notebook
fig.savefig('cohen_kappa_alive.eps',format='eps')
plt.show()

In [None]:
#Calculate average pairwise cohen's kappa values (Discharged alive)
import statistics
from statistics import mean

#All 55 pairwise kappas, one row per first consultant of the pair
_kappa_rows = [
    [c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11],
    [c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11],
    [c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11],
    [c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11],
    [c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11],
    [c6_c7, c6_c8, c6_c9, c6_c10, c6_c11],
    [c7_c8, c7_c9, c7_c10, c7_c11],
    [c8_c9, c8_c10, c8_c11],
    [c9_c10, c9_c11],
    [c10_c11],
]
sample = [kappa for row in _kappa_rows for kappa in row]

#Mean and sample standard deviation, rounded to 3 decimal places
avg = round(mean(sample), 3)
sd = round(statistics.stdev(sample), 3)

print("Average:", avg)
print("Standard Deviation:", sd)

In [None]:
#Check Fleiss' kappa for Discharged Alive
#Per record, count how many of the 11 consultant models predicted each class
#(predictions are stored as the strings '0'-'4', i.e. A-E)

cols = [ 'C1','C2','C3','C4','C5','C6','C7','C8','C9','C10','C11']

#Only the count columns are kept; the row index of true_alive is preserved
all_alive = pd.DataFrame({
    'count_A': true_alive.eq('0').sum(axis=1),
    'count_B': true_alive.eq('1').sum(axis=1),
    'count_C': true_alive.eq('2').sum(axis=1),
    'count_D': true_alive.eq('3').sum(axis=1),
    'count_E': true_alive.eq('4').sum(axis=1),
})

all_alive

In [None]:
#Calculate Fleiss' kappa - Discharged Alive
#Computed from the per-record class count table and rounded to 3 decimal places

fleiss_k = fleiss_kappa(all_alive, method='fleiss')
fleiss_k = round(fleiss_k, 3)

print("Fleiss' kappa: {:.3f}".format(fleiss_k))

#### 4.3.2 Died in ICU - IAA

In [None]:
#Select only Discharged died
#Keep rows whose true label is 4, drop the true label column and turn every prediction into a string

true_died = (
    pred_lab.loc[pred_lab['True_label'] == 4]
    .drop(columns='True_label')
    .applymap(str)
)
true_died

In [None]:
#Calculate pairwise Cohen's kappa (Died only)
#The variables are created in loops, but they are the same module-level names as before:
#c1_pred..c11_pred and c<i>_c<j> (i < j), which the summary-table and averaging cells read.

#Prediction column of each consultant model (positional columns 0-10 of true_died)
for _i in range(1, 12):
    globals()['c{}_pred'.format(_i)] = true_died.iloc[:, _i - 1]

#Cohen's kappa for every unordered pair of consultant models, rounded to 2 decimal places
for _i in range(1, 12):
    for _j in range(_i + 1, 12):
        globals()['c{}_c{}'.format(_i, _j)] = round(
            cohen_kappa_score(globals()['c{}_pred'.format(_i)],
                              globals()['c{}_pred'.format(_j)]), 2)

In [None]:
#Pairwise Cohen's kappa (Died only)
#Triangular table of the pairwise kappas: one single-column frame per consultant
#(1.00 on the diagonal, "" for cells that are left blank), joined side by side

C0 = pd.DataFrame(data=["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"], columns=[""])
C1 = pd.DataFrame(data=[1.00, c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11], columns=['C1'])
C2 = pd.DataFrame(data=["", 1.00, c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11], columns=['C2'])
C3 = pd.DataFrame(data=["", "", 1.00, c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11], columns=['C3'])
C4 = pd.DataFrame(data=["", "", "", 1.00, c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11], columns=['C4'])
C5 = pd.DataFrame(data=["", "", "", "", 1.00, c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11], columns=['C5'])
C6 = pd.DataFrame(data=["", "", "", "", "", 1.00, c6_c7, c6_c8, c6_c9, c6_c10, c6_c11], columns=['C6'])
C7 = pd.DataFrame(data=["", "", "", "", "", "", 1.00, c7_c8, c7_c9, c7_c10, c7_c11], columns=['C7'])
C8 = pd.DataFrame(data=["", "", "", "", "", "", "", 1.00, c8_c9, c8_c10, c8_c11], columns=['C8'])
C9 = pd.DataFrame(data=["", "", "", "", "", "", "", "", 1.00, c9_c10, c9_c11], columns=['C9'])
C10 = pd.DataFrame(data=["", "", "", "", "", "", "", "", "", 1.00, c10_c11], columns=['C10'])
C11 = pd.DataFrame(data=["", "", "", "", "", "" , "", "", "", "", 1.00], columns=['C11'])

frames = [C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11]

#The unnamed first column (consultant names) becomes the row index
cohen_k = pd.concat(frames, axis=1).set_index("")

cohen_k

In [None]:
#Plot pairwise Cohen's kappa (Died only)
#Annotated heatmap of the pairwise kappa table (no colour bar)

import seaborn as sns

#Make every consultant column numeric before plotting
cols = ['C{}'.format(n) for n in range(1, 12)]
cohen_k[cols] = cohen_k[cols].apply(pd.to_numeric)

fig = plt.figure(num=None, figsize=(7, 5), dpi=80, facecolor='w', edgecolor='k')

res = sns.heatmap(
    data=cohen_k,
    vmin=0, vmax=1,
    cmap="YlGnBu",
    annot=True, fmt='.2f', annot_kws={"fontsize":15},
    cbar=False,
)

#Enlarge the tick labels on both axes
res.set_xticklabels(res.get_xmajorticklabels(), fontsize = 15)
res.set_yticklabels(res.get_ymajorticklabels(), fontsize = 15)

plt.tight_layout()
plt.show()

#range cohen's k:0.01 to 0.56

In [None]:
#Calculate average pairwise cohen's kappa values (Died)
import statistics
from statistics import mean

#All 55 pairwise kappas, one row per first consultant of the pair
_kappa_rows = [
    [c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11],
    [c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11],
    [c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11],
    [c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11],
    [c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11],
    [c6_c7, c6_c8, c6_c9, c6_c10, c6_c11],
    [c7_c8, c7_c9, c7_c10, c7_c11],
    [c8_c9, c8_c10, c8_c11],
    [c9_c10, c9_c11],
    [c10_c11],
]
sample = [kappa for row in _kappa_rows for kappa in row]

#Mean and sample standard deviation, rounded to 3 decimal places
avg = round(mean(sample), 3)
sd = round(statistics.stdev(sample), 3)

print("Average:", avg)
print("Standard Deviation:", sd)

In [None]:
#Check Fleiss' kappa for Died

#Work on a deep copy so true_died itself is left untouched
all_died = true_died.copy(deep=True)

#For each row, count how many cells equal each label code ('0'..'4' -> A..E).
#NOTE(review): the comparison is against strings, so this assumes true_died
#stores its labels as strings - confirm where true_died is built.
for _letter, _code in zip("ABCDE", "01234"):
    all_died['count_' + _letter] = all_died.eq(_code).sum(axis=1)

#Drop the per-annotator columns so only the five count columns remain
cols = ["C{}".format(i) for i in range(1, 12)]
all_died = all_died.drop(cols, axis=1)

all_died

In [None]:
#Calculate Fleiss' kappa - Discharged Died

#all_died is passed as the table of per-row category counts; result rounded to 3 dp
fleiss_raw = fleiss_kappa(all_died, method='fleiss')
fleiss_k = round(fleiss_raw, 3)

print("Fleiss' kappa: {:.3f}".format(fleiss_k))

## 4.4 HiRID Predicted Label Distributions

In [None]:
#Values of each group
#One list per predicted label (A-E); each list has one entry per annotator model, C1..C11
A_bar = [
    c1_A_pred, c2_A_pred, c3_A_pred, c4_A_pred, c5_A_pred, c6_A_pred,
    c7_A_pred, c8_A_pred, c9_A_pred, c10_A_pred, c11_A_pred,
]
B_bar = [
    c1_B_pred, c2_B_pred, c3_B_pred, c4_B_pred, c5_B_pred, c6_B_pred,
    c7_B_pred, c8_B_pred, c9_B_pred, c10_B_pred, c11_B_pred,
]
C_bar = [
    c1_C_pred, c2_C_pred, c3_C_pred, c4_C_pred, c5_C_pred, c6_C_pred,
    c7_C_pred, c8_C_pred, c9_C_pred, c10_C_pred, c11_C_pred,
]
D_bar = [
    c1_D_pred, c2_D_pred, c3_D_pred, c4_D_pred, c5_D_pred, c6_D_pred,
    c7_D_pred, c8_D_pred, c9_D_pred, c10_D_pred, c11_D_pred,
]
E_bar = [
    c1_E_pred, c2_E_pred, c3_E_pred, c4_E_pred, c5_E_pred, c6_E_pred,
    c7_E_pred, c8_E_pred, c9_E_pred, c10_E_pred, c11_E_pred,
]

#Print one list per line, in label order A to E
print(A_bar, B_bar, C_bar, D_bar, E_bar, sep="\n")

In [None]:
#Stacked bar chart - predicted label distributions

#Per-label values, one entry per annotator model (C1..C11)
A_bar = [
    c1_A_pred, c2_A_pred, c3_A_pred, c4_A_pred, c5_A_pred, c6_A_pred,
    c7_A_pred, c8_A_pred, c9_A_pred, c10_A_pred, c11_A_pred,
]
B_bar = [
    c1_B_pred, c2_B_pred, c3_B_pred, c4_B_pred, c5_B_pred, c6_B_pred,
    c7_B_pred, c8_B_pred, c9_B_pred, c10_B_pred, c11_B_pred,
]
C_bar = [
    c1_C_pred, c2_C_pred, c3_C_pred, c4_C_pred, c5_C_pred, c6_C_pred,
    c7_C_pred, c8_C_pred, c9_C_pred, c10_C_pred, c11_C_pred,
]
D_bar = [
    c1_D_pred, c2_D_pred, c3_D_pred, c4_D_pred, c5_D_pred, c6_D_pred,
    c7_D_pred, c8_D_pred, c9_D_pred, c10_D_pred, c11_D_pred,
]
E_bar = [
    c1_E_pred, c2_E_pred, c3_E_pred, c4_E_pred, c5_E_pred, c6_E_pred,
    c7_E_pred, c8_E_pred, c9_E_pred, c10_E_pred, c11_E_pred,
]

#Running totals (A+B, A+B+C, A+B+C+D) are the baselines for the C, D and E segments
list_AB = np.array([A_bar, B_bar])
list_ABC = np.array([A_bar, B_bar, C_bar])
list_ABCD = np.array([A_bar, B_bar, C_bar, D_bar])
AB_bars, ABC_bars, ABCD_bars = (
    np.sum(stack, axis=0) for stack in (list_AB, list_ABC, list_ABCD)
)

#Bar positions on the x-axis and their tick labels
r = list(range(11))
names = ["C{}".format(i) for i in range(1, 12)]

#Plot bars: each segment is (heights, baseline, colour, legend label), drawn bottom-up
barwidth = 1
plt.style.use("seaborn")
plt.figure(figsize=(8,5))
segments = [
    (A_bar, None, '#4e79a7', 'A'),
    (B_bar, A_bar, '#59a14f', 'B'),
    (C_bar, AB_bars, '#ddaa33', 'C'),
    (D_bar, ABC_bars, '#b07aa1', 'D'),
    (E_bar, ABCD_bars, '#d6604d', 'E'),
]
for seg_heights, seg_base, seg_colour, seg_label in segments:
    plt.bar(r, seg_heights, bottom=seg_base, color=seg_colour,
            edgecolor='white', width=barwidth, label=seg_label)

#Axis labels, ticks and y-range
plt.xlabel("Annotator", fontsize=16, labelpad=10)
plt.ylabel("Predicted Label Distribution", fontsize=16, labelpad=10)
plt.xticks(r, names, fontsize=16)
plt.yticks(fontsize=16)
plt.ylim((0,2700))

#Legend is placed just outside the right edge of the axes
plt.legend(loc=(1.02,0),fontsize=18)
plt.tight_layout()

#Save plot as EPS before displaying it
plt.savefig('Multi-RF-Exp1-PredLabels.eps', format='eps')

#Show graphic
plt.show()

### 4.4.1 Pairwise Cohen's kappa - HiRID Predicted Labels

In [None]:
#Predict on X_test with each annotator's optimised multiclass RF model and
#collect the predictions side by side, one column per annotator (C1..C11)
models = [
    c1rf_multi_opt, c2rf_multi_opt, c3rf_multi_opt, c4rf_multi_opt,
    c5rf_multi_opt, c6rf_multi_opt, c7rf_multi_opt, c8rf_multi_opt,
    c9rf_multi_opt, c10rf_multi_opt, c11rf_multi_opt,
]

#One single-column DataFrame per model, named after its annotator
frames = [
    pd.DataFrame({"C{}".format(k): list(model.predict(X_test))})
    for k, model in enumerate(models, start=1)
]

#Keep the per-annotator frames available under their individual names as well
(c1_pred, c2_pred, c3_pred, c4_pred, c5_pred, c6_pred,
 c7_pred, c8_pred, c9_pred, c10_pred, c11_pred) = frames

#Column-wise concatenation: rows are test instances, columns are annotators
ann_pred = pd.concat(frames, axis=1)

print(ann_pred.shape)
ann_pred

In [None]:
#Compare C2, C4, C8 HiRID predicted labels

#For each of the three annotator models, build a small table with the count of
#each predicted label (A-E) and that count expressed as a percentage of 2600.
#NOTE(review): 2600 is presumably the number of HiRID test instances - confirm.

#C2
c2_labs = {'HiRID Predicted Label': ['A', 'B', 'C', 'D', 'E'], 'C2': [c2_A_pred, c2_B_pred, c2_C_pred, c2_D_pred, c2_E_pred]}
c2_pred_labs = pd.DataFrame(data=c2_labs)
c2_pred_labs['C2 %'] = ((c2_pred_labs['C2']/2600)*100)

#C4
c4_labs = {'HiRID Predicted Label': ['A', 'B', 'C', 'D', 'E'], 'C4': [c4_A_pred, c4_B_pred, c4_C_pred, c4_D_pred, c4_E_pred]}
c4_pred_labs = pd.DataFrame(data=c4_labs)
c4_pred_labs['C4 %'] = ((c4_pred_labs['C4']/2600)*100)

#C8
#Fix: this table was previously filled with the C4 counts (copy-paste slip),
#which made the "C8 %" column an exact duplicate of "C4 %".
c8_labs = {'HiRID Predicted Label': ['A', 'B', 'C', 'D', 'E'], 'C8': [c8_A_pred, c8_B_pred, c8_C_pred, c8_D_pred, c8_E_pred]}
c8_pred_labs = pd.DataFrame(data=c8_labs)
c8_pred_labs['C8 %'] = ((c8_pred_labs['C8']/2600)*100)


#MERGE
#Join the three tables on the label, order by label, and keep only the percentage columns
pred_dist = c2_pred_labs.merge(c4_pred_labs, on='HiRID Predicted Label').merge(c8_pred_labs, on='HiRID Predicted Label')
pred_dist = pred_dist.sort_values(by='HiRID Predicted Label', ascending=True)
pred_dist = pred_dist.drop(['C2','C4','C8'], axis=1)

pred_dist

In [None]:
#Calculate pairwise Cohen's kappa

from sklearn.metrics import cohen_kappa_score


def _kappa_2dp(labels_a, labels_b):
    """Return Cohen's kappa between two label sequences, rounded to 2 decimals."""
    return round(cohen_kappa_score(labels_a, labels_b), 2)


#Pull each annotator's predicted labels out of ann_pred by column position (0..10)
(c1_pred, c2_pred, c3_pred, c4_pred, c5_pred, c6_pred,
 c7_pred, c8_pred, c9_pred, c10_pred, c11_pred) = (
    ann_pred.iloc[:, k] for k in range(11)
)

#C1 against every later annotator
c1_c2 = _kappa_2dp(c1_pred, c2_pred)
c1_c3 = _kappa_2dp(c1_pred, c3_pred)
c1_c4 = _kappa_2dp(c1_pred, c4_pred)
c1_c5 = _kappa_2dp(c1_pred, c5_pred)
c1_c6 = _kappa_2dp(c1_pred, c6_pred)
c1_c7 = _kappa_2dp(c1_pred, c7_pred)
c1_c8 = _kappa_2dp(c1_pred, c8_pred)
c1_c9 = _kappa_2dp(c1_pred, c9_pred)
c1_c10 = _kappa_2dp(c1_pred, c10_pred)
c1_c11 = _kappa_2dp(c1_pred, c11_pred)

#C2 against every later annotator
c2_c3 = _kappa_2dp(c2_pred, c3_pred)
c2_c4 = _kappa_2dp(c2_pred, c4_pred)
c2_c5 = _kappa_2dp(c2_pred, c5_pred)
c2_c6 = _kappa_2dp(c2_pred, c6_pred)
c2_c7 = _kappa_2dp(c2_pred, c7_pred)
c2_c8 = _kappa_2dp(c2_pred, c8_pred)
c2_c9 = _kappa_2dp(c2_pred, c9_pred)
c2_c10 = _kappa_2dp(c2_pred, c10_pred)
c2_c11 = _kappa_2dp(c2_pred, c11_pred)

#C3 against every later annotator
c3_c4 = _kappa_2dp(c3_pred, c4_pred)
c3_c5 = _kappa_2dp(c3_pred, c5_pred)
c3_c6 = _kappa_2dp(c3_pred, c6_pred)
c3_c7 = _kappa_2dp(c3_pred, c7_pred)
c3_c8 = _kappa_2dp(c3_pred, c8_pred)
c3_c9 = _kappa_2dp(c3_pred, c9_pred)
c3_c10 = _kappa_2dp(c3_pred, c10_pred)
c3_c11 = _kappa_2dp(c3_pred, c11_pred)

#C4 against every later annotator
c4_c5 = _kappa_2dp(c4_pred, c5_pred)
c4_c6 = _kappa_2dp(c4_pred, c6_pred)
c4_c7 = _kappa_2dp(c4_pred, c7_pred)
c4_c8 = _kappa_2dp(c4_pred, c8_pred)
c4_c9 = _kappa_2dp(c4_pred, c9_pred)
c4_c10 = _kappa_2dp(c4_pred, c10_pred)
c4_c11 = _kappa_2dp(c4_pred, c11_pred)

#C5 against every later annotator
c5_c6 = _kappa_2dp(c5_pred, c6_pred)
c5_c7 = _kappa_2dp(c5_pred, c7_pred)
c5_c8 = _kappa_2dp(c5_pred, c8_pred)
c5_c9 = _kappa_2dp(c5_pred, c9_pred)
c5_c10 = _kappa_2dp(c5_pred, c10_pred)
c5_c11 = _kappa_2dp(c5_pred, c11_pred)

#C6 against every later annotator
c6_c7 = _kappa_2dp(c6_pred, c7_pred)
c6_c8 = _kappa_2dp(c6_pred, c8_pred)
c6_c9 = _kappa_2dp(c6_pred, c9_pred)
c6_c10 = _kappa_2dp(c6_pred, c10_pred)
c6_c11 = _kappa_2dp(c6_pred, c11_pred)

#C7 against every later annotator
c7_c8 = _kappa_2dp(c7_pred, c8_pred)
c7_c9 = _kappa_2dp(c7_pred, c9_pred)
c7_c10 = _kappa_2dp(c7_pred, c10_pred)
c7_c11 = _kappa_2dp(c7_pred, c11_pred)

#C8 against every later annotator
c8_c9 = _kappa_2dp(c8_pred, c9_pred)
c8_c10 = _kappa_2dp(c8_pred, c10_pred)
c8_c11 = _kappa_2dp(c8_pred, c11_pred)

#C9 against every later annotator
c9_c10 = _kappa_2dp(c9_pred, c10_pred)
c9_c11 = _kappa_2dp(c9_pred, c11_pred)

#C10 against C11
c10_c11 = _kappa_2dp(c10_pred, c11_pred)

In [None]:
#Assemble the pairwise kappa values into a triangular table.
#Column Cn lists kappa between annotator n and itself (1.00) followed by every
#later annotator; rows for earlier annotators are left as empty strings so that
#each pair appears exactly once.

#Row labels (become the index below)
C0 = pd.DataFrame({"": ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11"]})

C1 = pd.DataFrame({'C1': [1.00, c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11]})
C2 = pd.DataFrame({'C2': [""] * 1 + [1.00, c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11]})
C3 = pd.DataFrame({'C3': [""] * 2 + [1.00, c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11]})
C4 = pd.DataFrame({'C4': [""] * 3 + [1.00, c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11]})
C5 = pd.DataFrame({'C5': [""] * 4 + [1.00, c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11]})
C6 = pd.DataFrame({'C6': [""] * 5 + [1.00, c6_c7, c6_c8, c6_c9, c6_c10, c6_c11]})
C7 = pd.DataFrame({'C7': [""] * 6 + [1.00, c7_c8, c7_c9, c7_c10, c7_c11]})
C8 = pd.DataFrame({'C8': [""] * 7 + [1.00, c8_c9, c8_c10, c8_c11]})
C9 = pd.DataFrame({'C9': [""] * 8 + [1.00, c9_c10, c9_c11]})
C10 = pd.DataFrame({'C10': [""] * 9 + [1.00, c10_c11]})
C11 = pd.DataFrame({'C11': [""] * 10 + [1.00]})

frames = [C0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11]

#Join the columns side by side and use the annotator names as the row index
cohen_k = pd.concat(frames, axis=1).set_index("")

cohen_k

In [None]:
#Convert each annotator column of the kappa table to a numeric dtype
cols = ["C{}".format(i) for i in range(1, 12)]
for _col in cols:
    cohen_k[_col] = pd.to_numeric(cohen_k[_col])

#Display the resulting dtypes as a check
cohen_k.dtypes

In [None]:
#Plot pairwise Cohen's kappa

import seaborn as sns

fig = plt.figure(figsize=(6, 4), dpi=80, facecolor='w', edgecolor='k')

#Annotated heatmap on a fixed 0-1 colour scale, without a colour bar
res = sns.heatmap(
    cohen_k,
    vmin=0,
    vmax=1,
    cmap="YlGnBu",
    cbar=False,
    annot=True,
    fmt='.2f',
    annot_kws={"fontsize": 12},
)

#Re-apply the existing tick labels at font size 12
x_labels = res.get_xmajorticklabels()
res.set_xticklabels(x_labels, fontsize=12)
y_labels = res.get_ymajorticklabels()
res.set_yticklabels(y_labels, fontsize=12)

plt.tight_layout()

#Write the figure to EPS before showing it
plt.savefig('Multi-RF-Exp1-Pairwise-CohenK.eps', format='eps')
plt.show()

In [None]:
#Calculate average & SD of pairwise cohen's kappa values (all labels)

#All 55 unique annotator pairs, one row per "first" annotator of the pair
pred_iaa = (
    [c1_c2, c1_c3, c1_c4, c1_c5, c1_c6, c1_c7, c1_c8, c1_c9, c1_c10, c1_c11]
    + [c2_c3, c2_c4, c2_c5, c2_c6, c2_c7, c2_c8, c2_c9, c2_c10, c2_c11]
    + [c3_c4, c3_c5, c3_c6, c3_c7, c3_c8, c3_c9, c3_c10, c3_c11]
    + [c4_c5, c4_c6, c4_c7, c4_c8, c4_c9, c4_c10, c4_c11]
    + [c5_c6, c5_c7, c5_c8, c5_c9, c5_c10, c5_c11]
    + [c6_c7, c6_c8, c6_c9, c6_c10, c6_c11]
    + [c7_c8, c7_c9, c7_c10, c7_c11]
    + [c8_c9, c8_c10, c8_c11]
    + [c9_c10, c9_c11]
    + [c10_c11]
)

#Mean and sample standard deviation, both rounded to 3 decimal places
avg = round(mean(pred_iaa), 3)
sd = round(statistics.stdev(pred_iaa), 3)

#Report the summary statistics
print("Average:", avg)
print("Standard Deviation:", sd)

In [None]:
#Fleiss' kappa (All Predicted labels) - part 1

#Deep-copy the predictions and stringify every cell so labels compare against '0'..'4'
all_ann = ann_pred.copy(deep=True).applymap(str)

#For each row, count how many annotator models predicted each label code
#('0'..'4' -> count_A..count_E)
for _letter, _code in zip("ABCDE", "01234"):
    all_ann['count_' + _letter] = all_ann.eq(_code).sum(axis=1)

#Drop the per-annotator columns so only the five count columns remain
cols = ["C{}".format(i) for i in range(1, 12)]
all_ann = all_ann.drop(cols, axis=1)

all_ann

In [None]:
#Fleiss' kappa (All Predicted labels) - part 2

#all_ann is passed as the table of per-row category counts; result rounded to 3 dp
fleiss_raw = fleiss_kappa(all_ann, method='fleiss')
fleiss_k = round(fleiss_raw, 3)

print("Fleiss' kappa: {:.3f}".format(fleiss_k))