# Majority-Vote Consensus Dataset

This Jupyter notebook outlines the steps to create the **Majority-Vote Consensus Dataset**: for each instance, the consensus label is the majority-vote ICU-PSS class label across all annotators.

In [None]:
#Import necessary modules 

import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn.metrics import cohen_kappa_score
from statsmodels.stats.inter_rater import fleiss_kappa

## 1. Import Annotated Datasets

In [None]:
# Consultant no.1: read the annotated sheet, order rows by PseudoID,
# drop the fields listed in `cols`, and expose the 'Mean' column as 'MAP'.

cols = ['Dobutamine', 'Time', 'Bckgrnd', 'PseudoID', 'Line of Selected Timepoint']
c1 = (
    pd.read_excel('./p01.xlsx')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# A blank drug entry stands for a value of 0 (as confirmed by Prof Sim),
# so nulls in both drug fields are replaced with 0.
for drug in ('Adrenaline', 'Noradrenaline'):
    c1[drug] = c1[drug].replace(np.nan, 0)

print(c1.shape)
c1.head()

In [None]:
# Consultant no.2: read the annotated file, order rows by PseudoID,
# drop the fields listed in `cols`, and expose the 'Mean' column as 'MAP'.

cols = ['Dobutamine', 'Time', 'Bckgrnd', 'PseudoID', 'Line of Selected Timepoint']
c2 = (
    pd.read_csv('./p02.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# A blank drug entry stands for a value of 0 (as confirmed by Prof Sim),
# so nulls in both drug fields are replaced with 0.
for drug in ('Adrenaline', 'Noradrenaline'):
    c2[drug] = c2[drug].replace(np.nan, 0)

print(c2.shape)
c2.head()

In [None]:
# Consultant no.3: read the annotated file, order rows by PseudoID,
# drop the fields listed in `cols`, and expose the 'Mean' column as 'MAP'.

cols = ['Dobutamine', 'Time', 'Bckgrnd', 'PseudoID', 'Line of Selected Timepoint']
c3 = (
    pd.read_csv('./p03.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# A blank drug entry stands for a value of 0 (as confirmed by Prof Sim),
# so nulls in both drug fields are replaced with 0.
for drug in ('Adrenaline', 'Noradrenaline'):
    c3[drug] = c3[drug].replace(np.nan, 0)

print(c3.shape)
c3.head()

In [None]:
# Consultant no.4: read the annotated sheet, order rows by PseudoID,
# drop the fields listed in `cols`, and expose the 'Mean' column as 'MAP'.

cols = ['Dobutamine', 'Time', 'Bckgrnd', 'PseudoID', 'Line of Selected Timepoint']
c4 = (
    pd.read_excel('./p04.xlsx')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# A blank drug entry stands for a value of 0 (as confirmed by Prof Sim),
# so nulls in both drug fields are replaced with 0.
for drug in ('Adrenaline', 'Noradrenaline'):
    c4[drug] = c4[drug].replace(np.nan, 0)

print(c4.shape)
c4.head()

In [None]:
# Consultant no.5: read the annotated file, order rows by PseudoID,
# drop the fields listed in `cols`, and expose the 'Mean' column as 'MAP'.

cols = ['Dobutamine', 'Time', 'Bckgrnd', 'PseudoID', 'Line of Selected Timepoint']
c5 = (
    pd.read_csv('./p05.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# A blank drug entry stands for a value of 0 (as confirmed by Prof Sim),
# so nulls in both drug fields are replaced with 0.
for drug in ('Adrenaline', 'Noradrenaline'):
    c5[drug] = c5[drug].replace(np.nan, 0)

print(c5.shape)
c5.head()

In [None]:
# Consultant no.6: read the annotated sheet, order rows by PseudoID,
# drop the fields listed in `cols`, and expose the 'Mean' column as 'MAP'.

cols = ['Dobutamine', 'Time', 'Bckgrnd', 'PseudoID', 'Line of Selected Timepoint']
c6 = (
    pd.read_excel('./p06.xlsx')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# A blank drug entry stands for a value of 0 (as confirmed by Prof Sim),
# so nulls in both drug fields are replaced with 0.
for drug in ('Adrenaline', 'Noradrenaline'):
    c6[drug] = c6[drug].replace(np.nan, 0)

print(c6.shape)
c6.head()

In [None]:
# Consultant no.7: read the annotated file, order rows by PseudoID,
# drop the fields listed in `cols`, and expose the 'Mean' column as 'MAP'.

cols = ['Dobutamine', 'Time', 'Bckgrnd', 'PseudoID', 'Line of Selected Timepoint']
c7 = (
    pd.read_csv('./p07.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# A blank drug entry stands for a value of 0 (as confirmed by Prof Sim),
# so nulls in both drug fields are replaced with 0.
for drug in ('Adrenaline', 'Noradrenaline'):
    c7[drug] = c7[drug].replace(np.nan, 0)

print(c7.shape)
c7.head()

In [None]:
# Consultant no.8: read the annotated file, order rows by PseudoID,
# drop the fields listed in `cols`, and expose the 'Mean' column as 'MAP'.

cols = ['Dobutamine', 'Time', 'Bckgrnd', 'PseudoID', 'Line of Selected Timepoint']
c8 = (
    pd.read_csv('./p08.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# A blank drug entry stands for a value of 0 (as confirmed by Prof Sim),
# so nulls in both drug fields are replaced with 0.
for drug in ('Adrenaline', 'Noradrenaline'):
    c8[drug] = c8[drug].replace(np.nan, 0)

print(c8.shape)
c8.head()

In [None]:
# Consultant no.9: read the annotated file, order rows by PseudoID,
# drop the fields listed in `cols`, and expose the 'Mean' column as 'MAP'.

cols = ['Dobutamine', 'Time', 'Bckgrnd', 'PseudoID', 'Line of Selected Timepoint']
c9 = (
    pd.read_csv('./p09.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# A blank drug entry stands for a value of 0 (as confirmed by Prof Sim),
# so nulls in both drug fields are replaced with 0.
for drug in ('Adrenaline', 'Noradrenaline'):
    c9[drug] = c9[drug].replace(np.nan, 0)

print(c9.shape)
c9.head()

In [None]:
# Consultant no.10: read the annotated file, order rows by PseudoID,
# drop the fields listed in `cols`, and expose the 'Mean' column as 'MAP'.

cols = ['Dobutamine', 'Time', 'Bckgrnd', 'PseudoID', 'Line of Selected Timepoint']
c10 = (
    pd.read_csv('./p10.csv')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# A blank drug entry stands for a value of 0 (as confirmed by Prof Sim),
# so nulls in both drug fields are replaced with 0.
for drug in ('Adrenaline', 'Noradrenaline'):
    c10[drug] = c10[drug].replace(np.nan, 0)

print(c10.shape)
c10.head()

In [None]:
# Consultant no.11: read the annotated sheet, order rows by PseudoID,
# drop the fields listed in `cols`, and expose the 'Mean' column as 'MAP'.

cols = ['Dobutamine', 'Time', 'Bckgrnd', 'PseudoID', 'Line of Selected Timepoint']
c11 = (
    pd.read_excel('./p11.xlsx')
    .sort_values(by=['PseudoID'], ascending=[True])
    .drop(columns=cols)
    .rename(columns={'Mean': 'MAP'})
)

# A blank drug entry stands for a value of 0 (as confirmed by Prof Sim),
# so nulls in both drug fields are replaced with 0.
for drug in ('Adrenaline', 'Noradrenaline'):
    c11[drug] = c11[drug].replace(np.nan, 0)

# Upper-case this consultant's labels
c11['Annotation'] = c11['Annotation'].str.upper()

print(c11.shape)
c11.head()

## 2. Create MV Consensus Dataset

Create the MV Consensus Dataset from the majority-vote labels across all 11 annotators.

In [None]:
# For every consultant frame, keep the first seven columns and relabel that
# consultant's 'Annotation' column as C<n> (C1 for consultant 1, ... C11 for
# consultant 11). The generator yields the frames in consultant order, so the
# tuple unpacking binds c1sub..c11sub exactly as eleven separate statements would.
(c1sub, c2sub, c3sub, c4sub, c5sub, c6sub,
 c7sub, c8sub, c9sub, c10sub, c11sub) = (
    frame.iloc[:, :7].rename(columns={'Annotation': 'C' + str(idx)})
    for idx, frame in enumerate(
        (c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11), start=1
    )
)

In [None]:
#Majority vote

# Feature columns present in every consultant frame; used as the join key.
cols = ['Adrenaline','Noradrenaline','FiO2','SpO2','MAP','HR']

# Join the eleven per-consultant frames one after another on the feature columns,
# giving one row per instance with label columns C1..C11 side by side.
# NOTE(review): joining on feature values assumes each feature combination occurs
# only once per frame; repeated combinations would multiply rows in the result.
# Confirm against the shape printed below.
all_ann = c1sub
for other in (c2sub, c3sub, c4sub, c5sub, c6sub, c7sub, c8sub, c9sub, c10sub, c11sub):
    all_ann = all_ann.merge(other, on=cols)

# Vote over the annotator label columns ONLY. Taking the row-wise mode of the
# whole frame would let the numeric feature values take part in the vote: a row
# whose features repeat a value (e.g. both drug fields 0) could tie with, or
# outvote, the real label mode, and rows mixing numbers with strings cannot be
# ordered reliably.
label_cols = ['C' + str(i) for i in range(1, 12)]
# Column 0 of the row-wise mode is the winning label; when several labels tie,
# it is the first of the tied labels as returned by mode().
all_ann['Annotation'] = all_ann[label_cols].mode(axis=1)[0]  #majority vote

print(all_ann.shape)
print(all_ann.Annotation.value_counts())
all_ann.head()

In [None]:
# Build the final frame by removing the columns at positions 6..16 of all_ann
# (expected to be the per-consultant label columns C1..C11), leaving the
# remaining columns untouched. all_ann itself is not modified.

cols = list(range(6, 17))
mv = all_ann.drop(columns=all_ann.columns[cols])

# Write the majority-vote dataset to disk
mv.to_csv("MV-Consensus-Dataset.csv")

print(mv.shape)
mv.head()