In [13]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.metrics import roc_auc_score as auc
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn import svm
from xgboost import XGBRegressor
from xgboost import XGBClassifier

# Let wide frames render up to 500 columns instead of being truncated.
pd.options.display.max_columns = 500

In [2]:
# Read the three input tables (training features, training labels, test
# features) from CSV files in the current working directory.
train_features, train_labels, test_features = map(
    pd.read_csv,
    ('train_features.csv', 'train_labels.csv', 'test_features.csv'),
)

In [14]:
# Put every table in a fixed order: labels by pid, features by pid then Time.
Y = train_labels.sort_values(by='pid')
X = train_features.sort_values(by=['pid', 'Time'])
X_test = test_features.sort_values(by=['pid', 'Time'])

# Columns that lag features are derived from: the grouping key `pid`,
# the ten measurements that have a matching LABEL_* target, and four vitals.
x_for_y_names = [
    'pid',
    'BaseExcess',
    'Fibrinogen',
    'AST',
    'Alkalinephos',
    'Bilirubin_total',
    'Lactate',
    'TroponinI',
    'SaO2',
    'Bilirubin_direct',
    'EtCO2',
    'RRate',
    'ABPm',
    'SpO2',
    'Heartrate',
]
x_lag = X.loc[:, x_for_y_names]
x_test_lag = X_test.loc[:, x_for_y_names]

In [24]:
# Fraction of rows where EtCO2 is missing. Computed with vectorised pandas
# operations rather than Python's builtin sum(), which iterates row by row.
# float() keeps the displayed result a plain Python float.
float(X['EtCO2'].isna().mean())

0.9570808107396683

In [17]:
# Display the row-level frame holding `pid` plus the columns that lag
# features will be derived from.
x_lag

Unnamed: 0,pid,BaseExcess,Fibrinogen,AST,Alkalinephos,Bilirubin_total,Lactate,TroponinI,SaO2,Bilirubin_direct,EtCO2,RRate,ABPm,SpO2,Heartrate
0,1,-2.0,,,,,,,,,,16.0,84.0,100.0,94.0
1,1,-2.0,,,,,,,,,,16.0,81.0,100.0,99.0
2,1,0.0,,,,,,,,,,18.0,74.0,100.0,92.0
3,1,0.0,,,,,,,,,,18.0,66.0,100.0,88.0
4,1,,,,,,,,,,,18.0,63.0,100.0,81.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172879,31658,,,,,,,,,,,,91.0,96.0,71.0
172880,31658,,,,,,,,,,,,,,
172881,31658,,,,,,,,,,,,,,
172882,31658,,,,,,,,,,,,108.0,96.0,71.0


In [27]:
def _lag_by_pid(frame, periods=1, suffix='_lag'):
    """Shift `frame` by `periods` rows within each `pid` group.

    The grouping column `pid` is not part of the result. Every remaining
    column keeps its position and is renamed to `<name><suffix>`, so the
    lag column names are derived from the input instead of being hard-coded.
    """
    return frame.groupby('pid').shift(periods).add_suffix(suffix)


def _mean_by_pid(frame):
    """Average every column of `frame` per `pid`.

    Returns one row per pid with a fresh 0..n-1 index and `pid` restored
    as the first column.
    """
    return frame.groupby('pid').mean().reset_index()


# Previous-row value of every lag-source column, computed within each pid.
shifted = _lag_by_pid(x_lag)
shifted_T = _lag_by_pid(x_test_lag)

# Append the lag columns to the row-level frames. Any lag columns left over
# from an earlier run of this cell are dropped first, so re-running the cell
# replaces them instead of adding duplicate column names.
X = pd.concat(
    [X.drop(columns=shifted.columns, errors='ignore'), shifted], axis=1
)
X_test = pd.concat(
    [X_test.drop(columns=shifted_T.columns, errors='ignore'), shifted_T], axis=1
)

# Collapse each pid's rows to a single row of column means.
X_fit = _mean_by_pid(X)
X_test_fit = _mean_by_pid(X_test)

# Stand-alone copies of the pid columns, kept under their original names.
r_pid = X_fit['pid'].copy()
r_pid_T = X_test_fit['pid'].copy()

In [28]:
# Display the per-pid mean features; at this point the frame still carries
# the `pid` and `Time` columns alongside the `*_lag` columns.
X_fit

Unnamed: 0,pid,Time,Age,EtCO2,PTT,BUN,Lactate,Temp,Hgb,HCO3,BaseExcess,RRate,Fibrinogen,Phosphate,WBC,Creatinine,PaCO2,AST,FiO2,Platelets,SaO2,Glucose,ABPm,Magnesium,Potassium,ABPd,Calcium,Alkalinephos,SpO2,Bilirubin_direct,Chloride,Hct,Heartrate,Bilirubin_total,TroponinI,ABPs,pH,BaseExcess_lag,Fibrinogen_lag,AST_lag,Alkalinephos_lag,Bilirubin_total_lag,Lactate_lag,TroponinI_lag,SaO2_lag,Bilirubin_direct_lag,EtCO2_lag,RRate_lag,ABPm_lag,SpO2_lag,Heartrate_lag
0,1,8.5,34.0,,,12.0,,36.750000,8.566667,25.333333,-0.666667,17.000000,,4.60,5.233333,0.500,43.333333,,0.425000,143.0,,120.000000,68.333333,1.80,4.000000,50.250000,7.6,,100.000000,,112.0,23.20,77.083333,,,114.500000,7.370000,-0.666667,,,,,,,,,,16.909091,69.727273,100.000000,77.818182
1,2,6.5,86.0,,31.8,32.0,,36.000000,13.100000,,,18.000000,,3.80,9.300000,2.120,,,,226.0,,111.000000,94.636364,2.10,3.800000,70.090909,9.5,,96.000000,,,40.20,59.000000,,0.44,131.181818,,,,,,,,0.44,,,,17.900000,94.200000,96.200000,59.000000
2,4,6.5,66.0,,34.6,8.0,,36.666667,10.550000,,,14.636364,,3.10,7.500000,0.530,,34.0,,269.0,,100.166667,80.909091,2.45,3.500000,49.600000,8.2,130.0,99.272727,0.1,,33.55,72.545455,0.6,0.02,125.454545,,,,34.0,130.0,0.6,,0.02,,0.1,,14.700000,82.800000,99.500000,72.600000
3,6,7.5,66.0,,53.8,32.0,1.8,37.166667,10.316667,19.500000,-2.857143,15.833333,,3.10,39.700000,1.350,39.000000,,0.566667,105.0,95.0,113.500000,65.750000,3.10,4.633333,48.833333,7.8,,99.333333,,113.5,30.39,87.333333,,,100.666667,7.352857,-2.857143,,,,,1.8,,95.0,,,15.272727,65.636364,99.454545,87.090909
4,8,6.5,42.0,,,18.0,,36.000000,,,,17.181818,,3.30,,6.460,,,,,,118.000000,143.900000,2.10,4.300000,108.954545,7.9,,97.800000,,,,81.181818,,0.08,204.545455,,,,,,,,0.08,,,,17.300000,142.777778,97.777778,81.200000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18990,31653,6.5,52.0,,25.8,11.0,1.7,36.000000,9.500000,22.666667,0.000000,15.545455,633.0,4.40,16.033333,0.700,49.000000,,,489.0,,137.000000,65.636364,2.20,4.866667,49.166667,7.9,,96.181818,,106.0,31.56,108.272727,,,87.333333,7.330000,0.000000,633.0,,,,1.7,,,,,14.600000,63.300000,96.500000,104.300000
18991,31654,6.5,66.0,,,33.0,,37.500000,11.200000,,,16.909091,,5.45,2.000000,10.335,,,,124.0,,87.500000,122.363636,2.10,5.700000,94.181818,8.5,,96.090909,,,34.60,92.909091,,,165.000000,,,,,,,,,,,,17.100000,123.500000,96.000000,93.200000
18992,31656,6.5,44.0,,,15.0,,38.000000,12.400000,24.000000,-3.500000,24.900000,,,24.000000,0.700,45.250000,,0.400000,345.0,,115.000000,78.600000,,4.000000,63.900000,,,100.000000,,97.0,36.70,99.100000,,,100.300000,7.300000,-3.500000,,,,,,,,,,24.900000,78.600000,100.000000,99.100000
18993,31657,6.5,70.0,,,,,36.500000,,,,15.500000,,,,,,,,,,,73.600000,,,62.900000,,,99.100000,,,,64.400000,,,110.500000,,,,,,,,,,,,15.000000,74.111111,99.111111,64.555556


In [29]:
# Display the row-level training frame after the `*_lag` columns were appended.
X

Unnamed: 0,pid,Time,Age,EtCO2,PTT,BUN,Lactate,Temp,Hgb,HCO3,BaseExcess,RRate,Fibrinogen,Phosphate,WBC,Creatinine,PaCO2,AST,FiO2,Platelets,SaO2,Glucose,ABPm,Magnesium,Potassium,ABPd,Calcium,Alkalinephos,SpO2,Bilirubin_direct,Chloride,Hct,Heartrate,Bilirubin_total,TroponinI,ABPs,pH,BaseExcess_lag,Fibrinogen_lag,AST_lag,Alkalinephos_lag,Bilirubin_total_lag,Lactate_lag,TroponinI_lag,SaO2_lag,Bilirubin_direct_lag,EtCO2_lag,RRate_lag,ABPm_lag,SpO2_lag,Heartrate_lag
0,1,3,34.0,,,12.0,,36.0,8.7,24.0,-2.0,16.0,,,6.3,,45.0,,,,,,84.0,1.2,3.8,61.0,,,100.0,,114.0,24.6,94.0,,,142.0,7.33,,,,,,,,,,,,,,
1,1,4,34.0,,,,,36.0,,,-2.0,16.0,,,,,,,0.5,,,,81.0,,,62.5,,,100.0,,,,99.0,,,125.0,7.33,-2.0,,,,,,,,,,16.0,84.0,100.0,94.0
2,1,5,34.0,,,,,36.0,,,0.0,18.0,,,,,43.0,,0.4,,,,74.0,,,59.0,,,100.0,,,,92.0,,,110.0,7.37,-2.0,,,,,,,,,,16.0,81.0,100.0,99.0
3,1,6,34.0,,,,,37.0,,,0.0,18.0,,,,,,,,,,,66.0,,,49.5,,,100.0,,,,88.0,,,104.0,7.37,0.0,,,,,,,,,,18.0,74.0,100.0,92.0
4,1,7,34.0,,,,,,,,,18.0,,,,,,,,,,,63.0,1.8,,48.0,,,100.0,,,22.4,81.0,,,100.0,7.41,0.0,,,,,,,,,,18.0,66.0,100.0,88.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172879,31658,8,60.0,,,,,37.0,,,,,,,,,,,,,,,91.0,,,70.0,,,96.0,,,,71.0,,,127.0,,,,,,,,,,,,,91.0,96.0,71.0
172880,31658,9,60.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,91.0,96.0,71.0
172881,31658,10,60.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
172882,31658,11,60.0,,,,,,,,,,,,,,,,,,,110.0,108.0,,,81.0,,,96.0,,,,71.0,,,135.0,,,,,,,,,,,,,,,


In [30]:
# `pid` and `Time` are not used as model inputs. Keep the test pids aside
# before removing both columns from the train and test matrices.
non_feature_columns = ['pid', 'Time']
X_test_pid = X_test_fit['pid']
X_fit = X_fit.drop(columns=non_feature_columns)
X_test_fit = X_test_fit.drop(columns=non_feature_columns)


In [35]:
# Display the training matrix after `pid` and `Time` were dropped.
X_fit

Unnamed: 0,Age,EtCO2,PTT,BUN,Lactate,Temp,Hgb,HCO3,BaseExcess,RRate,Fibrinogen,Phosphate,WBC,Creatinine,PaCO2,AST,FiO2,Platelets,SaO2,Glucose,ABPm,Magnesium,Potassium,ABPd,Calcium,Alkalinephos,SpO2,Bilirubin_direct,Chloride,Hct,Heartrate,Bilirubin_total,TroponinI,ABPs,pH,BaseExcess_lag,Fibrinogen_lag,AST_lag,Alkalinephos_lag,Bilirubin_total_lag,Lactate_lag,TroponinI_lag,SaO2_lag,Bilirubin_direct_lag,EtCO2_lag,RRate_lag,ABPm_lag,SpO2_lag,Heartrate_lag
0,34.0,,,12.0,,36.750000,8.566667,25.333333,-0.666667,17.000000,,4.60,5.233333,0.500,43.333333,,0.425000,143.0,,120.000000,68.333333,1.80,4.000000,50.250000,7.6,,100.000000,,112.0,23.20,77.083333,,,114.500000,7.370000,-0.666667,,,,,,,,,,16.909091,69.727273,100.000000,77.818182
1,86.0,,31.8,32.0,,36.000000,13.100000,,,18.000000,,3.80,9.300000,2.120,,,,226.0,,111.000000,94.636364,2.10,3.800000,70.090909,9.5,,96.000000,,,40.20,59.000000,,0.44,131.181818,,,,,,,,0.44,,,,17.900000,94.200000,96.200000,59.000000
2,66.0,,34.6,8.0,,36.666667,10.550000,,,14.636364,,3.10,7.500000,0.530,,34.0,,269.0,,100.166667,80.909091,2.45,3.500000,49.600000,8.2,130.0,99.272727,0.1,,33.55,72.545455,0.6,0.02,125.454545,,,,34.0,130.0,0.6,,0.02,,0.1,,14.700000,82.800000,99.500000,72.600000
3,66.0,,53.8,32.0,1.8,37.166667,10.316667,19.500000,-2.857143,15.833333,,3.10,39.700000,1.350,39.000000,,0.566667,105.0,95.0,113.500000,65.750000,3.10,4.633333,48.833333,7.8,,99.333333,,113.5,30.39,87.333333,,,100.666667,7.352857,-2.857143,,,,,1.8,,95.0,,,15.272727,65.636364,99.454545,87.090909
4,42.0,,,18.0,,36.000000,,,,17.181818,,3.30,,6.460,,,,,,118.000000,143.900000,2.10,4.300000,108.954545,7.9,,97.800000,,,,81.181818,,0.08,204.545455,,,,,,,,0.08,,,,17.300000,142.777778,97.777778,81.200000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18990,52.0,,25.8,11.0,1.7,36.000000,9.500000,22.666667,0.000000,15.545455,633.0,4.40,16.033333,0.700,49.000000,,,489.0,,137.000000,65.636364,2.20,4.866667,49.166667,7.9,,96.181818,,106.0,31.56,108.272727,,,87.333333,7.330000,0.000000,633.0,,,,1.7,,,,,14.600000,63.300000,96.500000,104.300000
18991,66.0,,,33.0,,37.500000,11.200000,,,16.909091,,5.45,2.000000,10.335,,,,124.0,,87.500000,122.363636,2.10,5.700000,94.181818,8.5,,96.090909,,,34.60,92.909091,,,165.000000,,,,,,,,,,,,17.100000,123.500000,96.000000,93.200000
18992,44.0,,,15.0,,38.000000,12.400000,24.000000,-3.500000,24.900000,,,24.000000,0.700,45.250000,,0.400000,345.0,,115.000000,78.600000,,4.000000,63.900000,,,100.000000,,97.0,36.70,99.100000,,,100.300000,7.300000,-3.500000,,,,,,,,,,24.900000,78.600000,100.000000,99.100000
18993,70.0,,,,,36.500000,,,,15.500000,,,,,,,,,,,73.600000,,,62.900000,,,99.100000,,,,64.400000,,,110.500000,,,,,,,,,,,,15.000000,74.111111,99.111111,64.555556


In [33]:
# Names of the label columns to model, in output-column order.
# Despite its name, `n_class` is a list of column names, not a count.
n_class = [
    # one label per measurement
    "LABEL_BaseExcess",
    "LABEL_Fibrinogen",
    "LABEL_AST",
    "LABEL_Alkalinephos",
    "LABEL_Bilirubin_total",
    "LABEL_Lactate",
    "LABEL_TroponinI",
    "LABEL_SaO2",
    "LABEL_Bilirubin_direct",
    "LABEL_EtCO2",
    # sepsis label
    "LABEL_Sepsis",
]

In [34]:
# Fit one XGBoost classifier per label in `n_class` and collect, for every
# row of X_test_fit, the predicted probability of the second class.

# Labels and features are matched purely by row position (Y is sorted by pid,
# X_fit comes from a groupby on pid), so fail early with a clear message if
# the two do not even have the same number of rows.
if len(Y) != len(X_fit):
    raise ValueError(
        f"Y has {len(Y)} rows but X_fit has {len(X_fit)}; "
        "labels and features cannot be aligned by position"
    )

# One column of probabilities per label, filled in the order of `n_class`.
output_classifiaction = np.zeros((X_test_fit.shape[0], len(n_class)))
for i, y_name in enumerate(n_class):
    # A fresh model per label; nothing is shared between labels.
    clf = XGBClassifier()
    clf.fit(X_fit, Y[y_name])

    # predict_proba returns one column per class; column 1 is presumably
    # P(label == 1) for these labels (TODO confirm labels are 0/1).
    output_classifiaction[:, i] = clf.predict_proba(X_test_fit)[:, 1]

# Same variable name as before (spelling kept), now a DataFrame with the
# label names as column headers.
output_classifiaction = pd.DataFrame(output_classifiaction, columns=n_class)



