## Import libraries and other useful stuff:

In [1]:
import pandas as pd
import numpy as np
from scipy.special import expit
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from imblearn.ensemble import BalancedRandomForestClassifier
from sklearn.model_selection import cross_validate
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import r2_score
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split
import sklearn


## Preprocessing:

Import the data:

In [2]:
train_ft = pd.read_csv('train_features.csv', index_col = 'pid')
train_labels = pd.read_csv('train_labels.csv', index_col = 'pid')
test_ft = pd.read_csv('test_features.csv', index_col = 'pid')

In [3]:
print(train_ft.shape)

(227940, 36)


Define labels:

In [4]:
TESTS = ['LABEL_BaseExcess', 'LABEL_Fibrinogen', 'LABEL_AST', 'LABEL_Alkalinephos', 'LABEL_Bilirubin_total',
         'LABEL_Lactate', 'LABEL_TroponinI', 'LABEL_SaO2',
         'LABEL_Bilirubin_direct', 'LABEL_EtCO2']
SEPSIS = ['LABEL_Sepsis']
VITALS = ['LABEL_RRate', 'LABEL_ABPm', 'LABEL_SpO2', 'LABEL_Heartrate']

Suppress outliers using sigmoid:

In [5]:
train_ft = train_ft.copy(deep=True)
train_ft['Time'] = np.tile(np.arange(1,13),18995)
test_ft = test_ft.copy(deep=True)
test_ft['Time'] = np.tile(np.arange(1,13),12664)

In [6]:
train_ft = train_ft.apply(lambda col: expit((col - np.mean(col))/np.std(col)))
test_ft = test_ft.apply(lambda col: expit((col - np.mean(col))/np.std(col)))

In [7]:
def get_mean(df , feature):
    if np.all(np.isnan(df)):
        return np.nanmedian(train_ft[feature].values[:])
    else:
        return np.nanmean(df)

def get_std(df):
    return df.std()

def get_num_tests(df):
    return np.sum(np.isfinite(df))

def get_last(X):
    return X.iloc[-1]

In [8]:
imputer = IterativeImputer(max_iter=40, random_state=0,sample_posterior=False)

# from sklearn.impute import KNNImputer
# imputer = KNNImputer(n_neighbors=2)

# from sklearn.impute import SimpleImputer
# imputer = SimpleImputer(missing_values=np.nan, strategy='mean')


train_ft.iloc[:, :] = imputer.fit_transform(train_ft)
test_ft.iloc[:, :] = imputer.fit_transform(test_ft)
# features_test.iloc[:, :] = imputer.transform(features_test)

#print('It took the iterative imputer '+str(end-start)+' seconds to impute the data.')
print(train_ft.shape)
print(train_ft.shape)

(227940, 36)
(227940, 36)


In [9]:
train_ft

Unnamed: 0_level_0,Time,Age,EtCO2,PTT,BUN,Lactate,Temp,Hgb,HCO3,BaseExcess,...,Alkalinephos,SpO2,Bilirubin_direct,Chloride,Hct,Heartrate,Bilirubin_total,TroponinI,ABPs,pH
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.168926,0.153628,0.272579,0.427561,0.363786,0.449482,0.274142,0.283058,0.529029,0.454763,...,0.480561,0.698175,0.476484,0.787231,0.238997,0.631156,0.475788,0.469746,0.699190,0.377413
1,0.213564,0.153628,0.546370,0.449319,0.437191,0.518252,0.274142,0.470059,0.468346,0.454763,...,0.483090,0.698175,0.476141,0.515961,0.484948,0.694357,0.480136,0.480472,0.528222,0.377413
1,0.266219,0.153628,0.529810,0.449461,0.452410,0.488083,0.274142,0.474178,0.529328,0.573364,...,0.483401,0.698175,0.476130,0.459441,0.472766,0.604397,0.481153,0.476249,0.370168,0.509304
1,0.326467,0.153628,0.538923,0.434929,0.422818,0.452627,0.542140,0.462723,0.526108,0.573364,...,0.481972,0.698175,0.476055,0.500600,0.469115,0.549118,0.480163,0.478018,0.312320,0.509304
1,0.393047,0.153628,0.505163,0.440781,0.434893,0.448855,0.544101,0.221845,0.454448,0.537857,...,0.482348,0.698175,0.476116,0.515592,0.176616,0.450254,0.481496,0.476927,0.276646,0.639912
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9999,0.606953,0.801156,0.499879,0.504343,0.521321,0.455115,0.464290,0.483345,0.514007,0.502837,...,0.484318,0.500229,0.476211,0.512671,0.497678,0.436268,0.476211,0.477958,0.370168,0.521847
9999,0.673533,0.801156,0.500252,0.498775,0.508953,0.450621,0.467570,0.489283,0.520239,0.508085,...,0.483893,0.515458,0.476240,0.515997,0.503230,0.478442,0.475999,0.477852,0.506768,0.530633
9999,0.733781,0.801156,0.499972,0.539441,0.541195,0.495155,0.274142,0.484982,0.508074,0.496682,...,0.485036,0.530162,0.476221,0.495797,0.508755,0.436268,0.476587,0.477282,0.661856,0.524438
9999,0.786436,0.801156,0.420691,0.557516,0.526487,0.458370,0.484846,0.448586,0.557694,0.499811,...,0.491171,0.530162,0.476664,0.541854,0.487738,0.368254,0.470822,0.470351,0.528222,0.527532


In [10]:
train_ft_agg = train_ft.groupby(train_ft.index).agg(['mean','min', 'max','std', get_last])
test_ft_agg = test_ft.groupby(test_ft.index).agg(['mean','min', 'max','std', get_last])

In [11]:
train_ft_agg = train_ft_agg.drop('Time', axis = 1)
test_ft_agg = test_ft_agg.drop('Time', axis = 1)

train_ft_agg = train_ft_agg.drop(('Age', 'min'), axis = 1)
train_ft_agg = train_ft_agg.drop(('Age', 'std'), axis = 1)
train_ft_agg = train_ft_agg.drop(('Age', 'max'), axis = 1)

test_ft_agg = test_ft_agg.drop(('Age', 'min'), axis = 1)
test_ft_agg = test_ft_agg.drop(('Age', 'std'), axis = 1)
test_ft_agg = test_ft_agg.drop(('Age', 'max'), axis = 1)
train_ft_agg = train_ft_agg.drop(('Age', 'get_last'), axis = 1)
test_ft_agg = test_ft_agg.drop(('Age', 'get_last'), axis = 1)

In [12]:
corr_matrix = train_ft_agg.corr().abs()
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
to_drop = [column for column in upper.columns if any(upper[column] > 0.8)]
print(to_drop)

train_ft_agg.drop(train_ft_agg[to_drop], axis=1,inplace=True)
test_ft_agg.drop(test_ft_agg[to_drop], axis=1,inplace=True)


[('PTT', 'std'), ('BUN', 'mean'), ('BUN', 'std'), ('BaseExcess', 'mean'), ('WBC', 'std'), ('Creatinine', 'max'), ('Creatinine', 'std'), ('FiO2', 'max'), ('FiO2', 'std'), ('SaO2', 'std'), ('Glucose', 'std'), ('ABPm', 'min'), ('ABPm', 'max'), ('ABPd', 'mean'), ('ABPd', 'min'), ('ABPd', 'max'), ('ABPd', 'std'), ('ABPd', 'get_last'), ('Alkalinephos', 'max'), ('Alkalinephos', 'std'), ('SpO2', 'min'), ('SpO2', 'std'), ('Bilirubin_direct', 'max'), ('Bilirubin_direct', 'std'), ('Hct', 'mean'), ('Hct', 'min'), ('Hct', 'max'), ('Hct', 'std'), ('Hct', 'get_last'), ('Heartrate', 'min'), ('Heartrate', 'max'), ('Heartrate', 'get_last'), ('Bilirubin_total', 'max'), ('Bilirubin_total', 'std'), ('TroponinI', 'max'), ('TroponinI', 'std'), ('ABPs', 'mean'), ('ABPs', 'min'), ('ABPs', 'max'), ('ABPs', 'get_last')]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))


## Subtask 1

In [13]:
train_labels.sort_index(inplace=True)
test_labels = pd.DataFrame(np.zeros((np.shape(test_ft_agg)[0], len(train_labels.columns.values))), columns = train_labels.columns.values)
train_cv = train_labels.copy()

In [14]:
pid = np.sort(test_ft.index.unique())

In [15]:
print(train_ft_agg.shape)
print(test_ft_agg.shape)
print(train_labels.shape)
print(test_labels.shape)

(18995, 131)
(12664, 131)
(18995, 15)
(12664, 15)


In [16]:

from xgboost import XGBClassifier
from sklearn.datasets import load_iris
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold
import xgboost as xgb

In [17]:
ypred = pd.DataFrame(columns=np.hstack(['pid', TESTS, SEPSIS, VITALS]))


In [18]:
ypred = pd.DataFrame(columns=np.hstack(['pid', TESTS, SEPSIS, VITALS]))

for i in TESTS:
       print(i)
       xtrain = train_ft_agg
       xtest = test_ft_agg
       ytrain = train_labels.loc[:,i]

       #data_dmatrix = xgb.DMatrix(data=X,label=y)
       xgbc = XGBClassifier()
       XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, learning_rate=0.1,
              max_delta_step=0, max_depth=3, min_child_weight=1, missing=None,
              n_estimators=100, n_jobs=1, nthread=None,
              objective='multi:softprob', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, seed=None, silent=None,
              subsample=1, verbosity=1) 


       param_grid = {
              "max_depth": [3, 4, 5, 7],
              "learning_rate": [0.1, 0.01, 0.05],
              "gamma": [0, 0.25, 1],
              "reg_lambda": [0, 1, 10],
              "scale_pos_weight": [1, 3, 5],
              "subsample": [0.8],
              "colsample_bytree": [0.5],
              }             

       xgbc.fit(xtrain, ytrain)

       scores = cross_val_score(xgbc, xtrain, ytrain, cv=5)
       print("Mean cross-validation score: %.2f" % scores.mean())


       kfold = KFold(n_splits=10, shuffle=True)
       kf_cv_scores = cross_val_score(xgbc, xtrain, ytrain, cv=kfold )
       print("K-fold CV average score: %.2f" % kf_cv_scores.mean())
       ypred.loc[:,i] = xgbc.predict_proba(xtest)[:,1]
        


LABEL_BaseExcess
Mean cross-validation score: 0.85
K-fold CV average score: 0.85
LABEL_Fibrinogen
Mean cross-validation score: 0.94
K-fold CV average score: 0.93
LABEL_AST
Mean cross-validation score: 0.78
K-fold CV average score: 0.78
LABEL_Alkalinephos
Mean cross-validation score: 0.78
K-fold CV average score: 0.78
LABEL_Bilirubin_total
Mean cross-validation score: 0.77
K-fold CV average score: 0.78
LABEL_Lactate
Mean cross-validation score: 0.82
K-fold CV average score: 0.82
LABEL_TroponinI
Mean cross-validation score: 0.92
K-fold CV average score: 0.92
LABEL_SaO2
Mean cross-validation score: 0.82
K-fold CV average score: 0.82
LABEL_Bilirubin_direct
Mean cross-validation score: 0.97
K-fold CV average score: 0.97
LABEL_EtCO2
Mean cross-validation score: 0.97
K-fold CV average score: 0.97


In [19]:
from xgboost import XGBRFClassifier

for i in SEPSIS:
       print(i)
       xtrain = train_ft_agg
       xtest = test_ft_agg
       ytrain = train_labels.loc[:,i]

       #data_dmatrix = xgb.DMatrix(data=X,label=y)
       xgbc = XGBRFClassifier()
       XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, learning_rate=0.1,
              max_delta_step=0, max_depth=3, min_child_weight=1, missing=None,
              n_estimators=100, n_jobs=1, nthread=None,
              objective='multi:softprob', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, seed=None, silent=None,
              subsample=1, verbosity=1) 


       param_grid = {
              "max_depth": [3, 4, 5, 7],
              "learning_rate": [0.1, 0.01, 0.05],
              "gamma": [0, 0.25, 1],
              "reg_lambda": [0, 1, 10],
              "scale_pos_weight": [1, 3, 5],
              "subsample": [0.8],
              "colsample_bytree": [0.5],
              }             
      
       xgbc.fit(xtrain, ytrain)

       scores = cross_val_score(xgbc, xtrain, ytrain, cv=5)
       print("Mean cross-validation score: %.2f" % scores.mean())


       kfold = KFold(n_splits=10, shuffle=True)
       kf_cv_scores = cross_val_score(xgbc, xtrain, ytrain, cv=kfold )
       print("K-fold CV average score: %.2f" % kf_cv_scores.mean())
       ypred.loc[:,i] = xgbc.predict_proba(xtest)[:,1]

LABEL_Sepsis
Mean cross-validation score: 0.94
K-fold CV average score: 0.94


## Subtask 3

In [20]:
import catboost as cb
from sklearn.metrics import mean_squared_error

In [21]:
X_train, X_test, y_train, y_test = train_test_split(train_ft.reset_index('pid').set_index(['pid', 'Time']).unstack(), train_labels[VITALS[0]], test_size = 0.2, random_state=5)

In [22]:
predicted_vitals = pd.DataFrame(columns=VITALS)
pred = pd.DataFrame(columns=VITALS)

In [23]:
X_train, X_test, y_train, y_test = train_test_split(train_ft_agg, train_labels, test_size = 0.2, random_state=5)

In [24]:
for label in VITALS:

    #X_train, X_test, y_train, y_test = train_test_split(train_ft_agg, train_labels[label], test_size = 0.2, random_state=5)

    train_dataset = cb.Pool(X_train, y_train[label]) 
    test_dataset = cb.Pool(X_test, y_test[label])

    model = cb.CatBoostRegressor(loss_function='RMSE')

    grid = {'iterations': [200, 400, 600],
        'learning_rate': [0.1, 0.5, 1.0],
        'depth': [4],
        'l2_leaf_reg': [0.5]}
    model.grid_search(grid, train_dataset)

    pred[label] = model.predict(X_test)
    rmse = (np.sqrt(mean_squared_error(y_test[label], pred[label])))
    r2 = r2_score(y_test[label], pred[label])
    print("Testing performance")
    print('RMSE: {:.2f}'.format(rmse))
    print('R2: {:.2f}'.format(r2))
    predicted_vitals[label] = model.predict(test_ft_agg)

    

0:	learn: 17.2738705	test: 17.2781393	best: 17.2781393 (0)	total: 79.7ms	remaining: 15.9s
1:	learn: 15.6204819	test: 15.6239896	best: 15.6239896 (1)	total: 92.8ms	remaining: 9.18s
2:	learn: 14.1390911	test: 14.1409414	best: 14.1409414 (2)	total: 99.2ms	remaining: 6.51s
3:	learn: 12.8141408	test: 12.8142498	best: 12.8142498 (3)	total: 107ms	remaining: 5.26s
4:	learn: 11.6182445	test: 11.6205691	best: 11.6205691 (4)	total: 116ms	remaining: 4.53s
5:	learn: 10.5610505	test: 10.5617833	best: 10.5617833 (5)	total: 125ms	remaining: 4.04s
6:	learn: 9.6122286	test: 9.6106126	best: 9.6106126 (6)	total: 134ms	remaining: 3.71s
7:	learn: 8.7506030	test: 8.7472508	best: 8.7472508 (7)	total: 150ms	remaining: 3.6s
8:	learn: 7.9792723	test: 7.9743426	best: 7.9743426 (8)	total: 157ms	remaining: 3.33s
9:	learn: 7.3044288	test: 7.2976057	best: 7.2976057 (9)	total: 163ms	remaining: 3.1s
10:	learn: 6.7152744	test: 6.7075931	best: 6.7075931 (10)	total: 171ms	remaining: 2.94s
11:	learn: 6.1804702	test: 6.1705

In [25]:
ypred[VITALS] = predicted_vitals

In [26]:
np.mean([0.5 + 0.5 * np.maximum(0, sklearn.metrics.r2_score(y_test[entry], pred[entry])) for entry in VITALS])

0.7545277017003393

## Performance Evaluation Metrics

In [27]:
test_ft_agg

Unnamed: 0_level_0,Age,EtCO2,EtCO2,EtCO2,EtCO2,EtCO2,PTT,PTT,PTT,PTT,...,Bilirubin_total,TroponinI,TroponinI,TroponinI,ABPs,pH,pH,pH,pH,pH
Unnamed: 0_level_1,mean,mean,min,max,std,get_last,mean,min,max,get_last,...,get_last,mean,min,get_last,std,mean,min,max,std,get_last
pid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,0.195102,0.711704,0.311713,1.857353,0.481097,0.511208,0.476099,0.435325,0.542138,0.467307,...,0.481544,0.504062,0.464031,0.477190,0.086918,0.502303,0.400401,0.604680,0.047996,0.515706
3,0.792013,0.487881,0.480241,0.491990,0.004388,0.483197,0.510625,0.483227,0.558749,0.544427,...,0.488012,0.470288,0.462391,0.465032,0.202020,0.509996,0.486226,0.523866,0.010806,0.517323
5,0.497639,0.499358,0.494089,0.503020,0.002607,0.495525,0.479406,0.459254,0.503439,0.503439,...,0.478168,0.474886,0.472281,0.477304,0.112646,0.508374,0.499460,0.519891,0.006001,0.515110
7,0.632142,0.564488,0.174895,1.643828,0.360929,1.643828,0.534719,0.456209,0.987373,0.987373,...,0.502221,0.499734,0.390329,0.911657,0.150505,0.507910,0.468161,0.604680,0.034225,0.518387
9,0.335655,0.570948,0.495151,0.930675,0.161387,0.495151,0.476255,0.455227,0.517818,0.507090,...,0.489690,0.475086,0.439777,0.469979,0.087179,0.503403,0.485663,0.522157,0.012530,0.507131
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31647,0.116165,0.515830,0.512452,0.520297,0.002378,0.515039,0.453416,0.439150,0.471145,0.461026,...,0.480875,0.473332,0.470688,0.476406,0.051935,0.497669,0.484010,0.515661,0.010584,0.515661
31649,0.781750,0.498699,0.058443,0.766080,0.167178,0.058443,0.473101,0.391510,0.504066,0.391510,...,0.404815,0.469678,0.423539,0.520180,0.122543,0.443632,0.181108,0.548692,0.141348,0.277614
31651,0.673715,0.369269,-0.623551,0.599595,0.345904,-0.623551,0.475101,0.423236,0.527064,0.485653,...,0.469264,0.436543,0.325597,0.325597,0.081697,0.505956,0.367739,0.637178,0.060259,0.465105
31652,0.204893,0.517116,0.505640,0.556046,0.017471,0.552277,0.460551,0.449246,0.479147,0.462572,...,0.487409,0.471822,0.424571,0.424571,0.175279,0.503093,0.493094,0.519421,0.008625,0.518834


In [28]:
ypred['pid'] = test_ft_agg.index.get_level_values('pid')

In [29]:
test_ft_agg

Unnamed: 0_level_0,Age,EtCO2,EtCO2,EtCO2,EtCO2,EtCO2,PTT,PTT,PTT,PTT,...,Bilirubin_total,TroponinI,TroponinI,TroponinI,ABPs,pH,pH,pH,pH,pH
Unnamed: 0_level_1,mean,mean,min,max,std,get_last,mean,min,max,get_last,...,get_last,mean,min,get_last,std,mean,min,max,std,get_last
pid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,0.195102,0.711704,0.311713,1.857353,0.481097,0.511208,0.476099,0.435325,0.542138,0.467307,...,0.481544,0.504062,0.464031,0.477190,0.086918,0.502303,0.400401,0.604680,0.047996,0.515706
3,0.792013,0.487881,0.480241,0.491990,0.004388,0.483197,0.510625,0.483227,0.558749,0.544427,...,0.488012,0.470288,0.462391,0.465032,0.202020,0.509996,0.486226,0.523866,0.010806,0.517323
5,0.497639,0.499358,0.494089,0.503020,0.002607,0.495525,0.479406,0.459254,0.503439,0.503439,...,0.478168,0.474886,0.472281,0.477304,0.112646,0.508374,0.499460,0.519891,0.006001,0.515110
7,0.632142,0.564488,0.174895,1.643828,0.360929,1.643828,0.534719,0.456209,0.987373,0.987373,...,0.502221,0.499734,0.390329,0.911657,0.150505,0.507910,0.468161,0.604680,0.034225,0.518387
9,0.335655,0.570948,0.495151,0.930675,0.161387,0.495151,0.476255,0.455227,0.517818,0.507090,...,0.489690,0.475086,0.439777,0.469979,0.087179,0.503403,0.485663,0.522157,0.012530,0.507131
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31647,0.116165,0.515830,0.512452,0.520297,0.002378,0.515039,0.453416,0.439150,0.471145,0.461026,...,0.480875,0.473332,0.470688,0.476406,0.051935,0.497669,0.484010,0.515661,0.010584,0.515661
31649,0.781750,0.498699,0.058443,0.766080,0.167178,0.058443,0.473101,0.391510,0.504066,0.391510,...,0.404815,0.469678,0.423539,0.520180,0.122543,0.443632,0.181108,0.548692,0.141348,0.277614
31651,0.673715,0.369269,-0.623551,0.599595,0.345904,-0.623551,0.475101,0.423236,0.527064,0.485653,...,0.469264,0.436543,0.325597,0.325597,0.081697,0.505956,0.367739,0.637178,0.060259,0.465105
31652,0.204893,0.517116,0.505640,0.556046,0.017471,0.552277,0.460551,0.449246,0.479147,0.462572,...,0.487409,0.471822,0.424571,0.424571,0.175279,0.503093,0.493094,0.519421,0.008625,0.518834


In [30]:
ypred

Unnamed: 0,pid,LABEL_BaseExcess,LABEL_Fibrinogen,LABEL_AST,LABEL_Alkalinephos,LABEL_Bilirubin_total,LABEL_Lactate,LABEL_TroponinI,LABEL_SaO2,LABEL_Bilirubin_direct,LABEL_EtCO2,LABEL_Sepsis,LABEL_RRate,LABEL_ABPm,LABEL_SpO2,LABEL_Heartrate
0,0,0.545773,0.049472,0.804899,0.817219,0.908255,0.636174,0.005180,0.538992,0.010465,0.000067,0.166717,16.248343,83.247078,98.671869,85.924025
1,3,0.138541,0.022800,0.313762,0.389698,0.240660,0.096647,0.821520,0.196580,0.007064,0.004755,0.147701,17.162941,84.169452,97.661980,90.025895
2,5,0.032409,0.531655,0.408040,0.452028,0.673608,0.044242,0.473168,0.051582,0.070491,0.002615,0.147539,18.604228,73.987263,96.191498,64.262113
3,7,0.331138,0.904108,0.942576,0.955728,0.999190,0.936625,0.017979,0.830424,0.228678,0.011778,0.272388,16.649827,89.148126,98.612310,89.239681
4,9,0.014892,0.011082,0.362638,0.264912,0.577847,0.142464,0.405961,0.019904,0.010531,0.000442,0.163829,19.848833,92.346594,96.157819,89.829481
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12659,31647,0.103219,0.239739,0.720062,0.198509,0.239441,0.452528,0.011821,0.070350,0.094071,0.002833,0.143094,16.392595,72.666071,96.912690,69.334630
12660,31649,0.140697,0.004242,0.924691,0.773840,0.174887,0.289284,0.566301,0.141191,0.011016,0.126198,0.159940,16.258791,85.554876,96.799751,92.792245
12661,31651,0.129858,0.004809,0.348076,0.072058,0.069971,0.751567,0.036221,0.476282,0.003824,0.016665,0.163714,19.702486,75.778341,98.684319,84.436048
12662,31652,0.063199,0.498029,0.666070,0.967300,0.430012,0.180696,0.022326,0.020503,0.257275,0.014452,0.153286,20.144270,95.168466,97.959198,113.886743


In [31]:

def get_score(df_true, df_submission):
    df_submission = df_submission.sort_values('pid')
    df_true = df_true.sort_values('pid')
    task1 = np.mean([sklearn.metrics.roc_auc_score(df_true[entry], df_submission[entry]) for entry in TESTS])
    task2 = sklearn.metrics.roc_auc_score(df_true['LABEL_Sepsis'], df_submission['LABEL_Sepsis'])
    task3 = np.mean([0.5 + 0.5 * np.maximum(0, sklearn.metrics.r2_score(df_true[entry], df_submission[entry])) for entry in VITALS])
    score = np.mean([task1, task2, task3])
    return score

get_score(train_labels, train_cv)

1.0

## Output

In [32]:
test_labels

Unnamed: 0,LABEL_BaseExcess,LABEL_Fibrinogen,LABEL_AST,LABEL_Alkalinephos,LABEL_Bilirubin_total,LABEL_Lactate,LABEL_TroponinI,LABEL_SaO2,LABEL_Bilirubin_direct,LABEL_EtCO2,LABEL_Sepsis,LABEL_RRate,LABEL_ABPm,LABEL_SpO2,LABEL_Heartrate
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12659,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12660,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12661,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12662,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:

#output
output = np.c_[pid, test_labels]
output = pd.DataFrame(output)

#print(output)

output.columns = ['pid', 'LABEL_BaseExcess', 'LABEL_Fibrinogen', 'LABEL_AST', 'LABEL_Alkalinephos', 
                             'LABEL_Bilirubin_total', 'LABEL_Lactate', 'LABEL_TroponinI', 'LABEL_SaO2', 
                             'LABEL_Bilirubin_direct', 'LABEL_EtCO2', 'LABEL_Sepsis',
                             'LABEL_RRate', 'LABEL_ABPm', 'LABEL_SpO2', 'LABEL_Heartrate']

output.to_csv('prediction.zip', index=False, float_format='%.3f', compression='zip')

In [34]:
ypred.to_csv('prediction.zip', index=False, float_format='%.3f', compression='zip')

In [35]:
ypred

Unnamed: 0,pid,LABEL_BaseExcess,LABEL_Fibrinogen,LABEL_AST,LABEL_Alkalinephos,LABEL_Bilirubin_total,LABEL_Lactate,LABEL_TroponinI,LABEL_SaO2,LABEL_Bilirubin_direct,LABEL_EtCO2,LABEL_Sepsis,LABEL_RRate,LABEL_ABPm,LABEL_SpO2,LABEL_Heartrate
0,0,0.545773,0.049472,0.804899,0.817219,0.908255,0.636174,0.005180,0.538992,0.010465,0.000067,0.166717,16.248343,83.247078,98.671869,85.924025
1,3,0.138541,0.022800,0.313762,0.389698,0.240660,0.096647,0.821520,0.196580,0.007064,0.004755,0.147701,17.162941,84.169452,97.661980,90.025895
2,5,0.032409,0.531655,0.408040,0.452028,0.673608,0.044242,0.473168,0.051582,0.070491,0.002615,0.147539,18.604228,73.987263,96.191498,64.262113
3,7,0.331138,0.904108,0.942576,0.955728,0.999190,0.936625,0.017979,0.830424,0.228678,0.011778,0.272388,16.649827,89.148126,98.612310,89.239681
4,9,0.014892,0.011082,0.362638,0.264912,0.577847,0.142464,0.405961,0.019904,0.010531,0.000442,0.163829,19.848833,92.346594,96.157819,89.829481
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12659,31647,0.103219,0.239739,0.720062,0.198509,0.239441,0.452528,0.011821,0.070350,0.094071,0.002833,0.143094,16.392595,72.666071,96.912690,69.334630
12660,31649,0.140697,0.004242,0.924691,0.773840,0.174887,0.289284,0.566301,0.141191,0.011016,0.126198,0.159940,16.258791,85.554876,96.799751,92.792245
12661,31651,0.129858,0.004809,0.348076,0.072058,0.069971,0.751567,0.036221,0.476282,0.003824,0.016665,0.163714,19.702486,75.778341,98.684319,84.436048
12662,31652,0.063199,0.498029,0.666070,0.967300,0.430012,0.180696,0.022326,0.020503,0.257275,0.014452,0.153286,20.144270,95.168466,97.959198,113.886743
