In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm

np.random.seed(1337)
np.set_printoptions(suppress=True)
pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', None)  # or 1000

# Data

In [2]:
# load the training data from the respective filepaths
train_features_filepath = 'data/train_features.csv'
test_features_filepath = 'data/test_features.csv'
train_labels_filepath = 'data/train_labels.csv'

# create the dataframes
train_features_df = pd.read_csv(train_features_filepath, float_precision="round_trip")
test_features_df = pd.read_csv(test_features_filepath, float_precision="round_trip")
train_labels_df = pd.read_csv(train_labels_filepath, float_precision="round_trip")
print("Train shape:", train_features_df.shape, "| Train label shape:", train_labels_df.shape, "| Test shape:", test_features_df.shape)
display(train_features_df.head(40))
display(train_labels_df.head())
display(test_features_df.head(30))

Train shape: (227940, 37) | Train label shape: (18995, 16) | Test shape: (151968, 37)


Unnamed: 0,pid,Time,Age,EtCO2,PTT,BUN,Lactate,Temp,Hgb,HCO3,BaseExcess,RRate,Fibrinogen,Phosphate,WBC,Creatinine,PaCO2,AST,FiO2,Platelets,SaO2,Glucose,ABPm,Magnesium,Potassium,ABPd,Calcium,Alkalinephos,SpO2,Bilirubin_direct,Chloride,Hct,Heartrate,Bilirubin_total,TroponinI,ABPs,pH
0,1,3,34.0,,,12.0,,36.0,8.7,24.0,-2.0,16.0,,,6.3,,45.0,,,,,,84.0,1.2,3.8,61.0,,,100.0,,114.0,24.6,94.0,,,142.0,7.33
1,1,4,34.0,,,,,36.0,,,-2.0,16.0,,,,,,,0.5,,,,81.0,,,62.5,,,100.0,,,,99.0,,,125.0,7.33
2,1,5,34.0,,,,,36.0,,,0.0,18.0,,,,,43.0,,0.4,,,,74.0,,,59.0,,,100.0,,,,92.0,,,110.0,7.37
3,1,6,34.0,,,,,37.0,,,0.0,18.0,,,,,,,,,,,66.0,,,49.5,,,100.0,,,,88.0,,,104.0,7.37
4,1,7,34.0,,,,,,,,,18.0,,,,,,,,,,,63.0,1.8,,48.0,,,100.0,,,22.4,81.0,,,100.0,7.41
5,1,8,34.0,,,,,37.0,,,,16.0,,,,,,,0.4,,,,68.0,1.8,,51.0,,,100.0,,,22.4,82.0,,,106.0,
6,1,9,34.0,,,,,37.0,,,,18.0,,,,,,,,,,,65.0,,,46.0,,,100.0,,,,67.0,,,112.0,
7,1,10,34.0,,,,,37.0,,,,18.0,,,,,,,,,,,68.0,,,47.0,,,100.0,,,,62.0,,,121.0,
8,1,11,34.0,,,12.0,,,8.5,26.0,,12.0,,4.6,4.7,0.5,,,,143.0,,120.0,67.0,2.1,4.1,47.0,7.6,,100.0,,111.0,23.8,58.0,,,118.0,
9,1,12,34.0,,,12.0,,38.0,8.5,26.0,0.0,18.0,,,4.7,,42.0,,0.4,,,,62.0,2.1,4.1,44.0,,,100.0,,111.0,23.8,66.0,,,110.0,7.39


Unnamed: 0,pid,LABEL_BaseExcess,LABEL_Fibrinogen,LABEL_AST,LABEL_Alkalinephos,LABEL_Bilirubin_total,LABEL_Lactate,LABEL_TroponinI,LABEL_SaO2,LABEL_Bilirubin_direct,LABEL_EtCO2,LABEL_Sepsis,LABEL_RRate,LABEL_ABPm,LABEL_SpO2,LABEL_Heartrate
0,1,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,12.1,85.4,100.0,59.9
1,10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.8,100.6,95.5,85.5
2,100,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,16.5,88.3,96.5,108.1
3,1000,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,19.4,77.2,98.3,80.9
4,10000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.6,76.8,97.7,95.3


Unnamed: 0,pid,Time,Age,EtCO2,PTT,BUN,Lactate,Temp,Hgb,HCO3,BaseExcess,RRate,Fibrinogen,Phosphate,WBC,Creatinine,PaCO2,AST,FiO2,Platelets,SaO2,Glucose,ABPm,Magnesium,Potassium,ABPd,Calcium,Alkalinephos,SpO2,Bilirubin_direct,Chloride,Hct,Heartrate,Bilirubin_total,TroponinI,ABPs,pH
0,0,1,39.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0,2,39.0,,44.2,17.0,,36.0,10.2,13.0,,,147.0,6.0,17.5,2.2,,32.0,0.6,194.0,,273.0,77.0,2.2,4.6,76.0,8.0,119.0,100.0,,98.0,31.0,82.0,21.8,,119.0,
2,0,3,39.0,,,,,,,,-9.0,13.0,,,,,26.0,,0.55,,,,78.0,,,72.5,,,100.0,,,,78.0,,,125.0,7.34
3,0,4,39.0,,,,,,,,,12.0,,,,,,,0.5,,,,87.0,,,66.0,,,100.0,,,,80.0,,,136.0,
4,0,5,39.0,,,,,,,,,,,,,,,,,,,,86.0,,,65.0,,,100.0,,,,83.0,,,135.0,
5,0,6,39.0,,,,,36.0,,,,10.0,,,,,,,,,,,89.0,,,66.0,,,100.0,,,,88.0,,,144.0,
6,0,7,39.0,,38.5,20.0,,,9.1,16.0,,12.0,,4.8,18.5,2.4,,31.0,0.5,193.0,,162.0,,2.5,3.7,,8.3,109.0,100.0,,102.0,25.9,,26.4,,,
7,0,8,39.0,,,,,36.0,,,-4.0,12.0,,,,,30.0,,0.5,,,,80.0,,,59.0,,,100.0,,,,90.0,,,129.0,7.4
8,0,9,39.0,,,,,36.0,,,,12.0,,,,,,,,,,,75.0,,,56.5,,,100.0,,,,90.0,,,121.0,
9,0,10,39.0,,,,,36.0,,,,11.0,,,,,,,,,,,74.0,,,55.0,,,100.0,,,,85.0,,,120.0,


## Grouping

* groupby the `'pid'` column values
* ignore `'Time'` col

In [3]:
def group_columns_in_df(df):
    return df.loc[:, df.columns != 'Time'].groupby('pid', as_index=False, sort=False).agg(np.nanmean)

X_df = group_columns_in_df(train_features_df)
display(X_df.shape)
display(X_df.head(30))

X_test_df = group_columns_in_df(test_features_df)

(18995, 36)

Unnamed: 0,pid,Age,EtCO2,PTT,BUN,Lactate,Temp,Hgb,HCO3,BaseExcess,RRate,Fibrinogen,Phosphate,WBC,Creatinine,PaCO2,AST,FiO2,Platelets,SaO2,Glucose,ABPm,Magnesium,Potassium,ABPd,Calcium,Alkalinephos,SpO2,Bilirubin_direct,Chloride,Hct,Heartrate,Bilirubin_total,TroponinI,ABPs,pH
0,1,34.0,,,12.0,,36.75,8.566667,25.333333,-0.666667,17.0,,4.6,5.233333,0.5,43.333333,,0.425,143.0,,120.0,68.333333,1.8,4.0,50.25,7.6,,100.0,,112.0,23.2,77.083333,,,114.5,7.37
1,10,71.0,,27.8,12.0,,36.0,14.6,,,18.090909,,2.5,11.5,0.82,,20.0,,207.0,,152.0,101.727273,1.5,3.2,83.272727,8.6,68.0,98.0,,,42.1,78.818182,1.3,0.01,132.909091,
2,100,68.0,,20.9,21.0,,36.25,12.5,27.0,,14.833333,,3.5,12.5,1.1,,,,204.0,,243.0,81.833333,1.7,3.6,62.833333,9.0,,96.5,,101.0,36.8,109.083333,,,117.0,
3,1000,79.0,31.863636,,22.0,3.855,36.818182,9.2,,,12.0,,1.9,19.6,0.96,44.0,,0.4,158.0,98.0,128.625,83.454545,2.0,3.966667,62.818182,3.463333,,98.818182,,,27.3,86.363636,,,141.909091,7.3
4,10000,76.0,,28.55,22.0,,36.75,10.7,25.5,1.5,12.090909,,,7.75,1.0,44.5,,0.5,135.0,98.25,121.75,69.090909,1.4,3.9,48.227273,,,98.545455,,103.5,30.3,77.090909,,,123.0,7.39
5,10002,73.0,19.0,31.3,18.0,3.005,37.0,10.4,,,19.625,161.0,3.0,10.3,0.98,40.5,41.0,0.8,83.0,,127.166667,69.818182,2.1,4.475,48.5,3.12,38.0,99.181818,,107.0,30.3,67.090909,0.8,,132.090909,7.375
6,10006,51.0,,,,,37.5,,,,18.888889,,,,,,,,,,200.5,70.555556,,,48.714286,,,96.666667,,,,82.0,,,117.888889,
7,10007,60.0,,,,,38.0,,,,21.909091,,2.4,,,,,,,,87.0,108.181818,1.6,3.6,88.363636,,,94.909091,,,,79.909091,,0.08,139.363636,
8,10009,69.0,,86.05,15.0,,37.25,12.2,21.0,,22.5,,3.1,8.7,0.5,,17.0,,182.0,,109.0,65.909091,1.2,4.1,65.227273,8.1,64.0,97.090909,,104.0,34.0,97.727273,0.8,,90.909091,
9,1001,36.0,,31.2,10.0,1.8,37.666667,10.4,31.0,7.5,13.363636,,2.1,8.7,0.5,40.25,,0.416667,205.0,,98.666667,85.909091,2.0,3.1,69.181818,8.5,,100.0,,105.0,29.8,106.727273,,,113.090909,7.515


## Mean value imputation

In [4]:
def mean_impute_df(df):
    return df.fillna(df.mean())

X_df = mean_impute_df(X_df)
display(X_df.head(30))

X_test_df = mean_impute_df(X_test_df)

Unnamed: 0,pid,Age,EtCO2,PTT,BUN,Lactate,Temp,Hgb,HCO3,BaseExcess,RRate,Fibrinogen,Phosphate,WBC,Creatinine,PaCO2,AST,FiO2,Platelets,SaO2,Glucose,ABPm,Magnesium,Potassium,ABPd,Calcium,Alkalinephos,SpO2,Bilirubin_direct,Chloride,Hct,Heartrate,Bilirubin_total,TroponinI,ABPs,pH
0,1,34.0,32.829817,38.801443,12.0,2.383041,36.75,8.566667,25.333333,-0.666667,17.0,272.937922,4.6,5.233333,0.5,43.333333,158.859112,0.425,143.0,93.310435,120.0,68.333333,1.8,4.0,50.25,7.6,96.675671,100.0,1.170195,112.0,23.2,77.083333,1.544023,6.414359,114.5,7.37
1,10,71.0,32.829817,27.8,12.0,2.383041,36.0,14.6,23.801594,-0.789992,18.090909,272.937922,2.5,11.5,0.82,40.940753,20.0,0.67129,207.0,93.310435,152.0,101.727273,1.5,3.2,83.272727,8.6,68.0,98.0,1.170195,105.925903,42.1,78.818182,1.3,0.01,132.909091,7.375355
2,100,68.0,32.829817,20.9,21.0,2.383041,36.25,12.5,27.0,-0.789992,14.833333,272.937922,3.5,12.5,1.1,40.940753,158.859112,0.67129,204.0,93.310435,243.0,81.833333,1.7,3.6,62.833333,9.0,96.675671,96.5,1.170195,101.0,36.8,109.083333,1.544023,6.414359,117.0,7.375355
3,1000,79.0,31.863636,38.801443,22.0,3.855,36.818182,9.2,23.801594,-0.789992,12.0,272.937922,1.9,19.6,0.96,44.0,158.859112,0.4,158.0,98.0,128.625,83.454545,2.0,3.966667,62.818182,3.463333,96.675671,98.818182,1.170195,105.925903,27.3,86.363636,1.544023,6.414359,141.909091,7.3
4,10000,76.0,32.829817,28.55,22.0,2.383041,36.75,10.7,25.5,1.5,12.090909,272.937922,3.618139,7.75,1.0,44.5,158.859112,0.5,135.0,98.25,121.75,69.090909,1.4,3.9,48.227273,7.738113,96.675671,98.545455,1.170195,103.5,30.3,77.090909,1.544023,6.414359,123.0,7.39
5,10002,73.0,19.0,31.3,18.0,3.005,37.0,10.4,23.801594,-0.789992,19.625,161.0,3.0,10.3,0.98,40.5,41.0,0.8,83.0,93.310435,127.166667,69.818182,2.1,4.475,48.5,3.12,38.0,99.181818,1.170195,107.0,30.3,67.090909,0.8,6.414359,132.090909,7.375
6,10006,51.0,32.829817,38.801443,23.000573,2.383041,37.5,10.748832,23.801594,-0.789992,18.888889,272.937922,3.618139,11.561759,1.469968,40.940753,158.859112,0.67129,210.708276,93.310435,200.5,70.555556,1.995272,4.113012,48.714286,7.738113,96.675671,96.666667,1.170195,105.925903,32.005177,82.0,1.544023,6.414359,117.888889,7.375355
7,10007,60.0,32.829817,38.801443,23.000573,2.383041,38.0,10.748832,23.801594,-0.789992,21.909091,272.937922,2.4,11.561759,1.469968,40.940753,158.859112,0.67129,210.708276,93.310435,87.0,108.181818,1.6,3.6,88.363636,7.738113,96.675671,94.909091,1.170195,105.925903,32.005177,79.909091,1.544023,0.08,139.363636,7.375355
8,10009,69.0,32.829817,86.05,15.0,2.383041,37.25,12.2,21.0,-0.789992,22.5,272.937922,3.1,8.7,0.5,40.940753,17.0,0.67129,182.0,93.310435,109.0,65.909091,1.2,4.1,65.227273,8.1,64.0,97.090909,1.170195,104.0,34.0,97.727273,0.8,6.414359,90.909091,7.375355
9,1001,36.0,32.829817,31.2,10.0,1.8,37.666667,10.4,31.0,7.5,13.363636,272.937922,2.1,8.7,0.5,40.25,158.859112,0.416667,205.0,93.310435,98.666667,85.909091,2.0,3.1,69.181818,8.5,96.675671,100.0,1.170195,105.0,29.8,106.727273,1.544023,6.414359,113.090909,7.515


## Scaling / normalization

In [5]:
from sklearn import preprocessing

# Scale the dataframe
def scale_df(df):
    scaler = preprocessing.MinMaxScaler()
    df.loc[:, df.columns != 'pid'] = scaler.fit_transform(df.loc[:, df.columns != 'pid'])

scale_df(X_df)
display(X_df.head(30))

scale_df(X_test_df)

Unnamed: 0,pid,Age,EtCO2,PTT,BUN,Lactate,Temp,Hgb,HCO3,BaseExcess,RRate,Fibrinogen,Phosphate,WBC,Creatinine,PaCO2,AST,FiO2,Platelets,SaO2,Glucose,ABPm,Magnesium,Potassium,ABPd,Calcium,Alkalinephos,SpO2,Bilirubin_direct,Chloride,Hct,Heartrate,Bilirubin_total,TroponinI,ABPs,pH
0,1,0.223529,0.253665,0.093957,0.041199,0.088892,0.489691,0.253268,0.43295,0.477124,0.167539,0.228567,0.264033,0.013268,0.009569,0.383142,0.015854,0.000425,0.059965,0.897084,0.130575,0.267541,0.203125,0.232955,0.164146,0.336735,0.022161,1.0,0.054752,0.565445,0.242224,0.369278,0.031121,0.014556,0.383223,0.615063
1,10,0.658824,0.253665,0.046761,0.041199,0.088892,0.412371,0.54902,0.397738,0.474706,0.178962,0.228567,0.133056,0.029465,0.017225,0.355641,0.001546,0.000671,0.087575,0.897084,0.171946,0.501481,0.15625,0.142045,0.40068,0.387755,0.014656,0.972603,0.054752,0.47004,0.598492,0.382,0.025862,0.0,0.494322,0.621784
2,100,0.623529,0.253665,0.01716,0.074906,0.088892,0.438144,0.446078,0.471264,0.474706,0.144852,0.228567,0.195426,0.03205,0.023923,0.355641,0.015854,0.000671,0.086281,0.897084,0.289593,0.362115,0.1875,0.1875,0.254278,0.408163,0.022161,0.952055,0.054752,0.39267,0.498586,0.603944,0.031121,0.014556,0.39831,0.621784
3,1000,0.752941,0.242929,0.093957,0.078652,0.151707,0.49672,0.284314,0.397738,0.474706,0.115183,0.228567,0.095634,0.050401,0.020574,0.390805,0.015854,0.0004,0.066437,0.969231,0.141726,0.373472,0.234375,0.229167,0.254169,0.12568,0.022161,0.983811,0.054752,0.47004,0.31951,0.437333,0.031121,0.014556,0.548637,0.527197
4,10000,0.717647,0.253665,0.049979,0.078652,0.088892,0.489691,0.357843,0.436782,0.519608,0.116135,0.228567,0.202795,0.019773,0.021531,0.396552,0.015854,0.0005,0.056514,0.973077,0.132838,0.272849,0.140625,0.221591,0.149658,0.343781,0.022161,0.980075,0.054752,0.431937,0.37606,0.369333,0.031121,0.014556,0.43452,0.640167
5,10002,0.682353,0.1,0.061776,0.06367,0.115434,0.515464,0.343137,0.397738,0.474706,0.195026,0.121037,0.164241,0.026363,0.021053,0.350575,0.003709,0.0008,0.034081,0.897084,0.139841,0.277943,0.25,0.286932,0.151612,0.108163,0.006805,0.988792,0.054752,0.486911,0.37606,0.296,0.015086,0.014556,0.489384,0.621339
6,10006,0.423529,0.253665,0.093957,0.082399,0.088892,0.56701,0.360237,0.397738,0.474706,0.187318,0.228567,0.202795,0.029625,0.032774,0.355641,0.015854,0.000671,0.089175,0.897084,0.234648,0.283109,0.233636,0.245797,0.153146,0.343781,0.022161,0.954338,0.054752,0.47004,0.408203,0.405333,0.031121,0.014556,0.403675,0.621784
7,10007,0.529412,0.253665,0.093957,0.082399,0.088892,0.618557,0.360237,0.397738,0.474706,0.218943,0.228567,0.126819,0.029625,0.032774,0.355641,0.015854,0.000671,0.089175,0.897084,0.087912,0.546698,0.171875,0.1875,0.437145,0.343781,0.022161,0.930262,0.054752,0.47004,0.408203,0.39,0.031121,0.000159,0.533275,0.621784
8,10009,0.635294,0.253665,0.296654,0.052434,0.088892,0.541237,0.431373,0.333333,0.474706,0.225131,0.228567,0.170478,0.022228,0.009569,0.355641,0.001236,0.000671,0.07679,0.897084,0.116354,0.250559,0.109375,0.244318,0.271425,0.362245,0.013609,0.960149,0.054752,0.439791,0.445806,0.520667,0.015086,0.014556,0.240851,0.621784
9,1001,0.247059,0.253665,0.061347,0.033708,0.064011,0.584192,0.343137,0.563218,0.637255,0.129462,0.228567,0.108108,0.022228,0.009569,0.347701,0.015854,0.000417,0.086713,0.897084,0.102995,0.390667,0.234375,0.130682,0.29975,0.382653,0.022161,1.0,0.054752,0.455497,0.366635,0.586667,0.031121,0.014556,0.374719,0.797071


# SVM Training

In [6]:
X_df = X_df.iloc[:, :]
train_labels_df = train_labels_df.iloc[:, :]

# Prepare train set
# Assert that the pids are matching in train features / train labels dfs
assert(X_df.iloc[:, 0].equals(train_labels_df.iloc[:, 0]))

X = X_df.iloc[:, 1:].to_numpy()
y = train_labels_df.iloc[:, 1:].to_numpy()

assert(X.shape[0] == y.shape[0])

In [7]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR

model = MultiOutputRegressor(SVR(verbose=True))

model.fit(X, y)

[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]

MultiOutputRegressor(estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                                   epsilon=0.1, gamma='scale', kernel='rbf',
                                   max_iter=-1, shrinking=True, tol=0.001,
                                   verbose=True),
                     n_jobs=None)

# Predictions

In [8]:
def predict(df):
    # Create X_test by removing 'pid' column
    X_test = df.iloc[:, 1:].to_numpy()
    
    # Predict
    predictions = model.predict(X_test)
    predictions[:, :-4] = np.divide(1, 1+np.exp(-predictions[:, :-4]))
    
    # Create predictions df
    predict_labels_df = pd.DataFrame(columns=train_labels_df.columns)
    predict_labels_df[['pid']] = df[['pid']]
    predict_labels_df.loc[:, predict_labels_df.columns != 'pid'] = predictions
    
    return predict_labels_df

## Train set

In [9]:
prediction_labels_df = predict(X_df.iloc[:, :])

print(prediction_labels_df.shape)
display(prediction_labels_df.head(15))
display(train_labels_df.head(15))

(18995, 16)


Unnamed: 0,pid,LABEL_BaseExcess,LABEL_Fibrinogen,LABEL_AST,LABEL_Alkalinephos,LABEL_Bilirubin_total,LABEL_Lactate,LABEL_TroponinI,LABEL_SaO2,LABEL_Bilirubin_direct,LABEL_EtCO2,LABEL_Sepsis,LABEL_RRate,LABEL_ABPm,LABEL_SpO2,LABEL_Heartrate
0,1,0.56062,0.524714,0.523915,0.523785,0.524225,0.526291,0.524789,0.5277,0.524437,0.523085,0.525008,17.685578,73.467487,98.495299,80.118046
1,10,0.523718,0.52352,0.519347,0.519076,0.51927,0.525304,0.517547,0.526422,0.524878,0.524728,0.524995,18.113467,96.190364,97.257727,79.30255
2,100,0.515548,0.524636,0.524884,0.525026,0.524804,0.521702,0.524781,0.51998,0.524927,0.523992,0.524872,16.97173,80.944109,96.449393,101.973637
3,1000,0.485813,0.52425,0.52081,0.520846,0.520719,0.570326,0.524799,0.543804,0.524224,0.527349,0.524891,16.071778,83.584656,97.229498,86.018041
4,10000,0.512307,0.524582,0.52399,0.524129,0.523985,0.520449,0.524978,0.514505,0.524659,0.5238,0.524901,15.718971,73.098764,97.502918,78.66199
5,10002,0.515064,0.523704,0.510684,0.510007,0.509881,0.551629,0.524891,0.531889,0.523805,0.534041,0.524792,19.817797,74.033993,97.220365,73.136345
6,10006,0.534374,0.524766,0.523922,0.523835,0.52379,0.524387,0.524877,0.52457,0.524851,0.523638,0.524873,18.851122,74.006127,96.55007,81.304211
7,10007,0.512544,0.524822,0.526272,0.526584,0.526365,0.523466,0.518614,0.52389,0.524886,0.524701,0.524938,20.194433,101.166784,95.881992,78.692426
8,10009,0.519988,0.522401,0.512995,0.51235,0.513102,0.522574,0.524474,0.520518,0.524049,0.520738,0.524727,20.826442,69.555718,96.734918,93.771414
9,1001,0.629378,0.524313,0.525915,0.525704,0.525744,0.529491,0.524548,0.52322,0.524152,0.520738,0.525034,16.33911,84.586043,98.789683,99.731446


Unnamed: 0,pid,LABEL_BaseExcess,LABEL_Fibrinogen,LABEL_AST,LABEL_Alkalinephos,LABEL_Bilirubin_total,LABEL_Lactate,LABEL_TroponinI,LABEL_SaO2,LABEL_Bilirubin_direct,LABEL_EtCO2,LABEL_Sepsis,LABEL_RRate,LABEL_ABPm,LABEL_SpO2,LABEL_Heartrate
0,1,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,12.1,85.4,100.0,59.9
1,10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.8,100.6,95.5,85.5
2,100,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,16.5,88.3,96.5,108.1
3,1000,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,19.4,77.2,98.3,80.9
4,10000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.6,76.8,97.7,95.3
5,10002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.5,67.4,99.1,64.6
6,10006,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.2,84.9,96.8,90.5
7,10007,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,23.8,97.0,94.3,76.0
8,10009,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,18.9,72.0,95.3,91.5
9,1001,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,14.9,83.3,99.9,117.3


In [10]:
prediction_labels_df.to_csv('data/prediction_train.csv', index=False, float_format='%.3f')

In [11]:
train_labels_df.to_csv('data/prediction_gold.csv', index=False, float_format='%.3f')

## Test set

In [12]:
prediction_test_labels_df = predict(X_test_df)

print(prediction_test_labels_df.shape)
display(prediction_test_labels_df.head(15))

(12664, 16)


Unnamed: 0,pid,LABEL_BaseExcess,LABEL_Fibrinogen,LABEL_AST,LABEL_Alkalinephos,LABEL_Bilirubin_total,LABEL_Lactate,LABEL_TroponinI,LABEL_SaO2,LABEL_Bilirubin_direct,LABEL_EtCO2,LABEL_Sepsis,LABEL_RRate,LABEL_ABPm,LABEL_SpO2,LABEL_Heartrate
0,0,0.635368,0.552015,0.713145,0.713503,0.714193,0.568709,0.525712,0.524767,0.528037,0.520184,0.526465,20.581073,89.891861,99.0401,80.570799
1,10001,0.533355,0.522622,0.533915,0.534506,0.533872,0.527232,0.528636,0.534813,0.524062,0.525291,0.525054,26.82066,96.967766,93.502543,97.205909
2,10003,0.554645,0.521691,0.529569,0.530117,0.529823,0.524164,0.52868,0.535659,0.524083,0.5246,0.524827,27.22547,88.875091,97.948389,87.088184
3,10004,0.517985,0.51996,0.514113,0.515369,0.514084,0.526606,0.528832,0.526043,0.524295,0.524311,0.524883,24.057048,79.116047,94.552077,83.079637
4,10005,0.466833,0.521062,0.530189,0.530709,0.529927,0.523998,0.528881,0.512056,0.523595,0.525325,0.524774,28.810381,84.095443,95.246103,57.767978
5,10008,0.668711,0.523537,0.533731,0.535264,0.534352,0.529078,0.529607,0.544254,0.523724,0.52415,0.52509,27.720951,101.60767,97.41466,70.19223
6,10011,0.546549,0.52352,0.529305,0.530243,0.52985,0.525893,0.527998,0.525592,0.524323,0.525066,0.525047,24.351484,105.052455,98.407813,68.908579
7,10017,0.599732,0.520257,0.519542,0.520077,0.519711,0.529916,0.52789,0.538421,0.523504,0.525859,0.525133,30.859022,113.992944,97.156083,93.86055
8,10018,0.503948,0.524636,0.533262,0.534127,0.534146,0.523028,0.52839,0.532404,0.524004,0.525556,0.524612,28.802224,86.907541,93.480368,95.907003
9,10019,0.523424,0.521625,0.530384,0.53095,0.53095,0.529732,0.529065,0.532215,0.523717,0.52564,0.524876,25.253278,91.122413,95.287792,67.893686


In [13]:
prediction_test_labels_df.to_csv('data/prediction.csv', index=False, float_format='%.3f')