In [1]:
import numpy as np
import pandas as pd
import os
# explicitly require this experimental feature
from sklearn.experimental import enable_iterative_imputer  # noqa
# now you can import normally from sklearn.impute
from sklearn.impute import IterativeImputer, SimpleImputer
from sklearn.metrics import mean_absolute_error
from sklearn.svm import LinearSVR, SVR, SVC,LinearSVC
from sklearn.datasets import make_regression
from sklearn.preprocessing import normalize 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression, RidgeClassifier, RidgeClassifierCV
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from utils import dataset_imputer, get_score

In [None]:
# Read in data
df_training_features = pd.read_csv('train_features.csv')
df_training_labels = pd.read_csv('train_labels.csv')
all_pids = [pid for pid in df_training_features['pid'].unique()]

pids_train, pids_val = train_test_split(all_pids, test_size = 0.1)

In [12]:
X_pid_train = dataset_imputer(df_training_features, method='count', pid_list=pids_train, fillna=True)
X_pid_val = dataset_imputer(df_training_features, method='count', pid_list=pids_val, fillna=True)

0.0 %
10.0 %
20.0 %
30.0 %
40.0 %
50.0 %
60.0 %
70.0 %
80.0 %
90.0 %
100.0 % - completed

0.0 %
10.0 %
20.0 %
30.0 %
40.0 %
50.0 %
60.0 %
70.0 %
80.0 %
90.0 %
100.0 % - completed



In [13]:
Y_pid_train = dataset_imputer(df_training_labels, method=None, pid_list=pids_train, fillna=True)
Y_pid_val = dataset_imputer(df_training_labels, method=None, pid_list=pids_val, fillna=True)

100.0 % - completed

100.0 % - completed



In [21]:
#x_train = X_pid_train[['pid', 'PTT', 'HCO3', 'BaseExcess', 'PaCO2', 'FiO2', 'SaO2','Chloride', 'Hct', 'pH']]
x_train, x_val = X_pid_train.copy(), X_pid_val.copy()
y_train, y_val = Y_pid_train.copy(), Y_pid_val.copy()

feature_columns = ['pid', 'Age', 'EtCO2', 'PTT', 'BUN', 'Lactate', 'Temp', 'Hgb',
                 'HCO3', 'BaseExcess', 'RRate', 'Fibrinogen', 'Phosphate', 'WBC',
                 'Creatinine', 'PaCO2', 'AST', 'FiO2', 'Platelets', 'SaO2', 'Glucose',
                 'ABPm', 'Magnesium', 'Potassium', 'ABPd', 'Calcium', 'Alkalinephos',
                 'SpO2', 'Bilirubin_direct', 'Chloride', 'Hct', 'Heartrate',
                 'Bilirubin_total', 'TroponinI', 'ABPs', 'pH']

x_train, x_val = x_train[feature_columns], x_val[feature_columns]

label_columns = ['pid', 'LABEL_BaseExcess', 'LABEL_Fibrinogen', 'LABEL_AST',
                 'LABEL_Alkalinephos', 'LABEL_Bilirubin_total', 'LABEL_Lactate',
                 'LABEL_TroponinI', 'LABEL_SaO2', 'LABEL_Bilirubin_direct',
                 'LABEL_EtCO2']

y_train, y_val = y_train[label_columns], y_val[label_columns]

print(x_train.head())
print('*'*100)
print(y_train.head())


  pid Age EtCO2 PTT BUN Lactate Temp Hgb HCO3 BaseExcess ... Alkalinephos  \
0   2  86     0   1   1       0    3   1    0          0 ...            0   
0   4  66     0   1   2       0    3   2    0          0 ...            1   
0   8  42     0   0   1       0    3   0    0          0 ...            0   
0  10  71     0   1   1       0    2   1    0          0 ...            1   
0  13  73     0   1   2       0    5   2    2          4 ...            0   

  SpO2 Bilirubin_direct Chloride Hct Heartrate Bilirubin_total TroponinI ABPs  \
0   11                0        0   1        11               0         1   11   
0   11                1        0   2        11               1         1   11   
0   10                0        0   0        11               0         1   11   
0   11                0        0   1        11               1         1   11   
0   12                0        2   2        12               0         0   11   

  pH  
0  0  
0  0  
0  0  
0  0  
0  4  

[5 rows

In [22]:
print(x_train.shape)
print(y_train.shape)

(17095, 36)
(17095, 11)


In [23]:
# Train MLPClassifier
#regr =   KNeighborsClassifier(3)
regr = MLPClassifier(alpha=1e-5,hidden_layer_sizes=(100,100), random_state=1, solver='sgd', max_iter=200)
#regr = RidgeClassifierCV()
#regr = RandomForestClassifier()
regr.fit(x_train.iloc[:,1:], y_train.iloc[:,1:])



MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100, 100), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1, shuffle=True, solver='sgd',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [24]:
print(x_train.head())
print(y_train.iloc[:5,1:])

  pid Age EtCO2 PTT BUN Lactate Temp Hgb HCO3 BaseExcess ... Alkalinephos  \
0   2  86     0   1   1       0    3   1    0          0 ...            0   
0   4  66     0   1   2       0    3   2    0          0 ...            1   
0   8  42     0   0   1       0    3   0    0          0 ...            0   
0  10  71     0   1   1       0    2   1    0          0 ...            1   
0  13  73     0   1   2       0    5   2    2          4 ...            0   

  SpO2 Bilirubin_direct Chloride Hct Heartrate Bilirubin_total TroponinI ABPs  \
0   11                0        0   1        11               0         1   11   
0   11                1        0   2        11               1         1   11   
0   10                0        0   0        11               0         1   11   
0   11                0        0   1        11               1         1   11   
0   12                0        2   2        12               0         0   11   

  pH  
0  0  
0  0  
0  0  
0  0  
0  4  

[5 rows

In [25]:
print(x_val.head())
print(y_val.iloc[:5,1:])

  pid Age EtCO2 PTT BUN Lactate Temp Hgb HCO3 BaseExcess ... Alkalinephos  \
0   1  34     0   0   3       0    8   3    3          6 ...            0   
0   6  66     0   1   2       2   12   6    2          7 ...            0   
0  24  85     0   1   1       2   12   2    1          7 ...            0   
0  45  53     0   0   0       0    5   0    0          0 ...            0   
0  49  79     0   1   3       2    5   3    3          0 ...            3   

  SpO2 Bilirubin_direct Chloride Hct Heartrate Bilirubin_total TroponinI ABPs  \
0   12                0        3   6        12               0         0   12   
0   12                0        2  10        12               0         0   12   
0   11                0        1   4        12               0         0   12   
0   11                0        0   0        11               0         0   11   
0   11                0        3   3        11               2         0    4   

  pH  
0  7  
0  7  
0  7  
0  0  
0  0  

[5 rows

In [27]:
f = regr.predict_proba(x_val.iloc[:,1:])
f = pd.DataFrame(f)
f.columns = y_val.columns[1:]
#f.columns = ['LABEL_BaseExcess1', 'LABEL_BaseExcess']
f['pid'] = x_val.iloc[:,0].reset_index(drop=True)

print(get_score(y_val, f, tasks=['task1'])[1])
#print(f)


0.8086742979465722


In [2]:
# TEST TIME!

df_test_features = pd.read_csv('test_features.csv')

all_pids_test = [pid for pid in df_test_features['pid'].unique()]

X_pid_test = dataset_imputer(df_test_features, method='count', pid_list=all_pids_test, fillna=True)

x_test = X_pid_test.copy()

feature_columns = ['pid', 'Age', 'EtCO2', 'PTT', 'BUN', 'Lactate', 'Temp', 'Hgb',
                 'HCO3', 'BaseExcess', 'RRate', 'Fibrinogen', 'Phosphate', 'WBC',
                 'Creatinine', 'PaCO2', 'AST', 'FiO2', 'Platelets', 'SaO2', 'Glucose',
                 'ABPm', 'Magnesium', 'Potassium', 'ABPd', 'Calcium', 'Alkalinephos',
                 'SpO2', 'Bilirubin_direct', 'Chloride', 'Hct', 'Heartrate',
                 'Bilirubin_total', 'TroponinI', 'ABPs', 'pH']

x_test= x_test[feature_columns]

print(x_test.head())
print('*'*100)

f = regr.predict_proba(x_test.iloc[:,1:])
f = pd.DataFrame(f)
f.columns = ['LABEL_BaseExcess', 'LABEL_Fibrinogen', 'LABEL_AST',
             'LABEL_Alkalinephos', 'LABEL_Bilirubin_total', 'LABEL_Lactate',
             'LABEL_TroponinI', 'LABEL_SaO2', 'LABEL_Bilirubin_direct',
             'LABEL_EtCO2']
f['pid'] = x_test.iloc[:,0].reset_index(drop=True)

f.to_csv('task1_test.csv', index=None)

0.0 %
10.0 %
20.0 %
30.0 %
40.0 %
50.0 %
60.0 %
70.0 %
80.0 %
90.0 %
100.0 %
100.0 % - completed

   pid  Age  EtCO2  PTT  BUN  Lactate  Temp  Hgb  HCO3  BaseExcess  ...  \
0    0   39      0    2    2        0     5    2     2           2  ...   
0    3   84      0    0    0        0     4    0     0           0  ...   
0    5   62      0    0    0        0     2    0     0           0  ...   
0    7   71      0    3    3        0    10    5     3           2  ...   
0    9   51      0    1    2        0     2    2     2           0  ...   

   Alkalinephos  SpO2  Bilirubin_direct  Chloride  Hct  Heartrate  \
0             2     9                 0         2    2          8   
0             0     6                 0         0    0          6   
0             0    11                 0         0    0         11   
0             3    11                 2         3    5         11   
0             0    11                 0         2    2         11   

   Bilirubin_total  TroponinI  ABPs 

NameError: name 'regr' is not defined