In [None]:
import warnings

import matplotlib.pyplot as plk
import numpy as np
import pandas as pd

In [None]:
from sklearn.svm import *
from sklearn.model_selection import *
from sklearn.pipeline import *
from sklearn.impute import *
from sklearn.preprocessing import *
import sklearn.metrics as metrics

In [None]:
# Read the three input tables from the working directory: the training
# features, the training labels and the test features.
train_features, train_labels, test_features = (
    pd.read_csv(csv_name)
    for csv_name in ('train_features.csv', 'train_labels.csv', 'test_features.csv')
)

In [None]:
# Targets fitted with classifiers in the "Subtask 1" loop; one probability
# column is produced for each of them.
labels = [
    'LABEL_BaseExcess',
    'LABEL_Fibrinogen',
    'LABEL_AST',
    'LABEL_Alkalinephos',
    'LABEL_Bilirubin_total',
    'LABEL_Lactate',
    'LABEL_TroponinI',
    'LABEL_SaO2',
    'LABEL_Bilirubin_direct',
    'LABEL_EtCO2',
]

# Single target fitted with a classifier in "Subtask 2".
labels_2 = [
    'LABEL_Sepsis',
]

# Targets fitted with regressors in "Subtask 3".
labels_3 = [
    'LABEL_RRate',
    'LABEL_ABPm',
    'LABEL_SpO2',
    'LABEL_Heartrate',
]

In [None]:
def process_patient(patient_data):
    """Summarise one patient's measurements into a flat feature vector.

    Six NaN-aware statistics are computed along axis 0 for every column of
    ``patient_data``: mean, minimum, maximum, variance, median and the number
    of non-NaN entries.

    Parameters
    ----------
    patient_data : array-like, shape (n_rows, n_columns)
        Numeric measurements of a single patient; missing values are NaN.

    Returns
    -------
    numpy.ndarray, shape (6 * n_columns,)
        The statistics laid out statistic-major: all means first, followed by
        all minima, maxima, variances, medians and finally the counts. A
        column without any observed value yields NaN for the first five
        statistics and 0 for the count.
    """
    # np.isnan rejects object arrays, so coerce to float up front; this also
    # lets callers pass plain lists or integer arrays.
    patient_data = np.asarray(patient_data, dtype=float)

    # A column that is entirely NaN makes each nan-reduction emit a
    # RuntimeWarning ("Mean of empty slice", "All-NaN slice encountered", ...)
    # while still returning NaN. That NaN is the intended result here, so the
    # warnings are silenced locally instead of flooding the notebook output.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        avgarray = np.nanmean(patient_data, axis=0)
        minarray = np.nanmin(patient_data, axis=0)
        maxarray = np.nanmax(patient_data, axis=0)
        vararray = np.nanvar(patient_data, axis=0)
        medarray = np.nanmedian(patient_data, axis=0)

    # Number of observed (non-NaN) values per column.
    numarray = (~np.isnan(patient_data)).sum(axis=0)

    # Stack as (6, n_columns) and flatten row-major -> statistic-major layout.
    return np.ravel(np.array([avgarray, minarray, maxarray, vararray, medarray, numarray]))

def compute_features_for_patient(X_train, rows_per_patient=12):
    """Build one summary feature vector per patient from stacked rows.

    The input is cut into consecutive chunks of ``rows_per_patient`` rows and
    each chunk is summarised with ``process_patient``.

    Parameters
    ----------
    X_train : array-like, shape (n_rows, n_columns)
        Measurements of all patients stacked vertically. The rows of one
        patient are assumed to be contiguous; this function does not check it.
    rows_per_patient : int, default 12
        Number of consecutive rows that belong to one patient.

    Returns
    -------
    numpy.ndarray
        One row per complete chunk. Trailing rows that do not fill a whole
        chunk are ignored. When there is no complete chunk the result is an
        empty array of shape ``(0,)``.

    Raises
    ------
    ValueError
        If ``rows_per_patient`` is not positive.
    """
    if rows_per_patient <= 0:
        raise ValueError("rows_per_patient must be a positive integer")

    X_train = np.asarray(X_train)
    # Integer division keeps the count exact and drops any incomplete chunk.
    nbPatient = X_train.shape[0] // rows_per_patient

    return np.array([
        process_patient(X_train[start:start + rows_per_patient, :])
        for start in range(0, nbPatient * rows_per_patient, rows_per_patient)
    ])

In [None]:
# Per-patient mean of the column at index 2 of the pid-grouped frame.
# NOTE(review): presumably a static per-patient attribute - confirm against
# the CSV schema. sort=False keeps the pids in order of first appearance,
# which is assumed to match the consecutive-chunk order used below.
train_static = np.asarray(
    train_features.groupby(['pid'], as_index=False, sort=False).mean()
)[:, 2]
test_static = np.asarray(
    test_features.groupby(['pid'], as_index=False, sort=False).mean()
)[:, 2]

# Summary statistics over the columns from index 3 onward, with the static
# value inserted as the first feature column.
X_train = np.insert(
    compute_features_for_patient(np.asarray(train_features)[:, 3:]),
    0,
    train_static,
    axis=1,
)
X_test = np.insert(
    compute_features_for_patient(np.asarray(test_features)[:, 3:]),
    0,
    test_static,
    axis=1,
)

  
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.
  """
  overwrite_input=overwrite_input)


## Subtasks 1-3

In [None]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import HistGradientBoostingRegressor

# Fixed seed passed to every gradient-boosting estimator so that re-running
# the notebook reproduces the same predictions.
RANDOM_STATE = 0

# One prediction vector per label, appended in the order
# labels + labels_2 + labels_3.
output = []

#Subtask 1: one classifier per label; keep the probability of the second
# entry of classes_ (the positive class, assuming the labels are 0/1).
for label in labels:
    pipeline = make_pipeline(
        SimpleImputer(strategy="mean"),
        StandardScaler(),
        HistGradientBoostingClassifier(random_state=RANDOM_STATE),
    )
    pipeline.fit(X_train, np.asarray(train_labels[label]))
    output.append(pipeline.predict_proba(X_test)[:, 1])

#Subtask 2: same model for the single label in labels_2.
pipeline_2 = make_pipeline(
    SimpleImputer(strategy="mean"),
    StandardScaler(),
    HistGradientBoostingClassifier(random_state=RANDOM_STATE),
)
# train_labels[labels_2] is a one-column DataFrame, i.e. an (n, 1) array;
# flatten it so the classifier receives the 1-D target it expects instead of
# raising a DataConversionWarning.
pipeline_2.fit(X_train, np.asarray(train_labels[labels_2]).ravel())
output.append(pipeline_2.predict_proba(X_test)[:, 1])

#Subtask 3: one regressor per label; the predicted values are stored as-is.
for label in labels_3:
    pipeline_3 = make_pipeline(
        SimpleImputer(strategy="mean"),
        StandardScaler(),
        HistGradientBoostingRegressor(random_state=RANDOM_STATE),
    )
    pipeline_3.fit(X_train, np.asarray(train_labels[label]))
    output.append(pipeline_3.predict(X_test))

# Shape (n_labels, n_test_patients); transposed later when building the frame.
output = np.array(output)

  y = column_or_1d(y, warn=True)


## Processing output

In [None]:
# One row per test patient, one column per predicted label, in the order the
# predictions were appended to `output`.
data = pd.DataFrame(np.transpose(output), columns=labels + labels_2 + labels_3)

# Patient ids in order of first appearance. They are read straight from the
# 'pid' column instead of re-running the whole feature extraction over every
# column just to average the ids. The float dtype is kept on purpose so the
# ids are displayed and written exactly as before.
# Assumes every patient's rows are contiguous in test_features, as the
# chunk-based feature extraction already requires.
pid = np.asarray(test_features['pid'].drop_duplicates(), dtype=float)

if len(pid) != len(data):
    raise ValueError(
        "number of distinct pids (%d) does not match number of predictions (%d)"
        % (len(pid), len(data))
    )

data.insert(0, 'pid', pid)
data.head(10)

  
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.
  """
  overwrite_input=overwrite_input)


Unnamed: 0,pid,LABEL_BaseExcess,LABEL_Fibrinogen,LABEL_AST,LABEL_Alkalinephos,LABEL_Bilirubin_total,LABEL_Lactate,LABEL_TroponinI,LABEL_SaO2,LABEL_Bilirubin_direct,LABEL_EtCO2,LABEL_Sepsis,LABEL_RRate,LABEL_ABPm,LABEL_SpO2,LABEL_Heartrate
0,0.0,0.966958,0.595994,0.868515,0.893595,0.919503,0.58422,0.002319,0.23314,0.043451,0.003493,0.059665,13.630846,83.828241,98.836685,86.021261
1,10001.0,0.021409,0.038665,0.28233,0.254959,0.293267,0.045003,0.068447,0.057774,0.022696,0.015245,0.016609,17.410669,86.089578,94.876741,99.979831
2,10003.0,0.005113,0.011733,0.086139,0.048582,0.106362,0.107175,0.050824,0.180226,0.009035,0.006757,0.017355,19.053113,79.136062,98.350174,90.31085
3,10004.0,0.022827,0.019806,0.322862,0.454677,0.234211,0.049907,0.033418,0.097897,0.015519,0.040422,0.016317,16.412464,75.96759,95.940048,88.135103
4,10005.0,0.100996,0.02294,0.11325,0.112557,0.116912,0.085668,0.002136,0.048432,0.002304,0.000226,0.014138,19.397262,73.426885,95.86229,62.153426
5,10008.0,0.950417,0.030916,0.685321,0.565943,0.322593,0.512111,0.00292,0.054216,0.009933,0.001265,0.438415,17.83689,94.014372,96.935761,73.173042
6,10011.0,0.008473,0.026577,0.099776,0.116588,0.104996,0.020624,0.030772,0.045003,0.004776,0.021916,0.021584,16.457546,97.940134,98.501559,70.930516
7,10017.0,0.043217,0.163254,0.253344,0.208154,0.302394,0.153302,0.01413,0.047592,0.001979,0.039698,0.013587,20.464282,109.319428,97.888775,104.068481
8,10018.0,0.026012,0.053686,0.18791,0.185255,0.242427,0.14568,0.07633,0.33793,0.012921,0.332275,0.045221,18.742456,80.701055,94.68204,102.521953
9,10019.0,0.530934,0.014831,0.0954,0.044555,0.086392,0.117739,0.006305,0.716256,0.001664,0.000307,0.013553,17.501302,80.279763,95.816511,74.227386


In [None]:
# Write the predictions to 'output.csv': header row included, no index
# column, floats formatted with three decimals.
data.to_csv(
    'output.csv',
    float_format='%.3f',
    header=True,
    index=False,
)