# Intro ML - Project 2

## Load data

In [1]:
import numpy as np
import pandas as pd
train_data = pd.read_csv('train_features.csv')
labels = pd.read_csv('train_labels.csv')

## Data preprocessing

In [8]:
def calculate_time_features(data, n_samples):
    x = []
    features = [np.nanmedian, np.nanmean, np.nanvar, np.nanmin,
           np.nanmax]
    for index in range(int(data.shape[0] / n_samples)):
        patient_data = data[n_samples * index:n_samples * (index + 1), 2:]
        feature_values = np.empty((len(features), train_data_numpy[:, 2:].shape[1]))
        for i, feature in enumerate(features):
            feature_values[i] = feature(patient_data, axis=0)
        x.append(feature_values.ravel())
    ids = data[0::n_samples, 0].astype(np.uint16).copy()
    bits = np.unpackbits(
        np.expand_dims(ids, axis=1).view(np.uint8),
            axis=1)
    x = np.hstack((bits, np.array(x)))
    return x

In [10]:
train_data_numpy = train_data.to_numpy()
x_train = calculate_time_features(train_data_numpy, 12)

  if __name__ == '__main__':
  if __name__ == '__main__':
  if __name__ == '__main__':


## Learning Pipeline - Subtask 1

In [11]:
labels_ids = ['LABEL_BaseExcess', 'LABEL_Fibrinogen', 'LABEL_AST',
         'LABEL_Alkalinephos', 'LABEL_Bilirubin_total', 
         'LABEL_Lactate', 'LABEL_TroponinI', 'LABEL_SaO2',
         'LABEL_Bilirubin_direct', 'LABEL_EtCO2']
y_train = labels[labels_ids].to_numpy()

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import (GridSearchCV,
    cross_val_score, KFold)
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler


pipeline = make_pipeline(
                    SimpleImputer(strategy='median'),
                    StandardScaler(),
                    OneVsRestClassifier(
                        MLPClassifier(
                        hidden_layer_sizes=(64, 64, 64),
                        verbose=True)))
parameter_space = {
    'onevsrestclassifier__estimator__learning_rate_init':
    np.logspace(-4, -2, 1)
}
inner_cv = KFold(n_splits=2, shuffle=True)
outer_cv = KFold(n_splits=2, shuffle=True)
classifier = GridSearchCV(pipeline, parameter_space,
                               n_jobs=-1, scoring='roc_auc',
                               iid=True,
                               refit=True,
                               cv=inner_cv,
                               verbose=True)
scores = cross_val_score(classifier, x_train, y_train,
                            cv=outer_cv,
                            scoring='roc_auc')
print("Cross-validation score is {score:.3f},"
      " standard deviation is {err:.3f}"
      .format(score = scores.mean(), err = scores.std()))
classifier = classifier.fit(x_train, y_train.ravel())

Fitting 2 folds for each of 1 candidates, totalling 2 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:  2.4min finished


Iteration 1, loss = 0.64274840
Iteration 2, loss = 0.56896656
Iteration 3, loss = 0.53228856
Iteration 4, loss = 0.50133224
Iteration 5, loss = 0.47465596
Iteration 6, loss = 0.45177469
Iteration 7, loss = 0.43369398
Iteration 8, loss = 0.41945798
Iteration 9, loss = 0.40833100
Iteration 10, loss = 0.39925023
Iteration 11, loss = 0.39153959
Iteration 12, loss = 0.38513620
Iteration 13, loss = 0.37940331
Iteration 14, loss = 0.37447019
Iteration 15, loss = 0.36982887
Iteration 16, loss = 0.36538219
Iteration 17, loss = 0.36148769
Iteration 18, loss = 0.35798073
Iteration 19, loss = 0.35429249
Iteration 20, loss = 0.35097011
Iteration 21, loss = 0.34748475
Iteration 22, loss = 0.34431427
Iteration 23, loss = 0.34117717
Iteration 24, loss = 0.33806943
Iteration 25, loss = 0.33515382
Iteration 26, loss = 0.33229305
Iteration 27, loss = 0.32940784
Iteration 28, loss = 0.32651491
Iteration 29, loss = 0.32392486
Iteration 30, loss = 0.32127641
Iteration 31, loss = 0.31854807
Iteration 32, los



Iteration 1, loss = 0.71589515
Iteration 2, loss = 0.48908256
Iteration 3, loss = 0.37960369
Iteration 4, loss = 0.32328801
Iteration 5, loss = 0.29390824
Iteration 6, loss = 0.27599881
Iteration 7, loss = 0.26335490
Iteration 8, loss = 0.25401969
Iteration 9, loss = 0.24696556
Iteration 10, loss = 0.24130611
Iteration 11, loss = 0.23656197
Iteration 12, loss = 0.23220718
Iteration 13, loss = 0.22847423
Iteration 14, loss = 0.22501969
Iteration 15, loss = 0.22184697
Iteration 16, loss = 0.21878799
Iteration 17, loss = 0.21620716
Iteration 18, loss = 0.21363840
Iteration 19, loss = 0.21128959
Iteration 20, loss = 0.20903222
Iteration 21, loss = 0.20686844
Iteration 22, loss = 0.20493945
Iteration 23, loss = 0.20284430
Iteration 24, loss = 0.20092991
Iteration 25, loss = 0.19907204
Iteration 26, loss = 0.19726760
Iteration 27, loss = 0.19556585
Iteration 28, loss = 0.19369517
Iteration 29, loss = 0.19200637
Iteration 30, loss = 0.19028558
Iteration 31, loss = 0.18885854
Iteration 32, los



Iteration 2, loss = 0.59765059
Iteration 3, loss = 0.57009081
Iteration 4, loss = 0.55106382
Iteration 5, loss = 0.53693808
Iteration 6, loss = 0.52634604
Iteration 7, loss = 0.51813915
Iteration 8, loss = 0.51192529
Iteration 9, loss = 0.50673144
Iteration 10, loss = 0.50220034
Iteration 11, loss = 0.49820462
Iteration 12, loss = 0.49479674
Iteration 13, loss = 0.49165333
Iteration 14, loss = 0.48824877
Iteration 15, loss = 0.48535864
Iteration 16, loss = 0.48266347
Iteration 17, loss = 0.47995874
Iteration 18, loss = 0.47745254
Iteration 19, loss = 0.47537911
Iteration 20, loss = 0.47298840
Iteration 21, loss = 0.47050557
Iteration 22, loss = 0.46822766
Iteration 23, loss = 0.46619347
Iteration 24, loss = 0.46415754
Iteration 25, loss = 0.46209110
Iteration 26, loss = 0.45993991
Iteration 27, loss = 0.45803707
Iteration 28, loss = 0.45603799
Iteration 29, loss = 0.45377515
Iteration 30, loss = 0.45178459
Iteration 31, loss = 0.44975915
Iteration 32, loss = 0.44781409
Iteration 33, lo



Iteration 1, loss = 0.62083451
Iteration 2, loss = 0.57541053
Iteration 3, loss = 0.55290951
Iteration 4, loss = 0.53698297
Iteration 5, loss = 0.52544869
Iteration 6, loss = 0.51643265
Iteration 7, loss = 0.50947865
Iteration 8, loss = 0.50389883
Iteration 9, loss = 0.49887346
Iteration 10, loss = 0.49458876
Iteration 11, loss = 0.49075368
Iteration 12, loss = 0.48727309
Iteration 13, loss = 0.48389492
Iteration 14, loss = 0.48077411
Iteration 15, loss = 0.47774922
Iteration 16, loss = 0.47482993
Iteration 17, loss = 0.47230339
Iteration 18, loss = 0.46959080
Iteration 19, loss = 0.46690720
Iteration 20, loss = 0.46425444
Iteration 21, loss = 0.46181398
Iteration 22, loss = 0.45942735
Iteration 23, loss = 0.45712168
Iteration 24, loss = 0.45485880
Iteration 25, loss = 0.45260810
Iteration 26, loss = 0.45020458
Iteration 27, loss = 0.44824090
Iteration 28, loss = 0.44571066
Iteration 29, loss = 0.44339413
Iteration 30, loss = 0.44122742
Iteration 31, loss = 0.43904001
Iteration 32, los



Iteration 1, loss = 0.59920141
Iteration 2, loss = 0.56833947
Iteration 3, loss = 0.54932117
Iteration 4, loss = 0.53528107
Iteration 5, loss = 0.52456178
Iteration 6, loss = 0.51619126
Iteration 7, loss = 0.50964691
Iteration 8, loss = 0.50391122
Iteration 9, loss = 0.49918485
Iteration 10, loss = 0.49516690
Iteration 11, loss = 0.49153370
Iteration 12, loss = 0.48818221
Iteration 13, loss = 0.48474202
Iteration 14, loss = 0.48198675
Iteration 15, loss = 0.47901564
Iteration 16, loss = 0.47614331
Iteration 17, loss = 0.47357077
Iteration 18, loss = 0.47107074
Iteration 19, loss = 0.46864699
Iteration 20, loss = 0.46606604
Iteration 21, loss = 0.46375423
Iteration 22, loss = 0.46158594
Iteration 23, loss = 0.45920422
Iteration 24, loss = 0.45691454
Iteration 25, loss = 0.45464791
Iteration 26, loss = 0.45239359
Iteration 27, loss = 0.45021661
Iteration 28, loss = 0.44816856
Iteration 29, loss = 0.44614757
Iteration 30, loss = 0.44403618
Iteration 31, loss = 0.44189422
Iteration 32, los



Iteration 2, loss = 0.53662555
Iteration 3, loss = 0.49572966
Iteration 4, loss = 0.46753403
Iteration 5, loss = 0.44848358
Iteration 6, loss = 0.43605188
Iteration 7, loss = 0.42747243
Iteration 8, loss = 0.42085867
Iteration 9, loss = 0.41545721
Iteration 10, loss = 0.41091615
Iteration 11, loss = 0.40712039
Iteration 12, loss = 0.40327820
Iteration 13, loss = 0.40013775
Iteration 14, loss = 0.39714277
Iteration 15, loss = 0.39400912
Iteration 16, loss = 0.39124236
Iteration 17, loss = 0.38874194
Iteration 18, loss = 0.38616358
Iteration 19, loss = 0.38411207
Iteration 20, loss = 0.38168552
Iteration 21, loss = 0.37945697
Iteration 22, loss = 0.37711695
Iteration 23, loss = 0.37507691
Iteration 24, loss = 0.37313858
Iteration 25, loss = 0.37101169
Iteration 26, loss = 0.36898047
Iteration 27, loss = 0.36722016
Iteration 28, loss = 0.36511379
Iteration 29, loss = 0.36360639
Iteration 30, loss = 0.36173428
Iteration 31, loss = 0.35943351
Iteration 32, loss = 0.35756452
Iteration 33, lo