## Ensure columns are in correct format

In [3]:
import pandas as pd
import numpy as np
import psycopg2
from scipy.stats import chi2, norm
import matplotlib.pyplot as plt
import chart_studio.plotly as py
import plotly.graph_objs as go
import plotly.express as px
import pandas_profiling as pp
# To make pretty pandas dataframes
from IPython.display import display, HTML

## Connect to Postgres database

In [4]:
conn = psycopg2.connect(host='localhost',dbname='postgres',user='postgres',password='postgres')

## Grab static data

In [5]:
static = pd.read_sql_query(
    """
    SELECT * FROM mimic_derived.sepsis3_static_24hours
    """,
    con=conn
)



In [6]:
static.describe()

Unnamed: 0,subject_id,stay_id,sofa_score,respiration,coagulation,liver,cardiovascular,cns,renal,hadm_id,...,resp_rate_mean,temperature_min,temperature_max,temperature_mean,spo2_min,spo2_max,spo2_mean,glucose_min,glucose_max,glucose_mean
count,35010.0,35010.0,35010.0,35010.0,35010.0,35010.0,35010.0,35010.0,35010.0,35010.0,...,34968.0,33568.0,33568.0,33568.0,34968.0,34968.0,34968.0,34838.0,34838.0,34838.0
mean,14991680.0,34991810.0,3.673836,0.631962,0.496458,0.285461,1.116681,0.468238,0.675036,24997380.0,...,19.758361,36.258026,37.484067,36.871476,91.091692,99.563801,96.897251,137.960745,843.2263,276.097728
std,2879843.0,2884187.0,2.046508,1.040647,0.847443,0.794183,1.24364,0.912042,1.070962,2877899.0,...,4.107213,0.787952,0.812823,0.599082,7.344132,1.13747,2.451192,5357.182696,25613.74,7478.689481
min,10001880.0,30000480.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,20000150.0,...,8.695652,15.0,30.89,30.89,0.3,66.0,46.6,0.12,38.0,38.0
25%,12489420.0,32503020.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,22523790.0,...,16.777778,36.0,37.0,36.585,90.0,100.0,95.75,86.0,130.0,112.5
50%,14993180.0,34976420.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0,25007050.0,...,19.125,36.44,37.33,36.843333,92.0,100.0,97.230769,103.0,166.0,131.5
75%,17477300.0,37481100.0,4.0,1.0,1.0,0.0,1.0,1.0,1.0,27465590.0,...,22.153846,36.67,37.89,37.18,95.0,100.0,98.533333,125.0,215.0,161.770833
max,19999840.0,39999300.0,20.0,4.0,4.0,4.0,4.0,4.0,4.0,29999620.0,...,44.433333,39.8,42.3,40.104118,100.0,100.0,100.0,999999.0,1276100.0,999999.0


In [7]:
pd.set_option('display.max_rows',200)
static.dtypes

subject_id                           int64
stay_id                              int64
antibiotic_time             datetime64[ns]
culture_time                datetime64[ns]
suspected_infection_time    datetime64[ns]
sofa_time                   datetime64[ns]
sofa_score                           int64
respiration                          int64
coagulation                          int64
liver                                int64
cardiovascular                       int64
cns                                  int64
renal                                int64
sepsis3                               bool
hadm_id                              int64
gender                              object
dod                                 object
admittime                   datetime64[ns]
dischtime                   datetime64[ns]
los_hospital                       float64
admission_age                      float64
ethnicity                           object
hospital_expire_flag                 int64
hospstay_se

- We can remove sepsis3 as all are TRUE

In [8]:
static.drop('sepsis3', axis=1, inplace=True)

- We can drop duplicate subject_ids and only keep the first stay in the icu: explained later below

- Gender is expressed as an object, but there are 2 distinct values in the whole dataset, we can express this as categorical

In [9]:
static.gender = static.gender.astype('category')

- dod is expressed as an object but we know this is a date, as specified in the DDL of the table in postgres. There are a huge number of missing values for this, as we know that absence of death means that a patient did not die, but was still discharged. I think a sensible alternative is to drop this column and instead use hospital_expire_flag. This is a binary flag which indicates whether the patient died within the given hospitalization. 1 indicates death in the hospital, and 0 indicates survival to hospital discharge. One point of confusion is the difference in number of hospital expire flags = 1, and the number of date of deaths. My suspicion is that people can only die once, and so potentially a patient who had multiple stays and ultimately died would have multiple dates of death, and only one hospital expire flag. This can be confirmed by only considering individuals on their first icu stay of their first hospital stay, which has the added bonus of being in agreement with one of the assumptions of logistic regression - that is to say that the observations are independent of each other and not coming from repeated or paired data. Hospital expire flag can be used in conjunction with the discharge date to confirm the dod. I will need to confirm this by comparing the count of the number of patients with the count of the number of patients on their first icu stay of their first hospital stay - they should be the same. 

In [10]:
num_pat = static.subject_id.unique().size
num_stays = static.stay_id.unique().size
print('Number of patients:', num_pat)
print('Number of stays:', num_stays)

Number of patients: 27139
Number of stays: 35010


In [11]:
static_temp = static.query('first_icu_stay == True')
num_pat = static_temp.subject_id.unique().size
num_stays = static_temp.stay_id.unique().size
print('Number of patients:', num_pat)
print('Number of stays:', num_stays)

Number of patients: 26319
Number of stays: 31983


This is slightly odd. We'd expect to have less stays, but not less patients. We checked to see if it was someone's first ICU stay. If a patient only has stays that are not their first, then it must mean that they had previous stays where they did not have sepsis. I think the solution to this is to take the MINIMUM of the icu_stay_seq for each patient. This way, we are looking at the first ICU stay where this patient had sepsis. This would mean we have no duplicate patients and thus satisfy our assumptions for logistic regression.

We want to group by subject_id and have an aggregation function to find the minimum of icu_stay_seq. Here we should then have a number of unique patients that matches our whole dataset. 

In [12]:
min_icu_stay = static.groupby('subject_id').icustay_seq.agg('min')
min_icu_stay = min_icu_stay.to_frame()
min_icu_stay.reset_index(inplace=True)


min_icu_stay.subject_id.unique().size
display(min_icu_stay)

Unnamed: 0,subject_id,icustay_seq
0,10001884,1
1,10002013,1
2,10002155,1
3,10002348,1
4,10002428,1
...,...,...
27134,19998878,1
27135,19999297,1
27136,19999442,1
27137,19999625,1


I again got this confused. Lets take this back a notch. We have people who have had multiple ICU stays. These multiple ICU stays can happen during one or more hospital stays. We also have people who have been in the ICU before but didn't have sepsis, and so this can be their second hospital stay or more, but the first time they have sepsis. 

So, if we take the minimum of someones hospital stay, we get the first hospital stay in which they had sepsis. 

If we **also** take the minimum icu stay, then we get the first icu stay during the first hospital stay in which they had sepsis. So all we actually need to do is do a group by aggregation function for min for both hosp stay and icu stay. That way we will have a list of unique patients, **equal** to the number of unique patients in the whole dataset, but with a number of stays equal to the number of patients, thus conforming to the assumptions of logistic regression. 

In [13]:
static_temp = static
first_sepsis_stay = static_temp.groupby(['subject_id','stay_id']).agg({'hospstay_seq':'min','icustay_seq':'min'})
display(first_sepsis_stay)

Unnamed: 0_level_0,Unnamed: 1_level_0,hospstay_seq,icustay_seq
subject_id,stay_id,Unnamed: 2_level_1,Unnamed: 3_level_1
10001884,37510196,1,1
10002013,39060235,1,1
10002155,31090461,2,1
10002155,32358465,3,1
10002155,33685454,1,1
...,...,...,...
19998878,34403689,2,1
19999297,37364566,1,1
19999442,32336619,1,1
19999625,31070865,1,1


In [14]:
static_temp = static
first_sepsis_stay = static_temp.groupby(['subject_id','hospstay_seq','icustay_seq']).nth(0)
display(first_sepsis_stay)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,stay_id,antibiotic_time,culture_time,suspected_infection_time,sofa_time,sofa_score,respiration,coagulation,liver,cardiovascular,...,resp_rate_mean,temperature_min,temperature_max,temperature_mean,spo2_min,spo2_max,spo2_mean,glucose_min,glucose_max,glucose_mean
subject_id,hospstay_seq,icustay_seq,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
10001884,1,1,37510196,2131-01-11 08:00:00,2131-01-10 16:36:00,2131-01-10 16:36:00,2131-01-11 05:00:00,3,0,0,0,3,...,17.775862,35.10,37.20,36.573684,89.0,100.0,98.259259,140.0,148.0,144.333333
10002013,1,1,39060235,2160-05-18 11:00:00,2160-05-18 14:40:00,2160-05-18 11:00:00,2160-05-18 15:00:00,2,1,0,0,1,...,15.409091,36.22,37.70,37.242353,92.0,100.0,97.050000,98.0,216.0,138.052632
10002155,1,1,33685454,2129-08-05 14:00:00,2129-08-04 17:04:00,2129-08-04 17:04:00,2129-08-05 06:00:00,2,0,0,0,1,...,16.080000,35.50,36.83,36.294286,89.0,97.0,92.640000,95.0,107.0,101.000000
10002155,2,1,31090461,2130-09-24 10:00:00,2130-09-23 20:30:00,2130-09-23 20:30:00,2130-09-24 04:00:00,3,0,0,0,1,...,16.400000,36.06,37.33,36.452857,91.0,98.0,93.840000,106.0,106.0,106.000000
10002155,3,1,32358465,2131-03-10 01:00:00,2131-03-09 22:34:00,2131-03-09 22:34:00,2131-03-10 00:00:00,2,0,0,0,1,...,19.937500,34.44,36.50,35.490000,80.0,100.0,95.687500,166.0,235.0,197.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19998878,2,1,34403689,2132-10-01 00:00:00,2132-10-01 00:38:00,2132-10-01 00:00:00,2132-10-01 00:00:00,3,0,2,0,0,...,13.473684,36.61,37.06,36.800000,94.0,100.0,98.842105,159.0,379.0,244.833333
19999297,1,1,37364566,2162-08-19 16:00:00,2162-08-16 00:00:00,2162-08-16 00:00:00,2162-08-16 06:00:00,7,0,3,3,0,...,19.310345,35.56,36.50,35.985714,89.0,97.0,93.866667,67.0,173.0,120.875000
19999442,1,1,32336619,2148-11-19 18:00:00,2148-11-19 17:00:00,2148-11-19 17:00:00,2148-11-19 17:00:00,3,0,1,0,0,...,17.806452,36.83,37.61,37.266000,96.0,100.0,98.964286,134.0,168.0,147.142857
19999625,1,1,31070865,2139-10-10 23:00:00,2139-10-10 16:45:00,2139-10-10 16:45:00,2139-10-10 22:00:00,2,0,0,0,1,...,18.480000,36.44,37.50,36.926667,96.0,100.0,99.600000,121.0,175.0,147.000000


In [15]:
static_temp = static
first_sepsis_stay = static_temp.groupby(['subject_id','icustay_seq'], as_index=False).nth(0)
num_pat = first_sepsis_stay.subject_id.unique().size
num_stays = first_sepsis_stay.stay_id.unique().size
print('Number of patients:', num_pat)
print('Number of stays:', num_stays)

Number of patients: 27139
Number of stays: 29277


In [16]:
static_temp = static
first_sepsis_stay = static_temp.groupby(['subject_id','icustay_seq'], as_index=False).nth(0)
first_sepsis_stay = first_sepsis_stay.groupby(['subject_id','hospstay_seq'], as_index=False).nth(0)
num_pat = first_sepsis_stay.subject_id.unique().size
num_stays = first_sepsis_stay.stay_id.unique().size
print('Number of patients:', num_pat)
print('Number of stays:', num_stays)

Number of patients: 27139
Number of stays: 27737


In [17]:
static_temp = static
static_temp.groupby(['subject_id','hospstay_seq','icustay_seq'], as_index=False).nth(0)
num_pat = first_sepsis_stay.subject_id.unique().size
num_stays = first_sepsis_stay.stay_id.unique().size
print('Number of patients:', num_pat)
print('Number of stays:', num_stays)

Number of patients: 27139
Number of stays: 27737


In [18]:
first_sepsis_stay.to_csv('first_sepsis_stay.csv')

In [19]:
static.sort_values(by=['subject_id','hospstay_seq','icustay_seq'], inplace=True)
static_temp = static.groupby('subject_id', as_index=False).first()

num_pat = static_temp.subject_id.unique().size
num_stays = static_temp.stay_id.unique().size
print('Number of patients:', num_pat)
print('Number of stays:', num_stays)

Number of patients: 27139
Number of stays: 27139


In [20]:
static = static_temp

- ethnicity can be made categorical. Can confirm there are 8 categories with significant numbers for each. Note: there are three categories: other, unknown and unable to obtain, with 0 missing. I think unknown and unable to obtain should be dropped, and other kept. Unknown and unable to obtain can be merged. 

In [21]:
static.loc[static['ethnicity'] == 'UNKNOWN','ethnicity'] = np.nan
static.loc[static['ethnicity'] == 'UNABLE TO OBTAIN','ethnicity'] = np.nan

In [22]:
static.ethnicity = static.ethnicity.astype('category')

In [23]:
static.ethnicity.value_counts()

WHITE                            18277
BLACK/AFRICAN AMERICAN            2378
OTHER                             1269
HISPANIC/LATINO                    924
ASIAN                              777
AMERICAN INDIAN/ALASKA NATIVE       53
Name: ethnicity, dtype: int64

It would be good to express the antibiotic time, culture time, suspected infection time, and sofa time as time elapsed since admission, rather than a datetime.
We should just be able to subtract the icu_admit_time
Antibiotic, culture, suspected infection and sofa are all expressed as datetime64[ns]
icu intime is the same

In [24]:
static_temp = static
static_temp.antibiotic_time = static_temp.antibiotic_time - static_temp.icu_intime
static_temp.culture_time = static_temp.culture_time - static_temp.icu_intime
static_temp.suspected_infection_time = static_temp.suspected_infection_time - static_temp.icu_intime
static_temp.sofa_time = static_temp.sofa_time - static_temp.icu_intime

static = static_temp

# Feature scaling

We want to exclude certain columns from scaling. We have our unique identifier, stay_id/subject_id, so we can drop
-

In [25]:
list(static)

['subject_id',
 'stay_id',
 'antibiotic_time',
 'culture_time',
 'suspected_infection_time',
 'sofa_time',
 'sofa_score',
 'respiration',
 'coagulation',
 'liver',
 'cardiovascular',
 'cns',
 'renal',
 'hadm_id',
 'gender',
 'dod',
 'admittime',
 'dischtime',
 'los_hospital',
 'admission_age',
 'ethnicity',
 'hospital_expire_flag',
 'hospstay_seq',
 'first_hosp_stay',
 'icu_intime',
 'icu_outtime',
 'los_icu',
 'icustay_seq',
 'first_icu_stay',
 'hematocrit_min',
 'hematocrit_max',
 'hemoglobin_min',
 'hemoglobin_max',
 'platelets_min',
 'platelets_max',
 'wbc_min',
 'wbc_max',
 'albumin_min',
 'albumin_max',
 'globulin_min',
 'globulin_max',
 'total_protein_min',
 'total_protein_max',
 'aniongap_min',
 'aniongap_max',
 'bicarbonate_min',
 'bicarbonate_max',
 'bun_min',
 'bun_max',
 'calcium_min',
 'calcium_max',
 'chloride_min',
 'chloride_max',
 'creatinine_min',
 'creatinine_max',
 'sodium_min',
 'sodium_max',
 'potassium_min',
 'potassium_max',
 'abs_basophils_min',
 'abs_basophils

In [26]:
static[['stay_id',
        'hadm_id',
        'dod',
        'admittime',
        'dischtime','hospital_expire_flag','icu_intime','culture_time','suspected_infection_time','sofa_time','sofa_score','respiration','coagulation',
        'liver',
        'cardiovascular',
        'cns',
        'renal',
        'los_hospital',
        'hospstay_seq',
        'first_hosp_stay',
        'los_icu',
        'icustay_seq',
        'first_icu_stay',
        'hematocrit_min',
        'hematocrit_max',
        'hemoglobin_min',
        'hemoglobin_max',
        'platelets_min',
        'platelets_max',
        'wbc_min',
        'wbc_max',
        'albumin_min',
        'albumin_max',
        'globulin_min',
        'globulin_max',
        'total_protein_min',
        'total_protein_max',
        'aniongap_min',
        'aniongap_max',
        'bicarbonate_min',
        'bicarbonate_max',
        'bun_min',
        'bun_max',
        'calcium_min',
        'calcium_max',
        'chloride_min',
        'chloride_max',
        'creatinine_min',
        'creatinine_max',
        'sodium_min',
        'sodium_max',
        'potassium_min',
        'potassium_max',
        'abs_basophils_min',
        'abs_basophils_max',
        'abs_eosinophils_min',
        'abs_eosinophils_max',
        'abs_lymphocytes_min',
        'abs_lymphocytes_max',
        'abs_monocytes_min',
        'abs_monocytes_max',
        'abs_neutrophils_min',
        'abs_neutrophils_max',
        'atyps_min',
        'atyps_max',
        'bands_min',
        'bands_max',
        'imm_granulocytes_min',
        'imm_granulocytes_max',
        'metas_min',
        'metas_max',
        'nrbc_min',
        'nrbc_max',
        'd_dimer_min',
        'd_dimer_max',
        'fibrinogen_min',
        'fibrinogen_max',
        'thrombin_min',
        'thrombin_max',
        'inr_min',
        'inr_max',
        'pt_min',
        'pt_max',
        'ptt_min',
        'ptt_max',
        'alt_min',
        'alt_max',
        'alp_min',
        'alp_max',
        'ast_min',
        'ast_max',
        'amylase_min',
        'amylase_max',
        'bilirubin_total_min',
        'bilirubin_total_max',
        'bilirubin_direct_min',
        'bilirubin_direct_max',
        'bilirubin_indirect_min',
        'bilirubin_indirect_max',
        'ck_cpk_min',
        'ck_cpk_max',
        'ck_mb_min',
        'ck_mb_max',
        'ggt_min',
        'ggt_max',
        'ld_ldh_min',
        'ld_ldh_max',
        'heart_rate_min',
        'heart_rate_max',
        'heart_rate_mean',
        'sbp_min',
        'sbp_max',
        'sbp_mean',
        'dbp_min',
        'dbp_max',
        'dbp_mean',
        'mbp_min',
        'mbp_max',
        'mbp_mean',
        'resp_rate_min',
        'resp_rate_max',
        'resp_rate_mean',
        'temperature_min',
        'temperature_max',
        'temperature_mean',
        'spo2_min',
        'spo2_max',
        'spo2_mean',
        'glucose_min',
        'glucose_max',
        'glucose_mean'
         ]]

Unnamed: 0,stay_id,hadm_id,dod,admittime,dischtime,hospital_expire_flag,icu_intime,culture_time,suspected_infection_time,sofa_time,...,resp_rate_mean,temperature_min,temperature_max,temperature_mean,spo2_min,spo2_max,spo2_mean,glucose_min,glucose_max,glucose_mean
0,37510196,26184834,2131-01-20,2131-01-07 20:39:00,2131-01-20 05:15:00,1,2131-01-11 04:20:05,-1 days +12:15:55,-1 days +12:15:55,0 days 00:39:55,...,17.775862,35.10,37.20,36.573684,89.0,100.0,98.259259,140.0,148.0,144.333333
1,39060235,23581541,,2160-05-18 07:45:00,2160-05-23 13:30:00,0,2160-05-18 10:00:53,0 days 04:39:07,0 days 00:59:07,0 days 04:59:07,...,15.409091,36.22,37.70,37.242353,92.0,100.0,97.050000,98.0,216.0,138.052632
2,33685454,23822395,2131-03-10,2129-08-04 12:44:00,2129-08-18 16:53:00,0,2129-08-04 12:45:00,0 days 04:19:00,0 days 04:19:00,0 days 17:15:00,...,16.080000,35.50,36.83,36.294286,89.0,97.0,92.640000,95.0,107.0,101.000000
3,32610785,22725460,,2112-11-30 22:22:00,2112-12-10 17:56:00,0,2112-11-30 23:24:00,2 days 14:22:00,2 days 14:22:00,0 days 14:36:00,...,18.600000,36.56,36.94,36.731667,90.0,97.0,93.760000,108.0,127.0,121.400000
4,38875437,28662225,,2156-04-12 14:16:00,2156-04-29 16:26:00,0,2156-04-19 18:11:19,-1 days +22:16:41,-1 days +22:16:41,0 days 00:48:41,...,21.920732,36.94,38.39,37.453333,91.0,100.0,99.196721,147.0,199.0,166.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27134,34403689,26489544,,2132-09-30 20:06:00,2132-10-02 12:00:00,0,2132-09-30 21:55:00,0 days 02:43:00,0 days 02:05:00,0 days 02:05:00,...,13.473684,36.61,37.06,36.800000,94.0,100.0,98.842105,159.0,379.0,244.833333
27135,37364566,21439025,2162-08-23,2162-08-14 23:55:00,2162-08-23 04:16:00,1,2162-08-16 05:48:32,-1 days +18:11:28,-1 days +18:11:28,0 days 00:11:28,...,19.310345,35.56,36.50,35.985714,89.0,97.0,93.866667,67.0,173.0,120.875000
27136,32336619,26785317,,2148-11-19 10:00:00,2148-12-04 16:25:00,0,2148-11-19 14:23:43,0 days 02:36:17,0 days 02:36:17,0 days 02:36:17,...,17.806452,36.83,37.61,37.266000,96.0,100.0,98.964286,134.0,168.0,147.142857
27137,31070865,25304202,,2139-10-10 18:06:00,2139-10-16 03:30:00,0,2139-10-10 19:18:00,-1 days +21:27:00,-1 days +21:27:00,0 days 02:42:00,...,18.480000,36.44,37.50,36.926667,96.0,100.0,99.600000,121.0,175.0,147.000000


In [28]:
features = static[['culture_time',
                   'suspected_infection_time',
                   'sofa_time',
                   'sofa_score',
                   'respiration',
                   'coagulation',
                   'first_hosp_stay',
                   'los_icu',
                   'icustay_seq',
                   'first_icu_stay',
                   'hematocrit_min',
                   'hematocrit_max',
                   'hemoglobin_min',
                   'hemoglobin_max',
                   'platelets_min',
                   'platelets_max',
                   'wbc_min',
                   'wbc_max',
                   'albumin_min',
                   'albumin_max',
                   'globulin_min',
                   'globulin_max',
                   'total_protein_min',
                   'total_protein_max',
                   'aniongap_min',
                   'aniongap_max',
                   'bicarbonate_min',
                   'bicarbonate_max',
                   'bun_min',
                   'bun_max',
                   'calcium_min',
                   'calcium_max',
                   'chloride_min',
                   'chloride_max',
                   'creatinine_min',
                   'creatinine_max',
                   'sodium_min',
                   'sodium_max',
                   'potassium_min',
                   'potassium_max',
                   'abs_basophils_min',
                   'abs_basophils_max',
                   'abs_eosinophils_min',
                   'abs_eosinophils_max',
                   'abs_lymphocytes_min',
                   'abs_lymphocytes_max',
                   'abs_monocytes_min',
                   'abs_monocytes_max',
                   'abs_neutrophils_min',
                   'abs_neutrophils_max',
                   'atyps_min',
                   'atyps_max',
                   'bands_min',
                   'bands_max',
                   'imm_granulocytes_min',
                   'imm_granulocytes_max',
                   'metas_min',
                   'metas_max',
                   'nrbc_min',
                   'nrbc_max',
                   'd_dimer_min',
                   'd_dimer_max',
                   'fibrinogen_min',
                   'fibrinogen_max',
                   'thrombin_min',
                   'thrombin_max',
                   'inr_min',
                   'inr_max',
                   'pt_min',
                   'pt_max',
                   'ptt_min',
                   'ptt_max',
                   'alt_min',
                   'alt_max',
                   'alp_min',
                   'alp_max',
                   'ast_min',
                   'ast_max',
                   'amylase_min',
                   'amylase_max',
                   'bilirubin_total_min',
                   'bilirubin_total_max',
                   'bilirubin_direct_min',
                   'bilirubin_direct_max',
                   'bilirubin_indirect_min',
                   'bilirubin_indirect_max',
                   'ck_cpk_min',
                   'ck_cpk_max',
                   'ck_mb_min',
                   'ck_mb_max',
                   'ggt_min',
                   'ggt_max',
                   'ld_ldh_min',
                   'ld_ldh_max',
                   'heart_rate_min',
                   'heart_rate_max',
                   'heart_rate_mean',
                   'sbp_min',
                   'sbp_max',
                   'sbp_mean',
                   'dbp_min',
                   'dbp_max',
                   'dbp_mean',
                   'mbp_min',
                   'mbp_max',
                   'mbp_mean',
                   'resp_rate_min',
                   'resp_rate_max',
                   'resp_rate_mean',
                   'temperature_min',
                   'temperature_max',
                   'temperature_mean',
                   'spo2_min',
                   'spo2_max',
                   'spo2_mean',
                   'glucose_min',
                   'glucose_max',
                   'glucose_mean']]


In [29]:
from sklearn.preprocessing import RobustScaler

transformer = RobustScaler().fit(features)

TypeError: float() argument must be a string or a number, not 'Timedelta'