## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.utils import resample

## Import Tables

In [2]:
# # Doug's Path
diagnoses_icd = pd.read_csv('./data/mimic-iv-0.4/hosp/diagnoses_icd.csv.gz', compression='gzip')
d_icd_diagnoses = pd.read_csv('./data/mimic-iv-0.4/hosp/d_icd_diagnoses.csv.gz', compression='gzip')
admissions = pd.read_csv('./data/mimic-iv-0.4/core/admissions.csv.gz', compression='gzip')
d_labitems = pd.read_csv('./data/mimic-iv-0.4/hosp/d_labitems.csv.gz', compression='gzip')

# Stephen's Path
# path = "D:\\Bootcamp\\MIMIC IV\\"
# admissions = pd.read_csv(path + "core\\admissions.csv.gz", compression='gzip')
# diagnoses_icd = pd.read_csv(path + "hosp\\diagnoses_icd.csv.gz", compression='gzip')
# d_icd_diagnoses = pd.read_csv(path + "hosp\\d_icd_diagnoses.csv.gz", compression='gzip')
# d_labitems = pd.read_csv(path + "hosp\\d_labitems.csv.gz", compression='gzip')

## Pull codes related to Congestive Heart Failure

In [3]:
# Congestive Heart Failure
chf_codes = list(map(str, d_icd_diagnoses[(d_icd_diagnoses['long_title'].str.lower().str.contains('congestive')) & (d_icd_diagnoses['long_title'].str.lower().str.contains('heart'))]['icd_code']))

# kidney_list = list(d_icd_diagnoses[(d_icd_diagnoses['long_title'].str.lower().str.contains('kidney')) & (d_icd_diagnoses['long_title'].str.lower().str.contains('failure'))]['icd_code'])
# pd.DataFrame(diagnoses_icd[diagnoses_icd['icd_code'].isin(kidney_list)].icd_code.value_counts()).reset_index().head(20).merge(d_icd_diagnoses, left_index = True, left_on = "index", right_on = 'icd_code', how = 'left')

# ICD Codes we care about
# 5849, N179, 5845, N170
# akf_codes = ['5849   ', 'N179   ', '5845   ', 'N170   ']
chf_codes

['39891  ',
 '4280   ',
 'I5020  ',
 'I5021  ',
 'I5022  ',
 'I5023  ',
 'I5030  ',
 'I5031  ',
 'I5032  ',
 'I5033  ',
 'I5040  ',
 'I5041  ',
 'I5042  ',
 'I5043  ']

## Start filtering down to only records where AKF is True

In [5]:
diagnoses_icd_chf = diagnoses_icd[diagnoses_icd['icd_code'].isin(chf_codes)]


In [6]:
diagnoses_icd_chf.head()

Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version
886,16569548,24870770,12,4280,9
1050,18460230,25771608,2,4280,9
2342,19661870,22135027,3,4280,9
2683,18932584,23973570,10,4280,9
2961,16723797,28913496,30,4280,9


## Generate new Admissions table with hospital stay duration and CHF flag

In [7]:
admissions_chf = admissions[['subject_id', 'hadm_id', 'admittime', 'dischtime', 'ethnicity']]

admissions_chf['CHF_subj'] = np.where(admissions_chf.subject_id.isin(diagnoses_icd_chf.subject_id), 1, 0)
admissions_chf['CHF_adm'] = np.where(admissions_chf.hadm_id.isin(diagnoses_icd_chf.hadm_id), 1, 0)
admissions_chf['time_spent'] = pd.to_datetime(admissions_chf['dischtime']) - pd.to_datetime(admissions_chf['admittime'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [8]:
admissions_chf.head(10)

Unnamed: 0,subject_id,hadm_id,admittime,dischtime,ethnicity,CHF_subj,CHF_adm,time_spent
0,12427812,21593330,2184-01-06 11:51:00,2184-01-10 11:45:00,UNKNOWN,0,0,3 days 23:54:00
1,14029832,22059088,2120-01-18 01:28:00,2120-01-20 16:13:00,OTHER,0,0,2 days 14:45:00
2,14495017,22484010,2175-01-28 15:41:00,2175-01-29 16:00:00,WHITE,0,0,1 days 00:19:00
3,13676048,23865469,2193-01-19 05:27:00,2193-01-24 18:59:00,WHITE,0,0,5 days 13:32:00
4,13831972,27763544,2131-01-27 04:03:00,2131-01-27 05:39:00,WHITE,0,0,0 days 01:36:00
5,18523038,25414328,2142-08-26 17:14:00,2142-08-27 10:00:00,WHITE,0,0,0 days 16:46:00
6,16705931,20580522,2174-10-24 11:30:00,2174-10-24 18:45:00,WHITE,0,0,0 days 07:15:00
7,19963742,24951506,2171-07-04 15:58:00,2171-07-05 13:37:00,UNKNOWN,0,0,0 days 21:39:00
8,10903424,22568585,2181-01-31 13:09:00,2181-01-31 13:42:00,WHITE,0,0,0 days 00:33:00
9,15030422,21975876,2149-09-21 23:54:00,2149-09-22 03:59:00,WHITE,0,0,0 days 04:05:00


## Sample Target Patients for Modeling

In [9]:
target_sample = admissions_chf[['subject_id', 'CHF_subj']].drop_duplicates()

target_sample = resample(target_sample, n_samples = 10000, replace = False, stratify = target_sample.CHF_subj, random_state = 0)

print('Number of unique Patients: ', target_sample.subject_id.nunique())
print('\nSample Breakdown:')
print(target_sample.CHF_subj.value_counts())

Number of unique Patients:  10000

Sample Breakdown:
0    9179
1     821
Name: CHF_subj, dtype: int64


In [27]:
# identify first akf visit for NON-AKF patients
sample_non = target_sample[target_sample.CHF_subj==0]
admissions_non = admissions[admissions.subject_id.isin(sample_non.subject_id)]
admissions_non['admittime'] = pd.to_datetime(admissions_non.admittime)
first_non = admissions_non.loc[admissions_non.groupby('subject_id').admittime.idxmin()]
# sanity checks
print(first_non.shape)                 # number of rows
print(first_non.subject_id.nunique())  # matches number of patients
print(first_non.hadm_id.nunique())     # matches number of visits

# pull out hospital visits for target non-akf samples
sample_non = sample_non.merge(first_non, how='inner', on='subject_id')
sample_non = sample_non[['subject_id', 'hadm_id', 'CHF_subj']]
sample_non = sample_non.rename({'CHF_subj':'CHF'}, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


(9179, 15)
9179
9179


In [28]:
# identify first chf visit for all CHF patients
sample_chf = target_sample[target_sample.CHF_subj==1]
first_chf = admissions_chf[admissions_chf.subject_id.isin(sample_chf.subject_id)]
first_chf['admittime'] = pd.to_datetime(first_chf.admittime)
first_chf = first_chf.loc[first_chf[first_chf.CHF_adm==1].groupby('subject_id').admittime.idxmin()]
first_chf = first_chf[['subject_id', 'hadm_id', 'CHF_adm']]
# sanity checks
print(np.mean(first_chf.CHF_adm))   # make sure everyone has AKF
print(first_chf.shape)                 # make sure number of rows...
print(first_chf.subject_id.nunique())  # ...matches number of patients
print(first_chf.hadm_id.nunique())     # ...matches number of visits

# pull out hospital visits for target akf samples
sample_chf = sample_chf.merge(first_chf, how='inner', on='subject_id')
sample_chf = sample_chf[['subject_id', 'hadm_id', 'CHF_adm']]
sample_chf = sample_chf.rename({'CHF_adm':'CHF'}, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


1.0
(821, 3)
821
821


In [29]:
# merge back together and write to file
admissions_sample = pd.concat([sample_chf, sample_non])
admissions_sample = admissions_sample.reset_index(drop=True)
print(admissions_sample.shape)                  # sanity check
print(admissions_sample.subject_id.nunique())   # sanity check

(10000, 3)
10000


Generate pickle file so we all work on the exact same data

In [30]:
admissions_sample.to_pickle('admissions_sample_chf.pkl')

In [13]:
# admissions_sample = pd.read_pickle("admissions_sample.pkl")

## Pull Chart and Lab data only where it matches our sample patient list

In [16]:
# %%time #run this for next time

# chunksize = 3*(10 ** 6)
# counter=0
# chartevents_sample = []
# for chunk in pd.read_csv('./data/mimic-iv-0.4/icu/chartevents.csv.gz', compression='gzip', chunksize=chunksize):
#     chartevents_sample.append(chunk[chunk['subject_id'].isin(list(admissions_sample.subject_id))])
#     counter+=1
#     print(f'chunk {counter} processed')

# chartevents_final = pd.concat(chartevents_sample)


In [15]:
# chartevents_final.to_pickle("chartevents_final.pkl")

In [16]:
# chartevents_final = pd.read_pickle("chartevents_final.pkl")

In [17]:
chartevents_final.shape

(7602370, 10)

## Load Lab Events table

In [17]:
# %%time
chunksize = 3*(10 ** 6)
counter=0
lab_events_sample = []
for chunk in pd.read_csv('./data/mimic-iv-0.4/hosp/labevents.csv.gz', compression='gzip', chunksize=chunksize):
    lab_events_sample.append(chunk[chunk['subject_id'].isin(list(admissions_sample.subject_id))])
    counter+=1
    print(f'chunk {counter} processed')

lab_events_final = pd.concat(lab_events_sample)
# 3min 52s

chunk 1 processed
chunk 2 processed
chunk 3 processed


  interactivity=interactivity, compiler=compiler, result=result)


chunk 4 processed
chunk 5 processed
chunk 6 processed
chunk 7 processed
chunk 8 processed
chunk 9 processed


  interactivity=interactivity, compiler=compiler, result=result)


chunk 10 processed


  interactivity=interactivity, compiler=compiler, result=result)


chunk 11 processed
chunk 12 processed
chunk 13 processed
chunk 14 processed
chunk 15 processed


  interactivity=interactivity, compiler=compiler, result=result)


chunk 16 processed
chunk 17 processed
chunk 18 processed
chunk 19 processed
chunk 20 processed
chunk 21 processed
chunk 22 processed
chunk 23 processed


  interactivity=interactivity, compiler=compiler, result=result)


chunk 24 processed
chunk 25 processed
chunk 26 processed
chunk 27 processed
chunk 28 processed
chunk 29 processed
chunk 30 processed
chunk 31 processed
chunk 32 processed
chunk 33 processed
chunk 34 processed
chunk 35 processed
chunk 36 processed
chunk 37 processed
chunk 38 processed
chunk 39 processed
chunk 40 processed
chunk 41 processed


In [18]:
lab_events_final.to_pickle("lab_events_final_chf.pkl")

In [20]:
# lab_events_final = pd.read_pickle("lab_events_final.pkl")

In [19]:
lab_events_final.shape

(4573368, 15)

## Manipulating the Data

Take only records related to our sample patients

Filter lab events based on first hospital visit of our sample population

In [22]:
admissions_sample = pd.read_pickle("admissions_sample.pkl")

In [21]:
# lab_events_sampled = lab_events_final.copy()
%time lab_events_sampled = lab_events_final[lab_events_final['hadm_id'].isin(admissions_sample.hadm_id)]


CPU times: user 2.22 s, sys: 3.44 s, total: 5.66 s
Wall time: 11.7 s


Check how many patients have had each test done

In [22]:
lab_events_sampled.groupby('itemid')['subject_id'].nunique().sort_values(ascending = False).head(10)

itemid
51221    6187
51265    6093
51222    6040
51301    6038
51248    6034
51249    6034
51277    6034
51279    6034
51250    6034
50971    5548
Name: subject_id, dtype: int64

### Most common tests performed on patients with CHF

In [31]:
lab_events_sampled[lab_events_sampled['subject_id'].isin(list(admissions_sample[admissions_sample['CHF'] == 1].subject_id))].groupby('itemid')['subject_id'].nunique().sort_values(ascending = False).head(10)


itemid
50971    799
50912    797
50983    797
51006    796
50902    796
50868    792
50882    792
51265    789
51221    789
50931    788
Name: subject_id, dtype: int64

In [33]:
# admissions_sample.sample(50)

Item Black List

In [40]:
item_black_list = [50920, 51466, 51486, 51487, 51464, 51484, 51478, 51514, 51492, 51506, 51491]
# 50920 - test results are text in comments - need to come back and figure out how to handle this
# 51466 - measures blood in urine; value and comments sometimes differ; need to add mapping
# 51486 - same as above, leukocytes in the urine
# 51487 - same as above, nitrite in the urine
# 51464 - same as above, bilirubin in the urine
# 51484 - same as above, ketone in the urine
# 51478 - same as above, glucose in the urine
# 51514 - same as above, urobilinogen in the urine
# 51492 - same as above, protein in the urine
# 51506 - same as above, urine appearance

Take only the X most commonly performed tests

In [35]:
top_test_num = 30

lab_events_valid = lab_events_sampled[~lab_events_sampled['itemid'].isin(item_black_list)]

# On patients with AKF
itemid_sub_sample = lab_events_valid[lab_events_valid['subject_id'].isin(list(admissions_sample[admissions_sample['CHF'] == 1].subject_id))].groupby('itemid')['subject_id'].nunique().sort_values(ascending = False).head(top_test_num).reset_index().rename(columns = {'index' : 'itemid'}).itemid.to_list()
# On all sample patients
# itemid_sub_sample = lab_events_sampled.groupby('itemid')['subject_id'].nunique().sort_values(ascending = False).head(top_test_num).reset_index().rename(columns = {'index' : 'itemid'}).itemid.to_list()

lab_events_sampled_sub = lab_events_sampled[lab_events_sampled['itemid'].isin(itemid_sub_sample)]

lab_events_sampled_sub


Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
23611,30566894,12469262,25365926.0,10015871,51237,2177-02-09 06:25:00,2177-02-09 07:18:00,1.8,1.80,,0.9,1.1,abnormal,ROUTINE,
23612,30566895,12469262,25365926.0,10015871,51274,2177-02-09 06:25:00,2177-02-09 07:18:00,19.2,19.20,sec,9.4,12.5,abnormal,ROUTINE,
23613,30566896,12469262,25365926.0,39375115,51221,2177-02-09 06:25:00,2177-02-09 07:09:00,23.9,23.90,%,36.0,48.0,abnormal,ROUTINE,
23614,30566897,12469262,25365926.0,39375115,51222,2177-02-09 06:25:00,2177-02-09 07:09:00,7.7,7.70,g/dL,12.0,16.0,abnormal,ROUTINE,
23615,30566898,12469262,25365926.0,39375115,51248,2177-02-09 06:25:00,2177-02-09 07:09:00,37.4,37.40,pg,27.0,32.0,abnormal,ROUTINE,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122235567,29776052,12408953,21986775.0,92842480,51250,2156-08-18 06:31:00,2156-08-18 07:27:00,93,93.00,fL,82.0,98.0,,ROUTINE,
122235568,29776053,12408953,21986775.0,92842480,51265,2156-08-18 06:31:00,2156-08-18 07:27:00,403,403.00,K/uL,150.0,440.0,,ROUTINE,
122235569,29776054,12408953,21986775.0,92842480,51277,2156-08-18 06:31:00,2156-08-18 07:27:00,17.4,17.40,%,10.5,15.5,abnormal,ROUTINE,
122235570,29776055,12408953,21986775.0,92842480,51279,2156-08-18 06:31:00,2156-08-18 07:27:00,3.09,3.09,m/uL,4.2,5.4,abnormal,ROUTINE,


In [36]:
lab_events_sampled_sub[lab_events_sampled_sub['itemid'] == 50920].comments.value_counts()

Series([], Name: comments, dtype: int64)

## Code to look into specific test values

In [29]:
# lab_events_sampled_sub.groupby(['subject_id', 'hadm_id', 'itemid']).count()


In [30]:
lab_events_sampled_sub_tiny = lab_events_sampled_sub[(lab_events_sampled_sub['subject_id'] == 19988493) & (lab_events_sampled_sub['hadm_id'] == 25600709.0) & (lab_events_sampled_sub['itemid'].isin([51237]))]#.valuenum.describe()
# lab_events_sampled_sub.loc[(lab_events_sampled_sub['subject_id'] == 19988493) & (lab_events_sampled_sub['hadm_id'] == 25600709.0) & (lab_events_sampled_sub['itemid'].isin([51478, 51484])),]



In [31]:
# lab_events_sampled_sub_tiny['comments'] = lab_events_sampled_sub_tiny['comments'].replace(np.nan, 'NA')

lab_events_sampled_sub_tiny


Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
116765206,124205250,19988493,25600709.0,29865343,51237,2116-09-26 23:36:00,2116-09-27 00:26:00,1.1,1.1,,0.9,1.1,,STAT,
116765231,124205276,19988493,25600709.0,1870784,51237,2116-09-27 13:35:00,2116-09-27 14:09:00,1.2,1.2,,0.9,1.1,abnormal,STAT,
116765292,124205338,19988493,25600709.0,57401788,51237,2116-09-28 03:53:00,2116-09-28 05:02:00,1.0,1.0,,0.9,1.1,,ROUTINE,
116765394,124205451,19988493,25600709.0,75955093,51237,2116-09-30 06:20:00,2116-09-30 08:16:00,1.0,1.0,,0.9,1.1,,ROUTINE,


## Special updates for Urine sample tests

In [32]:
print(lab_events_sampled_sub[(lab_events_sampled_sub['itemid'] == 51484)].value.unique())

print(lab_events_sampled_sub[(lab_events_sampled_sub['itemid'] == 51478)].value.unique())


[]
[]


In [33]:
# lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51237) & (lab_events_sampled_sub['value'].isin(['>21.8', '>19.2', 'ERROR', 'UNABLE TO REPORT'])), ]


In [37]:
# Other invalid value imputation
# For now set them all to normal/most common value
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51237) & (lab_events_sampled_sub['value'].isin([np.nan, 'ERROR', 'UNABLE TO REPORT'])), ['value', 'valuenum']] = 1.1
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51237) & (lab_events_sampled_sub['value'].isin(['>21.8'])), ['value', 'valuenum']] = 22
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51237) & (lab_events_sampled_sub['value'].isin(['>19.2'])), ['value', 'valuenum']] = 20

lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51275) &  (lab_events_sampled_sub['value'].isin(['>150'])), ['value', 'valuenum']] = 175

# Should we just delete these?
# For 5k patients there are 6 records here
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51274) &  (lab_events_sampled_sub['value'].isin(['ERROR'])), ['value', 'valuenum']] = 11

# Should we just delete these?
# For 5k patients there are 1 records here
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51275) &  (lab_events_sampled_sub['value'].isin(['UNABLE TO REPORT'])), ['value', 'valuenum']] = 31

# Clumsy..
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51275) &  (lab_events_sampled_sub['value'].isin(['34..3'])), ['value', 'valuenum']] = 34.3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [38]:
# Used to investigate bad data

lab_events_sampled_sub[lab_events_sampled_sub['value'] == ' ']


Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
26820103,123256387,19913465,28293201.0,90246682,51491,2167-08-04 17:00:00,2167-08-04 17:51:00,,,units,5.0,8.0,,STAT,UNABLE TO REPORT.
119090733,1874983,10153439,22115349.0,60839603,51491,2121-03-14 21:17:00,2121-03-15 07:46:00,,,units,5.0,8.0,,ROUTINE,UNABLE TO REPORT.


In [36]:
# Used to investigate bad data
pd.set_option('display.max_rows', 60)

lab_events_sampled_sub[lab_events_sampled_sub['itemid'] == 51498].value.value_counts().reset_index().sort_values(by = 'index')


Unnamed: 0,index,value
44,,3
56,1.000,1
45,1.001,3
38,1.002,6
33,1.003,9
23,1.004,30
19,1.005,38
16,1.006,53
15,1.007,59
7,1.008,79


In [37]:
# lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51466)]['value'].map({'' : 0.0, 'NEG' : 0.0, 'TR' : 1.0, 'SM' : 2.0, 'MOD' : 3.0, 'LG' : 4.0, 'LGE' : 4.0}).fillna(0.0)
# lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51466)]['value']
# = lab_events_sampled_sub['GarageFinish'].map({'NA' : 0.0, 'Unf' : 1.0, 'RFn' : 2.0, 'Fin' : 3.0})


In [44]:
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51484) & (lab_events_sampled_sub['value'].isin([np.nan, ' ', 'TR', 'NEG'])), ['value', 'valuenum']] = 0
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51478) & (lab_events_sampled_sub['value'].isin([np.nan, ' ', 'TR', 'NEG'])), ['value', 'valuenum']] = 0

lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51484) & (lab_events_sampled_sub['ref_range_lower'].isnull()), 'ref_range_lower'] = 0
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51478) & (lab_events_sampled_sub['ref_range_lower'].isnull()), 'ref_range_lower'] = 0

lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51484) & (lab_events_sampled_sub['ref_range_upper'].isnull()), 'ref_range_upper'] = 0
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51478) & (lab_events_sampled_sub['ref_range_upper'].isnull()), 'ref_range_upper'] = 0

# What are good replacement values for these?
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51484) & (lab_events_sampled_sub['value'].isin(['>80'])), ['value', 'valuenum']] = 120
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51478) & (lab_events_sampled_sub['value'].isin(['>1000'])), ['value', 'valuenum']] = 1250

lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51498) & (lab_events_sampled_sub['value'].isin(['<1.005'])), ['value', 'valuenum']] = 1.000
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51498) & (lab_events_sampled_sub['value'].isin(['<=1.005'])), ['value', 'valuenum']] = 1.000
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51498) & (lab_events_sampled_sub['value'].isin(['>1.030'])), ['value', 'valuenum']] = 1.035
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51498) & (lab_events_sampled_sub['value'].isin(['>1.035'])), ['value', 'valuenum']] = 1.040
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51498) & (lab_events_sampled_sub['value'].isin(['>1.050'])), ['value', 'valuenum']] = 1.055
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51498) & (lab_events_sampled_sub['value'].isin(['>=1.035'])), ['value', 'valuenum']] = 1.040
lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51498) & (lab_events_sampled_sub['value'].isin([' '])), ['value', 'valuenum']] = 1.015

lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51491) & (lab_events_sampled_sub['value'].isin([' '])), ['value', 'valuenum']] = 6.0

# lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51484) & (lab_events_sampled_sub['value'].astype(float) > 0), 'flag'] = 'abnormal'
# lab_events_sampled_sub.loc[(lab_events_sampled_sub['itemid'] == 51478) & (lab_events_sampled_sub['value'].astype(float) > 0), 'flag'] = 'abnormal'


In [39]:
# Check on differences between value and valuenum columns

lab_events_sampled_sub.loc[((lab_events_sampled_sub['value'].astype(str) + str('.0')) != lab_events_sampled_sub['valuenum'].astype(str)) & ((lab_events_sampled_sub['value'].astype(str) + str('0')) != lab_events_sampled_sub['valuenum'].astype(str)) & ((lab_events_sampled_sub['value'].astype(str)) != lab_events_sampled_sub['valuenum'].astype(str)) & ((lab_events_sampled_sub['value'].astype(str)) != lab_events_sampled_sub['valuenum'].astype(str) + str('0')) & ((str('0') + lab_events_sampled_sub['value'].astype(str)) != lab_events_sampled_sub['valuenum'].astype(str)), ]


Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
336856,33996144,12745561,20731073.0,57802208,51498,2181-01-08 20:52:00,2181-01-08 20:59:00,1.007,1.007,,1.001,1.035,,STAT,
1409444,45059123,13629545,25775076.0,90762997,51498,2165-01-26 10:29:00,2165-01-26 11:31:00,1.017,1.017,,1.001,1.035,,ROUTINE,
1666394,47199633,13809869,27450240.0,16343571,51498,2118-05-14 23:23:00,2118-05-15 00:52:00,1.007,1.007,,1.001,1.035,,ROUTINE,
2273592,53256330,14285139,26961757.0,44892615,51498,2123-09-06 10:52:00,2123-09-06 11:39:00,1.007,1.007,,1.001,1.035,,ROUTINE,
2381428,54601194,14394997,20327732.0,62837261,51498,2157-12-31 23:05:00,2157-12-31 23:27:00,1.017,1.017,,1.001,1.035,,STAT,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120434256,11150480,10915996,22515797.0,34269915,51498,2178-09-21 14:16:00,2178-09-21 17:32:00,1.007,1.007,,1.001,1.035,,ROUTINE,
120436628,11178342,10918410,21954505.0,65846117,51498,2148-01-17 17:00:00,2148-01-17 17:25:00,1.027,1.027,,1.001,1.035,,ROUTINE,
120951761,16272155,11319328,29695432.0,17340353,51498,2142-06-18 22:06:00,2142-06-18 22:43:00,1.017,1.017,,1.001,1.035,,STAT,
121517405,22034085,11791735,26645051.0,65960542,51498,2181-02-22 10:03:00,2181-02-22 10:46:00,1.019,1.019,,1.001,1.035,,ROUTINE,


In [40]:
# lab_events_sampled_sub.loc[(lab_events_sampled_sub['subject_id'] == 19988493) & (lab_events_sampled_sub['hadm_id'] == 25600709.0) & (lab_events_sampled_sub['itemid'].isin([51478, 51484])),]


## Get Aggregate Test Values

In [45]:
%time lab_events_sampled_sub_grouped = lab_events_sampled_sub.groupby(['subject_id', 'hadm_id', 'itemid']).agg({'itemid' : ['count'], 'valuenum' : ['min', 'max', 'mean'], 'flag' : ['count'], 'ref_range_lower' : ['min'], 'ref_range_upper' : ['min']})
# 'comments' : ['unique'], 


CPU times: user 503 ms, sys: 191 ms, total: 694 ms
Wall time: 1.25 s


In [46]:
lab_events_sampled_sub_grouped.reset_index(inplace = True)
lab_events_sampled_sub_grouped.columns = ['subject_id', 'hadm_id', 'itemid', 'count', 'min', 'max', 'mean', 'abn_percent', 'range_min', 'range_max']
# 'comments', 


In [47]:
# lab_events_sampled_sub_grouped[lab_events_sampled_sub_grouped['itemid'] == 51221]

missing_vals = lab_events_sampled_sub_grouped.isnull().sum(axis = 0) 
missing_vals[missing_vals > 0].sort_values()

min     1234
max     1234
mean    1234
dtype: int64

In [48]:
lab_events_sampled_sub_grouped['below_min'] = np.where(lab_events_sampled_sub_grouped['min'] < lab_events_sampled_sub_grouped['range_min'], lab_events_sampled_sub_grouped['range_min'] - lab_events_sampled_sub_grouped['min'], 0)
lab_events_sampled_sub_grouped['above_max'] = np.where(lab_events_sampled_sub_grouped['max'] > lab_events_sampled_sub_grouped['range_max'], lab_events_sampled_sub_grouped['max'] - lab_events_sampled_sub_grouped['range_max'], 0)
lab_events_sampled_sub_grouped['abn_percent'] = lab_events_sampled_sub_grouped['abn_percent'] / lab_events_sampled_sub_grouped['count']


In [49]:
lab_events_sampled_sub_grouped

Unnamed: 0,subject_id,hadm_id,itemid,count,min,max,mean,abn_percent,range_min,range_max,below_min,above_max
0,10002264,23205369.0,50885,1,4.10,4.10,4.100000,0.000000,0.0,12.0,0.00,0.0
1,10002495,24982426.0,50861,3,36.00,44.00,39.333333,0.333333,0.0,40.0,0.00,4.0
2,10002495,24982426.0,50863,3,75.00,99.00,86.000000,0.000000,40.0,130.0,0.00,0.0
3,10002495,24982426.0,50868,10,15.00,23.00,18.900000,0.200000,8.0,20.0,0.00,3.0
4,10002495,24982426.0,50878,3,32.00,210.00,96.333333,0.666667,0.0,40.0,0.00,170.0
...,...,...,...,...,...,...,...,...,...,...,...,...
140777,19999204,29046609.0,51275,3,31.80,34.70,33.200000,0.000000,25.0,36.5,0.00,0.0
140778,19999204,29046609.0,51277,8,15.80,16.10,15.925000,1.000000,10.5,15.5,0.00,0.6
140779,19999204,29046609.0,51279,8,3.34,3.56,3.472500,1.000000,4.6,6.1,1.26,0.0
140780,19999204,29046609.0,51301,8,7.80,11.70,9.325000,0.250000,4.0,10.0,0.00,1.7


## Merge Admissions data so we can group by ethnicity/gender to grab average range min and max

In [46]:
# Here is where we merge Admissions

# lab_events_sampled_sub_grouped.merge(d_icd_diagnoses, left_index = True, left_on = "index", right_on = 'icd_code', how = 'left')


In [50]:
# For now aggregate all patients together

lab_range_min_max = lab_events_sampled_sub_grouped.groupby('itemid').agg({'range_min' : 'mean', 'range_max' : 'mean'})
lab_range_min_max.reset_index(inplace = True)

lab_range_dic = dict(zip(lab_range_min_max['itemid'], zip(lab_range_min_max['range_min'], lab_range_min_max['range_max'])))
lab_range_dic


{50861: (0.0, 40.0),
 50863: (37.366042345276874, 125.65757328990227),
 50868: (8.355709595038308, 19.572053994892375),
 50878: (0.0, 40.0),
 50882: (22.0, 32.0),
 50885: (0.0, 4.034812141795797),
 50893: (8.406765327696299, 10.290761099366144),
 50902: (96.0, 108.0),
 50911: (0.0, 10.0),
 50912: (0.44450771491552543, 1.1445077149154852),
 50931: (69.86519840516424, 100.84203531422062),
 50960: (1.598434891485942, 2.593739565943461),
 50970: (2.7223794212220396, 4.536227224008577),
 50971: (3.328334534967258, 5.142501802450993),
 50983: (133.35400797968808, 145.35400797968808),
 51003: (0.0, 0.009999999999999912),
 51006: (6.0, 20.0),
 51221: (37.33020850169711, 49.35930176175852),
 51222: (12.559139072847234, 16.73243377483429),
 51237: (0.9000000000000571, 1.0999999999999366),
 51248: (26.327974809413323, 31.912993039443155),
 51249: (31.322671528007955, 35.64534305601591),
 51250: (81.65197215777262, 98.34802784222738),
 51265: (150.0, 425.67536517314954),
 51274: (9.747572815534639

Pivot the table so we have feature columns related to test results

In [48]:
lab_events_sampled_sub_grouped[lab_events_sampled_sub_grouped['hadm_id'] == 23248240.0]

Unnamed: 0,subject_id,hadm_id,itemid,count,min,max,mean,abn_percent,range_min,range_max,below_min,above_max


In [51]:
lab_events_sampled_sub_grouped['new_index'] = list(zip(lab_events_sampled_sub_grouped['subject_id'], lab_events_sampled_sub_grouped['hadm_id']))


In [52]:
%time lab_events_sampled_pivot = lab_events_sampled_sub_grouped.pivot(index = 'new_index', columns = 'itemid', values = ['min', 'max', 'mean', 'abn_percent', 'below_min', 'above_max'])
# 'comments', 


CPU times: user 118 ms, sys: 30.4 ms, total: 148 ms
Wall time: 359 ms


In [53]:
pd.options.display.max_columns = 500

lab_events_sampled_pivot = lab_events_sampled_pivot.reset_index()
lab_events_sampled_pivot['subject_id'], lab_events_sampled_pivot['hadm_id'] = zip(*lab_events_sampled_pivot['new_index'])
lab_events_sampled_pivot.drop(['new_index'], axis = 1, inplace = True)


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


In [52]:
# missing_vals = lab_events_sampled_pivot.isnull().sum(axis = 0) 
# missing_vals[missing_vals > 0].sort_values()

## Start Data Imputation

Copy table, retain original for Random Forest

In [54]:
lab_events_impute = lab_events_sampled_pivot.copy()

In [54]:
# lab_events_sampled_pivot = lab_events_impute

In [55]:
# d_labitems = pd.read_csv('data/mimic-iv-0.4/hosp/d_labitems.csv.gz', compression='gzip')


In [55]:
# lab_events_impute.columns

d_labitems[d_labitems['itemid'].isin(itemid_sub_sample)]

Unnamed: 0,itemid,label,fluid,category,loinc_code
52,50861,Alanine Aminotransferase (ALT),Blood,Chemistry,1742-6
76,50863,Alkaline Phosphatase,Blood,Chemistry,6768-6
115,50868,Anion Gap,Blood,Chemistry,1863-0
153,50878,Asparate Aminotransferase (AST),Blood,Chemistry,1920-8
212,50882,Bicarbonate,Blood,Chemistry,1963-8
229,50885,"Bilirubin, Total",Blood,Chemistry,1975-2
282,50893,"Calcium, Total",Blood,Chemistry,2000-8
442,50902,Chloride,Blood,Chemistry,2075-0
511,50911,"Creatine Kinase, MB Isoenzyme",Blood,Chemistry,6773-6
512,50912,Creatinine,Blood,Chemistry,2160-0


In [56]:
%%time

np.random.seed(0)

for labitem in lab_range_dic:
    for ind in lab_events_sampled_pivot[lab_events_sampled_pivot[('mean', labitem)].isnull()].index:
        val_max = lab_range_dic[labitem][1]
        val_min = lab_range_dic[labitem][0]

        val_ave = (val_max + val_min) / 2
        val_std = (val_max - val_ave) * .333

        ran_vals = np.random.normal(val_ave, val_std, 50)
        impute_min = min(ran_vals)
        impute_max = max(ran_vals)
        impute_mean = np.mean(ran_vals)
        
        val_min = lab_events_sampled_pivot[('min', labitem)][ind]
        val_max = lab_events_sampled_pivot[('max', labitem)][ind]
        val_mean = lab_events_sampled_pivot[('mean', labitem)][ind]
        
        lab_events_sampled_pivot[('min', labitem)][ind] = np.where(np.isnan(val_min), impute_min, val_min)
        lab_events_sampled_pivot[('max', labitem)][ind] = np.where(np.isnan(val_max), impute_max, val_max)
        lab_events_sampled_pivot[('mean', labitem)][ind] = np.where(np.isnan(val_mean), impute_mean, val_mean)
#         patient[('mean', labitem)].replace(np.nan, impute_mean)
#         patient[('abn_count', labitem)].replace(np.nan, 0)
# 10 items - 2.5s
# 20 items - 6s
# 30 items - 28.6s

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


CPU times: user 3min 2s, sys: 4.65 s, total: 3min 7s
Wall time: 6min 24s


In [59]:
for lab_itemid in lab_range_dic:
    lab_events_sampled_pivot[( 'above_max', lab_itemid)] = lab_events_sampled_pivot[( 'above_max', lab_itemid)].replace(np.nan, 0)
    lab_events_sampled_pivot[( 'below_min', lab_itemid)] = lab_events_sampled_pivot[( 'below_min', lab_itemid)].replace(np.nan, 0)
    lab_events_sampled_pivot[( 'abn_percent', lab_itemid)] = lab_events_sampled_pivot[( 'abn_percent', lab_itemid)].replace(np.nan, 0)
#     lab_events_sampled_pivot[( 'comments', lab_itemid)] = lab_events_sampled_pivot[( 'comments', lab_itemid)].replace(np.nan, np.array(np.nan))


In [60]:
missing_vals = lab_events_sampled_pivot.isnull().sum(axis = 0) 
missing_vals[missing_vals > 0].sort_values()

Series([], dtype: int64)

In [63]:
lab_events_sampled_pivot.isna().sum()

            itemid
min         50861     0
            50863     0
            50868     0
            50878     0
            50882     0
                     ..
above_max   51279     0
            51301     0
            51491     0
subject_id            0
hadm_id               0
Length: 182, dtype: int64

In [65]:
lab_events_impute_KNN = lab_events_impute.copy()

In [66]:
lab_events_impute_KNN

Unnamed: 0_level_0,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,subject_id,hadm_id
itemid,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,Unnamed: 181_level_1,Unnamed: 182_level_1
0,,,,,,4.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.100000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,10002264,23205369.0
1,36.0,75.0,15.0,32.0,14.0,0.4,7.7,95.0,44.0,1.1,165.0,1.7,3.0,3.5,129.0,0.69,30.0,33.1,11.1,1.1,30.4,32.1,92.0,112.0,11.8,24.3,12.3,3.59,11.0,5.5,44.0,99.0,23.0,210.0,22.0,0.6,8.7,103.0,170.0,1.6,370.0,2.4,4.1,4.8,137.0,6.74,45.0,43.0,14.3,1.7,31.8,34.5,97.0,216.0,18.9,62.5,13.2,4.58,36.8,5.5,39.333333,86.000000,18.900000,96.333333,18.700000,0.500000,8.270000,99.545455,112.166667,1.420000,252.900000,2.172727,3.530000,4.000000,133.090909,2.813333,40.000000,36.927273,12.330000,1.218182,31.110000,33.400000,93.400000,168.181818,13.445455,41.418182,12.760000,3.960000,21.620000,5.5,0.333333,0.000000,0.200000,0.666667,0.900000,0.000000,0.600000,0.090909,1.0,0.800000,1.000000,0.000000,0.000000,0.000000,0.363636,1.0,1.000000,0.818182,0.8,0.545455,0.0,0.000000,0.0,0.272727,0.545455,0.727273,0.0,1.0,1.000000,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.7,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,6.9,2.6,0.0,0.0,0.0,0.0,38.0,0.0,0.7,0.0,1.01,0.0,0.0,4.0,0.0,3.0,170.0,0.0,0.0,0.0,0.0,160.0,0.4,270.0,0.0,0.0,0.0,0.0,6.73,25.0,0.0,0.0,0.6,0.0,0.0,0.0,0.0,6.4,26.0,0.0,0.0,26.8,0.0,10002495,24982426.0
2,,,9.0,,23.0,,,103.0,,1.0,96.0,1.9,,4.2,139.0,,11.0,26.4,9.9,1.3,29.6,35.9,80.0,151.0,14.6,34.8,13.9,3.28,7.0,,,,13.0,,31.0,,,113.0,,1.6,138.0,2.3,,4.6,143.0,,18.0,34.8,12.2,1.5,30.7,38.4,83.0,184.0,16.6,43.1,14.7,3.98,12.4,,,,10.666667,,27.250000,,,107.500000,,1.233333,117.666667,2.100000,,4.416667,140.666667,,15.833333,30.800000,11.200000,1.400000,30.140000,37.300000,81.200000,172.200000,15.600000,38.950000,14.300000,3.706000,9.560000,,,,0.000000,,0.000000,,,0.500000,,0.333333,0.666667,0.000000,,0.000000,0.000000,,0.000000,1.000000,1.0,1.000000,0.0,1.000000,0.6,0.000000,1.000000,0.500000,0.0,1.0,0.200000,,,,0.0,,0.0,,,0.0,,0.0,0.0,0.0,,0.0,0.0,,0.0,13.6,4.1,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.32,0.0,,,,0.0,,0.0,,,5.0,,0.4,33.0,0.0,,0.0,0.0,,0.0,0.0,0.0,0.4,0.0,3.4,0.0,0.0,3.2,8.1,0.0,0.0,1.4,,10002527,29112696.0
3,,,,,,2.6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.600000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.000000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,10005078,24046197.0
4,,,,,,7.6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.600000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.000000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.6,,,,,,,,,,,,,,,,,,,,,,,,,10006062,21916409.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8162,,,,,,,,,,,,,,,,,,33.4,12.2,,30.3,33.6,90.0,205.0,,,13.8,4.03,8.5,,,,,,,,,,,,,,,,,,,36.3,12.2,,30.3,33.6,90.0,205.0,,,13.8,4.03,8.5,,,,,,,,,,,,,,,,,,,34.850000,12.200000,,30.300000,33.600000,90.000000,205.000000,,,13.800000,4.030000,8.500000,,,,,,,,,,,,,,,,,,,0.500000,0.0,,0.0,0.000000,0.0,0.000000,,,0.0,0.0,0.000000,,,,,,,,,,,,,,,,,,,0.6,0.0,,0.0,0.0,0.0,0.0,,,0.0,0.00,0.0,,,,,,,,,,,,,,,,,,,0.0,0.0,,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,,19991798,26825654.0
8163,,,12.0,,26.0,,9.1,104.0,,1.5,89.0,2.1,3.8,5.1,142.0,,21.0,36.8,11.9,,32.2,32.3,100.0,201.0,,,12.7,3.69,8.9,,,,12.0,,26.0,,9.1,104.0,,1.5,89.0,2.1,3.8,5.1,142.0,,21.0,37.6,11.9,,32.2,32.3,100.0,201.0,,,12.7,3.69,8.9,,,,12.000000,,26.000000,,9.100000,104.000000,,1.500000,89.000000,2.100000,3.800000,5.100000,142.000000,,21.000000,37.200000,11.900000,,32.200000,32.300000,100.000000,201.000000,,,12.700000,3.690000,8.900000,,,,0.000000,,0.000000,,0.000000,0.000000,,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,,1.000000,1.000000,1.0,,1.0,0.000000,1.0,0.000000,,,0.0,1.0,0.000000,,,,0.0,,0.0,,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,3.2,1.8,,0.0,0.0,0.0,0.0,,,0.0,0.91,0.0,,,,0.0,,0.0,,0.0,0.0,,0.3,0.0,0.0,0.0,0.0,0.0,,1.0,0.0,0.0,,0.2,0.0,2.0,0.0,,,0.0,0.0,0.0,,19995478,24108472.0
8164,9.0,64.0,10.0,11.0,16.0,0.3,7.5,86.0,,0.8,148.0,1.4,3.1,4.1,114.0,,13.0,23.1,7.7,1.2,26.7,33.3,77.0,226.0,13.2,29.2,11.7,2.85,7.9,6.0,9.0,64.0,18.0,11.0,23.0,0.3,8.7,97.0,,1.2,312.0,2.6,4.0,5.4,133.0,,20.0,30.6,10.5,1.3,27.3,35.0,81.0,334.0,14.2,31.4,12.6,3.87,11.7,6.0,9.000000,64.000000,14.055556,11.000000,19.888889,0.300000,8.293333,91.055556,,0.936842,215.666667,1.906667,3.433333,4.688889,125.222222,,16.888889,26.222222,8.977778,1.260000,26.966667,34.211111,78.777778,280.555556,13.740000,30.120000,12.233333,3.328889,9.500000,6.0,0.000000,0.000000,0.000000,0.000000,0.611111,0.000000,0.533333,0.888889,,0.000000,1.000000,0.066667,0.000000,0.000000,1.000000,,0.000000,1.000000,1.0,1.000000,0.0,0.000000,1.0,0.000000,1.000000,0.000000,0.0,1.0,0.333333,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.9,10.0,,0.0,0.0,0.2,0.0,0.0,21.0,,0.0,16.9,6.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,1.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,212.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,1.7,0.0,0.0,0.0,1.7,0.0,19996783,25894657.0
8165,15.0,57.0,5.0,25.0,22.0,0.8,7.8,92.0,,0.5,76.0,1.8,1.3,2.8,131.0,,14.0,26.7,8.4,1.3,29.3,30.9,90.0,81.0,14.4,25.9,16.8,2.80,5.4,6.5,118.0,149.0,16.0,180.0,33.0,3.0,9.5,120.0,,0.8,225.0,2.8,5.1,5.1,155.0,,42.0,41.4,12.9,2.5,31.0,33.9,97.0,185.0,27.5,54.9,21.0,4.41,26.7,7.5,44.379310,112.517241,10.214286,59.379310,27.642857,1.358621,8.602857,99.190476,,0.611905,129.547619,2.139024,2.808571,4.364286,137.095238,,29.642857,30.673529,9.944118,1.620690,30.038235,32.432353,92.735294,128.617647,17.682759,34.413793,18.632353,3.312647,10.855882,7.0,0.379310,0.310345,0.404762,0.724138,0.023810,0.206897,0.285714,0.476190,,0.000000,0.904762,0.048780,0.485714,0.047619,0.500000,,0.809524,0.970588,1.0,1.000000,0.0,0.235294,0.0,0.764706,1.000000,0.206897,1.0,1.0,0.323529,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.6,4.0,,0.0,0.0,0.0,1.4,0.7,4.0,,0.0,13.3,5.3,0.0,0.0,1.1,0.0,69.0,0.0,0.0,0.0,1.80,0.0,0.0,78.0,19.0,0.0,140.0,1.0,1.5,0.0,12.0,,0.0,125.0,0.2,0.6,0.0,8.0,,22.0,0.0,0.0,1.4,0.0,0.0,0.0,0.0,15.0,18.4,5.5,0.0,16.7,0.0,19997886,20793010.0


In [67]:
missing_vals = lab_events_impute_KNN.isnull().sum(axis = 0) 
missing_vals[missing_vals > 0].sort_values()

             itemid
mean         51221     1980
below_min    51221     1980
abn_percent  51221     1980
above_max    51221     1980
max          51221     1980
                       ... 
min          50911     6912
mean         50911     6912
             51003     7460
max          51003     7460
min          51003     7460
Length: 180, dtype: int64

In [68]:
%%time

from sklearn.impute import KNNImputer
imputer = KNNImputer(n_neighbors=10)
lab_events_impute_KNN_imputed = imputer.fit_transform(lab_events_impute_KNN)

CPU times: user 2min 30s, sys: 28.5 s, total: 2min 59s
Wall time: 3min 22s


In [69]:
lab_events_impute_KNN_imputed = pd.DataFrame(lab_events_impute_KNN_imputed)
lab_events_impute_KNN_imputed.columns = lab_events_impute_KNN.columns
lab_events_impute_KNN_imputed

Unnamed: 0_level_0,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,subject_id,hadm_id
itemid,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,Unnamed: 181_level_1,Unnamed: 182_level_1
0,30.6,88.6,13.3,32.1,22.6,4.10,8.53,101.8,3.1,0.93,103.9,1.81,2.61,3.74,138.2,0.366,14.3,36.68,12.30,1.30,29.70,33.06,88.6,175.4,14.45,31.44,13.85,4.116,7.83,5.70,54.2,104.9,15.9,42.6,25.7,4.10,8.99,105.9,18.6,1.07,156.3,2.08,3.41,4.32,141.2,0.692,19.1,39.87,13.18,1.83,30.07,33.95,90.3,216.3,19.40,46.58,14.09,4.431,10.71,6.00,42.261111,97.755556,14.735119,38.216667,24.171230,4.100000,8.741282,103.732954,8.722500,0.995317,128.125119,1.935372,3.022759,4.015141,139.522516,0.525450,16.373306,38.235000,12.771494,1.476429,29.875179,33.469286,89.416071,192.387662,16.124714,36.367259,13.981607,4.281036,9.067500,5.835714,0.200000,0.100000,0.000000,0.266667,0.240972,0.000000,0.192766,0.029891,0.075000,0.100000,0.708929,0.014286,0.319071,0.047826,0.012500,0.457143,0.321841,0.450000,0.564286,0.752381,0.050000,0.250000,0.000000,0.285714,0.752381,0.374339,0.000000,0.607143,0.233929,0.000000,0.0,0.0,0.0,0.0,0.8,0.0,0.14,0.0,0.0,0.00,1.1,0.02,0.35,0.09,0.1,0.0,0.2,3.84,1.55,0.0,0.00,0.20,0.0,20.0,0.0,0.36,0.0,0.530,0.00,0.0,29.3,23.9,0.0,13.7,0.0,0.00,0.00,0.7,12.8,0.01,56.9,0.00,0.00,0.00,0.0,0.438,2.3,0.00,0.00,0.73,0.09,0.15,0.0,0.0,6.78,14.53,0.00,0.000,2.07,0.00,10002264.0,23205369.0
1,36.0,75.0,15.0,32.0,14.0,0.40,7.70,95.0,44.0,1.10,165.0,1.70,3.00,3.50,129.0,0.690,30.0,33.10,11.10,1.10,30.40,32.10,92.0,112.0,11.80,24.30,12.30,3.590,11.00,5.50,44.0,99.0,23.0,210.0,22.0,0.60,8.70,103.0,170.0,1.60,370.0,2.40,4.10,4.80,137.0,6.740,45.0,43.00,14.30,1.70,31.80,34.50,97.0,216.0,18.90,62.50,13.20,4.580,36.80,5.50,39.333333,86.000000,18.900000,96.333333,18.700000,0.500000,8.270000,99.545455,112.166667,1.420000,252.900000,2.172727,3.530000,4.000000,133.090909,2.813333,40.000000,36.927273,12.330000,1.218182,31.110000,33.400000,93.400000,168.181818,13.445455,41.418182,12.760000,3.960000,21.620000,5.500000,0.333333,0.000000,0.200000,0.666667,0.900000,0.000000,0.600000,0.090909,1.000000,0.800000,1.000000,0.000000,0.000000,0.000000,0.363636,1.000000,1.000000,0.818182,0.800000,0.545455,0.000000,0.000000,0.000000,0.272727,0.545455,0.727273,0.000000,1.000000,1.000000,0.000000,0.0,0.0,0.0,0.0,8.0,0.0,0.70,1.0,0.0,0.00,0.0,0.00,0.00,0.00,4.0,0.0,0.0,6.90,2.60,0.0,0.00,0.00,0.0,38.0,0.0,0.70,0.0,1.010,0.00,0.0,4.0,0.0,3.0,170.0,0.0,0.00,0.00,0.0,160.0,0.40,270.0,0.00,0.00,0.00,0.0,6.730,25.0,0.00,0.00,0.60,0.00,0.00,0.0,0.0,6.40,26.00,0.00,0.000,26.80,0.00,10002495.0,24982426.0
2,98.1,87.2,9.0,46.6,23.0,1.36,8.61,103.0,3.5,1.00,96.0,1.90,2.89,4.20,139.0,0.472,11.0,26.40,9.90,1.30,29.60,35.90,80.0,151.0,14.60,34.80,13.90,3.280,7.00,5.65,636.8,113.8,13.0,462.3,31.0,2.32,9.05,113.0,6.4,1.60,138.0,2.30,3.80,4.60,143.0,0.560,18.0,34.80,12.20,1.50,30.70,38.40,83.0,184.0,16.60,43.10,14.70,3.980,12.40,6.15,359.740000,98.687302,10.666667,178.349722,27.250000,1.847028,8.793643,107.500000,4.850000,1.233333,117.666667,2.100000,3.355667,4.416667,140.666667,0.512250,15.833333,30.800000,11.200000,1.400000,30.140000,37.300000,81.200000,172.200000,15.600000,38.950000,14.300000,3.706000,9.560000,5.800000,0.362500,0.358333,0.000000,0.508333,0.000000,0.416667,0.189048,0.500000,0.080000,0.333333,0.666667,0.000000,0.188095,0.000000,0.000000,0.600000,0.000000,1.000000,1.000000,1.000000,0.000000,1.000000,0.600000,0.000000,1.000000,0.500000,0.000000,1.000000,0.200000,0.012500,0.0,3.5,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.00,0.0,0.00,0.24,0.00,0.0,0.0,0.0,13.60,4.10,0.0,0.00,0.00,2.0,0.0,0.0,0.00,0.0,1.320,0.00,0.0,610.3,15.7,0.0,427.9,0.0,0.98,0.05,5.0,1.5,0.40,33.0,0.00,0.23,0.00,0.0,0.511,0.0,0.00,0.00,0.40,0.00,3.40,0.0,0.0,3.20,8.10,0.00,0.000,1.40,0.05,10002527.0,29112696.0
3,26.4,92.5,10.4,26.8,22.3,2.60,8.20,98.8,16.8,0.82,86.3,1.63,2.62,3.45,133.8,0.362,11.2,35.33,12.39,1.07,32.81,32.94,97.8,200.5,11.96,27.13,15.39,3.717,9.97,5.85,69.7,117.2,15.7,116.3,28.4,2.60,9.13,107.0,51.5,1.68,155.8,2.18,4.25,5.34,141.3,1.264,34.4,41.41,13.80,1.45,33.30,34.06,99.7,339.0,16.11,53.80,15.94,4.187,12.36,6.55,43.256410,103.160256,13.100714,60.460256,25.182738,2.600000,8.686764,102.492083,29.379167,1.051457,116.749762,1.941740,3.313750,4.201748,137.398161,0.718988,17.342821,38.055644,12.973143,1.195543,33.042429,33.476714,98.803810,244.099420,13.351882,39.880591,15.686619,3.916462,11.093286,6.166667,0.288462,0.250000,0.038810,0.229487,0.364583,0.000000,0.400779,0.171511,0.133333,0.255238,0.502202,0.090604,0.511905,0.067767,0.211690,0.483333,0.358077,0.616667,0.623333,0.360367,0.566667,0.152857,0.500000,0.130435,0.357735,0.260126,0.500000,0.686667,0.171429,0.008333,0.0,1.6,0.4,0.0,1.8,0.0,0.51,1.4,0.0,0.02,2.8,0.12,0.59,0.16,1.0,0.0,0.5,4.89,1.45,0.0,0.00,0.26,0.0,7.5,0.0,0.36,0.0,0.635,0.00,0.0,44.2,13.8,0.4,84.6,1.5,0.00,0.00,1.3,33.4,0.76,55.7,0.01,0.35,0.68,1.2,0.067,18.8,0.28,0.35,0.37,2.18,0.15,2.6,80.0,3.74,21.26,1.16,0.003,0.90,0.05,10005078.0,24046197.0
4,28.1,86.4,11.0,36.8,23.5,7.60,8.17,102.2,3.1,0.90,92.2,1.87,2.97,3.81,136.8,0.155,13.6,30.69,10.30,1.20,29.99,32.52,90.7,206.4,13.39,29.68,13.68,3.391,7.38,5.95,40.9,131.7,14.1,61.4,27.3,7.60,8.84,107.4,3.7,1.20,148.2,2.18,4.06,4.60,140.6,0.230,19.3,35.26,11.75,1.41,30.95,33.88,93.4,334.1,15.55,33.80,14.65,3.860,11.76,6.10,33.920833,108.905357,12.394643,44.592262,25.611905,7.600000,8.602000,104.461905,3.300000,1.057133,114.227381,2.028389,3.530536,4.225893,138.586905,0.186129,16.060333,32.401905,10.795403,1.289417,30.552780,33.234468,91.931965,266.908121,14.315417,31.743333,14.117154,3.546197,9.211862,6.008333,0.300000,0.337500,0.012500,0.457143,0.095833,1.000000,0.406667,0.120833,0.000000,0.175000,0.458929,0.000000,0.124643,0.054167,0.125000,0.200000,0.317500,0.733333,0.724242,0.600000,0.312235,0.154000,0.298118,0.281324,0.600000,0.212500,0.228699,0.800000,0.291057,0.000000,0.0,0.1,0.1,0.0,1.0,0.0,0.50,0.0,0.0,0.00,0.0,0.00,0.10,0.10,0.5,0.0,0.1,7.66,2.66,0.0,0.23,0.19,0.5,16.1,0.0,0.30,0.0,0.954,0.12,0.0,8.8,43.0,0.0,28.6,0.0,0.60,0.00,2.1,0.0,0.29,52.9,0.00,0.19,0.02,0.5,0.013,4.6,0.00,0.00,0.32,0.96,0.00,1.3,59.2,3.04,1.84,0.55,0.000,2.63,0.00,10006062.0,21916409.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8162,45.1,95.4,11.0,43.2,22.9,1.07,8.42,99.0,5.0,0.93,92.7,1.92,3.17,3.84,135.6,0.429,19.6,33.40,12.20,1.57,30.30,33.60,90.0,205.0,16.52,28.82,13.80,4.030,8.50,6.00,214.0,107.9,15.6,181.3,28.1,1.54,8.89,104.3,11.2,1.11,179.5,2.33,4.10,4.62,140.0,0.909,31.3,36.30,12.20,1.88,30.30,33.60,90.0,205.0,19.27,38.44,13.80,4.030,8.50,6.05,88.582353,100.878431,13.260031,86.499020,25.470677,1.279020,8.659048,101.348941,7.866667,1.015610,121.775765,2.118948,3.637738,4.200624,137.924008,0.657358,24.589724,34.850000,12.200000,1.723333,30.300000,33.600000,90.000000,205.000000,17.857333,32.469667,13.800000,4.030000,8.500000,6.025000,0.294118,0.341176,0.053459,0.426471,0.144430,0.098039,0.317857,0.205865,0.016667,0.200000,0.683974,0.055556,0.280060,0.052137,0.207692,0.300000,0.544762,0.500000,0.000000,0.801667,0.000000,0.000000,0.000000,0.000000,0.801667,0.256667,0.000000,0.000000,0.000000,0.000000,0.0,1.9,0.2,0.0,0.7,0.0,0.25,1.3,0.0,0.00,2.0,0.02,0.13,0.01,1.3,0.0,0.0,0.60,0.00,0.0,0.00,0.00,0.0,0.0,0.0,0.00,0.0,0.000,0.00,0.0,181.0,12.8,0.2,150.6,0.2,0.77,0.00,0.5,0.3,0.20,79.0,0.02,0.19,0.05,0.0,0.019,13.3,0.00,0.00,0.79,0.00,0.00,0.0,0.0,6.54,6.79,0.00,0.000,0.00,0.00,19991798.0,26825654.0
8163,26.9,96.3,12.0,35.3,26.0,1.37,9.10,104.0,3.5,1.50,89.0,2.10,3.80,5.10,142.0,0.153,21.0,36.80,11.90,1.12,32.20,32.30,100.0,201.0,12.49,28.17,12.70,3.690,8.90,5.95,41.6,120.3,12.0,54.4,26.0,1.57,9.10,104.0,5.3,1.50,89.0,2.10,3.80,5.10,142.0,0.227,21.0,37.60,11.90,1.39,32.20,32.30,100.0,201.0,15.17,40.80,12.70,3.690,8.90,6.55,32.460000,108.073333,12.000000,40.320000,26.000000,1.473333,9.100000,104.000000,4.110000,1.500000,89.000000,2.100000,3.800000,5.100000,142.000000,0.191083,21.000000,37.200000,11.900000,1.214417,32.200000,32.300000,100.000000,201.000000,13.474167,34.912857,12.700000,3.690000,8.900000,6.264167,0.350000,0.200000,0.000000,0.270000,0.000000,0.250000,0.000000,0.000000,0.140000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.250000,1.000000,1.000000,1.000000,0.433333,1.000000,0.000000,1.000000,0.000000,0.433333,0.333333,0.000000,1.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00,0.0,0.00,0.00,0.00,0.0,0.0,0.0,3.20,1.80,0.0,0.00,0.00,0.0,0.0,0.0,0.18,0.0,0.910,0.00,0.0,15.2,38.9,0.0,21.7,0.0,0.22,0.00,0.0,1.3,0.30,0.0,0.00,0.00,0.00,0.0,0.032,1.0,0.00,0.00,0.30,0.20,0.00,2.0,0.0,2.67,10.54,0.00,0.000,0.00,0.00,19995478.0,24108472.0
8164,9.0,64.0,10.0,11.0,16.0,0.30,7.50,86.0,14.3,0.80,148.0,1.40,3.10,4.10,114.0,0.798,13.0,23.10,7.70,1.20,26.70,33.30,77.0,226.0,13.20,29.20,11.70,2.850,7.90,6.00,9.0,64.0,18.0,11.0,23.0,0.30,8.70,97.0,41.1,1.20,312.0,2.60,4.00,5.40,133.0,2.081,20.0,30.60,10.50,1.30,27.30,35.00,81.0,334.0,14.20,31.40,12.60,3.870,11.70,6.00,9.000000,64.000000,14.055556,11.000000,19.888889,0.300000,8.293333,91.055556,25.193333,0.936842,215.666667,1.906667,3.433333,4.688889,125.222222,1.259375,16.888889,26.222222,8.977778,1.260000,26.966667,34.211111,78.777778,280.555556,13.740000,30.120000,12.233333,3.328889,9.500000,6.000000,0.000000,0.000000,0.000000,0.000000,0.611111,0.000000,0.533333,0.888889,0.343333,0.000000,1.000000,0.066667,0.000000,0.000000,1.000000,0.400000,0.000000,1.000000,1.000000,1.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,0.333333,0.000000,0.0,0.0,0.0,0.0,6.0,0.0,0.90,10.0,0.0,0.00,0.0,0.20,0.00,0.00,21.0,0.0,0.0,16.90,6.00,0.0,0.00,0.00,5.0,0.0,0.0,0.00,0.0,1.750,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,34.7,0.00,212.0,0.00,0.00,0.00,0.0,1.191,0.0,0.00,0.00,0.20,0.00,0.00,0.0,0.0,1.70,0.00,0.00,0.000,1.70,0.00,19996783.0,25894657.0
8165,15.0,57.0,5.0,25.0,22.0,0.80,7.80,92.0,3.9,0.50,76.0,1.80,1.30,2.80,131.0,0.237,14.0,26.70,8.40,1.30,29.30,30.90,90.0,81.0,14.40,25.90,16.80,2.800,5.40,6.50,118.0,149.0,16.0,180.0,33.0,3.00,9.50,120.0,5.4,0.80,225.0,2.80,5.10,5.10,155.0,0.393,42.0,41.40,12.90,2.50,31.00,33.90,97.0,185.0,27.50,54.90,21.00,4.410,26.70,7.50,44.379310,112.517241,10.214286,59.379310,27.642857,1.358621,8.602857,99.190476,4.736667,0.611905,129.547619,2.139024,2.808571,4.364286,137.095238,0.304467,29.642857,30.673529,9.944118,1.620690,30.038235,32.432353,92.735294,128.617647,17.682759,34.413793,18.632353,3.312647,10.855882,7.000000,0.379310,0.310345,0.404762,0.724138,0.023810,0.206897,0.285714,0.476190,0.116667,0.000000,0.904762,0.048780,0.485714,0.047619,0.500000,0.250000,0.809524,0.970588,1.000000,1.000000,0.000000,0.235294,0.000000,0.764706,1.000000,0.206897,1.000000,1.000000,0.323529,0.000000,0.0,0.0,5.0,0.0,0.0,0.0,0.60,4.0,0.0,0.00,0.0,0.00,1.40,0.70,4.0,0.0,0.0,13.30,5.30,0.0,0.00,1.10,0.0,69.0,0.0,0.00,0.0,1.800,0.00,0.0,78.0,19.0,0.0,140.0,1.0,1.50,0.00,12.0,0.6,0.00,125.0,0.20,0.60,0.00,8.0,0.256,22.0,0.00,0.00,1.40,0.00,0.00,0.0,0.0,15.00,18.40,5.50,0.000,16.70,0.00,19997886.0,20793010.0


## Standardize the Data for use in Models

In [70]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

In [71]:
labs_scaled = lab_events_sampled_pivot
labs_scaled_KNN = lab_events_impute_KNN_imputed


In [72]:
labs_scaled

Unnamed: 0_level_0,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,subject_id,hadm_id
itemid,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,Unnamed: 181_level_1,Unnamed: 182_level_1
0,2.997088,53.363795,10.369141,-2.706156,23.468429,4.100000,8.659014,96.556417,1.614418,0.510737,74.495080,1.828794,2.758343,3.544652,134.918019,0.001827,5.881766,39.617388,13.366440,0.899418,27.401276,31.615461,84.576850,212.953627,9.931735,25.803209,11.430216,4.514907,5.264300,5.652647,35.116566,110.466755,19.164154,35.127708,30.964237,4.100000,9.902997,107.320873,9.615632,1.178871,98.767255,2.612165,4.399954,5.021700,144.121682,0.008474,18.114887,48.157030,16.307127,1.087596,30.931775,34.875913,99.059571,377.968479,12.323660,33.637831,14.824123,5.500960,10.503180,7.723480,20.936125,83.005689,14.051980,19.831696,27.180790,4.100000,9.351480,101.627342,5.238521,0.798318,86.108137,2.085692,3.608362,4.223675,139.468988,0.005110,12.601808,43.427836,14.662697,0.989909,29.155896,33.434872,90.395689,291.292280,11.164421,29.802420,13.142158,4.977740,7.658412,6.635017,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10002264,23205369.0
1,36.000000,75.000000,15.000000,32.000000,14.000000,0.400000,7.700000,95.000000,44.000000,1.100000,165.000000,1.700000,3.000000,3.500000,129.000000,0.690000,30.000000,33.100000,11.100000,1.100000,30.400000,32.100000,92.000000,112.000000,11.800000,24.300000,12.300000,3.590000,11.000000,5.500000,44.000000,99.000000,23.000000,210.000000,22.000000,0.600000,8.700000,103.000000,170.000000,1.600000,370.000000,2.400000,4.100000,4.800000,137.000000,6.740000,45.000000,43.000000,14.300000,1.700000,31.800000,34.500000,97.000000,216.000000,18.900000,62.500000,13.200000,4.580000,36.800000,5.500000,39.333333,86.000000,18.900000,96.333333,18.700000,0.500000,8.270000,99.545455,112.166667,1.420000,252.900000,2.172727,3.530000,4.000000,133.090909,2.813333,40.000000,36.927273,12.330000,1.218182,31.110000,33.400000,93.400000,168.181818,13.445455,41.418182,12.760000,3.960000,21.620000,5.500000,0.333333,0.000000,0.200000,0.666667,0.900000,0.000000,0.600000,0.090909,1.0,0.800000,1.000000,0.000000,0.000000,0.000000,0.363636,1.0,1.000000,0.818182,0.8,0.545455,0.0,0.000000,0.0,0.272727,0.545455,0.727273,0.0,1.0,1.000000,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.7,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,6.9,2.6,0.0,0.0,0.0,0.0,38.0,0.0,0.7,0.0,1.01,0.0,0.0,4.0,0.0,3.0,170.0,0.0,0.0,0.0,0.0,160.0,0.4,270.0,0.0,0.0,0.0,0.0,6.73,25.0,0.0,0.0,0.6,0.0,0.0,0.0,0.0,6.4,26.0,0.0,0.0,26.8,0.0,10002495,24982426.0
2,8.502958,51.899458,9.000000,6.138574,23.000000,0.745549,8.791798,103.000000,1.358330,1.000000,96.000000,1.900000,2.919012,4.200000,139.000000,0.002461,11.000000,26.400000,9.900000,1.300000,29.600000,35.900000,80.000000,151.000000,14.600000,34.800000,13.900000,3.280000,7.000000,5.592508,32.626622,112.934165,13.000000,32.912612,31.000000,3.349623,10.207952,113.000000,8.090807,1.600000,138.000000,2.300000,4.201805,4.600000,143.000000,0.008889,18.000000,34.800000,12.200000,1.500000,30.700000,38.400000,83.000000,184.000000,16.600000,43.100000,14.700000,3.980000,12.400000,7.312706,19.860518,82.219086,10.666667,20.813993,27.250000,2.105422,9.398583,107.500000,4.482076,1.233333,117.666667,2.100000,3.558404,4.416667,140.666667,0.005483,15.833333,30.800000,11.200000,1.400000,30.140000,37.300000,81.200000,172.200000,15.600000,38.950000,14.300000,3.706000,9.560000,6.351088,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.500000,0.0,0.333333,0.666667,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,1.000000,1.0,1.000000,0.0,1.000000,0.6,0.000000,1.000000,0.500000,0.0,1.0,0.200000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.6,4.1,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.32,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.4,33.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.4,0.0,3.4,0.0,0.0,3.2,8.1,0.0,0.0,1.4,0.0,10002527,29112696.0
3,10.068224,48.020546,9.310767,3.595374,23.373409,2.600000,8.511920,96.704762,2.099609,0.425821,77.253035,1.697534,3.124944,3.414196,133.659811,0.001490,8.466543,37.970242,13.205456,0.929930,27.397103,31.197589,81.857658,171.195602,10.118039,25.242495,11.138986,4.574508,4.975365,5.652813,35.871744,115.705433,17.890060,34.682742,32.292623,2.600000,10.190463,107.758389,8.217735,0.988299,97.827204,2.565730,4.532212,4.873285,143.275481,0.009105,19.223835,46.925865,15.741572,1.059345,30.908301,35.293966,98.581067,382.002859,12.692358,34.144829,15.439255,5.549687,10.249398,8.056388,21.708522,82.221527,13.821099,20.256268,27.048091,2.600000,9.320917,102.202381,5.111280,0.779119,85.577175,2.116240,3.661668,4.158663,139.112461,0.004977,12.944500,43.184755,14.714147,0.996888,29.255462,33.401183,89.929670,282.548024,11.330477,30.326882,12.845734,4.929637,8.001395,6.511782,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10005078,24046197.0
4,5.192135,42.016499,9.261099,6.635144,24.476791,7.600000,8.587909,97.204544,0.227240,0.536863,74.185054,1.799836,2.733779,3.675192,135.833220,0.001854,4.956332,39.241589,13.040595,0.899542,27.187877,32.371442,84.834987,186.776332,10.000672,26.571084,11.172996,4.283723,3.922422,5.459776,34.407151,104.397248,18.056469,36.865927,30.032624,7.600000,10.093367,106.592979,9.099011,1.024348,94.647085,2.437836,4.315405,5.000474,144.525808,0.010592,19.416939,47.997564,16.561867,1.076300,31.462128,34.843296,96.936095,446.738604,12.254134,35.068016,14.408640,5.561695,10.599066,7.543097,19.383891,80.520389,14.175913,20.474492,26.982901,7.600000,9.307472,102.082050,4.620237,0.783572,85.713630,2.087041,3.563786,4.281503,139.946382,0.005132,12.947949,42.646328,14.461459,0.999372,29.201465,33.481927,89.996922,292.978007,11.320688,29.812392,13.085716,4.987123,7.680690,6.480907,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10006062,21916409.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8162,5.450012,60.737503,8.333361,6.911677,23.766951,1.047266,8.523331,97.682902,1.729202,0.593280,71.259179,1.732312,2.848717,3.649594,136.699936,0.001313,7.346292,33.400000,12.200000,0.939502,30.300000,33.600000,90.000000,205.000000,10.315352,26.032971,13.800000,4.030000,8.500000,5.060023,35.891723,112.662957,18.337088,31.534786,31.840402,3.734925,9.961556,106.556065,8.164076,1.022508,93.460192,2.460349,4.109581,4.940327,143.029446,0.008509,17.727488,36.300000,12.200000,1.087330,30.300000,33.600000,90.000000,205.000000,11.934968,35.051636,13.800000,4.030000,8.500000,7.406545,20.719673,80.508226,13.827317,20.344699,27.180146,2.192764,9.302369,101.735240,4.779623,0.805817,83.834183,2.046631,3.634913,4.229920,139.662966,0.004775,12.954525,34.850000,12.200000,1.006798,30.300000,33.600000,90.000000,205.000000,11.162672,29.955567,13.800000,4.030000,8.500000,6.492162,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.500000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19991798,26825654.0
8163,9.210654,42.542491,12.000000,-6.983840,26.000000,0.506439,9.100000,104.000000,0.650965,1.500000,89.000000,2.100000,3.800000,5.100000,142.000000,0.000235,21.000000,36.800000,11.900000,0.943021,32.200000,32.300000,100.000000,201.000000,10.280159,27.215659,12.700000,3.690000,8.900000,5.469564,35.554024,119.421495,12.000000,34.136631,26.000000,3.750604,9.100000,104.000000,9.332100,1.500000,89.000000,2.100000,3.800000,5.100000,142.000000,0.008912,21.000000,37.600000,11.900000,1.078344,32.200000,32.300000,100.000000,201.000000,12.171601,33.314836,12.700000,3.690000,8.900000,7.552257,22.708110,83.164702,12.000000,19.282095,26.000000,2.116331,9.100000,104.000000,5.062902,1.500000,89.000000,2.100000,3.800000,5.100000,142.000000,0.004817,21.000000,37.200000,11.900000,1.007901,32.200000,32.300000,100.000000,201.000000,11.181015,30.081695,12.700000,3.690000,8.900000,6.552187,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,1.000000,1.000000,1.0,0.000000,1.0,0.000000,1.0,0.000000,0.000000,0.000000,0.0,1.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.2,1.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.91,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.0,0.0,0.0,0.0,0.0,0.00,1.0,0.0,0.0,0.0,0.2,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19995478,24108472.0
8164,9.000000,64.000000,10.000000,11.000000,16.000000,0.300000,7.500000,86.000000,2.382568,0.800000,148.000000,1.400000,3.100000,4.100000,114.000000,0.001202,13.000000,23.100000,7.700000,1.200000,26.700000,33.300000,77.000000,226.000000,13.200000,29.200000,11.700000,2.850000,7.900000,6.000000,9.000000,64.000000,18.000000,11.000000,23.000000,0.300000,8.700000,97.000000,8.388843,1.200000,312.000000,2.600000,4.000000,5.400000,133.000000,0.008778,20.000000,30.600000,10.500000,1.300000,27.300000,35.000000,81.000000,334.000000,14.200000,31.400000,12.600000,3.870000,11.700000,6.000000,9.000000,64.000000,14.055556,11.000000,19.888889,0.300000,8.293333,91.055556,5.319776,0.936842,215.666667,1.906667,3.433333,4.688889,125.222222,0.005377,16.888889,26.222222,8.977778,1.260000,26.966667,34.211111,78.777778,280.555556,13.740000,30.120000,12.233333,3.328889,9.500000,6.000000,0.000000,0.000000,0.000000,0.000000,0.611111,0.000000,0.533333,0.888889,0.0,0.000000,1.000000,0.066667,0.000000,0.000000,1.000000,0.0,0.000000,1.000000,1.0,1.000000,0.0,0.000000,1.0,0.000000,1.000000,0.000000,0.0,1.0,0.333333,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.9,10.0,0.0,0.0,0.0,0.2,0.0,0.0,21.0,0.0,0.0,16.9,6.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,1.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,212.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,1.7,0.0,0.0,0.0,1.7,0.0,19996783,25894657.0
8165,15.000000,57.000000,5.000000,25.000000,22.000000,0.800000,7.800000,92.000000,0.772004,0.500000,76.000000,1.800000,1.300000,2.800000,131.000000,0.001252,14.000000,26.700000,8.400000,1.300000,29.300000,30.900000,90.000000,81.000000,14.400000,25.900000,16.800000,2.800000,5.400000,6.500000,118.000000,149.000000,16.000000,180.000000,33.000000,3.000000,9.500000,120.000000,8.671598,0.800000,225.000000,2.800000,5.100000,5.100000,155.000000,0.009768,42.000000,41.400000,12.900000,2.500000,31.000000,33.900000,97.000000,185.000000,27.500000,54.900000,21.000000,4.410000,26.700000,7.500000,44.379310,112.517241,10.214286,59.379310,27.642857,1.358621,8.602857,99.190476,4.992896,0.611905,129.547619,2.139024,2.808571,4.364286,137.095238,0.005001,29.642857,30.673529,9.944118,1.620690,30.038235,32.432353,92.735294,128.617647,17.682759,34.413793,18.632353,3.312647,10.855882,7.000000,0.379310,0.310345,0.404762,0.724138,0.023810,0.206897,0.285714,0.476190,0.0,0.000000,0.904762,0.048780,0.485714,0.047619,0.500000,0.0,0.809524,0.970588,1.0,1.000000,0.0,0.235294,0.0,0.764706,1.000000,0.206897,1.0,1.0,0.323529,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.6,4.0,0.0,0.0,0.0,0.0,1.4,0.7,4.0,0.0,0.0,13.3,5.3,0.0,0.0,1.1,0.0,69.0,0.0,0.0,0.0,1.80,0.0,0.0,78.0,19.0,0.0,140.0,1.0,1.5,0.0,12.0,0.0,0.0,125.0,0.2,0.6,0.0,8.0,0.00,22.0,0.0,0.0,1.4,0.0,0.0,0.0,0.0,15.0,18.4,5.5,0.0,16.7,0.0,19997886,20793010.0


In [76]:
scaler.fit(labs_scaled.loc[:, ('min', 50861) : ('above_max', 51491)])
labs_scaled.loc[:, ('min', 50861) : ('above_max', 51491)] = scaler.transform(labs_scaled.loc[:, ('min', 50861) : ('above_max', 51491)])

scaler.fit(labs_scaled_KNN.loc[:, ('min', 50861) : ('above_max', 51491)])
labs_scaled_KNN.loc[:, ('min', 50861) : ('above_max', 51491)] = scaler.transform(labs_scaled_KNN.loc[:, ('min', 50861) : ('above_max', 51491)])


In [77]:
pd.DataFrame(labs_scaled_KNN)

Unnamed: 0_level_0,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,subject_id,hadm_id
itemid,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,50861,50863,50868,50878,50882,50885,50893,50902,50911,50912,50931,50960,50970,50971,50983,51003,51006,51221,51222,51237,51248,51249,51250,51265,51274,51275,51277,51279,51301,51491,Unnamed: 181_level_1,Unnamed: 182_level_1
0,-0.186665,-0.193075,0.585371,-0.181148,-0.163053,0.401468,0.248314,0.297256,-0.449481,0.003921,0.147048,-0.164456,-0.434913,-0.102946,0.432213,-0.125327,-0.084374,0.410294,0.397458,0.436923,-0.163384,0.198614,-0.280472,-0.433657,0.485161,0.234420,-0.250883,0.563089,-0.098115,-0.696118,-0.190684,-0.266166,0.031737,-0.231020,-0.524055,0.109165,-0.121752,0.020084,0.001438,-0.201336,-0.003029,-0.214543,-0.674156,-0.337602,0.035915,-0.155386,-0.311333,0.302457,0.242995,0.382108,-0.317260,0.099774,-0.351478,-0.528043,0.321086,-0.021379,-0.393868,0.468117,-0.196135,-0.618142,-0.189532,-0.217521,0.463738,-0.248135,-0.427282,0.254557,0.053086,0.178776,-0.249068,-0.113477,0.209370,-0.322717,-0.651838,-0.285306,0.237119,-0.153522,-0.240881,0.399367,0.377260,0.414907,-0.244392,0.133282,-0.324405,-0.547056,0.411707,0.001400,-0.326886,0.585583,-0.152407,-0.715424,-0.332681,-0.633072,-0.417705,-0.244604,0.337957,-0.719770,-0.327584,-0.564223,-0.425582,-0.334323,0.315761,-0.351134,0.200436,-0.156373,-0.421473,0.302080,0.017126,-0.557052,-0.357990,0.744331,-0.717668,0.056999,-0.629282,0.262178,0.767679,0.271737,-0.697028,-0.249388,-0.282540,-0.168972,0.0,-0.254217,-0.324788,0.0,-0.149211,0.0,-0.423610,-0.365159,0.0,-0.17236,0.082526,-0.107992,0.328012,0.403183,-0.314069,0.0,0.119456,-0.329644,-0.257083,-0.063856,-0.256938,-0.071223,-0.251023,0.363679,-0.073602,0.292033,0.0,-0.312828,-0.269665,0.0,-0.182915,-0.184444,-0.309297,-0.228575,-0.264178,-0.478070,-0.126293,-0.164424,0.049720,-0.340726,-0.010641,-0.102425,-0.412721,-0.322713,-0.263268,0.092827,-0.391247,-0.213014,-0.276976,0.361023,-0.476408,0.000540,-0.405789,-0.255117,0.302352,-0.035623,-0.486535,-0.156342,-0.114556,-0.203819,10002264.0,23205369.0
1,-0.128054,-0.417991,1.275651,-0.182491,-2.686965,-0.801902,-1.049778,-1.256173,3.396581,0.275733,2.296938,-0.571876,0.088468,-0.661990,-2.003305,0.499912,1.648569,-0.093299,-0.087731,-0.370677,0.101788,-0.484211,0.212417,-1.219678,-0.568664,-0.859517,-1.181776,-0.165073,0.425924,-1.132367,-0.220733,-0.332739,2.264269,0.114838,-1.673229,-0.814250,-0.689752,-0.668999,5.950545,0.276743,2.289741,0.182353,0.033400,0.419176,-1.215782,6.619085,1.255797,0.841899,0.793802,0.200396,0.329702,0.520675,0.572869,-0.530728,0.253913,0.647932,-0.832782,0.727252,2.633956,-1.634236,-0.205752,-0.388368,2.339607,0.096907,-2.416439,-0.824918,-0.953694,-1.032340,5.974544,0.408241,3.812576,0.471805,0.101561,-0.326131,-2.085329,3.319780,1.811471,0.187450,0.173823,-0.299891,0.228008,0.078402,0.253389,-0.832256,-0.317248,0.469320,-1.008734,0.065685,1.663226,-1.475636,0.173272,-1.030476,1.013267,1.251465,3.103596,-0.719770,1.061784,-0.301745,5.221614,1.988992,1.226347,-0.456379,-1.031173,-0.479006,1.662637,2.487877,2.044958,0.414748,0.280190,0.094598,-0.847545,-0.727240,-0.629282,0.219616,0.118317,1.581073,-0.697028,0.815672,1.936169,-0.168972,0.0,-0.254217,-0.324788,0.0,3.661750,0.0,0.874543,0.168645,0.0,-0.17236,-0.268160,-0.394533,-0.784727,-0.382583,1.671700,0.0,-0.311557,0.277121,0.319217,-0.063856,-0.256938,-0.420643,-0.251023,1.139546,-0.073602,0.961958,0.0,0.468286,-0.269665,0.0,-0.257824,-0.487542,1.695064,0.094993,-0.264178,-0.478070,-0.126293,-0.476508,6.133541,0.033979,2.299374,-0.102425,-0.412721,-0.322713,-0.263268,8.141481,1.146936,-0.213014,-0.276976,0.177616,-0.536590,-0.400082,-0.405789,-0.255117,0.250442,0.481166,-0.486535,-0.156342,2.795558,-0.203819,10002495.0,24982426.0
2,0.545964,-0.216228,-1.160633,0.013652,-0.045662,-0.489676,0.373431,0.571391,-0.411867,0.115844,-0.130925,0.168887,-0.059152,0.968556,0.643998,0.079226,-0.448623,-1.035779,-0.572921,0.436923,-0.201265,2.218639,-1.527191,-0.736163,0.544812,0.749214,-0.220854,-0.594219,-0.235324,-0.805180,1.525623,-0.165742,-0.880143,0.636104,1.122059,-0.360458,-0.004235,1.707148,-0.477949,0.276743,-0.199368,0.058323,-0.274233,0.103852,0.572356,-0.303242,-0.377891,-0.571336,-0.238961,-0.079160,-0.081661,3.505251,-1.358603,-0.817158,-0.055080,-0.167686,-0.093039,-0.316244,-0.012814,-0.313313,1.569273,-0.203980,-1.368700,0.583845,0.692056,-0.421006,0.164942,1.268288,-0.482053,0.178923,-0.092646,0.228357,-0.157374,0.797322,0.650272,-0.173561,-0.287786,-0.805474,-0.346874,0.203361,-0.143081,3.167526,-1.515991,-0.784913,0.268946,0.240666,-0.149173,-0.345653,-0.081170,-0.796298,0.283949,0.393554,-0.417705,0.659271,-0.673293,0.507546,-0.340269,1.458029,-0.395057,0.440115,0.183549,-0.456379,-0.305127,-0.479006,-0.495665,0.877290,-0.945242,0.894649,0.821676,1.521836,-0.847545,2.409715,1.170178,-0.674182,1.544740,0.737923,-0.697028,0.815672,-0.380805,0.110469,0.0,2.841278,-0.324788,0.0,-0.572651,0.0,-0.562698,-0.365159,0.0,-0.17236,-0.268160,-0.394533,-0.021706,-0.382583,-0.364986,0.0,-0.311557,1.605657,1.142502,-0.063856,-0.256938,-0.420643,0.833745,-0.498396,-0.073602,-0.417300,0.0,0.972756,-0.269665,0.0,1.537327,-0.288436,-0.309297,0.628892,-0.264178,-0.127268,0.264342,1.752662,-0.417313,0.033979,-0.269718,-0.102425,-0.053347,-0.322713,-0.263268,0.186208,-0.547098,-0.213014,-0.276976,-0.104547,-0.536590,8.680668,-0.405789,-0.255117,-0.186692,-0.325331,-0.486535,-0.156342,-0.193399,0.939443,10002527.0,29112696.0
3,-0.232250,-0.128577,-0.592167,-0.252351,-0.251097,-0.086385,-0.267795,-0.388080,0.838809,-0.171957,-0.472234,-0.831143,-0.421493,-0.778457,-0.732600,-0.133046,-0.426547,0.220392,0.433848,-0.491817,1.014736,0.113261,1.053228,-0.122471,-0.505037,-0.425926,0.674004,0.010738,0.255653,-0.368931,-0.145022,-0.127377,-0.031152,-0.078752,0.314531,-0.286585,0.152454,0.281460,1.294209,0.348906,-0.008394,-0.090513,0.187217,1.270550,0.065717,0.485321,0.614423,0.567869,0.547906,-0.149049,0.890651,0.183954,0.945367,0.570238,-0.120909,0.282165,0.518482,0.043762,-0.017153,0.499562,-0.184018,-0.138973,-0.272401,-0.116074,-0.059532,-0.195224,-0.063378,-0.180111,0.993719,-0.044511,-0.119124,-0.301402,-0.219632,0.217839,-0.529984,0.140294,-0.156664,0.370303,0.470179,-0.362553,0.967289,0.139166,1.037110,0.062227,-0.342706,0.326877,0.624778,-0.004823,0.140609,0.034005,0.002999,-0.036967,-0.140028,-0.383662,0.856697,-0.719770,0.382099,0.044978,-0.069453,0.180915,-0.330961,0.211119,0.944774,-0.021853,0.760783,0.407535,0.125480,-0.117143,-0.198123,-0.486564,0.624388,-0.247734,0.870268,-0.246713,-0.470770,-0.151977,0.710543,-0.033794,-0.463554,0.017322,0.0,1.160866,0.406847,0.0,0.380089,0.0,0.434098,0.382166,0.0,0.84144,0.624495,1.324712,1.091032,1.014335,0.144185,0.0,0.765976,-0.121440,-0.311969,-0.063856,-0.256938,0.033603,-0.251023,-0.175118,-0.073602,0.292033,0.0,-0.141959,-0.269665,0.0,-0.138799,-0.312531,-0.042049,-0.081800,1.213658,-0.478070,-0.126293,0.103076,0.901124,0.379860,-0.023649,-0.089203,0.134152,1.624679,0.639792,-0.381752,0.726816,0.053021,0.349966,-0.146872,0.921155,0.000540,0.537555,1.223540,-0.112926,0.267602,0.360951,-0.113893,-0.252237,0.939443,10005078.0,24046197.0
4,-0.213799,-0.229458,-0.348539,-0.118006,0.101077,1.539790,-0.314714,0.388634,-0.449481,-0.044045,-0.264634,0.057773,0.048208,0.060109,0.061591,-0.532504,-0.161639,-0.432311,-0.411191,0.033123,-0.053527,-0.185475,0.023960,-0.049324,0.063631,-0.035234,-0.352981,-0.440557,-0.172506,-0.150807,-0.229865,0.036235,-0.534257,-0.192178,-0.027115,1.032579,-0.415545,0.376506,-0.584043,-0.084071,-0.089933,-0.090513,-0.007618,0.103852,-0.142899,-0.672881,-0.299232,-0.492057,-0.460267,-0.204960,0.011831,0.046204,0.076205,0.526379,-0.196142,-0.558677,-0.117697,-0.524943,-0.082238,-0.414923,-0.235736,-0.055478,-0.590417,-0.210283,0.096499,1.304046,-0.244456,0.389604,-0.575307,-0.037537,-0.191964,-0.011353,0.102357,0.282941,-0.100729,-0.668655,-0.268068,-0.545886,-0.533309,-0.102721,0.014835,-0.052714,0.040478,0.330965,-0.080554,-0.426963,-0.251229,-0.604445,-0.131526,-0.324535,0.046783,0.310761,-0.328269,0.467810,-0.271125,2.225789,0.402185,-0.173021,-0.883463,-0.085397,-0.466338,-0.456379,-0.550052,-0.113600,0.246251,-0.733297,0.004147,0.190794,0.075082,0.265866,-0.036505,-0.244149,0.264803,0.247789,0.289488,-0.328662,-0.053209,0.273460,-0.117084,-0.168972,0.0,-0.165775,-0.141879,0.0,-0.043351,0.0,0.410917,-0.365159,0.0,-0.17236,-0.268160,-0.394533,-0.466802,0.490491,-0.110401,0.0,-0.096050,0.427820,0.352148,-0.063856,0.019530,-0.088694,0.020169,0.195574,-0.073602,0.173811,0.0,0.377156,0.038992,0.0,-0.243612,0.057781,-0.309297,-0.197730,-0.264178,-0.263293,-0.126293,0.459743,-0.479308,-0.071707,-0.054001,-0.102425,-0.115847,-0.265436,0.113007,-0.450828,-0.235396,-0.213014,-0.276976,-0.217413,0.105353,-0.400082,0.065883,0.839089,-0.208549,-0.607379,-0.084710,-0.156342,-0.048658,-0.203819,10006062.0,21916409.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8162,-0.029285,-0.080617,-0.348539,-0.032025,-0.075010,-0.583994,0.076278,-0.342391,-0.270813,0.003921,-0.247040,0.242963,0.316609,0.129990,-0.256085,-0.003753,0.500632,-0.051099,0.357026,1.527183,0.063906,0.582703,-0.077517,-0.066681,1.308338,-0.166997,-0.280912,0.444036,0.012644,-0.041745,0.280078,-0.232315,-0.062596,0.055542,0.221355,-0.566248,-0.317614,-0.360100,-0.289338,-0.165255,0.245882,0.095532,0.033400,0.135384,-0.321713,0.087679,0.426851,-0.312818,-0.238961,0.451997,-0.231247,-0.168073,-0.392867,-0.629188,0.303621,-0.363602,-0.536885,-0.229286,-0.435863,-0.516532,0.067084,-0.172136,-0.200644,0.038520,0.045153,-0.591325,-0.122588,-0.510733,-0.300558,-0.088549,0.026015,0.291785,0.261583,0.214808,-0.340100,0.046732,0.472845,-0.149172,0.113920,1.098311,-0.081870,0.236818,-0.239717,-0.398454,0.883105,-0.359675,-0.428251,0.179045,-0.234492,-0.286794,0.024462,0.325372,-0.035216,0.353090,-0.067187,-0.430990,0.099193,0.192756,-0.781712,-0.002421,0.237694,-0.047092,0.049854,-0.127294,0.737057,-0.330650,0.683705,-0.425079,-1.885755,0.899084,-0.847545,-0.727240,-0.629282,-0.674182,0.922344,-0.164810,-0.697028,-1.895390,-0.960048,-0.168972,0.0,1.426194,0.041030,0.0,-0.202141,0.0,-0.168616,0.328786,0.0,-0.17236,0.369450,-0.107992,-0.371424,-0.295276,0.296937,0.0,-0.311557,-0.972100,-1.107811,-0.063856,-0.256938,-0.420643,-0.251023,-0.498396,-0.073602,-0.417300,0.0,-1.175309,-0.269665,0.0,0.266243,-0.325213,-0.175673,0.054832,-0.067133,-0.202440,-0.126293,-0.253591,-0.466909,-0.158178,0.228925,-0.075982,-0.115847,-0.179522,-0.263268,-0.443153,0.354128,-0.213014,-0.276976,0.445672,-0.536590,-0.400082,-0.405789,-0.255117,0.269567,-0.384354,-0.486535,-0.156342,-0.358145,-0.203819,19991798.0,26825654.0
8163,-0.226824,-0.065733,0.057509,-0.138158,0.834772,-0.486424,1.139774,0.799836,-0.411867,0.915290,-0.377230,0.909650,1.162071,3.064972,1.438189,-0.536363,0.655162,0.427175,0.235729,-0.289917,0.783657,-0.341956,1.372156,-0.116273,-0.294272,-0.266585,-0.941545,-0.026639,0.078769,-0.150807,-0.227803,-0.092398,-1.194584,-0.206641,-0.430879,-0.558333,0.093696,-0.431384,-0.521172,0.186539,-0.725085,-0.189737,-0.274233,0.892162,0.274333,-0.676241,-0.196370,-0.088768,-0.386498,-0.232916,0.479289,-1.162932,0.986756,-0.664992,-0.247193,-0.264383,-1.079364,-0.820600,-0.392474,0.499562,-0.243829,-0.067570,-0.768165,-0.235648,0.237597,-0.533060,0.819400,0.256011,-0.526574,0.506520,-0.920472,0.228357,0.502589,2.639784,1.131737,-0.661133,0.161020,0.231646,-0.024318,-0.310313,0.645004,-0.792890,1.210595,-0.445583,-0.309436,-0.133337,-1.042223,-0.371564,-0.176635,0.254790,0.236516,-0.235668,-0.417705,-0.232137,-0.673293,0.016620,-0.985246,-0.692806,-0.028752,2.652796,-1.902048,-0.456379,-1.031173,-0.479006,-0.495665,-0.531974,2.044958,0.894649,0.821676,-0.257454,1.749984,-0.727240,2.369818,-0.674182,-0.233534,0.119613,-0.697028,0.815672,-0.960048,-0.168972,0.0,-0.254217,-0.324788,0.0,-0.572651,0.0,-0.748149,-0.365159,0.0,-0.17236,-0.268160,-0.394533,-0.784727,-0.382583,-0.364986,0.0,-0.311557,-0.456549,-0.119869,-0.063856,-0.256938,-0.420643,-0.251023,-0.498396,-0.073602,-0.062633,0.0,0.305554,-0.269665,0.0,-0.224663,0.005785,-0.309297,-0.212014,-0.264178,-0.399319,-0.126293,-0.476508,-0.425579,-0.062100,-0.627439,-0.102425,-0.412721,-0.322713,-0.263268,-0.426524,-0.479337,-0.213014,-0.276976,-0.245629,-0.402852,-0.400082,0.319860,-0.255117,-0.259093,-0.215395,-0.486535,-0.156342,-0.358145,-0.203819,19995478.0,24108472.0
8164,-0.421106,-0.599909,-0.754586,-0.464616,-2.100009,-0.834425,-1.362571,-3.312183,0.603719,-0.203935,1.698769,-1.683021,0.222669,0.735621,-5.974260,0.708324,-0.227866,-1.499986,-1.462435,0.033123,-1.299833,0.369320,-1.962093,0.193673,-0.011926,-0.108776,-1.542121,-1.189485,-0.086543,-0.041745,-0.323841,-0.727666,0.692063,-0.296308,-1.362642,-0.893400,-0.689752,-2.094687,0.885552,-0.084071,1.667463,0.430412,-0.069144,1.365147,-2.407875,1.400457,-0.256877,-1.295189,-1.075007,-0.358716,-1.353146,0.903313,-1.634527,0.525483,-0.377507,-0.659579,-1.128680,-0.507552,-0.088746,-0.618142,-0.373796,-0.708100,0.157661,-0.409723,-1.984198,-0.884888,-0.903848,-3.487804,0.741883,-0.185314,2.737365,-0.418806,-0.042017,1.531311,-4.926707,0.960671,-0.196095,-1.547303,-1.370859,-0.184143,-1.357089,0.720869,-1.867288,0.491763,-0.237110,-0.577349,-1.302696,-0.956362,-0.089849,-0.343406,-1.091611,-1.030476,-0.417705,-1.241983,1.891261,-0.719770,0.834336,3.130901,1.212613,-0.666225,1.226347,0.034765,-1.031173,-0.479006,5.439664,0.071997,-0.945242,0.894649,0.821676,1.521836,-0.847545,-0.727240,2.369818,-0.674182,1.544740,-1.117008,-0.697028,0.815672,0.005358,-0.168972,0.0,-0.254217,-0.324788,0.0,2.603149,0.0,1.338169,4.972877,0.0,-0.17236,-0.268160,2.470875,-0.784727,-0.382583,10.327619,0.0,-0.311557,2.260011,2.185330,-0.063856,-0.256938,-0.420643,2.460897,-0.498396,-0.073602,-0.417300,0.0,1.672505,-0.269665,0.0,-0.269667,-0.487542,-0.309297,-0.256937,-0.264178,-0.478070,-0.126293,-0.476508,0.954854,-0.350334,1.670651,-0.102425,-0.412721,-0.322713,-0.263268,1.056056,-0.547098,-0.213014,-0.276976,-0.386711,-0.536590,-0.400082,-0.405789,-0.255117,-0.391599,-0.690282,-0.486535,-0.156342,-0.158096,-0.203819,19996783.0,25894657.0
8165,-0.355983,-0.715675,-2.784823,-0.276533,-0.339140,-0.671808,-0.893381,-1.941510,-0.374253,-0.683602,-0.834654,-0.201494,-2.192937,-2.292536,-1.473845,-0.374264,-0.117488,-0.993579,-1.179407,0.436923,-0.314910,-1.337743,-0.077517,-1.604010,0.465278,-0.614377,1.520816,-1.258702,-0.499824,1.048878,-0.002733,0.231442,0.063181,0.052856,1.743234,-0.181052,0.877144,3.370452,-0.517243,-0.444886,0.734047,0.678472,1.058843,0.892162,4.148633,-0.490302,1.074276,0.566146,0.105294,1.318621,0.030529,0.061510,0.572869,-0.808207,1.409278,0.328412,3.013883,0.431595,1.538367,2.430141,-0.177797,-0.002986,-1.572453,-0.122491,0.834885,-0.567457,-0.242625,-1.135008,-0.488871,-0.584496,0.250448,0.358988,-0.969968,0.656087,-0.639369,-0.489003,0.911789,-0.825969,-0.925576,0.814205,-0.182012,-0.688055,0.156986,-1.298413,0.835608,-0.179570,2.268959,-0.982665,0.106270,1.921056,0.347738,0.202846,2.478311,1.466417,-0.573376,-0.110344,-0.010470,1.355608,-0.171204,-0.666225,0.928405,-0.097005,0.843679,-0.157770,2.472000,-0.531974,1.475396,0.817018,0.821676,1.521836,-0.847545,0.010867,-0.629282,1.831959,1.544740,-0.349450,2.118114,0.815672,-0.023036,-0.168972,0.0,-0.254217,8.820640,0.0,-0.572651,0.0,0.642730,1.770056,0.0,-0.17236,-0.268160,-0.394533,3.666228,5.728934,1.671700,0.0,-0.311557,1.546171,1.801130,-0.063856,-0.256938,1.501168,-0.251023,2.475762,-0.073602,-0.417300,0.0,1.753871,-0.269665,0.0,-0.038723,-0.246585,-0.309297,0.032888,0.721046,0.058871,-0.126293,4.873500,-0.454510,-0.350334,0.727567,0.162002,0.524776,-0.322713,5.757129,-0.139986,0.943652,-0.213014,-0.276976,1.306272,-0.536590,-0.400082,-0.405789,-0.255117,1.425242,0.138743,3.531716,-0.156342,1.607036,-0.203819,19997886.0,20793010.0


## Check for Multicollinearity

In [None]:
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant

In [None]:
%%time

labs_scaled_features = labs_scaled.loc[:, ('min', 50868) : ('above_max', 51484)]
add_constant(labs_scaled_features)

high_vif = pd.Series([variance_inflation_factor(labs_scaled_features.values, i) 
               for i in range(labs_scaled_features.shape[1])], 
              index=labs_scaled_features.columns).sort_values(ascending = False)

high_vif.head(20)


In [None]:
labs_scaled_features_limited = labs_scaled_features.copy()
labs_scaled_features_limited.drop([('above_max', 51484),
                                  ('above_max', 51478),
                                  ('max', 51237),
                                  ('mean', 51248)], axis = 1, inplace = True)


In [None]:
%%time

high_vif = pd.Series([variance_inflation_factor(labs_scaled_features_limited.values, i) 
               for i in range(labs_scaled_features_limited.shape[1])], 
              index=labs_scaled_features_limited.columns).sort_values(ascending = False)

high_vif.head(20)


## Save Lab Results for Models

In [78]:
CHF_final_test = labs_scaled.merge(admissions_sample, how='left', left_on='hadm_id', right_on='hadm_id')
CHF_KNN_final_test = labs_scaled_KNN.merge(admissions_sample, how='left', left_on='hadm_id', right_on='hadm_id')




In [79]:
# admissions_sample.to_pickle("admissions_sample.pkl")
# admissions_sample.to_pickle("admissions_sample.pkl")
CHF_final_test.to_csv('CHF_final_test.csv.gz', compression="gzip")
CHF_KNN_final_test.to_csv('CHF_KNN_final_test.csv.gz', compression="gzip")
