## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.utils import resample

## Import Tables

In [3]:
diagnoses_icd = pd.read_csv('./data/mimic-iv-0.4/hosp/diagnoses_icd.csv.gz', compression='gzip')
d_icd_diagnoses = pd.read_csv('./data/mimic-iv-0.4/hosp/d_icd_diagnoses.csv.gz', compression='gzip')
admissions = pd.read_csv('./data/mimic-iv-0.4/core/admissions.csv.gz', compression='gzip')
d_labitems = pd.read_csv('./data/mimic-iv-0.4/hosp/d_labitems.csv.gz', compression='gzip')


## Pull codes related to Acute Kidney Failure

In [4]:
# Congestive Heart Failure
# chf_codes = list(map(str, d_icd_diagnoses[(d_icd_diagnoses['long_title'].str.lower().str.contains('congestive')) & (d_icd_diagnoses['long_title'].str.lower().str.contains('heart'))]['icd_code']))

kidney_list = list(d_icd_diagnoses[(d_icd_diagnoses['long_title'].str.lower().str.contains('kidney')) & (d_icd_diagnoses['long_title'].str.lower().str.contains('failure'))]['icd_code'])
pd.DataFrame(diagnoses_icd[diagnoses_icd['icd_code'].isin(kidney_list)].icd_code.value_counts()).reset_index().head(20).merge(d_icd_diagnoses, left_index = True, left_on = "index", right_on = 'icd_code', how = 'left')

# ICD Codes we care about
# 5849, N179, 5845, N170
akf_codes = ['5849   ', 'N179   ', '5845   ', 'N170   ']


## Start filtering down to only records where CHF is True

In [5]:
diagnoses_icd_akf = diagnoses_icd[diagnoses_icd['icd_code'].isin(akf_codes)]


## Generate new Admissions table with hospital stay duration and CHF flag

In [6]:
admissions_akf = admissions[['subject_id', 'hadm_id', 'admittime', 'dischtime', 'ethnicity']]

admissions_akf['AKF'] = np.where(admissions_akf['subject_id'].isin(diagnoses_icd_akf.subject_id), 1, 0)
admissions_akf['time_spent'] = pd.to_datetime(admissions_akf['dischtime']) - pd.to_datetime(admissions_akf['admittime'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


## Sample only 5k patients for testing

In [7]:
admissions_akf_reduced = admissions_akf[['subject_id', 'AKF']].drop_duplicates()
# admissions_chf_reduced = admissions_chf[['subject_id', 'CHF', 'ethnicity']].drop_duplicates()
# admissions_chf_reduced

admissions_akf_sample = resample(admissions_akf_reduced, n_samples = 5000, replace = False, stratify = admissions_akf_reduced['AKF'], random_state = 0)

admissions_akf[admissions_akf['subject_id'].isin(admissions_akf_sample.subject_id)].groupby('AKF').subject_id.nunique()


AKF
0    4423
1     577
Name: subject_id, dtype: int64

Generate pickle file so we all work on the exact same data

In [8]:
admissions_akf_sample.to_pickle("./admissions_akf_sample.pkl")

## Pull Chart and Lab data only where it matches our sample patient list

In [40]:
%%time #run this for next time

chunksize = 3*(10 ** 6)
counter=0
chartevents_sample = []
for chunk in pd.read_csv('./data/mimic-iv-0.4/icu/chartevents.csv.gz', compression='gzip', chunksize=chunksize):
    chartevents_sample.append(chunk[chunk['subject_id'].isin(list(admissions_akf_sample.subject_id))])
    counter+=1
    print(f'chunk {counter} processed')

chartevents_final = pd.concat(chartevents_sample)


  interactivity=interactivity, compiler=compiler, result=result)


chunk 1 processed
chunk 2 processed
chunk 3 processed
chunk 4 processed


  interactivity=interactivity, compiler=compiler, result=result)


chunk 5 processed
chunk 6 processed
chunk 7 processed
chunk 8 processed
chunk 9 processed
chunk 10 processed
chunk 11 processed
chunk 12 processed
chunk 13 processed
chunk 14 processed
chunk 15 processed
chunk 16 processed
chunk 17 processed
chunk 18 processed
chunk 19 processed
chunk 20 processed
chunk 21 processed
chunk 22 processed
chunk 23 processed
chunk 24 processed
chunk 25 processed
chunk 26 processed
chunk 27 processed
chunk 28 processed
chunk 29 processed
chunk 30 processed
chunk 31 processed
chunk 32 processed
chunk 33 processed
chunk 34 processed
chunk 35 processed
chunk 36 processed
chunk 37 processed
chunk 38 processed
chunk 39 processed
chunk 40 processed
chunk 41 processed
chunk 42 processed
chunk 43 processed
chunk 44 processed
chunk 45 processed
chunk 46 processed
chunk 47 processed
chunk 48 processed
chunk 49 processed
chunk 50 processed
chunk 51 processed
chunk 52 processed
chunk 53 processed
chunk 54 processed
chunk 55 processed
chunk 56 processed
chunk 57 processe

In [41]:
chartevents_final.shape

(7602370, 10)

In [42]:
chartevents_final.to_pickle("./chartevents_final.pkl")

Load Lab Events table

In [43]:
%%time

chunksize = 3*(10 ** 6)
counter=0
lab_events_sample = []
for chunk in pd.read_csv('./data/mimic-iv-0.4/hosp/labevents.csv.gz', compression='gzip', chunksize=chunksize):
    lab_events_sample.append(chunk[chunk['subject_id'].isin(list(admissions_akf_sample.subject_id))])
    counter+=1
    print(f'chunk {counter} processed')

lab_events_final = pd.concat(lab_events_sample)

chunk 1 processed
chunk 2 processed
chunk 3 processed




chunk 4 processed
chunk 5 processed
chunk 6 processed
chunk 7 processed
chunk 8 processed
chunk 9 processed




chunk 10 processed




chunk 11 processed
chunk 12 processed
chunk 13 processed
chunk 14 processed
chunk 15 processed




chunk 16 processed
chunk 17 processed
chunk 18 processed
chunk 19 processed
chunk 20 processed
chunk 21 processed
chunk 22 processed
chunk 23 processed




chunk 24 processed
chunk 25 processed
chunk 26 processed
chunk 27 processed
chunk 28 processed
chunk 29 processed
chunk 30 processed
chunk 31 processed
chunk 32 processed
chunk 33 processed
chunk 34 processed
chunk 35 processed
chunk 36 processed
chunk 37 processed
chunk 38 processed
chunk 39 processed
chunk 40 processed
chunk 41 processed
CPU times: user 3min 33s, sys: 18.5 s, total: 3min 51s
Wall time: 3min 52s


In [45]:
lab_events_final.shape

(2337576, 15)

In [46]:
lab_events_final.to_pickle("./lab_events_final.pkl")

## Manipulating the Data

Take only records related to our sample patients

In [10]:
# This code was from before filtering in the chunking process
# Leaving this code here.  Next we will want to filter on specific hospitalizations

# %time lab_events_sampled = lab_events_final[lab_events_final['subject_id'].isin(admissions_chf_sample.subject_id)]

CPU times: user 37.6 s, sys: 1min 43s, total: 2min 20s
Wall time: 3min 19s


In [152]:
lab_events_sampled = lab_events_final.copy()

Check how many patients have had each test done

In [153]:
lab_events_sampled.groupby('itemid')['subject_id'].nunique().sort_values(ascending = False).head(10)

itemid
51221    3870
51265    3867
51222    3859
51277    3858
51279    3858
51248    3858
51249    3858
51250    3858
51301    3858
50885    3615
Name: subject_id, dtype: int64

Most common tests performed on patients with AKF

In [154]:
lab_events_sampled[lab_events_sampled['subject_id'].isin(list(admissions_akf_sample[admissions_akf_sample['AKF'] == 1].subject_id))].groupby('itemid')['subject_id'].nunique().sort_values(ascending = False).head(10)


itemid
50882    577
51222    577
51006    577
50902    577
51279    577
51277    577
50912    577
50983    577
50920    577
51265    577
Name: subject_id, dtype: int64

Item Black List

In [155]:
item_black_list = [50920]
# 50920 - test results are text in comments - need to come back and figure out how to handle this

Take only the X most commonly performed tests

In [156]:
top_test_num = 10

lab_events_valid = lab_events_sampled[~lab_events_sampled['itemid'].isin(item_black_list)]

# On patients with AKF
itemid_sub_sample = lab_events_valid[lab_events_valid['subject_id'].isin(list(admissions_akf_sample[admissions_akf_sample['AKF'] == 1].subject_id))].groupby('itemid')['subject_id'].nunique().sort_values(ascending = False).head(top_test_num).reset_index().rename(columns = {'index' : 'itemid'}).itemid.to_list()
# On all sample patients
# itemid_sub_sample = lab_events_sampled.groupby('itemid')['subject_id'].nunique().sort_values(ascending = False).head(top_test_num).reset_index().rename(columns = {'index' : 'itemid'}).itemid.to_list()

lab_events_sampled_sub = lab_events_sampled[lab_events_sampled['itemid'].isin(itemid_sub_sample)]

lab_events_sampled_sub


Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
1630,30338422,12455866,,45747115,51221,2134-06-20 15:20:00,2134-06-20 20:30:00,39.6,39.6,%,34.0,45.0,,ROUTINE,
1631,30338423,12455866,,45747115,51222,2134-06-20 15:20:00,2134-06-20 20:30:00,12.8,12.8,g/dL,11.2,15.7,,ROUTINE,
1632,30338424,12455866,,45747115,51248,2134-06-20 15:20:00,2134-06-20 20:30:00,32.1,32.1,pg,26.0,32.0,abnormal,ROUTINE,
1638,30338430,12455866,,45747115,51301,2134-06-20 15:20:00,2134-06-20 20:30:00,6.6,6.6,K/uL,4.0,10.0,,ROUTINE,
1640,30338434,12455866,,26864207,50882,2134-10-13 09:15:00,2134-10-13 20:40:00,26,26.0,mEq/L,22.0,32.0,,ROUTINE,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122208734,29507756,12390274,28385235.0,31586721,51301,2191-11-09 06:45:00,2191-11-09 07:31:00,7.7,7.7,K/uL,4.0,10.0,,ROUTINE,
122208736,29507759,12390274,28385235.0,72916201,50882,2191-11-09 06:45:00,2191-11-09 09:08:00,30,30.0,mEq/L,22.0,32.0,,ROUTINE,
122208738,29507761,12390274,28385235.0,72916201,50902,2191-11-09 06:45:00,2191-11-09 09:08:00,87,87.0,mEq/L,96.0,108.0,abnormal,ROUTINE,
122208739,29507762,12390274,28385235.0,72916201,50912,2191-11-09 06:45:00,2191-11-09 09:08:00,0.9,0.9,mg/dL,0.4,1.1,,ROUTINE,


In [157]:
lab_events_sampled_sub[lab_events_sampled_sub['itemid'] == 50920].comments.value_counts()

Series([], Name: comments, dtype: int64)

## Code to look into specific test values

In [158]:
lab_events_sampled_sub.groupby(['subject_id', 'hadm_id', 'itemid']).count()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,labevent_id,specimen_id,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
subject_id,hadm_id,itemid,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
10010424,28388172.0,50882,3,3,3,3,3,3,3,3,3,0,3,0
10010424,28388172.0,50902,4,4,4,4,4,4,4,4,4,1,4,0
10010424,28388172.0,50912,4,4,4,4,4,4,4,4,4,0,4,0
10010424,28388172.0,50931,3,3,3,3,3,3,3,3,3,3,3,3
10010424,28388172.0,50971,4,4,4,4,4,4,4,4,4,0,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19988493,25600709.0,51006,33,33,33,33,33,33,33,33,33,1,33,0
19988493,25600709.0,51221,19,19,19,19,19,19,19,19,19,19,19,0
19988493,25600709.0,51222,19,19,19,19,19,19,19,19,19,19,19,0
19988493,25600709.0,51248,19,19,19,19,19,19,19,19,19,0,19,0


In [159]:
lab_events_sampled_sub_tiny = lab_events_sampled_sub[(lab_events_sampled_sub['subject_id'] == 19988493) & (lab_events_sampled_sub['hadm_id'] == 25600709.0) & (lab_events_sampled_sub['itemid'] == 51006)]#.valuenum.describe()


In [160]:
# lab_events_sampled_sub_tiny['comments'] = lab_events_sampled_sub_tiny['comments'].replace(np.nan, 'NA')

lab_events_sampled_sub_tiny


Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
116765230,124205275,19988493,25600709.0,89289838,51006,2116-09-26 23:36:00,2116-09-27 00:47:00,14,14.0,mg/dL,6.0,20.0,,STAT,
116765245,124205291,19988493,25600709.0,42351361,51006,2116-09-27 13:35:00,2116-09-27 14:07:00,9,9.0,mg/dL,6.0,20.0,,STAT,
116765268,124205314,19988493,25600709.0,37219778,51006,2116-09-27 20:02:00,2116-09-27 21:34:00,12,12.0,mg/dL,6.0,20.0,,ROUTINE,
116765291,124205337,19988493,25600709.0,36073753,51006,2116-09-28 03:53:00,2116-09-28 05:06:00,11,11.0,mg/dL,6.0,20.0,,ROUTINE,
116765316,124205363,19988493,25600709.0,65054321,51006,2116-09-28 12:22:00,2116-09-28 13:16:00,12,12.0,mg/dL,6.0,20.0,,ROUTINE,
116765327,124205375,19988493,25600709.0,99705869,51006,2116-09-28 17:53:00,2116-09-28 19:34:00,11,11.0,mg/dL,6.0,20.0,,ROUTINE,
116765348,124205396,19988493,25600709.0,80468571,51006,2116-09-29 02:55:00,2116-09-29 03:45:00,10,10.0,mg/dL,6.0,20.0,,STAT,
116765359,124205407,19988493,25600709.0,48965201,51006,2116-09-29 19:25:00,2116-09-29 20:34:00,9,9.0,mg/dL,6.0,20.0,,STAT,
116765375,124205423,19988493,25600709.0,32568583,51006,2116-09-30 06:20:00,2116-09-30 08:37:00,10,10.0,mg/dL,6.0,20.0,,ROUTINE,
116765421,124205481,19988493,25600709.0,83738250,51006,2116-09-30 10:55:00,2116-09-30 11:42:00,10,10.0,mg/dL,6.0,20.0,,ROUTINE,


## Get Aggregate Test Values

In [161]:
%time lab_events_sampled_sub_grouped = lab_events_sampled_sub.groupby(['subject_id', 'hadm_id', 'itemid']).agg({'itemid' : ['count'], 'valuenum' : ['min', 'max', 'mean'], 'flag' : ['count'], 'ref_range_lower' : ['min'], 'ref_range_upper' : ['min']})
# 'comments' : ['unique'], 


CPU times: user 179 ms, sys: 27.2 ms, total: 206 ms
Wall time: 205 ms


In [162]:
lab_events_sampled_sub_grouped.reset_index(inplace = True)
lab_events_sampled_sub_grouped.columns = ['subject_id', 'hadm_id', 'itemid', 'count', 'min', 'max', 'mean', 'abn_percent', 'range_min', 'range_max']
# 'comments', 


In [163]:
# lab_events_sampled_sub_grouped[lab_events_sampled_sub_grouped['itemid'] == 51221]

missing_vals = lab_events_sampled_sub_grouped.isnull().sum(axis = 0) 
missing_vals[missing_vals > 0].sort_values()

min     6
max     6
mean    6
dtype: int64

In [164]:
lab_events_sampled_sub_grouped['below_min'] = np.where(lab_events_sampled_sub_grouped['min'] < lab_events_sampled_sub_grouped['range_min'], lab_events_sampled_sub_grouped['range_min'] - lab_events_sampled_sub_grouped['min'], 0)
lab_events_sampled_sub_grouped['above_max'] = np.where(lab_events_sampled_sub_grouped['max'] > lab_events_sampled_sub_grouped['range_max'], lab_events_sampled_sub_grouped['max'] - lab_events_sampled_sub_grouped['range_max'], 0)
lab_events_sampled_sub_grouped['abn_percent'] = lab_events_sampled_sub_grouped['abn_percent'] / lab_events_sampled_sub_grouped['count']


In [165]:
lab_events_sampled_sub_grouped

Unnamed: 0,subject_id,hadm_id,itemid,count,min,max,mean,abn_percent,range_min,range_max,below_min,above_max
0,10010424,28388172.0,50882,3,24.0,25.0,24.666667,0.000000,22.0,32.0,0.0,0.0
1,10010424,28388172.0,50902,4,105.0,109.0,106.750000,0.250000,96.0,108.0,0.0,1.0
2,10010424,28388172.0,50912,4,0.9,1.0,0.950000,0.000000,0.5,1.2,0.0,0.0
3,10010424,28388172.0,50931,3,106.0,143.0,121.333333,1.000000,70.0,100.0,0.0,43.0
4,10010424,28388172.0,50971,4,3.7,4.2,3.950000,0.000000,3.3,5.1,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
68821,19988493,25600709.0,51006,33,5.0,14.0,10.090909,0.030303,6.0,20.0,1.0,0.0
68822,19988493,25600709.0,51221,19,27.4,38.5,32.657895,1.000000,40.0,52.0,12.6,0.0
68823,19988493,25600709.0,51222,19,9.2,13.1,10.931579,1.000000,14.0,18.0,4.8,0.0
68824,19988493,25600709.0,51248,19,27.1,28.6,27.921053,0.000000,27.0,32.0,0.0,0.0


## Merge Admissions data so we can group by ethnicity/gender to grab average range min and max

In [166]:
# Here is where we merge Admissions

# lab_events_sampled_sub_grouped.merge(d_icd_diagnoses, left_index = True, left_on = "index", right_on = 'icd_code', how = 'left')


In [167]:
# For now aggregate all patients together

lab_range_min_max = lab_events_sampled_sub_grouped.groupby('itemid').agg({'range_min' : 'mean', 'range_max' : 'mean'})
lab_range_min_max.reset_index(inplace = True)

lab_range_dic = dict(zip(lab_range_min_max['itemid'], zip(lab_range_min_max['range_min'], lab_range_min_max['range_max'])))
lab_range_dic


{50882: (22.0, 32.0),
 50902: (96.0, 108.0),
 50912: (0.44819973329384916, 1.148199733293838),
 50931: (69.96657550896384, 100.8189000303859),
 50971: (3.329904341427168, 5.144856512140841),
 51006: (6.0, 20.0),
 51221: (37.45029561391448, 49.24364086346762),
 51222: (12.66962524654769, 16.779867568328832),
 51248: (26.485613540197463, 31.96191819464034),
 51301: (4.0418722684336466, 10.904130833215847)}

Pivot the table so we have feature columns related to test results

In [168]:
lab_events_sampled_sub_grouped['new_index'] = list(zip(lab_events_sampled_sub_grouped['subject_id'], lab_events_sampled_sub_grouped['hadm_id']))


In [170]:
%time lab_events_sampled_pivot = lab_events_sampled_sub_grouped.pivot(index = 'new_index', columns = 'itemid', values = ['min', 'max', 'mean', 'abn_percent', 'below_min', 'above_max'])
# 'comments', 


CPU times: user 38.3 ms, sys: 4.05 ms, total: 42.4 ms
Wall time: 42.6 ms


In [171]:
pd.options.display.max_columns = 500

lab_events_sampled_pivot = lab_events_sampled_pivot.reset_index()
lab_events_sampled_pivot['subject_id'], lab_events_sampled_pivot['hadm_id'] = zip(*lab_events_sampled_pivot['new_index'])
lab_events_sampled_pivot.drop(['new_index'], axis = 1, inplace = True)


  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


In [172]:
# missing_vals = lab_events_sampled_pivot.isnull().sum(axis = 0) 
# missing_vals[missing_vals > 0].sort_values()

## Start Data Imputation

Copy table, retain original for Random Forest

In [173]:
lab_events_impute = lab_events_sampled_pivot.copy()

In [174]:
d_labitems = pd.read_csv('./data/mimic-iv-0.4/hosp/d_labitems.csv.gz', compression='gzip')


In [175]:
# lab_events_impute.columns

d_labitems[d_labitems['itemid'].isin(itemid_sub_sample)]

Unnamed: 0,itemid,label,fluid,category,loinc_code
212,50882,Bicarbonate,Blood,Chemistry,1963-8
442,50902,Chloride,Blood,Chemistry,2075-0
512,50912,Creatinine,Blood,Chemistry,2160-0
723,50931,Glucose,Blood,Chemistry,6777-7
761,51221,Hematocrit,Blood,Hematology,4544-3
771,51222,Hemoglobin,Blood,Hematology,718-7
1013,51248,MCH,Blood,Hematology,785-6
1233,50971,Potassium,Blood,Chemistry,2823-3
1506,51006,Urea Nitrogen,Blood,Chemistry,3094-0
1598,51301,White Blood Cells,Blood,Hematology,804-5


In [176]:
%%time

np.random.seed(0)

for labitem in lab_range_dic:
    for ind in lab_events_sampled_pivot[lab_events_sampled_pivot[('mean', labitem)].isnull()].index:
        val_max = lab_range_dic[labitem][1]
        val_min = lab_range_dic[labitem][0]

        val_ave = (val_max + val_min) / 2
        val_std = (val_max - val_ave) * .683

        ran_vals = np.random.normal(val_ave, val_std, 100)
        impute_min = min(ran_vals)
        impute_max = max(ran_vals)
        impute_mean = np.mean(ran_vals)
        
        val_min = lab_events_sampled_pivot[('min', labitem)][ind]
        val_max = lab_events_sampled_pivot[('max', labitem)][ind]
        val_mean = lab_events_sampled_pivot[('mean', labitem)][ind]
        
        lab_events_sampled_pivot[('min', labitem)][ind] = np.where(np.isnan(val_min), impute_min, val_min)
        lab_events_sampled_pivot[('max', labitem)][ind] = np.where(np.isnan(val_max), impute_max, val_max)
        lab_events_sampled_pivot[('mean', labitem)][ind] = np.where(np.isnan(val_mean), impute_mean, val_mean)
#         patient[('mean', labitem)].replace(np.nan, impute_mean)
#         patient[('abn_count', labitem)].replace(np.nan, 0)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


CPU times: user 2.55 s, sys: 8.45 ms, total: 2.56 s
Wall time: 2.56 s


In [179]:
for lab_itemid in lab_range_dic:
    lab_events_sampled_pivot[( 'above_max', lab_itemid)] = lab_events_sampled_pivot[( 'above_max', lab_itemid)].replace(np.nan, 0)
    lab_events_sampled_pivot[( 'below_min', lab_itemid)] = lab_events_sampled_pivot[( 'below_min', lab_itemid)].replace(np.nan, 0)
    lab_events_sampled_pivot[( 'abn_percent', lab_itemid)] = lab_events_sampled_pivot[( 'abn_percent', lab_itemid)].replace(np.nan, 0)
#     lab_events_sampled_pivot[( 'comments', lab_itemid)] = lab_events_sampled_pivot[( 'comments', lab_itemid)].replace(np.nan, np.array(np.nan))


In [180]:
missing_vals = lab_events_sampled_pivot.isnull().sum(axis = 0) 
missing_vals[missing_vals > 0].sort_values()

Series([], dtype: int64)

In [181]:
lab_events_sampled_pivot

Unnamed: 0_level_0,min,min,min,min,min,min,min,min,min,min,max,max,max,max,max,max,max,max,max,max,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,abn_percent,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,below_min,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,above_max,subject_id,hadm_id
itemid,50882,50902,50912,50931,50971,51006,51221,51222,51248,51301,50882,50902,50912,50931,50971,51006,51221,51222,51248,51301,50882,50902,50912,50931,50971,51006,51221,51222,51248,51301,50882,50902,50912,50931,50971,51006,51221,51222,51248,51301,50882,50902,50912,50931,50971,51006,51221,51222,51248,51301,50882,50902,50912,50931,50971,51006,51221,51222,51248,51301,Unnamed: 61_level_1,Unnamed: 62_level_1
0,24.0,105.0,0.9,106.0,3.7,11.0,36.4,12.9,32.2,9.7,25.0,109.0,1.0,143.0,4.2,15.0,39.0,13.4,32.7,10.5,24.666667,106.750000,0.950000,121.333333,3.950000,12.750000,37.720000,13.150000,32.450000,10.100000,0.00000,0.2500,0.0,1.000000,0.000000,0.000000,1.0,1.0,1.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,3.6,1.1,0.0,0.0,0.0,1.0,0.0,43.0,0.0,0.0,0.0,0.0,0.7,0.0,10010424,28388172.0
1,28.0,98.0,1.0,87.0,4.4,12.0,46.1,15.3,28.9,7.5,28.0,104.0,1.1,94.0,4.6,20.0,46.1,15.3,28.9,7.5,28.000000,101.000000,1.033333,90.500000,4.500000,16.000000,46.100000,15.300000,28.900000,7.500000,0.00000,0.0000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10011259,24468341.0
2,23.0,104.0,0.8,103.0,4.5,10.0,38.2,13.8,33.3,6.3,23.0,104.0,0.8,103.0,4.5,10.0,38.2,13.8,33.3,6.3,23.000000,104.000000,0.800000,103.000000,4.500000,10.000000,38.200000,13.800000,33.300000,6.300000,0.00000,0.0000,0.0,1.000000,0.000000,0.000000,1.0,0.0,1.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.8,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.3,0.0,10011279,29504188.0
3,21.0,103.0,1.2,127.0,3.9,14.0,35.3,12.5,30.7,2.1,26.0,106.0,1.3,141.0,4.3,17.0,36.5,12.6,31.2,3.8,23.500000,104.500000,1.250000,134.000000,4.100000,15.500000,35.900000,12.550000,30.950000,2.950000,0.50000,0.0000,0.5,1.000000,0.000000,0.000000,1.0,1.0,0.000000,1.000000,1.0,0.0,0.0,0.0,0.0,0.0,4.7,1.5,0.0,1.9,0.0,0.0,0.1,36.0,0.0,0.0,0.0,0.0,0.0,0.0,10014179,21090004.0
4,27.0,104.0,1.3,191.0,3.9,15.0,40.1,13.1,29.0,3.9,27.0,104.0,1.3,191.0,3.9,15.0,40.1,13.1,29.0,3.9,27.000000,104.000000,1.300000,191.000000,3.900000,15.000000,40.100000,13.100000,29.000000,3.900000,0.00000,0.0000,1.0,1.000000,0.000000,0.000000,0.0,1.0,0.000000,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9,0.0,0.1,0.0,0.0,0.1,91.0,0.0,0.0,0.0,0.0,0.0,0.0,10014179,21448325.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7364,23.0,97.0,0.8,121.0,3.5,10.0,40.1,12.5,25.8,2.6,26.0,104.0,0.9,172.0,3.8,13.0,42.1,13.2,26.0,3.8,24.000000,100.666667,0.866667,138.666667,3.600000,12.000000,41.400000,12.900000,25.866667,3.133333,0.00000,0.0000,0.0,1.000000,0.000000,0.000000,0.0,0.0,0.666667,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,1.4,0.0,0.0,0.0,72.0,0.0,0.0,0.0,0.0,0.0,0.0,19986230,22442009.0
7365,27.0,105.0,0.7,123.0,4.0,15.0,36.2,11.0,26.8,8.4,27.0,105.0,0.7,123.0,4.0,15.0,39.7,12.4,27.7,10.2,27.000000,105.000000,0.700000,123.000000,4.000000,15.000000,37.950000,11.700000,27.250000,9.300000,0.00000,0.0000,0.0,1.000000,0.000000,0.000000,0.0,0.5,0.000000,0.500000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,23.0,0.0,0.0,0.0,0.0,0.0,0.2,19986230,25365757.0
7366,26.0,105.0,0.7,107.0,3.5,14.0,32.6,10.6,28.1,8.4,26.0,105.0,0.7,107.0,3.5,14.0,32.6,10.6,28.1,8.4,26.000000,105.000000,0.700000,107.000000,3.500000,14.000000,32.600000,10.600000,28.100000,8.400000,0.00000,0.0000,0.0,1.000000,0.000000,0.000000,1.0,1.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,3.4,1.4,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,19986230,25934775.0
7367,24.0,100.0,0.7,112.0,3.4,11.0,40.5,12.5,27.0,4.4,25.0,105.0,0.9,135.0,3.8,12.0,43.9,13.7,27.1,5.1,24.666667,103.000000,0.800000,122.000000,3.566667,11.666667,41.733333,12.933333,27.033333,4.766667,0.00000,0.0000,0.0,1.000000,0.333333,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.0,0.0,0.0,0.0,0.0,0.0,0.0,19986230,28928599.0
