In [1]:
import pandas as pd
import numpy as np
import os
from tqdm import notebook

## Version as of 2021-1-1

input_dir = '../../../og_input_data/mimic_eicu_rawdata/' # '../ghhur_data_input/' in old system
cohort_dir = '../../../KyungHoon_Dec/cohort/'
output_dir = '../../../output/PrePr1_output_Wes/'


In [2]:
sources = ['mimic','eicu']
items= ['lab','med','inf'] # items = ['dx','lab','med','trt','chart','inf']

mimic_csv_files = {'dx':['DIAGNOSES_ICD'], 'lab':['LABEVENTS'], 'med':['PRESCRIPTIONS'],  # mimic dictionary
                   'trt':['PROCEDURES_ICD','PROCEDUREEVENTS_MV'], 'chart':['CHARTEVENTS'],
                  'inf':['INPUTEVENTS_CV', 'INPUTEVENTS_MV']} 
eicu_csv_files = {'dx':['diagnosis'], 'lab':['lab'], 'med':['medication'], # eicu dictionary
                   'trt':['treatment'], 'inf':['infusionDrug']}

mimic_dictionary_file = {'DIAGNOSES_ICD':'D_ICD_DIAGNOSES', 'LABEVENTS':'D_LABITEMS', 
                         'PROCEDURES_ICD':'D_ICD_PROCEDURES', 'PROCEDURES_MV':'D_ITEMS',
                        'CHARTEVENTS':'D_ITEMS', 'INPUTEVENTS_CV':'D_ITEMS', 'INPUTEVENTS_MV':'D_ITEMS'}
#eicu chartevent 없음

In [3]:
mimic_columns_map = {'DIAGNOSES_ICD':  # used to rename
                         {'HADM_ID':'ID','SEQ_NUM':'order_offset','ICD9_CODE':'code_name'}, 
                      'LABEVENTS':
                         {'HADM_ID':'ID','CHARTTIME':'order_time','ITEMID':'code_name',
                          'VALUE':'value','VALUEUOM':'value_uom','FLAG':'issue'},
                     'PRESCRIPTIONS':
                         {'HADM_ID':'ID','STARTDATE':'start_time', 'ENDDATE':'end_time', 
                          'DRUG':'code_name','DOSE_VAL_RX':'value','DOSE_UNIT_RX':'value_uom',
                         'ROUTE':'route', 'DRUG_TYPE':'drug_type','FORM_VAL_DISP':'val_disp' ,
                          'FORM_UNIT_DISP':'unit_disp'},
                     'PROCEDURES_ICD':
                         {'HADM_ID':'ID','SEQ_NUM':'order_offset','ICD9_CODE':'code_name'},
                     'PROCEDUREEVENTS_MV':
                         {'HADM_ID':'ID','STARTTIME':'start_time', 'ENDTIME':'end_time',
                          'ICD9_CODE':'code_name','ITEMID':'code_name','VALUE':'value','VALUEUOM':'value_uom',
                          'ORDERCATEGORYNAME':'order_category', 'STATUSDESCRIPTION':'issue'},
                      'CHARTEVENTS':
                         {'HADM_ID':'ID','CHARTTIME':'order_time','ITEMID':'code_name',
                          'VALUE':'value','VALUEUOM':'value_uom', 'STOPPED':'stopped', 
                          'RESULTSTATUS':'resultsatus' ,'ERROR':'error','WARNING':'issue'},                             
                      'INPUTEVENTS_CV': 
                         {'HADM_ID':'ID','CHARTTIME':'order_time', 
                          'ITEMID':'code_name', 'AMOUNT':'value', 'AMOUNTUOM':'value_uom', 
                          'RATE':'rate','RATEUOM':'rateuom', 'STOPPED':'issue'},
                      'INPUTEVENTS_MV': 
                         {'HADM_ID':'ID', 'STARTTIME':'start_time', 'ENDTIME':'end_time', 
                          'ITEMID':'code_name', 'AMOUNT':'value', 'AMOUNTUOM':'value_uom', 
                          'PATIENTWEIGHT':'patient_weight',
                          'RATE':'rate','RATEUOM':'rateuom','STOPPED':'issue'}
                    }

In [4]:
eicu_columns_map =  {'diagnosis':
                         {'patientunitstayid':'ID', 'diagnosisoffset':'order_offset','diagnosisstring':'code_name'},
                      'lab':
                         {'patientunitstayid':'ID', 'labresultoffset':'order_offset','labname':'code_name',
                          'labresulttext':'value','labmeasurenamesystem':'value_uom'},
                     'medication':
                         {'patientunitstayid':'ID','drugstartoffset':'start_offset', 'drugstopoffset':'end_offset', 
                          'drugname':'code_name','routeadmin':'route','dosage':'value',
                         'routeadmin':'route', 'ordercancelled':'issue'},
                     'treatment':
                         {'patientunitstayid':'ID','treatmentoffset':'order_offset','treatmentstring':'code_name'},        
                      'infusionDrug':
                         {'patientunitstayid':'ID','infusionoffset':'order_offset', 'drugname':'code_name',
                          'drugamount':'value', 'patientweight':'patient_weight','drugrate':'drugrate','infusionrate':'infusionrate'}
                    }

In [5]:
issue_map = {'LABEVENTS': ['abnormal'],
             'PROCEDUREEVENTS_MV':['Rewritten','Stopped','Paused '], 
             'CHARTEVENTS': [1] ,                             
             'INPUTEVENTS_CV':['Restart','NotStopd'] ,
             'INPUTEVENTS_MV': ['Rewritten', 'Changed', 'Paused', 'Flushed', 'Stopped'],
             'medication': ['Yes'],          
            }

# Cohort check

In [6]:
mimic_cohort = pd.read_pickle(os.path.join(cohort_dir, 'mimic_cohort.pk'))
mimic_cohort.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,ICUSTAY_ID,DBSOURCE,FIRST_CAREUNIT,LAST_CAREUNIT,FIRST_WARDID,LAST_WARDID,INTIME,OUTTIME,...,DOD_SSN,EXPIRE_FLAG,age,readmission,mortality,los>3day,los>7day,ICD9_CODE,12h_obs,24h_obs
0,58526,100001,275225,metavision,MICU,MICU,52,52,2117-09-11 11:47:35,2117-09-15 17:57:14,...,NaT,0,35,0,0,1,0,"[25013, 3371, 5849, 5780, V5867, 25063, 5363, ...",2117-09-11 23:47:35,2117-09-12 11:47:35
1,54610,100003,209281,metavision,MICU,MICU,50,50,2150-04-17 15:35:42,2150-04-19 14:12:52,...,2150-12-28,1,59,0,0,0,0,"[53100, 2851, 07054, 5715, 45621, 53789, 4019,...",2150-04-18 03:35:42,2150-04-18 15:35:42
2,9895,100006,291788,carevue,MICU,MICU,15,15,2108-04-06 15:50:15,2108-04-11 15:18:03,...,NaT,1,48,0,0,1,0,"[49320, 51881, 486, 20300, 2761, 7850, 3090, V...",2108-04-07 03:50:15,2108-04-07 15:50:15
3,68591,100016,217590,metavision,MICU,MICU,52,23,2188-05-24 13:07:20,2188-05-30 17:16:33,...,2188-07-06,1,55,0,0,1,0,"[5070, 51881, 25541, 47874, 7580, 34590, 2512,...",2188-05-25 01:07:20,2188-05-25 13:07:20
4,16229,100017,258320,carevue,MICU,MICU,15,15,2103-03-11 00:54:00,2103-03-11 17:31:00,...,NaT,0,27,0,0,0,0,"[9696, 51881, 78009, 2760, E9503, 29634, 30470...",2103-03-11 12:54:00,2103-03-12 00:54:00


In [7]:
eicu_meds = pd.read_csv(os.path.join(input_dir, 'eicu/medication.csv'), nrows=10000)
eicu_meds.head()

Unnamed: 0,medicationid,patientunitstayid,drugorderoffset,drugstartoffset,drugivadmixture,drugordercancelled,drugname,drughiclseqno,dosage,routeadmin,frequency,loadingdose,prn,drugstopoffset,gtc
0,7426715,141168,309,666,No,No,METOPROLOL TARTRATE 25 MG PO TABS,2102.0,25 3,PO,Q12H SCH,,No,1826,0
1,9643232,141168,1847,1832,No,No,3 ML - IPRATROPIUM-ALBUTEROL 0.5-2.5 (3) MG/...,,3 1,NEBULIZATION,Q4H Resp PRN,,Yes,2047,0
2,10270090,141168,296,1386,No,No,ASPIRIN EC 81 MG PO TBEC,1820.0,81 3,PO,Daily,,No,2390,0
3,9496768,141168,2048,2029,No,No,3 ML - IPRATROPIUM-ALBUTEROL 0.5-2.5 (3) MG/...,,3 1,NEBULIZATION,Q4H Resp PRN,,Yes,2390,0
4,11259680,141168,117,246,No,No,ENOXAPARIN SODIUM 40 MG/0.4ML SC SOLN,,40 3,SC,Daily,,No,1721,0


In [8]:
'''
input args : src, code_type -> file
file -> data load, dictionary_file, issue_map
'''

'\ninput args : src, code_type -> file\nfile -> data load, dictionary_file, issue_map\n'

# Input args

# Preprocessing for files

In [9]:
def pre_processing_1st(src, item, file, columns_map ,mimic_dictionary_file):
        print('preprocessing.. {}_{}_{} start!'.format(src,item,file))
        data = data_init(file, src, item)
        df, cohort = data()
        list_prep = list_preparation(df, cohort, src, item, file)
        pickle_cohort= list_prep()
        #cohort to pickle
        pickle_cohort.to_pickle(os.path.join(output_dir,'{}_{}_{}_init.pkl'.format(src,item,file)))  
        print('preprocessing.. {}_{}_{} finish!'.format(src,item,file))

# Preprocess

In [10]:
class data_init():
    def __init__(self, file:str, src:str, item:str):
        self.file = file
        self.src = src
        self.item = item
        self.input_folder = os.path.join(input_dir,src)
        df_path = os.path.join(self.input_folder,file+'.csv')
        cohort_path = os.path.join(cohort_dir, src+'_cohort.pk')
        
        # Read in cohort pickle and appropriate csv file
        self.cohort = pd.read_pickle(cohort_path).reset_index(drop=True)
        print('cohort load finish!')
        self.df = pd.read_csv(df_path)
        print('csv file load finish!')
        if self.src == 'mimic':
            self.cohort=self.cohort.rename({'HADM_ID':'ID'},axis='columns') # rename ID columns in pickle as needed
        elif self.src == 'eicu':
             self.cohort=self.cohort.rename({'patientunitstayid':'ID'},axis='columns')
        
    def column_rename(self, df):      # for similar col names
        df = df.rename(columns_map[self.file], axis='columns')
        return df                          
        
    def cohort_filtering(self, df):    # take only the observations from the .csv file which are in the cohort
        df_id=df['ID']
        cohort_id=self.cohort['ID']
        df = df[df_id.isin(cohort_id)].reset_index(drop=True)
        
        cohort = self.cohort[cohort_id.isin(df_id)].reset_index(drop=True) # drop cohort obs which !in(csv)
    #    df.replace(' ', '_', regex=True, inplace=True) <-- earlier preprocessing code?
       
        if 'ICUSTAY_ID' in df.columns: # drop all with missing ICUSTAY_ID (mimic)
            df = df.loc[df['ICUSTAY_ID'].isnull()==False].reset_index(drop=True)
        if 'start_time' in df.columns: # if null end_time, input start_time (from e.g. mimic prescriptions)
            indexes = df[df['end_time'].isnull()==True]['start_time'].index
            df['end_time'][indexes] = df[df['end_time'].isnull()==True]['start_time']
        df.fillna('null', inplace=True)
        return df, cohort
    
    
    def issue_delete(self, df): # e.g. drop if 'order_cancelled'==yes in eicu medications
        if 'issue' in df.columns:
            issue_label = issue_map[self.file]
            df.drop(df[df['issue'].isin(issue_label)].index, inplace=True)
        return df
    
    def name_dict(self, df): # for mimic files, create code_name from appropriate dictionary file (key e.g. ICD9)
        if self.file in mimic_dictionary_file:
            dict_name=mimic_dictionary_file[self.file]
            dict_path = os.path.join(self.input_folder, dict_name+'.csv')
            code_dict = pd.read_csv(dict_path)
            if dict_name in ['D_ICD_DIAGNOSES', 'D_ICD_PROCEDURES']:
                key = code_dict['ICD9_CODE']
                value = code_dict['LONG_TITLE']
            else:
                key = code_dict['ITEMID']
                value = code_dict['LABEL']
            code_dict = dict(zip(key,value))
            df['code_name'] = df['code_name'].map(code_dict)
        return df          
                
    def __call__(self):        
        print('column_rename start!')
        df = self.column_rename(self.df)
        print('column_rename finish!')
        print('cohort_filtering start!')
        df, cohort = self.cohort_filtering(df)
        print('cohort_filtering finish!')
        print('issue_delete start!')
        df = self.issue_delete(df)
        print('issue_delete finish!')
        if self.src == 'mimic':
            print('name_dict start!')
            df = self.name_dict(df)
            print('name_dict finish!')
        return df, cohort

In [11]:
'''
Input : argument -> output : df, cohort
processing:
column_rename-> cohort_filtering -> issue_delete -> if mimic, name_dict -> df, cohort
'''

'\nInput : argument -> output : df, cohort\nprocessing:\ncolumn_rename-> cohort_filtering -> issue_delete -> if mimic, name_dict -> df, cohort\n'

Example: 


In [12]:
# for example's sake:
columns_map = mimic_columns_map
data_ex = data_init('INPUTEVENTS_CV', 'mimic', 'inf')
df_ex, cohort_ex = data_ex()

cohort load finish!


  if (await self.run_code(code, result,  async_=asy)):


csv file load finish!
column_rename start!
column_rename finish!
cohort_filtering start!
cohort_filtering finish!
issue_delete start!
issue_delete finish!
name_dict start!
name_dict finish!


In [13]:
df_ex['code_name'].unique()

array(['Po Intake', 'D5W', 'Lactated Ringers', 'IV Piggyback',
       '.9% Normal Saline', 'Carrier', 'TF Residual', 'OR Crystalloid',
       'PACU Crystalloids', 'OR Colloid', 'OR Autologous Blood',
       'D5/.45NS', 'Vivonex', 'Sterile Water', nan, "Packed RBC's",
       'Dextrose 10%', '.45% Normal Saline', 'D5NS', 'Gastric Meds',
       'Pre-Admission Intake', 'PACU Colloids', 'OR FFP',
       'Fresh Frozen Plasma', "OR Packed RBC's", 'Nepro', 'Platelets',
       'PPN', 'TPN', 'Replete w/fiber', 'TPN w/Lipids', 'Cath Lab Intake',
       'Replete', 'D5 Ringers Lact.', 'D5 Normal Saline',
       'Impact w/fiber', 'OR Platelets', 'D5RL', 'Albumin 5%',
       'Whole Blood', 'Deliver 2.0', 'Albumin 25%', 'Lipids', 'Ultracal',
       'Hespan', 'Free Water Bolus', 'Peptamen', 'Tube Feeding',
       'GT Flush', 'Fentanyl Base', 'Cell Saver', 'Cryoprecipitate',
       'Isocal HN', "Washed PRBC's", 'Respalor', 'Promote w/fiber',
       'Criticare HN', '3% Normal Saline', 'Promote',
       '

Back to Code

In [15]:
class list_preparation():
    def __init__(self, df:pd.DataFrame, cohort:pd.DataFrame,  src:str, code:str, file:str):
        self.file = file
        self.df = df.reset_index(drop=True) # our filtered df, now with med names if needed
        self.cohort = cohort
        self.src = src
        self.item = item
        if src =='mimic':
            self.INTIME = pd.to_datetime(self.cohort['INTIME']) # for manipulation later
            self.OUTTIME = pd.to_datetime(self.cohort['OUTTIME']) 
#####################################################################################################   
    def generate_offset(self, item_list:list, df):
        '''
        Input : time -> output : offset
        (order_time - INTIME) and make time as min
        
        time_list : example [order_time, start_time, end_time ]
        offset_list : ex [order_offset, start_offset, end_offset]
        '''
        self.cohort = self.cohort.reset_index(drop=True)
        time_list = ['{}_time'.format(item) for item in item_list] 
        offset_list = ['{}_offset'.format(item) for item in item_list] # names of cols
        offset_dict = {}
        index_dict = {}
        #Empty dict for offset, empty list for index
        for idx, item_time in enumerate(time_list):    # time_list = [start_time, end_time] e.g. for PRESCRIPTIONS
            df[item_time] = pd.to_datetime(df[item_time])
            offset_dict[offset_list[idx]]=[]
            index_list =[]
        # from cohort ID, take INTIME
        for row, ID in enumerate(notebook.tqdm(self.cohort['ID'])): # w/ progress bar
            one_id_rows = df[df['ID']==ID]        
            index_list.extend(list(one_id_rows.index)) # add index of one_id_rows
            for idx, item_time in enumerate(time_list):
                offset_series=one_id_rows[item_time].apply(lambda x: round((x-self.INTIME[row]).total_seconds()/60))               
                offset_dict[offset_list[idx]].extend(list(offset_series))
        
        df= pd.concat([df, pd.DataFrame(offset_dict, index=index_list, columns=offset_list)], axis=1).reset_index(drop=True)
        
        return df        
    
    def time_filtering(self, item, df):
        ''' 
        Input : time -> output : timne
        time filtering by INTIME , OUTTIME
        '''
        series = pd.Series()
        for row, ID in enumerate(notebook.tqdm(self.cohort['ID'])):
            timestamp_in = self.INTIME[row]
            timestamp_out = self.OUTTIME[row]
            times = df.loc[df['ID'] == ID][item]  # from <  for item in ['start_time','end_time'] >
            series = series.append((timestamp_in <= times) & (times <= timestamp_out)) # T/F
        df = df.loc[series].reset_index(drop=True)
        
        return df
############################################################################################################
    
    def charttime_offset(self, df):
        if 'start_time' in df.columns:
            df['order_time'] = pd.to_datetime(df['start_time']) #start_time 을 order_time으로
            df = self.generate_offset(['order', 'start', 'end'], df)
        
        elif 'order_time' in df.columns:    
            df = self.generate_offset(['order'], df)        
        
        return df.reset_index(drop=True)    
       
   
    def time_filter(self, df):   
        if self.src == 'mimic':
            if 'start_time' in df.columns:
                for item in ['start_time','end_time']:
                    df[item] = pd.to_datetime(df[item])
                    df = self.time_filtering(item, df)
            elif 'order_time' in df.columns:
                    df['order_time'] = pd.to_datetime(df['order_time'])
                    df = self.time_filtering('order_time', df)          
    
        elif self.src == 'eicu': 
            if 'order_offset' in df.columns:
                df = df.loc[df['order_offset'] >= 0].reset_index(drop=True)   
            elif 'start_offset' in df.columns:
                #start_offset -> order_offset copy
                df = df.loc[df['start_offset'] >= 0].reset_index(drop=True)
                df = df.loc[df['end_offset'] >= df['start_offset']].reset_index(drop=True)
                df['order_offset'] = df['start_offset']
                  
        
        return df.reset_index(drop=True)    

    def list_make_sort(self, df):
            cohort = self.cohort.reset_index(drop=True)  
            columns = columns_map[self.file]
            columns_names = [value for key, value in columns.items() if value not in ['order_offset','ID','order_time','issue']]
            columns_names.append('order_offset') # take .csv file columns + order_offset
            columns_dict = {}
            for column in columns_names:
                columns_dict[column]=[] # dictionary w/ column names as keys
            for row, ID in enumerate(notebook.tqdm(self.cohort['ID'])):
                one_id_rows = df.loc[df['ID']==ID] # take each ID
                sort_by_offset = one_id_rows.sort_values(by='order_offset', ascending=True) # sort events by time since admission
                for column in columns_names:
                    columns_dict[column].append(list(sort_by_offset[column].values)) # append these as value list to key
                
            for column in columns_names:
                list_column=columns_dict[column] # take the key:value pairs one by one 
                series_list = pd.Series(list_column) # make it a series 
                cohort[column] = series_list # append it to cohort dataframe as a new column w/ same name as in df
            
            return cohort
    
    def __call__(self):        
        print('time_filter start!')
        df = self.time_filter(self.df)
        print('time_filter finished!')
        if self.src == 'mimic':
            print('charttime_offset start!')
            df = self.charttime_offset(df)   
            print('charttime_offset finished!')
        print('list_make_sort start!')  
        cohort = self.list_make_sort(df)
        print('list_make_sort finished!')
        return cohort

In [16]:
'''
Input : df, cohort -> output : cohort
processing:
Time filter -> if mimic, charttime_offset -> list_make_sort -> cohort
'''

'\nInput : df, cohort -> output : cohort\nprocessing:\nTime filter -> if mimic, charttime_offset -> list_make_sort -> cohort\n'

In [17]:
exist_output = os.listdir(output_dir) # what's in our output directory?
print('exist_files!',exist_output)

for src in sources: # that is, < for src in sources = ['mimic', 'eicu'] >
    for item in items:  # that is, < for item in items = ['lab','med','inf'] >
        if src == 'mimic':
            files = mimic_csv_files[item] # the files from mimic that we want
            columns_map= mimic_columns_map # the columns from mimic we care about, to be arg for pre_processing_1st
        elif src == 'eicu':
            if item == 'chart': # ignore "chart"
                continue
            files = eicu_csv_files[item]
            columns_map= eicu_columns_map 
        for file in files: 
            filename = os.path.join(output_dir, '{}_{}_{}_init.pkl'.format(src,item,file))
          #  if filename not in exist_output:

            if filename in exist_output:
                os.remove(filename)
            pre_processing_1st(src, item, file, columns_map, mimic_dictionary_file)


exist_files! []
preprocessing.. mimic_lab_LABEVENTS start!
cohort load finish!
csv file load finish!
column_rename start!
column_rename finish!
cohort_filtering start!
cohort_filtering finish!
issue_delete start!
issue_delete finish!
name_dict start!
name_dict finish!
time_filter start!


  series = pd.Series()


HBox(children=(FloatProgress(value=0.0, max=18625.0), HTML(value='')))


time_filter finished!
charttime_offset start!


HBox(children=(FloatProgress(value=0.0, max=18625.0), HTML(value='')))


charttime_offset finished!
list_make_sort start!


HBox(children=(FloatProgress(value=0.0, max=18625.0), HTML(value='')))


list_make_sort finished!
preprocessing.. mimic_lab_LABEVENTS finish!
preprocessing.. mimic_med_PRESCRIPTIONS start!
cohort load finish!


  exec(code_obj, self.user_global_ns, self.user_ns)


csv file load finish!
column_rename start!
column_rename finish!
cohort_filtering start!


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['end_time'][indexes] = df[df['end_time'].isnull()==True]['start_time']


cohort_filtering finish!
issue_delete start!
issue_delete finish!
name_dict start!
name_dict finish!
time_filter start!


  series = pd.Series()


HBox(children=(FloatProgress(value=0.0, max=17578.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=17578.0), HTML(value='')))


time_filter finished!
charttime_offset start!


HBox(children=(FloatProgress(value=0.0, max=17578.0), HTML(value='')))


charttime_offset finished!
list_make_sort start!


HBox(children=(FloatProgress(value=0.0, max=17578.0), HTML(value='')))


list_make_sort finished!
preprocessing.. mimic_med_PRESCRIPTIONS finish!
preprocessing.. mimic_inf_INPUTEVENTS_CV start!
cohort load finish!


  exec(code_obj, self.user_global_ns, self.user_ns)


csv file load finish!
column_rename start!
column_rename finish!
cohort_filtering start!
cohort_filtering finish!
issue_delete start!
issue_delete finish!
name_dict start!
name_dict finish!
time_filter start!


  series = pd.Series()


HBox(children=(FloatProgress(value=0.0, max=9366.0), HTML(value='')))


time_filter finished!
charttime_offset start!


HBox(children=(FloatProgress(value=0.0, max=9366.0), HTML(value='')))


charttime_offset finished!
list_make_sort start!


HBox(children=(FloatProgress(value=0.0, max=9366.0), HTML(value='')))


list_make_sort finished!
preprocessing.. mimic_inf_INPUTEVENTS_CV finish!
preprocessing.. mimic_inf_INPUTEVENTS_MV start!
cohort load finish!
csv file load finish!
column_rename start!
column_rename finish!
cohort_filtering start!


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['end_time'][indexes] = df[df['end_time'].isnull()==True]['start_time']


cohort_filtering finish!
issue_delete start!
issue_delete finish!
name_dict start!
name_dict finish!
time_filter start!


  series = pd.Series()


HBox(children=(FloatProgress(value=0.0, max=9125.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=9125.0), HTML(value='')))


time_filter finished!
charttime_offset start!


HBox(children=(FloatProgress(value=0.0, max=9125.0), HTML(value='')))


charttime_offset finished!
list_make_sort start!


HBox(children=(FloatProgress(value=0.0, max=9125.0), HTML(value='')))


list_make_sort finished!
preprocessing.. mimic_inf_INPUTEVENTS_MV finish!
preprocessing.. eicu_lab_lab start!
cohort load finish!
csv file load finish!
column_rename start!
column_rename finish!
cohort_filtering start!
cohort_filtering finish!
issue_delete start!
issue_delete finish!
time_filter start!
time_filter finished!
list_make_sort start!


HBox(children=(FloatProgress(value=0.0, max=13820.0), HTML(value='')))


list_make_sort finished!
preprocessing.. eicu_lab_lab finish!
preprocessing.. eicu_med_medication start!
cohort load finish!


  exec(code_obj, self.user_global_ns, self.user_ns)


csv file load finish!
column_rename start!
column_rename finish!
cohort_filtering start!
cohort_filtering finish!
issue_delete start!
issue_delete finish!
time_filter start!
time_filter finished!
list_make_sort start!


HBox(children=(FloatProgress(value=0.0, max=12139.0), HTML(value='')))


list_make_sort finished!
preprocessing.. eicu_med_medication finish!
preprocessing.. eicu_inf_infusionDrug start!
cohort load finish!


  exec(code_obj, self.user_global_ns, self.user_ns)


csv file load finish!
column_rename start!
column_rename finish!
cohort_filtering start!
cohort_filtering finish!
issue_delete start!
issue_delete finish!
time_filter start!
time_filter finished!
list_make_sort start!


HBox(children=(FloatProgress(value=0.0, max=6455.0), HTML(value='')))


list_make_sort finished!
preprocessing.. eicu_inf_infusionDrug finish!
