In [1]:
import numpy as np
import pandas as pd
import json
import collections
from collections import defaultdict 
from functools import partial
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm


### This notebook transforms information in tables into points with "day" as smallest time unit


In [2]:
# Good read: https://iq-inc.com/importerror-attempted-relative-import/

import sys
import importlib
from mimicnet import concept

importlib.reload(sys.modules['mimicnet.concept'])

<module 'mimicnet.concept' from '/home/asem/GP/MIMIC-SNONET/mimicnet/concept.py'>

In [3]:
# multi_visit_mimic_dir = '/home/am8520/GP/ehr-data/mimic3-multi-visit'
multi_visit_mimic_dir = '/home/asem/GP/ehr-data/mimic3-multi-visit'

PATIENTS = pd.read_csv(f'{multi_visit_mimic_dir}/PATIENTS.csv.gz')
ADMISSIONS = pd.read_csv(f'{multi_visit_mimic_dir}/ADMISSIONS.csv.gz')
DIAGNOSES_ICD = pd.read_csv(f'{multi_visit_mimic_dir}/DIAGNOSES_ICD.csv.gz', dtype = {'ICD9_CODE': str})
PROCEDURES_ICD = pd.read_csv(f'{multi_visit_mimic_dir}/PROCEDURES_ICD.csv.gz', dtype = {'ICD9_CODE': str})
LABEVENTS = pd.read_csv(f'{multi_visit_mimic_dir}/LABEVENTS.csv.gz')

In [4]:
LABEVENTS = LABEVENTS[['SUBJECT_ID', 'ITEMID', 'CHARTTIME', 'VALUE', 'VALUENUM', 'VALUEUOM']]

In [5]:
N_PATIENTS = PATIENTS.shape[0]
N_PATIENTS 

7537

In [6]:
chunksize = 10 ** 7
CHARTEVENTS_dfs = []
with pd.read_csv(f'{multi_visit_mimic_dir}/CHARTEVENTS.csv.gz', chunksize=chunksize) as reader:
    for chunk in tqdm(reader):
        CHARTEVENTS_dfs.append(chunk[['SUBJECT_ID', 'ITEMID', 'CHARTTIME', 'VALUE', 'VALUENUM', 'VALUEUOM']])

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
12it [01:38,  8.23s/it]


### Load dictionary stuff

In [7]:
mimic_dir = '/home/asem/GP/MIMIC-SNONET/RAW/mimic-iii-clinical-database-1.4'
# mimic_dir = '/home/asem/GP/ehr-data/mimic3-v1.4/physionet.org/files/mimiciii/1.4'


D_LABITEMS = pd.read_csv(f'{mimic_dir}/D_LABITEMS.csv.gz')
D_ITEMS = pd.read_csv(f'{mimic_dir}/D_ITEMS.csv.gz')

itemid_label = dict(zip(D_ITEMS.ITEMID, D_ITEMS.LABEL))
itemid_category = dict(zip(D_ITEMS.ITEMID, D_ITEMS.LABEL))


labitem_label = dict(zip(D_LABITEMS.ITEMID, D_LABITEMS.LABEL))
labitem_category = dict(zip(D_LABITEMS.ITEMID, D_LABITEMS.CATEGORY))

In [8]:
D_ITEMS.head()

Unnamed: 0,ROW_ID,ITEMID,LABEL,ABBREVIATION,DBSOURCE,LINKSTO,CATEGORY,UNITNAME,PARAM_TYPE,CONCEPTID
0,457,497,Patient controlled analgesia (PCA) [Inject],,carevue,chartevents,,,,
1,458,498,PCA Lockout (Min),,carevue,chartevents,,,,
2,459,499,PCA Medication,,carevue,chartevents,,,,
3,460,500,PCA Total Dose,,carevue,chartevents,,,,
4,461,501,PCV Exh Vt (Obser),,carevue,chartevents,,,,


In [9]:
D_LABITEMS.head()

Unnamed: 0,ROW_ID,ITEMID,LABEL,FLUID,CATEGORY,LOINC_CODE
0,546,51346,Blasts,Cerebrospinal Fluid (CSF),Hematology,26447-3
1,547,51347,Eosinophils,Cerebrospinal Fluid (CSF),Hematology,26451-5
2,548,51348,"Hematocrit, CSF",Cerebrospinal Fluid (CSF),Hematology,30398-2
3,549,51349,Hypersegmented Neutrophils,Cerebrospinal Fluid (CSF),Hematology,26506-6
4,550,51350,Immunophenotyping,Cerebrospinal Fluid (CSF),Hematology,


In [10]:
CHARTEVENTS_dfs[0].head()

Unnamed: 0,SUBJECT_ID,ITEMID,CHARTTIME,VALUE,VALUENUM,VALUEUOM
0,36,223834,2134-05-12 12:00:00,15.0,15.0,L/min
1,36,223835,2134-05-12 12:00:00,100.0,100.0,
2,36,224328,2134-05-12 12:00:00,0.37,0.37,
3,36,224329,2134-05-12 12:00:00,6.0,6.0,min
4,36,224330,2134-05-12 12:00:00,2.5,2.5,


In [11]:
LABEVENTS.head()

Unnamed: 0,SUBJECT_ID,ITEMID,CHARTTIME,VALUE,VALUENUM,VALUEUOM
0,17,50960,2134-12-29 03:18:00,2.1,2.1,mg/dL
1,17,50970,2134-12-29 03:18:00,2.0,2.0,mg/dL
2,17,50971,2134-12-29 03:18:00,4.7,4.7,mEq/L
3,17,50983,2134-12-29 03:18:00,136.0,136.0,mEq/L
4,17,51006,2134-12-29 03:18:00,11.0,11.0,mg/dL


In [12]:
CHARTEVENTS_dfs[0].ITEMID.value_counts()


220045    782629
220210    774740
220277    754336
220181    389933
220179    389017
           ...  
220125         1
228181         1
227647         1
227039         1
228146         1
Name: ITEMID, Length: 470, dtype: int64

## (A) Select CHARTEVENTS with ITEMID covering at least 5% of all patients in the dataset

### (A-1) Drop non-numerical measurements

In [13]:
non_numeric_chartevents_dfs = []
non_numeric_chartevents_vals = defaultdict(set)
non_numeric_chartevents_units = defaultdict(set)

for i, chunk_df in enumerate(tqdm(CHARTEVENTS_dfs)):
    numeric_chunk_df = chunk_df[chunk_df.VALUENUM.notnull()].reset_index(drop=True)
    CHARTEVENTS_dfs[i] = numeric_chunk_df
    non_numeric_chunk_df = chunk_df[chunk_df.VALUENUM.isnull() & chunk_df.VALUE.notnull()].reset_index(drop=True)
    non_numeric_chartevents_dfs.append(non_numeric_chunk_df)
    
    for itemid, df in non_numeric_chunk_df.groupby(['ITEMID']):
        non_numeric_chartevents_vals[itemid].update(set(df.VALUE))
        non_numeric_chartevents_units[itemid].update(set(df.VALUEUOM))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:02<00:00,  4.31it/s]


In [None]:
non_numeric_chartevents_df = pd.DataFrame({'ITEMID': non_numeric_chartevents_vals.keys(),
                                       'LABEL': map(itemid_label.get, non_numeric_chartevents_vals.keys()),
                                       'CATEGORY': map(itemid_category.get, non_numeric_chartevents_vals.keys()),
                                       'VALS': map(lambda vals: "|".join(vals), non_numeric_chartevents_vals.values())})
non_numeric_chartevents_df.to_csv('non_numeric_chartevents_df.csv')

 67%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                                                                               | 8/12 [00:13<00:07,  1.82s/it]

In [None]:
non_numeric_chartevents_units

### (A-2) Filter below 5% patients coverage

In [14]:
# For each chartevent item_id, store a set of patient that have at least one measurement of that type.
chartevents_item_patients = defaultdict(set)
for df in CHARTEVENTS_dfs:
    item_subject_df = df.drop_duplicates(subset=['ITEMID', 'SUBJECT_ID'], ignore_index=True)
    for item_id, subjects_df in item_subject_df.groupby('ITEMID'):
        chartevents_item_patients[item_id].update(subjects_df.SUBJECT_ID.tolist())
        
        

#### CONCLUSION: No duplicate info between LABEVENTS and CHARTEVENTS

In [15]:
print(len(chartevents_item_patients))
print(len(set(LABEVENTS.ITEMID)))
print(len(set(chartevents_item_patients.keys()) & set(LABEVENTS.ITEMID)))

2057
697
0


In [16]:
chartevents_item_patients_count_df = pd.DataFrame({'ITEMID': chartevents_item_patients.keys(),
                                                 'N_PATIENTS': map(len, chartevents_item_patients.values())})
chartevents_item_patients_count_df['P_PATIENTS'] = chartevents_item_patients_count_df['N_PATIENTS'] / N_PATIENTS
chartevents_item_patients_count_df = chartevents_item_patients_count_df.sort_values(by='N_PATIENTS', ascending=False)
chartevents_item_patients_count_df

Unnamed: 0,ITEMID,N_PATIENTS,P_PATIENTS
526,211,5032,0.667640
693,813,4879,0.647340
692,811,4876,0.646942
708,829,4876,0.646942
673,791,4875,0.646809
...,...,...,...
1624,5703,1,0.000133
1625,5739,1,0.000133
1626,5892,1,0.000133
1627,6024,1,0.000133


In [17]:
np.mean(chartevents_item_patients_count_df.P_PATIENTS > 0.05)

0.2396694214876033

In [18]:
selected_chartevents_itemid_set = set(chartevents_item_patients_count_df[chartevents_item_patients_count_df.P_PATIENTS > 0.05].ITEMID)

In [19]:
len(selected_chartevents_itemid_set)

493

In [20]:
for i, df in enumerate(CHARTEVENTS_dfs):
    CHARTEVENTS_dfs[i] = df[df.ITEMID.isin(selected_chartevents_itemid_set)].reset_index(drop=True)

In [21]:
for i, df_chunk in enumerate(tqdm(CHARTEVENTS_dfs)):
    # Set writing mode to append after first chunk
    mode = 'w' if i == 0 else 'a'
    
    # Add header if it is the first chunk
    header = i == 0

    df_chunk.to_csv(
        f'{multi_visit_mimic_dir}/CHARTEVENTS_Q5.csv.gz', 
        compression='gzip', 
        index=False,
        header=header, 
        mode=mode)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [03:44<00:00, 18.71s/it]


## (B) Select LABEVENTS with ITEMID covering at least 5% of all patients in the dataset

### (B-1) Drop non-numerical measurements

In [23]:
non_numeric_labevents_vals = defaultdict(set)
non_numeric_labevents_units = defaultdict(set)

numeric_labevents_df = LABEVENTS[LABEVENTS.VALUENUM.notnull()].reset_index(drop=True)

In [22]:
non_numeric_labevents_df = chunk_df[chunk_df.VALUENUM.isnull() & chunk_df.VALUE.notnull()].reset_index(drop=True)

for itemid, df in non_numeric_labevents_df.groupby(['ITEMID']):
    non_numeric_labevents_vals[itemid] = set(df.VALUE)
    non_numeric_labevents_units[itemid] = set(df.VALUEUOM)

KeyboardInterrupt: 

In [None]:
non_numeric_labevents_df = pd.DataFrame({'ITEMID': non_numeric_labevents_vals.keys(),
                                       'LABEL': map(itemid_label.get, non_numeric_labevents_vals.keys()),
                                       'CATEGORY': map(itemid_category.get, non_numeric_labevents_vals.keys()),
                                       'VALS': map(lambda vals: "|".join(vals), non_numeric_labevents_vals.values())})
                                
non_numeric_labevents_df.to_csv('non_numeric_labevents_df.csv')

In [24]:
# For each labevents item_id, store a set of patient that have at least one measurement of that type.

labevents_item_patients = defaultdict(set)

labitem_subject_df = numeric_labevents_df.drop_duplicates(subset=['ITEMID', 'SUBJECT_ID'], ignore_index=True)
for item_id, subjects_df in labitem_subject_df.groupby('ITEMID'):
    labevents_item_patients[item_id].update(subjects_df.SUBJECT_ID.tolist())
    
labitem_patients_count_df = pd.DataFrame({'ITEMID': labevents_item_patients.keys(),
                                                 'N_PATIENTS': map(len, labevents_item_patients.values())})
labitem_patients_count_df['P_PATIENTS'] = labitem_patients_count_df['N_PATIENTS'] / N_PATIENTS

labitem_patients_count_df = labitem_patients_count_df.sort_values(by='N_PATIENTS', ascending=False)
labitem_patients_count_df

Unnamed: 0,ITEMID,N_PATIENTS,P_PATIENTS
303,51221,7517,0.997346
355,51301,7505,0.995754
331,51265,7504,0.995622
342,51279,7503,0.995489
321,51250,7503,0.995489
...,...,...,...
431,51456,1,0.000133
230,51096,1,0.000133
398,51409,1,0.000133
434,51459,1,0.000133


In [25]:
np.mean(labitem_patients_count_df.P_PATIENTS > 0.05)

0.3741935483870968

In [26]:
selected_labevents_itemid_set = set(labitem_patients_count_df[labitem_patients_count_df.P_PATIENTS > 0.05].ITEMID)

In [27]:
LABEVENTS_Q5 = numeric_labevents_df[numeric_labevents_df.ITEMID.isin(selected_labevents_itemid_set)].reset_index(drop=True)
LABEVENTS_Q5.to_csv(f'{multi_visit_mimic_dir}/LABEVENTS_Q5.csv.gz', 
                    compression='gzip', 
                    index=False,)

In [28]:
len(selected_labevents_itemid_set)

174

## (C) Investigate the units used for each test type in CHARTEVENTS

### Load Filtered CHARTEVENTS (CHARTEVENTS_Q5)

In [29]:
CHARTEVENTS_Q5 = pd.read_csv(f'{multi_visit_mimic_dir}/CHARTEVENTS_Q5.csv.gz')

  exec(code_obj, self.user_global_ns, self.user_ns)


### Investigate numerical/categorical measurements in CHARTEVENTS_Q5


In [30]:


# Group each ITEMID with the set of used units (to detect unit incosistency for each unique test).
chartevents_item_units = defaultdict(dict)

for item_id, item_df in tqdm(CHARTEVENTS_Q5.groupby('ITEMID')):
    item_df.loc[item_df.VALUEUOM.isnull(), 'VALUEUOM'] = ''
    for unit, unit_df in item_df.groupby('VALUEUOM'):
        vals_np = pd.to_numeric(unit_df.VALUENUM, errors='coerce')
        chartevents_item_units[item_id][unit] = (np.size(vals_np), unit_df.VALUENUM.mean(skipna=True), unit_df.VALUENUM.std(skipna=True))

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 493/493 [00:13<00:00, 37.13it/s]


In [31]:
chartevents_item_units_count_df = pd.DataFrame({'ITEMID': chartevents_item_units.keys(),
                                                'LABEL': map(itemid_label.get, chartevents_item_units.keys()),
                                                'CATEGORY': map(itemid_category.get, chartevents_item_units.keys()),
                                                'N_UNITS': map(len, chartevents_item_units.values())})
chartevents_item_units_count_df = chartevents_item_units_count_df.sort_values(by='N_UNITS', ascending=False)
chartevents_item_units_count_df

Unnamed: 0,ITEMID,LABEL,CATEGORY,N_UNITS
246,220602,Chloride (serum),Chloride (serum),2
373,225638,Differential-Bands,Differential-Bands,2
375,225640,Differential-Eos,Differential-Eos,2
376,225641,Differential-Lymphs,Differential-Lymphs,2
377,225642,Differential-Monos,Differential-Monos,2
...,...,...,...,...
159,1390,DO2,DO2,1
158,1286,PT,PT,1
157,1162,BUN,BUN,1
156,1127,"WBC (4-11,000)","WBC (4-11,000)",1


In [32]:
chartevents_item_tuples = []
for itemid in chartevents_item_units_count_df.ITEMID:
    for unit in chartevents_item_units[itemid]:
        chartevents_item_tuples.append((itemid, unit))
chartevents_units_df = pd.DataFrame(chartevents_item_tuples, columns=['ITEMID', 'VALUEUOM'])
chartevents_units_df['LABEL'] = chartevents_units_df.ITEMID.map(itemid_label)
chartevents_units_df['CATEGORY'] = chartevents_units_df.ITEMID.map(itemid_category)
chartevents_units_df.to_csv('chartevents_units_df.csv')
chartevents_units_df


chartevents_item_tuples = []
for itemid in chartevents_item_units_count_df.ITEMID:
    for unit, (n, mean, std) in chartevents_item_units[itemid].items():
        chartevents_item_tuples.append((itemid, unit, n, mean, std))
chartevents_units_df = pd.DataFrame(chartevents_item_tuples, columns=['ITEMID', 'VALUEUOM', 'N', 'MEAN', 'STD'])
chartevents_units_df['LABEL'] = chartevents_units_df.ITEMID.map(itemid_label)
chartevents_units_df['CATEGORY'] = chartevents_units_df.ITEMID.map(itemid_category)

chartevents_units_df.to_csv('chartevents_units_df.csv')
chartevents_units_df


Unnamed: 0,ITEMID,VALUEUOM,N,MEAN,STD,LABEL,CATEGORY
0,220602,,51,987.725490,6383.147152,Chloride (serum),Chloride (serum)
1,220602,mEq/L,65220,104.013707,6.876020,Chloride (serum),Chloride (serum)
2,225638,,1,0.000000,,Differential-Bands,Differential-Bands
3,225638,%,3766,4.013250,7.183821,Differential-Bands,Differential-Bands
4,225640,,1,0.000000,,Differential-Eos,Differential-Eos
...,...,...,...,...,...,...,...
550,1390,ml/min,5374,799.461407,310.476716,DO2,DO2
551,1286,,43817,16.422468,6.516709,PT,PT
552,1162,,63798,35.594501,25.899441,BUN,BUN
553,1127,,67407,12.397424,49.172902,"WBC (4-11,000)","WBC (4-11,000)"


### CONCLUSION: Units are consistent for each measurement type in CHARTEVENTS

## (D) Investigate the units used for each test type in LABEVENTS

In [33]:
LABEVENTS_Q5 = pd.read_csv(f'{multi_visit_mimic_dir}/LABEVENTS_Q5.csv.gz')

In [34]:
# Group each ITEMID with the set of used units (to detect unit incosistency for each unique test).
labevents_item_units = defaultdict(dict)
for item_id, units_df in tqdm(LABEVENTS_Q5.groupby('ITEMID')):
    units_df.loc[units_df.VALUEUOM.isnull(), 'VALUEUOM'] = ''
    for unit, vals_df in units_df.groupby('VALUEUOM'):
        vals_np = pd.to_numeric(vals_df.VALUENUM, errors='coerce')
        labevents_item_units[item_id][unit] = (np.size(vals_np), vals_df.VALUENUM.mean(skipna=True), vals_df.VALUENUM.std(skipna=True))

        

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 174/174 [00:02<00:00, 70.41it/s]


In [35]:
labevents_item_units_count_df = pd.DataFrame({'ITEMID': labevents_item_units.keys(),
                                              'LABEL': map(labitem_label.get, labevents_item_units.keys()),
                                              'CATEGORY': map(labitem_category.get, labevents_item_units.keys()),
                                              'N_UNITS': map(len, labevents_item_units.values())})
labevents_item_units_count_df = labevents_item_units_count_df.sort_values(by='N_UNITS', ascending=False)
labevents_item_units_count_df

Unnamed: 0,ITEMID,LABEL,CATEGORY,N_UNITS
39,50889,C-Reactive Protein,Chemistry,4
71,50964,"Osmolality, Measured",Chemistry,3
0,50801,Alveolar-arterial Gradient,Blood Gas,2
86,51003,Troponin T,Chemistry,2
114,51128,"WBC, Ascites",Hematology,2
...,...,...,...,...
64,50951,Immunoglobulin M,Chemistry,1
65,50952,Iron,Chemistry,1
66,50953,"Iron Binding Capacity, Total",Chemistry,1
67,50954,Lactate Dehydrogenase (LD),Chemistry,1


In [36]:

labitem_nunits = dict(zip(labevents_item_units_count_df.ITEMID, labevents_item_units_count_df.N_UNITS))

In [37]:
labevents_item_tuples = []
for itemid in labevents_item_units_count_df.ITEMID:
    for unit, (n, mean, std) in labevents_item_units[itemid].items():
        labevents_item_tuples.append((itemid, unit, n, mean, std))
labevents_units_df = pd.DataFrame(labevents_item_tuples, columns=['ITEMID', 'VALUEUOM', 'N', 'MEAN', 'STD'])
labevents_units_df['LABEL'] = labevents_units_df.ITEMID.map(labitem_label)
labevents_units_df['CATEGORY'] = labevents_units_df.ITEMID.map(labitem_category)
labevents_units_df['N_UNITS'] = labevents_units_df.ITEMID.map(labitem_nunits)

labevents_units_df.to_csv('labevents_units_df.csv')
labevents_units_df

Unnamed: 0,ITEMID,VALUEUOM,N,MEAN,STD,LABEL,CATEGORY,N_UNITS
0,50889,,1,24.200000,,C-Reactive Protein,Chemistry,4
1,50889,MG/DL,9,4.322222,4.443191,C-Reactive Protein,Chemistry,4
2,50889,mg/L,2463,62.656902,68.999405,C-Reactive Protein,Chemistry,4
3,50889,mg/dL,460,7.213630,7.363761,C-Reactive Protein,Chemistry,4
4,50964,MOSM/KG,117,296.188034,22.361810,"Osmolality, Measured",Chemistry,3
...,...,...,...,...,...,...,...,...
197,50951,mg/dL,1218,122.012315,280.399402,Immunoglobulin M,Chemistry,1
198,50952,ug/dL,8571,51.690235,43.223533,Iron,Chemistry,1
199,50953,ug/dL,8168,237.032444,93.208851,"Iron Binding Capacity, Total",Chemistry,1
200,50954,IU/L,46458,404.447587,894.333094,Lactate Dehydrogenase (LD),Chemistry,1


### (D-1) Convert only units for (ITEMID=50889, C-Reactive Protein)

- Convert from mg/dL to mg/L

In [38]:
to_convert_units = ['MG/DL', 'mg/dL']
cond = (LABEVENTS_Q5.ITEMID == 50889) & (LABEVENTS_Q5.VALUEUOM.isin(to_convert_units))
LABEVENTS_Q5[cond]

Unnamed: 0,SUBJECT_ID,ITEMID,CHARTTIME,VALUE,VALUENUM,VALUEUOM
9200,61,50889,2118-11-25 00:30:00,6.06,6.06,mg/dL
18676,109,50889,2138-04-17 05:01:00,2.59,2.59,mg/dL
29259,135,50889,2174-03-09 13:05:00,0.09,0.09,mg/dL
40919,156,50889,2120-06-04 03:25:00,7.50,7.50,mg/dL
67122,256,50889,2166-06-23 06:10:00,8.93,8.93,mg/dL
...,...,...,...,...,...,...
6525512,25935,50889,2114-11-07 05:05:00,1.79,1.79,mg/dL
6529569,26013,50889,2109-10-30 20:54:00,0.18,0.18,mg/dL
6587324,26224,50889,2115-06-05 00:30:00,2.65,2.65,mg/dL
6639774,26395,50889,2186-08-21 13:50:00,9.27,9.27,mg/dL


In [39]:
LABEVENTS_Q5.loc[cond, 'VALUE'] = LABEVENTS_Q5.loc[cond, 'VALUE'] * 10
LABEVENTS_Q5.loc[cond, 'VALUEUOM'] = 'mg/L'
LABEVENTS_Q5.loc[cond, 'VALUENUM'] = LABEVENTS_Q5.loc[cond, 'VALUENUM'] * 10


In [40]:
LABEVENTS_Q5[cond]

Unnamed: 0,SUBJECT_ID,ITEMID,CHARTTIME,VALUE,VALUENUM,VALUEUOM
9200,61,50889,2118-11-25 00:30:00,60.6,60.6,mg/L
18676,109,50889,2138-04-17 05:01:00,25.9,25.9,mg/L
29259,135,50889,2174-03-09 13:05:00,0.9,0.9,mg/L
40919,156,50889,2120-06-04 03:25:00,75.0,75.0,mg/L
67122,256,50889,2166-06-23 06:10:00,89.3,89.3,mg/L
...,...,...,...,...,...,...
6525512,25935,50889,2114-11-07 05:05:00,17.9,17.9,mg/L
6529569,26013,50889,2109-10-30 20:54:00,1.8,1.8,mg/L
6587324,26224,50889,2115-06-05 00:30:00,26.5,26.5,mg/L
6639774,26395,50889,2186-08-21 13:50:00,92.7,92.7,mg/L


In [42]:
LABEVENTS_Q5.to_csv(f'{multi_visit_mimic_dir}/LABEVENTS_Q5_UNITS_FIXED.csv.gz')