# MIMIC 4 data - dataset construction chartevents

Code taken from GRU-ODE-Bayes preprocessing; simplified and adapted for MIMIC 4 1.0

In [1]:
import os
import pathlib

p_project = str(pathlib.Path(os.getcwd()).parents[1])

In [2]:
import pandas as pd
from datetime import datetime
from datetime import timedelta
import numpy as np

In [3]:
path_mimic = p_project + '/data/mimic4'

In [4]:
adm = pd.read_csv(path_mimic + '/processed/tables/admissions_processed.csv')

In [5]:
# only choose previously selected admission ids
adm_ids=list(adm['hadm_id'])
charts = pd.DataFrame()
for chunk in pd.read_csv(path_mimic + '/raw/icu/chartevents.csv', chunksize=10000000, low_memory=False):
    chunk=chunk.loc[chunk['hadm_id'].isin(adm_ids)]
    charts = pd.concat([charts, chunk[['subject_id', 'hadm_id', 'charttime', 'itemid', 'valuenum', 'valueuom']]])
    
charts = charts.reset_index(drop=True)

In [6]:
# only choose previously selected admission ids.
print('Number of patients remaining in the database: ')
print(charts['subject_id'].nunique())

Number of patients remaining in the database: 
44057


In [7]:
# get item ids
item_id=pd.read_csv(path_mimic + '/raw/icu/d_items.csv.gz')[['itemid','label']]

# get names of administered items
charts2=pd.merge(charts, item_id, on='itemid')
charts2.head()

Unnamed: 0,subject_id,hadm_id,charttime,itemid,valuenum,valueuom,label
0,10004235,24181354,2196-02-24 14:53:00,220224,125.0,mmHg,Arterial O2 pressure
1,10004235,24181354,2196-02-24 16:25:00,220224,108.0,mmHg,Arterial O2 pressure
2,10004235,24181354,2196-02-24 17:46:00,220224,98.0,mmHg,Arterial O2 pressure
3,10004235,24181354,2196-02-24 19:10:00,220224,191.0,mmHg,Arterial O2 pressure
4,10004235,24181354,2196-02-24 20:54:00,220224,165.0,mmHg,Arterial O2 pressure


In [8]:

pat_for_item=charts2.groupby('label')['subject_id'].nunique().sort_values(ascending=False)

label_counts=charts2['label'].value_counts()

In [9]:
# get only top 50 most used tests
frequent_labels1=pat_for_item[:100]
frequent_labels2=label_counts.head(200)

fre_labels = frequent_labels1.loc[frequent_labels1.index.isin(frequent_labels2.index)]

In [10]:
fre_labels.index

Index(['Heart Rate', 'O2 saturation pulseoxymetry', 'GCS - Eye Opening',
       'GCS - Verbal Response', 'GCS - Motor Response', 'Respiratory Rate',
       'Alarms On', 'Heart Rate Alarm - Low', 'Heart rate Alarm - High',
       'Head of Bed', 'O2 Saturation Pulseoxymetry Alarm - Low',
       'Skin Integrity', 'LUL Lung Sounds', 'RUL Lung Sounds',
       'Skin Temperature', 'RLL Lung Sounds', 'LLL Lung Sounds',
       'O2 Saturation Pulseoxymetry Alarm - High', 'Resp Alarm - High',
       'Skin Condition', 'Braden Sensory Perception', 'Braden Moisture',
       'Braden Mobility', 'Braden Activity', 'Resp Alarm - Low',
       'Braden Nutrition', 'Turn', 'Abdominal Assessment',
       'Braden Friction/Shear', 'Heart Rhythm', 'Parameters Checked',
       'Bowel Sounds', 'Skin Color', 'Oral Cavity', 'Activity Tolerance',
       'Temperature Site', 'SpO2 Desat Limit', 'Diet Type', 'Urine Source',
       'Pain Assessment Method', 'IV/Saline lock', 'Ambulatory aid',
       'Gait/Transferring',

In [11]:
mask_df = charts2['label'].isin(fre_labels.index)

In [12]:
mask_df.sum()

128331422

In [13]:
charts3=charts2.loc[mask_df]

In [14]:
charts3.shape

(128331422, 7)

In [15]:
charts3.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  charts3.dropna(inplace=True)


In [16]:
charts3.shape

(36835389, 7)

In [17]:
charts3.to_csv(path_mimic + '/processed/tables/charts_processed.csv')

In [18]:
charts3['label'].unique()

array(['Heart Rate', 'Respiratory Rate', 'O2 saturation pulseoxymetry',
       'Heart rate Alarm - High', 'Heart Rate Alarm - Low',
       'O2 Saturation Pulseoxymetry Alarm - High',
       'O2 Saturation Pulseoxymetry Alarm - Low', 'Resp Alarm - High',
       'Resp Alarm - Low', 'SpO2 Desat Limit', 'Potassium (serum)',
       'Non Invasive Blood Pressure systolic',
       'Non Invasive Blood Pressure diastolic',
       'Non Invasive Blood Pressure mean', 'Temperature Fahrenheit',
       'Non-Invasive Blood Pressure Alarm - High',
       'Non-Invasive Blood Pressure Alarm - Low'], dtype=object)

In [19]:
itemid_valueuom = charts3.groupby(["itemid"])["valueuom"].unique()

In [23]:
itemid_valueuom

itemid
220045         [bpm]
220046         [bpm]
220047         [bpm]
220179        [mmHg]
220180        [mmHg]
220181        [mmHg]
220210    [insp/min]
220277           [%]
223751        [mmHg]
223752        [mmHg]
223761          [°F]
223769           [%]
223770           [%]
224161    [insp/min]
224162    [insp/min]
226253           [%]
227442       [mEq/L]
Name: valueuom, dtype: object