In [1]:
import numpy as np
import pandas as pd
import datetime
import copy
import time
import os
import re
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import operator

from tqdm.auto import tqdm, trange
from tqdm.notebook import tqdm
from datetime import timedelta

tqdm.pandas()

In [2]:
# Root directory of the MIMIC CSV files.
# Set the MIMIC_DATA_DIR environment variable to point at your own copy;
# if it is unset or empty, the original default path below is used.
# os.path.join(..., '') guarantees a trailing path separator, so code that
# appends a file name directly to this string keeps working.
dataDirStr = os.path.join(
    os.environ.get('MIMIC_DATA_DIR') or '/Users/gmessier/data/mimic-1.4/',
    '',
)

In [3]:
# Load the DATETIMEEVENTS table.
# os.path.join is used instead of string concatenation so the path is valid
# whether or not dataDirStr ends with a path separator.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type DtypeWarnings on large
# CSVs (presumably the source of the warning residue in this cell's saved
# output -- confirm on re-run).
datetimeevents_df = pd.read_csv(
    os.path.join(dataDirStr, "DATETIMEEVENTS.csv"),
    low_memory=False,
)
# Lower-case the column names so later cells can use attribute access
# such as datetimeevents_df.subject_id.
datetimeevents_df.columns = datetimeevents_df.columns.str.lower()
datetimeevents_df

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,itemid,charttime,storetime,cgid,value,valueuom,warning,error,resultstatus,stopped
0,711,7657,121183.0,297945.0,3411,2172-03-14 11:00:00,2172-03-14 11:52:00,16446,,Date,,,,NotStopd
1,712,7657,121183.0,297945.0,3411,2172-03-14 13:00:00,2172-03-14 12:36:00,16446,,Date,,,,NotStopd
2,713,7657,121183.0,297945.0,3411,2172-03-14 15:00:00,2172-03-14 15:10:00,14957,,Date,,,,NotStopd
3,714,7657,121183.0,297945.0,3411,2172-03-14 17:00:00,2172-03-14 17:01:00,16446,,Date,,,,NotStopd
4,715,7657,121183.0,297945.0,3411,2172-03-14 19:00:00,2172-03-14 19:29:00,14815,,Date,,,,NotStopd
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4485932,4456093,99366,136021.0,218447.0,224279,2197-03-24 16:32:00,2197-03-24 16:32:00,18234,2197-03-24 13:03:00,Date and Time,0.0,0.0,,
4485933,4456094,99366,136021.0,218447.0,224280,2197-03-24 16:32:00,2197-03-24 16:32:00,18234,2197-03-24 00:00:00,Date,0.0,0.0,,
4485934,4456095,99366,136021.0,218447.0,224282,2197-03-24 16:32:00,2197-03-24 16:32:00,18234,2197-03-24 00:00:00,Date,0.0,0.0,,
4485935,4456096,99366,136021.0,218447.0,224284,2197-03-24 16:32:00,2197-03-24 16:32:00,18234,2197-03-24 00:00:00,Date,0.0,0.0,,


`DATETIMEEVENTS` contains all date measurements about a patient in the ICU. This is very similar to `CHARTEVENTS`.

In [4]:
# Report the number of distinct patients (unique subject_id values) in the table.
print(f"There are {datetimeevents_df['subject_id'].nunique()} patients who have a date measurement in the ICU ")

There are 29185 patients who have a date measurement in the ICU 


`itemid` is categorical data that identifies the type of measurement taken. Every row with a given `itemid` corresponds to an instantiation of the same measurement.

Refer to `D_ITEMS` table for exact definitions of each `itemid`.

In [5]:
# Five most frequent itemid values with their row counts and percentage share.
# The column is counted once and the percentages are derived from those counts
# (value_counts(normalize=True) is the same counts divided by their sum), which
# avoids a second full pass over the column.
itemid_counts = datetimeevents_df.itemid.value_counts()
# .head(5) is explicitly positional: the first five rows of the sorted counts.
c = itemid_counts.head(5)
p = itemid_counts.div(itemid_counts.sum()).mul(100).round(2).head(5)
pd.concat([c, p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
5684,260957,5.82
3411,204762,4.56
5685,200814,4.48
6704,190596,4.25
224288,182011,4.06


`charttime` records the time at which an observation was charted, and is usually the closest proxy to the time the data was actually measured. `storetime` records the time at which an observation was manually input or manually validated by a member of the clinical staff.

`cgid` is the identifier for the caregiver who validated the given measurement.

In [6]:
# Five caregivers (cgid) with the most rows, as row counts and percentage share.
# The column is counted once and the percentages are derived from those counts
# (value_counts(normalize=True) is the same counts divided by their sum), which
# avoids a second full pass over the column.
cgid_counts = datetimeevents_df.cgid.value_counts()
# .head(5) is explicitly positional: the first five rows of the sorted counts.
c = cgid_counts.head(5)
p = cgid_counts.div(cgid_counts.sum()).mul(100).round(2).head(5)
pd.concat([c, p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
20889,50643,1.13
19611,40289,0.9
21570,32623,0.73
19963,27861,0.62
16284,27730,0.62


`value` holds the value recorded for `itemid`; in this table it is a date or timestamp string (e.g. `2197-03-24 13:03:00`) and may be missing. Unlike `CHARTEVENTS`, `DATETIMEEVENTS` has no `valuenum` column (see the column list in the table preview above). `valueuom` is the unit of measurement for `value`, if applicable — here values such as `Date` or `Date and Time`.

`warning` and `error` are binary-valued columns that specify whether a warning or an error occurred for that measurement.

In [7]:
# Distribution of the warning flag as row counts and percentage share.
# value_counts drops NaN by default, so the percentages are shares of the
# rows that have a warning value, not of all rows in the table.
# The column is counted once; percentages are derived from those counts
# rather than from a second value_counts(normalize=True) pass.
warning_counts = datetimeevents_df.warning.value_counts()
# .head(5) is explicitly positional. An integer slice such as [:5] on this
# float-valued index (0.0 / 1.0) is not reliably positional across pandas
# versions.
c = warning_counts.head(5)
p = warning_counts.div(warning_counts.sum()).mul(100).round(2).head(5)
pd.concat([c, p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
0.0,2686689,99.99
1.0,231,0.01


In [8]:
# Distribution of the error flag as row counts and percentage share.
# value_counts drops NaN by default, so the percentages are shares of the
# rows that have an error value, not of all rows in the table.
# The column is counted once; percentages are derived from those counts
# rather than from a second value_counts(normalize=True) pass.
error_counts = datetimeevents_df.error.value_counts()
# .head(5) is explicitly positional. An integer slice such as [:5] on this
# float-valued index (0.0 / 1.0) is not reliably positional across pandas
# versions.
c = error_counts.head(5)
p = error_counts.div(error_counts.sum()).mul(100).round(2).head(5)
pd.concat([c, p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
0.0,2686325,99.98
1.0,595,0.02


`resultstatus` and `stopped` specify the type of measurement (RESULTSTATUS is ‘Manual’ or ‘Automatic’) and whether the measurement was stopped.

In [9]:
# Most frequent resultstatus values (up to five). value_counts ignores NaN,
# so a column with no recorded values yields an empty Series.
# .head(5) is explicitly positional, unlike an integer slice, whose meaning
# depends on the index dtype.
datetimeevents_df.resultstatus.value_counts().head(5)

Series([], Name: resultstatus, dtype: int64)

In [10]:
# Distribution of the stopped column as row counts and percentage share.
# value_counts drops NaN by default, so the percentages are shares of the
# rows that have a stopped value, not of all rows in the table.
# The column is counted once; percentages are derived from those counts
# rather than from a second value_counts(normalize=True) pass.
stopped_counts = datetimeevents_df.stopped.value_counts()
# .head(5) is explicitly positional: the first five rows of the sorted counts.
c = stopped_counts.head(5)
p = stopped_counts.div(stopped_counts.sum()).mul(100).round(2).head(5)
pd.concat([c, p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
NotStopd,1766394,98.19
D/C'd,32623,1.81
