In [1]:
import pandas as pd
import psycopg2


sqluser = 'postgres'
dbname = 'mimic'
schema_name = 'mimiciii'

# Password keyword only necessary if user-specified password required to connect to database
con = psycopg2.connect(dbname=dbname, user=sqluser, password='postgre')
cur = con.cursor()

query = "SELECT" \
        " a.subject_id, a.hadm_id, a.ethnicity, a.insurance, p.gender" \
        ", ROUND( (cast(a.admittime as date) - cast(p.dob as date)) / 365.242)" \
        " AS age_on_admiss" \
        ", cast(a.dischtime as date) - cast(a.admittime as date)" \
        " AS length_of_stay" \
        " FROM admissions a" \
        " INNER JOIN patients p" \
        " ON p.subject_id = a.subject_id" \
        " GROUP BY" \
        " a.subject_id, a.hadm_id, a.ethnicity, a.insurance, p.gender, a.admittime, p.dob, a.dischtime" \
        " ORDER BY a.subject_id;"

cur.execute('SET search_path to ' + schema_name)
df = pd.read_sql_query(query, con)

print(df.head(5))

   subject_id  hadm_id ethnicity insurance gender  age_on_admiss  \
0           2   163353     ASIAN   Private      M            0.0   
1           3   145834     WHITE  Medicare      M           77.0   
2           4   185777     WHITE   Private      F           48.0   
3           5   178980     ASIAN   Private      M            0.0   
4           6   107064     WHITE  Medicare      F           66.0   

   length_of_stay  
0               4  
1              11  
2               7  
3               2  
4              16  


In [4]:
%%time

# Code adapted from https://github.com/MIT-LCP/mimic-code/blob/master/concepts/pivot/pivoted-vital.sql
query_vitals = """
with ce as
(
  select ce.icustay_id
    , ce.charttime
    , (case when itemid in (211,220045) and valuenum > 0 and valuenum < 300 then valuenum else null end) as HeartRate
    , (case when itemid in (51,442,455,6701,220179,220050) and valuenum > 0 and valuenum < 400 then valuenum else null end) as SysBP
    , (case when itemid in (615,618,220210,224690) and valuenum > 0 and valuenum < 70 then valuenum else null end) as RespRate
    , (case when itemid in (223761,678) and valuenum > 70 and valuenum < 120 then (valuenum-32)/1.8 -- converted to degC in valuenum call
               when itemid in (223762,676) and valuenum > 10 and valuenum < 50  then valuenum else null end) as TempC
    , (case when itemid in (646,220277) and valuenum > 0 and valuenum <= 100 then valuenum else null end) as SpO2
  from chartevents ce
  -- exclude rows marked as error
  where ce.error IS DISTINCT FROM 1
  and ce.itemid in
  (
  -- HEART RATE
  211, --"Heart Rate"
  220045, --"Heart Rate"

  -- Systolic

  51, --	Arterial BP [Systolic]
  442, --	Manual BP [Systolic]
  455, --	NBP [Systolic]
  6701, --	Arterial BP #2 [Systolic]
  220179, --	Non Invasive Blood Pressure systolic
  220050, --	Arterial Blood Pressure systolic

  -- RESPIRATORY RATE
  618,--	Respiratory Rate
  615,--	Resp Rate (Total)
  220210,--	Respiratory Rate
  224690, --	Respiratory Rate (Total)


  -- SPO2, peripheral
  646, 220277,


  -- TEMPERATURE
  223762, -- "Temperature Celsius"
  676,	-- "Temperature C"
  223761, -- "Temperature Fahrenheit"
  678 --	"Temperature F"

  )
)
select
    ce.icustay_id
  , ce.charttime
  , avg(HeartRate) as HeartRate
  , avg(SysBP) as SysBP
  , avg(RespRate) as RespRate
  , avg(TempC) as TempC
  , avg(SpO2) as SpO2
from ce
group by ce.icustay_id, ce.charttime
order by ce.icustay_id, ce.charttime;"""

df_vitals = pd.read_sql_query(query_vitals, con)

print(df_vitals.head(5))

   icustay_id           charttime  heartrate  sysbp  resprate      tempc  spo2
0    200001.0 2181-11-25 19:06:00      115.0    NaN       NaN        NaN   NaN
1    200001.0 2181-11-25 19:07:00        NaN    NaN      22.0        NaN   NaN
2    200001.0 2181-11-25 19:08:00        NaN  113.0       NaN        NaN   NaN
3    200001.0 2181-11-25 19:14:00        NaN    NaN       NaN        NaN  94.0
4    200001.0 2181-11-25 19:16:00      114.0    NaN      26.0  37.277778  95.0
Wall time: 15min 28s


In [5]:
df_vitals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8627341 entries, 0 to 8627340
Data columns (total 7 columns):
icustay_id    float64
charttime     datetime64[ns]
heartrate     float64
sysbp         float64
resprate      float64
tempc         float64
spo2          float64
dtypes: datetime64[ns](1), float64(6)
memory usage: 460.7 MB
