# merge patients

## load mimic data

In [65]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import datetime
from IPython.display import display, HTML # used to print out pretty pandas dataframes
import matplotlib.dates as dates
import matplotlib.lines as mlines

%matplotlib inline
plt.style.use('ggplot') 

In [66]:
# specify user/password/where for MIMIC
sqluser = 'postgres'
sqlpass = 'postgres'
dbname = 'mimic'
schema_name = 'mimiciii'
host = 'localhost'

query_schema = 'SET search_path to ' + schema_name + ';'

# connect to the database
con = psycopg2.connect(dbname=dbname, user=sqluser, password=sqlpass, host=host)

In [67]:
# load patient from mimic
query = query_schema + """
SELECT 
    SUBJECT_ID,
    GENDER,
    DOB
from patients
"""
print('patients query:')
print(query)
mp_dat = pd.read_sql_query(query,con)
mp_dat.head()

patients query:
SET search_path to mimiciii;
SELECT 
    SUBJECT_ID,
    GENDER,
    DOB
from patients



Unnamed: 0,subject_id,gender,dob
0,249,F,2075-03-13
1,250,F,2164-12-27
2,251,M,2090-03-15
3,252,M,2078-03-06
4,253,F,2089-11-26


In [68]:
#load admissions from mimic
query = query_schema + """
SELECT 
    SUBJECT_ID,
    HADM_ID,
    ADMITTIME,
    ADMISSION_TYPE,
    ETHNICITY,
    HAS_CHARTEVENTS_DATA
from admissions
WHERE
    HAS_CHARTEVENTS_DATA = 1
"""
print('admissions query:')
print(query)
ma_dat = pd.read_sql_query(query,con)
ma_dat.head()

admissions query:
SET search_path to mimiciii;
SELECT 
    SUBJECT_ID,
    HADM_ID,
    ADMITTIME,
    ADMISSION_TYPE,
    ETHNICITY,
    HAS_CHARTEVENTS_DATA
from admissions
WHERE
    HAS_CHARTEVENTS_DATA = 1



Unnamed: 0,subject_id,hadm_id,admittime,admission_type,ethnicity,has_chartevents_data
0,22,165315,2196-04-09 12:26:00,EMERGENCY,WHITE,1
1,23,152223,2153-09-03 07:15:00,ELECTIVE,WHITE,1
2,23,124321,2157-10-18 19:34:00,EMERGENCY,WHITE,1
3,24,161859,2139-06-06 16:14:00,EMERGENCY,WHITE,1
4,25,129635,2160-11-02 02:06:00,EMERGENCY,WHITE,1


In [69]:
# load the weight from the chartevents in mimic
# unique_item = '226531' #lb
unique_item = '226512' #kg
query = query_schema + """
SELECT 
    subject_id ,
    hadm_id,
    icustay_id,
    valuenum
from chartevents
WHERE
    ITEMID = '{}'
ORDER BY
    valuenum DESC
""".format(unique_item)
print('my query:')
print(query)
mw_dat = pd.read_sql_query(query,con).rename(columns={"valuenum": "weight_kg"})
mw_dat.head()

my query:
SET search_path to mimiciii;
SELECT 
    subject_id ,
    hadm_id,
    icustay_id,
    valuenum
from chartevents
WHERE
    ITEMID = '226512'
ORDER BY
    valuenum DESC



Unnamed: 0,subject_id,hadm_id,icustay_id,weight_kg
0,97830,112817,264867.0,1251.0
1,84039,150314,274467.0,710.4
2,82004,144594,287511.0,710.0
3,97917,118576,220449.0,670.0
4,42683,192931,267149.0,575.0


In [72]:
#merge the mimic data tables
mdat = pd.merge(mp_dat, ma_dat, how='inner', on = ['subject_id']) #pateints and admissions
mdat = pd.merge(mdat, mw_dat, on = ['subject_id', 'hadm_id']) #weight
print(mdat.shape)
mdat.head()

(22604, 10)


Unnamed: 0,subject_id,gender,dob,hadm_id,admittime,admission_type,ethnicity,has_chartevents_data,icustay_id,weight_kg
0,249,F,2075-03-13,149546,2155-02-03 20:16:00,EMERGENCY,WHITE,1,263055.0,76.5
1,249,F,2075-03-13,158975,2156-04-27 15:33:00,EMERGENCY,WHITE,1,282599.0,69.0
2,249,F,2075-03-13,158975,2156-04-27 15:33:00,EMERGENCY,WHITE,1,263882.0,66.8
3,266,F,2090-12-17,186251,2168-07-10 08:01:00,EMERGENCY,BLACK/AFRICAN AMERICAN,1,293876.0,61.0
4,665,M,2052-05-20,152089,2119-02-25 20:13:00,EMERGENCY,BLACK/AFRICAN AMERICAN,1,220843.0,56.0


In [73]:
#get age from the dates
mdat['age'] = mdat['admittime'].sub(mdat['dob'], axis=0).dt.days/365
mdat.head()

Unnamed: 0,subject_id,gender,dob,hadm_id,admittime,admission_type,ethnicity,has_chartevents_data,icustay_id,weight_kg,age
0,249,F,2075-03-13,149546,2155-02-03 20:16:00,EMERGENCY,WHITE,1,263055.0,76.5,79.947945
1,249,F,2075-03-13,158975,2156-04-27 15:33:00,EMERGENCY,WHITE,1,282599.0,69.0,81.178082
2,249,F,2075-03-13,158975,2156-04-27 15:33:00,EMERGENCY,WHITE,1,263882.0,66.8,81.178082
3,266,F,2090-12-17,186251,2168-07-10 08:01:00,EMERGENCY,BLACK/AFRICAN AMERICAN,1,293876.0,61.0,77.613699
4,665,M,2052-05-20,152089,2119-02-25 20:13:00,EMERGENCY,BLACK/AFRICAN AMERICAN,1,220843.0,56.0,66.810959


## load eicu data