In [2]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
from IPython.display import display, HTML # used to print out pretty pandas dataframes
import matplotlib.dates as dates
import matplotlib.lines as mlines

pd.options.display.max_colwidth = 500
pd.options.display.width = 500
pd.options.display.max_columns = 500
pd.options.display.max_rows = 200


%matplotlib inline
plt.style.use('ggplot') 

# specify user/password/where the database is
sqluser = 'eightiesfanjan'
sqlpass = 'squiggle'
dbname = 'mimic'
schema_name = 'mimiciii'
host = 'localhost'

query_schema = 'SET search_path to ' + schema_name + ';'

# connect to the database
con = psycopg2.connect(dbname=dbname, user=sqluser, password=sqlpass, host=host)

In [16]:
#get anemia icd9 code

query = query_schema + """

SELECT 
    DISTINCT(long_title)
FROM
     d_icd_diagnoses
WHERE 
    long_title like '%anemia%'
LIMIT 10
"""
df = pd.read_sql_query(query,con)

df



Unnamed: 0,long_title
0,"Acquired hemolytic anemia, unspecified"
1,Acute posthemorrhagic anemia
2,Antineoplastic chemotherapy induced anemia
3,"Aplastic anemia, unspecified"
4,Autoimmune hemolytic anemias
5,Congenital anemia
6,Family history of anemia
7,Folate-deficiency anemia
8,"Hereditary hemolytic anemia, unspecified"
9,Iron deficiency anemia secondary to blood loss (chronic)


In [12]:
#people with albumin lab work
query = query_schema + """


SELECT 
    *
FROM
    labevents a
INNER JOIN
    d_labitems b
ON a.itemid = b.itemid
WHERE lower(label) like '%albumin%'
LIMIT 1000
"""
df = pd.read_sql_query(query,con)

df


Unnamed: 0,row_id,subject_id,hadm_id,itemid,charttime,value,valuenum,valueuom,flag,row_id.1,itemid.1,label,fluid,category,loinc_code
0,59671,109,196721.0,50835,2142-07-16 09:09:00,2.3,2.3,g/dL,,36,50835,"Albumin, Ascites",Ascites,Chemistry,1749-1
1,167762,252,190159.0,50835,2133-04-13 14:19:00,<1.0,,g/dL,,36,50835,"Albumin, Ascites",Ascites,Chemistry,1749-1
2,178520,279,192224.0,50835,2164-06-13 11:06:00,LESS THAN 1,,g/dL,,36,50835,"Albumin, Ascites",Ascites,Chemistry,1749-1
3,188580,292,179726.0,50835,2103-09-27 22:10:00,0.6,0.6,g/dL,,36,50835,"Albumin, Ascites",Ascites,Chemistry,1749-1
4,254113,405,134001.0,50835,2116-01-10 11:00:00,LESS THAN 1.0,,g/dL,,36,50835,"Albumin, Ascites",Ascites,Chemistry,1749-1
5,339326,495,186199.0,50835,2183-05-21 11:57:00,2.2,2.2,g/dL,,36,50835,"Albumin, Ascites",Ascites,Chemistry,1749-1
6,423180,634,145911.0,50835,2116-12-08 10:00:00,1.3,1.3,g/dL,,36,50835,"Albumin, Ascites",Ascites,Chemistry,1749-1
7,423192,634,145911.0,50835,2116-12-08 13:00:00,1.4,1.4,g/dL,,36,50835,"Albumin, Ascites",Ascites,Chemistry,1749-1
8,422132,634,145911.0,50835,2116-11-12 13:48:00,1.9,1.9,g/dL,,36,50835,"Albumin, Ascites",Ascites,Chemistry,1749-1
9,474739,725,105223.0,50835,2106-08-23 15:37:00,3.2,3.2,g/dL,,36,50835,"Albumin, Ascites",Ascites,Chemistry,1749-1


In [10]:
#people with transferrin blood work 
query = query_schema + """


SELECT 
    *
FROM
    labevents a
INNER JOIN
    d_labitems b
ON a.itemid = b.itemid
WHERE lower(label) like '%transferrin%'
LIMIT 1000
"""
df = pd.read_sql_query(query,con)

df


Unnamed: 0,row_id,subject_id,hadm_id,itemid,charttime,value,valuenum,valueuom,flag,row_id.1,itemid.1,label,fluid,category,loinc_code
0,2648,4,,50998,2191-05-23 07:10:00,148,148.0,mg/dL,abnormal,198,50998,Transferrin,Blood,Chemistry,2500-7
1,1438,3,145834.0,50998,2101-10-29 04:45:00,128,128.0,mg/dL,abnormal,198,50998,Transferrin,Blood,Chemistry,2500-7
2,2113,4,,50998,2191-04-12 06:55:00,232,232.0,mg/dL,,198,50998,Transferrin,Blood,Chemistry,2500-7
3,4101,6,,50998,2175-06-26 08:00:00,154,154.0,mg/dL,abnormal,198,50998,Transferrin,Blood,Chemistry,2500-7
4,8107,17,,50998,2135-07-19 11:15:00,299,299.0,mg/dL,,198,50998,Transferrin,Blood,Chemistry,2500-7
5,9654,21,109451.0,50998,2134-09-19 06:35:00,239,239.0,mg/dL,,198,50998,Transferrin,Blood,Chemistry,2500-7
6,12075,30,104557.0,50998,2172-10-15 04:00:00,211,211.0,mg/dL,,198,50998,Transferrin,Blood,Chemistry,2500-7
7,20699,44,,50998,2194-12-11 16:50:00,177,177.0,mg/dL,abnormal,198,50998,Transferrin,Blood,Chemistry,2500-7
8,18668,41,101757.0,50998,2133-01-17 06:46:00,205,205.0,mg/dL,,198,50998,Transferrin,Blood,Chemistry,2500-7
9,21760,52,190797.0,50998,2191-01-10 02:41:00,142,142.0,mg/dL,abnormal,198,50998,Transferrin,Blood,Chemistry,2500-7


In [3]:
#get demographics table
#get patients whose age between 18-99
#can only get ages less than 89. 

query = query_schema + """

WITH first_admission_time AS
(
  SELECT
      p.subject_id,
      a.hadm_id,
      p.dob, 
      p.gender, 
      MIN (a.admittime) AS first_admittime, 
      MIN( ROUND( (cast(admittime as date) - cast(dob as date)) / 365.242,2) )
          AS first_admit_age
  FROM patients p
  INNER JOIN admissions a
  ON p.subject_id = a.subject_id
  GROUP BY p.subject_id, p.dob, p.gender,a.hadm_id
  ORDER BY p.subject_id
)
SELECT
    a.subject_id, 
    a.hadm_id,
  gender,
  dob,
  first_admit_age, 
  first_admittime,
  admittime,
  dischtime,
  (cast(dischtime as date) - cast(admittime as date)) as duration,
  CASE
      -- all ages > 89 in the database were replaced with 300
      WHEN first_admit_age > 100
          then '>89'
      WHEN first_admit_age >= 14
          THEN 'adult'
      WHEN first_admit_age <= 1
          THEN 'neonate'
      ELSE 'middle'
      END AS age_group,
  deathtime,
  admission_type, 
  diagnosis, 
  hospital_expire_flag as mortality_bin
FROM first_admission_time a
INNER JOIN 
    admissions c
ON a.subject_id = c.subject_id


"""
df_demo= pd.read_sql_query(query,con)
df_demo



Unnamed: 0,subject_id,hadm_id,gender,dob,first_admit_age,first_admittime,admittime,dischtime,duration,age_group,deathtime,admission_type,diagnosis,mortality_bin
0,2,163353,M,2138-07-17,0.00,2138-07-17 19:04:00,2138-07-17 19:04:00,2138-07-21 15:48:00,4,neonate,,NEWBORN,NEWBORN,0
1,4,185777,F,2143-05-12,47.84,2191-03-16 00:28:00,2191-03-16 00:28:00,2191-03-23 18:41:00,7,adult,,EMERGENCY,"FEVER,DEHYDRATION,FAILURE TO THRIVE",0
2,6,107064,F,2109-06-21,65.94,2175-05-30 07:15:00,2175-05-30 07:15:00,2175-06-15 16:00:00,16,adult,,ELECTIVE,CHRONIC RENAL FAILURE/SDA,0
3,7,118037,F,2121-05-23,0.00,2121-05-23 15:05:00,2121-05-23 15:05:00,2121-05-27 11:57:00,4,neonate,,NEWBORN,NEWBORN,0
4,8,159514,M,2117-11-20,0.00,2117-11-20 10:22:00,2117-11-20 10:22:00,2117-11-24 14:20:00,4,neonate,,NEWBORN,NEWBORN,0
5,9,150750,M,2108-01-26,41.79,2149-11-09 13:06:00,2149-11-09 13:06:00,2149-11-14 10:15:00,5,adult,2149-11-14 10:15:00,EMERGENCY,HEMORRHAGIC CVA,1
6,10,184167,F,2103-06-28,0.00,2103-06-28 11:36:00,2103-06-28 11:36:00,2103-07-06 12:10:00,8,neonate,,NEWBORN,NEWBORN,0
7,11,194540,F,2128-02-22,50.15,2178-04-16 06:18:00,2178-04-16 06:18:00,2178-05-11 19:00:00,25,adult,,EMERGENCY,BRAIN MASS,0
8,13,143045,F,2127-02-27,39.86,2167-01-08 18:43:00,2167-01-08 18:43:00,2167-01-15 15:15:00,7,adult,,EMERGENCY,CORONARY ARTERY DISEASE,0
9,16,103251,M,2178-02-03,0.00,2178-02-03 06:35:00,2178-02-03 06:35:00,2178-02-05 10:51:00,2,neonate,,NEWBORN,NEWBORN,0
