In [88]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
from IPython.display import display, HTML # used to print out pretty pandas dataframes
import matplotlib.dates as dates
import matplotlib.lines as mlines

pd.options.display.max_colwidth = 500
pd.options.display.width = 500
pd.options.display.max_columns = 500
pd.options.display.max_rows = 10


%matplotlib inline
plt.style.use('ggplot') 

# specify user/password/where the database is
sqluser = 'eightiesfanjan'
sqlpass = 'squiggle'
dbname = 'mimic'
schema_name = 'mimiciii'
host = 'localhost'

query_schema = 'SET search_path to ' + schema_name + ';'

# connect to the database
con = psycopg2.connect(dbname=dbname, user=sqluser, password=sqlpass, host=host)

# Demographic + Admissions Table

subject id, admission id, age, dob, first admit, last discharge, gender, age group, admissions data (mortality, complications)

In [89]:
#get patients whose age between 18-99
#can only get ages less than 89. 

query = query_schema + """

WITH first_admission_time AS
(
  SELECT
      p.subject_id,
      a.hadm_id,
      p.dob, 
      p.gender, 
      MIN (a.admittime) AS first_admittime, 
      MAX(a.dischtime) AS last_discharge,
      MIN( ROUND( (cast(admittime as date) - cast(dob as date)) / 365.242,2) )
          AS first_admit_age
  FROM patients p
  INNER JOIN admissions a
  ON p.subject_id = a.subject_id
  GROUP BY p.subject_id, p.dob, p.gender,a.hadm_id
  ORDER BY p.subject_id
)
SELECT
  row_number() over() as row_id,
  a.subject_id, 
  a.hadm_id,
  first_admittime,
  last_discharge,
  dob,
  first_admit_age, 
  gender,
  CASE
      -- all ages > 89 in the database were replaced with 300
      -- we check using > 100 as a conservative threshold to ensure we capture all these patients
      WHEN first_admit_age > 100
          then '>89'
      WHEN first_admit_age >= 14
          THEN 'adult'
      WHEN first_admit_age <= 1
          THEN 'neonate'
      ELSE 'middle'
      END AS age_group,
  c.*
FROM first_admission_time a
INNER JOIN 
    admissions c
ON a.subject_id = c.subject_id


"""
df = pd.read_sql_query(query,con)
df



Unnamed: 0,row_id,subject_id,hadm_id,first_admittime,last_discharge,dob,first_admit_age,gender,age_group,row_id.1,subject_id.1,hadm_id.1,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,religion,marital_status,ethnicity,edregtime,edouttime,diagnosis,hospital_expire_flag,has_chartevents_data
0,1,4,185777,2191-03-16 00:28:00,2191-03-23 18:41:00,2143-05-12,47.84,F,adult,3,4,185777,2191-03-16 00:28:00,2191-03-23 18:41:00,NaT,EMERGENCY,EMERGENCY ROOM ADMIT,HOME WITH HOME IV PROVIDR,Private,,PROTESTANT QUAKER,SINGLE,WHITE,2191-03-15 13:10:00,2191-03-16 01:10:00,"FEVER,DEHYDRATION,FAILURE TO THRIVE",0,1
1,2,8,159514,2117-11-20 10:22:00,2117-11-24 14:20:00,2117-11-20,0.00,M,neonate,7,8,159514,2117-11-20 10:22:00,2117-11-24 14:20:00,NaT,NEWBORN,PHYS REFERRAL/NORMAL DELI,HOME,Private,,CATHOLIC,,WHITE,NaT,NaT,NEWBORN,0,1
2,3,9,150750,2149-11-09 13:06:00,2149-11-14 10:15:00,2108-01-26,41.79,M,adult,8,9,150750,2149-11-09 13:06:00,2149-11-14 10:15:00,2149-11-14 10:15:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Medicaid,,UNOBTAINABLE,,UNKNOWN/NOT SPECIFIED,2149-11-09 11:13:00,2149-11-09 13:18:00,HEMORRHAGIC CVA,1,1
3,4,10,184167,2103-06-28 11:36:00,2103-07-06 12:10:00,2103-06-28,0.00,F,neonate,9,10,184167,2103-06-28 11:36:00,2103-07-06 12:10:00,NaT,NEWBORN,PHYS REFERRAL/NORMAL DELI,SHORT TERM HOSPITAL,Medicaid,,UNOBTAINABLE,,BLACK/AFRICAN AMERICAN,NaT,NaT,NEWBORN,0,1
4,5,11,194540,2178-04-16 06:18:00,2178-05-11 19:00:00,2128-02-22,50.15,F,adult,10,11,194540,2178-04-16 06:18:00,2178-05-11 19:00:00,NaT,EMERGENCY,EMERGENCY ROOM ADMIT,HOME HEALTH CARE,Private,,OTHER,MARRIED,WHITE,2178-04-15 20:46:00,2178-04-16 06:53:00,BRAIN MASS,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111817,111818,99923,192053,2201-05-15 13:12:00,2201-05-25 14:51:00,2146-10-04,54.61,M,adult,58953,99923,192053,2201-05-15 13:12:00,2201-05-25 14:51:00,NaT,EMERGENCY,CLINIC REFERRAL/PREMATURE,HOME HEALTH CARE,Private,ENGL,CATHOLIC,MARRIED,WHITE,NaT,NaT,END STAGE LIVER DISEASE,0,1
111818,111819,99923,192053,2201-05-15 13:12:00,2201-05-25 14:51:00,2146-10-04,54.61,M,adult,58952,99923,164914,2201-02-23 20:42:00,2201-03-03 16:00:00,NaT,EMERGENCY,CLINIC REFERRAL/PREMATURE,HOME,Private,ENGL,CATHOLIC,MARRIED,WHITE,2201-02-23 15:54:00,2201-02-23 21:58:00,HYPONATREMIA,0,1
111819,111820,99934,176121,2110-02-28 03:47:00,2110-03-06 14:31:00,2092-03-18,17.95,M,adult,58955,99934,176121,2110-02-28 03:47:00,2110-03-06 14:31:00,NaT,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME HEALTH CARE,Private,ENGL,NOT SPECIFIED,SINGLE,WHITE,NaT,NaT,PELVIC ABSCESS,0,1
111820,111821,99937,129380,2128-05-11 15:16:00,2128-05-20 13:35:00,2062-09-06,65.68,F,adult,58958,99937,129380,2128-05-11 15:16:00,2128-05-20 13:35:00,NaT,EMERGENCY,EMERGENCY ROOM ADMIT,SNF,Medicare,SPAN,CATHOLIC,MARRIED,HISPANIC/LATINO - GUATEMALAN,2128-05-11 11:40:00,2128-05-11 16:45:00,PNEUMONIA,0,1


# Thrombocytopenia Table
Gets subject id, admission id, diagnosis of thrombocytopenia

In [90]:
#3,065 RECORDS
query = query_schema + """

SELECT 
    a.subject_id, 
    a.hadm_id, 
    a.icd9_code
FROM
    diagnoses_icd a
INNER JOIN 
    (
        SELECT 
            icd9_code
        FROM
             d_icd_diagnoses
        WHERE 
            long_title like '%Thrombocytopenia%'
    ) b
ON a.icd9_code = b.icd9_code
ORDER BY subject_id DESC;

"""
df = pd.read_sql_query(query,con)

df


Unnamed: 0,subject_id,hadm_id,icd9_code
0,99982,183791,2875
1,99982,151454,2875
2,99939,159023,2875
3,99823,179720,2875
4,99802,108099,2875
...,...,...,...
3060,109,102024,2875
3061,109,189332,2875
3062,68,108329,2875
3063,61,176332,2875


# HIT Table
Gets subject id, admission id, for patients with HIT


In [91]:
#97 RECORDS
query = query_schema + """

SELECT 
    DISTINCT(a.hadm_id), 
    a.subject_id,
    a.icd9_code
FROM
    diagnoses_icd a
INNER JOIN 
    (
        SELECT 
            icd9_code
        FROM
             d_icd_diagnoses
        WHERE 
            long_title like '%HIT%'
    ) b
ON a.icd9_code = b.icd9_code

"""
df = pd.read_sql_query(query,con)

df


Unnamed: 0,hadm_id,subject_id,icd9_code
0,100098,65411,28984
1,100292,62345,28984
2,103246,93595,28984
3,103487,45410,28984
4,104262,79602,28984
...,...,...,...
92,196132,79075,28984
93,196803,82465,28984
94,197499,16856,28984
95,197999,73755,28984


# Hit or Thrombocytopenia with Steroid Use Table 

In [92]:
#1,092 RECORDS
query = query_schema + """


SELECT 
    DISTINCT(a.subject_id), 
    a.hadm_id, 
    a.icd9_code, 
    b.long_title
FROM
    diagnoses_icd a
INNER JOIN 
    (
        SELECT 
            icd9_code, long_title
        FROM
             d_icd_diagnoses
        WHERE 
            long_title like '%HIT%'
    ) b
ON a.icd9_code = b.icd9_code
UNION
    (
    SELECT 
        DISTINCT(a.subject_id), 
        a.hadm_id, 
        a.icd9_code, 
        b.long_title
    FROM
        diagnoses_icd a
    INNER JOIN 
        (
            SELECT 
                icd9_code, long_title
            FROM
                 d_icd_diagnoses
            WHERE 
                long_title like '%Thrombocytopenia%'
        ) b
    ON a.icd9_code = b.icd9_code
    INNER JOIN
        prescriptions d
    ON a.subject_id = d.subject_id
    WHERE
        LOWER(d.drug_name_generic) like '%prednisone%'
            OR 
        LOWER(d.drug) like '%methylprednisolone%'
            OR 
        LOWER(d.drug) like '%dexamethasone%'
    )

"""
df = pd.read_sql_query(query,con)

df


Unnamed: 0,subject_id,hadm_id,icd9_code,long_title
0,9402,106278,2875,"Thrombocytopenia, unspecified"
1,10814,155894,2875,"Thrombocytopenia, unspecified"
2,93566,139770,2875,"Thrombocytopenia, unspecified"
3,53019,192310,2875,"Thrombocytopenia, unspecified"
4,69905,111323,2875,"Thrombocytopenia, unspecified"
...,...,...,...,...
1087,109,137510,2875,"Thrombocytopenia, unspecified"
1088,2550,152273,2875,"Thrombocytopenia, unspecified"
1089,24553,152313,2875,"Thrombocytopenia, unspecified"
1090,32453,163483,2875,"Thrombocytopenia, unspecified"


# Heparin Table
Gets subject id, admission id, heparin date start/stop for patients with thrombocytopenia


In [93]:
query = query_schema + """

SELECT 
    a.subject_id, 
    a.hadm_id, 
    d.*
FROM
    diagnoses_icd a
INNER JOIN 
    (
        SELECT 
            icd9_code
        FROM
             d_icd_diagnoses
        WHERE 
            long_title like '%Thrombocytopenia%'
    ) b
ON a.icd9_code = b.icd9_code
INNER JOIN 
    admissions c
ON a.subject_id = c.subject_id
INNER JOIN
    prescriptions d
ON a.subject_id = d.subject_id
WHERE
    d.drug like '%Heparin%'

LIMIT 100

"""
df = pd.read_sql_query(query,con)

df


Unnamed: 0,subject_id,hadm_id,row_id,subject_id.1,hadm_id.1,icustay_id,startdate,enddate,drug_type,drug,drug_name_poe,drug_name_generic,formulary_drug_cd,gsn,ndc,prod_strength,dose_val_rx,dose_unit_rx,form_val_disp,form_unit_disp,route
0,109,172335,1837040,109,166018,241668.0,2141-03-28,2141-03-28,MAIN,Heparin,Heparin,Heparin Sodium,HEPA5I,006549,63323026201,5000 Units / mL- 1mL Vial,5000,UNIT,1,mL,SC
1,109,172335,1837040,109,166018,241668.0,2141-03-28,2141-03-28,MAIN,Heparin,Heparin,Heparin Sodium,HEPA5I,006549,63323026201,5000 Units / mL- 1mL Vial,5000,UNIT,1,mL,SC
2,109,172335,1837040,109,166018,241668.0,2141-03-28,2141-03-28,MAIN,Heparin,Heparin,Heparin Sodium,HEPA5I,006549,63323026201,5000 Units / mL- 1mL Vial,5000,UNIT,1,mL,SC
3,109,172335,1837040,109,166018,241668.0,2141-03-28,2141-03-28,MAIN,Heparin,Heparin,Heparin Sodium,HEPA5I,006549,63323026201,5000 Units / mL- 1mL Vial,5000,UNIT,1,mL,SC
4,109,172335,1837040,109,166018,241668.0,2141-03-28,2141-03-28,MAIN,Heparin,Heparin,Heparin Sodium,HEPA5I,006549,63323026201,5000 Units / mL- 1mL Vial,5000,UNIT,1,mL,SC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,109,172335,1837096,109,173633,284204.0,2141-12-08,2141-12-10,MAIN,Heparin,Heparin,Heparin Sodium,HEPA5I,006549,63323026201,5000 Units / mL- 1mL Vial,800-1500,UNIT,0.16-0.3,mL,IV
96,109,172335,1837096,109,173633,284204.0,2141-12-08,2141-12-10,MAIN,Heparin,Heparin,Heparin Sodium,HEPA5I,006549,63323026201,5000 Units / mL- 1mL Vial,800-1500,UNIT,0.16-0.3,mL,IV
97,109,172335,1837096,109,173633,284204.0,2141-12-08,2141-12-10,MAIN,Heparin,Heparin,Heparin Sodium,HEPA5I,006549,63323026201,5000 Units / mL- 1mL Vial,800-1500,UNIT,0.16-0.3,mL,IV
98,109,172335,1837096,109,173633,284204.0,2141-12-08,2141-12-10,MAIN,Heparin,Heparin,Heparin Sodium,HEPA5I,006549,63323026201,5000 Units / mL- 1mL Vial,800-1500,UNIT,0.16-0.3,mL,IV


# Argatroban Table
Gets subject id, admission id, argatroban date start/stop for patients with thrombocytopenia


In [94]:

query = query_schema + """

SELECT 
    a.subject_id, 
    a.hadm_id, 
    d.*
FROM
    diagnoses_icd a
INNER JOIN 
    (
        SELECT 
            icd9_code
        FROM
             d_icd_diagnoses
        WHERE 
            long_title like '%Thrombocytopenia%'
    ) b
ON a.icd9_code = b.icd9_code
INNER JOIN 
    admissions c
ON a.subject_id = c.subject_id
INNER JOIN
    prescriptions d
ON a.subject_id = d.subject_id
WHERE
    d.drug like '%Argatroban%'

LIMIT 100

"""
df = pd.read_sql_query(query,con)

df


Unnamed: 0,subject_id,hadm_id,row_id,subject_id.1,hadm_id.1,icustay_id,startdate,enddate,drug_type,drug,drug_name_poe,drug_name_generic,formulary_drug_cd,gsn,ndc,prod_strength,dose_val_rx,dose_unit_rx,form_val_disp,form_unit_disp,route
0,1569,138644,1718667,1569,100045,260971.0,2176-02-07,2176-02-08,MAIN,Argatroban,,,ARGA250I,047021,00007440701,250mg/2.5mL Vial,250,mg,1,VIAL,IV DRIP
1,1569,138644,1718667,1569,100045,260971.0,2176-02-07,2176-02-08,MAIN,Argatroban,,,ARGA250I,047021,00007440701,250mg/2.5mL Vial,250,mg,1,VIAL,IV DRIP
2,1569,138644,1718667,1569,100045,260971.0,2176-02-07,2176-02-08,MAIN,Argatroban,,,ARGA250I,047021,00007440701,250mg/2.5mL Vial,250,mg,1,VIAL,IV DRIP
3,1569,138644,1718667,1569,100045,260971.0,2176-02-07,2176-02-08,MAIN,Argatroban,,,ARGA250I,047021,00007440701,250mg/2.5mL Vial,250,mg,1,VIAL,IV DRIP
4,1569,138644,1718667,1569,100045,260971.0,2176-02-07,2176-02-08,MAIN,Argatroban,,,ARGA250I,047021,00007440701,250mg/2.5mL Vial,250,mg,1,VIAL,IV DRIP
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,7294,149551,2659330,7294,149551,245778.0,2149-01-06,2149-01-06,MAIN,Argatroban,,,ARGA250I,047021,00007440701,250mg/2.5mL Vial,250,mg,1,VIAL,IV DRIP
96,8231,101216,527795,8231,101216,,2121-12-19,2121-12-19,MAIN,Argatroban,,,ARGA250I,047021,00007440701,250mg/2.5mL Vial,250,mg,1,VIAL,IV DRIP
97,8231,101216,527795,8231,101216,,2121-12-19,2121-12-19,MAIN,Argatroban,,,ARGA250I,047021,00007440701,250mg/2.5mL Vial,250,mg,1,VIAL,IV DRIP
98,8231,101216,527795,8231,101216,,2121-12-19,2121-12-19,MAIN,Argatroban,,,ARGA250I,047021,00007440701,250mg/2.5mL Vial,250,mg,1,VIAL,IV DRIP


# Steroid Table
Gets subject id, admission id, steroid date start/stop, for patients with thrombocytopenia


In [95]:

query = query_schema + """

SELECT 
    a.subject_id, 
    a.hadm_id, 
    d.*
FROM
    diagnoses_icd a
INNER JOIN 
    (
        SELECT 
            icd9_code
        FROM
             d_icd_diagnoses
        WHERE 
            long_title like '%Thrombocytopenia%'
    ) b
ON a.icd9_code = b.icd9_code
INNER JOIN 
    admissions c
ON a.subject_id = c.subject_id
INNER JOIN
    prescriptions d
ON a.subject_id = d.subject_id
WHERE
    LOWER(d.drug_name_generic) like '%prednisone%'
        OR 
    LOWER(d.drug) like '%methylprednisolone%' AND route = 'IV'
        OR 
    LOWER(d.drug) like '%dexamethasone%'

"""
df = pd.read_sql_query(query,con)

df


Unnamed: 0,subject_id,hadm_id,row_id,subject_id.1,hadm_id.1,icustay_id,startdate,enddate,drug_type,drug,drug_name_poe,drug_name_generic,formulary_drug_cd,gsn,ndc,prod_strength,dose_val_rx,dose_unit_rx,form_val_disp,form_unit_disp,route
0,109,172335,1837039,109,166018,241668.0,2141-03-28,2141-03-28,MAIN,PredniSONE,PredniSONE,PredniSONE,PRED5,006753,00054872425,5 mg Tablet,15,mg,3,TAB,PO
1,109,172335,1837039,109,166018,241668.0,2141-03-28,2141-03-28,MAIN,PredniSONE,PredniSONE,PredniSONE,PRED5,006753,00054872425,5 mg Tablet,15,mg,3,TAB,PO
2,109,172335,1837039,109,166018,241668.0,2141-03-28,2141-03-28,MAIN,PredniSONE,PredniSONE,PredniSONE,PRED5,006753,00054872425,5 mg Tablet,15,mg,3,TAB,PO
3,109,172335,1837039,109,166018,241668.0,2141-03-28,2141-03-28,MAIN,PredniSONE,PredniSONE,PredniSONE,PRED5,006753,00054872425,5 mg Tablet,15,mg,3,TAB,PO
4,109,172335,1837039,109,166018,241668.0,2141-03-28,2141-03-28,MAIN,PredniSONE,PredniSONE,PredniSONE,PRED5,006753,00054872425,5 mg Tablet,15,mg,3,TAB,PO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69562,95705,134885,753946,95705,134885,212530.0,2134-12-30,2134-12-31,MAIN,MethylPREDNISolone Sodium Succ,MethylPREDNISolone Sodium Succ,MethylPREDNISolone Sodium Succ,METH40I,051554,00009011319,40mg Vial,80,mg,2,VIAL,IV
69563,95705,134885,753946,95705,134885,212530.0,2134-12-30,2134-12-31,MAIN,MethylPREDNISolone Sodium Succ,MethylPREDNISolone Sodium Succ,MethylPREDNISolone Sodium Succ,METH40I,051554,00009011319,40mg Vial,80,mg,2,VIAL,IV
69564,97263,164631,3786883,97263,164631,223670.0,2122-01-16,2122-01-21,MAIN,Dexamethasone,Dexamethasone,Dexamethasone Sod Phosphate,DEXA4I,006778,63323016501,4mg/mL 1mL Vial,4,mg,1,VIAL,IV
69565,99939,159023,3394229,99939,159023,252442.0,2110-03-22,2110-03-23,MAIN,MethylPREDNISolone Sodium Succ,,,MPRED5I,051556,00009076502,500mg Vial,500,mg,1,VIAL,IV


# //TODO Tables
* Presence of positive Pf4 antibody test OR HIT positive serology 
* Positive serotonin release assay  ( not sure if possible )
* Platelet count < 150,000