In [41]:
import os
import pickle as pkl
from datetime import datetime
from datetime import timedelta

import matplotlib.pyplot as plt
import pandas as pd
import psycopg2

# Raise the display limits so wide and long frames are rendered without truncation.
pd.options.display.max_columns = 300
pd.options.display.max_rows = 300

In [2]:
# Connection settings for the MIMIC-IV PostgreSQL server.
# Secrets must not live in the notebook: every value is read from the standard
# libpq environment variables, with the previous non-secret values as defaults.
# SECURITY: the password that used to be hardcoded here has been exposed to
# anyone with access to this notebook's history and should be rotated.
host = os.environ.get('PGHOST', '119.8.167.24')
port = os.environ.get('PGPORT', '5432')
username = os.environ.get('PGUSER', 'team27')
# None when PGPASSWORD is unset; psycopg2 drops None-valued arguments, so libpq
# then falls back to its own lookup (e.g. the ~/.pgpass password file).
password = os.environ.get('PGPASSWORD')
database = os.environ.get('PGDATABASE', 'mimiciv') #mimic

In [3]:
# Open the shared PostgreSQL connection from the settings defined above.
con = psycopg2.connect(
    host=host,
    port=port,
    database=database,
    user=username,
    password=password,
)

In [4]:
# Cursor on the shared connection. NOTE(review): the query cells visible in this
# notebook pass `con` straight to pd.read_sql and never use `cur` — confirm it is needed.
cur = con.cursor()

In [101]:
# Execution mode for the data cells: 'query' pulls from the database and writes
# the .pkl caches; 'load' reads the previously pickled results instead.
mode = 'query' # 'query' or 'load'

# Hospital and ICU cohort

In [11]:
%%time

# Build the study cohort (only in 'query' mode; 'load' mode reads the pickle cache instead).
# The query keeps adult patients (anchor_age >= 18) whose admission has
# hospital_expire_flag = 0 and joins each admission to its first ICU stay
# (the icustays row with the earliest intime for that hadm_id).
# hosp_los is the day component of (dischtime - admittime), plus one.
if mode == 'query':

    query = '''
    select 

    adm_patient.gender,
    adm_patient.anchor_age,
    adm_patient.dod,
    admittime, 
    dischtime, 
    deathtime, 
    admission_type, 
    admission_location, 
    discharge_location, 
    insurance,edregtime, 
    edouttime,
    hospital_expire_flag,
    first_icu.*,
    DATE_PART('day',dischtime - admittime)+1 as hosp_los

    from
    (
        select *
            from mimic_core.admissions adm
        join 
            mimic_core.patients pat
        on pat.subject_id = adm.subject_id
        where pat.anchor_age >= 18
        and adm.hospital_expire_flag = 0 

      ) adm_patient

    join

    (
        select icu.*
            from mimic_icu.icustays icu
        join
        (
            select hadm_id  , min(intime) as min_intime 
            FROM 
                mimic_icu.icustays 
            GROUP BY hadm_id 
        ) first_
        on icu.hadm_id = first_.hadm_id and icu.intime = first_.min_intime

    ) first_icu
    on adm_patient.hadm_id = first_icu.hadm_id


    '''

    # Run the query over the shared connection and keep the result as a DataFrame.
    hosp_icu = pd.read_sql(query, con)

In [None]:
# Rename the ICU-stay `los` column to `icu_los` so it is not confused with `hosp_los`.
# Only a freshly queried frame needs this: in 'load' mode `hosp_icu` does not
# exist yet at this point (it is unpickled in the next cell), and the cached
# pickle was written after this rename, so it already carries `icu_los`.
if mode == 'query':
    hosp_icu = hosp_icu.rename(columns={'los':'icu_los'})

In [None]:
# Persist the cohort after querying, or restore it from disk in 'load' mode.
if mode == 'query':
    with open('hosp_icu.pkl', 'wb') as f:
        pkl.dump(hosp_icu, f)
elif mode == 'load':
    # NOTE: unpickling can execute arbitrary code; only load caches written by this notebook.
    with open('hosp_icu.pkl', 'rb') as f:
        hosp_icu = pkl.load(f)
else:
    # Fail loudly instead of silently leaving `hosp_icu` undefined or stale.
    raise ValueError(f"mode must be 'query' or 'load', got {mode!r}")

In [68]:
# Cohort size as (rows, columns).
hosp_icu.shape

(58101, 22)

In [42]:
# Preview the first rows of the cohort table.
hosp_icu.head()

Unnamed: 0,gender,anchor_age,dod,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,edregtime,edouttime,hospital_expire_flag,subject_id,hadm_id,stay_id,first_careunit,last_careunit,intime,outtime,los,hosp_los
0,M,71,,2121-08-30 16:33:00,2121-09-03 15:45:00,,URGENT,TRANSFER FROM HOSPITAL,SKILLED NURSING FACILITY,Medicare,NaT,NaT,0,14990224,20000147,37348463,Cardiac Vascular Intensive Care Unit (CVICU),Cardiac Vascular Intensive Care Unit (CVICU),2121-08-30 18:08:39,2121-08-31 21:29:49,1.139699,3.0
1,M,63,,2125-10-26 00:00:00,2125-10-28 19:25:00,,SURGICAL SAME DAY ADMISSION,PHYSICIAN REFERRAL,HOME,Medicaid,NaT,NaT,0,15975141,20001494,39346951,Neuro Intermediate,Neuro Intermediate,2125-10-26 17:34:33,2125-10-28 19:33:26,2.082558,2.0
2,M,64,,2196-06-12 00:00:00,2196-06-19 20:01:00,,ELECTIVE,PHYSICIAN REFERRAL,HOME HEALTH CARE,Medicare,NaT,NaT,0,17112572,20001687,32926753,Cardiac Vascular Intensive Care Unit (CVICU),Cardiac Vascular Intensive Care Unit (CVICU),2196-06-13 09:18:36,2196-06-15 10:34:07,2.052442,7.0
3,M,56,,2157-09-23 23:24:00,2157-09-29 14:20:00,,EW EMER.,EMERGENCY ROOM,SKILLED NURSING FACILITY,Medicare,2157-09-23 18:09:00,2157-09-24 01:01:00,0,18826698,20002252,35781968,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2157-09-24 01:01:00,2157-09-24 15:32:48,0.605417,5.0
4,M,46,,2156-08-31 14:52:00,2156-09-03 11:15:00,,URGENT,TRANSFER FROM HOSPITAL,HOME,Other,NaT,NaT,0,18346781,20002270,38835257,Medical/Surgical Intensive Care Unit (MICU/SICU),Medical/Surgical Intensive Care Unit (MICU/SICU),2156-08-31 14:52:54,2156-09-01 02:07:59,0.468808,2.0


# Ventilator queries

In [99]:
%%time
query = '''
SELECT subject_id, stay_id, charttime FROM (
  select vs.subject_id, ce.stay_id, vs.charttime, vs.ventilator_mode, vs.extubated from mimic_derived.ventilator_setting vs 
  JOIN (select subject_id, stay_id, charttime from mimic_icu.chartevents ) ce
  on  vs.subject_id = ce.subject_id and vs.charttime = ce.charttime
  where extubated = 1) vsce
  GROUP BY subject_id, stay_id, charttime
'''

vs = pd.read_sql(query, con)

query = '''
select *, DATE_PART('day', endtime - starttime)+1 as vent_duration from mimic_derived.ventilator_durations vd
'''

vd = pd.read_sql(query, con)

Wall time: 584 ms


In [102]:
# Persist both ventilator frames after querying, or restore them in 'load' mode.
if mode == 'query':
    with open('vs.pkl', 'wb') as f:
        pkl.dump(vs, f)
    with open('vd.pkl', 'wb') as f:
        pkl.dump(vd, f)
elif mode == 'load':
    # NOTE: unpickling can execute arbitrary code; only load caches written by this notebook.
    with open('vs.pkl', 'rb') as f:
        vs = pkl.load(f)
    with open('vd.pkl', 'rb') as f:
        vd = pkl.load(f)
else:
    # Fail loudly instead of silently leaving `vs`/`vd` undefined or stale.
    raise ValueError(f"mode must be 'query' or 'load', got {mode!r}")

In [103]:
# Row/column counts of the extubation events (vs) and ventilation episodes (vd).
vs.shape, vd.shape

((6161, 3), (28908, 5))

In [104]:
# Exploratory join on stay_id alone: every extubation event of a stay is paired
# with every ventilation episode of that same stay.
vsvd = vs.merge(vd, on='stay_id')

In [105]:
# Spot-check one patient in the stay-level join (each extubation charttime is
# repeated once per ventilation episode of the stay).
vsvd[vsvd['subject_id']==16108683]

Unnamed: 0,subject_id,stay_id,charttime,ventnum,starttime,endtime,vent_duration
3,16108683,38197814,2120-11-21 16:10:00,1,2120-11-10 04:00:00,2120-11-12 16:09:00,3.0
4,16108683,38197814,2120-11-21 16:10:00,2,2120-11-12 23:00:00,2120-11-21 16:10:00,9.0
5,16108683,38197814,2120-11-21 16:10:00,3,2120-11-22 02:12:00,2120-11-27 18:00:00,6.0
6,16108683,38197814,2120-11-27 18:00:00,1,2120-11-10 04:00:00,2120-11-12 16:09:00,3.0
7,16108683,38197814,2120-11-27 18:00:00,2,2120-11-12 23:00:00,2120-11-21 16:10:00,9.0
8,16108683,38197814,2120-11-27 18:00:00,3,2120-11-22 02:12:00,2120-11-27 18:00:00,6.0


In [106]:
# Keep only the pairs where the extubation charttime equals the episode endtime,
# i.e. ventilation episodes that end exactly at a recorded extubation.
vsvd1 = vs.merge(
    vd,
    left_on=['stay_id', 'charttime'],
    right_on=['stay_id', 'endtime'],
)

In [107]:
# Same patient after matching on the episode endtime: one row per matched episode.
vsvd1[vsvd1['subject_id']==16108683]

Unnamed: 0,subject_id,stay_id,charttime,ventnum,starttime,endtime,vent_duration
3,16108683,38197814,2120-11-21 16:10:00,2,2120-11-12 23:00:00,2120-11-21 16:10:00,9.0
2727,16108683,38197814,2120-11-27 18:00:00,3,2120-11-22 02:12:00,2120-11-27 18:00:00,6.0


In [108]:
# Collapse to one row per (subject_id, stay_id) with the summed ventilation days.
vsvd1 = (
    vsvd1
    .groupby(['subject_id', 'stay_id'])['vent_duration']
    .sum()
    .reset_index()
)

In [109]:
# Same patient after aggregation: a single row holding the total vent_duration.
vsvd1[vsvd1['subject_id']==16108683]

Unnamed: 0,subject_id,stay_id,vent_duration
2938,16108683,38197814,15.0


In [129]:
# Look up the spot-checked stay in the cohort table (e.g. to compare the summed
# ventilation days against icu_los).
hosp_icu[hosp_icu['stay_id']==38197814]

Unnamed: 0,gender,anchor_age,dod,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,edregtime,edouttime,hospital_expire_flag,subject_id,hadm_id,stay_id,first_careunit,last_careunit,intime,outtime,icu_los,hosp_los
16964,M,41,,2120-11-08 15:02:00,2121-03-12 14:40:00,,URGENT,TRANSFER FROM HOSPITAL,REHAB,Medicaid,NaT,NaT,0,16108683,21607477,38197814,Coronary Care Unit (CCU),Medical/Surgical Intensive Care Unit (MICU/SICU),2120-11-10 03:56:33,2120-11-29 17:46:11,19.576134,123.0


## Charlson Comorbidity Index (CCI)

In [8]:
%%time # Wall time: 2min 5s

if mode == 'query':

    query = '''
    WITH diag AS
    (
        SELECT 
            hadm_id
            , CASE WHEN icd_version = 9 THEN icd_code ELSE NULL END AS icd9_code
            , CASE WHEN icd_version = 10 THEN icd_code ELSE NULL END AS icd10_code
        FROM mimic_hosp.diagnoses_icd diag
    )
    , com AS
    (
        SELECT
            ad.hadm_id

            -- Myocardial infarction
            , MAX(CASE WHEN
                SUBSTR(icd9_code, 1, 3) IN ('410','412')
                OR
                SUBSTR(icd10_code, 1, 3) IN ('I21','I22')
                OR
                SUBSTR(icd10_code, 1, 4) = 'I252'
                THEN 1 
                ELSE 0 END) AS myocardial_infarct

            -- Congestive heart failure
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 3) = '428'
                OR
                SUBSTR(icd9_code, 1, 5) IN ('39891','40201','40211','40291','40401','40403',
                              '40411','40413','40491','40493')
                OR 
                SUBSTR(icd9_code, 1, 4) BETWEEN '4254' AND '4259'
                OR
                SUBSTR(icd10_code, 1, 3) IN ('I43','I50')
                OR
                SUBSTR(icd10_code, 1, 4) IN ('I099','I110','I130','I132','I255','I420',
                                                       'I425','I426','I427','I428','I429','P290')
                THEN 1 
                ELSE 0 END) AS congestive_heart_failure

            -- Peripheral vascular disease
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 3) IN ('440','441')
                OR
                SUBSTR(icd9_code, 1, 4) IN ('0930','4373','4471','5571','5579','V434')
                OR
                SUBSTR(icd9_code, 1, 4) BETWEEN '4431' AND '4439'
                OR
                SUBSTR(icd10_code, 1, 3) IN ('I70','I71')
                OR
                SUBSTR(icd10_code, 1, 4) IN ('I731','I738','I739','I771','I790',
                                                       'I792','K551','K558','K559','Z958','Z959')
                THEN 1 
                ELSE 0 END) AS peripheral_vascular_disease

            -- Cerebrovascular disease
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 3) BETWEEN '430' AND '438'
                OR
                SUBSTR(icd9_code, 1, 5) = '36234'
                OR
                SUBSTR(icd10_code, 1, 3) IN ('G45','G46')
                OR 
                SUBSTR(icd10_code, 1, 3) BETWEEN 'I60' AND 'I69'
                OR
                SUBSTR(icd10_code, 1, 4) = 'H340'
                THEN 1 
                ELSE 0 END) AS cerebrovascular_disease

            -- Dementia
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 3) = '290'
                OR
                SUBSTR(icd9_code, 1, 4) IN ('2941','3312')
                OR
                SUBSTR(icd10_code, 1, 3) IN ('F00','F01','F02','F03','G30')
                OR
                SUBSTR(icd10_code, 1, 4) IN ('F051','G311')
                THEN 1 
                ELSE 0 END) AS dementia

            -- Chronic pulmonary disease
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 3) BETWEEN '490' AND '505'
                OR
                SUBSTR(icd9_code, 1, 4) IN ('4168','4169','5064','5081','5088')
                OR 
                SUBSTR(icd10_code, 1, 3) BETWEEN 'J40' AND 'J47'
                OR 
                SUBSTR(icd10_code, 1, 3) BETWEEN 'J60' AND 'J67'
                OR
                SUBSTR(icd10_code, 1, 4) IN ('I278','I279','J684','J701','J703')
                THEN 1 
                ELSE 0 END) AS chronic_pulmonary_disease

            -- Rheumatic disease
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 3) = '725'
                OR
                SUBSTR(icd9_code, 1, 4) IN ('4465','7100','7101','7102','7103',
                                                      '7104','7140','7141','7142','7148')
                OR
                SUBSTR(icd10_code, 1, 3) IN ('M05','M06','M32','M33','M34')
                OR
                SUBSTR(icd10_code, 1, 4) IN ('M315','M351','M353','M360')
                THEN 1 
                ELSE 0 END) AS rheumatic_disease

            -- Peptic ulcer disease
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 3) IN ('531','532','533','534')
                OR
                SUBSTR(icd10_code, 1, 3) IN ('K25','K26','K27','K28')
                THEN 1 
                ELSE 0 END) AS peptic_ulcer_disease

            -- Mild liver disease
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 3) IN ('570','571')
                OR
                SUBSTR(icd9_code, 1, 4) IN ('0706','0709','5733','5734','5738','5739','V427')
                OR
                SUBSTR(icd9_code, 1, 5) IN ('07022','07023','07032','07033','07044','07054')
                OR
                SUBSTR(icd10_code, 1, 3) IN ('B18','K73','K74')
                OR
                SUBSTR(icd10_code, 1, 4) IN ('K700','K701','K702','K703','K709','K713',
                                                       'K714','K715','K717','K760','K762',
                                                       'K763','K764','K768','K769','Z944')
                THEN 1 
                ELSE 0 END) AS mild_liver_disease

            -- Diabetes without chronic complication
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 4) IN ('2500','2501','2502','2503','2508','2509') 
                OR
                SUBSTR(icd10_code, 1, 4) IN ('E100','E10l','E106','E108','E109','E110','E111',
                                                       'E116','E118','E119','E120','E121','E126','E128',
                                                       'E129','E130','E131','E136','E138','E139','E140',
                                                       'E141','E146','E148','E149')
                THEN 1 
                ELSE 0 END) AS diabetes_without_cc

            -- Diabetes with chronic complication
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 4) IN ('2504','2505','2506','2507')
                OR
                SUBSTR(icd10_code, 1, 4) IN ('E102','E103','E104','E105','E107','E112','E113',
                                                       'E114','E115','E117','E122','E123','E124','E125',
                                                       'E127','E132','E133','E134','E135','E137','E142',
                                                       'E143','E144','E145','E147')
                THEN 1 
                ELSE 0 END) AS diabetes_with_cc

            -- Hemiplegia or paraplegia
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 3) IN ('342','343')
                OR
                SUBSTR(icd9_code, 1, 4) IN ('3341','3440','3441','3442',
                                                      '3443','3444','3445','3446','3449')
                OR 
                SUBSTR(icd10_code, 1, 3) IN ('G81','G82')
                OR 
                SUBSTR(icd10_code, 1, 4) IN ('G041','G114','G801','G802','G830',
                                                       'G831','G832','G833','G834','G839')
                THEN 1 
                ELSE 0 END) AS paraplegia

            -- Renal disease
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 3) IN ('582','585','586','V56')
                OR
                SUBSTR(icd9_code, 1, 4) IN ('5880','V420','V451')
                OR
                SUBSTR(icd9_code, 1, 4) BETWEEN '5830' AND '5837'
                OR
                SUBSTR(icd9_code, 1, 5) IN ('40301','40311','40391','40402','40403','40412','40413','40492','40493')          
                OR
                SUBSTR(icd10_code, 1, 3) IN ('N18','N19')
                OR
                SUBSTR(icd10_code, 1, 4) IN ('I120','I131','N032','N033','N034',
                                                       'N035','N036','N037','N052','N053',
                                                       'N054','N055','N056','N057','N250',
                                                       'Z490','Z491','Z492','Z940','Z992')
                THEN 1 
                ELSE 0 END) AS renal_disease

            -- Any malignancy, including lymphoma and leukemia, except malignant neoplasm of skin
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 3) BETWEEN '140' AND '172'
                OR
                SUBSTR(icd9_code, 1, 4) BETWEEN '1740' AND '1958'
                OR
                SUBSTR(icd9_code, 1, 3) BETWEEN '200' AND '208'
                OR
                SUBSTR(icd9_code, 1, 4) = '2386'
                OR
                SUBSTR(icd10_code, 1, 3) IN ('C43','C88')
                OR
                SUBSTR(icd10_code, 1, 3) BETWEEN 'C00' AND 'C26'
                OR
                SUBSTR(icd10_code, 1, 3) BETWEEN 'C30' AND 'C34'
                OR
                SUBSTR(icd10_code, 1, 3) BETWEEN 'C37' AND 'C41'
                OR
                SUBSTR(icd10_code, 1, 3) BETWEEN 'C45' AND 'C58'
                OR
                SUBSTR(icd10_code, 1, 3) BETWEEN 'C60' AND 'C76'
                OR
                SUBSTR(icd10_code, 1, 3) BETWEEN 'C81' AND 'C85'
                OR
                SUBSTR(icd10_code, 1, 3) BETWEEN 'C90' AND 'C97'
                THEN 1 
                ELSE 0 END) AS malignant_cancer

            -- Moderate or severe liver disease
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 4) IN ('4560','4561','4562')
                OR
                SUBSTR(icd9_code, 1, 4) BETWEEN '5722' AND '5728'
                OR
                SUBSTR(icd10_code, 1, 4) IN ('I850','I859','I864','I982','K704','K711',
                                                       'K721','K729','K765','K766','K767')
                THEN 1 
                ELSE 0 END) AS severe_liver_disease

            -- Metastatic solid tumor
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 3) IN ('196','197','198','199')
                OR 
                SUBSTR(icd10_code, 1, 3) IN ('C77','C78','C79','C80')
                THEN 1 
                ELSE 0 END) AS metastatic_solid_tumor

            -- AIDS/HIV
            , MAX(CASE WHEN 
                SUBSTR(icd9_code, 1, 3) IN ('042','043','044')
                OR 
                SUBSTR(icd10_code, 1, 3) IN ('B20','B21','B22','B24')
                THEN 1 
                ELSE 0 END) AS aids
        FROM mimic_core.admissions ad
        LEFT JOIN diag
        ON ad.hadm_id = diag.hadm_id
        GROUP BY ad.hadm_id
    )
    , ag AS
    (
        SELECT 
            hadm_id
            , age
            , CASE WHEN age <= 40 THEN 0
        WHEN age <= 50 THEN 1
        WHEN age <= 60 THEN 2
        WHEN age <= 70 THEN 3
        ELSE 4 END AS age_score
        FROM mimic_derived.age
    )
    SELECT 
        ad.subject_id
        , ad.hadm_id
        , ag.age_score
        , myocardial_infarct
        , congestive_heart_failure
        , peripheral_vascular_disease
        , cerebrovascular_disease
        , dementia
        , chronic_pulmonary_disease
        , rheumatic_disease
        , peptic_ulcer_disease
        , mild_liver_disease
        , diabetes_without_cc
        , diabetes_with_cc
        , paraplegia
        , renal_disease
        , malignant_cancer
        , severe_liver_disease 
        , metastatic_solid_tumor 
        , aids
        -- Calculate the Charlson Comorbidity Score using the original
        -- weights from Charlson, 1987.
        , age_score
        + myocardial_infarct + congestive_heart_failure + peripheral_vascular_disease
        + cerebrovascular_disease + dementia + chronic_pulmonary_disease
        + rheumatic_disease + peptic_ulcer_disease
        + GREATEST(mild_liver_disease, 3*severe_liver_disease)
        + GREATEST(2*diabetes_with_cc, diabetes_without_cc)
        + GREATEST(2*malignant_cancer, 6*metastatic_solid_tumor)
        + 2*paraplegia + 2*renal_disease 
        + 6*aids
        AS charlson_comorbidity_index
    FROM mimic_core.admissions ad
    LEFT JOIN com
    ON ad.hadm_id = com.hadm_id
    LEFT JOIN ag
    ON com.hadm_id = ag.hadm_id

    ;
    '''

    cci = pd.read_sql(query, con)

Wall time: 2min 5s


In [10]:
# Persist the comorbidity scores after querying, or restore them in 'load' mode.
if mode == 'query':
    with open('cci_score.pkl', 'wb') as f:
        pkl.dump(cci, f)
elif mode == 'load':
    # NOTE: unpickling can execute arbitrary code; only load caches written by this notebook.
    with open('cci_score.pkl', 'rb') as f:
        cci = pkl.load(f)
else:
    # Fail loudly instead of silently leaving `cci` undefined or stale.
    raise ValueError(f"mode must be 'query' or 'load', got {mode!r}")

# Sequential Organ Failure Assessment (SOFA) score

In [56]:
# Hourly SOFA components and rolling 24-hour SOFA score per ICU stay.
# CTE overview (all taken from the query text below):
#   co        - one row per (stay_id, hr) with the hour's start/end time
#   pafi      - arterial PaO2/FiO2 ratios, split by ventilated / not ventilated
#   vs, gcs, bili, cr, plt, pf, uo, vaso - per-hour extreme of each input
#   scorecomp - all inputs side by side; scorecalc - the six component scores
#   score_final - max of each component over the current and 23 preceding hours, and their sum
query = '''
WITH co AS
(
  select ih.stay_id, ie.hadm_id
  , hr
  -- start/endtime can be used to filter to values within this hour
  , DATETIME_SUB(ih.endtime, INTERVAL '1' HOUR) AS starttime
  , ih.endtime
  from mimic_derived.icustay_hourly ih
  INNER JOIN mimic_icu.icustays ie
    ON ih.stay_id = ie.stay_id
)
, pafi as
(
  -- join blood gas to ventilation durations to determine if patient was vent
  select ie.stay_id
  , bg.charttime
  -- because pafi has an interaction between vent/PaO2:FiO2, we need two columns for the score
  -- it can happen that the lowest unventilated PaO2/FiO2 is 68, but the lowest ventilated PaO2/FiO2 is 120
  -- in this case, the SOFA score is 3, *not* 4.
  , case when vd.stay_id is null then pao2fio2ratio else null end pao2fio2ratio_novent
  , case when vd.stay_id is not null then pao2fio2ratio else null end pao2fio2ratio_vent
  FROM mimic_icu.icustays ie
  inner join mimic_derived.bg bg
    on ie.subject_id = bg.subject_id
  left join mimic_derived.ventilator_durations vd
    on ie.stay_id = vd.stay_id
    and bg.charttime >= vd.starttime
    and bg.charttime <= vd.endtime
  WHERE specimen_pred = 'ART.'
)
, vs AS
(
    
  select co.stay_id, co.hr
  -- vitals
  , min(vs.mbp) as meanbp_min
  from co
  left join mimic_derived.vitalsign vs
    on co.stay_id = vs.stay_id
    and co.starttime < vs.charttime
    and co.endtime >= vs.charttime
  group by co.stay_id, co.hr
)
, gcs AS
(
  select co.stay_id, co.hr
  -- gcs
  , min(gcs.gcs) as gcs_min
  from co
  left join mimic_derived.gcs gcs
    on co.stay_id = gcs.stay_id
    and co.starttime < gcs.charttime
    and co.endtime >= gcs.charttime
  group by co.stay_id, co.hr
)
, bili AS
(
  select co.stay_id, co.hr
  , max(enz.bilirubin_total) as bilirubin_max
  from co
  left join mimic_derived.enzyme enz
    on co.hadm_id = enz.hadm_id
    and co.starttime < enz.charttime
    and co.endtime >= enz.charttime
  group by co.stay_id, co.hr
)
, cr AS
(
  select co.stay_id, co.hr
  , max(chem.creatinine) as creatinine_max
  from co
  left join mimic_derived.chemistry chem
    on co.hadm_id = chem.hadm_id
    and co.starttime < chem.charttime
    and co.endtime >= chem.charttime
  group by co.stay_id, co.hr
)
, plt AS
(
  select co.stay_id, co.hr
  , min(cbc.platelet) as platelet_min
  from co
  left join mimic_derived.complete_blood_count cbc
    on co.hadm_id = cbc.hadm_id
    and co.starttime < cbc.charttime
    and co.endtime >= cbc.charttime
  group by co.stay_id, co.hr
)
, pf AS
(
  select co.stay_id, co.hr
  , min(pafi.pao2fio2ratio_novent) AS pao2fio2ratio_novent
  , min(pafi.pao2fio2ratio_vent) AS pao2fio2ratio_vent
  from co
  -- bring in blood gases that occurred during this hour
  left join pafi
    on co.stay_id = pafi.stay_id
    and co.starttime < pafi.charttime
    and co.endtime  >= pafi.charttime
  group by co.stay_id, co.hr
)
-- sum uo separately to prevent duplicating values
, uo as
(
  select co.stay_id, co.hr
  -- uo
  , MAX(
      CASE WHEN uo.uo_tm_24hr >= 22 AND uo.uo_tm_24hr <= 30
          THEN uo.urineoutput_24hr / uo.uo_tm_24hr * 24
  END) as uo_24hr
  from co
  left join mimic_derived.urine_output_rate uo
    on co.stay_id = uo.stay_id
    and co.starttime < uo.charttime
    and co.endtime >= uo.charttime
  group by co.stay_id, co.hr
)
-- collapse vasopressors into 1 row per hour
-- also ensures only 1 row per chart time
, vaso AS
(
    SELECT 
        co.stay_id
        , co.hr
        , MAX(epi.vaso_rate) as rate_epinephrine
        , MAX(nor.vaso_rate) as rate_norepinephrine
        , MAX(dop.vaso_rate) as rate_dopamine
        , MAX(dob.vaso_rate) as rate_dobutamine
    FROM co
    LEFT JOIN mimic_derived.epinephrine epi
        on co.stay_id = epi.stay_id
        and co.endtime > epi.starttime
        and co.endtime <= epi.endtime
    LEFT JOIN mimic_derived.norepinephrine nor
        on co.stay_id = nor.stay_id
        and co.endtime > nor.starttime
        and co.endtime <= nor.endtime
    LEFT JOIN mimic_derived.dopamine dop
        on co.stay_id = dop.stay_id
        and co.endtime > dop.starttime
        and co.endtime <= dop.endtime
    LEFT JOIN mimic_derived.dobutamine dob
        on co.stay_id = dob.stay_id
        and co.endtime > dob.starttime
        and co.endtime <= dob.endtime
    WHERE epi.stay_id IS NOT NULL
    OR nor.stay_id IS NOT NULL
    OR dop.stay_id IS NOT NULL
    OR dob.stay_id IS NOT NULL
    GROUP BY co.stay_id, co.hr
)
, scorecomp as
(
  select
      co.stay_id
    , co.hr
    , co.starttime, co.endtime
    , pf.pao2fio2ratio_novent
    , pf.pao2fio2ratio_vent
    , vaso.rate_epinephrine
    , vaso.rate_norepinephrine
    , vaso.rate_dopamine
    , vaso.rate_dobutamine
    , vs.meanbp_min
    , gcs.gcs_min
    -- uo
    , uo.uo_24hr
    -- labs
    , bili.bilirubin_max
    , cr.creatinine_max
    , plt.platelet_min
  from co
  left join vs
    on co.stay_id = vs.stay_id
    and co.hr = vs.hr
  left join gcs
    on co.stay_id = gcs.stay_id
    and co.hr = gcs.hr
  left join bili
    on co.stay_id = bili.stay_id
    and co.hr = bili.hr
  left join cr
    on co.stay_id = cr.stay_id
    and co.hr = cr.hr
  left join plt
    on co.stay_id = plt.stay_id
    and co.hr = plt.hr
  left join pf
    on co.stay_id = pf.stay_id
    and co.hr = pf.hr
  left join uo
    on co.stay_id = uo.stay_id
    and co.hr = uo.hr
  left join vaso
    on co.stay_id = vaso.stay_id
    and co.hr = vaso.hr
)
, scorecalc as
(
  -- Calculate the final score
  -- note that if the underlying data is missing, the component is null
  -- eventually these are treated as 0 (normal), but knowing when data is missing is useful for debugging
  select scorecomp.*
  -- Respiration
  , case
      when pao2fio2ratio_vent   < 100 then 4
      when pao2fio2ratio_vent   < 200 then 3
      when pao2fio2ratio_novent < 300 then 2
      when pao2fio2ratio_vent   < 300 then 2
      when pao2fio2ratio_novent < 400 then 1
      when pao2fio2ratio_vent   < 400 then 1
      when coalesce(pao2fio2ratio_vent, pao2fio2ratio_novent) is null then null
      else 0
    end as respiration

  -- Coagulation
  , case
      when platelet_min < 20  then 4
      when platelet_min < 50  then 3
      when platelet_min < 100 then 2
      when platelet_min < 150 then 1
      when platelet_min is null then null
      else 0
    end as coagulation

  -- Liver
  , case
      -- Bilirubin checks in mg/dL
        when bilirubin_max >= 12.0 then 4
        when bilirubin_max >= 6.0  then 3
        when bilirubin_max >= 2.0  then 2
        when bilirubin_max >= 1.2  then 1
        when bilirubin_max is null then null
        else 0
      end as liver

  -- Cardiovascular
  , case
      when rate_dopamine > 15 or rate_epinephrine >  0.1 or rate_norepinephrine >  0.1 then 4
      when rate_dopamine >  5 or rate_epinephrine <= 0.1 or rate_norepinephrine <= 0.1 then 3
      when rate_dopamine >  0 or rate_dobutamine > 0 then 2
      when meanbp_min < 70 then 1
      when coalesce(meanbp_min, rate_dopamine, rate_dobutamine, rate_epinephrine, rate_norepinephrine) is null then null
      else 0
    end as cardiovascular

  -- Neurological failure (GCS)
  , case
      when (gcs_min >= 13 and gcs_min <= 14) then 1
      when (gcs_min >= 10 and gcs_min <= 12) then 2
      when (gcs_min >=  6 and gcs_min <=  9) then 3
      when  gcs_min <   6 then 4
      when  gcs_min is null then null
      else 0
    end as cns

  -- Renal failure - high creatinine or low urine output
  , case
    when (creatinine_max >= 5.0) then 4
    when uo_24hr < 200 then 4
    when (creatinine_max >= 3.5 and creatinine_max < 5.0) then 3
    when uo_24hr < 500 then 3
    when (creatinine_max >= 2.0 and creatinine_max < 3.5) then 2
    when (creatinine_max >= 1.2 and creatinine_max < 2.0) then 1
    when coalesce (uo_24hr, creatinine_max) is null then null
    else 0 
  end as renal
  from scorecomp
)
, score_final as
(
  select s.*
    -- Combine all the scores to get SOFA
    -- Impute 0 if the score is missing
   -- the window function takes the max over the last 24 hours
    , coalesce(
        MAX(respiration) OVER (PARTITION BY stay_id ORDER BY HR
        ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING)
      ,0) as respiration_24hours
     , coalesce(
         MAX(coagulation) OVER (PARTITION BY stay_id ORDER BY HR
         ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING)
        ,0) as coagulation_24hours
    , coalesce(
        MAX(liver) OVER (PARTITION BY stay_id ORDER BY HR
        ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING)
      ,0) as liver_24hours
    , coalesce(
        MAX(cardiovascular) OVER (PARTITION BY stay_id ORDER BY HR
        ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING)
      ,0) as cardiovascular_24hours
    , coalesce(
        MAX(cns) OVER (PARTITION BY stay_id ORDER BY HR
        ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING)
      ,0) as cns_24hours
    , coalesce(
        MAX(renal) OVER (PARTITION BY stay_id ORDER BY HR
        ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING)
      ,0) as renal_24hours

    -- sum together data for final SOFA
    , coalesce(
        MAX(respiration) OVER (PARTITION BY stay_id ORDER BY HR
        ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING)
      ,0)
     + coalesce(
         MAX(coagulation) OVER (PARTITION BY stay_id ORDER BY HR
         ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING)
      ,0)
     + coalesce(
        MAX(liver) OVER (PARTITION BY stay_id ORDER BY HR
        ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING)
      ,0)
     + coalesce(
        MAX(cardiovascular) OVER (PARTITION BY stay_id ORDER BY HR
        ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING)
      ,0)
     + coalesce(
        MAX(cns) OVER (PARTITION BY stay_id ORDER BY HR
        ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING)
      ,0)
     + coalesce(
        MAX(renal) OVER (PARTITION BY stay_id ORDER BY HR
        ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING)
      ,0)
    as sofa_24hours
  from scorecalc s
  WINDOW W as
  (
    PARTITION BY stay_id
    ORDER BY hr
    ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING
  )
)
select * from score_final
where hr >= 0;
'''

# Query the database only in 'query' mode, like the other query cells; in
# 'load' mode `sofa` is read from its pickle cache instead.
if mode == 'query':
    sofa = pd.read_sql(query, con)

In [57]:
# Persist the SOFA table after querying, or restore it in 'load' mode.
if mode == 'query':
    with open('sofa_score.pkl', 'wb') as f:
        pkl.dump(sofa, f)
elif mode == 'load':
    # NOTE: unpickling can execute arbitrary code; only load caches written by this notebook.
    with open('sofa_score.pkl', 'rb') as f:
        sofa = pkl.load(f)
else:
    # Fail loudly instead of silently leaving `sofa` undefined or stale.
    raise ValueError(f"mode must be 'query' or 'load', got {mode!r}")

In [59]:
# Preview the hourly SOFA rows (inputs, component scores and 24-hour rolling values).
sofa.head()

Unnamed: 0,stay_id,hr,starttime,endtime,pao2fio2ratio_novent,pao2fio2ratio_vent,rate_epinephrine,rate_norepinephrine,rate_dopamine,rate_dobutamine,meanbp_min,gcs_min,uo_24hr,bilirubin_max,creatinine_max,platelet_min,respiration,coagulation,liver,cardiovascular,cns,renal,respiration_24hours,coagulation_24hours,liver_24hours,cardiovascular_24hours,cns_24hours,renal_24hours,sofa_24hours
0,30000010,0,2166-12-20 12:00:00,2166-12-20 13:00:00,,91.0,,,,,50.0,3.0,,,4.6,209.0,4.0,0.0,,1.0,4.0,3.0,4,0,0,1,4,4,13
1,30000010,1,2166-12-20 13:00:00,2166-12-20 14:00:00,,285.0,,,,,54.0,,,,,,2.0,,,1.0,,,4,0,0,1,4,4,13
2,30000010,2,2166-12-20 14:00:00,2166-12-20 15:00:00,,,,,,,71.0,,,,,,,,,0.0,,,4,0,0,1,4,4,13
3,30000010,3,2166-12-20 15:00:00,2166-12-20 16:00:00,,230.0,,,,,58.0,,,,,,2.0,,,1.0,,,4,0,0,1,4,4,13
4,30000010,4,2166-12-20 16:00:00,2166-12-20 17:00:00,,,,,,,74.0,,,,,,,,,0.0,,,4,0,0,1,4,4,13


In [62]:
# Calendar date of each hourly window's start, used as the per-day grouping key.
sofa['date'] = sofa['starttime'].dt.date

In [64]:
# Per ICU stay and calendar date: the worst (max) rolling 24-hour SOFA score
# and its variability (standard deviation) across that date's hourly rows.
sofa.groupby(['stay_id','date']).agg({'sofa_24hours':['max','std']}) # max = worst, std = variability per stay and date

Unnamed: 0_level_0,Unnamed: 1_level_0,sofa_24hours,sofa_24hours
Unnamed: 0_level_1,Unnamed: 1_level_1,max,std
stay_id,date,Unnamed: 2_level_2,Unnamed: 3_level_2
30000010,2166-12-20,13,0.000000
30000010,2166-12-21,13,3.378320
30000010,2166-12-22,7,0.494535
30000010,2166-12-23,8,1.020621
30000010,2166-12-24,8,0.000000
...,...,...,...
39998606,2136-08-14,2,0.000000
39998664,2120-05-26,1,0.414039
39998706,2130-09-21,7,1.000000
39998706,2130-09-22,7,0.332106


In [65]:
# Per ICU stay: the worst (max) and the mean rolling 24-hour SOFA score over all hourly rows.
sofa.groupby(['stay_id']).agg({'sofa_24hours':['max','mean']})

Unnamed: 0_level_0,sofa_24hours,sofa_24hours
Unnamed: 0_level_1,max,mean
stay_id,Unnamed: 1_level_2,Unnamed: 2_level_2
30000010,13,7.661442
30000186,5,4.535714
30000575,3,2.790698
30000670,6,1.437500
30000974,6,2.305211
...,...,...
39998269,17,9.960920
39998606,2,1.636364
39998664,1,0.800000
39998706,7,6.692308


# Merge the dataframes together

In [114]:
### Make sure each hospital admission has exactly one (index) ICU stay

In [115]:
# Count the distinct ICU stays attached to each hospital admission.
num_stay = (
    hosp_icu
    .groupby('hadm_id')['stay_id']
    .nunique()
    .reset_index()
)

In [116]:
# Admissions with more than one ICU stay; an empty result means the cohort holds one stay per admission.
num_stay[num_stay['stay_id']>1] 

Unnamed: 0,hadm_id,stay_id


## Merge

In [126]:
# Inner joins: keep the cohort stays that have a summed ventilation duration,
# then attach the comorbidity scores of the matching admission.
master = (
    hosp_icu
    .merge(vsvd1, on=['subject_id', 'stay_id'])
    .merge(cci, on=['subject_id', 'hadm_id'])
)

In [127]:
# Label: whole-day component of the time between ICU discharge (outtime) and
# hospital discharge (dischtime).
master['icu_disch_los'] = master['dischtime'].sub(master['outtime']).dt.days

In [128]:
# Preview the first ten rows of the merged table, including the new label column.
master.head(10)

Unnamed: 0,gender,anchor_age,dod,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,edregtime,edouttime,hospital_expire_flag,subject_id,hadm_id,stay_id,first_careunit,last_careunit,intime,outtime,icu_los,hosp_los,vent_duration,age_score,myocardial_infarct,congestive_heart_failure,peripheral_vascular_disease,cerebrovascular_disease,dementia,chronic_pulmonary_disease,rheumatic_disease,peptic_ulcer_disease,mild_liver_disease,diabetes_without_cc,diabetes_with_cc,paraplegia,renal_disease,malignant_cancer,severe_liver_disease,metastatic_solid_tumor,aids,charlson_comorbidity_index,icu_disch_los
0,M,72,,2158-02-13 04:59:00,2158-02-26 14:45:00,,EW EMER.,EMERGENCY ROOM,REHAB,Medicare,2158-02-13 02:15:00,2158-02-13 03:24:00,0,19836972,20014283,32496266,Coronary Care Unit (CCU),Cardiac Vascular Intensive Care Unit (CVICU),2158-02-13 04:59:54,2158-02-15 18:54:41,2.579711,13.0,1.0,4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,10
1,F,81,,2142-04-09 20:06:00,2142-04-15 16:47:00,,EW EMER.,PROCEDURE SITE,SKILLED NURSING FACILITY,Medicare,NaT,NaT,0,16651226,20026217,35802155,Medical/Surgical Intensive Care Unit (MICU/SICU),Medical/Surgical Intensive Care Unit (MICU/SICU),2142-04-10 14:21:57,2142-04-11 21:08:28,1.282303,5.0,1.0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,3
2,M,38,,2145-02-07 21:02:00,2145-02-14 16:08:00,,EW EMER.,EMERGENCY ROOM,SKILLED NURSING FACILITY,Other,2145-02-07 18:31:00,2145-02-07 22:00:00,0,10291458,20046372,37588913,Trauma SICU (TSICU),Trauma SICU (TSICU),2145-02-07 22:00:00,2145-02-11 11:06:39,3.546285,6.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3
3,F,23,,2173-05-19 23:58:00,2173-05-23 14:00:00,,EW EMER.,EMERGENCY ROOM,HOME,Other,2173-05-19 20:23:00,2173-05-20 02:42:00,0,10922424,20057730,34246418,Trauma SICU (TSICU),Trauma SICU (TSICU),2173-05-20 02:42:00,2173-05-21 00:10:29,0.89478,3.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
4,F,82,,2164-09-01 23:14:00,2164-09-04 17:30:00,,EW EMER.,EMERGENCY ROOM,SKILLED NURSING FACILITY,Other,2164-09-01 16:51:00,2164-09-02 01:05:00,0,11549236,20060563,36714996,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2164-09-02 01:05:00,2164-09-02 19:29:36,0.767083,2.0,1.0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,1
5,M,64,,2170-10-11 17:41:00,2170-10-20 15:00:00,,URGENT,TRANSFER FROM HOSPITAL,HOME HEALTH CARE,Other,NaT,NaT,0,16084174,20098814,31904152,Cardiac Vascular Intensive Care Unit (CVICU),Cardiac Vascular Intensive Care Unit (CVICU),2170-10-16 11:12:09,2170-10-18 09:46:22,1.940428,8.0,1.0,3,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,5,2
6,M,55,,2145-06-29 01:45:00,2145-07-13 16:55:00,,EW EMER.,EMERGENCY ROOM,CHRONIC/LONG TERM ACUTE CARE,Other,2145-06-28 23:46:00,2145-06-29 02:45:00,0,16053405,20112244,37340726,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2145-06-29 02:45:00,2145-07-10 18:24:50,11.652662,14.0,3.0,3,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,7,2
7,M,52,,2154-06-13 07:15:00,2154-06-22 17:56:00,,ELECTIVE,PHYSICIAN REFERRAL,HOME,Other,NaT,NaT,0,11695285,20112353,34827181,Trauma SICU (TSICU),Trauma SICU (TSICU),2154-06-13 17:12:19,2154-06-17 16:30:36,3.97103,9.0,1.0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,5
8,M,77,,2163-10-20 13:04:00,2163-10-24 17:32:00,,EW EMER.,EMERGENCY ROOM,HOME HEALTH CARE,Other,2163-10-20 12:30:00,2163-10-20 14:37:00,0,15505091,20114592,30311281,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),2163-10-20 14:37:00,2163-10-22 20:32:15,2.246701,4.0,1.0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,1
9,M,56,,2148-08-18 14:00:00,2148-08-23 19:03:00,,ELECTIVE,PHYSICIAN REFERRAL,HOME HEALTH CARE,Other,NaT,NaT,0,10161185,20117568,33262844,Cardiac Vascular Intensive Care Unit (CVICU),Cardiac Vascular Intensive Care Unit (CVICU),2148-08-19 10:32:08,2148-08-20 15:27:57,1.205428,5.0,1.0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3


In [130]:
# (rows, columns) of the merged table after the inner joins.
master.shape

(3980, 43)

In [6]:
%%time
# NOTE(review): the queries that follow are commented-out scratch work; none of
# these lines execute. They are kept for reference only.

# Scratch query 1: sample of 500 rows from admissions.
# query = 'SELECT * FROM admissions limit 500'

# Scratch query 2: patients + admissions + icustays, keeping the earliest ICU stay
# (min intime) per hadm_id, filtered to anchor_age >= 18, hospital_expire_flag = 0,
# stay_id 30000010, and subjects with an extubated = 1 row in
# mimic_derived.ventilator_setting.
# query = '''
# SELECT *
# FROM mimic_core.patients pat
# JOIN mimic_core.admissions adm on pat.subject_id = adm.subject_id
# JOIN mimic_icu.icustays icu on adm.subject_id = icu.subject_id and adm.hadm_id = icu.hadm_id
# JOIN (select hadm_id  , min(intime) as min_intime FROM mimic_icu.icustays GROUP BY hadm_id ) first_ on icu.hadm_id = first_.hadm_id and icu.intime = first_.min_intime
# --on adm.hadm_id = first_icu.hadm_id
# WHERE  pat.anchor_age >= 18
# AND adm.hospital_expire_flag = 0
# AND icu.stay_id = 30000010
# AND icu.subject_id in (select subject_id from mimic_derived.ventilator_setting where extubated = 1 )
# limit 1000
# '''

# Scratch query 3: self-join of chartevents on (stay_id, charttime), pairing
# itemid 223848 rows (value limited to the listed names, aliased `device`) with
# itemid 223849 rows (aliased `mode`).
# query = '''
# select

# a.stay_id, a.charttime, a.storetime, a.value as device , b.value as mode ,b.valuenum
# from

# (
# select  stay_id,charttime,storetime,value,valuenum from chartevents 
# where itemid  = '223848' 
# and lower(value) in ('drager','avea','pb 7200','sensor medic (hfo)','hamilton')

# ) a

# join
# (
#  select  stay_id,charttime,storetime,value,valuenum from chartevents
# where itemid = '223849'

# ) b

# on a.stay_id = b.stay_id and a.charttime = b.charttime

# '''

# vent = pd.read_sql(query, con)
