<a href="https://colab.research.google.com/github/johnsonjzhou/comp90089-project/blob/main/sql/initial_cohort_final_updated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **COMP90089 Final Project**

## **Selecting Initial Patient Cohort from MIMICIV**

### **Set up the environemnt**

In [28]:
from google.colab import drive

drive.mount('/content/drive')
path = '/content/drive/My Drive'

KeyError: 'CLOUDSDK_CONFIG'

In [16]:
!pip install -U pandasql



In [22]:
#Set up the environement

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import functools as ft
from pandasql import sqldf
%matplotlib inline


#Project_ID
project_id = "model-myth-358803"

# Read data from BigQuery into pandas dataframes.
def run_query(query, project_id=project_id):
    return pd.io.gbq.read_gbq(
        query,
        project_id=project_id,
        dialect='standard')


### **SQL queries**

#### **1. Queries for finding the patients with criteria:**
 * n_stay >= 1
 * sum LoS >= 3.3 
 * Age between 18 and 90
 * Date of Death is undefined 

In [23]:
#Select patients from ICU_stays based on: n_stay >= 1 and sum los >= 3.3 per hadm_id:

#module: mimiciv-icu
#table: icustays

c1= f"""
      SELECT
        icustays.subject_id As subject_id,
        icustays.hadm_id As hadm_id,
        count(icustays.stay_id) As n_stays,
      FROM
      `physionet-data.mimiciv_icu.icustays` AS icustays
      GROUP BY
        subject_id,	hadm_id
      HAVING 
        n_stays >= 1 AND 
      ORDER BY
        subject_id,	hadm_id
      """

In [24]:
#Select stay_ids for cohort in c1

c2 = f"""
    SELECT DISTINCT
           c1.subject_id,
           c1.hadm_id As hadm_id,
           icustays.stay_id As stay_id, 
           c1.n_stays,
          FROM ({c1}) As c1
          INNER JOIN `physionet-data.mimiciv_icu.icustays` AS icustays
            ON c1.hadm_id = icustays.hadm_id
"""

In [25]:
#Filter the cohort patients from c1 to include only patinets with age range (18 - 90) and survival status (dod is null):
#module: mimiciv-icu
#table: patients

c3 = f"""
     SELECT DISTINCT 
            c2.*,
            patients.anchor_age As anchor_age,
            patients.gender As gender
            FROM ({c2}) As c2
            INNER JOIN `physionet-data.mimiciv_hosp.patients` As patients
            ON c2.subject_id = patients.subject_id AND
               (patients.anchor_age BETWEEN 18 AND 90) AND
               (patients.dod is null)
               
""" 

In [26]:
cohort_c = run_query(f"""
            SELECT c3.*
            FROM ({c3}) As c3
""")

GenericGBQException: Reason: 400 Syntax error: Unexpected keyword ORDER at [25:7]

Location: US
Job ID: f93710d7-4ae6-4a1d-abdf-3ca786b6ff32


In [8]:
cohort_c.to_csv('/content/drive/MyDrive/cohort_criteria_data.csv', index=False)

In [9]:
#ICU_intime for all cohort stay_ids

icu_intime = f"""
      SELECT  c3.subject_id,
              c3.stay_id,
             icustays.intime As intime
      FROM ({c3}) As c3
      INNER JOIN `physionet-data.mimiciv_icu.icustays` AS icustays
        ON c3.stay_id = icustays.stay_id
      """

In [10]:
#Desired cohort subject_ids, hadm_id, and stay_ids 
cohort_subject_ids = list(cohort_c['subject_id'].unique())
cohort_stay_ids = list(cohort_c['stay_id'].unique())
cohort_hadm_ids = list(cohort_c['hadm_id'].unique())

#### **2. Queries for selecting demographic features for the cohort:** 

In [12]:
##Demographic_1
# Presence of infection

#1. Find antibiotic amount, starttime, endtime for each patient within our cohort within the first 24hrs: 

antibiotic_q = f"""

                 SELECT DISTINCT input_antibiotic.subject_id As subject_id,
                 input_antibiotic.hadm_id As hadm_id,
                 input_antibiotic.stay_id As stay_id,
                 input_antibiotic.starttime As anti_starttime,
                 input_antibiotic.endtime As anti_endtime,
                 input_antibiotic.amount As antibiotic,
                 icu_intime.intime As icu_intime
                  FROM ({icu_intime}) As icu_intime
                   INNER JOIN `physionet-data.mimiciv_icu.inputevents` As input_antibiotic
                   ON icu_intime.stay_id = input_antibiotic.stay_id AND
                       --input_antibiotic.starttime <= DATETIME_ADD(icu_intime.intime, INTERVAL '1' DAY) AND
                        --input_antibiotic.endtime <= DATETIME_ADD(icu_intime.intime, INTERVAL '1' DAY) AND
                      input_antibiotic.itemid = 225798   
                    ORDER BY anti_endtime ASC                      
"""


In [13]:
#ref code: https://github.com/MIT-LCP/mimic-iv/blob/master/concepts/sepsis/suspicion_of_infection.sql

suspicion_infec_q = f"""

WITH ab_tbl AS 
(
  select
      abx.subject_id, abx.hadm_id, abx.stay_id
    , abx.antibiotic
    , FIRST_VALUE(abx.anti_starttime) over (partition by stay_id order by abx.anti_starttime asc) AS antibiotic_time
    -- date is used to match microbiology cultures with only date available
    , DATETIME_TRUNC(FIRST_VALUE(abx.anti_starttime) over (partition by stay_id order by abx.anti_starttime asc), DAY) AS antibiotic_date
    , FIRST_VALUE(abx.anti_endtime) over (partition by stay_id order by abx.anti_endtime DESC) AS anti_endtime
    -- create a unique identifier for each patient antibiotic
    , ROW_NUMBER() OVER
    (
      PARTITION BY subject_id
      ORDER BY anti_starttime, anti_endtime, antibiotic
    ) AS ab_id
  from ({antibiotic_q}) As abx
)
, me as
(
  select micro_specimen_id
    -- the following columns are identical for all rows of the same micro_specimen_id
    -- these aggregates simply collapse duplicates down to 1 row
    , MAX(subject_id) AS subject_id
    , MAX(hadm_id) AS hadm_id
    , CAST(MAX(chartdate) AS DATE) AS chartdate
    , MAX(charttime) AS charttime
    , MAX(spec_type_desc) AS spec_type_desc
    , max(case when org_name is not null and org_name != '' then 1 else 0 end) as PositiveCulture
  from `physionet-data.mimiciv_hosp.microbiologyevents` 
  group by micro_specimen_id
)
, me_then_ab AS
(
  select
    ab_tbl.subject_id
    , ab_tbl.hadm_id
    , ab_tbl.stay_id
    , ab_tbl.ab_id
    
    , me72.micro_specimen_id
    , coalesce(me72.charttime, CAST(me72.chartdate AS DATETIME)) as last72_charttime
    , me72.positiveculture as last72_positiveculture
    , me72.spec_type_desc as last72_specimen

    -- we will use this partition to select the earliest culture before this abx
    -- this ensures each antibiotic is only matched to a single culture
    -- and consequently we have 1 row per antibiotic
    , ROW_NUMBER() OVER
    (
      PARTITION BY ab_tbl.subject_id, ab_tbl.ab_id
      ORDER BY me72.chartdate, me72.charttime NULLS LAST
    ) AS micro_seq
  from ab_tbl
  -- abx taken after culture, but no more than 72 hours after
  LEFT JOIN me me72
    on ab_tbl.subject_id = me72.subject_id
    and
    (
      (
      -- if charttime is available, use it
          me72.charttime is not null
      and ab_tbl.antibiotic_time > me72.charttime
      and ab_tbl.antibiotic_time <= DATETIME_ADD(me72.charttime, INTERVAL 72 HOUR) 
      )
      OR
      (
      -- if charttime is not available, use chartdate
          me72.charttime is null
      and antibiotic_date >= me72.chartdate
      and antibiotic_date <= DATE_ADD(me72.chartdate, INTERVAL 3 DAY)
      )
    )
)
, ab_then_me AS
(
  select
      ab_tbl.subject_id
    , ab_tbl.hadm_id
    , ab_tbl.stay_id
    , ab_tbl.ab_id
    
    , me24.micro_specimen_id
    , COALESCE(me24.charttime, CAST(me24.chartdate AS DATETIME)) as next24_charttime
    , me24.positiveculture as next24_positiveculture
    , me24.spec_type_desc as next24_specimen

    -- we will use this partition to select the earliest culture before this abx
    -- this ensures each antibiotic is only matched to a single culture
    -- and consequently we have 1 row per antibiotic
    , ROW_NUMBER() OVER
    (
      PARTITION BY ab_tbl.subject_id, ab_tbl.ab_id
      ORDER BY me24.chartdate, me24.charttime NULLS LAST
    ) AS micro_seq
  from ab_tbl
  -- culture in subsequent 24 hours
  LEFT JOIN me me24
    on ab_tbl.subject_id = me24.subject_id
    and
    (
      (
          -- if charttime is available, use it
          me24.charttime is not null
      and ab_tbl.antibiotic_time >= DATETIME_SUB(me24.charttime, INTERVAL 24 HOUR)  
      and ab_tbl.antibiotic_time < me24.charttime
      )
      OR
      (
          -- if charttime is not available, use chartdate
          me24.charttime is null
      and ab_tbl.antibiotic_date >= DATE_SUB(me24.chartdate, INTERVAL 1 DAY)
      and ab_tbl.antibiotic_date <= me24.chartdate
      )
    )
)
SELECT
ab_tbl.subject_id
, ab_tbl.stay_id
, ab_tbl.hadm_id
, ab_tbl.ab_id
, ab_tbl.antibiotic
, ab_tbl.antibiotic_time

, CASE
  WHEN last72_specimen IS NULL AND next24_specimen IS NULL
    THEN 0
  ELSE 1 
  END AS suspected_infection
-- time of suspected infection:
--    (1) the culture time (if before antibiotic)
--    (2) or the antibiotic time (if before culture)
, CASE
  WHEN last72_specimen IS NULL AND next24_specimen IS NULL
    THEN NULL
  ELSE COALESCE(last72_charttime, antibiotic_time)
  END AS suspected_infection_time

, COALESCE(last72_charttime, next24_charttime) AS culture_time

-- the specimen that was cultured
, COALESCE(last72_specimen, next24_specimen) AS specimen

-- whether the cultured specimen ended up being positive or not
, COALESCE(last72_positiveculture, next24_positiveculture) AS positive_culture

FROM ab_tbl
LEFT JOIN ab_then_me ab2me
    ON ab_tbl.subject_id = ab2me.subject_id
    AND ab_tbl.ab_id = ab2me.ab_id
    AND ab2me.micro_seq = 1
LEFT JOIN me_then_ab me2ab
    ON ab_tbl.subject_id = me2ab.subject_id
    AND ab_tbl.ab_id = me2ab.ab_id
    AND me2ab.micro_seq = 1
"""



In [14]:
#Demographic_1

# --- Presence of infection (based on input antibiotic that has itemid = 225798)
# module: mimiciv_icu, mimiciv_hosp
# Tabels: inputevents, microbiologyevents
# Presence of infection (suspected_infection = 1 or 0)


demographic_1 = run_query(f"""
    SELECT DISTINCT suspicion_infec_q.subject_id As subject_id,
           suspicion_infec_q.hadm_id As hadm_id,
           suspicion_infec_q.stay_id As stay_id,
           suspicion_infec_q.suspected_infection As suspected_infection        
    FROM ({suspicion_infec_q}) As suspicion_infec_q
       WHERE suspicion_infec_q.stay_id IN UNNEST ({cohort_stay_ids})
""")

In [15]:
demographic_1

Unnamed: 0,subject_id,hadm_id,stay_id,suspected_infection
0,17763725,29949885,37918934,1
1,18765526,24616282,39800559,1
2,19461458,27696762,30803639,1
3,14643554,28137438,36715386,1
4,18881137,20690277,30473503,1
...,...,...,...,...
7081,10888007,23296210,35406666,1
7082,11343484,29736871,39661672,1
7083,17237928,26689098,34086829,1
7084,18219834,26310730,34685718,1


In [16]:
demographic_1.to_csv('/content/drive/MyDrive/cohort_demographic_1.csv',index=False)

In [17]:
#Demographic_2
# Module: mimiciv_hosp
# Table: admissions
# Type of admission (admission_type)

#Note: Some patients have more than one admission type

demographic_2 = run_query(f"""
    SELECT  admission.subject_id As subject_id,
            admission.hadm_id As hadm_id,
            admission.admission_type As admission_type
    FROM `physionet-data.mimiciv_hosp.admissions` As admission    
""")

In [18]:
demographic_2.to_csv('/content/drive/MyDrive/cohort_demographic_2.csv',index=False)

In [19]:
##Demographic_3 item_id = 226512 

# Module: mimiciv_icu
# Table: chartevents
# Admission Weight (Kg)  (--Avergae weight)

demographic_3 = run_query(f""" SELECT
   ie.stay_id,
   AVG(ce.valuenum) AS weight
    FROM ({icu_intime}) ie
    -- admission weight
    LEFT JOIN`physionet-data.mimiciv_icu.chartevents` As ce
      ON ie.stay_id = ce.stay_id AND itemid IN (226512, 224639)
      AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY ie.stay_id
    """)
   

In [20]:
demographic_3.to_csv('/content/drive/MyDrive/cohort_demographic_3.csv',index=False)

In [21]:
##Demographic_4 item_id = 226730 

# Module: mimiciv_icu
# Table: chartevents
# Height (cm)
demographic_4 = run_query(f""" SELECT
      
      ie.stay_id,
      AVG(valuenum) as Height_chart
    FROM ({icu_intime}) ie
    -- admission height
    LEFT JOIN`physionet-data.mimiciv_icu.chartevents` As ce
      ON ie.stay_id = ce.stay_id AND itemid = 226730 
      AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY) AND 
      ce.valuenum != 0
    GROUP BY ie.stay_id 
    """)

In [22]:
demographic_4.to_csv('/content/drive/MyDrive/cohort_demographic_4.csv',index=False)

#### **3. Queries for selecting vitalsigns features for the cohort:** 

In [23]:
# Vital signs

# module: mimiciv_icu
# Tabel: chartevents

# heart_rate : itemid = 220045
# Temperature Fahrenheit: itemid = 223761
#Arterial Blood Pressure mean (ABPm): itemid = 220052
#Arterial Blood Pressure diastolic (ABPd): itemid = 220051
#Arterial Blood Pressure systolic (ABPs): itemid = 220050
#Non Invasive Blood Pressure mean (NBPm): itemid = 220181
#Non Invasive Blood Pressure diastolic (NBPd): itemid = 220180
#Non Invasive Blood Pressure systolic (NBPs): itemid = 220179
#Respiratory Rate (RR): itemid = (220210, 224690)
#SpO2: itemid =  (220277, 226253)



    # AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
    # AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)

vitalsigns = f"""
    WITH vitalsigns As (
       SELECT ie.subject_id,
           ie.stay_id,
           MIN(ce.valuenum) AS heart_rate_min,
           MAX(ce.valuenum) AS heart_rate_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.chartevents` As ce
          ON ie.stay_id = ce.stay_id 
          AND  ce.itemid = 220045
          AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
    ),
       temperature As (
           SELECT ie.subject_id,
           ie.stay_id,
           MIN(ce.valuenum) AS temperature_min,
           MAX(ce.valuenum) AS temperature_max
     FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.chartevents` As ce
          ON ie.stay_id = ce.stay_id 
          AND  ce.itemid = 223761
          AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
    ),
         ABPm As (
           SELECT ie.subject_id,
           ie.stay_id,
           MIN(ce.valuenum) AS ABPm_min,
           MAX(ce.valuenum) AS ABPm_max
     FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.chartevents` As ce
          ON ie.stay_id = ce.stay_id 
          AND  ce.itemid = 220052
          AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
     ),    
           ABPd As (
           SELECT ie.subject_id,
           ie.stay_id,
           MIN(ce.valuenum) AS ABPd_min,
           MAX(ce.valuenum) AS ABPd_max
     FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.chartevents` As ce
          ON ie.stay_id = ce.stay_id 
          AND  ce.itemid = 220051
          AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
     ),
           ABPs As (
           SELECT ie.subject_id,
           ie.stay_id,
           MIN(ce.valuenum) AS ABPs_min,
           MAX(ce.valuenum) AS ABPs_max
     FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.chartevents` As ce
          ON ie.stay_id = ce.stay_id 
          AND  ce.itemid =  220050
          AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
     ),       
          NBPm As (
           SELECT ie.subject_id,
           ie.stay_id,
           MIN(ce.valuenum) AS NBPm_min,
           MAX(ce.valuenum) AS NBPm_max
     FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.chartevents` As ce
          ON ie.stay_id = ce.stay_id 
          AND  ce.itemid =  220181
          AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
     ),
          NBPd As (
           SELECT ie.subject_id,
           ie.stay_id,
           MIN(ce.valuenum) AS NBPd_min,
           MAX(ce.valuenum) AS NBPd_max
     FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.chartevents` As ce
          ON ie.stay_id = ce.stay_id 
          AND  ce.itemid =  220180
          AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
     ),
        NBPs As (
           SELECT ie.subject_id,
           ie.stay_id,
           MIN(ce.valuenum) AS NBPs_min,
           MAX(ce.valuenum) AS NBPs_max
     FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.chartevents` As ce
          ON ie.stay_id = ce.stay_id 
          AND  ce.itemid = 220179
          AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
     ),
           RR As (
           SELECT ie.subject_id,
           ie.stay_id,
           MIN(ce.valuenum) AS RR_min,
           MAX(ce.valuenum) AS RR_max
     FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.chartevents` As ce
          ON ie.stay_id = ce.stay_id 
          AND  ce.itemid = 220210
          AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
     ),
           SpO2 As (
           SELECT ie.subject_id,
           ie.stay_id,
           MIN(ce.valuenum) AS SpO2_min,
           MAX(ce.valuenum) AS SpO2_max
     FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.chartevents` As ce
          ON ie.stay_id = ce.stay_id 
          AND  ce.itemid IN (220277, 226253)
          AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
     )  

   SELECT ie.subject_id,
          ie.stay_id, 
          vitalsigns.heart_rate_min,
          vitalsigns.heart_rate_max,
          temperature.temperature_min,
          temperature.temperature_max,
          ABPm.ABPm_min,
          ABPm.ABPm_max,
          ABPd.ABPd_min,
          ABPd.ABPd_max,
          ABPs.ABPs_min,
          ABPs.ABPs_max,
          NBPm.NBPm_min,
          NBPm.NBPm_max,
          NBPd.NBPd_min,
          NBPd.NBPd_max,
          NBPs.NBPs_min,
          NBPs.NBPs_max,
          RR.RR_min,
          RR.RR_max,
          SpO2.SpO2_min,
          SpO2.SpO2_max

          
   FROM ({icu_intime}) As ie
   LEFT JOIN vitalsigns 
   ON ie.stay_id = vitalsigns.stay_id
   LEFT JOIN temperature
   ON vitalsigns.stay_id = temperature.stay_id
   LEFT JOIN ABPm
   ON temperature.stay_id = ABPm.stay_id
   LEFT JOIN ABPd
   ON  ABPm.stay_id = ABPd.stay_id
   LEFT JOIN ABPs
   ON ABPd.stay_id = ABPs.stay_id 
   LEFT JOIN NBPm
   ON ABPs.stay_id = NBPm.stay_id 
   LEFT JOIN NBPd
   ON NBPm.stay_id = NBPd.stay_id
   LEFT JOIN NBPs
   ON NBPd.stay_id = NBPs.stay_id
   LEFT JOIN RR
   ON NBPs.stay_id = RR.stay_id
   LEFT JOIN SpO2
   ON RR.stay_id = SpO2.stay_id
"""



In [24]:
cohort_vitalsigns = run_query(f"""
  SELECT vitalsigns.*
    FROM ({vitalsigns}) As vitalsigns
""")

In [25]:
cohort_vitalsigns.to_csv('/content/drive/MyDrive/cohort_vitalsigns.csv',index=False)

#### **4. Queries for selecting lab tests features for the cohort:** 

In [26]:

# Laboratory tests 1
# module: mimiciv_hosp
# Tabel: labevents


# Albumin(albumin_min, albumin_max) item_id = 52022, 50862, 53085, 53138
# Blood urea nitrogen (bun_min, bun_max) itemid = (51006, 52647)
# Calcium_blood (calcium_min, calcium_max) itemid = 50893

# creatinine_blood (creatinine_min, creatinine_max) itemid = 50912, 52546
# Glucose (glucose_min, glucose_max) itemid = 50931, 52569
# Bicarbonate (bicarbonate_min, bicarbonate_max) itemid = 50882

# Potassium (potassium_min, potassium_max) itemid = 50971, 52610
# Sodium (sodium_min, sodium_max) itemid = (50983, 52623)
# Platelet Aggregation (platelets_min, platelets_max) itemid = 51265

# Bilirubin (bilirubin_total_min, bilirubin_total_max) item_id = 50885, 53089
# White blood cell count (wbc_min, wbc_max) item_id (51301)
# Lactate Dehydrogenase itemid = 50813, 52442, 50954

# Magnesium (Mg) itemid = 50960
# pH (ph_min, ph_max) itemid (50820, 50831)
# Partial pressure of carbon dioxide (pCO2_min, pCO2_max) itemid = (50818, 50830)
# Partial pressure of Oxygen (pO2_min, pO2_max) itemid = (50821, 50832)

#Not Found!
# PaO2/FiO2 ratio (pao2fio2ratio_min, pao2fio2ratio_max)  

lab_tests = f"""
    WITH labtests As (
       SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS albumin_min,
           MAX(le.valuenum) AS albumin_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id 
          AND  le.itemid IN (52022, 50862, 53085,53138)
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
    ),
       platelet  As (
       SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS platelet_min,
           MAX(le.valuenum) AS platelet_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id 
          AND  le.itemid = 51265
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
    ),
       bun As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS bun_min,
           MAX(le.valuenum) AS bun_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id
          AND  le.itemid IN (51006, 52647)
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       ),
       calcium As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS calcium_min,
           MAX(le.valuenum) AS calcium_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id 
          AND  le.itemid = 50893
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       ),
       creatinine As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS creatinine_min,
           MAX(le.valuenum) AS creatinine_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id
          AND  le.itemid IN (50912, 52546)
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       ),
       glucose As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS  glucose_min,
           MAX(le.valuenum) AS glucose_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id
          AND  le.itemid IN (50931, 52569)
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       ),
       bicarbonate As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS  bicarbonate_min,
           MAX(le.valuenum) AS bicarbonate_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id
          AND  le.itemid = 50882
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       ),
        potassium As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS   potassium_min,
           MAX(le.valuenum) AS  potassium_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id
          AND  le.itemid IN (50971, 52610)
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       ),
       sodium As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS  sodium_min,
           MAX(le.valuenum) AS  sodium_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id
          AND  le.itemid IN (50983, 52623)
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       ),
       bilirubin As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS  bilirubin_min,
           MAX(le.valuenum) AS  bilirubin_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id
          AND  le.itemid IN  (50885, 50883, 50884, 51568, 51569, 51570, 53089)
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       ),
      wbc As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS  wbc_min,
           MAX(le.valuenum) AS  wbc_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id
          AND  le.itemid = 51301
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       ),
       lactate As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS  lactate_min,
           MAX(le.valuenum) AS  lactate_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id 
          AND  le.itemid IN (50813, 52442, 50954)
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       ),
       magnesium As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS  magnesium_min,
           MAX(le.valuenum) AS  magnesium_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id
          AND  le.itemid = 50960
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       ),
       pH As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS  pH_min,
           MAX(le.valuenum) AS  pH_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id
          AND  le.itemid IN (50820, 50831)
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       ),
       pCO2 As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS  pCO2_min,
           MAX(le.valuenum) AS  pCO2_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id
          AND  le.itemid IN (50818, 50830)
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       ),
       pO2 As (
         SELECT ie.subject_id,
           ie.stay_id,
           MIN(le.valuenum) AS  pO2_min,
           MAX(le.valuenum) AS  pO2_max
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_hosp.labevents` As le
          ON ie.subject_id = le.subject_id
          AND  le.itemid IN (50821, 50832)
          AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
       )

       SELECT ie.subject_id,
              ie.stay_id,
              labtests.albumin_min,
              labtests.albumin_max,
              platelet.platelet_min,
              platelet.platelet_max,
              bun.bun_min,
              bun.bun_max,
              calcium.calcium_min,
              calcium.calcium_max,
              creatinine.creatinine_min,
              creatinine.creatinine_max,
              glucose.glucose_min,
              glucose.glucose_max,
              bicarbonate.bicarbonate_min,
              bicarbonate.bicarbonate_max,
              potassium.potassium_min,
              potassium.potassium_max,
              sodium.sodium_min,
              sodium.sodium_max,
              bilirubin.bilirubin_min,
              bilirubin.bilirubin_max,
              wbc.wbc_min,
              wbc.wbc_max,
              lactate.lactate_min,
              lactate.lactate_max,
              magnesium.magnesium_min,
              magnesium.magnesium_max,
              pH.pH_min,
              pH.pH_max,
              pCO2.pCO2_min,
              pCO2.pCO2_max,
              pO2.pO2_min,
              pO2.pO2_max,
            FROM ({icu_intime}) As ie
            LEFT JOIN labtests
            ON ie.stay_id = labtests.stay_id
            LEFT JOIN platelet
            ON labtests.stay_id = platelet.stay_id
            LEFT JOIN bun 
            ON platelet.stay_id = bun.stay_id
            LEFT JOIN calcium
            ON bun.stay_id = calcium.stay_id
            LEFT JOIN creatinine
            ON calcium.stay_id = creatinine.stay_id
            LEFT JOIN glucose
            ON creatinine.stay_id = glucose.stay_id
            LEFT JOIN bicarbonate
            ON glucose.stay_id = bicarbonate.stay_id 
            LEFT JOIN potassium
            ON bicarbonate.stay_id = potassium.stay_id 
            LEFT JOIN sodium
            ON potassium.stay_id = sodium.stay_id
            LEFT JOIN bilirubin
            ON sodium.stay_id  = bilirubin.stay_id
            LEFT JOIN wbc
            ON bilirubin.stay_id = wbc.stay_id 
            LEFT JOIN lactate
            ON wbc.stay_id = lactate.stay_id
            LEFT JOIN magnesium
            ON lactate.stay_id = magnesium.stay_id
            LEFT JOIN pH
            ON magnesium.stay_id = pH.stay_id 
            LEFT JOIN pCO2
            ON pH.stay_id = pCO2.stay_id 
            LEFT JOIN pO2
            ON pCO2.stay_id = pO2.stay_id 
   """       
  


In [27]:
cohort_lab_tests = run_query(f"""
 SELECT lab_tests.*
   FROM ({lab_tests}) As lab_tests
""")

In [28]:
cohort_lab_tests.to_csv('/content/drive/MyDrive/cohort_lab_tests.csv',index=False)

#### **5. Queries for selecting comorbidities scores features for the cohort:** 

In [29]:
#Comorbidities 1

# module: mimiciv_icu
# Table: chartevents

#reference code: https://github.com/MIT-LCP/mimic-iv/blob/master/concepts/firstday/first_day_gcs.sql

gcs = f"""
    with base as
    (
    select 
      ie.subject_id
    , ie.stay_id, ce.charttime
    -- pivot each value into its own column
    , max(case when ce.ITEMID = 223901 then ce.valuenum else null end) as GCSMotor
    , max(case
        when ce.ITEMID = 223900 and ce.VALUE = 'No Response-ETT' then 0
        when ce.ITEMID = 223900 then ce.valuenum
        else null 
      end) as GCSVerbal
    , max(case when ce.ITEMID = 220739 then ce.valuenum else null end) as GCSEyes
    -- convert the data into a number, reserving a value of 0 for ET/Trach
    , max(case
        -- endotrach/vent is assigned a value of 0
        -- flag it here to later parse specially
        when ce.ITEMID = 223900 and ce.VALUE = 'No Response-ETT' then 1 -- metavision
      else 0 end)
      as endotrachflag
    , ROW_NUMBER ()
            OVER (PARTITION BY ce.stay_id ORDER BY ce.charttime ASC) as rn
    FROM ({icu_intime}) As ie
    LEFT JOIN `physionet-data.mimiciv_icu.chartevents` As ce
        ON ce.stay_id = ie.stay_id AND 
           ce.ITEMID IN (223900, 223901, 220739)
    group by ie.subject_id, ie.stay_id, ce.stay_id, ce.charttime
    )
    , gcs as (
    select b.*
    , b2.GCSVerbal as GCSVerbalPrev
    , b2.GCSMotor as GCSMotorPrev
    , b2.GCSEyes as GCSEyesPrev
    -- Calculate GCS, factoring in special case when they are intubated and prev vals
    -- note that the coalesce are used to implement the following if:
    --  if current value exists, use it
    --  if previous value exists, use it
    --  otherwise, default to normal
    , case
        -- replace GCS during sedation with 15
        when b.GCSVerbal = 0
          then 15
        when b.GCSVerbal is null and b2.GCSVerbal = 0
          then 15
        -- if previously they were intub, but they aren't now, do not use previous GCS values
        when b2.GCSVerbal = 0
          then
              coalesce(b.GCSMotor,6)
            + coalesce(b.GCSVerbal,5)
            + coalesce(b.GCSEyes,4)
        -- otherwise, add up score normally, imputing previous value if none available at current time
        else
              coalesce(b.GCSMotor,coalesce(b2.GCSMotor,6))
            + coalesce(b.GCSVerbal,coalesce(b2.GCSVerbal,5))
            + coalesce(b.GCSEyes,coalesce(b2.GCSEyes,4))
        end as GCS

    from base b
    -- join to itself within 6 hours to get previous value
    left join base b2
      on b.stay_id = b2.stay_id
      and b.rn = b2.rn+1
      and b2.charttime > DATETIME_ADD(b.charttime, INTERVAL 6 HOUR)
    )
    -- combine components with previous within 6 hours
    -- filter down to cohort which is not excluded
    -- truncate charttime to the hour
    , gcs_stg as
    (
    select
      subject_id
    , gs.stay_id, gs.charttime
    , GCS
    , coalesce(GCSMotor,GCSMotorPrev) as GCSMotor
    , coalesce(GCSVerbal,GCSVerbalPrev) as GCSVerbal
    , coalesce(GCSEyes,GCSEyesPrev) as GCSEyes
    , case when coalesce(GCSMotor,GCSMotorPrev) is null then 0 else 1 end
    + case when coalesce(GCSVerbal,GCSVerbalPrev) is null then 0 else 1 end
    + case when coalesce(GCSEyes,GCSEyesPrev) is null then 0 else 1 end
      as components_measured
    , EndoTrachFlag
    from gcs gs
    )
    -- priority is:
    --  (i) complete data, (ii) non-sedated GCS, (iii) lowest GCS, (iv) charttime
    , gcs_priority as
    (
    select 
        subject_id
      , stay_id
      , charttime
      , gcs
      , gcsmotor
      , gcsverbal
      , gcseyes
      , EndoTrachFlag
      , ROW_NUMBER() over
        (
          PARTITION BY stay_id, charttime
          ORDER BY components_measured ASC, endotrachflag, gcs, charttime ASC
        ) as rn
    from gcs_stg
    )
    select DISTINCT
    gs.subject_id
    , gs.stay_id
    , min(GCS) AS gcs_min
    from gcs_priority gs
    where rn = 1
    GROUP BY gs.subject_id, gs.stay_id
    
      """

In [30]:
cohort_gcs_first_day = run_query(f"""
   SELECT gcs.*
   FROM ({gcs}) As gcs   
   """
)

In [31]:
cohort_gcs_first_day.to_csv('/content/drive/MyDrive/cohort_gcs_first_day.csv',index=False)

In [32]:
#Comorbidities 2

# module: mimiciv_icu_derived
# Table: first_day_sofa

cohort_sofa_first_day = run_query(f"""
    SELECT icu_intime.stay_id As stay_id,
           sofa.SOFA As SOFA
    FROM ({icu_intime}) AS icu_intime
    INNER JOIN `physionet-data.mimiciv_derived.first_day_sofa` As sofa
    ON icu_intime.stay_id = sofa.stay_id     
""")

In [33]:
cohort_sofa_first_day.to_csv('/content/drive/MyDrive/cohort_sofa_first_day.csv',index=False)

#### **6. Query for selecting ventilation status feature for the cohort:** 

In [34]:
#module: mimiciv_icu
#table: chartevents
#reference code: https://github.com/MIT-LCP/mimic-iv/blob/master/concepts/measurement/ventilator_setting.sql

ventilator_setting = f""" 
with ce as
(
  SELECT
      ce.subject_id
    , ce.stay_id
    , ce.charttime
    , itemid
    -- TODO: clean
    , value
    , case
        -- begin fio2 cleaning
        when itemid = 223835
        then
            case
                when valuenum >= 0.20 and valuenum <= 1
                    then valuenum * 100
                -- improperly input data - looks like O2 flow in litres
                when valuenum > 1 and valuenum < 20
                    then null
                when valuenum >= 20 and valuenum <= 100
                    then valuenum
            ELSE NULL END
        -- end of fio2 cleaning
        -- begin peep cleaning
        WHEN itemid in (220339, 224700)
        THEN
          CASE
            WHEN valuenum > 100 THEN NULL
            WHEN valuenum < 0 THEN NULL
          ELSE valuenum END
        -- end peep cleaning
    ELSE valuenum END AS valuenum
    , valueuom
    , storetime
  FROM `physionet-data.mimiciv_icu.chartevents` ce
  where ce.value IS NOT NULL
  AND ce.stay_id IS NOT NULL
  AND ce.itemid IN
  (
      224688 -- Respiratory Rate (Set)
    , 224689 -- Respiratory Rate (spontaneous)
    , 224690 -- Respiratory Rate (Total)
    , 224687 -- minute volume
    , 224685, 224684, 224686 -- tidal volume
    , 224696 -- PlateauPressure
    , 220339, 224700 -- PEEP
    , 223835 -- fio2
    , 223849 -- vent mode
    , 229314 -- vent mode (Hamilton)
    , 223848 -- vent type
  )
)
SELECT
      subject_id
    , MAX(stay_id) AS stay_id
    , charttime
    , MAX(CASE WHEN itemid = 224688 THEN valuenum ELSE NULL END) AS respiratory_rate_set
    , MAX(CASE WHEN itemid = 224690 THEN valuenum ELSE NULL END) AS respiratory_rate_total
    , MAX(CASE WHEN itemid = 224689 THEN valuenum ELSE NULL END) AS respiratory_rate_spontaneous
    , MAX(CASE WHEN itemid = 224687 THEN valuenum ELSE NULL END) AS minute_volume
    , MAX(CASE WHEN itemid = 224684 THEN valuenum ELSE NULL END) AS tidal_volume_set
    , MAX(CASE WHEN itemid = 224685 THEN valuenum ELSE NULL END) AS tidal_volume_observed
    , MAX(CASE WHEN itemid = 224686 THEN valuenum ELSE NULL END) AS tidal_volume_spontaneous
    , MAX(CASE WHEN itemid = 224696 THEN valuenum ELSE NULL END) AS plateau_pressure
    , MAX(CASE WHEN itemid in (220339, 224700) THEN valuenum ELSE NULL END) AS peep
    , MAX(CASE WHEN itemid = 223835 THEN valuenum ELSE NULL END) AS fio2
    , MAX(CASE WHEN itemid = 223849 THEN value ELSE NULL END) AS ventilator_mode
    , MAX(CASE WHEN itemid = 229314 THEN value ELSE NULL END) AS ventilator_mode_hamilton
    , MAX(CASE WHEN itemid = 223848 THEN value ELSE NULL END) AS ventilator_type
FROM ce
GROUP BY subject_id, charttime
"""

In [35]:
#module: mimiciv_icu
#table: chartevents

#reference code: https://github.com/MIT-LCP/mimic-iv/blob/master/concepts/measurement/oxygen_delivery.sql
oxygen_delivery = f"""
with ce_stg1 as
(
  SELECT
      ce.subject_id
    , ce.stay_id
    , ce.charttime
    , CASE
        -- merge o2 flows into a single row
        WHEN itemid IN (223834, 227582, 224691) THEN 223834
      ELSE itemid END AS itemid
    , value
    , valuenum
    , valueuom
    , storetime
  FROM `physionet-data.mimiciv_icu.chartevents` ce
  WHERE ce.value IS NOT NULL
  AND ce.itemid IN
  (
      223834 -- o2 flow
    , 227582 -- bipap o2 flow
    , 224691 -- Flow Rate (L)
    -- additional o2 flow is its own column
    , 227287 -- additional o2 flow
  )
)
, ce_stg2 AS
(
  select
    ce.subject_id
    , ce.stay_id
    , ce.charttime
    , itemid
    , value
    , valuenum
    , valueuom
    -- retain only 1 row per charttime
    -- prioritizing the last documented value
    -- primarily used to subselect o2 flows
    , ROW_NUMBER() OVER (PARTITION BY subject_id, charttime, itemid ORDER BY storetime DESC) as rn
  FROM ce_stg1 ce
)
, o2 AS
(
    -- The below ITEMID can have multiple entires for charttime/storetime
    -- These are totally valid entries, and should be retained in derived tables.
    --   224181 -- Small Volume Neb Drug #1              | Respiratory             | Text       | chartevents
    -- , 227570 -- Small Volume Neb Drug/Dose #1         | Respiratory             | Text       | chartevents
    -- , 224833 -- SBT Deferred                          | Respiratory             | Text       | chartevents
    -- , 224716 -- SBT Stopped                           | Respiratory             | Text       | chartevents
    -- , 224740 -- RSBI Deferred                         | Respiratory             | Text       | chartevents
    -- , 224829 -- Trach Tube Type                       | Respiratory             | Text       | chartevents
    -- , 226732 -- O2 Delivery Device(s)                 | Respiratory             | Text       | chartevents
    -- , 226873 -- Inspiratory Ratio                     | Respiratory             | Numeric    | chartevents
    -- , 226871 -- Expiratory Ratio                      | Respiratory             | Numeric    | chartevents
    -- maximum of 4 o2 devices on at once
    SELECT
        subject_id
        , stay_id
        , charttime
        , itemid
        , value AS o2_device
    , ROW_NUMBER() OVER (PARTITION BY subject_id, charttime, itemid ORDER BY value) as rn
    FROM `physionet-data.mimiciv_icu.chartevents`
    WHERE itemid = 226732 -- oxygen delivery device(s)
)
, stg AS
(
    select
      COALESCE(ce.subject_id, o2.subject_id) AS subject_id
    , COALESCE(ce.stay_id, o2.stay_id) AS stay_id
    , COALESCE(ce.charttime, o2.charttime) AS charttime
    , COALESCE(ce.itemid, o2.itemid) AS itemid
    , ce.value
    , ce.valuenum
    , o2.o2_device
    , o2.rn
    from ce_stg2 ce
    FULL OUTER JOIN o2
      ON ce.subject_id = o2.subject_id
      AND ce.charttime = o2.charttime
    -- limit to 1 row per subject_id/charttime/itemid from ce_stg2
    WHERE ce.rn = 1
)
SELECT
    subject_id
    , MAX(stay_id) AS stay_id
    , charttime
    , MAX(CASE WHEN itemid = 223834 THEN valuenum ELSE NULL END) AS o2_flow
    , MAX(CASE WHEN itemid = 227287 THEN valuenum ELSE NULL END) AS o2_flow_additional
    -- ensure we retain all o2 devices for the patient
    , MAX(CASE WHEN rn = 1 THEN o2_device ELSE NULL END) AS o2_delivery_device_1
    , MAX(CASE WHEN rn = 2 THEN o2_device ELSE NULL END) AS o2_delivery_device_2
    , MAX(CASE WHEN rn = 3 THEN o2_device ELSE NULL END) AS o2_delivery_device_3
    , MAX(CASE WHEN rn = 4 THEN o2_device ELSE NULL END) AS o2_delivery_device_4
FROM stg
GROUP BY subject_id, charttime
"""

In [36]:
# module: mimiciv_icu
# Table: procedureevents
# Ventilation status (ventilation_status) 

##reference code: https://github.com/MIT-LCP/mimic-iv/blob/master/concepts/treatment/ventilation.sql

ventilation_status = f"""
      WITH tm AS
      (
      SELECT stay_id, charttime
      FROM ({ventilator_setting})
      UNION DISTINCT
      SELECT stay_id, charttime
      FROM ({oxygen_delivery})
      )
      , vs AS
      (
        SELECT tm.stay_id, tm.charttime
        -- source data columns, here for debug
        , o2_delivery_device_1
        , COALESCE(ventilator_mode, ventilator_mode_hamilton) AS vent_mode
        -- case statement determining the type of intervention
        -- done in order of priority: trach > mech vent > NIV > high flow > o2
        , CASE
        -- tracheostomy
        WHEN o2_delivery_device_1 IN
        (
            'Tracheostomy tube'
        -- 'Trach mask ' -- 16435 observations
        )
            THEN 'Trach'
        -- mechanical ventilation
        WHEN o2_delivery_device_1 IN
        (
            'Endotracheal tube'
        )
        OR ventilator_mode IN
        (
            '(S) CMV',
            'APRV',
            'APRV/Biphasic+ApnPress',
            'APRV/Biphasic+ApnVol',
            'APV (cmv)',
            'Ambient',
            'Apnea Ventilation',
            'CMV',
            'CMV/ASSIST',
            'CMV/ASSIST/AutoFlow',
            'CMV/AutoFlow',
            'CPAP/PPS',
            'CPAP/PSV+Apn TCPL',
            'CPAP/PSV+ApnPres',
            'CPAP/PSV+ApnVol',
            'MMV',
            'MMV/AutoFlow',
            'MMV/PSV',
            'MMV/PSV/AutoFlow',
            'P-CMV',
            'PCV+',
            'PCV+/PSV',
            'PCV+Assist',
            'PRES/AC',
            'PRVC/AC',
            'PRVC/SIMV',
            'PSV/SBT',
            'SIMV',
            'SIMV/AutoFlow',
            'SIMV/PRES',
            'SIMV/PSV',
            'SIMV/PSV/AutoFlow',
            'SIMV/VOL',
            'SYNCHRON MASTER',
            'SYNCHRON SLAVE',
            'VOL/AC'
        )
        OR ventilator_mode_hamilton IN
        (
            'APRV',
            'APV (cmv)',
            'Ambient',
            '(S) CMV',
            'P-CMV',
            'SIMV',
            'APV (simv)',
            'P-SIMV',
            'VS',
            'ASV'
        )
            THEN 'InvasiveVent'
        -- NIV
        WHEN o2_delivery_device_1 IN
        (
            'Bipap mask ', -- 8997 observations
            'CPAP mask ' -- 5568 observations
        )
        OR ventilator_mode_hamilton IN
        (
            'DuoPaP',
            'NIV',
            'NIV-ST'
        )
            THEN 'NonInvasiveVent'
        -- high flow
        when o2_delivery_device_1 IN
        (
            'High flow neb', -- 10785 observations
            'High flow nasal cannula' -- 925 observations
        )
            THEN 'HighFlow'
        -- normal oxygen delivery
        WHEN o2_delivery_device_1 in
        (
            'Nasal cannula', -- 153714 observations
            'Face tent', -- 24601 observations
            'Aerosol-cool', -- 24560 observations
            'Non-rebreather', -- 5182 observations
            'Venti mask ', -- 1947 observations
            'Medium conc mask ', -- 1888 observations
            'T-piece', -- 1135 observations
            'Ultrasonic neb', -- 9 observations
            'Vapomist', -- 3 observations
            'Oxymizer' -- 1301 observations
        )
            THEN 'Oxygen'
        -- Not categorized:
        -- 'Other', 'None'
        ELSE NULL END AS ventilation_status
      FROM tm
      LEFT JOIN ({ventilator_setting}) vs
          ON tm.stay_id = vs.stay_id
          AND tm.charttime = vs.charttime
      LEFT JOIN ({oxygen_delivery}) od
          ON tm.stay_id = od.stay_id
          AND tm.charttime = od.charttime
      )
      , vd0 AS
      (
        SELECT
          stay_id, charttime
          -- source data columns, here for debug
          , o2_delivery_device_1
          , vent_mode
          -- carry over the previous charttime which had the same state
          , LAG(charttime, 1) OVER (PARTITION BY stay_id, ventilation_status ORDER BY charttime) AS charttime_lag
          -- bring back the next charttime, regardless of the state
          -- this will be used as the end time for state transitions
          , LEAD(charttime, 1) OVER w AS charttime_lead
          , ventilation_status
          , LAG(ventilation_status, 1) OVER w AS ventilation_status_lag
        FROM vs
        WHERE ventilation_status IS NOT NULL
        WINDOW w AS (PARTITION BY stay_id ORDER BY charttime)
      )
      , vd1 as
      (
        SELECT
            stay_id
            -- source data columns, here for debug
            , o2_delivery_device_1
            , vent_mode
            , charttime_lag
            , charttime
            , charttime_lead
            , ventilation_status

            -- calculate the time since the last event
            , DATETIME_DIFF(charttime, charttime_lag, MINUTE)/60 as ventduration

            -- now we determine if the current ventilation status is "new", or continuing the previous
            , CASE
                -- a 14 hour gap always initiates a new event
                WHEN DATETIME_DIFF(charttime, charttime_lag, HOUR) >= 14 THEN 1
                WHEN ventilation_status_lag IS NULL THEN 1
                -- not a new event if identical to the last row
                WHEN ventilation_status_lag != ventilation_status THEN 1
              ELSE 0
              END AS new_status
        FROM vd0
      )
      , vd2 as
      (
        SELECT vd1.*
        -- create a cumulative sum of the instances of new ventilation
        -- this results in a monotonic integer assigned to each instance of ventilation
        , SUM(new_status) OVER (PARTITION BY stay_id ORDER BY charttime) AS vent_num
        FROM vd1
      )
      -- create the durations for each ventilation instance
      SELECT stay_id
      , MIN(charttime) AS starttime
      -- for the end time of the ventilation event, the time of the *next* setting
      -- i.e. if we go NIV -> O2, the end time of NIV is the first row with a documented O2 device
      -- ... unless it's been over 14 hours, in which case it's the last row with a documented NIV.
      , MAX(
            CASE
                WHEN charttime_lead IS NULL
                OR DATETIME_DIFF(charttime_lead, charttime, HOUR) >= 14
                    THEN charttime
            ELSE charttime_lead
            END
        ) AS endtime
        -- all rows with the same vent_num will have the same ventilation_status
        -- for efficiency, we use an aggregate here, but we could equally well group by this column
      , MAX(ventilation_status) AS ventilation_status
      FROM vd2
      GROUP BY stay_id, vent_num
      HAVING min(charttime) != max(charttime)
      
    """

In [37]:

cohort_ventilation_status = run_query(f"""
     SELECT DISTINCT icu_intime.stay_id,
            ventilation.ventilation_status,
     FROM ({icu_intime}) As icu_intime
     INNER JOIN ({ventilation_status }) As ventilation
        ON icu_intime.stay_id = ventilation.stay_id AND
        ventilation.starttime <= DATETIME_ADD(icu_intime.intime, INTERVAL '1' DAY) 
""")

In [38]:
cohort_ventilation_status.to_csv('/content/drive/MyDrive/cohort_ventilation_status.csv',index=False)

#### **7. Queries for selecting Input/Output feature for the cohort:** 

In [39]:
# Input/output: 

#module: mimiciv_icu
#table: outputevents
#reference code:https://github.com/MIT-LCP/mimic-iv/blob/master/concepts/measurement/urine_output.sql
# Urine output (urineoutput)

urine_output = f"""
        select
      stay_id
      , charttime
      , sum(urineoutput) as urineoutput
      from
      (
        select
        -- patient identifiers
        oe.stay_id
        , oe.charttime
        -- volumes associated with urine output ITEMIDs
        -- note we consider input of GU irrigant as a negative volume
        -- GU irrigant volume in usually has a corresponding volume out
        -- so the net is often 0, despite large irrigant volumes
        , case
            when oe.itemid = 227488 and oe.value > 0 then -1*oe.value
            else oe.value
        end as urineoutput
        from `physionet-data.mimiciv_icu.outputevents` oe
        where itemid in
        (
        226559, -- Foley
        226560, -- Void
        226561, -- Condom Cath
        226584, -- Ileoconduit
        226563, -- Suprapubic
        226564, -- R Nephrostomy
        226565, -- L Nephrostomy
        226567, -- Straight Cath
        226557, -- R Ureteral Stent
        226558, -- L Ureteral Stent
        227488, -- GU Irrigant Volume In
        227489  -- GU Irrigant/Urine Volume Out
        )
      ) uo
      group by stay_id, charttime
      
"""

In [40]:
cohort_urine_output = run_query(f"""
      SELECT
      ie.subject_id
      , ie.stay_id
      , AVG(urineoutput) AS urineoutput
      FROM ({icu_intime}) ie
      -- Join to the outputevents table to get urine output
      LEFT JOIN ({urine_output}) uo
        ON ie.stay_id = uo.stay_id
        -- ensure the data occurs during the first day
        AND uo.charttime >= ie.intime
        AND uo.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
      GROUP BY ie.subject_id, ie.stay_id
    """
)

In [41]:
cohort_urine_output.to_csv('/content/drive/MyDrive/cohort_urine_output.csv',index=False)

In [42]:
# Input/output: 

#vasopressin_drugs
#module: mimiciv_icu
#table: outputevents

#item_ids: dopamine: 221662, 
# Epinephrine: 221289, 229617, 
# Norepinephrine: 221906,
# Phenylephrine: 221749, 229630, 229631, 229632,
# Vasopressin: 222315,
# Dobutamine: 221653,
# milrinone: 221986


vasopressin_med = f"""
    WITH vasopressin_drugs As (
       SELECT ie.subject_id,
           ie.stay_id,
           AVG(vaso.amount) AS dopamine_amount,
           AVG(vaso.rate) AS dopamine_rate
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.inputevents`  As vaso
          ON ie.subject_id = vaso.subject_id 
          AND vaso.itemid = 221662
          AND vaso.starttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND vaso.starttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
    ),
     epinephrine As (
       SELECT ie.subject_id,
           ie.stay_id,
           AVG(vaso.amount) AS epinephrine_amount,
           AVG(vaso.rate) AS epinephrine_rate
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.inputevents`  As vaso
          ON ie.subject_id = vaso.subject_id 
          AND  vaso.itemid IN (21289, 229617)
          AND vaso.starttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND vaso.starttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
    ),
    norepinephrine As (
       SELECT ie.subject_id,
           ie.stay_id,
           AVG(vaso.amount) AS norepinephrine_amount,
           AVG(vaso.rate) AS norepinephrine_rate
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.inputevents`  As vaso
          ON ie.subject_id = vaso.subject_id 
          AND  vaso.itemid =  221906
          AND vaso.starttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND vaso.starttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
    ),
    phenylephrine As (
       SELECT ie.subject_id,
           ie.stay_id,
           AVG(vaso.amount) AS phenylephrine_amount,
           AVG(vaso.rate) AS phenylephrine_rate
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.inputevents`  As vaso
          ON ie.subject_id = vaso.subject_id 
          AND  vaso.itemid IN (221749, 229630, 229631, 229632)
          AND vaso.starttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND vaso.starttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
    ),
    vasopressin As(
       SELECT ie.subject_id,
           ie.stay_id,
           AVG(vaso.amount) AS vasopressin_amount,
           AVG(vaso.rate) AS vasopressin_rate
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.inputevents`  As vaso
          ON ie.subject_id = vaso.subject_id 
          AND  vaso.itemid = 222315
          AND vaso.starttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND vaso.starttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
    ),
    dobutamine As(
       SELECT ie.subject_id,
           ie.stay_id,
           AVG(vaso.amount) AS dobutamine_amount,
           AVG(vaso.rate) AS dobutamine_rate
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.inputevents`  As vaso
          ON ie.subject_id = vaso.subject_id 
          AND  vaso.itemid = 221653
          AND vaso.starttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND vaso.starttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
    ),
       milrinone As(
       SELECT ie.subject_id,
           ie.stay_id,
           AVG(vaso.amount) AS milrinone_amount,
           AVG(vaso.rate) AS milrinone_rate
       FROM ({icu_intime}) As ie
       LEFT JOIN `physionet-data.mimiciv_icu.inputevents`  As vaso
          ON ie.subject_id = vaso.subject_id 
          AND  vaso.itemid = 221986
          AND vaso.starttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
          AND vaso.starttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
    GROUP BY  ie.subject_id, ie.stay_id 
    )
   SELECT vasopressin_drugs.subject_id,
          vasopressin_drugs.stay_id,
          vasopressin_drugs.dopamine_amount,
          vasopressin_drugs.dopamine_rate,
          epinephrine.epinephrine_amount,
          epinephrine.epinephrine_rate,
          norepinephrine.norepinephrine_amount,
          norepinephrine.norepinephrine_rate,
          phenylephrine.phenylephrine_amount,
          phenylephrine.phenylephrine_rate,
          vasopressin.vasopressin_amount,
          vasopressin.vasopressin_rate,
          dobutamine.dobutamine_amount,
          dobutamine.dobutamine_rate,
          milrinone.milrinone_amount,
          milrinone.milrinone_rate
   FROM vasopressin_drugs
   LEFT JOIN epinephrine
   ON vasopressin_drugs.stay_id = epinephrine.stay_id
   LEFT JOIN norepinephrine
   ON epinephrine.stay_id = norepinephrine.stay_id
   LEFT JOIN phenylephrine
   ON norepinephrine.stay_id = phenylephrine.stay_id
   LEFT JOIN vasopressin
   ON phenylephrine.stay_id = vasopressin.stay_id
   LEFT JOIN dobutamine
   ON vasopressin.stay_id = dobutamine.stay_id 
   LEFT JOIN milrinone
   ON dobutamine.stay_id  = milrinone.stay_id
"""

In [43]:
cohort_vasopressin = run_query(f"""
     SELECT vasopressin_med.*
     FROM ({vasopressin_med}) vasopressin_med
""")

In [44]:
cohort_vasopressin.to_csv('/content/drive/MyDrive/cohort_vasopressin.csv',index=False)

### **Join all result tables**

In [21]:
#Join tables: 

cohort_criteria = pd.read_csv('/content/drive/MyDrive/cohort_criteria_data.csv')
cohort_demographic_1 = pd.read_csv('/content/drive/MyDrive/cohort_demographic_1.csv')
cohort_demographic_2 = pd.read_csv('/content/drive/MyDrive/cohort_demographic_2.csv')
cohort_demographic_3 = pd.read_csv('/content/drive/MyDrive/cohort_demographic_3.csv')
cohort_demographic_4 = pd.read_csv('/content/drive/MyDrive/cohort_demographic_4.csv')
cohort_vitalsigns = pd.read_csv('/content/drive/MyDrive/cohort_vitalsigns.csv')
cohort_lab_tests = pd.read_csv('/content/drive/MyDrive/cohort_lab_tests.csv')
cohort_comorbidities_gcs = pd.read_csv('/content/drive/MyDrive/cohort_gcs_first_day.csv')
cohort_comorbidities_sofa = pd.read_csv('/content/drive/MyDrive/cohort_sofa_first_day.csv')
cohort_ventilation_status = pd.read_csv('/content/drive/MyDrive/cohort_ventilation_status.csv')
cohort_urine_output = pd.read_csv('/content/drive/MyDrive/cohort_urine_output.csv')
cohort_vasopressin =pd.read_csv('/content/drive/MyDrive/cohort_vasopressin.csv')


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/cohort_criteria_data.csv'

In [None]:
#1. Join demographic_1:
query1 = f"""
SELECT DISTINCT cohort_criteria.*,
       cohort_demographic_1.* 
FROM cohort_criteria  As cohort_criteria
LEFT JOIN cohort_demographic_1 AS cohort_demographic_1
   ON cohort_demographic_1.stay_id = cohort_criteria.stay_id 
""" 

In [None]:
#Run query
demographic_1_joined  = sqldf(query1, globals())

In [None]:
#Remove duplicate Ids columns 
demographic_1_joined = demographic_1_joined.loc[:,~demographic_1_joined.columns.duplicated()].copy()

In [None]:
#2. Join demographic_2:
query2 = f"""
SELECT DISTINCT demographic_1_joined.*,
       cohort_demographic_2.*
FROM demographic_1_joined  As demographic_1_joined
LEFT JOIN cohort_demographic_2 AS cohort_demographic_2
ON demographic_1_joined.hadm_id = cohort_demographic_2.hadm_id
""" 

In [None]:
#Run query
demographic_2_joined  = sqldf(query2, globals())

In [None]:
#Remove duplicate Ids columns 
demographic_2_joined = demographic_2_joined.loc[:,~demographic_2_joined.columns.duplicated()].copy()

In [None]:
#3. Join lab tests 
query3 = f"""
SELECT DISTINCT demographic_2_joined.*,
      cohort_demographic_3.* 
FROM demographic_2_joined  As demographic_2_joined
LEFT JOIN cohort_demographic_3 AS cohort_demographic_3
ON demographic_2_joined.stay_id = cohort_demographic_3.stay_id
""" 

In [None]:
#Run query
demographic_3_joined = sqldf(query3, globals())

In [None]:
#Remove duplicate Ids columns 
demographic_3_joined = demographic_3_joined.loc[:,~demographic_3_joined.columns.duplicated()].copy()

In [None]:
#4. Join comorbidities scores 

query4 = f"""
SELECT DISTINCT demographic_3_joined.*,
      cohort_demographic_4.*
FROM demographic_3_joined  As demographic_3_joined
LEFT JOIN cohort_demographic_4 AS cohort_demographic_4
ON demographic_3_joined.stay_id = cohort_demographic_4.stay_id
""" 

In [None]:
#Run query
demographic_4_joined  = sqldf(query4, globals())

In [None]:
#Remove duplicate Ids columns 
demographic_4_joined = demographic_4_joined.loc[:,~demographic_4_joined.columns.duplicated()].copy()

In [None]:
#5. Join vital signs 

query5 = f"""
SELECT DISTINCT demographic_4_joined.*,
     cohort_vitalsigns.*
FROM demographic_4_joined  As demographic_4_joined
LEFT JOIN cohort_vitalsigns AS cohort_vitalsigns 
ON demographic_4_joined.stay_id = cohort_vitalsigns .stay_id
""" 

In [None]:
#Run query
vitalsigns_joined  = sqldf(query5, globals())

In [None]:
#Remove duplicate Ids columns 
vitalsigns_joined = vitalsigns_joined.loc[:,~vitalsigns_joined.columns.duplicated()].copy()

In [None]:
#6. Join lab tests 1 

query6 = f"""
SELECT DISTINCT vitalsigns_joined.*,
     cohort_lab_tests.*
FROM vitalsigns_joined As vitalsigns_joined
LEFT JOIN cohort_lab_tests AS cohort_lab_tests
ON vitalsigns_joined.stay_id = cohort_lab_tests.stay_id
""" 

In [None]:
#Run query
lab_test_joined  = sqldf(query6, globals())

In [None]:
#Remove duplicate Ids columns 
lab_test_joined = lab_test_joined.loc[:,~lab_test_joined.columns.duplicated()].copy()

In [None]:
#7. Join lab tests 2

query7 = f"""
SELECT DISTINCT lab_test_joined.*,
     cohort_comorbidities_gcs.*
FROM lab_test_joined As lab_test_joined
LEFT JOIN cohort_comorbidities_gcs AS cohort_comorbidities_gcs
ON lab_test_joined.stay_id = cohort_comorbidities_gcs.stay_id
""" 

In [None]:
#Run query
cohort_gcs_joined  = sqldf(query7, globals())

In [None]:
#Remove duplicate Ids columns 
cohort_gcs_joined  = cohort_gcs_joined.loc[:,~cohort_gcs_joined.columns.duplicated()].copy()

In [None]:
#8. Join comorbiditiy score 1 

query8 = f"""
SELECT DISTINCT cohort_gcs_joined.*,
     cohort_comorbidities_sofa.*
FROM cohort_gcs_joined As cohort_gcs_joined
LEFT JOIN cohort_comorbidities_sofa AS cohort_comorbidities_sofa
ON cohort_gcs_joined.stay_id = cohort_comorbidities_sofa.stay_id
""" 


In [None]:
#Run query
cohort_sofa_joined = sqldf(query8, globals())

In [None]:
#Remove duplicate Ids columns 
cohort_sofa_joined = cohort_sofa_joined.loc[:,~cohort_sofa_joined.columns.duplicated()].copy()

In [None]:
#9. Join comorbiditiy score 2

query9 = f"""
SELECT DISTINCT cohort_sofa_joined.*,
    cohort_ventilation_status.*
FROM cohort_sofa_joined As cohort_sofa_joined
LEFT JOIN cohort_ventilation_status AS cohort_ventilation_status
ON cohort_sofa_joined.stay_id = cohort_ventilation_status.stay_id
""" 


In [None]:
#Run query
cohort_ventilation_joined  = sqldf(query9, globals())

In [None]:
#Remove duplicate Ids columns 
cohort_ventilation_joined  = cohort_ventilation_joined.loc[:,~cohort_ventilation_joined.columns.duplicated()].copy()

In [None]:
#10. Join comorbiditiy score 3

query10 = f"""
SELECT DISTINCT cohort_ventilation_joined.*,
     cohort_urine_output.*
FROM cohort_ventilation_joined As cohort_ventilation_joined
LEFT JOIN cohort_urine_output AS cohort_urine_output
ON cohort_ventilation_joined.stay_id = cohort_urine_output.stay_id
""" 

In [None]:
#Run query
cohort_urine_joined  = sqldf(query10, globals())

In [None]:
#Remove duplicate Ids columns 
cohort_urine_joined = cohort_urine_joined.loc[:,~cohort_urine_joined.columns.duplicated()].copy()

In [None]:
# 11. Join ventilation status
query11 = f"""
SELECT DISTINCT cohort_urine_joined.*,
     cohort_vasopressin.*
FROM cohort_urine_joined As cohort_urine_joined
LEFT JOIN cohort_vasopressin AS cohort_vasopressin
ON cohort_urine_joined.stay_id = cohort_vasopressin.stay_id
""" 


In [None]:
#Run query
cohort_vasopressin_joined = sqldf(query11, globals())

In [None]:
#Remove duplicate Ids columns 
cohort_vasopressin_joined = cohort_vasopressin_joined.loc[:,~cohort_vasopressin_joined.columns.duplicated()].copy()

### **Final result table**

In [None]:
initial_cohort = cohort_vasopressin_joined.copy()

In [None]:
initial_cohort.to_csv('/content/drive/MyDrive/initial_cohort_final_v02.csv')

In [None]:
pd.set_option('display.max_rows',None)
initial_cohort.isna().sum()

subject_id                   0
hadm_id                      0
stay_id                      0
n_stays                      0
sum_los                      0
anchor_age                   0
gender                       0
suspected_infection       7568
admission_type               0
weight                    1376
Height_chart              6253
heart_rate_min              30
heart_rate_max              30
temperature_min           1750
temperature_max           1750
ABPm_min                  7791
ABPm_max                  7791
ABPd_min                  7850
ABPd_max                  7850
ABPs_min                  7852
ABPs_max                  7852
NBPm_min                  2347
NBPm_max                  2347
NBPd_min                  2351
NBPd_max                  2351
NBPs_min                  2350
NBPs_max                  2350
RR_min                      51
RR_max                      51
SpO2_min                    33
SpO2_max                    33
albumin_min               9808
albumin_

In [None]:
len(initial_cohort)

15929