<a href="https://colab.research.google.com/github/johnsonjzhou/comp90089-project/blob/main/sql/initial_cohort.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **COMP90089 Final Project**

## **Selecting Initial Patient Cohort from MIMIC-IV**

In [50]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')
# Path of the "My Drive" folder underneath that mount point.
path = '/content/drive/My Drive'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [51]:
# Set up the environment

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import functools as ft
%matplotlib inline


# Google Cloud project under which the BigQuery jobs are run.
project_id = "mimic-iv-projects"


def run_query(query, project_id=project_id):
    """Run a BigQuery query and hand back the result from pandas.

    Args:
        query: SQL text written in BigQuery standard SQL.
        project_id: Google Cloud project to run the query under. The default
            is the value the module-level ``project_id`` had when this
            function was defined.

    Returns:
        The object returned by ``pd.io.gbq.read_gbq`` for ``query``.
    """
    gbq_options = {"project_id": project_id, "dialect": "standard"}
    return pd.io.gbq.read_gbq(query, **gbq_options)


In [52]:
# Admission-level ICU cohort: one row per (subject_id, hadm_id) from
# mimiciv_icu.icustays with the number of ICU stays and their average `los`.
# The inclusion thresholds are named constants interpolated into the SQL so
# they can be tuned without editing the query text.
MIN_ICU_STAYS = 1   # minimum number of ICU stays within the admission
MIN_AVG_LOS = 3.3   # minimum average icustays.los across those stays

c1 = f"""
SELECT
    icustays.subject_id As subject_id,
    icustays.hadm_id As hadm_id,
    count(icustays.stay_id) As n_stays,
    avg(icustays.los) As avg_los
FROM
  `physionet-data.mimiciv_icu.icustays` AS icustays
GROUP BY
    subject_id, hadm_id
HAVING
   n_stays >= {MIN_ICU_STAYS} AND avg_los >= {MIN_AVG_LOS}
ORDER BY
    subject_id, hadm_id
"""

In [53]:
# Expand the admission-level cohort (c1) to ICU-stay level: every icustays row
# whose hadm_id appears in c1 contributes its stay_id, alongside c1's columns.
cohort_c1 = f"""
    SELECT icustays.stay_id As stay_id,
    c1.*
    FROM  ({c1}) As c1
    INNER JOIN `physionet-data.mimiciv_icu.icustays` AS icustays
    ON icustays.hadm_id = c1.hadm_id
"""

In [54]:
# Restrict cohort_c1 using mimiciv_hosp.patients and add gender / anchor_age.
# Kept rows: anchor_age BETWEEN 18 AND 90 (both bounds inclusive) and dod IS NULL
# (no recorded date of death). The filters live in the ON clause of an INNER
# JOIN, so cohort rows whose patient fails either condition are dropped.
cohort_q1= f"""
      SELECT cohort_c1.*,
            patients.gender As gender,
            patients.anchor_age As anchor_age
      FROM ({cohort_c1}) As cohort_c1
      INNER JOIN `physionet-data.mimiciv_hosp.patients` As patients
        ON cohort_c1.subject_id = patients.subject_id AND (patients.anchor_age BETWEEN 18 AND 90) AND  patients.dod IS NULL
        ORDER BY
          patients.subject_id
        """

In [55]:
# Demographic 1: presence of infection
# Table: mimiciv_derived.suspicion_of_infection
# Keeps subjects that have at least one row with suspected_infection = 1.
#
# DISTINCT matches the other demographic sub-queries: this result is joined on
# subject_id, so repeated rows for the same subject (presumably one per
# antibiotic/culture event - confirm against the table) would otherwise
# multiply the join before the outer SELECT DISTINCT collapses them again.
demographic_1 = """
    SELECT DISTINCT suspicion_of_infection.subject_id As subject_id,
          suspicion_of_infection.suspected_infection As suspected_infection
    FROM `physionet-data.mimiciv_derived.suspicion_of_infection` As suspicion_of_infection
      WHERE suspicion_of_infection.suspected_infection = 1
"""

In [56]:
# Demographic 2: type of admission
# Table: mimiciv_hosp.admissions (column: admission_type)
# Note: some patients have more than one admission type, so the same
# subject_id can appear on several rows (one per distinct hadm_id / type).
demographic_2 = f"""
    SELECT  DISTINCT admission.subject_id As subject_id,
            admission.hadm_id As hadm_id,
            admission.admission_type As admission_type
    FROM `physionet-data.mimiciv_hosp.admissions` As admission
"""

In [57]:
# Demographic 3: weight and height
# Table: mimiciv_derived.first_day_weight (column: weight)
# Table: mimiciv_derived.first_day_height (column: height)
# The LEFT JOIN on stay_id keeps every first_day_weight row; height is NULL
# when first_day_height has no row for that stay.
demographic_3 = f"""
    SELECT DISTINCT first_day_weight.subject_id As subject_id,
            first_day_weight.stay_id As stay_id,
            first_day_weight.weight As weight,
            first_day_height.height As height,
    FROM `physionet-data.mimiciv_derived.first_day_weight` as first_day_weight
    LEFT JOIN `physionet-data.mimiciv_derived.first_day_height` As first_day_height
    ON first_day_weight.stay_id = first_day_height.stay_id
   """

In [58]:
# Combined demographics: admission type (demographic_2), infection flag
# (demographic_1) and weight / height with their stay_id (demographic_3).
# demographic_1 only holds suspected_infection = 1 rows and is INNER JOINed,
# so subjects without a suspected infection are dropped here.
# NOTE(review): both joins are on subject_id only, so each admission row is
# paired with every (stay_id, weight, height) row of the same subject,
# including stays from other admissions. Consumers that need stay-level
# values should also match on stay_id - confirm intended grain.
demographic = f"""
    SELECT DISTINCT demographic_2.*, 
                    demographic_1.suspected_infection,
                    demographic_3.weight,
                    demographic_3.stay_id,
                    demographic_3.height
   FROM ({demographic_2}) As  demographic_2
   INNER JOIN ({demographic_1}) As  demographic_1
     ON demographic_2.subject_id = demographic_1.subject_id
   INNER JOIN ({demographic_3}) As  demographic_3
     ON demographic_1.subject_id = demographic_3.subject_id
    """

In [59]:
# Cohort + demographics: adds admission_type, suspected_infection, weight and
# height to each cohort_q1 row (one row per ICU stay).
#
# The join matches on BOTH hadm_id and stay_id. `demographic` attaches
# weight/height rows to admissions by subject_id, so it holds one row per
# (admission, any stay of that patient). Matching on hadm_id alone would give
# every cohort stay the weight/height of every other stay of the same patient;
# the stay_id condition keeps only the measurements of the stay itself.
cohort_q2 = f"""
        SELECT DISTINCT cohort_q1.*,
                        demographic.admission_type,
                        demographic.suspected_infection,
                        demographic.weight,
                        demographic.height
        FROM ({cohort_q1}) As cohort_q1
        INNER JOIN ({demographic}) As demographic
         ON cohort_q1.hadm_id = demographic.hadm_id
        AND cohort_q1.stay_id = demographic.stay_id
        """

In [60]:
# Vital signs
# Table: mimiciv_derived.vitalsign
#
# Columns added to the cohort (descriptions as labelled by the notebook author):
#   heart_rate   - heart rate
#   resp_rate    - respiratory rate
#   temperature  - body temperature
#   mbp          - invasive mean arterial pressure
#   sbp          - invasive systolic blood pressure
#   dbp          - invasive diastolic blood pressure
#   mbp_ni       - non-invasive mean arterial pressure
#   sbp_ni       - non-invasive systolic blood pressure
#   dbp_ni       - non-invasive diastolic blood pressure
#   spo2         - oxygen saturation
#
# subject_id and stay_id are deliberately NOT re-selected from vitalsign:
# cohort_q2.* already carries both, and selecting them again under the same
# names produces duplicate output columns. BigQuery rejects duplicate column
# names in a query result, and a reference such as `cohort_vitalsigns.stay_id`
# becomes ambiguous once this query is nested as a sub-query.
cohort_vitalsigns = f"""
SELECT DISTINCT cohort_q2.*,
       vitalsigns.heart_rate As heart_rate,
       vitalsigns.resp_rate As resp_rate,
       vitalsigns.temperature As temperature,
       vitalsigns.mbp As mbp,
       vitalsigns.sbp As sbp,
       vitalsigns.dbp As dbp,
       vitalsigns.mbp_ni As mbp_ni,
       vitalsigns.sbp_ni As sbp_ni,
       vitalsigns.dbp_ni As dbp_ni,
       vitalsigns.spo2 As spo2
 FROM ({cohort_q2}) As cohort_q2
 INNER JOIN `physionet-data.mimiciv_derived.vitalsign` As vitalsigns
 ON cohort_q2.stay_id = vitalsigns.stay_id
"""

In [None]:
# Execute the vital-signs cohort query in BigQuery and keep the result as `q`.
q = run_query(cohort_vitalsigns)

In [None]:

# Laboratory tests


#Table: mimiciv_derived.first_day_lab

# Albumin(albumin_min, albumin_max)
# Blood urea nitrogen (bun_min, bun_max)
# Calcium (calcium_min, calcium_max)
# creatinine (creatinine_min, creatinine_max)
# Glucose (glucose_min, glucose_max)
# Bicarbonate (bicarbonate_min, bicarbonate_max)
# Potassium (potassium_min, potassium_max)
# Sodium (sodium_min, sodium_max)
# Platelets (platelets_min, platelets_max)
# Bilirubin (bilirubin_total_min, bilirubin_total_max)
# White blood cell count (wbc_min, wbc_max)

#Table: mimiciv_derived.first_day_bg

# Lactate (lactate_min, lactate_max)
# pH (ph_min, ph_max)
# Partial pressure of carbon dioxide (pco2_min, pco2_max)
# Partial pressure of Oxygen (po2_min, po2_max)
# PaO2/FiO2 ratio (pao2fio2ratio_min, pao2fio2ratio_max)

# Not found in either table above:

# Lactate Dehydrogenase,
# Magnesium (Mg), 
#Leukocytes
# Urea

# Adds the *_min / *_max laboratory columns from first_day_lab and the
# blood-gas columns from first_day_bg to every cohort_vitalsigns row.
# Both tables are INNER JOINed on stay_id, so rows whose stay has no match in
# either table are dropped from the result.
cohort_lab_tests = f"""
SELECT DISTINCT cohort_vitalsigns.*,
       lab_tests.albumin_min As albumin_min,
       lab_tests.albumin_max As albumin_max,
       lab_tests.bun_min As bun_min,
       lab_tests.bun_max As bun_max,
       lab_tests.calcium_min As calcium_min,
       lab_tests.calcium_max As calcium_max,
       lab_tests.creatinine_min As creatinine_min,
       lab_tests.creatinine_max As creatinine_max,
       lab_tests.glucose_min As glucose_min,
       lab_tests.glucose_max As glucose_max,
       lab_tests.bicarbonate_min As bicarbonate_min,
       lab_tests.bicarbonate_max As bicarbonate_max,
       lab_tests.potassium_min As potassium_min,
       lab_tests.potassium_max As potassium_max,
       lab_tests.sodium_min As sodium_min,
       lab_tests.sodium_max As sodium_max,
       lab_tests.platelets_min As platelets_min,
       lab_tests.platelets_max As platelets_max,
       lab_tests.bilirubin_total_min As bilirubin_total_min,
       lab_tests.bilirubin_total_max As bilirubin_total_max,
       lab_tests.wbc_min As wbc_min,
       lab_tests.wbc_max As wbc_max,
       lab_bg_tests.lactate_min As lactate_min,
       lab_bg_tests.lactate_max As lactate_max,
       lab_bg_tests.ph_min As ph_min,
       lab_bg_tests.ph_max As ph_max,
       lab_bg_tests.pco2_min As pco2_min,
       lab_bg_tests.pco2_max As pco2_max,
       lab_bg_tests.po2_min As po2_min,
       lab_bg_tests.po2_max As po2_max,
       lab_bg_tests.pao2fio2ratio_min As pao2fio2ratio_min,
       lab_bg_tests.pao2fio2ratio_max As pao2fio2ratio_max
      
 FROM ({cohort_vitalsigns}) As cohort_vitalsigns
 INNER JOIN `physionet-data.mimiciv_derived.first_day_lab`  As lab_tests
   ON cohort_vitalsigns.stay_id = lab_tests.stay_id
 INNER JOIN `physionet-data.mimiciv_derived.first_day_bg` As lab_bg_tests
 ON lab_tests.stay_id = lab_bg_tests.stay_id
"""


In [None]:
# Comorbidities
# Table: mimiciv_derived.charlson
# Charlson index (charlson_comorbidity_index), exposed here as `charlson`.
# NOTE(review): only subject_id is selected as a key; if the table holds one
# row per admission, a subject can appear several times with different
# scores - confirm the grain before joining this to the cohort.
cohort_comorbidities_1 = f"""
SELECT charlson_comorbidity.subject_id As subject_id,
       charlson_comorbidity.charlson_comorbidity_index As charlson

FROM `physionet-data.mimiciv_derived.charlson` As charlson_comorbidity
"""

In [None]:
# Table: mimiciv_derived.gcs
# Glasgow coma scale (gcs)
#
# Table: mimiciv_derived.first_day_sofa
# Sequential Organ Failure Assessment score (SOFA)
#
# The two tables are matched on stay_id as well as subject_id, in line with the
# other derived-table joins in this notebook. Matching on subject_id alone
# pairs each GCS row with the SOFA score of every ICU stay the patient ever
# had, which multiplies rows and mixes values from unrelated stays.
cohort_comorbidities_2 = """
SELECT glasgow_coma_scale.subject_id As subject_id,
       glasgow_coma_scale.gcs As gcs,
       first_day_sofa.SOFA As sofa

FROM `physionet-data.mimiciv_derived.gcs` As glasgow_coma_scale
INNER JOIN `physionet-data.mimiciv_derived.first_day_sofa` As first_day_sofa
ON glasgow_coma_scale.subject_id = first_day_sofa.subject_id
AND glasgow_coma_scale.stay_id = first_day_sofa.stay_id
"""

In [None]:
# Device use
# Table: mimiciv_derived.ventilation
# Ventilation status (ventilation_status), keyed by stay_id.
device_use = f"""
SELECT ventilation.stay_id As stay_id,
       ventilation.ventilation_status As ventilation_status
FROM `physionet-data.mimiciv_derived.ventilation` As ventilation
"""

In [None]:
# Cohort stays joined to their ventilation records (INNER JOIN on stay_id, so
# stays without any ventilation row are dropped).
# Only ventilation_status is taken from device_use: selecting device_use.*
# next to cohort_c1.* would emit stay_id twice, and BigQuery rejects duplicate
# column names in a query result.
cohort_device_use = f"""
SELECT cohort_c1.*,
       device_use.ventilation_status
FROM ({cohort_c1}) as cohort_c1
INNER JOIN ({device_use}) as device_use
ON cohort_c1.stay_id = device_use.stay_id
"""

In [None]:
# Execute the device-use cohort query; as the cell's last expression the
# returned result is displayed as the cell output.
run_query(cohort_device_use)

In [None]:
# Input/output
# Table: mimiciv_derived.first_day_urine_output
# Urine output (urineoutput), selected together with subject_id only.
# NOTE(review): no stay_id / hadm_id is selected, so this result can only be
# joined to the cohort at patient level - confirm that is intended.
cohort_urine_output = f"""
SELECT urine_output.subject_id As subject_id,
       urine_output.urineoutput As urineoutput,           
FROM `physionet-data.mimiciv_derived.first_day_urine_output` As urine_output
"""

In [None]:
# Vasopressor use
# Table: mimiciv_derived.vasopressin
# Infusion rate column (vaso_rate), keyed by stay_id.
vasopressin = f"""
SELECT vasopressin.stay_id As stay_id,
       vasopressin.vaso_rate As vaso_rate      
FROM `physionet-data.mimiciv_derived.vasopressin` As vasopressin
"""

In [None]:
# Cohort stays joined to their vasopressin records (INNER JOIN on stay_id, so
# stays without any vasopressin row are dropped).
# Only vaso_rate is taken from the vasopressin sub-query: selecting
# vasopressin.* next to cohort_c1.* would emit stay_id twice, and BigQuery
# rejects duplicate column names in a query result.
cohort_vasopressin = f"""
SELECT cohort_c1.*,
       vasopressin.vaso_rate
FROM ({cohort_c1}) as cohort_c1
INNER JOIN ({vasopressin}) as vasopressin
ON cohort_c1.stay_id = vasopressin.stay_id
"""

In [None]:
# Run the final cohort query and keep the result as `final_cohort`.
# NOTE(review): `final_query` is not assigned in any cell visible above; on a
# fresh kernel this line raises NameError unless it is defined elsewhere -
# confirm which query string was intended.
final_cohort = run_query(final_query)