<a href="https://colab.research.google.com/github/johnsonjzhou/comp90089-project/blob/main/initial_cohort.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **COMP90089 Final Project**

## **Selecting Initial Patient Cohort from MIMICIV**

In [3]:
from google.colab import drive

# Folder inside the mounted Drive; only usable once the mount below has succeeded.
path = '/content/drive/My Drive'

# Attach Google Drive to the Colab filesystem at /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [1]:
# Set up the environment

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import functools as ft
%matplotlib inline


# Google Cloud project ID handed to every BigQuery call made by run_query.
project_id = "mimic-iv-projects"


def run_query(query, project_id=project_id):
    """Run a BigQuery query and return the result as a pandas DataFrame.

    Parameters
    ----------
    query : str
        SQL text, interpreted with the 'standard' dialect.
    project_id : str, optional
        Project ID forwarded to ``read_gbq``. Defaults to the value of the
        module-level ``project_id`` at the time this function was defined.

    Returns
    -------
    Whatever ``pd.io.gbq.read_gbq`` returns for the query.
    """
    read_options = {"project_id": project_id, "dialect": "standard"}
    return pd.io.gbq.read_gbq(query, **read_options)


In [2]:
# Inclusion criterion 1 (ICU stays): rows of mimiciv_icu.icustays whose
# aggregated los is at least 3.3, keyed by subject_id / hadm_id / stay_id.
# NOTE(review): the GROUP BY includes stay_id, so each group presumably holds a
# single stay, which would make n_stays always 1 and avg_los equal to that
# stay's los. Confirm whether per-subject grouping was intended.
cohort_c1 = """
SELECT
    icustays.subject_id AS subject_id,
    icustays.stay_id As stay_id,
    icustays.hadm_id AS hadm_id,
    count(icustays.stay_id) As n_stays,
    avg(icustays.los) As avg_los
FROM
  `physionet-data.mimiciv_icu.icustays` AS icustays
GROUP BY
    subject_id, hadm_id, stay_id
HAVING 
   n_stays >= 1 AND avg_los >= 3.3
ORDER BY
    subject_id, hadm_id
"""

In [3]:
# Inclusion criterion 2 (patients): rows of mimiciv_hosp.patients with
# anchor_age between 18 and 90 (inclusive) and a NULL dod.
cohort_c2 = """
SELECT patients.subject_id As subject_id,
       patients.gender As gender,
       patients.anchor_age As anchor_age
FROM `physionet-data.mimiciv_hosp.patients` As patients
WHERE (anchor_age BETWEEN 18 AND 90) 
AND
dod IS NULL
ORDER BY
    subject_id
"""

In [4]:
# Combine both inclusion criteria: the ICU-stay filter (cohort_c1) and the
# patient-level age / dod filter (cohort_c2). The inner join on subject_id
# keeps only subjects present in both results.
cohort_criteria = pd.merge(
    run_query(cohort_c1),
    run_query(cohort_c2),
    on='subject_id',
    how='inner',
)

In [None]:
# Display the merged cohort-criteria DataFrame as the cell output.
cohort_criteria

In [5]:
# Demographics, part 1: presence of infection.
# Source table: mimiciv_derived.suspicion_of_infection
# Keeps only rows flagged with suspected_infection = 1.

cohort_demographic_1 = """
SELECT suspicion_of_infection.subject_id As subject_id,
       suspicion_of_infection.suspected_infection As suspected_infection,
FROM `physionet-data.mimiciv_derived.suspicion_of_infection` As suspicion_of_infection
   WHERE suspicion_of_infection.suspected_infection = 1
"""

In [6]:
# Execute the infection query. The name is rebound from the SQL string to the
# result, so re-running this cell alone would pass that result back into run_query.
cohort_demographic_1 = run_query(cohort_demographic_1)

In [7]:
# Demographics, part 2: admission type plus first-day weight and height.
# Source tables: mimiciv_hosp.admissions, mimiciv_derived.first_day_weight,
#                mimiciv_derived.first_day_height
#
# Note: some patients have more than one admission type.
# NOTE(review): all joins are on subject_id only, so a subject with several
# rows in any of the three tables presumably yields one output row per
# combination. Confirm this fan-out is acceptable.

cohort_demographic_2 = """
 SELECT  admission.subject_id As subject_id,
         admission.admission_type As admission_type,
         first_day_weight.weight As weight,
         first_day_height.height As height

 FROM `physionet-data.mimiciv_hosp.admissions` As admission 
 INNER JOIN `physionet-data.mimiciv_derived.first_day_weight` as first_day_weight
 ON admission.subject_id = first_day_weight.subject_id
 INNER JOIN `physionet-data.mimiciv_derived.first_day_height` As first_day_height
 ON first_day_weight.subject_id = first_day_height.subject_id
"""

In [8]:
# Execute the admission/weight/height query. The name is rebound from the SQL
# string to the result, so re-running this cell alone would pass that result
# back into run_query.
cohort_demographic_2 = run_query(cohort_demographic_2)

In [9]:
# Vital signs
# Source table: mimiciv_derived.first_day_vitalsign
#
# Selected columns (each the *_mean column of the table):
#   heart rate                          (heart_rate_mean  -> heart_rate)
#   respiratory rate                    (resp_rate_mean   -> resp_rate)
#   body temperature                    (temperature_mean -> temperature)
#   invasive mean arterial pressure     (mbp_mean         -> mbp)
#   invasive systolic blood pressure    (sbp_mean         -> sbp)
#   invasive diastolic blood pressure   (dbp_mean         -> dbp)
#   oxygen saturation                   (spo2_mean        -> spo2)
#
# Note: the non-invasive pressures were not found in first_day_vitalsign:
#   non-invasive mean arterial pressure (mbp_ni)
#   non-invasive systolic blood pressure (sbp_ni)
#   non-invasive diastolic blood pressure (dbp_ni)
#
# mbp_mean is projected exactly once; selecting it twice under the same alias
# would give the result two columns named `mbp`.

cohort_vital_signs = """
SELECT vitalsigns.subject_id As subject_id,
       vitalsigns.heart_rate_mean As heart_rate,
       vitalsigns.resp_rate_mean As resp_rate,
       vitalsigns.temperature_mean As temperature,
       vitalsigns.mbp_mean As mbp,
       vitalsigns.sbp_mean As sbp,
       vitalsigns.dbp_mean As dbp,
       vitalsigns.spo2_mean As spo2
 FROM `physionet-data.mimiciv_derived.first_day_vitalsign`  As vitalsigns 
"""

In [10]:
# Execute the vital-signs query. The name is rebound from the SQL string to the
# result, so re-running this cell alone would pass that result back into run_query.
cohort_vital_signs = run_query(cohort_vital_signs)

In [11]:

# Laboratory tests
#
# From mimiciv_derived.first_day_lab (min and max of each):
#   albumin, blood urea nitrogen (bun), calcium, creatinine, glucose,
#   bicarbonate, potassium, sodium, platelets, total bilirubin
#   (bilirubin_total), white blood cell count (wbc)
#
# From mimiciv_derived.first_day_bg (min and max of each):
#   lactate, pH (ph), partial pressure of carbon dioxide (pco2),
#   partial pressure of oxygen (po2), PaO2/FiO2 ratio (pao2fio2ratio)
#
# Not found:
#   lactate dehydrogenase, magnesium (Mg), leukocytes, urea
#
# NOTE(review): the two tables are joined on subject_id only. If either table
# holds several rows per subject, rows are presumably paired in every
# combination. Confirm.

cohort_lab_tests = """
SELECT lab_tests.subject_id As subject_id,
       lab_tests.albumin_min As albumin_min,
       lab_tests.albumin_max As albumin_max,
       lab_tests.bun_min As bun_min,
       lab_tests.bun_max As bun_max,
       lab_tests.calcium_min As calcium_min,
       lab_tests.calcium_max As calcium_max,
       lab_tests.creatinine_min As creatinine_min,
       lab_tests.creatinine_max As creatinine_max,
       lab_tests.glucose_min As glucose_min,
       lab_tests.glucose_max As glucose_max,
       lab_tests.bicarbonate_min As bicarbonate_min,
       lab_tests.bicarbonate_max As bicarbonate_max,
       lab_tests.potassium_min As potassium_min,
       lab_tests.potassium_max As potassium_max,
       lab_tests.sodium_min As sodium_min,
       lab_tests.sodium_max As sodium_max,
       lab_tests.platelets_min As platelets_min,
       lab_tests.platelets_max As platelets_max,
       lab_tests.bilirubin_total_min As bilirubin_total_min,
       lab_tests.bilirubin_total_max As bilirubin_total_max,
       lab_tests.wbc_min As wbc_min,
       lab_tests.wbc_max As wbc_max,
       lab_bg_tests.lactate_min As lactate_min,
       lab_bg_tests.lactate_max As lactate_max,
       lab_bg_tests.ph_min As ph_min,
       lab_bg_tests.ph_max As ph_max,
       lab_bg_tests.pco2_min As pco2_min,
       lab_bg_tests.pco2_max As pco2_max,
       lab_bg_tests.po2_min As po2_min,
       lab_bg_tests.po2_max As po2_max,
       lab_bg_tests.pao2fio2ratio_min As pao2fio2ratio_min,
       lab_bg_tests.pao2fio2ratio_max As pao2fio2ratio_max
      
 FROM `physionet-data.mimiciv_derived.first_day_lab`  As lab_tests
 INNER JOIN `physionet-data.mimiciv_derived.first_day_bg` As lab_bg_tests
 ON lab_tests.subject_id = lab_bg_tests.subject_id
"""



In [12]:
# Execute the lab-tests query. The name is rebound from the SQL string to the
# result, so re-running this cell alone would pass that result back into run_query.
cohort_lab_tests = run_query(cohort_lab_tests)

In [None]:
# Display the lab-tests query result as the cell output.
cohort_lab_tests

In [13]:
# Comorbidities, part 1: Charlson index.
# Source table: mimiciv_derived.charlson
# charlson_comorbidity_index is exposed under the shorter alias `charlson`.


cohort_comorbidities_1 = """
SELECT charlson_comorbidity.subject_id As subject_id,
       charlson_comorbidity.charlson_comorbidity_index As charlson

FROM `physionet-data.mimiciv_derived.charlson` As charlson_comorbidity
"""

In [14]:
# Execute the Charlson query. The name is rebound from the SQL string to the
# result, so re-running this cell alone would pass that result back into run_query.
cohort_comorbidities_1 = run_query(cohort_comorbidities_1)

In [None]:
# Display the Charlson query result as the cell output.
cohort_comorbidities_1

In [15]:
# Comorbidities, part 2: severity scores.
#   Glasgow coma scale (gcs)                          from mimiciv_derived.gcs
#   Sequential Organ Failure Assessment score (SOFA)  from mimiciv_derived.first_day_sofa
# NOTE(review): the tables are joined on subject_id only. If either holds
# several rows per subject, rows are presumably paired in every combination.
# Confirm.

cohort_comorbidities_2 = """
SELECT glasgow_coma_scale.subject_id As subject_id,
       glasgow_coma_scale.gcs As gcs,
       first_day_sofa.SOFA As sofa

FROM `physionet-data.mimiciv_derived.gcs` As glasgow_coma_scale
INNER JOIN `physionet-data.mimiciv_derived.first_day_sofa` As first_day_sofa
ON glasgow_coma_scale.subject_id = first_day_sofa.subject_id 
"""

In [16]:
# Execute the GCS/SOFA query. The name is rebound from the SQL string to the
# result, so re-running this cell alone would pass that result back into run_query.
cohort_comorbidities_2 = run_query(cohort_comorbidities_2)

In [None]:
# Display the GCS/SOFA query result as the cell output.
cohort_comorbidities_2

In [17]:
# Device use: ventilation status per ICU stay.
# Source table: mimiciv_derived.ventilation
# Keyed by stay_id; embedded as a subquery by cohort_device_use.

device_use = """
SELECT ventilation.stay_id As stay_id,
       ventilation.ventilation_status As ventilation_status
FROM `physionet-data.mimiciv_derived.ventilation` As ventilation
"""

In [18]:
# Restrict the ventilation rows to the ICU stays selected by cohort_c1.
# Both subqueries expose stay_id, so it is excluded from the device_use side;
# otherwise the result would contain the join key twice under one name.
cohort_device_use = f"""
SELECT cohort_c1.*,
       device_use.* EXCEPT (stay_id)
FROM ({cohort_c1}) as cohort_c1
INNER JOIN ({device_use}) as device_use
ON cohort_c1.stay_id = device_use.stay_id
"""

In [19]:
# Execute the cohort/ventilation join. The name is rebound from the SQL string
# to the result, so re-running this cell alone would pass that result back into
# run_query.
cohort_device_use = run_query(cohort_device_use)

In [20]:
# Input/output: urine output.
# Source table: mimiciv_derived.first_day_urine_output
# Selected column: urineoutput


cohort_urine_output = """
SELECT urine_output.subject_id As subject_id,
       urine_output.urineoutput As urineoutput,           
FROM `physionet-data.mimiciv_derived.first_day_urine_output` As urine_output
"""

In [21]:
# Execute the urine-output query. The name is rebound from the SQL string to the
# result, so re-running this cell alone would pass that result back into run_query.
cohort_urine_output = run_query(cohort_urine_output)

In [22]:
# Input/output: use of vasopressors.
# Source table: mimiciv_derived.vasopressin
# Selected column: vaso_rate, keyed by stay_id; embedded as a subquery by
# cohort_vasopressin.

vasopressin = """
SELECT vasopressin.stay_id As stay_id,
       vasopressin.vaso_rate As vaso_rate      
FROM `physionet-data.mimiciv_derived.vasopressin` As vasopressin
"""

In [23]:
# Restrict the vasopressin rows to the ICU stays selected by cohort_c1.
# Both subqueries expose stay_id, so it is excluded from the vasopressin side;
# otherwise the result would contain the join key twice under one name.
cohort_vasopressin = f"""
SELECT cohort_c1.*,
       vasopressin.* EXCEPT (stay_id)
FROM ({cohort_c1}) as cohort_c1
INNER JOIN ({vasopressin}) as vasopressin
ON cohort_c1.stay_id = vasopressin.stay_id
"""

In [24]:
# Execute the cohort/vasopressin join. The name is rebound from the SQL string
# to the result, so re-running this cell alone would pass that result back into
# run_query.
cohort_vasopressin = run_query(cohort_vasopressin)

In [None]:
# Display the cohort/vasopressin query result as the cell output.
cohort_vasopressin

In [33]:
# All per-domain results built by the cells above, in the order they were
# created, collected into one list for the merge step that follows.
dfs_features = [
    cohort_criteria,
    cohort_demographic_1,
    cohort_demographic_2,
    cohort_vital_signs,
    cohort_lab_tests,
    cohort_comorbidities_1,
    cohort_comorbidities_2,
    cohort_device_use,
    cohort_urine_output,
    cohort_vasopressin,
]

In [None]:
#Inner merge dfs_features:


In [28]:
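# NOTE: disabled draft of a single combined query. Most names interpolated
# below (cohort_demographic_1, cohort_vital_signs, cohort_lab_tests, ...) are
# rebound to query results by earlier cells, so this draft would only work if
# they still held the original SQL strings.
# final_query = f"""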
#       SELECT cohort_c1.*,
#              cohort_c2.*,
#              cohort_demographic_1.*,
#              cohort_demographic_2.*,
#              cohort_vital_signs.*,
#              cohort_lab_tests.*,
#              cohort_comorbidities_1.*,
#              cohort_comorbidities_2.*,
#              cohort_device_use.*,
#              cohort_urine_output.*,
#              cohort_vasopressin.*
#       FROM ({cohort_c1}) As cohort_c1
#       INNER JOIN ({cohort_c2}) As cohort_c2
#       ON cohort_c1.subject_id = cohort_c2.subject_id
#       INNER JOIN ({cohort_demographic_1}) As cohort_demographic_1
#       ON cohort_c2.subject_id = cohort_demographic_1.subject_id
#       INNER JOIN ({cohort_demographic_2}) as cohort_demographic_2
#       ON cohort_demographic_1.subject_id = cohort_demographic_2.subject_id
#       INNER JOIN ({cohort_vital_signs}) As cohort_vital_signs
#       ON cohort_demographic_2.subject_id = cohort_vital_signs.subject_id
#       INNER JOIN ({cohort_lab_tests}) As cohort_lab_tests
#       ON cohort_vital_signs.subject_id = cohort_lab_tests.subject_id 
#       INNER JOIN ({cohort_comorbidities_1}) As cohort_comorbidities_1
#       ON cohort_lab_tests.subject_id  = cohort_comorbidities_1.subject_id
#       INNER JOIN ({cohort_comorbidities_2}) As cohort_comorbidities_2
#       ON cohort_comorbidities_1.subject_id  = cohort_comorbidities_2.subject_id
#       INNER JOIN ({cohort_device_use}) As cohort_device_use
#       ON  cohort_comorbidities_2.subject_id = cohort_device_use.subject_id
#       INNER JOIN ({cohort_urine_output}) As cohort_urine_output
#       ON cohort_device_use.subject_id = cohort_urine_output.subject_id 
#       INNER JOIN ({cohort_vasopressin}) As cohort_vasopressin
#       ON cohort_urine_output.subject_id  = cohort_vasopressin.subject_id
#       """