In [None]:
pip install tableone

Collecting tableone
  Downloading tableone-0.8.0-py3-none-any.whl (33 kB)
Installing collected packages: tableone
Successfully installed tableone-0.8.0


In [None]:
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tableone import TableOne
import math
from scipy import stats
pd.set_option('display.max_columns', 999)
import pandas.io.sql as psql
import seaborn as sns
import missingno as msn

# Preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing

from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif, chi2, mutual_info_classif, mutual_info_regression

from sklearn.model_selection import train_test_split
%matplotlib inline

In [None]:
from google.colab import auth
from google.cloud import bigquery

  and should_run_async(code)


In [None]:
auth.authenticate_user()

  and should_run_async(code)


## Retrieve and preprocess **drugs and treatment** related data

In [None]:
query = """
WITH dx AS -- sub-query diagnosed information about all admitted patient
(
  SELECT subject_id AS subject_id, hadm_id AS hadm_id, icd_version AS icd_version, TRIM(icd_code) AS icd_code
  FROM `physionet-data.mimiciv_hosp.diagnoses_icd`
), icd9 AS -- check whether type of patient's sick based on icd-9
(
  SELECT dx.subject_id AS subject_id
  , MAX(case when dx.icd_code in ('1960', '1961', '1962', '1963', '1965', '1966', '1968', '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978', '1980'
  , '1981', '1982', '1983', '1984', '1985', '1986', '1987', '19882', '19889', '1990') then 1 else 0 end) AS advanced_cancer
  , MAX(case when dx.icd_code in ('200', '201', '202', '203', '204', '205', '206', '207', '208') then 1
             when dx.icd_code LIKE '200%' then 1
             else 0 end) AS hematologic_malignancy
  FROM dx
  WHERE dx.icd_version = 9
  GROUP BY dx.subject_id
), icd10 AS -- check whether type of patient's sick based on icd-10
(
  SELECT dx.subject_id AS subject_id
  , MAX(case when dx.icd_code in ('C770', 'C771', 'C772', 'C773', 'C774', 'C775', 'C778', 'C779', 'C780', 'C7800', 'C7801', 'C7802', 'C781', 'C782', 'C783', 'C7830'
  , 'C7839', 'C784', 'C785', 'C786', 'C787', 'C788', 'C7880', 'C7889', 'C790', 'C7900', 'C7901','C7902', 'C791', 'C7910', 'C7911', 'C7919', 'C792', 'C793', 'C7931'
  , 'C7932', 'C794', 'C7940', 'C7949', 'C795', 'C7951', 'C7952', 'C796', 'C7960', 'C7961', 'C7962', 'C7963', 'C797', 'C7970', 'C7971', 'C7972', 'C798', 'C7981', 'C7982'
  , 'C7989', 'C799', 'C800' ) then 1 else 0 end) AS advanced_cancer
  , MAX(case when dx.icd_code in ('C81', 'C82', 'C83', 'C84', 'C85', 'C86', 'C88', 'C90', 'C91', 'C92', 'C93', 'C94', 'C95', 'C96') then 1
             when dx.icd_code LIKE 'C81%' then 1
             else 0 end) AS hematologic_malignancy
  FROM dx
  WHERE dx.icd_version = 10
  GROUP BY dx.subject_id
), icd_9_10 AS --Get index of metastatic cancer for each icu patient
(
  SELECT
    icu_stays.subject_id AS subject_id, icu_stays.hadm_id AS hadm_id, icu_stays.stay_id AS stay_id, icu_stays.icu_intime AS intime, icu_stays.icu_outtime AS outtime, icu_stays.dod AS dod, icu_stays.hospital_expire_flag AS label_hosp
    , GREATEST(COALESCE(icd9.advanced_cancer, 0), COALESCE(icd10.advanced_cancer, 0)) AS advanced_cancer
    , GREATEST(COALESCE(icd9.hematologic_malignancy, 0), COALESCE(icd10.hematologic_malignancy, 0)) AS hematologic_malignancy,
    CASE
      WHEN icu_stays.dod < icu_stays.icu_outtime THEN 1
      ELSE 0
    END AS label_icu
  FROM `physionet-data.mimiciv_derived.icustay_detail` AS icu_stays
  LEFT JOIN icd9 ON icu_stays.subject_id = icd9.subject_id
  LEFT JOIN icd10 ON icu_stays.subject_id = icd10.subject_id
), inclusion_set AS --Pick patient of metastatic cancer and age between 18 and 89, and return subject_id, hadm_id, stay_id, and intime of ICU
(
  SELECT i_9_10.subject_id AS subject_id, i_9_10.hadm_id AS hadm_id, i_9_10.stay_id AS stay_id, i_9_10.intime AS intime, i_9_10.outtime AS outtime,
  i_9_10.dod AS dod, i_9_10.label_hosp AS label_hosp, i_9_10.label_icu AS label_icu
  FROM icd_9_10 AS i_9_10
  INNER JOIN `physionet-data.mimiciv_hosp.patients` AS patients ON i_9_10.subject_id = patients.subject_id
  WHERE patients.anchor_age >= 18
  AND patients.anchor_age <= 89
  AND (i_9_10.advanced_cancer = 1 OR i_9_10.hematologic_malignancy = 1)
  ORDER BY i_9_10.subject_id, i_9_10.intime
), baseline_level_1 AS
(
  SELECT i_set.subject_id AS subject_id, i_set.hadm_id AS hadm_id, i_set.stay_id AS stay_id, i_set.intime AS intime, Age.age AS Age,
  icu_details.gender, admission.insurance AS insurance, admission.race AS race, admission.admission_type AS admission_type
  FROM inclusion_set AS i_set
  LEFT JOIN `physionet-data.mimiciv_derived.age` AS Age
  ON i_set.subject_id = Age.subject_id AND i_set.hadm_id = Age.hadm_id
  LEFT JOIN `physionet-data.mimiciv_derived.icustay_detail` AS icu_details
  ON i_set.stay_id = icu_details.stay_id
  LEFT JOIN `physionet-data.mimiciv_hosp.admissions` AS admission
  ON i_set.hadm_id = admission.hadm_id
), baseline_level_2 AS
(
  SELECT i_set.subject_id AS subject_id, i_set.hadm_id AS hadm_id, i_set.stay_id AS stay_id, i_set.intime AS intime,
  lods.LODS AS lods, oasis.oasis AS oasis, sapsii.sapsii AS sapsii, sirs.sirs AS sirs, sepsis3.sepsis3 AS sepsis3, meld.meld AS meld
  FROM inclusion_set AS i_set
  LEFT JOIN `physionet-data.mimiciv_derived.lods` AS lods
  ON i_set.stay_id = lods.stay_id
  LEFT JOIN `physionet-data.mimiciv_derived.oasis` AS oasis
  ON i_set.stay_id = oasis.stay_id
  LEFT JOIN `physionet-data.mimiciv_derived.sapsii` AS sapsii
  ON i_set.stay_id = sapsii.stay_id
  LEFT JOIN `physionet-data.mimiciv_derived.sirs` AS sirs
  ON i_set.stay_id = sirs.stay_id
  LEFT JOIN `physionet-data.mimiciv_derived.sepsis3` AS sepsis3
  ON i_set.stay_id = sepsis3.stay_id
  LEFT JOIN `physionet-data.mimiciv_derived.meld` AS meld
  ON i_set.stay_id = meld.stay_id
),

crrt_data AS(
  SELECT DISTINCT i_set.subject_id, i_set.stay_id,
  DATETIME_DIFF(CRRT.charttime, i_set.intime, DAY) AS crrt_day, label_icu, label_hosp

  FROM inclusion_set AS i_set
  LEFT JOIN `physionet-data.mimiciv_derived.crrt` AS CRRT
  ON i_set.stay_id = CRRT.stay_id
),


invasive_line_data as(
  SELECT DISTINCT i_set.subject_id, i_set.stay_id, (case when InvasiveLine.starttime is not null then 1 else 0 end) as invasive_line_label, label_icu,label_hosp
  -- , InvasiveLine.starttime, i_set.intime
  FROM inclusion_set AS i_set
  LEFT JOIN `physionet-data.mimiciv_derived.invasive_line` AS InvasiveLine
  ON i_set.stay_id = InvasiveLine.stay_id
),

rrt_data AS(
  SELECT DISTINCT i_set.subject_id, i_set.stay_id,
  DATETIME_DIFF(RRT.charttime, i_set.intime, DAY) AS rrt_day, label_icu, label_hosp

  FROM inclusion_set AS i_set
  LEFT JOIN `physionet-data.mimiciv_derived.rrt` AS RRT
  ON i_set.stay_id = RRT.stay_id
),

ventilation_data as(
  SELECT DISTINCT i_set.subject_id, i_set.stay_id, (case when Ventilation.starttime is not null then 1 else 0 end) as ventilation_label, label_icu, label_hosp
  -- , InvasiveLine.starttime, i_set.intime
  FROM inclusion_set AS i_set
  LEFT JOIN `physionet-data.mimiciv_derived.ventilation` AS Ventilation
  ON i_set.stay_id = Ventilation.stay_id
),

sedative_data as(
  with include_sedative as(
    SELECT subject_id, stay_id, hadm_id, starttime, endtime, itemid
    FROM `physionet-data.mimiciv_icu.inputevents`
    WHERE itemid IN (221668, 225942, 225972, 221744, 222168)
  )
  SELECT DISTINCT i_set.subject_id, i_set.stay_id, (case when include_sedative.starttime is not null then 1 else 0 end) as sedative_label, label_icu, label_hosp
  FROM inclusion_set AS i_set
  LEFT JOIN include_sedative
  ON i_set.stay_id = include_sedative.stay_id and i_set.subject_id = include_sedative.subject_id
),

antibiotic_data as(
  SELECT DISTINCT i_set.subject_id, i_set.stay_id, (case when Antibiotic.starttime is not null then 1 else 0 end) as antibiotic_label, label_icu, label_hosp
  FROM inclusion_set AS i_set
  LEFT JOIN `physionet-data.mimiciv_derived.antibiotic` as Antibiotic
  ON i_set.stay_id = Antibiotic.stay_id and i_set.subject_id = Antibiotic.subject_id
),

vasoactive_data as(
  with norepinephrine_equivalent_dose as(
    SELECT DISTINCT stay_id, (case when norepinephrine_equivalent_dose is not null then 1 else 0 end) as norepinephrine_equivalent_dose_label,
    FROM `physionet-data.mimiciv_derived.norepinephrine_equivalent_dose`
  )

  SELECT DISTINCT i_set.subject_id, i_set.stay_id,
  (case when dobutamine is not null then 1 else 0 end) as dobutamine_label,
  (case when dopamine is not null then 1 else 0 end) as dopamine_label,
  (case when epinephrine is not null then 1 else 0 end) as epinephrine_label,
  (case when milrinone is not null then 1 else 0 end) as milrinone_label,
  (case when norepinephrine is not null then 1 else 0 end) as norepinephrine_label,
  (case when norepinephrine_equivalent_dose is not null then 1 else 0 end) as norepinephrine_equivalent_dose_label,
  (case when phenylephrine is not null then 1 else 0 end) as phenylephrine_label,
  (case when vasopressin is not null then 1 else 0 end) as vasopressin_label,
  label_icu,
  label_hosp

  FROM inclusion_set AS i_set
  LEFT JOIN `physionet-data.mimiciv_derived.vasoactive_agent` as Vasoactive
  ON i_set.stay_id = Vasoactive.stay_id
  LEFT JOIN physionet-data.mimiciv_derived.norepinephrine_equivalent_dose as Norepinephrine_equivalent_dose
  ON i_set.stay_id = Norepinephrine_equivalent_dose.stay_id
)

"""

  and should_run_async(code)


In [None]:
def get_summary(data, target, subject):  ## used for drugs related data
  temp = {}
  recorded_patients = []

  for index, row in data.iterrows():
    subject_data = row[subject]
    target_data = row[target]
    label = row["label_icu"]
    label2 = row["label_hosp"]

    if subject_data in recorded_patients and target_data != None and not math.isnan(target_data) and target_data <= 1000:
      if target_data == 1:
        temp[subject_data] = [row["subject_id"], target_data ,label, label2]


    else:
      temp[subject_data] = [row["subject_id"],target_data ,label, label2]
      recorded_patients.append(subject_data)


  summary = []

  for subject_data in temp.keys():
    summary.append({'subject_id': temp[subject_data][0],'stay_id': subject_data, target: temp[subject_data][1], "label_icu": temp[subject_data][2],  "label_hosp": temp[subject_data][3]})

  return pd.DataFrame(summary)

  and should_run_async(code)


In [None]:
def get_summary2(data, target, subject): ## used for treatment related data
  temp = {}
  recorded_patients = []

  for index, row in data.iterrows():
    subject_data = row[subject]
    target_data = row[target]
    label = row["label_icu"]
    label2 = row["label_hosp"]

    if subject_data in recorded_patients and target_data != None and not math.isnan(target_data) and target_data <= 1000:
      if target_data >= 1:
        temp[subject_data] = [1 ,label, label2]


    else:
      flag = 0;
      if subject_data in recorded_patients and target_data != None and not math.isnan(target_data):
        flag = 1;

      else:
        flag = 0;

      temp[subject_data] = [flag ,label, label2]
      recorded_patients.append(subject_data)


  summary = []

  for subject_data in temp.keys():
    summary.append({'stay_id': subject_data, target: temp[subject_data][0], "label_icu": temp[subject_data][1],  "label_hosp": temp[subject_data][2]})

  return pd.DataFrame(summary)

  and should_run_async(code)


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM invasive_line_data
""", "mimic-401105")

  and should_run_async(code)


In [None]:
print("Baseline3")
# print(baseline3_data)
baseline3_data = get_summary(baseline3_data, target = "invasive_line_label", subject = "stay_id")
baseline3_data.to_csv("invasive_line.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15017672.7 (2872403.3),15069426.7 (2834558.7),0.645
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"invasive_line_label, n (%)",0.0,0.0,2659 (40.2),2480 (42.0),179 (24.9),<0.001
"invasive_line_label, n (%)",1.0,,3959 (59.8),3420 (58.0),539 (75.1),
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
print("Baseline3")
# print(baseline3_data)
baseline3_data = get_summary(baseline3_data, target = "invasive_line_label", subject = "stay_id")
baseline3_data.to_csv("invasive_line.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15022223.7 (2864143.4),15027990.5 (2886991.8),0.950
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"invasive_line_label, n (%)",0.0,0.0,2659 (40.2),2259 (41.9),400 (32.8),<0.001
"invasive_line_label, n (%)",1.0,,3959 (59.8),3138 (58.1),821 (67.2),
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM ventilation_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "ventilation_label", subject = "stay_id")
baseline3_data.to_csv("ventilation.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15017672.7 (2872403.3),15069426.7 (2834558.7),0.645
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"ventilation_label, n (%)",0.0,0.0,1670 (25.2),1584 (26.8),86 (12.0),<0.001
"ventilation_label, n (%)",1.0,,4948 (74.8),4316 (73.2),632 (88.0),
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM ventilation_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "ventilation_label", subject = "stay_id")
baseline3_data.to_csv("ventilation.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15022223.7 (2864143.4),15027990.5 (2886991.8),0.950
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"ventilation_label, n (%)",0.0,0.0,1670 (25.2),1492 (27.6),178 (14.6),<0.001
"ventilation_label, n (%)",1.0,,4948 (74.8),3905 (72.4),1043 (85.4),
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:
# print("Baseline3")
# # print(baseline3_data)
# baseline3_data = get_summary(baseline3_data, target = "invasive_line_label", subject = "stay_id")
# baseline3_data.to_csv("invasive_line.csv")
# baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
# display(baseline3_table)

  and should_run_async(code)


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM sedative_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "sedative_label", subject = "stay_id")
baseline3_data.to_csv("sedative.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15017672.7 (2872403.3),15069426.7 (2834558.7),0.645
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"sedative_label, n (%)",0.0,0.0,4526 (68.4),4237 (71.8),289 (40.3),<0.001
"sedative_label, n (%)",1.0,,2092 (31.6),1663 (28.2),429 (59.7),
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "sedative_label", subject = "stay_id")
baseline3_data.to_csv("sedative.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15022223.7 (2864143.4),15027990.5 (2886991.8),0.950
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"sedative_label, n (%)",0.0,0.0,4526 (68.4),3842 (71.2),684 (56.0),<0.001
"sedative_label, n (%)",1.0,,2092 (31.6),1555 (28.8),537 (44.0),
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM antibiotic_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "antibiotic_label", subject = "stay_id")
baseline3_data.to_csv("antibiotic.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15017672.7 (2872403.3),15069426.7 (2834558.7),0.645
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"antibiotic_label, n (%)",0.0,0.0,2218 (33.5),2109 (35.7),109 (15.2),<0.001
"antibiotic_label, n (%)",1.0,,4400 (66.5),3791 (64.3),609 (84.8),
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "antibiotic_label", subject = "stay_id")
baseline3_data.to_csv("antibiotic.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15022223.7 (2864143.4),15027990.5 (2886991.8),0.950
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"antibiotic_label, n (%)",0.0,0.0,2218 (33.5),1957 (36.3),261 (21.4),<0.001
"antibiotic_label, n (%)",1.0,,4400 (66.5),3440 (63.7),960 (78.6),
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:

baseline3_data = pd.read_gbq(query + """
SELECT *
FROM vasoactive_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "dobutamine_label", subject = "stay_id")
baseline3_data.to_csv("dobutamine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15017672.7 (2872403.3),15069426.7 (2834558.7),0.645
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"dobutamine_label, n (%)",0.0,0.0,6576 (99.4),5880 (99.7),696 (96.9),<0.001
"dobutamine_label, n (%)",1.0,,42 (0.6),20 (0.3),22 (3.1),
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "dobutamine_label", subject = "stay_id")
baseline3_data.to_csv("dobutamine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15022223.7 (2864143.4),15027990.5 (2886991.8),0.950
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"dobutamine_label, n (%)",0.0,0.0,6576 (99.4),5381 (99.7),1195 (97.9),<0.001
"dobutamine_label, n (%)",1.0,,42 (0.6),16 (0.3),26 (2.1),
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:

baseline3_data = pd.read_gbq(query + """
SELECT *
FROM vasoactive_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "dopamine_label", subject = "stay_id")
baseline3_data.to_csv("dopamine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15017672.7 (2872403.3),15069426.7 (2834558.7),0.645
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"dopamine_label, n (%)",0.0,0.0,6500 (98.2),5833 (98.9),667 (92.9),<0.001
"dopamine_label, n (%)",1.0,,118 (1.8),67 (1.1),51 (7.1),
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "dopamine_label", subject = "stay_id")
baseline3_data.to_csv("dopamine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15022223.7 (2864143.4),15027990.5 (2886991.8),0.950
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"dopamine_label, n (%)",0.0,0.0,6500 (98.2),5336 (98.9),1164 (95.3),<0.001
"dopamine_label, n (%)",1.0,,118 (1.8),61 (1.1),57 (4.7),
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:

baseline3_data = pd.read_gbq(query + """
SELECT *
FROM vasoactive_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "epinephrine_label", subject = "stay_id")
baseline3_data.to_csv("epinephrine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby ="label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15017672.7 (2872403.3),15069426.7 (2834558.7),0.645
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"epinephrine_label, n (%)",0.0,0.0,6516 (98.5),5854 (99.2),662 (92.2),<0.001
"epinephrine_label, n (%)",1.0,,102 (1.5),46 (0.8),56 (7.8),
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "epinephrine_label", subject = "stay_id")
baseline3_data.to_csv("epinephrine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby ="label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15022223.7 (2864143.4),15027990.5 (2886991.8),0.950
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"epinephrine_label, n (%)",0.0,0.0,6516 (98.5),5352 (99.2),1164 (95.3),<0.001
"epinephrine_label, n (%)",1.0,,102 (1.5),45 (0.8),57 (4.7),
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM vasoactive_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "milrinone_label", subject = "stay_id")
baseline3_data.to_csv("milrinone.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15017672.7 (2872403.3),15069426.7 (2834558.7),0.645
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"milrinone_label, n (%)",0.0,0.0,6598 (99.7),5885 (99.7),713 (99.3),0.058
"milrinone_label, n (%)",1.0,,20 (0.3),15 (0.3),5 (0.7),
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "milrinone_label", subject = "stay_id")
baseline3_data.to_csv("milrinone.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15022223.7 (2864143.4),15027990.5 (2886991.8),0.950
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"milrinone_label, n (%)",0.0,0.0,6598 (99.7),5382 (99.7),1216 (99.6),0.397
"milrinone_label, n (%)",1.0,,20 (0.3),15 (0.3),5 (0.4),
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM vasoactive_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "norepinephrine_equivalent_dose_label", subject = "stay_id")
baseline3_data.to_csv("norepinephrine_equivalent_dose.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15017672.7 (2872403.3),15069426.7 (2834558.7),0.645
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"norepinephrine_equivalent_dose_label, n (%)",1.0,0.0,6618 (100.0),5900 (100.0),718 (100.0),1.000
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM vasoactive_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "norepinephrine_equivalent_dose_label", subject = "stay_id")
baseline3_data.to_csv("norepinephrine_equivalent_dose_label.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15017672.7 (2872403.3),15069426.7 (2834558.7),0.645
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"norepinephrine_equivalent_dose_label, n (%)",1.0,0.0,6618 (100.0),5900 (100.0),718 (100.0),1.000
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "norepinephrine_equivalent_dose_label", subject = "stay_id")
baseline3_data.to_csv("norepinephrine_equivalent_dose_label.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15022223.7 (2864143.4),15027990.5 (2886991.8),0.950
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"norepinephrine_equivalent_dose_label, n (%)",1.0,0.0,6618 (100.0),5397 (100.0),1221 (100.0),1.000
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM vasoactive_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "norepinephrine_label", subject = "stay_id")
baseline3_data.to_csv("norepinephrine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15017672.7 (2872403.3),15069426.7 (2834558.7),0.645
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"norepinephrine_label, n (%)",0.0,0.0,5420 (81.9),5092 (86.3),328 (45.7),<0.001
"norepinephrine_label, n (%)",1.0,,1198 (18.1),808 (13.7),390 (54.3),
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM vasoactive_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "phenylephrine_label", subject = "stay_id")
baseline3_data.to_csv("phenylephrine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15022223.7 (2864143.4),15027990.5 (2886991.8),0.950
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"phenylephrine_label, n (%)",0.0,0.0,5703 (86.2),4759 (88.2),944 (77.3),<0.001
"phenylephrine_label, n (%)",1.0,,915 (13.8),638 (11.8),277 (22.7),
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:
print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "phenylephrine_label", subject = "stay_id")
baseline3_data.to_csv("phenylephrine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15022223.7 (2864143.4),15027990.5 (2886991.8),0.950
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"phenylephrine_label, n (%)",0.0,0.0,5703 (86.2),4759 (88.2),944 (77.3),<0.001
"phenylephrine_label, n (%)",1.0,,915 (13.8),638 (11.8),277 (22.7),
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM vasoactive_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "vasopressin_label", subject = "stay_id")
baseline3_data.to_csv("vasopressin.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15017672.7 (2872403.3),15069426.7 (2834558.7),0.645
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"vasopressin_label, n (%)",0.0,0.0,6249 (94.4),5740 (97.3),509 (70.9),<0.001
"vasopressin_label, n (%)",1.0,,369 (5.6),160 (2.7),209 (29.1),
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "vasopressin_label", subject = "stay_id")
baseline3_data.to_csv("vasopressin.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15022223.7 (2864143.4),15027990.5 (2886991.8),0.950
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"vasopressin_label, n (%)",0.0,0.0,6249 (94.4),5257 (97.4),992 (81.2),<0.001
"vasopressin_label, n (%)",1.0,,369 (5.6),140 (2.6),229 (18.8),
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM vasoactive_data
""", "mimic-401105")

print("Baseline3")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,9039,7344,1695,
"subject_id, mean (SD)",,0.0,15025949.6 (2861953.7),15004438.5 (2872577.9),15119151.7 (2814395.7),0.132
"stay_id, mean (SD)",,0.0,34959412.6 (2861573.3),34994494.4 (2846179.0),34807412.0 (2923330.3),0.017
"dobutamine_label, n (%)",0.0,0.0,8963 (99.2),7314 (99.6),1649 (97.3),<0.001
"dobutamine_label, n (%)",1.0,,76 (0.8),30 (0.4),46 (2.7),
"dopamine_label, n (%)",0.0,0.0,8860 (98.0),7248 (98.7),1612 (95.1),<0.001
"dopamine_label, n (%)",1.0,,179 (2.0),96 (1.3),83 (4.9),
"epinephrine_label, n (%)",0.0,0.0,8821 (97.6),7242 (98.6),1579 (93.2),<0.001
"epinephrine_label, n (%)",1.0,,218 (2.4),102 (1.4),116 (6.8),
"milrinone_label, n (%)",0.0,0.0,8975 (99.3),7294 (99.3),1681 (99.2),0.630


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM vasoactive_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary(baseline3_data, target = "norepinephrine_label", subject = "stay_id")
baseline3_data.to_csv("norepinephrine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"subject_id, mean (SD)",,0.0,15023287.6 (2868154.5),15022223.7 (2864143.4),15027990.5 (2886991.8),0.950
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"norepinephrine_label, n (%)",0.0,0.0,5420 (81.9),4664 (86.4),756 (61.9),<0.001
"norepinephrine_label, n (%)",1.0,,1198 (18.1),733 (13.6),465 (38.1),
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM crrt_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary2(baseline3_data, target = "crrt_day", subject = "stay_id")
# baseline3_data.to_csv("norepinephrine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"crrt_day, n (%)",0.0,0.0,6534 (98.7),5854 (99.2),680 (94.7),<0.001
"crrt_day, n (%)",1.0,,84 (1.3),46 (0.8),38 (5.3),
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
print("Baseline3")
baseline3_data = get_summary2(baseline3_data, target = "crrt_day", subject = "stay_id")
# baseline3_data.to_csv("norepinephrine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"crrt_day, n (%)",0.0,0.0,6618 (100.0),5397 (100.0),1221 (100.0),1.000
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM rrt_data
""", "mimic-401105")

print("Baseline3")
baseline3_data = get_summary2(baseline3_data, target = "rrt_day", subject = "stay_id")
# baseline3_data.to_csv("norepinephrine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_icu", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu,Grouped by label_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5900,718,
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35003572.4 (2852957.5),34891240.8 (2927351.6),0.331
"rrt_day, n (%)",0.0,0.0,6446 (97.4),5779 (97.9),667 (92.9),<0.001
"rrt_day, n (%)",1.0,,172 (2.6),121 (2.1),51 (7.1),
"label_hosp, n (%)",0.0,0.0,5397 (81.6),5387 (91.3),10 (1.4),<0.001
"label_hosp, n (%)",1.0,,1221 (18.4),513 (8.7),708 (98.6),


In [None]:
baseline3_data = pd.read_gbq(query + """
SELECT *
FROM rrt_data
""", "mimic-401105")
print("Baseline3")
baseline3_data = get_summary2(baseline3_data, target = "rrt_day", subject = "stay_id")
# baseline3_data.to_csv("norepinephrine.csv")
baseline3_table = TableOne(data = baseline3_data, groupby = "label_hosp", pval = True, pval_adjust=False)
display(baseline3_table)

  and should_run_async(code)


Baseline3


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp,Grouped by label_hosp
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,0,1,P-Value
n,,,6618,5397,1221,
"stay_id, mean (SD)",,0.0,34991385.3 (2861110.5),35005508.1 (2848887.5),34928960.3 (2914888.3),0.405
"rrt_day, n (%)",0.0,0.0,6446 (97.4),5295 (98.1),1151 (94.3),<0.001
"rrt_day, n (%)",1.0,,172 (2.6),102 (1.9),70 (5.7),
"label_icu, n (%)",0.0,0.0,5900 (89.2),5387 (99.8),513 (42.0),<0.001
"label_icu, n (%)",1.0,,718 (10.8),10 (0.2),708 (58.0),


## Preprocess data for **first day** and **measurement**
require files:


*   first_day.csv
*   measurement.csv



In [None]:
data = pd.read_csv("first_day.csv")
data_measurement = pd.read_csv("measurement.csv")

### Remove Outliers

In [None]:
def remove_outlier(data, include = []):
    new_data = []
    mean = data.mean()
    std = data.std()

    for index, row in data.iterrows():
        temp = []

        for key in data.keys():
            if key not in include:
                temp.append(row[key])
                continue

            if ((row[key] - mean[key]) / std[key] > 2):
                print(f"{key}: {row[key]}")
                temp.append(None)

            else:
                temp.append(row[key])
        new_data.append(temp)

    return pd.DataFrame(new_data, columns = data.keys())

In [None]:
new_data = remove_outlier(data, list(data.keys()))

### Fill missing data

In [None]:
def fill_missing(data, exclude_flag = True, exclude = [], include = [], type = "by_median", target = "", value = 0):
    ## exclude_flag: True if want to exclude a list, False is want to specify include instead
    ## type: "by_target" or "by_median"

    new_data = []

    if type == "by_median":
        d_median = {}
        for key in data.keys():
            if ((exclude_flag and key in exclude) or (not exclude_flag and key not in include)):
                continue

            median = data[key].median()
            print(f"MEDIAN of  '{key}' = {median}")
            d_median[key] = median

        for index, row in data.iterrows():
            temp = []
            for key in data.keys():
                if ((exclude_flag and key in exclude) or (not exclude_flag and key not in include)):
                    temp.append(row[key])
                    continue

                if row[key] == None or math.isnan(row[key]):
                    temp.append(d_median[key])

                else:
                    temp.append(row[key])
            new_data.append(temp)

    elif type == "by_target":
        d_target = {}
        recorded = []
        for index, row in data.iterrows():
            ID = row[target]
            if (row[target] in recorded) or (row[include[0]] == None or math.isnan(row[include[0]])):
                continue
            d_target[ID] = row[include[0]]
        # print(d_target)
        for index, row in data.iterrows():
            temp = row;
            if (d_target.get(row[target]) == None):
                new_data.append(temp)
                continue

            if (row[include[0]] == None or math.isnan(row[include[0]])):
                temp[include[0]] = d_target[row[target]]

            new_data.append(temp)

    elif type == "by_value":

        for inc in include:
            data[inc].fillna(value, inplace=True)

        new_data = data

    elif type == "by_majority":
        d_data = {}
        for index, row in data.iterrows():

            if (row[include[0]] == None):
                continue

            if (d_data.get(row[include[0]]) == None):
                d_data[row[include[0]]] = 1

            else:
                d_data[row[include[0]]] += 1

        max_val = ""
        max_count = 0

        for key in d_data:
            if d_data[key] > max_count:
                max_count = d_data[key]
                max_val = key

        print(max_val)
        data[include[0]].fillna(max_val, inplace=True)
        new_data = data

    return pd.DataFrame(new_data, columns = data.keys())

In [None]:
new_data = fill_missing(new_data, exclude_flag = False, include = ["height"], target = "subject_id", type = "by_target")
new_data = fill_missing(new_data, exclude_flag = False, include = ["weight"], target = "subject_id", type = "by_target")
new_data = fill_missing(new_data, exclude_flag = True, exclude = ["subject_id", "stay_id"], type = "by_median")

In [None]:
new_data = new_data.astype({'subject_id':'int', 'stay_id':'int','dialysis_present':'int', 'SOFA': 'int'})
new_data.to_csv("fill missing/first_day.csv", index = False)

In [None]:
new_data = fill_missing(data_measurement, exclude_flag = False, include = ["crp"], type = "by_value", value = 0)
new_data = fill_missing(new_data, exclude_flag = False, include = ["o2_delivery_device_1"], type = "by_majority", value = 0)
new_data = fill_missing(new_data, exclude_flag = False, include = ["heart_rhythm"], type = "by_majority", value = 0)
new_data = fill_missing(new_data, exclude_flag = False, include = ["ectopy_type", "ectopy_frequency", "ectopy_frequency_secondary", "ectopy_type_secondary"], type = "by_value", value = "None")

In [None]:
new_data.to_csv("fill missing/measurement.csv", index = False)

## Calculate p-value for all data in baseline

required file:


*   baseline_processed.csv: contains all preprocessed data in baseline



In [None]:

data = pd.read_csv("baseline_processed.csv")
baseline_table = TableOne(data =data.drop(columns = ['label_icu',  'hosp_timestep_back', 'icu_timestep_back', 'stay_id']), groupby = "label_hosp", pval = True, pval_adjust=False)
print(baseline_table)
# write.csv(baseline_table, file = "TestTable_icu.csv")
baseline_table.to_excel('mytable.xlsx')

  and should_run_async(code)


FileNotFoundError: ignored