In [56]:
import duckdb
duckdb.sql("ATTACH 'dbname=mimic4 user=szu004' AS db (TYPE POSTGRES, READ_ONLY)")
duckdb.sql("USE db")

BinderException: Binder Error: Failed to attach database: database with name "db" already exists

In [None]:
# ALL ICU stays (paper reports 765,40)
duckdb.sql("SELECT COUNT(*) as icu_stay_count FROM mimiciv_derived.icustay_detail")

In [None]:
# ALL Unique Patients (paper reports 53,150)
duckdb.sql("SELECT COUNT(DISTINCT subject_id) as patient_count FROM mimiciv_derived.icustay_detail")

In [None]:
# Patients with documented enthicity
duckdb.sql("""
WITH patients AS (
    SELECT subject_id,
    CASE
        WHEN race LIKE 'ASIAN%' THEN 'ASIAN'
        WHEN race LIKE 'BLACK%' THEN 'BLACK'
        WHEN race LIKE 'WHITE%' THEN 'WHITE'
        WHEN race LIKE 'HISPANIC%' THEN 'HISPANIC'
        ELSE 'UNKNOWN'
        END AS race_category
    FROM mimiciv_derived.icustay_detail WHERE first_icu_stay AND first_hosp_stay)
SELECT race_category, COUNT(*) as patient_count FROM patients WHERE RACE_CATEGORY != 'UNKNOWN' GROUP BY race_category ORDER BY race_category
""")

In [57]:
patient_with_race = duckdb.sql("""
WITH patients AS (
    SELECT *,
    CASE
        WHEN race LIKE 'ASIAN%' THEN 'ASIAN'
        WHEN race LIKE 'BLACK%' THEN 'BLACK'
        WHEN race LIKE 'WHITE%' THEN 'WHITE'
        WHEN race LIKE 'HISPANIC%' THEN 'HISPANIC'
        ELSE 'UNKNOWN'
        END AS race_category
    FROM mimiciv_derived.icustay_detail WHERE first_icu_stay AND first_hosp_stay)
SELECT subject_id, stay_id, icu_intime, icu_outtime, race_category  FROM patients WHERE race_category != 'UNKNOWN'
""")
patient_with_race

┌────────────┬──────────┬─────────────────────┬─────────────────────┬───────────────┐
│ subject_id │ stay_id  │     icu_intime      │     icu_outtime     │ race_category │
│   int32    │  int32   │      timestamp      │      timestamp      │    varchar    │
├────────────┼──────────┼─────────────────────┼─────────────────────┼───────────────┤
│   10000032 │ 39553978 │ 2180-07-23 14:00:00 │ 2180-07-23 23:50:47 │ WHITE         │
│   10000980 │ 39765666 │ 2189-06-27 08:42:00 │ 2189-06-27 20:38:27 │ BLACK         │
│   10001217 │ 37067082 │ 2157-11-20 19:18:02 │ 2157-11-21 22:08:00 │ WHITE         │
│   10001725 │ 31205490 │ 2110-04-11 15:52:22 │ 2110-04-12 23:59:56 │ WHITE         │
│   10001884 │ 37510196 │ 2131-01-11 04:20:05 │ 2131-01-20 08:27:30 │ BLACK         │
│   10002155 │ 33685454 │ 2129-08-04 12:45:00 │ 2129-08-10 17:02:38 │ WHITE         │
│   10002348 │ 32610785 │ 2112-11-30 23:24:00 │ 2112-12-10 18:25:13 │ WHITE         │
│   10002428 │ 33987268 │ 2156-04-12 16:24:18 │ 2156-0

In [58]:
# Make sure that patients are unique
duckdb.sql("""
SELECT subject_id, COUNT(*) as patient_count FROM patient_with_race GROUP BY subject_id HAVING COUNT(*) > 1
""")

┌────────────┬───────────────┐
│ subject_id │ patient_count │
│   int32    │     int64     │
├────────────┴───────────────┤
│           0 rows           │
└────────────────────────────┘

In [59]:
# Make sure that patients are unique
duckdb.sql("""
SELECT COUNT(*) FROM patient_with_race
""")

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        42075 │
└──────────────┘

In [60]:
# Compute patients index period and select valid parient
stays_with_interventions = duckdb.sql("""
WITH first_vent_intervention AS (
    SELECT stay_id, MIN(starttime) as  starttime FROM mimiciv_derived.ventilation
    WHERE ventilation_status NOT in ('None', 'SupplementalOxygen') GROUP BY stay_id)
SELECT pwr.*,v.* FROM patient_with_race AS pwr
LEFT OUTER JOIN first_vent_intervention AS v ON pwr.stay_id = v.stay_id
""")
patient_with_index_period=duckdb.sql("""
WITH patient_with_period AS (SELECT subject_id, stay_id, race_category, 
icu_intime AS ip_starttime, 
LEAST(icu_outtime, starttime, icu_intime + interval '5 days') AS ip_endtime FROM stays_with_interventions)
SELECT *,(ip_endtime-ip_starttime) AS ip_duration FROM patient_with_period 
WHERE (ip_endtime-ip_starttime) >= interval '12 hours'
""")
patient_with_index_period

┌────────────┬──────────┬───────────────┬─────────────────────┬─────────────────────┬─────────────────┐
│ subject_id │ stay_id  │ race_category │    ip_starttime     │     ip_endtime      │   ip_duration   │
│   int32    │  int32   │    varchar    │      timestamp      │      timestamp      │    interval     │
├────────────┼──────────┼───────────────┼─────────────────────┼─────────────────────┼─────────────────┤
│   10001217 │ 37067082 │ WHITE         │ 2157-11-20 19:18:02 │ 2157-11-21 22:08:00 │ 1 day 02:49:58  │
│   10001725 │ 31205490 │ WHITE         │ 2110-04-11 15:52:22 │ 2110-04-12 23:59:56 │ 1 day 08:07:34  │
│   10002155 │ 33685454 │ WHITE         │ 2129-08-04 12:45:00 │ 2129-08-09 12:45:00 │ 5 days          │
│   10002348 │ 32610785 │ WHITE         │ 2112-11-30 23:24:00 │ 2112-12-05 23:24:00 │ 5 days          │
│   10002428 │ 33987268 │ WHITE         │ 2156-04-12 16:24:18 │ 2156-04-17 15:57:08 │ 4 days 23:32:50 │
│   10002443 │ 35044219 │ WHITE         │ 2183-10-18 00:47:00 │ 

In [61]:
duckdb.sql("""
SELECT COUNT( subject_id) FROM patient_with_index_period
""")

┌───────────────────┐
│ count(subject_id) │
│       int64       │
├───────────────────┤
│             26521 │
└───────────────────┘

In [62]:
patients_with_flow = duckdb.sql("""
SELECT pwr.subject_id, pwr.race_category,od.charttime as flow_time, od.o2_flow FROM patient_with_index_period AS pwr
JOIN mimiciv_derived.oxygen_delivery AS od ON  pwr.stay_id = od.stay_id
WHERE od.o2_delivery_device_1 = 'Nasal cannula' AND od.charttime BETWEEN pwr.ip_starttime AND pwr.ip_endtime AND od.o2_flow IS NOT NULL
ORDER BY pwr.subject_id, od.charttime
""")
patients_with_flow

┌────────────┬───────────────┬─────────────────────┬─────────┐
│ subject_id │ race_category │      flow_time      │ o2_flow │
│   int32    │    varchar    │      timestamp      │ double  │
├────────────┼───────────────┼─────────────────────┼─────────┤
│   10001217 │ WHITE         │ 2157-11-20 19:47:00 │     2.0 │
│   10001217 │ WHITE         │ 2157-11-20 22:00:00 │     2.0 │
│   10001217 │ WHITE         │ 2157-11-21 00:00:00 │     2.0 │
│   10001217 │ WHITE         │ 2157-11-21 02:00:00 │     2.0 │
│   10001217 │ WHITE         │ 2157-11-21 08:00:00 │     2.0 │
│   10001217 │ WHITE         │ 2157-11-21 18:00:00 │     3.0 │
│   10001217 │ WHITE         │ 2157-11-21 20:00:00 │     4.0 │
│   10001725 │ WHITE         │ 2110-04-11 16:07:00 │     2.0 │
│   10001725 │ WHITE         │ 2110-04-11 20:00:00 │     2.0 │
│   10001725 │ WHITE         │ 2110-04-12 02:00:00 │     2.0 │
│       ·    │   ·           │          ·          │      ·  │
│       ·    │   ·           │          ·          │   

In [63]:
duckdb.sql("""
SELECT subject_id, COUNT(*) AS flow_count FROM patients_with_flow GROUP BY subject_id ORDER BY flow_count DESC
""")

┌────────────┬────────────┐
│ subject_id │ flow_count │
│   int32    │   int64    │
├────────────┼────────────┤
│   17552487 │         48 │
│   18679942 │         44 │
│   11153132 │         43 │
│   13280145 │         43 │
│   15022555 │         42 │
│   12806413 │         41 │
│   19411951 │         40 │
│   18881392 │         40 │
│   19717191 │         40 │
│   19203810 │         39 │
│       ·    │          · │
│       ·    │          · │
│       ·    │          · │
│   18795090 │          4 │
│   18809075 │          4 │
│   18844748 │          4 │
│   18942549 │          4 │
│   18955379 │          4 │
│   19005505 │          4 │
│   19067228 │          4 │
│   19075481 │          4 │
│   19101726 │          4 │
│   19148949 │          4 │
├────────────┴────────────┤
│ ? rows        2 columns │
└─────────────────────────┘

In [64]:
# Save the data
#patients_with_flow.to_csv('../data/patients_with_flow.csv')

### Sp02

In [67]:
patients_with_spo2 = duckdb.sql("""
SELECT pwr.subject_id, pwr.race_category,vs.charttime as read_time, vs.spo2 FROM patient_with_index_period AS pwr
JOIN mimiciv_derived.vitalsign AS vs ON  pwr.stay_id = vs.stay_id
WHERE vs.charttime BETWEEN pwr.ip_starttime AND pwr.ip_endtime AND vs.spo2 IS NOT NULL
ORDER BY pwr.subject_id, vs.charttime
""")
patients_with_spo2

┌────────────┬───────────────┬─────────────────────┬────────┐
│ subject_id │ race_category │      read_time      │  spo2  │
│   int32    │    varchar    │      timestamp      │ double │
├────────────┼───────────────┼─────────────────────┼────────┤
│   10001217 │ WHITE         │ 2157-11-20 19:19:00 │   99.0 │
│   10001217 │ WHITE         │ 2157-11-20 20:00:00 │   98.0 │
│   10001217 │ WHITE         │ 2157-11-20 21:00:00 │   98.0 │
│   10001217 │ WHITE         │ 2157-11-20 22:00:00 │   95.0 │
│   10001217 │ WHITE         │ 2157-11-20 23:00:00 │   98.0 │
│   10001217 │ WHITE         │ 2157-11-21 00:00:00 │   94.0 │
│   10001217 │ WHITE         │ 2157-11-21 01:00:00 │   93.0 │
│   10001217 │ WHITE         │ 2157-11-21 02:00:00 │   98.0 │
│   10001217 │ WHITE         │ 2157-11-21 03:00:00 │   98.0 │
│   10001217 │ WHITE         │ 2157-11-21 04:00:00 │   98.0 │
│       ·    │   ·           │          ·          │     ·  │
│       ·    │   ·           │          ·          │     ·  │
│       

In [70]:
patients_with_so2 = duckdb.sql("""
SELECT pwr.subject_id, pwr.race_category,bg.charttime as read_time, bg.so2 FROM patient_with_index_period AS pwr
JOIN mimiciv_derived.bg AS bg ON  pwr.subject_id = bg.subject_id
WHERE bg.charttime BETWEEN pwr.ip_starttime AND pwr.ip_endtime AND bg.so2 IS NOT NULL AND bg.specimen = 'ART.'
ORDER BY pwr.subject_id, bg.charttime
""")
patients_with_so2

┌────────────┬───────────────┬─────────────────────┬────────┐
│ subject_id │ race_category │      read_time      │  so2   │
│   int32    │    varchar    │      timestamp      │ double │
├────────────┼───────────────┼─────────────────────┼────────┤
│   10002155 │ WHITE         │ 2129-08-05 07:48:00 │   94.0 │
│   10002443 │ WHITE         │ 2183-10-18 02:35:00 │   90.0 │
│   10004401 │ WHITE         │ 2144-01-27 02:39:00 │   95.0 │
│   10005817 │ WHITE         │ 2132-12-15 16:34:00 │   97.0 │
│   10005817 │ WHITE         │ 2132-12-15 18:47:00 │   97.0 │
│   10005817 │ WHITE         │ 2132-12-15 20:20:00 │   95.0 │
│   10008454 │ WHITE         │ 2110-12-03 11:51:00 │   88.0 │
│   10008454 │ WHITE         │ 2110-12-03 13:05:00 │   98.0 │
│   10013049 │ WHITE         │ 2114-06-20 12:57:00 │   96.0 │
│   10013569 │ WHITE         │ 2167-11-29 09:34:00 │   93.0 │
│       ·    │   ·           │          ·          │     ·  │
│       ·    │   ·           │          ·          │     ·  │
│       

In [71]:
# Save the data
patients_with_spo2.to_csv('../data/patients_with_spo2.csv')
patients_with_so2.to_csv('../data/patients_with_so2.csv')