In [1]:
import os

import duckdb

# Attach the MIMIC-IV PostgreSQL database read-only under the alias `db` and
# make it the default catalog, so later queries can refer to
# `mimiciv_derived.<table>` directly.
# The database name and user default to the original hard-coded values but can
# be overridden through environment variables, so the notebook is not tied to
# one person's account.
MIMIC_DBNAME = os.environ.get("MIMIC_DBNAME", "mimic4")
MIMIC_USER = os.environ.get("MIMIC_USER", "szu004")
duckdb.sql(f"ATTACH 'dbname={MIMIC_DBNAME} user={MIMIC_USER}' AS db (TYPE POSTGRES, READ_ONLY)")
duckdb.sql("USE db")

In [2]:
# Count ALL ICU stays in the derived icustay_detail table, before any cohort
# filtering. The original note read "765,40", presumably a misplaced comma for
# the 76,540 stays reported in the paper -- TODO confirm against the paper.
duckdb.sql("SELECT COUNT(*) as icu_stay_count FROM mimiciv_derived.icustay_detail")

┌────────────────┐
│ icu_stay_count │
│     int64      │
├────────────────┤
│          73181 │
└────────────────┘

In [3]:
# Build the base cohort: ICU stays flagged as both the first ICU stay and the
# first hospital stay in icustay_detail.
# The `race` value is collapsed by prefix into ASIAN / BLACK / WHITE /
# HISPANIC; every other value becomes 'UNKNOWN' and is removed by the final
# WHERE clause.
# NOTE(review): because this is a prefix match, any race label that does not
# start with one of these four words is excluded as well -- confirm this
# matches the intended cohort definition.
patient_with_race = duckdb.sql("""
WITH patients AS (
    SELECT *,
    CASE
        WHEN race LIKE 'ASIAN%' THEN 'ASIAN'
        WHEN race LIKE 'BLACK%' THEN 'BLACK'
        WHEN race LIKE 'WHITE%' THEN 'WHITE'
        WHEN race LIKE 'HISPANIC%' THEN 'HISPANIC'
        ELSE 'UNKNOWN'
        END AS race_category
    FROM mimiciv_derived.icustay_detail WHERE first_icu_stay AND first_hosp_stay)
SELECT subject_id, stay_id, icu_intime, icu_outtime, race_category, gender  FROM patients WHERE race_category != 'UNKNOWN'
""")
# Display a preview of the resulting relation.
patient_with_race

┌────────────┬──────────┬─────────────────────┬─────────────────────┬───────────────┬─────────┐
│ subject_id │ stay_id  │     icu_intime      │     icu_outtime     │ race_category │ gender  │
│   int32    │  int32   │      timestamp      │      timestamp      │    varchar    │ varchar │
├────────────┼──────────┼─────────────────────┼─────────────────────┼───────────────┼─────────┤
│   10000032 │ 39553978 │ 2180-07-23 14:00:00 │ 2180-07-23 23:50:47 │ WHITE         │ F       │
│   10000980 │ 39765666 │ 2189-06-27 08:42:00 │ 2189-06-27 20:38:27 │ BLACK         │ F       │
│   10001217 │ 37067082 │ 2157-11-20 19:18:02 │ 2157-11-21 22:08:00 │ WHITE         │ F       │
│   10001725 │ 31205490 │ 2110-04-11 15:52:22 │ 2110-04-12 23:59:56 │ WHITE         │ F       │
│   10001884 │ 37510196 │ 2131-01-11 04:20:05 │ 2131-01-20 08:27:30 │ BLACK         │ F       │
│   10002155 │ 33685454 │ 2129-08-04 12:45:00 │ 2129-08-10 17:02:38 │ WHITE         │ F       │
│   10002348 │ 32610785 │ 2112-11-30 23:

In [4]:
# Sanity check that patients are unique: list every subject_id that appears
# more than once in patient_with_race (the SQL refers to the Python variable
# by name). An empty result means each patient contributes a single row.
duckdb.sql("""
SELECT subject_id, COUNT(*) as patient_count FROM patient_with_race GROUP BY subject_id HAVING COUNT(*) > 1
""")

┌────────────┬───────────────┐
│ subject_id │ patient_count │
│   int32    │     int64     │
├────────────┴───────────────┤
│           0 rows           │
└────────────────────────────┘

In [5]:
# Cohort size after the first-stay and race filters: total number of rows in
# patient_with_race.
duckdb.sql("""
SELECT COUNT(*) FROM patient_with_race
""")

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        42075 │
└──────────────┘

In [6]:
# Compute each patient's index period and keep only valid patients.
#
# Step 1: attach to every cohort stay the start time of its first ventilation
# record whose status is anything other than 'None' or 'SupplementalOxygen'.
# The LEFT OUTER JOIN keeps stays with no such record (starttime is NULL).
# Only `starttime` is taken from the ventilation side: selecting `v.*` would
# also pull in a second, duplicate `stay_id` column.
stays_with_interventions = duckdb.sql("""
WITH first_vent_intervention AS (
    SELECT stay_id, MIN(starttime) as  starttime FROM mimiciv_derived.ventilation
    WHERE ventilation_status NOT in ('None', 'SupplementalOxygen') GROUP BY stay_id)
SELECT pwr.*,v.starttime FROM patient_with_race AS pwr
LEFT OUTER JOIN first_vent_intervention AS v ON pwr.stay_id = v.stay_id
""")
# Step 2: the index period starts at ICU admission and ends at the earliest of
# ICU discharge, first ventilation start, or 5 days after admission. Stays
# whose index period is shorter than 12 hours are discarded.
# NOTE(review): this relies on LEAST skipping a NULL `starttime` for stays
# without ventilation -- confirm for the DuckDB version in use.
patient_with_index_period = duckdb.sql("""
WITH patient_with_period AS (SELECT subject_id, stay_id, gender, race_category, 
icu_intime AS ip_starttime, 
LEAST(icu_outtime, starttime, icu_intime + interval '5 days') AS ip_endtime FROM stays_with_interventions)
SELECT *,(ip_endtime-ip_starttime) AS ip_duration FROM patient_with_period 
WHERE (ip_endtime-ip_starttime) >= interval '12 hours'
""")
# Display a preview of the resulting relation.
patient_with_index_period

┌────────────┬──────────┬─────────┬───────────────┬─────────────────────┬─────────────────────┬─────────────────┐
│ subject_id │ stay_id  │ gender  │ race_category │    ip_starttime     │     ip_endtime      │   ip_duration   │
│   int32    │  int32   │ varchar │    varchar    │      timestamp      │      timestamp      │    interval     │
├────────────┼──────────┼─────────┼───────────────┼─────────────────────┼─────────────────────┼─────────────────┤
│   10001217 │ 37067082 │ F       │ WHITE         │ 2157-11-20 19:18:02 │ 2157-11-21 22:08:00 │ 1 day 02:49:58  │
│   10001725 │ 31205490 │ F       │ WHITE         │ 2110-04-11 15:52:22 │ 2110-04-12 23:59:56 │ 1 day 08:07:34  │
│   10002155 │ 33685454 │ F       │ WHITE         │ 2129-08-04 12:45:00 │ 2129-08-09 12:45:00 │ 5 days          │
│   10002348 │ 32610785 │ F       │ WHITE         │ 2112-11-30 23:24:00 │ 2112-12-05 23:24:00 │ 5 days          │
│   10002428 │ 33987268 │ F       │ WHITE         │ 2156-04-12 16:24:18 │ 2156-04-17 15:

In [7]:
# Number of rows with a non-NULL subject_id that remain after the
# index-period filter (index period of at least 12 hours).
duckdb.sql("""
SELECT COUNT( subject_id) FROM patient_with_index_period
""")

┌───────────────────┐
│ count(subject_id) │
│       int64       │
├───────────────────┤
│             26521 │
└───────────────────┘

In [8]:
# Oxygen flow readings: non-NULL o2_flow values charted within each patient's
# index period (both bounds inclusive), matched on stay_id and restricted to
# rows whose first delivery device is 'Nasal cannula'.
# NOTE(review): only o2_delivery_device_1 is checked; a nasal cannula recorded
# in another device column would be missed -- confirm this is intended.
reading_o2_flow = duckdb.sql("""
SELECT pwr.subject_id, od.charttime as chart_time, od.o2_flow FROM patient_with_index_period AS pwr
JOIN mimiciv_derived.oxygen_delivery AS od ON  pwr.stay_id = od.stay_id
WHERE od.o2_delivery_device_1 = 'Nasal cannula' AND od.charttime BETWEEN pwr.ip_starttime AND pwr.ip_endtime AND od.o2_flow IS NOT NULL
ORDER BY pwr.subject_id, od.charttime
""")
# Display a preview of the resulting relation.
reading_o2_flow

┌────────────┬─────────────────────┬─────────┐
│ subject_id │     chart_time      │ o2_flow │
│   int32    │      timestamp      │ double  │
├────────────┼─────────────────────┼─────────┤
│   10001217 │ 2157-11-20 19:47:00 │     2.0 │
│   10001217 │ 2157-11-20 22:00:00 │     2.0 │
│   10001217 │ 2157-11-21 00:00:00 │     2.0 │
│   10001217 │ 2157-11-21 02:00:00 │     2.0 │
│   10001217 │ 2157-11-21 08:00:00 │     2.0 │
│   10001217 │ 2157-11-21 18:00:00 │     3.0 │
│   10001217 │ 2157-11-21 20:00:00 │     4.0 │
│   10001725 │ 2110-04-11 16:07:00 │     2.0 │
│   10001725 │ 2110-04-11 20:00:00 │     2.0 │
│   10001725 │ 2110-04-12 02:00:00 │     2.0 │
│       ·    │          ·          │      ·  │
│       ·    │          ·          │      ·  │
│       ·    │          ·          │      ·  │
│   10828138 │ 2154-10-11 04:00:00 │     4.0 │
│   10828138 │ 2154-10-11 08:00:00 │     4.0 │
│   10828138 │ 2154-10-11 11:00:00 │     2.0 │
│   10828209 │ 2114-11-05 04:32:00 │     3.0 │
│   10828209 

### SpO2

In [9]:
# SpO2 readings: non-NULL spo2 values from the vitalsign table charted within
# each patient's index period (both bounds inclusive), matched on stay_id and
# ordered by patient and chart time.
reading_spo2 = duckdb.sql("""
SELECT pwr.subject_id, vs.charttime as chart_time, vs.spo2 FROM patient_with_index_period AS pwr
JOIN mimiciv_derived.vitalsign AS vs ON  pwr.stay_id = vs.stay_id
WHERE vs.charttime BETWEEN pwr.ip_starttime AND pwr.ip_endtime AND vs.spo2 IS NOT NULL
ORDER BY pwr.subject_id, vs.charttime
""")
# Display a preview of the resulting relation.
reading_spo2

┌────────────┬─────────────────────┬────────┐
│ subject_id │     chart_time      │  spo2  │
│   int32    │      timestamp      │ double │
├────────────┼─────────────────────┼────────┤
│   10001217 │ 2157-11-20 19:19:00 │   99.0 │
│   10001217 │ 2157-11-20 20:00:00 │   98.0 │
│   10001217 │ 2157-11-20 21:00:00 │   98.0 │
│   10001217 │ 2157-11-20 22:00:00 │   95.0 │
│   10001217 │ 2157-11-20 23:00:00 │   98.0 │
│   10001217 │ 2157-11-21 00:00:00 │   94.0 │
│   10001217 │ 2157-11-21 01:00:00 │   93.0 │
│   10001217 │ 2157-11-21 02:00:00 │   98.0 │
│   10001217 │ 2157-11-21 03:00:00 │   98.0 │
│   10001217 │ 2157-11-21 04:00:00 │   98.0 │
│       ·    │          ·          │     ·  │
│       ·    │          ·          │     ·  │
│       ·    │          ·          │     ·  │
│   10078723 │ 2161-10-29 05:00:00 │   94.0 │
│   10078723 │ 2161-10-29 06:00:00 │   90.0 │
│   10078723 │ 2161-10-29 07:00:00 │   93.0 │
│   10078723 │ 2161-10-29 08:00:00 │   96.0 │
│   10078723 │ 2161-10-29 09:00:00

In [10]:
# SO2 readings: non-NULL so2 values from the blood-gas table with specimen
# 'ART.', charted within each patient's index period (both bounds inclusive).
# Unlike the other reading queries, this one joins on subject_id rather than
# stay_id; the charttime window is what ties a measurement to the index period.
# NOTE(review): presumably the bg table is not keyed by ICU stay -- confirm.
reading_so2 = duckdb.sql("""
SELECT pwr.subject_id, bg.charttime as chart_time, bg.so2 FROM patient_with_index_period AS pwr
JOIN mimiciv_derived.bg AS bg ON  pwr.subject_id = bg.subject_id
WHERE bg.charttime BETWEEN pwr.ip_starttime AND pwr.ip_endtime AND bg.so2 IS NOT NULL AND bg.specimen = 'ART.'
ORDER BY pwr.subject_id, bg.charttime
""")
# Display a preview of the resulting relation.
reading_so2

┌────────────┬─────────────────────┬────────┐
│ subject_id │     chart_time      │  so2   │
│   int32    │      timestamp      │ double │
├────────────┼─────────────────────┼────────┤
│   10002155 │ 2129-08-05 07:48:00 │   94.0 │
│   10002443 │ 2183-10-18 02:35:00 │   90.0 │
│   10004401 │ 2144-01-27 02:39:00 │   95.0 │
│   10005817 │ 2132-12-15 16:34:00 │   97.0 │
│   10005817 │ 2132-12-15 18:47:00 │   97.0 │
│   10005817 │ 2132-12-15 20:20:00 │   95.0 │
│   10008454 │ 2110-12-03 11:51:00 │   88.0 │
│   10008454 │ 2110-12-03 13:05:00 │   98.0 │
│   10013049 │ 2114-06-20 12:57:00 │   96.0 │
│   10013569 │ 2167-11-29 09:34:00 │   93.0 │
│       ·    │          ·          │     ·  │
│       ·    │          ·          │     ·  │
│       ·    │          ·          │     ·  │
│   19983257 │ 2166-01-11 05:07:00 │   89.0 │
│   19983257 │ 2166-01-11 09:34:00 │   93.0 │
│   19983257 │ 2166-01-11 12:51:00 │   93.0 │
│   19983257 │ 2166-01-11 16:28:00 │   87.0 │
│   19983257 │ 2166-01-12 00:20:00

## Perform final extract

In [11]:
# Final cohort: patients from patient_with_index_period that have at least one
# reading of ANY of the three kinds (O2 flow, SpO2 or SO2) during their index
# period. The three EXISTS checks are combined with OR, so a single kind of
# reading is enough for inclusion.
coh_subject = duckdb.sql("""
SELECT subject_id, gender, race_category FROM patient_with_index_period AS pwr
WHERE 
    EXISTS( SELECT 1 FROM reading_o2_flow AS rof WHERE pwr.subject_id = rof.subject_id)
    OR EXISTS( SELECT 1 FROM reading_spo2 AS rs WHERE pwr.subject_id = rs.subject_id)
    OR EXISTS( SELECT 1 FROM reading_so2 AS rso WHERE pwr.subject_id = rso.subject_id)
""")
# Display a preview of the resulting relation.
coh_subject

┌────────────┬─────────┬───────────────┐
│ subject_id │ gender  │ race_category │
│   int32    │ varchar │    varchar    │
├────────────┼─────────┼───────────────┤
│   10001217 │ F       │ WHITE         │
│   10001725 │ F       │ WHITE         │
│   10002155 │ F       │ WHITE         │
│   10002348 │ F       │ WHITE         │
│   10002428 │ F       │ WHITE         │
│   10002443 │ M       │ WHITE         │
│   10003046 │ M       │ WHITE         │
│   10003502 │ F       │ WHITE         │
│   10004113 │ M       │ WHITE         │
│   10005817 │ M       │ WHITE         │
│       ·    │ ·       │   ·           │
│       ·    │ ·       │   ·           │
│       ·    │ ·       │   ·           │
│   13884068 │ M       │ HISPANIC      │
│   13885556 │ F       │ WHITE         │
│   13886737 │ M       │ WHITE         │
│   13887637 │ M       │ WHITE         │
│   13889463 │ M       │ WHITE         │
│   13889721 │ F       │ WHITE         │
│   13890121 │ F       │ WHITE         │
│   13890865 │ F

In [12]:
# Breakdown of the final cohort: number of patients per race category.
duckdb.sql("""
SELECT race_category, COUNT(*) FROM coh_subject GROUP BY  race_category
""")

┌───────────────┬──────────────┐
│ race_category │ count_star() │
│    varchar    │    int64     │
├───────────────┼──────────────┤
│ HISPANIC      │         1061 │
│ WHITE         │        21347 │
│ ASIAN         │          949 │
│ BLACK         │         3153 │
└───────────────┴──────────────┘

In [13]:
# Restrict each reading table to the patients present in coh_subject. The same
# EXISTS filter is applied to all three tables; all columns are kept.
# O2 flow readings for cohort patients.
coh_reading_o2_flow = duckdb.sql("""
SELECT * FROM reading_o2_flow AS rd WHERE EXISTS( SELECT 1 FROM coh_subject AS cs WHERE rd.subject_id = cs.subject_id)
""")
# SpO2 readings for cohort patients.
coh_reading_spo2 = duckdb.sql("""
SELECT * FROM reading_spo2 AS rd WHERE EXISTS( SELECT 1 FROM coh_subject AS cs WHERE rd.subject_id = cs.subject_id)
""")
# SO2 readings for cohort patients.
coh_reading_so2 = duckdb.sql("""
SELECT * FROM reading_so2 AS rd WHERE EXISTS( SELECT 1 FROM coh_subject AS cs WHERE rd.subject_id = cs.subject_id)
""")

### Save the extract to CSV

In [14]:
from pathlib import Path

# Write the cohort table and its three reading tables to CSV files.
# The output directory is created first (including missing parents) so the
# export does not depend on ../data/psql already existing.
Path('../data/psql').mkdir(parents=True, exist_ok=True)
coh_subject.to_csv('../data/psql/subject.csv')
coh_reading_o2_flow.to_csv('../data/psql/reading_o2_flow.csv')
coh_reading_spo2.to_csv('../data/psql/reading_spo2.csv')
coh_reading_so2.to_csv('../data/psql/reading_so2.csv')