In [None]:
import psycopg2

# Connection settings for the local MIMIC PostgreSQL database:
# the database name and the role used to log in.
dbname, user = "mimic", "username"

In [None]:
# Builds one row per (subject_id, hadm_id, icustay_id, icu_day) holding the
# total chloride input (mEq) and the maximum chloride lab value for that day.
# icu_day is 1-based relative to the ICU intime and is limited to days 1-28.
#
# chloride_input:
# - Administrations from inputevents_mv are split between boluses and (long
#   term) rate-based drips. While there are columns explicitly denoting this
#   difference, we identify long term administrations by checking if rate data
#   is available. This avoids string-parsing of order category data.
# - We calculate daily rates using total amounts to avoid unnecessary
#   unit-parsing, since a very small number of rows use mL/min instead of
#   mL/hour (see itemid 225158). This should be okay since the rates seem to be
#   derived from the amount given over time (as would likely be the case in the
#   ICU). Note that in order for this to work, we filter out the small number
#   of rows with nonsensical times (e.g. starttimes that fall before ICU
#   admission).
# - Each drip is expanded with GENERATE_SERIES into one row per ICU day it
#   spans. The first (l_tail) and last (r_tail) days are weighted by the
#   fraction of the day the drip actually covers; days in between receive the
#   full per-day amount. A drip that starts on or before day 28 can run past
#   it, so the expanded rows are filtered to icu_day <= 28 to stay consistent
#   with the CareVue, bolus and lab CTEs, which all stop at day 28.
#
# - For bolus administrations, we use endtime as the point of comparison to
#   intime for inputevents_mv - this makes sense for consistency with charttime
#   in inputevents_cv.
#
# - 99.99% of MetaVision unit mismatches are eliminated by accounting for
#   'mEq.'. There are only two other notable cases, between 'L' and 'ml'. There
#   are ten cases between 'ml/hr' and 'mEq', which would seem to be
#   unconvertible.
# - TODO: There are ~1000 cases of unexpected units in CareVue, including 'U'
#   instead of 'mEq'. Should evaluate these cases and see what to do about them.
# - TODO: Many of the meq_multipliers don't exist yet - will need to add them
#   so they don't get filtered out.
#
# chloride_level:
# - itemid 50806, 50902 are 'Chloride, Whole Blood' and 'Chloride', respectively.
query = """
    WITH chloride_input_cv AS (
        SELECT ie.subject_id, ie.hadm_id, ie.icustay_id,
            (DATE_PART('day', charttime - intime) + 1) AS icu_day,
            SUM(amount * meq_multiplier) AS chloride_input_meq
        FROM mimiciii.inputevents_cv AS ie
        JOIN mimiciii.itemclconversions AS icc
            ON ie.itemid = icc.itemid
            AND (amountuom = uom OR ((amountuom = 'mEQ' OR amountuom = 'U') AND uom = 'mEq'))
        JOIN mimiciii.icustays AS icu
            ON ie.subject_id = icu.subject_id
            AND ie.hadm_id = icu.hadm_id
            AND ie.icustay_id = icu.icustay_id
        WHERE (DATE_PART('day', charttime - intime) + 1) <= 28
            AND amount IS NOT NULL
            AND amount > 0
            AND meq_multiplier IS NOT NULL
        GROUP BY ie.subject_id, ie.hadm_id, ie.icustay_id, icu_day
    ), chloride_input_mv_selected AS (
        SELECT ie.subject_id, ie.hadm_id, ie.icustay_id, intime, starttime, endtime, rate,
            (amount * meq_multiplier) AS amount_meq
        FROM mimiciii.inputevents_mv AS ie
        JOIN mimiciii.itemclconversions AS icc
            ON ie.itemid = icc.itemid
            AND (amountuom = uom OR (amountuom = 'mEq.' AND uom = 'mEq'))
        JOIN mimiciii.icustays AS icu
            ON ie.subject_id = icu.subject_id
            AND ie.hadm_id = icu.hadm_id
            AND ie.icustay_id = icu.icustay_id
        WHERE amount IS NOT NULL
            AND amount > 0
            AND meq_multiplier IS NOT NULL
            AND cancelreason = 0
            AND statusdescription != 'Rewritten'
    ), chloride_input_mv_bolus AS (
        SELECT subject_id, hadm_id, icustay_id, 
            (DATE_PART('day', endtime - intime) + 1) AS icu_day,
            SUM(amount_meq) AS chloride_input_meq
        FROM chloride_input_mv_selected
        WHERE rate IS NULL
            AND (DATE_PART('day', endtime - intime) + 1) <= 28
        GROUP BY subject_id, hadm_id, icustay_id, icu_day
    ), chloride_input_mv_rate AS (
        SELECT subject_id, hadm_id, icustay_id, icu_day,
            SUM(CASE WHEN icu_day = l_tail AND icu_day = r_tail
                          THEN amt_per_day * (l_tail_frac + r_tail_frac - 1)
                     WHEN icu_day = l_tail THEN amt_per_day * l_tail_frac
                     WHEN icu_day = r_tail THEN amt_per_day * r_tail_frac
                     ELSE amt_per_day END) AS chloride_input_meq
        FROM (
            SELECT subject_id, hadm_id, icustay_id,
                GENERATE_SERIES(FLOOR(EXTRACT(EPOCH FROM starttime-intime)/86400 + 1)::INTEGER,
                    FLOOR(EXTRACT(EPOCH FROM endtime-intime)/86400 + 1)::INTEGER) AS icu_day,
                (amount_meq / (EXTRACT(EPOCH FROM endtime-starttime)/86400)) AS amt_per_day,
                FLOOR(EXTRACT(EPOCH FROM starttime-intime)/86400 + 1)::INTEGER AS l_tail,
                (1 - (FLOOR(EXTRACT(EPOCH FROM starttime-intime))::INTEGER % 86400)::FLOAT / 86400) AS l_tail_frac,
                FLOOR(EXTRACT(EPOCH FROM endtime-intime)/86400 + 1)::INTEGER AS r_tail,
                ((FLOOR(EXTRACT(EPOCH FROM endtime-intime))::INTEGER % 86400)::FLOAT / 86400) AS r_tail_frac
            FROM chloride_input_mv_selected
            WHERE rate IS NOT NULL
                AND (DATE_PART('day', starttime - intime) + 1) <= 28
                AND endtime > starttime
                AND starttime >= intime
        ) AS rate_summary
        WHERE icu_day <= 28
        GROUP BY subject_id, hadm_id, icustay_id, icu_day
    ), chloride_input AS (
        SELECT subject_id, hadm_id, icustay_id, icu_day,
            SUM(chloride_input_meq) AS chloride_input_meq
        FROM (
            SELECT * FROM chloride_input_cv
            UNION ALL
            SELECT * FROM chloride_input_mv_bolus
            UNION ALL
            SELECT * FROM chloride_input_mv_rate
        ) AS chloride_input_ungrouped
        GROUP BY subject_id, hadm_id, icustay_id, icu_day
        ORDER BY subject_id, hadm_id, icustay_id, icu_day
    ), chloride_level AS (
        SELECT le.subject_id, le.hadm_id, icustay_id,
            (DATE_PART('day', charttime - intime) + 1) AS icu_day,
            MAX(valuenum) AS chloride_max
        FROM mimiciii.labevents AS le
        JOIN mimiciii.icustays AS icu
            ON le.subject_id = icu.subject_id
            AND le.hadm_id = icu.hadm_id
            AND charttime BETWEEN intime AND outtime
        WHERE itemid in (50806,50902)
            AND valuenum IS NOT NULL
            AND valuenum <= 160
            AND (DATE_PART('day', charttime - intime) + 1) <= 28
        GROUP BY le.subject_id, le.hadm_id, icustay_id, icu_day
        ORDER BY le.subject_id, le.hadm_id, icustay_id, icu_day
    )
    SELECT COALESCE(ci.subject_id, cl.subject_id) AS subject_id,
        COALESCE(ci.hadm_id, cl.hadm_id) AS hadm_id,
        COALESCE(ci.icustay_id, cl.icustay_id) AS icustay_id,
        COALESCE(ci.icu_day, cl.icu_day) AS icu_day,
        ci.chloride_input_meq, cl.chloride_max
    FROM chloride_input AS ci
    FULL OUTER JOIN chloride_level AS cl
        ON ci.subject_id = cl.subject_id
        AND ci.hadm_id = cl.hadm_id
        AND ci.icustay_id = cl.icustay_id
        AND ci.icu_day = cl.icu_day
    WHERE (ci.chloride_input_meq <= 5000 OR ci.chloride_input_meq IS NULL)
    ORDER BY subject_id, hadm_id, icustay_id, icu_day
"""

In [None]:
# Run the query server-side and stream the result into chloride.csv using
# PostgreSQL COPY ... TO STDOUT, which avoids materialising rows in Python.
#
# Keyword arguments are used instead of a hand-built DSN string so that a
# database or user name containing spaces or quotes is passed through intact.
connection = psycopg2.connect(dbname=dbname, user=user)
try:
    # The cursor context manager closes the cursor when the block exits.
    with connection.cursor() as c:
        outputquery = "COPY ({0}) TO STDOUT WITH CSV HEADER".format(query)
        with open('chloride.csv', 'w') as f:
            c.copy_expert(outputquery, f)
finally:
    # Always release the database session, even if the query or the file
    # write fails; using the connection as a context manager would only end
    # the transaction, not close the connection.
    connection.close()