In [None]:
import pandas as pd
import psycopg2

# Database name and role that are interpolated into the psycopg2 connection
# string further down; edit these to match the local PostgreSQL setup.
dbname, user = "mimic", "username"

In [None]:
# Reproduced from chloride.ipynb:
# - Administrations from inputevents_mv are split between boluses and (long-term)
#   rate-based drips. While there are columns explicitly denoting this difference,
#   we identify long-term administrations by checking whether rate data is
#   available. This avoids string-parsing of order category data. We calculate
#   daily rates using total amounts to avoid unnecessary unit-parsing, since a
#   very small number of rows use mL/min instead of mL/hour (see itemid 225158).
#   This should be okay since the rates seem to be derived from the amount given
#   over time (as would likely be the case in the ICU). Note that in order for
#   this to work, we filter out the small number of rows with nonsensical times
#   (e.g. starttimes that fall before ICU admission).
#
# - For bolus administrations, we use endtime as the point of comparison to intime
#   for inputevents_mv - this makes sense for consistency with charttime in
#   inputevents_cv.
#
# - Checking for amountuom = 'ml' seems to be sufficient - 'mL', 'ML',
#   and 'Ml' are not present in the table.

# Daily fluid balance per ICU stay, one row per (icustay_id, icu_day).
#
# CTE overview:
#   fluid_input_cv          - inputevents_cv amounts, bucketed by charttime.
#   fluid_input_mv_selected - inputevents_mv rows in ml, excluding cancelled
#                             and rewritten orders.
#   fluid_input_mv_bolus    - MV rows without a rate, bucketed by endtime.
#   fluid_input_mv_rate     - MV rows with a rate. Each administration is
#                             expanded with GENERATE_SERIES into one row per ICU
#                             day it touches; the first and last day only
#                             receive the fraction of the daily amount that
#                             falls inside them (l_tail_frac / r_tail_frac).
#   fluid_input_grouped     - sum of the three input sources per day.
#   fluid_output            - outputevents values in ml per day.
#
# icu_day is 1-based (day 1 starts at icustays.intime) and every source is
# capped at day 28. For rate-based drips the cap has to be applied to the
# expanded rows as well: filtering on the start day alone lets a drip that
# begins on or before day 28 and ends later emit rows for day 29 onwards,
# which would then surface in the result with partial input and no output.
#
# The final SELECT full-joins input and output, treats a missing side as 0,
# and drops days whose absolute net balance exceeds 30000 ml.
query = """
    WITH fluid_input_cv AS (
        SELECT ie.subject_id, ie.hadm_id, ie.icustay_id,
            (DATE_PART('day', charttime - intime) + 1) AS icu_day,
            SUM(amount) AS fluid_input_ml
        FROM mimiciii.inputevents_cv AS ie
        JOIN mimiciii.icustays AS icu
            ON ie.subject_id = icu.subject_id
            AND ie.hadm_id = icu.hadm_id
            AND ie.icustay_id = icu.icustay_id
        WHERE (DATE_PART('day', charttime - intime) + 1) <= 28
            AND amountuom = 'ml'
            AND amount IS NOT NULL
            AND amount > 0
        GROUP BY ie.subject_id, ie.hadm_id, ie.icustay_id, icu_day
    ), fluid_input_mv_selected AS (
        SELECT ie.subject_id, ie.hadm_id, ie.icustay_id, intime,
            starttime, endtime, rate, amount
        FROM mimiciii.inputevents_mv AS ie
        JOIN mimiciii.icustays AS icu
            ON ie.subject_id = icu.subject_id
            AND ie.hadm_id = icu.hadm_id
            AND ie.icustay_id = icu.icustay_id
        WHERE amountuom = 'ml'
            AND amount IS NOT NULL
            AND amount > 0
            AND cancelreason = 0
            AND statusdescription != 'Rewritten'
    ), fluid_input_mv_bolus AS (
        SELECT subject_id, hadm_id, icustay_id,
            (DATE_PART('day', endtime - intime) + 1) AS icu_day,
            SUM(amount) AS fluid_input_ml
        FROM fluid_input_mv_selected
        WHERE rate IS NULL
            AND (DATE_PART('day', endtime - intime) + 1) <= 28
        GROUP BY subject_id, hadm_id, icustay_id, icu_day
    ), fluid_input_mv_rate AS (
        SELECT subject_id, hadm_id, icustay_id, icu_day,
            SUM(CASE WHEN icu_day = l_tail AND icu_day = r_tail
                          THEN amt_per_day * (l_tail_frac + r_tail_frac - 1)
                     WHEN icu_day = l_tail THEN amt_per_day * l_tail_frac
                     WHEN icu_day = r_tail THEN amt_per_day * r_tail_frac
                     ELSE amt_per_day END) AS fluid_input_ml
        FROM (
            SELECT subject_id, hadm_id, icustay_id,
                GENERATE_SERIES(FLOOR(EXTRACT(EPOCH FROM starttime-intime)/86400 + 1)::INTEGER,
                    FLOOR(EXTRACT(EPOCH FROM endtime-intime)/86400 + 1)::INTEGER) AS icu_day,
                (amount / (EXTRACT(EPOCH FROM endtime-starttime)/86400)) AS amt_per_day,
                FLOOR(EXTRACT(EPOCH FROM starttime-intime)/86400 + 1)::INTEGER AS l_tail,
                (1 - (FLOOR(EXTRACT(EPOCH FROM starttime-intime))::INTEGER % 86400)::FLOAT / 86400) AS l_tail_frac,
                FLOOR(EXTRACT(EPOCH FROM endtime-intime)/86400 + 1)::INTEGER AS r_tail,
                ((FLOOR(EXTRACT(EPOCH FROM endtime-intime))::INTEGER % 86400)::FLOAT / 86400) AS r_tail_frac
            FROM fluid_input_mv_selected
            WHERE rate IS NOT NULL
                AND (DATE_PART('day', starttime - intime) + 1) <= 28
                AND endtime > starttime
                AND starttime >= intime
        ) AS rate_summary
        WHERE icu_day <= 28
        GROUP BY subject_id, hadm_id, icustay_id, icu_day
    ), fluid_input_grouped AS (
        SELECT subject_id, hadm_id, icustay_id, icu_day,
            SUM(fluid_input_ml) AS fluid_input_ml
        FROM (
            SELECT * FROM fluid_input_cv
            UNION ALL
            SELECT * FROM fluid_input_mv_bolus
            UNION ALL
            SELECT * FROM fluid_input_mv_rate
        ) AS fluid_input_ungrouped
        GROUP BY subject_id, hadm_id, icustay_id, icu_day
    ), fluid_output AS (
        SELECT oe.subject_id, oe.hadm_id, oe.icustay_id,
            (DATE_PART('day', oe.charttime - icu.intime) + 1) AS icu_day,
            SUM(value) AS fluid_output_ml
        FROM mimiciii.outputevents AS oe
        JOIN mimiciii.icustays AS icu
            ON oe.subject_id = icu.subject_id
            AND oe.hadm_id = icu.hadm_id
            AND oe.icustay_id = icu.icustay_id
        WHERE (DATE_PART('day', oe.charttime - icu.intime) + 1) <= 28
            AND LOWER(valueuom) = 'ml'
            AND value IS NOT NULL
        GROUP BY oe.subject_id, oe.hadm_id, oe.icustay_id, icu_day
    )
    SELECT COALESCE(fi.subject_id, fo.subject_id) AS subject_id,
        COALESCE(fi.hadm_id, fo.hadm_id) AS hadm_id,
        COALESCE(fi.icustay_id, fo.icustay_id) AS icustay_id,
        COALESCE(fi.icu_day, fo.icu_day) AS icu_day,
        COALESCE(fi.fluid_input_ml, 0) AS fluid_input_ml,
        COALESCE(fo.fluid_output_ml, 0) AS fluid_output_ml,
        (COALESCE(fi.fluid_input_ml, 0) - COALESCE(fo.fluid_output_ml, 0)) AS fluid_net_input_ml
    FROM fluid_input_grouped AS fi
    FULL JOIN fluid_output AS fo
        ON fi.subject_id = fo.subject_id
        AND fi.hadm_id = fo.hadm_id
        AND fi.icustay_id = fo.icustay_id
        AND fi.icu_day = fo.icu_day
    WHERE ABS((COALESCE(fi.fluid_input_ml, 0) - COALESCE(fo.fluid_output_ml, 0))) <= 30000
    ORDER BY subject_id, hadm_id, icustay_id, icu_day
"""

In [None]:
# Export the query result to fluid_io.csv.
#
# Wrapping the query in COPY ... TO STDOUT makes the server emit CSV (with a
# header row) that copy_expert streams directly into the open file, so the
# result set is never materialised in Python.
outputquery = "COPY ({0}) TO STDOUT WITH CSV HEADER".format(query)

connection = psycopg2.connect("dbname=%s user=%s" % (dbname, user))
try:
    # The cursor and the output file are both released when the block exits,
    # including when copy_expert raises.
    with connection.cursor() as c, open('fluid_io.csv', 'w') as f:
        c.copy_expert(outputquery, f)
finally:
    # `with connection:` would only end the transaction, not close the
    # session, so the connection is closed explicitly to avoid leaving an
    # idle server session open for the lifetime of the kernel.
    connection.close()