In [None]:
import psycopg2

# Connection settings for the PostgreSQL database that holds the MIMIC-III
# tables (the export query reads from the `mimiciii` schema).
dbname = "mimic"
# NOTE(review): "username" looks like a placeholder - replace it with your own
# PostgreSQL role before running.
user = "username"

In [None]:
# Demographics query: for every patient/admission pair whose age at admission
# is at least 18, return subject_id, hadm_id, age, gender, a coarse ethnicity
# group, and insurance, ordered by subject_id then hadm_id.
#
# Age: we use DATE_PART('day', ...) because subtracting the two timestamps
# yields an interval of the form 'D days HH:MM:SS'; dividing the day count by
# 365 converts it to years.
# NOTE(review): dividing by 365 ignores leap days, so computed ages are
# slightly high - consider 365.242 if the 18 / 90 cut-offs need to be exact.
#
# Ethnicity: the first matching LIKE pattern wins (WHITE, ASIAN, HISPANIC,
# BLACK); everything else becomes 'OTHER'. This grouping results in ~8000
# cases under 'OTHER' - the majority of which are truly
# unknown/other/declined-to-respond. There are a few ethnicities that number
# < 100 (e.g. Portuguese) which were grouped under 'OTHER' for simplicity.
#
# Adults only: we filter out infants and children to focus on adults.
# Recalling MIMIC's age shifting for patients older than 89, we set the age
# for every row with a computed age of 90 or more to 91.4 (the median age of
# that group):
# https://mimic.physionet.org/mimictables/patients/#dob
query = """
    SELECT subject_id, hadm_id,
        (CASE WHEN age < 90 THEN age ELSE 91.4 END) AS age,
        gender, ethnicity, insurance
    FROM (
        SELECT pt.subject_id, adm.hadm_id,
            DATE_PART('day', adm.admittime - pt.dob)/365 AS age, pt.gender,
            CASE
                WHEN adm.ethnicity LIKE '%WHITE%' THEN 'WHITE'
                WHEN adm.ethnicity LIKE '%ASIAN%' THEN 'ASIAN'
                WHEN adm.ethnicity LIKE '%HISPANIC%' THEN 'HISPANIC'
                WHEN adm.ethnicity LIKE '%BLACK%' THEN 'BLACK'
                ELSE 'OTHER'
            END AS ethnicity, adm.insurance
        FROM mimiciii.patients AS pt
        JOIN mimiciii.admissions AS adm
            ON adm.subject_id = pt.subject_id
    ) AS unfiltered
    WHERE age >= 18
    ORDER BY subject_id, hadm_id
"""

In [None]:
# Export the demographics query to 'adm_demographics.csv' by wrapping it in
# PostgreSQL's COPY ... TO STDOUT and streaming the result into a local file.
#
# Keyword arguments let psycopg2 assemble and quote the connection string
# itself, so a database or user name containing spaces or quotes cannot
# corrupt the DSN the way manual '%' interpolation would.
connection = psycopg2.connect(dbname=dbname, user=user)
try:
    c = connection.cursor()
    try:
        # COPY needs the SELECT inlined as a sub-statement; `query` is a
        # constant defined in this notebook, not user input.
        outputquery = "COPY ({0}) TO STDOUT WITH CSV HEADER".format(query)
        with open('adm_demographics.csv', 'w') as f:
            c.copy_expert(outputquery, f)
    finally:
        c.close()
finally:
    # Close explicitly: `with connection:` in psycopg2 only ends the
    # transaction, it does not release the connection. Reconnect if further
    # queries are needed after this cell.
    connection.close()