# Patients that were re-admitted within X-days of discharge

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

#! UNCOMMENT BELOW IF USING ON COLAB
# Access data using Google BigQuery on Google Colab
# from google.colab import auth
# from google.cloud import bigquery
# auth.authenticate_user()

# Dataset paths
project_id = "composite-drive-359503"   #! CHANGEME
physionet_proj = "physionet-data"
mimic_core = f"{physionet_proj}.mimic_core"
mimic_hosp = f"{physionet_proj}.mimic_hosp"
mimic_icu = f"{physionet_proj}.mimic_icu"
mimic_derived = f"{physionet_proj}.mimic_derived"

# Read data from BigQuery into pandas dataframes.
def run_query(query, project_id=project_id):
    return pd.io.gbq.read_gbq(
        query,
        project_id=project_id,
        dialect='standard')

In [14]:
# how many days between discharge and readmission
readmin_days = 28

In [15]:
# Patients with multiple admissions that did not DIE
multiple_admissions_query = f"""
--sql
SELECT
    admissions.subject_id AS subject_id,
    count(admissions.hadm_id) AS n_admissions
FROM
    {mimic_core}.admissions AS admissions
WHERE
    admissions.discharge_location NOT LIKE ("DIED")
GROUP BY
    subject_id
HAVING
    n_admissions > 1
ORDER BY
    n_admissions DESC
"""

In [16]:
run_query(multiple_admissions_query)

Unnamed: 0,subject_id,n_admissions
0,13297743,87
1,12468016,81
2,11965254,74
3,11296936,71
4,13475033,69
...,...,...
62796,11310067,2
62797,18681175,2
62798,19355337,2
62799,14020162,2


In [17]:
# Get patient previous discharge dates
#! wip -- query too slow, unsupported join predicate
readmission_within_query = f"""
--sql
SELECT
    *
FROM
    {mimic_core}.admissions AS adm
    LEFT JOIN {mimic_core}.admissions AS adm_prev
        ON adm.subject_id = adm_prev.subject_id
        AND adm_prev.hadm_id = (
            SELECT
                adm_prev_cond.hadm_id
            FROM
                {mimic_core}.admissions AS adm_prev_cond
            WHERE
                adm_prev_cond.subject_id = adm.subject_id
                AND adm_prev_cond.hadm_id < adm.hadm_id
            ORDER BY
                adm_prev_cond.hadm_id
            LIMIT 1
        )
ORDER BY
    adm.subject_id, adm.hadm_id
LIMIT 10
"""

In [18]:
# run_query(readmission_within_query)

In [19]:
# Patients that were readmitted within 'readmin_days'
readmission_pt_query = f"""
--sql
SELECT
    *
FROM
    {mimic_core}.admissions AS admissions
WHERE
    admissions.subject_id IN (
        SELECT
            multi_adm_pts.subject_id AS subject_id
        FROM
            ({multiple_admissions_query}) AS multi_adm_pts
    )
ORDER BY
    admissions.subject_id, admissions.hadm_id
"""

In [56]:
# Run the readmission query
# readmission_pts = run_query(readmission_pt_query)

# Save the query to csv
# readmission_pts.to_csv("../data/readmission_patients_all.csv")

# Load readmission query results from previously saved data
readmission_pts = pd.read_csv("../data/readmission_patients_all.csv") \
    .drop(columns=["Unnamed: 0"])

readmission_pts["admittime"] = pd.to_datetime(readmission_pts["admittime"])
readmission_pts["dischtime"] = pd.to_datetime(readmission_pts["dischtime"])
readmission_pts

Unnamed: 0,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,marital_status,ethnicity,edregtime,edouttime,hospital_expire_flag
0,10000032,22595853,2180-05-06 22:23:00,2180-05-07 17:15:00,,URGENT,TRANSFER FROM HOSPITAL,HOME,Other,ENGLISH,WIDOWED,WHITE,2180-05-06 19:17:00,2180-05-06 23:30:00,0
1,10000032,22841357,2180-06-26 18:27:00,2180-06-27 18:49:00,,EW EMER.,EMERGENCY ROOM,HOME,Medicaid,ENGLISH,WIDOWED,WHITE,2180-06-26 15:54:00,2180-06-26 21:31:00,0
2,10000032,25742920,2180-08-05 23:44:00,2180-08-07 17:50:00,,EW EMER.,EMERGENCY ROOM,HOSPICE,Medicaid,ENGLISH,WIDOWED,WHITE,2180-08-05 20:58:00,2180-08-06 01:44:00,0
3,10000032,29079034,2180-07-23 12:35:00,2180-07-25 17:55:00,,EW EMER.,EMERGENCY ROOM,HOME,Medicaid,ENGLISH,WIDOWED,WHITE,2180-07-23 05:54:00,2180-07-23 14:00:00,0
4,10000730,24554295,2121-01-30 23:35:00,2121-02-02 12:20:00,,ELECTIVE,,HOME,Other,ENGLISH,,WHITE,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
296721,19999784,29355057,2119-10-17 10:28:00,2119-10-23 14:45:00,,ELECTIVE,PHYSICIAN REFERRAL,HOME,Medicaid,ENGLISH,SINGLE,BLACK/AFRICAN AMERICAN,,,0
296722,19999784,29889147,2120-10-25 09:43:00,2120-10-31 09:00:00,,ELECTIVE,PHYSICIAN REFERRAL,HOME,Medicaid,ENGLISH,SINGLE,BLACK/AFRICAN AMERICAN,,,0
296723,19999784,29956342,2121-01-31 00:00:00,2121-02-05 12:44:00,,ELECTIVE,PHYSICIAN REFERRAL,HOME,Medicaid,ENGLISH,SINGLE,BLACK/AFRICAN AMERICAN,,,0
296724,19999828,25744818,2149-01-08 16:44:00,2149-01-18 17:00:00,,EW EMER.,TRANSFER FROM HOSPITAL,HOME HEALTH CARE,Other,ENGLISH,SINGLE,WHITE,2149-01-08 09:11:00,2149-01-08 18:12:00,0


In [58]:
# Determine dischtime of previous admissions
#! work in progress
readmission_pts_selected = readmission_pts.copy()
readmission_pts_selected["prev_dischtime"] = np.where(
    readmission_pts["subject_id"].shift(1) == readmission_pts["subject_id"],
    readmission_pts["dischtime"].shift(1),
    np.nan
)
# drop first admission with no prev_dischtime
readmission_pts_selected.dropna(subset=["prev_dischtime"], inplace=True)
# calculate duration to readmission
# readmission_pts_selected["days_to_readmission"] = \
#     (readmission_pts_selected["admittime"] \
#     - readmission_pts_selected["prev_dischtime"]) \
#     .dt.total_seconds() / (60 * 60 * 24)
readmission_pts_selected.info()

TypeError: The DType <class 'numpy.dtype[datetime64]'> could not be promoted by <class 'numpy.dtype[float64]'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtype[datetime64]'>, <class 'numpy.dtype[float64]'>)