In [1]:

# Import Libraries
import json
import os


import pandas as pd
import numpy as np
from tqdm import tqdm
tqdm.pandas()

import src.data_processing.MIMIC.test_functions as tests

# LOAD CONFIGURATION 
with open("src/data_processing/MIMIC/MIMIC_PROCESSING_DEFAULT_VARS.json", "r") as f:
    DEFAULT_CONFIG = json.load(f)
    f.close()

if not os.path.exists(DEFAULT_CONFIG["SAVE_FD"]):
    os.makedirs(DEFAULT_CONFIG["SAVE_FD"])

In [2]:
try:
    assert os.path.exists(DEFAULT_CONFIG["SAVE_FD"] + "admissions_intermediate.csv")
    assert os.path.exists(DEFAULT_CONFIG["SAVE_FD"] + "vitals_intermediate.csv")

except AssertionError as e:
    raise e

In [3]:
# Print Information
print("\n\n ======== PROCESSING OUTCOMES ======== \n\n")

# Load previously processed data
adm_proc = pd.read_csv(
    DEFAULT_CONFIG["SAVE_FD"] + "admissions_intermediate.csv",
    index_col=0, 
    header=0, 
    parse_dates=["intime", "outtime", "intime_next", "outtime_next", "deathtime"]
)
vit_proc = (
    pd.read_csv(
    DEFAULT_CONFIG["SAVE_FD"] + "vitals_intermediate.csv", 
    index_col=0, 
    header=0, 
    parse_dates=DEFAULT_CONFIG["VITALS_TIME_VARS"]
    )
    .reset_index(drop=False)
    .assign(sampled_time_to_end=lambda x: pd.to_timedelta(x["sampled_time_to_end"]))  # pd does not load timedelta automatically
)


# Check correct computation of admissions and vitals
tests.test_admissions_processed_correctly(adm_proc)
tests.test_vitals_processed_correctly(vit_proc, config_dic=DEFAULT_CONFIG)






Testing admissions processed correctly...

Testing outtime is after intime.
Test passed!

Testing next transfer information is consistent.
Test passed!

Testing admission times are before death (if exists).
Test passed!

Testing ids are unique for params ('subject_id', 'hadm_id', 'stay_id', 'transfer_id_next')
Test passed for variable  subject_id!
Test passed for variable  hadm_id!
Test passed for variable  stay_id!
Test passed for variable  transfer_id_next!

Testing ids are complete for params ('subject_id', 'stay_id', 'intime', 'outtime')
Test passed for variable subject_id!
Test passed for variable stay_id!
Test passed for variable intime!
Test passed for variable outtime!
Test passed!
Admissions correctly computed! Safe to go ahead.

Testing vitals were processed correctly and make sense.

Testing ids are complete for params ('subject_id', 'stay_id', 'sampled_time_to_end')
Test passed for variable subject_id!
Test passed for variable stay_id!
Test passed for variable sampled_

100%|█████| 8364/8364 [00:28<00:00, 295.92it/s]


Test passed!

Testing resampling data is linear from min to max per patient.


100%|████| 8364/8364 [00:03<00:00, 2482.37it/s]

Test passed!
Vitals seem correctly processed!





In [4]:
# Load core info
transfers_core = pd.read_csv(
    DEFAULT_CONFIG["DATA_FD"] + "core/transfers.csv", 
    index_col=None, 
    header=0, 
    parse_dates=["intime", "outtime"]
)
admissions_core = pd.read_csv(
    DEFAULT_CONFIG["DATA_FD"] + "core/admissions.csv",
    index_col=None,
    header=0,
    parse_dates=["admittime", "dischtime", "deathtime", "edregtime", "edouttime"]
)



In [5]:

"""
Step 1: Subset the set of transfers/admissions_core to the already processed cohort.

We do this by merging. 
"""

# Define Id for merging. We separate deathtime as one database registers only date, while the other
# registers everyting (i.e. up to second)
# tr_merge_ids = [
#     col for 
#     col in vit_proc.columns.tolist() if
#     col in transfers_core.columns.tolist() and
#     "death" not in col
# ]
hadm_merge_ids = [
    col for
    col in vit_proc.columns.tolist() if
    col in admissions_core.columns.tolist() and
    "death" not in col
]
merge_ids = ["subject_id", "hadm_id", "stay_id"]         # Useful simplication

# # Inner merge for transfers core
# transfers_S1 = (
#     transfers_core
#     .merge(
#         vit_proc.drop_duplicates(subset=merge_ids),   # Drop duplicates as we don't need all the rows
#         how="inner",
#         on=tr_merge_ids
#     )
#     .dropna(subset=["hadm_id"])                 # Drop rows with no hadm_id as we can't compare with transfers
#     .sort_values(by=merge_ids, ascending=True) # Sort by subject_id and stay_id
# )

# Inner merge for admissions core
admissions_S1 = (
    admissions_core
    .merge(
        vit_proc.drop_duplicates(subset=merge_ids), # only want one obvs per admission for merging
        how="inner",
        on=hadm_merge_ids,
        suffixes=("", "_ed")
    )
    .dropna(subset=["hadm_id"])            # Drop rows with no hadm_id as we can't compare with transfers
    .sort_values(by=merge_ids, ascending=True) # Sort by subject_id and stay_id
)

# Testing and save
# tests.test_ids_subset_of_cohort(transfers_S1, vit_proc, *merge_ids)
tests.test_ids_subset_of_cohort(admissions_S1, vit_proc, *merge_ids)
# tests.test_is_complete_ids(transfers_S1, *merge_ids, "stay_id")
tests.test_is_complete_ids(admissions_S1, *merge_ids, "stay_id")

# Check processing and correctdeness
# transfers_S1.to_csv(DEFAULT_CONFIG["SAVE_FD"] + "transfers_S1.csv", header=True, index=True)



Testing ('subject_id', 'hadm_id', 'stay_id') are subset of cohort data.
Test passed!

Testing ids are complete for params ('subject_id', 'hadm_id', 'stay_id', 'stay_id')
Test passed for variable subject_id!
Test passed for variable hadm_id!
Test passed for variable stay_id!
Test passed for variable stay_id!


In [7]:
admissions_S2 = (
    admissions_S1
    .query("intime <= admittime")                            # admissions to hospital after ED admissions
    .query("intime_next >= admittime | intime_next.isna()")  # admissions to hospital before next ED transfer
    .query("outtime <= edouttime")                           # transfer outtime before ed exit time
    .query("intime <= edregtime")                            # transfer intmie before ed registration time
    .query("dischtime - outtime_next >= @pd.Timedelta('-6h') | outtime_next.isna()")
    # discharge time not earlier than outtime_next (added -6 hours due to some potential delays)
    .query("deathtime <= dischtime | deathtime.isna()")
)

# First subset Transfers
tr_merge_ids = ["subject_id", "hadm_id", "stay_id", 
                "outtime", "deathtime", "intime_next", "outtime_next",
                "dischtime", "discharge_location"]
transfers_S1 = (
    transfers_core
    .merge(
        admissions_S2[tr_merge_ids],
        how="inner",
        on=["subject_id", "hadm_id"],
        suffixes=("", "_ed")
    )
    .sort_values(by=["subject_id", "stay_id"], ascending=True)
)

# Run tests
tests.test_is_complete_ids(transfers_S1, "subject_id", "hadm_id")
tests.test_outtimes_match(transfers_S1)
tests.test_every_patient_has_discharge_transfer(transfers_S1)
# endregion



Testing ids are complete for params ('subject_id', 'hadm_id')
Test passed for variable subject_id!
Test passed for variable hadm_id!


100%|████| 8134/8134 [00:02<00:00, 3353.17it/s]


In [8]:
test = transfers_S1.query("eventtype=='discharge'")

In [20]:
test.query("discharge_location.isna()")

Unnamed: 0,subject_id,hadm_id,transfer_id,eventtype,careunit,intime,outtime,stay_id,outtime_ed,deathtime,intime_next,outtime_next,dischtime,discharge_location
12064,10001176,23334588.0,33477045,discharge,,2186-12-02 15:35:03,NaT,39677610,2186-11-29 05:01:00,NaT,2186-11-29 05:01:00,2186-12-02 15:35:03,2186-12-02 15:00:00,
20974,10001180,21102262.0,36596649,discharge,,2192-05-23 16:43:10,NaT,33306350,2192-05-23 05:27:00,NaT,NaT,NaT,2192-05-23 16:43:00,
2857,10008647,25482427.0,37318262,discharge,,2118-04-01 15:46:41,NaT,34901812,2118-03-31 00:48:00,NaT,2118-03-31 00:48:00,2118-04-01 15:46:41,2118-04-01 15:00:00,
22149,10014763,20246322.0,34476722,discharge,,2174-07-02 22:36:41,NaT,31473454,2174-07-02 22:28:00,NaT,NaT,NaT,2174-07-02 22:28:00,
8044,10016832,24538391.0,31433041,discharge,,2196-04-28 15:51:27,NaT,30342707,2196-04-25 21:20:00,NaT,NaT,NaT,2196-04-28 15:50:00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14848,19953114,20039616.0,30855696,discharge,,2149-07-04 11:38:07,NaT,38019534,2149-07-04 06:05:36,NaT,NaT,NaT,2149-07-04 11:37:00,
5047,19975148,25817116.0,38657699,discharge,,2130-08-07 10:50:58,NaT,35121497,2130-08-07 10:50:00,NaT,NaT,NaT,2130-08-07 10:50:00,
29289,19986309,21193364.0,36871271,discharge,,2117-06-01 17:34:11,NaT,38490767,2117-06-01 03:46:00,NaT,2117-06-01 03:46:00,2117-06-01 17:34:11,2117-06-01 17:30:00,
7288,19991111,28286999.0,31607369,discharge,,2141-09-12 12:39:58,NaT,33861407,2141-09-11 16:21:00,NaT,2141-09-11 16:21:00,2141-09-12 12:39:58,2141-09-12 12:38:00,


In [76]:
transfers_S1

Unnamed: 0,subject_id,hadm_id,transfer_id,eventtype,careunit,intime,outtime,outtime_ed,deathtime,dischtime,intime_next,outtime_next
7594,10000084,23052089.0,34537425,admit,Medicine,2160-11-21 03:20:00,2160-11-25 14:52:20,2160-11-21 03:20:00,NaT,2160-11-25 14:52:00,NaT,NaT
7592,10000084,23052089.0,32326678,discharge,,2160-11-25 14:52:20,NaT,2160-11-21 03:20:00,NaT,2160-11-25 14:52:00,NaT,NaT
7593,10000084,23052089.0,35203156,ED,Emergency Department,2160-11-20 20:36:00,2160-11-21 03:20:00,2160-11-21 03:20:00,NaT,2160-11-25 14:52:00,NaT,NaT
12065,10001176,23334588.0,39677610,ED,Emergency Department,2186-11-28 21:34:00,2186-11-29 05:01:00,2186-11-29 05:01:00,NaT,2186-12-02 15:00:00,2186-11-29 05:01:00,2186-12-02 15:35:03
12064,10001176,23334588.0,33477045,discharge,,2186-12-02 15:35:03,NaT,2186-11-29 05:01:00,NaT,2186-12-02 15:00:00,2186-11-29 05:01:00,2186-12-02 15:35:03
...,...,...,...,...,...,...,...,...,...,...,...,...
23708,19997576,25548363.0,39468668,ED,Emergency Department,2187-10-07 18:43:00,2187-10-07 23:42:00,2187-10-07 23:42:00,NaT,2187-10-10 19:50:00,2187-10-07 23:42:00,2187-10-10 19:59:11
23707,19997576,25548363.0,37328828,discharge,,2187-10-10 19:59:11,NaT,2187-10-07 23:42:00,NaT,2187-10-10 19:50:00,2187-10-07 23:42:00,2187-10-10 19:59:11
15148,19999204,29046609.0,37072487,discharge,,2146-06-08 20:27:30,NaT,2146-05-30 20:09:00,NaT,2146-06-08 20:20:00,2146-05-30 20:09:00,2146-06-08 20:27:30
15149,19999204,29046609.0,32654415,ED,Emergency Department,2146-05-30 14:10:00,2146-05-30 20:09:00,2146-05-30 20:09:00,NaT,2146-06-08 20:20:00,2146-05-30 20:09:00,2146-06-08 20:27:30


In [35]:
admissions_S2

Unnamed: 0,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,...,transfer_id,eventtype,careunit,intime,outtime,gender,age,ESI,deathtime_ed,charttime_ub
1869,10000084,23052089,2160-11-21 01:56:00,2160-11-25 14:52:00,NaT,EW EMER.,WALK-IN/SELF REFERRAL,HOME HEALTH CARE,Medicare,ENGLISH,...,35203156,ED,Emergency Department,2160-11-20 20:36:00,2160-11-21 03:20:00,1,72,2.0,NaT,2160-11-21 03:20:00
3294,10001176,23334588,2186-11-29 03:56:00,2186-12-02 15:00:00,NaT,EU OBSERVATION,EMERGENCY ROOM,,Medicare,ENGLISH,...,39677610,ED,Emergency Department,2186-11-28 21:34:00,2186-11-29 05:01:00,0,64,3.0,NaT,2186-11-29 05:01:00
5413,10001180,21102262,2192-05-23 04:58:00,2192-05-23 16:43:00,NaT,EU OBSERVATION,EMERGENCY ROOM,,Other,ENGLISH,...,33306350,ED,Emergency Department,2192-05-22 23:46:00,2192-05-23 05:27:00,0,33,3.0,NaT,2192-05-23 05:27:00
2565,10001217,24597018,2157-11-18 22:56:00,2157-11-25 18:00:00,NaT,EW EMER.,EMERGENCY ROOM,HOME HEALTH CARE,Other,?,...,39866888,ED,Emergency Department,2157-11-18 17:38:00,2157-11-19 01:24:00,0,55,3.0,NaT,2157-11-19 01:24:00
2857,10004719,21197153,2183-08-30 18:28:00,2183-09-03 12:34:00,NaT,OBSERVATION ADMIT,EMERGENCY ROOM,HOME,Medicare,ENGLISH,...,36819102,ED,Emergency Department,2183-08-30 14:38:00,2183-08-30 20:32:00,0,66,3.0,NaT,2183-08-30 20:32:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,19993951,23793933,2139-02-18 20:03:00,2139-02-19 15:49:00,NaT,EU OBSERVATION,EMERGENCY ROOM,,Medicare,ENGLISH,...,33496052,ED,Emergency Department,2139-02-18 11:39:00,2139-02-18 20:03:54,1,72,2.0,NaT,2139-02-18 20:03:54
4740,19996783,25894657,2188-03-05 20:17:00,2188-03-14 17:15:00,NaT,DIRECT EMER.,PHYSICIAN REFERRAL,HOME HEALTH CARE,Other,?,...,39421649,ED,Emergency Department,2188-03-05 15:06:00,2188-03-05 21:40:00,1,89,3.0,NaT,2188-03-05 21:40:00
4901,19997473,27787494,2173-09-11 00:53:00,2173-10-02 15:50:00,NaT,URGENT,TRANSFER FROM HOSPITAL,SKILLED NURSING FACILITY,Medicare,ENGLISH,...,39636284,ED,Emergency Department,2173-09-10 22:20:00,2173-09-11 01:54:00,0,82,2.0,NaT,2173-09-11 01:54:00
5547,19997576,25548363,2187-10-07 22:35:00,2187-10-10 19:50:00,NaT,OBSERVATION ADMIT,TRANSFER FROM HOSPITAL,HOME,Medicare,ENGLISH,...,39468668,ED,Emergency Department,2187-10-07 18:43:00,2187-10-07 23:42:00,0,80,2.0,NaT,2187-10-07 23:42:00


In [49]:

# region Auxiliary Functions - used to define outcome

def get_first_death_time(df):
    """
    Given a list of transfers which includes information about the patient hospital admission and other information, get
    the time of death for the patient. This function exists for standardisation.
    """

    # For each stay id (groupby), access the deathtime and compute the minimum if available
    earliest_deathtime = df.groupby("stay_id").deathtime.nth(0)

    return earliest_deathtime


def get_first_icu_time(df):
    """
    Given a list of transfers which includes information about the patient hospital admission and other information, get
    the time of the first ICU entry for the patient if it exists.
    """

    # For each stay id (groupby), identify the transfers to ICU wards, and compute the entry time if available
    earliest_icu_time = (
        df
        .groupby("stay_id")
        .progress_apply(lambda x: (
            x
            # Careunit has ICU in name
            .query("careunit.str.contains('(?i)ICU', na=False, case=False)")
            # Another ICU name
            .query("careunit.str.contains('(?i)Neuro Stepdown', na=False, case=False)")
            # Get transfer entry time
            .intime
            # Get minimum of all ICU entries
            .min()
        )
        )
    )

    return earliest_icu_time

None
def get_first_discharge_time(df):
    """
    Given a list of transfers which includes information about the patient hospital admission and other information, get
    the time of discharge for the patient if it exists.

    Args:
        df (pd.DataFrame): Dataframe with transfers information.
    """
    
    # For each stay id (groupby), identify the discharge transfer, and compute the time if the location is not 'DIED'
    earliest_discharge_time = (
        df
        .groupby("stay_id")
        .progress_apply(lambda x: ( 
            x
            # Remove any transfers for death events
            .query("~ eventtype.str.contains('(?i)DIED', na=False, case=False)")
            # Within remaining transfers, get the discharge transfer
            .query("eventtype == 'discharge'")
            .squeeze()                     # Convert to pd.Series, we know there is exactly one discharge eventtype
            .dischtime                     # Get the discharge time
            
            )
        )
    )

    return earliest_discharge_time


def get_first_ward_time(df):
    """
    Given a list of transfers which includes information about the patient hospital admission and other information, get
    the time of the first transfer to a medical ward for the patient if it exists. This function exists for standardisation.

    Args:
        df (pd.DataFrame): Dataframe with transfers information.
    """
    earliest_ward_time = df.groupby("stay_id").intime_next.nth(0)

    return earliest_ward_time

# endregion



In [50]:
earliest_outcome_times = (
    admissions_S1
    .set_index("stay_id")              # Set index to stay_id to match the below
    .assign(first_death=get_first_death_time(transfers_S1))  # Compute first death time
    .assign(first_icu=get_first_icu_time(transfers_S1)) # Compute first icu time
    .assign(first_ward=get_first_ward_time(transfers_S1)) # Compute first ward time
    .assign(first_discharge=get_first_discharge_time(transfers_S1)) # Compute first discharge time
)


 25%|█▏   | 2018/8134 [00:07<00:22, 273.89it/s]

Unnamed: 0_level_0,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,...,eventtype,careunit,intime,outtime,gender,age,ESI,deathtime_ed,charttime_ub,first_death
stay_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
35203156,10000084,23052089,2160-11-21 01:56:00,2160-11-25 14:52:00,NaT,EW EMER.,WALK-IN/SELF REFERRAL,HOME HEALTH CARE,Medicare,ENGLISH,...,ED,Emergency Department,2160-11-20 20:36:00,2160-11-21 03:20:00,1,72,2.0,NaT,2160-11-21 03:20:00,NaT
39677610,10001176,23334588,2186-11-29 03:56:00,2186-12-02 15:00:00,NaT,EU OBSERVATION,EMERGENCY ROOM,,Medicare,ENGLISH,...,ED,Emergency Department,2186-11-28 21:34:00,2186-11-29 05:01:00,0,64,3.0,NaT,2186-11-29 05:01:00,NaT
33306350,10001180,21102262,2192-05-23 04:58:00,2192-05-23 16:43:00,NaT,EU OBSERVATION,EMERGENCY ROOM,,Other,ENGLISH,...,ED,Emergency Department,2192-05-22 23:46:00,2192-05-23 05:27:00,0,33,3.0,NaT,2192-05-23 05:27:00,NaT
39866888,10001217,24597018,2157-11-18 22:56:00,2157-11-25 18:00:00,NaT,EW EMER.,EMERGENCY ROOM,HOME HEALTH CARE,Other,?,...,ED,Emergency Department,2157-11-18 17:38:00,2157-11-19 01:24:00,0,55,3.0,NaT,2157-11-19 01:24:00,NaT
36819102,10004719,21197153,2183-08-30 18:28:00,2183-09-03 12:34:00,NaT,OBSERVATION ADMIT,EMERGENCY ROOM,HOME,Medicare,ENGLISH,...,ED,Emergency Department,2183-08-30 14:38:00,2183-08-30 20:32:00,0,66,3.0,NaT,2183-08-30 20:32:00,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33496052,19993951,23793933,2139-02-18 20:03:00,2139-02-19 15:49:00,NaT,EU OBSERVATION,EMERGENCY ROOM,,Medicare,ENGLISH,...,ED,Emergency Department,2139-02-18 11:39:00,2139-02-18 20:03:54,1,72,2.0,NaT,2139-02-18 20:03:54,NaT
39421649,19996783,25894657,2188-03-05 20:17:00,2188-03-14 17:15:00,NaT,DIRECT EMER.,PHYSICIAN REFERRAL,HOME HEALTH CARE,Other,?,...,ED,Emergency Department,2188-03-05 15:06:00,2188-03-05 21:40:00,1,89,3.0,NaT,2188-03-05 21:40:00,NaT
39636284,19997473,27787494,2173-09-11 00:53:00,2173-10-02 15:50:00,NaT,URGENT,TRANSFER FROM HOSPITAL,SKILLED NURSING FACILITY,Medicare,ENGLISH,...,ED,Emergency Department,2173-09-10 22:20:00,2173-09-11 01:54:00,0,82,2.0,NaT,2173-09-11 01:54:00,NaT
39468668,19997576,25548363,2187-10-07 22:35:00,2187-10-10 19:50:00,NaT,OBSERVATION ADMIT,TRANSFER FROM HOSPITAL,HOME,Medicare,ENGLISH,...,ED,Emergency Department,2187-10-07 18:43:00,2187-10-07 23:42:00,0,80,2.0,NaT,2187-10-07 23:42:00,NaT
