In [1]:

# Import Libraries
import datetime as dt
import json
import os

import pandas as pd
import numpy as np
from tqdm import tqdm
tqdm.pandas()

import src.data_processing.MIMIC.test_functions as tests

# LOAD CONFIGURATION
# Read the processing defaults from the JSON config file. The `with` statement
# closes the file when the block exits, so no explicit close() is required.
with open("src/data_processing/MIMIC/MIMIC_PROCESSING_DEFAULT_VARS.json", "r") as f:
    DEFAULT_CONFIG = json.load(f)

# Make sure the save folder exists. exist_ok=True makes the call a no-op when
# the folder is already there and avoids a separate check-then-create step.
os.makedirs(DEFAULT_CONFIG["SAVE_FD"], exist_ok=True)

In [2]:
# Fail fast when an intermediate file loaded further down is missing.
# An AssertionError is still raised, but it now names the missing path.
for required_file in ("admissions_intermediate.csv", "vitals_intermediate.csv"):
    required_path = DEFAULT_CONFIG["SAVE_FD"] + required_file
    assert os.path.exists(required_path), "Missing intermediate file: " + required_path

In [3]:
# Reload the test helper module so that edits made to test_functions.py are
# picked up in this session without restarting the kernel.
from importlib import reload
reload(tests)

<module 'src.data_processing.MIMIC.test_functions' from '/home/hq-boss31/Projects/Github/VarPhenClustering/src/data_processing/MIMIC/test_functions.py'>

In [4]:
# Announce the stage
print("\n\n ======== PROCESSING OUTCOMES ======== \n\n")

# Read back the intermediate admissions table, parsing its timestamp columns
save_fd = DEFAULT_CONFIG["SAVE_FD"]
adm_time_cols = ["intime", "outtime", "intime_next", "outtime_next", "deathtime"]
adm_proc = pd.read_csv(
    save_fd + "admissions_intermediate.csv",
    index_col=0,
    header=0,
    parse_dates=adm_time_cols,
)

# Read back the intermediate vitals table and turn the loaded index back into a column
vit_proc = pd.read_csv(
    save_fd + "vitals_intermediate.csv",
    index_col=0,
    header=0,
    parse_dates=DEFAULT_CONFIG["VITALS_TIME_VARS"],
).reset_index(drop=False)

# pandas does not load timedelta columns automatically, so convert explicitly
vit_proc["sampled_time_to_end"] = pd.to_timedelta(vit_proc["sampled_time_to_end"])

# Sanity-check both tables before going any further
tests.test_admissions_processed_correctly(adm_proc)
tests.test_vitals_processed_correctly(vit_proc, config_dic=DEFAULT_CONFIG)






Testing admissions processed correctly...

Testing outtime is after intime.
Test passed!

Testing next transfer information is consistent.
Test passed!

Testing admission times are before death (if exists).
Test passed!

Testing ids are unique for params ('subject_id', 'hadm_id', 'stay_id', 'transfer_id_next')
Test passed for variable  subject_id!
Test passed for variable  hadm_id!
Test passed for variable  stay_id!
Test passed for variable  transfer_id_next!

Testing ids are complete for params ('subject_id', 'stay_id', 'intime', 'outtime')
Test passed for variable subject_id!
Test passed for variable stay_id!
Test passed for variable intime!
Test passed for variable outtime!
Test passed!
Admissions correctly computed! Safe to go ahead.

Testing vitals were processed correctly and make sense.

Testing ids are complete for params ('subject_id', 'stay_id', 'sampled_time_to_end')
Test passed for variable subject_id!
Test passed for variable stay_id!
Test passed for variable sampled_

100%|████████████████████████████████████████████████████████████████████████████████████████████| 8364/8364 [00:24<00:00, 347.32it/s]


Test passed!

Testing resampling data is linear from min to max per patient.


100%|███████████████████████████████████████████████████████████████████████████████████████████| 8364/8364 [00:03<00:00, 2381.89it/s]

Test passed!
Vitals seem correctly processed!





In [5]:
# Read the raw core tables, parsing their timestamp columns on load
data_fd = DEFAULT_CONFIG["DATA_FD"]

transfers_time_cols = ["intime", "outtime"]
transfers_core = pd.read_csv(
    data_fd + "core/transfers.csv",
    index_col=None,
    header=0,
    parse_dates=transfers_time_cols,
)

admissions_time_cols = ["admittime", "dischtime", "deathtime", "edregtime", "edouttime"]
admissions_core = pd.read_csv(
    data_fd + "core/admissions.csv",
    index_col=None,
    header=0,
    parse_dates=admissions_time_cols,
)



In [13]:

"""
Step 1: Subset the set of transfers/admissions_core to the already processed cohort.

We do this by inner-merging each core table with the cohort ids found in vit_proc.
"""
merge_ids = ["subject_id", "hadm_id"]
info_ids = merge_ids + ["stay_id", "intime", "outtime", "intime_next", "outtime_next"]


def _subset_to_cohort(core_df, cohort_info, on):
    """Restrict a core table to the cohort and attach the cohort stay information.

    Parameters
    ----------
    core_df : pd.DataFrame
        Core table (transfers or admissions) containing the columns in `on`.
    cohort_info : pd.DataFrame
        De-duplicated cohort rows holding the `on` columns plus stay-level info.
    on : list of str
        Column names to merge on.

    Returns
    -------
    pd.DataFrame
        Inner merge of both frames with rows lacking hadm_id removed, sorted by
        subject_id and stay_id. Columns present in both frames but not in `on`
        receive the default pandas suffixes ("_x" for core_df, "_y" for
        cohort_info).
    """
    return (
        core_df
        .merge(cohort_info, how="inner", on=on)
        # An inner merge can still pair rows whose key is missing on both sides,
        # so rows with no hadm_id are dropped as they can't be compared.
        .dropna(subset=["hadm_id"])
        .sort_values(by=["subject_id", "stay_id"], ascending=True)
    )


# Drop duplicates as we don't need all the rows; computed once and reused for both merges
cohort_info = vit_proc[info_ids].drop_duplicates()

# Inner merge for transfers core and admissions core
transfers_S1 = _subset_to_cohort(transfers_core, cohort_info, on=merge_ids)
admissions_S1 = _subset_to_cohort(admissions_core, cohort_info, on=merge_ids)

# Check processing and correctness
tests.test_ids_subset_of_cohort(transfers_S1, vit_proc, *merge_ids)
tests.test_ids_subset_of_cohort(admissions_S1, vit_proc, *merge_ids)
tests.test_is_complete_ids(transfers_S1, *merge_ids, "stay_id")
tests.test_is_complete_ids(admissions_S1, *merge_ids, "stay_id")

# Save
# NOTE(review): only transfers_S1 is written to disk; confirm admissions_S1 is meant to stay in memory only.
transfers_S1.to_csv(DEFAULT_CONFIG["SAVE_FD"] + "transfers_S1.csv", header=True, index=True)


Testing ('subject_id', 'hadm_id') are subset of cohort data.
Test passed!

Testing ('subject_id', 'hadm_id') are subset of cohort data.
Test passed!

Testing ids are complete for params ('subject_id', 'hadm_id', 'stay_id')
Test passed for variable subject_id!
Test passed for variable hadm_id!
Test passed for variable stay_id!

Testing ids are complete for params ('subject_id', 'hadm_id', 'stay_id')
Test passed for variable subject_id!
Test passed for variable hadm_id!
Test passed for variable stay_id!


In [9]:
# Display the processed admissions table loaded from admissions_intermediate.csv.
adm_proc

Unnamed: 0,subject_id,hadm_id,transfer_id_next,eventtype_next,careunit_next,intime_next,outtime_next,transfer_id,eventtype,careunit,intime,outtime,stay_id,gender,age,ESI,deathtime
0,10000032,22595853.0,,,,NaT,NaT,33258284,ED,Emergency Department,2180-05-06 19:17:00,2180-05-06 23:30:00,33258284,0,52,3.0,NaT
1,10000084,23052089.0,,,,NaT,NaT,35203156,ED,Emergency Department,2160-11-20 20:36:00,2160-11-21 03:20:00,35203156,1,72,2.0,NaT
2,10000108,,,,,NaT,NaT,32522732,ED,Emergency Department,2163-09-16 16:34:00,2163-09-16 16:43:00,32522732,1,25,3.0,NaT
3,10000115,,,,,NaT,NaT,38081480,ED,Emergency Department,2154-12-10 02:04:00,2154-12-10 02:16:00,38081480,1,24,3.0,NaT
4,10000178,,,,,NaT,NaT,31721172,ED,Emergency Department,2157-04-08 09:58:00,2157-04-08 10:20:00,31721172,0,59,3.0,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
163018,19999733,27674281.0,,,,NaT,NaT,30940569,ED,Emergency Department,2152-07-08 20:15:00,2152-07-09 03:45:00,30940569,0,19,3.0,NaT
163019,19999750,,,,,NaT,NaT,38224473,ED,Emergency Department,2144-03-22 14:27:00,2144-03-22 14:36:00,38224473,1,45,3.0,NaT
163020,19999784,26194817.0,,,,NaT,NaT,35692999,ED,Emergency Department,2119-06-18 14:21:00,2119-06-18 21:09:29,35692999,1,57,3.0,NaT
163021,19999828,29734428.0,,,,NaT,NaT,30712109,ED,Emergency Department,2147-07-17 17:18:00,2147-07-18 17:34:00,30712109,0,46,2.0,NaT


In [17]:
# Display admissions_core after it was merged with the cohort stay information.
admissions_S1

Unnamed: 0,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,marital_status,ethnicity,edregtime,edouttime,hospital_expire_flag,stay_id,intime,outtime,intime_next,outtime_next
1869,10000084,23052089,2160-11-21 01:56:00,2160-11-25 14:52:00,NaT,EW EMER.,WALK-IN/SELF REFERRAL,HOME HEALTH CARE,Medicare,ENGLISH,MARRIED,WHITE,2160-11-20 20:36:00,2160-11-21 03:20:00,0,35203156,2160-11-20 20:36:00,2160-11-21 03:20:00,NaT,NaT
3294,10001176,23334588,2186-11-29 03:56:00,2186-12-02 15:00:00,NaT,EU OBSERVATION,EMERGENCY ROOM,,Medicare,ENGLISH,MARRIED,WHITE,2186-11-28 21:34:00,2186-11-29 05:01:00,0,39677610,2186-11-28 21:34:00,2186-11-29 05:01:00,2186-11-29 05:01:00,2186-12-02 15:35:03
5413,10001180,21102262,2192-05-23 04:58:00,2192-05-23 16:43:00,NaT,EU OBSERVATION,EMERGENCY ROOM,,Other,ENGLISH,MARRIED,WHITE,2192-05-22 23:46:00,2192-05-23 05:27:00,0,33306350,2192-05-22 23:46:00,2192-05-23 05:27:00,NaT,NaT
2565,10001217,24597018,2157-11-18 22:56:00,2157-11-25 18:00:00,NaT,EW EMER.,EMERGENCY ROOM,HOME HEALTH CARE,Other,?,MARRIED,WHITE,2157-11-18 17:38:00,2157-11-19 01:24:00,0,39866888,2157-11-18 17:38:00,2157-11-19 01:24:00,NaT,NaT
2857,10004719,21197153,2183-08-30 18:28:00,2183-09-03 12:34:00,NaT,OBSERVATION ADMIT,EMERGENCY ROOM,HOME,Medicare,ENGLISH,SINGLE,WHITE,2183-08-30 14:38:00,2183-08-30 20:32:00,0,36819102,2183-08-30 14:38:00,2183-08-30 20:32:00,2183-08-30 20:32:00,2183-09-03 12:34:53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,19993951,23793933,2139-02-18 20:03:00,2139-02-19 15:49:00,NaT,EU OBSERVATION,EMERGENCY ROOM,,Medicare,ENGLISH,SINGLE,BLACK/AFRICAN AMERICAN,2139-02-18 11:39:00,2139-02-19 20:42:00,0,33496052,2139-02-18 11:39:00,2139-02-18 20:03:54,NaT,NaT
4740,19996783,25894657,2188-03-05 20:17:00,2188-03-14 17:15:00,NaT,DIRECT EMER.,PHYSICIAN REFERRAL,HOME HEALTH CARE,Other,?,MARRIED,ASIAN,2188-03-05 15:06:00,2188-03-05 21:40:00,0,39421649,2188-03-05 15:06:00,2188-03-05 21:40:00,NaT,NaT
4901,19997473,27787494,2173-09-11 00:53:00,2173-10-02 15:50:00,NaT,URGENT,TRANSFER FROM HOSPITAL,SKILLED NURSING FACILITY,Medicare,ENGLISH,MARRIED,WHITE,2173-09-10 22:20:00,2173-09-11 01:54:00,0,39636284,2173-09-10 22:20:00,2173-09-11 01:54:00,2173-09-11 01:54:00,2173-09-13 20:10:50
5547,19997576,25548363,2187-10-07 22:35:00,2187-10-10 19:50:00,NaT,OBSERVATION ADMIT,TRANSFER FROM HOSPITAL,HOME,Medicare,ENGLISH,MARRIED,WHITE,2187-10-07 18:43:00,2187-10-07 23:42:00,0,39468668,2187-10-07 18:43:00,2187-10-07 23:42:00,2187-10-07 23:42:00,2187-10-10 19:59:11


In [48]:
# Keep rows whose intime is at or before the hospital admit time, and whose
# next-transfer intime is at or after the admit time or is missing.
test = (
    admissions_S1
    .query("intime <= admittime")
    .query("intime_next >= admittime | intime_next.isna()")
)

In [43]:
# Row-wise boolean mask: next-transfer intime is at or after admittime, or is missing.
# NOTE(review): the recorded output below is a single number, which suggests the
# cell was run with an aggregation (e.g. .sum()) that is no longer in the code; confirm.
(test["intime_next"].ge(test["admittime"]) | test["intime_next"].isna())

8306

In [71]:
# Index label of the row in `test` with the smallest (dischtime - outtime_next) difference.
(test["dischtime"] - test["outtime_next"]).idxmin()

2301

In [105]:
# Largest allowed difference between the next transfer's outtime and the discharge time
max_transfer_lag = pd.Timedelta(hours=2)

# All conditions are combined into a single row mask over admissions_S1:
keep_rows = (
    # transfer intime at or before the hospital admit time
    admissions_S1["intime"].le(admissions_S1["admittime"])
    # next transfer starts at or after the hospital admit time, or there is no next transfer
    & (
        admissions_S1["intime_next"].ge(admissions_S1["admittime"])
        | admissions_S1["intime_next"].isna()
    )
    # transfer outtime at or before the ED exit time
    & admissions_S1["outtime"].le(admissions_S1["edouttime"])
    # transfer intime at or before the ED registration time
    & admissions_S1["intime"].le(admissions_S1["edregtime"])
    # next transfer ends no later than max_transfer_lag after discharge
    # (comparisons against a missing outtime_next evaluate to False, so those rows are excluded)
    & (admissions_S1["outtime_next"] - admissions_S1["dischtime"]).le(max_transfer_lag)
)
__test = admissions_S1.loc[keep_rows]

In [75]:
# Inspect every transfer row attached to one hard-coded stay_id, ordered by
# intime_x (the intime column coming from transfers_core; the "_x" suffix is
# added by the merge because both merged frames carry an intime column).
transfers_S1.query("stay_id==38593659").sort_values("intime_x")

Unnamed: 0,subject_id,hadm_id,transfer_id,eventtype,careunit,intime_x,outtime_x,stay_id,intime_y,outtime_y,intime_next,outtime_next
10659,12638978,27643582.0,38593659,ED,Emergency Department,2143-07-16 19:44:00,2143-07-17 01:40:00,38593659,2143-07-16 19:44:00,2143-07-17 01:40:00,2143-07-17 01:40:00,2143-07-22 09:16:56
10660,12638978,27643582.0,36995469,admit,PACU,2143-07-17 01:40:00,2143-07-22 09:16:56,38593659,2143-07-16 19:44:00,2143-07-17 01:40:00,2143-07-17 01:40:00,2143-07-22 09:16:56
10658,12638978,27643582.0,30649779,discharge,,2143-07-22 09:16:56,NaT,38593659,2143-07-16 19:44:00,2143-07-17 01:40:00,2143-07-17 01:40:00,2143-07-22 09:16:56


In [107]:
# Count missing values per column in the filtered admissions subset.
__test.isna().sum()

subject_id                 0
hadm_id                    0
admittime                  0
dischtime                  0
deathtime               4643
admission_type             0
admission_location         0
discharge_location       754
insurance                  0
language                   0
marital_status           252
ethnicity                  0
edregtime                  0
edouttime                  0
hospital_expire_flag       0
stay_id                    0
intime                     0
outtime                    0
intime_next                0
outtime_next               0
dtype: int64

In [108]:
# NOTE(review): scratch arithmetic; the origin of 150 and 4800 is not shown in
# this notebook — confirm what they represent or remove the cell.
150/4800

0.03125