In [3]:
# imports

import pickle
import pandas as pd
from datetime import datetime
from sklearn.preprocessing import OneHotEncoder

In [5]:
# Read the pickled visit data from the preprocessed X file.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
X_filePath = '../../../data/originalData/preprocessed_X_visit_over3.pkl'
with open(X_filePath, 'rb') as pickle_file:
    data = pickle.load(pickle_file)

# Read the admissions table from CSV into a DataFrame.
admissions = pd.read_csv('../../../data/originalData/admissions.csv')

In [6]:
# Parse the admission and discharge timestamp columns into datetime values.
for time_column in ('admittime', 'dischtime'):
    admissions[time_column] = pd.to_datetime(admissions[time_column])

In [7]:
# Stay length in whole hours: elapsed seconds floor-divided by 3600.
stay_duration = admissions['dischtime'] - admissions['admittime']
admissions['timespent'] = stay_duration.dt.total_seconds() // 3600

In [11]:
# Keep only the identifier and feature columns listed below; everything
# else in the admissions table is discarded.

columns_to_keep = [
    'subject_id',
    'hadm_id',
    'timespent',
    'admission_type',
    'admission_location',
    'insurance',
    'marital_status',
    'hospital_expire_flag',
]
admissions = admissions[columns_to_keep]

# Preview the first rows of the trimmed frame.
admissions_preview = admissions.head()
print(admissions_preview)

   subject_id   hadm_id  timespent  admission_type      admission_location  \
0    10000032  22595853       18.0          URGENT  TRANSFER FROM HOSPITAL   
1    10000032  22841357       24.0        EW EMER.          EMERGENCY ROOM   
2    10000032  25742920       42.0        EW EMER.          EMERGENCY ROOM   
3    10000032  29079034       53.0        EW EMER.          EMERGENCY ROOM   
4    10000068  25022803        7.0  EU OBSERVATION          EMERGENCY ROOM   

  insurance marital_status  hospital_expire_flag  
0     Other        WIDOWED                     0  
1  Medicaid        WIDOWED                     0  
2  Medicaid        WIDOWED                     0  
3  Medicaid        WIDOWED                     0  
4     Other         SINGLE                     0  


In [12]:
# Update the nested dictionary with admission-level features.
#
# `data` is iterated as subject_id -> {hadm_id -> record}; each record is a
# dict that receives six new keys copied from the matching admissions row.
#
# Instead of filtering the whole `admissions` frame once per visit (a full
# scan for every record), build a (subject_id, hadm_id) -> features lookup in
# a single pass and then do constant-time dictionary lookups.
key_columns = ['subject_id', 'hadm_id']
feature_columns = [
    'timespent', 'admission_type', 'admission_location',
    'insurance', 'marital_status', 'hospital_expire_flag',
]

admission_lookup = {}
for subj, hadm, *feature_values in admissions[key_columns + feature_columns].itertuples(
    index=False, name=None
):
    # setdefault keeps the FIRST row for a duplicated key, matching the
    # previous behaviour of taking `.iloc[0]` from the filtered frame.
    admission_lookup.setdefault((subj, hadm), feature_values)

for subject_id, nested_dict in data.items():
    for hadm_id, record in nested_dict.items():
        feature_values = admission_lookup.get((subject_id, hadm_id))
        if feature_values is None:
            # No admissions row for this visit: leave the record untouched.
            continue

        (timespent, admission_type, admission_location,
         insurance, marital_status, hospital_expire_flag) = feature_values

        # Add the new features. int() raises ValueError on NaN, so a matched
        # row with a missing timespent still fails loudly rather than silently.
        record['timespent'] = int(timespent)
        record['admission_type'] = admission_type
        record['admission_location'] = admission_location
        record['insurance'] = insurance
        record['marital_status'] = marital_status
        record['hospital_expire_flag'] = int(hospital_expire_flag)

In [13]:
def explore_nested_dict(data, top_key):
    """Print a summary of the nested dictionary stored under ``top_key``.

    Prints the top-level key and the nested dictionary's keys, then for
    every nested key prints the key, the type of its value and the value
    itself. If ``top_key`` is not in ``data``, prints a "not found" message
    instead. Returns nothing.
    """
    # Guard clause: nothing to explore when the key is absent.
    if top_key not in data:
        print(f"Key {top_key} not found in the data.")
        return

    nested_dict = data[top_key]
    print(f"Top-level Key: {top_key}")
    print(f"Keys in nested dictionary: {nested_dict.keys()}")

    # Walk every nested entry and describe its value.
    for nested_key, nested_value in nested_dict.items():
        print(f"\nNested Key: {nested_key}")
        print(f"Type of value: {type(nested_value)}")
        print(f"Value sample: {nested_value}")


# Top-level key whose nested dictionary is printed for inspection.
top_key_to_inspect = 10000032

explore_nested_dict(data, top_key=top_key_to_inspect)

Top-level Key: 10000032
Keys in nested dictionary: dict_keys([22595853, 22841357, 29079034])

Nested Key: 22595853
Type of value: <class 'dict'>
Value sample: {'diagnoses': ['d_572', 'd_789', 'd_571', 'd_070', 'd_496', 'd_296', 'd_309'], 'procedures': ['pcs_549'], 'drugs': ['p_NACLFLUSH', 'p_SPIR25', 'p_RALT400', 'p_ALBU17H', 'p_FURO20', 'p_MICROK10', 'p_NICO14P', 'p_HEPA5I', 'p_SPIR25', 'p_IPRA2H', 'p_INFL0.5LF', 'p_TRUV200/300', 'p_FURO40', 'p_APAP500'], 'admitdate': '2180-05-06', 'timespent': 18, 'admission_type': 'URGENT', 'admission_location': 'TRANSFER FROM HOSPITAL', 'insurance': 'Other', 'marital_status': 'WIDOWED', 'hospital_expire_flag': 0}

Nested Key: 22841357
Type of value: <class 'dict'>
Value sample: {'diagnoses': ['d_070', 'd_789', 'd_287', 'd_276', 'd_496', 'd_571', 'd_305'], 'procedures': ['pcs_549'], 'drugs': ['p_BACTDS', 'p_TIOT', 'p_RIFA550', 'p_APAP500', 'p_ALBU25', 'p_NACLFLUSH', 'p_RALT400', 'p_HEPA5I', 'p_TRUV200/300', 'p_CAL1250', 'p_FURO40', 'p_INFL0.5LF', 'p

In [None]:
# Persist the enriched nested dictionary as a pickle file.
# The confirmation message is built from the same path that is written, so
# it cannot drift from the real output file name.
output_path = 'X_Admin.pkl'
with open(output_path, 'wb') as f:
    pickle.dump(data, f)

print(f"Updated file saved as {output_path}")

Updated file saved as preprocessed_X_visit_over3_updated.pkl
