In [1]:
# Import libraries and packages
import pandas as pd
import numpy as np
import warnings
import os
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides pandas diagnostics such as
# SettingWithCopyWarning or FutureWarning if any are raised later on --
# consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [2]:
# Load encounters file
# Builds `dept`: one row per encounter (csn) describing its earliest
# pediatric-ICU department stay, for hospital admissions from 2010 onwards.
dept_path = '/labs/kamaleswaranlab/ECMO/new_data/TAB2_Encounter_Departments.parquet.gzip'
dept = pd.read_parquet(dept_path)

# Parse all timestamp columns (DataFrame.apply runs to_datetime once per column).
date_cols = ['BIRTH_DATE', 'Hosp_Admission', 'Hosp_Discharge', 'Entered_Dept', 'Exited_Dept']
dept[date_cols] = dept[date_cols].apply(pd.to_datetime)

# Keep only the columns used below and give them short snake_case names.
column_names = {
    'Pat ID': 'patid',
    'Encounter CSN': 'csn',
    'Name': 'name',
    'BIRTH_DATE': 'dob',
    'Department': 'department',
    'Entered_Dept': 'entered_dept',
    'Exited_Dept': 'exited_dept',
    'Hosp_Admission': 'hosp_adm',
    'Hosp_Discharge': 'hosp_disch',
}
dept = dept[list(column_names)].rename(columns=column_names)

# Restrict to pediatric ICU department rows of admissions on/after 2010-01-01.
# na=False treats a missing department name as an explicit non-match, and
# .copy() detaches the result so later assignments do not target a slice.
is_picu = dept['department'].str.contains('PEDIATRIC ICU', na=False)
dept = dept[is_picu & (dept['hosp_adm'] >= '2010-01-01')].copy()

# Drop incomplete rows BEFORE the integer cast: astype(int) raises on a
# missing CSN, whereas such a row is meant to be discarded anyway.
dept = dept.dropna()
dept['csn'] = dept['csn'].astype(int)
dept = dept.drop_duplicates()

# Within each encounter order the rows chronologically and keep the first one,
# i.e. the earliest PICU stay. No nulls remain after dropna(), so
# GroupBy.first() returns whole rows rather than mixing values across rows.
dept = dept.sort_values(by=['csn', 'hosp_adm', 'entered_dept'])
dept = dept.groupby('csn', as_index=False).first()

# Load cohort file
# Read the screened cohort and keep only its identifier columns plus the
# date of birth.
cohort = pd.read_csv(
    '/labs/kamaleswaranlab/dchanci/data/pediatric_sepsis/prediction_ml/updated_data/data_screening/complete_cohort.csv'
).loc[:, ['patid', 'mrn', 'csn', 'dob']]

# Normalise dtypes: dob -> timestamp, csn -> int so it matches dept['csn'].
cohort = cohort.assign(
    dob=cohort['dob'].apply(pd.to_datetime),
    csn=cohort['csn'].astype(int),
)

# Attach the department stay and hospitalization timestamps held in `dept`.
# The inner join keeps only cohort encounters that have a row in `dept`.
cohort = pd.merge(
    cohort,
    dept[['csn', 'department', 'entered_dept', 'exited_dept', 'hosp_adm', 'hosp_disch']],
    how='inner',
    on='csn',
).drop_duplicates()

In [4]:
# Load encounters file
# Rebuilds `dept` as an unfiltered lookup of hospital admissions per patient:
# one row per distinct (patid, prev_csn, prev_hosp_adm). The "prev_" prefix
# prepares the columns for the patient-level join in the next cell.
prev_cols = {
    'Pat ID': 'patid',
    'Encounter CSN': 'prev_csn',
    'Hosp_Admission': 'prev_hosp_adm',
}

# Only the three needed columns are read from disk.
dept = pd.read_parquet(
    '/labs/kamaleswaranlab/ECMO/new_data/TAB2_Encounter_Departments.parquet.gzip',
    columns=list(prev_cols),
)
dept = dept[list(prev_cols)].rename(columns=prev_cols)

# Rows without a patient or encounter identifier cannot be linked to the
# cohort: a missing CSN would make astype(int) raise, and DataFrame.merge
# pairs NaN keys with each other.
dept = dept.dropna(subset=['patid', 'prev_csn'])

# Parse the admission timestamp with one vectorised call on the whole column
# (rather than element by element), the same way the column is parsed when
# the PICU table is built.
dept = dept.assign(
    prev_csn=dept['prev_csn'].astype(int),
    prev_hosp_adm=pd.to_datetime(dept['prev_hosp_adm']),
)

# The source has repeated rows per encounter once reduced to these columns;
# collapse them to one row per admission.
dept = dept.drop_duplicates()

In [5]:
# Find previous hospitalizations
# Output: one row per (cohort encounter, earlier admission of the same patient)
# with the gap between the two admissions and a flag for "within one year".
# NOTE: `dept` must be the per-patient admission table with columns
# patid / prev_csn / prev_hosp_adm, not the PICU table built earlier under
# the same name.

# One row per cohort encounter. De-duplicating the projection keeps repeated
# key rows from multiplying the pairs below and makes the cell safe to re-run.
cohort = cohort[['patid', 'csn', 'hosp_adm']].drop_duplicates()

# Pair every cohort encounter with every recorded admission of that patient.
cohort = cohort.merge(dept, how='inner', on='patid')

# Gap between the two admissions, in years of 365 days.
cohort['interval'] = (cohort['hosp_adm'] - cohort['prev_hosp_adm']) / pd.Timedelta('365 days')

# Keep strictly earlier admissions only (a missing timestamp gives NaN and is
# dropped by the comparison). An encounter is never its own predecessor, even
# if the raw table holds more than one admission timestamp for it.
is_previous = (cohort['interval'] > 0) & (cohort['csn'] != cohort['prev_csn'])
cohort = cohort[is_previous].copy()

# 1 when the earlier admission happened at most one year before, else 0.
cohort['prev_year'] = (cohort['interval'] <= 1.0).astype(int)

# Save file
cohort.to_parquet('/labs/kamaleswaranlab/dchanci/data/pediatric_sepsis/prediction_ml/updated_data/data_models/previous_hosp.parquet.gzip', compression='gzip')