In [12]:
# Import libraries and packages
import pandas as pd
import numpy as np
import os
import math
import tableone
from datetime import datetime, timedelta
from scipy import stats
import matplotlib.pyplot as plt
import warnings
# Silence every warning category for the rest of the notebook session.
# NOTE(review): this also hides pandas warnings (e.g. chained-assignment or
# deprecation notices) raised by the cells below — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [13]:
# Screening method identifier: it selects the input dataset file
# ('dataset_agg_sr_<screening_method>.parquet.gzip') and the sub-directory
# into which the final table is written.
screening_method = 'inf_phoenix'

In [None]:
# Load the aggregated dataset that matches the selected screening method
data = pd.read_parquet(os.path.join('/labs/kamaleswaranlab/dchanci/data/pediatric_sepsis/prediction_ml/updated_data/data_models', 'dataset_agg_sr_' + screening_method + '.parquet.gzip'))

# Missing and infinite values are replaced by 0. Once NaN has been replaced the
# trailing dropna() has nothing left to remove; it is kept as a safeguard.
# NOTE(review): a missing age therefore becomes 0 and falls in the youngest
# age group below — confirm this is intended.
data = data.replace(np.nan, 0)
data = data.replace([np.inf, -np.inf], 0).dropna()

# Select columns (copy so the assignments below never write to a slice of the full dataset)
data = data[['patid', 'csn', 'age_years', 'label', 'gender', 'race']].copy()
data.columns = ['patid', 'csn', 'Age in years', 'Sepsis', 'Gender', 'Race']

# Compute Age group
# The upper bounds of the middle groups are exclusive so that a child aged
# exactly 3.0 (or 6.0) years is placed in '3 - 5 years' (or '6 - 17 years'),
# in agreement with the group labels.
# NOTE(review): 0.083 years is roughly 30 days (about 1/12 of a year), slightly
# wider than the 28 days stated in the label — confirm the intended cut-off.
age_years = data['Age in years']
data['Age Group'] = np.select(
    [age_years <= 0.083, age_years < 3.0, age_years < 6.0],
    ['≤ 28 days', '29 days - 2 years', '3 - 5 years'],
    default='6 - 17 years',
)
data.head()

In [None]:
# Load encounters file
print('Loading encounters...')
dept_path = '/labs/collab/ECMO/new_data/TAB2_Encounter_Departments.parquet.gzip'
dept = pd.read_parquet(dept_path)

# Parse every timestamp column used below
date_cols = ['BIRTH_DATE', 'Hosp_Admission', 'Hosp_Discharge', 'Entered_Dept', 'Exited_Dept']
dept[date_cols] = dept[date_cols].apply(pd.to_datetime)

# Keep and rename the columns of interest
dept = dept[['Pat ID', 'Encounter CSN', 'Name', 'BIRTH_DATE', 'Department', 'Entered_Dept', 'Exited_Dept', 'Hosp_Admission', 'Hosp_Discharge']]
dept.columns = ['patid', 'csn', 'name', 'dob', 'department', 'entered_dept', 'exited_dept', 'hosp_adm', 'hosp_disch']

# Keep department stays whose name contains 'PEDIATRIC ICU' for hospital
# admissions on or after 2010-01-01. na=False makes a missing department name
# an explicit non-match instead of a NaN in the boolean mask.
is_picu = dept['department'].str.contains('PEDIATRIC ICU', na=False)
admitted_since_2010 = dept['hosp_adm'] >= '2010-01-01'

# Build a new frame rather than mutating the filtered slice in place
dept = dept[is_picu & admitted_since_2010].dropna().drop_duplicates()

# Earliest PICU stay of each encounter (ordered by hospital admission, then department entry)
dept_first = dept.sort_values(by=['csn', 'hosp_adm', 'entered_dept'])
dept_first = dept_first.groupby('csn', as_index=False).first()

# Add hospital and PICU admission and discharge
data = data.merge(dept_first[['csn', 'department', 'entered_dept', 'exited_dept', 'hosp_adm', 'hosp_disch']], how='inner', on='csn')

# Preview only the first rows instead of rendering the whole patient-level frame
data.head()

In [16]:
# Compute hospital LOS
data['Hospital Length of Stay in days'] = (data['hosp_disch'] - data['hosp_adm']) / pd.Timedelta('1 day')

# Compute PICU LOS (the hours column is only needed for the composite outcome below)
picu_stay = data['exited_dept'] - data['entered_dept']
data['PICU Length of Stay in days'] = picu_stay / pd.Timedelta('1 day')
data['PICU Length of Stay in hours'] = picu_stay / pd.Timedelta('1 hour')
data = data.drop(columns=['hosp_disch', 'hosp_adm', 'exited_dept'])

# Add mortality
# The encounter file is read again because the discharge disposition column
# is not part of the `dept` frame prepared earlier.
depts = pd.read_parquet(dept_path)
depts['Entered_Dept'] = pd.to_datetime(depts['Entered_Dept'])
depts['Hospital mortality'] = np.where(depts['Hospital_Discharge_Disposition'].isin(['Expired', 'Expired Place Unknown']), 'Yes', 'No')
depts = depts[['Encounter CSN', 'Hospital mortality', 'Department', 'Entered_Dept']]
depts.columns = ['csn', 'Hospital Mortality', 'department', 'entered_dept']
# Remove exact duplicate rows so that identical repeated records in the source
# file cannot multiply cohort rows in the inner merge below.
depts = depts.drop_duplicates()

# Merge dataframes
data = data.merge(depts, how='inner', on=['csn', 'department', 'entered_dept'])
data = data.rename(columns={'department': 'PICU Campus'})

# Add composite outcome: 'Yes' when the PICU stay lasted at least 72 hours or the patient died in hospital
composite = (data['PICU Length of Stay in hours'] >= 72) | (data['Hospital Mortality'] == 'Yes')
data['Hospital Mortality or PICU Stay >= 72 hours'] = np.where(composite, 'Yes', 'No')
data = data.drop(columns='PICU Length of Stay in hours')

# Add ethnicity
pats = pd.read_parquet("/labs/collab/ECMO/new_data/TAB1_Patients.parquet.gzip")
pats = pats[['Pat ID', 'Ethnicity']]
pats.columns = ['patid', 'Ethnicity']
# Same safeguard as above: identical repeated patient rows must not duplicate encounters
pats = pats.drop_duplicates()

# Merge dataframes
data = data.merge(pats, how='inner', on='patid')
data = data.drop(columns=['entered_dept'])

In [17]:
# Add mortality scores
# Map the raw score column names to the labels shown in the table
score_labels = {'phoenix': 'Phoenix', 'psofa': 'pSOFA', 'pelod2': 'PELOD II', 'prism3': 'PRISM III'}

scores = pd.read_csv('/labs/kamaleswaranlab/dchanci/data/pediatric_sepsis/prediction_ml/updated_data/data_analysis/scores_24.csv')
scores = scores[['csn', *score_labels]].rename(columns=score_labels)

# Left merge: encounters without a score row are kept
data = data.merge(scores, on='csn', how='left')

In [18]:
# Add admission diagnoses
adm_diag = pd.read_parquet('/labs/kamaleswaranlab/dchanci/data/pediatric_sepsis/prediction_ml/updated_data/data_analysis/adm_diag_predictions.parquet.gzip')

# For each diagnosis, mark an encounter 'Yes' when its csn appears in adm_diag
# with the corresponding indicator equal to 1, and 'No' otherwise.
for diagnosis in ['Sickle Cell Disease', 'Diabetic Ketoacidosis', 'Asthmaticus', 'Kidney Failure']:
    flagged_csns = adm_diag.loc[adm_diag[diagnosis] == 1, 'csn'].unique().tolist()
    data[diagnosis] = 'No'
    data.loc[data['csn'].isin(flagged_csns), diagnosis] = 'Yes'

In [19]:
# Add predictions
predictions = pd.read_csv('/labs/kamaleswaranlab/dchanci/data/pediatric_sepsis/prediction_ml/updated_data/data_analysis/daily_predictions.csv')

# Left merge on csn keeps every encounter; the columns listed below are not part of the summary table
data = data.merge(predictions, on='csn', how='left')
data = data.drop(columns=['Sepsis', 'PICU Campus', 'patid', 'csn'])

In [None]:
# Fix Ethnicity
# Responses that carry no ethnicity information are grouped under 'Unknown'
unreported = ['Declined', 'Patient Not Present', 'Parent Not Present']
is_unreported = data['Ethnicity'].isin(unreported)
data.loc[is_unreported, 'Ethnicity'] = 'Unknown'
data.head()

In [None]:
# Create tableOne
# The table is stratified by the 'prediction' column
groupby = ['prediction']

# Variables summarised as categories
categorical = [
    'Gender',
    'Race',
    'Age Group',
    'Hospital Mortality',
    'Ethnicity',
    'Sickle Cell Disease',
    'Diabetic Ketoacidosis',
    'Asthmaticus',
    'Kidney Failure',
    'Hospital Mortality or PICU Stay >= 72 hours',
]

# Every column except the grouping variable goes into the table
columns = list(data.columns)
columns.remove('prediction')

# All remaining (non-categorical) columns are declared as non-normal variables
nonnormal = [col for col in columns if col not in categorical]

data = data.reset_index(drop=True)

myTable = tableone.TableOne(
    data,
    columns=columns,
    categorical=categorical,
    groupby=groupby,
    nonnormal=nonnormal,
    pval=False,
    missing=False,
    htest_name=False,
)
myTable

In [22]:
# Write the table to the results directory of the selected screening method.
# The directory is created first so the export cannot fail on a missing folder.
results_dir = os.path.join('/home/dchanci/projects/ped_sepsis_prediction_ml/models/results_updated', screening_method)
os.makedirs(results_dir, exist_ok=True)
myTable.to_csv(os.path.join(results_dir, 'tableone_predictions.csv'))