In [7]:
# Import libraries and packages
import pandas as pd
import numpy as np
import os
import math
import tableone
from datetime import datetime, timedelta
from scipy import stats
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [8]:
# Screening-method identifier. It is interpolated into the input dataset
# filename and into the results directory path used when exporting the table.
screening_method = "inf_phoenix"

In [None]:
# Load data
# The dataset file is selected by `screening_method`.
data = pd.read_parquet(os.path.join('/labs/kamaleswaranlab/dchanci/data/pediatric_sepsis/prediction_ml/updated_data/data_models', 'dataset_agg_sr_' + screening_method + '.parquet.gzip'))

# Missing and infinite values are zero-filled across the whole frame. The
# trailing dropna() is kept as a safety net for anything replace() leaves behind.
data = data.replace(np.nan, 0)
data = data.replace([np.inf, -np.inf], 0).dropna()

# Select columns (explicit copy so the column assignment below does not write
# into a slice of the full frame)
data = data[['patid', 'csn', 'age_years', 'label', 'gender', 'race']].copy()
data.columns = ['patid', 'csn', 'Age in years', 'Sepsis', 'Gender', 'Race']

# Compute Age group
# The year-based upper bounds are exclusive so that each label matches its
# range: a child aged exactly 3.0 belongs to '3 - 5 years' and a child aged
# exactly 6.0 to '6 - 17 years' (the previous `<=` comparisons placed them one
# group too low). Conditions are evaluated in order; the first match wins.
# NOTE(review): 0.083 years is roughly 1/12 year (~30 days), slightly more than
# the 28 days named in the label - confirm the intended neonatal cut-off.
# NOTE(review): ages that were missing upstream are 0 after the zero-fill above
# and therefore fall into the first group.
age_years = data['Age in years']
data['Age Group'] = np.select(
    [age_years <= 0.083, age_years < 3.0, age_years < 6.0],
    ['≤ 28 days', '29 days - 2 years', '3 - 5 years'],
    default='6 - 17 years',
)
data.head()

In [None]:
# Load encounters file
print('Loading encounters...')
dept_path = '/labs/kamaleswaranlab/ECMO/new_data/TAB2_Encounter_Departments.parquet.gzip'
dept = pd.read_parquet(dept_path)

# Parse every timestamp column in one pass.
timestamp_cols = ['BIRTH_DATE', 'Hosp_Admission', 'Hosp_Discharge', 'Entered_Dept', 'Exited_Dept']
dept[timestamp_cols] = dept[timestamp_cols].apply(pd.to_datetime)

# Keep only the needed columns, under short snake_case names.
encounter_columns = {
    'Pat ID': 'patid',
    'Encounter CSN': 'csn',
    'Name': 'name',
    'BIRTH_DATE': 'dob',
    'Department': 'department',
    'Entered_Dept': 'entered_dept',
    'Exited_Dept': 'exited_dept',
    'Hosp_Admission': 'hosp_adm',
    'Hosp_Discharge': 'hosp_disch',
}
dept = dept[list(encounter_columns)]
dept.columns = list(encounter_columns.values())

# Restrict to PEDIATRIC ICU department rows of hospital admissions from 2010
# onwards, then discard incomplete and repeated rows.
in_picu = dept['department'].str.contains('PEDIATRIC ICU')
admitted_since_2010 = dept['hosp_adm'] >= '2010-01-01'
dept = dept[in_picu & admitted_since_2010]
dept = dept.dropna()
dept = dept.drop_duplicates()

# Earliest PICU row per encounter: order chronologically, take the first per csn.
dept_sorted = dept.sort_values(by=['csn', 'hosp_adm', 'entered_dept'])
dept_first = dept_sorted.groupby('csn', as_index=False).first()

# Add hospital and PICU admission and discharge
# Inner join: encounters without a qualifying PICU row are dropped from `data`.
stay_columns = ['csn', 'department', 'entered_dept', 'exited_dept', 'hosp_adm', 'hosp_disch']
data = data.merge(dept_first[stay_columns], how='inner', on='csn')

In [None]:
# Compute hospital LOS (fractional days between hospital admission and discharge)
one_day = pd.Timedelta('1 day')
data['Hospital Length of Stay in days'] = (data['hosp_disch'] - data['hosp_adm']) / one_day

# Compute PICU LOS (fractional days between entering and leaving the department)
data['PICU Length of Stay in days'] = (data['exited_dept'] - data['entered_dept']) / one_day
# `entered_dept` is kept for now: it is part of the merge key used below.
data = data.drop(columns=['hosp_disch', 'hosp_adm', 'exited_dept'])

# Add mortality
depts = pd.read_parquet('/labs/kamaleswaranlab/ECMO/new_data/TAB2_Encounter_Departments.parquet.gzip')
depts['Entered_Dept'] = pd.to_datetime(depts['Entered_Dept'])
# Either "expired" discharge disposition counts as an in-hospital death.
expired = depts['Hospital_Discharge_Disposition'].isin(['Expired', 'Expired Place Unknown'])
depts['Hospital mortality'] = np.where(expired, 'Yes', 'No')
depts = depts[['Encounter CSN', 'Hospital mortality', 'Department', 'Entered_Dept']]
depts.columns = ['csn', 'Hospital Mortality', 'department', 'entered_dept']
# After projecting to these four columns the same row can appear several times;
# without de-duplication the merge below would replicate encounter rows and
# inflate every count in the final table.
depts = depts.drop_duplicates()

# Merge dataframes
data = data.merge(depts, how='inner', on=['csn', 'department', 'entered_dept'])
data = data.rename(columns={'department': 'PICU Campus'})

# Add ethnicity
pats = pd.read_parquet('/labs/kamaleswaranlab/ECMO/new_data/TAB1_Patients.parquet.gzip')
pats = pats[['Pat ID', 'Ethnicity']]
pats.columns = ['patid', 'Ethnicity']
# Same guard as above: repeated (patid, Ethnicity) rows must not multiply encounters.
pats = pats.drop_duplicates()

# Merge dataframes
data = data.merge(pats, how='inner', on='patid')
data = data.drop(columns=['entered_dept'])

In [12]:
# Add mortality scores
# Read the Phoenix, pSOFA, PELOD II and PRISM III columns and give them
# display names; encounters without a score row keep NaN (left join).
scores_path = '/labs/kamaleswaranlab/dchanci/data/pediatric_sepsis/prediction_ml/updated_data/data_analysis/scores_24.csv'
score_source_cols = ['csn', 'phoenix', 'psofa', 'pelod2', 'prism3']
score_display_cols = ['csn', 'Phoenix', 'pSOFA', 'PELOD II', 'PRISM III']

scores = pd.read_csv(scores_path)
scores = scores[score_source_cols]
scores.columns = score_display_cols

data = data.merge(scores, how='left', on='csn')

In [13]:
# Add admission diagnoses
adm_diag = pd.read_parquet('/labs/kamaleswaranlab/dchanci/data/pediatric_sepsis/prediction_ml/updated_data/data_analysis/adm_diag_predictions.parquet.gzip')

# One 'Yes'/'No' column per diagnosis: an encounter is 'Yes' when adm_diag has
# at least one row for its csn with that diagnosis indicator equal to 1.
diagnosis_columns = ['Sickle Cell Disease', 'Diabetic Ketoacidosis', 'Asthmaticus', 'Kidney Failure']
for diagnosis in diagnosis_columns:
    diagnosed_csns = adm_diag.loc[adm_diag[diagnosis] == 1, 'csn'].unique().tolist()
    has_diagnosis = data['csn'].isin(diagnosed_csns)
    data[diagnosis] = 'No'
    data.loc[has_diagnosis, diagnosis] = 'Yes'

In [14]:
# Add predictions
predictions_path = '/labs/kamaleswaranlab/dchanci/data/pediatric_sepsis/prediction_ml/updated_data/data_analysis/daily_predictions.csv'
predictions = pd.read_csv(predictions_path)

# Left join keeps every encounter, including those without a prediction row.
data = data.merge(predictions, how='left', on='csn')

# Identifiers, the label and the campus are not reported in the summary table.
unreported_columns = ['Sepsis', 'PICU Campus', 'patid', 'csn']
data = data.drop(columns=unreported_columns)

In [15]:
# Fix Ethnicity
# Collapse the "not reported" answers into a single 'Unknown' level.
not_reported_values = ['Declined', 'Patient Not Present', 'Parent Not Present']
is_not_reported = data['Ethnicity'].isin(not_reported_values)
data.loc[is_not_reported, 'Ethnicity'] = 'Unknown'
data.head()

Unnamed: 0,Age in years,Gender,Race,Age Group,Hospital Length of Stay in days,PICU Length of Stay in days,Hospital Mortality,Ethnicity,Phoenix,pSOFA,PELOD II,PRISM III,Sickle Cell Disease,Diabetic Ketoacidosis,Asthmaticus,Kidney Failure,prediction
0,7.99,Female,White,6 - 17 years,2.385417,2.091667,No,Non Hispanic or Latino,0.0,0.0,0.0,6.0,No,No,No,No,True Negatives
1,8.25,Female,White,6 - 17 years,0.893056,0.893056,No,Non Hispanic or Latino,0.0,1.0,2.0,2.0,No,No,No,No,True Negatives
2,11.04,Female,White,6 - 17 years,2.174306,1.010417,No,Non Hispanic or Latino,0.0,0.0,3.0,3.0,No,No,No,No,True Negatives
3,13.04,Female,White,6 - 17 years,2.256944,1.2625,No,Non Hispanic or Latino,0.0,1.0,5.0,4.0,No,No,No,No,True Negatives
4,14.1,Female,White,6 - 17 years,6.185417,5.852083,No,Non Hispanic or Latino,2.0,7.0,9.0,16.0,No,No,No,No,True Positives


In [16]:
# Create tableOne
# The table is stratified by `prediction`; every other column is summarised.
groupby = ['prediction']
columns = data.columns.tolist()
columns.remove('prediction')

categorical = [
    'Gender',
    'Race',
    'Age Group',
    'Hospital Mortality',
    'Ethnicity',
    'Sickle Cell Disease',
    'Diabetic Ketoacidosis',
    'Asthmaticus',
    'Kidney Failure',
]

# Every non-categorical column is declared non-normal.
nonnormal = [name for name in columns if name not in categorical]

# Use a fresh 0..n-1 index before building the table.
data = data.reset_index(drop=True)

myTable = tableone.TableOne(
    data,
    columns=columns,
    categorical=categorical,
    groupby=groupby,
    nonnormal=nonnormal,
    pval=False,
    missing=False,
    htest_name=False,
)
myTable

Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by prediction,Grouped by prediction,Grouped by prediction,Grouped by prediction,Grouped by prediction,Grouped by prediction
Unnamed: 0_level_1,Unnamed: 1_level_1,Overall,False Negatives,False Positives,Late True Positives,True Negatives,True Positives
n,,35557,274,3918,116,28837,2412
"Age in years, median [Q1,Q3]",,"4.1 [1.1,11.2]","8.9 [3.2,13.8]","4.0 [0.8,10.7]","8.6 [3.7,13.0]","3.9 [1.1,11.2]","5.2 [1.2,11.2]"
"Gender, n (%)",Female,15976 (44.9),124 (45.3),1753 (44.7),53 (45.7),12952 (44.9),1094 (45.4)
"Gender, n (%)",Male,19580 (55.1),150 (54.7),2164 (55.2),63 (54.3),15885 (55.1),1318 (54.6)
"Gender, n (%)",Unknown,1 (0.0),,1 (0.0),,,
"Race, n (%)",Asian,1224 (3.4),12 (4.4),148 (3.8),3 (2.6),956 (3.3),105 (4.4)
"Race, n (%)",Black or African American,10990 (30.9),87 (31.8),1184 (30.2),31 (26.7),8889 (30.8),799 (33.1)
"Race, n (%)",Other,160 (0.4),1 (0.4),23 (0.6),,129 (0.4),7 (0.3)
"Race, n (%)",Unknown,6281 (17.7),41 (15.0),856 (21.8),19 (16.4),4944 (17.1),421 (17.5)
"Race, n (%)",White,16902 (47.5),133 (48.5),1707 (43.6),63 (54.3),13919 (48.3),1080 (44.8)


In [17]:
# Write the summary table to the results folder of the current screening method.
results_dir = os.path.join('/home/dchanci/research/pediatric_sepsis/prediction_ml/models/results_updated', screening_method)
tableone_csv_path = os.path.join(results_dir, 'tableone_predictions.csv')
myTable.to_csv(tableone_csv_path)