In [1]:
# Import libraries and packages
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the department-level encounter table
print('Loading encounters...')
dept_path = '/opt/data/rishi/ECMO/new_data/TAB2_Encounter_Departments.csv'
dept = pd.read_csv(dept_path)

# Parse every timestamp column in a single pass
raw_date_cols = ['BIRTH_DATE', 'Hosp_Admission', 'Hosp_Discharge', 'Entered_Dept', 'Exited_Dept']
dept[raw_date_cols] = dept[raw_date_cols].apply(pd.to_datetime)

# Keep only the columns used below and give them short snake_case names
# (raw column name -> short name, in the order the columns are kept)
short_names = {
    'Pat ID': 'patid',
    'Encounter CSN': 'csn',
    'Name': 'name',
    'BIRTH_DATE': 'dob',
    'Department': 'department',
    'Entered_Dept': 'entered_dept',
    'Exited_Dept': 'exited_dept',
    'Hosp_Admission': 'hosp_adm',
    'Hosp_Discharge': 'hosp_disch',
}
dept = dept[list(short_names)]
dept.columns = list(short_names.values())

# Restrict to pediatric ICU stays of hospitalisations admitted from 2010 onwards,
# then discard incomplete and duplicated rows
in_picu = dept['department'].str.contains('PEDIATRIC ICU')
admitted_from_2010 = dept['hosp_adm'] >= '2010-01-01'
dept = dept[in_picu & admitted_from_2010]
dept = dept.dropna()
dept = dept.drop_duplicates()

# One row per encounter: the earliest pediatric ICU stay of each csn
dept_first = (
    dept
    .sort_values(by=['csn', 'hosp_adm', 'entered_dept'])
    .groupby('csn', as_index=False)
    .first()
)

# Load complete cohort
print('Loading complete cohort...')
cohort = pd.read_csv('/opt/moredata/dchanci/pediatric_sepsis/data_screening/cohort_sirs_od.csv')
cohort = cohort[['patid', 'mrn', 'csn', 'dob']]

# Attach the stay information; the inner join keeps only cohort encounters
# that have a pediatric ICU stay in the department table
stay_cols = ['csn', 'department', 'entered_dept', 'exited_dept', 'hosp_adm', 'hosp_disch']
cohort = cohort.merge(dept_first[stay_cols], how='inner', on='csn')

Loading encounters...
Loading complete cohort...


In [3]:
# Load data
print('Loading data...')
variables = pd.read_csv('/opt/moredata/dchanci/pediatric_sepsis/data_models/raw_features.csv')
variables[['dob', 'recorded_time']] = variables[['dob', 'recorded_time']].apply(pd.to_datetime)
variables[['csn', 'variable_id']] = variables[['csn', 'variable_id']].astype(int)
variables = variables.dropna(subset=['value'])

# Add mechanical ventilation data (only for encounters present in the features file).
# .copy() makes the filtered frame independent, so the column assignment below is
# not a write into a slice of the original frame.
mv = pd.read_csv('/opt/moredata/dchanci/pediatric_sepsis/data_screening/mv_data.csv')
mv = mv[mv['csn'].isin(variables['csn'].unique().tolist())].copy()
mv[['dob', 'recorded_time']] = mv[['dob', 'recorded_time']].apply(pd.to_datetime)
variables = pd.concat([variables, mv])

# Load meds: keep weight-based continuous doses of epinephrine / norepinephrine /
# dopamine ('epinephrine' also matches 'norepinephrine') for the known encounters
meds = pd.read_csv('/opt/moredata/dchanci/pediatric_sepsis/data_screening/filtered_meds.csv')
meds[['dob', 'mar_time']] = meds[['dob', 'mar_time']].apply(pd.to_datetime)
meds = meds[(meds['csn'].isin(variables['csn'].unique().tolist())) & (meds['dose_unit'] == 'mcg/kg/min') &
        (meds['med'].str.contains('epinephrine|dopamine', case=False))]
# Reshape to the same long layout as `variables`; .copy() for the same reason as above
meds = meds[['patid', 'csn', 'dob', 'med_id', 'med', 'mar_time', 'dose']].copy()
meds.columns = ['patid', 'csn', 'dob', 'variable_id', 'variable_name', 'recorded_time', 'value']

# Collapse the free-text medication names to three canonical names.
# Plain epinephrine must be handled before norepinephrine is renamed, because
# the 'epinephrine' pattern also matches 'norepinephrine'.
has_epi = meds['variable_name'].str.contains("epinephrine", case=False)
has_norepi = meds['variable_name'].str.contains("norepinephrine", case=False)
meds.loc[has_epi & ~has_norepi, 'variable_name'] = 'epinephrine'
meds.loc[meds['variable_name'].str.contains("norepinephrine", case=False), 'variable_name'] = 'norepinephrine'
meds.loc[meds['variable_name'].str.contains("dopamine", case=False), 'variable_name'] = 'dopamine'
meds.reset_index(inplace=True, drop=True)
variables = pd.concat([variables, meds])

# Add hospital admission and department (inner join: encounters outside the cohort are dropped)
print('Adding hospital admission and department...')
variables = pd.merge(variables, cohort[['csn', 'hosp_adm', 'department']], on='csn', how='inner')
variables = variables[['patid', 'csn', 'dob', 'hosp_adm', 'department', 'variable_id', 'variable_name', 'recorded_time', 'value']]

# Fix blood pressure: every 'BP' reading is a "systolic/diastolic" string.
# The original rows become the diastolic value and an independent copy of them
# becomes the systolic value.
print('Fixing blood pressure...')
is_bp = variables['variable_name'] == 'BP'
sysbp = variables[is_bp].copy()  # explicit copy: the assignments below must not target a slice
sysbp['variable_id'] = 1
sysbp['variable_name'] = 'bp_sys'
sysbp['value'] = sysbp['value'].apply(lambda reading: float(reading.split('/')[0]))

variables.loc[is_bp, 'variable_name'] = 'bp_dias'
is_dias = variables['variable_name'] == 'bp_dias'
variables.loc[is_dias, 'value'] = variables.loc[is_dias, 'value'].apply(lambda reading: float(reading.split('/')[1]))

variables = pd.concat([variables, sysbp])
variables = variables.dropna(subset=['value'])
variables = variables.reset_index(drop=True)

Loading data...
Adding hospital admission and department...
Fixing blood pressure...


In [4]:
# Change some variables names. Similar variables will have the same name
print('Changing variables names...')

# Raw variable name -> canonical name. Several raw names intentionally share
# one canonical name so equivalent measurements end up in the same column.
canonical_names = {
    'Weight': 'weight',
    'Volume Infused (mL)': 'vol_infused',
    'Urine (mL)': 'urine',
    'Code Sheet Weight (kg)': 'weight',
    'Pulse': 'pulse',
    'MAP': 'map',
    'ABP MAP': 'map',
    'ART MAP': 'map',
    'Resp': 'resp',
    'SpO2': 'spo2',
    'Perfused Pulse (SpO2)': 'spo2',
    'Temp': 'temp',
    'FiO2 (%)': 'fio2',
    'PaO2/FiO2 (Calculated)': 'pao2_fio2',
    'Pupil Left Reaction': 'pupil_left_reaction',
    'Pupil Left Size': 'pupil_left_size',
    'Pupil Right Reaction': 'pupil_right_reaction',
    'Pupil Right Size': 'pupil_right_size',
    'Coma Scale Total': 'coma_scale_total',
    'Oxygen Flow (lpm)': 'o2_flow',
    'POC pH': 'ph',
    'POC PO2': 'art_po2',
    'ARTERIAL POC PO2': 'art_po2',
    'POC PCO2': 'art_pco2',
    'ARTERIAL POC PCO2': 'art_pco2',
    'POTASSIUM': 'potassium',
    'SODIUM': 'sodium',
    'CHLORIDE': 'chloride',
    'POC GLUCOSE': 'glucose',
    'GLUCOSE': 'glucose',
    'BUN': 'bun',
    'CREATININE': 'creatinine',
    'CALCIUM': 'calcium',
    'POC CALCIUM IONIZED': 'calcium_ionized',
    'CO2': 'co2',
    'HEMOGLOBIN': 'hemoglobin',
    'BILIRUBIN TOTAL': 'bilirubin_total',
    'ALBUMIN': 'albumin',
    'WBC': 'wbc',
    'PLATELETS': 'platelets',
    'PTT': 'ptt',
    'PTT.': 'ptt',
    'ARTERIAL BASE EXCESS': 'art_base_excess',
    'HCO3': 'bicarbonate',
    'LACTIC ACID': 'lactic_acid',
    'POC LACTIC ACID': 'lactic_acid',
    'BAND NEUTROPHILS % (MANUAL)': 'band_neutrophils',
    'ART BASE DEFICIT': 'base_deficit',
    'VENOUS BASE DEFICIT': 'base_deficit',
    'CAP BASE DEFICIT': 'base_deficit',
    'ARTERIAL POC PH': 'ph',
    'CAPILLARY POC PH': 'ph',
    'VENOUS POC PH': 'ph',
    'ALT (SGPT)': 'alt',
    'AST (SGOT)': 'ast',
    'INT NORM RATIO': 'inr',
    'PROTIME': 'pt',
}

# Exact-match lookup; names that are not in the table are left untouched
variables['variable_name'] = variables['variable_name'].map(lambda raw_name: canonical_names.get(raw_name, raw_name))

# The numeric id is no longer meaningful once names have been merged
variables = variables.drop(columns=['variable_id'])

Changing variables names...


In [5]:
# Gather first 24 hours of data
# Hour bucket of each observation relative to hospital admission, rounded up:
# bucket 1 is the first hour after admission. Observations at or before
# admission (bucket <= 0) and after hour 24 are discarded, as are rows whose
# bucket cannot be computed (missing timestamps compare as False).
hours_from_admission = np.ceil((variables['recorded_time'] - variables['hosp_adm']) / pd.Timedelta('1 hour'))
in_first_day = (hours_from_admission > 0) & (hours_from_admission <= 24)
variables = variables[in_first_day].reset_index(drop=True)

In [6]:
# Fix pupillary reaction
print('Fixing pupillary reaction...')
reaction_rows = variables['variable_name'].isin(['pupil_left_reaction', 'pupil_right_reaction'])

# Canonical label -> raw charted values that collapse into it (applied in this order)
reaction_labels = {
    'Reactive': ['Brisk', 'Sluggish', 'Hippus'],
    'Non-reactive': ['Non-reactive'],
    'Unable to Assess': ['Unable to assess', 'Pinpoint', 'No eye', 'Pharmacologically dilated', 'Keyhole', 'Ovoid', 'Ovid'],
}
for label, raw_values in reaction_labels.items():
    variables.loc[reaction_rows & variables['value'].isin(raw_values), 'value'] = label

# Fix pupil size: drop the last two characters of the charted value
# (presumably a unit suffix such as "mm" - TODO confirm against the raw data)
size_rows = variables['variable_name'].isin(['pupil_left_size', 'pupil_right_size'])
variables.loc[size_rows, 'value'] = variables.loc[size_rows, 'value'].apply(lambda raw: raw[:-2])

# Numerical variables
print('Dropping invalid observations...')
num_vars = ['weight', 'pulse', 'map', 'bp_sys', 'bp_dias', 'resp', 'spo2', 'temp', 'fio2', 'pao2_fio2', 'pupil_left_size',
            'pupil_right_size', 'coma_scale_total', 'o2_flow', 'ph', 'art_po2', 'art_pco2', 'potassium',
            'sodium', 'chloride', 'glucose', 'bun', 'creatinine', 'calcium', 'calcium_ionized', 'co2', 'hemoglobin',
            'bilirubin_total', 'albumin', 'wbc', 'platelets', 'ptt', 'art_base_excess', 'bicarbonate', 'lactic_acid',
            'vol_infused', 'urine']
num_vars_extra = num_vars + ['band_neutrophils', 'base_deficit', 'alt', 'ast', 'pt', 'inr', 'epinephrine', 'norepinephrine', 'dopamine']

# Categorical variables
cat_vars = ['pupil_left_reaction', 'pupil_right_reaction']

# Check that all values are numbers for numerical variables.
# A value passes when its text form is made only of digits after removing at
# most one ".", so "12", "12.5" and ".5" pass while "<0.1" and "" do not.
# NOTE(review): a leading "-" also fails this test, so negative readings
# (e.g. a negative base excess) are silently dropped here - confirm intended.
looks_numeric = variables['value'].apply(lambda raw: str(raw).replace(".", "", 1).isdigit())
variables = variables[looks_numeric | variables['variable_name'].isin(cat_vars)]

# Cast the surviving numeric observations to float
numeric_rows = variables['variable_name'].isin(num_vars_extra)
variables.loc[numeric_rows, 'value'] = variables.loc[numeric_rows, 'value'].astype(float)
variables = variables.dropna(subset=['value'])
variables = variables.reset_index(drop=True)

Fixing pupillary reaction...
Dropping invalid observations...


In [8]:
# Data wrangling and imputation
print('Data wrangling and generation of flags...')

# Pivot data: one row per encounter and timestamp, one column per variable.
# When several observations share the same key, the first one in the current
# row order is kept.
row_keys = ['patid', 'csn', 'dob', 'hosp_adm', 'department', 'recorded_time']
variables = pd.pivot_table(
    variables,
    values='value',
    index=row_keys,
    columns=['variable_name'],
    aggfunc=(lambda observations: observations.iloc[0]),
    fill_value=np.nan,
).reset_index()

# Make sure the time columns are datetimes after the round trip through the index
time_cols = ['dob', 'hosp_adm', 'recorded_time']
variables[time_cols] = variables[time_cols].apply(pd.to_datetime)

Data wrangling and generation of flags...


In [9]:
# Add age (at hospital admission)
age_at_admission = variables['hosp_adm'] - variables['dob']
variables['age_days'] = (age_at_admission / pd.Timedelta('1 day')).round(0)
# NOTE(review): a month is taken as 31 days, so "age_months < 1" means younger
# than 31 days, but larger cut-offs drift (12 months = 372 days) - confirm intended.
variables['age_months'] = (variables['age_days'] / 31).round(2)
variables['age_years'] = (variables['age_days'] / 365.25).round(2)

In [10]:
# Calculate pSOFA components
#
# Every component is computed per row (one row = one encounter at one
# timestamp). A missing measurement makes every comparison False, so the
# component falls through to 0 for that row.

# Age brackets shared by the age-dependent criteria, youngest first.
# np.select uses the FIRST matching condition, so each row belongs to exactly
# one bracket; a missing age matches none and yields NaN, which in turn makes
# the comparisons against that threshold False.
psofa_age = variables['age_months'].to_numpy(dtype=float)
psofa_age_brackets = [psofa_age < 1, psofa_age < 12, psofa_age < 24, psofa_age < 60,
                      psofa_age < 144, psofa_age <= 216, psofa_age > 216]


def _psofa_by_age(per_bracket_values):
    """Return, for every row, the value listed for that row's age bracket (NaN if age is missing)."""
    return np.select(psofa_age_brackets, [float(value) for value in per_bracket_values], default=np.nan)


def _psofa_numeric(column):
    """Return a pivoted measurement column as a float array (unparseable entries become NaN)."""
    return pd.to_numeric(variables[column], errors='coerce').to_numpy(dtype=float)


# Calculate spo2_fio2
# SpO2 is carried forward for at most 24 consecutive rows within an encounter
# (rows, not hours). The ratio is only defined where the carried SpO2 is <= 97.
variables['spo2_imputed'] = variables.groupby(['csn'])['spo2'].ffill(limit=24)
variables.loc[variables['spo2_imputed'] <= 97, 'spo2_fio2'] = variables['spo2_imputed'] / (variables['fio2'] / 100)

# Calculate respiratory component (either ratio may qualify a row for a tier)
variables['resp_psofa'] = np.where((variables['pao2_fio2'] >= 400) | (variables['spo2_fio2'] >= 292), 0,
                        np.where((variables['pao2_fio2'] >= 300) | (variables['spo2_fio2'] >= 264), 1,
                        np.where((variables['pao2_fio2'] >= 200) | (variables['spo2_fio2'] >= 221), 2,
                        np.where((variables['pao2_fio2'] >= 100) | (variables['spo2_fio2'] >= 148), 3,
                        np.where((variables['pao2_fio2'] < 100) | (variables['spo2_fio2'] < 148), 4, 0)))))
variables.drop(['spo2_imputed', 'spo2_fio2'], axis=1, inplace=True)

# Calculate coagulation component
variables['coag_psofa'] = np.where(variables['platelets'] >= 150, 0,
                        np.where(variables['platelets'] >= 100, 1,
                        np.where(variables['platelets'] >= 50, 2,
                        np.where(variables['platelets'] >= 20, 3,
                        np.where(variables['platelets'] < 20, 4, 0)))))

# Calculate hepatic component
variables['hep_psofa'] = np.where(variables['bilirubin_total'] < 1.2, 0,
                        np.where(variables['bilirubin_total'] < 2, 1,
                        np.where(variables['bilirubin_total'] < 6, 2,
                        np.where(variables['bilirubin_total'] < 12, 3,
                        np.where(variables['bilirubin_total'] >= 12, 4, 0)))))

# Calculate cardiovascular component (No dobutamine)
# The tiers are evaluated from the most to the least severe so that the worst
# applicable criterion wins. The previous version tested the MAP tiers first,
# which hid any vasoactive dose on a row that also had a MAP, and tested
# "dopamine > 5" before "dopamine > 15", which made score 4 unreachable for
# dopamine.
# NOTE(review): a charted dose of 0 still satisfies the "<=" tiers, exactly as
# before - confirm whether zero doses should count as "on vasoactives".
psofa_map = _psofa_numeric('map')
psofa_dopamine = _psofa_numeric('dopamine')
psofa_epinephrine = _psofa_numeric('epinephrine')
psofa_norepinephrine = _psofa_numeric('norepinephrine')
psofa_map_floor = _psofa_by_age([46, 55, 60, 62, 65, 67, 70])  # lowest MAP that scores 0, per age bracket
variables['card_psofa'] = np.select(
    [
        (psofa_dopamine > 15) | (psofa_epinephrine > 0.1) | (psofa_norepinephrine > 0.1),
        (psofa_dopamine > 5) | (psofa_epinephrine <= 0.1) | (psofa_norepinephrine <= 0.1),
        psofa_dopamine <= 5,
        psofa_map < psofa_map_floor,
    ],
    [4, 3, 2, 1],
    default=0,
)

# Calculate neurologic component
variables['neuro_psofa'] = np.where(variables['coma_scale_total'] >= 15, 0,
                        np.where(variables['coma_scale_total'] >= 13, 1,
                        np.where(variables['coma_scale_total'] >= 10, 2,
                        np.where(variables['coma_scale_total'] >= 6, 3,
                        np.where(variables['coma_scale_total'] < 6, 4, 0)))))

# Calculate renal component
# Lowest creatinine that reaches score 1, 2, 3 and 4, listed per age bracket
# (same order as psofa_age_brackets). The previous version OR-ed all brackets
# using only their upper age bound, so a young child also matched the more
# lenient cut-offs of every older bracket (e.g. a 6-month-old with creatinine
# 0.5 scored 0 through the "age < 60 and creatinine < 0.6" clause). Each row
# is now compared against the cut-offs of its own bracket only.
psofa_creatinine = _psofa_numeric('creatinine')
psofa_renal_lower_bounds = [
    [0.8, 0.3, 0.4, 0.6, 0.7, 1.0, 1.2],  # score >= 1
    [1.0, 0.5, 0.6, 0.9, 1.1, 1.7, 2.0],  # score >= 2
    [1.2, 0.8, 1.1, 1.6, 1.8, 2.9, 3.5],  # score >= 3
    [1.6, 1.2, 1.5, 2.3, 2.6, 4.2, 5.0],  # score == 4
]
# The bounds increase with the score inside every bracket, so the score equals
# the number of bounds the creatinine value reaches.
psofa_renal_score = np.zeros(len(variables), dtype=int)
for lower_bounds in psofa_renal_lower_bounds:
    psofa_renal_score += psofa_creatinine >= _psofa_by_age(lower_bounds)
variables['renal_psofa'] = psofa_renal_score

# Create psofa components list
psofa_comps = ['resp_psofa', 'coag_psofa', 'hep_psofa', 'card_psofa', 'neuro_psofa', 'renal_psofa']

# Drop unnecessary columns
variables.drop(['epinephrine', 'norepinephrine', 'dopamine'], axis=1, inplace=True)

In [11]:
# Calculate pelod-II components
#
# Every component is computed per row; a missing measurement makes every
# comparison False, so the component falls through to 0 for that row.

variables = variables.reset_index(drop=True)

# Age brackets shared by the age-dependent criteria, youngest first.
# np.select uses the FIRST matching condition, so each row belongs to exactly
# one bracket; a missing age matches none and yields NaN.
pelod_age = variables['age_months'].to_numpy(dtype=float)
pelod_age_brackets = [pelod_age < 1, pelod_age < 12, pelod_age < 24,
                      pelod_age < 60, pelod_age < 144, pelod_age >= 144]


def _pelod_by_age(per_bracket_values):
    """Return, for every row, the value listed for that row's age bracket (NaN if age is missing)."""
    return np.select(pelod_age_brackets, [float(value) for value in per_bracket_values], default=np.nan)


def _pelod_numeric(column):
    """Return a pivoted measurement column as a float array (unparseable entries become NaN)."""
    return pd.to_numeric(variables[column], errors='coerce').to_numpy(dtype=float)


# Add creatinine: the thresholds below are expressed on the value multiplied by
# 88.42 (presumably mg/dL -> umol/L - TODO confirm the unit of the source column)
pelod_creatinine_micro = _pelod_numeric('creatinine') * 88.42

# Add auxiliary columns

variables['coma_pelod'] = np.where(variables['coma_scale_total'] >= 11, 0,
                                    np.where(variables['coma_scale_total'] >= 5, 1,
                                    np.where(variables['coma_scale_total'] >= 3, 4, 0)))

# Only "both pupils non-reactive" scores; any other combination scores 0
variables['pupil_pelod'] = np.where((variables['pupil_right_reaction'] == 'Reactive') & (variables['pupil_left_reaction'] == 'Reactive'), 0,
                                    np.where((variables['pupil_right_reaction'] == 'Non-reactive') & (variables['pupil_left_reaction'] == 'Non-reactive'), 5, 0))

variables['lact_pelod'] = np.where(variables['lactic_acid'] < 5, 0,
                                    np.where(variables['lactic_acid'] < 11, 1,
                                    np.where(variables['lactic_acid'] >= 11, 4, 0)))

# Mean arterial pressure: three age-specific limits split the value into
# four tiers (0, 2, 3, 6). Limits are listed per age bracket, youngest first.
pelod_map = _pelod_numeric('map')
pelod_map_normal = _pelod_by_age([46, 55, 60, 62, 65, 67])   # MAP >= limit -> 0
pelod_map_low = _pelod_by_age([30, 38, 43, 45, 48, 51])      # MAP >  limit -> 2
pelod_map_very_low = _pelod_by_age([16, 24, 30, 31, 35, 37])  # MAP >  limit -> 3, otherwise 6
variables['map_pelod'] = np.select(
    [
        pelod_map >= pelod_map_normal,
        pelod_map > pelod_map_low,
        pelod_map > pelod_map_very_low,
        pelod_map <= pelod_map_very_low,
    ],
    [0, 2, 3, 6],
    default=0,
)

# Creatinine: 2 points when the value reaches the age-specific limit.
# The previous version OR-ed all brackets using only their upper age bound, so
# a young child also matched the more lenient "normal" limit of every older
# bracket below 144 months (e.g. a 6-month-old at 40 scored 0 through the
# "age < 60 and creatinine <= 50" clause). Each row is now compared against
# the limit of its own bracket only. Values strictly between the old "<=" and
# ">=" limits (e.g. 69.5 for the youngest bracket) keep scoring 0, as before.
pelod_creatinine_limit = _pelod_by_age([70, 23, 35, 51, 59, 93])
variables['creat_pelod'] = np.where(pelod_creatinine_micro >= pelod_creatinine_limit, 2, 0)

# NOTE(review): this tier is computed on the raw arterial PO2; confirm whether
# the score definition expects the PaO2/FiO2 ratio here instead.
variables['pao2_pelod'] = np.where(variables['art_po2'] >= 61, 0,
                                   np.where(variables['art_po2'] <= 60, 2, 0))

variables['paco2_pelod'] = np.where(variables['art_pco2'] < 59, 0,
                                    np.where(variables['art_pco2'] < 95, 1,
                                    np.where(variables['art_pco2'] >= 95, 3, 0)))

variables['mv_pelod'] = np.where(variables['mv_indicator'] == 0, 0,
                                 np.where(variables['mv_indicator'] > 0, 3, 0))

variables['wbc_pelod'] = np.where(variables['wbc'] > 2, 0,
                                  np.where(variables['wbc'] <= 2, 2, 0))

variables['plat_pelod'] = np.where(variables['platelets'] > 141, 0,
                                   np.where(variables['platelets'] > 76, 1,
                                    np.where(variables['platelets'] <= 76, 2, 0)))


pelod_comps = ['coma_pelod', 'pupil_pelod', 'lact_pelod', 'map_pelod', 'creat_pelod', 'pao2_pelod', 'paco2_pelod', 'mv_pelod', 'wbc_pelod', 'plat_pelod']

In [12]:
# Calculate prism-III components
#
# Every component is computed per row; a missing measurement makes every
# comparison False, so the component falls through to 0 for that row.

# (infant: 31 days - 2 years, child: 2 years - 12years)
# Rows outside both groups score 0 on every age-dependent component.
# NOTE(review): "age_months > 1" excludes a patient whose age_months is
# exactly 1.0 from the infant group - confirm the intended boundary.
prism_infant = (variables['age_months'] > 1) & (variables['age_years'] < 2)
prism_child = (variables['age_years'] >= 2) & (variables['age_years'] < 12)


def _prism_numeric(column):
    """Return a pivoted measurement column as a float Series (unparseable entries become NaN)."""
    return pd.to_numeric(variables[column], errors='coerce')


def _prism_in_range(values, low, high):
    """Boolean mask of the rows whose value lies in the closed interval [low, high]."""
    return (values >= low) & (values <= high)


# Systolic blood pressure.
# The child band for 2 points used to read 50..200, which gave 2 points to
# every normal pressure and made the 50..64 band (6 points) unreachable. It is
# now 150..200, mirroring the infant 130..160 band.
prism_sbp = _prism_numeric('bp_sys')
variables['sbp_prism'] = np.select(
    [
        (prism_infant & (_prism_in_range(prism_sbp, 130, 160) | _prism_in_range(prism_sbp, 55, 65))) |
        (prism_child & (_prism_in_range(prism_sbp, 150, 200) | _prism_in_range(prism_sbp, 65, 75))),

        (prism_infant & (_prism_in_range(prism_sbp, 40, 54) | (prism_sbp >= 160))) |
        (prism_child & (_prism_in_range(prism_sbp, 50, 64) | (prism_sbp >= 200))),

        (prism_infant & (prism_sbp < 40)) |
        (prism_child & (prism_sbp < 50)),
    ],
    [2, 6, 7],
    default=0,
)

variables['dbp_prism'] = np.where(variables['bp_dias'] > 110, 6, 0)

# Heart rate.
# The child clause lacked parentheses around its two pulse tests; because "&"
# binds tighter than "|", "pulse < 80" was applied to every row regardless of
# age. Both pulse tests are now scoped to the child group, like the infant clause.
prism_pulse = _prism_numeric('pulse')
variables['hr_prism'] = np.where(
    (prism_infant & ((prism_pulse > 160) | (prism_pulse < 90))) |
    (prism_child & ((prism_pulse > 150) | (prism_pulse < 80))), 4, 0)

# Respiratory rate (a rate below 1 is scored like the highest band)
prism_resp = _prism_numeric('resp')
variables['resp_prism'] = np.select(
    [
        (prism_infant & _prism_in_range(prism_resp, 61, 90)) |
        (prism_child & _prism_in_range(prism_resp, 51, 70)),

        (prism_infant & ((prism_resp > 90) | (prism_resp < 1))) |
        (prism_child & ((prism_resp > 70) | (prism_resp < 1))),
    ],
    [1, 5],
    default=0,
)

variables['pao2_prism'] = np.where((variables['pao2_fio2'] >= 200) & (variables['pao2_fio2'] <= 300), 2,
                                   np.where(variables['pao2_fio2'] < 200, 3, 0))

variables['paco2_prism'] = np.where((variables['art_pco2'] >= 51) & (variables['art_pco2'] <= 65), 1,
                                   np.where(variables['art_pco2'] > 65, 5, 0))

variables['coma_prism'] = np.where(variables['coma_scale_total'] < 8, 6, 0)

# Either pupil being non-reactive is enough
variables['pupil_prism'] = np.where((variables['pupil_right_reaction'] == 'Non-reactive') | (variables['pupil_left_reaction'] == 'Non-reactive'), 10, 0)

# Bilirubin only counts for patients older than one month
variables['bilir_prism'] = np.where((variables['bilirubin_total'] > 3.5) & (variables['age_months'] > 1), 6, 0)

variables['pot_prism'] = np.where(((variables['potassium'] >= 3.0) & (variables['potassium'] <= 3.5)) |
                                  ((variables['potassium'] >= 6.5) & (variables['potassium'] <= 7.5)), 1,
                                  np.where((variables['potassium'] < 3.0) | (variables['potassium'] > 7.5), 5, 0))

variables['calcium_prism'] = np.where(((variables['calcium'] >= 7.0) & (variables['calcium'] <= 8.0)) |
                                  ((variables['calcium'] >= 12.0) & (variables['calcium'] <= 15.0)), 2,
                                  np.where((variables['calcium'] < 7.0) | (variables['calcium'] > 15.0), 6, 0))

variables['gluc_prism'] = np.where(((variables['glucose'] >= 40) & (variables['glucose'] <= 60)) |
                                  ((variables['glucose'] >= 250) & (variables['glucose'] <= 400)), 4,
                                  np.where((variables['glucose'] < 40) | (variables['glucose'] > 400), 8, 0))

variables['bicarb_prism'] = np.where((variables['bicarbonate'] < 16) | (variables['bicarbonate'] > 32), 3, 0)

prism_comps = ['sbp_prism', 'dbp_prism', 'hr_prism', 'resp_prism', 'pao2_prism', 'paco2_prism', 'coma_prism', 'pupil_prism', 'bilir_prism', 'pot_prism', 'calcium_prism', 'gluc_prism', 'bicarb_prism']

In [13]:
# Compute scores

# Encounter id followed by every component column of the three scores
comps = ['csn'] + psofa_comps + pelod_comps + prism_comps

# Worst (maximum) value of each component over all rows of an encounter
variables = variables[comps].groupby('csn', as_index=False).max()

# Each total score is the sum of its own component columns
for score_name, component_cols in (('psofa', psofa_comps), ('pelod', pelod_comps), ('prism', prism_comps)):
    variables[score_name] = variables[component_cols].sum(axis=1)

# Keep one row per encounter with the three totals only
variables = variables[['csn', 'psofa', 'pelod', 'prism']]

# Save file
variables.to_csv('/opt/moredata/dchanci/pediatric_sepsis/data_analysis/mortality_scores.csv', index=False)