In [1]:
# Import libraries and packages
import pandas as pd
import numpy as np
import os
import math
from datetime import datetime, timedelta
from scipy import stats
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

### 1. Extract MV Events from ETT Files

1. Identify hospital visits.
2. Assign corresponding CSN to ETT file entries.
3. Select patients with endotracheal tube.

In [2]:
# Load the encounter/department table and normalise its date columns and column names
dept_path = '/labs/kamaleswaranlab/ECMO/new_data/TAB2_Encounter_Departments.parquet.gzip'
dept = pd.read_parquet(dept_path)
dept_date_cols = ['BIRTH_DATE', 'Hosp_Admission', 'Hosp_Discharge', 'Entered_Dept', 'Exited_Dept']
dept[dept_date_cols] = dept[dept_date_cols].apply(pd.to_datetime)
dept = dept[['Pat ID', 'Encounter CSN', 'Name', 'BIRTH_DATE', 'Department', 'Entered_Dept', 'Exited_Dept', 'Hosp_Admission', 'Hosp_Discharge']]
dept.columns = ['patid', 'csn', 'name', 'dob', 'department', 'entered_dept', 'exited_dept', 'hosp_adm', 'hosp_disch']

# Keep pediatric ICU stays from encounters admitted on or after 2010-01-01.
# na=False makes a missing department name an explicit non-match.
is_picu = dept['department'].str.contains('PEDIATRIC ICU', na=False)
dept = dept[is_picu & (dept['hosp_adm'] >= '2010-01-01')]

# Drop incomplete rows BEFORE casting: astype(int) raises on a missing CSN
dept = dept.dropna().drop_duplicates()
dept = dept.assign(csn=dept['csn'].astype(int))

# Load the ETT/tracheostomy table (columns are renamed positionally)
ett_path = '/labs/kamaleswaranlab/ECMO/new_data/TAB6_ETTs_and_Trachs.parquet.gzip'
mv_ett = pd.read_parquet(ett_path)
mv_ett.columns = ['patid', 'name', 'dob', 'mrn', 'lda_type', 'placement_date', 'removal_date', 'lda_in_place']
mv_ett[['dob', 'placement_date', 'removal_date']] = mv_ett[['dob', 'placement_date', 'removal_date']].apply(pd.to_datetime)

# Assign a CSN to each tube by matching the placement date to the patient's hospital stay.
# `dept` can hold several rows per encounter (one per department stay); de-duplicating the
# merge keys prevents every tube row from being repeated once per stay.
encounters = dept[['patid', 'csn', 'hosp_adm', 'hosp_disch']].drop_duplicates()
mv_ett = mv_ett.merge(encounters, how='inner', on='patid')
placed_during_stay = (mv_ett['placement_date'] >= mv_ett['hosp_adm']) & (mv_ett['placement_date'] <= mv_ett['hosp_disch'])
mv_ett = mv_ett[placed_during_stay & (mv_ett['placement_date'] >= '2010-01-01')]

# Create list of CSNs with tracheostomy tube (these encounters are excluded when MV events are combined)
trach_list = mv_ett.loc[mv_ett['lda_type'] == 'Tracheostomy Tube', 'csn'].unique().tolist()

# Filter patients with endotracheal tube
# NOTE(review): dropna() checks every column, so tubes without a removal_date or
# lda_in_place value are discarded as well - confirm this is intended.
mv_ett = mv_ett[mv_ett['lda_type'] == 'Endotracheal Tube'].dropna().reset_index(drop=True)

# Organize data: one 'mv_indicator' event per tube, timestamped at placement
mv_ett = (
    mv_ett[['patid', 'csn', 'dob', 'placement_date']]
    .rename(columns={'placement_date': 'recorded_time'})
    .assign(variable_id=1, variable_name='mv_indicator', value=1)
    [['patid', 'csn', 'dob', 'variable_id', 'variable_name', 'recorded_time', 'value']]
)

### 2. Extract MV Events from Flowsheets

1. Extract mechanical ventilation indicators from flowsheets.

In [3]:
# List of flowsheet variable names that indicate mechanical ventilation
# (the trailing number on each line is the flowsheet row ID, kept for reference)
mv_vars = ['Apparatus Type', #2060226
           'Type of Mechanical Ventilation', #2060652
           'Ventilator Type', #21000019
           'Vent Mode', #21000002
           'Mechanical Vent Initial', #2060659
           'Vent Subsequent Day Charge', #2060615
           'Airway Type', #2060451
           'ETT Placement Confirmation', #2060455
           'ETT Retape Date', #2060466
           'Is the Patient Intubated?', #2058125
           'Respiratory MAP Source', #2060635
           'Ventilator Disconnect Reason', #2060639
           'Ventilator Mode', #2060202
           'Monitored CO2 (ETCO2)' #2060501
           ]

# List of flowsheets (sorted, because os.listdir returns entries in arbitrary order)
fs_dir = '/labs/kamaleswaranlab/ECMO/new_data/new_flowsheets_feb23'
fs_list = sorted(entry for entry in os.listdir(fs_dir) if '.parquet' in entry)

# Positional column names of the raw flowsheet files and the subset carried forward
fs_columns = ['patid', 'csn', 'dob', 'template_id', 'template', 'variable_id', 'variable_name', 'recorded_time', 'value', 'comment']
mv_columns = ['patid', 'csn', 'dob', 'template', 'variable_name', 'recorded_time', 'value']

# Loop through flowsheets to extract mechanical ventilator indicators.
# Each file contributes one filtered frame; they are concatenated once at the end
# rather than being unpacked into per-row object arrays.
vars_data = []
for filename in fs_list:
    print('Extracting data from {} file...'.format(filename))
    df = pd.read_parquet(os.path.join(fs_dir, filename))
    df.columns = fs_columns
    # Filter first so the integer cast only touches the rows that are kept
    df = df.loc[df['variable_name'].isin(mv_vars), mv_columns].copy()
    df['csn'] = df['csn'].astype(int)
    vars_data.append(df)

# Create dataframe
print('Creating dataframe...')
if vars_data:
    mv_fs = pd.concat(vars_data, ignore_index=True)
else:
    # pd.concat raises on an empty list; fall back to an empty frame with the expected columns
    mv_fs = pd.DataFrame(columns=mv_columns)
mv_fs[['dob', 'recorded_time']] = mv_fs[['dob', 'recorded_time']].apply(pd.to_datetime)

# Save dataset file
mv_fs.to_parquet('/labs/kamaleswaranlab/dchanci/data/pediatric_sepsis/prediction_ml/updated_data/data_screening/mv_indicators_raw.parquet.gzip', compression='gzip')

Extracting data from Flowsheet_Rows_2010.parquet.gzip file...
Extracting data from Flowsheet_Rows_2011.parquet.gzip file...
Extracting data from Flowsheet_Rows_2012.parquet.gzip file...
Extracting data from Flowsheet_Rows_2013.parquet.gzip file...
Extracting data from Flowsheet_Rows_2014.parquet.gzip file...
Extracting data from Flowsheet_Rows_2015.parquet.gzip file...
Extracting data from Flowsheet_Rows_2016.parquet.gzip file...
Extracting data from Flowsheet_Rows_2017.parquet.gzip file...
Extracting data from Flowsheet_Rows_2018.parquet.gzip file...
Extracting data from Flowsheet_Rows_2019.parquet.gzip file...
Extracting data from Flowsheet_Rows_2020.parquet.gzip file...
Extracting data from Flowsheet_Rows_2021.parquet.gzip file...
Extracting data from Flowsheet_Rows_2022.parquet.gzip file...
Creating dataframe...


2. Filter mechanical ventilation indicators from flowsheets.

In [5]:
def _drop_rows(frame, variable, is_unwanted):
    """Return `frame` without the rows of `variable` that are flagged in `is_unwanted`.

    `is_unwanted` is a boolean Series aligned with `frame`; rows belonging to any
    other variable are always kept.
    """
    return frame[~((frame['variable_name'] == variable) & is_unwanted)]


def _discard_values(frame, variable, values):
    """Drop the rows of `variable` whose value is exactly one of `values`."""
    return _drop_rows(frame, variable, frame['value'].isin(values))


def _keep_values(frame, variable, values):
    """Drop the rows of `variable` whose value is NOT exactly one of `values`."""
    return _drop_rows(frame, variable, ~frame['value'].isin(values))


def _keep_matching(frame, variable, substrings):
    """Drop the rows of `variable` whose value contains none of `substrings`.

    Matching is a case-sensitive substring test against the values observed for
    `variable`.
    """
    observed = frame.loc[frame['variable_name'] == variable, 'value'].unique()
    matched = [value for value in observed if any(token in value for token in substrings)]
    return _keep_values(frame, variable, matched)


# Load mechanical ventilation indicators and keep only the encounters present in `dept`
mv_fs = pd.read_parquet('/labs/kamaleswaranlab/dchanci/data/pediatric_sepsis/prediction_ml/updated_data/data_screening/mv_indicators_raw.parquet.gzip')
mv_fs[['dob', 'recorded_time']] = mv_fs[['dob', 'recorded_time']].apply(pd.to_datetime)
mv_fs['csn'] = mv_fs['csn'].astype(int)
mv_fs = mv_fs[mv_fs['csn'].isin(dept['csn'].unique())]

# Add CSNs whose flowsheets mention a tracheostomy to the list of CSNs with tracheostomy tube.
# na=False treats missing values as "no match"; the list is rebuilt without duplicates so
# re-running this cell does not keep growing it.
is_trach_source = mv_fs['variable_name'].isin(['Apparatus Type', 'Mechanical Vent Initial'])
mentions_trach = mv_fs['value'].str.contains('Trach', na=False)
fs_trach_csns = mv_fs.loc[is_trach_source & mentions_trach, 'csn'].unique().tolist()
trach_list = list(dict.fromkeys(trach_list + fs_trach_csns))

# Rows without a value cannot be classified by the filters below
mv_fs = mv_fs.dropna(subset=['value'])

# Per-variable filters. Variables in mv_vars that are not listed here are left
# unfiltered, so any non-null value recorded for them is kept.

# Filter apparatus type
mv_fs = _keep_matching(mv_fs, 'Apparatus Type', ['OETT', 'ETT'])

# Filter Type of Mechanical Ventilation
mv_fs = _discard_values(mv_fs, 'Type of Mechanical Ventilation', ['Non-Invasive Ventilator', 'AVAPS', 'NAVA'])

# Filter Ventilator Type
mv_fs = _keep_values(mv_fs, 'Ventilator Type', ['Servo i', 'Drager Apollo', 'Drager Infinity V500', 'Drager Baby Log', 'Drager Evita', 'Servo U'])

# Filter Vent Mode
mv_fs = _discard_values(mv_fs, 'Vent Mode', ['Non-Invasive'])

# Filter Mechanical Vent Initial
mv_fs = _discard_values(mv_fs, 'Mechanical Vent Initial', ['Non-invasive Ventilator Initial Day', 'Off-Discontinued Vent', 'V/Trach', 'NV'])

# Filter Vent Subsequent Day Charge
mv_fs = _keep_values(mv_fs, 'Vent Subsequent Day Charge', ['Servo i', 'Drager Evita', 'SensorMedics (3100) B', 'Drager Baby Log', 'SensorMedics (3100) A', 'Drager Apollo', 'Drager Evita V 500', 'Drager Infinity C 500', 'Drager Infinity V500', 'Yes'])

# Filter Airway Type
mv_fs = _keep_matching(mv_fs, 'Airway Type', ['OETT', 'ETT', 'NETT', 'RAE'])

# Filter Is the Patient Intubated?
mv_fs = _discard_values(mv_fs, 'Is the Patient Intubated?', ['No Change', 'No'])

# Filter Ventilator Mode
mv_fs = _keep_matching(mv_fs, 'Ventilator Mode', ['SIMV', 'HFOV', 'PRVC', 'PS', 'VS', 'BiVent/APRV', 'VCV', 'IMV', 'Assist', 'Volume Guarantee', 'CMV', 'MMV', 'VG'])

# Collapse every surviving row into a generic 'mv_indicator' event at its recorded time
mv_fs = (
    mv_fs[['patid', 'csn', 'dob', 'recorded_time']]
    .reset_index(drop=True)
    .assign(variable_id=1, variable_name='mv_indicator', value=1)
    [['patid', 'csn', 'dob', 'variable_id', 'variable_name', 'recorded_time', 'value']]
)

### 3. Combine MV Events

In [6]:
# Concat dataframes; ignore_index avoids repeated index labels from the two sources
mv = pd.concat([mv_ett, mv_fs], ignore_index=True)

# Discard CSNs with trach, then renumber so the saved file carries a clean 0..n-1 index
mv = mv[~mv['csn'].isin(trach_list)].reset_index(drop=True)

# Save file
mv.to_parquet('/labs/kamaleswaranlab/dchanci/data/pediatric_sepsis/prediction_ml/updated_data/data_screening/mv_data.parquet.gzip', compression='gzip')