In [2]:
import pandas as pd

# Read ICD PROC FILES

# Edit the file paths to match the location of the icd procedure files
# List of file paths of icd procedure files (pipe-delimited text)
file_paths = [
    "W:/Premier/extracted_nyu_2021/nyu_20211_paticd_proc.txt",
    "W:/Premier/extracted_nyu_2021/nyu_20212_paticd_proc.txt",
    "W:/Premier/extracted_nyu_2021/nyu_20213_paticd_proc.txt"
]

# Read every file and stack the results into a single DataFrame with a fresh
# 0..n-1 index.
#
# The per-file frames are handed to pd.concat through a generator, so once the
# concatenation is done nothing in the notebook namespace still references
# them. (A plain for-loop leaves the last file's frame bound to the loop
# variable even after the list of frames is deleted.)
#
# ICD_CODE is read as text: codes made up only of digits would otherwise be
# parsed as numbers, losing leading zeros and producing a mixed-type column.
proc = pd.concat(
    (
        pd.read_csv(file_path, sep="|", dtype={"ICD_CODE": str})
        for file_path in file_paths
    ),
    ignore_index=True,
)

In [4]:
# ICD procedure codes that flag a mechanical ventilation procedure
mechanical_ventilation_icd_codes = ['5A1935Z', '5A1945Z', '5A1955Z']

# Row masks over the procedure table: code membership is evaluated once and
# shared by the primary ('P') and secondary ('S') filters below
has_mv_code = proc['ICD_CODE'].isin(mechanical_ventilation_icd_codes)
is_primary = proc['ICD_PRI_SEC'] == 'P'
is_secondary = proc['ICD_PRI_SEC'] == 'S'

# Mechanical ventilation rows recorded as primary / secondary procedures
mechanical_ventilation_primary = proc[is_primary & has_mv_code]
mechanical_ventilation_secondary = proc[is_secondary & has_mv_code]

# Stack primary rows first, then secondary rows (original index labels kept)
mechanical_ventilation_data = pd.concat(
    [mechanical_ventilation_primary, mechanical_ventilation_secondary]
)

In [6]:
# Collapse the PAT_KEY column of the mechanical ventilation rows down to its
# distinct values (a patient key can appear on more than one procedure row)
mv_patient_keys = mechanical_ventilation_data['PAT_KEY']
mechanical_ventilation_patients = mv_patient_keys.unique()

In [7]:
# Location of the full Premier data ({2021 or any years}).
# Edit this path to match where the file lives on your machine.
full_premier_path = "nyu_2021_cohort_full_v2.csv"

# Load the full Premier data into memory
full_premier = pd.read_csv(full_premier_path)

In [8]:
# Flag each row of the full data by whether its PAT_KEY is among the
# mechanical ventilation patient keys, then store the flag as an integer
# column: 1 if the patient had mechanical ventilation, 0 otherwise
is_mv_patient = full_premier['PAT_KEY'].isin(mechanical_ventilation_patients)
full_premier['MECH_VENT'] = is_mv_patient.astype(int)

In [9]:
# Write the full Premier data, now carrying the MECH_VENT column, to CSV
# (the DataFrame index is not written)
full_premier_mv_path = "full_premier_mv_var.csv"
full_premier.to_csv(full_premier_mv_path, index=False)

In [11]:
# Build the mechanical-ventilation-only cohort: keep the rows of the full data
# whose MECH_VENT flag equals 1
had_mech_vent = full_premier['MECH_VENT'] == 1
mechanical_ventilation_cohort = full_premier[had_mech_vent]

In [12]:
# Write the mechanical ventilation cohort to CSV (the DataFrame index is not
# written)
mv_cohort_path = 'premier_mv_cohort.csv'
mechanical_ventilation_cohort.to_csv(mv_cohort_path, index=False)