### Imports

In [1]:
import os
import numpy as np
import pandas as pd

from IPython.core.interactiveshell import InteractiveShell
# Show the result of every top-level expression in a cell, not just the last one
# (this is why later cells can display several outputs, e.g. counts and percentages).
InteractiveShell.ast_node_interactivity = "all"

### Read in Data

In [2]:
# Change working directory
# The data folder can be overridden with the PE_DATA_DIR environment variable so the
# notebook can run on another machine; when it is unset, the original local path is used.
# Later cells read files by relative name, so the working directory must point at the data.
wd = os.environ.get(
    "PE_DATA_DIR",
    "C://Users//alexm//OneDrive//Desktop//Northwestern//Winter 2025//MSDS 498//Capstone Project//PE_Predictions//Data//Preprocessed_Data",
)
os.chdir(wd)

In [3]:
# Read in expired data
# The file name is relative, so it is resolved against the current working directory.
expired = pd.read_csv('expired.csv')

# (rows, columns) of the loaded frame
expired.shape

(313, 61)

In [4]:
# Mortality %: expired patients as a share of the combined cohort, rounded to one decimal
n_expired = 313   # equals the row count reported by expired.shape
n_other = 4410    # NOTE(review): presumably the number of non-expired patients — confirm the source
cohort_size = n_other + n_expired
mortality = round(n_expired / cohort_size * 100, 1)
print(mortality)

6.6


### Create Treatment Column

In [5]:
# Build a single "treatment" field out of the individual treatment flag columns

# Flag column -> short label; the dict order is the order labels appear in the joined string
treatment_labels = {
    "ac_flag": "AC",
    "lytics_flag": "Lytics",
    "mt_flag": "MT",
    "us_cdt_flag": "CDT"
}


def _join_active_treatments(row):
    """Return the labels of all flags equal to 1 in `row`, joined with ", ".

    A row with no flag set to 1 yields the empty string.
    """
    active = []
    for col, name in treatment_labels.items():
        if row[col] == 1:
            active.append(name)
    return ", ".join(active)


# One label string per row (row-wise apply over the flag columns)
expired["treatment"] = expired.apply(_join_active_treatments, axis = 1)

# Frequency of each treatment combination
expired['treatment'].value_counts()

treatment
AC                 236
AC, Lytics          46
                    25
AC, Lytics, CDT      3
AC, CDT              2
AC, MT               1
Name: count, dtype: int64

In [6]:
# Consolidate treatment categories

# Define function to consolidate treatment categories
def consolidate_treatment(treatment):
    """Map a joined treatment string to one consolidated category.

    Parameters
    ----------
    treatment : str or missing value
        Comma-separated treatment labels (e.g. "AC, Lytics"), an empty or
        whitespace-only string, or a missing value.

    Returns
    -------
    str
        One of "No Treatment", "AC Only", "Multiple Interventions", "MT",
        "CDT", "Lytics" or "Other".

    Notes
    -----
    Labels are detected by substring membership. MT and CDT take precedence
    over Lytics, so "AC, Lytics, CDT" is reported as "CDT".
    """
    # Missing or blank input means no recorded treatment
    if pd.isna(treatment) or not treatment.strip():
        return "No Treatment"

    # Exact match only: anticoagulation with nothing else
    if treatment == "AC":
        return "AC Only"

    has_lytics = "Lytics" in treatment
    has_mt = "MT" in treatment
    has_cdt = "CDT" in treatment

    if has_mt and has_cdt:
        return "Multiple Interventions"
    # From here on at most one of MT / CDT is present
    if has_mt:
        return "MT"
    if has_cdt:
        return "CDT"
    if has_lytics:
        return "Lytics"

    # Anything unrecognised (including "AC" with stray whitespace)
    return "Other"

# Apply the function to create a new consolidated column
# (one consolidated category per row, derived from the joined "treatment" string)
expired["treatment_grouped"] = expired["treatment"].apply(consolidate_treatment)

# Check value distribution (row counts per consolidated category)
expired["treatment_grouped"].value_counts()

# Check value distribution (percentages)
# normalize = True gives proportions; multiplying by 100 turns them into percentages
expired["treatment_grouped"].value_counts(normalize = True) * 100

treatment_grouped
AC Only         236
Lytics           46
No Treatment     25
CDT               5
MT                1
Name: count, dtype: int64

treatment_grouped
AC Only         75.399361
Lytics          14.696486
No Treatment     7.987220
CDT              1.597444
MT               0.319489
Name: proportion, dtype: float64

In [7]:
# List the column names of the expired frame
expired.columns

Index(['subject_id', 'hadm_id', 'dvt_date', 'pe_date', 'days_to_pe',
       'dischtime', 'admission_type', 'admission_location',
       'discharge_location', 'insurance', 'marital_status', 'race',
       'hospital_expire_flag', 'gender', 'anchor_age', 'dvt_icd_code',
       'dvt_icd_version', 'dvt_diagnosis', 'dvt_chronicity', 'dvt_location',
       'pe_icd_code', 'pe_icd_version', 'pe_diagnosis', 'length_of_stay',
       'num_dvt_admissions', 'num_dvt_diagnoses', 'had_dvt_as_pri_diagnosis',
       'had_icu_stay', 'hx_ac', 'hx_dvt', 'hx_pe', 'hx_vte', 'pe_outcome',
       'age', 'myocardial_infarct', 'congestive_heart_failure',
       'peripheral_vascular_disease', 'cerebrovascular_disease', 'dementia',
       'chronic_pulmonary_disease', 'rheumatic_disease',
       'peptic_ulcer_disease', 'mild_liver_disease', 'diabetes_without_cc',
       'diabetes_with_cc', 'paraplegia', 'renal_disease', 'malignant_cancer',
       'severe_liver_disease', 'metastatic_solid_tumor', 'aids',
       'cha

In [8]:
# Group by 'treatment_grouped' (the consolidated category) and calculate the mean
# of 'charlson_comorbidity_index'; reset_index() turns the group labels back into a column
avg_cci_by_treatment = expired.groupby('treatment_grouped')['charlson_comorbidity_index'].mean().reset_index()
avg_cci_by_treatment

Unnamed: 0,treatment_grouped,charlson_comorbidity_index
0,AC Only,6.881356
1,CDT,7.4
2,Lytics,5.913043
3,MT,4.0
4,No Treatment,7.8


In [18]:
# Sum of 'had_dvt_as_pri_diagnosis' within each consolidated treatment group.
# NOTE(review): despite the "percent_" prefix this is a sum, not a percentage.
# Assuming the column is a 0/1 flag (TODO confirm), the sum is the number of flagged
# patients per group, and .mean() * 100 would give the percentage directly.
percent_dvt_pri_by_treatment = (
    expired.groupby('treatment_grouped')['had_dvt_as_pri_diagnosis']
    .sum())

percent_dvt_pri_by_treatment

treatment_grouped
AC Only         5
CDT             0
Lytics          3
MT              0
No Treatment    1
Name: had_dvt_as_pri_diagnosis, dtype: int64

In [19]:
# Share of the "AC Only" group with had_dvt_as_pri_diagnosis set:
# 5 is the "AC Only" sum from the previous cell and 236 is the "AC Only" group size
# from the treatment_grouped counts. Both are copied by hand, so they must be
# updated manually if the data changes.
5/236

0.0211864406779661