In [1]:
import pandas as pd
import ast
import re

# Load the per-record table that pairs each ECG study with its ICD-10 diagnoses.
ecg_icd_csv_path = '/data/padmalab_external/special_project/physionet.org/files/mimic-iv-ecg-ext-icd-labels/1.0.1/records_w_diag_icd10.csv'
ecg_icd_df = pd.read_csv(ecg_icd_csv_path)

# 'all_diag_hosp' arrives from the CSV as text; ast.literal_eval turns each cell
# back into the Python literal it spells out (the labelling loop later iterates
# over each cell as a collection of code strings).
ecg_icd_df['all_diag_hosp'] = ecg_icd_df['all_diag_hosp'].map(ast.literal_eval)

In [31]:
# Diagnosis label -> list of ICD-10 code patterns that define it.
# Each label becomes a True/False column in the output table. The labelling loop
# joins a label's codes with '|' and applies re.match, so every entry acts as a
# *prefix* (e.g. "I50" also matches "I5021"). Codes are written without the
# decimal point (e.g. "I2782").
diagnosis_dict = {
    "Non-ST_elevation_(NSTEMI)_myocardial_infarction": ["I214"],
    "ST_elevation_(STEMI)_myocardial_infarction": ["I210", "I211", "I212", "I213"],
    "Heart_failure": [
        "I50", "I43", "I099", "I110", "I130", "I132", "I255",
        "I420", "I425", "I426", "I427", "I428", "I429", "P290",
    ],
    "Unstable_angina": ["I200"],
    "Atrial_fibrillation": ["I48"],
    # Disabled label:
    #     "Syncope": ['R55'],
    "Ventricular_tachycardia": ["I472"],
    "Cardiac_arrest": ["I46"],
    "Supraventricular_tachycardia": ["I471"],
    "Atrioventricular_block": ["I440", "I441", "I442", "I443"],
    # Definition note: except for I26.01 and I26.90.
    # NOTE(review): prefix matching on "I26" still matches I2601 and I2690, so
    # this exclusion is not enforced by the labelling loop -- confirm whether it
    # is intended to be applied.
    "Pulmonary_embolism": ["I26", "I2782"],
    "Aortic_Stenosis": ["I350", "I352", "I060", "I062"],
    # Disabled label:
    #     "Cardiac_Amyloidosis": ['E85'],
    # or ICD-9 code of 277.3 only in HF cohort (HF cohort is identified by I50
    # in any Dx in any DAD or ED encounters, anytime between 2007 and 2020).
    # Within this cohort select episodes with E85 in all positions
    # (HF cohort - Amy episodes).
    "Pulmonary_Hypertension": ["I270", "I272"],

    "Hypertrophic_Cardiomyopathy": ["I421", "I422"],
    "Mitral_Valve_Prolapse": ["I340", "I341", "I051", "I052", "I058"],
    "Mitral_Valve_Stenosis": ["I342", "I050", "I052"],
    # Disabled placeholder:
    #     'Cardiac_Amyloidosis': []
}

In [32]:
# Template record: one entry per diagnosis label, all initialised to False.
# The labelling loop copies this for every study and flips matches to True.
temp_dx_dict = dict.fromkeys(diagnosis_dict, False)

In [38]:
# `trange` is not used in this cell; the import is kept unchanged in case other
# cells rely on it being in the namespace.
from tqdm import tqdm, trange

# Build one compiled pattern per diagnosis label up front. The alternation of a
# label's codes does not depend on the row, so compiling it inside the row loop
# would repeat the same join/compile for every one of the records.
dx_patterns = {
    key: re.compile('|'.join(value)) for key, value in diagnosis_dict.items()
}

# One dict per ECG study: every diagnosis label -> True/False, plus 'study_id'.
pat_dx_list = []

# Iterate over just the two columns that are needed instead of using
# iterrows(), which constructs a full Series object for every row.
study_rows = zip(ecg_icd_df['study_id'], ecg_icd_df['all_diag_hosp'])
for study_id, diag_codes in tqdm(study_rows, total=ecg_icd_df.shape[0]):
    # Start from the all-False template so every label is present in the
    # record, in the same key order, even when nothing matches.
    pat_dx_dict = temp_dx_dict.copy()
    pat_dx_dict['study_id'] = study_id
    for key, pattern in dx_patterns.items():
        # re.match anchors at the start of the string only, so each listed
        # code acts as a prefix: 'I50' flags 'I50', 'I509', 'I5021', ...
        # any() stops at the first matching code instead of collecting all.
        if any(pattern.match(code) for code in diag_codes):
            pat_dx_dict[key] = True
    pat_dx_list.append(pat_dx_dict)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 800035/800035 [01:32<00:00, 8675.92it/s]


In [40]:
# Assemble the per-study records (one dict each) into a single table:
# one row per study, one column per dict key.
dx_df = pd.DataFrame(pat_dx_list)

In [60]:
# Persist the per-study diagnosis flags. With to_csv defaults the DataFrame
# index is written as well, as the first (unnamed) column of the file.
dx_flags_csv_path = '/data/padmalab_external/special_project/multi_event_data/MIMIC_IV_ECG_15_Dx.csv'
dx_df.to_csv(dx_flags_csv_path)

In [57]:
# Per-diagnosis prevalence summary: for every True/False diagnosis column,
# record "count (fraction)" of positive and of negative studies.
dx_summary_dict = {
    'DX column': [],
    'Positive': [],
    'Negative': [],
}

n_records = len(dx_df)

# Only the boolean flag columns are summarised. dx_df also carries the
# 'study_id' identifier column, which has no True/False counts; looking those
# labels up on it fails after the column name has already been appended,
# leaving the three lists with different lengths.
for column in dx_df.select_dtypes(include='bool').columns:
    # Count via sum() rather than value_counts()[True] / [False]: a label that
    # never occurs (or always occurs) has no True (or False) entry in
    # value_counts(), and indexing it would raise KeyError.
    n_positive = int(dx_df[column].sum())
    n_negative = n_records - n_positive

    # Guard the empty-table case so the cell still produces a summary.
    frac_positive = n_positive / n_records if n_records else 0.0
    frac_negative = n_negative / n_records if n_records else 0.0

    dx_summary_dict['DX column'].append(column)
    dx_summary_dict['Positive'].append(f"{n_positive} ({round(frac_positive, 4)})")
    dx_summary_dict['Negative'].append(f"{n_negative} ({round(frac_negative, 4)})")


In [59]:
# Turn the summary lists into a table (one column per dict key) and save it.
# The relative path means the file lands in the current working directory.
dx_summary_df = pd.DataFrame(dx_summary_dict)
dx_summary_df.to_csv('MIMIC_IV_ECG_Dx.csv')