# In [8]:
import pandas as pd
import numpy as np
from tqdm import tqdm, trange
from datetime import datetime
import sys, os
sys.path.append(os.path.abspath("../src/"))
from data_helper import get_days, load_config

# Dataset locations are read from the shared project YAML config.
config = load_config('../config/mimic_file.yaml')

# ---- Target diagnosis -------------------------------------------------------
# ICD-10 category prefix to look for; I48 is atrial fibrillation and flutter.
ICD_10_code = 'I48'
# Label used to build the output column names and the pickle filename.
# NOTE(review): the spelling is kept as-is because it is a runtime string that
# ends up in the exported column names / file name.
Dx_name = 'Atribe_fibeliation'
# Only diagnoses with seq_num <= 3 are considered (the author treats positions
# 1-3 as primary diagnoses and anything above 3 as comorbidities).
seq_threshold = 3

# ---- Input tables -----------------------------------------------------------
MIMIC_IV_path = config['project']['mimic_parent_math']
ecg_record_path = config['project']['ecg_record_path']
admissions_path = MIMIC_IV_path + 'hosp/admissions.csv.gz'
diagnoses_path = MIMIC_IV_path + 'hosp/diagnoses_icd10.csv'

ecg_record_df = pd.read_csv(ecg_record_path)
admissions = pd.read_csv(admissions_path, compression='gzip')
diagnoses_icd10_df = pd.read_csv(diagnoses_path, index_col=0)

# Every distinct patient that has at least one ECG record.
patients = ecg_record_df['subject_id'].unique()

# ---- Output columns ---------------------------------------------------------
# Defaults mean "no qualifying diagnosis admission found after this ECG":
# the time stays NaN and the event flag stays False.
time_column = f'{Dx_name}_time'
event_column = f'{Dx_name}_event'
ecg_record_df[time_column] = np.nan
ecg_record_df[event_column] = False

# Label every ECG record with the time to the patient's next admission that
# carries the target diagnosis.
#
# For each ECG row:
#   * f'{Dx_name}_event' is set to True when the patient has at least one
#     admission (with a target diagnosis at seq_num <= seq_threshold) whose
#     admittime is strictly later than the ECG time;
#   * f'{Dx_name}_time' is set to get_days(<earliest such admittime>, ecg_time)
#     (presumably the number of days between the two - confirm in data_helper).
# Rows without such an admission keep whatever values the columns already hold.

# Diagnosis rows whose ICD-10 code starts with ICD_10_code.
dx_df = diagnoses_icd10_df[diagnoses_icd10_df['icd_10_code'].str.contains(f'^{ICD_10_code}', na=False)]
# The seq_num cut-off does not depend on the patient, so apply it once here
# instead of once per patient inside the loop.
dx_within_seq_df = dx_df[dx_df['seq_num'] <= seq_threshold]

dx_time_col = f'{Dx_name}_time'
dx_event_col = f'{Dx_name}_event'

for subject_id in tqdm(patients):
    pat_ecg_df = ecg_record_df[ecg_record_df['subject_id'] == subject_id]
    pat_dx_df = dx_within_seq_df[dx_within_seq_df['subject_id'] == subject_id]
    dx_hadm_id_list = pat_dx_df['hadm_id'].unique().tolist()
    if not dx_hadm_id_list:
        # No qualifying diagnosis for this patient: nothing to label.
        continue

    pat_dx_admission = admissions[admissions['hadm_id'].isin(dx_hadm_id_list)]
    if pat_dx_admission.empty:
        continue

    for index, ecg_row in pat_ecg_df.iterrows():
        ecg_time = ecg_row['ecg_time']
        # Filter into a fresh variable for every ECG. Overwriting
        # pat_dx_admission here would make the filter cumulative, so an ECG
        # processed after a later-dated one would lose the admissions lying
        # between the two and get a wrong (or missing) next event.
        # NOTE(review): the comparison runs on the values as loaded by
        # read_csv (no date parsing is requested there) - confirm both
        # columns share a sortable timestamp format.
        later_admittimes = pat_dx_admission.loc[
            pat_dx_admission['admittime'] > ecg_time, 'admittime'
        ]
        if later_admittimes.empty:
            continue

        next_dx_event_date = later_admittimes.min()
        ecg_record_df.loc[index, dx_time_col] = get_days(next_dx_event_date, ecg_time)
        ecg_record_df.loc[index, dx_event_col] = True

# Persist only the identifiers and the two label columns.
ecg_record_df[['subject_id', 'study_id', f'{Dx_name}_time', f'{Dx_name}_event']].to_pickle(f"/data/padmalab_external/special_project/multi_event_data/MIMIC_ECG_{Dx_name}.pickle")
                                                                                                         