## Plot heart attack vs. no heart attack ECGs

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from scipy import stats
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
# Load the table of tested ECG ids together with their labels and covariates.
test_df = pd.read_csv("/home/ngsci/project/NEJM_benchmark/all_ids_labels_tested_with_covars_all_final_cath.csv")

# --- Rows labelled 1 in 'stent_or_cabg_010_day' ("heartattack") ---
heartattack_df = test_df[test_df['stent_or_cabg_010_day'] == 1]
# True where at least one of the three ECG flags equals 1
# (presumably ST depression / ST elevation / T-wave inversion, going by the column names).
_ha_core_flag = (
    (heartattack_df['has_depress'] == 1)
    | (heartattack_df['has_st_eleva'] == 1)
    | (heartattack_df['has_twave_inver'] == 1)
)
heartattack_df_ste_std_twi = heartattack_df[_ha_core_flag]
# The "no feature" subset additionally excludes rows flagged 'has_twave_abnormal',
# so it is not the exact complement of the subset above.
heartattack_df_no_ste_std_twi = heartattack_df[
    ~(_ha_core_flag | (heartattack_df['has_twave_abnormal'] == 1))
]

# --- Rows labelled 0 in 'stent_or_cabg_010_day' ("no heartattack"), same two subsets ---
no_heartattack_df = test_df[test_df['stent_or_cabg_010_day'] == 0]
_no_ha_core_flag = (
    (no_heartattack_df['has_depress'] == 1)
    | (no_heartattack_df['has_st_eleva'] == 1)
    | (no_heartattack_df['has_twave_inver'] == 1)
)
no_heartattack_df_ste_std_twi = no_heartattack_df[_no_ha_core_flag]
no_heartattack_df_no_ste_std_twi = no_heartattack_df[
    ~(_no_ha_core_flag | (no_heartattack_df['has_twave_abnormal'] == 1))
]

# Keep the ECG ids at positions 10..24 of each subset (at most 15 per group).
heartattack_df_ste_std_twi_ids = heartattack_df_ste_std_twi['ecg_id_new'].iloc[10:25].tolist()
heartattack_df_no_ste_std_twi_ids = heartattack_df_no_ste_std_twi['ecg_id_new'].iloc[10:25].tolist()
no_heartattack_df_ste_std_twi_ids = no_heartattack_df_ste_std_twi['ecg_id_new'].iloc[10:25].tolist()
no_heartattack_df_no_ste_std_twi_ids = no_heartattack_df_no_ste_std_twi['ecg_id_new'].iloc[10:25].tolist()

In [3]:
def _baseline_correct_and_trim(lead):
    """Shift one lead by its most frequent value and trim the zero padding.

    Args:
        lead: 1-D numeric array for a single lead; may contain NaNs.

    Returns:
        The corrected samples between the first and last non-zero value
        (inclusive), or ``None`` when the lead has no non-NaN samples or is
        all zeros after correction.
    """
    lead_without_nan = lead[~np.isnan(lead)]
    if len(lead_without_nan) == 0:
        # No valid data in this lead
        return None

    # Baseline correction: subtract the most frequent sample value.
    # Older SciPy returns the mode of 1-D input as a length-1 array, newer
    # SciPy (keepdims=False default, 1.11+) returns a scalar; atleast_1d
    # accepts both so the trailing [0] never indexes into a scalar.
    mode_value = np.atleast_1d(stats.mode(lead_without_nan, nan_policy='omit')[0])[0]
    lead_corrected = np.nan_to_num(lead - mode_value, nan=0)

    # Remove leading/trailing zeros that represent missing data
    nonzero_indices = np.where(lead_corrected != 0)[0]
    if len(nonzero_indices) == 0:
        # All zeros
        return None
    return lead_corrected[nonzero_indices[0]:nonzero_indices[-1] + 1]


def plot_leads_as_on_pdf_new(arr):
    """Plot the first 12 rows of ``arr`` as a three-row ECG strip and show it.

    Row ``i`` of the figure concatenates leads ``i, i+3, i+6, i+9`` end to
    end (I/aVR/V1/V4, II/aVL/V2/V5, III/aVF/V3/V6), labels each segment and
    separates neighbouring segments with a vertical line. Leads that are
    empty after NaN removal, or all zero after baseline correction, are
    skipped; a row with no usable lead is left as an empty, unstyled axis.

    Args:
        arr: Array whose first axis indexes leads in the order I, II, III,
            aVR, aVL, aVF, V1..V6. Only the first 12 rows are used.
            NOTE(review): the fixed y-range of +/-1.6 presumably assumes
            amplitudes in mV; confirm against the dataset.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    short_lead_arr = arr[:12]
    short_lead_labels = ['I', 'II', 'III', 'aVR', 'aVL', 'aVF',
                         'V1', 'V2', 'V3', 'V4', 'V5', 'V6']

    fig, axs = plt.subplots(3, 1, figsize=(10, 5))

    for i in range(3):
        ax = axs[i]

        # Every third lead starting at i belongs to this row.
        segments = []
        for lead, label in zip(short_lead_arr[i::3], short_lead_labels[i::3]):
            trimmed = _baseline_correct_and_trim(lead)
            if trimmed is not None:
                segments.append((label, trimmed))

        if not segments:
            # No leads to plot in this row
            continue

        ax.plot(np.concatenate([samples for _, samples in segments]), color='black')

        # Add vertical lines and annotations
        current_pos = 0
        last_index = len(segments) - 1
        for j, (label, samples) in enumerate(segments):
            seg_length = len(samples)

            # Label centred over the segment
            ax.text(current_pos + seg_length / 2.0, 1.2, label, color='black',
                    ha='center', va='bottom', fontsize=10)

            # Draw vertical line after each segment except the last
            if j < last_index:
                ax.axvline(x=current_pos + seg_length, color='black',
                           linestyle='-', linewidth=1)

            current_pos += seg_length

        # Customize ECG-like grid: red major/minor lines on a white background
        ax.set_ylim(-1.6, 1.6)
        ax.xaxis.set_major_locator(MultipleLocator(100))
        ax.yaxis.set_major_locator(MultipleLocator(0.5))

        ax.xaxis.set_minor_locator(MultipleLocator(20))
        ax.yaxis.set_minor_locator(MultipleLocator(0.1))

        ax.grid(which='major', color='red', linestyle='-', linewidth=0.5)
        ax.grid(which='minor', color='red', linestyle='-', linewidth=0.2)
        ax.set_facecolor('white')

        # Hide x-axis tick labels
        ax.tick_params(axis='x', which='major', labelbottom=False)
        ax.tick_params(axis='x', which='minor', labelbottom=False)

    plt.tight_layout()
    plt.show()


In [None]:
# Plot every selected ECG from the "heartattack with ECG feature" id list.
print('ACS with ECG feature')

for id_data in heartattack_df_ste_std_twi_ids:
    print(id_data)
    # The first two characters of the id name the subdirectory holding the file.
    prefix = id_data[:2]
    # Drop the last four characters of the id (presumably a file extension;
    # confirm against the CSV) before '.npz' is appended below.
    id_data = id_data[:-4]
    # np.load returns an open NpzFile for .npz archives; the context manager
    # closes it so file handles do not accumulate across iterations.
    with np.load(f'/home/ngsci/datasets/ed-bwh-ecg/v1/ecg-waveforms-npz/{prefix}/{id_data}.npz') as arr:  # e.g. 00385e09ac.npz, 00177a731b.npz
        array_name = arr.files[0]  # name of the first array stored in the archive
        data = arr[array_name]  # read into memory while the archive is still open
    plot_leads_as_on_pdf_new(data)
    print()
    print('#####################################################################')
    print()

In [None]:
# Plot every selected ECG from the "heartattack without ECG feature" id list.
print('ACS without ECG feature')

for id_data in heartattack_df_no_ste_std_twi_ids:
    print(id_data)
    # The first two characters of the id name the subdirectory holding the file.
    prefix = id_data[:2]
    # Drop the last four characters of the id (presumably a file extension;
    # confirm against the CSV) before '.npz' is appended below.
    id_data = id_data[:-4]
    # np.load returns an open NpzFile for .npz archives; the context manager
    # closes it so file handles do not accumulate across iterations.
    with np.load(f'/home/ngsci/datasets/ed-bwh-ecg/v1/ecg-waveforms-npz/{prefix}/{id_data}.npz') as arr:  # e.g. 00385e09ac.npz, 00177a731b.npz
        array_name = arr.files[0]  # name of the first array stored in the archive
        data = arr[array_name]  # read into memory while the archive is still open
    plot_leads_as_on_pdf_new(data)
    print()
    print('#####################################################################')
    print()

In [None]:
# Plot every selected ECG from the "no heartattack with ECG feature" id list.
print('No ACS with ECG feature')

for id_data in no_heartattack_df_ste_std_twi_ids:
    print(id_data)
    # The first two characters of the id name the subdirectory holding the file.
    prefix = id_data[:2]
    # Drop the last four characters of the id (presumably a file extension;
    # confirm against the CSV) before '.npz' is appended below.
    id_data = id_data[:-4]
    # np.load returns an open NpzFile for .npz archives; the context manager
    # closes it so file handles do not accumulate across iterations.
    with np.load(f'/home/ngsci/datasets/ed-bwh-ecg/v1/ecg-waveforms-npz/{prefix}/{id_data}.npz') as arr:  # e.g. 00385e09ac.npz, 00177a731b.npz
        array_name = arr.files[0]  # name of the first array stored in the archive
        data = arr[array_name]  # read into memory while the archive is still open
    plot_leads_as_on_pdf_new(data)
    print()
    print('#####################################################################')
    print()

In [None]:
# Plot every selected ECG from the "no heartattack without ECG feature" id list.
print('No ACS without ECG feature')

for id_data in no_heartattack_df_no_ste_std_twi_ids:
    print(id_data)
    # The first two characters of the id name the subdirectory holding the file.
    prefix = id_data[:2]
    # Drop the last four characters of the id (presumably a file extension;
    # confirm against the CSV) before '.npz' is appended below.
    id_data = id_data[:-4]
    # np.load returns an open NpzFile for .npz archives; the context manager
    # closes it so file handles do not accumulate across iterations.
    with np.load(f'/home/ngsci/datasets/ed-bwh-ecg/v1/ecg-waveforms-npz/{prefix}/{id_data}.npz') as arr:  # e.g. 00385e09ac.npz, 00177a731b.npz
        array_name = arr.files[0]  # name of the first array stored in the archive
        data = arr[array_name]  # read into memory while the archive is still open
    plot_leads_as_on_pdf_new(data)
    print()
    print('#####################################################################')
    print()