In [1]:
import os
import pandas as pd
import wfdb
import ecg_plot
import ast
from tqdm import tqdm

In [2]:
# Input and output locations, relative to the notebook's working directory.
# All three are derived from a single dataset root so they cannot drift apart.
_DATASET_ROOT = 'data/ptb-xl'

CSV_PATH = f'{_DATASET_ROOT}/ptbxl_database.csv'  # metadata table read with pandas
RECORDS_PATH = f'{_DATASET_ROOT}/'                # prefix joined with each row's 'filename_hr'
OUTPUT_DIR = f'{_DATASET_ROOT}_images'            # directory the rendered PNGs are saved under


In [3]:
# Load the metadata table. The 'scp_codes' column is stored as text, so each
# cell is parsed into a Python literal with ast.literal_eval.
df = pd.read_csv(CSV_PATH).assign(
    scp_codes=lambda frame: frame['scp_codes'].map(ast.literal_eval)
)

In [4]:
def get_main_label(scp_dict):
    """Return the dominant SCP code of one record.

    Args:
        scp_dict: mapping of SCP statement code -> likelihood, as parsed from
            the 'scp_codes' column.

    Returns:
        The code with the highest likelihood. Ties are broken alphabetically,
        so a dict whose likelihoods are all equal yields its alphabetically
        first code. An empty mapping yields 'Unknown'.
    """
    if not scp_dict:
        return 'Unknown'
    try:
        # Sort key: highest likelihood first, then code name for a
        # deterministic result when likelihoods tie.
        return min(scp_dict, key=lambda code: (-scp_dict[code], code))
    except TypeError:
        # Likelihoods are not numeric (e.g. None or str): fall back to the
        # purely alphabetical choice instead of failing.
        return sorted(scp_dict.keys())[0]

In [None]:
# Render every record listed in `df` to an image named '<ecg_id>_<label>'
# under OUTPUT_DIR. A full pass takes hours, so the loop is restartable:
# records whose image already exists are skipped.
os.makedirs(OUTPUT_DIR, exist_ok=True)  # saving fails if the target directory is missing

failed_records = []  # (record_path, error text) for each record that could not be rendered

for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing ECGs"):
    ecg_id = row['ecg_id']
    filename_hr = row['filename_hr']  # e.g., 'records500/01000/01001_hr'
    scp_label = get_main_label(row['scp_codes'])

    # Build full path to the record (without extension)
    record_path = os.path.join(RECORDS_PATH, filename_hr)
    save_path = os.path.join(OUTPUT_DIR, f'{ecg_id}_{scp_label}')

    # NOTE(review): ecg_plot.save_as_png is expected to append '.png' to the
    # name it is given; confirm against the installed ecg_plot version.
    # Delete an image to force it to be regenerated.
    if os.path.exists(save_path + '.png'):
        continue

    try:
        record = wfdb.rdrecord(record_path)

        # Transpose so each row is a lead
        ecg_data = record.p_signal.T
        lead_names = record.sig_name

        # Plot and save image (at most the first 5000 samples of each lead)
        ecg_plot.plot(ecg_data[:, :5000], sample_rate=record.fs, lead_index=lead_names)
        ecg_plot.save_as_png(save_path)

    except Exception as e:
        # Best effort: one unreadable record must not abort the whole run.
        failed_records.append((record_path, str(e)))
        # tqdm.write prints without corrupting the progress bar.
        tqdm.write(f"Failed to process {record_path}: {e}")

if failed_records:
    print(f"{len(failed_records)} of {len(df)} records could not be processed.")

Processing ECGs:   1%|          | 236/21799 [09:06<13:23:56,  2.24s/it]