### Downloading the Dataset

In [None]:
import wfdb
import os

# Local directory that receives the PhysioNet `mitdb` files.
output_dir = 'data/mitbih_data'
os.makedirs(output_dir, exist_ok=True)

# Record names requested from the `mitdb` database.
record_names = [
    '100', '101', '102', '103', '104', '105', '106', '107', '108', '109',
    '111', '112', '113', '114', '115', '116', '117', '118', '119', '121',
    '122', '123', '124', '200', '201', '202', '203', '205', '207', '208',
    '209', '210', '212', '213', '214', '215', '217', '219', '220', '221',
    '222', '223', '228', '230', '231', '232', '233', '234',
]

# Download record by record so that a failure on one record (network error,
# missing remote file, ...) does not abort the remaining downloads.
failed_records = []
for record in record_names:
    try:
        wfdb.dl_database(
            db_dir='mitdb',
            dl_dir=output_dir,
            records=[record]
        )
    except Exception as exc:  # deliberately broad: report and keep going
        failed_records.append(record)
        print(f"Failed to download record {record}: {exc}")

# Report the outcome explicitly so that an incomplete dataset is not mistaken
# for a successful download.
if failed_records:
    print(f"Download finished with {len(failed_records)} failed record(s): {failed_records}")
else:
    print("Download complete.")



In [None]:
import wfdb
import os

# Local directory that receives the PhysioNet `afdb` files.
output_dir = 'data/afdb_data'
os.makedirs(output_dir, exist_ok=True)

# Record names requested from the `afdb` database.
record_names = [
    '00735', '03665', '04015', '04043', '04048', '04126', '04746',
    '04908', '04936', '05091', '05121', '05261', '06426', '06453',
    '06995', '07162', '07859', '07879', '07910', '08215', '08219',
    '08378', '08405', '08434', '08455'
]

# Download record by record so that a failure on one record does not abort
# the remaining downloads.
# NOTE(review): a later cell in this notebook skips records whose .dat file
# is missing, so some records apparently cannot be fetched completely; with
# the previous unguarded loop such a failure stopped everything after it.
failed_records = []
for record in record_names:
    try:
        wfdb.dl_database(
            db_dir='afdb',
            dl_dir=output_dir,
            records=[record]
        )
    except Exception as exc:  # deliberately broad: report and keep going
        failed_records.append(record)
        print(f"Failed to download record {record}: {exc}")

# Report the outcome explicitly so that an incomplete dataset is not mistaken
# for a successful download.
if failed_records:
    print(f"AFDB download finished with {len(failed_records)} failed record(s): {failed_records}")
else:
    print("AFDB download complete.")


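To download the `ltafdb` database (version 1.0.0) from PhysioNet, run the following command from a shell:

`wget -r -N -c -np https://physionet.org/files/ltafdb/1.0.0/`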

### Preprocessing the Data

In [None]:
import wfdb
import numpy as np

def load_and_preprocess_record(record_path, target_fs=128, segment_length=1280):
    """Load a WFDB record, resample, normalize and cut it into fixed-length segments.

    Parameters
    ----------
    record_path : str
        Record path (without file extension) passed to ``wfdb.rdrecord``.
    target_fs : int or float, default 128
        Sampling frequency the signal is resampled to when the record's own
        ``fs`` differs from it.
    segment_length : int, default 1280
        Number of samples (after resampling) in each returned segment.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_segments, segment_length) + p_signal.shape[1:]``
        holding consecutive, non-overlapping segments. Trailing samples that
        do not fill a whole segment are dropped. When the signal is shorter
        than ``segment_length`` the result has zero segments but keeps the
        same trailing dimensions.

    Raises
    ------
    ValueError
        If ``segment_length`` is not positive.
    """
    if segment_length <= 0:
        raise ValueError("segment_length must be a positive integer")

    record = wfdb.rdrecord(record_path)
    signal = record.p_signal
    original_fs = record.fs

    # Resample along the first (sample) axis only when the rates differ.
    if original_fs != target_fs:
        from scipy.signal import resample
        n_resampled = int(len(signal) * target_fs / original_fs)
        signal = resample(signal, n_resampled)

    # Normalize to zero mean / unit variance.
    # NOTE(review): the statistics are taken over the whole array, i.e. over
    # all channels jointly -- confirm whether per-channel scaling is wanted.
    std = np.std(signal)
    if std == 0:
        # A perfectly flat signal would otherwise divide by zero and turn
        # every sample into NaN; leave it centered at zero instead.
        std = 1.0
    signal = (signal - np.mean(signal)) / std

    # Segment. Floor division counts every complete segment, including the
    # last one when the length is an exact multiple of segment_length (the
    # previous `range(0, len - segment_length, ...)` loop skipped it).
    n_segments = len(signal) // segment_length
    usable = signal[:n_segments * segment_length]
    return usable.reshape((n_segments, segment_length) + signal.shape[1:])


### Segmenting the Data

In [6]:
import wfdb
import os

output_dir = 'data/afdb_data'
paths = os.listdir(output_dir)
print("Paths:", paths)

# Inspect only the first annotation (.atr) file that os.listdir returns:
# read it, show its first ten sample indices and symbols, then stop.
for path in paths:
    if path.endswith('.atr'):
        # wfdb expects the record name without the ".atr" extension.
        record_name = os.path.join(output_dir, path[:-4])
        print("Record name:", record_name)
        annotation = wfdb.rdann(record_name, 'atr')
        print("Sample indices:", annotation.sample[:10])
        print("Symbols:", annotation.symbol[:10])
        # The break must come after the prints; placed before them it made
        # both print statements unreachable.
        break


Paths: ['08215.hea', '04936.qrs', '08434.dat', '08378.dat', '07910.hea', '04908.qrs', '04048.qrs', '08219.dat', '07859.hea', '05121.hea', '08405.hea', '04015.hea', '06426.dat', '05261.dat', '07859.qrsc', '04936.atr', '04746.dat', '04908.atr', '05091.hea', '06995.hea', '07879.dat', '04048.atr', '06453.dat', '04048.hea', '07859.qrs', '04126.dat', '08405.qrs', '05121.qrs', '04015.qrs', '05091.atr', '07162.dat', '06995.atr', '07910.qrs', '04908.hea', '08455.dat', '04936.hea', '05091.qrsc', '08215.qrs', '08405.atr', '05121.atr', '04015.atr', '07859.atr', '05091.qrs', '06995.qrs', '07910.atr', '04043.dat', '08215.atr', '08378.atr', '04746.qrs', '08219.atr', '07879.qrs', '06426.atr', '05261.qrs', '00735.qrs', '03665.hea', '08434.atr', '04043.hea', '04908.dat', '04746.atr', '07162.hea', '08378.qrs', '04126.hea', '07879.atr', '06426.qrs', '06453.hea', '08219.qrs', '04048.dat', '00735.atr', '05261.atr', '08434.qrs', '04936.dat', '08455.hea', '08455.atr', '04043.qrs', '05261.hea', '00735.hea', '0

In [None]:
import wfdb
import os

output_dir = 'data/afdb_data'
paths = os.listdir(output_dir)


def _count_afib_samples(samples, aux_notes, signal_length):
    """Count the samples that lie inside '(AFIB' episodes of one record.

    Parameters
    ----------
    samples : sequence of int
        Sample index of each annotation.
    aux_notes : sequence of str
        Auxiliary note of each annotation, aligned with ``samples``.
    signal_length : int
        Total number of samples in the record; closes an episode that is
        still open after the last annotation.

    Returns
    -------
    int
        Number of samples between each '(AFIB' note and the next note that
        starts with '(' (or the end of the record).
    """
    afib_start = None
    afib_samples = 0

    for sample, raw_note in zip(samples, aux_notes):
        # Defensive: tolerate NUL / whitespace padding around the note text.
        note = raw_note.rstrip('\x00').strip()
        if note == '(AFIB':
            # Keep the earliest start if '(AFIB' is repeated while an episode
            # is already open; overwriting it would undercount the episode.
            if afib_start is None:
                afib_start = sample
        elif note.startswith('(') and afib_start is not None:
            # Any other note starting with '(' closes the open episode.
            afib_samples += int(sample - afib_start)
            afib_start = None

    # Episode still open at the end of the annotations: runs to signal end.
    if afib_start is not None:
        afib_samples += int(signal_length - afib_start)

    return afib_samples


total_afib_samples = 0

for path in paths:
    if not path.endswith('.atr'):
        continue

    record_name = os.path.join(output_dir, path[:-4])
    if not os.path.exists(record_name + '.dat'):
        print(f"Skipping {record_name} as .dat file does not exist.")
        continue

    # Only the signal length is needed, so read the header instead of
    # loading the entire signal into memory.
    header = wfdb.rdheader(record_name)
    signal_length = header.sig_len
    if signal_length is None:
        # Header does not state the length: fall back to reading the signal.
        signal_length = len(wfdb.rdrecord(record_name).p_signal)

    annotation = wfdb.rdann(record_name, 'atr')
    afib_samples = _count_afib_samples(
        annotation.sample, annotation.aux_note, signal_length
    )

    total_afib_samples += afib_samples

total_afib_samples
