In [1]:
import os
import pywt
import numpy as np
import wfdb
from scipy.signal import find_peaks

In [2]:
raw_data_path = "/home/carlitos/Documents/Projects/ecg_classification/data/raw/ecg-id-database-1.0.0"
processed_data_path = "/home/carlitos/Documents/Projects/ecg_classification/data/processed"

# Create the processed data directory if it doesn't exist
if not os.path.exists(processed_data_path):
    os.makedirs(processed_data_path)

In [11]:
def preprocess_ecg(file_path, output_path):
    # Load ECG data
    record_name = os.path.splitext(os.path.basename(file_path))[0]
    
    # Use rdsamp instead of rdrecord
    signals, fields = wfdb.rdsamp(record_name, pn_dir=os.path.dirname(file_path))
    ecg_data = signals.flatten()

    # Perform wavelet thresholding for denoising
    wavelet = 'sym4'
    level = 4
    coeffs = pywt.wavedec(ecg_data, wavelet, level=level)

    # Set a threshold value based on the noise standard deviation
    threshold = np.sqrt(2 * np.log(len(ecg_data))) * np.std(coeffs[-1])

    # Threshold the wavelet coefficients
    coeffs_thresholded = [pywt.threshold(c, threshold, mode='soft') for c in coeffs]

    # Reconstruct the denoised signal
    denoised_ecg = pywt.waverec(coeffs_thresholded, wavelet)

    # Remove artifacts (simple peak detection and removal)
    peaks, _ = find_peaks(denoised_ecg, height=0.2, distance=100)
    cleaned_ecg = np.delete(denoised_ecg, peaks)

    # Save pre-processed data
    wfdb.wrrecord(output_path, record=wfdb.Record(p_signal=cleaned_ecg.reshape(-1, 1), fs=fields['fs']))

In [12]:
for person_folder in os.listdir(raw_data_path):
    person_path = os.path.join(raw_data_path, person_folder)
    
    if os.path.isdir(person_path):
        for record_file in os.listdir(person_path):
            if record_file.endswith(".dat"):
                record_file_path = os.path.join(person_path, record_file)
                output_file_path = os.path.join(processed_data_path, f"{person_folder}_{os.path.splitext(record_file)[0]}")

                preprocess_ecg(record_file_path, output_file_path)

print("Pre-processing complete.")

NetFileError: no scheme specified for URL: '/home/carlitos/Documents/Projects/ecg_classification/data/raw/ecg-id-database-1.0.0/Person_39/rec_1.hea'