In [5]:
import os
import numpy as np
import pywt
import biosppy.signals.ecg as ecg

In [2]:
# Project root; override with the ECG_PROJECT_ROOT environment variable so the
# notebook can run on machines other than the original author's. The default
# reproduces the original hard-coded locations exactly.
project_root = os.environ.get(
    "ECG_PROJECT_ROOT",
    "/home/carlitos/Documents/Projects/ecg_classification",
)

# Input folder holding one sub-folder per person, and the output folder that
# receives the pre-processed signals.
raw_data_path = os.path.join(project_root, "data", "raw", "ecg-id-database-1.0.0")
processed_data_path = os.path.join(project_root, "data", "processed")

# exist_ok=True makes this cell idempotent and avoids the check-then-create race.
os.makedirs(processed_data_path, exist_ok=True)

In [6]:
def _read_wfdb_header(header_path):
    """Parse the fields of a WFDB ``.hea`` header needed to decode its signals.

    Returns a dict with ``sampling_rate`` (Hz), ``n_signals``, and per-signal
    ``file_names``, ``formats``, ``gains`` (ADC units per physical unit) and
    ``baselines`` (ADC value corresponding to physical zero).

    Raises:
        ValueError: if the header is empty or lists fewer signal lines than
            the record line announces.
    """
    with open(header_path, "r") as header_file:
        stripped = [line.strip() for line in header_file]
    # Blank lines and '#' comment lines carry no record information.
    lines = [line for line in stripped if line and not line.startswith("#")]
    if not lines:
        raise ValueError(f"Empty WFDB header: {header_path}")

    record_fields = lines[0].split()
    if len(record_fields) < 2:
        raise ValueError(f"Malformed WFDB record line in {header_path}: {lines[0]!r}")
    n_signals = int(record_fields[1])
    # The frequency field may carry '/counter_frequency' after the sampling
    # rate; 250 Hz is the WFDB default when the field is absent.
    if len(record_fields) > 2:
        sampling_rate = float(record_fields[2].split("/")[0])
    else:
        sampling_rate = 250.0

    signal_lines = lines[1:1 + n_signals]
    if len(signal_lines) != n_signals:
        raise ValueError(
            f"{header_path} announces {n_signals} signals but describes {len(signal_lines)}"
        )

    file_names, formats, gains, baselines = [], [], [], []
    for line in signal_lines:
        fields = line.split()
        if len(fields) < 2:
            raise ValueError(f"Malformed WFDB signal line in {header_path}: {line!r}")
        file_names.append(fields[0])
        formats.append(fields[1])

        # Gain field layout: gain[(baseline)][/units]
        gain = 0.0
        baseline = None
        if len(fields) > 2:
            gain_text = fields[2].split("/")[0]
            if "(" in gain_text:
                gain_text, _, baseline_text = gain_text.partition("(")
                baseline = float(baseline_text.rstrip(")"))
            gain = float(gain_text)
        if gain == 0.0:
            # A zero or missing gain means "uncalibrated"; the WFDB
            # convention is to assume 200 ADC units per physical unit.
            gain = 200.0
        if baseline is None:
            # Without an explicit baseline, the ADC zero (5th field) is used.
            baseline = float(fields[4]) if len(fields) > 4 else 0.0
        gains.append(gain)
        baselines.append(baseline)

    return {
        "sampling_rate": sampling_rate,
        "n_signals": n_signals,
        "file_names": file_names,
        "formats": formats,
        "gains": np.asarray(gains, dtype=float),
        "baselines": np.asarray(baselines, dtype=float),
    }


def _load_wfdb_record(file_path):
    """Load a WFDB format-16 ``.dat`` file and its sibling ``.hea`` header.

    Returns ``(data, header)`` where ``data`` is a float array of shape
    ``(n_samples, n_signals)`` in physical units and ``header`` is the dict
    produced by ``_read_wfdb_header``.

    Raises:
        ValueError: if the record has no signals, is not a single-file
            format-16 record, or the file size does not match the number
            of signals.
    """
    header_path = os.path.splitext(file_path)[0] + ".hea"
    header = _read_wfdb_header(header_path)
    n_signals = header["n_signals"]
    if n_signals < 1:
        raise ValueError(f"{header_path} declares no signals")

    data_name = os.path.basename(file_path)
    for name, fmt in zip(header["file_names"], header["formats"]):
        # Only interleaved 16-bit samples stored in this very file are
        # supported; anything else would be silently mis-decoded.
        if name != data_name or fmt != "16":
            raise ValueError(
                f"Unsupported WFDB layout for {file_path}: signal file {name!r}, format {fmt!r}"
            )

    # Format 16: little-endian 16-bit two's complement, samples interleaved
    # across signals.
    raw = np.fromfile(file_path, dtype="<i2")
    if raw.size % n_signals:
        raise ValueError(
            f"{file_path} holds {raw.size} samples, not a multiple of {n_signals} signals"
        )
    digital = raw.reshape(-1, n_signals).astype(float)
    data = (digital - header["baselines"]) / header["gains"]
    return data, header


def preprocess_ecg(file_path, output_path, channel=1, wavelet='sym4', level=4):
    """Denoise one ECG record and save the result as a text file.

    Steps: load the record, detect R-peaks with the Hamilton segmenter,
    soft-threshold the wavelet detail coefficients, reconstruct the signal,
    drop the samples located at the detected R-peaks and write the remaining
    samples to ``output_path`` with ``np.savetxt``.

    Args:
        file_path: Path to the record's ``.dat`` file; the matching ``.hea``
            header must sit next to it. The record is assumed to be a WFDB
            format-16 record -- TODO confirm against the dataset.
        output_path: Destination text file (one sample per line).
        channel: Column of the record to process (default 1, as before).
        wavelet: PyWavelets wavelet name used for denoising.
        level: Wavelet decomposition level.

    Raises:
        IndexError: if ``channel`` is not a valid column of the record.
        ValueError: if the record cannot be decoded (see ``_load_wfdb_record``).
    """
    # The original call `biosppy.tools.load_dat` referenced the unbound name
    # `biosppy`, so the record is read directly with numpy instead.
    data, header = _load_wfdb_record(file_path)
    if not 0 <= channel < data.shape[1]:
        raise IndexError(
            f"channel {channel} out of range for record with {data.shape[1]} signals: {file_path}"
        )
    signal = data[:, channel]

    # Perform R-peaks detection using biosppy
    rpeaks = ecg.hamilton_segmenter(signal, sampling_rate=header['sampling_rate'])['rpeaks']

    # Wavelet decomposition: coeffs[0] is the approximation, coeffs[1:] the
    # detail bands from coarsest to finest.
    coeffs = pywt.wavedec(signal, wavelet, level=level)

    # Universal threshold, with the noise level estimated from the finest
    # detail band.
    threshold = np.sqrt(2 * np.log(signal.size)) * np.std(coeffs[-1])

    # Threshold only the detail coefficients. The approximation band carries
    # the low-frequency ECG morphology; shrinking it would distort the signal
    # rather than denoise it.
    coeffs_thresholded = [coeffs[0]] + [
        pywt.threshold(detail, threshold, mode='soft') for detail in coeffs[1:]
    ]

    # Reconstruct the denoised signal. waverec can return one extra sample
    # for odd-length inputs, so trim back to the original length to keep the
    # R-peak indices aligned.
    denoised_ecg = pywt.waverec(coeffs_thresholded, wavelet)[:signal.size]

    # NOTE(review): this deletes the single sample at each detected R-peak,
    # which shortens the signal and removes the QRS apex. Kept for output
    # compatibility -- confirm this is the intended "artifact removal".
    cleaned_ecg = np.delete(denoised_ecg, rpeaks)

    # Save pre-processed data
    np.savetxt(output_path, cleaned_ecg)

In [7]:
# Batch driver: every "*.dat" file found one level below raw_data_path is
# pre-processed and written to processed_data_path as
# "<person folder>_<record name>_processed.txt".
for person_folder in os.listdir(raw_data_path):
    person_path = os.path.join(raw_data_path, person_folder)

    # Skip stray files sitting next to the per-person folders.
    if not os.path.isdir(person_path):
        continue

    for record_file in os.listdir(person_path):
        # Only the signal files are processed; other files are ignored.
        if not record_file.endswith(".dat"):
            continue

        record_stem = os.path.splitext(record_file)[0]
        output_name = f"{person_folder}_{record_stem}_processed.txt"

        record_file_path = os.path.join(person_path, record_file)
        output_file_path = os.path.join(processed_data_path, output_name)

        preprocess_ecg(record_file_path, output_file_path)

print("Pre-processing complete.")

Pre-processing complete.
