In [1]:
import os
import wfdb
import pywt
import numpy as np

In [9]:
data_root = '/home/carlitos/Documents/Projects/ecg_classification/data/raw/ecg-id-database-1.0.0'
processed_data_dir = '/home/carlitos/Documents/Projects/ecg_classification/data/processed/'
os.makedirs(processed_data_dir, exist_ok=True)

In [77]:
wavelet = 'sym4'
level = 4

In [87]:
def denoise_and_save_wfdb(record_path, save_path):
    # Explicitly specify the file paths for .dat and .hea
    dat_file = record_path + '.dat'
    hea_file = record_path + '.hea'

    if not os.path.exists(dat_file) or not os.path.exists(hea_file):
        print(f"Files not found for {record_path}")
        return

    # Read the record from .dat file
    record_signals, record_info = wfdb.rdsamp(record_path)

    # Denoise each channel using symlet wavelet and universal threshold
    denoised_signals = []
    for channel in range(record_signals.shape[1]):
        noisy_signal = record_signals[:, channel]
        coeffs = pywt.wavedec(noisy_signal, symlet_wavelet, level=level)

        # A universal threshold based on the noise standard deviation
        threshold = np.sqrt(2 * np.log(len(noisy_signal))) * np.std(coeffs[-1])

        # Threshold the wavelet coefficients
        coeffs_thresholded = [pywt.threshold(c, threshold, mode='soft') for c in coeffs]

        # Reconstruct the denoised signal
        denoised_signal = pywt.waverec(coeffs_thresholded, symlet_wavelet)
        denoised_signals.append(denoised_signal)

    denoised_signals = np.array(denoised_signals).T

    # Extract the record name from the file path
    record_name = os.path.basename(record_path)

    # Create a Record object
    record = wfdb.Record(record_name=record_name, n_sig=record_signals.shape[1], fs=record_info['fs'])

    # Set additional attributes manually
    record.sig_name = record_info['sig_name']
    record.units = record_info['units']
    record.comments = record_info['comments']
    record.fmt = [f'{record_info["fmt"][i]}' if "fmt" in record_info else None for i in range(record.n_sig)]
    record.samps_per_frame = record_info.get('samps_per_frame', None)
    record.skew = record_info.get('skew', None)
    record.byte_offset = record_info.get('byte_offset', None)
    record.adc_gain = record_info.get('adc_gain', None)
    record.baseline = record_info.get('baseline', None)

    # Replace None values in d_signal with zeros
    record.d_signal = np.nan_to_num(record.d_signal)

    # Save the denoised signals in WFDB format using the set_p_signals method
    record.p_signals = denoised_signals
    record.sig_len = len(denoised_signals[0])
    record.baseline = [0] * record.n_sig
    record.record_name = os.path.basename(record_path).replace('.dat', '')
    record.file_name = [os.path.basename(record_path)]
    record.init_value = [0]

    # Calculate the checksum
    checksums = record.calc_checksum()
    record.checksum = checksums
    
    # Save the denoised signals in WFDB format using the wrsamp method
    record.wrsamp(write_dir=processed_data_dir)

In [88]:
for root, dirs, files in os.walk(data_root):
    for file in files:
        if file.endswith('.hea'):
            record_path = os.path.join(root, os.path.splitext(file)[0])
            save_path = os.path.join(processed_data_dir, f'{os.path.basename(record_path)}_denoised')

            # Perform denoising and save the processed data in WFDB format
            denoise_and_save_wfdb(record_path, save_path)

print("Denoising and saving in WFDB format completed.")

TypeError: unsupported operand type(s) for %: 'NoneType' and 'int'