In [1]:
import pandas as pd
import numpy as np
import wfdb
import neurokit2 as nk
from scipy.fft import fft
import librosa
import csv

Features:
- Dominant Frequency: The frequency component with the highest amplitude in the signal.
- Frequency Power Sum: The sum of the power of all frequency components in the signal.
- Mean Frequency: The average frequency of the signal, weighted by the power of each frequency component.
- Spectral Entropy: A measure of the complexity or randomness of the signal's frequency distribution.
- MFCC_1 to MFCC_13: Mel-Frequency Cepstral Coefficients, which represent the short-term power spectrum of a sound signal. These coefficients are commonly used in audio signal processing and speech recognition.

In [2]:
dataset = pd.read_csv('patient_scp.csv')
directory = 'physionet.org/files/ptb-xl/1.0.3/'
output_file = 'Fourier_Transform_Features.csv'

In [None]:
with open(output_file, 'w', newline='') as feature_data:
    feature_writer = csv.writer(feature_data)
    
    header = ['ecg_id', 'Patient_ID', 'Label', 'Dominant Frequency', 'Frequency Power Sum', 'Mean Frequency', 'Spectral Entropy'] + [f'MFCC_{i+1}' for i in range(13)]
    feature_writer.writerow(header)

    for index, row in dataset.iterrows():
        print(f"Processing record {index+1}/{len(dataset)}")
        patient_id = row['patient_id']
        ecg_id = row['ecg_id']
        label = row['label']
        
        record = wfdb.rdrecord(directory + row['filename_hr'])
        ecg_signal = record.p_signal[:, 0]
        
        ecg_cleaned = nk.ecg_clean(ecg_signal, sampling_rate=500)
        
        fft_result = fft(ecg_cleaned)
        magnitude = np.abs(fft_result)
        frequencies = np.fft.fftfreq(len(magnitude), d=1/500)
        
        positive_frequencies = frequencies[frequencies >= 0]
        magnitude = magnitude[:len(positive_frequencies)]
        
        dominant_frequency = positive_frequencies[np.argmax(magnitude)]
        power_sum = np.sum(magnitude**2)
        mean_frequency = np.sum(positive_frequencies * magnitude) / np.sum(magnitude)
        spectral_entropy = -np.sum((magnitude / np.sum(magnitude)) * np.log2(magnitude / np.sum(magnitude)))
        
        mfccs = librosa.feature.mfcc(y=ecg_cleaned, sr=500, n_mfcc=13)
        mfcc_mean = np.mean(mfccs, axis=1)
        
        feature_row = [
            ecg_id,
            patient_id,
            label,
            dominant_frequency,
            power_sum,
            mean_frequency,
            spectral_entropy
        ] + mfcc_mean.tolist()
        
        feature_writer.writerow(feature_row)

fourier_dataset = pd.read_csv(output_file)
print(fourier_dataset.head())


Processing record 1/20860
Processing record 2/20860
Processing record 3/20860
Processing record 4/20860
Processing record 5/20860
Processing record 6/20860
Processing record 7/20860
Processing record 8/20860
Processing record 9/20860
Processing record 10/20860
Processing record 11/20860
Processing record 12/20860
Processing record 13/20860
Processing record 14/20860
Processing record 15/20860
Processing record 16/20860
Processing record 17/20860
Processing record 18/20860
Processing record 19/20860
Processing record 20/20860
Processing record 21/20860
Processing record 22/20860
Processing record 23/20860
Processing record 24/20860
Processing record 25/20860
Processing record 26/20860
Processing record 27/20860
Processing record 28/20860
Processing record 29/20860
Processing record 30/20860
Processing record 31/20860
Processing record 32/20860
Processing record 33/20860
Processing record 34/20860
Processing record 35/20860
Processing record 36/20860
Processing record 37/20860
Processing