In [38]:
import pandas as pd
import csv
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import wfdb
import random
import neurokit2 as nk
import tsfresh as tsf
from tsfresh.feature_extraction import extract_features
from tsfresh.feature_extraction.settings import from_columns

In [39]:
# Path prefix that is prepended to each record's 'filename_hr' value when the
# waveform is read with wfdb.
directory = 'physionet.org/files/ptb-xl/1.0.3/'

# Lookup table: supplies the 'ecg_id' -> 'filename_hr' association.
dataset = pd.read_csv('patient_scp.csv')

# Driver table: one feature-extraction pass is run per row, keyed by 'ecg_id'.
hrv_dataset = pd.read_csv('ECG_Cardiac_Features.csv')

In [40]:
# Names of the tsfresh feature columns to keep, taken from the 'feature'
# column of the selection file.
selected_features = pd.read_csv('top_n_features_relevance.csv')['feature'].tolist()

# Rebuild the tsfresh settings (kind -> calculator -> parameter list) that
# would regenerate exactly those columns.
selected_settings = from_columns(selected_features)

# Print the corrected settings
print("Corrected Settings Dictionary:", selected_settings, sep="\n")

# Keep only the calculators registered under the 'value' kind; this is the
# dictionary later handed to extract_features as default_fc_parameters.
updated_settings = selected_settings['value']
print(updated_settings)

Corrected Settings Dictionary:
{'value': {'quantile': [{'q': 0.9}, {'q': 0.6}, {'q': 0.4}, {'q': 0.7}, {'q': 0.1}, {'q': 0.8}], 'change_quantiles': [{'f_agg': 'var', 'isabs': True, 'qh': 0.4, 'ql': 0.2}, {'f_agg': 'var', 'isabs': False, 'qh': 0.4, 'ql': 0.2}, {'f_agg': 'mean', 'isabs': True, 'qh': 0.4, 'ql': 0.2}, {'f_agg': 'var', 'isabs': True, 'qh': 0.8, 'ql': 0.6}, {'f_agg': 'mean', 'isabs': True, 'qh': 0.4, 'ql': 0.0}, {'f_agg': 'var', 'isabs': False, 'qh': 0.8, 'ql': 0.6}, {'f_agg': 'mean', 'isabs': True, 'qh': 0.6, 'ql': 0.0}, {'f_agg': 'var', 'isabs': False, 'qh': 0.6, 'ql': 0.2}, {'f_agg': 'var', 'isabs': True, 'qh': 0.6, 'ql': 0.2}, {'f_agg': 'mean', 'isabs': True, 'qh': 0.8, 'ql': 0.6}, {'f_agg': 'var', 'isabs': True, 'qh': 0.8, 'ql': 0.4}, {'f_agg': 'mean', 'isabs': True, 'qh': 0.6, 'ql': 0.2}, {'f_agg': 'var', 'isabs': False, 'qh': 0.8, 'ql': 0.4}, {'f_agg': 'var', 'isabs': False, 'qh': 0.4, 'ql': 0.0}, {'f_agg': 'mean', 'isabs': False, 'qh': 0.4, 'ql': 0.2}, {'f_agg': 'var

In [None]:
# Extract the selected tsfresh features from channel 0 of every recording
# listed in hrv_dataset. The result, time_features, holds one row per
# recording plus an 'ecg_id' column identifying it.

# Resolve ecg_id -> record path once, instead of re-filtering `dataset` with a
# boolean mask on every iteration. keep='first' reproduces the original
# `.values[0]` choice when an ecg_id occurs more than once. A dict is used so
# that an integer id that iterrows() has upcast to float still matches
# (1.0 == 1 and both hash alike).
_unique_records = dataset.drop_duplicates(subset='ecg_id', keep='first')
filename_by_ecg_id = dict(
    zip(_unique_records['ecg_id'], _unique_records['filename_hr'])
)

# Per-recording feature frames are collected here and concatenated once after
# the loop. Calling pd.concat inside the loop re-copies every accumulated row
# on each iteration (quadratic cost) and, when seeded with an empty DataFrame,
# goes through a deprecated concat path in recent pandas.
feature_frames = []
n_records = len(hrv_dataset)

for index, row in hrv_dataset.iterrows():
    print(f"{index}/{n_records}")
    ecg_id = row['ecg_id']

    # Raises KeyError naming the offending id if it is absent from `dataset`
    # (the original raised an uninformative IndexError from `.values[0]`).
    record = wfdb.rdrecord(directory + filename_by_ecg_id[ecg_id])

    # Use only the first channel of the recording.
    ecg_signal = record.p_signal[:, 0]
    # NOTE(review): 500 Hz is hard-coded; confirm it matches record.fs for the
    # 'filename_hr' records.
    ecg_signal = nk.ecg_clean(ecg_signal, sampling_rate=500)

    # Long-format frame expected by tsfresh: one row per sample, a constant id
    # and a monotonically increasing sort key.
    ecg_df = pd.DataFrame({
        'id': [ecg_id] * len(ecg_signal),
        'time': range(len(ecg_signal)),
        'value': ecg_signal
    })

    extracted_features = extract_features(
        ecg_df,
        column_id='id',
        column_sort='time',
        column_value='value',
        n_jobs=1,
        default_fc_parameters=updated_settings
    )

    # extract_features indexes its output by id; keep the id as a regular
    # column too, because the index is discarded when the frames are combined.
    extracted_features['ecg_id'] = ecg_id

    feature_frames.append(extracted_features)

# pd.concat raises on an empty list, so fall back to an empty frame to keep
# the original behaviour when hrv_dataset has no rows.
time_features = (
    pd.concat(feature_frames, ignore_index=True)
    if feature_frames
    else pd.DataFrame()
)


    

In [42]:
# Persist the extracted features; the row index is omitted from the file.
time_features.to_csv(
    path_or_buf='time_features_updated_relevance.csv',
    index=False,
)