In [3]:
import pyedflib
import neurokit2 as nk
import pandas as pd
import os

## Data 

In [4]:
# Directory of EDF files
edf_directory = '../datasetsPart2/valu3s/vitaport/'

In [5]:
# Directory of CSV files
csv_directory = '../datasets_2/valu3s/vitaport/filtered_signals/'

In [6]:
# Duration of each interval in seconds
interval_duration = 120

# Initialize an empty DataFrame to store all features
feature_dfs = []

# Sampling Rate
sampling_rate = 256

## Process Data EDF File

In [12]:
# Function extract featrures from ECG
def extract_hrv_features(signal, sampling_rate, interval_duration):
    # number of intrevals
    interval_samples = int(interval_duration * sampling_rate)
    features = [] 
    
    # loop to extract features for each intreval 
    for start in range(0, len(signal), interval_samples):
        end = start + interval_samples
        if end > len(signal):
            break
        
        interval_signal = signal[start:end]
        if len(interval_signal) < interval_samples:
            continue
        
        try:
            # extract features 
            signals, info = nk.ecg_process(interval_signal, sampling_rate=sampling_rate)
            hrv_features = nk.hrv_time(info['ECG_R_Peaks'], sampling_rate=sampling_rate)
            hrv_features['Interval_Start'] = start / sampling_rate
            hrv_features['Interval_End'] = end / sampling_rate

            # add extrated features to the DataFrame
            features.append(hrv_features)
        except Exception as e:
            print(f"Error processing interval {start}-{end}: {e}")
            continue

    return features

# Loop through all .edf files in the directory
for filename in os.listdir(edf_directory):
    if filename.endswith('.csv'):
        file_path = os.path.join(edf_directory, filename)
        
        # Load durations and ECG .csv file
        durations = pd.read_csv('datasets/hrv/filtered_ECG_duration.csv')
        f = pd.read_csv(file_path)
        #Load the .edf file
        f = pyedflib.EdfReader(file_path)
        
        # Get signal labels and find ECG signal index
        signal_labels = f.getSignalLabels()
        if 'ECG' in signal_labels:
            ecg_signal_index = signal_labels.index('ECG')
            ecg_signal = f.readSignal(ecg_signal_index)
            sampling_rate = f.getSampleFrequency(ecg_signal_index)
        else:
            print(f"No ECG signal found in {filename}")
            continue
        
        # Close the EDF file
        f.close()

        # duration of the ECG signal in seconds
        ecg_duration = len(ecg_signal) / sampling_rate
        print(f"ECG duration for {filename}: {ecg_duration} seconds")
        
        # Extract HRV features for each interval
        interval_features = extract_hrv_features(ecg_signal, sampling_rate, interval_duration)

        # number of intervals
        num_intervals = len(interval_features)
        print(f"Number of intervals for {filename}: {num_intervals}")
        
        if interval_features:
            # Convert list of feature dictionaries to DataFrame and add filename
            interval_features_df = pd.concat([pd.DataFrame(features) for features in interval_features])
            interval_features_df['Filename'] = filename
        
            # Append the features DataFrame to the list
            feature_dfs.append(interval_features_df)
        else:
            print(f"No valid intervals found in {filename}")

-0.9713076923076923    float64
dtype: object
   -0.9713076923076923
0            -0.163328
1             2.731728
2             0.244323
3            -0.841933
4            -2.228436
-0.9713076923076923   -0.163328
Name: 0, dtype: float64
ECG duration for fp01_1.csv: 3694.6 seconds
1.1230923076923076    float64
dtype: object
   1.1230923076923076
0            1.088918
1            0.197944
2           -1.635267
3            0.110067
4            0.710559
1.1230923076923076    1.088918
Name: 0, dtype: float64
ECG duration for fp01_2.csv: 3576.7 seconds
-1.259348717948718    float64
dtype: object
   -1.259348717948718
0           -0.627123
1           -0.048600
2            1.420897
3            0.041718
4           -0.871226
-1.259348717948718   -0.627123
Name: 0, dtype: float64
ECG duration for fp01_3.csv: 3574.1 seconds
-0.21214871794871798    float64
dtype: object
   -0.21214871794871798
0              0.146682
1              0.061246
2             -0.204826
3             -0.783349
4

## Save Features Dataset EDF

In [9]:
# Concatenate all feature DataFrames into one DataFrame
all_features_df = pd.concat(feature_dfs, ignore_index=True)

# Save the features DataFrame to a CSV file
all_features_df.to_csv('hrv_time_domain_5_min.csv', index=False)

print("Saved features on 'rv_time_domain_5_min.csv'.")

Saved features on 'rv_time_domain_5_min.csv'.


## Process Data CSV File

In [7]:
# Function to extract features from ECG
def extract_hrv_features(signal, sampling_rate, interval_duration):
    # Number of intervals
    interval_samples = int(interval_duration * sampling_rate)
    features = [] 
    
    # Loop to extract features for each interval 
    for start in range(0, len(signal), interval_samples):
        end = start + interval_samples
        if end > len(signal):
            break
        
        interval_signal = signal[start:end]
        if len(interval_signal) < interval_samples:
            continue
        
        try:
            # Extract features using NeuroKit
            signals, info = nk.ecg_process(interval_signal, sampling_rate=sampling_rate)
            hrv_features = nk.hrv_time(info['ECG_R_Peaks'], sampling_rate=sampling_rate)
            hrv_features['Interval_Start'] = start / sampling_rate
            hrv_features['Interval_End'] = end / sampling_rate

            # Add extracted features to the list
            features.append(hrv_features)
        except Exception as e:
            print(f"Error processing interval {start}-{end}: {e}")
            continue

    return features

# List to store all features from multiple files
feature_dfs = []

# Loop all csv files
for filename in os.listdir(csv_directory):
    if filename.endswith('.csv'):
        file_path = os.path.join(csv_directory, filename)
        
        # Load the csv file 
        try:
            # Read csv 
            df = pd.read_csv(file_path, header=None)
            ecg_signal = df[0].values 
        except Exception as e:
            print(f"Error loading {filename}: {e}")
            continue

        # Duration of the ECG signal in seconds
        ecg_duration = len(ecg_signal) / sampling_rate
        print(f"ECG duration for {filename}: {ecg_duration} seconds")
        
        # Extract HRV features for each interval
        interval_features = extract_hrv_features(ecg_signal, sampling_rate, interval_duration)

        # Number of intervals
        num_intervals = len(interval_features)
        print(f"Number of intervals for {filename}: {num_intervals}")
        
        if interval_features:
            # Convert list of feature dictionaries to DataFrame and add filename
            interval_features_df = pd.concat([pd.DataFrame(features) for features in interval_features])
            interval_features_df['Filename'] = filename
        
            # Append the features DataFrame to the list
            feature_dfs.append(interval_features_df)
        else:
            print(f"No valid intervals found in {filename}")

ECG duration for fp01_1.csv: 3694.578125 seconds
Number of intervals for fp01_1.csv: 30
ECG duration for fp01_2.csv: 3576.703125 seconds
Number of intervals for fp01_2.csv: 29
ECG duration for fp01_3.csv: 3574.078125 seconds
Number of intervals for fp01_3.csv: 29
ECG duration for fp01_4.csv: 3734.52734375 seconds
Number of intervals for fp01_4.csv: 31
ECG duration for fp02_1.csv: 3721.453125 seconds
Number of intervals for fp02_1.csv: 31
ECG duration for fp02_2.csv: 3691.203125 seconds
Number of intervals for fp02_2.csv: 30
ECG duration for fp02_3.csv: 3575.078125 seconds
Number of intervals for fp02_3.csv: 29
ECG duration for fp02_4.csv: 3668.953125 seconds
Number of intervals for fp02_4.csv: 30
ECG duration for fp03_2.csv: 3990.5078125 seconds
Number of intervals for fp03_2.csv: 33
ECG duration for fp03_3.csv: 3580.953125 seconds
Number of intervals for fp03_3.csv: 29
ECG duration for fp03_4.csv: 3573.328125 seconds
Number of intervals for fp03_4.csv: 29
ECG duration for fp04_1.csv: 

  mrrs /= th2
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods.

ECG duration for fp13_2.csv: 3573.578125 seconds
Error processing interval 0-30720: index 9 is out of bounds for axis 0 with size 9


  mrrs /= th2
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods.

Error processing interval 30720-61440: cannot convert float NaN to integer
Error processing interval 61440-92160: integer division or modulo by zero
Error processing interval 92160-122880: integer division or modulo by zero
Error processing interval 122880-153600: integer division or modulo by zero
Error processing interval 153600-184320: integer division or modulo by zero
Error processing interval 184320-215040: integer division or modulo by zero
Error processing interval 215040-245760: integer division or modulo by zero
Error processing interval 245760-276480: integer division or modulo by zero
Error processing interval 276480-307200: integer division or modulo by zero
Error processing interval 307200-337920: index 0 is out of bounds for axis 0 with size 0
Error processing interval 337920-368640: integer division or modulo by zero
Error processing interval 368640-399360: integer division or modulo by zero
Error processing interval 399360-430080: integer division or modulo by zero
Err

  mrrs /= th2
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods.

ECG duration for fp13_4.csv: 3573.953125 seconds
Error processing interval 30720-61440: index 9 is out of bounds for axis 0 with size 9
Error processing interval 61440-92160: integer division or modulo by zero
Error processing interval 92160-122880: integer division or modulo by zero
Error processing interval 122880-153600: integer division or modulo by zero
Error processing interval 153600-184320: integer division or modulo by zero
Error processing interval 184320-215040: integer division or modulo by zero
Error processing interval 215040-245760: integer division or modulo by zero
Error processing interval 245760-276480: integer division or modulo by zero
Error processing interval 276480-307200: integer division or modulo by zero
Error processing interval 307200-337920: integer division or modulo by zero
Error processing interval 337920-368640: integer division or modulo by zero
Error processing interval 368640-399360: integer division or modulo by zero
Error processing interval 39936

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dt

ECG duration for fp14_1.csv: 3906.73828125 seconds
Number of intervals for fp14_1.csv: 32
ECG duration for fp14_2.csv: 3570.578125 seconds
Number of intervals for fp14_2.csv: 29
ECG duration for fp14_3.csv: 4214.078125 seconds
Number of intervals for fp14_3.csv: 35
ECG duration for fp14_4.csv: 3570.703125 seconds
Number of intervals for fp14_4.csv: 29
ECG duration for fp15_1.csv: 3671.453125 seconds
Number of intervals for fp15_1.csv: 30
ECG duration for fp15_2.csv: 3574.453125 seconds
Number of intervals for fp15_2.csv: 29
ECG duration for fp15_3.csv: 3574.703125 seconds
Number of intervals for fp15_3.csv: 29
ECG duration for fp15_4.csv: 3574.453125 seconds
Number of intervals for fp15_4.csv: 29
ECG duration for fp16_1.csv: 3829.703125 seconds
Number of intervals for fp16_1.csv: 31
ECG duration for fp16_2.csv: 3575.95703125 seconds
Number of intervals for fp16_2.csv: 29
ECG duration for fp16_3.csv: 3571.828125 seconds
Number of intervals for fp16_3.csv: 29
ECG duration for fp16_4.csv:

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  mrrs /= th2
  warn(
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  mrrs /= th2
  warn(
  mrrs /= th2
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(


Error processing interval 430080-460800: index 10 is out of bounds for axis 0 with size 9
Error processing interval 460800-491520: cannot convert float NaN to integer
Error processing interval 491520-522240: cannot convert float NaN to integer
Error processing interval 522240-552960: cannot convert float NaN to integer
Error processing interval 552960-583680: cannot convert float NaN to integer
Error processing interval 583680-614400: integer division or modulo by zero
Error processing interval 614400-645120: cannot convert float NaN to integer
Error processing interval 645120-675840: cannot convert float NaN to integer


  warn(
  warn(
  mrrs /= th2
  warn(
  mrrs /= th2
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  warn(
  warn(


Error processing interval 675840-706560: index 9 is out of bounds for axis 0 with size 9
Error processing interval 706560-737280: cannot convert float NaN to integer
Error processing interval 737280-768000: index 9 is out of bounds for axis 0 with size 9
Error processing interval 768000-798720: cannot convert float NaN to integer


  mrrs /= th2
  warn(
  mrrs /= th2
  warn(


Number of intervals for fp19_1.csv: 5
ECG duration for fp19_2.csv: 3582.71484375 seconds
Number of intervals for fp19_2.csv: 29
ECG duration for fp19_3.csv: 3015.7578125 seconds
Number of intervals for fp19_3.csv: 25
ECG duration for fp20_1.csv: 3712.078125 seconds
Number of intervals for fp20_1.csv: 30
ECG duration for fp20_2.csv: 3577.953125 seconds
Number of intervals for fp20_2.csv: 29
ECG duration for fp20_3.csv: 3045.82421875 seconds
Number of intervals for fp20_3.csv: 25
ECG duration for fp20_4.csv: 3628.453125 seconds
Number of intervals for fp20_4.csv: 30


## Save Features Dataset CSV

In [8]:
# Concatenate all feature DataFrames into one DataFrame
all_features_df = pd.concat(feature_dfs, ignore_index=True)

# Save the features DataFrame to a CSV file
all_features_df.to_csv('hrv_time_domain_2_min_filtered.csv', index=False)

print("Saved features on 'hrv_time_domain_2_min_filtered.csv'.")

Saved features on 'hrv_time_domain_2_min_filtered.csv'.
