In [1]:
import os
import pandas as pd
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from scipy.stats import zscore
from datetime import datetime, timedelta
import neurokit2 as nk
from scipy.signal import welch

In [2]:
def timedomain(rr):
    """Calculate time-domain HRV metrics from RR intervals.

    Parameters
    ----------
    rr : array-like
        RR intervals in milliseconds. The result labels and the
        ``60000 / mean(rr)`` Kubios-style heart rate both assume this unit.

    Returns
    -------
    dict
        Metric label -> value, in this order: mean RR, SDNN, RMSSD,
        Kubios-style mean HR, mean HR, STD HR, min HR, max HR.
        Every value is NaN when ``rr`` is empty, and RMSSD is NaN when
        fewer than two intervals are supplied (no successive difference).
    """
    labels = (
        'Mean RR (ms)',
        'STD RR/SDNN (ms)',
        'RMSSD (ms)',
        'Mean HR (Kubios\' style) (beats/min)',
        'Mean HR (beats/min)',
        'STD HR (beats/min)',
        'Min HR (beats/min)',
        'Max HR (beats/min)',
    )

    # Work on a float copy so lists and integer arrays behave the same way.
    rr = np.asarray(rr, dtype=float)
    if rr.size == 0:
        # np.min / np.max raise on empty input; report "no data" instead.
        return dict.fromkeys(labels, np.nan)

    results = {}
    # rr is in milliseconds, so beats per minute is 60000 / rr (not 60 / rr,
    # which would only be right for intervals expressed in seconds).
    hr = 60000.0 / rr
    successive_diffs = np.diff(rr)

    results['Mean RR (ms)'] = np.mean(rr)
    results['STD RR/SDNN (ms)'] = np.std(rr)
    if successive_diffs.size:
        results['RMSSD (ms)'] = np.sqrt(np.mean(np.square(successive_diffs)))
    else:
        results['RMSSD (ms)'] = np.nan
    results['Mean HR (Kubios\' style) (beats/min)'] = 60000 / np.mean(rr)
    results['Mean HR (beats/min)'] = np.mean(hr)
    results['STD HR (beats/min)'] = np.std(hr)
    results['Min HR (beats/min)'] = np.min(hr)
    results['Max HR (beats/min)'] = np.max(hr)
    # results['NN50'] = np.sum(np.abs(np.diff(rr)) > 50) * 1
    # results['pNN50 (%)'] = 100 * np.sum((np.abs(np.diff(rr)) > 50) * 1) / len(rr)
    return results

In [3]:
def AbsolutePower(signal, fs, low, high, nperseg=256):
    """Calculate absolute power in a specific frequency band.

    The power spectral density is estimated with Welch's method and then
    integrated (trapezoidal rule) over the bins with ``low <= f <= high``.

    Parameters
    ----------
    signal : array-like
        One-dimensional signal.
    fs : float
        Sampling frequency of ``signal`` in Hz.
    low, high : float
        Inclusive band edges in Hz.
    nperseg : int, optional
        Welch segment length in samples (default 256, the previously
        hard-coded value). The frequency resolution is ``fs / nperseg``, so
        narrow low-frequency bands need a larger value.

    Returns
    -------
    float
        Integrated PSD over the band; 0.0 for an empty signal or when no
        frequency bin falls inside the band.
    """
    signal = np.asarray(signal)
    if signal.size == 0:
        return 0.0

    # Welch would warn and shrink an over-long segment itself; do it
    # explicitly so short inputs run without the warning.
    segment_len = min(int(nperseg), signal.size)
    f, Pxx = welch(signal, fs, nperseg=segment_len)

    in_band = (f >= low) & (f <= high)

    # np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever exists.
    integrate = getattr(np, 'trapezoid', None) or np.trapz
    band_power = integrate(Pxx[in_band], f[in_band])
    return band_power

In [4]:
def RelativePower(signal, fs, low, high):
    """Return the share of total spectral power found between low and high.

    The denominator is ``AbsolutePower`` evaluated from 0 Hz up to ``fs / 2``;
    the numerator is ``AbsolutePower`` evaluated over ``[low, high]``.
    """
    nyquist = fs / 2
    whole_spectrum = AbsolutePower(signal, fs, 0, nyquist)
    in_band = AbsolutePower(signal, fs, low, high)
    ratio = in_band / whole_spectrum
    return ratio

In [5]:
def capture_uco_windows_ecg(ecg_data, fs, start_time, uco_start_time, uco_end_time, window_len_sec=15,
                            peak_kwargs=None):
    """
    Capture windows before, during and after a UCO and compute ECG features.

    Three regions are cut into consecutive windows of ``window_len_sec``
    seconds and each window yields one row of time-domain HRV metrics:

    * label -1: from 75 minutes to 60 minutes before the UCO start,
    * label  0: from the UCO start to the UCO end,
    * label  1: from the UCO end for up to 60 windows.

    Parameters
    ----------
    ecg_data : array-like
        The one-dimensional ECG signal.
    fs : int or float
        Sampling frequency of the ECG data in Hz.
    start_time : str
        Start time of the recording, parsed with '%I:%M:%S:%f %p'
        (note the colon before the fractional seconds), e.g. '09:40:26:003 AM'.
    uco_start_time : str
        UCO start time, parsed with '%I:%M:%S.%f %p',
        e.g. '09:41:26.472337 AM'.
    uco_end_time : str
        UCO end time, same format as ``uco_start_time``.
    window_len_sec : int or float
        Length of each window in seconds.
    peak_kwargs : dict, optional
        Extra keyword arguments forwarded to ``scipy.signal.find_peaks``
        (for example ``distance`` or ``prominence``). With the default
        ``None`` the call is ``find_peaks(segment)``, as before.
        NOTE(review): without constraints every local maximum counts as a
        peak, which is unlikely to isolate R-peaks on a raw ECG — confirm
        and pass suitable constraints.

    Returns
    -------
    df : DataFrame
        One row per window with columns 'time' (window start in seconds from
        the start of ``ecg_data``), 'sdrr', 'rmssd', 'mrr', 'mean_hr',
        'std_hr', 'min_hr', 'max_hr' and 'label'. The metric columns are NaN
        for windows in which fewer than two peaks were found.

    Raises
    ------
    ValueError
        If the window length is not at least one sample.

    Notes
    -----
    The computed sample offsets are printed for inspection.
    """
    # Convert times to datetime objects (all land on strptime's default date)
    start_datetime = datetime.strptime(start_time, '%I:%M:%S:%f %p')
    uco_start_datetime = datetime.strptime(uco_start_time, '%I:%M:%S.%f %p')
    uco_end_datetime = datetime.strptime(uco_end_time, '%I:%M:%S.%f %p')

    # Adjust for UCO times that cross midnight
    if uco_start_datetime < start_datetime:
        uco_start_datetime += timedelta(days=1)
    if uco_end_datetime < uco_start_datetime:
        uco_end_datetime += timedelta(days=1)

    # Offsets of the UCO start/end from the start of the recording, in seconds
    time_diff_start = (uco_start_datetime - start_datetime).total_seconds()
    time_diff_end = (uco_end_datetime - start_datetime).total_seconds()

    # Convert the time differences to sample indices
    uco_start_samples = int(time_diff_start * fs)
    uco_end_samples = int(time_diff_end * fs)

    # range() needs integer bounds and step, so force every sample count to
    # int even when fs or window_len_sec is a float.
    window_len = int(window_len_sec * fs)
    if window_len <= 0:
        raise ValueError("window_len_sec * fs must be at least one sample")
    n_samples = len(ecg_data)

    # Region boundaries in samples. end_sample_before may be negative when the
    # recording starts less than an hour before the UCO; range() then simply
    # yields no windows for that region.
    start_sample_before = max(0, uco_start_samples - int(75 * 60 * fs))
    end_sample_before = uco_start_samples - int(60 * 60 * fs)
    start_sample_uco = uco_start_samples
    end_sample_uco = uco_end_samples
    start_sample_after = uco_end_samples
    end_sample_after = min(n_samples, uco_end_samples + 60 * window_len)

    print(f"time diff start: {time_diff_start}")
    print(f"time diff end: {time_diff_end}")
    print(f"start sample before UCO: {start_sample_before}")
    print(f"end sample before UCO: {end_sample_before}")
    print(f"start sample after UCO: {start_sample_after}")
    print(f"end sample after UCO: {end_sample_after}")

    # Output column -> key in the dictionary returned by timedomain().
    # Frequency-domain features (band powers) are intentionally not computed.
    metric_columns = {
        'sdrr': 'STD RR/SDNN (ms)',
        'rmssd': 'RMSSD (ms)',
        'mrr': 'Mean RR (ms)',
        'mean_hr': 'Mean HR (beats/min)',
        'std_hr': 'STD HR (beats/min)',
        'min_hr': 'Min HR (beats/min)',
        'max_hr': 'Max HR (beats/min)',
    }
    data = {'time': [], **{column: [] for column in metric_columns}, 'label': []}

    peak_options = dict(peak_kwargs or {})
    ms_per_sample = 1000.0 / fs

    def process_windows(start_sample, end_sample, label):
        # NOTE(review): only the window start is bounded by end_sample, so the
        # last window of a region can extend past it into the next region.
        for start in range(start_sample, end_sample, window_len):
            end = start + window_len
            if end > n_samples:
                break  # Ensure the last window doesn't exceed the data length

            segment = ecg_data[start:end]
            peak_indices, _ = find_peaks(segment, **peak_options)

            # Peak spacing is in samples; convert to milliseconds, the unit
            # timedomain() expects. The float array also keeps the median
            # replacement below from being truncated to an integer.
            rr_intervals = np.diff(peak_indices) * ms_per_sample

            if rr_intervals.size == 0:
                # Fewer than two peaks: no interval exists, so record NaNs
                # rather than crashing on an empty array.
                hrv_metrics = dict.fromkeys(metric_columns.values(), np.nan)
            else:
                if rr_intervals.size > 1:
                    # Replace intervals more than 2 standard deviations from
                    # the mean with the window median.
                    outliers = np.abs(zscore(rr_intervals)) > 2
                    rr_intervals[outliers] = np.median(rr_intervals)
                hrv_metrics = timedomain(rr_intervals)

            # Append data to the dictionary
            data['time'].append(start / fs)
            for column, metric_key in metric_columns.items():
                data[column].append(hrv_metrics[metric_key])
            data['label'].append(label)

    # Process windows 1 hour and 15 minutes to 1 hour before UCO start time and label as -1
    process_windows(start_sample_before, end_sample_before, label=-1)

    # Process windows during UCO and label as 0
    process_windows(start_sample_uco, end_sample_uco, label=0)

    # Process windows after UCO end time and label as 1
    process_windows(start_sample_after, end_sample_after, label=1)

    df = pd.DataFrame(data)
    return df

In [7]:
# THIS BLOCK OF CODE REQUIRES REAL DATA

# Define the parameters
# Paths are assembled with os.path.join so the cell also runs on non-Windows
# systems; on Windows the resulting strings are the same as before.
folder_path = os.path.join('Matlab code for preprocessing', 'Fake data', '21203')
handled_folder_path = os.path.join('result', 'ECG_handled')
fs = 400  # Sampling frequency
start_time = '09:40:26:003 AM'  # Start time of the data
uco_start_time = '09:41:26.472337 AM'  # UCO start time
uco_end_time = '09:42:26.472337 AM'  # UCO end time

# Make sure the processed folder exists (exist_ok avoids the check-then-create race)
os.makedirs(handled_folder_path, exist_ok=True)

# Only process files in the specific folder; sort for a reproducible order
folder_path_current = folder_path
mat_files = sorted(file for file in os.listdir(folder_path_current) if file.endswith('_ECG.mat'))

# The output folder depends only on the input folder, so name it once
folder_name = os.path.basename(folder_path)
new_save_folder = os.path.join(handled_folder_path, folder_name + '_handled')

for mat_file in mat_files:
    file_path_1 = os.path.join(folder_path_current, mat_file)
    mat_contents = loadmat(file_path_1)
    # The signal is read from the 'save_data' variable and flattened to 1-D
    ecg_data = mat_contents['save_data'].flatten()

    # Capture UCO windows and compute features
    df = capture_uco_windows_ecg(ecg_data, fs, start_time, uco_start_time, uco_end_time)

    # Create the save folder only when there is something to write
    os.makedirs(new_save_folder, exist_ok=True)

    excel_file_name = os.path.splitext(mat_file)[0] + '.xlsx'  # Change the extension to .xlsx
    new_excel_file_path = os.path.join(new_save_folder, excel_file_name)

    df.to_excel(new_excel_file_path, index=False)

print("Processing completed! All processed files have been saved.")

time diff start: 60.469337
time diff end: 120.469337
start sample before UCO: 0
end sample before UCO: -1415813
start sample after UCO: 48187
end sample after UCO: 50740
time diff start: 60.469337
time diff end: 120.469337
start sample before UCO: 0
end sample before UCO: -1415813
start sample after UCO: 48187
end sample after UCO: 48080
Processing completed! All processed files have been saved.
