In [1]:
import os
import pandas as pd
import numpy as np
import neurokit2 as nk
import pyeeg
import warnings
from scipy.io import loadmat
# from eeglib import PFD
from scipy.stats import entropy
from scipy.io import loadmat
import sys
import warnings
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

In [2]:
# Ignore specific FutureWarning
warnings.filterwarnings("ignore", message="`rcond` parameter will change")

def bandpower(data, sf, band, window_sec=None, relative=False):
    """Compute the average power of a signal in a specific frequency band.

    The power spectral density is estimated with Welch's method and then
    integrated over the requested band with Simpson's rule.

    Parameters
    ----------
    data : 1d-array
        Input signal in the time-domain.
    sf : float
        Sampling frequency of the data, in Hz.
    band : list
        Lower and upper frequencies of the band of interest, ``[low, high]``.
        Both edges are inclusive.
    window_sec : float, optional
        Length of each Welch segment in **seconds** (not samples).
        If None, two cycles of the lowest band frequency are used,
        i.e. ``window_sec = 2 / min(band)``. When the band starts at 0 Hz
        that value is undefined, so the whole signal is used as one segment.
    relative : boolean
        If True, return the relative power (= divided by the total power of the signal).
        If False (default), return the absolute power.

    Return
    ------
    bp : float
        Absolute or relative band power.
    """
    from scipy.signal import welch
    from scipy.integrate import simpson

    low, high = np.asarray(band, dtype=float)
    n_samples = len(data)

    # Define the Welch segment length in samples.
    if window_sec is not None:
        nperseg = int(window_sec * sf)
    elif low > 0:
        nperseg = int((2 / low) * sf)
    else:
        # 2 / 0 would give an infinite window; fall back to the full signal.
        nperseg = n_samples
    # A segment can never be longer than the signal itself.
    nperseg = min(nperseg, n_samples)

    # Compute the modified periodogram (Welch)
    freqs, psd = welch(data, sf, nperseg=nperseg)

    # Frequency resolution
    freq_res = freqs[1] - freqs[0]

    # Boolean mask selecting the frequency bins that fall inside the band
    idx_band = np.logical_and(freqs >= low, freqs <= high)

    # Integral approximation of the spectrum using Simpson's rule.
    bp = simpson(psd[idx_band], dx=freq_res)

    if relative:
        bp /= simpson(psd, dx=freq_res)
    return bp

In [3]:
def capture_uco_windows(eeg_data, fs, start_time, uco_start_time, uco_end_time, additional_file_before=None, additional_start_time_before=None, additional_file_after=None, additional_start_time_after=None, window_len_sec=15):
    """
    Compute EEG features on fixed-length windows before, during and after a UCO.

    Three segments are cut into consecutive, non-overlapping windows of
    ``window_len_sec`` seconds and labelled:

    * ``-1`` : baseline, from 75 to 60 minutes before the UCO start. If that
      interval is not inside ``eeg_data`` and an additional "before" file is
      given, 60 windows are taken from that file instead.
    * ``0``  : the UCO itself (``uco_start_time`` to ``uco_end_time``).
    * ``1``  : up to 60 windows after the UCO end. If fewer than 15 minutes of
      ``eeg_data`` remain and an additional "after" file is given, the missing
      duration is taken from the start of that file.

    Parameters
    ----------
    eeg_data : array-like
        The EEG signal data (one channel).
    fs : int
        Sampling frequency of the EEG data, in Hz.
    start_time : str
        Start time of the EEG data, 12-hour clock, format '%I:%M:%S:%f %p'
        (note the ':' before the fractional seconds).
    uco_start_time : str
        UCO start time, format '%I:%M:%S.%f %p'.
    uco_end_time : str
        UCO end time, format '%I:%M:%S.%f %p'.
    additional_file_before : str, optional
        Path to a .mat file (variable 'save_data') used for the baseline
        segment when it is not available in ``eeg_data``.
    additional_start_time_before : str, optional
        Start time of that file, format '%I:%M:%S:%f %p'.
    additional_file_after : str, optional
        Path to a .mat file (variable 'save_data') used to complete the
        post-UCO segment.
    additional_start_time_after : str, optional
        Start time of that file, format '%I:%M:%S:%f %p'.
    window_len_sec : int
        Length of each window in seconds.

    Returns
    -------
    df : DataFrame
        One row per window with the computed features, the window start
        ('time', in seconds from the start of the file the window was read
        from) and the segment 'label'.
    """
    # Convert times to datetime objects (no date part: only the time of day is known)
    start_datetime = datetime.strptime(start_time, '%I:%M:%S:%f %p')
    uco_start_datetime = datetime.strptime(uco_start_time, '%I:%M:%S.%f %p')
    uco_end_datetime = datetime.strptime(uco_end_time, '%I:%M:%S.%f %p')

    # Adjust for UCO times that cross midnight
    if uco_start_datetime < start_datetime:
        uco_start_datetime += timedelta(days=1)
    if uco_end_datetime < uco_start_datetime:
        uco_end_datetime += timedelta(days=1)

    # Calculate the difference in seconds between the start time and UCO times
    time_diff_start = (uco_start_datetime - start_datetime).total_seconds()
    time_diff_end = (uco_end_datetime - start_datetime).total_seconds()

    # Convert the time differences to sample indices
    uco_start_samples = int(time_diff_start * fs)
    uco_end_samples = int(time_diff_end * fs)

    # Window length in samples; int() keeps range() usable when fs is a float
    window_len = int(window_len_sec * fs)

    # Calculate the start and end sample indices for the desired windows
    start_sample_before = max(0, uco_start_samples - int(75 * 60 * fs))
    end_sample_before = uco_start_samples - int(60 * 60 * fs)
    start_sample_uco = uco_start_samples
    end_sample_uco = uco_end_samples
    start_sample_after = uco_end_samples
    end_sample_after = min(len(eeg_data), uco_end_samples + 60 * window_len)

    print(f"time diff start: {time_diff_start}")
    print(f"time diff end: {time_diff_end}")
    print(f"start sample before UCO: {start_sample_before}")
    print(f"end sample before UCO: {end_sample_before}")
    print(f"start sample after UCO: {start_sample_after}")
    print(f"end sample after UCO: {end_sample_after}")

    # Column order of the returned DataFrame follows the key order below.
    # NOTE: the 'PFD' column holds the Higuchi fractal dimension and 'belta'
    # is the existing spelling of the 13-22 Hz band; both names are kept so
    # that previously written spreadsheets stay comparable.
    data = {
        'time': [],
        'PFD': [],
        'DFA': [],
        'sh_entropy': [],
        'multi_entropy': [],
        'sample_entropy': [],
        'approx_entropy': [],
        'absolute_delta_power': [],
        'relative_delta_power': [],
        'absolute_theta_power': [],
        'relative_theta_power': [],
        'absolute_alpha_power': [],
        'relative_alpha_power': [],
        'absolute_belta_power': [],
        'relative_belta_power': [],
        'label': []
    }

    # Frequency bands (Hz) for the absolute/relative power features
    bands = {
        'delta': [0, 3.9],
        'theta': [4, 7.9],
        'alpha': [8, 12.9],
        'belta': [13, 22],
    }
    time_scales = np.arange(1, 10)

    def process_windows(signal_data, start_sample, end_sample, label):
        """Append one feature row to ``data`` per full window in [start_sample, end_sample)."""
        for start in range(start_sample, end_sample, window_len):
            end = start + window_len
            if end > len(signal_data):
                break  # Ensure the last window doesn't exceed the data length

            signal = signal_data[start:end]

            # Compute features
            mse_values = [nk.entropy_multiscale(signal, scale=np.array([scale]), method='MSEn')[1]['Value'] for scale in time_scales]
            # show=False: the value does not depend on plotting, so no figure
            # is created (and none has to be closed) for every window.
            hfd, _ = nk.fractal_higuchi(signal, k_max=20, show=False)
            dfa = pyeeg.dfa(signal)
            sampen, _ = nk.entropy_sample(signal, delay=1, dimension=2)
            apen, _ = nk.entropy_approximate(signal)
            binned = pd.cut(signal, bins=100, labels=False)
            shanen, _ = nk.entropy_shannon(binned)

            # Append data to the dictionary
            data['time'].append(start / fs)
            data['PFD'].append(hfd)
            data['DFA'].append(dfa)
            data['sh_entropy'].append(shanen)
            data['multi_entropy'].append(mse_values)
            data['sample_entropy'].append(sampen)
            data['approx_entropy'].append(apen)
            for band_name, band in bands.items():
                # bandpower() expects the Welch window in seconds, not samples
                data[f'absolute_{band_name}_power'].append(bandpower(signal, fs, band, window_len_sec))
                data[f'relative_{band_name}_power'].append(bandpower(signal, fs, band, window_len_sec, relative=True))
            data['label'].append(label)

    # Process windows 1 hour and 15 minutes to 1 hour before UCO start time and label as -1
    if start_sample_before < end_sample_before:
        process_windows(eeg_data, start_sample_before, end_sample_before, label=-1)
    elif additional_file_before and additional_start_time_before:
        additional_data = loadmat(additional_file_before)['save_data'].flatten()
        new_start_datetime_before = datetime.strptime(additional_start_time_before, '%I:%M:%S:%f %p')

        # Adjust for additional start time crossing midnight
        if new_start_datetime_before > uco_start_datetime:
            new_start_datetime_before -= timedelta(days=1)

        # Offset (s) inside the additional file of the point 75 min before the UCO
        additional_time_diff_start = (uco_start_datetime - new_start_datetime_before).total_seconds() - 75 * 60
        if additional_time_diff_start < 0:
            # NOTE(review): kept from the original; a negative value here means
            # the file starts less than 75 min before the UCO -- confirm that
            # shifting by a day is really the intended handling.
            additional_time_diff_start += 24 * 3600  # Adjust for previous day

        # These two statements must stay inside this branch: the variables
        # they use only exist when the additional file has been loaded.
        additional_start_samples = int(additional_time_diff_start * fs)
        process_windows(additional_data, additional_start_samples, additional_start_samples + 60 * window_len, label=-1)

    # Process windows during UCO and label as 0
    process_windows(eeg_data, start_sample_uco, end_sample_uco, label=0)

    # Process windows after UCO end time and label as 1
    remaining_duration = len(eeg_data) - start_sample_after
    if remaining_duration >= 15 * 60 * fs:
        process_windows(eeg_data, start_sample_after, end_sample_after, label=1)
    else:
        process_windows(eeg_data, start_sample_after, len(eeg_data), label=1)
        if additional_file_after and additional_start_time_after:
            additional_data = loadmat(additional_file_after)['save_data'][0]
            # Parsed only to validate its format: the time gap between the two
            # recordings is not taken into account.
            datetime.strptime(additional_start_time_after, '%I:%M:%S:%f %p')

            additional_start_samples = 0  # Start from the beginning of the additional file
            # Samples still missing to reach 15 minutes after the UCO end
            additional_end_samples = int(15 * 60 * fs - remaining_duration)
            process_windows(additional_data, additional_start_samples, additional_end_samples, label=1)

    df = pd.DataFrame(data)
    return df


In [4]:
# Define the parameters
# NOTE: these are absolute paths on the original author's machine; adjust them
# before running the notebook elsewhere.
main_file = r'D:\IEEE Sensor\IEEE Sensor Matlab Data\EEG\21044\Clean Data Channel 2_CUsersvaishDownloadsFetal EEG-20231204T232927Z-001Fetal EEGFYP 2023CODE BACK UPRaw Data2104421044_1-06-2021 9_25_16.2 AM_ UCO file.mat'
handled_folder_path = r'D:\IEEE Sensor\IEEE Sensor Xlsx Data\EEG_handled'
fs = 400  # Sampling frequency
start_time = '09:25:16:2 AM'  # Start time of the data
uco_start_time = '09:54:34.055758 AM'  # UCO start time
uco_end_time = '10:09:31.555758 AM'  # UCO end time

# NOTE(review): the "before" file name carries the same "9_25_16.2 AM" stamp as
# the main file (Channel 1 instead of Channel 2) while its start time is given
# as 11:52:09 PM -- confirm this is the intended baseline recording.
additional_file_before = r'D:\IEEE Sensor\IEEE Sensor Matlab Data\EEG\21044\Clean Data Channel 1_CUsersvaishDownloadsFetal EEG-20231204T232927Z-001Fetal EEGFYP 2023CODE BACK UPRaw Data2104421044_1-06-2021 9_25_16.2 AM_ UCO file.mat'  # Path to additional file for before UCO
additional_start_time_before = '11:52:09:0 PM'  # Start time of the additional file for before UCO
# NOTE(review): the "after" file lives under folder 21126 with a 20-08-2021
# stamp, whereas the main file is 21044 / 1-06-2021 -- verify it belongs to
# this recording session.
additional_file_after = r'D:\IEEE Sensor\IEEE Sensor Matlab Data\EEG\21126\Clean Data Channel 1_CUsersvhor0002DownloadsCODE BACK UP-20240215T015228Z-001CODE BACK UPRaw Data2112621126_20-08-2021 9_05_07.3 AM.mat'  # Path to additional file for after UCO
additional_start_time_after = '09:05:07:3 AM'  # Start time of the additional file for after UCO

# Make sure the processed folder exists (no error if it is already there)
os.makedirs(handled_folder_path, exist_ok=True)

# Load the main file; the signal is the first row of the 'save_data' variable
main_data = loadmat(main_file)
eeg_data = main_data['save_data'][0]

# Capture UCO windows and compute features
df = capture_uco_windows(eeg_data, fs, start_time, uco_start_time, uco_end_time, additional_file_before, additional_start_time_before, additional_file_after, additional_start_time_after)

# Define new save path: <handled_folder_path>/<parent folder of main_file>_handled/
folder_name = os.path.basename(os.path.dirname(main_file))
new_save_folder = os.path.join(handled_folder_path, folder_name + '_handled')
os.makedirs(new_save_folder, exist_ok=True)

# Same file name as the input, with the extension changed to .xlsx
excel_file_name = os.path.splitext(os.path.basename(main_file))[0] + '.xlsx'
new_excel_file_path = os.path.join(new_save_folder, excel_file_name)

df.to_excel(new_excel_file_path, index=False)

print("Processing completed! All processed files have been saved.")

time diff start: 1757.855758
time diff end: 2655.355758
start sample before UCO: 0
end sample before UCO: -736858
start sample after UCO: 1062142
end sample after UCO: 1422142
Processing completed! All processed files have been saved.
