In [1]:
import os
import pandas as pd
import numpy as np
import neurokit2 as nk
import pyeeg
import warnings
from scipy.io import loadmat
# from eeglib import PFD
from scipy.stats import entropy
from scipy.io import loadmat
import sys
import warnings
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

In [2]:
def bandpower(data, sf, band, window_sec=None, relative=False):
    """Compute the average power of the signal x in a specific frequency band.

    Parameters
    ----------
    data : 1d-array
        Input signal in the time-domain.
    sf : float
        Sampling frequency of the data.
    band : list
        Lower and upper frequencies of the band of interest.
    window_sec : float
        Length of each window in seconds.
        If None, window_sec = (1 / min(band)) * 2
    relative : boolean
        If True, return the relative power (= divided by the total power of the signal).
        If False (default), return the absolute power.

    Return
    ------
    bp : float
        Absolute or relative band power.
    """
    from scipy.signal import welch
    from scipy.integrate import simpson
    band = np.asarray(band)
    low, high = band

    # Define window length
    if window_sec is not None:
        nperseg = min(int(window_sec * sf), len(data))
    else:
        nperseg = (2 / low) * sf

    # Compute the modified periodogram (Welch)
    freqs, psd = welch(data, sf, nperseg=nperseg)

    # Frequency resolution
    freq_res = freqs[1] - freqs[0]

    # Find closest indices of band in frequency vector
    idx_band = np.logical_and(freqs >= low, freqs <= high)

    # Integral approximation of the spectrum using Simpson's rule.
    bp = simpson(psd[idx_band], dx=freq_res)

    if relative:
        bp /= simpson(psd, dx=freq_res)
    return bp

In [3]:
# Ignore specific FutureWarning
warnings.filterwarnings("ignore", message="`rcond` parameter will change")

def capture_uco_windows(eeg_data, fs, start_time, uco_start_time, uco_end_time, window_len_sec=15):
    """
    Capture UCO time before and after windows and compute EEG features.

    Parameters
    ----------
    eeg_data : array-like
        The EEG signal data.
    fs : int
        Sampling frequency of the EEG data.
    start_time : str
        Start time of the EEG data in format 'HH:MM:SS:%f %p'.
    uco_start_time : str
        UCO start time in format 'HH:MM:SS.%f %p'.
    uco_end_time : str
        UCO end time in format 'HH:MM:SS.%f %p'.
    window_len_sec : int
        Length of each window in seconds.

    Returns
    -------
    df : DataFrame
        DataFrame containing computed features for each window.
    """
    # Convert times to datetime objects
    start_datetime = datetime.strptime(start_time, '%I:%M:%S:%f %p')
    uco_start_datetime = datetime.strptime(uco_start_time, '%I:%M:%S.%f %p')
    uco_end_datetime = datetime.strptime(uco_end_time, '%I:%M:%S.%f %p')

    # Adjust for UCO times that cross midnight
    if uco_start_datetime < start_datetime:
        uco_start_datetime += timedelta(days=1)
    if uco_end_datetime < uco_start_datetime:
        uco_end_datetime += timedelta(days=1)

    # Calculate the difference in seconds between the start time and UCO times
    time_diff_start = (uco_start_datetime - start_datetime).total_seconds()
    time_diff_end = (uco_end_datetime - start_datetime).total_seconds()

    # Convert the time differences to sample indices
    uco_start_samples = int(time_diff_start * fs)
    uco_end_samples = int(time_diff_end * fs)

    # Define window length in samples
    window_len = window_len_sec * fs

    # Calculate the start and end sample indices for the desired windows
    start_sample_before = max(0, uco_start_samples - 75 * 60 * fs)
    end_sample_before = uco_start_samples - 60 * 60 * fs
    start_sample_uco = uco_start_samples
    end_sample_uco = uco_end_samples
    start_sample_after = uco_end_samples
    end_sample_after = min(len(eeg_data), uco_end_samples + 60 * window_len)

    print(f"time diff start: {time_diff_start}")
    print(f"time diff end: {time_diff_end}")
    print(f"start sample before UCO: {start_sample_before}")
    print(f"end sample before UCO: {end_sample_before}")
    print(f"start sample after UCO: {start_sample_after}")
    print(f"end sample after UCO: {end_sample_after}")

    data = {
        'time': [],
        'PFD': [],
        'DFA': [],
        'sh_entropy': [],
        'multi_entropy': [],
        'sample_entropy': [],
        'approx_entropy': [],
        'absolute_delta_power': [],
        'relative_delta_power': [],
        'absolute_theta_power': [],
        'relative_theta_power': [],
        'absolute_alpha_power': [],
        'relative_alpha_power': [],
        'absolute_belta_power': [],
        'relative_belta_power': [],
        'label': []
    }

    def process_windows(start_sample, end_sample, label):
        for start in range(start_sample, end_sample, window_len):
            end = start + window_len
            if end > len(eeg_data):
                break  # Ensure the last window doesn't exceed the data length

            signal = eeg_data[start:end]
            print(signal)

            # Compute features
            time_scales = np.arange(1, 10)
            mse_values = [nk.entropy_multiscale(signal, scale=np.array([scale]), method='MSEn')[1]['Value'] for scale in time_scales]
            hfd, _ = nk.fractal_higuchi(signal, k_max=20, show=True)
            dfa = pyeeg.dfa(signal)
            sampen, _ = nk.entropy_sample(signal, delay=1, dimension=2)
            apen, _ = nk.entropy_approximate(signal)
            binned = pd.cut(signal, bins=100, labels=False)
            shanen, _ = nk.entropy_shannon(binned)
            absolute_delta_power = bandpower(signal, fs, [0, 3.9], window_len)
            relative_delta_power = bandpower(signal, fs, [0, 3.9], window_len, relative=True)
            absolute_theta_power = bandpower(signal, fs, [4, 7.9], window_len)
            relative_theta_power = bandpower(signal, fs, [4, 7.9], window_len, relative=True)
            absolute_alpha_power = bandpower(signal, fs, [8, 12.9], window_len)
            relative_alpha_power = bandpower(signal, fs, [8, 12.9], window_len, relative=True)
            absolute_belta_power = bandpower(signal, fs, [13, 22], window_len)
            relative_belta_power = bandpower(signal, fs, [13, 22], window_len, relative=True)

            # Append data to the dictionary
            data['time'].append(start / fs)
            data['PFD'].append(hfd)
            data['DFA'].append(dfa)
            data['sh_entropy'].append(shanen)
            data['multi_entropy'].append(mse_values)
            data['sample_entropy'].append(sampen)
            data['approx_entropy'].append(apen)
            data['absolute_delta_power'].append(absolute_delta_power)
            data['relative_delta_power'].append(relative_delta_power)
            data['absolute_theta_power'].append(absolute_theta_power)
            data['relative_theta_power'].append(relative_theta_power)
            data['absolute_alpha_power'].append(absolute_alpha_power)
            data['relative_alpha_power'].append(relative_alpha_power)
            data['absolute_belta_power'].append(absolute_belta_power)
            data['relative_belta_power'].append(relative_belta_power)
            data['label'].append(label)

            plt.close()

    # Process windows 1 hour and 15 minutes to 1 hour before UCO start time and label as -1
    process_windows(start_sample_before, end_sample_before, label=-1)

    # Process windows during UCO and label as 0
    process_windows(start_sample_uco, end_sample_uco, label=0)

    # Process windows after UCO end time and label as 1
    process_windows(start_sample_after, end_sample_after, label=1)

    df = pd.DataFrame(data)
    return df

In [4]:
# THIS BLOCK OF CODE REQUIRES REAL DATA

# Define the parameters
folder_path = r'Matlab code for preprocessing\Fake data\21203'
handled_folder_path = r'result\EEG_handled'
fs = 400  # Sampling frequency
start_time = '09:40:26:003 AM'  # Start time of the data
uco_start_time = '09:41:26.472337 AM'  # UCO start time
uco_end_time = '09:42:26.472337 AM'  # UCO end time

# Make sure the processed folder exists
if not os.path.exists(handled_folder_path):
    os.makedirs(handled_folder_path)

# Only process files in the specific folder
folder_path_current = os.path.join(folder_path)
mat_files = [file for file in os.listdir(folder_path_current) if file.endswith('_EEG.mat')]

for mat_file in mat_files:
    file_path_1 = os.path.join(folder_path_current, mat_file)
    mat_contents = loadmat(file_path_1)
    eeg_data = mat_contents['save_data'][0]

    # Capture UCO windows and compute features
    df = capture_uco_windows(eeg_data, fs, start_time, uco_start_time, uco_end_time)

    # Define new save path
    folder_name = os.path.basename(folder_path)
    new_save_folder = os.path.join(handled_folder_path, folder_name + '_handled')
    if not os.path.exists(new_save_folder):
        os.makedirs(new_save_folder)

    excel_file_name = mat_file[:-4] + '.xlsx'  # Change the extension to .xlsx
    new_excel_file_path = os.path.join(new_save_folder, excel_file_name)

    df.to_excel(new_excel_file_path, index=False)

print("Processing completed! All processed files have been saved.")

time diff start: 60.469337
time diff end: 120.469337
start sample before UCO: 0
end sample before UCO: -1415813
start sample after UCO: 48187
end sample after UCO: 50740
[-0.80917275  0.67860123  0.57790336 ...  1.07102821 -0.58824629
 -0.4421701 ]
[-0.46309687  1.13460383  0.53304773 ...  1.09824158  1.26055277
 -0.24306128]
[-0.20544732  0.82384104  0.76092403 ... -0.60180441  0.13876955
  0.77867355]
[ 0.52217014  0.01093885 -0.1292523  ... -0.04716501  0.47963788
 -0.96395104]
time diff start: 60.469337
time diff end: 120.469337
start sample before UCO: 0
end sample before UCO: -1415813
start sample after UCO: 48187
end sample after UCO: 48080
[-0.69624522 -1.16385559 -1.28682364 ...  0.16502097 -0.07655305
  0.07586903]
[-0.29955766 -0.44630872  0.36473362 ... -0.97746614 -0.72631644
  1.36680507]
[ 0.35945915 -1.13800527 -0.96293992 ...  0.50971707  0.11318775
  0.31180629]
Processing completed! All processed files have been saved.
