In [1]:
%%capture 
# Silence this cell's output, then enable autoreload so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Clear the interactive namespace without prompting, so the notebook starts
# from a clean state.
%reset -f

In [2]:
from functions import *

## Data Preprocessing:


In [7]:
# Acquisition note: "4 - Not connected, 7 - Cable is loose".
# Presumably these are 1-based positions in chan_name (i.e. PO7 and PO8) -- TODO confirm.
chan_name = ['PO3', 'POz', 'PO4', 'PO7', 'O1', 'Oz', 'PO8', 'O2']

# All three values are in Hz. lf / hf are handed to process_folder below;
# fs is presumably the sampling rate -- TODO confirm.
fs = 250
lf = 5
hf = 100

# Frequencies (Hz) forwarded to process_folder as target_freq.
target_freq = [6.6, 7.5, 13.2, 15, 19.8, 22.5]

# Root directory; each sub-directory is processed as one participant.
folder = 'data/Participants/'

# One entry per participant directory: whatever process_folder returns for it
# (iterated below as a collection of recordings that expose a .title).
all_participants = []
for entry in os.listdir(folder):
    subfolder = os.path.join(folder, entry)
    # Plain files sitting next to the participant directories are ignored.
    if os.path.isdir(subfolder):
        all_participants.append(
            process_folder(
                subfolder,
                chan_name,
                extra_name='path',
                target_freq=target_freq,
                hf=hf,
                lf=lf,
                epoch_length=6,
                filter=True,
            )
        )

# Show which recordings were loaded.
for eeg_data in all_participants:
    for eeg in eeg_data:
        print(eeg.title)

P4: 2604_2
P3: 2604_1
P2: 2504_2
P2: 2504_1
P5: 2704_1
P5: 2704_3
P7: 2408_1
P8: 0305_1
P6: 2804_1
P1: 2404_2
P1: 2404_1


In [8]:
# Sort participants by their numeric label.
# Titles printed by the loading cell look like "P4: 2604_2": the participant label
# comes before the colon and the session id after it. The key therefore parses the
# part before the colon. Parsing the session id instead would order participants by
# session code, because int() accepts underscores (int('2604_2') == 26042).
all_participants = sorted(
    all_participants,
    key=lambda recordings: int(recordings[0].title.split(':')[0].strip().lstrip('P')),
)

# Channels dropped from every recording before further processing.
chan_to_remove = ['PO7', 'PO8']

for eeg_data in all_participants:
    for eeg in eeg_data:
        eeg.remove_channels(chan_to_remove)

# Keep the notebook-level channel list in sync. The list is mutated in place
# (not rebound) so any other reference to the same list object sees the update.
for chan in chan_to_remove:
    if chan in chan_name:
        chan_name.remove(chan)

Original channel names: ['PO3', 'POz', 'PO4', 'PO7', 'O1', 'Oz', 'PO8', 'O2']
Updated channel names: ['PO3', 'POz', 'PO4', 'O1', 'Oz', 'O2']
Original channel names: ['PO3', 'POz', 'PO4', 'PO7', 'O1', 'Oz', 'PO8', 'O2']
Updated channel names: ['PO3', 'POz', 'PO4', 'O1', 'Oz', 'O2']
Original channel names: ['PO3', 'POz', 'PO4', 'PO7', 'O1', 'Oz', 'PO8', 'O2']
Updated channel names: ['PO3', 'POz', 'PO4', 'O1', 'Oz', 'O2']
Original channel names: ['PO3', 'POz', 'PO4', 'PO7', 'O1', 'Oz', 'PO8', 'O2']
Updated channel names: ['PO3', 'POz', 'PO4', 'O1', 'Oz', 'O2']
Original channel names: ['PO3', 'POz', 'PO4', 'PO7', 'O1', 'Oz', 'PO8', 'O2']
Updated channel names: ['PO3', 'POz', 'PO4', 'O1', 'Oz', 'O2']
Original channel names: ['PO3', 'POz', 'PO4', 'PO7', 'O1', 'Oz', 'PO8', 'O2']
Updated channel names: ['PO3', 'POz', 'PO4', 'O1', 'Oz', 'O2']
Original channel names: ['PO3', 'POz', 'PO4', 'PO7', 'O1', 'Oz', 'PO8', 'O2']
Updated channel names: ['PO3', 'POz', 'PO4', 'O1', 'Oz', 'O2']
Original chan

In [9]:
# Re-reference and standardise every recording, one participant at a time.
# NOTE(review): both steps reduce over axis 0. The CSV export in this notebook
# indexes filtered_signal[:, i] once per timestamp, which suggests axis 0 is the
# channel axis. If so, the z-score below normalises across channels at each sample
# rather than per channel over time -- confirm this is intended. A zero standard
# deviation would also produce NaN/inf values here.
for eeg_data in all_participants:
    for eeg in eeg_data:
        # Common average re-referencing to remove common noise sources.
        rereferenced = eeg.filtered_signal - eeg.filtered_signal.mean(axis=0)

        # Z-score along the same axis, then keep only two decimal places.
        centred = rereferenced - rereferenced.mean(axis=0)
        eeg.filtered_signal = np.round(centred / rereferenced.std(axis=0), 2)

In [10]:
# Export every preprocessed recording in all_participants to
# '<participant>_<session index>_ExG.csv' and copy the participant's marker files
# next to it as '<participant>_<marker id>_Marker.csv'.
import shutil

save_folder = 'data/ParticipantsFiltered/'
main_folder = 'data/Participants/'

# Create the output folder if needed (no error when it already exists).
os.makedirs(save_folder, exist_ok=True)

# Participants whose marker files have already been copied, so the copy happens
# once per participant instead of once per recording.
participants_with_markers = set()

for eeg_data in all_participants:
    for eeg in eeg_data:
        title = eeg.title
        # Titles look like "P4: 2604_2". Take everything before the colon as the
        # participant label so labels with two or more digits ("P10") stay intact.
        partic = title.split(':')[0].strip()
        # Last underscore-separated token of the title (the "2" in "2604_2").
        act_title = title.split('_')[-1]

        filename = f'{partic}_{act_title}_ExG.csv'
        filepath = os.path.join(save_folder, filename)

        with open(filepath, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)

            # Header: timestamp column followed by one column per channel.
            writer.writerow(['TimeStamp'] + list(eeg.chan_name))

            # One row per timestamp; column i of filtered_signal holds that sample.
            writer.writerows(
                [eeg.timestamp[i]] + eeg.filtered_signal[:, i].tolist()
                for i in range(len(eeg.timestamp))
            )

        if partic in participants_with_markers:
            continue
        participants_with_markers.add(partic)

        # Marker files are read from the participant's own source folder.
        markerpath = os.path.join(main_folder, partic)
        for marker_file in os.listdir(markerpath):
            if not marker_file.endswith('_Marker.csv'):
                continue
            # Second underscore-separated token of the marker file name.
            marker_id = marker_file.split('_')[1]
            marker_name = f'{partic}_{marker_id}_Marker.csv'
            shutil.copy(os.path.join(markerpath, marker_file), os.path.join(save_folder, marker_name))

## Data Segmentation:

In [11]:
def group_data(marker_path, eeg_path):
    """Label the EEG samples of one recording with marker codes and group them.

    Parameters
    ----------
    marker_path : path of a CSV file with at least 'TimeStamp' and 'Code' columns.
    eeg_path : path of a CSV file with a 'TimeStamp' column plus signal columns.

    Returns
    -------
    A pandas GroupBy over the labelled samples, keyed by 'Code' (with every
    'sw_' occurrence stripped from the code text).
    """
    markers = pd.read_csv(marker_path)
    # Strip 'sw_' from the codes.
    markers['Code'] = markers['Code'].str.replace('sw_', '')

    samples = pd.read_csv(eeg_path)

    # Only samples inside the marker time span are kept; the span is widened by
    # one timestamp unit on each side so the boundary samples are not lost.
    marker_times = markers['TimeStamp']
    window_start = marker_times.iloc[0] - 1
    window_end = marker_times.iloc[-1] + 1

    sample_times = samples['TimeStamp']
    in_window = (sample_times >= window_start) & (sample_times <= window_end)

    # Every sample receives the columns of the most recent marker at or before it.
    labelled = pd.merge_asof(samples[in_window], markers, on='TimeStamp', direction='backward')

    return labelled.groupby('Code')

def all_grouped_data(data_path):
    """Group every recording found in ``data_path`` by marker code.

    A recording name is a CSV file name cut at its first '.', with the
    '_Marker' / '_ExG' suffixes removed. For each distinct name,
    '<name>_Marker.csv' and '<name>_ExG.csv' are passed to ``group_data``.

    Returns
    -------
    A list of ``(name, grouped_data)`` tuples ordered by name.
    """
    recording_names = set()
    for entry in os.listdir(data_path):
        # Only CSV files take part in the pairing.
        if not entry.endswith('.csv'):
            continue
        stem = entry.split('.')[0]
        # Marker and ExG files of one recording collapse to the same name.
        recording_names.add(stem.replace('_Marker', '').replace('_ExG', ''))

    grouped_data_list = []
    # Names are unique, so iterating them in sorted order gives the final ordering.
    for name in sorted(recording_names):
        marker_file_path = os.path.join(data_path, f"{name}_Marker.csv")
        eeg_file_path = os.path.join(data_path, f"{name}_ExG.csv")
        grouped_data_list.append((name, group_data(marker_file_path, eeg_file_path)))

    return grouped_data_list

In [12]:
# Folder scanned for paired '<name>_ExG.csv' / '<name>_Marker.csv' files.
filtered_data_path = 'data/ParticipantsFiltered'
# List of (recording name, per-code groupby) tuples, sorted by recording name.
grouped_data_list = all_grouped_data(filtered_data_path)

In [13]:
# Save every marker-code group of every recording to its own CSV file,
# named '<recording name>_<code>_ExG.csv'.
epochs_dir = "data/Epochs"
# Loop-invariant, so the output folder is created once up front.
os.makedirs(epochs_dir, exist_ok=True)

# Groups whose code starts with one of these prefixes are not exported.
skipped_code_prefixes = ('2', '14')

for name, grouped_data in grouped_data_list:
    # The loop variables deliberately avoid the name `group_data`: rebinding it here
    # would replace the group_data() function and break any later call to
    # all_grouped_data() in the same kernel.
    for code, epoch_df in grouped_data:
        if code.startswith(skipped_code_prefixes):
            continue
        epoch_path = os.path.join(epochs_dir, f"{name}_{code}_ExG.csv")
        # Drop the label column on a copy; the group frame itself is left untouched.
        epoch_df.drop(columns=['Code']).to_csv(epoch_path, index=False)