In [14]:
# %% Import packages
import chunk
import mne
import pandas as pd
from tqdm import tqdm
import warnings
import os
import re
import numbers
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [13]:

# %% Extract baseline data

# def extract_baseline_eye_data(path_2_csv_files: str, path_2_save_baseline_file: str, labelsequence: int =1, bad_files: list =[]):
""" 
Extract baseline data from raw EEG file (*.csv) that was obtained from hyperscanning2-redesign experiment \n

Arguments :
    - path_2_csv_files (str) : path to raw EEG file \n
    - path_2_save_baseline_file (str) : path to save extracted baseline file for each condition \n
    - labelsequence (int) : order of pre-defined label sequence, 1 (averted) is default \n
    - bad_files (list) (optional) : file name of raw EEG file, e.g., EEG-S8.csv, that wants to be skipped to process

Return :
    Extracted *.fif (MNE-Python) file for each condition of hand (finger pointing and tracking). 
    There are 6 files in total for each participant.
"""

## Temporary
path_2_csv_files = "/hpc/igum002/codes/Hyperscanning2-redesign/EyeTracker/data/"
path_2_save_baseline_file = "/hpc/igum002/codes/Hyperscanning2-redesign/EyeTracker/data/raw_baseline_eye_data/"
labelsequence = 1
bad_files=[]

### 

list_file_names = []
full_path_2_each_file = []
# bad_files=["EEG-S8.csv"] # Hard coded for now because bad EEG file is inside a folder of EEG. Remove this later

for (root, dirs, file) in os.walk(path_2_csv_files):
    for f in file:

        if (f in bad_files):
            
            # Skip the bad file to be processed
            print(f"Skipped bad file : {f}")
            continue

        else:
            # Populate all file names only
            list_file_names.append(f)
            list_file_names.sort()

            # Populate all full paths of each filename
            full_path_2_each_file.append(os.path.join(root, f))
            full_path_2_each_file.sort()

        

    # Iterate all file names

    for i in tqdm(range(len(full_path_2_each_file)), desc="In progress"):

        try:
            labelsequence = int(labelsequence)

        except IOError as err_filename:
            print("The format of file name is not correct or file doesn't exist \nThe format must be 'EEG-Sx.csv' , x=subject number ")
            raise
        except ValueError as err_integer:
            print("The labelsequence input must be integer : ", err_integer)
            raise

        else:
            if  labelsequence < 1 or labelsequence > 12:
                print("The value for labelsequence parameter is out of range. It must be be between 1 and 12")
                raise IndexError
            else:

                # Load the data
                fileName = full_path_2_each_file[i]
                print("Processing file : " + list_file_names[i])

                # Read each file by using pandas
                df = pd.read_csv(fileName, delimiter=',')
                # Define columns for raw csv file
                # df.columns = ['Index', 'FP1', 'FP2', 'F7', 'F3', 'F4', 'F8', 'T7', 'C3', 'C4', 'T8', 'P7', 'P3', 'P4', 'P8', 'O1',
                #                 'O2', 'X1', 'X2', 'X3', 'X4', 'X5',
                #                 'X6', 'X7', 'X8', 'X9', 'X10', 'X11', 'X12', 'X13', 'X14', 'Marker']
                
                
                # Replace all markers of "BEGIN*" and "END*" with 9999999
                
                # TODO: Do this

                # Check if the value is numeric ?
                    # Yes => Leave it as it is
                    # No => check if BEGIN is contained

                df['UnixTimeStamp'] = df.UnixTimeStamp.apply(lambda x: '9999999' if "BEGIN" in x else x)
                df['UnixTimeStamp'] = df.UnixTimeStamp.apply(lambda x: '9999999' if "END" in x else x)

                # Turn the UnixTimeStamp column into a list (we need the marker later on)
                markers = df['UnixTimeStamp'].tolist()
                
                #   Find all experimental markers and print them out.
                indicesOfMarkers = []  # Empty list to contain indices of markers
                for i, c in enumerate(markers):
                    if "9999999" in str(c) : 
                        indicesOfMarkers.append(i) 
                try:
                    number_markers = len(indicesOfMarkers)
                    if number_markers != 48:   # check if the number of markers = 48
                        raise ValueError("The {} file has incorrect number of markers : {} ! It MUST be 48".format(fileName,number_markers))
                except ValueError as err_unmatch_markers:
                    print(err_unmatch_markers)
                    raise


                # Create a list of labels for baseline data. We used only averted eye condition in UNITY.
                # It actually does not matter for different eye condition because participant only sees a white screen during the baseline condition)

                # Order = 1 (Averted) Odd subject no. For example, 1, 3, 5, etc.
                oddOrder1 = ["averted_pre_right_point", "averted_pre_left_point", "averted_left_tracking", "averted_right_tracking",
                                "averted_post_right_point", "averted_post_left_point"]

                # Order = 1 (Averted) Even subject no. For example, 2, 4, 6, etc.
                evenOrder1 = ["averted_pre_left_point", "averted_pre_right_point", "averted_right_tracking", "averted_left_tracking",
                                "averted_post_left_point", "averted_post_right_point"]


                # Put all labels into a list for baseline data
                listOfOrders = []
                listOfOrders.append(oddOrder1)
                listOfOrders.append(evenOrder1)

                # Number that is used to take the label (oddOrder1 atau evenOrder1)
                i_label_taker = 0

                if i % 2 == 0:

                    # Even number
                    i_label_taker = 0

                else:

                    # Odd number
                    i_label_taker = 1
                
                chosenOrder = listOfOrders[i_label_taker]


                # Get the first 12 markers' indices and extract the data
                indicesofBaselineMarkers = indicesOfMarkers[:13]

                # Get the 1st and 12th index and chunk dataframe based on those indices, and convert it into numpy array
                # For some data, it can be 13 markers after being extracted because when we combined the data the markers of beginning are right after the closing marker


                # Chunk the data based on opening and closing markers 
                chunkedData = []
                for i in range(0, 12, 2):

                    # chunkedData.append(df.iloc[indicesofBaselineMarkers[i] : indicesofBaselineMarkers[i+1], 1:17].to_numpy() * 1e-6)
                    chunkedData.append(df.iloc[indicesofBaselineMarkers[i] : indicesofBaselineMarkers[i+1], :])


                # Load each baseline file into MNE Python (averted eye condition only for baseline)

                # # Create 16 channels montage 10-20 international standard
                # montage = mne.channels.make_standard_montage('standard_1020')

                # # Pick only 16 channels that are used in Cyton+Daisy OpenBCI
                # # Create info
                # ch_names = ['FP1', 'Fp2', 'F7', 'F3', 'F4', 'F8', 'T7', 'C3', 'C4', 'T8', 'P7', 'P3', 'P4', 'P8', 'O1', 'O2']
                # ch_types = ['eeg'] * 16
                # info = mne.create_info(
                #     ch_names=ch_names,
                #     sfreq=125,
                #     ch_types=ch_types)
                # info.set_montage('standard_1020', match_case=False)
            
                # Match pattern EEG-Sx (x = any number)
                regex = r"\D{10}-S\d+"

                # Create filename that will be used for each condition. There are 6 conditions. See oddOrder1 or evenOrder1
                extracted_file_name_4_baseline = []
                for i in chosenOrder:
                    extracted_file_name = re.search(regex,fileName)
                    extracted_file_name_4_baseline.append(fileName[extracted_file_name.start() : extracted_file_name.end()] + "-" + i + "_raw.fif")


                # Save the chunkedData into a separate csv file
                for i, val in tqdm(enumerate(chunkedData), desc = "Saving process..."):
                    
                    # Convert array into dataframe
                    df_chunkedData = pd.DataFrame(val)

                    # Save dataframe into csv
                    os.chdir(path_2_save_baseline_file)
                    df_chunkedData.to_csv(extracted_file_name_4_baseline[i],sep= (","))
                    # chunkedData[i].to_csv(os.chdir(path_2_save_baseline_file), extracted_file_name_4_baseline[i])
                    # os.chdir(path_2_save_baseline_file)
                    # chunkedData.tofile(extracted_file_name_4_baseline[i], sep = "," )


                # for i, val in tqdm(enumerate(chunkedData), desc = "Saving process..."):
                #     # Load data into MNE-Python
                #     baseline_data_needs_label = mne.io.RawArray(val.transpose(), info, verbose=False)
                #     # Define a folder where we want to save the baseline data
                #     os.chdir(path_2_save_baseline_file) 
                #     # Save the data in MNE format
                #     baseline_data_needs_label.save(extracted_file_name_4_baseline[i], overwrite=True)


print(f"All baseline files have been saved in fif format in this path {path_2_save_baseline_file}")

In progress:   0%|                                        | 0/1 [00:00<?, ?it/s]

Processing file : EyeTracker-S1.csv


Saving process...: 6it [00:00, 28.38it/s]
In progress: 100%|████████████████████████████████| 1/1 [00:00<00:00,  2.91it/s]
In progress:   0%|                                        | 0/7 [00:00<?, ?it/s]

Processing file : EyeTracker-S1-averted_left_tracking_raw.fif


Saving process...: 6it [00:00, 27.38it/s]
In progress:  14%|████▌                           | 1/7 [00:00<00:02,  2.80it/s]

Processing file : EyeTracker-S1-averted_post_left_point_raw.fif
Unexpected exception formatting exception. Falling back to standard exception



Traceback (most recent call last):
  File "/hpc/igum002/environments/hyperscanning2_redesign_new/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_3487705/3414347509.py", line 84, in <cell line: 30>
    df['UnixTimeStamp'] = df.UnixTimeStamp.apply(lambda x: '9999999' if "BEGIN" in x else x)
  File "/hpc/igum002/environments/hyperscanning2_redesign_new/lib/python3.8/site-packages/pandas/core/series.py", line 4433, in apply
    return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
  File "/hpc/igum002/environments/hyperscanning2_redesign_new/lib/python3.8/site-packages/pandas/core/apply.py", line 1088, in apply
    return self.apply_standard()
  File "/hpc/igum002/environments/hyperscanning2_redesign_new/lib/python3.8/site-packages/pandas/core/apply.py", line 1143, in apply_standard
    mapped = lib.map_infer(
  File "pandas/_libs/lib.pyx", line 2870, in pandas.

In [None]:
# %% Testing extract_baseline_eye_data function
path_2_csv_files = "/hpc/igum002/codes/Hyperscanning2-redesign/EyeTracker/data/"
path_2_save_files = "/hpc/igum002/codes/Hyperscanning2-redesign/EyeTracker/data/raw_baseline_eye_data/"

extract_baseline_eye_data(path_2_csv_files, path_2_save_files)