# Preprocessing 03 - Data Format - Transforming the Raw Data into Split Data

## Importing the libraries

In [1]:
import utils.preparers as preparers
import os
import re
import pandas as pd
from tqdm.notebook import tqdm

## Get all the Participants based on the folders in data/rawData

In [2]:
def list_participant_ids(raw_data_root):
    """Return the participant numbers encoded in the folder names of a directory.

    Only the immediate sub-folders of ``raw_data_root`` are inspected. The
    first run of digits in a folder name is taken as the participant number;
    folders whose name contains no digit are skipped instead of raising an
    ``IndexError``. A missing directory yields an empty list.
    """
    number_pattern = re.compile(r'\d+')
    ids = []
    for _root, sub_dirs, _files in os.walk(raw_data_root):
        for folder_name in sub_dirs:
            match = number_pattern.search(folder_name)
            if match is not None:
                ids.append(int(match.group()))
        # Only the top level is of interest, so stop after the first step.
        break
    return ids


participants = list_participant_ids("./data/rawData")

## Move ICA eeg files back to the raw folder and delete everything in the eeg_tmp folder

In [3]:
ica_folder = "./data/eeg_tmp/ica"
raw_folder = "./data/eeg_tmp/raw"

# Move the ICA EEG files back into each participant's raw-data folder.
for participant in tqdm(participants):
    participant_id = str(participant).zfill(2)
    participant_folder = "./data/rawData/Participant" + participant_id
    something_missing = False
    # The .fdt and .set files are handled independently so that a missing
    # file of one kind never blocks (or destroys the target of) the other.
    for extension in (".fdt", ".set"):
        file_name = "eeg_raw_" + participant_id + extension
        source = ica_folder + "/" + file_name
        destination = participant_folder + "/" + file_name
        try:
            # os.replace overwrites an existing destination, so no separate
            # delete step is needed and the destination is only touched when
            # the move itself can happen.
            os.replace(source, destination)
        except FileNotFoundError:
            # Raised when the source (or the participant folder) is absent.
            something_missing = True
        except OSError as error:
            # Stay best-effort for the remaining participants, but report it.
            print("Participant " + str(participant) + ": could not move "
                  + file_name + " (" + str(error) + ")")
    if something_missing:
        print("Participant " + str(participant) + " already has the files")

# Delete every file below raw_folder; the path is built from the directory
# currently being walked so that files in sub-folders are found as well.
for current_dir, _sub_dirs, file_names in os.walk(raw_folder):
    for file_name in file_names:
        os.remove(os.path.join(current_dir, file_name))

  0%|          | 0/39 [00:00<?, ?it/s]

## Delete every .psydat and .log file in data/rawData and rename the remaining .fif and .csv files

In [4]:
# Remove PsychoPy leftovers (.psydat/.log) and give the remaining data files
# uniform names inside every participant folder.
for participant in tqdm(participants):
    current_folder = "./data/rawData/Participant" + str(participant).zfill(2)
    # Walk the participant folder including all of its sub-folders.
    for _dir, _sub_dirs, files in os.walk(current_folder):
        for file_name in files:
            # Always build the path from the directory being walked; joining
            # with current_folder would miss files located in sub-folders.
            file_path = os.path.join(_dir, file_name)
            if file_name.endswith((".psydat", ".log")):
                os.remove(file_path)
            elif file_name.endswith(".fif"):
                os.rename(file_path, os.path.join(_dir, "eeg_raw.fif"))
            elif file_name.endswith(".csv"):
                if "experiment_data" in file_name or "eyetracking" in file_name:
                    os.rename(file_path, os.path.join(_dir, "eyetracking_raw.csv"))
                else:
                    # Every other CSV file is treated as the experiment log.
                    os.rename(file_path, os.path.join(_dir, "experiment.csv"))

  0%|          | 0/39 [00:00<?, ?it/s]

## Transform all the Data into Split Data

In [5]:
from contextlib import redirect_stderr, redirect_stdout

# Column layout of the index table that is written to filtered_data.csv.
columns = [
    "Participant",
    "Algorithm",
    "Subpart",
    "Behavioral",
    "StartTime",
    "EndTime",
    "Eyetracking",
    "EEG",
    "CrossEEG",
]
# The table starts out empty and only carries the column names.
df_filtered = pd.DataFrame(columns=columns)

def rescale(data, scaling_factor=1e-8):
    """Return ``data`` multiplied by ``scaling_factor``.

    Parameters
    ----------
    data
        Values to rescale; anything that supports multiplication by a float.
    scaling_factor : float, optional
        Multiplier applied to ``data``. The default of ``1e-8`` is the
        original author's factor to obtain values in [V]; it depends on the
        recording device and its settings.

    Returns
    -------
    The product ``scaling_factor * data``.
    """
    return scaling_factor * data

# Iterate over all participants
# Every trial is split into three subparts. Each entry holds
#   - the subpart label (key in the loaded data and value of the "Subpart" column),
#   - the prefix of the matching "...Start"/"...Stop" columns in the behavioral data,
#   - the prefix used in the names of the written files.
subparts = [
    ("Code", "Snippet", "code"),
    ("Input", "Input", "input"),
    ("Output", "Output", "output"),
]

# Iterate over all participants
for participant in tqdm(participants):
    # Participant 9 is excluded from the split data set.
    if participant == 9:
        continue

    # Make sure the output folder of this participant exists.
    os.makedirs("./data/filteredData/Participant" + str(participant).zfill(2), exist_ok=True)
    folder_prev = "./data/filteredData/Participant" + str(participant).zfill(2) + "/"

    # Load the raw data; everything the loader prints to stdout/stderr is discarded.
    with open(os.devnull, 'w') as devnull, redirect_stdout(devnull), redirect_stderr(devnull):
        data = preparers.load_raw(participant, cores=24, logging=True)

    # Save the raw data split into the subparts of every algorithm.
    for algorithm, trial in data.items():
        behavioral = trial["Behavioral"]
        # The chosen answer is shared by all subparts of the algorithm.
        answer = behavioral["ChosenAnswer"].array[0]

        # EEG recorded during the fixation cross: rescale, save, keep only the path.
        cross_eeg = trial["Cross"]["EEG"]
        cross_eeg.apply_function(rescale, picks=['eeg'])
        cross_eeg_path = folder_prev + algorithm + "cross_eeg_raw.fif"
        cross_eeg.save(cross_eeg_path, overwrite=True)

        for subpart, time_column, file_prefix in subparts:
            start = behavioral[time_column + "Start"].array[0]
            end = behavioral[time_column + "Stop"].array[0]
            eyetracking = trial[subpart]["EyeTracking"]
            eeg = trial[subpart]["EEG"]

            # Rescale the EEG data and write it to disk.
            eeg.apply_function(rescale, picks=['eeg'])
            eeg_path = folder_prev + algorithm + file_prefix + "_eeg_raw.fif"
            eeg.save(eeg_path, overwrite=True)

            # Write the eye-tracking data to disk.
            eyetracking_path = folder_prev + algorithm + file_prefix + "_eyetracking_raw.csv"
            eyetracking.to_csv(eyetracking_path, index=False)

            # The table stores the file paths, not the data itself.
            df_filtered.loc[len(df_filtered)] = [
                participant, algorithm, subpart, answer,
                start, end, eyetracking_path, eeg_path,
                cross_eeg_path]

# The folder is missing when no participant was processed; create it so the
# (possibly empty) table can still be written.
os.makedirs("./data/filteredData", exist_ok=True)
df_filtered.to_csv("./data/filteredData/filtered_data.csv", index=False)

  0%|          | 0/39 [00:00<?, ?it/s]