# Preprocessing 03 - Data Format - Transforming the Raw Data into Split Data

## Importing the libraries

In [1]:
import utils.preparers as preparers
import os
import re
import pandas as pd
from tqdm.notebook import tqdm

## Get all the Participants based on the folders in data/rawData

In [2]:
def find_participants(raw_root):
    """Return the participant numbers encoded in the sub-folder names of *raw_root*.

    Only the immediate sub-folders of *raw_root* are inspected. The first run
    of digits in a folder name is taken as the participant number; folders
    whose name contains no digit are skipped. If *raw_root* does not exist,
    an empty list is returned.
    """
    # os.walk yields the top level first and yields nothing for a missing
    # folder, hence the default triple with no sub-folders.
    _top, sub_dirs, _files = next(os.walk(raw_root), (raw_root, [], []))

    found = []
    for folder_name in sub_dirs:
        match = re.search(r'\d+', folder_name)
        if match is None:
            # Folder name carries no number, so it cannot be mapped to a participant.
            continue
        found.append(int(match.group()))

    # Sort so the processing order does not depend on the file system.
    return sorted(found)


participants = find_participants("./data/rawData")

## Move ICA eeg files back to the raw folder and delete everything in the eeg_tmp folder

In [3]:
ica_folder = "./data/eeg_tmp/ica"
raw_folder = "./data/eeg_tmp/raw"

# Move ICA eeg files back to the raw folder
for participant in tqdm(participants):
    participant_id = str(participant).zfill(2)
    participant_folder = "./data/rawData/Participant" + participant_id
    try:
        # Move the .fdt/.set pair; as soon as one move fails the rest of the
        # pair is skipped for this participant.
        for extension in (".fdt", ".set"):
            file_name = "eeg_raw_" + participant_id + extension
            os.rename(ica_folder + "/" + file_name, participant_folder + "/" + file_name)
    except FileNotFoundError:
        # Nothing to move from the ICA folder for this participant.
        print("Participant " + str(participant) + " already has the files")
    except OSError as error:
        # Keep the run going, but do not hide unexpected file system errors.
        print("Participant " + str(participant) + " could not be moved: " + str(error))

# delete every file in the raw_folder
for current_dir, _sub_dirs, file_names in os.walk(raw_folder):
    for file_name in file_names:
        # Join with the directory currently being walked so that files in
        # nested folders resolve to their real path.
        os.remove(os.path.join(current_dir, file_name))

  0%|          | 0/38 [00:00<?, ?it/s]

Participant 1 already has the files
Participant 2 already has the files
Participant 3 already has the files
Participant 4 already has the files
Participant 5 already has the files
Participant 6 already has the files
Participant 7 already has the files
Participant 10 already has the files
Participant 11 already has the files
Participant 12 already has the files
Participant 13 already has the files
Participant 14 already has the files
Participant 18 already has the files
Participant 22 already has the files
Participant 24 already has the files
Participant 25 already has the files
Participant 28 already has the files
Participant 35 already has the files
Participant 36 already has the files
Participant 37 already has the files
Participant 38 already has the files
Participant 41 already has the files
Participant 42 already has the files
Participant 46 already has the files
Participant 49 already has the files
Participant 50 already has the files
Participant 55 already has the files
Particip

## Delete every .psydat and .log file in data/rawData and rename the remaining data files

In [4]:
# Clean up every participant folder: drop the .psydat/.log files and give the
# remaining .fif/.csv files uniform names.
for participant in participants:
    print("Reformatting participant {}".format(participant))
    current_folder = "./data/rawData/Participant" + str(participant).zfill(2)
    # os.walk also descends into sub-folders, so every path is built from the
    # directory currently being walked (_dir), not from current_folder.
    for _dir, _sub_dirs, files in os.walk(current_folder):
        for file in files:
            file_path = os.path.join(_dir, file)
            if file.endswith((".psydat", ".log")):
                os.remove(file_path)
            elif file.endswith(".fif"):
                # rename the file
                os.rename(file_path, os.path.join(_dir, "eeg_raw.fif"))
            elif file.endswith(".csv"):
                # CSVs whose name contains "experiment_data" or "eyetracking"
                # become the eye tracking file; any other CSV becomes experiment.csv.
                if "experiment_data" in file or "eyetracking" in file:
                    new_name = "eyetracking_raw.csv"
                else:
                    new_name = "experiment.csv"
                os.rename(file_path, os.path.join(_dir, new_name))

Reformatting participant 1
Reformatting participant 2
Reformatting participant 3
Reformatting participant 4
Reformatting participant 5
Reformatting participant 6
Reformatting participant 7
Reformatting participant 10
Reformatting participant 11
Reformatting participant 12
Reformatting participant 13
Reformatting participant 14
Reformatting participant 18
Reformatting participant 22
Reformatting participant 24
Reformatting participant 25
Reformatting participant 28
Reformatting participant 35
Reformatting participant 36
Reformatting participant 37
Reformatting participant 38
Reformatting participant 41
Reformatting participant 42
Reformatting participant 46
Reformatting participant 49
Reformatting participant 50
Reformatting participant 55
Reformatting participant 58
Reformatting participant 59
Reformatting participant 60
Reformatting participant 61
Reformatting participant 62
Reformatting participant 63
Reformatting participant 66
Reformatting participant 67
Reformatting participant 68

## Transform all the Data into split Data

In [22]:
# Column layout of the index table; starts out without any rows
columns = [
    "Participant",
    "Algorithm",
    "Subpart",
    "Behavioral",
    "StartTime",
    "EndTime",
    "Eyetracking",
    "EEG",
]
df_filtered = pd.DataFrame(columns=columns)

def rescale(data, scaling_factor=1e-8):
    """Return *data* multiplied by *scaling_factor*.

    Args:
        data: Value or array to scale.
        scaling_factor: Multiplier applied to *data*. The default of 1e-8 is
            the factor used to obtain values in [V]; it depends on the
            recording device and its settings.

    Returns:
        The scaled data; the input is not modified.
    """
    return scaling_factor * data

# Split every participant's recording into one EEG file and one eye tracking
# file per algorithm and subpart, and list all written files in an index table.

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the whole frame
# on every call.
rows = []

# (value of the "Subpart" column / key in the loaded data,
#  prefix of the matching "...Start"/"...Stop" columns in the behavioral table)
subparts = (("Code", "Snippet"), ("Input", "Input"), ("Output", "Output"))

# Iterate over all participants
for participant in tqdm(participants):
    print("Participant #" + str(participant))

    # Make sure the output folder exists
    participant_folder = "./data/filteredData/Participant" + str(participant).zfill(2)
    os.makedirs(participant_folder, exist_ok=True)
    folder_prev = participant_folder + "/"

    # Load in Raw Data from Input folder
    data = preparers.load_raw(participant, cores=24, logging=True)

    # save the raw data into split data for code/input/output
    for algorithm in data.keys():
        behavioral = data[algorithm]["Behavioral"]

        # get the answer for the algorithm (shared by all three subparts)
        answer = behavioral["ChosenAnswer"].array[0]

        for subpart, time_prefix in subparts:
            # get the start and end time, eyetracking and eeg data
            start_time = behavioral[time_prefix + "Start"].array[0]
            end_time = behavioral[time_prefix + "Stop"].array[0]
            eyetracking = data[algorithm][subpart]["EyeTracking"]
            eeg = data[algorithm][subpart]["EEG"]

            # File names are "<algorithm><subpart in lower case>_..." inside
            # the participant folder; the index table stores these paths.
            file_prefix = folder_prev + algorithm + subpart.lower()
            eeg_path = file_prefix + "_eeg_raw.fif"
            eyetracking_path = file_prefix + "_eyetracking_raw.csv"

            # rescale the eeg data and save it to file
            eeg.apply_function(rescale, picks=['eeg'])
            eeg.save(eeg_path, overwrite=True)

            # save the eyetracking data to file
            eyetracking.to_csv(eyetracking_path, index=False)

            # remember the row for the index table
            rows.append({
                "Participant": participant,
                "Algorithm": algorithm,
                "Subpart": subpart,
                "Behavioral": answer,
                "StartTime": start_time,
                "EndTime": end_time,
                "Eyetracking": eyetracking_path,
                "EEG": eeg_path,
            })

df_filtered = pd.DataFrame(rows, columns=columns)
df_filtered.to_csv("./data/filteredData/filtered_data.csv", index=False)

Participant #1
(01/10) Construct Paths
(02/10) Read Eye Tracker Data
(03/10) Transform Eye Tracker Data
(04/10) Normalize Eye Tracker Time
(05/10) Read EEG Data
(06/10) Construct Events from EEG Data
(07/10) Read PsychoPy Data
(08/10) Transform PsychoPy Data
(9/10) Normalize PsychoPy Time
(10/10) Transform All Data to Dictionary
Participant #2
(01/10) Construct Paths
(02/10) Read Eye Tracker Data
(03/10) Transform Eye Tracker Data
(04/10) Normalize Eye Tracker Time
(05/10) Read EEG Data
(06/10) Construct Events from EEG Data
(07/10) Read PsychoPy Data
(08/10) Transform PsychoPy Data
(9/10) Normalize PsychoPy Time
(10/10) Transform All Data to Dictionary
Participant #3
(01/10) Construct Paths
(02/10) Read Eye Tracker Data
(03/10) Transform Eye Tracker Data
(04/10) Normalize Eye Tracker Time
(05/10) Read EEG Data
(06/10) Construct Events from EEG Data
(07/10) Read PsychoPy Data
(08/10) Transform PsychoPy Data
(9/10) Normalize PsychoPy Time
(10/10) Transform All Data to Dictionary
Partici