<h1>Merge DataFrames Into a Useful Data Format</h1>

<h2>Imports and Macros</h2>

In [None]:
import os

import mne

# Limit MNE's log output to the WARNING level.
mne.set_log_level("WARNING")

import re
from multiprocessing import Pool

import dask.dataframe as dd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import swifter
from psychopy.misc import fromFile

%matplotlib widget

<h2>Potential Function Arguments</h2>

In [2]:
def find_file_with_extension(root, extension, default=None):
    """Return the path of the first file below ``root`` whose extension matches.

    The tree is walked top-down with directories and files visited in sorted
    order, so the result is deterministic when several files match.

    Parameters
    ----------
    root : str
        Directory to search (sub-directories included).
    extension : str
        Extension including the leading dot, e.g. ``".csv"``.
    default : object, optional
        Value returned when no file matches (also when ``root`` does not exist).

    Returns
    -------
    str or object
        ``os.path.join(dirpath, filename)`` of the first match, else ``default``.
    """
    for dirpath, dirnames, filenames in os.walk(root):
        dirnames.sort()  # in-place sort controls the order in which os.walk descends
        for filename in sorted(filenames):
            if os.path.splitext(filename)[1] == extension:
                return os.path.join(dirpath, filename)
    return default


participant_number = 1
questionaire_path = "./rawData/Questionnaire.xlsx"
participant_folder = "./rawData/Participant" + str(participant_number).zfill(2) + "/"
eyetracking_path = participant_folder + "experiment_data.csv"

# The PsychoPy CSV and log are looked up inside the participant's "data/" folder.
# If nothing is found the folder path itself is kept, so the later read fails visibly.
psychopy_folder = participant_folder + "data/"
psychopy_csv_path = find_file_with_extension(psychopy_folder, ".csv", default=psychopy_folder)
psychopy_log_path = find_file_with_extension(psychopy_folder, ".log", default=psychopy_folder)

# The EEG recording (.fif) is searched for anywhere below the participant folder.
eeg_path = find_file_with_extension(participant_folder, ".fif", default=participant_folder)

<h2>Get Questionnaire Data From Participant 1</h2>

In [3]:
# Read the questionnaire sheet and flip it so that each former column becomes a row.
df_questionaire = pd.read_excel(questionaire_path).T
# The first row of the flipped sheet supplies the column labels ...
df_questionaire.columns = df_questionaire.iloc[0]
# ... so it is dropped before selecting the row labelled with the participant number.
df_questionaire = df_questionaire.iloc[1:].loc[participant_number]

<h2>Read Eyetracking Data</h2>

In [4]:
# The eyetracking export is ';'-separated and has no header row,
# so the columns are addressed by position (0, 1, 2, ...) further down.
df_eyetracking = pd.read_csv(eyetracking_path, sep=";", header=None)

In [5]:
# Raw strings keep the regex backslashes literal; "\(" in a normal string is an
# invalid escape sequence that newer Python versions warn about.
three_extractor_compiled = re.compile(r"\((.*), (.*), (.*)\)")
two_extractor_compiled = re.compile(r"\((.*), (.*)\)")


def three_extractor(value):
    """Parse a string of the form ``"(a, b, c)"`` into a tuple of three floats.

    Raises
    ------
    ValueError
        If ``value`` does not start with a parenthesised, ", "-separated triple
        or one of the components cannot be converted to ``float``.
    """
    match = three_extractor_compiled.match(value)
    if match is None:
        raise ValueError("Expected a string like '(a, b, c)', got: {!r}".format(value))
    return float(match.group(1)), float(match.group(2)), float(match.group(3))


def two_extractor(value):
    """Parse a string of the form ``"(a, b)"`` into a tuple of two floats.

    Raises
    ------
    ValueError
        If ``value`` does not start with a parenthesised, ", "-separated pair
        or one of the components cannot be converted to ``float``.
    """
    match = two_extractor_compiled.match(value)
    if match is None:
        raise ValueError("Expected a string like '(a, b)', got: {!r}".format(value))
    return float(match.group(1)), float(match.group(2))


# NOTE: this cell rebinds `df_eyetracking` from the raw, position-indexed frame to the
# expanded frame, so the CSV-reading cell has to be re-run before re-running this one.
meta_type = dd.utils.make_meta(0.0)
ddf_eyetracking = dd.from_pandas(df_eyetracking, npartitions=2)


def _expand_tuple_column(ddf, column, extractor, names):
    """Parse the tuple strings in ``ddf[column]`` and return one DataFrame column per component."""
    parsed = ddf.apply(lambda row: extractor(row[column]), meta=meta_type, axis=1).compute()
    return pd.DataFrame(parsed.tolist(), columns=names)


def _copy_column(ddf, column, name):
    """Return ``ddf[column]`` as a single-column DataFrame called ``name``."""
    return pd.DataFrame(ddf[column].compute().tolist(), columns=[name])


def _xyz(prefix):
    """Build the three column names ``<prefix>_x``, ``<prefix>_y``, ``<prefix>_z``."""
    return [prefix + "_" + axis for axis in ("x", "y", "z")]


# One entry per raw column, in the original column order.
eyetracking_parts = [
    _expand_tuple_column(ddf_eyetracking, 0, three_extractor, _xyz("l_gaze_point_in_user_coordinate_system")),
    _copy_column(ddf_eyetracking, 1, "l_valid"),
    _copy_column(ddf_eyetracking, 2, "r_valid"),
    _expand_tuple_column(ddf_eyetracking, 3, three_extractor, _xyz("r_gaze_point_in_user_coordinate_system")),
    _expand_tuple_column(ddf_eyetracking, 4, three_extractor, _xyz("l_gaze_origin_in_user_coordinate_system")),
    _expand_tuple_column(ddf_eyetracking, 5, three_extractor, _xyz("r_gaze_origin_in_user_coordinate_system")),
    # Fixed: the second component used to be labelled "r_display_x", which produced a
    # duplicate column name together with the right-eye columns that follow.
    _expand_tuple_column(ddf_eyetracking, 6, two_extractor, ["l_display_x", "l_display_y"]),
    _expand_tuple_column(ddf_eyetracking, 7, two_extractor, ["r_display_x", "r_display_y"]),
    _copy_column(ddf_eyetracking, 8, "time"),
    _copy_column(ddf_eyetracking, 9, "l_pupil_diameter"),
    _copy_column(ddf_eyetracking, 10, "r_pupil_diameter"),
]
df_eyetracking = pd.concat(eyetracking_parts, axis=1)

<h2>Read EEG Data</h2>

In [6]:
def rescale(data, scaling_factor=7e-9):
    """Multiply ``data`` by a constant scaling factor.

    Parameters
    ----------
    data : number or array-like supporting ``*``
        Raw values to scale.
    scaling_factor : float, optional
        Factor used to obtain values in [V]; it depends on the recording device
        and its settings. Defaults to ``7e-9``, the value this notebook has
        always used.

    Returns
    -------
    Same type as ``scaling_factor * data``.
    """
    return scaling_factor * data


def get_events_from_info(inst):
    """Read events directly from ``inst.info["events"]`` (specific to some of our recordings).

    Every entry that carries a ``"list"`` value contributes one row made of the
    first three values of that list in reversed order (``[v[2], v[1], v[0]]``).
    Entries with neither ``"list"`` nor ``"channels"`` are reported on stdout
    and skipped.

    Parameters
    ----------
    inst : object
        Anything exposing ``inst.info["events"]`` as a sequence of dict-like
        entries whose ``"list"`` value offers ``.tolist()``.

    Returns
    -------
    numpy.ndarray
        One row per usable entry; an empty ``(0, 3)`` integer array when there
        is none, so that column indexing such as ``events[:, 2]`` still works.

    Raises
    ------
    NotImplementedError
        If an entry only provides ``"channels"``; this case is not supported.
        (``NotImplementedError`` is a ``RuntimeError``, which is what the former
        bare ``raise`` produced.)
    """
    rows = []
    for position, entry in enumerate(inst.info["events"], start=1):
        content = entry.get("list")
        if content is not None:
            values = content.tolist()
            rows.append([values[2], values[1], values[0]])
        elif entry.get("channels") is not None:
            raise NotImplementedError("fiftools: Entry #" + str(position) + " stores events as 'channels', which is not supported.")
        else:
            print("fiftools: Type of entry #" + str(position) + " unknown.")
    if not rows:
        return np.empty((0, 3), dtype=int)
    return np.array(rows)


# Read the recording with preload=True and run `rescale` over the EEG channels.
raw = mne.io.read_raw_fif(eeg_path, preload=True)
raw.apply_function(rescale, picks=["eeg"])


# Preprocessing: band-pass filter (0.5 to 80 Hz), then notch filter (power-line frequency and harmonics)
raw.filter(l_freq=0.5, h_freq=80)
raw.notch_filter(freqs=[50, 100]);

  raw = mne.io.read_raw_fif(fname=eeg_path, preload=True)


In [7]:
# NOTE(review): `pick_types` is called on `raw` itself; in MNE it appears to restrict
# `raw` in place and return the instance, so `picks` would be the Raw object rather
# than a list of channel indices - confirm. `picks` and `data` are not used in this cell.
picks = raw.pick_types(eeg=True)
data = raw.get_data(picks=["eeg"])

channel_names = raw.info["ch_names"]
sampling_rate = raw.info["sfreq"]

events = get_events_from_info(raw)
# NOTE(review): column 2 is divided by the sampling rate below (i.e. used as a sample
# position) while column 0 is compared against 100 (i.e. used like a trigger code), so
# the names `event_ids` / `indices_events` look swapped. They are kept unchanged here.
event_ids = events[:, 2]
indices_events = events[:, 0]
t_events = event_ids / sampling_rate

# Express all event times relative to the second event.
start_time = t_events[1]
t_events = t_events - start_time

columns = [
    "Snippet",
    "SnippetStart",
    "SnippetStop",
    "InputStart",
    "InputStop",
    "OutputStart",
    "OutputStop",
    "EEG",
]

# Every event whose first column is <= 100 opens a trial; the three events after it
# give the snippet start, the input start (= snippet stop) and the output start
# (= input stop). Events closer than three positions to the end cannot open a complete
# trial and are skipped instead of indexing past the end of `t_events`.
# The rows are collected first and the frame is built once: `DataFrame.append` was
# removed in pandas 2.0 and growing a frame row by row is quadratic.
trial_rows = [
    [None, t_events[i + 1], t_events[i + 2], t_events[i + 2], t_events[i + 3], t_events[i + 3], None, None]
    for i in range(len(t_events) - 3)
    if indices_events[i] <= 100
]
df_eeg = pd.DataFrame(trial_rows, columns=columns)

In [8]:
#Snippet0 = raw.crop(0.0, 12.0)
# Snippet0.plot_psd(show=False)
#Snippet0.plot(block=True)
# result[0].plot_psd(show=False)
# result[0].plot(block=True)

<h2>Read PsychoPy Data</h2>

In [9]:
def to_file_name(path):
    """Return the file name of ``path`` without directories and without extension.

    Both backslashes and forward slashes are treated as directory separators,
    independent of the operating system the notebook runs on, so
    ``"img\\snippets\\Foo.png"`` and ``"img/snippets/Foo.png"`` both give ``"Foo"``.
    """
    # Strip the directories first so that dots in folder names cannot be
    # mistaken for the start of the extension.
    base_name = path.replace("\\", "/").rsplit("/", 1)[-1]
    return os.path.splitext(base_name)[0]


def map_to_answer(answer):
    """Map a raw answer value to its answer category.

    The keywords are tested in the listed order and the first one contained in
    ``answer`` decides the category; note that ``"None"`` is reported as
    ``"Wrong3"``. ``None`` is returned when no keyword is contained.
    """
    keyword_to_category = (
        ("Right", "Right"),
        ("Wrong1", "Wrong1"),
        ("Wrong2", "Wrong2"),
        ("None", "Wrong3"),
        ("Skipped", "Skipped"),
    )
    for keyword, category in keyword_to_category:
        if keyword in answer:
            return category
    return None


# Columns of the PsychoPy CSV that are used below.
psychopy_columns = [
    "ImagePath",
    "Image.started",
    "Image.stopped",
    "InputPath",
    "image.started",
    "image.stopped",
    "ImagePathInputs",
    "image_1.started",
    "image_1.stopped",
    "ChoosenAnwer",
    "image_7.started",
]
# PsychoPy component timestamps -> phase names used in this notebook.
phase_names = {
    "Image.started": "SnippetStart",
    "Image.stopped": "SnippetStop",
    "image.started": "InputStart",
    "image.stopped": "InputStop",
    "image_1.started": "OutputStart",
    "image_1.stopped": "OutputStop",
    "image_7.started": "CrossStart",
}

df_psydata = pd.read_csv(psychopy_csv_path)[psychopy_columns]
# Only rows that reference a snippet image are kept.
df_psydata = df_psydata[df_psydata["ImagePath"].notna()]
df_psydata.insert(0, "Snippet", df_psydata["ImagePath"].apply(to_file_name))
df_psydata["ChoosenAnwer"] = df_psydata["ChoosenAnwer"].apply(map_to_answer)
df_psydata = df_psydata.reset_index(drop=True).rename(columns=phase_names)

# The recorded stop times are replaced: each phase ends where the next one starts.
for stop_column, next_start_column in (
    ("SnippetStop", "InputStart"),
    ("InputStop", "OutputStart"),
    ("OutputStop", "CrossStart"),
):
    df_psydata[stop_column] = df_psydata[next_start_column]
df_psydata = df_psydata.drop(columns=["ImagePath", "InputPath", "ImagePathInputs", "CrossStart"])

# Make every timestamp relative to the start of the first snippet.
start_time = df_psydata["SnippetStart"][0]
for time_column in ("SnippetStart", "SnippetStop", "InputStart", "InputStop", "OutputStart", "OutputStop"):
    df_psydata[time_column] = df_psydata[time_column] - start_time

In [10]:
# The PsychoPy log is tab-separated and has no header row.
df_psylog = pd.read_csv(psychopy_log_path, sep="\t", header=None)

<h2>Bring Data Together and Group Them by Snippet</h2>

In [11]:
# set snippet name and endtime of each snippet
df_eeg["Snippet"] = df_psydata["Snippet"]
df_eeg["OutputStop"] = df_eeg["OutputStart"] + df_psydata["OutputStop"] - df_psydata["OutputStart"]

In [None]:
result = {}


def new_template():
    """Return a fresh, fully independent template: one dict of empty slots per phase."""
    slots = ("EyeTracking", "EEG", "Log", "Time")
    return {phase: dict.fromkeys(slots) for phase in ("Code", "Input", "Output")}


template = new_template()
for index, row in df_psydata.iterrows():
    # `template.copy()` was only a shallow copy: the nested per-phase dicts were shared,
    # so filling `current` also modified `template` and every other copy of it.
    current = new_template()
    current["Code"]["EyeTracking"] = df_eyetracking
    # NOTE(review): `current` is not stored in `result` yet and receives the complete
    # eyetracking frame instead of a per-snippet slice - this loop looks unfinished.

In [None]:
# Display the expanded eyetracking frame as the cell output.
df_eyetracking