# 0 Basic setup

In [None]:
import os
import json
import shutil
import numpy as np
import pandas as pd
from glob import glob
from tqdm import tqdm

# for eeg data
import mne
from mne_bids import write_raw_bids, BIDSPath, update_sidecar_json

In [None]:
# Data locations: every directory sits under ``root_path``.
# root_path = "/media/haiyanwu/Data/EGI_DATA"
root_path = os.path.join("..", "..", "EGI_DATA")
# raw recordings, formatted copies, converted .set files and the BIDS output
raw_path, src_path, set_path, bids_root_path = (
    os.path.join(root_path, folder)
    for folder in ("EGI_RAW", "EGI_SRC", "EGI_SET", "EGI_BIDS")
)
# names of the per-subject sub-directories
beh_path, rec_path, eeg_path = "beh", "rec", "eeg"
# task labels as they appear in the generated file names
foodtask, wordtask, imagetask = "foodchoice", "wordchoice", "imagechoice"
wmtask = "wordimagematch"
resting = "resting"

# 1 Format raw data

## 1-1 Format main data

In [None]:
# Start from an empty source directory: remove any previous output, then
# always (re)create the directory so it exists whether or not it was there.
if os.path.exists(src_path):
    shutil.rmtree(src_path)
os.mkdir(src_path)

In [None]:
# A behavioral csv is kept when one of these tokens appears in its
# underscore-separated file name.
task_set = {"foodEnd", "foodChoice", "foodHealthy", "imageChoice", "wordChoice", "wordImageMatch", "foodTaste"}
# Upper-cased token found in an EEG recording name -> task label.
task_dict = {"F": foodtask, "FC": foodtask, "I": imagetask, "IC": imagetask, "W": wordtask, "WC": wordtask, "R": resting, "RS": resting, "WM": wmtask}
# number of behavioral files copied
total = 0
for sub in tqdm(os.listdir(raw_path)):
    # under "sub-xx" directory
    os.makedirs(os.path.join(src_path, sub, eeg_path), exist_ok=True)
    os.makedirs(os.path.join(src_path, sub, beh_path), exist_ok=True)
    for directory in os.listdir(os.path.join(raw_path, sub)):
        # eeg data
        if "EEG DATA" in directory or "EEG_DATA" in directory:
            cur_eeg_path = os.path.join(raw_path, sub, directory)
            # String comparison: relies on the zero-padded "sub-xx" naming.
            if sub <= "sub-15":
                # sub-01 to sub-15
                cur_eeg_path = os.path.join(cur_eeg_path, "ori_mff")
            for mff in os.listdir(cur_eeg_path):
                if "fil.mff" in mff or "fil.edf" in mff:
                    continue
                # Split the name (without its 4-character extension) into
                # upper-case tokens once, instead of once per dictionary key.
                name_tokens = mff[:-4].upper().replace("-", " ").replace("_", " ").split()
                for key, value in task_dict.items():
                    if key in name_tokens:
                        shutil.copytree(os.path.join(cur_eeg_path, mff), os.path.join(src_path, sub, eeg_path, f"{sub}_task-{value}_eeg.mff"))
                        # the first matching key decides the task
                        break
        # behavioral data
        elif "Beha" in directory:
            for csv_name in os.listdir(os.path.join(raw_path, sub, directory)):
                cross = set(csv_name.split("_")) & task_set
                if cross:
                    total += 1
                    matched_task = next(iter(cross))
                    shutil.copy(os.path.join(raw_path, sub, directory, csv_name), os.path.join(src_path, sub, beh_path, f"{sub}_task-{matched_task}_beh.csv".lower()))

## 1-2 Delete irrelevant task data

In [None]:
# Remove every file or directory that belongs to the word-image-match task.
wm_pattern = os.path.join(src_path, "*", "*", f"*{wmtask}*")
for leftover in glob(wm_pattern):
    if os.path.isdir(leftover):
        # .mff recordings are directories
        shutil.rmtree(leftover)
    elif os.path.isfile(leftover):
        os.remove(leftover)

## 1-3 Delete subject related information

In [None]:
# in mff files
for mff_file in glob(os.path.join(src_path, "*", eeg_path, "*.mff")):
    # subject xml file; a recording without one needs no clean-up
    subject_xml = os.path.join(mff_file, "subject.xml")
    if os.path.exists(subject_xml):
        os.remove(subject_xml)
    # log files: remove every .txt in the recording, not only the first
    # match, and do nothing when there is none
    for log_file in glob(os.path.join(mff_file, "*.txt")):
        os.remove(log_file)

In [None]:
# Strip the identifying columns (name, gender, age, participant) from every
# behavioral csv and overwrite the file in place.
identifying_columns = ["姓名", "性别", "年龄", "participant"]
for csv_file in glob(os.path.join(src_path, "*", beh_path, "*.csv")):
    table = pd.read_csv(csv_file)
    # columns that a given file does not have are skipped
    table = table.drop(columns=identifying_columns, errors="ignore")
    # written back to the same path; the file stays comma separated
    table.to_csv(csv_file, index=False, encoding="utf-8-sig")

# 2 Convert .mff to .set

In [None]:
# convert all mff to set
# make sure matlab have been added to the PATH environment variables
# The command runs outside of an ``assert`` statement: asserts are removed
# under ``python -O``, which would skip the conversion itself.
exit_status = os.system("matlab -batch mff2set")
if exit_status != 0:
    raise RuntimeError(
        "Conversion from mff to set failed, please re-run and check the mff2set.m file manually!"
    )

# 3 Get BIDS data

## 3-1 Write basic files

In [None]:
# Start from an empty BIDS directory: remove any previous output, then always
# (re)create the directory so it exists whether or not it was there.
if os.path.exists(bids_root_path):
    shutil.rmtree(bids_root_path)
os.makedirs(bids_root_path)

In [None]:
# Write one BIDS recording per subject and task from the converted .set files.
bids_tasks = (resting, foodtask, wordtask, imagetask)
for sub_path in tqdm(os.listdir(set_path)):
    # directory names look like "sub-xx"; keep the part after the dash
    subject_id = sub_path.split("-")[1]
    for task in bids_tasks:
        bids_path = BIDSPath(subject=subject_id, task=task, root=bids_root_path)
        # the converted recording and the .mff it was converted from
        eeglab_file = os.path.join(set_path, sub_path, eeg_path, f"sub-{subject_id}_task-{task}_eeg.set")
        egi_file = os.path.join(src_path, sub_path, eeg_path, f"sub-{subject_id}_task-{task}_eeg.mff")
        recording = mne.io.read_raw_eeglab(eeglab_file)
        egi_recording = mne.io.read_raw_egi(egi_file)
        # montage and measurement date are taken from the .mff recording
        recording.set_montage(egi_recording.get_montage())
        recording.set_meas_date(egi_recording.info["meas_date"])
        recording.info["line_freq"] = 50
        # write to bids
        write_raw_bids(recording, bids_path, overwrite=True)
        # copy .set files to bids, see this issue https://github.com/mne-tools/mne-bids/issues/991
        shutil.copyfile(eeglab_file, bids_path)

## 3-2 Update *_eeg.json

In [None]:
# Fields added to every *_eeg.json sidecar in the BIDS directory.
eeg_entries = dict(
    EEGReference="E129 (Cz)",
    Manufacturer="Electrical Geodesic Incorporated",
    ManufacturersModelName="NetAmps 400",
    RecordingType="discontinuous",
    InstitutionName="University of Macau",
    InstitutionAddress="Macau, China",
)

bids_path = BIDSPath(datatype="eeg", suffix="eeg", root=bids_root_path)
for eeg_file in bids_path.copy().match():
    # point the matched path at its .json sidecar and update that file
    update_sidecar_json(bids_path=eeg_file.update(extension=".json"), entries=eeg_entries)

## 3-3 Modify *_events.tsv; Add *_events.json

In [None]:
# modify events.tsv
for events_tsv in glob(os.path.join(bids_root_path, "sub-*", "eeg", "*events.tsv")):
    df = pd.read_csv(events_tsv, sep="\t")
    # drop columns
    df = df.drop(columns=["value", "sample"], errors="ignore")
    # rewrite duration: only "boundary" markers keep their duration
    df.loc[df["trial_type"] != "boundary", "duration"] = 0
    # Write back tab separated: the default separator is a comma, which would
    # leave a comma-separated file behind the .tsv name. "n/a" cells are
    # parsed as missing values on read, so they are restored on write. Plain
    # UTF-8 (no byte-order mark) matches the other TSV written by this file.
    df.to_csv(events_tsv, sep="\t", index=False, na_rep="n/a", encoding="utf-8")

In [None]:
# Column descriptions shared by every *_events.json sidecar.
event_entries = {
    "onset": {
        "Description": "Onset (in seconds) of the event marker",
        "Units": "second"
    },
    "duration": {
        "Description": "Duration of the marker (do not mixed with duration of events). May not accurate in the .set file",
        "Units": "second"
    },
    "trial_type": {
        "Description": "Event related to the marker"
    }
}


def _events_sidecar(levels):
    """Return an independent sidecar dict whose ``trial_type`` lists ``levels``.

    Every column description is copied, so the sidecars of different tasks
    never share (and therefore never overwrite) each other's marker table.
    The marker table is stored under "Levels", the key BIDS defines for the
    values of a categorical column.
    """
    sidecar = {column: dict(meta) for column, meta in event_entries.items()}
    sidecar["trial_type"]["Levels"] = dict(levels)
    return sidecar


# resting
r_event_entries = _events_sidecar({
    "rsfi": "Start of fixation",
    "rsrs": "Start of blank screen"
})
# markers shared by all task-based eeg recordings
task_levels = {
    "IEND": "Recording start or end",
    "0000-0319": "Start of each trial's fixation"
}
# food choice
f_event_entries = _events_sidecar({
    **task_levels,
    "0400": "Stimulus marker, presentation start with the food picture",
    "0500": "Response marker, participant select the left option"
})
# word choice
w_event_entries = _events_sidecar({
    **task_levels,
    "0400": "Stimulus marker. Presentation start of animate word and the animate option appears on the left side",
    "0401": "Stimulus marker. Presentation start of animate word and the animate option appears on the right side",
    "0402": "Stimulus marker. Presentation start of inanimate word and the animate option appears on the left side",
    "0403": "Stimulus marker. Presentation start of inanimate word and the animate option appears on the right side",
    "0500": "Response marker. Participant select left option when showing animate word and the animate option appears on the left side",
    "0501": "Response marker. Participant select right option when showing animate word and the animate option appears on the left side",
    "0502": "Response marker. Participant select left option when showing animate word and the animate option appears on the right side",
    "0503": "Response marker. Participant select right option when showing animate word and the animate option appears on the right side",
    "0504": "Response marker. Participant select left option when showing inanimate word and the animate option appears on the left side",
    "0505": "Response marker. Participant select right option when showing inanimate word and the animate option appears on the left side",
    "0506": "Response marker. Participant select left option when showing inanimate word and the animate option appears on the right side",
    "0507": "Response marker. Participant select right option when showing inanimate word and the animate option appears on the right side",
})
# image choice
i_event_entries = _events_sidecar({
    **task_levels,
    "0400": "Stimulus marker. Presentation start of animate image and the animate option appears on the left side",
    "0401": "Stimulus marker. Presentation start of animate image and the animate option appears on the right side",
    "0402": "Stimulus marker. Presentation start of inanimate image and the animate option appears on the left side",
    "0403": "Stimulus marker. Presentation start of inanimate image and the animate option appears on the right side",
    "0500": "Response marker. Participant select left option when showing animate image and the animate option appears on the left side",
    "0501": "Response marker. Participant select right option when showing animate image and the animate option appears on the left side",
    "0502": "Response marker. Participant select left option when showing animate image and the animate option appears on the right side",
    "0503": "Response marker. Participant select right option when showing animate image and the animate option appears on the right side",
    "0504": "Response marker. Participant select left option when showing inanimate image and the animate option appears on the left side",
    "0505": "Response marker. Participant select right option when showing inanimate image and the animate option appears on the left side",
    "0506": "Response marker. Participant select left option when showing inanimate image and the animate option appears on the right side",
    "0507": "Response marker. Participant select right option when showing inanimate image and the animate option appears on the right side",
})

In [None]:
# Write the task's description next to every events file of that task.
events_sidecar_by_task = {
    resting: r_event_entries,
    foodtask: f_event_entries,
    wordtask: w_event_entries,
    imagetask: i_event_entries,
}
for task, entry in events_sidecar_by_task.items():
    bids_path = BIDSPath(suffix="events", task=task, root=bids_root_path)
    for event_file in bids_path.copy().match():
        # same path as the matched file, with a .json extension
        with open(event_file.update(extension=".json"), "w") as handle:
            json.dump(entry, handle, indent=4)

## 3-4 Add behavior tsv file

In [None]:
def getBehTsv(file_in: str, file_out: str, task: str) -> None:
    """Convert one behavioral csv into a tab-separated ``*_beh.tsv`` file.

    Rows 1 to 320 (by position) of ``file_in`` are kept, a ``response_time``
    column is derived from the two ``*.started`` columns of the task, the
    task-specific columns are renamed/selected and the result is written to
    ``file_out``.

    Args:
        file_in: Path of the source csv file.
        file_out: Path of the tsv file to write.
        task: One of ``foodtask``, ``wordtask`` or ``imagetask``.

    Raises:
        KeyError: If ``task`` is not one of the three supported tasks, or a
            required column is missing from the csv.
    """
    def fc_task(sdf):
        # food choice: the options are the images shown on each side
        sdf["trialIndex"] = sdf["images.thisN"].apply(int)
        sdf["left_option"], sdf["right_option"] = sdf["left_image"], sdf["right_image"]
        sdf["response"] = sdf["mouse.clicked_name"].map({"image_left": "left", "image_right": "right"})
        return sdf[["trialIndex", "left_option", "right_option", "response",
                "response_time", "xTrajectory", "yTrajectory", "tTrajectory"]]
    def wc_task(sdf):
        # word choice
        sdf["trialIndex"] = sdf["words.thisN"].apply(int)
        # option labels are stored in Chinese; map them to English
        for option in ["option_left", "option_right"]:
            sdf[option] = sdf[option].map({"有生命": "animate", "无生命": "inanimate"})
        sdf["stimulus"] = sdf["item"]
        # options
        sdf["left_option"], sdf["right_option"] = sdf["option_left"], sdf["option_right"]
        sdf["response"] = sdf["mouse.clicked_name"].map({"rect_left": "left", "rect_right": "right"})

        return sdf[["trialIndex", "stimulus", "isAnimate", "left_option", "right_option", "response",
                "response_time", "xTrajectory", "yTrajectory", "tTrajectory"]]
    def ic_task(sdf):
        # image choice
        sdf["trialIndex"] = sdf["images.thisN"].apply(int)
        # option labels are stored in Chinese; map them to English
        for option in ["option_left", "option_right"]:
            sdf[option] = sdf[option].map({"有生命": "animate", "无生命": "inanimate"})
        # "non-animate..." item names become "inanimate..."
        sdf["stimulus"] = sdf["item"].apply(lambda x: x.replace("non-", "in"))
        # options
        sdf["left_option"], sdf["right_option"] = sdf["option_left"], sdf["option_right"]
        sdf["response"] = sdf["mouse.clicked_name"].map({"rect_left": "left", "rect_right": "right"})
        return sdf[["trialIndex", "stimulus", "left_option", "right_option", "response",
                "response_time", "xTrajectory", "yTrajectory", "tTrajectory"]]

    # used to extract rt: response time = second column - first column
    stim_rec = {
        foodtask: ["image_right.started", "image_right_2.started"],
        wordtask: ["word_item.started", "word_item_2.started"],
        imagetask: ["image_item.started", "image_item_2.started"]
    }
    # used to get func
    task_prep = {
        foodtask: fc_task,
        wordtask: wc_task,
        imagetask: ic_task
    }
    # get data; copy the slice so that the columns added below are written to
    # an independent frame instead of a view of the full table
    # NOTE(review): presumably row 0 is not a trial and there are 320 trials
    # -- confirm against the experiment output.
    df = pd.read_csv(file_in)[1:321].copy()
    first_onset, second_onset = stim_rec[task]
    df["response_time"] = (df[second_onset] - df[first_onset]).round(3)

    # little preprocess
    df = task_prep[task](df)
    # write to bids
    df.to_csv(file_out, index=False, sep="\t", encoding="utf-8")

In [None]:
# Create the beh directory of every subject and fill it with one tsv per task.
for sub in os.listdir(set_path):
    # exist_ok keeps this step re-runnable once the directories exist
    os.makedirs(os.path.join(bids_root_path, sub, beh_path), exist_ok=True)
    for task in [foodtask, wordtask, imagetask]:
        file_in = os.path.join(src_path, sub, beh_path, f"{sub}_task-{task}_beh.csv")
        file_out = os.path.join(bids_root_path, sub, beh_path, f"{sub}_task-{task}_beh.tsv")
        getBehTsv(file_in, file_out, task)

## 3-5 Add *_beh.json

In [None]:
# Column descriptions shared by the *_beh.json sidecar of every task.
entries = {
    "trialIndex": {
        "Description": "Index of the current trial, used to align with EEG data when some EEG trigger lost."
    },
    "left_option": {
        "Description": "Clickable option on the left side on the screen."
    },
    "right_option": {
        "Description": "Clickable option on the right side on the screen."
    },
    "response": {
        "Description": "The side which the participant choose.",
        # "Levels" is the key BIDS defines for the values of a categorical
        # column; the texts refer to the left_option / right_option columns.
        "Levels": {
            "left": "The participant choose the left_option.",
            "right": "The participant choose the right_option."
        }
    },
    "response_time": {
        "Description": "Response time measured in seconds.",
        "Units": "second"
    },
    "xTrajectory": {
        "Description": "The x coordinate of the mouse trajectory over time.",
        "Units": "pixel"
    },
    "yTrajectory": {
        "Description": "The y coordinate of the mouse trajectory over time.",
        "Units": "pixel"
    },
    "tTrajectory": {
        "Description": "The time point which corrresdponds to the (x, y) coordinate series.",
        "Units": "second"
    }
}
# food choice: only the shared columns
f_entries = dict(entries)
# word choice: shared columns plus the stimulus and its animacy
w_entries = {
    **entries,
    "stimulus": {"Description": "Name of the word stimulus."},
    "isAnimate": {"Description": "Whether the stimulus is animate."},
}
# image choice: shared columns plus the stimulus
i_entries = {
    **entries,
    "stimulus": {"Description": "File name of the image stimulus."},
}

In [None]:
# Write the task's column descriptions next to every beh file of that task.
beh_sidecar_by_task = {
    foodtask: f_entries,
    wordtask: w_entries,
    imagetask: i_entries,
}
for task, entry in beh_sidecar_by_task.items():
    bids_path = BIDSPath(datatype="beh", suffix="beh", task=task, root=bids_root_path)
    for beh_file in bids_path.copy().match():
        # same path as the matched file, with a .json extension
        with open(beh_file.update(extension=".json"), "w") as handle:
            json.dump(entry, handle, indent=4)

## 3-6 Add sourcedata

In [None]:
# Both copies go under the "sourcedata" directory of the BIDS root.
sourcedata_path = os.path.join(bids_root_path, "sourcedata")
source_raw_path = os.path.join(sourcedata_path, "rawdata")
source_psychopy_path = os.path.join(sourcedata_path, "psychopy")
# formatted source data (the content of src_path)
shutil.copytree(src_path, source_raw_path)
# psychopy experimental program
psychopy_program_path = os.path.join(root_path, "psychopy")
shutil.copytree(psychopy_program_path, source_psychopy_path)

## 3-7 Update dataset_description

In [None]:
# Content of dataset_description.json at the root of the BIDS directory.
dataset_entries = {
    "Name": "HD-EEG task with mouse tracking",
    "BIDSVersion": "1.7.0",
    "DatasetType": "raw",
    "Authors": [
        "Kun Chen",
        "Ruien Wang",
        "Jiamin Huang",
        "Fei Gao",
        "Zhen Yuan",
        "Yanyan Qi",
        "Haiyan Wu"
    ],
    "KeyWords": [
        "Electroencephalography",
        "Decision making",
        # keyword matches the dataset name ("mouse tracking")
        "Mouse-tracking",
        "Resting state"
    ],
    "DatasetDOI": "doi:10.18112/openneuro.ds003766.v1.0.0",
    "SourceDatasets": [ {"URL": "file://./sourcedata/rawdata"} ]
}
# overwrites any dataset_description.json already present in the BIDS root
with open(os.path.join(bids_root_path, "dataset_description.json"), "w") as f:
    json.dump(dataset_entries, f, indent=4)

In [None]:
# Copy participants.tsv from the data root over the one in the BIDS root.
participants_name = "participants.tsv"
participants_source = os.path.join(root_path, participants_name)
participants_target = os.path.join(bids_root_path, participants_name)
shutil.copyfile(participants_source, participants_target)