# 0 Basic setup

In [None]:
import json
import os
import shutil

# for tabular behavioral data
import numpy as np
import pandas as pd

# for eeg data
import mne
from mff2edf import write_mne_edf
from mne_bids import write_raw_bids, BIDSPath, print_dir_tree, update_sidecar_json

In [None]:
# Directory layout: one folder per processing stage under the data root.
base_path = "./data/EGI_DATA"
raw_path, fmt_path, edf_path, bids_root_path = (
    os.path.join(base_path, stage_dir)
    for stage_dir in ("EGI_RAW", "EGI_FMT", "EGI_EDF", "EGI_BIDS")
)
# Names of the per-subject sub-folders.
beh_path, rec_path, eeg_path = "beh", "rec", "eeg"
# Task labels used when building file names.
wordtask, imagetask, foodtask = "wordchoice", "imagechoice", "foodchoice"
wmtask, rest = "wordimagematch", "rest"

# 1 Format raw data

## 1-1 Format main data

In [None]:
# Start from an empty formatted-data directory: remove whatever a previous
# run left behind, then (re)create the directory in both cases. Creating it
# only in an `else` branch would leave no directory after a clean-up.
if os.path.exists(fmt_path):
    shutil.rmtree(fmt_path)
# makedirs also creates missing parent directories
os.makedirs(fmt_path)

In [None]:
# File-name tokens (split on "_") that mark a behavioural csv as task data.
task_set = {"foodEnd", "foodChoice", "foodHealthy", "imageChoice", "wordChoice", "wordImageMatch", "foodTaste"}
# Upper-cased token of an EEG recording name -> task label used in the output name.
# Each key appears once; a repeated key in a dict literal is silently collapsed.
task_dict = {"F": "foodchoice", "FC": "foodchoice", "I": "imagechoice", "IC": "imagechoice", "W": "wordchoice", "WC": "wordchoice", "R": "rest", "RS": "rest", "WM": "wordimagematch"}
# number of behavioural csv files copied
total = 0
for sub in os.listdir(raw_path):
    sub_raw_path = os.path.join(raw_path, sub)
    # plain files sitting next to the "sub-xxx" directories cannot be listed: skip them
    if not os.path.isdir(sub_raw_path):
        continue
    # under "sub-xxx" directory
    os.makedirs(os.path.join(fmt_path, sub, eeg_path), exist_ok=True)
    os.makedirs(os.path.join(fmt_path, sub, beh_path), exist_ok=True)
    for directory in os.listdir(sub_raw_path):
        # eeg data
        if "EEG DATA" in directory or "EEG_DATA" in directory:
            cur_eeg_path = os.path.join(sub_raw_path, directory)
            # sub-001 to sub-015: the recordings are read from an extra "ori_mff" level
            if sub <= "sub-015":
                cur_eeg_path = os.path.join(cur_eeg_path, "ori_mff")
            for mff in os.listdir(cur_eeg_path):
                # names containing "fil.mff"/"fil.edf" are never copied
                # (presumably filtered exports -- confirm)
                if "fil.mff" in mff or "fil.edf" in mff:
                    continue
                # drop the last 4 characters (the extension), then split on "-", "_" and whitespace
                tokens = mff[:-4].upper().replace('-', ' ').replace('_', ' ').split()
                # the first key of task_dict found among the tokens decides the task
                for key, value in task_dict.items():
                    if key in tokens:
                        print(os.path.join(cur_eeg_path, mff))
                        shutil.copytree(os.path.join(cur_eeg_path, mff), os.path.join(fmt_path, sub, eeg_path, "{}_task-{}_eeg.mff".format(sub, value)))
                        break
        # behavioral data
        elif "Beha" in directory:
            for csv_name in os.listdir(os.path.join(sub_raw_path, directory)):
                cross = set(csv_name.split("_")) & task_set
                if cross:
                    total += 1
                    # sorted() makes the choice reproducible if several task tokens match;
                    # the whole target file name is lower-cased
                    task_token = sorted(cross)[0]
                    shutil.copy(os.path.join(sub_raw_path, directory, csv_name), os.path.join(fmt_path, sub, beh_path, "{}_task-{}_beh.csv".format(sub, task_token).lower()))

## 1-2 Delete irrelevant data

In [None]:
# Remove the word-image-match recording of every formatted subject.
# A missing recording is not an error (ignore_errors=True).
# The behavioural folder is deliberately left in place.
for sub in os.listdir(fmt_path):
    wm_recording_name = "{}_task-{}_eeg.mff".format(sub, wmtask)
    wm_recording_dir = os.path.join(fmt_path, sub, eeg_path, wm_recording_name)
    shutil.rmtree(wm_recording_dir, ignore_errors=True)

# 2 Convert .mff to .edf

In [None]:
# Start from an empty EDF directory: remove whatever a previous run left
# behind, then (re)create the directory in both cases. Creating it only in
# an `else` branch would leave no directory after a clean-up.
if os.path.exists(edf_path):
    shutil.rmtree(edf_path)
# makedirs also creates missing parent directories
os.makedirs(edf_path)

In [None]:
# place edf files in edf_path, mirroring the <sub>/<eeg> layout of fmt_path
for sub in os.listdir(fmt_path):
    sub_mff_dir = os.path.join(fmt_path, sub, eeg_path)
    sub_edf_dir = os.path.join(edf_path, sub, eeg_path)
    # create the sub/eeg directory itself; testing only for <edf_path>/<sub>
    # would skip the creation when the subject folder exists without "eeg"
    os.makedirs(sub_edf_dir, exist_ok=True)
    for mff in os.listdir(sub_mff_dir):
        # swap the last three characters of the name ("mff") for "edf"
        edf_file_path = os.path.join(sub_edf_dir, "{}edf".format(mff[:-3]))
        # recordings that were already converted are left alone
        if not os.path.exists(edf_file_path):
            write_mne_edf(os.path.join(sub_mff_dir, mff), edf_file_path, overwrite=True)

# 3 Get BIDS data

## 3-1 Write basic files

In [None]:
# Start from an empty BIDS root: remove whatever a previous run left behind,
# then (re)create the directory in both cases. Creating it only in an `else`
# branch would leave no directory after a clean-up.
if os.path.exists(bids_root_path):
    shutil.rmtree(bids_root_path)
os.makedirs(bids_root_path)

In [None]:
# TODO
# Write every converted recording below edf_path into the BIDS tree.
for sub in os.listdir(edf_path):
    sub_eeg_dir = os.path.join(edf_path, sub, eeg_path)
    for edf_file in os.listdir(sub_eeg_dir):
        # the name is split on "_"; the text after "-" in the first two
        # parts gives the subject label and the task label
        name_parts = edf_file.split("_")
        subject_id = name_parts[0].split("-")[1]
        task = name_parts[1].split("-")[1]
        bids_path = BIDSPath(subject=subject_id, task=task, root=bids_root_path)
        raw = mne.io.read_raw_edf(os.path.join(sub_eeg_dir, edf_file), preload=False)
        # standard "GSN-HydroCel-129" montage with its last channel renamed to "E129"
        montage = mne.channels.make_standard_montage("GSN-HydroCel-129")
        montage.ch_names[-1] = "E129"
        raw.set_montage(montage)
        # power-line frequency stored with the recording
        raw.info["line_freq"] = 50
        write_raw_bids(raw, bids_path, overwrite=True)

## 3-2 Update *_eeg.json

In [None]:
# Fields merged into every *_eeg.json sidecar found under the BIDS root.
entries = dict(
    EEGReference="E129",
    Manufacturer="Electrical Geodesic Incorporated",
)

bids_path = BIDSPath(datatype="eeg", suffix="eeg", root=bids_root_path)
matched_recordings = bids_path.copy().match()
for eeg_file in matched_recordings:
    # point the matched path at its .json sidecar before updating it
    eeg_json_path = eeg_file.update(extension=".json")
    update_sidecar_json(bids_path=eeg_json_path, entries=entries)

## 3-3 Add *_events.json

In [None]:
# import json
# tasks = [wordtask, imagetask, foodtask, rest]
# subjects = [str(i).zfill(3) for i in range(1, 32)]

In [None]:
# for sub in subjects:
#     for task in tasks:
#         event_json_path = BIDSPath(subject=sub, task=task, datatype="eeg",
#                             suffix="events", extension=".json", root=bids_root_path)
#         json.dumps(event_json_path, indent=4)

## 3-4 Add behavior tsv file

In [None]:
def getBehTsv(file_in, file_out, task):
    """Convert one behavioural csv into a tab-separated file.

    Rows 1..320 of ``file_in`` are kept (row 0 is skipped). A
    ``response_time`` column is derived from two ``*.started`` columns, the
    task-specific columns are renamed/recoded, and the selected columns are
    written to ``file_out``.

    Parameters
    ----------
    file_in : path of the csv file to read.
    file_out : path of the tsv file to write.
    task : one of ``foodtask``, ``wordtask`` or ``imagetask``; selects the
        timing columns and the clean-up routine.

    Raises
    ------
    KeyError
        If ``task`` is not one of the three supported tasks, or a column the
        task needs is missing from the csv.
    """
    # option labels found in the csv -> English labels written to the tsv
    option_labels = {"有生命": "animate", "无生命": "inanimate"}
    # columns that close every task's table, in output order
    common_tail = ["left_option", "right_option", "response",
                   "response_time", "xTrajectory", "yTrajectory", "tTrajectory"]

    def recode_options(sdf):
        # shared by the word and image tasks: translate both option columns
        # and turn the clicked rectangle into "left"/"right"
        for option in ["option_left", "option_right"]:
            sdf[option] = sdf[option].map(option_labels)
        sdf["left_option"], sdf["right_option"] = sdf["option_left"], sdf["option_right"]
        sdf["response"] = sdf["mouse.clicked_name"].map({"rect_left": "left", "rect_right": "right"})

    def fc_task(sdf):
        # food choice: the two options are the image columns themselves
        sdf["trialIndex"] = sdf["images.thisN"].apply(int)
        sdf["left_option"], sdf["right_option"] = sdf["left_image"], sdf["right_image"]
        sdf["response"] = sdf["mouse.clicked_name"].map({"image_left": "left", "image_right": "right"})
        return sdf[["trialIndex"] + common_tail]

    def wc_task(sdf):
        # word choice: the stimulus is the item as-is; "isAnimate" is passed through
        sdf["trialIndex"] = sdf["words.thisN"].apply(int)
        sdf["stimulus"] = sdf["item"]
        recode_options(sdf)
        return sdf[["trialIndex", "stimulus", "isAnimate"] + common_tail]

    def ic_task(sdf):
        # image choice: "non-" inside the item name is rewritten to "in"
        sdf["trialIndex"] = sdf["images.thisN"].apply(int)
        sdf["stimulus"] = sdf["item"].apply(lambda x: x.replace("non-", "in"))
        recode_options(sdf)
        return sdf[["trialIndex", "stimulus"] + common_tail]

    # used to extract rt: response_time = second column - first column
    stim_rec = {
        foodtask: ["image_right.started", "image_right_2.started"],
        wordtask: ["word_item.started", "word_item_2.started"],
        imagetask: ["image_item.started", "image_item_2.started"]
    }
    # used to get func
    task_prep = {
        foodtask: fc_task,
        wordtask: wc_task,
        imagetask: ic_task
    }
    # get data; .copy() detaches the row slice from the full table so the
    # column assignments below are not chained assignments on a slice
    df = pd.read_csv(file_in)[1:321].copy()
    start_col, end_col = stim_rec[task]
    df["response_time"] = df[[start_col, end_col]].apply(
        lambda row: np.round(row[end_col] - row[start_col], 3), axis=1)

    # little preprocess
    df = task_prep[task](df)
    # write to bids; BIDS tabular files code missing values as "n/a"
    df.to_csv(file_out, index=False, sep="\t", na_rep="n/a")


In [None]:
# Convert the three choice-task csv files of every subject into *_beh.tsv
# files under <bids root>/<sub>/beh.
for sub in os.listdir(edf_path):
    sub_beh_dir = os.path.join(bids_root_path, sub, beh_path)
    # exist_ok: rerunning the cell must not fail on an existing folder
    os.makedirs(sub_beh_dir, exist_ok=True)
    for task in [foodtask, wordtask, imagetask]:
        file_in = os.path.join(fmt_path, sub, beh_path, "{}_task-{}_beh.csv".format(sub, task))
        file_out = os.path.join(sub_beh_dir, "{}_task-{}_beh.tsv".format(sub, task))
        getBehTsv(file_in, file_out, task)

## 3-5 Add *_beh.json

In [None]:
# Column descriptions shared by the *_beh.json sidecars of all three tasks.
entries = {
    "trialIndex": {
        "Description": "Index of the current trial, used to align with EEG data when some EEG triggers are lost."
    },
    "left_option": {
        "Description": "Clickable option on the left side of the screen."
    },
    "right_option": {
        "Description": "Clickable option on the right side of the screen."
    },
    "response": {
        "Description": "The side the participant chose.",
        # BIDS names the value-description mapping of a categorical column "Levels"
        "Levels": {
            "left": "The participant chose the left_option.",
            "right": "The participant chose the right_option."
        }
    },
    "response_time": {
        "Description": "Response time measured in seconds.",
        "Units": "second"
    },
    "xTrajectory": {
        "Description": "The x coordinate of the mouse trajectory over time.",
        "Units": "pixel"
    },
    "yTrajectory": {
        "Description": "The y coordinate of the mouse trajectory over time.",
        "Units": "pixel"
    },
    "tTrajectory": {
        "Description": "The time point which corresponds to the (x, y) coordinate series.",
        "Units": "second"
    }
}
# Task-specific variants: shallow copies are enough because only new
# top-level keys are added, the shared column entries are never modified.
w_entries = entries.copy()
i_entries = entries.copy()
w_entries["stimulus"] = {"Description": "Name of the word stimulus."}
w_entries["isAnimate"] = {"Description": "Whether the stimulus is animate."}
i_entries["stimulus"] = {"Description": "File name of the image stimulus."}

In [None]:
# food choice: each matched *_beh file gets a .json sidecar holding the
# shared column descriptions
bids_path = BIDSPath(datatype="beh", suffix="beh", task=foodtask, root=bids_root_path)
food_beh_files = bids_path.copy().match()
for beh_file in food_beh_files:
    beh_json_path = beh_file.update(extension=".json")
    with open(beh_json_path, "w") as f:
        f.write(json.dumps(entries, indent=4))

In [None]:
# word choice: each matched *_beh file gets a .json sidecar holding the
# word-task column descriptions
bids_path = BIDSPath(datatype="beh", suffix="beh", task=wordtask, root=bids_root_path)
word_beh_files = bids_path.copy().match()
for beh_file in word_beh_files:
    beh_json_path = beh_file.update(extension=".json")
    with open(beh_json_path, "w") as f:
        f.write(json.dumps(w_entries, indent=4))

In [None]:
# image choice: each matched *_beh file gets a .json sidecar holding the
# image-task column descriptions
bids_path = BIDSPath(datatype="beh", suffix="beh", task=imagetask, root=bids_root_path)
image_beh_files = bids_path.copy().match()
for beh_file in image_beh_files:
    beh_json_path = beh_file.update(extension=".json")
    with open(beh_json_path, "w") as f:
        f.write(json.dumps(i_entries, indent=4))

## 3-6 Update dataset_description

In [None]:
# Content of dataset_description.json.
entries = {
    "Name": "HD-EEG task with mouse tracking",
    "BIDSVersion": "1.6.0",
    "DatasetType": "raw",
    "Authors": [
        "Kun Chen",
        "Ruien Wang",
        "Jiamin Huang",
        "Fei Gao",
        "Zhen Yuan",
        "Yanyan Qi",
        "Haiyan Wu"
    ],
    "KeyWords": [
        "Electroencephalography",
        "Decision making",
        # matches the dataset name ("mouse tracking")
        "Mouse-tracking",
        "Resting state"
    ]
}

In [None]:
# Write the description to <bids root>/dataset_description.json; opening
# with "w" replaces a file that is already there.
description_file = os.path.join(bids_root_path, "dataset_description.json")
with open(description_file, "w") as f:
    f.write(json.dumps(entries, indent=4))