## Group: Hyacithara - Report

First, let's make sure that all required packages are available.

In [None]:
!pip install -r requirements.txt

Now that all required Python packages are installed, we can fetch the required data and perform some clean-up operations on it, so that it can be used with the MNE-BIDS pipeline.

In [None]:
# import function to load configuration from file
from mne_bids_pipeline._config_import import _import_config as getConfig
from tools.logtools import *

# tools to get fresh data
import data_handling.data_downloader as dl
import data_handling.data_cleaner as clean

import matplotlib
# switch matplotlib to the 'qtagg' (Qt) backend
matplotlib.use('qtagg')

First of all, we load the config file for the MNE-BIDS pipeline. Note that we have to disable the checks here; otherwise, the import would fail if the data is not yet available.

In [None]:
# location of the main MNE-BIDS pipeline configuration file
bids_config_path = "./mne-bids/config/mne-bids-pipeline_data_tests.py"

# read the settings from that file; the checks stay disabled for this first import
bids_cfg = getConfig(config_path=bids_config_path, check=False)

Then, we may need to fetch a fresh copy of the data. In order to avoid confusion, this is done by this notebook. 

If the data is not where it is expected, a fresh copy is downloaded and extracted. Please ensure that you have enough disk space available (approx. 150 gigabytes).
The following configuration options control this step:

In [None]:
from data_handling import getDataPathFromBidsRoot

# if true, clears the data directory in order to force downloading a fresh copy of the data
dl.CLEAN_DATA = False
# data folder, derived from the bids_root of the MNE-BIDS pipeline configuration
dl.DATA_BASE_DIR = getDataPathFromBidsRoot(bids_cfg.bids_root)
# if true, checks that the downloaded zip file is the expected file
dl.VALIDATE_DATA = True

In [None]:
# when a fresh copy was requested, remove the existing data directory first
if dl.CLEAN_DATA:
    clean.removeDirectory(dl.DATA_BASE_DIR)
# fetch the data set using the downloader settings configured above
dl.fetchData()

Once all data is downloaded and unpacked, we have to do some preprocessing, in order to be able to use the data.

For this data set, this consists mainly of two tasks:

1. Fix the file links in the `*.vhdr` and `*.vmrk` files. This is needed because the files were renamed after exporting, but the original authors did not update the file links.
2. Generate an `events.tsv` file for each subject.

In [None]:
# tools to patch fresh data
import data_handling.data_patcher as patch
import data_handling.convert_brainvision2bids as convert

# task 1 from the list above: patch the files below the BIDS root
patch.patchAllFiles(bids_cfg.bids_root)
# task 2 from the list above: build the events TSV files below the BIDS root
convert.buildEventTSV(bids_cfg.bids_root)

Now that we have all the data we require, we can import the config again, this time with the checks enabled, to verify that all parameters are valid.

In [None]:
# re-import the configuration; `check` is left at its default this time
bids_cfg = getConfig(config_path=bids_config_path)

Once the configuration is loaded, we can then run the pipeline.

In [None]:
# allow for calling mne_bids_pipeline within Python
import sys

import mne_bids_pipeline
from mne_bids import BIDSPath

import mne_bids_pipeline._main
from typing import Optional

# define the function to run the pipeline within Python
def run_pipeline(config: str = "", steps: str = ""):
    """Run the MNE-BIDS pipeline in-process, as if started from the command line.

    Parameters
    ----------
    config : str
        Path of the pipeline configuration file, passed as ``--config``.
    steps : str
        Step (or step group) to run, passed as ``--steps``.
    """
    # The command-line arguments live in `sys.argv`; `sys.args` does not exist,
    # so assigning to it would silently leave the real arguments untouched.
    previous_argv = sys.argv
    sys.argv = ["", "--config", config, "--steps", steps]
    try:
        mne_bids_pipeline._main.main()
    finally:
        # give the host process (e.g. the Jupyter kernel) its own arguments back
        sys.argv = previous_argv

If errors occur while running the pipeline, the output of previous pipeline runs should be removed before trying again.

In [None]:
# set to True to delete the output of previous pipeline runs in the next cell
CLEAR_PIPELINE_OUTPUT = False

In [None]:
# drop the pipeline's derivatives folder below the BIDS root when requested
if CLEAR_PIPELINE_OUTPUT:
    clean.removeDirectory(f"{bids_cfg.bids_root}/derivatives/mne-bids-pipeline")


Now, we can finally start running the pipeline:

In [None]:
# run the "init" step through the pipeline's command-line interface;
# IPython substitutes the {…} placeholders with the Python variables
curr_steps = "init"
!mne_bids_pipeline --config {bids_config_path} --steps {curr_steps}

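In case of Unicode encode errors when attempting to run the pipeline, make sure that Python uses UTF-8 for its input and output by setting the corresponding environment variable (for example `PYTHONUTF8=1`, which enables Python's UTF-8 mode).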

Remember to restart Jupyter after setting the environment variable.

In [None]:
# preprocessing step 01: data quality
curr_steps = "preprocessing/_01_data_quality"
!mne_bids_pipeline --config {bids_config_path} --steps {curr_steps}

In [None]:
# preprocessing step 02: head position
curr_steps = "preprocessing/_02_head_pos"
!mne_bids_pipeline --config {bids_config_path} --steps {curr_steps}

In [None]:
# preprocessing step 03: Maxfilter
curr_steps = "preprocessing/_03_maxfilter"
!mne_bids_pipeline --config {bids_config_path} --steps {curr_steps}

In [None]:
# preprocessing step 04: frequency filtering
curr_steps = "preprocessing/_04_frequency_filter"
!mne_bids_pipeline --config {bids_config_path} --steps {curr_steps}

In [None]:
# preprocessing step 05: epoch creation
curr_steps = "preprocessing/_05_make_epochs"
!mne_bids_pipeline --config {bids_config_path} --steps {curr_steps}

In [None]:
# preprocessing step 06a: run the ICA; the cells that follow read the resulting
# ICA file and mark components as bad in the components TSV
curr_steps = "preprocessing/_06a_run_ica"
!mne_bids_pipeline --config {bids_config_path} --steps {curr_steps}

In [None]:
def get_input_fnames_apply_ica(
    *,
    cfg,
    subject: str,
    session: Optional[str],
) -> dict:
    """Collect the file paths needed to apply the ICA results to the raw data.

    All paths share one EEG ``BIDSPath`` rooted at ``cfg.deriv_root`` and built
    from the given subject/session plus the task, acquisition, recording and
    space entities of ``cfg``.

    Returns a dict with the keys ``"ica"`` (ICA ``.fif`` file), ``"raw"``
    (``proc-filt_raw`` ``.fif`` file) and ``"components"`` (ICA components
    ``.tsv`` file).
    """
    base_path = BIDSPath(
        subject=subject,
        session=session,
        task=cfg.task,
        acquisition=cfg.acq,
        recording=cfg.rec,
        space=cfg.space,
        datatype='eeg',
        root=cfg.deriv_root,
        check=False,
    )

    def _variant(**entities):
        # work on a copy so the shared base path is never modified
        return base_path.copy().update(**entities)

    return {
        "ica": _variant(suffix="ica", extension=".fif"),
        "raw": _variant(suffix="proc-filt_raw", extension=".fif"),
        "components": _variant(
            processing="ica", suffix="components", extension=".tsv"
        ),
    }

In [None]:
from os.path import exists
import mne
import mne_icalabel
from mne.preprocessing import read_ica
import pandas as pd
from mne_bids_pipeline._config_utils import (
    get_subjects,
    get_sessions
)

# Label the ICA components of every subject/session with ICLabel and mark each
# component that is neither "brain" nor "other" as bad in the components TSV.
for subject in get_subjects(bids_cfg):
    for session in get_sessions(bids_cfg):
        paths = get_input_fnames_apply_ica(
            cfg=bids_cfg, subject=subject, session=session
        )

        # without an ICA file there is nothing to label for this recording
        if not exists(paths["ica"]):
            print(
                formatString("ICA file not found, skipping Subject:", style=STYLE_TEXT_RED),
                formatString(subject, style=STYLE_TEXT_BLUE),
            )
            continue

        ica = read_ica(paths["ica"])
        raw = mne.io.read_raw_fif(paths["raw"])
        label_results = mne_icalabel.label_components(raw, ica, method="iclabel")

        # checkup prints: the ICA itself, the prediction probabilities and the labels
        print(ica)
        print("\nresulting predictions:", label_results["y_pred_proba"])
        labels = label_results["labels"]
        print("\nresulting labels:     ", labels)

        # positions of all components that carry a label other than "brain"/"other"
        exclude_idx = [
            idx for idx, label in enumerate(labels) if label not in ("brain", "other")
        ]

        tsv_data = pd.read_csv(paths["components"], sep="\t")

        # checkup: file content before the update
        print("\nold tsv file content:")
        print(tsv_data)

        tsv_data.loc[exclude_idx, "status"] = "bad"

        # checkup: file content after the update
        print("\nnew tsv file content:")
        print(tsv_data)

        # write the updated table back to the same file
        tsv_data.to_csv(paths["components"], sep="\t", index=False)

In [None]:
# preprocessing step 07a: apply the ICA
curr_steps = "preprocessing/_07a_apply_ica"
!mne_bids_pipeline --config {bids_config_path} --steps {curr_steps}

In [None]:
# preprocessing step 08: peak-to-peak (ptp) rejection
curr_steps = "preprocessing/_08_ptp_reject"
!mne_bids_pipeline --config {bids_config_path} --steps {curr_steps}

In [None]:
# run the "sensor" step group
curr_steps = "sensor"
!mne_bids_pipeline --config {bids_config_path} --steps {curr_steps}

In [None]:
# run the "source" step group
curr_steps = "source"
!mne_bids_pipeline --config {bids_config_path} --steps {curr_steps}