# Prepare Input

This notebook creates an xarray dataset to be used as the input to the B2BNet deep-learning (DL) model.

## Participants

In [38]:
# Setups
%reload_ext autoreload
%autoreload 3

from src.b2bnet.data.input import process_input
from IPython.display import clear_output

import numpy as np
import xarray as xr
import pandas as pd
import mne

# read BIDS dataset from external drive
# NOTE(review): both paths below are machine-specific (an external-drive mount
# and a home-relative checkout); adjust them before running elsewhere.
eeg_path = '/Volumes/Extreme SSD/PhD/OTKA study1/EEG data/BIDS/'
# CSV of behavioral data; later cells query its `procedure`, `bids_id`,
# `session` and `score` columns.
behvioral_path = '~/Codes/otka_data/behavioral/behavioral_data.csv'

# collector dataset (empty placeholder; `ds` is re-assigned when the
# datasets are merged at the end of the notebook)
ds = xr.Dataset()

In [8]:
# Behavioral table; only the rows recorded under the relaxation procedure
# are used in this notebook.
data = pd.read_csv(behvioral_path)
relax_rows = data.query('procedure == "relaxation"')

# Subject -> session lookup. Subject ids are converted to zero-padded,
# two-character strings so that they match the EEG file names.
relax_ses = (
    relax_rows[['bids_id', 'session']]
    .reset_index(drop=True)
    .assign(bids_id=lambda df: df['bids_id'].astype('int').astype('str').str.zfill(2))
)

# Binary class label per relaxation row: 1 when score >= 5, otherwise 0.
# The last row is dropped, mirroring the EEG loading step which also skips
# the last subject.
# NOTE(review): labels are row-based while EEG loading iterates over unique
# subject ids; this presumably assumes one relaxation row per subject -- confirm.
y_class = relax_rows['score'].apply(lambda score: 1 if score >= 5 else 0).iloc[:-1]

In [80]:
# open relaxation eeg files
def load_subject_data(subj, n_samples=39707):
    """Load one participant's relaxation EEG recording as an array.

    The relaxation session number for ``subj`` is looked up in the
    notebook-level ``relax_ses`` table and turned into the task name
    ``'induction<session>'``. The recording is then read through
    ``process_input`` (resampled to 120, average reference, no filtering)
    from the notebook-level ``eeg_path``.

    Parameters
    ----------
    subj : str
        Subject id as stored in ``relax_ses['bids_id']``.
    n_samples : int or None, default 39707
        Number of leading samples to keep along the second axis, so that
        all subjects share the same length. ``None`` keeps every sample.
        Recordings shorter than ``n_samples`` are returned at their own length.

    Returns
    -------
    The output of ``get_data()`` cropped along its second axis.

    Raises
    ------
    IndexError
        If ``subj`` has no entry in ``relax_ses``.
    """
    sessions = relax_ses.query('bids_id == @subj')['session'].values
    if len(sessions) == 0:
        # Same exception type as the bare `.values[0]` lookup, but with a
        # message that names the offending subject.
        raise IndexError(f'no relaxation session found for subject {subj!r}')

    task = 'induction' + str(sessions[0])
    eeg_data = process_input(
        path=eeg_path,
        subject=subj,
        task=task,
        resampling_frq=120,
        ref_chs='average',
        filter_bounds=None,
        verbose=False
    )

    samples = eeg_data.get_data()
    if n_samples is None:
        return samples
    return samples[:, :n_samples]

# Load the recording of every unique subject id except the last one
# (the class labels are built with a matching `[:-1]`).
all_eeg_data = list(map(load_subject_data, relax_ses['bids_id'].unique()[:-1]))

# Wipe whatever the loading step printed to the cell output.
clear_output()

## Hypnotist's data

In [None]:
# open data and pick eeg channels
from pathlib import Path
hypotist_path = Path('~/Codes/otka_data/EEG/hypnotist/Relaxation_whole.vhdr').expanduser()
hypnotist_raw = mne.io.read_raw_brainvision(hypotist_path,
                                  misc=['EMG1', 'EMG2', 'EMG3', 'ECG'],
                                  eog=['EOG1', 'EOG2'],
                                  verbose=False)
# keep only the EEG channels (the EMG/ECG/EOG channels typed above are dropped)
hypnotist_raw.pick_types(eeg=True)

# CUT: time onsets determined by visual inspection of the raw data (see src/b2bnet/inspect_hypnotist_data.py)
tmin = 84
tmax = 424
hypnotist_raw.crop(tmin=tmin, tmax=tmax)

# The reader above is lazy by default, while the steps below modify the
# samples themselves. Load explicitly, and only after cropping so that just
# the retained segment is read into memory. This is a no-op if the data is
# already loaded.
hypnotist_raw.load_data(verbose=False)

# same sampling rate as the one requested for the participants' recordings
hypnotist_raw.resample(120)

# add FCz as a reference channel, then re-reference to the average
hypnotist_raw.add_reference_channels(ref_channels='FCz')
hypnotist_raw.set_eeg_reference(ref_channels='average', verbose=False)

In [95]:
# Combine participant EEG, hypnotist EEG and class labels into one dataset.
# The two recordings get separate time dimensions while sharing the
# `channel` dimension; `hypnotee` and `y_class` share `subject`.
data_vars = {
    'hypnotee': (['subject', 'channel', 'timestep_hypnotee'], all_eeg_data),
    'hypnotist': (['channel', 'timestep_hypnotist'], hypnotist_raw.get_data()),
    'y_class': (['subject'], y_class.values),
}
ds = xr.Dataset(data_vars)

# Write the dataset to disk through the h5netcdf engine, then release it.
ds.to_netcdf('data/otka.nc5', engine='h5netcdf')
ds.close()