# Prepare Input

This notebook creates the xarray dataset that is used as input to the B2BNet deep-learning (DL) model.

## Participants

In [1]:
# Setups
%reload_ext autoreload
%autoreload 3

import os

import mne
import numpy as np
import pandas as pd
import xarray as xr

from src.b2bnet.data.input import process_input

# Root of the BIDS dataset. Defaults to the external drive used so far; set the
# B2BNET_EEG_PATH environment variable to point at the data on another machine
# without editing the notebook.
# NOTE(review): keep the trailing separator in any override in case the loader
# builds file names by string concatenation — confirm in process_input.
eeg_path = os.environ.get(
    'B2BNET_EEG_PATH',
    '/Volumes/Extreme SSD/PhD/OTKA study1/EEG data/BIDS/',
)

# collector dataset: one variable per participant is added further down
ds = xr.Dataset()

In [61]:
# The EEG dataset only covers the relaxation procedure, so keep just those
# rows of the behavioral table.
data = pd.read_csv('data/behavioral/behavioral_data.csv')
relaxation_rows = data.query('procedure == "relaxation"')

# Session number of the relaxation procedure per participant, with the BIDS id
# rewritten as a zero-padded string so that it matches the file names.
relax_ses = (
    relaxation_rows[['bids_id', 'session']]
    .reset_index(drop=True)
    .assign(
        bids_id=lambda frame: frame['bids_id'].astype('int').astype('str').str.zfill(2)
    )
)

# Binary target: 1 when the score is at least 5, otherwise 0.
y_class = relaxation_rows['score'].apply(lambda score: int(score >= 5))
# y_class.to_csv('data/behavioral/relaxation_class.csv', index=False)

In [None]:
# Open each participant's relaxation EEG recording, wrap it in a DataArray and
# store it in the collector dataset `ds` under the key `sub_<bids_id>`.
# NOTE(review): `[:-1]` drops whichever id comes last in `relax_ses`; this is
# assumed to be sub_52 — confirm against the row order of the behavioral CSV.
for sub in relax_ses['bids_id'].unique()[:-1]:  # exclude sub_52 because of missing data
    # the session in which relaxation was administered determines the task name
    session = relax_ses.query('bids_id == @sub')['session'].values[0]
    task = 'induction' + str(session)

    eeg_data = process_input(
        path=eeg_path,
        subject=sub,
        task=task,
        resampling_frq=120,
        ref_chs='average',
        filter_bounds=None,
        verbose=True
    )

    # read the samples once and reuse them for both the values and the time axis
    signal = eeg_data.get_data()

    # create a xarray: channels x time, with time expressed as a sample index
    da = xr.DataArray(
        data=signal,
        dims=["channels", "time"],
        coords={
            "channels": eeg_data.ch_names,
            "time": np.arange(signal.shape[1]),
        },
    )

    # Insert in place. `Dataset.update` mutates the dataset and returns None in
    # current xarray, so `ds = ds.update(...)` would rebind `ds` to None and
    # break the next iteration; item assignment behaves the same on all versions.
    ds[f'sub_{sub}'] = da

## save dataset
# ds.to_netcdf('data/input.nc')

## Hypnotist's data

In [None]:
# Open the hypnotist's BrainVision recording, declaring the non-EEG channels
# so that only the EEG channels are kept afterwards.
raw = mne.io.read_raw_brainvision('data/EEG/hypnotist/Relaxation_whole.vhdr',
                                  misc=['EMG1', 'EMG2', 'EMG3', 'ECG'],
                                  eog=['EOG1', 'EOG2'],
                                  verbose=False)
raw.pick_types(eeg=True)

# CUT: time onsets determined by visual inspection of the raw data (see src/b2bnet/inspect_hypnotist_data.py)
tmax = 424  # seconds from the start of the recording
tmin = 84
raw.crop(tmin=tmin, tmax=tmax)

# The file is opened lazily; resampling and adding a reference channel need the
# samples in memory. Loading after the crop reads only the retained segment.
raw.load_data()

# same sampling rate as the participants' data
raw.resample(120)

# Add FCz back as a flat channel, then re-reference to the average.
# NOTE(review): presumably FCz was the online recording reference — confirm.
raw.add_reference_channels(ref_channels='FCz')
raw.set_eeg_reference(ref_channels='average', verbose=False)

# read the samples once and reuse them for both the values and the time axis
signal = raw.get_data()

# channels x time, with time expressed as a sample index
da = xr.DataArray(
    data=signal,
    dims=["channels", "time"],
    coords={
        "channels": raw.ch_names,
        "time": np.arange(signal.shape[1]),
    },
)

# save dataarray
da.to_netcdf('data/hypnotist.nc')