# Prepare Input

This notebook creates an xarray dataset to be used as the input to the B2BNet DL model.

## Participants

In [40]:
# Setups
%reload_ext autoreload
%autoreload 3

import os
from pathlib import Path

import mne
import numpy as np
import pandas as pd
import xarray as xr

from src.b2bnet.data.prepare_data import preprocess_data

# Root of the BIDS dataset. Defaults to the external drive used originally;
# set the B2BNET_EEG_PATH environment variable to point at another copy
# without having to edit the notebook.
# NOTE(review): the default ends with a path separator -- keep one on any
# override in case preprocess_data builds file names by concatenation (confirm).
eeg_path = os.environ.get(
    'B2BNET_EEG_PATH',
    '/Volumes/Extreme SSD/PhD/OTKA study1/EEG data/BIDS/',
)

# Collector dataset: one variable per subject is added to it further down.
ds = xr.Dataset()

In [None]:
# Look up, for every participant, the session in which the relaxation
# procedure took place (the EEG dataset only includes the relaxation procedure).
data = pd.read_csv('data/behavioral/behavioral_data.csv')

relax_ses = (
    data
    .loc[data['procedure'] == 'relaxation', ['bids_id', 'session']]
    .reset_index(drop=True)
    # Zero-pad the ids to two characters so they match the BIDS file names.
    .assign(
        bids_id=lambda frame: frame['bids_id'].astype('int').astype('str').str.zfill(2)
    )
)

In [None]:
# Open the relaxation EEG recording of every participant and collect it in `ds`
# as one variable per subject (named `sub_<id>`).
#
# sub_52 is excluded because of missing data. It is skipped by id instead of
# by slicing off the last element, so the exclusion no longer depends on the
# row order of the behavioral table.
EXCLUDED_SUBJECTS = {'52'}

for sub in relax_ses['bids_id'].unique():
    if sub in EXCLUDED_SUBJECTS:
        continue

    # The task name is 'induction' followed by the session number in which
    # this participant received the relaxation procedure.
    session = relax_ses.query('bids_id == @sub')['session'].values[0]
    task = 'induction' + str(session)

    eeg_data = preprocess_data(
        path=eeg_path,
        subject=sub,
        task=task,
        resampling_frq=120,
        ref_chs='average',
        filter_bounds=(1, 50),
        verbose=False,
    )

    # Fetch the samples once and reuse the array for both the data and the
    # length of the time axis (avoids a second get_data() call per subject).
    signal = eeg_data.get_data()

    # Wrap the recording in a labelled (channels x time) array.
    da = xr.DataArray(
        data=signal,
        dims=["channels", "time"],
        coords={
            "channels": eeg_data.ch_names,
            # Integer sample index (0 .. n_samples - 1), not seconds.
            "time": np.arange(signal.shape[1]),
        },
    )

    # Item assignment adds the variable in place; unlike `ds = ds.update(...)`
    # it does not rely on the return value of Dataset.update, which xarray
    # documents as deprecated.
    # NOTE(review): recordings with different numbers of samples share the
    # single `time` coordinate of `ds` -- confirm xarray's alignment of
    # unequal lengths is what the model input expects.
    ds[f'sub_{sub}'] = da
    
## save dataset
# ds.to_netcdf('data/input.nc')