# Developing behavioral data preprocessing pipeline for OPM MEG analysis


## Preprocessing steps included:
- Loading, syncing, and aligning data
- Categorizing ACC and EMG data into rest vs muscular activity vs movement
-


### 0. Importing

In [None]:
# general packages
import json
import os
import importlib
import sys
import numpy as np
import pandas as pd
import pyxdf
from itertools import compress
import matplotlib.pyplot as plt
import datetime as dt

import mne

# ephys packages
# from mne.filter import filter_data, notch_filter

In [None]:
def add_repo_dir():
    """Add the local repo directory to ``sys.path`` to allow importing from it.

    Walks upwards from the current working directory until a path ending in
    ``'lid_opm'`` is found and appends that path to ``sys.path``. The path is
    appended only once, so re-running the notebook cell does not accumulate
    duplicate entries.

    Raises:
        ValueError: if the filesystem root is reached without finding a
            directory whose path ends in ``'lid_opm'``.
    """
    wd = os.getcwd()

    while not wd.endswith('lid_opm'):
        parent = os.path.dirname(wd)

        # dirname() of the filesystem root returns the root itself, so an
        # unchanged path means there is nothing left to search
        if parent == wd:
            raise ValueError('repo dir not found!')

        wd = parent

    print(f'add repo directory to sys: {wd} ')

    if wd not in sys.path:
        sys.path.append(wd)

    return

In [None]:
# Make the repo importable first, then import the project modules.
# add_repo_dir() has to run before the imports below, because it is what puts
# the repo directory on sys.path.

add_repo_dir()

import utils.load_utils as load_utils
from source_raw_conversion import load_source_opm as source_opm


## 1. Load behavioral source data

Define:
- subject
- task
- configuration version


In [None]:
# Selection for this session: subject ID and preprocessing-config version
SUB = '03'
CONFIG_VERSION = "v1"

# Load subject and preprocessing settings, then derive the recording
# meta-info from the subject config
sub_config = load_utils.load_subject_config(subject_id=SUB)
preproc_config = load_utils.load_preproc_config(version=CONFIG_VERSION)
sub_meta_info = load_utils.get_sub_rec_metainfo(config_sub=sub_config)




In [None]:
sub_meta_info

In [None]:
import source_raw_conversion.load_lsl as loadlsl
import source_raw_conversion.time_syncing as sync
import source_raw_conversion.load_PTB_source_opm as sourceopm

import signal_processing.preprocessing as preproc
import signal_processing.preproc_functions as prepr_funcs
import plotting.processing_checks as proc_plotting
import signal_processing.epoching as epoching

Manual development of the flow from source LSL data to raw data

In [None]:
# Reload the project modules so that code edits are picked up without
# restarting the kernel
importlib.reload(proc_plotting)
importlib.reload(prepr_funcs)
importlib.reload(preproc)
importlib.reload(loadlsl)
importlib.reload(sync)

# Only the recording matching this task / acquisition is processed
TASK_sel = 'task'
ACQ_sel = 'dopa60'

for REC in sub_meta_info['rec_name']:
    print(REC)

    # Recording names are split into exactly two parts, '<task>_<acq>'.
    # Names with a different number of parts (ValueError) or non-string
    # entries (AttributeError) are skipped with a warning. Only these two
    # errors are caught, so e.g. a KeyboardInterrupt is no longer swallowed.
    try:
        TASK, ACQ = REC.split('_')
    except (ValueError, AttributeError):
        print(f'\n##### WARNING: {REC} skipped\n')
        continue

    if TASK != TASK_sel or ACQ != ACQ_sel:
        continue

    # if TASK == 'rest': continue

    recRaw = preproc.rawData_singleRec(
        SUB, TASK, ACQ, ZSCORE_ACC=True,
        ZSCORE_EMG=True, COMBINE_ARM_EMG=True,
    )

    # plot for emg-acc for all task-cues (only unilateral to task)
    proc_plotting.plot_emgacc_check_for_tasks(
        recRaw, SAVE=False, SHOW=True,
    )


Check the HFC projections. For now there are probably too few sensors, and therefore the SVD math does not converge.

In [None]:
### HFC CHECK
# hfc_projs = recRaw.OPM_Z.info['projs']

# for p in hfc_projs:
#     # print('Name:', p['desc'], 'Active:', p.get('active', False))
#     cols = p.get('data', {}).get('col_names', None)
#     # print('  cols:', cols)


# # print(recRaw.OPM_Z.ch_names)
# # print(recRaw.OPM_Z.info['bads'])

# print(hfc_projs[0]['data']['col_names']) 
# print(hfc_projs[0]['data']['data'])          # should be your MEG channel names
# print(np.linalg.norm(hfc_projs[0]['data']['data']))  # should NOT be 0

# # check sensor geometry, should not be close to 0
# pos = np.array([ch['loc'][:3] for ch in recRaw.OPM_Z.info['chs']])
# print("Sensor bounds (min, max) in meters:\n", pos.min(axis=0), pos.max(axis=0))

In [None]:
# set interactive plotting back
%matplotlib inline

## Epoching based on behavioral task


- check (ICA) cleaning before epoching
- TODO select on event type, test topograms per event type

In [None]:
sub_meta_info

In [None]:
# Reload the project modules so that code edits are picked up without
# restarting the kernel
importlib.reload(epoching)
importlib.reload(preproc)
importlib.reload(loadlsl)
importlib.reload(sync)
importlib.reload(source_opm)

# Recording to load: task and acquisition
TASK = 'rest'
ACQ = 'predopa'


# Load a single recording including OPM data; OPM preprocessing runs with
# resampling, bandpass and notch enabled and HFC disabled
recRaw = preproc.rawData_singleRec(
    SUB, TASK, ACQ,
    INCL_OPM=True,
    OPM_PREPROC={
        'resample': True, 'bandpass': True,
        'notch': True, 'hfc': False
    },
    ZSCORE_ACC=True,
    ZSCORE_EMG=True,
)

# Epoch window in seconds relative to the cue. NOTE: these are not passed to
# get_epochs() here; the EMG/ACC plotting cell uses them for its x-axis ticks.
# NOTE(review): presumably get_epochs() uses the same window internally - confirm.
TMIN, TMAX = -1, 3

opm_epochs, emg_epochs, acc_epochs = epoching.get_epochs(acqClass=recRaw,)

Plot ACC and EMG based on sides related to task

In [None]:
# EMG / ACC epoch averages for one task condition, split into channels on
# the task side (left column) and channels on the other side (right column).
GO_STIM = 'rest'
TASK_SIDE = 'left'

# 'rest' epochs carry no side, all other cues are selected as '<cue>_<side>'
GOTASK = GO_STIM if GO_STIM == 'rest' else f'{GO_STIM}_{TASK_SIDE}'

# only needed for the (commented-out) savefig call at the end of this cell
figpath = os.path.join(load_utils.get_onedrive_path('figures'),
                       'explore')
fname = f'emgaccCheck_sub{recRaw.sub}_{recRaw.task}_{recRaw.acq}_{GOTASK}'

sfreq = emg_epochs.info['sfreq']
fsize = 14

fig, axes = plt.subplots(2, 2, figsize=(9, 9),
                         sharey='row', sharex='col',)

# boolean masks: channel name contains the task side
# NOTE: meg_ch_match masks the EMG channels (the name is kept as it was)
emg_names = np.array(emg_epochs.info['ch_names'])
acc_names = np.array(acc_epochs.info['ch_names'])
meg_ch_match = np.array([TASK_SIDE in ch for ch in emg_names])
acc_ch_match = np.array([TASK_SIDE in ch for ch in acc_names])

# extract the epoch data once: (n_epochs, n_channels, n_samples)
emg_all = emg_epochs[GOTASK].get_data()
acc_all = acc_epochs[GOTASK].get_data()


### plot matching sides
# emg
emg3d = emg_all[:, meg_ch_match, :]
axes[0, 0].plot(np.mean(emg3d, axis=0).T, alpha=.3,
                label=emg_names[meg_ch_match],)
# acc
acc3d = acc_all[:, acc_ch_match, :]
axes[1, 0].plot(np.mean(acc3d, axis=0).T, alpha=.3,
                label=acc_names[acc_ch_match],)


### plot non-matching sides
print(f'include for NONMATCH: {emg_names[~meg_ch_match]}')
# emg
emg3d = emg_all[:, ~meg_ch_match, :]
axes[0, 1].plot(np.mean(emg3d, axis=0).T, alpha=.3,
                label=emg_names[~meg_ch_match],)
# acc
acc3d = acc_all[:, ~acc_ch_match, :]
axes[1, 1].plot(np.mean(acc3d, axis=0).T, alpha=.3,
                label=acc_names[~acc_ch_match],)


### y-limits and units depend on whether the signals were z-scored
if recRaw.ZSCORE_EMG:
    emg_ylim, emg_unit = (-1, 3), 'z-score'
else:
    emg_ylim, emg_unit = (0, 3.5e-5), 'V'

if recRaw.ZSCORE_ACC:
    acc_ylim, acc_unit = (-1, 3), 'z-score'
else:
    acc_ylim, acc_unit = (0, .01), 'g'

for col in (0, 1):
    axes[0, col].set_ylim(*emg_ylim)
    axes[1, col].set_ylim(*acc_ylim)


### titles and axis labels
axes[0, 0].set_title('Matching sides to task', size=fsize)
axes[0, 1].set_title('Non-matching sides to task', size=fsize)
axes[0, 0].set_ylabel(f'EMG-envelope ({emg_unit})', size=fsize)
axes[1, 0].set_ylabel(f'ACC-magn. vector ({acc_unit})', size=fsize)
axes[1, 0].set_xlabel('Time vs TASK-onset (sec)', size=fsize)
axes[1, 1].set_xlabel('Time vs trial-onset (sec)', size=fsize)


### x-ticks: one tick per second, labelled in seconds relative to onset
xtlabels = np.arange(TMIN, TMAX+.1, 1)
xticks = sfreq * np.arange(len(xtlabels))
onset_sample = -1*TMIN*sfreq  # sample index of time zero within the epoch

for ax in axes.ravel():
    ax.tick_params(size=fsize, labelsize=fsize,)
    ax.set_xticks(xticks)
    ax.set_xticklabels(xtlabels)

    ax.legend()

    ax.axvline(onset_sample, ymin=0, ymax=1, color='green', lw=3, alpha=.3,)

# for rest the side is only nominal, show it quoted; TASK_SIDE itself is left
# untouched so the cell does not change the selection as a side effect
side_label = f'"{TASK_SIDE}"' if GO_STIM == 'rest' else TASK_SIDE

plt.suptitle(f'Task selection: {GO_STIM}: {side_label}, {recRaw.acq} (n = {emg3d.shape[0]})',
             size=fsize+4, x=.5, y=1.01)

plt.tight_layout()

# plt.savefig(os.path.join(figpath, fname), dpi=300, facecolor='w',
#             bbox_inches="tight",)

plt.show()

Perform ICAs per recording  --> MOVE ALL OPM STEPS TO OPM NOTEBOOK

In [None]:
from mne.preprocessing import ICA, corrmap
from sklearn.decomposition import PCA


In [None]:
%matplotlib inline

In [None]:
# Work on a copy so the loaded recording itself stays untouched
tempdat = recRaw.OPM_Z.copy()

# rescale every channel by 1e-15
# NOTE(review): presumably converts femto-Tesla to Tesla - confirm the units of OPM_Z
tempdat.apply_function(lambda x: x * 1e-15)

# number of samples in the first 2 seconds
n_show = int(tempdat.info['sfreq']*2)

for ch in tempdat.get_data():
    plt.plot(ch[:n_show], alpha=.5,)

plt.show()


# tempcleaner is only created in the ICA-apply cell further down; on a fresh
# top-to-bottom run it does not exist yet, so the comparison plot is skipped
# instead of raising a NameError
if 'tempcleaner' in globals():
    for ch in tempcleaner.get_data():
        plt.plot(ch[:n_show], alpha=.5,)

    plt.show()
else:
    print('tempcleaner not defined yet: run the ICA cells below first '
          'to compare against the cleaned data')

In [None]:
# Fit a FastICA decomposition with as many components as there are rows in
# tempdat.get_data(); random_state is fixed so the fit is reproducible
ica = mne.preprocessing.ICA(n_components=len(tempdat.get_data()),
                            method='fastica', max_iter=500,
                            random_state=27,)
ica.fit(tempdat)

In [None]:
# Print, per channel type, the fraction of variance in tempdat that is
# explained by all fitted ICA components together
explained_var_ratio = ica.get_explained_variance_ratio(tempdat)
for channel_type, ratio in explained_var_ratio.items():
    print(f"Fraction of {channel_type} variance explained by all components: {ratio}")

In [None]:
ica.exclude

In [None]:
%matplotlib qt
ica.plot_sources(tempdat)

In [None]:
# Automatic detection of muscle-artefact components; the returned indices are
# used as ica.exclude in the ICA-apply cell below
muscle_idx_auto, scores = ica.find_bads_muscle(tempdat)
print(muscle_idx_auto)

In [None]:
ica.plot_properties(tempdat, picks=muscle_idx_auto, log_scale=True)

In [None]:
mne.viz.plot_ica_components(ica, picks=muscle_idx_auto)

In [None]:

# Exclude the automatically detected muscle components
ica.exclude = muscle_idx_auto

# Apply the ICA to a copy, so tempdat stays available for comparison
tempcleaner = tempdat.copy()

ica.apply(tempcleaner)




In [None]:
# %matplotlib inline

%matplotlib qt

In [None]:

tempdat.plot()

tempcleaner.plot()


In [None]:
# STIM is only assigned in the epoch-plotting cells further down. Default to
# the condition used there, so this cell also works on a top-to-bottom run;
# an already chosen STIM is left untouched.
if 'STIM' not in globals():
    STIM = 'go_left'

# shape of the OPM epoch data for the selected condition
opm_epochs[STIM].get_data().shape

Plot single epochs per sensor

In [None]:
# use data without autom found muscles
# Epoch the ICA-cleaned data (tempcleaner), using the event array and event
# codes stored on recRaw. No baseline correction and no rejection are applied.
# NOTE: the window (-1 to 3 s) is hard-coded here with the same values as
# TMIN / TMAX defined in the loading cell above.
cleaned_epochs = mne.Epochs(
    raw=tempcleaner, events=recRaw.opm_event_arr,
    event_id=recRaw.opm_event_codes,
    tmin=-1, tmax=3,
    baseline=None, preload=True, reject=None,
)

In [None]:
cleaned_epochs['go_left']

In [None]:
# One subplot per sensor, showing every single epoch of the selected
# condition overlaid.
STIM = 'go_left'
# STIM = 'rest'

USE_EPOCHS = cleaned_epochs

# tick positions in samples, assuming one tick per second from -1 to 3 s
XTICKLABS = [-1, 0, 1, 2, 3]
XTICKS = [int(recRaw.OPM_Z.info['sfreq']) * t
          for t in np.arange(len(XTICKLABS))]

ch_names = recRaw.OPM_Z.ch_names

# extract the selected epochs once instead of once per channel:
# (n_epochs, n_channels, n_samples)
stim_data = USE_EPOCHS[STIM].get_data()

# squeeze=False + ravel keeps axes indexable even with a single channel
fig, axes = plt.subplots(nrows=len(ch_names),
                         figsize=(6, 24), squeeze=False)
axes = axes.ravel()

# NOTE(review): assumes the channel order of USE_EPOCHS equals
# recRaw.OPM_Z.ch_names - confirm
for i_ch, ch in enumerate(ch_names):
    print(f'\n{ch}')

    ch_dat = stim_data[:, i_ch, :]

    axes[i_ch].set_title(ch)
    axes[i_ch].set_xlabel('Time vs Task-Cue (sec)')
    axes[i_ch].set_ylabel('signal (femto-Tesla)')

    axes[i_ch].set_xticks(XTICKS)
    axes[i_ch].set_xticklabels(XTICKLABS)

    for epoch_arr in ch_dat:
        # if np.nanmax(epoch_arr) - np.nanmin(epoch_arr) > 20:
            # print(f'skip var of {np.var(epoch_arr)}')
            # continue

        axes[i_ch].plot(epoch_arr, alpha=.5,)


plt.tight_layout()

# show the figure; it was previously closed right after being built, so it
# was never displayed
plt.show()



Plot all sensor timeseries per epoch

In [None]:
from sklearn.linear_model import LinearRegression
from scipy.stats import pearsonr

In [None]:
# Per-epoch PCA over sensors to inspect (and optionally remove) artefact
# components.
#   EXECUTE_CLEAN = False: plot the first N_EPOCHS epochs with their PC timecourses
#   EXECUTE_CLEAN = True:  subtract the PCs in PC_IDX_PICK from every STIM epoch
#                          and write the result back into USE_EPOCHS
STIM = 'go_left'
# STIM = 'rest'

USE_EPOCHS = cleaned_epochs.copy()


EXECUTE_CLEAN = False
PC_IDX_PICK = [0, 2]
# NOTE(review): this rebinds cleaned_epochs to the STIM subset of opm_epochs,
# so re-running this cell starts from different data - confirm this is intended
cleaned_epochs = opm_epochs[STIM].copy()


XTICKLABS = [-1, 0, 1, 2, 3]
XTICKS = [int(recRaw.OPM_Z.info['sfreq']) * t
          for t in np.arange(len(XTICKLABS))]

N_PCS = 5  # number of principal components to compute and inspect
N_EPOCHS = 5  # take first 5 as example

# epoch data of the selected condition: (n_epochs, n_channels, n_samples)
stim_epochs = USE_EPOCHS[STIM]
stim_data = stim_epochs.get_data().copy()
acc_data = acc_epochs[STIM].get_data()

# Row positions of the STIM epochs inside USE_EPOCHS. The loop index i_ep
# counts within the STIM subset only, so it cannot be used directly to index
# USE_EPOCHS._data when USE_EPOCHS contains several event types.
stim_rows = np.flatnonzero(np.isin(USE_EPOCHS.selection, stim_epochs.selection))

fig, axes = plt.subplots(nrows=N_EPOCHS,
                         figsize=(8, 24))

for i_ep, epoch_arr in enumerate(stim_data):
    # without cleaning only the first N_EPOCHS are inspected, so there is no
    # need to fit a PCA on the remaining epochs
    if not EXECUTE_CLEAN and i_ep >= N_EPOCHS:
        break

    # get acc epoch to correlate with artifacts (used by the optional
    # correlation check below)
    acc_vectors = acc_data[i_ep, [0, 2]]

    ### test PCA for movement
    # samples are the observations, sensors the features
    pca = PCA(n_components=N_PCS, svd_solver='full')  # keep a few comps to inspect
    pca.fit(epoch_arr.T)
    explained = pca.explained_variance_ratio_
    # print(f'epoch # {i_ep}, explained variance: {explained}')

    # artefacts are subtracted from a copy, so every PC timecourse is
    # projected from the same, unmodified epoch
    cleaned_arr = epoch_arr.copy()

    for i_pc in range(N_PCS):
        pc_weights = pca.components_[i_pc]  # spatial weighting per component
        pc_vector = pc_weights @ epoch_arr  # component timecourse
        # # check correlation with acc
        # R, p = pearsonr(pc_vector, acc_vectors[0])
        # print(f'epoch-{i_ep}, PC-{i_pc} x ACC : R: %.2f (%.4f)' %(R,p))

        if not EXECUTE_CLEAN:
            # plot components in raw signal
            axes[i_ep].plot(pc_vector, alpha=.3, lw=5,
                            label=f'PC-{i_pc} (explains: {explained[i_pc]})')
            continue

        # if the artefact timecourse looks satisfactory, compute the
        # artefact ON UNFILTERED DATA
        if i_pc not in PC_IDX_PICK:
            continue
        # pc_vector_unfilt = pc_weights @ epoch_arr_unfilt
        artif_vector2d = np.outer(pc_weights, pc_vector)  # TODO: replace with pc_vector_unfilt
        # ACTUAL CLEANING SUBTRACTION
        # (was `epoch_arr =- artif_vector2d`, which replaced the epoch by the
        # negated artefact instead of subtracting the artefact)
        cleaned_arr -= artif_vector2d

    ### transfer cleaned data into clean-epochs
    if EXECUTE_CLEAN:
        USE_EPOCHS._data[stim_rows[i_ep], :, :] = cleaned_arr
        print(f'cleaned epoch # {i_ep}')

    ### PLOTTING PART

    if i_ep >= N_EPOCHS:
        continue

    axes[i_ep].set_title(f'epoch # {i_ep}')
    axes[i_ep].set_xlabel('Time vs Task-Cue (sec)')
    axes[i_ep].set_ylabel('signal (femto-Tesla)')

    axes[i_ep].set_xticks(XTICKS)
    axes[i_ep].set_xticklabels(XTICKLABS)

    for ch_sig in cleaned_arr:
        # if np.nanmax(ch_sig) - np.nanmin(ch_sig) > 20:
        #     print(f'skip var of {np.var(ch_sig)}')
            # continue

        axes[i_ep].plot(ch_sig, alpha=.8, lw=.5,)

    # labelled PC lines only exist in inspection mode
    if not EXECUTE_CLEAN:
        axes[i_ep].legend()


plt.tight_layout()

plt.show()



In [None]:
# Power spectral density of the epochs held in USE_EPOCHS (set in the cell above)
USE_EPOCHS.compute_psd().plot()

plt.show()

In [None]:
# Frequency bands (Hz) for which a band-power topography is drawn
FREQ_BANDS = {'theta/alpha': (4, 10),
              'beta': (13, 30),
              'mid-gamma': (60, 90)}

n_bands = len(FREQ_BANDS)
fig, axes = plt.subplots(1, n_bands, figsize=(3*n_bands, 3))

# one topomap per band, left to right in the order of FREQ_BANDS
for i_ax, band in enumerate(FREQ_BANDS):
    freq_tuple = FREQ_BANDS[band]
    print(band, freq_tuple)

    f_low, f_high = freq_tuple

    # Welch PSD restricted to the band
    psds, freqs = mne.time_frequency.psd_array_welch(
        USE_EPOCHS.get_data().copy(),
        sfreq=recRaw.OPM_Z.info['sfreq'],
        fmin=f_low,
        fmax=f_high,
        n_fft=int(recRaw.OPM_Z.info['sfreq']),
    )

    # average over the first and last axis, leaving one value per channel
    psds_plot = psds.mean(axis=(0, 2))

    band_ax = axes[i_ax]
    mne.viz.plot_topomap(
        psds_plot,
        opm_epochs[STIM].info,
        axes=band_ax,
        cmap="viridis",  # for diff "RdBu_r"
        sensors=True,  # show sensor dots
        outlines="head",  # no change for meg, should add head circle, ears, nose
        contours=1,
        show=False,
    )
    band_ax.set_title(f'{band}')

plt.show()


extended cleaning


test further cleaning, HFC does not converge, try ICA for specific (stationary) artefacts

In [None]:
recRaw.OPM_Z.get_data().shape

### Explore visualization

- calculate spectral envelopes (analytic signals) for theta, alpha, beta, gamma
- plot envelopes over 3-second epoch windows, averaged over channels and over epochs, resulting in a mean envelope over the course of a specific task
- compare contralateral vs ipsilateral hemisphere

- plot next to ACC-hand, and mean envelope from EMG per extremity (deltoid + brachioradialis)