# Developing MEG Data Preprocessing Pipeline

## Overview
This notebook implements a standardized preprocessing pipeline for OPM-MEG data analysis. 

## Research Context
- **Subject**: variable
- **Data Type**: OPM-MEG recordings (.fif format)

## Preprocessing steps included:
-
-
-


### 0. Importing

In [None]:
# general packages
import json
import os
import importlib
import sys
import numpy as np
import pandas as pd
from itertools import compress
import matplotlib.pyplot as plt

# ephys packages
import mne
from mne.preprocessing import ICA, compute_proj_hfc
from mne.filter import filter_data, notch_filter

In [None]:
def add_repo_dir():
    """Add the local repository directory to ``sys.path``.

    Walks upwards from the current working directory until a directory
    whose path ends with ``'lid_opm'`` is found, so that modules of the
    repository can be imported from the notebook.

    Raises:
        ValueError: if no such directory exists at or above the current
            working directory.
    """

    wd = os.getcwd()

    # climb towards the filesystem root; the root is its own parent, which
    # gives a natural stop criterion without an arbitrary depth limit
    while not wd.endswith('lid_opm'):
        parent = os.path.dirname(wd)

        if parent == wd:
            raise ValueError('repo dir not found!')

        wd = parent

    print(f'add repo directory to sys: {wd} ')

    # re-running the notebook cell must not stack duplicate entries
    if wd not in sys.path:
        sys.path.append(wd)

    return

In [None]:
# add custom functions

add_repo_dir()

import utils.load_utils as load_utils

# from plot_functions import (plot_channels_comparison,
#                             plot_meg_2x3_grid,
#                             plot_ica_components,
#                             plot_all_channel_power_spectra)

# from config_manager import (load_and_display_config,
#                             validate_file_selection,
#                             preprocess_meg_data,
#                             remove_ica_artifacts,
#                             create_time_window_mask)

# from MEG_analysis_functions import apply_fastica_to_channels


## 1. Load data and first data visualization

Define:
- subject
- task
- configuration version
- rename channels to anatomical positions

raw data

In [None]:
importlib.reload(load_utils)

# Define subject and version
# SUB = '91'  # healthy control
# SUB = '94'  # fieldtrip tutorial data
SUB = '03'  #

# TASK has to be defined here: the file selection below filters on it
# TASK = 'a_rest'  # change for standardised task names and real recording
# TASK = 'ftTutorial'  # fieldtrip tutorial, median nerve stim, stim break stim, 2min pos1
# TASK = 'EmptyRoomNoiseShieldON'  # empty room test CCM
TASK = 'rest'

CONFIG_VERSION = "v1"

# per sensor axis: True keeps the channels of that axis in the MEG selection
MEG_AXIS_SEL = {'x': False, 'y': False, 'z': True}


# load settings
sub_config = load_utils.load_subject_config(subject_id=SUB,)
preproc_config = load_utils.load_preproc_config(version=CONFIG_VERSION,)

# define paths
subdata_dir = os.path.join(
    load_utils.get_onedrive_path('raw_data'),
    f'sub-{SUB}', 'OPM_MEG'
)
files = os.listdir(subdata_dir)  # get available files

# select file for task (case-insensitive match on the file name)
task_files = [f for f in files if TASK.lower() in f.lower()]
if not task_files:
    # fail with an explicit message instead of an opaque IndexError
    raise FileNotFoundError(
        f'no file containing task "{TASK}" found in {subdata_dir}'
    )
sel_file = task_files[0]
sel_filepath = os.path.join(subdata_dir, sel_file)  # get file path


In [None]:
importlib.reload(load_utils)

##  Convert opm source to raw data

In [None]:
# version tag passed to load_preproc_config
CONFIG_VERSION = "v1"

SUB = '03'  # subject id passed to load_subject_config

# recording selection; both names are reassigned by the conversion loop further down
TASK = 'rest'
ACQ = 'predopa'

# per sensor axis: True keeps the channels of that axis in the MEG selection
MEG_AXIS_SEL = {'x': False, 'y': False, 'z': True}


# load settings
sub_config = load_utils.load_subject_config(subject_id=SUB,)
preproc_config = load_utils.load_preproc_config(version=CONFIG_VERSION,)
# recording meta-info; its 'rec_name' entries are iterated in the conversion loop
sub_meta_info = load_utils.get_sub_rec_metainfo(config_sub=sub_config)




get data and meta-data for specific recording (task, acquisition)

In [None]:
from preprocessing_meg import load_source_opm as source_opm



In [None]:
# sensor registry of this subject; its '<axis>_ch' entries are printed further down and
# the registry is handed to select_and_store_axis_data
# (STORE=True presumably also writes it to disk - TODO confirm in load_source_opm)
sensor_reg = source_opm.get_sensor_info(sub_config, STORE=True)


In [None]:
# use the subject selected above instead of a hard-coded id
source_opm.load_sensor_coords(sub=SUB,)

In [None]:
sub_meta_info

convert data from source to raw

In [None]:
importlib.reload(source_opm)

# convert every recording listed in the subject meta-info, one axis at a time
for REC in sub_meta_info['rec_name']:
    print(REC)

    # recording names are expected to split into exactly '<task>_<acquisition>';
    # anything else (wrong number of parts, non-string entry) is skipped.
    # Only these two errors are caught so that e.g. a KeyboardInterrupt
    # is not silently swallowed.
    try:
        TASK, ACQ = REC.split('_')
    except (ValueError, AttributeError):
        print(f'\n##### WARNING: {REC} skipped\n')
        continue

    # get full meg data from source file
    (
        megtimes, megdata, meg_sfreq, meg_chnames, meg_trigger
    ) = source_opm.extract_opm_sourcedata(
        sub_config=sub_config,
        ACQ=ACQ, TASK=TASK,
    )

    # select and store the data of each sensor axis separately
    for AX_sel in ['X', 'Y', 'Z']:
        (
            axdata, axtimes
        ) = source_opm.select_and_store_axis_data(
            meg_data=megdata, meg_times=megtimes,
            MEG_SFREQ=meg_sfreq, meg_chnames=meg_chnames,
            sensor_reg=sensor_reg,
            sub_meta=sub_meta_info,
            sub_config=sub_config,
            AX=AX_sel, STORE=True, ACQ=ACQ, TASK=TASK
        )

load one axis from one recording

In [None]:
importlib.reload(source_opm)

# LOAD STORED DATA and TIMES directly
# (LOAD=True is used here instead of passing the data arrays and STORE=True)

# axis and acquisition to load; the task is fixed to 'rest' in the call below
AX_sel = 'Z'
ACQ = 'dopa55'

axdata, axtimes = source_opm.select_and_store_axis_data(
    AX=AX_sel, ACQ=ACQ, TASK='rest',
    sub_config=sub_config, LOAD=True,
)

In [None]:
importlib.reload(source_opm)



# build the `raw` object used by all following cells from the loaded axis data
# (presumably an mne Raw object, it is used as such below - TODO confirm)
raw = source_opm.load_raw_opm_into_mne(
    meg_data=axdata, AX=AX_sel, sub_config=sub_config,)

In [None]:
# check sensor locations

# fig = raw.plot_sensors(kind="3d")
# fig = raw.plot_sensors(kind="topomap", show_names=True)

test topogram

visualise to check

In [None]:
# list index and name of every channel registered for the selected axis
for i, ch in enumerate(sensor_reg[f'{AX_sel}_ch']):

    print(i, ch)
    
    # optional quick look at the trace of channel i (disabled)
    # plt.plot(axdata[:, i])

    # plt.show()

#### Import raw data in MNE format

In [None]:
# ---- LOAD AND EXPLORE DATA ----
# Splits `raw` into MEG data (`meg_dat`) and trigger data (`trigger_dat`),
# optionally restricts the MEG data to the selected sensor axes, and prints
# a short summary of all three.

# Load the MEG data
print(f"Loading file: {sel_file}")
# raw = mne.io.read_raw_fif(sel_filepath, preload=True, verbose=False)
print(f"original Raw channel names: {raw.ch_names}")


# Get MEG channel mapping from config
meg_channel_map = sub_config['meg_channels']

# select MEG channels
if not any(ch in raw.ch_names for ch in meg_channel_map.values()):
    # none of the configured names is present: fall back to channel types
    meg_sel = mne.pick_types(raw.info, meg=True, stim=False, exclude=[])
    meg_dat = raw.copy().pick(np.array(raw.ch_names)[meg_sel])
    # select trigger
    trigger_sel = mne.pick_types(raw.info, meg=False, stim=True, exclude=[])
    if len(trigger_sel) > 0:
        trigger_dat = raw.copy().pick(np.array(raw.ch_names)[trigger_sel])
    else:
        trigger_dat = None

else:
    # select MEG channels
    meg_dat = raw.copy().pick(list(meg_channel_map.values()))
    # rename channels according to anatomical names from map
    meg_dat.rename_channels({old: new for new, old in meg_channel_map.items()})
    # Get trigger channel mapping from config
    trigger_channel_map = sub_config['trigger_channels']
    # select trigger channels
    trigger_dat = raw.copy().pick(list(trigger_channel_map.values()))
    # rename channels according to the names from the trigger map
    trigger_dat.rename_channels({old: new for new, old in trigger_channel_map.items()})


# keep only the channels of the axes flagged True in MEG_AXIS_SEL
if not all(MEG_AXIS_SEL.values()):
    sel_chs = []
    for ax, axbool in MEG_AXIS_SEL.items():
        if axbool:
            sel_chs.extend([c for c in meg_dat.ch_names if f'_b{ax}' in c])
    meg_dat = meg_dat.copy().pick(sel_chs)



print(f"\nData loaded successfully for {sub_config['subject_id']}")
print("="*50)
print("DATA EXPLORATION:")
print("="*50)

# Basic data information
print(f"Data shape: {raw.get_data().shape}")
print(f"Number of channels: {raw.info['nchan']}")
print(f"Number of time points: {len(raw.times)}")
print(f"Sampling frequency: {raw.info['sfreq']} Hz")
print(f"Duration: {raw.times[-1]:.2f} seconds")

print(f"\n\n{'='*50}\nMEG DATA SELECTION:\n{'='*50}")

# Basic data information (report the selection itself, not the full raw object)
print(f"Data shape: {meg_dat.get_data().shape}")
print(f"Number of MEG-channels selected: {meg_dat.info['nchan']}")
print(f"MEG channel names: {meg_dat.ch_names}")


# Basic trigger data information
print(f"\n\n{'='*50}\nTRIGGER SELECTION:\n{'='*50}")
# explicit None check: do not rely on the truth value of the data object
if trigger_dat is not None:
    print(f"Data shape: {trigger_dat.get_data().shape}")
    print(f"Number of trigger channels selected: {trigger_dat.info['nchan']}")
    print(f"Trigger channel names: {trigger_dat.ch_names}")
else:
    print(trigger_dat)

## todo: import behavioral lsl data

In [None]:
import pyxdf

In [None]:
# directory with the behavioural (LSL / xdf) recordings of the selected subject
subdata_dir = os.path.join(
    load_utils.get_onedrive_path('source_data'),
    f'sub-{SUB}', 'EMG_ACC'
)

# only consider xdf recordings and sort them, so that the selected file does
# not depend on the arbitrary order returned by os.listdir
xdf_files = sorted(
    f for f in os.listdir(subdata_dir)
    if f.lower().endswith(('.xdf', '.xdfz'))
)
if not xdf_files:
    raise FileNotFoundError(f'no .xdf recording found in {subdata_dir}')
file = xdf_files[0]  # first available recording

streams, fileheader = pyxdf.load_xdf(os.path.join(subdata_dir, file))


In [None]:

# summarise every stream loaded from the xdf file
print(f"Found {len(streams)} streams")
for i, stream in enumerate(streams):
    print(f"\n--- Stream {i} ---")
    # the info fields are indexed with [0] to get the plain value
    print("Name:", stream['info']['name'][0])
    print("Type:", stream['info']['type'][0])
    print("Channel count:", stream['info']['channel_count'][0])
    print("First 5 samples:", stream['time_series'][:5])

In [None]:
gamestream = streams[0]

In [None]:
gamestream.keys()

In [None]:
print(gamestream['time_series'][:10])

print(gamestream['time_stamps'][:10])

Visualization of imported data

In [None]:
# interactive MNE browser of the first seconds
fig = meg_dat.plot(start=0, duration=2,)



# one subplot per channel showing the first N_SEC seconds
fig_dat = meg_dat.get_data()
N_SEC = 1
n_samples = int(N_SEC * meg_dat.info['sfreq'])

# squeeze=False always returns an array of Axes, also for a single channel
# (plt.subplots(1) would return a bare Axes that cannot be indexed)
fig, axes = plt.subplots(fig_dat.shape[0], squeeze=False)
axes = axes.ravel()
for i_ax in np.arange(fig_dat.shape[0]):
    axes[i_ax].plot(fig_dat[i_ax, :n_samples])

# for ax in axes:
#     ax.

plt.show()

## 2- Resample and Filter

The following preprocessing steps will be applied to the MEG data to prepare it for analysis:

**Configuration Parameters:**
- Target sampling frequency: 512 Hz
- Bandpass filter: 1-100 Hz  
- Notch filter frequencies: 50, 100, 150 Hz

Resample

In [None]:
meg_dat = raw.copy()

In [None]:
# resample
# only downsample: when the original rate is not above the target rate,
# the data are left unchanged and both rates are printed
if meg_dat.info['sfreq'] <= preproc_config['TARGET_SFREQ']:
    print(f'original sampling rate {meg_dat.info["sfreq"]} vs defined new rate {preproc_config["TARGET_SFREQ"]}')
else:
    print(f'resample original sampling rate {meg_dat.info["sfreq"]} to {preproc_config["TARGET_SFREQ"]}')
    meg_dat = meg_dat.resample(preproc_config['TARGET_SFREQ'], verbose=False)

# # convert to picoTesla
# meg_dat._data = meg_dat.get_data() * 1e12

Filter

- Bandpass filter
- Notch filter

In [None]:
# Bandpass filter; Raw.filter() works in place and returns the Raw object,
# so it is applied to a copy and the sampling rate is taken from the object
meg_dat = meg_dat.copy().filter(
    l_freq=preproc_config['BANDPASS_LOW'],
    h_freq=preproc_config['BANDPASS_HIGH'],
    method='fir', verbose=False,
)

# Apply notch filters (line noise and harmonics), one frequency after the other.
# Raw.notch_filter() keeps the data inside the Raw object (no write to the
# private `_data` attribute) and by default filters data channels only.
for freq in preproc_config['NOTCH_FREQS']:
    meg_dat = meg_dat.notch_filter(
        freqs=freq,
        verbose=False,
    )

In [None]:
# fig = meg_dat.plot(start=0, duration=1,)



# fig_dat = meg_dat.get_data()
# N_SEC = 1

# fig, axes = plt.subplots(fig_dat.shape[0])
# for i_ax in np.arange(fig_dat.shape[0]):
    # axes[i_ax].plot(fig_dat[i_ax, :int(N_SEC * meg_dat.info['sfreq'])])

# for ax in axes:
#     ax.

# plt.show()

#### Homogeneous Field Correction

Requirements:
- sufficient number of channels
- sufficient head coverage with sensors
- anatomical coordinates of sensors (in FreeSurfer Coordinate Space)

In [None]:
# Compute HFC projectors
proj_hfc = compute_proj_hfc(meg_dat.info, order=preproc_config['HFC_ORDER'])

# Apply HFC projectors
temp_dat = meg_dat.copy()
meg_dat.add_proj(proj_hfc)
meg_dat.apply_proj()

In [None]:
# interactive MNE browser of the first seconds after field correction
fig = meg_dat.plot(start=0, duration=2,)



# one subplot per channel showing the first N_SEC seconds
fig_dat = meg_dat.get_data()
N_SEC = 1
n_samples = int(N_SEC * meg_dat.info['sfreq'])

# squeeze=False always returns an array of Axes, also for a single channel
# (plt.subplots(1) would return a bare Axes that cannot be indexed)
fig, axes = plt.subplots(fig_dat.shape[0], squeeze=False)
axes = axes.ravel()
for i_ax in np.arange(fig_dat.shape[0]):
    axes[i_ax].plot(fig_dat[i_ax, :n_samples])

# for ax in axes:
#     ax.

plt.show()

In [None]:
# detect events on the 'trigger1' channel and cut the MEG data into epochs
# from 0.1 s before to 0.3 s after each event, without baseline correction
# NOTE(review): trigger_dat can be None (no stim channel found during channel selection)
# and is not resampled in this notebook, whereas meg_dat may have been resampled -
# confirm that the event sample indices still match meg_dat before epoching.
events = mne.find_events(trigger_dat, stim_channel='trigger1')
epochs = mne.Epochs(meg_dat, events=events, tmin=-.1, tmax=.3, baseline=None)

In [None]:
evoked = epochs.average()
plot = evoked.plot()

fig = evoked.plot_joint()


In [None]:
meg_dat.info['chs']

### 2.2- ICA analysis:

**ICA Configuration:**
- **Method**: FastICA algorithm
- **Components**: 7 (equal to number of MEG channels)
- **Input**: Preprocessed MEG data (512 Hz, filtered)

In [None]:
from sklearn.decomposition import FastICA

In [None]:
# ---- ICA ANALYSIS ON MEG CHANNELS ----
# Fits scikit-learn FastICA on the current meg_dat and keeps the component
# time courses in `components` (one row per component).
print("\n" + "="*60)
print("APPLYING ICA TO MEG CHANNELS")
print("="*60)

# Apply FastICA to the preprocessed MEG data
N_ICA_COMPONENTS = len(meg_dat.ch_names)  # one component per channel
ICA_RANDOM_STATE = 0  # For reproducibility
ICA_MAX_ITER = 1000   # Maximum iterations for convergence (currently not passed to FastICA, see below)

# Apply FastICA:
ica = FastICA(
    n_components=N_ICA_COMPONENTS,
    random_state=ICA_RANDOM_STATE,
    # max_iter=ICA_MAX_ITER
)
components = ica.fit_transform(meg_dat.get_data().T).T  # Transpose to (n_times, n_channels), then back

# Create component names for plotting
ICA_COMPONENT_NAMES = [f"ICA-{i+1}" for i in range(N_ICA_COMPONENTS)]
print(f"ICA component names: {ICA_COMPONENT_NAMES}")

In [None]:
from scipy.signal import welch

In [None]:
# time course of the first ICA component (first 10000 samples)
plt.plot(components[0][:10000])

plt.show()

In [None]:
# Welch power spectrum of the fourth ICA component (index 3);
# the segment length equals the sampling rate, i.e. segments of one second
f, pxx = welch(components[3], fs=meg_dat.info['sfreq'],
                  nperseg=meg_dat.info['sfreq'],)

plt.plot(f, pxx)
plt.show()

### 2.3- Visualise ICA components:

**Objective**: Inspect the time courses of the ICA components. The length of the time window can be adjusted to get either an overall or a more detailed view.

In [None]:
# ---- VISUALIZE ICA COMPONENTS (60-SECOND WINDOW) ----
# NOTE(review): PREPROCESSED_TIME, ica_signals, SUBJECT_ID and CONDITION are not
# defined in the visible part of this notebook, and the import of plot_ica_components
# is commented out at the top - this cell presumably needs to be adapted to
# the current variables (e.g. `components`, `meg_dat.times`); TODO confirm.


# Define ICA visualization window
ICA_WINDOW_DURATION = 60  # sec
ICA_START_TIME = 10       # sec, start of the window

# Create mask for ICA window
ica_mask = (PREPROCESSED_TIME >= ICA_START_TIME) & (PREPROCESSED_TIME <= ICA_START_TIME + ICA_WINDOW_DURATION)

# Extract windowed data
ica_time_window = PREPROCESSED_TIME[ica_mask]
ica_signals_window = ica_signals[:, ica_mask]  # mask is applied along the second axis

plot_ica_components(
    ica_signals=ica_signals_window,
    time=ica_time_window,
    axis_label="MEG",
    rec_label=f"{SUBJECT_ID} - {CONDITION} ({ICA_WINDOW_DURATION}s: Between {ICA_START_TIME}-{ICA_START_TIME + ICA_WINDOW_DURATION} sec)",
)

print("\n" + "="*60)
print(f"VISUALIZING ICA COMPONENTS - {ICA_WINDOW_DURATION} SECOND WINDOW")
print("="*60)

### 2.4- Select the artifacts:

- **Choose which ICA components you want to remove.**

In [None]:
# ---- USER CONFIGURATION: ARTIFACT COMPONENTS TO REMOVE ----
# NOTE(review): remove_ica_artifacts is only imported in a commented-out block at
# the top, and preprocessed_meg_channels, ica_signals and ica_model are not defined
# in the visible part of this notebook - TODO adapt before running.
print("\n" + "="*60)
print("ICA ARTIFACT REMOVAL CONFIGURATION")
print("="*60)

# USER DEFINES WHICH COMPONENTS ARE ARTIFACTS (1-based: ICA-1, ICA-2, etc.)
ARTIFACT_COMPONENTS = [3, 6]  # Example: Remove ICA-3 and ICA-6

# Remove artifacts from preprocessed data
cleaned_meg_channels = remove_ica_artifacts(
    preprocessed_channels=preprocessed_meg_channels,
    ica_signals=ica_signals,
    ica_model=ica_model,
    artifact_components=ARTIFACT_COMPONENTS,
    verbose=True
)


In [None]:
# ---- VISUALIZE CLEANED DATA POWER SPECTRA ----
# NOTE(review): plot_all_channel_power_spectra is only imported in a commented-out
# block at the top, and TARGET_SFREQ, MEG_CHANNEL_NAMES, SUBJECT_ID and CONDITION are
# not defined in the visible part of this notebook - TODO adapt before running.
print("\n" + "="*60)
print("POWER SPECTRUM ANALYSIS: CLEANED MEG CHANNELS")
print("="*60)

print(f"Analyzing {len(cleaned_meg_channels)} cleaned MEG channels")
print(f"Sampling frequency: {TARGET_SFREQ} Hz")

# spectra of the channels returned by the artifact-removal cell
plot_all_channel_power_spectra(
    channels=cleaned_meg_channels,
    channel_names=MEG_CHANNEL_NAMES,
    title=f"{SUBJECT_ID} - {CONDITION} (Artifacts Removed: ICA {ARTIFACT_COMPONENTS})",
    sfreq=TARGET_SFREQ,
    window_length=1.0,
    overlap=0.5,
    freq_range=(1, 100)
)

### test topogram


In [None]:
# Topographic maps of the mean band power in three frequency bands,
# saved as one figure in the 'explore' figure folder.
fname = f'sub{SUB}_{ACQ}_{TASK}_exploring_topogram'
figdir = os.path.join(load_utils.get_onedrive_path('figures'), 'explore')
os.makedirs(figdir, exist_ok=True)  # savefig does not create missing folders
figpath = os.path.join(figdir, fname)

fig, axes = plt.subplots(1, 3, figsize=(12, 4))


for i, (band, fband) in enumerate(
    zip(['low-freq', 'beta', 'gamma'],
        [[4, 12], [15, 30], [65, 85]])
):

    # Band-pass filter the preprocessed data
    raw_band = meg_dat.copy().filter(fband[0], fband[1],
                                     fir_design="firwin",)

    # Compute PSD (Power Spectral Density).
    # The sampling rate must come from the object that provides the data:
    # meg_dat may have been resampled, so raw.info['sfreq'] can differ.
    psds, freqs = mne.time_frequency.psd_array_welch(
        raw_band.get_data(),
        sfreq=raw_band.info['sfreq'],
        fmin=fband[0],
        fmax=fband[1],
        n_fft=750
    )

    # Average across frequencies in the band
    psd_mean = psds.mean(axis=-1)  # shape (n_channels,)

    # Pick MEG sensors, using the channel info that belongs to the data
    picks = mne.pick_types(raw_band.info, meg=True, eeg=False)

    # Plot topomap into the subplot of this band
    topofig, topoax = mne.viz.plot_topomap(
        psd_mean[picks],
        raw_band.info,
        cmap="viridis",
        show=False,
        axes=axes[i],
    )
    axes[i].set_title(f'{band} activity ({fband[0]}-{fband[1]} Hz)',
                      size=16,)

plt.tight_layout()

plt.savefig(figpath, dpi=300, facecolor='w',)

plt.show()
