<a href="https://colab.research.google.com/github/abelowska/mlNeuro/blob/main/2025/MLN_p3_LDA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Simple P300 Speller

Detect whether the participant sees a target or a non-target stimulus, using the [BI2015a dataset](https://neurotechx.github.io/moabb/generated/moabb.datasets.BI2015a.html#moabb.datasets.BI2015a).

In [None]:
!pip install moabb
!pip install mne

Now, **restart your session** and then run the next cells.

Imports

In [None]:
import moabb
import mne
import numpy as np
import matplotlib.pyplot as plt
import time
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from imblearn.under_sampling import RandomUnderSampler
from mne.decoding import LinearModel, Vectorizer, get_coef

Helpers

In [None]:
def get_test_data(session='1', subject=2, run='0'):
  """Build epochs from one recording, pre-processed like the training data.

  The recording is read from the notebook-level `data` mapping (indexed as
  `data[subject][session][run]`), so the data-fetching cell must have been
  run before this function is called. The stored recording itself is not
  modified: all processing happens on a copy.

  Steps: re-reference to T7/T8, IIR band-pass 0.1-30 Hz on EEG channels,
  notch filter at 50 Hz and its multiples below Nyquist, event detection,
  and epoching from -0.2 s to 0.6 s with a (-0.2, 0) baseline.

  Parameters
  ----------
  session : str
      Session key of the recording. Defaults to '1'.
  subject : int
      Subject key of the recording. Defaults to 2.
  run : str
      Run key of the recording. Defaults to '0'.

  Returns
  -------
  mne.Epochs
      Preloaded epochs for the 'Target' (2) and 'Non-Target' (1) events.
  """
  # Copy first: `set_eeg_reference` works in place and would otherwise
  # alter the recording cached in `data` for every later user of it.
  test_raw = data[subject][session][run].copy()

  # 1. re-reference: to almost-mastoids
  test_raw.set_eeg_reference(ref_channels=['T7', 'T8'])

  # 2. band-pass filter (in place on the private copy made above)
  test_raw_filtered = test_raw.filter(
      picks=['eeg'],
      l_freq=.1,
      h_freq=30.0,
      n_jobs=10,
      method='iir',
      iir_params=None
      )

  # 3. Notch filter at the power-line frequency and its multiples
  power_freq = 50
  nyquist_freq = test_raw_filtered.info['sfreq'] / 2

  test_raw_filtered = test_raw_filtered.notch_filter(
      picks=['eeg', 'eog'],
      freqs=np.arange(power_freq, nyquist_freq, power_freq),
      n_jobs=10,
  )

  # find events on the STIM channel
  events = mne.find_events(test_raw_filtered)

  # create events dict
  event_ids = {'Target': 2, 'Non-Target': 1}

  # create segments
  tmin = -0.2
  tmax = 0.6
  baseline = (-0.2,0)
  test_epochs = mne.Epochs(
      test_raw_filtered,
      events,
      event_id=event_ids,
      tmin=tmin,
      tmax=tmax,
      baseline=baseline,
      preload=True,
  )

  return test_epochs

## Prepare data

### 1. Fetch data

In [None]:
# Get data for one subject. It might take a while
dataset = moabb.datasets.BI2015a()
data = dataset.get_data(subjects=[2])

In [None]:
# Display the fetched object; later cells index it as data[subject][session][run].
data

Extract `MNE` `Raw` from the downloaded data

In [None]:
# Keys used to index the nested `data` object: subject -> session -> run.
subject = 2
session = '0'
run = '0'

# Select that single recording and display it as the cell output.
raw = data[subject][session][run]
raw

### 2. Simple Raw pre-processing

In [None]:
# "Before" view: time series and power spectrum of the unprocessed recording.
fig = raw.plot()
fig = raw.compute_psd().plot()

# 1. re-reference: to almost-mastoids
# 2. band-pass filter
# Both steps run on a copy, so `raw` (the same object that is stored in
# `data`) is left untouched and re-running this cell always shows the
# genuinely unprocessed signal in the "before" plots above.
raw_filtered = (
    raw.copy()
    .set_eeg_reference(ref_channels=['T7', 'T8'])
    .filter(
        picks=['eeg'],
        l_freq=.1,
        h_freq=30.0,
        n_jobs=10,
        method='iir',
        iir_params=None
    )
)

# 3. Notch filter at the power-line frequency and its multiples below Nyquist
power_freq = 50
nyquist_freq = raw_filtered.info['sfreq'] / 2

raw_filtered = raw_filtered.notch_filter(
    picks=['eeg', 'eog'],
    freqs=np.arange(power_freq, nyquist_freq, power_freq),
    n_jobs=10,
)

# "After" view: the same two plots for the filtered recording.
fig = raw_filtered.plot()
fig = raw_filtered.compute_psd().plot()

### 3. Create segments around stimuli

In [None]:
# find events on the STIM channel
events = mne.find_events(raw_filtered)

# create events dict: maps a readable condition name to its event code
event_ids = {'Target': 2, 'Non-Target': 1}

# create segments around each event, keeping EEG channels only;
# the pre-stimulus part of the window (-0.2 to 0) is used as the baseline
tmin = -0.2
tmax = 0.6
baseline = (-0.2,0)
epochs = mne.Epochs(
    raw_filtered,
    events,
    event_id=event_ids,
    picks="eeg",
    tmin=tmin,
    tmax=tmax,
    baseline=baseline,
    preload=True
)

# display the epochs summary as the cell output
epochs

### 4. Look into EEG signal for target and non-target stimuli

In [None]:
# create ERPs: average the epochs of each condition separately
target_erp = epochs['Target'].average()
nontarget_erp = epochs['Non-Target'].average()

# compare target and non-target ERPs on a single channel
picks = ['Cz']

# `invert_y=True` is passed so the y axis is drawn inverted
fig = mne.viz.plot_compare_evokeds(
    evokeds = {'target': target_erp, 'non-target': nontarget_erp},
    picks=picks,
    invert_y=True
)

## LDA model on balanced data

### Prepare data

In [None]:
def balance_epochs_data(epochs, seed=42):
  """Return a copy of `epochs` with the most frequent class undersampled.

  Class labels are read from the last column of `epochs.events`. Randomly
  chosen epochs of the most frequent class are dropped until that class has
  as many epochs as the least frequent one. The input object is not
  modified, and NumPy's global random state is left untouched.

  Parameters
  ----------
  epochs : mne.Epochs
      Epochs to balance; must provide `.events`, `.copy()` and `.drop()`.
  seed : int
      Seed of the private random generator used to choose which majority
      epochs are kept. Defaults to 42.

  Returns
  -------
  mne.Epochs
      A balanced copy of `epochs`.
  """
  y = epochs.events[:, -1]

  # find class indices
  classes, counts = np.unique(y, return_counts=True)
  minority_class = classes[np.argmin(counts)]
  majority_class = classes[np.argmax(counts)]

  minority_indices = np.where(y == minority_class)[0]
  majority_indices = np.where(y == majority_class)[0]

  print(f"Majority class ({majority_class}) n_samples: {len(majority_indices)}")
  print(f"Minority class ({minority_class}) n_samples: {len(minority_indices)}")

  # Randomly choose which majority samples to keep. A private RandomState
  # yields the same draws as `np.random.seed(seed)` + `np.random.choice`,
  # but without resetting the global generator used by the rest of the
  # notebook.
  rng = np.random.RandomState(seed)
  majority_indices_to_keep = rng.choice(
      majority_indices, size=len(minority_indices), replace=False
  )
  majority_indices_to_drop = np.setdiff1d(
      majority_indices, majority_indices_to_keep
  )

  print(f"Dropping: {len(majority_indices_to_drop)} samples from {majority_class} class.")

  # Drop the selected epochs from a copy, leaving the input intact
  epochs_balanced = epochs.copy().drop(majority_indices_to_drop)
  return epochs_balanced

In [None]:
# Balanced copy of the training epochs; `epochs` itself is not modified.
epochs_train_balanced = balance_epochs_data(epochs)

### LDA with MNE LinearModel

In [None]:
###### Creates data for fitting #################################
# Time window used as features; note that this reassigns the notebook-level
# `tmin`, `tmax` and `picks` that earlier cells used for epoching/plotting.
tmin = 0.25
tmax = 0.45
picks = ['eeg']

# `pick` and `crop` are applied to a copy so `epochs_train_balanced` keeps
# its full time range.
epochs_train_copy = epochs_train_balanced.copy()

X_train = epochs_train_copy.pick(picks=picks).crop(tmin=tmin, tmax=tmax)
y_train = epochs_train_copy.events[:, -1]  # class label = event code

###### Fit the simplest classification model #####################
### But now use Pipelines, Standard Scaler and MNE LinearModel ###
clf = make_pipeline(
    Vectorizer(),  # vectorize across time and channels
    StandardScaler(),  # normalize features across trials
    LinearModel(  # wraps an LDA classifier so its patterns/filters can be read later
        LinearDiscriminantAnalysis(solver='lsqr')
    )
).fit(X_train, y_train)
# Predictions on the training data itself (not a generalization estimate)
y_predicted = clf.predict(X_train)


###### Print classification results #################################
print(f"\nTrain results:\n{classification_report(y_true=y_train, y_pred=y_predicted)}")

#####################################################################
###### Test the model on data from another session ##################
#####################################################################
# The test epochs are balanced the same way as the training epochs
epochs_test = get_test_data()
epochs_test_balanced = balance_epochs_data(epochs_test)

# Same channel selection and time window as for training
epochs_test_copy = epochs_test_balanced.copy()
X_test = epochs_test_copy.pick(picks=picks).crop(tmin=tmin, tmax=tmax)
y_test = epochs_test_copy.events[:, -1]

y_test_predicted = clf.predict(X_test)
print(f"\nTest results:\n{classification_report(y_true=y_test, y_pred=y_test_predicted)}")

### Plot spatial patterns and filters

In [None]:
# Extract and plot patterns and filters of the fitted pipeline `clf`
for name in ("patterns_", "filters_"):
    # The `inverse_transform` parameter will call this method on any estimator
    # contained in the pipeline, in reverse order.
    coef = get_coef(clf, name, inverse_transform=True)
    # Wrap the coefficients in an Evoked object, reusing the channel info and
    # start time of the cropped test epochs, so MNE can draw topographic maps.
    evoked = mne.EvokedArray(coef, X_test.info, tmin=X_test.tmin)
    print(f"EEG {name[:-1]}")  # strip the trailing underscore for display
    fig = evoked.plot_topomap()

## Play with models, regularizations, and patterns

Try to implement the following models:
1. LDA with regularization (`shrinkage` parameter in `LinearDiscriminantAnalysis` class)
2. Logistic Regression with L2
3. Logistic Regression with L1


Note that for Logistic Regression, the parameter `C` is the inverse of the regularization strength, $C = 1 / \lambda$, so a smaller `C` means a stronger penalty.

In [None]:
# your code here