<a href="https://colab.research.google.com/github/abelowska/mlNeuro/blob/main/MLN_first_ml_model_exercises.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# BCI classification model

We are going to use the open [EEG Motor Movement/Imagery Dataset](https://physionet.org/content/eegmmidb/1.0.0/S001/#files-panel) to classify **imagining the opening and closing of left or right fists**.

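You can download the .zip file containing the already prepared `Epochs` of the first 10 participants here: *(TODO: add the download link)*. Unpack it so that the files are available as `./data/subj_1-epo.fif` … `./data/subj_10-epo.fif`. Each `Epochs` file consists of two types of events: *left* and *right*.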

In [None]:
!pip install mne

Imports

In [None]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import mne
from mne.datasets import eegbci
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, classification_report
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.feature_selection import SelectPercentile

## Read data

In [None]:
data_dir = Path('./data')

# one pre-epoched file per subject, named subj_<n>-epo.fif for n = 1..10
subject_files = [data_dir / f'subj_{idx}-epo.fif' for idx in range(1, 11)]

# read the files in subject order and collect the Epochs objects
epochs_subjects = []
for fname in subject_files:
  print(fname)
  epochs = mne.read_epochs(fname)
  epochs_subjects.append(epochs)

## Visualize data

In [None]:
# merge the per-subject Epochs objects into one Epochs object used by all cells below
all_epochs = mne.concatenate_epochs(epochs_subjects)

1. Joint plot per condition

In [None]:
# time points (in seconds) at which the joint plot shows a scalp map
joint_times = [-1, 0.5, 0.8, 1.5, 2, 3, 4]

# same joint plot of the averaged signal for each condition, left first
for condition in ('left', 'right'):
  fig = all_epochs[condition].average().plot_joint(times=joint_times)

2. Single-channel plots

In [None]:
picks = ['C3', 'Cz', 'C4']

# one Evoked per epoch for each condition (presumably passed as lists so that
# plot_compare_evokeds can show the average together with its variability)
evokeds = dict(
    left=list(all_epochs["left"].iter_evoked()),
    right=list(all_epochs["right"].iter_evoked()),
)

# plot_compare_evokeds opens its own figure on every call, so no figure is
# created beforehand: a pre-created one would only be displayed empty
for pick in picks:
  fig = mne.viz.plot_compare_evokeds(evokeds, picks=pick)
  plt.show()

3. Compare spectral representation (PSD) of conditions:

In [None]:
# frequency range (Hz) of the power spectra
fmin, fmax = 1, 30

# power spectral density of the epochs of each condition, limited to fmin..fmax
psd_range = dict(fmin=fmin, fmax=fmax)
spectrum_left = all_epochs["left"].compute_psd(**psd_range)
spectrum_right = all_epochs["right"].compute_psd(**psd_range)

In [None]:
# plot the spectrum of each condition (EEG channels only, bad channels left out)
for spectrum in (spectrum_left, spectrum_right):
  fig = spectrum.plot(picks="eeg", exclude="bads")

In [None]:
# frequency bands to map: label -> (lower, upper) limit in Hz
bands = {
    'Delta (0-4 Hz)': (0, 4),
    'Theta (4-8 Hz)': (4, 8),
    'Alpha (8-12 Hz)': (8, 12),
    'Beta (12-30 Hz)': (12, 30)
}

# one row of normalized band topomaps per condition, left first
for spectrum in (spectrum_left, spectrum_right):
  fig = spectrum.plot_topomap(bands=bands, normalize=True)

## Classification

In [None]:
def estimate_model(
    X_train,
    X_test,
    y_train,
    y_test,
    model=None
):
  """Fit a model on the training data and print classification reports.

  Parameters
  ----------
  X_train, X_test
      Feature matrices used for fitting and for evaluation.
  y_train, y_test
      Labels matching X_train and X_test.
  model : object with `fit` and `predict` methods, optional
      Estimator to fit. When omitted, a new `SVC()` is created on every call.

  Returns
  -------
  The fitted model.
  """
  # create the default here instead of in the signature: a default written as
  # `model=SVC()` is evaluated once, so all default calls would share (and
  # return) one and the same estimator object
  if model is None:
    model = SVC()

  # fit
  model.fit(X_train, y_train)

  # predict test and train data
  y_test_predicted = model.predict(X_test)
  y_train_predicted = model.predict(X_train)

  # report on both sets so that train and test performance can be compared
  print(f'Classification report for testing data:\n{classification_report(y_test, y_test_predicted)}')
  print(f'Classification report for training data:\n{classification_report(y_train, y_train_predicted)}')

  return model

### 1. Knowledge-based interpretable features


#### 1.1 Time domain features

- just pipe all data to the model

In [None]:
# work on a copy of the concatenated epochs
epochs = all_epochs.copy()

# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

# features: the complete signal array, flattened to one row per epoch
X_data = epochs.get_data(copy=True)
n_samples = len(epochs)
X = X_data.reshape(n_samples, -1)

print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# hold out the last 135 epochs for testing and train on everything before them
n_test = 135
X_train, X_test = X[:-n_test], X[-n_test:]
y_train, y_test = y[:-n_test], y[-n_test:]

# fit the default model and print the test/train classification reports
_ = estimate_model(X_train, X_test, y_train, y_test)

- extract the mean amplitude in 0 - 1 s time-window from the C3 channel. This approach is knowledge-based, as it relies on the knowledge gained after visualizing the evoked potentials.

In [None]:
# create X and y datasets (on a copy of the concatenated epochs)
epochs = all_epochs.copy()

# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

# EXERCISE: extract the C3 data of the 0 - 1 s window with get_data() and
# average it over the time axis, so that each epoch yields one value
X = # your code here

# reshape X to (n_samples, n_features) shape
X = X.reshape(len(epochs), -1)

print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# hold out the last 135 epochs for testing and train on everything before them
n_test = 135
X_train, X_test = X[:-n_test], X[-n_test:]
y_train, y_test = y[:-n_test], y[-n_test:]

# fit the default model and print the test/train classification reports
_ = estimate_model(X_train, X_test, y_train, y_test)

- extract the mean amplitude in the 0 - 1 s time window on the FC3, FCz, FC4, C3, Cz, and C4 channels

In [None]:
# create X and y datasets (on a copy of the concatenated epochs)
epochs = all_epochs.copy()

# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

# channels whose mean amplitude in the 0 - 1 s window becomes one feature each
picks = ['FC3', 'FCz', 'FC4','C3', 'Cz', 'C4']
X = # your code here

# reshape X to (n_samples, n_features) shape
X = X.reshape(len(epochs), -1)

print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# hold out the last 135 epochs for testing and train on everything before them
n_test = 135
X_train, X_test = X[:-n_test], X[-n_test:]
y_train, y_test = y[:-n_test], y[-n_test:]

# fit the default model and print the test/train classification reports
_ = estimate_model(X_train, X_test, y_train, y_test)

#### 1.2 Frequency domain features

- just pipe all data to the model

In [None]:
# work on a copy of the concatenated epochs
epochs = all_epochs.copy()

# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

# features: the complete power spectrum of every epoch, flattened to one row per epoch
spectrum = epochs.compute_psd()
X_data = spectrum.get_data()
n_samples = len(epochs)
X = X_data.reshape(n_samples, -1)

print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# hold out the last 135 epochs for testing and train on everything before them
n_test = 135
X_train, X_test = X[:-n_test], X[-n_test:]
y_train, y_test = y[:-n_test], y[-n_test:]

# fit the default model and print the test/train classification reports
_ = estimate_model(X_train, X_test, y_train, y_test)

- extract the mean power of alpha from the C3 channel **from time window of 0 - 1 s**. Mind that you have to crop your data first and only then run the `compute_psd()` method.

In [None]:
# create X and y datasets (on a copy of the concatenated epochs)
epochs = all_epochs.copy()

# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

# EXERCISE step 1: crop data from 0 to 1 s
cropped_epochs = # your code here

# EXERCISE step 2: perform FFT decomposition of the cropped epochs (compute_psd())
spectrum_epochs = # your code here

# EXERCISE step 3: extract features, i.e. average the alpha-band frequencies on C3
X = # your code here

# reshape X to (n_samples, n_features) shape
X = X.reshape(len(epochs), -1)

print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# hold out the last 135 epochs for testing and train on everything before them
n_test = 135
X_train, X_test = X[:-n_test], X[-n_test:]
y_train, y_test = y[:-n_test], y[-n_test:]

# fit the default model and print the test/train classification reports
_ = estimate_model(X_train, X_test, y_train, y_test)

- extract the mean power of alpha from the FC3, FCz, FC4, C3, Cz, and C4 channels from time window of 0 - 1 s.

In [None]:
# create X and y datasets (the setup is needed only once per cell)
epochs = all_epochs.copy()

# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

# channels whose mean alpha power in the 0 - 1 s window becomes one feature each
picks = ['FC3', 'FCz', 'FC4','C3', 'Cz', 'C4']

# your code here
# X =

# reshape X to (n_samples, n_features) shape
# NOTE: until X is assigned above, this reuses the X left over from an earlier cell
X = X.reshape(len(epochs), -1)

print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# hold out the last 135 epochs for testing and train on everything before them
n_test = 135
X_train, X_test = X[:-n_test], X[-n_test:]
y_train, y_test = y[:-n_test], y[-n_test:]

# fit the default model and print the test/train classification reports
_ = estimate_model(X_train, X_test, y_train, y_test)

#### 1.3 Combined time and frequency domain features

- extract the mean power of alpha from the FC3, FCz, FC4, C3, Cz, and C4 channels from time window of 0 - 1 s.
- extract the mean amplitude in time window 0 - 1 s from the FC3, FCz, FC4, C3, Cz, and C4 channels.

In [None]:
picks = ['FC3', 'FCz', 'FC4','C3', 'Cz', 'C4']

# create X and y datasets (the setup is needed only once per cell)
epochs = all_epochs.copy()

# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

# spectral features
# your code here
# X_spectrum =

# time-domain features
# your code here
# X_time =

# concatenate features and reshape X to (n_samples, n_features) shape
# use np.concatenate()
# X =

# NOTE: until X is assigned above, this prints the shape of the X left over from an earlier cell
print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# hold out the last 135 epochs for testing and train on everything before them
n_test = 135
X_train, X_test = X[:-n_test], X[-n_test:]
y_train, y_test = y[:-n_test], y[-n_test:]

# fit the default model and print the test/train classification reports
_ = estimate_model(X_train, X_test, y_train, y_test)

#### 1.4 Combined time and frequency domain features and feature selection

- extract the mean power of alpha from the FC3, FCz, FC4, C3, Cz, and C4 channels from time window of 0 - 1 s.
- extract the mean amplitude in time window 0 - 1 s from the FC3, FCz, FC4, C3, Cz, and C4 channels.
- select K=1 best features

In [None]:
picks = ['FC3', 'FCz', 'FC4','C3', 'Cz', 'C4']

# create X and y datasets (the setup is needed only once per cell)
epochs = all_epochs.copy()

# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

# spectral features
# your code here
# X_spectrum =

# time-domain features
# your code here
# X_time =

# concatenate features and reshape X to (n_samples, n_features) shape
# use np.concatenate()
# X =

# NOTE: until X is assigned above, this prints the shape of the X left over from an earlier cell
print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# hold out the last 135 epochs for testing and train on everything before them
n_test = 135
X_train, X_test = X[:-n_test], X[-n_test:]
y_train, y_test = y[:-n_test], y[-n_test:]

# pipeline: standardize the features, keep the single feature that scores
# best under f_classif, then classify with an SVM
scaler = StandardScaler()
selector = SelectKBest(f_classif, k=1)
classifier = SVC()
model = make_pipeline(scaler, selector, classifier)

# keep the fitted pipeline so that the selected feature can be inspected afterwards
model = estimate_model(X_train, X_test, y_train, y_test, model=model)

Check what feature was selected

In [None]:
# extract feature selection step from the pipeline
# (second-to-last step: the selector sits directly before the final classifier)
f_selection_step = model[-2]

# print the name(s) of the feature(s) kept by the selector
# (presumably generic names such as "x3", because the pipeline was fitted on plain arrays; verify)
print(f_selection_step.get_feature_names_out())

---
- extract the mean power of alpha from the FC3, FCz, FC4, C3, Cz, and C4 channels from time window of 0 - 1 s.
- extract the mean amplitude in time window 0 - 1 s from the FC3, FCz, FC4, C3, Cz, and C4 channels.
- select the top 10% of features (the best-scoring 10th percentile). Use [`SelectPercentile`](https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectPercentile.html).

In [None]:
picks = ['FC3', 'FCz', 'FC4','C3', 'Cz', 'C4']

# create X and y datasets (the setup is needed only once per cell)
epochs = all_epochs.copy()

# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

# spectral features
# your code here
# X_spectrum =

# time-domain features
# your code here
# X_time =

# concatenate features and reshape X to (n_samples, n_features) shape
# use np.concatenate()
# X =

# NOTE: until X is assigned above, this prints the shape of the X left over from an earlier cell
print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# fit and predict: the last 135 epochs form the test set, all earlier ones the training set
X_train = X[:-135]
X_test = X[-135:]
y_train = y[:-135]
y_test = y[-135:]

# EXERCISE: scale the data and select the top 10% of features (SelectPercentile)
# NOTE: while the pipeline below stays commented out, `model` still refers to
# whatever an earlier cell assigned, and that object is what gets refitted here
# model = make_pipeline(
  # TODO
#)

# keep the fitted pipeline so that the selected features can be inspected afterwards
model = estimate_model(
    X_train,
    X_test,
    y_train,
    y_test,
    model=model
)

Check what feature was selected

In [None]:
# extract feature selection step from the pipeline
# (assumes the selector is the second-to-last step of your pipeline, directly before the classifier)
f_selection_step = model[-2]

# print the name(s) of the feature(s) kept by the selector
print(f_selection_step.get_feature_names_out())

---
- extract the mean power of alpha from the FC3, FCz, FC4, C3, Cz, and C4 channels from time window of 0 - 1 s.
- extract the mean amplitude in time window 0 - 1 s from the FC3, FCz, FC4, C3, Cz, and C4 channels.
- use wrapper feature selection method: [`SequentialFeatureSelector`](https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SequentialFeatureSelector.html). Keep in mind that `SequentialFeatureSelector` is parametrized with the estimator.

In [None]:
picks = ['FC3', 'FCz', 'FC4','C3', 'Cz', 'C4']

# create X and y datasets (the setup is needed only once per cell)
epochs = all_epochs.copy()

# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

# spectral features
# your code here
# X_spectrum =

# time-domain features
# your code here
# X_time =

# concatenate features and reshape X to (n_samples, n_features) shape
# use np.concatenate()
# X =

# NOTE: until X is assigned above, this prints the shape of the X left over from an earlier cell
print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# fit and predict: the last 135 epochs form the test set, all earlier ones the training set
X_train = X[:-135]
X_test = X[-135:]
y_train = y[:-135]
y_test = y[-135:]

# EXERCISE: scale the data and perform sequential feature selection
# (SequentialFeatureSelector needs an estimator of its own as an argument)
# NOTE: while the pipeline below stays commented out, `model` still refers to
# whatever an earlier cell assigned, and that object is what gets refitted here
# model = make_pipeline(
#    TODO
# )

# keep the fitted pipeline so that the selected features can be inspected afterwards
model = estimate_model(
    X_train,
    X_test,
    y_train,
    y_test,
    model=model
)

Check what feature was selected

In [None]:
# extract feature selection step from the pipeline
# (assumes the selector is the second-to-last step of your pipeline, directly before the classifier)
f_selection_step = model[-2]

# print the name(s) of the feature(s) kept by the selector
print(f_selection_step.get_feature_names_out())

### 2. Signal-based features

#### 2.1 Extract signal on FC3 channel and calculate mean, max, min and std of the signal

In [None]:
# create X and y datasets (on a copy of the concatenated epochs)
epochs = all_epochs.copy()

# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

# FC3 signal restricted to the 0 - 1 s window
picks = ['FC3']
X_data = epochs.get_data(tmin=0, tmax=1, picks=picks, copy=True)

# extract features: each statistic is taken along the last axis of X_data
X_mean = np.mean(X_data, axis=-1)
X_max = # TODO: maximum, computed along the same axis as X_mean
X_min = # TODO: minimum, computed along the same axis as X_mean
X_std = # TODO: standard deviation, computed along the same axis as X_mean

# concatenate features and reshape X to (n_samples, n_features) shape
X = np.concatenate((X_mean, X_max, X_min, X_std), axis=1).reshape(len(epochs), -1)

print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# hold out the last 135 epochs for testing and train on everything before them
n_test = 135
X_train, X_test = X[:-n_test], X[-n_test:]
y_train, y_test = y[:-n_test], y[-n_test:]

# fit the default model and print the test/train classification reports
_ = estimate_model(X_train, X_test, y_train, y_test)

#### 2.2 Extract signals from the FC3, FC4, C3, and C4 channels. Calculate the mean, maximum, minimum, and standard deviation of the signals. Finally, perform feature selection using a method of your choice.

In [None]:
# create X and y datasets (on a copy of the concatenated epochs)
epochs = all_epochs.copy()

# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

# EXERCISE: extract features (mean, max, min and std of the FC3, FC4, C3 and C4 signals)
# picks =
# X =

# NOTE: until X is assigned above, this prints the shape of the X left over from an earlier cell
print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# fit and predict: the last 135 epochs form the test set, all earlier ones the training set
X_train = X[:-135]
X_test = X[-135:]
y_train = y[:-135]
y_test = y[-135:]

# EXERCISE: build a pipeline with a feature selection method of your choice
# NOTE: while the pipeline below stays commented out, `model` still refers to
# whatever an earlier cell assigned, and that object is what gets refitted here
# model = make_pipeline(
#    TODO
# )

_ = estimate_model(
    X_train,
    X_test,
    y_train,
    y_test,
    model = model
)

#### 2.3 Extract signal on C3 channel and calculate [`catch22`](https://time-series-features.gitbook.io/catch22-features) features

In [None]:
!pip install pycatch22

In [None]:
import pycatch22

In [None]:
# create y dataset (on a copy of the concatenated epochs)
epochs = all_epochs.copy()
# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

Create X dataset: **for each trial/epoch calculate catch22 features.**

Note that `pycatch22.catch22_all()` returns a dict with two keys: `names` and `values`. `names` holds the names of the features and `values` holds the corresponding feature values. E.g.:

```
X = [1,2,3,4,5,6,7]
features = pycatch22.catch22_all(X)['values']
```

---





In [None]:
# create X dataset: for each trial/epoch calculate catch22 features

# extract data: C3 signal restricted to the 0 - 1 s window
picks = ['C3']
X_data = epochs.get_data(tmin=0, tmax=1, picks=picks, copy=True)

# EXERCISE: extract features by calling pycatch22.catch22_all(...)['values']
# on the C3 signal of every epoch and collecting the results in an array
# X =

# concatenate features and reshape X to (n_samples, n_features) shape
# NOTE: until X is assigned above, this reuses the X left over from an earlier cell
X = X.reshape(len(epochs), -1)

print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# hold out the last 135 epochs for testing and train on everything before them
n_test = 135
X_train, X_test = X[:-n_test], X[-n_test:]
y_train, y_test = y[:-n_test], y[-n_test:]

# fit the default model and print the test/train classification reports
_ = estimate_model(X_train, X_test, y_train, y_test)

#### 2.4 Extract signal on C3 channel and calculate [`catch22`](https://time-series-features.gitbook.io/catch22-features) features and perform feature selection

In [None]:
# create y dataset (on a copy of the concatenated epochs)
epochs = all_epochs.copy()
# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

In [None]:
# create X dataset: for each trial/epoch calculate catch22 features

# extract data: C3 signal restricted to the 0 - 1 s window
picks = ['C3']
X_data = epochs.get_data(tmin=0, tmax=1, picks=picks, copy=True)

# EXERCISE: extract features by calling pycatch22.catch22_all(...)['values']
# on the C3 signal of every epoch and collecting the results in an array
# X =

# concatenate features and reshape X to (n_samples, n_features) shape
# NOTE: until X is assigned above, this reuses the X left over from an earlier cell
X = X.reshape(len(epochs), -1)

print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# fit and predict: the last 135 epochs form the test set, all earlier ones the training set
X_train = X[:-135]
X_test = X[-135:]
y_train = y[:-135]
y_test = y[-135:]

# EXERCISE: build a pipeline that includes a feature selection step
# NOTE: while the pipeline below stays commented out, `model` still refers to
# whatever an earlier cell assigned, and that object is what gets refitted here
# model = make_pipeline(
#   TODO
# )

# the return value is discarded here; estimate_model fits the object passed as
# `model`, so `model` itself is the fitted pipeline afterwards
_ = estimate_model(
    X_train,
    X_test,
    y_train,
    y_test,
    model=model
)

Check what feature(s) were selected

In [None]:
# extract feature selection step from the pipeline
# (assumes the selector is the second-to-last step of your pipeline, directly before the classifier)
f_selection_step = model[-2]

# print the name(s) of the feature(s) kept by the selector
print(f_selection_step.get_feature_names_out())

#### 2.5 Extract signal on C3 channel and calculate features from [`tsfresh`](https://tsfresh.readthedocs.io/en/latest/text/quick_start.html).

**This example is solved. You can now explore tsfresh features on your own.**

In [None]:
!pip install tsfresh

In [None]:
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import impute

In [None]:
# create y dataset (on a copy of the concatenated epochs)
epochs = all_epochs.copy()
# labels: event code from the last column of the events array, shifted down by one
y = epochs.events[:, -1] - 1

In [None]:
# restrict a copy of the epochs to the C3 channel and the 0 - 1 s window
picks = ['C3']
epochs_cropped = epochs.copy()
epochs_cropped = epochs_cropped.crop(tmin=0, tmax=1)
epochs_cropped = epochs_cropped.pick(picks)

# tsfresh works on a data frame (see its documentation); the 'condition'
# column is dropped before the features are computed
full_df = epochs_cropped.to_data_frame()
epochs_cropped_df = full_df.drop(columns='condition')

# one row of features per epoch: rows are grouped by 'epoch' and ordered by 'time'
tsfresh_options = dict(
    column_id='epoch',
    column_sort='time',
    impute_function=impute,
)
X_df = extract_features(epochs_cropped_df, **tsfresh_options)

# reshape X to (n_samples, n_features) shape
n_samples = len(epochs)
X = X_df.to_numpy().reshape(n_samples, -1)

print(f"Shape of y set (labels): {y.shape}\nShape of X set (features): {X.shape}")

In [None]:
# hold out the last 135 epochs for testing and train on everything before them
n_test = 135
X_train, X_test = X[:-n_test], X[-n_test:]
y_train, y_test = y[:-n_test], y[-n_test:]

# pipeline: standardize the features, keep the single feature that scores
# best under f_classif, then classify with an SVM
scaler = StandardScaler()
selector = SelectKBest(f_classif, k=1)
classifier = SVC()
model = make_pipeline(scaler, selector, classifier)

# keep the fitted pipeline so that the selected feature can be inspected afterwards
model = estimate_model(X_train, X_test, y_train, y_test, model=model)

Check what feature was selected

In [None]:
# extract feature selection step from the pipeline
# (second-to-last step: the selector sits directly before the final classifier)
f_selection_step = model[-2]

# print the name(s) of the feature(s) kept by the selector
# (presumably generic names such as "x3", because the pipeline was fitted on plain arrays; verify)
print(f_selection_step.get_feature_names_out())