In [1]:
%matplotlib qt

In [2]:
import os, pyxdf, json, yaml
import mne
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# from mne.time_frequency import psd_welch, tfr_morlet, tfr_multitaper
# # from multitaper_spectrogram_python import multitaper_spectrogram
# from mne.decoding import Scaler, Vectorizer

# from sklearn.pipeline import make_pipeline
# from sklearn.experimental import enable_halving_search_cv
# from sklearn.model_selection import RepeatedStratifiedKFold, HalvingGridSearchCV

# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# from sklearn.svm import SVC
# from sklearn.linear_model import SGDClassifier

### Settings

In [3]:
config_file = 'config_MI-hands.yaml'

with open(config_file) as f:
    config = yaml.load(f.read(), Loader=yaml.Loader)
    print(config)
    locals().update(config)

lslDir = os.path.join(os.path.expanduser('~'), 'Documents\CurrentStudy')

{'subject': 'P00J', 'session': 2, 'task': 'MI-push', 'eeg_stream_type': 'EXG', 'markers_stream_type': 'Marker', 'default_ch_names': ['Fz', 'FC1', 'FC2', 'C3', 'Cz', 'C4', 'CP1', 'CP2', 'P3', 'Pz', 'P4', 'PO3', 'PO4', 'O1', 'Oz', 'O2'], 'montage_file': 'openbci_montage.elc', 'event_dict': {'rest': 0, 'MI/push': 1}, 'rest_duration': 8, 'task_duration': 8, 'tmin': 0.0, 'tmax': 8.0, 'window_size': 1.0, 'window_overlap': 0.0, 'plotGraphs': True, 'scalings': {'eeg': 0.0002}, 'plot_duration': 10, 'bp_l_freq': 1.0, 'bp_h_freq': 40.0, 'performICA': False, 'features': 'psd', 'tfr_type': 'morlet', 'baseline': (0.0, 0.1), 'vmin': -1.0, 'vmax': 1.0}


### Find LSL Files

In [4]:
# Find files
xdf_files = []
hasSubject = subject!=''
hasSession = session!=''
hasTask = task!=''
for root, dir, files in os.walk(lslDir):
    for file in files:
        validFile = True
        if hasSubject:
            validFile = validFile and (('sub-'+subject) in file)
        if hasSession:
            validFile = validFile and (('ses-S' + str(session).zfill(3)) in file)
        if hasTask:
            validFile = validFile and (('task-' + task) in file)
        validFile = validFile and file.endswith('.xdf')
        if validFile:
            print(file)
            matchingFile = os.path.join(root, file)
            xdf_files.append(matchingFile)

if len(xdf_files) == 0:
    print('No files found')

sub-P00J_ses-S002_task-MI-push_run-001_eeg.xdf
sub-P00J_ses-S002_task-MI-push_run-002_eeg.xdf
sub-P00J_ses-S002_task-MI-push_run-005_eeg.xdf
sub-P00J_ses-S002_task-MI-push_run-006_eeg.xdf
sub-P00J_ses-S002_task-MI-push_run-007_eeg.xdf


In [5]:
# Parse streams
eeg_stream, marker_stream = [], []

print('Parsing streams')
for xdf_file in xdf_files:
    streams, header = pyxdf.load_xdf(xdf_file)
    for i in range(len(streams)):
        if streams[i]['info']['type'][0] == eeg_stream_type:
            print("Found %s stream in %s" % (eeg_stream_type, os.path.basename(xdf_file)))
            eeg_stream.append(streams[i])
        elif streams[i]['info']['type'][0] == markers_stream_type:
            print("Found %s stream in %s" % (markers_stream_type, os.path.basename(xdf_file)))
            marker_stream.append(streams[i])
del streams, header

Parsing streams
Found EXG stream in sub-P00J_ses-S002_task-MI-push_run-001_eeg.xdf
Found Marker stream in sub-P00J_ses-S002_task-MI-push_run-001_eeg.xdf
Found Marker stream in sub-P00J_ses-S002_task-MI-push_run-002_eeg.xdf
Found EXG stream in sub-P00J_ses-S002_task-MI-push_run-002_eeg.xdf
Found Marker stream in sub-P00J_ses-S002_task-MI-push_run-005_eeg.xdf
Found EXG stream in sub-P00J_ses-S002_task-MI-push_run-005_eeg.xdf
Found Marker stream in sub-P00J_ses-S002_task-MI-push_run-006_eeg.xdf
Found EXG stream in sub-P00J_ses-S002_task-MI-push_run-006_eeg.xdf
Found EXG stream in sub-P00J_ses-S002_task-MI-push_run-007_eeg.xdf
Found Marker stream in sub-P00J_ses-S002_task-MI-push_run-007_eeg.xdf


### Extract EEG and Marker data

In [6]:
# Extract EEG Info
print("Extracting EEG info...")

ch_names = []
if eeg_stream[0]['info']['desc'][0]:
    print("EEG channel names found")
    for i in range(len(eeg_stream[0]['info']['desc'][0]['channels'][0]['channel'])):
        ch_names.append(eeg_stream[0]['info']['desc'][0]['channels'][0]['channel'][i]['label'][0])
else:
    ch_names = default_ch_names
print('Channels: ', ch_names)

sfreq = float(eeg_stream[0]['info']['nominal_srate'][0])
print('Sampling frequency: ', sfreq)

# Create MNE info object
eeg_info = mne.create_info(ch_names, sfreq, ch_types='eeg')

Extracting EEG info...
Channels:  ['Fz', 'FC1', 'FC2', 'C3', 'Cz', 'C4', 'CP1', 'CP2', 'P3', 'Pz', 'P4', 'PO3', 'PO4', 'O1', 'Oz', 'O2']
Sampling frequency:  125.0


In [7]:
# Setup Montage
montage = mne.channels.read_custom_montage(montage_file)
# montage.plot()

In [8]:
# Get all EEG data
eeg_raw_list = []

for n in range(len(eeg_stream)):
    # Create MNE Raw object
    eeg_data = np.transpose(eeg_stream[n]['time_series'])
    eeg_data = eeg_data / 1e6
    print(eeg_data.shape)
    eeg_raw = mne.io.RawArray(eeg_data, eeg_info)
    
    # Set montage
    eeg_raw = eeg_raw.set_montage(montage)

    # Add annotations
    onset, duration, description = [], [], []
    current_target = -1
    current_flash = -1
    for i in range(len(marker_stream[n]['time_series'])):
        if 'MI' in task:
            if ('rest' in marker_stream[n]['time_series'][i][0]) and ('cue' not in marker_stream[n]['time_series'][i][0]):
                window_onset = tmin
                window_start = marker_stream[n]['time_stamps'][i] - eeg_stream[n]['time_stamps'][0]
                while (window_onset + window_size <= tmax):
                    onset.append(window_start + window_onset)
                    duration.append(window_size)
                    description.append(marker_stream[n]['time_series'][i][0])
                    window_onset = window_onset + window_size - window_overlap
            elif ('task' in marker_stream[n]['time_series'][i][0]) and ('cue' not in marker_stream[n]['time_series'][i][0]):
                window_onset = tmin
                window_start = marker_stream[n]['time_stamps'][i] - eeg_stream[n]['time_stamps'][0]
                while (window_onset + window_size <= tmax):
                    onset.append(window_start + window_onset)
                    duration.append(window_size)
                    description.append(marker_stream[n]['time_series'][i][0].replace('task_', '').replace('-','/'))
                    window_onset = window_onset + window_size - window_overlap
        elif 'P300' in task:
            if('target' in marker_stream[n]['time_series'][i][0]):
                current_target = json.loads(marker_stream[n]['time_series'][i][0])['target']
            elif('flash' in marker_stream[n]['time_series'][i][0]):
                current_flash = json.loads(marker_stream[n]['time_series'][i][0])['flash']
                onset.append(marker_stream[n]['time_stamps'][i] - eeg_stream[n]['time_stamps'][0])
                duration.append(task_duration)
                description.append("target" if current_flash == current_target else "nontarget")
    annotations = mne.Annotations(onset, duration, description)
    eeg_raw = eeg_raw.set_annotations(annotations)
    
    # Create list of raw objects
    eeg_raw_list.append(eeg_raw)

(16, 31672)
Creating RawArray with float64 data, n_channels=16, n_times=31672
    Range : 0 ... 31671 =      0.000 ...   253.368 secs
Ready.
(16, 31922)
Creating RawArray with float64 data, n_channels=16, n_times=31922
    Range : 0 ... 31921 =      0.000 ...   255.368 secs
Ready.
(16, 31959)
Creating RawArray with float64 data, n_channels=16, n_times=31959
    Range : 0 ... 31958 =      0.000 ...   255.664 secs
Ready.
(16, 31316)
Creating RawArray with float64 data, n_channels=16, n_times=31316
    Range : 0 ... 31315 =      0.000 ...   250.520 secs
Ready.
(16, 31057)
Creating RawArray with float64 data, n_channels=16, n_times=31057
    Range : 0 ... 31056 =      0.000 ...   248.448 secs
Ready.


In [9]:
# Concatenate raw objects
raw = mne.concatenate_raws(eeg_raw_list)
raw

0,1
Measurement date,Unknown
Experimenter,Unknown
Digitized points,19 points
Good channels,16 EEG
Bad channels,
EOG channels,Not available
ECG channels,Not available
Sampling frequency,125.00 Hz
Highpass,0.00 Hz
Lowpass,62.50 Hz


### Pre-processing

In [10]:
# Common average reference
raw_orig = raw.copy()
raw = raw.set_eeg_reference('average', projection=True)
mne.write_proj('average-ref-proj.fif', raw.info['projs'])

if plotGraphs:
    fig = raw_orig.plot(title='Before Re-referencing', n_channels=16, scalings=scalings)
    fig = raw.plot(title='After Re-referencing', n_channels=16, scalings=scalings, proj=True)

Adding average EEG reference projection.
1 projection items deactivated
Average reference projection was added, but has not been applied yet. Use the apply_proj method to apply it.
Using matplotlib as 2D backend.


In [11]:
# Bandpass filter data
raw_orig = raw.copy()
raw = raw.filter(l_freq=bp_l_freq, h_freq=bp_h_freq)

if plotGraphs:
    fig = raw_orig.plot(title='Before Filtering', scalings=scalings, duration=plot_duration)
    fig = raw.plot(title='After Filtering', scalings=scalings, duration=plot_duration)

Filtering raw data in 5 contiguous segments
Setting up band-pass filter from 1 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 413 samples (3.304 sec)



### Epoch data

In [12]:
# Epoch data
events, event_id = mne.events_from_annotations(raw, event_id=event_dict)
epochs = mne.Epochs(raw, events, event_id=event_id, tmin=0., tmax=window_size, baseline=None, picks='eeg', preload=True)
print(epochs)

Used Annotations descriptions: ['MI/push', 'rest']
Not setting metadata
Not setting metadata
800 matching events found
No baseline correction applied
Created an SSP operator (subspace dimension = 1)
1 projection items activated
Loading data for 800 events and 126 original time points ...
0 bad epochs dropped
<Epochs |  800 events (all good), 0 - 1 sec, baseline off, ~12.3 MB, data loaded,
 'MI/push': 400
 'rest': 400>


### Features

In [13]:
# Labels
y = epochs.events[:,-1] - min(epochs.events[:,-1])

### CSP

In [16]:
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import ShuffleSplit, cross_val_score
from mne.decoding import CSP

import warnings
warnings.filterwarnings("ignore")

labels = y

# Define a monte-carlo cross-validation generator (reduce variance):
scores = []
epochs_data = epochs.get_data()
# epochs_data_train = epochs_train.get_data()
cv = ShuffleSplit(10, test_size=0.2, random_state=42)
cv_split = cv.split(epochs)

score_dict = {}

# reg = np.linspace(0.1, 1, 10)
reg = np.array([0.1, 0.2, 0.3])

fold = 1
for train_idx, test_idx in cv_split:
    print('CV fold: ', fold)
    fold += 1
    
    for i in range(len(reg)):
        print('reg: ', reg[i])
        csp = CSP(n_components=4, reg=reg[i], log=True, norm_trace=False)
        lda = LinearDiscriminantAnalysis()

        y_train, y_test = labels[train_idx], labels[test_idx]
        X_train = csp.fit_transform(epochs_data[train_idx], y_train)
        X_test = csp.transform(epochs_data[test_idx])

        lda.fit(X_train, y_train)
        scores = lda.score(X_test, y_test)

        if str(reg[i]) not in score_dict.keys():
            score_dict[str(reg[i])] = []
        score_dict[str(reg[i])].append(scores)

CV fold:  1
reg:  0.1
Computing rank from data with rank=None
    Using tolerance 4e-05 (2.2e-16 eps * 16 dim * 1.1e+10  max singular value)
    Estimated rank (mag): 15
    MAG: rank 15 computed from 16 data channels with 0 projectors
    Setting small MAG eigenvalues to zero (without PCA)
Reducing data rank from 16 -> 15
Estimating covariance using SHRINKAGE
Done.
Computing rank from data with rank=None
    Using tolerance 4.4e-05 (2.2e-16 eps * 16 dim * 1.2e+10  max singular value)
    Estimated rank (mag): 15
    MAG: rank 15 computed from 16 data channels with 0 projectors
    Setting small MAG eigenvalues to zero (without PCA)
Reducing data rank from 16 -> 15
Estimating covariance using SHRINKAGE
Done.
reg:  0.2
Computing rank from data with rank=None
    Using tolerance 4e-05 (2.2e-16 eps * 16 dim * 1.1e+10  max singular value)
    Estimated rank (mag): 15
    MAG: rank 15 computed from 16 data channels with 0 projectors
    Setting small MAG eigenvalues to zero (without PCA)
Re

LinAlgError: The leading minor of order 16 of B is not positive definite. The factorization of B could not be completed and no eigenvalues or eigenvectors were computed.

In [None]:
for i in range(len(reg)):
    print(reg[i], ': ', np.mean(score_dict[str(reg[i])]))

In [None]:
score_dict

In [None]:
# X = epochs.get_data()
# y = epochs.events[:,-1] - min(epochs.events[:,-1])

# csp = CSP(n_components=4, reg=0.8, log=True, norm_trace=False)
# lda = LinearDiscriminantAnalysis()

# model = Pipeline([('CSP', csp), ('LDA', lda)])
# model.fit(X, y)

# print(model.score(X, y))

# # Save model
# import pickle
# fname = 'model.sav'
# pickle.dump(model, open(fname, 'wb'))