## 1. Import & load libraries

In [14]:
# Empty pip's local cache before installing.
%pip cache purge

# Install MNE through the %pip magic; note that no version is pinned here.
%pip install mne


Files removed: 0 (0 bytes)
Note: you may need to restart the kernel to use updated packages.
Looking in indexes: https://pypi.python.org/simple
Note: you may need to restart the kernel to use updated packages.
Looking in indexes: https://pypi.python.org/simple
Note: you may need to restart the kernel to use updated packages.


In [15]:
import mne
from mne.decoding import CSP

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA, FastICA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC

# from mne.preprocessing import ICA
# from mne import Epochs, pick_types, events_from_annotations
# from mne.channels import make_standard_montage
# from mne.io import concatenate_raws, read_raw_edf
# from mne.datasets import eegbci
# from mne.decoding import CSP

# from sklearn.pipeline import Pipeline
# from sklearn.model_selection import cross_val_score, GridSearchCV
# from sklearn.linear_model import LogisticRegression
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# from sklearn.svm import SVC
# from sklearn.preprocessing import StandardScaler
# from sklearn.decomposition import PCA
# from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin
# from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
# from sklearn.exceptions import ConvergenceWarning

import numpy as np
import pandas as pd

import os
import warnings
import json
# import time

import pickle

# from tqdm import tqdm

# from IPython.display import display, HTML

from typing import List





## 2. Pipeline

In [16]:
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
import time
import json
import numpy as np

# Directory (relative path) that the cells below read the preprocessed arrays
# from and write the pipeline artifacts (JSON summary, pickle) to.
models_dir = '../models'

# 1. Create the pipeline with dimensionality reduction and classification
def create_processing_pipeline(n_components=0.95):
    """Build an unfitted two-step pipeline: PCA followed by an RBF-kernel SVC.

    Parameters
    ----------
    n_components
        Forwarded unchanged to ``PCA(n_components=...)``. Defaults to 0.95.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Steps are named ``'dimension_reduction'`` and ``'classifier'``.
    """
    reducer = PCA(n_components=n_components)
    svm = SVC(kernel='rbf')
    return Pipeline([
        ('dimension_reduction', reducer),
        ('classifier', svm),
    ])

# 2. Load the preprocessed data
X_train = np.load(os.path.join(models_dir, 'X_preprocessed.npy'))
y_train = np.load(os.path.join(models_dir, 'y_labels.npy'))

# 3. Create and train the pipeline
pipeline = create_processing_pipeline()
pipeline.fit(X_train, y_train)

# 4. Evaluate using cross-validation
scores = cross_val_score(pipeline, X_train, y_train, cv=5)
print(f"Cross-validation scores: {scores}")
print(f"Mean CV score: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})")

# 5. Save the trained pipeline
fitted_pca = pipeline.named_steps['dimension_reduction']

# `n_components` is what was *requested* when the PCA was constructed, while
# `n_components_` is what the fitted PCA ended up with. Recording the fitted
# value under both keys would lose the requested setting.
requested_components = fitted_pca.n_components
if isinstance(requested_components, (int, float, np.number)):
    requested_components = float(requested_components)

pipeline_info = {
    'pipeline_params': {
        'pca_n_components': requested_components,
        'n_components_selected': int(fitted_pca.n_components_),
        'explained_variance_ratio': [float(x) for x in fitted_pca.explained_variance_ratio_],
        # json cannot encode numpy scalars; unwrap any of them to the matching
        # Python scalar so numeric parameters stay numeric in the JSON file.
        'classifier_params': {k: v.item() if isinstance(v, np.generic) else v
                            for k, v in pipeline.named_steps['classifier'].get_params().items()}
    }
}

# Save pipeline information to JSON
with open(os.path.join(models_dir, 'pipeline_info.json'), 'w') as f:
    json.dump(pipeline_info, f, indent=4)

# Save the complete pipeline using pickle
with open(os.path.join(models_dir, 'trained_pipeline.pkl'), 'wb') as f:
    pickle.dump(pipeline, f)

# 6. Simulate real-time prediction (playback)
def simulate_real_time_prediction(pipeline, X, y, chunk_size=10, delay=0.1):
    """Replay ``X`` through ``pipeline`` chunk by chunk, printing running accuracy.

    Parameters
    ----------
    pipeline
        Any object exposing ``predict(X_chunk)``.
    X, y
        Sliceable, equally long sequences of samples and their true labels.
    chunk_size : int
        Number of samples handed to ``predict`` per step (must be >= 1).
    delay : float
        Seconds to sleep after each chunk to mimic streaming. Defaults to 0.1;
        pass 0 to disable the pause.

    Returns
    -------
    (predictions, true_labels) : tuple of lists
        Predicted and true labels in replay order.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1 or ``X`` and ``y`` differ in length.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")
    n_total = len(X)
    if n_total != len(y):
        raise ValueError(f"X and y must have the same length, got {n_total} and {len(y)}")

    predictions = []
    true_labels = []
    n_correct = 0  # running hit count, so accuracy is not recomputed over the whole history

    for start in range(0, n_total, chunk_size):
        # Get a chunk of data
        X_chunk = X[start:start + chunk_size]
        y_chunk = np.asarray(y[start:start + chunk_size])

        # Make predictions
        y_pred = np.asarray(pipeline.predict(X_chunk))

        predictions.extend(y_pred.tolist())
        true_labels.extend(y_chunk.tolist())

        # Update the running accuracy with this chunk's hits only
        n_correct += int(np.count_nonzero(y_pred == y_chunk))
        current_acc = n_correct / len(predictions)
        print(f"Processed {start+len(X_chunk)}/{n_total} samples. Current accuracy: {current_acc:.3f}")

        # Simulate real-time delay
        if delay > 0:
            time.sleep(delay)

    return predictions, true_labels

# Replay (at most) the first 100 samples through the playback simulation.
# NOTE(review): X_train / y_train are the same arrays the pipeline was fitted on
# earlier in this cell, so the figure printed below is a training accuracy.
print("\nTesting real-time prediction simulation:")
predictions, true_labels = simulate_real_time_prediction(
    pipeline,
    X_train[:100],
    y_train[:100],
)
final_accuracy = (np.asarray(predictions) == np.asarray(true_labels)).mean()
print(f"\nFinal accuracy on simulation: {final_accuracy:.3f}")


Cross-validation scores: [0.33333333 0.33333333 0.         0.33333333 0.33333333]
Mean CV score: 0.267 (+/- 0.267)

Testing real-time prediction simulation:
Processed 10/15 samples. Current accuracy: 1.000
Processed 15/15 samples. Current accuracy: 1.000

Final accuracy on simulation: 1.000


In [19]:
from mne.decoding import CSP
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.svm import SVC
import json

# Load preprocessing info to get the channel information
with open(os.path.join(models_dir, 'preprocessing_info.json'), 'r') as f:
    preprocess_info = json.load(f)

# Get dimensions
n_epochs = len(X_train)
n_channels = len(preprocess_info['channels'])
n_features = X_train.shape[1]

# Fail fast with a clear message instead of letting floor division silently
# drop a remainder (or divide by zero) and report misleading dimensions.
if n_channels == 0 or n_features % n_channels != 0:
    raise ValueError(
        f"Cannot split {n_features} features evenly across {n_channels} channels; "
        "check that preprocessing_info.json matches X_preprocessed.npy"
    )
n_times = n_features // n_channels

print("Data dimensions:")
print(f"Number of epochs: {n_epochs}")
print(f"Number of channels: {n_channels}")
print(f"Time points: {n_times}")

# Reshape to (n_epochs, n_channels, n_times)
# NOTE(review): assumes each row of X_train was flattened channel-major
# (all time points of channel 0, then channel 1, ...) — TODO confirm against
# the preprocessing step that wrote X_preprocessed.npy.
X_train_3d = X_train.reshape(n_epochs, n_channels, n_times)

def create_csp_pipeline(n_components=4):
    """Build an unfitted two-step pipeline: CSP followed by an RBF-kernel SVC.

    Parameters
    ----------
    n_components
        Forwarded unchanged to ``CSP(n_components=...)``. Defaults to 4.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Steps are named ``'csp'`` and ``'classifier'``.
    """
    spatial_filter = CSP(n_components=n_components, reg=None, log=True)
    svm = SVC(kernel='rbf')
    return Pipeline([
        ('csp', spatial_filter),
        ('classifier', svm),
    ])

def create_pca_pipeline(n_components=0.95):
    """Build the unfitted PCA -> RBF-kernel SVC pipeline used as the PCA baseline.

    Parameters
    ----------
    n_components
        Forwarded unchanged to the PCA step. Defaults to 0.95.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Steps are named ``'dimension_reduction'`` and ``'classifier'``.
    """
    # This recipe is identical to create_processing_pipeline (defined in the
    # earlier pipeline cell); delegate so the two cannot drift apart.
    return create_processing_pipeline(n_components=n_components)

# Candidate pipelines, keyed by the label used in the report below
pipelines = {
    'PCA': create_pca_pipeline(),
    'CSP': create_csp_pipeline(),
}

# Cross-validate every candidate and print its fold scores
for name, pipe in pipelines.items():
    # CSP is given the 3D (epochs, channels, times) array; the PCA pipeline
    # is given the flattened 2D array.
    features = X_train_3d if name == 'CSP' else X_train
    scores = cross_val_score(pipe, features, y_train, cv=5)

    print(f"\n{name} Pipeline:")
    print(f"Cross-validation scores: {scores}")
    print(f"Mean CV score: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})")


Data dimensions:
Number of epochs: 15
Number of channels: 64
Time points: 801

PCA Pipeline:
Cross-validation scores: [0.33333333 0.33333333 0.         0.33333333 0.33333333]
Mean CV score: 0.267 (+/- 0.267)
Computing rank from data with rank=None
    Using tolerance 9.6 (2.2e-16 eps * 64 dim * 6.7e+14  max singular value)
    Estimated rank (data): 64
    data: rank 64 computed from 64 data channels with 0 projectors
Reducing data rank from 64 -> 64
Estimating class=2 covariance using EMPIRICAL
Done.
Estimating class=3 covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
    Using tolerance 9.4 (2.2e-16 eps * 64 dim * 6.6e+14  max singular value)
    Estimated rank (data): 64
    data: rank 64 computed from 64 data channels with 0 projectors
Reducing data rank from 64 -> 64
Estimating class=2 covariance using EMPIRICAL
Done.
Estimating class=3 covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
    Using tolerance 9.3 (2.2e-16 eps * 64 dim * 6.