In [1]:
import h5py
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import DataLoader
from utils import read_datasets, MEGDataset,fit_transform_scaler, temporal_downsampling
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV


In [3]:
data_directory = "../data"
data = read_datasets(data_directory)

In [4]:
def downsample_scale_data(X,scaler, sample_rate):
    """
    Downsampling MEG data from the time dimension
    """
    if isinstance(X,np.ndarray):
        pass
    else:
        X = np.array(X)
    downsampled = []
    original_size = X.shape[2]
    n_sampledpoints = original_size // sample_rate
    for instance in X:
        downsampled_instance = np.mean(instance[:, :n_sampledpoints * sample_rate].reshape(instance.shape[0], n_sampledpoints, sample_rate), axis=2)
        scaled_downsampled_instance = scaler.fit_transform(downsampled_instance)
        downsampled.append(scaled_downsampled_instance)
    return np.array(downsampled)

def shuffle_flatten_data(X,y):
    """
    flatten the data and shuffle the data and the labels.
    """
    n_instances, n_channels, n_timepoints = X.shape
    flattened_X = X.reshape(n_instances, n_channels * n_timepoints)
    rng = np.random.default_rng()
    indices = np.arange(n_instances)
    rng.shuffle(indices)
    shuffled_X = flattened_X[indices]
    shuffled_y = y[indices]
    return shuffled_X, shuffled_y

def preprocess_data(X,y,sample_rate,scaler):
    """
    Preprocess MEG data by downsampling, scaling, encoding labels, flattening and shuffling.
    """
    scaled_X = downsample_scale_data(X,sample_rate=sample_rate,scaler=scaler)
    label_encoder = LabelEncoder()
    encoded_y = label_encoder.fit_transform(y)
    shuffled_X, shuffled_y = shuffle_flatten_data(scaled_X, encoded_y)    
    return shuffled_X, shuffled_y

In [5]:
scaler = StandardScaler()

intra_X_train, intra_y_train = preprocess_data(X = data['intra']['X_train'], 
                                               y = data['intra']['y_train'],
                                               sample_rate=8,scaler=scaler)
intra_X_test, intra_y_test = preprocess_data(X= data['intra']['X_test'],
                                             y= data['intra']['y_test'],
                                             sample_rate=8,scaler=scaler)

cross_X_train, cross_y_train = preprocess_data(X = data['cross']['X_train'], 
                                               y = data['cross']['y_train'],
                                               sample_rate=8,scaler=scaler)
cross_X_test, cross_y_test = preprocess_data(X= data['cross']['X_test1']+data['cross']['X_test2']+data['cross']['X_test3'],
                                             y= data['cross']['y_test1']+data['cross']['y_test2']+data['cross']['y_test3'],
                                             sample_rate=8,scaler=scaler)

In [6]:
# Intra
param_grid_svc = {
    'C': [0.0001, 0.01, 0.1],
    'gamma': [100,10, 1],
    'kernel': ['rbf', 'poly', 'sigmoid']
}

svc = SVC()
grid_search_svc = GridSearchCV(estimator=svc, 
                               param_grid=param_grid_svc, 
                               cv=2,
                               n_jobs=-1) 
grid_search_svc.fit(intra_X_train, intra_y_train)

In [7]:
print("Best params for intra subject task:", grid_search_svc.best_params_)
print("Best score for intra subject task:", grid_search_svc.best_score_)
print("Accuracy on intra subject predictions:", grid_search_svc.score(intra_X_test,intra_y_test))

Best params for intra subject task: {'C': 0.0001, 'gamma': 100, 'kernel': 'poly'}
Best score for intra subject task: 1.0
Accuracy on intra subject predictions: 1.0


In [8]:
# Cross

param_grid_svc = {
    'C': [0.0001, 0.01, 0.1],
    'gamma': [100,10, 1],
    'kernel': ['rbf', 'poly', 'sigmoid']
}

svc = SVC()
cross_svc = GridSearchCV(estimator=svc, 
                               param_grid=param_grid_svc, 
                               cv=2,
                               n_jobs=-1) 
cross_svc.fit(cross_X_train, cross_y_train)

In [11]:
print("Best params for cross subject task:", cross_svc.best_params_)
print("Best score for cross subject task:", cross_svc.best_score_)

print("Averaged Accuracy on cross predictions:", cross_svc.score(cross_X_test,cross_y_test))

Best params for cross subject task: {'C': 0.0001, 'gamma': 100, 'kernel': 'poly'}
Best score for cross subject task: 0.984375
Averaged Accuracy on cross predictions: 0.625


In [29]:

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

encoder_dict = {
        'rest': 0,
        'task_motor': 1,
        'task_story_math': 2,
        'task_working_memory': 3,
    }

for i in ['1', '2', '3']:
    X = data['cross']['X_test'+i]
    y = data['cross']['y_test'+i]
    X, y = preprocess_data(X= data['intra']['X_test'],
                            y= data['intra']['y_test'],
                            sample_rate=8,scaler=scaler)
    predictions = cross_svc.predict(X)
    accuracy = accuracy_score(y, predictions)
    precision = precision_score(y, predictions, average='macro', zero_division=1)

    recall = recall_score(y, predictions, average='macro', zero_division=1)
    f1 = f1_score(y, predictions, average='macro', zero_division=1)

    print(f'Subject {i}:\n')
    print(f'Accuracy: {accuracy}')
    print(f'Precision: {precision}')
    print(f'Recall: {recall}')
    print(f'F1: {f1}')

    for label in encoder_dict.keys():
        print(f'Class {label}:')
        missclassified = [predictions[i] for i in range(len(predictions)) if predictions[i] != y[i] and y[i] == encoder_dict[label]]
        print(f'Missclassification rate: {len(missclassified)/np.count_nonzero(y == encoder_dict[label])}') 
        print('\n')

Subject 1:

Accuracy: 0.75
Precision: 0.875
Recall: 0.75
F1: 0.6666666666666666
Class rest:
Missclassification rate: 0.0


Class task_motor:
Missclassification rate: 0.0


Class task_story_math:
Missclassification rate: 1.0


Class task_working_memory:
Missclassification rate: 0.0


Subject 2:

Accuracy: 0.75
Precision: 0.875
Recall: 0.75
F1: 0.6666666666666666
Class rest:
Missclassification rate: 0.0


Class task_motor:
Missclassification rate: 0.0


Class task_story_math:
Missclassification rate: 1.0


Class task_working_memory:
Missclassification rate: 0.0


Subject 3:

Accuracy: 0.75
Precision: 0.875
Recall: 0.75
F1: 0.6666666666666666
Class rest:
Missclassification rate: 0.0


Class task_motor:
Missclassification rate: 0.0


Class task_story_math:
Missclassification rate: 1.0


Class task_working_memory:
Missclassification rate: 0.0


