In [34]:
import os
import numpy as np
import pandas as pd
import scipy
import variables as v

def load_dataset(data_type="ica_filtered", test_type="Arithmetic"):
    
    assert (test_type in v.TEST_TYPES)

    assert (data_type in v.DATA_TYPES)

    if data_type == "ica_filtered" and test_type != "Arithmetic":
        print("Data of type", data_type, "does not have test type", test_type)
        return 0

    if data_type == "raw":
        dir = v.DIR_RAW
        data_key = 'Data'
    elif data_type == "wt_filtered":
        dir = v.DIR_FILTERED
        data_key = 'Clean_data'
    else:
        dir = v.DIR_ICA_FILTERED
        data_key = 'Clean_data'
        
    dataset = np.empty((120, 32, 3200))

    counter = 0
    for filename in os.listdir(dir):
        if test_type not in filename:
            continue

        f = os.path.join(dir, filename)
        data = scipy.io.loadmat(f)[data_key]
        dataset[counter] = data
        counter += 1
    return dataset


In [18]:

def load_labels():
    
    labels = pd.read_excel(v.LABELS_PATH)
    labels = labels.rename(columns=v.COLUMNS_TO_RENAME)
    labels = labels[1:]
    labels = labels.astype("int")
    labels = labels > 5
    return labels


In [22]:
def format_labels(labels, test_type="Arithmetic", epochs=1):
    
    assert (test_type in v.TEST_TYPES)

    formatted_labels = []
    for trial in v.TEST_TYPE_COLUMNS[test_type]:
        formatted_labels.append(labels[trial])

    formatted_labels = pd.concat(formatted_labels).to_numpy()

    formatted_labels = formatted_labels.repeat(epochs)

    return formatted_labels


In [44]:
print(format_labels())

TypeError: format_labels() missing 1 required positional argument: 'labels'

In [23]:
def split_data(data, sfreq):

    n_trials, n_channels, n_samples = data.shape

    epoched_data = np.empty((n_trials, n_samples//sfreq, n_channels, sfreq))
    for i in range(data.shape[0]):
        for j in range(data.shape[2]//sfreq):
            epoched_data[i, j] = data[i, :, j*sfreq:(j+1)*sfreq]
    return epoched_data

In [24]:
import mne_features.univariate as mne_f
import numpy as np


def time_series_features(data):

    n_trials, n_secs, n_channels, _ = data.shape
    features_per_channel = 3

    features = np.empty([n_trials, n_secs, n_channels * features_per_channel])
    for i, trial in enumerate(data):
        for j, second in enumerate(trial):
            variance = mne_f.compute_variance(second)
            rms = mne_f.compute_rms(second)
            ptp_amp = mne_f.compute_ptp_amp(second)
            features[i][j] = np.concatenate([variance, rms, ptp_amp])
    features = features.reshape(
        [n_trials*n_secs, n_channels*features_per_channel])
    return features

# features_list = time_series_features(data)

# # Print the features list
# print(features_list)

In [25]:
def freq_band_features(data, freq_bands):
    '''
    Computes the frequency bands delta, theta, alpha, beta and gamma using the package mne_features.

    Args:
        data (ndarray): EEG data.
        freq_bands (ndarray): The frequency bands to compute.

    Returns:
        ndarray: Computed features.
    '''
    n_trials, n_secs, n_channels, sfreq = data.shape
    features_per_channel = len(freq_bands)-1

    features = np.empty([n_trials, n_secs, n_channels * features_per_channel])
    for i, trial in enumerate(data):
        for j, second in enumerate(trial):
            psd = mne_f.compute_pow_freq_bands(
                sfreq, second, freq_bands=freq_bands)
            features[i][j] = psd
    features = features.reshape(
        [n_trials*n_secs, n_channels*features_per_channel])
    return features

In [26]:
def hjorth_features(data):
    '''
    Computes the features Hjorth mobility (spectral) and Hjorth complexity (spectral) using the package mne_features.

    Args:
        data (ndarray): EEG data.

    Returns:
        ndarray: Computed features.
    '''
    n_trials, n_secs, n_channels, sfreq = data.shape
    features_per_channel = 2

    features = np.empty([n_trials, n_secs, n_channels * features_per_channel])
    for i, trial in enumerate(data):
        for j, second in enumerate(trial):
            mobility_spect = mne_f.compute_hjorth_mobility_spect(sfreq, second)
            complexity_spect = mne_f.compute_hjorth_complexity_spect(
                sfreq, second)
            features[i][j] = np.concatenate([mobility_spect, complexity_spect])
    features = features.reshape(
        [n_trials*n_secs, n_channels*features_per_channel])
    return features

In [27]:
def fractal_features(data):
    '''
    Computes the Higuchi Fractal Dimension and Katz Fractal Dimension using the package mne_features.

    Args:
        data (ndarray): EEG data.

    Returns:
        ndarray: Computed features.

    '''
    n_trials, n_secs, n_channels, _ = data.shape
    features_per_channel = 2

    features = np.empty([n_trials, n_secs, n_channels * features_per_channel])
    for i, trial in enumerate(data):
        for j, second in enumerate(trial):
            higuchi = mne_f.compute_higuchi_fd(second)
            katz = mne_f.compute_katz_fd(second)
            features[i][j] = np.concatenate([higuchi, katz])
    features = features.reshape(
        [n_trials*n_secs, n_channels*features_per_channel])
    return features

In [28]:
def entropy_features(data):
    '''
    Computes the features Approximate Entropy, Sample Entropy, Spectral Entropy and SVD entropy using the package mne_features.

    Args:
        data (ndarray): EEG data.

    Returns:
        ndarray: Computed features.

    '''
    n_trials, n_secs, n_channels, sfreq = data.shape
    features_per_channel = 4

    features = np.empty([n_trials, n_secs, n_channels * features_per_channel])
    for i, trial in enumerate(data):
        for j, second in enumerate(trial):
            app_entropy = mne_f.compute_app_entropy(second)
            samp_entropy = mne_f.compute_samp_entropy(second)
            spect_entropy = mne_f.compute_spect_entropy(sfreq, second)
            svd_entropy = mne_f.compute_svd_entropy(second)
            features[i][j] = np.concatenate(
                [app_entropy, samp_entropy, spect_entropy, svd_entropy])
    features = features.reshape(
        [n_trials*n_secs, n_channels*features_per_channel])
    return features

In [12]:
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from keras import models, Input
from keras import optimizers as opt
from keras import backend as K
from keras.layers import Dense
from keras_tuner.tuners import RandomSearch
from tensorflow.keras.utils import to_categorical




In [11]:
pip install keras

Note: you may need to restart the kernel to use updated packages.


In [17]:
pip install tensorflow

Collecting tensorflow
  Obtaining dependency information for tensorflow from https://files.pythonhosted.org/packages/93/21/9b035a4f823d6aee2917c75415be9a95861ff3d73a0a65e48edbf210cec1/tensorflow-2.15.0-cp311-cp311-win_amd64.whl.metadata
  Downloading tensorflow-2.15.0-cp311-cp311-win_amd64.whl.metadata (3.6 kB)
Collecting tensorflow-intel==2.15.0 (from tensorflow)
  Obtaining dependency information for tensorflow-intel==2.15.0 from https://files.pythonhosted.org/packages/4c/48/1a5a15517f18eaa4ff8d598b1c000300b20c1bb0e624539d702117a0c369/tensorflow_intel-2.15.0-cp311-cp311-win_amd64.whl.metadata
  Downloading tensorflow_intel-2.15.0-cp311-cp311-win_amd64.whl.metadata (5.1 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.15.0->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting flatbuffers>=23.5.26 (from tensorflow-intel==2.15.0->tensorflow)
  Obtaining dependency information for flatbuffers>=23.5.26 from https://files.pythonhosted.org/packages/6f

In [30]:
# from dataset import load_dataset, load_labels, split_data, format_labels
# from features import time_series_features, fractal_features, entropy_features, hjorth_features, freq_band_features
import variables as v

In [29]:
data_type = "ica_filtered"
test_type = "Arithmetic"

In [35]:
dataset_ = load_dataset(data_type=data_type, test_type=test_type)
dataset = split_data(dataset_, v.SFREQ)

In [45]:
label_ = load_labels()
label = format_labels(label_, test_type=test_type, epochs=dataset.shape[1])
print(label)

[ True  True  True ... False False False]


In [18]:
pip install xlrd

Note: you may need to restart the kernel to use updated packages.


In [13]:
!pip install keras-tuner --upgrade

Collecting keras-tuner
  Obtaining dependency information for keras-tuner from https://files.pythonhosted.org/packages/2b/39/21f819fcda657c37519cf817ca1cd03a8a025262aad360876d2a971d38b3/keras_tuner-1.4.6-py3-none-any.whl.metadata
  Downloading keras_tuner-1.4.6-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Downloading keras_tuner-1.4.6-py3-none-any.whl (128 kB)
   ---------------------------------------- 0.0/128.9 kB ? eta -:--:--
   ------------------- -------------------- 61.4/128.9 kB 1.7 MB/s eta 0:00:01
   ---------------------------------------- 128.9/128.9 kB 2.5 MB/s eta 0:00:00
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.6 kt-legacy-1.0.5


In [137]:
# features = time_series_features(dataset)
# freq_bands = np.array([1, 4, 8, 12, 30, 50])
# features = freq_band_features(dataset, freq_bands)
# features = hjorth_features(dataset)
# features = entropy_features(dataset)
features = fractal_features(dataset)


In [138]:
data = features
print(data)


[[1.46479541 1.47439684 1.5950604  ... 2.43207342 2.34598928 2.23888161]
 [1.37245693 1.5695702  1.49116233 ... 2.11801299 2.13759479 2.05629195]
 [1.30527958 1.33673023 1.39111425 ... 2.27585907 2.37447209 2.22149924]
 ...
 [1.53353794 1.53879684 1.55961493 ... 2.15046335 1.7276305  1.85910251]
 [1.57608601 1.56631244 1.49570705 ... 1.76653921 1.6933852  1.94194381]
 [1.54391507 1.53507259 1.50526977 ... 1.83881158 2.11339111 1.94677029]]


In [66]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics


In [139]:
x, x_test, y, y_test = train_test_split(
    data, label, test_size=0.2, random_state=1)

x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=0.25, random_state=1)


In [140]:
scaler = MinMaxScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_val = scaler.transform(x_val)
x_test = scaler.transform(x_test)


In [141]:
# Initialize Decision Tree Classifier
dt_clf = DecisionTreeClassifier(random_state=1)

# Train the classifier on the training data
dt_clf.fit(x_train, y_train)


In [142]:
# Predict on the validation set
y_val_pred = dt_clf.predict(x_val)

# Predict on the test set
y_test_pred = dt_clf.predict(x_test)


In [143]:
# Predict on the validation set
y_val_pred = dt_clf.predict(x_val)

# Predict on the test set
y_test_pred = dt_clf.predict(x_test)

# Calculate accuracy on the test set
test_accuracy = metrics.accuracy_score(y_test, y_test_pred)
print(f"Test Set Accuracy: {test_accuracy * 100:.2f}%")


Test Set Accuracy: 55.50%


In [144]:
x, x_test, y, y_test = train_test_split(
    data, label, test_size=0.2, random_state=1)
x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=0.25, random_state=1)
scaler = MinMaxScaler()
scaler.fit(x_train)
x = scaler.transform(x)
x_train = scaler.transform(x_train)
x_val = scaler.transform(x_val)
x_test = scaler.transform(x_test)

param_grid = {
    'leaf_size': range(50),
    'n_neighbors': range(1, 10),
    'p': [1, 2]
}
split_index = [-1 if x in range(len(x_train)) else 0 for x in range(len(x))]
ps = PredefinedSplit(test_fold=split_index)
knn_clf = GridSearchCV(KNeighborsClassifier(), param_grid, cv=ps, refit=True)
knn_clf.fit(x, y)
y_pred = knn_clf.predict(x_test)
y_true = y_test
print(metrics.classification_report(y_true, y_pred))
print(metrics.confusion_matrix(y_true, y_pred))

              precision    recall  f1-score   support

       False       0.54      0.66      0.60       311
        True       0.53      0.40      0.46       289

    accuracy                           0.54       600
   macro avg       0.54      0.53      0.53       600
weighted avg       0.54      0.54      0.53       600

[[206 105]
 [172 117]]


18 fits failed out of a total of 900.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
18 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Anaconda\Lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Anaconda\Lib\site-packages\sklearn\base.py", line 1144, in wrapper
    estimator._validate_params()
  File "C:\Anaconda\Lib\site-packages\sklearn\base.py", line 637, in _validate_params
    validate_parameter_constraints(
  File "C:\Anaconda\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParam

In [145]:
test_accuracy = metrics.accuracy_score(y_test, y_test_pred)
print(f"Test Set Accuracy: {test_accuracy * 100:.2f}%")

Test Set Accuracy: 55.50%


In [146]:
accuracy = metrics.accuracy_score(y_true, y_pred)
print(f"\nAccuracy: {accuracy * 100:.2f}%")



Accuracy: 53.83%


In [147]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from sklearn.metrics import accuracy_score

# Assuming you have 'data' and 'label' from your earlier example
x, x_test, y, y_test = train_test_split(data, label, test_size=0.2, random_state=1)
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.25, random_state=1)

# Assuming 'MinMaxScaler' is imported
scaler = MinMaxScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_val = scaler.transform(x_val)
x_test = scaler.transform(x_test)

# Define the SVM classifier with chosen hyperparameters
svm_clf = SVC(kernel='linear', C=1.0, gamma='scale')

# Train the SVM classifier on the training set
svm_clf.fit(x_train, y_train)

# Make predictions on the test set
y_pred_test = svm_clf.predict(x_test)
y_true_test = y_test


# Calculate and print the accuracy on the test set
accuracy_test = accuracy_score(y_true_test, y_pred_test)
print("SVM Accuracy on Test Set: {:.2%}".format(accuracy_test))

SVM Accuracy on Test Set: 57.33%
