In [1]:
import mne
import numpy as np

import os

import matplotlib.pyplot as plt

from sklearn.utils import resample

In [2]:
directory = './eeg-during-mental-arithmetic-tasks-1.0.0/'

rest_filepaths = []
task_filepaths = []

for filename in os.listdir(directory):
    filepath = os.path.join(directory, filename)
    if filename.endswith('.edf'):
        label = filename.split('_')[-1].split('.')[0]

        if label == '1':
            rest_filepaths.append(filepath)
        else:
            task_filepaths.append(filepath)

In [3]:
len(rest_filepaths), len(task_filepaths)

(36, 36)

In [4]:
average_epochs = 81


def read_data(filepath):
    data = mne.io.read_raw_edf(filepath, preload=True)
    data.set_eeg_reference()
    data.filter(l_freq=0.5, h_freq=45)
    epochs = mne.make_fixed_length_epochs(data, duration=5, overlap=1)
    array = epochs.get_data()

    # if array.shape[0]>121:
    #     array = resample(array, replace=False, n_samples=average_epochs, random_state=42)
    # else:
    #     # Oversample 'task' epochs to the average value
    #     array = resample(array, replace=True, n_samples=average_epochs, random_state=42)
    return array

In [5]:
%%capture
rest_epochs_array = [read_data(filepath) for filepath in rest_filepaths]
task_epochs_array = [read_data(filepath) for filepath in task_filepaths]

In [6]:
rest_epochs_array[0].shape, task_epochs_array[0].shape

((45, 21, 2500), (15, 21, 2500))

In [7]:
# Labels:
rest_label = [len(i)*[0] for i in rest_epochs_array]
task_label = [len(i)*[1] for i in task_epochs_array]

In [8]:
len(task_label), len(task_label[0])

(36, 15)

In [9]:
all_epochs = rest_epochs_array + task_epochs_array
all_labels = rest_label + task_label

# perm = np.random.permutation(72)

# # Shuffle both arrays using the same permutation along the first axis
# shuffled_epochs = []
# shuffled_labels= []

# for index in perm:
#     shuffled_epochs.append(all_epochs[index])
#     shuffled_labels.append(all_labels[index])

# all_epochs = shuffled_epochs
# all_labels = shuffled_labels

In [10]:
len(all_epochs), len(all_epochs[0]), len(
    all_epochs[0][0]), len(all_epochs[0][0][0]),

(72, 45, 21, 2500)

In [11]:
len(all_labels), len(all_labels[0])

(72, 45)

In [12]:
len(all_epochs), len(all_labels)

(72, 72)

In [13]:
group_list = [[i]*len(j) for i, j in enumerate(all_epochs)]

In [14]:
len(group_list), len(all_epochs), len(all_labels)

(72, 72, 72)

In [15]:
data_array = np.vstack(all_epochs)
label_array = np.hstack(all_labels)
group_array = np.hstack(group_list)

In [16]:
print(data_array.shape, label_array.shape, group_array.shape)

(2132, 21, 2500) (2132,) (2132,)


In [17]:
data_array = np.moveaxis(data_array, 1, 2)
data_array.shape

(2132, 2500, 21)

# WAY TO TRAINING


In [18]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Activation, Permute, Dropout
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from tensorflow.keras.layers import SeparableConv2D, DepthwiseConv2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import SpatialDropout2D
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.layers import Input, Flatten
from tensorflow.keras.constraints import max_norm
from tensorflow.keras import backend as K


def EEGNet(nb_classes, Chans=64, Samples=128,
           dropoutRate=0.5, kernLength=64, F1=8,
           D=2, F2=16, norm_rate=0.25, dropoutType='Dropout'):

    if dropoutType == 'SpatialDropout2D':
        dropoutType = SpatialDropout2D
    elif dropoutType == 'Dropout':
        dropoutType = Dropout
    else:
        raise ValueError('dropoutType must be one of SpatialDropout2D '
                         'or Dropout, passed as a string.')

    input1 = Input(shape=(Chans, Samples, 1))

    ##################################################################
    block1 = Conv2D(F1, (1, kernLength), padding='same',
                    input_shape=(Chans, Samples, 1),
                    use_bias=False)(input1)
    block1 = BatchNormalization()(block1)
    block1 = DepthwiseConv2D((Chans, 1), use_bias=False,
                             depth_multiplier=D,
                             depthwise_constraint=max_norm(1.))(block1)
    block1 = BatchNormalization()(block1)
    block1 = Activation('elu')(block1)
    block1 = AveragePooling2D((1, 4))(block1)
    block1 = dropoutType(dropoutRate)(block1)

    block2 = SeparableConv2D(F2, (1, 16),
                             use_bias=False, padding='same')(block1)
    block2 = BatchNormalization()(block2)
    block2 = Activation('elu')(block2)
    block2 = AveragePooling2D((1, 8))(block2)
    block2 = dropoutType(dropoutRate)(block2)

    flatten = Flatten(name='flatten')(block2)

    dense = Dense(nb_classes, name='dense',
                  kernel_constraint=max_norm(norm_rate))(flatten)
    softmax = Activation('softmax', name='softmax')(dense)

    return Model(inputs=input1, outputs=softmax)

In [19]:
from sklearn.model_selection import GroupKFold, LeaveOneGroupOut
from sklearn.preprocessing import StandardScaler

gkf = GroupKFold()

In [20]:
# accuracy = []

# for train_index, val_index in gkf.split(data_array, label_array, groups=group_array):
#     train_features, train_labels = data_array[train_index], label_array[train_index]
#     val_features, val_labels = data_array[val_index], label_array[val_index]

#     train_labels_onehot = np.zeros((train_labels.size, 2))
#     train_labels_onehot[np.arange(train_labels.size), train_labels] = 1

#     val_labels_onehot = np.zeros((val_labels.size, 2))
#     val_labels_onehot[np.arange(val_labels.size), val_labels] = 1

#     scaler = StandardScaler()
#     train_features = scaler.fit_transform(train_features.reshape(-1, train_features.shape[-1])).reshape(train_features.shape)
#     val_features = scaler.transform(val_features.reshape(-1, val_features.shape[-1])).reshape(val_features.shape)

#     train_features = np.moveaxis(train_features,1,2)
#     val_features = np.moveaxis(val_features,1,2)

#     train_features = np.reshape(train_features, (*train_features.shape, 1))
#     val_features = np.reshape(val_features, (*val_features.shape, 1))

#     model = EEGNet(2, Chans=21, Samples=1000)

#     model.compile(optimizer='adam',
#                   loss='binary_crossentropy',
#                   metrics=['accuracy'])

#     model.fit(train_features, train_labels_onehot, epochs =10, batch_size = 128)

#     accuracy.append(model.evaluate(val_features, val_labels_onehot)[1])

In [21]:
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

# Example function for EEGNet model


def EEGNet(nb_classes, Chans=21, Samples=500,
           dropoutRate=0.25, kernLength=16, F1=64,
           D=2, F2=16, norm_rate=0.25, dropoutType='Dropout'):

    if dropoutType == 'SpatialDropout2D':
        dropoutType = SpatialDropout2D
    elif dropoutType == 'Dropout':
        dropoutType = Dropout
    else:
        raise ValueError('dropoutType must be one of SpatialDropout2D '
                         'or Dropout, passed as a string.')

    input1 = Input(shape=(Chans, Samples, 1))

    ##################################################################
    block1 = Conv2D(F1, (1, 64), padding='same', use_bias=False)(input1)
    block1 = BatchNormalization()(block1)
    block1 = DepthwiseConv2D((21, 1), use_bias=False,
                             depth_multiplier=D)(block1)
    block1 = BatchNormalization()(block1)
    block1 = Activation('elu')(block1)
    block1 = AveragePooling2D((1, 4))(block1)
    block1 = dropoutType(dropoutRate)(block1)

    block2 = SeparableConv2D(
        F2, (1, 16), padding='same', use_bias=True)(block1)
    block2 = BatchNormalization()(block2)
    block2 = Activation('elu')(block2)
    block2 = AveragePooling2D((1, 8))(block2)
    block2 = dropoutType(dropoutRate)(block2)

    flatten = Flatten(name='flatten')(block2)
    dense = Dense(nb_classes, name='dense')(flatten)
    softmax = Activation('softmax', name='softmax')(dense)

    return Model(inputs=input1, outputs=softmax)


# Define cross-validation strategy
gkf = GroupKFold(n_splits=5)  # Example of 5-fold cross-validation


accuracy = []
test_accuracy = []

# Loop over each fold
for train_index, test_index in gkf.split(data_array, label_array, groups=group_array):
    # Split data into train and test for this fold
    train_data, test_data = data_array[train_index], data_array[test_index]
    train_labels, test_labels = label_array[train_index], label_array[test_index]

    # Split train data further into train and validation for this fold
    train_features, val_features, train_labels, val_labels = train_test_split(
        train_data, train_labels, test_size=0.2)

    # Preprocess data
    scaler = StandardScaler()
    train_features = scaler.fit_transform(
        train_features.reshape(-1, train_features.shape[-1])).reshape(train_features.shape)
    val_features = scaler.transform(
        val_features.reshape(-1, val_features.shape[-1])).reshape(val_features.shape)
    test_features = scaler.transform(
        test_data.reshape(-1, test_data.shape[-1])).reshape(test_data.shape)

    train_features = np.moveaxis(train_features, 1, 2)
    val_features = np.moveaxis(val_features, 1, 2)
    test_features = np.moveaxis(test_features, 1, 2)

    train_features = np.reshape(train_features, (*train_features.shape, 1))
    val_features = np.reshape(val_features, (*val_features.shape, 1))
    test_features = np.reshape(test_features, (*test_features.shape, 1))

    # Convert labels to one-hot encoding
    num_classes = 2  # Example number of classes
    train_labels_onehot = np.zeros((train_labels.size, num_classes))
    train_labels_onehot[np.arange(train_labels.size), train_labels] = 1

    val_labels_onehot = np.zeros((val_labels.size, num_classes))
    val_labels_onehot[np.arange(val_labels.size), val_labels] = 1

    test_labels_onehot = np.zeros((test_labels.size, num_classes))
    test_labels_onehot[np.arange(test_labels.size), test_labels] = 1

    # Initialize model
    model = EEGNet(num_classes, Chans=21, Samples=2500)

    # Compile model
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Train model on current fold's train and validation data
    model.fit(train_features, train_labels_onehot, epochs=10,
              batch_size=64, validation_data=(val_features, val_labels_onehot))

    # Evaluate model on test data for this fold
    test_loss, test_acc = model.evaluate(test_features, test_labels_onehot)
    test_accuracy.append(test_acc)

# After all folds, print average test accuracy
print("Average Test Accuracy:", np.mean(test_accuracy))

Epoch 1/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 2s/step - accuracy: 0.6201 - loss: 0.6784 - val_accuracy: 0.7573 - val_loss: 0.6865
Epoch 2/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 2s/step - accuracy: 0.7443 - loss: 0.5452 - val_accuracy: 0.2924 - val_loss: 0.6982
Epoch 3/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 2s/step - accuracy: 0.7573 - loss: 0.5129 - val_accuracy: 0.2953 - val_loss: 0.6996
Epoch 4/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 2s/step - accuracy: 0.7738 - loss: 0.4754 - val_accuracy: 0.3187 - val_loss: 0.7036
Epoch 5/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 2s/step - accuracy: 0.8047 - loss: 0.4200 - val_accuracy: 0.3216 - val_loss: 0.7085
Epoch 6/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 2s/step - accuracy: 0.8097 - loss: 0.4004 - val_accuracy: 0.4620 - val_loss: 0.6944
Epoch 7/10
[1m22/22[0m [32m━━━━━━━━━━

In [23]:
test_accuracy

[0.8009478449821472,
 0.7241379022598267,
 0.7241379022598267,
 0.75,
 0.7404761910438538]

In [22]:
# from sklearn.model_selection import GroupKFold
# from sklearn.preprocessing import StandardScaler
# from sklearn.model_selection import train_test_split
# import numpy as np
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Flatten

# # Example function for EEGNet model


# def EEGNet(nb_classes, Chans=21, Samples=500,
#            dropoutRate=0.25, kernLength=16, F1=64,
#            D=2, F2=16, norm_rate=0.25, dropoutType='Dropout'):

#     if dropoutType == 'SpatialDropout2D':
#         dropoutType = SpatialDropout2D
#     elif dropoutType == 'Dropout':
#         dropoutType = Dropout
#     else:
#         raise ValueError('dropoutType must be one of SpatialDropout2D '
#                          'or Dropout, passed as a string.')

#     input1 = Input(shape=(Chans, Samples, 1))

#     ##################################################################
#     block1 = Conv2D(F1, (1, 64), padding='same', use_bias=False)(input1)
#     block1 = BatchNormalization()(block1)
#     block1 = DepthwiseConv2D((21, 1), use_bias=False,
#                              depth_multiplier=D)(block1)
#     block1 = BatchNormalization()(block1)
#     block1 = Activation('elu')(block1)
#     block1 = AveragePooling2D((1, 4))(block1)
#     block1 = dropoutType(dropoutRate)(block1)

#     block2 = SeparableConv2D(
#         F2, (1, 16), padding='same', use_bias=True)(block1)
#     block2 = BatchNormalization()(block2)
#     block2 = Activation('elu')(block2)
#     block2 = AveragePooling2D((1, 8))(block2)
#     block2 = dropoutType(dropoutRate)(block2)

#     flatten = Flatten(name='flatten')(block2)
#     dense = Dense(nb_classes, name='dense')(flatten)
#     softmax = Activation('softmax', name='softmax')(dense)

#     return Model(inputs=input1, outputs=softmax)

# # Define cross-validation strategy


# gkf = GroupKFold(n_splits=5)  # Example of 5-fold cross-validation

# accuracy = []
# test_accuracy = []

# # Loop over each fold
# for train_index, test_index in gkf.split(data_array, label_array, groups=group_array):
#     # Split data into train and test for this fold
#     train_data, test_data = data_array[train_index], data_array[test_index]
#     train_labels, test_labels = label_array[train_index], label_array[test_index]

#     # Split train data further into train and validation for this fold
#     train_features, val_features, train_labels, val_labels = train_test_split(
#         train_data, train_labels, test_size=0.2)

#     # Preprocess data
#     scaler = StandardScaler()
#     train_features = scaler.fit_transform(
#         train_features.reshape(-1, train_features.shape[-1])).reshape(train_features.shape)
#     val_features = scaler.transform(
#         val_features.reshape(-1, val_features.shape[-1])).reshape(val_features.shape)
#     test_features = scaler.transform(
#         test_data.reshape(-1, test_data.shape[-1])).reshape(test_data.shape)

#     train_features = np.moveaxis(train_features, 1, 2)
#     val_features = np.moveaxis(val_features, 1, 2)
#     test_features = np.moveaxis(test_features, 1, 2)

#     train_features = np.reshape(train_features, (*train_features.shape, 1))
#     val_features = np.reshape(val_features, (*val_features.shape, 1))
#     test_features = np.reshape(test_features, (*test_features.shape, 1))

#     # Convert labels to one-hot encoding
#     num_classes = 2  # Example number of classes
#     train_labels_onehot = np.zeros((train_labels.size, num_classes))
#     train_labels_onehot[np.arange(train_labels.size), train_labels] = 1

#     val_labels_onehot = np.zeros((val_labels.size, num_classes))
#     val_labels_onehot[np.arange(val_labels.size), val_labels] = 1

#     test_labels_onehot = np.zeros((test_labels.size, num_classes))
#     test_labels_onehot[np.arange(test_labels.size), test_labels] = 1

#     # Initialize model
#     model = EEGNet(num_classes, Chans=21, Samples=2500)

#     # Compile model
#     model.compile(optimizer='adam',
#                   loss='binary_crossentropy',
#                   metrics=['accuracy'])

#     # Train model on current fold's train and validation data
#     model.fit(train_features, train_labels_onehot, epochs=10,
#               batch_size=64, validation_data=(val_features, val_labels_onehot))

#     # Evaluate model on test data for this fold
#     test_loss, test_acc = model.evaluate(test_features, test_labels_onehot)
#     test_accuracy.append(test_acc)

#     # After all folds, print average test accuracy
#     print("Average Test Accuracy:", np.mean(test_accuracy))