In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, train_test_split, GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
import keras
from keras.models import load_model
from keras import backend as K
from keras import models
from keras.layers import Dense
import tensorflow as tf
from tensorflow.keras.utils import to_categorical

from keras_tuner.tuners import RandomSearch
import keras_tuner

In [6]:
from dataset import load_dataset, load_ica_dataset, load_labels, convert_to_epochs
from features import time_series_features, nonlinear_features, entropy_features, hjorth_features, freq_band_features

In [7]:
num_classes = 2
channels = 32
sfreq = 128

In [8]:
#Filtered from SAM40
dataset_ = load_dataset(raw = False)
dataset = convert_to_epochs(dataset_, channels, sfreq)

#ICA filtered
dataset_ica_ = load_ica_dataset(round=1)
dataset_ica = convert_to_epochs(dataset_ica_, channels, sfreq)

#ICA filtered two times
dataset_ica_2_ = load_ica_dataset(round=2)
dataset_ica_2 = convert_to_epochs(dataset_ica_2_, channels, sfreq)

In [9]:
features = time_series_features(dataset, channels)
features_ica = time_series_features(dataset_ica, channels)
features_ica_2 = time_series_features(dataset_ica_2, channels)
# freq_bands = np.array([1, 4, 8, 13, 31, 50])
# features = freq_band_features(dataset, channels, sfreq, freq_bands)
# features = nonlinear_features(dataset, channels)
# features = hjorth_features(dataset, channels, sfreq)
# features = entropy_features(dataset, channels, sfreq)
data = features
data_ica = features_ica
data_ica_2 = features_ica_2

(120, 25, 32, 128)
(120, 25, 32, 128)
(120, 25, 32, 128)


In [10]:
labels = load_labels()
label = pd.concat([labels['t1_math'], labels['t2_math'],
                  labels['t3_math']]).to_numpy()
label = label.repeat(dataset.shape[1])

# Linear Regression

In [12]:
def LinearRegression(data, label):
    K.clear_session()
    x_train, x_test, y_train, y_test = train_test_split(
    data, label, test_size=0.33, random_state=42)
    scaler = StandardScaler()
    scaler.fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)
    lr_clf = LogisticRegression(max_iter=1000).fit(x_train, y_train)
    y_pred = lr_clf.predict(x_test)
    y_true = y_test

    scores_lr = lr_clf.score(x_test, y_test)
    precision_lr = metrics.precision_score(y_true, y_pred, average='macro')
    recall_lr = metrics.recall_score(y_true, y_pred, average='micro')
    f1_score_lr = metrics.f1_score(y_true, y_pred, average='weighted')
    print(metrics.classification_report(y_true, y_pred))
    print(metrics.confusion_matrix(y_true, y_pred))

# KNN Classifier

In [13]:
def KNN(data, label):
    K.clear_session()
    x, x_test, y, y_test = train_test_split(data, label, test_size=0.2, random_state=1)
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.25, random_state=1)
    scaler = StandardScaler()
    scaler.fit(x_train)
    x = scaler.transform(x)
    x_train = scaler.transform(x_train)
    x_val = scaler.transform(x_val)
    x_test = scaler.transform(x_test)

    param_grid = {
        'leaf_size': range(50),
        'n_neighbors': range(1, 10),
        'p': [1, 2]
    }
    split_index = [-1 if x in range(len(x_train)) else 0 for x in range(len(x))]
    ps = PredefinedSplit(test_fold=split_index)
    knn_clf = GridSearchCV(KNeighborsClassifier(), param_grid, cv=ps, refit=True)
    knn_clf.fit(x, y)

    y_pred = knn_clf.predict(x_test)
    y_true = y_test

    print(metrics.classification_report(y_true, y_pred))
    print(metrics.confusion_matrix(y_true, y_pred))

# SVM Classifier

In [14]:
def SVM(data, label):
    x, x_test, y, y_test = train_test_split(data, label, test_size=0.2, random_state=1)
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.25, random_state=1)

    param_grid = {
        'C': [0.1, 1, 10, 100, 1000],
        'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
        'kernel': ['rbf']
    }
    split_index = [-1 if x in range(len(x_train)) else 0 for x in range(len(x))]
    ps = PredefinedSplit(test_fold=split_index)
    clf = GridSearchCV(SVC(), param_grid, cv=ps, refit=True)
    clf.fit(x, y)

    y_pred = clf.predict(x_test)
    y_true = y_test
    print(metrics.classification_report(y_true, y_pred))
    print(metrics.confusion_matrix(y_true, y_pred))

In [15]:
print('Filtered data')
LinearRegression(data,label)
print('\nICA filtered data')
LinearRegression(data_ica,label)
print('\nICA filtered data round 2')
LinearRegression(data_ica_2,label)

Filtered data
              precision    recall  f1-score   support

       False       0.71      0.69      0.70       527
        True       0.66      0.67      0.66       463

    accuracy                           0.68       990
   macro avg       0.68      0.68      0.68       990
weighted avg       0.68      0.68      0.68       990

[[364 163]
 [152 311]]

ICA filtered data
              precision    recall  f1-score   support

       False       0.62      0.60      0.61       527
        True       0.56      0.58      0.57       463

    accuracy                           0.59       990
   macro avg       0.59      0.59      0.59       990
weighted avg       0.59      0.59      0.59       990

[[314 213]
 [193 270]]

ICA filtered data round 2
              precision    recall  f1-score   support

       False       0.66      0.67      0.66       527
        True       0.62      0.61      0.61       463

    accuracy                           0.64       990
   macro avg       0.6

In [16]:
print('Filtered data')
KNN(data,label)
print('\nICA filtered data')
KNN(data_ica,label)
print('\nICA filtered data round 2')
KNN(data_ica_2,label)

Filtered data
              precision    recall  f1-score   support

       False       0.79      0.77      0.78       311
        True       0.76      0.78      0.77       289

    accuracy                           0.78       600
   macro avg       0.77      0.78      0.77       600
weighted avg       0.78      0.78      0.78       600

[[241  70]
 [ 65 224]]

ICA filtered data
              precision    recall  f1-score   support

       False       0.63      0.65      0.64       311
        True       0.61      0.59      0.60       289

    accuracy                           0.62       600
   macro avg       0.62      0.62      0.62       600
weighted avg       0.62      0.62      0.62       600

[[202 109]
 [118 171]]

ICA filtered data round 2
              precision    recall  f1-score   support

       False       0.69      0.73      0.71       311
        True       0.69      0.64      0.66       289

    accuracy                           0.69       600
   macro avg       0.6

In [17]:
print('Filtered data')
SVM(data,label)
print('\nICA filtered data')
SVM(data_ica,label)
print('\nICA filtered data round 2')
SVM(data_ica_2,label)

Filtered data
              precision    recall  f1-score   support

       False       0.97      0.95      0.96       311
        True       0.95      0.97      0.96       289

    accuracy                           0.96       600
   macro avg       0.96      0.96      0.96       600
weighted avg       0.96      0.96      0.96       600

[[296  15]
 [ 10 279]]

ICA filtered data
              precision    recall  f1-score   support

       False       0.70      0.84      0.76       311
        True       0.78      0.61      0.68       289

    accuracy                           0.73       600
   macro avg       0.74      0.73      0.72       600
weighted avg       0.74      0.73      0.73       600

[[262  49]
 [113 176]]

ICA filtered data round 2
              precision    recall  f1-score   support

       False       0.88      0.92      0.90       311
        True       0.91      0.87      0.89       289

    accuracy                           0.89       600
   macro avg       0.8

# Neural Network

In [30]:
K.clear_session()
y_v = label
y_v = to_categorical(y_v, num_classes)
x_train, x_test, y_train, y_test = train_test_split(data, y_v, test_size=0.2, random_state=1)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25, random_state=1)

def model_builder(hp):
    model = keras.models.Sequential()
    model.add(keras.Input(shape=(x_train.shape[1],)))

    for i in range(hp.Int('layers', 2, 6)):
      model.add(keras.layers.Dense(units=hp.Int('units_' + str(i), 32, 1024, step=32),
                                    activation=hp.Choice('act_' + str(i), ['relu', 'sigmoid'])))

    model.add(keras.layers.Dense(num_classes, activation='softmax', name='out'))
    
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.compile(optimizer=keras.optimizers.adam_v2.Adam(learning_rate=hp_learning_rate),
                loss = "binary_crossentropy",
                metrics=['accuracy'])
    return model


tuner = RandomSearch(
    model_builder,
    objective = 'val_accuracy',
    max_trials = 5,
    executions_per_trial = 2,
    overwrite=True
)

tuner.search_space_summary()

tuner.search(x_train, y_train, epochs = 50, validation_data= [x_val, y_val])

model = tuner.get_best_models(num_models=1)[0]

y_pred = model.predict(x_test)
y_true = y_test
y_pred = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_true, axis=1)

scores_dnn = model.evaluate(x_test, y_test, verbose=0)
precision_dnn = metrics.precision_score(y_true, y_pred, average='macro')
recall_dnn = metrics.recall_score(y_true, y_pred, average='micro')
f1_score_dnn = metrics.f1_score(y_true, y_pred, average='weighted')
print('accuracy is:', scores_dnn[1])
print('precision is:', precision_dnn)
print('recall is:', recall_dnn)
print('f1_score is:', f1_score_dnn)
metrics.confusion_matrix(y_true, y_pred)

Trial 5 Complete [00h 02m 41s]
val_accuracy: 0.565833330154419

Best val_accuracy So Far: 0.8816666603088379
Total elapsed time: 00h 09m 16s
INFO:tensorflow:Oracle triggered exit
accuracy is: 0.824999988079071
precision is: 0.8261298421807748
recall is: 0.825
f1_score is: 0.8246763408735239


array([[267,  44],
       [ 61, 228]])

In [31]:
K.clear_session()
y_v = label
y_v = to_categorical(y_v, num_classes)
x_train, x_test, y_train, y_test = train_test_split(data_ica, y_v, test_size=0.2, random_state=1)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25, random_state=1)

def model_builder(hp):
    model = keras.models.Sequential()
    model.add(keras.Input(shape=(x_train.shape[1],)))

    for i in range(hp.Int('layers', 2, 6)):
      model.add(keras.layers.Dense(units=hp.Int('units_' + str(i), 32, 1024, step=32),
                                    activation=hp.Choice('act_' + str(i), ['relu', 'sigmoid'])))

    model.add(keras.layers.Dense(num_classes, activation='softmax', name='out'))
    
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.compile(optimizer=keras.optimizers.adam_v2.Adam(learning_rate=hp_learning_rate),
                loss = "binary_crossentropy",
                metrics=['accuracy'])
    return model


tuner = RandomSearch(
    model_builder,
    objective = 'val_accuracy',
    max_trials = 5,
    executions_per_trial = 2,
    overwrite=True
)

tuner.search_space_summary()

tuner.search(x_train, y_train, epochs = 50, validation_data= [x_val, y_val])

model = tuner.get_best_models(num_models=1)[0]

y_pred = model.predict(x_test)
y_true = y_test
y_pred = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_true, axis=1)

scores_dnn = model.evaluate(x_test, y_test, verbose=0)
precision_dnn = metrics.precision_score(y_true, y_pred, average='macro')
recall_dnn = metrics.recall_score(y_true, y_pred, average='micro')
f1_score_dnn = metrics.f1_score(y_true, y_pred, average='weighted')
print('accuracy is:', scores_dnn[1])
print('precision is:', precision_dnn)
print('recall is:', recall_dnn)
print('f1_score is:', f1_score_dnn)
metrics.confusion_matrix(y_true, y_pred)
    

Trial 5 Complete [00h 02m 57s]
val_accuracy: 0.7450000047683716

Best val_accuracy So Far: 0.8266666531562805
Total elapsed time: 00h 10m 44s
INFO:tensorflow:Oracle triggered exit
accuracy is: 0.846666693687439
precision is: 0.846427499416349
recall is: 0.8466666666666667
f1_score is: 0.8466973609581956


array([[263,  48],
       [ 44, 245]])

In [8]:
K.clear_session()
y_v = label
y_v = to_categorical(y_v, num_classes)
x_train, x_test, y_train, y_test = train_test_split(data_ica_2, y_v, test_size=0.2, random_state=1)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25, random_state=1)

def model_builder(hp):
    model = keras.models.Sequential()
    model.add(keras.Input(shape=(x_train.shape[1],)))

    for i in range(hp.Int('layers', 2, 6)):
      model.add(keras.layers.Dense(units=hp.Int('units_' + str(i), 32, 1024, step=32),
                                    activation=hp.Choice('act_' + str(i), ['relu', 'sigmoid'])))

    model.add(keras.layers.Dense(num_classes, activation='softmax', name='out'))
    
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.compile(optimizer=keras.optimizers.adam_v2.Adam(learning_rate=hp_learning_rate),
                loss = "binary_crossentropy",
                metrics=['accuracy'])
    return model


tuner = RandomSearch(
    model_builder,
    objective = 'val_accuracy',
    max_trials = 5,
    executions_per_trial = 2,
    overwrite=True
)

tuner.search_space_summary()

tuner.search(x_train, y_train, epochs = 50, validation_data= [x_val, y_val])

model = tuner.get_best_models(num_models=1)[0]

y_pred = model.predict(x_test)
y_true = y_test
y_pred = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_true, axis=1)

scores_dnn = model.evaluate(x_test, y_test, verbose=0)
precision_dnn = metrics.precision_score(y_true, y_pred, average='macro')
recall_dnn = metrics.recall_score(y_true, y_pred, average='micro')
f1_score_dnn = metrics.f1_score(y_true, y_pred, average='weighted')
print('accuracy is:', scores_dnn[1])
print('precision is:', precision_dnn)
print('recall is:', recall_dnn)
print('f1_score is:', f1_score_dnn)
metrics.confusion_matrix(y_true, y_pred)

Trial 5 Complete [00h 01m 56s]
val_accuracy: 0.8974999785423279

Best val_accuracy So Far: 0.8974999785423279
Total elapsed time: 00h 09m 32s
INFO:tensorflow:Oracle triggered exit
accuracy is: 0.8949999809265137
precision is: 0.8957839939228741
recall is: 0.895
f1_score is: 0.894893803756363


array([[285,  26],
       [ 37, 252]])