# Imports

In [1]:
import numpy as np
from scipy.io import loadmat, whosmat, savemat
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import os
import re
import pandas as pd
from tqdm import tqdm
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import pickle
import gc
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.utils import shuffle
from sklearn.neural_network import MLPClassifier
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Utils

In [2]:
# Colab-only: mount Google Drive at /content/drive so files stored there can be
# read and written through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Root folder on the mounted Drive. Input files are read from here and the
# result CSVs are written here (joined with os.path.join in later cells).
base_path = '/content/drive/MyDrive/ML and Brain/SEED_EEG/'

In [4]:
# Feature index -> feature name. The index is the position used on the second
# axis of `data` (e.g. data[rows, f, ...]); names come in `_LDS` / `_movingAve` pairs.
features_map = dict(enumerate([
  'asm_LDS',
  'asm_movingAve',
  'dasm_LDS',
  'dasm_movingAve',
  'dcau_LDS',
  'dcau_movingAve',
  'de_LDS',
  'de_movingAve',
  'psd_LDS',
  'psd_movingAve',
  'rasm_LDS',
  'rasm_movingAve',
]))

In [5]:
# extract
def ends_with_specific_number(input_string, specific_number):
  """Tell whether `input_string` ends with the literal text of `specific_number`.

  Args:
    input_string: The string to inspect.
    specific_number: The suffix to look for; it is converted with `str()`, so
      both `15` and `"15"` are accepted.

  Returns:
    True if the string ends with that exact text, otherwise False.
  """
  # Escape the suffix so characters such as '.' or '+' are matched literally
  # instead of being interpreted as regex operators.
  suffix = re.escape(str(specific_number))
  pattern = re.compile(rf".*{suffix}$")
  return bool(pattern.match(input_string))

In [6]:
'''
Sorts files based on:
  1. subject (Asc)
  2. experiment date (Asc)
'''

def custom_sort(file_name):
  """Sort key for file names that start with `<digits>_<digits>`.

  The first digit run (the subject, per the notebook's description) and the
  second digit run (the experiment date) are returned as an integer pair, so
  sorting with this key orders by subject first and date second.

  Names that do not start with that pattern get `(inf, inf)` and therefore
  sort after every name that does.
  """
  prefix = re.match(r'(\d+)_(\d+)', file_name)
  if prefix is None:
    return (float('inf'), float('inf'))

  subject_text, date_text = prefix.groups()
  return (int(subject_text), int(date_text))

# Loading Data

In [7]:
# Load `alldata.pkl` from the Drive folder into `data`.
# NOTE: pickle.load can execute arbitrary code while reading, so only load
# files that you created yourself or otherwise fully trust.
# Later cells index `data` along five axes, e.g. data[rows, feature, :, :, band].
with open(os.path.join(base_path, 'alldata.pkl'), 'rb') as file:
  data = pickle.load(file)

In [8]:
# Read the first row of the 'label' entry stored in label.mat.
label_file = os.path.join(base_path, 'SEED_EEG/ExtractedFeatures', 'label.mat')
labels = loadmat(label_file)['label'][0]

# Build one label per row of `data`: 675 rows seen as 45 blocks of 15, with the
# label row copied into every block. The reshape is a view on `labels_edited`,
# so the broadcast assignment fills the flat array in place.
labels_edited = np.empty(675)
labels_edited.reshape(45, 15)[:] = labels

# All Subjects, 5-Fold (Mixed Data), Each Feature

In [9]:
# Accuracy scores keyed by feature index, then classifier name.
# Every fold appends one score to the matching list.
results_dict = {
  feature_id: {
    "SVM": [],
    "K-Nearest Neighbors": [],
    "Logistic Regression": []
  }
  for feature_id in features_map
}

# Shuffled 5-fold split over the rows of `data`; the fixed seed makes the
# fold membership repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for f, feature_name in features_map.items():
  print(f"\n\nRunning feature {f}:{feature_name}")
  for fold, (train_index, test_index) in enumerate(kf.split(data)):
    print(f"Running fold {fold}")

    # Keep only feature `f` and flatten the remaining axes into one vector per row.
    train_data = data[train_index, f, :, :, :].reshape(len(train_index), -1)
    test_data = data[test_index, f, :, :, :].reshape(len(test_index), -1)

    train_labels = labels_edited[train_index]
    test_labels = labels_edited[test_index]

    # Fresh, default-configured models for every (feature, fold) pair.
    classifiers = {
      "SVM": SVC(),
      "K-Nearest Neighbors": KNeighborsClassifier(),
      "Logistic Regression": LogisticRegression()
    }

    for name, clf in classifiers.items():
      print(f"{name} started at: {datetime.now():%H:%M:%S}")
      clf.fit(train_data, train_labels)
      print(f"{name} trained at: {datetime.now():%H:%M:%S}")
      predictions = clf.predict(test_data)
      print(f"{name} predicted at: {datetime.now():%H:%M:%S}")
      accuracy = accuracy_score(test_labels, predictions)
      print(f"{name} Accuracy: {accuracy}")
      results_dict[f][name].append(accuracy)

      # Drop the loop references and ask the collector to run before the next model.
      del clf, predictions
      gc.collect()


# Columns are feature indices, rows are classifier names, cells are per-fold score lists.
df = pd.DataFrame(results_dict)
df.to_csv(os.path.join(base_path, 'final-allsubj-5fold-eachfeatures.csv'))



Running feature 0:asm_LDS
Running fold 0
SVM started at: 19:56:06
SVM trained at: 19:57:03
SVM predicted at: 19:57:20
SVM Accuracy: 0.5185185185185185
K-Nearest Neighbors started at: 19:57:20
K-Nearest Neighbors trained at: 19:57:20
K-Nearest Neighbors predicted at: 19:57:21
K-Nearest Neighbors Accuracy: 0.4148148148148148
Logistic Regression started at: 19:57:21
Logistic Regression trained at: 19:58:08
Logistic Regression predicted at: 19:58:08
Logistic Regression Accuracy: 0.7407407407407407
Running fold 1
SVM started at: 19:58:10
SVM trained at: 19:58:55
SVM predicted at: 19:59:11
SVM Accuracy: 0.4962962962962963
K-Nearest Neighbors started at: 19:59:12
K-Nearest Neighbors trained at: 19:59:12
K-Nearest Neighbors predicted at: 19:59:12
K-Nearest Neighbors Accuracy: 0.37037037037037035
Logistic Regression started at: 19:59:12
Logistic Regression trained at: 19:59:55
Logistic Regression predicted at: 19:59:55
Logistic Regression Accuracy: 0.725925925925926
Running fold 2
SVM started

# All Subjects, 3 Subjects Aside, Each Feature

In [10]:
# Accuracy scores keyed by feature index, then classifier name.
# Every fold appends one score to the matching list.
results_dict = {
  feature_id: {
    "SVM": [],
    "K-Nearest Neighbors": [],
    "Logistic Regression": []
  }
  for feature_id in features_map
}

# The folds are taken over 15 group ids, in order and without shuffling, so each
# fold holds out 3 consecutive groups. Per the section title a group is one
# subject; this assumes each subject owns 45 contiguous rows of `data` -- TODO confirm.
all_index = list(range(15))
kf = KFold(n_splits=5, shuffle=False)

for f, feature_name in features_map.items():
  print(f"\n\nRunning feature {f}:{feature_name}")
  for fold, (train_index, test_index) in enumerate(kf.split(all_index)):
    print(f"Running fold {fold}")

    # Expand every group id g into its row range [g*45, (g+1)*45).
    train_indices = (train_index[:, None] * 45 + np.arange(45)).ravel()
    test_indices = (test_index[:, None] * 45 + np.arange(45)).ravel()

    # Keep only feature `f` and flatten the remaining axes into one vector per row.
    train_data = data[train_indices, f, :, :, :].reshape(len(train_indices), -1)
    test_data = data[test_indices, f, :, :, :].reshape(len(test_indices), -1)

    train_labels = labels_edited[train_indices]
    test_labels = labels_edited[test_indices]

    # Fresh, default-configured models for every (feature, fold) pair.
    classifiers = {
      "SVM": SVC(),
      "K-Nearest Neighbors": KNeighborsClassifier(),
      "Logistic Regression": LogisticRegression()
    }

    for name, clf in classifiers.items():
      print(f"{name} started at: {datetime.now():%H:%M:%S}")
      clf.fit(train_data, train_labels)
      print(f"{name} trained at: {datetime.now():%H:%M:%S}")
      predictions = clf.predict(test_data)
      print(f"{name} predicted at: {datetime.now():%H:%M:%S}")
      accuracy = accuracy_score(test_labels, predictions)
      print(f"{name} Accuracy: {accuracy}")
      results_dict[f][name].append(accuracy)

      # Drop the loop references and ask the collector to run before the next model.
      del clf, predictions
      gc.collect()


# Columns are feature indices, rows are classifier names, cells are per-fold score lists.
df = pd.DataFrame(results_dict)
df.to_csv(os.path.join(base_path, 'final-allsubj-5fold-3subjaside-eachfeatures.csv'))



Running feature 0:asm_LDS
Running fold 0
SVM started at: 21:27:32
SVM trained at: 21:28:17
SVM predicted at: 21:28:29
SVM Accuracy: 0.5407407407407407
K-Nearest Neighbors started at: 21:28:29
K-Nearest Neighbors trained at: 21:28:29
K-Nearest Neighbors predicted at: 21:28:30
K-Nearest Neighbors Accuracy: 0.48148148148148145
Logistic Regression started at: 21:28:30
Logistic Regression trained at: 21:29:09
Logistic Regression predicted at: 21:29:09
Logistic Regression Accuracy: 0.6888888888888889
Running fold 1
SVM started at: 21:29:09
SVM trained at: 21:29:47
SVM predicted at: 21:29:59
SVM Accuracy: 0.6148148148148148
K-Nearest Neighbors started at: 21:29:59
K-Nearest Neighbors trained at: 21:29:59
K-Nearest Neighbors predicted at: 21:30:00
K-Nearest Neighbors Accuracy: 0.5481481481481482
Logistic Regression started at: 21:30:00
Logistic Regression trained at: 21:30:40
Logistic Regression predicted at: 21:30:40
Logistic Regression Accuracy: 0.6814814814814815
Running fold 2
SVM starte

# All Subjects, 3 Videos Aside, Each Feature

In [11]:
# Accuracy scores keyed by feature index, then classifier name.
# Every fold appends one score to the matching list.
results_dict = {
  feature_id: {
    "SVM": [],
    "K-Nearest Neighbors": [],
    "Logistic Regression": []
  }
  for feature_id in features_map
}

# Handling 5-fold on videos:
# - Each experiment contributes 15 consecutive rows (its 15 videos/trials), and
#   there are 45 experiments (675 rows in total).
# - Fold k holds out videos 3k, 3k+1 and 3k+2 of every experiment, which amounts
#   to a 5-fold split over videos.
# - The author's stated expectation is that, in the original trial order, every
#   such group of three contains one sad, one happy and one neutral video.
all_index = list(range(675))

for k in range(5):
  # Report the loop's own counter; the previous `fold` name was a leftover from
  # an earlier cell and printed a stale value (or failed on a fresh kernel).
  print(f"\n\nRunning fold {k}")

  # Rows held out in this fold: three consecutive videos from each experiment.
  test_index = [e*15 + 3*k + offset for e in range(45) for offset in range(3)]

  # Everything else is used for training. Sorting keeps the row order
  # deterministic, because set iteration order is not guaranteed.
  train_index = sorted(set(all_index) - set(test_index))

  for f in features_map:
    print(f"Running feature {f}:{features_map[f]}")

    # Keep only feature `f` and flatten the remaining axes into one vector per row.
    train_data = data[train_index,f,:,:,:].reshape(len(train_index),-1)
    test_data = data[test_index,f,:,:,:].reshape(len(test_index),-1)

    train_labels = labels_edited[train_index]
    test_labels = labels_edited[test_index]

    # Fresh, default-configured models for every (fold, feature) pair.
    classifiers = {
      "SVM": SVC(),
      "K-Nearest Neighbors": KNeighborsClassifier(),
      "Logistic Regression": LogisticRegression()
    }

    for name, clf in classifiers.items():
      print(f"{name} started at: {datetime.now().strftime('%H:%M:%S')}")
      clf.fit(train_data, train_labels)
      print(f"{name} trained at: {datetime.now().strftime('%H:%M:%S')}")
      predictions = clf.predict(test_data)
      print(f"{name} predicted at: {datetime.now().strftime('%H:%M:%S')}")
      accuracy = accuracy_score(test_labels, predictions)
      print(f"{name} Accuracy: {accuracy}")
      # The lists start empty, so a plain append covers the first fold as well.
      results_dict[f][name].append(accuracy)

      # Drop the loop references and ask the collector to run before the next model.
      del clf
      del predictions
      gc.collect()


# Columns are feature indices, rows are classifier names, cells are per-fold score lists.
df = pd.DataFrame(results_dict)
df.to_csv(os.path.join(base_path, 'final-allsubj-5fold-3vidaside-eachfeatures.csv'))



Running fold 4
Running feature 0:asm_LDS
SVM started at: 22:48:00
SVM trained at: 22:48:40
SVM predicted at: 22:48:55
SVM Accuracy: 0.2814814814814815
K-Nearest Neighbors started at: 22:48:55
K-Nearest Neighbors trained at: 22:48:55
K-Nearest Neighbors predicted at: 22:48:56
K-Nearest Neighbors Accuracy: 0.4074074074074074
Logistic Regression started at: 22:48:56
Logistic Regression trained at: 22:49:34
Logistic Regression predicted at: 22:49:34
Logistic Regression Accuracy: 0.4074074074074074
Running feature 1:asm_movingAve
SVM started at: 22:49:35
SVM trained at: 22:50:15
SVM predicted at: 22:50:29
SVM Accuracy: 0.2074074074074074
K-Nearest Neighbors started at: 22:50:30
K-Nearest Neighbors trained at: 22:50:30
K-Nearest Neighbors predicted at: 22:50:31
K-Nearest Neighbors Accuracy: 0.35555555555555557
Logistic Regression started at: 22:50:31
Logistic Regression trained at: 22:51:11
Logistic Regression predicted at: 22:51:11
Logistic Regression Accuracy: 0.4074074074074074
Running 

# All Subjects, 3 Subjects Aside, Each Feature, Each Frequency Band

In [12]:
# Accuracy scores keyed by feature index, then classifier name, then band
# index (0-4). Every fold appends one score to the matching per-band list.
results_dict = {
  feature_id: {
    "SVM": [[] for _ in range(5)],
    "K-Nearest Neighbors": [[] for _ in range(5)],
    "Logistic Regression": [[] for _ in range(5)]
  }
  for feature_id in features_map
}

# The folds are taken over 15 group ids, in order and without shuffling, so each
# fold holds out 3 consecutive groups. Per the section title a group is one
# subject; this assumes each subject owns 45 contiguous rows of `data` -- TODO confirm.
all_index = list(range(15))
kf = KFold(n_splits=5, shuffle=False)

for f, feature_name in features_map.items():
  print(f"\n\nRunning feature {f}:{feature_name}")
  for fold, (train_index, test_index) in enumerate(kf.split(all_index)):
    print(f"Running fold {fold}")

    # Expand every group id g into its row range [g*45, (g+1)*45).
    train_indices = (train_index[:, None] * 45 + np.arange(45)).ravel()
    test_indices = (test_index[:, None] * 45 + np.arange(45)).ravel()

    train_labels = labels_edited[train_indices]
    test_labels = labels_edited[test_indices]

    for b in range(5):
      print(f"Running band {b}")

      # Keep only feature `f` at position `b` of the last axis, then flatten
      # what is left into one vector per row.
      train_data = data[train_indices, f, :, :, b].reshape(len(train_indices), -1)
      test_data = data[test_indices, f, :, :, b].reshape(len(test_indices), -1)

      # Fresh, default-configured models for every (feature, fold, band) triple.
      classifiers = {
        "SVM": SVC(),
        "K-Nearest Neighbors": KNeighborsClassifier(),
        "Logistic Regression": LogisticRegression()
      }

      for name, clf in classifiers.items():
        print(f"{name} started at: {datetime.now():%H:%M:%S}")
        clf.fit(train_data, train_labels)
        print(f"{name} trained at: {datetime.now():%H:%M:%S}")
        predictions = clf.predict(test_data)
        print(f"{name} predicted at: {datetime.now():%H:%M:%S}")
        accuracy = accuracy_score(test_labels, predictions)
        print(f"{name} Accuracy: {accuracy}")
        results_dict[f][name][b].append(accuracy)

        # Drop the loop references and ask the collector to run before the next model.
        del clf, predictions
        gc.collect()


# Columns are feature indices, rows are classifier names; each cell holds the
# five per-band lists of fold scores.
df = pd.DataFrame(results_dict)
df.to_csv(os.path.join(base_path, 'final-allsubj-5fold-3subjaside-eachfeatures-5bands.csv'))



Running feature 0:asm_LDS
Running fold 0
Running band 0
SVM started at: 00:08:01
SVM trained at: 00:08:06
SVM predicted at: 00:08:07
SVM Accuracy: 0.5777777777777777
K-Nearest Neighbors started at: 00:08:08
K-Nearest Neighbors trained at: 00:08:08
K-Nearest Neighbors predicted at: 00:08:08
K-Nearest Neighbors Accuracy: 0.4740740740740741
Logistic Regression started at: 00:08:08
Logistic Regression trained at: 00:08:18
Logistic Regression predicted at: 00:08:18
Logistic Regression Accuracy: 0.6814814814814815
Running band 1
SVM started at: 00:08:18
SVM trained at: 00:08:23
SVM predicted at: 00:08:24
SVM Accuracy: 0.5777777777777777
K-Nearest Neighbors started at: 00:08:24
K-Nearest Neighbors trained at: 00:08:24
K-Nearest Neighbors predicted at: 00:08:25
K-Nearest Neighbors Accuracy: 0.6074074074074074
Logistic Regression started at: 00:08:25
Logistic Regression trained at: 00:08:34
Logistic Regression predicted at: 00:08:34
Logistic Regression Accuracy: 0.6666666666666666
Running ban