# Imports

In [1]:
import numpy as np
from scipy.io import loadmat, whosmat, savemat
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import os
import re
import pandas as pd
from tqdm import tqdm
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import pickle
import gc
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.utils import shuffle
from sklearn.neural_network import MLPClassifier
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Utils

In [2]:
base_path = ''

In [3]:
# name of features
features_map = {
  0: 'asm_LDS',
  1: 'asm_movingAve',
  2: 'dasm_LDS',
  3: 'dasm_movingAve',
  4: 'dcau_LDS',
  5: 'dcau_movingAve',
  6: 'de_LDS',
  7: 'de_movingAve',
  8: 'psd_LDS',
  9: 'psd_movingAve',
  10: 'rasm_LDS',
  11: 'rasm_movingAve'
}

In [4]:
# extract
def ends_with_specific_number(input_string, specific_number):
  pattern = re.compile(rf".*{specific_number}$")
  return bool(pattern.match(input_string))

In [5]:
'''
Sorts files based on:
  1. subject (Asc)
  2. experiment date (Asc)
'''

def custom_sort(file_name):
  # Extract the number before the underscore and the date
  match = re.match(r'(\d+)_(\d+)', file_name)
  if match:
    number_part = int(match.group(1))
    date_part = int(match.group(2))
    return (number_part, date_part)
  else:
    return (float('inf'), float('inf'))

# Loading Data

In [17]:
with open(os.path.join(base_path, 'alldata2.pkl'), 'rb') as file:
  data = pickle.load(file)

In [18]:
labels = loadmat(os.path.join(base_path, 'label.mat'))['label'][0]
labels_edited = np.empty(675)
for i in range(0,45):
  labels_edited[i*15:(i+1)*15] = labels

# All Subjects, 5-Fold (Mixed Data), All Features

In [11]:
results_dict ={
  "SVM": [],
  "K-Nearest Neighbors":[],
  "Logistic Regression": []
}

all_index = [i for i in range(15)]
kf = KFold(n_splits=5, shuffle=False)

data = data.reshape(len(data),-1)

for fold, (train_index, test_index) in enumerate(kf.split(all_index)):
  print(f"\n\nRunning fold {fold}")
  train_indices = np.array([list(range(i*45, (i+1)*45)) for i in train_index ]).flatten()
  test_indices = np.array([list(range(i*45, (i+1)*45)) for i in test_index ]).flatten()

  train_labels = labels_edited[train_indices]
  test_labels = labels_edited[test_indices]

  classifiers = {
    "SVM": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression()
  }

  for name, clf in classifiers.items():
    print(f"{name} started at: {datetime.now().strftime('%H:%M:%S')}")
    clf.fit(data[train_indices], train_labels)
    print(f"{name} trained at: {datetime.now().strftime('%H:%M:%S')}")
    predictions = clf.predict(data[test_indices])
    print(f"{name} predicted at: {datetime.now().strftime('%H:%M:%S')}")
    accuracy = accuracy_score(test_labels, predictions)
    print(f"{name} Accuracy: {accuracy}")
    results_dict[name].append(accuracy)
    del clf
    del predictions
    gc.collect()


df = pd.DataFrame(results_dict)
df.to_csv(os.path.join(base_path, 'final-allsubj-5fold-allfeatures.csv'))



Running fold 0
SVM started at: 13:47:20
SVM trained at: 13:57:31
SVM predicted at: 14:06:03
SVM Accuracy: 0.32592592592592595
K-Nearest Neighbors started at: 14:06:05
K-Nearest Neighbors trained at: 14:06:54
K-Nearest Neighbors predicted at: 14:07:32
K-Nearest Neighbors Accuracy: 0.4962962962962963
Logistic Regression started at: 14:07:32
Logistic Regression trained at: 14:18:16
Logistic Regression predicted at: 14:18:27
Logistic Regression Accuracy: 0.5481481481481482


Running fold 1
SVM started at: 14:18:28
SVM trained at: 14:27:10
SVM predicted at: 14:36:17
SVM Accuracy: 0.3333333333333333
K-Nearest Neighbors started at: 14:36:18
K-Nearest Neighbors trained at: 14:37:08
K-Nearest Neighbors predicted at: 14:37:33
K-Nearest Neighbors Accuracy: 0.37037037037037035
Logistic Regression started at: 14:37:34
Logistic Regression trained at: 14:40:11
Logistic Regression predicted at: 14:40:32
Logistic Regression Accuracy: 0.3333333333333333


Running fold 2
SVM started at: 14:40:34
SVM tr

# All Subjects, 3 Subjects Aside, All Features

In [12]:
results_dict ={
  "SVM": [],
  "K-Nearest Neighbors":[],
  "Logistic Regression": []
}

all_index = [i for i in range(15)]
kf = KFold(n_splits=5, shuffle=False)

# data = data.reshape(len(data),-1)

for fold, (train_index, test_index) in enumerate(kf.split(all_index)):
  print(f"\n\nRunning fold {fold}")
  train_indices = np.array([list(range(i*45, (i+1)*45)) for i in train_index ]).flatten()
  test_indices = np.array([list(range(i*45, (i+1)*45)) for i in test_index ]).flatten()

  train_labels = labels_edited[train_indices]
  test_labels = labels_edited[test_indices]

  classifiers = {
    "SVM": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression()
  }

  for name, clf in classifiers.items():
    print(f"{name} started at: {datetime.now().strftime('%H:%M:%S')}")
    clf.fit(data[train_indices], train_labels)
    print(f"{name} trained at: {datetime.now().strftime('%H:%M:%S')}")
    predictions = clf.predict(data[test_indices])
    print(f"{name} predicted at: {datetime.now().strftime('%H:%M:%S')}")
    accuracy = accuracy_score(test_labels, predictions)
    print(f"{name} Accuracy: {accuracy}")
    results_dict[name].append(accuracy)
    del clf
    del predictions
    gc.collect()


df = pd.DataFrame(results_dict)
df.to_csv(os.path.join(base_path, 'final-allsubj-5fold-3subjaside-allfeatures.csv'))



Running fold 0
SVM started at: 15:54:57
SVM trained at: 16:01:57
SVM predicted at: 16:09:52
SVM Accuracy: 0.32592592592592595
K-Nearest Neighbors started at: 16:09:53
K-Nearest Neighbors trained at: 16:10:48
K-Nearest Neighbors predicted at: 16:11:15
K-Nearest Neighbors Accuracy: 0.4962962962962963
Logistic Regression started at: 16:11:16
Logistic Regression trained at: 16:20:03
Logistic Regression predicted at: 16:20:12
Logistic Regression Accuracy: 0.5481481481481482


Running fold 1
SVM started at: 16:20:14
SVM trained at: 16:27:44
SVM predicted at: 16:35:32
SVM Accuracy: 0.3333333333333333
K-Nearest Neighbors started at: 16:35:33
K-Nearest Neighbors trained at: 16:36:12
K-Nearest Neighbors predicted at: 16:36:42
K-Nearest Neighbors Accuracy: 0.37037037037037035
Logistic Regression started at: 16:36:43
Logistic Regression trained at: 16:38:30
Logistic Regression predicted at: 16:38:39
Logistic Regression Accuracy: 0.3333333333333333


Running fold 2
SVM started at: 16:38:41
SVM tr

# All Subjects, 3 Videos Aside, All Features

In [20]:
results_dict ={
  "SVM": [],
  "K-Nearest Neighbors":[],
  "Logistic Regression": []
}

data = data.reshape(len(data),-1)

'''
Handling 5-fold on videos:
- Each time we selected 3 videos from each experiment. As we know, each experiment has 15 videos (trials) so
- it will be like we applied 5 fold on videos. In this method we considered that by the original order
- of the trials, each time we expect sad, happy, and neutral video for test.
'''
for k in range (5):
  print(f"\n\nRunning fold {fold}")
  test_index = []
  all_index = [i for i in range (675)]
  train_index=[]
  for e in range(45):
    test_index.extend(list(range(e*15 + 3*k,e*15 + 3*k + 3)))

  train_index = list(set(all_index)-set(test_index))

  train_labels = labels_edited[train_index]
  test_labels = labels_edited[test_index]

  classifiers = {
    "SVM": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression()
  }

  for name, clf in classifiers.items():
    print(f"{name} started at: {datetime.now().strftime('%H:%M:%S')}")
    clf.fit(data[train_index], train_labels)
    print(f"{name} trained at: {datetime.now().strftime('%H:%M:%S')}")
    predictions = clf.predict(data[test_index])
    print(f"{name} predicted at: {datetime.now().strftime('%H:%M:%S')}")
    accuracy = accuracy_score(test_labels, predictions)
    print(f"{name} Accuracy: {accuracy}")
    results_dict[name].append(accuracy)

    del clf
    del predictions
    gc.collect()

df = pd.DataFrame(results_dict)
df.to_csv(os.path.join(base_path, 'final-allsubj-5fold-3vidaside-allfeatures.csv'))



Running fold 4
SVM started at: 23:33:28
SVM trained at: 23:41:10
SVM predicted at: 23:48:57
SVM Accuracy: 0.3333333333333333
K-Nearest Neighbors started at: 23:48:58
K-Nearest Neighbors trained at: 23:49:29
K-Nearest Neighbors predicted at: 23:49:38
K-Nearest Neighbors Accuracy: 0.35555555555555557
Logistic Regression started at: 23:49:39
Logistic Regression trained at: 23:50:47
Logistic Regression predicted at: 23:50:49
Logistic Regression Accuracy: 0.3333333333333333


Running fold 4
SVM started at: 23:50:50
SVM trained at: 23:58:03
SVM predicted at: 00:05:42
SVM Accuracy: 0.3333333333333333
K-Nearest Neighbors started at: 00:05:42
K-Nearest Neighbors trained at: 00:05:57
K-Nearest Neighbors predicted at: 00:06:14
K-Nearest Neighbors Accuracy: 0.35555555555555557
Logistic Regression started at: 00:06:15
Logistic Regression trained at: 00:07:47
Logistic Regression predicted at: 00:07:55
Logistic Regression Accuracy: 0.3333333333333333


Running fold 4
SVM started at: 00:07:57
SVM tr

# All Subjects, 3 Subjects Aside, Selected Features

In [19]:
results_dict ={
  "SVM": [],
  "K-Nearest Neighbors":[],
  "Logistic Regression": []
}

all_index = [i for i in range(15)]
kf = KFold(n_splits=5, shuffle=False)

for fold, (train_index, test_index) in enumerate(kf.split(all_index)):
  print(f"\n\nRunning fold {fold}")
  train_indices = np.array([list(range(i*45, (i+1)*45)) for i in train_index ]).flatten()
  test_indices = np.array([list(range(i*45, (i+1)*45)) for i in test_index ]).flatten()

  train_data = data[train_indices,:,:,:]
  test_data = data[test_indices,:,:,:]
  train_data = train_data[:,[6,7,10,11],:,:].reshape(len(train_indices),-1)
  test_data = test_data[:,[6,7,10,11],:,:].reshape(len(test_indices),-1)
  train_labels = labels_edited[train_indices]
  test_labels = labels_edited[test_indices]

  classifiers = {
    "SVM": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression()
  }

  for name, clf in classifiers.items():
    print(f"{name} started at: {datetime.now().strftime('%H:%M:%S')}")
    clf.fit(train_data, train_labels)
    print(f"{name} trained at: {datetime.now().strftime('%H:%M:%S')}")
    predictions = clf.predict(test_data)
    print(f"{name} predicted at: {datetime.now().strftime('%H:%M:%S')}")
    accuracy = accuracy_score(test_labels, predictions)
    print(f"{name} Accuracy: {accuracy}")
    results_dict[name].append(accuracy)


df = pd.DataFrame(results_dict)
df.to_csv(os.path.join(base_path, 'final-allsubj-5fold-3subjaside-selectedfeatures.csv'))



Running fold 0
SVM started at: 22:35:22
SVM trained at: 22:37:01
SVM predicted at: 22:40:12
SVM Accuracy: 0.7333333333333333
K-Nearest Neighbors started at: 22:40:12
K-Nearest Neighbors trained at: 22:40:12
K-Nearest Neighbors predicted at: 22:40:15
K-Nearest Neighbors Accuracy: 0.8148148148148148
Logistic Regression started at: 22:40:15
Logistic Regression trained at: 22:41:35
Logistic Regression predicted at: 22:41:35
Logistic Regression Accuracy: 0.837037037037037


Running fold 1
SVM started at: 22:42:21
SVM trained at: 22:44:12
SVM predicted at: 22:46:45
SVM Accuracy: 0.7333333333333333
K-Nearest Neighbors started at: 22:46:45
K-Nearest Neighbors trained at: 22:46:45
K-Nearest Neighbors predicted at: 22:46:48
K-Nearest Neighbors Accuracy: 0.8222222222222222
Logistic Regression started at: 22:46:48
Logistic Regression trained at: 22:47:52
Logistic Regression predicted at: 22:47:52
Logistic Regression Accuracy: 0.7851851851851852


Running fold 2
SVM started at: 22:48:24
SVM train