In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.figure_factory as ff
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.manifold import TSNE
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, mean_squared_error, r2_score, classification_report
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
import random
import os
import warnings
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import torch.optim as optim
import torchmetrics
from torchmetrics import F1Score
from positional_encodings.torch_encodings import PositionalEncoding1D, PositionalEncoding2D, PositionalEncoding3D, Summer
from tqdm import tqdm
import neurokit2 as nk
import pyEDA.main as pyEDA


# Ignore every warning raised after this point (note: this also hides
# potentially useful diagnostics emitted by the imported libraries).
warnings.filterwarnings('ignore')


In [None]:
def seed_everything(seed: int):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    all CUDA devices), exports ``PYTHONHASHSEED`` and configures cuDNN for
    deterministic execution.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Only takes effect for Python processes started after this point; the
    # hash seed of the running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not only the current device. Silently ignored when CUDA
    # is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and possibly select different
    # kernels from run to run, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False


seed = 111 # (42, 43, 111)
# The scikit-learn training helpers in this notebook read a module-level
# `random_seed`; keep it tied to the notebook seed so they use the same value.
random_seed = seed
seed_everything(seed)

In [None]:
def csv_read(path):
    """Load the CSV file at ``path`` and return it as a pandas DataFrame."""
    return pd.read_csv(path)

In [None]:
def feature_analysis(df,relevant_features,identifiers):
    """Plot feature diagnostics, print label counts and return the working frame.

    Shows a correlation heatmap and per-feature histograms for
    ``relevant_features``, prints how many rows carry label 1 / label 0 in the
    ``arousal_category``, ``valence_category`` and ``taskwiselabel`` columns,
    and finally narrows ``df`` to the feature and identifier columns.

    Args:
        df: DataFrame holding the feature columns, the three label columns and
            the ``Participant ID`` / ``Video ID`` columns.
        relevant_features: List of feature column names.
        identifiers: List of additional column names to keep in the result.

    Returns:
        Tuple ``(df_required, pi, vi)``: ``df`` restricted to
        ``relevant_features + identifiers``, the ``Participant ID`` column as a
        list and the ``Video ID`` column as a list.
    """
    feature_frame = df[relevant_features]

    # Pairwise correlations between the selected features.
    correlations = feature_frame.corr()
    plt.figure(figsize=(100, 100))
    sns.heatmap(correlations, annot=True, cmap='coolwarm', fmt=".2f")
    plt.title('Correlation Heatmap')
    plt.show()

    # Distribution of every selected feature.
    feature_frame.hist(bins=30, figsize=(20, 15))
    plt.tight_layout()
    plt.show()

    def _count_ones_and_zeros(column):
        # Returns (number of 1 entries, number of 0 entries) for the column.
        values = df[column].tolist()
        return values.count(1), values.count(0)

    high_arousal, low_arousal = _count_ones_and_zeros('arousal_category')
    print(f"The Number of entries of High Arousal: {high_arousal} & Low Arousal: {low_arousal}")

    high_valence, low_valence = _count_ones_and_zeros('valence_category')
    print(f"The Number of entries of High Valence: {high_valence} & Low Valence: {low_valence}")

    positive_tasks, negative_tasks = _count_ones_and_zeros('taskwiselabel')
    print(f"The Number of entries of Positive Task: {positive_tasks} & Negative Task: {negative_tasks}")

    participant_ids = df['Participant ID'].tolist()
    video_ids = df['Video ID'].tolist()

    return df[relevant_features + identifiers], participant_ids, video_ids

In [None]:
def train_logistic_regression(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal , y_test_valence, y_test_stress):
    """Train and evaluate logistic-regression classifiers for arousal, valence and task.

    Three independent ``LogisticRegression`` models are fitted on ``X_train``
    (one per target) and scored on ``X_test``. The estimators' ``random_state``
    is read from the module-level ``random_seed``, which must be defined before
    this function is called.

    Args:
        X_train: Training feature matrix.
        y_train_arousal: Training labels for arousal.
        y_train_valence: Training labels for valence.
        y_train_stress: Training labels for the task label.
        X_test: Test feature matrix.
        y_test_arousal: Ground-truth arousal labels for ``X_test``.
        y_test_valence: Ground-truth valence labels for ``X_test``.
        y_test_stress: Ground-truth task labels for ``X_test``.

    Returns:
        A 17-tuple, in this order: accuracy_arousal, balanced_acc_arousal,
        accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v,
        f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task,
        f1_task, num_zeros, num_ones. Accuracies are fractions (not
        percentages); ``num_zeros`` / ``num_ones`` count the predicted arousal
        labels equal to 0 and 1.
    """
    # Create and fit the logistic regression model for Arousal
    model_arousal = LogisticRegression(random_state=random_seed)
    model_arousal.fit(X_train, y_train_arousal)

    # Create and fit the logistic regression model for Valence
    model_valence = LogisticRegression(random_state=random_seed)
    model_valence.fit(X_train, y_train_valence)

    # Create and fit the logistic regression model for Task
    model_task = LogisticRegression(random_state=random_seed)
    model_task.fit(X_train, y_train_stress)

    # Predict on the test set for Arousal, Valence and Task
    y_pred_arousal = model_arousal.predict(X_test)
    y_pred_valence = model_valence.predict(X_test)
    y_pred_task = model_task.predict(X_test)

    # Class balance of the arousal predictions
    arousal_predictions = list(y_pred_arousal)
    num_zeros = arousal_predictions.count(0)
    num_ones = arousal_predictions.count(1)

    # scikit-learn metrics take (y_true, y_pred). Balanced accuracy and R2 are
    # not symmetric in their arguments, so the ground truth must come first.
    accuracy_arousal = accuracy_score(y_test_arousal, y_pred_arousal)
    balanced_acc_arousal = balanced_accuracy_score(y_test_arousal, y_pred_arousal)
    accuracy_valence = accuracy_score(y_test_valence, y_pred_valence)
    balanced_acc_valence = balanced_accuracy_score(y_test_valence, y_pred_valence)
    accuracy_task = accuracy_score(y_test_stress, y_pred_task)
    balanced_acc_task = balanced_accuracy_score(y_test_stress, y_pred_task)

    f1_a = f1_score(y_test_arousal, y_pred_arousal)
    f1_v = f1_score(y_test_valence, y_pred_valence)
    f1_task = f1_score(y_test_stress, y_pred_task)

    r2_a = r2_score(y_test_arousal, y_pred_arousal)
    r2_v = r2_score(y_test_valence, y_pred_valence)
    r2_task = r2_score(y_test_stress, y_pred_task)

    mse_a = mean_squared_error(y_test_arousal, y_pred_arousal)
    mse_v = mean_squared_error(y_test_valence, y_pred_valence)
    mse_task = mean_squared_error(y_test_stress, y_pred_task)

    return accuracy_arousal, balanced_acc_arousal, accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v, f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task, f1_task , num_zeros, num_ones

In [None]:
def train_decision_tree(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal , y_test_valence, y_test_stress):
    """Train and evaluate decision-tree classifiers for arousal, valence and task.

    Three independent ``DecisionTreeClassifier`` models (entropy criterion,
    ``min_samples_split=20``) are fitted on ``X_train`` and scored on
    ``X_test``. The estimators' ``random_state`` is read from the module-level
    ``random_seed``, which must be defined before this function is called.

    Args:
        X_train: Training feature matrix.
        y_train_arousal: Training labels for arousal.
        y_train_valence: Training labels for valence.
        y_train_stress: Training labels for the task label.
        X_test: Test feature matrix.
        y_test_arousal: Ground-truth arousal labels for ``X_test``.
        y_test_valence: Ground-truth valence labels for ``X_test``.
        y_test_stress: Ground-truth task labels for ``X_test``.

    Returns:
        A 17-tuple, in this order: accuracy_arousal, balanced_acc_arousal,
        accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v,
        f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task,
        f1_task, num_zeros, num_ones. Accuracies are fractions (not
        percentages); ``num_zeros`` / ``num_ones`` count the predicted arousal
        labels equal to 0 and 1.
    """
    # Create and fit the Decision Tree model for Arousal
    model_arousal = DecisionTreeClassifier(criterion='entropy', min_samples_split=20, random_state=random_seed)
    model_arousal.fit(X_train, y_train_arousal)

    # Create and fit the Decision Tree model for Valence
    model_valence = DecisionTreeClassifier(criterion='entropy', min_samples_split=20, random_state=random_seed)
    model_valence.fit(X_train, y_train_valence)

    # Predict on the test set for Arousal and Valence
    y_pred_arousal = model_arousal.predict(X_test)
    y_pred_valence = model_valence.predict(X_test)

    # Class balance of the arousal predictions
    arousal_predictions = list(y_pred_arousal)
    num_zeros = arousal_predictions.count(0)
    num_ones = arousal_predictions.count(1)

    # Create and fit the Decision Tree model for Task
    model_task = DecisionTreeClassifier(criterion='entropy', min_samples_split=20, random_state=random_seed)
    model_task.fit(X_train, y_train_stress)
    y_pred_task = model_task.predict(X_test)

    # scikit-learn metrics take (y_true, y_pred). Balanced accuracy and R2 are
    # not symmetric in their arguments, so the ground truth must come first.
    accuracy_arousal = accuracy_score(y_test_arousal, y_pred_arousal)
    balanced_acc_arousal = balanced_accuracy_score(y_test_arousal, y_pred_arousal)
    accuracy_valence = accuracy_score(y_test_valence, y_pred_valence)
    balanced_acc_valence = balanced_accuracy_score(y_test_valence, y_pred_valence)
    accuracy_task = accuracy_score(y_test_stress, y_pred_task)
    balanced_acc_task = balanced_accuracy_score(y_test_stress, y_pred_task)

    f1_a = f1_score(y_test_arousal, y_pred_arousal)
    f1_v = f1_score(y_test_valence, y_pred_valence)
    f1_task = f1_score(y_test_stress, y_pred_task)

    r2_a = r2_score(y_test_arousal, y_pred_arousal)
    r2_v = r2_score(y_test_valence, y_pred_valence)
    r2_task = r2_score(y_test_stress, y_pred_task)

    mse_a = mean_squared_error(y_test_arousal, y_pred_arousal)
    mse_v = mean_squared_error(y_test_valence, y_pred_valence)
    mse_task = mean_squared_error(y_test_stress, y_pred_task)

    return accuracy_arousal, balanced_acc_arousal, accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v, f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task, f1_task, num_zeros, num_ones

In [None]:
def train_random_forest(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal , y_test_valence, y_test_stress):
    """Train and evaluate random-forest classifiers for arousal, valence and task.

    Three independent ``RandomForestClassifier`` models with 100 trees each
    are fitted on ``X_train`` and scored on ``X_test``. The estimators'
    ``random_state`` is read from the module-level ``random_seed``, which must
    be defined before this function is called.

    Args:
        X_train: Training feature matrix.
        y_train_arousal: Training labels for arousal.
        y_train_valence: Training labels for valence.
        y_train_stress: Training labels for the task label.
        X_test: Test feature matrix.
        y_test_arousal: Ground-truth arousal labels for ``X_test``.
        y_test_valence: Ground-truth valence labels for ``X_test``.
        y_test_stress: Ground-truth task labels for ``X_test``.

    Returns:
        A 17-tuple, in this order: accuracy_arousal, balanced_acc_arousal,
        accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v,
        f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task,
        f1_task, num_zeros, num_ones. Accuracies are fractions (not
        percentages); ``num_zeros`` / ``num_ones`` count the predicted arousal
        labels equal to 0 and 1.
    """
    # Create and fit the Random Forest model for Arousal with 100 base estimators
    model_arousal_rf = RandomForestClassifier(n_estimators=100, random_state=random_seed)
    model_arousal_rf.fit(X_train, y_train_arousal)

    # Create and fit the Random Forest model for Valence with 100 base estimators
    model_valence_rf = RandomForestClassifier(n_estimators=100, random_state=random_seed)
    model_valence_rf.fit(X_train, y_train_valence)

    # Predict on the test set for Arousal and Valence
    y_pred_arousal_rf = model_arousal_rf.predict(X_test)
    y_pred_valence_rf = model_valence_rf.predict(X_test)

    # Class balance of the arousal predictions
    arousal_predictions = list(y_pred_arousal_rf)
    num_zeros = arousal_predictions.count(0)
    num_ones = arousal_predictions.count(1)

    # Create and fit the Random Forest model for Task
    model_task = RandomForestClassifier(n_estimators=100, random_state=random_seed)
    model_task.fit(X_train, y_train_stress)
    y_pred_task = model_task.predict(X_test)

    # scikit-learn metrics take (y_true, y_pred). Balanced accuracy and R2 are
    # not symmetric in their arguments, so the ground truth must come first.
    accuracy_arousal_rf = accuracy_score(y_test_arousal, y_pred_arousal_rf)
    balanced_acc_arousal_rf = balanced_accuracy_score(y_test_arousal, y_pred_arousal_rf)
    accuracy_valence_rf = accuracy_score(y_test_valence, y_pred_valence_rf)
    balanced_acc_valence_rf = balanced_accuracy_score(y_test_valence, y_pred_valence_rf)
    accuracy_task = accuracy_score(y_test_stress, y_pred_task)
    balanced_acc_task = balanced_accuracy_score(y_test_stress, y_pred_task)

    f1_a = f1_score(y_test_arousal, y_pred_arousal_rf)
    f1_v = f1_score(y_test_valence, y_pred_valence_rf)
    f1_task = f1_score(y_test_stress, y_pred_task)

    r2_a = r2_score(y_test_arousal, y_pred_arousal_rf)
    r2_v = r2_score(y_test_valence, y_pred_valence_rf)
    r2_task = r2_score(y_test_stress, y_pred_task)

    mse_a = mean_squared_error(y_test_arousal, y_pred_arousal_rf)
    mse_v = mean_squared_error(y_test_valence, y_pred_valence_rf)
    mse_task = mean_squared_error(y_test_stress, y_pred_task)

    return accuracy_arousal_rf, balanced_acc_arousal_rf, accuracy_valence_rf, balanced_acc_valence_rf, r2_a, r2_v, mse_a, mse_v, f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task, f1_task, num_zeros, num_ones

In [None]:
def train_svm(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal , y_test_valence, y_test_stress):
    """Train and evaluate RBF-kernel SVM classifiers for arousal, valence and task.

    Three independent ``SVC(kernel='rbf')`` models are fitted on ``X_train``
    and scored on ``X_test``. The estimators' ``random_state`` is read from the
    module-level ``random_seed``, which must be defined before this function is
    called.

    Args:
        X_train: Training feature matrix.
        y_train_arousal: Training labels for arousal.
        y_train_valence: Training labels for valence.
        y_train_stress: Training labels for the task label.
        X_test: Test feature matrix.
        y_test_arousal: Ground-truth arousal labels for ``X_test``.
        y_test_valence: Ground-truth valence labels for ``X_test``.
        y_test_stress: Ground-truth task labels for ``X_test``.

    Returns:
        A 17-tuple, in this order: accuracy_arousal, balanced_acc_arousal,
        accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v,
        f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task,
        f1_task, num_zeros, num_ones. Accuracies are fractions (not
        percentages); ``num_zeros`` / ``num_ones`` count the predicted arousal
        labels equal to 0 and 1.
    """
    # Create and fit the SVM model for Arousal
    model_arousal_svm = SVC(kernel='rbf', random_state=random_seed)
    model_arousal_svm.fit(X_train, y_train_arousal)

    # Create and fit the SVM model for Valence
    model_valence_svm = SVC(kernel='rbf', random_state=random_seed)
    model_valence_svm.fit(X_train, y_train_valence)

    # Predict on the test set for Arousal and Valence
    y_pred_arousal_svm = model_arousal_svm.predict(X_test)
    y_pred_valence_svm = model_valence_svm.predict(X_test)

    # Class balance of the arousal predictions
    arousal_predictions = list(y_pred_arousal_svm)
    num_zeros = arousal_predictions.count(0)
    num_ones = arousal_predictions.count(1)

    # Create and fit the SVM model for Task
    model_task = SVC(kernel='rbf', random_state=random_seed)
    model_task.fit(X_train, y_train_stress)
    y_pred_task = model_task.predict(X_test)

    # scikit-learn metrics take (y_true, y_pred). Balanced accuracy and R2 are
    # not symmetric in their arguments, so the ground truth must come first.
    accuracy_arousal_svm = accuracy_score(y_test_arousal, y_pred_arousal_svm)
    balanced_acc_arousal_svm = balanced_accuracy_score(y_test_arousal, y_pred_arousal_svm)
    accuracy_valence_svm = accuracy_score(y_test_valence, y_pred_valence_svm)
    balanced_acc_valence_svm = balanced_accuracy_score(y_test_valence, y_pred_valence_svm)
    accuracy_task = accuracy_score(y_test_stress, y_pred_task)
    balanced_acc_task = balanced_accuracy_score(y_test_stress, y_pred_task)

    f1_a = f1_score(y_test_arousal, y_pred_arousal_svm)
    f1_v = f1_score(y_test_valence, y_pred_valence_svm)
    f1_task = f1_score(y_test_stress, y_pred_task)

    r2_a = r2_score(y_test_arousal, y_pred_arousal_svm)
    r2_v = r2_score(y_test_valence, y_pred_valence_svm)
    r2_task = r2_score(y_test_stress, y_pred_task)

    mse_a = mean_squared_error(y_test_arousal, y_pred_arousal_svm)
    mse_v = mean_squared_error(y_test_valence, y_pred_valence_svm)
    mse_task = mean_squared_error(y_test_stress, y_pred_task)

    return accuracy_arousal_svm, balanced_acc_arousal_svm, accuracy_valence_svm, balanced_acc_valence_svm, r2_a, r2_v, mse_a, mse_v, f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task, f1_task, num_zeros, num_ones

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

def train_lda(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal , y_test_valence, y_test_stress):
    """Train and evaluate LDA classifiers for arousal, valence and task.

    Three independent ``LinearDiscriminantAnalysis(n_components=1)`` models
    are fitted on ``X_train`` and scored on ``X_test``.

    Args:
        X_train: Training feature matrix.
        y_train_arousal: Training labels for arousal.
        y_train_valence: Training labels for valence.
        y_train_stress: Training labels for the task label.
        X_test: Test feature matrix.
        y_test_arousal: Ground-truth arousal labels for ``X_test``.
        y_test_valence: Ground-truth valence labels for ``X_test``.
        y_test_stress: Ground-truth task labels for ``X_test``.

    Returns:
        A 17-tuple, in this order: accuracy_arousal, balanced_acc_arousal,
        accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v,
        f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task,
        f1_task, num_zeros, num_ones. Accuracies are fractions (not
        percentages); ``num_zeros`` / ``num_ones`` count the predicted arousal
        labels equal to 0 and 1.
    """
    # Create and fit the LDA model for Arousal
    model_arousal_lda = LinearDiscriminantAnalysis(n_components=1)
    model_arousal_lda.fit(X_train, y_train_arousal)

    # Create and fit the LDA model for Valence
    model_valence_lda = LinearDiscriminantAnalysis(n_components=1)
    model_valence_lda.fit(X_train, y_train_valence)

    # Predict on the test set for Arousal and Valence
    y_pred_arousal_lda = model_arousal_lda.predict(X_test)
    y_pred_valence_lda = model_valence_lda.predict(X_test)

    # Class balance of the arousal predictions
    arousal_predictions = list(y_pred_arousal_lda)
    num_zeros = arousal_predictions.count(0)
    num_ones = arousal_predictions.count(1)

    # Create and fit the LDA model for Task
    model_task = LinearDiscriminantAnalysis(n_components=1)
    model_task.fit(X_train, y_train_stress)
    y_pred_task = model_task.predict(X_test)

    # scikit-learn metrics take (y_true, y_pred). Balanced accuracy and R2 are
    # not symmetric in their arguments, so the ground truth must come first.
    accuracy_arousal_lda = accuracy_score(y_test_arousal, y_pred_arousal_lda)
    balanced_acc_arousal_lda = balanced_accuracy_score(y_test_arousal, y_pred_arousal_lda)
    accuracy_valence_lda = accuracy_score(y_test_valence, y_pred_valence_lda)
    balanced_acc_valence_lda = balanced_accuracy_score(y_test_valence, y_pred_valence_lda)
    accuracy_task = accuracy_score(y_test_stress, y_pred_task)
    balanced_acc_task = balanced_accuracy_score(y_test_stress, y_pred_task)

    f1_a = f1_score(y_test_arousal, y_pred_arousal_lda)
    f1_v = f1_score(y_test_valence, y_pred_valence_lda)
    f1_task = f1_score(y_test_stress, y_pred_task)

    r2_a = r2_score(y_test_arousal, y_pred_arousal_lda)
    r2_v = r2_score(y_test_valence, y_pred_valence_lda)
    r2_task = r2_score(y_test_stress, y_pred_task)

    mse_a = mean_squared_error(y_test_arousal, y_pred_arousal_lda)
    mse_v = mean_squared_error(y_test_valence, y_pred_valence_lda)
    mse_task = mean_squared_error(y_test_stress, y_pred_task)

    return accuracy_arousal_lda, balanced_acc_arousal_lda, accuracy_valence_lda, balanced_acc_valence_lda, r2_a, r2_v, mse_a, mse_v, f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task, f1_task, num_zeros, num_ones

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

def train_xgboost(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal , y_test_valence, y_test_stress):
    """Train and evaluate gradient-boosting classifiers for arousal, valence and task.

    Despite the function name, the models are scikit-learn
    ``GradientBoostingClassifier`` instances (100 estimators each), not the
    ``xgboost`` library. The estimators' ``random_state`` is read from the
    module-level ``random_seed``, which must be defined before this function is
    called.

    Args:
        X_train: Training feature matrix.
        y_train_arousal: Training labels for arousal.
        y_train_valence: Training labels for valence.
        y_train_stress: Training labels for the task label.
        X_test: Test feature matrix.
        y_test_arousal: Ground-truth arousal labels for ``X_test``.
        y_test_valence: Ground-truth valence labels for ``X_test``.
        y_test_stress: Ground-truth task labels for ``X_test``.

    Returns:
        A 17-tuple, in this order: accuracy_arousal, balanced_acc_arousal,
        accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v,
        f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task,
        f1_task, num_zeros, num_ones. Accuracies are fractions (not
        percentages); ``num_zeros`` / ``num_ones`` count the predicted arousal
        labels equal to 0 and 1.
    """
    # Create and fit the gradient-boosting model for Arousal with 100 base estimators
    model_arousal = GradientBoostingClassifier(n_estimators=100, random_state=random_seed)
    model_arousal.fit(X_train, y_train_arousal)

    # Create and fit the gradient-boosting model for Valence with 100 base estimators
    model_valence = GradientBoostingClassifier(n_estimators=100, random_state=random_seed)
    model_valence.fit(X_train, y_train_valence)

    # Predict on the test set for Arousal and Valence
    y_pred_arousal = model_arousal.predict(X_test)
    y_pred_valence = model_valence.predict(X_test)

    # Class balance of the arousal predictions
    arousal_predictions = list(y_pred_arousal)
    num_zeros = arousal_predictions.count(0)
    num_ones = arousal_predictions.count(1)

    # Create and fit the gradient-boosting model for Task
    model_task = GradientBoostingClassifier(n_estimators=100, random_state=random_seed)
    model_task.fit(X_train, y_train_stress)
    y_pred_task = model_task.predict(X_test)

    # scikit-learn metrics take (y_true, y_pred). Balanced accuracy and R2 are
    # not symmetric in their arguments, so the ground truth must come first.
    accuracy_arousal = accuracy_score(y_test_arousal, y_pred_arousal)
    balanced_acc_arousal = balanced_accuracy_score(y_test_arousal, y_pred_arousal)
    accuracy_valence = accuracy_score(y_test_valence, y_pred_valence)
    balanced_acc_valence = balanced_accuracy_score(y_test_valence, y_pred_valence)
    accuracy_task = accuracy_score(y_test_stress, y_pred_task)
    balanced_acc_task = balanced_accuracy_score(y_test_stress, y_pred_task)

    f1_a = f1_score(y_test_arousal, y_pred_arousal)
    f1_v = f1_score(y_test_valence, y_pred_valence)
    f1_task = f1_score(y_test_stress, y_pred_task)

    r2_a = r2_score(y_test_arousal, y_pred_arousal)
    r2_v = r2_score(y_test_valence, y_pred_valence)
    r2_task = r2_score(y_test_stress, y_pred_task)

    mse_a = mean_squared_error(y_test_arousal, y_pred_arousal)
    mse_v = mean_squared_error(y_test_valence, y_pred_valence)
    mse_task = mean_squared_error(y_test_stress, y_pred_task)

    return accuracy_arousal, balanced_acc_arousal, accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v, f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task, f1_task, num_zeros, num_ones

In [None]:
from sklearn.neural_network import MLPClassifier

def train_mlp(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal, y_test_valence, y_test_stress, hidden_layer_sizes=(100,), random_seed=42):
    """Train and evaluate MLP classifiers for arousal, valence and task.

    One ``MLPClassifier`` per target is fitted on ``X_train`` and scored on
    ``X_test``.

    Args:
        X_train: Training feature matrix.
        y_train_arousal: Training labels for arousal.
        y_train_valence: Training labels for valence.
        y_train_stress: Training labels for the task label.
        X_test: Test feature matrix.
        y_test_arousal: Ground-truth arousal labels for ``X_test``.
        y_test_valence: Ground-truth valence labels for ``X_test``.
        y_test_stress: Ground-truth task labels for ``X_test``.
        hidden_layer_sizes: Forwarded to ``MLPClassifier``.
        random_seed: Forwarded to ``MLPClassifier`` as ``random_state``.

    Returns:
        A 17-tuple, in this order: arousal accuracy, arousal balanced
        accuracy, valence accuracy, valence balanced accuracy, arousal R2,
        valence R2, arousal MSE, valence MSE, arousal F1, valence F1, task
        accuracy, task balanced accuracy, task R2, task MSE, task F1, number
        of arousal predictions equal to 0, number equal to 1.
    """

    def _fit_and_predict(train_labels):
        # Fresh network per target, trained on X_train and applied to X_test.
        network = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, random_state=random_seed)
        network.fit(X_train, train_labels)
        return network.predict(X_test)

    def _score(truth, predicted):
        # (accuracy, balanced accuracy, F1, R2, MSE) for one target.
        return (
            accuracy_score(truth, predicted),
            balanced_accuracy_score(truth, predicted),
            f1_score(truth, predicted),
            r2_score(truth, predicted),
            mean_squared_error(truth, predicted),
        )

    # Fit order matches arousal -> valence -> task.
    arousal_hat = _fit_and_predict(y_train_arousal)
    valence_hat = _fit_and_predict(y_train_valence)
    task_hat = _fit_and_predict(y_train_stress)

    # Class balance of the arousal predictions.
    arousal_as_list = list(arousal_hat)
    num_zeros = arousal_as_list.count(0)
    num_ones = arousal_as_list.count(1)

    acc_a, bal_a, f1_arousal, r2_arousal, mse_arousal = _score(y_test_arousal, arousal_hat)
    acc_v, bal_v, f1_valence, r2_valence, mse_valence = _score(y_test_valence, valence_hat)
    acc_t, bal_t, f1_task, r2_task, mse_task = _score(y_test_stress, task_hat)

    return (acc_a, bal_a, acc_v, bal_v,
            r2_arousal, r2_valence, mse_arousal, mse_valence, f1_arousal, f1_valence,
            acc_t, bal_t, r2_task, mse_task, f1_task, num_zeros, num_ones)


In [None]:
def validation(df, pi, relevant_features):

    accuracy_scores_a_mlp = {}
    balance_acc_list_a_mlp = {}
    accuracy_scores_v_mlp = {}
    balance_acc_list_v_mlp = {}
    r2_a_mlp = {}
    r2_v_mlp = {}
    mse_a_mlp = {}
    mse_v_mlp = {}
    f1_a_mlp = {}
    f1_v_mlp = {}
    accuracy_scores_s_mlp = {}
    balance_acc_list_s_mlp = {}
    r2_s_mlp = {}
    mse_s_mlp = {}
    f1_s_mlp = {}

    accuracy_scores_a_lr = {}
    balance_acc_list_a_lr = {}
    accuracy_scores_v_lr = {}
    balance_acc_list_v_lr = {}
    r2_a_lr = {}
    r2_v_lr ={}
    mse_a_lr = {}
    mse_v_lr = {}
    f1_a_lr = {}
    f1_v_lr = {}
    accuracy_scores_s_lr = {}
    balance_acc_list_s_lr = {}
    r2_s_lr = {}
    mse_s_lr = {}
    f1_s_lr = {}

    num_zeros_rf = {}
    num_zeros_xg = {}
    num_ones_xg = {}
    num_ones_rf = {}

    accuracy_scores_a_dt = {}
    balance_acc_list_a_dt = {}
    accuracy_scores_v_dt = {}
    balance_acc_list_v_dt = {}
    r2_a_dt = {}
    r2_v_dt ={}
    mse_a_dt = {}
    mse_v_dt = {}
    f1_a_dt = {}
    f1_v_dt = {}
    accuracy_scores_s_dt = {}
    balance_acc_list_s_dt = {}
    r2_s_dt = {}
    mse_s_dt = {}
    f1_s_dt = {}

    accuracy_scores_a_rf = {}
    balance_acc_list_a_rf = {}
    accuracy_scores_v_rf = {}
    balance_acc_list_v_rf = {}
    r2_a_rf = {}
    r2_v_rf ={}
    mse_a_rf = {}
    mse_v_rf = {}
    f1_a_rf = {}
    f1_v_rf = {}
    accuracy_scores_s_rf = {}
    balance_acc_list_s_rf = {}
    r2_s_rf = {}
    mse_s_rf = {}
    f1_s_rf = {}

    accuracy_scores_a_svm = {}
    balance_acc_list_a_svm = {}
    accuracy_scores_v_svm = {}
    balance_acc_list_v_svm = {}
    r2_a_svm = {}
    r2_v_svm ={}
    mse_a_svm = {}
    mse_v_svm = {}
    f1_a_svm = {}
    f1_v_svm = {}
    accuracy_scores_s_svm = {}
    balance_acc_list_s_svm = {}
    r2_s_svm = {}
    mse_s_svm = {}
    f1_s_svm = {}

    accuracy_scores_a_lda = {}
    balance_acc_list_a_lda = {}
    accuracy_scores_v_lda = {}
    balance_acc_list_v_lda = {}
    r2_a_lda = {}
    r2_v_lda ={}
    mse_a_lda = {}
    mse_v_lda = {}
    f1_a_lda = {}
    f1_v_lda = {}
    accuracy_scores_s_lda = {}
    balance_acc_list_s_lda = {}
    r2_s_lda = {}
    mse_s_lda = {}
    f1_s_lda = {}

    accuracy_scores_a_xg = {}
    balance_acc_list_a_xg = {}
    accuracy_scores_v_xg = {}
    balance_acc_list_v_xg = {}
    r2_a_xg = {}
    r2_v_xg ={}
    mse_a_xg = {}
    mse_v_xg = {}
    f1_a_xg = {}
    f1_v_xg = {}
    accuracy_scores_s_xg = {}
    balance_acc_list_s_xg = {}
    r2_s_xg = {}
    mse_s_xg = {}
    f1_s_xg = {}
    

    pi = list(set(pi))
    for i in pi:

        train_data = df[df['Participant ID'] != i]
        test_data = df[df['Participant ID'] == i]

        X_train = train_data[relevant_features]
        y_train_arousal = train_data['arousal_category'].astype(int)
        y_train_valence = train_data['valence_category'].astype(int)
        y_train_stress = train_data['taskwiselabel'].astype(int)

        X_test = test_data[relevant_features]
        y_test_arousal = test_data['arousal_category'].astype(int)
        y_test_valence = test_data['valence_category'].astype(int)
        y_test_stress = test_data['taskwiselabel'].astype(int)

        # Train the MLP classifier with specified hidden layers
        accuracy_arousal, balanced_acc_arousal, accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v, f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task, f1_task, num_zeros, num_ones = train_mlp(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal, y_test_valence, y_test_stress, hidden_layer_sizes=(100, 50))

        # Storing the results
        accuracy_scores_a_mlp[i] = accuracy_arousal * 100
        balance_acc_list_a_mlp[i] = balanced_acc_arousal * 100
        accuracy_scores_v_mlp[i] = accuracy_valence * 100
        balance_acc_list_v_mlp[i] = balanced_acc_valence * 100
        r2_a_mlp[i] = r2_a
        r2_v_mlp[i] = r2_v
        mse_a_mlp[i] = mse_a
        mse_v_mlp[i] = mse_v
        f1_a_mlp[i] = f1_a
        f1_v_mlp[i] = f1_v
        accuracy_scores_s_mlp[i] = accuracy_task * 100
        balance_acc_list_s_mlp[i] = balanced_acc_task * 100
        r2_s_mlp[i] = r2_task
        mse_s_mlp[i] = mse_task
        f1_s_mlp[i] = f1_task


        accuracy_arousal, balanced_acc_arousal, accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v, f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task, f1_task, num_zeros, num_ones = train_logistic_regression(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal , y_test_valence, y_test_stress)

        accuracy_scores_a_lr[i] = accuracy_arousal*100
        balance_acc_list_a_lr[i] = balanced_acc_arousal*100
        accuracy_scores_v_lr[i] = accuracy_valence*100
        balance_acc_list_v_lr[i] = balanced_acc_valence*100
        r2_a_lr[i] = r2_a
        r2_v_lr[i] = r2_v
        mse_a_lr[i] = mse_a
        mse_v_lr[i] = mse_v
        f1_a_lr[i] = f1_a
        f1_v_lr[i] = f1_v
        accuracy_scores_s_lr[i] = accuracy_task*100
        balance_acc_list_s_lr[i] = balanced_acc_task*100
        r2_s_lr[i] = r2_task
        mse_s_lr[i] = mse_task
        f1_s_lr[i] = f1_task

        accuracy_arousal, balanced_acc_arousal, accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v, f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task, f1_task, num_zeros, num_ones= train_decision_tree(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal , y_test_valence, y_test_stress)

        accuracy_scores_a_dt[i] = accuracy_arousal*100
        balance_acc_list_a_dt[i] = balanced_acc_arousal*100
        accuracy_scores_v_dt[i] = accuracy_valence*100
        balance_acc_list_v_dt[i] = balanced_acc_valence*100
        r2_a_dt[i] = r2_a
        r2_v_dt[i] = r2_v
        mse_a_dt[i] = mse_a
        mse_v_dt[i] = mse_v
        f1_a_dt[i] = f1_a
        f1_v_dt[i] = f1_v
        accuracy_scores_s_dt[i] = accuracy_task*100
        balance_acc_list_s_dt[i] = balanced_acc_task*100
        r2_s_dt[i] = r2_task
        mse_s_dt[i] = mse_task
        f1_s_dt[i] = f1_task

        accuracy_arousal, balanced_acc_arousal, accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v, f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task, f1_task, num_zeros, num_ones = train_random_forest(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal , y_test_valence, y_test_stress)

        accuracy_scores_a_rf[i] = accuracy_arousal*100
        balance_acc_list_a_rf[i] = balanced_acc_arousal*100
        accuracy_scores_v_rf[i] = accuracy_valence*100
        balance_acc_list_v_rf[i] = balanced_acc_valence*100
        r2_a_rf[i] = r2_a
        r2_v_rf[i] = r2_v
        mse_a_rf[i] = mse_a
        mse_v_rf[i] = mse_v
        f1_a_rf[i] = f1_a
        f1_v_rf[i] = f1_v
        accuracy_scores_s_rf[i] = accuracy_task*100
        balance_acc_list_s_rf[i] = balanced_acc_task*100
        r2_s_rf[i] = r2_task
        mse_s_rf[i] = mse_task
        f1_s_rf[i] = f1_task
        num_zeros_rf[i] = num_zeros
        num_ones_rf[i] = num_ones

        accuracy_arousal, balanced_acc_arousal, accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v, f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task, f1_task, num_zeros, num_ones = train_svm(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal , y_test_valence, y_test_stress)

        accuracy_scores_a_svm[i] = accuracy_arousal*100
        balance_acc_list_a_svm[i] = balanced_acc_arousal*100
        accuracy_scores_v_svm[i] = accuracy_valence*100
        balance_acc_list_v_svm[i] = balanced_acc_valence*100
        r2_a_svm[i] = r2_a
        r2_v_svm[i] = r2_v
        mse_a_svm[i] = mse_a
        mse_v_svm[i] = mse_v
        f1_a_svm[i] = f1_a
        f1_v_svm[i] = f1_v
        accuracy_scores_s_svm[i] = accuracy_task*100
        balance_acc_list_s_svm[i] = balanced_acc_task*100
        r2_s_svm[i] = r2_task
        mse_s_svm[i] = mse_task
        f1_s_svm[i] = f1_task

        accuracy_arousal, balanced_acc_arousal, accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v, f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task, f1_task, num_zeros, num_ones = train_lda(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal , y_test_valence, y_test_stress)

        accuracy_scores_a_lda[i] = accuracy_arousal*100
        balance_acc_list_a_lda[i] = balanced_acc_arousal*100
        accuracy_scores_v_lda[i] = accuracy_valence*100
        balance_acc_list_v_lda[i] = balanced_acc_valence*100
        r2_a_lda[i] = r2_a
        r2_v_lda[i] = r2_v
        mse_a_lda[i] = mse_a
        mse_v_lda[i] = mse_v
        f1_a_lda[i] = f1_a
        f1_v_lda[i] = f1_v
        accuracy_scores_s_lda[i] = accuracy_task*100
        balance_acc_list_s_lda[i] = balanced_acc_task*100
        r2_s_lda[i] = r2_task
        mse_s_lda[i] = mse_task
        f1_s_lda[i] = f1_task

        accuracy_arousal, balanced_acc_arousal, accuracy_valence, balanced_acc_valence, r2_a, r2_v, mse_a, mse_v, f1_a, f1_v, accuracy_task, balanced_acc_task, r2_task, mse_task, f1_task, num_zeros, num_ones = train_xgboost(X_train, y_train_arousal, y_train_valence, y_train_stress, X_test, y_test_arousal , y_test_valence, y_test_stress)

        accuracy_scores_a_xg[i] = accuracy_arousal*100
        balance_acc_list_a_xg[i] = balanced_acc_arousal*100
        accuracy_scores_v_xg[i] = accuracy_valence*100
        balance_acc_list_v_xg[i] = balanced_acc_valence*100
        r2_a_xg[i] = r2_a
        r2_v_xg[i] = r2_v
        mse_a_xg[i] = mse_a
        mse_v_xg[i] = mse_v
        f1_a_xg[i] = f1_a
        f1_v_xg[i] = f1_v
        accuracy_scores_s_xg[i] = accuracy_task*100
        balance_acc_list_s_xg[i] = balanced_acc_task*100
        r2_s_xg[i] = r2_task
        mse_s_xg[i] = mse_task
        f1_s_xg[i] = f1_task
        num_ones_xg[i] = num_ones
        num_zeros_xg[i] = num_zeros

    for i in pi:

        print(f"--------------Participant ID: {i}--------------")
        print("----Model: MLP----")
        print(f"Accuracy - Arousal: {accuracy_scores_a_mlp[i]}")
        print(f"Balance Accuracy - Arousal: {balance_acc_list_a_mlp[i]}")
        print(f"R2 Loss - Arousal: {r2_a_mlp[i]}")
        print(f"MSE Loss - Arousal: {mse_a_mlp[i]}")
        print(f"F1 Score - Arousal: {f1_a_mlp[i]}")
        print(f"Accuracy - Valence: {accuracy_scores_v_mlp[i]}")
        print(f"Balance Accuracy - Valence: {balance_acc_list_v_mlp[i]}")
        print(f"R2 Loss - Valence: {r2_v_mlp[i]}")
        print(f"MSE Loss - Valence: {mse_v_mlp[i]}")
        print(f"F1 Score - Valence: {f1_v_mlp[i]}")
        print(f"Accuracy - Stimulus-label: {accuracy_scores_s_mlp[i]}")
        print(f"Balance Accuracy - Stimulus-label: {balance_acc_list_s_mlp[i]}")
        print(f"R2 Loss - Stimulus-label: {r2_s_mlp[i]}")
        print(f"MSE Loss - Stimulus-label: {mse_s_mlp[i]}")
        print(f"F1 Score - Stimulus-label: {f1_s_mlp[i]}")

        print(f"--------------Participant ID: {i}--------------")
        print("----Model: Logistic Regression----")
        print(f"Accuracy - Arousal: {accuracy_scores_a_lr[i]}")
        print(f"Balance Accuracy - Arousal: {balance_acc_list_a_lr[i]}")
        print(f"R2 Loss - Arousal: {r2_a_lr[i]}")
        print(f"MSE Loss - Arousal: {mse_a_lr[i]}")
        print(f"F1 Score - Arousal: {f1_a_lr[i]}")
        print(f"Accuracy - Valence: {accuracy_scores_v_lr[i]}")
        print(f"Balance Accuracy - Valence: {balance_acc_list_v_lr[i]}")
        print(f"R2 Loss - Valence: {r2_v_lr[i]}")
        print(f"MSE Loss - Valence: {mse_v_lr[i]}")
        print(f"F1 Score - Valence: {f1_v_lr[i]}")
        print(f"Accuracy - Stimulus-label: {accuracy_scores_s_lr[i]}")
        print(f"Balance Accuracy - Stimulus-label: {balance_acc_list_s_lr[i]}")
        print(f"R2 Loss - Stimulus-label: {r2_s_lr[i]}")
        print(f"MSE Loss - Stimulus-label: {mse_s_lr[i]}")
        print(f"F1 Score - Stimulus-label: {f1_s_lr[i]}")

        print("----Model: Decision Tree----")
        print(f"Accuracy - Arousal: {accuracy_scores_a_dt[i]}")
        print(f"Balance Accuracy - Arousal: {balance_acc_list_a_dt[i]}")
        print(f"R2 Loss - Arousal: {r2_a_dt[i]}")
        print(f"MSE Loss - Arousal: {mse_a_dt[i]}")
        print(f"F1 Score - Arousal: {f1_a_dt[i]}")
        print(f"Accuracy - Valence: {accuracy_scores_v_dt[i]}")
        print(f"Balance Accuracy - Valence: {balance_acc_list_v_dt[i]}")
        print(f"R2 Loss - Valence: {r2_v_dt[i]}")
        print(f"MSE Loss - Valence: {mse_v_dt[i]}")
        print(f"F1 Score - Valence: {f1_v_dt[i]}")
        print(f"Accuracy - Stimulus-label: {accuracy_scores_s_dt[i]}")
        print(f"Balance Accuracy - Stimulus-label: {balance_acc_list_s_dt[i]}")
        print(f"R2 Loss - Stimulus-label: {r2_s_dt[i]}")
        print(f"MSE Loss - Stimulus-label: {mse_s_dt[i]}")
        print(f"F1 Score - Stimulus-label: {f1_s_dt[i]}")

        print("----Model: Random Forest----")
        print(f"Accuracy - Arousal: {accuracy_scores_a_rf[i]}")
        print(f"Balance Accuracy - Arousal: {balance_acc_list_a_rf[i]}")
        print(f"R2 Loss - Arousal: {r2_a_rf[i]}")
        print(f"MSE Loss - Arousal: {mse_a_rf[i]}")
        print(f"F1 Score - Arousal: {f1_a_rf[i]}")
        print(f"Accuracy - Valence: {accuracy_scores_v_rf[i]}")
        print(f"Balance Accuracy - Valence: {balance_acc_list_v_rf[i]}")
        print(f"R2 Loss - Valence: {r2_v_rf[i]}")
        print(f"MSE Loss - Valence: {mse_v_rf[i]}")
        print(f"F1 Score - Valence: {f1_v_rf[i]}")
        print(f"Accuracy - Stimulus-label: {accuracy_scores_s_rf[i]}")
        print(f"Balance Accuracy - Stimulus-label: {balance_acc_list_s_rf[i]}")
        print(f"R2 Loss - Stimulus-label: {r2_s_rf[i]}")
        print(f"MSE Loss - Stimulus-label: {mse_s_rf[i]}")
        print(f"F1 Score - Stimulus-label: {f1_s_rf[i]}")

        print("----Model: SVM----")
        print(f"Accuracy - Arousal: {accuracy_scores_a_svm[i]}")
        print(f"Balance Accuracy - Arousal: {balance_acc_list_a_svm[i]}")
        print(f"R2 Loss - Arousal: {r2_a_svm[i]}")
        print(f"MSE Loss - Arousal: {mse_a_svm[i]}")
        print(f"F1 Score - Arousal: {f1_a_svm[i]}")
        print(f"Accuracy - Valence: {accuracy_scores_v_svm[i]}")
        print(f"Balance Accuracy - Valence: {balance_acc_list_v_svm[i]}")
        print(f"R2 Loss - Valence: {r2_v_svm[i]}")
        print(f"MSE Loss - Valence: {mse_v_svm[i]}")
        print(f"F1 Score - Valence: {f1_v_svm[i]}")
        print(f"Accuracy - Stimulus-label: {accuracy_scores_s_svm[i]}")
        print(f"Balance Accuracy - Stimulus-label: {balance_acc_list_s_svm[i]}")
        print(f"R2 Loss - Stimulus-label: {r2_s_svm[i]}")
        print(f"MSE Loss - Stimulus-label: {mse_s_svm[i]}")
        print(f"F1 Score - Stimulus-label: {f1_s_svm[i]}")

        print("----Model: LDA----")
        print(f"Accuracy - Arousal: {accuracy_scores_a_lda[i]}")
        print(f"Balance Accuracy - Arousal: {balance_acc_list_a_lda[i]}")
        print(f"R2 Loss - Arousal: {r2_a_lda[i]}")
        print(f"MSE Loss - Arousal: {mse_a_lda[i]}")
        print(f"F1 Score - Arousal: {f1_a_lda[i]}")
        print(f"Accuracy - Valence: {accuracy_scores_v_lda[i]}")
        print(f"Balance Accuracy - Valence: {balance_acc_list_v_lda[i]}")
        print(f"R2 Loss - Valence: {r2_v_lda[i]}")
        print(f"MSE Loss - Valence: {mse_v_lda[i]}")
        print(f"F1 Score - Valence: {f1_v_lda[i]}")
        print(f"Accuracy - Stimulus-label: {accuracy_scores_s_lda[i]}")
        print(f"Balance Accuracy - Stimulus-label: {balance_acc_list_s_lda[i]}")
        print(f"R2 Loss - Stimulus-label: {r2_s_lda[i]}")
        print(f"MSE Loss - Stimulus-label: {mse_s_lda[i]}")
        print(f"F1 Score - Stimulus-label: {f1_s_lda[i]}")

        print("----Model: XG----")
        print(f"Accuracy - Arousal: {accuracy_scores_a_xg[i]}")
        print(f"Balance Accuracy - Arousal: {balance_acc_list_a_xg[i]}")
        print(f"R2 Loss - Arousal: {r2_a_xg[i]}")
        print(f"MSE Loss - Arousal: {mse_a_xg[i]}")
        print(f"F1 Score - Arousal: {f1_a_xg[i]}")
        print(f"Accuracy - Valence: {accuracy_scores_v_xg[i]}")
        print(f"Balance Accuracy - Valence: {balance_acc_list_v_xg[i]}")
        print(f"R2 Loss - Valence: {r2_v_xg[i]}")
        print(f"MSE Loss - Valence: {mse_v_xg[i]}")
        print(f"F1 Score - Valence: {f1_v_xg[i]}")
        print(f"Accuracy - Stimulus-label: {accuracy_scores_s_xg[i]}")
        print(f"Balance Accuracy - Stimulus-label: {balance_acc_list_s_xg[i]}")
        print(f"R2 Loss - Stimulus-label: {r2_s_xg[i]}")
        print(f"MSE Loss - Stimulus-label: {mse_s_xg[i]}")
        print(f"F1 Score - Stimulus-label: {f1_s_xg[i]}")


    print(f"-----Average Accuracy Arousal-----")
    print(f"Logistic Regression: {sum(accuracy_scores_a_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(accuracy_scores_a_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(accuracy_scores_a_rf.values()) / len(pi)}")
    print(f"SVM: {sum(accuracy_scores_a_svm.values()) / len(pi)}")
    print(f"LDA: {sum(accuracy_scores_a_lda.values()) / len(pi)}")
    print(f"XG: {sum(accuracy_scores_a_xg.values()) / len(pi)}")
    print(f"MLP: {sum(accuracy_scores_a_mlp.values()) / len(pi)}")

    print(f"-----Average Balanced Accuracy Arousal-----")
    print(f"Logistic Regression: {sum(balance_acc_list_a_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(balance_acc_list_a_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(balance_acc_list_a_rf.values()) / len(pi)}")
    print(f"SVM: {sum(balance_acc_list_a_svm.values()) / len(pi)}")
    print(f"LDA: {sum(balance_acc_list_a_lda.values()) / len(pi)}")
    print(f"XG: {sum(balance_acc_list_a_xg.values()) / len(pi)}")
    print(f"MLP: {sum(balance_acc_list_a_mlp.values()) / len(pi)}")

    print(f"-----Average R2 Loss Arousal-----")
    print(f"Logistic Regression: {sum(r2_a_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(r2_a_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(r2_a_rf.values()) / len(pi)}")
    print(f"SVM: {sum(r2_a_svm.values()) / len(pi)}")
    print(f"LDA: {sum(r2_a_lda.values()) / len(pi)}")
    print(f"XG: {sum(r2_a_xg.values()) / len(pi)}")
    print(f"MLP: {sum(r2_a_mlp.values()) / len(pi)}")

    print(f"-----Average MSE Loss Arousal-----")
    print(f"Logistic Regression: {sum(mse_a_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(mse_a_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(mse_a_rf.values()) / len(pi)}")
    print(f"SVM: {sum(mse_a_svm.values()) / len(pi)}")
    print(f"LDA: {sum(mse_a_lda.values()) / len(pi)}")
    print(f"XG: {sum(mse_a_xg.values()) / len(pi)}")
    print(f"MLP: {sum(mse_a_mlp.values()) / len(pi)}")

    print(f"-----Average F1 Arousal-----")
    print(f"Logistic Regression: {sum(f1_a_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(f1_a_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(f1_a_rf.values()) / len(pi)}")
    print(f"SVM: {sum(f1_a_svm.values()) / len(pi)}")
    print(f"LDA: {sum(f1_a_lda.values()) / len(pi)}")
    print(f"XG: {sum(f1_a_xg.values()) / len(pi)}")
    print(f"MLP: {sum(f1_a_mlp.values()) / len(pi)}")

    print(f"-----Average Accuracy Valence-----")
    print(f"Logistic Regression: {sum(accuracy_scores_v_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(accuracy_scores_v_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(accuracy_scores_v_rf.values()) / len(pi)}")
    print(f"SVM: {sum(accuracy_scores_v_svm.values()) / len(pi)}")
    print(f"LDA: {sum(accuracy_scores_v_lda.values()) / len(pi)}")
    print(f"XG: {sum(accuracy_scores_v_xg.values()) / len(pi)}")
    print(f"MLP: {sum(accuracy_scores_v_mlp.values()) / len(pi)}")

    print(f"-----Average Balanced Accuracy Valence-----")
    print(f"Logistic Regression: {sum(balance_acc_list_v_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(balance_acc_list_v_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(balance_acc_list_v_rf.values()) / len(pi)}")
    print(f"SVM: {sum(balance_acc_list_v_svm.values()) / len(pi)}")
    print(f"LDA: {sum(balance_acc_list_v_lda.values()) / len(pi)}")
    print(f"XG: {sum(balance_acc_list_v_xg.values()) / len(pi)}")
    print(f"MLP: {sum(balance_acc_list_v_mlp.values()) / len(pi)}")

    print(f"-----Average R2 Loss Valence-----")
    print(f"Logistic Regression: {sum(r2_v_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(r2_v_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(r2_v_rf.values()) / len(pi)}")
    print(f"SVM: {sum(r2_v_svm.values()) / len(pi)}")
    print(f"LDA: {sum(r2_v_lda.values()) / len(pi)}")
    print(f"XG: {sum(r2_v_xg.values()) / len(pi)}")
    print(f"MLP: {sum(r2_v_mlp.values()) / len(pi)}")

    print(f"-----Average MSE Loss Valence-----")
    print(f"Logistic Regression: {sum(mse_v_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(mse_v_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(mse_v_rf.values()) / len(pi)}")
    print(f"SVM: {sum(mse_v_svm.values()) / len(pi)}")
    print(f"LDA: {sum(mse_v_lda.values()) / len(pi)}")
    print(f"XG: {sum(mse_v_xg.values()) / len(pi)}")
    print(f"MLP: {sum(mse_v_mlp.values()) / len(pi)}")

    print(f"-----Average F1 Valence-----")
    print(f"Logistic Regression: {sum(f1_v_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(f1_v_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(f1_v_rf.values()) / len(pi)}")
    print(f"SVM: {sum(f1_v_svm.values()) / len(pi)}")
    print(f"LDA: {sum(f1_v_lda.values()) / len(pi)}")
    print(f"XG: {sum(f1_v_xg.values()) / len(pi)}")
    print(f"MLP: {sum(f1_v_mlp.values()) / len(pi)}")

    print(f"-----Average Accuracy Stimulus-label-----")
    print(f"Logistic Regression: {sum(accuracy_scores_s_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(accuracy_scores_s_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(accuracy_scores_s_rf.values()) / len(pi)}")
    print(f"SVM: {sum(accuracy_scores_s_svm.values()) / len(pi)}")
    print(f"LDA: {sum(accuracy_scores_s_lda.values()) / len(pi)}")
    print(f"XG: {sum(accuracy_scores_s_xg.values()) / len(pi)}")
    print(f"MLP: {sum(accuracy_scores_s_mlp.values()) / len(pi)}")

    print(f"-----Average Balanced Accuracy Stimulus-label-----")
    print(f"Logistic Regression: {sum(balance_acc_list_s_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(balance_acc_list_s_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(balance_acc_list_s_rf.values()) / len(pi)}")
    print(f"SVM: {sum(balance_acc_list_s_svm.values()) / len(pi)}")
    print(f"LDA: {sum(balance_acc_list_s_lda.values()) / len(pi)}")
    print(f"XG: {sum(balance_acc_list_s_xg.values()) / len(pi)}")
    print(f"MLP: {sum(balance_acc_list_s_mlp.values()) / len(pi)}")

    print(f"-----Average R2 Loss Stimulus-label-----")
    print(f"Logistic Regression: {sum(r2_s_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(r2_s_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(r2_s_rf.values()) / len(pi)}")
    print(f"SVM: {sum(r2_s_svm.values()) / len(pi)}")
    print(f"LDA: {sum(r2_s_lda.values()) / len(pi)}")
    print(f"XG: {sum(r2_s_xg.values()) / len(pi)}")
    print(f"MLP: {sum(r2_s_mlp.values()) / len(pi)}")

    print(f"-----Average MSE Loss Stimulus-label-----")
    print(f"Logistic Regression: {sum(mse_s_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(mse_s_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(mse_s_rf.values()) / len(pi)}")
    print(f"SVM: {sum(mse_s_svm.values()) / len(pi)}")
    print(f"LDA: {sum(mse_s_lda.values()) / len(pi)}")
    print(f"XG: {sum(mse_s_xg.values()) / len(pi)}")
    print(f"MLP: {sum(mse_s_mlp.values()) / len(pi)}")

    print(f"-----Average F1 Stimulus-label-----")
    print(f"Logistic Regression: {sum(f1_s_lr.values()) / len(pi)}")
    print(f"Decision Tree: {sum(f1_s_dt.values()) / len(pi)}")
    print(f"Random Forest: {sum(f1_s_rf.values()) / len(pi)}")
    print(f"SVM: {sum(f1_s_svm.values()) / len(pi)}")
    print(f"LDA: {sum(f1_s_lda.values()) / len(pi)}")
    print(f"XG: {sum(f1_s_xg.values()) / len(pi)}")
    print(f"MLP: {sum(f1_s_mlp.values()) / len(pi)}")


    return accuracy_scores_a_rf, accuracy_scores_v_rf, f1_a_rf,  f1_v_rf,  accuracy_scores_s_rf, f1_s_rf, accuracy_scores_a_xg, accuracy_scores_v_xg, f1_a_xg,  f1_v_xg,  accuracy_scores_s_xg, f1_s_xg, num_zeros_xg, num_zeros_rf, num_ones_rf, num_ones_xg

EDA

In [None]:
# Load the EDA feature table and declare which columns are model inputs
# (relevant_features) and which are identifier / label columns (identifiers).
eda_path = "../Data_files/EDA_labels.csv"
eda_df = csv_read(eda_path)
relevant_features = [
    'ku_eda', 'sk_eda', 'dynrange', 'slope', 'variance', 'entropy', 'insc',
    'fd_mean', 'max_scr', 'min_scr', 'nSCR', 'meanAmpSCR', 'meanRespSCR',
    'sumAmpSCR', 'sumRespSCR',
]
identifiers = [
    'Participant ID', 'Video ID', 'Gender', 'arousal_category',
    'valence_category', 'taskwiselabel', 'three_class_label',
]
eda_df

In [None]:
# Plot the feature diagnostics for the EDA table; the call hands back the table
# together with `pi` and `vi` (presumably participant and video IDs — confirm in feature_analysis).
eda_df, pi, vi = feature_analysis(eda_df,relevant_features,identifiers)

In [None]:
# Evaluate the classical models on the EDA features and print per-participant and average metrics.
validation(eda_df, pi, relevant_features)

Balancing - 333 samples according to Arousal

In [None]:
# Oversample by duplication: append a second copy of every row whose
# arousal_category equals 1 to the original table.
# A boolean mask selects those rows in one vectorised step and keeps the
# column dtypes intact (row-wise iteration with iterrows() does not).
filtered_df = eda_df[eda_df['arousal_category'] == 1]
balanced_eda_df = pd.concat([eda_df, filtered_df], ignore_index=True)
balanced_eda_df

In [None]:
# Analyse the arousal-balanced table (balanced_eda_df), not the unbalanced eda_df,
# mirroring what the PPG section does with its balanced frame.
balanced_eda_df, pi, vi = feature_analysis(balanced_eda_df, relevant_features, identifiers)

In [None]:
# Evaluate the classical models on the arousal-balanced EDA table.
validation(balanced_eda_df, pi, relevant_features)

PPG

In [None]:
# Load the PPG/HRV feature table and declare its input columns (relevant_features)
# and identifier / label columns (identifiers).
ppg_path = "../Data_files/PPG_labels.csv"
ppg_df = csv_read(ppg_path)
relevant_features = [
    'BPM', 'IBI', 'PPG_Rate_Mean', 'HRV_MedianNN', 'HRV_Prc20NN', 'HRV_MinNN',
    'HRV_HTI', 'HRV_TINN', 'HRV_LF', 'HRV_VHF', 'HRV_LFn', 'HRV_HFn', 'HRV_LnHF',
    'HRV_SD1SD2', 'HRV_CVI', 'HRV_PSS', 'HRV_PAS', 'HRV_PI', 'HRV_C1d', 'HRV_C1a',
    'HRV_DFA_alpha1', 'HRV_MFDFA_alpha1_Width', 'HRV_MFDFA_alpha1_Peak',
    'HRV_MFDFA_alpha1_Mean', 'HRV_MFDFA_alpha1_Max', 'HRV_MFDFA_alpha1_Delta',
    'HRV_MFDFA_alpha1_Asymmetry', 'HRV_ApEn', 'HRV_ShanEn', 'HRV_FuzzyEn',
    'HRV_MSEn', 'HRV_CMSEn', 'HRV_RCMSEn', 'HRV_CD', 'HRV_HFD', 'HRV_KFD', 'HRV_LZC',
]
identifiers = [
    'Participant ID', 'Video ID', 'Gender', 'arousal_category',
    'valence_category', 'taskwiselabel', 'three_class_label',
]
ppg_df

In [None]:
# Plot the feature diagnostics for the PPG table; returns the table plus `pi` and `vi`
# (presumably participant and video IDs — confirm in feature_analysis).
ppg_df, pi, vi = feature_analysis(ppg_df,relevant_features,identifiers)

In [None]:
# Evaluate the classical models on the PPG features and print per-participant and average metrics.
validation(ppg_df, pi, relevant_features)

Balanced Arousal

In [None]:
# Oversample by duplication: append a second copy of every row whose
# arousal_category equals 1 to the original PPG table.
# A boolean mask selects those rows in one vectorised step and keeps the
# column dtypes intact (row-wise iteration with iterrows() does not).
filtered_df = ppg_df[ppg_df['arousal_category'] == 1]
balanced_ppg_df = pd.concat([ppg_df, filtered_df], ignore_index=True)
balanced_ppg_df


In [None]:
# Repeat the feature diagnostics on the arousal-balanced PPG table.
balanced_ppg_df, pi, vi = feature_analysis(balanced_ppg_df,relevant_features,identifiers)

In [None]:
# Evaluate the classical models on the arousal-balanced PPG table.
validation(balanced_ppg_df, pi, relevant_features)

EDA + PPG

In [None]:
# Load both modality tables and place them side by side.
eda_path = "../Data_files/EDA_labels.csv"
eda_df = csv_read(eda_path)
ppg_path = "../Data_files/PPG_labels.csv"
ppg_df = csv_read(ppg_path)

# Only the PPG list and a single identifiers list are assigned here; the earlier
# EDA assignment of relevant_features was overwritten before ever being read.
relevant_features = ['BPM','IBI','PPG_Rate_Mean','HRV_MedianNN','HRV_Prc20NN','HRV_MinNN','HRV_HTI','HRV_TINN','HRV_LF','HRV_VHF','HRV_LFn','HRV_HFn','HRV_LnHF','HRV_SD1SD2','HRV_CVI','HRV_PSS','HRV_PAS','HRV_PI','HRV_C1d','HRV_C1a','HRV_DFA_alpha1','HRV_MFDFA_alpha1_Width','HRV_MFDFA_alpha1_Peak','HRV_MFDFA_alpha1_Mean','HRV_MFDFA_alpha1_Max','HRV_MFDFA_alpha1_Delta','HRV_MFDFA_alpha1_Asymmetry','HRV_ApEn','HRV_ShanEn','HRV_FuzzyEn','HRV_MSEn','HRV_CMSEn','HRV_RCMSEn','HRV_CD','HRV_HFD','HRV_KFD','HRV_LZC']
identifiers = ['Participant ID','Video ID','Gender','arousal_category','valence_category','taskwiselabel','three_class_label']

# Column-wise concat aligns rows by index label.
# NOTE(review): this assumes both CSVs list the same samples in the same row order — confirm.
combined_df = pd.concat([ppg_df, eda_df], axis=1)
combined_df

In [None]:
# Drop repeated column names, keeping the first occurrence of each
# (presumably the identifier/label columns that exist in both CSVs — confirm).
combined_df = combined_df.loc[:, ~combined_df.columns.duplicated()]

In [None]:
# Input columns for the combined model: PPG/HRV features followed by EDA features.
relevant_features = [
    # PPG / HRV
    'BPM', 'IBI', 'PPG_Rate_Mean', 'HRV_MedianNN', 'HRV_Prc20NN', 'HRV_MinNN',
    'HRV_HTI', 'HRV_TINN', 'HRV_LF', 'HRV_VHF', 'HRV_LFn', 'HRV_HFn', 'HRV_LnHF',
    'HRV_SD1SD2', 'HRV_CVI', 'HRV_PSS', 'HRV_PAS', 'HRV_PI', 'HRV_C1d', 'HRV_C1a',
    'HRV_DFA_alpha1', 'HRV_MFDFA_alpha1_Width', 'HRV_MFDFA_alpha1_Peak',
    'HRV_MFDFA_alpha1_Mean', 'HRV_MFDFA_alpha1_Max', 'HRV_MFDFA_alpha1_Delta',
    'HRV_MFDFA_alpha1_Asymmetry', 'HRV_ApEn', 'HRV_ShanEn', 'HRV_FuzzyEn',
    'HRV_MSEn', 'HRV_CMSEn', 'HRV_RCMSEn', 'HRV_CD', 'HRV_HFD', 'HRV_KFD', 'HRV_LZC',
    # EDA
    'ku_eda', 'sk_eda', 'dynrange', 'slope', 'variance', 'entropy', 'insc',
    'fd_mean', 'max_scr', 'min_scr', 'nSCR', 'meanAmpSCR', 'meanRespSCR',
    'sumAmpSCR', 'sumRespSCR',
]

combined_df, pi, vi = feature_analysis(combined_df, relevant_features, identifiers)

In [None]:
# Evaluate the classical models on the combined PPG + EDA features.
validation(combined_df, pi, relevant_features)

Arousal balanced

In [None]:
# Oversample by duplication: append a second copy of every row whose
# arousal_category equals 1 to the combined table.
# A boolean mask selects those rows in one vectorised step and keeps the
# column dtypes intact (row-wise iteration with iterrows() does not).
filtered_df = combined_df[combined_df['arousal_category'] == 1]
combined_b_df = pd.concat([combined_df, filtered_df], ignore_index=True)
combined_b_df

In [None]:
# Input columns for the combined model: PPG/HRV features followed by EDA features.
relevant_features = ['BPM', 'IBI', 'PPG_Rate_Mean', 'HRV_MedianNN', 'HRV_Prc20NN',
       'HRV_MinNN', 'HRV_HTI', 'HRV_TINN', 'HRV_LF', 'HRV_VHF', 'HRV_LFn',
       'HRV_HFn', 'HRV_LnHF', 'HRV_SD1SD2', 'HRV_CVI', 'HRV_PSS', 'HRV_PAS',
       'HRV_PI', 'HRV_C1d', 'HRV_C1a', 'HRV_DFA_alpha1',
       'HRV_MFDFA_alpha1_Width', 'HRV_MFDFA_alpha1_Peak',
       'HRV_MFDFA_alpha1_Mean', 'HRV_MFDFA_alpha1_Max',
       'HRV_MFDFA_alpha1_Delta', 'HRV_MFDFA_alpha1_Asymmetry', 'HRV_ApEn',
       'HRV_ShanEn', 'HRV_FuzzyEn', 'HRV_MSEn', 'HRV_CMSEn', 'HRV_RCMSEn',
       'HRV_CD', 'HRV_HFD', 'HRV_KFD', 'HRV_LZC', 'ku_eda', 'sk_eda', 'dynrange', 'slope',
       'variance', 'entropy', 'insc', 'fd_mean', 'max_scr', 'min_scr', 'nSCR',
       'meanAmpSCR', 'meanRespSCR', 'sumAmpSCR', 'sumRespSCR']

# Analyse the arousal-balanced table (combined_b_df) rather than the unbalanced combined_df.
combined_b_df, pi, vi = feature_analysis(combined_b_df, relevant_features, identifiers)

In [None]:
# Evaluate the classical models on the arousal-balanced combined table.
validation(combined_b_df, pi, relevant_features)

Custom MLP

In [None]:
# Re-read the EDA table from disk so the MLP experiments start from the CSV contents
# rather than from the frame returned by feature_analysis in earlier cells.
eda_path = "../Data_files/EDA_labels.csv"
eda_df = pd.read_csv(eda_path)
eda_df

Balancing Arousal Data

In [None]:
# Oversample by duplication: append a second copy of every row whose
# arousal_category equals 1 to the EDA table.
# A boolean mask selects those rows in one vectorised step and keeps the
# column dtypes intact (row-wise iteration with iterrows() does not).
filtered_df = eda_df[eda_df['arousal_category'] == 1]
balance_a_eda_df = pd.concat([eda_df, filtered_df], ignore_index=True)
balance_a_eda_df

In [None]:
def windowed_preprocess(train_df, test_df):
    """Turn each row of two DataFrames into its own NumPy array.

    Args:
        train_df: DataFrame whose rows are the training samples.
        test_df: DataFrame whose rows are the test samples.

    Returns:
        A ``(train, test)`` tuple of lists. Each list holds one 1-D NumPy array
        per DataFrame row, in row order, with one element per column.
    """
    def rows_as_arrays(frame):
        # Each cell is wrapped in a 0-d array before the row is assembled.
        return [
            np.array([np.array(cell) for cell in record.to_numpy()])
            for _, record in frame.iterrows()
        ]

    return rows_as_arrays(train_df), rows_as_arrays(test_df)

In [None]:
# Map-style dataset wrapping pre-extracted feature arrays and their labels.
class CustomDataset(torch.utils.data.Dataset):
    """Pairs per-sample NumPy feature arrays with their targets.

    ``data`` is indexed per sample and each item must be a NumPy array
    (it is converted with ``torch.from_numpy``); ``targets`` is indexed the
    same way and returned unchanged.
    """

    def __init__(self, data, targets):
        self.data, self.targets = data, targets

    def __len__(self):
        # The dataset size is defined by the feature container alone.
        return len(self.data)

    def __getitem__(self, index):
        features = torch.from_numpy(self.data[index]).float()
        label = self.targets[index]
        return features, label

In [None]:
class EDA_HC_NN(nn.Module):
    """Small fully connected network for the hand-crafted EDA feature vectors.

    Layout: ``input_dim -> 50 -> 100 -> output_dim`` with an in-place ReLU
    after each hidden layer. The last layer has no activation, so ``forward``
    returns raw scores.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()

        first_width, second_width = 50, 100

        # Modules are created in forward order and registered under
        # ``self.layers`` as a single nn.Sequential.
        stack = [
            nn.Linear(input_dim, first_width),
            nn.ReLU(inplace=True),
            nn.Linear(first_width, second_width),
            nn.ReLU(inplace=True),
            nn.Linear(second_width, output_dim),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.layers(x)

In [None]:
def trainer(autofet_df_con, col, input_dim, output_dim):
    """Leave-one-participant-out training and evaluation of ``EDA_HC_NN``.

    For every unique ``Participant ID`` a fresh model is trained on the rows of
    all other participants and evaluated on the held-out participant. The
    held-out labels, predictions, a classification report, accuracy and F1 are
    printed per participant, followed by the averages over participants.

    Note: later cells of this notebook define other functions that are also
    called ``trainer``; once they run, this EDA variant is no longer reachable
    under that name.

    Args:
        autofet_df_con: DataFrame with a ``Participant ID`` column, the EDA
            feature columns listed in ``fet`` and the label column ``col``.
        col: Name of the label column. The loss and metrics are binary, so the
            column is expected to hold 0/1 labels.
        input_dim: Requested width of the network input. If it differs from
            the number of feature columns, the feature count is used instead
            and a notice is printed, because a mismatched first layer cannot
            consume the feature vectors.
        output_dim: Number of network outputs. Targets are reshaped to
            ``(batch, 1)``, which matches ``output_dim == 1``.

    Returns:
        None. All results are printed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batch_size = 32
    epochs = 200

    # Adam hyper-parameters.
    learning_rate = 0.001
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-8

    # EDA feature columns fed to the network; identical for every fold.
    fet = ['ku_eda', 'sk_eda', 'dynrange', 'slope', 'variance',
           'entropy', 'insc', 'fd_mean', 'max_scr', 'min_scr', 'nSCR',
           'meanAmpSCR', 'meanRespSCR', 'sumAmpSCR', 'sumRespSCR']

    # The first Linear layer has to match the feature vector length, otherwise
    # the very first forward pass fails with a shape error.
    if input_dim != len(fet):
        print(f"input_dim={input_dim} does not match the {len(fet)} feature columns; using {len(fet)}")
        input_dim = len(fet)

    total_test_accuracy = 0.0
    total_test_f1 = 0.0
    total_participants = 0

    for pid in autofet_df_con['Participant ID'].unique():
        print(pid)

        # Hold out one participant; train on everybody else.
        train_data = autofet_df_con[autofet_df_con['Participant ID'] != pid]
        test_data = autofet_df_con[autofet_df_con['Participant ID'] == pid]

        X_train, X_test = windowed_preprocess(train_data[fet], test_data[fet])
        train_y = train_data[col].to_list()
        test_y = test_data[col].to_list()

        # Batches are drawn in DataFrame row order (no shuffling).
        train_dataloader = DataLoader(CustomDataset(X_train, train_y), batch_size=batch_size)

        # A new model and optimizer per fold so nothing leaks between participants.
        model = EDA_HC_NN(input_dim, output_dim).to(device)
        criterion = nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(beta1, beta2), eps=epsilon)

        for epoch in range(epochs):

            model.train()

            running_loss = 0.0
            # Fresh metric objects so each epoch's statistics start from zero.
            running_accuracy = torchmetrics.Accuracy(task='binary').to(device)
            running_f1 = torchmetrics.F1Score(task='binary').to(device)

            for data, target in tqdm(train_dataloader):

                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()

                output = model(data)

                # BCEWithLogitsLoss needs float targets shaped like the output.
                target = target.unsqueeze(1).float()

                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                predicted_classes = torch.sigmoid(output) > 0.5

                running_accuracy.update(predicted_classes, target)
                running_f1.update(predicted_classes, target)

            # Per-epoch training statistics (kept for optional logging).
            avg_loss = running_loss / len(train_dataloader)
            avg_accuracy = running_accuracy.compute()
            avg_f1 = running_f1.compute()
            # print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss}, Accuracy: {avg_accuracy}, F1 Score: {avg_f1}")

        # ---------------- evaluation on the held-out participant ----------------
        test_dataloader = DataLoader(CustomDataset(X_test, test_y), batch_size=batch_size)

        test_accuracy = torchmetrics.Accuracy(task='binary').to(device)
        test_f1 = torchmetrics.F1Score(task='binary').to(device)

        pred = []
        true = []

        model.eval()

        with torch.no_grad():
            for batch_x, batch_y in tqdm(test_dataloader):
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)

                test_output = model(batch_x)
                batch_y = batch_y.unsqueeze(1).float()

                test_predicted_classes = torch.sigmoid(test_output) > 0.5
                pred.extend(test_predicted_classes)
                true.extend(batch_y)
                test_accuracy.update(test_predicted_classes, batch_y)
                test_f1.update(test_predicted_classes, batch_y)

        test_avg_accuracy = test_accuracy.compute()
        test_avg_f1 = test_f1.compute()

        # Flatten the collected one-element tensors into plain ints for reporting.
        true = [int(tensor.item()) for tensor in true]
        pred = [int(tensor.item()) for tensor in pred]
        print(f"True: {true}")
        print(f"Pred: {pred}")
        print(classification_report(true, pred, labels=[0,1]))

        print(f"Test Accuracy: {test_avg_accuracy}, Test F1 Score: {test_avg_f1}")

        # Accumulate metrics across all participants.
        total_test_accuracy += test_avg_accuracy
        total_test_f1 += test_avg_f1
        total_participants += 1

    # Unweighted mean over participants.
    avg_test_accuracy = total_test_accuracy / total_participants
    avg_test_f1 = total_test_f1 / total_participants

    # `seed` is the notebook-level seed set at the top of the notebook.
    print(f"{col}: {seed}: Average Test Accuracy: {avg_test_accuracy}, Average Test F1 Score: {avg_test_f1}")


In [None]:
# Network sizes for the EDA model: 15 inputs (the EDA trainer selects 15 feature
# columns) and a single output logit for the binary loss.
input_dim =15
output_dim = 1

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# EDA MLP, leave-one-participant-out, unbalanced data, target: valence_category.
trainer(eda_df, 'valence_category', input_dim, output_dim)

In [None]:
# EDA MLP, leave-one-participant-out, unbalanced data, target: arousal_category.
trainer(eda_df, 'arousal_category', input_dim, output_dim)

In [None]:
# EDA MLP, leave-one-participant-out, unbalanced data, target: taskwiselabel.
trainer(eda_df, 'taskwiselabel', input_dim, output_dim)

Output of Balanced Arousal

In [None]:
# EDA MLP on the arousal-balanced table, target: valence_category.
trainer(balance_a_eda_df, 'valence_category', input_dim, output_dim)

In [None]:
# EDA MLP on the arousal-balanced table, target: arousal_category.
trainer(balance_a_eda_df, 'arousal_category', input_dim, output_dim)

In [None]:
# EDA MLP on the arousal-balanced table, target: taskwiselabel.
trainer(balance_a_eda_df, 'taskwiselabel', input_dim, output_dim)

In [None]:
# Re-read the PPG table from disk so the MLP experiments start from the CSV contents
# rather than from the frame returned by feature_analysis in earlier cells.
ppg_path = "../Data_files/PPG_labels.csv"
ppg_df = pd.read_csv(ppg_path)
ppg_df

In [None]:
# Combined feature names: the 37 PPG/HRV columns followed by the 15 EDA columns.
# NOTE(review): the `trainer` definitions in this notebook each build their own local
# `fet`, so this module-level list does not appear to be read by them — confirm it is still needed.
fet = ['BPM', 'IBI', 'PPG_Rate_Mean', 'HRV_MedianNN',
       'HRV_Prc20NN', 'HRV_MinNN', 'HRV_HTI', 'HRV_TINN', 'HRV_LF', 'HRV_VHF',
       'HRV_LFn', 'HRV_HFn', 'HRV_LnHF', 'HRV_SD1SD2', 'HRV_CVI', 'HRV_PSS',
       'HRV_PAS', 'HRV_PI', 'HRV_C1d', 'HRV_C1a', 'HRV_DFA_alpha1',
       'HRV_MFDFA_alpha1_Width', 'HRV_MFDFA_alpha1_Peak',
       'HRV_MFDFA_alpha1_Mean', 'HRV_MFDFA_alpha1_Max',
       'HRV_MFDFA_alpha1_Delta', 'HRV_MFDFA_alpha1_Asymmetry', 'HRV_ApEn',
       'HRV_ShanEn', 'HRV_FuzzyEn', 'HRV_MSEn', 'HRV_CMSEn', 'HRV_RCMSEn',
       'HRV_CD', 'HRV_HFD', 'HRV_KFD', 'HRV_LZC','ku_eda',
       'sk_eda', 'dynrange', 'slope', 'variance', 'entropy', 'insc', 'fd_mean',
       'max_scr', 'min_scr', 'nSCR', 'meanAmpSCR', 'meanRespSCR', 'sumAmpSCR',
       'sumRespSCR'] 

Balanced Arousal

In [None]:
# Oversample by duplication: append a second copy of every row whose
# arousal_category equals 1.
# A boolean mask selects those rows in one vectorised step and keeps the
# column dtypes intact (row-wise iteration with iterrows() does not).
# NOTE(review): the source frame is `combined_df` (left over from the EDA + PPG
# section), not the `ppg_df` re-read just above — confirm this is intended.
filtered_df = combined_df[combined_df['arousal_category'] == 1]
balanced_a_ppg_df = pd.concat([combined_df, filtered_df], ignore_index=True)
balanced_a_ppg_df

In [None]:
class PPG_HC_NN(nn.Module):
    """Small fully connected network for the hand-crafted PPG feature vectors.

    Layout: ``input_dim -> 50 -> 100 -> output_dim`` with an in-place ReLU
    after each hidden layer. The final layer is left without an activation, so
    ``forward`` returns raw scores.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()

        widths = (50, 100)

        # Built in forward order and registered as one nn.Sequential under ``self.layers``.
        blocks = [
            nn.Linear(input_dim, widths[0]),
            nn.ReLU(inplace=True),
            nn.Linear(widths[0], widths[1]),
            nn.ReLU(inplace=True),
            nn.Linear(widths[1], output_dim),
        ]
        self.layers = nn.Sequential(*blocks)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.layers(x)

In [None]:
def trainer(autofet_df_con, col, input_dim, output_dim):
    """Leave-one-participant-out training and evaluation of ``PPG_HC_NN``.

    For every unique ``Participant ID`` a fresh model is trained on the rows of
    all other participants and evaluated on the held-out participant. The
    held-out labels, predictions, a classification report, accuracy and F1 are
    printed per participant, followed by the averages over participants.

    Note: this definition reuses the name ``trainer``; once this cell runs it
    replaces the EDA variant defined in an earlier cell.

    Args:
        autofet_df_con: DataFrame with a ``Participant ID`` column, the PPG/HRV
            feature columns listed in ``fet`` and the label column ``col``.
        col: Name of the label column. The loss and metrics are binary, so the
            column is expected to hold 0/1 labels.
        input_dim: Requested width of the network input. If it differs from
            the number of feature columns, the feature count is used instead
            and a notice is printed, because a mismatched first layer cannot
            consume the feature vectors.
        output_dim: Number of network outputs. Targets are reshaped to
            ``(batch, 1)``, which matches ``output_dim == 1``.

    Returns:
        None. All results are printed.
    """
    # Resolved locally so the function does not depend on a notebook-level `device`.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batch_size = 32
    epochs = 200

    # Adam hyper-parameters.
    learning_rate = 0.001
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-8

    # PPG / HRV feature columns fed to the network; identical for every fold.
    fet = ['BPM', 'IBI', 'PPG_Rate_Mean', 'HRV_MedianNN',
           'HRV_Prc20NN', 'HRV_MinNN', 'HRV_HTI', 'HRV_TINN', 'HRV_LF', 'HRV_VHF',
           'HRV_LFn', 'HRV_HFn', 'HRV_LnHF', 'HRV_SD1SD2', 'HRV_CVI', 'HRV_PSS',
           'HRV_PAS', 'HRV_PI', 'HRV_C1d', 'HRV_C1a', 'HRV_DFA_alpha1',
           'HRV_MFDFA_alpha1_Width', 'HRV_MFDFA_alpha1_Peak',
           'HRV_MFDFA_alpha1_Mean', 'HRV_MFDFA_alpha1_Max',
           'HRV_MFDFA_alpha1_Delta', 'HRV_MFDFA_alpha1_Asymmetry', 'HRV_ApEn',
           'HRV_ShanEn', 'HRV_FuzzyEn', 'HRV_MSEn', 'HRV_CMSEn', 'HRV_RCMSEn',
           'HRV_CD', 'HRV_HFD', 'HRV_KFD', 'HRV_LZC']

    # The first Linear layer has to match the feature vector length, otherwise
    # the very first forward pass fails with a shape error.
    if input_dim != len(fet):
        print(f"input_dim={input_dim} does not match the {len(fet)} feature columns; using {len(fet)}")
        input_dim = len(fet)

    total_test_accuracy = 0.0
    total_test_f1 = 0.0
    total_participants = 0

    for pid in autofet_df_con['Participant ID'].unique():
        print(pid)

        # Hold out one participant; train on everybody else.
        train_data = autofet_df_con[autofet_df_con['Participant ID'] != pid]
        test_data = autofet_df_con[autofet_df_con['Participant ID'] == pid]

        X_train, X_test = windowed_preprocess(train_data[fet], test_data[fet])
        train_y = train_data[col].to_list()
        test_y = test_data[col].to_list()

        # Batches are drawn in DataFrame row order (no shuffling).
        train_dataloader = DataLoader(CustomDataset(X_train, train_y), batch_size=batch_size)

        # A new model and optimizer per fold so nothing leaks between participants.
        model = PPG_HC_NN(input_dim, output_dim).to(device)
        criterion = nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(beta1, beta2), eps=epsilon)

        for epoch in range(epochs):

            model.train()

            running_loss = 0.0
            # Fresh metric objects so each epoch's statistics start from zero.
            running_accuracy = torchmetrics.Accuracy(task='binary').to(device)
            running_f1 = torchmetrics.F1Score(task='binary').to(device)

            for data, target in tqdm(train_dataloader):

                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()

                output = model(data)

                # BCEWithLogitsLoss needs float targets shaped like the output.
                target = target.unsqueeze(1).float()

                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                predicted_classes = torch.sigmoid(output) > 0.5

                running_accuracy.update(predicted_classes, target)
                running_f1.update(predicted_classes, target)

            # Per-epoch training statistics (kept for optional logging).
            avg_loss = running_loss / len(train_dataloader)
            avg_accuracy = running_accuracy.compute()
            avg_f1 = running_f1.compute()
            # print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss}, Accuracy: {avg_accuracy}, F1 Score: {avg_f1}")

        # ---------------- evaluation on the held-out participant ----------------
        test_dataloader = DataLoader(CustomDataset(X_test, test_y), batch_size=batch_size)

        test_accuracy = torchmetrics.Accuracy(task='binary').to(device)
        test_f1 = torchmetrics.F1Score(task='binary').to(device)

        pred = []
        true = []

        model.eval()

        with torch.no_grad():
            for batch_x, batch_y in tqdm(test_dataloader):
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)

                test_output = model(batch_x)
                batch_y = batch_y.unsqueeze(1).float()

                test_predicted_classes = torch.sigmoid(test_output) > 0.5
                pred.extend(test_predicted_classes)
                true.extend(batch_y)
                test_accuracy.update(test_predicted_classes, batch_y)
                test_f1.update(test_predicted_classes, batch_y)

        test_avg_accuracy = test_accuracy.compute()
        test_avg_f1 = test_f1.compute()

        # Flatten the collected one-element tensors into plain ints for reporting.
        true = [int(tensor.item()) for tensor in true]
        pred = [int(tensor.item()) for tensor in pred]
        print(f"True: {true}")
        print(f"Pred: {pred}")
        print(classification_report(true, pred, labels=[0,1]))

        print(f"Test Accuracy: {test_avg_accuracy}, Test F1 Score: {test_avg_f1}")

        # Accumulate metrics across all participants.
        total_test_accuracy += test_avg_accuracy
        total_test_f1 += test_avg_f1
        total_participants += 1

    # Unweighted mean over participants.
    avg_test_accuracy = total_test_accuracy / total_participants
    avg_test_f1 = total_test_f1 / total_participants

    # `seed` is the notebook-level seed set at the top of the notebook.
    print(f"{col}: seed {seed}: Average Test Accuracy: {avg_test_accuracy}, Average Test F1 Score: {avg_test_f1}")


In [None]:
# PPG MLP, leave-one-participant-out, unbalanced data, target: taskwiselabel.
# NOTE(review): `input_dim` was last assigned 15 (the EDA feature count) while the PPG
# trainer selects 37 columns — confirm the value passed here.
trainer(ppg_df, 'taskwiselabel', input_dim, output_dim)

In [None]:
# PPG MLP, leave-one-participant-out, unbalanced data, target: arousal_category.
trainer(ppg_df, 'arousal_category', input_dim, output_dim)

In [None]:
# PPG MLP, leave-one-participant-out, unbalanced data, target: valence_category.
trainer(ppg_df, 'valence_category', input_dim, output_dim)

In [None]:
# PPG MLP on the arousal-balanced table, target: taskwiselabel.
trainer(balanced_a_ppg_df, 'taskwiselabel', input_dim, output_dim)

In [None]:
# PPG MLP on the arousal-balanced table, target: arousal_category.
trainer(balanced_a_ppg_df, 'arousal_category', input_dim, output_dim)

In [None]:
# PPG MLP on the arousal-balanced table, target: valence_category.
trainer(balanced_a_ppg_df, 'valence_category', input_dim, output_dim)

EDA + PPG 

In [None]:
# Rebuild the combined table from the freshly re-read PPG and EDA frames.
# Column-wise concat aligns rows by index label.
# NOTE(review): assumes both CSVs list the same samples in the same row order — confirm.
combined_df = pd.concat([ppg_df, eda_df], axis=1)
# Drop repeated column names, keeping the first occurrence (the PPG copy, since ppg_df comes first).
combined_df = combined_df.loc[:, ~combined_df.columns.duplicated()]

In [None]:
class Combined_HC_NN(nn.Module):
    """Fully connected network mapping a feature vector to raw output scores.

    The stack is ``Linear -> ReLU`` for every hidden width, followed by a final
    ``Linear`` layer with no activation, so ``forward`` returns un-squashed
    values (logits).

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of values produced per sample.
        hidden_dims: Widths of the hidden layers, in order. The default
            ``(50, 100)`` gives ``input_dim -> 50 -> 100 -> output_dim``.
            An empty sequence yields a single linear layer.
    """

    def __init__(self, input_dim, output_dim, hidden_dims=(50, 100)):
        super().__init__()

        # Consecutive pairs of this list are the (in, out) sizes of the
        # hidden Linear layers.
        widths = [input_dim, *hidden_dims]

        modules = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            modules.append(nn.Linear(in_features, out_features))
            # In-place ReLU overwrites the Linear output instead of allocating
            # a new tensor; that output is not used anywhere else.
            modules.append(nn.ReLU(inplace=True))

        # Final projection; intentionally left without an activation.
        modules.append(nn.Linear(widths[-1], output_dim))

        # Kept under the attribute name ``layers`` so parameter names
        # (``layers.0.weight`` ...) are stable for the default widths.
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw outputs."""
        return self.layers(x)

In [None]:
def trainer(autofet_df_con, col, input_dim, output_dim, verbose=False):
    """Leave-one-participant-out training and evaluation of ``Combined_HC_NN``.

    For every unique value in the ``'Participant ID'`` column, a fresh model is
    trained on the rows of all other participants and evaluated on the held-out
    participant's rows. Per-participant predictions, a classification report,
    accuracy and F1 are printed, followed by the mean accuracy and F1 over all
    participants.

    NOTE(review): an earlier notebook cell already calls a function named
    ``trainer``, so this definition replaces a previous one once its cell has
    run. Re-running earlier cells afterwards will use this version.

    Args:
        autofet_df_con: DataFrame holding ``'Participant ID'``, the label
            column ``col`` and every column named in the feature list below.
        col: Name of the binary label column.
        input_dim: Input width handed to ``Combined_HC_NN``.
            NOTE(review): 52 feature columns are selected here; whether
            ``windowed_preprocess`` keeps that width is not visible from this
            cell -- confirm ``input_dim`` matches what the model receives.
        output_dim: Output width handed to ``Combined_HC_NN``. Targets are
            reshaped to one column and each prediction row is converted with
            ``.item()``, so this is expected to be 1.
        verbose: When True, print loss, accuracy and F1 after every epoch.

    Relies on notebook-level names: ``windowed_preprocess``, ``CustomDataset``,
    ``device`` and ``seed``.
    """
    batch_size = 32
    epochs = 200

    # Adam hyper-parameters; these equal torch.optim.Adam's documented defaults.
    learning_rate = 0.001
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-8

    # PPG/HRV features followed by EDA features. The list is identical for
    # every fold, so it is built once rather than inside the loop.
    feature_columns = [
        'BPM', 'IBI', 'PPG_Rate_Mean', 'HRV_MedianNN',
        'HRV_Prc20NN', 'HRV_MinNN', 'HRV_HTI', 'HRV_TINN', 'HRV_LF', 'HRV_VHF',
        'HRV_LFn', 'HRV_HFn', 'HRV_LnHF', 'HRV_SD1SD2', 'HRV_CVI', 'HRV_PSS',
        'HRV_PAS', 'HRV_PI', 'HRV_C1d', 'HRV_C1a', 'HRV_DFA_alpha1',
        'HRV_MFDFA_alpha1_Width', 'HRV_MFDFA_alpha1_Peak',
        'HRV_MFDFA_alpha1_Mean', 'HRV_MFDFA_alpha1_Max',
        'HRV_MFDFA_alpha1_Delta', 'HRV_MFDFA_alpha1_Asymmetry', 'HRV_ApEn',
        'HRV_ShanEn', 'HRV_FuzzyEn', 'HRV_MSEn', 'HRV_CMSEn', 'HRV_RCMSEn',
        'HRV_CD', 'HRV_HFD', 'HRV_KFD', 'HRV_LZC', 'ku_eda',
        'sk_eda', 'dynrange', 'slope', 'variance', 'entropy', 'insc', 'fd_mean',
        'max_scr', 'min_scr', 'nSCR', 'meanAmpSCR', 'meanRespSCR', 'sumAmpSCR',
        'sumRespSCR',
    ]

    total_test_accuracy = 0.0
    total_test_f1 = 0.0
    total_participants = 0

    for pid in autofet_df_con['Participant ID'].unique():
        print(pid)

        # Hold out one participant; train on everyone else.
        train_data = autofet_df_con[autofet_df_con['Participant ID'] != pid]
        test_data = autofet_df_con[autofet_df_con['Participant ID'] == pid]

        # NOTE(review): windowed_preprocess is defined elsewhere; presumably it
        # scales the test split using statistics of the training split -- confirm.
        X_train, X_test = windowed_preprocess(
            train_data[feature_columns], test_data[feature_columns]
        )
        train_y = train_data[col].to_list()
        test_y = test_data[col].to_list()

        # Batches are served in frame order (no shuffling), as before.
        train_dataloader = DataLoader(CustomDataset(X_train, train_y), batch_size=batch_size)

        # A new model and optimizer per fold, so nothing learned on one
        # participant split carries over to the next.
        model = Combined_HC_NN(input_dim, output_dim).to(device)

        # The model emits logits; BCEWithLogitsLoss applies the sigmoid itself.
        criterion = nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(
            model.parameters(), lr=learning_rate, betas=(beta1, beta2), eps=epsilon
        )

        for epoch in range(epochs):
            model.train()

            running_loss = 0.0
            # Fresh metric objects each epoch so statistics do not accumulate
            # across epochs.
            running_accuracy = torchmetrics.Accuracy(task='binary').to(device)
            running_f1 = torchmetrics.F1Score(task='binary').to(device)

            for data, target in tqdm(train_dataloader):
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()

                output = model(data)

                # Add a trailing dimension and cast to float, as required by
                # BCEWithLogitsLoss against the model output.
                target = target.unsqueeze(1).float()

                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                # Probability above 0.5 counts as the positive class.
                predicted_classes = torch.sigmoid(output) > 0.5
                running_accuracy.update(predicted_classes, target)
                running_f1.update(predicted_classes, target)

            # Training-set diagnostics; only reported on request.
            avg_loss = running_loss / len(train_dataloader)
            avg_accuracy = running_accuracy.compute()
            avg_f1 = running_f1.compute()
            if verbose:
                print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss}, Accuracy: {avg_accuracy}, F1 Score: {avg_f1}")

        # --------- evaluation on the held-out participant ---------
        test_dataloader = DataLoader(CustomDataset(X_test, test_y), batch_size=batch_size)

        test_accuracy = torchmetrics.Accuracy(task='binary').to(device)
        test_f1 = torchmetrics.F1Score(task='binary').to(device)

        pred = []
        true = []

        model.eval()

        with torch.no_grad():
            # Named test_batch so the held-out DataFrame `test_data` is not
            # overwritten by a tensor.
            for test_batch, test_target in tqdm(test_dataloader):
                test_batch, test_target = test_batch.to(device), test_target.to(device)

                test_output = model(test_batch)
                test_target = test_target.unsqueeze(1).float()

                test_predicted_classes = torch.sigmoid(test_output) > 0.5
                # Extending with a tensor appends one row tensor per sample.
                pred.extend(test_predicted_classes)
                true.extend(test_target)
                test_accuracy.update(test_predicted_classes, test_target)
                test_f1.update(test_predicted_classes, test_target)

        test_avg_accuracy = test_accuracy.compute()
        test_avg_f1 = test_f1.compute()

        # Convert the single-element row tensors to plain ints for reporting.
        true = [int(tensor.item()) for tensor in true]
        pred = [int(tensor.item()) for tensor in pred]
        print(f"True: {true}")
        print(f"Pred: {pred}")
        # labels=[0, 1] keeps both classes in the report even when the
        # held-out participant only has one of them.
        print(classification_report(true, pred, labels=[0,1]))

        print(f"Test Accuracy: {test_avg_accuracy}, Test F1 Score: {test_avg_f1}")

        total_test_accuracy += test_avg_accuracy
        total_test_f1 += test_avg_f1
        total_participants += 1

    # Unweighted mean over participants: each fold counts once regardless of
    # how many rows it contains.
    avg_test_accuracy = total_test_accuracy / total_participants
    avg_test_f1 = total_test_f1 / total_participants

    print(f"{col}: seed {seed}: Average Test Accuracy: {avg_test_accuracy}, Average Test F1 Score: {avg_test_f1}")


In [None]:
# Leave-one-participant-out run on the PPG + EDA frame, valence labels.
trainer(combined_df, col='valence_category', input_dim=input_dim, output_dim=output_dim)

In [None]:
# Leave-one-participant-out run on the PPG + EDA frame, arousal labels.
trainer(combined_df, col='arousal_category', input_dim=input_dim, output_dim=output_dim)

In [None]:
# Leave-one-participant-out run on the PPG + EDA frame, task-wise labels.
trainer(combined_df, col='taskwiselabel', input_dim=input_dim, output_dim=output_dim)

Balanced Arousal

In [None]:
# Balanced-arousal PPG + EDA frame. Both modalities must be present: trainer
# selects PPG/HRV columns as well as EDA columns by name, so pairing the EDA
# frame with itself would leave the PPG/HRV columns missing.
# NOTE(review): axis=1 aligns rows on the index -- confirm that
# balanced_a_ppg_df and balance_a_eda_df were balanced onto the same rows.
# This rebinds combined_df, so the cells below use the balanced frame.
combined_df = pd.concat([balanced_a_ppg_df, balance_a_eda_df], axis=1)
# Columns shared by both frames appear twice after the concat; keep the first.
combined_df = combined_df.loc[:, ~combined_df.columns.duplicated()]

In [None]:
# combined_df now refers to the frame rebuilt in the "Balanced Arousal" cell; valence labels.
trainer(combined_df, col='valence_category', input_dim=input_dim, output_dim=output_dim)

In [None]:
# combined_df now refers to the frame rebuilt in the "Balanced Arousal" cell; task-wise labels.
trainer(combined_df, col='taskwiselabel', input_dim=input_dim, output_dim=output_dim)

In [None]:
# combined_df now refers to the frame rebuilt in the "Balanced Arousal" cell; arousal labels.
trainer(combined_df, col='arousal_category', input_dim=input_dim, output_dim=output_dim)