In [2]:
import pandas as pd
import numpy as np
import glob
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, f1_score, accuracy_score, precision_score, recall_score

In [3]:
# Mount Google Drive into the runtime at /content/drive; the dataset directory
# used below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Directory (on the mounted Drive) that holds the feature files; every *.csv
# directly inside it is picked up by the glob in the next cell.
directory_path = '/content/drive/MyDrive/FYP_dataset/features'

In [5]:
# Collect the path of every feature CSV in the directory.
# glob returns matches in arbitrary (filesystem-dependent) order, so the list is
# sorted: the row order of the concatenated dataset, and therefore the seeded
# K-fold assignment built on top of it, is then identical on every run.
feature_files = sorted(glob.glob(directory_path + '/*.csv'))

# Fail early with a clear message rather than with an obscure
# "need at least one array to concatenate" error further down.
if not feature_files:
    raise FileNotFoundError(f'No feature CSV files found in {directory_path}')

In [6]:
# Sanity check: number of feature CSV files that were discovered.
print(len(feature_files))

1802


In [7]:
# Per-file accumulators, filled by the loading loop:
#   X   -> feature arrays
#   y_a -> arousal target arrays
#   y_v -> valence target arrays
X, y_a, y_v = [], [], []

In [8]:
# Load every feature CSV and split it into the feature matrix and the two targets.
for file in feature_files:
    df = pd.read_csv(file)

    # Targets are the arousal / valence columns; everything else except the
    # frameTime column is treated as a feature.
    arousal = df['Arousal_Value'].values
    valence = df['Valence_Value'].values
    features = df.drop(columns=['Arousal_Value', 'Valence_Value', 'frameTime']).values

    # Keep one array per file; they are concatenated in a later cell.
    X.append(features)
    y_a.append(arousal)
    y_v.append(valence)

In [8]:
# Stack the per-file arrays along axis 0 into one feature matrix and two target vectors.
# The isinstance guards make this cell safe to re-run: X, y_a and y_v are rebound
# from lists to ndarrays here, and calling np.concatenate on the ndarrays again
# would flatten X to 1-D and raise on the already 1-D targets.
if isinstance(X, list):
    X = np.concatenate(X)
if isinstance(y_a, list):
    y_a = np.concatenate(y_a)
if isinstance(y_v, list):
    y_v = np.concatenate(y_v)

# Every feature row must have exactly one arousal and one valence target.
assert len(X) == len(y_a) == len(y_v), 'features and targets have different row counts'

# Scale every feature column to [0, 1].
# NOTE: this scaler is fitted on the complete dataset, so X_scaled carries
# min/max information from every row. For cross-validated evaluation, fit a
# scaler on the training portion of each fold instead to avoid data leakage.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Add a trailing singleton axis -> (n_samples, n_features, 1), the 3-D layout
# prepared for an LSTM. A scikit-learn SVR expects the 2-D arrays, not this one.
X_reshaped = X_scaled.reshape(X_scaled.shape[0], X_scaled.shape[1], 1)


In [9]:
# Number of cross-validation folds.
num_folds = 5

# One empty accumulator per arousal metric, appended to inside the
# cross-validation loop: regression scores first, then the thresholded
# (binary) classification scores.
mse_a, mae_a, rmse_a, r2_a = [], [], [], []
f1_a, accuracy_a, precision_a, recall_a = [], [], [], []

In [None]:
# Perform K-fold cross-validation (K = num_folds) of an RBF-kernel SVR on arousal.
#
# For each fold:
#   1. the held-out part is split 50/50 into a validation half and a test half;
#   2. a MinMaxScaler is fitted on the training rows only and applied to all
#      three parts (RBF SVR is scale-sensitive, and fitting on the training
#      rows alone keeps held-out statistics out of the model);
#   3. regression metrics are computed on both halves, but stored separately;
#   4. test predictions are thresholded to obtain binary classification metrics.
# KFold, MinMaxScaler, SVR and the metric functions come from the import cell at
# the top of the notebook; only train_test_split is imported here.
from sklearn.model_selection import train_test_split

# Start from empty result lists so that re-running this cell does not pile a
# second set of fold scores on top of a previous run.
for fold_scores in (mse_a, mae_a, rmse_a, r2_a, f1_a, accuracy_a, precision_a, recall_a):
    fold_scores.clear()

# Validation-half regression scores live in their own lists; mse_a / mae_a /
# rmse_a / r2_a hold test-half scores only, matching the classification lists.
val_mse_a, val_mae_a, val_rmse_a, val_r2_a = [], [], [], []

# Regression outputs at or above this value count as the positive class.
# (Presumably arousal is centred on 0 -- confirm against the dataset.)
threshold = 0

kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
for train_index, test_index in kf.split(X):
    X_train, X_temp = X[train_index], X[test_index]
    y_a_train, y_a_temp = y_a[train_index], y_a[test_index]

    # Split the held-out fold into validation and test halves
    X_val, X_test, y_a_val, y_a_test = train_test_split(X_temp, y_a_temp, test_size=0.5, random_state=42)

    # Fit the scaler on the training rows only, then apply it everywhere
    fold_scaler = MinMaxScaler().fit(X_train)
    X_train_scaled = fold_scaler.transform(X_train)
    X_val_scaled = fold_scaler.transform(X_val)
    X_test_scaled = fold_scaler.transform(X_test)

    # Initialize and train the SVR model
    svr_model_arosal = SVR(kernel='rbf')
    svr_model_arosal.fit(X_train_scaled, y_a_train)

    # Regression metrics on the validation half
    predictions_val = svr_model_arosal.predict(X_val_scaled)
    val_mse = mean_squared_error(y_a_val, predictions_val)
    val_mse_a.append(val_mse)
    val_mae_a.append(mean_absolute_error(y_a_val, predictions_val))
    val_rmse_a.append(np.sqrt(val_mse))
    val_r2_a.append(r2_score(y_a_val, predictions_val))

    # Regression metrics on the test half
    predictions_a = svr_model_arosal.predict(X_test_scaled)
    test_mse = mean_squared_error(y_a_test, predictions_a)
    mse_a.append(test_mse)
    mae_a.append(mean_absolute_error(y_a_test, predictions_a))
    rmse_a.append(np.sqrt(test_mse))
    r2_a.append(r2_score(y_a_test, predictions_a))

    # Convert regression predictions and targets to binary labels
    binary_predictions_a = (predictions_a >= threshold).astype(int)
    y_a_test_binary = (y_a_test >= threshold).astype(int)

    # F1-score, accuracy, precision and recall of the thresholded test predictions
    f1_a.append(f1_score(y_a_test_binary, binary_predictions_a))
    accuracy_a.append(accuracy_score(y_a_test_binary, binary_predictions_a))
    precision_a.append(precision_score(y_a_test_binary, binary_predictions_a))
    recall_a.append(recall_score(y_a_test_binary, binary_predictions_a))


In [None]:
# Summarise the cross-validation: take the mean of every per-fold arousal
# metric list, in the same order as the names on the left-hand side.
(
    average_mse_a,
    average_mae_a,
    average_rmse_a,
    average_r2_a,
    average_f1_a,
    average_accuracy_a,
    average_precision_a,
    average_recall_a,
) = (
    np.mean(fold_values)
    for fold_values in (mse_a, mae_a, rmse_a, r2_a, f1_a, accuracy_a, precision_a, recall_a)
)

# Report each average with four decimal places.
print(f'Average Arousal MSE: {average_mse_a:.4f}')
print(f'Average Arousal MAE: {average_mae_a:.4f}')
print(f'Average Arousal RMSE: {average_rmse_a:.4f}')
print(f'Average Arousal R2 Score: {average_r2_a:.4f}')
print(f'Average Arousal F1 Score: {average_f1_a:.4f}')
print(f'Average Arousal Accuracy: {average_accuracy_a:.4f}')
print(f'Average Arousal Precision: {average_precision_a:.4f}')
print(f'Average Arousal Recall: {average_recall_a:.4f}')