In [1]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score


In [2]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# --- Acoustic features -> principal components ---------------------------
# Load the per-recording feature table.
df_eng = pd.read_excel("C:/Users/vyache/Downloads/Features_English.xlsx")

# Every column except the two identifier columns is treated as a feature.
feature_cols = df_eng.columns.difference(["Recording", "Speaker"])

# PCA is sensitive to feature scale, so z-score the features first.
scaler = StandardScaler()
df_eng_scaled = scaler.fit_transform(df_eng[feature_cols])

# Project onto the leading principal components (adjust n_components as needed).
pca = PCA(n_components=3)
principal_components = pca.fit_transform(df_eng_scaled)

# Label the columns from the actual width of the PCA output so that changing
# n_components above cannot desynchronise the column names from the data.
pc_names = [f"PC{k}" for k in range(1, principal_components.shape[1] + 1)]
df_pca = pd.DataFrame(principal_components, columns=pc_names)

# Re-attach the identifier columns; they are the merge keys below.
df_pca["Recording"] = df_eng["Recording"]
df_pca["Speaker"] = df_eng["Speaker"]

# --- Self-report questionnaire -------------------------------------------
df_self_reports = pd.read_excel("C:/Users/vyache/Desktop/Questionnaire data_Eng.xlsx")

# Default (inner) join: only Recording/Speaker pairs present in both tables
# are kept.
df_merged = pd.merge(df_pca, df_self_reports, on=["Recording", "Speaker"])

# Cut-offs at the 33rd and 66th percentiles of each self-report scale.
# NOTE(review): these quantiles are taken over the full questionnaire table,
# not over df_merged; if the merge drops rows the two populations differ --
# confirm this is intended.
low_threshold_resilience = df_self_reports["Emotional Resilience"].quantile(0.33)
high_threshold_resilience = df_self_reports["Emotional Resilience"].quantile(0.66)

low_threshold_load = df_self_reports["Cognitive Load"].quantile(0.33)
high_threshold_load = df_self_reports["Cognitive Load"].quantile(0.66)

# Categorization functions for 2 categories (Low vs High)
def categorize_resilience(score, threshold=None):
    """Binarize an Emotional Resilience score into 0 (Low) or 1 (High).

    Parameters
    ----------
    score : number
        Self-reported score to classify.
    threshold : number, optional
        Cut-off separating the two classes. When omitted, the module-level
        ``high_threshold_resilience`` is used.

    Returns
    -------
    int
        0 when ``score`` is strictly below the cut-off, otherwise 1.

    Notes
    -----
    Everything below the cut-off is labelled 0, so with the default
    (upper-percentile) cut-off the "Low" class also contains mid-range scores.
    A NaN score fails the ``<`` comparison and is therefore labelled 1.
    """
    # Only look up the global when no explicit cut-off was supplied.
    cutoff = high_threshold_resilience if threshold is None else threshold
    return 0 if score < cutoff else 1

def categorize_cognitive_load(score, threshold=None):
    """Binarize a Cognitive Load score into 0 (Low) or 1 (High).

    Parameters
    ----------
    score : number
        Self-reported score to classify.
    threshold : number, optional
        Cut-off separating the two classes. When omitted, the module-level
        ``high_threshold_load`` is used.

    Returns
    -------
    int
        0 when ``score`` is strictly below the cut-off, otherwise 1.

    Notes
    -----
    Everything below the cut-off is labelled 0, so with the default
    (upper-percentile) cut-off the "Low" class also contains mid-range scores.
    A NaN score fails the ``<`` comparison and is therefore labelled 1.
    """
    # Only look up the global when no explicit cut-off was supplied.
    cutoff = high_threshold_load if threshold is None else threshold
    return 0 if score < cutoff else 1

# Derive the binary targets from the self-report scores.
df_merged["resilience_category"] = df_merged["Emotional Resilience"].apply(categorize_resilience)
df_merged["cognitive_load_category"] = df_merged["Cognitive Load"].apply(categorize_cognitive_load)

# Class-balance check for both targets.
for target_col in ("resilience_category", "cognitive_load_category"):
    print(df_merged[target_col].value_counts())

# Model inputs (principal components) and the two targets.
X = df_merged[["PC1", "PC2", "PC3"]]
y_resilience = df_merged["resilience_category"]
y_cognitive_load = df_merged["cognitive_load_category"]

# Standardize the PCA scores with a dedicated scaler. Re-fitting the shared
# `scaler` here would overwrite the statistics it learned from the raw
# features, leaving it unusable for transforming new raw feature rows.
pc_scaler = StandardScaler()
X_scaled = pc_scaler.fit_transform(X)
# NOTE(review): the scaler is fit on every row; if X_scaled is subsequently
# split into train/test sets, the test rows have influenced the scaling
# statistics. Consider fitting on the training split only.

resilience_category
0    41
1    25
Name: count, dtype: int64
cognitive_load_category
0    40
1    26
Name: count, dtype: int64


In [3]:
# Hold-out settings shared by both targets: 20% test set, fixed seed.
split_opts = {"test_size": 0.2, "random_state": 42}

# Resilience: stratify so both splits keep the Low/High proportions.
X_train_res, X_test_res, y_train_res, y_test_res = train_test_split(
    X_scaled, y_resilience, stratify=y_resilience, **split_opts
)

# Cognitive load: same procedure, stratified on its own target.
X_train_cog, X_test_cog, y_train_cog, y_test_cog = train_test_split(
    X_scaled, y_cognitive_load, stratify=y_cognitive_load, **split_opts
)


In [7]:
# Fit an RBF-kernel SVM on the resilience training split
# (class_weight='balanced' reweights classes inversely to their frequency).
svm_res = SVC(kernel='rbf', class_weight='balanced', random_state=42).fit(
    X_train_res, y_train_res
)

# Predictions for the held-out rows
y_pred_res = svm_res.predict(X_test_res)

# Per-class report followed by overall accuracy
report_res = classification_report(y_test_res, y_pred_res)
accuracy_res = accuracy_score(y_test_res, y_pred_res)
print("Resilience Classification Report:")
print(report_res)
print("Accuracy:", accuracy_res)


Resilience Classification Report:
              precision    recall  f1-score   support

           0       0.56      0.56      0.56         9
           1       0.20      0.20      0.20         5

    accuracy                           0.43        14
   macro avg       0.38      0.38      0.38        14
weighted avg       0.43      0.43      0.43        14

Accuracy: 0.42857142857142855


In [9]:
# Fit an RBF-kernel SVM on the cognitive-load training split
# (class_weight='balanced' reweights classes inversely to their frequency).
svm_cog = SVC(kernel='rbf', class_weight='balanced', random_state=42).fit(
    X_train_cog, y_train_cog
)

# Predictions for the held-out rows
y_pred_cog = svm_cog.predict(X_test_cog)

# Per-class report followed by overall accuracy
report_cog = classification_report(y_test_cog, y_pred_cog)
accuracy_cog = accuracy_score(y_test_cog, y_pred_cog)
print("Cognitive Load Classification Report:")
print(report_cog)
print("Accuracy:", accuracy_cog)


Cognitive Load Classification Report:
              precision    recall  f1-score   support

           0       0.58      0.88      0.70         8
           1       0.50      0.17      0.25         6

    accuracy                           0.57        14
   macro avg       0.54      0.52      0.47        14
weighted avg       0.55      0.57      0.51        14

Accuracy: 0.5714285714285714


In [11]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Macro averaging: unweighted mean of the per-class scores.
macro = {"average": "macro"}

# --- Resilience ---
precision_res = precision_score(y_test_res, y_pred_res, **macro)
recall_res = recall_score(y_test_res, y_pred_res, **macro)
f1_res = f1_score(y_test_res, y_pred_res, **macro)
conf_matrix_res = confusion_matrix(y_test_res, y_pred_res)

print("Resilience Classification:")
for label, value in (
    ("Macro Precision:", precision_res),
    ("Macro Recall:", recall_res),
    ("Macro F1-score:", f1_res),
    ("Confusion Matrix:\n", conf_matrix_res),
):
    print(label, value)

# --- Cognitive load ---
precision_cog = precision_score(y_test_cog, y_pred_cog, **macro)
recall_cog = recall_score(y_test_cog, y_pred_cog, **macro)
f1_cog = f1_score(y_test_cog, y_pred_cog, **macro)
conf_matrix_cog = confusion_matrix(y_test_cog, y_pred_cog)

print("Cognitive Load Classification:")
for label, value in (
    ("Macro Precision:", precision_cog),
    ("Macro Recall:", recall_cog),
    ("Macro F1-score:", f1_cog),
    ("Confusion Matrix:\n", conf_matrix_cog),
):
    print(label, value)


Resilience Classification:
Macro Precision: 0.37777777777777777
Macro Recall: 0.37777777777777777
Macro F1-score: 0.37777777777777777
Confusion Matrix:
 [[5 4]
 [4 1]]
Cognitive Load Classification:
Macro Precision: 0.5416666666666667
Macro Recall: 0.5208333333333334
Macro F1-score: 0.475
Confusion Matrix:
 [[7 1]
 [5 1]]
