In [2]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, roc_auc_score
from scipy.stats import pearsonr
from sklearn.preprocessing import MinMaxScaler

In [72]:
features_csv = 'C:\\cv_project\\code\\output_csv\\normalized_feature_matrix.csv'
labels_csv = 'C:\\cv_project\\code\\output_csv\\final_labels.csv'

In [73]:
# Load data
features_df = pd.read_csv(features_csv)
labels_df = pd.read_csv(labels_csv)

# # Select specific columns to include
# columns_to_include = ['video', 'Excited', 'Smiled', 'EyeContact', 'Calm', 'NotStressed']
# labels_df = labels_df[columns_to_include]

# Ensure uppercase video names in both
features_df['video'] = features_df['video'].str.upper()
labels_df['video'] = labels_df['video'].str.upper()

# Merge on 'video'
df = pd.merge(features_df, labels_df, on='video')

# Separate features and targets
X = df.drop(columns=['video'] + list(labels_df.columns[1:]))
y = df[labels_df.columns[1:]]

# Check columns with all NaN values
all_nan_cols = X.columns[X.isna().all()]
print("All-NaN Columns:\n", all_nan_cols)

# Drop them
X = X.drop(columns=all_nan_cols)


# Scale features (SVR is sensitive to scale)
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

All-NaN Columns:
 Index([], dtype='object')


In [74]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
import pandas as pd

# Store evaluation metrics
correlations = {}
mse_scores = {}
best_params = {}

# Define hyperparameter grid
param_grid = {
    'C': [0.1, 1, 10],
    'epsilon': [0.01, 0.1, 0.5],
    'kernel': ['rbf', 'linear']
}

# Loop over each target label
for label in y.columns:
    print(f"\n🔧 Training SVR with hyperparameter tuning for: {label}")

    svr = SVR()
    grid = GridSearchCV(
        estimator=svr,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=3,
        n_jobs=-1,
        verbose=0
    )

    # Fit on training data
    grid.fit(X_train, y_train[label])
    best_model = grid.best_estimator_
    best_params[label] = grid.best_params_

    # Predict on test set
    preds = best_model.predict(X_test)

    # Compute metrics
    mse = mean_squared_error(y_test[label], preds)
    mse_scores[label] = mse

    corr, _ = pearsonr(y_test[label], preds)
    correlations[label] = corr

# 📊 Display summary
summary_df = pd.DataFrame({
    'MSE': mse_scores,
    'Correlation': correlations
}).sort_values(by='MSE')

print("\n📈 Performance Summary:")
print(summary_df)

# 🛠 Best parameters
print("\n🔍 Best Parameters per label:")
for label, params in best_params.items():
    print(f"{label}: {params}")



🔧 Training SVR with hyperparameter tuning for: Overall

🔧 Training SVR with hyperparameter tuning for: RecommendHiring

🔧 Training SVR with hyperparameter tuning for: Colleague

🔧 Training SVR with hyperparameter tuning for: Engaged

🔧 Training SVR with hyperparameter tuning for: Excited

🔧 Training SVR with hyperparameter tuning for: EyeContact

🔧 Training SVR with hyperparameter tuning for: Smiled

🔧 Training SVR with hyperparameter tuning for: SpeakingRate

🔧 Training SVR with hyperparameter tuning for: NoFillers

🔧 Training SVR with hyperparameter tuning for: Friendly

🔧 Training SVR with hyperparameter tuning for: Paused

🔧 Training SVR with hyperparameter tuning for: EngagingTone

🔧 Training SVR with hyperparameter tuning for: StructuredAnswers

🔧 Training SVR with hyperparameter tuning for: Calm

🔧 Training SVR with hyperparameter tuning for: NotStressed

🔧 Training SVR with hyperparameter tuning for: Focused

🔧 Training SVR with hyperparameter tuning for: Authentic

🔧 Training

In [75]:
# Show results
print("\n📊 Pearson Correlations:")
for label, corr in correlations.items():
    print(f"{label}: {corr:.4f}")

print("\n📉 Mean Squared Errors:")
for label, mse in mse_scores.items():
    print(f"{label}: {mse:.4f}")


📊 Pearson Correlations:
Overall: 0.5571
RecommendHiring: 0.5515
Colleague: 0.7933
Engaged: 0.8016
Excited: 0.8740
EyeContact: 0.6237
Smiled: 0.7186
SpeakingRate: 0.7202
NoFillers: 0.5070
Friendly: 0.7380
Paused: 0.7136
EngagingTone: 0.8928
StructuredAnswers: 0.3164
Calm: 0.6091
NotStressed: 0.5353
Focused: 0.4726
Authentic: 0.7831
NotAwkward: 0.5670
Total: 0.5090

📉 Mean Squared Errors:
Overall: 0.3313
RecommendHiring: 0.4219
Colleague: 0.1651
Engaged: 0.1643
Excited: 0.2744
EyeContact: 0.3581
Smiled: 0.2190
SpeakingRate: 0.0801
NoFillers: 0.6808
Friendly: 0.2681
Paused: 0.1023
EngagingTone: 0.3565
StructuredAnswers: 0.5355
Calm: 0.1634
NotStressed: 0.1479
Focused: 0.1934
Authentic: 0.0447
NotAwkward: 0.3540
Total: 51.1749
