In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, f1_score

# Read the labelled dataset; the path is resolved relative to the working directory.
df = pd.read_csv("../EDA/depressed_with_anxiety_labels.csv")

# Column to predict.
target = 'anxiety_binary'

# Columns kept out of the feature matrix: the participant id, the target itself,
# and the remaining score / label columns listed here.
drop_cols = ['participant_id', 'anxiety_binary', 'anxiety_score', 'phq8_score', 'phq8_binary']

# Every other column, in its original file order, is used as a model input.
features = df.columns[~df.columns.isin(drop_cols)].tolist()

# Feature matrix and label vector used by the modelling cells.
X = df.loc[:, features]
y = df.loc[:, target]

In [2]:
# Candidate classifiers, keyed by the label printed in the report.
# Insertion order is the order in which results are reported.
# NOTE(review): no scaler is applied in this cell — confirm the features are
# standardized upstream, since both base models are linear.
models = {}
models["SVM (Linear)"] = SVC(kernel='linear', probability=True, random_state=42)
models["Logistic (L2)"] = LogisticRegression(penalty='l2', solver='liblinear', random_state=42)
# The ensemble is given its own estimator instances rather than sharing the
# standalone ones above; soft voting relies on predicted probabilities, hence
# probability=True on the SVC.
models["Voting Ensemble"] = VotingClassifier(
    estimators=[
        ('svm', SVC(kernel='linear', probability=True, random_state=42)),
        ('logreg', LogisticRegression(penalty='l2', solver='liblinear', random_state=42)),
    ],
    voting='soft',
)

# Stratified 5-fold splitter; shuffling uses a fixed seed so every model is
# evaluated on the same fold assignment.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

print("🔍 5-Fold Cross-Validation Results\n")
for name, model in models.items():
    # Per-fold metrics for the current model.
    acc_scores, f1_scores = [], []

    for train_index, test_index in kf.split(X, y):
        X_train, y_train = X.iloc[train_index], y.iloc[train_index]
        X_test, y_test = X.iloc[test_index], y.iloc[test_index]

        # Refit on this fold's training rows, then predict the held-out rows.
        y_pred = model.fit(X_train, y_train).predict(X_test)

        acc_scores.append(accuracy_score(y_test, y_pred))
        f1_scores.append(f1_score(y_test, y_pred))

    # One report block per model: name, fold-averaged accuracy and F1, separator.
    print(
        f"📌 {name}",
        f"   ✅ Mean Accuracy: {np.mean(acc_scores):.4f}",
        f"   🎯 Mean F1 Score: {np.mean(f1_scores):.4f}",
        "--------------------------------------------------",
        sep="\n",
    )


🔍 5-Fold Cross-Validation Results

📌 SVM (Linear)
   ✅ Mean Accuracy: 0.8803
   🎯 Mean F1 Score: 0.9045
--------------------------------------------------
📌 Logistic (L2)
   ✅ Mean Accuracy: 0.8121
   🎯 Mean F1 Score: 0.8639
--------------------------------------------------
📌 Voting Ensemble
   ✅ Mean Accuracy: 0.8803
   🎯 Mean F1 Score: 0.9045
--------------------------------------------------
