In [14]:
import pandas as pd 
import numpy as np
import joblib
import seaborn as sns
from matplotlib import style
import matplotlib.pyplot as plt
from sklearn.inspection import permutation_importance
%matplotlib inline

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn import tree 
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, make_scorer, RocCurveDisplay, roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, LeaveOneOut, cross_val_predict
from xgboost import XGBClassifier

import warnings
warnings.filterwarnings("ignore")

df = pd.read_csv('results/all_audio_features.csv')
df = df.drop(columns = ['file_name'])

X = df.drop(columns = ['group'])
y = df['group']

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

normalizer = StandardScaler()
normalizer.fit(X)
scaled_features = normalizer.transform(X)
X = pd.DataFrame(scaled_features, index=X.index, columns=X.columns)

X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)


In [15]:
def evaluate_model_with_loo(model, X_test, y_test, show_roc=False):
    """
    Evaluate a model using Leave-One-Out cross-validation
    """
    # Initialize Leave-One-Out cross-validator
    loo = LeaveOneOut()
    
    # Get predictions and predicted probabilities
    y_pred_loo = cross_val_predict(model, X_test, y_test, cv=loo)
    y_proba_loo = cross_val_predict(model, X_test, y_test, cv=loo, method='predict_proba')[:, 1]
    
    # Calculate evaluation metrics
    metrics = {
        'accuracy': accuracy_score(y_test, y_pred_loo),
        'precision': precision_score(y_test, y_pred_loo),
        'recall': recall_score(y_test, y_pred_loo),
        'f1': f1_score(y_test, y_pred_loo),
        'roc_auc': roc_auc_score(y_test, y_proba_loo),
        'confusion_matrix': confusion_matrix(y_test, y_pred_loo)
    }
    
    # Print evaluation results
    print("=== LOO Validation Metrics ===")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1-Score: {metrics['f1']:.4f}")
    print(f"ROC AUC: {metrics['roc_auc']:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred_loo))
    
    # Display ROC curve if requested
    if show_roc:
        RocCurveDisplay.from_predictions(y_test, y_proba_loo)
        plt.title("ROC Curve (Leave-One-Out Validation)")
        plt.show()
    
    return metrics

# LogisticRegression

In [16]:
np.random.seed(42)
log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg.fit(X_train, y_train)
metrics = evaluate_model_with_loo(log_reg, X_test, y_test)

=== LOO Validation Metrics ===
Accuracy: 0.8667
Precision: 0.8333
Recall: 0.8333
F1-Score: 0.8333
ROC AUC: 0.8519

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.89      0.89         9
           1       0.83      0.83      0.83         6

    accuracy                           0.87        15
   macro avg       0.86      0.86      0.86        15
weighted avg       0.87      0.87      0.87        15



# RandomForestClassifier

In [17]:
np.random.seed(42)
rf0 = RandomForestClassifier()
rf0.fit(X_train, y_train)
metrics = evaluate_model_with_loo(rf0, X_test, y_test)

=== LOO Validation Metrics ===
Accuracy: 0.6667
Precision: 0.6667
Recall: 0.3333
F1-Score: 0.4444
ROC AUC: 0.7130

Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.89      0.76         9
           1       0.67      0.33      0.44         6

    accuracy                           0.67        15
   macro avg       0.67      0.61      0.60        15
weighted avg       0.67      0.67      0.63        15



# XGBClassifier

In [18]:
np.random.seed(42)
xgboost = XGBClassifier(random_state=42, eval_metric='logloss', use_label_encoder=False)
xgboost.fit(X_train, y_train)
metrics = evaluate_model_with_loo(xgboost, X_test, y_test)

=== LOO Validation Metrics ===
Accuracy: 0.6667
Precision: 0.6667
Recall: 0.3333
F1-Score: 0.4444
ROC AUC: 0.7407

Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.89      0.76         9
           1       0.67      0.33      0.44         6

    accuracy                           0.67        15
   macro avg       0.67      0.61      0.60        15
weighted avg       0.67      0.67      0.63        15

