In [None]:
import os
import json
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.svm import SVC
from scipy.stats import expon
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, roc_curve, auc, recall_score, precision_score

def plot_ROC_curve(all_y_true, all_y_proba, filename='roc_curve.png'):
    """Generate a single ROC curve and save it to ``filename``.

    Args:
        all_y_true: Ground-truth binary labels, forwarded as ``y_true`` to
            ``sklearn.metrics.roc_curve``.
        all_y_proba: Per-sample scores, forwarded as ``y_score`` to
            ``roc_curve``.
        filename: Output path for the figure. Missing parent directories are
            created. Pass ``None`` (or an empty string) to build the figure
            without writing anything.
    """
    import os

    fpr, tpr, _ = roc_curve(all_y_true, all_y_proba)
    roc_auc = auc(fpr, tpr)

    fig = plt.figure()
    try:
        plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC (AUC = {roc_auc:.2f})')
        # Diagonal = chance-level reference.
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curve')
        plt.legend(loc="lower right")

        if filename:
            # The save call used to be commented out, so `filename` was
            # silently ignored and the figure was discarded.
            out_dir = os.path.dirname(filename)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            fig.savefig(filename)
    finally:
        # Close this specific figure even if plotting or saving fails.
        plt.close(fig)


# Comparison name -> JSON file holding that comparison's fold definition.
folds_paths = dict([
    ("PD_vs_HC", "folds/PD_vs_HC_folds.json"),
    ("CTR_DCL_vs_Park_DCL", "folds/CTR_DCL_vs_Park_DCL_folds.json"),
    ("CTR_noDCL_vs_Park_noDCL", "folds/CTR_noDCL_vs_Park_noDCL_folds.json"),
    ("Park_noDCL_vs_Park_DCL", "folds/Park_noDCL_vs_Park_DCL_folds.json"),
])

# Parse every fold file up front; keys mirror those of `folds_paths`.
folds_data = {}
for name in folds_paths:
    path = folds_paths[name]
    with open(path, "r") as f:
        parsed = json.load(f)
    folds_data[name] = parsed

# Feature tables, one CSV per speech dimension, each indexed by its 'id' column.
_feature_csvs = (
    ("articulation", 'features_motor_libre/articulationfeatures_ML.csv'),
    ("prosody", 'features_motor_libre/prosodyfeatures_ML.csv'),
    ("phonological", 'features_motor_libre/phonologicalfeatures_ML.csv'),
)
features_dict = {
    feat_name: pd.read_csv(csv_path).set_index('id')
    for feat_name, csv_path in _feature_csvs
}
# "earlyfusion" has no table of its own here: the string is only a placeholder
# so the key takes part in the per-feature-set loops, which (in the code
# visible in this notebook) use the key and never the value.
features_dict["earlyfusion"] = 'asd'

# Load the stored test predictions for every (comparison, feature set) pair.
for fold_name, fold_data in folds_data.items():
    print(f"Running experiments for: {fold_name}")

    for feat_type, feat_df in features_dict.items():
        print(f" - Feature set: {feat_type}")

        # Read back previously written predictions (nothing is saved here).
        test_predictions_df = pd.read_csv(f"results_motor_libre/{fold_name}/{feat_type}_test_predictions.csv")

        # NOTE(review): the later ROC-comparison cell reads a 'score' column
        # from these same files; confirm which column should feed the ROC.
        all_y_true, all_y_proba, all_y_pred = (
            test_predictions_df[column].to_list()
            for column in ('y_true', 'y_proba', 'y_pred')
        )

        # Per-feature ROC plotting is currently disabled:
        # plot_ROC_curve(all_y_true, all_y_proba, f'results_motor_libre/{fold_name}/plots/roc_curve_{feat_type}.png')

: 

In [3]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
import pandas as pd
from sklearn.metrics import roc_auc_score


def plot_combined_ROC_curve(all_feature_results, save_path='roc_comparison_2x2.pdf'):
    """Create a publication-quality grid of ROC plots with per-subplot legends.

    One panel is drawn per comparison, two panels per row. Up to four
    comparisons give the original 2x2 layout; more comparisons add rows
    instead of raising ``IndexError``.

    Args:
        all_feature_results: Mapping ``fold_name -> {feat_type: (y_true,
            y_score)}``. Each pair is passed to ``roc_curve``; insertion
            order decides panel order and curve colours.
        save_path: Destination file. The figure is always written as PDF.
    """
    # Style overrides are applied through rc_context so they do not leak into
    # figures created after this function returns.
    rc_overrides = {
        'font.family': 'serif',
        'font.size': 13,
        'axes.titlesize': 15,
        'axes.labelsize': 13,
        'legend.fontsize': 11,
        'xtick.labelsize': 11,
        'ytick.labelsize': 11
    }

    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']
    linestyles = ['-', '-', '-', '-', (0, (3, 1, 1, 1))]

    title_mapping = {
        "PD_vs_HC": "(a) PD vs. HC",
        "CTR_DCL_vs_Park_DCL": "(b) PD-MCI vs. HC",
        "CTR_noDCL_vs_Park_noDCL": "(c) PD-nMCI vs. HC",
        "Park_noDCL_vs_Park_DCL": "(d) PD-nMCI vs. PD-MCI"
    }

    legend_labels = {
        "articulation": "Articulation",
        "prosody": "Prosody",
        "phonological": "Phonological",
        "earlyfusion": "All dim.",
    }

    n_panels = len(all_feature_results)
    n_cols = 2
    # Never fewer than two rows, so <= 4 comparisons keep the 9x9 2x2 layout.
    n_rows = max(2, (n_panels + n_cols - 1) // n_cols)

    with plt.rc_context(rc_overrides):
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(9, 4.5 * n_rows))
        try:
            axes = axes.flatten()

            for idx, (fold_name, feature_results) in enumerate(all_feature_results.items()):
                ax = axes[idx]

                for i, (feat_type, (y_true, y_proba)) in enumerate(feature_results.items()):
                    fpr, tpr, _ = roc_curve(y_true, y_proba)
                    roc_auc = auc(fpr, tpr)

                    readable_label = legend_labels.get(feat_type, feat_type)

                    ax.plot(fpr, tpr,
                            color=colors[i % len(colors)],
                            linestyle=linestyles[i % len(linestyles)],
                            lw=2.2,
                            label=f'{readable_label} ({roc_auc:.2f})')

                # Chance-level diagonal.
                ax.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--', alpha=0.6)

                ax.set_xlim(-0.02, 1.02)
                ax.set_ylim(-0.02, 1.02)
                ax.set_xlabel('1 - Specificity')
                # Only the left column carries the y-axis label.
                if idx % n_cols == 0:
                    ax.set_ylabel('Sensitivity')

                ax.set_title(title_mapping.get(fold_name, fold_name), pad=10)
                ax.grid(True, alpha=0.3)
                # A comparison may have no curves; skip the empty legend.
                if feature_results:
                    ax.legend(loc='lower right')

            # Remove unused subplots
            for unused_ax in axes[n_panels:]:
                fig.delaxes(unused_ax)

            fig.tight_layout(h_pad=2.0, w_pad=2.0)
            # Saved inside the rc_context so the overrides are still active
            # when the figure is rendered.
            fig.savefig(save_path, format='pdf', bbox_inches='tight', dpi=600,
                        pad_inches=0.05, transparent=True)
        finally:
            # Close this figure explicitly, even if plotting or saving failed.
            plt.close(fig)


# === Run for all folds and collect results ===
def _load_scored_predictions(csv_path):
    """Read a predictions CSV and return ``(y_true, scores, auc)``.

    Uses the ``y_true`` and ``score`` columns. Any error from reading the
    file, a missing column, or the AUC computation propagates to the caller.
    """
    predictions = pd.read_csv(csv_path)
    y_true = predictions['y_true'].tolist()
    scores = predictions['score'].tolist()
    return y_true, scores, roc_auc_score(y_true, scores)


# fold_name -> {feat_type: (y_true, scores)} for the better-scoring kernel.
all_feature_results = {}

for fold_name in folds_data:
    print(f"Running experiments for: {fold_name}")

    feature_results = {}

    for feat_type in features_dict:
        print(f" - Feature set: {feat_type}")

        # (model key, name used in messages, predictions file). RBF comes
        # first and linear replaces it only on a strictly higher AUC.
        candidates = (
            ("rbf", "RBF",
             f"results_motor_libre/{fold_name}/{feat_type}_test_predictions.csv"),
            ("linear", "Linear",
             f"results_linear/results_motor_libre/{fold_name}/{feat_type}_test_predictions.csv"),
        )

        best_auc = -1
        best_y_true, best_y_proba = None, None
        selected_model = None

        # NOTE(review): the kernel is chosen by AUC on the same predictions
        # that are later plotted. If these files hold held-out test
        # predictions (as their names suggest), the curves are optimistically
        # biased; consider selecting on validation data.
        for model_key, display_name, csv_path in candidates:
            try:
                y_true, y_score, model_auc = _load_scored_predictions(csv_path)
            except Exception as e:
                # Deliberately best-effort: a failed candidate is reported
                # and skipped so the other kernel can still be used.
                print(f"   > Failed to load {display_name}: {e}")
                continue

            if model_auc > best_auc:
                best_auc = model_auc
                best_y_true = y_true
                best_y_proba = y_score
                selected_model = model_key

        if selected_model:
            print(f"   > Selected: {selected_model} (AUC = {best_auc:.3f})")
            feature_results[feat_type] = (best_y_true, best_y_proba)
        else:
            print(f"   > No valid predictions found for {feat_type}.")

    all_feature_results[fold_name] = feature_results

# === Final combined figure ===
plot_combined_ROC_curve(all_feature_results)


Running experiments for: PD_vs_HC
 - Feature set: articulation
   > Selected: rbf (AUC = 0.792)
 - Feature set: prosody
   > Selected: linear (AUC = 0.697)
 - Feature set: phonological
   > Selected: rbf (AUC = 0.763)
 - Feature set: earlyfusion
   > Selected: linear (AUC = 0.785)
Running experiments for: CTR_DCL_vs_Park_DCL
 - Feature set: articulation
   > Selected: linear (AUC = 0.871)
 - Feature set: prosody
   > Selected: linear (AUC = 0.738)
 - Feature set: phonological
   > Selected: rbf (AUC = 0.949)
 - Feature set: earlyfusion
   > Selected: linear (AUC = 0.895)
Running experiments for: CTR_noDCL_vs_Park_noDCL
 - Feature set: articulation
   > Selected: rbf (AUC = 0.679)
 - Feature set: prosody
   > Selected: rbf (AUC = 0.625)
 - Feature set: phonological
   > Selected: linear (AUC = 0.692)
 - Feature set: earlyfusion
   > Selected: rbf (AUC = 0.694)
Running experiments for: Park_noDCL_vs_Park_DCL
 - Feature set: articulation
   > Selected: linear (AUC = 0.771)
 - Feature set:

In [7]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, roc_auc_score
import pandas as pd

def plot_individual_ROC_curves(all_feature_results, save_dir="roc_plots"):
    """Create and save one ROC plot per fold_name in all_feature_results.

    Args:
        all_feature_results: Mapping ``fold_name -> {feat_type: (y_true,
            y_score)}``. Each pair is passed to ``roc_curve``.
        save_dir: Directory for the ``roc_<fold_name>.png`` files; created
            if it does not exist.
    """
    import os
    os.makedirs(save_dir, exist_ok=True)

    # Style overrides are applied through rc_context so they do not leak into
    # figures created after this function returns.
    rc_overrides = {
        'font.family': 'serif',
        'font.size': 13,
        'axes.titlesize': 15,
        'axes.labelsize': 13,
        'legend.fontsize': 11,
        'xtick.labelsize': 11,
        'ytick.labelsize': 11
    }

    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']
    linestyles = ['-', '-', '-', '-', (0, (3, 1, 1, 1))]

    title_mapping = {
        "PD_vs_HC": "PD vs. HC",
        "CTR_DCL_vs_Park_DCL": "PD-MCI vs. HC",
        "CTR_noDCL_vs_Park_noDCL": "PD-nMCI vs. HC",
        "Park_noDCL_vs_Park_DCL": "PD-nMCI vs. PD-MCI"
    }

    legend_labels = {
        "articulation": "Articulation",
        "prosody": "Prosody",
        "phonological": "Phonological",
        "earlyfusion": "All dim.",
    }

    with plt.rc_context(rc_overrides):
        for fold_name, feature_results in all_feature_results.items():
            fig, ax = plt.subplots(figsize=(5, 5))
            try:
                for i, (feat_type, (y_true, y_proba)) in enumerate(feature_results.items()):
                    fpr, tpr, _ = roc_curve(y_true, y_proba)
                    roc_auc = auc(fpr, tpr)
                    readable_label = legend_labels.get(feat_type, feat_type)

                    ax.plot(
                        fpr, tpr,
                        color=colors[i % len(colors)],
                        linestyle=linestyles[i % len(linestyles)],
                        lw=2.2,
                        label=f'{readable_label} ({roc_auc:.2f})'
                    )

                # Chance-level diagonal.
                ax.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--', alpha=0.6)
                ax.set_xlim(-0.02, 1.02)
                ax.set_ylim(-0.02, 1.02)
                ax.set_xlabel('1 - Specificity')
                ax.set_ylabel('Sensitivity')
                ax.set_title(title_mapping.get(fold_name, fold_name), pad=10)
                ax.grid(True, alpha=0.3)
                # A comparison may have no curves; skip the empty legend.
                if feature_results:
                    ax.legend(loc='lower right')

                save_path = os.path.join(save_dir, f"roc_{fold_name}.png")
                # Saved inside the rc_context so the overrides are still
                # active when the figure is rendered.
                fig.savefig(save_path, bbox_inches='tight', dpi=600, transparent=True)
            finally:
                # Close this figure explicitly, even if plotting or saving failed.
                plt.close(fig)

# Write one standalone PNG per comparison into the default "roc_plots" directory.
plot_individual_ROC_curves(all_feature_results)
