# Model Comparison

ROC-curve and purity-vs-efficiency comparison of pion classifiers trained on dEdX and residual range:
- **CNN**: full hit-level profiles (34k tracks)
- **Hybrid CNN+MLP**: CNN sequences + chi²/ndof_p + track length + score (279k tracks)
- **XGBoost (p25/50/75 + mean)**: 3-percentile features (8 features)
- **XGBoost (p10/25/50/75/90 + mean)**: 5-percentile features (12 features)
- **MLP (chi²/ndof_p + track length + score)**: contemporary method baseline (279k tracks)
- **XGBoost (mean only)**: mean dEdX and mean RR only

In [None]:
import os, sys

# The relative 'results/...' paths used below are resolved against the current
# working directory. If the kernel was started inside a directory named
# 'notebooks', move up one level; any other start directory is left untouched.
launch_dir = os.path.basename(os.getcwd())
if launch_dir == 'notebooks':
    os.chdir('..')

import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

def load_results(path):
    """Unpickle and return the object stored in the file at ``path``.

    The file is opened in binary mode and is closed again before the
    function returns.

    NOTE: unpickling can execute arbitrary code, so only use this on
    result files you produced yourself or otherwise trust.
    """
    with open(path, mode='rb') as handle:
        payload = pickle.load(handle)
    return payload

# Load one saved result object per model. The files are read in the order
# listed, and each name below is reused by the plotting cell.
cnn, hybrid, xgb_p5, xgb_p3, mlp, xgb_mean = map(load_results, (
    'results/cnn_hit_level.pkl',
    'results/hybrid_cnn_mlp.pkl',
    'results/xgb_p5.pkl',
    'results/xgb_p3.pkl',
    'results/mlp_summary.pkl',
    'results/xgb_mean_dEdX_RR.pkl',
))

# Two-line text summary per model: its name, then the stored AUC, purity and
# efficiency (purity and efficiency are scaled by 100 and shown as percentages).
for r in (cnn, hybrid, xgb_p5, xgb_p3, mlp, xgb_mean):
    print(f"{r['model_name']}")
    print(f"  AUC: {r['auc']:.3f}  |  Purity: {100*r['purity']:.1f}%  |  Efficiency: {100*r['efficiency']:.1f}%")

In [None]:
def _confusion_counts(labels, probs, threshold):
    """Return ``(tp, fp, fn, tn)`` for the selection ``probs >= threshold``.

    ``labels`` and ``probs`` are 1-D numpy arrays of equal length. Entries
    whose label is neither 0 nor 1 are not counted in any of the four cells.
    Counts are returned as plain Python ints.
    """
    selected = probs >= threshold
    is_signal = labels == 1
    is_background = labels == 0
    tp = int(np.sum(selected & is_signal))
    fp = int(np.sum(selected & is_background))
    fn = int(np.sum(~selected & is_signal))
    tn = int(np.sum(~selected & is_background))
    return tp, fp, fn, tn


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is empty."""
    return numerator / denominator if denominator > 0 else 0.0


models = [cnn, hybrid, xgb_p5, xgb_p3, mlp, xgb_mean]
# One colour per entry of `models`, in the same order. The last colour is
# deliberately not a second green: 'forestgreen' and 'seagreen' are nearly
# indistinguishable when two different models are drawn on the same axes.
colors  = ['steelblue', 'crimson', 'darkorange', 'mediumorchid', 'forestgreen', 'saddlebrown']

# Threshold grid for the purity-vs-efficiency sweep; it is the same for every
# model, so build it once instead of once per loop iteration.
thresholds = np.linspace(0.01, 0.99, 200)

# Left panel: ROC curves. Right panel: purity vs efficiency.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

for r, color in zip(models, colors):
    # Coerce to flat numpy arrays so the element-wise comparisons below also
    # work when the pickled values are plain lists or column vectors.
    labels = np.asarray(r['labels']).ravel()
    probs = np.asarray(r['probs']).ravel()

    # --- ROC curve ---
    fpr, tpr, _ = roc_curve(labels, probs)
    roc_auc = auc(fpr, tpr)
    axes[0].plot(fpr, tpr, color=color, lw=2,
                 label=f"{r['model_name']} (AUC = {roc_auc:.3f})")

    # --- Purity vs Efficiency curve ---
    # Purity = tp / (tp + fp), efficiency = tp / (tp + fn); either is taken
    # as 0 when its denominator is empty.
    purities, efficiencies = [], []
    for t in thresholds:
        tp, fp, fn, _tn = _confusion_counts(labels, probs, t)
        purities.append(_safe_ratio(tp, tp + fp))
        efficiencies.append(_safe_ratio(tp, tp + fn))
    axes[1].plot(efficiencies, purities, color=color, lw=2, label=r['model_name'])

    # --- Operating points ---
    # Mark the threshold stored with each result on both panels. The same
    # zero-denominator convention as the sweep is used, so the marker always
    # sits on the curve instead of turning into NaN and silently vanishing.
    tp, fp, fn, tn = _confusion_counts(labels, probs, r['threshold'])
    tpr_op = _safe_ratio(tp, tp + fn)
    fpr_op = _safe_ratio(fp, fp + tn)
    pur_op = _safe_ratio(tp, tp + fp)
    axes[0].scatter(fpr_op, tpr_op, color=color, marker='o', s=80, zorder=5,
                    label=f'  operating point (t={r["threshold"]:.2f})')
    axes[1].scatter(tpr_op, pur_op, color=color, marker='o', s=80, zorder=5,
                    label=f'  operating point (t={r["threshold"]:.2f})')

# Dashed diagonal: the TPR == FPR reference line.
axes[0].plot([0, 1], [0, 1], 'k--', alpha=0.4, lw=1)
axes[0].set_xlabel('False Positive Rate', fontsize=12)
axes[0].set_ylabel('True Positive Rate (Efficiency)', fontsize=12)
axes[0].set_title('ROC Curve', fontsize=12)
axes[0].legend(fontsize=8)
axes[0].set_xlim([0, 1])
axes[0].set_ylim([0, 1])

axes[1].set_xlabel('Efficiency (Recall)', fontsize=12)
axes[1].set_ylabel('Purity (Precision)', fontsize=12)
axes[1].set_title('Purity vs Efficiency', fontsize=12)
axes[1].legend(fontsize=8)
axes[1].set_xlim([0, 1])
axes[1].set_ylim([0, 1])

plt.suptitle('Pion classifier comparison — dEdX + residual range', fontsize=13, y=1.01)
plt.tight_layout()
plt.show()