# Threshold Sensitivity Study

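Explore how the sparsification threshold passed to `sindy_stls` affects equation discovery for the Van der Pol oscillator, as measured by F1 score, precision, recall, and the number of active terms.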

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sc_sindy import (
    sindy_stls,
    build_library_2d,
    compute_derivatives_finite_diff,
    compute_structure_metrics,
)
from sc_sindy.systems import VanDerPol

In [None]:
# Generate data: simulate the Van der Pol system, then assemble the inputs
# used by the threshold sweep (derivatives, library matrix, reference
# coefficients).
system = VanDerPol(mu=1.0)
initial_state = [1.0, 0.0]
t, X = system.simulate(initial_state, t_span=(0, 20), dt=0.01)

# Library terms and the matching reference coefficients for those labels
Theta, labels = build_library_2d(X)
true_xi = system.true_coefficients(labels)

# Finite-difference derivatives, using the spacing of the returned time grid
sample_spacing = t[1] - t[0]
X_dot = compute_derivatives_finite_diff(X, sample_spacing)

## Sweep Threshold Values

In [None]:
# Sweep 20 log-spaced thresholds from 1e-3 to 1 and record, for each one,
# the structure metrics against `true_xi` and the number of active terms.
thresholds = np.logspace(-3, 0, 20)
results = []

for thresh in thresholds:
    xi, _ = sindy_stls(Theta, X_dot, threshold=thresh)
    metrics = compute_structure_metrics(xi, true_xi)
    # Count every nonzero coefficient rather than using a fixed 0.01
    # magnitude cutoff: the sweep goes down to 1e-3, so a 0.01 cutoff would
    # leave out retained terms whose magnitude lies between `thresh` and 0.01.
    # NOTE(review): assumes sindy_stls sets pruned coefficients to exactly 0
    # -- confirm against its implementation.
    n_active = int(np.count_nonzero(xi))
    results.append({
        'threshold': thresh,
        'f1': metrics['f1'],
        'precision': metrics['precision'],
        'recall': metrics['recall'],
        'n_active': n_active,
    })

In [None]:
# Plot results: three side-by-side panels (F1, precision/recall, active-term
# count), each drawn against the threshold on a logarithmic x-axis.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Extract each series from `results` once instead of once per plot call
threshold_values = [r['threshold'] for r in results]
f1_values = [r['f1'] for r in results]
precision_values = [r['precision'] for r in results]
recall_values = [r['recall'] for r in results]
active_counts = [r['n_active'] for r in results]

# F1 score, with a dashed reference line at F1 = 1.0
axes[0].semilogx(threshold_values, f1_values, 'b-o')
axes[0].set_xlabel('Threshold')
axes[0].set_ylabel('F1 Score')
axes[0].set_title('F1 Score vs Threshold')
axes[0].axhline(y=1.0, color='g', linestyle='--', alpha=0.5)

# Precision/Recall
axes[1].semilogx(threshold_values, precision_values, 'r-o', label='Precision')
axes[1].semilogx(threshold_values, recall_values, 'b-o', label='Recall')
axes[1].set_xlabel('Threshold')
# Label the y-axis so this panel matches the other two
axes[1].set_ylabel('Score')
axes[1].legend()
axes[1].set_title('Precision/Recall vs Threshold')

# Number of active terms
axes[2].semilogx(threshold_values, active_counts, 'g-o')
axes[2].set_xlabel('Threshold')
axes[2].set_ylabel('Active Terms')
axes[2].set_title('Sparsity vs Threshold')

plt.tight_layout()
plt.show()

## Find Optimal Threshold

In [None]:
def _f1_of(entry):
    """Return the F1 score stored in one sweep result."""
    return entry['f1']


# Pick the sweep entry with the highest F1; when several entries tie, max()
# keeps the one that appears first in `results`.
best = max(results, key=_f1_of)
print(
    f"Optimal threshold: {best['threshold']:.4f}",
    f"Best F1 score: {best['f1']:.3f}",
    f"Active terms: {best['n_active']}",
    sep="\n",
)