# Interactive Visualization of Piecewise-Constant Metrics

This notebook demonstrates why classification metrics like F1 score are piecewise-constant functions and illustrates the challenges this creates for continuous optimization methods.

## Key Concepts

- **Piecewise-constant**: Metrics only change at unique probability values
- **Breakpoints**: The unique predicted probabilities where metrics can change
- **Flat regions**: Intervals between breakpoints where the metric stays constant
- **Optimization challenge**: Continuous optimizers may miss the global optimum

In [None]:
# Import optimal_cutoffs functions
import sys

import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display
from scipy import optimize

sys.path.append('..')
from optimal_cutoffs.optimizers import _metric_score

from optimal_cutoffs import get_confusion_matrix, get_optimal_threshold

# Set up matplotlib for interactive plots
%matplotlib widget
plt.style.use('default')

## 1. Basic Demonstration

Let's start with a simple example to see the piecewise-constant nature:

In [None]:
# Toy dataset: seven labelled samples, already ordered by predicted probability
y_true = np.asarray([0, 0, 1, 1, 0, 1, 0])
y_prob = np.asarray([0.1, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9])

# Emit the whole summary in one call; each entry ends up on its own line
print(
    "Example data:",
    f"True labels:  {y_true}",
    f"Probabilities: {y_prob}",
    f"\nUnique probabilities (breakpoints): {np.unique(y_prob)}",
    sep="\n",
)

In [None]:
def plot_piecewise_metric(y_true, y_prob, metric='f1', title_suffix=''):
    """Draw ``metric`` against the decision threshold for one dataset.

    The metric is sampled on 500 evenly spaced thresholds in [0.05, 0.95] and
    drawn as a line. Every unique value of ``y_prob`` is treated as a
    breakpoint and highlighted with a red dot plus a dashed vertical line, and
    the threshold returned by ``get_optimal_threshold`` with
    ``method='smart_brute'`` is marked with a green star.

    Parameters
    ----------
    y_true : array-like
        True labels, forwarded unchanged to the scoring helpers.
    y_prob : array-like
        Predicted probabilities; their unique values are the breakpoints.
    metric : str
        Metric name passed through to ``_metric_score`` (default ``'f1'``).
    title_suffix : str
        Text appended verbatim to the plot title.

    Returns
    -------
    tuple
        ``(fig, optimal_threshold, optimal_score)``.
    """

    def score_at(threshold):
        # Single entry point to the library scorer for this dataset/metric.
        return _metric_score(y_true, y_prob, threshold, metric)

    metric_label = metric.upper()

    # Dense grid, used only to draw the curve
    grid = np.linspace(0.05, 0.95, 500)
    curve = [score_at(t) for t in grid]

    # Breakpoints are the distinct predicted probabilities
    breakpoints = np.unique(y_prob)
    breakpoint_scores = [score_at(bp) for bp in breakpoints]

    # Best threshold according to the library's smart brute-force search
    optimal_threshold = get_optimal_threshold(
        y_true, y_prob, metric, method='smart_brute'
    )
    optimal_score = score_at(optimal_threshold)

    fig, ax = plt.subplots(1, 1, figsize=(12, 6))

    # Curve first, then the highlighted points on top of it
    ax.plot(grid, curve, 'b-', linewidth=2, label=f'{metric_label} Score')
    ax.scatter(
        breakpoints, breakpoint_scores,
        color='red', s=80, zorder=5,
        label=f'Breakpoints ({len(breakpoints)} points)',
    )
    ax.scatter(
        [optimal_threshold], [optimal_score],
        color='green', s=150, marker='*', zorder=6,
        label=f'Optimal (t={optimal_threshold:.3f})',
    )

    # Faint guide line through every breakpoint
    for bp in breakpoints:
        ax.axvline(x=bp, color='red', linestyle='--', alpha=0.3)

    ax.set(
        xlabel='Decision Threshold',
        ylabel=f'{metric_label} Score',
        title=f'Piecewise-Constant Nature of {metric_label} Score{title_suffix}',
        ylim=(0, 1.05),
    )
    ax.grid(True, alpha=0.3)
    ax.legend()

    plt.tight_layout()
    plt.show()

    return fig, optimal_threshold, optimal_score

# Render the F1 curve for the example data and report the best threshold found
fig, opt_thresh, opt_score = plot_piecewise_metric(y_true, y_prob, metric='f1')
print(f"\nOptimal F1 threshold: {opt_thresh:.3f} (F1 = {opt_score:.3f})")

## 2. Interactive Exploration

Use the sliders below to see how changing the data affects the piecewise-constant structure:

In [None]:
def create_interactive_demo():
    """Create an interactive widget for exploring piecewise-constant behavior.

    Builds sliders for the sample count, positive-class ratio and random seed
    plus a metric dropdown, and redraws the metric-vs-threshold plot through
    ``plot_piecewise_metric`` every time one of the controls changes. The
    widget is displayed as a side effect; nothing is returned.
    """

    # Create sliders for data generation
    n_samples_slider = widgets.IntSlider(
        value=10, min=5, max=20, step=1,
        description='N Samples:'
    )

    pos_ratio_slider = widgets.FloatSlider(
        value=0.5, min=0.1, max=0.9, step=0.1,
        description='Pos Ratio:'
    )

    seed_slider = widgets.IntSlider(
        value=42, min=0, max=100, step=1,
        description='Random Seed:'
    )

    metric_dropdown = widgets.Dropdown(
        options=['f1', 'accuracy', 'precision', 'recall'],
        value='f1',
        description='Metric:'
    )

    # Figure drawn by the previous redraw. It is closed before the next one is
    # created so that moving a slider does not pile up open figures.
    last_fig = None

    def update_plot(n_samples, pos_ratio, seed, metric):
        nonlocal last_fig

        # Generate random data (labels are independent of the probabilities)
        np.random.seed(seed)
        n_pos = int(n_samples * pos_ratio)
        n_neg = n_samples - n_pos

        y_true = np.concatenate([np.zeros(n_neg), np.ones(n_pos)])
        y_prob = np.random.beta(2, 2, n_samples)  # Bell-shaped distribution

        # Sort by probability for cleaner visualization
        sort_idx = np.argsort(y_prob)
        y_true = y_true[sort_idx]
        y_prob = y_prob[sort_idx]

        n_breakpoints = len(np.unique(y_prob))

        # plot_piecewise_metric always opens a fresh figure, so clearing the
        # current one (plt.clf) would leave the old figure open -- and would
        # even create an extra empty figure on the very first call. Close the
        # previous figure explicitly instead.
        if last_fig is not None:
            plt.close(last_fig)

        # Plot
        last_fig, opt_thresh, opt_score = plot_piecewise_metric(
            y_true, y_prob, metric,
            title_suffix=f'\n{n_samples} samples, {n_breakpoints} unique probabilities'
        )

        print(f"Generated {n_samples} samples ({n_pos} positive, {n_neg} negative)")
        print(f"Optimal {metric} threshold: {opt_thresh:.3f} (score = {opt_score:.3f})")
        print(f"Number of breakpoints: {n_breakpoints}")

    # Create interactive widget
    interactive_plot = widgets.interactive(
        update_plot,
        n_samples=n_samples_slider,
        pos_ratio=pos_ratio_slider,
        seed=seed_slider,
        metric=metric_dropdown
    )

    display(interactive_plot)

create_interactive_demo()

## 3. Optimization Methods Comparison

Let's compare different optimization approaches on the same data:

In [None]:
def compare_optimization_methods(y_true, y_prob, metric='f1'):
    """Compare different threshold optimization methods.

    Runs three searches on the same data -- the library's ``'smart_brute'``
    method, a bare ``scipy.optimize.minimize_scalar`` on the negated metric,
    and the library's ``'minimize'`` method -- prints a table of the resulting
    thresholds and scores, and plots them on top of the metric curve.

    Parameters
    ----------
    y_true : array-like
        True labels, forwarded to the scoring helpers.
    y_prob : array-like
        Predicted probabilities.
    metric : str
        Metric name passed through to ``_metric_score`` (default ``'f1'``).

    Returns
    -------
    list of tuple
        ``(method name, threshold, score)`` for each of the three methods.
    """

    metric_label = metric.upper()

    print(f"Comparing optimization methods for {metric_label} score...\n")

    # Method 1: Smart brute force (our recommended approach)
    thresh_brute = get_optimal_threshold(y_true, y_prob, metric, method='smart_brute')
    score_brute = _metric_score(y_true, y_prob, thresh_brute, metric)

    # Method 2: scipy.optimize.minimize_scalar (continuous optimization).
    # The metric is negated because minimize_scalar minimizes.
    result = optimize.minimize_scalar(
        lambda t: -_metric_score(y_true, y_prob, t, metric),
        bounds=(0, 1),
        method='bounded'
    )
    thresh_minimize = result.x
    score_minimize = _metric_score(y_true, y_prob, thresh_minimize, metric)

    # Method 3: With fallback (what our 'minimize' method actually does)
    thresh_fallback = get_optimal_threshold(y_true, y_prob, metric, method='minimize')
    score_fallback = _metric_score(y_true, y_prob, thresh_fallback, metric)

    # Display results
    methods = [
        ('Smart Brute Force', thresh_brute, score_brute),
        ('minimize_scalar Only', thresh_minimize, score_minimize),
        ('With Fallback', thresh_fallback, score_fallback)
    ]

    # Pad to the longest name so the columns line up (a fixed width of 18 was
    # shorter than 'minimize_scalar Only').
    name_width = max(len(name) for name, _, _ in methods)

    for name, threshold, score in methods:
        print(f"{name:{name_width}} | Threshold: {threshold:.4f} | {metric_label}: {score:.4f}")

    # Create visualization
    thresholds = np.linspace(0.01, 0.99, 500)
    scores = [_metric_score(y_true, y_prob, t, metric) for t in thresholds]

    unique_probs = np.unique(y_prob)
    unique_scores = [_metric_score(y_true, y_prob, t, metric) for t in unique_probs]

    fig, ax = plt.subplots(1, 1, figsize=(12, 6))

    # Plot metric function
    ax.plot(thresholds, scores, 'b-', linewidth=1.5, alpha=0.7, label=f'{metric_label} Score')

    # Plot breakpoints
    ax.scatter(unique_probs, unique_scores, color='lightcoral', s=30, alpha=0.6,
              label=f'Breakpoints ({len(unique_probs)} points)')

    # Plot results from different methods.
    # All markers are filled ('X' rather than the unfilled 'x') because
    # matplotlib ignores edgecolors on unfilled markers and emits a warning.
    colors = ['green', 'red', 'blue']
    markers = ['*', 'X', 'D']

    for (name, threshold, score), color, marker in zip(methods, colors, markers, strict=True):
        ax.scatter([threshold], [score], color=color, s=120, marker=marker,
                  zorder=5, label=f'{name}\n(t={threshold:.3f})', edgecolors='black')

    ax.set_xlabel('Decision Threshold')
    ax.set_ylabel(f'{metric_label} Score')
    ax.set_title('Comparison of Optimization Methods')
    ax.grid(True, alpha=0.3)
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    plt.tight_layout()
    plt.show()

    return methods

# Reproducible random test set: 15 binary labels with Beta(2, 2) scores
np.random.seed(123)
n = 15
y_test = np.random.randint(0, 2, size=n)
y_prob_test = np.random.beta(2, 2, size=n)

print(f"Test data: {n} samples with {np.unique(y_prob_test).size} unique probabilities\n")
results = compare_optimization_methods(y_test, y_prob_test, metric='f1')

## 4. Why the Fallback Mechanism Works

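The key insight is that **an optimal threshold can always be found at one of the unique predicted probabilities**: the metric is constant between consecutive breakpoints, so a threshold inside a flat region can never score better than the breakpoint that starts it. Here's why: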

In [None]:
def demonstrate_optimal_at_breakpoints():
    """Show that the optimal threshold is always at a breakpoint.

    Uses a fixed six-sample dataset and prints, in order: the F1 score and
    confusion-matrix counts at every unique probability, the threshold chosen
    by ``get_optimal_threshold``, the F1 score at a threshold lying between
    two breakpoints, and the hard predictions at 0.6, 0.65 and 0.7.
    Nothing is returned.
    """

    # Create example with clear optimal point
    y_true = np.array([0, 0, 1, 1, 0, 1])
    y_prob = np.array([0.2, 0.3, 0.6, 0.7, 0.8, 0.9])

    print("Demonstrating that optimal threshold is at a breakpoint...\n")
    print(f"Data: labels = {y_true}")
    print(f"      probs  = {y_prob}\n")

    # Evaluate F1 at each unique probability
    unique_probs = np.unique(y_prob)
    print("F1 score at each unique probability (breakpoint):")

    for prob in unique_probs:
        f1 = _metric_score(y_true, y_prob, prob, 'f1')
        tp, tn, fp, fn = get_confusion_matrix(y_true, y_prob, prob)
        print(f"  t = {prob:.1f}: F1 = {f1:.3f} | TP={tp}, TN={tn}, FP={fp}, FN={fn}")

    # Find optimal
    optimal_thresh = get_optimal_threshold(y_true, y_prob, 'f1')
    optimal_f1 = _metric_score(y_true, y_prob, optimal_thresh, 'f1')

    print(f"\n→ Optimal: t = {optimal_thresh:.1f}, F1 = {optimal_f1:.3f}")

    # Now test a threshold between breakpoints
    between_thresh = 0.65  # Between 0.6 and 0.7
    between_f1 = _metric_score(y_true, y_prob, between_thresh, 'f1')

    print(f"\nFor comparison, at t = {between_thresh:.2f} (between breakpoints):")
    print(f"  F1 = {between_f1:.3f} (same as t = 0.6 because both give same predictions)")

    # Visualize predictions at different thresholds.
    # A strict ">" is required here: with ">=" the sample at 0.6 is positive
    # for t=0.6 but not for t=0.65, so the two printed vectors would differ and
    # contradict the messages above and below.
    # NOTE(review): this assumes the library scorer also classifies with
    # "prob > threshold" -- confirm against get_confusion_matrix.
    print("\nPrediction vectors:")
    for thresh in [0.6, between_thresh, 0.7]:
        predictions = (y_prob > thresh).astype(int)
        print(f"  t = {thresh:.2f}: {predictions}")

    print("\n→ Note: t=0.6 and t=0.65 give the same predictions, hence same F1!")

demonstrate_optimal_at_breakpoints()

## 5. Multiple Metrics Comparison

Different metrics often have different optimal thresholds:

In [None]:
def compare_multiple_metrics(y_true, y_prob):
    """Plot four metrics against the threshold and mark each one's optimum.

    Accuracy, F1, precision and recall are each sampled on 200 thresholds in
    [0.05, 0.95] and drawn in their own colour. The threshold returned by
    ``get_optimal_threshold`` for each metric is marked with a star, and a
    dashed grey line is drawn at every unique value of ``y_prob``. A summary
    table is printed after the figure is shown.

    Returns
    -------
    dict
        Maps each metric name to ``(optimal_threshold, optimal_score)``.
    """

    palette = {
        'accuracy': 'blue',
        'f1': 'red',
        'precision': 'green',
        'recall': 'orange',
    }
    grid = np.linspace(0.05, 0.95, 200)

    fig, ax = plt.subplots(1, 1, figsize=(12, 8))

    results = {}
    for metric, color in palette.items():
        # Curve of this metric over the threshold grid
        curve = [_metric_score(y_true, y_prob, t, metric) for t in grid]
        ax.plot(grid, curve, color=color, linewidth=2, label=metric.capitalize())

        # Best threshold for this metric and the score it achieves
        best_t = get_optimal_threshold(y_true, y_prob, metric)
        best_score = _metric_score(y_true, y_prob, best_t, metric)
        results[metric] = (best_t, best_score)

        # Star on the optimum, in the same colour as the curve
        ax.scatter(
            [best_t], [best_score],
            color=color, s=150, marker='*', zorder=5,
            edgecolors='black', linewidth=1,
        )

    # Guide line at every breakpoint
    for breakpoint in np.unique(y_prob):
        ax.axvline(x=breakpoint, color='gray', linestyle='--', alpha=0.3)

    ax.set(
        xlabel='Decision Threshold',
        ylabel='Metric Score',
        title=(
            'Different Metrics Have Different Optimal Thresholds\n'
            '(Stars show optimal points, dashed lines show breakpoints)'
        ),
        ylim=(0, 1.05),
    )
    ax.grid(True, alpha=0.3)
    ax.legend()

    plt.tight_layout()
    plt.show()

    # Text summary of the optima
    print("Optimal thresholds by metric:")
    for metric, (best_t, best_score) in results.items():
        print(f"  {metric:9}: t = {best_t:.3f}, score = {best_score:.3f}")

    return results

# Perfectly separable demo set: three negatives scored low, three positives scored high
y_demo = np.array([0] * 3 + [1] * 3)
p_demo = np.array([0.1, 0.3, 0.4, 0.6, 0.8, 0.9])

print(f"Demo data: labels = {y_demo}", f"           probs  = {p_demo}\n", sep="\n")

metric_results = compare_multiple_metrics(y_true=y_demo, y_prob=p_demo)

## 6. Practical Implications

### Key Takeaways

1. **Piecewise-Constant Nature**: Classification metrics only change at unique probability values

2. **Optimization Challenge**: Continuous optimizers can get stuck in flat regions and miss the global optimum

3. **Smart Solution**: Evaluate metrics at all unique probabilities (guaranteed global optimum)

4. **Fallback Mechanism**: Combine continuous optimization with discrete evaluation for robustness

5. **Metric Differences**: Different metrics often have different optimal thresholds

### When This Matters Most

- **Imbalanced datasets**: Default 0.5 threshold is often far from optimal
- **Cost-sensitive decisions**: When false positives and false negatives have different costs
- **Metric optimization**: When you need to maximize a specific metric (F1, precision, recall)
- **Model deployment**: When converting probabilities to hard predictions

### Computational Efficiency

The smart brute force approach is actually very efficient:
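- **Time complexity**: O(k) metric evaluations, where k = number of unique probabilities
- **Typical case**: k ≤ n always, and k ≪ n when many samples share the same predicted probability (e.g. rounded scores); with fully continuous scores k can equal n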
- **Guaranteed optimum**: No risk of local minima or convergence issues

In [None]:
# Final demonstration: efficiency comparison
import time


def efficiency_demo():
    """Demonstrate the efficiency of smart brute force vs continuous optimization.

    Generates 1000 random samples with a fixed seed, times a single call to
    ``get_optimal_threshold`` for each of three methods, and prints the
    elapsed time, F1 score and chosen threshold per method. Nothing is
    returned.
    """

    # Generate larger dataset
    np.random.seed(42)
    n_samples = 1000
    y_large = np.random.randint(0, 2, n_samples)
    p_large = np.random.beta(2, 2, n_samples)

    n_unique = len(np.unique(p_large))

    print(f"Efficiency test with {n_samples} samples, {n_unique} unique probabilities\n")

    methods = [
        ('smart_brute', 'Smart Brute Force'),
        ('minimize', 'Minimize with Fallback'),
        ('gradient', 'Gradient Method')
    ]

    # Pad to the longest label so the columns line up (a fixed width of 20 was
    # shorter than 'Minimize with Fallback').
    label_width = max(len(label) for _, label in methods)

    for method_code, method_name in methods:
        # perf_counter is the monotonic, high-resolution clock intended for
        # measuring short durations; time.time() is wall-clock time and can be
        # too coarse (or jump) for sub-millisecond calls.
        start_time = time.perf_counter()
        threshold = get_optimal_threshold(y_large, p_large, 'f1', method=method_code)
        duration = time.perf_counter() - start_time

        score = _metric_score(y_large, p_large, threshold, 'f1')

        print(f"{method_name:{label_width}} | Time: {duration:.4f}s | F1: {score:.4f} | Threshold: {threshold:.4f}")

    # NOTE(review): p_large is drawn from a continuous distribution, so n_unique
    # is expected to equal n_samples here and the message below overstates the
    # saving for this data -- consider using scores with ties.
    print(f"\n→ Smart brute force evaluates only {n_unique} points vs {n_samples} samples!")

efficiency_demo()

## Conclusion

This notebook demonstrated the piecewise-constant nature of classification metrics and why this creates challenges for traditional optimization methods. The `optimal-classification-cutoffs` library addresses these challenges through:

1. **Smart algorithms** that leverage the mathematical structure of the problem
2. **Fallback mechanisms** that ensure robust optimization
3. **Efficient implementation** that scales well with dataset size

For more details, see the [full documentation](https://finite-sample.github.io/optimal_classification_cutoffs/).