In [None]:
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from loss_simulator import simulate_empirical_risk

# Global plot appearance: seaborn white-grid theme plus explicit font sizes
sns.set_style("whitegrid")
plt.rcParams.update({
    'font.size': 12,
    'axes.labelsize': 14,
    'axes.titlesize': 16,
    'legend.fontsize': 11,
})

# Empirical Risk Visualization (Smooth Losses)

This notebook visualizes the empirical risk R̂(θ) for **smooth, continuous loss functions** as a function of the confidence threshold θ.

**Difference from selective_sim:**
- Uses smooth losses (e.g., squared error) instead of 0-1 loss
- Loss is continuous and differentiable
- Still parametrized by instability ε

**Key concepts:**
- **ε** (instability): Controls deviation from monotonicity
  - 0 = perfectly monotone
  - 1 = maximum instability
- **Smooth loss**: Continuous, differentiable (e.g., squared error)
- **Target α**: Risk level we want to control

In [None]:
# Simulation parameters (all forwarded to simulate_empirical_risk in the plotting cell)
n = 500            # dataset size
alpha = 0.1        # target risk level, drawn as the dashed reference line
base_strength = 5  # passed through as the simulator's `base_strength` keyword

# Instability parameters (ε): one risk curve is simulated per value
instability_parameters = [0.2, 0.5, 0.8, 1]

# Theta grid: 1000 evenly spaced thresholds covering [0, 1]
theta_grid = np.linspace(0, 1, 1000)

# Echo the configuration so the cell output documents the run
print("Simulating empirical risk (smooth losses)...")
print(f"Dataset size: n = {n}")
print(f"Target risk: α = {alpha}")
# "\n" (not "\\n"): emit a trailing blank line rather than a literal backslash-n
print(f"Base strength: {base_strength}\n")

In [None]:
# Create figure
fig, ax = plt.subplots(1, 1, figsize=(12, 7))

# Light-to-dark red gradient: one colour per instability value, in list order
colors = plt.cm.Reds(np.linspace(0.3, 1.0, len(instability_parameters)))

for idx, instability_parameter in enumerate(instability_parameters):
    # Simulate one empirical-risk curve over theta_grid for this ε
    # (P_hat is not used in this cell; it is kept in the namespace as before)
    P_hat, y_true, empirical_risk = simulate_empirical_risk(
        n, alpha, theta_grid, instability_parameter, base_strength=base_strength
    )

    label = f'ε = {instability_parameter:.1f}'
    if instability_parameter == 0:
        label += ' (monotone)'

    ax.plot(theta_grid, empirical_risk, linewidth=2.5,
            color=colors[idx], label=label, alpha=0.8)

    # Per-curve summary; the risk range is taken over the full theta grid,
    # not only the [0.5, 1] window shown on the x-axis
    print(f"ε = {instability_parameter:.1f}: "
          f"Mean true prob = {(1-y_true.mean()):.3f}, "
          f"Risk range = [{empirical_risk.min():.3f}, {empirical_risk.max():.3f}]")

# Target line
ax.axhline(y=alpha, color='darkred', linestyle='--', linewidth=2.5,
           label=f'Target: α = {alpha}', zorder=0, alpha=0.7)

# Formatting
ax.set_xlabel('θ', fontsize=14, color='black')
ax.set_ylabel('R̂(θ)', fontsize=14, color='black')
ax.legend(fontsize=11, loc='best', frameon=True, shadow=True)
ax.grid(True, alpha=0.3)
ax.set_xlim([0.5, 1])

# Ticks and spines
ax.tick_params(axis='x', colors='black', which='both', bottom=True, top=False)
ax.tick_params(axis='y', colors='black', which='both', left=True, right=False)
sns.despine(ax=ax, top=True, right=True)
for spine in ['bottom', 'left']:
    ax.spines[spine].set_edgecolor('black')
    ax.spines[spine].set_linewidth(1.5)

plt.tight_layout()

# Create the output directory first so saving works on a fresh checkout
output_file = './outputs/empirical_risk_smooth.pdf'
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
fig.savefig(output_file, dpi=300, bbox_inches='tight')
# "\n" (not "\\n"): print a blank line before the message, not a literal backslash-n
print(f"\n✓ Plot saved to {output_file}")

## Interpretation

**Comparison with selective_sim:**

1. **Smooth vs Discrete**: 
   - This plot uses smooth losses (squared error)
   - selective_sim uses 0-1 loss (non-smooth)

2. **Risk curves**:
   - Smooth losses create continuous risk curves
   - These curves may respond to increasing instability differently from the 0-1 loss curves

3. **Practical relevance**:
   - Regression problems use smooth losses
   - Illustrates that CRC (conformal risk control) can be studied for both discrete and continuous losses

**Key observations:**
- Higher ε (darker red) = more instability
- Risk behavior changes with instability level
- The target α (dashed horizontal line) is the benchmark level against which risk control is judged