In [None]:
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import sys
sys.path.insert(0, str(Path.cwd().parent.parent.parent))

from modules._import_helper import safe_import_from
from scipy import stats

RareEventEstimator = safe_import_from(
    '05_simulation_monte_carlo.src.rare_events',
    'RareEventEstimator'
)

# Setup
# 'seaborn-v0_8-darkgrid' only exists on matplotlib >= 3.6; older releases ship
# the same sheet under 'seaborn-darkgrid'. Pick whichever is installed (or the
# default style) so this first cell does not abort on an older matplotlib.
_style_candidates = ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
plt.style.use(
    next((s for s in _style_candidates if s in plt.style.available), 'default')
)

# Figures are saved to a 'reports' directory next to the current working directory.
reports_dir = Path.cwd().parent / 'reports'
reports_dir.mkdir(parents=True, exist_ok=True)

print("✓ Imports successful")

## 1. Problem: Estimating $P(X > c)$ for Large $c$

**Example:** $X \sim \mathcal{N}(0, 1)$, estimate $P(X > 4)$

**True value:** $P(X > 4) \approx 3.17 \times 10^{-5}$ (very rare!)

**Naive MC estimator:**
$$\hat{P} = \frac{1}{N} \sum_{i=1}^N \mathbb{1}(X_i > c), \quad X_i \sim \mathcal{N}(0,1)$$

**Relative error:** $\frac{\text{SE}(\hat{P})}{P} \approx \sqrt{\frac{1-P}{NP}} \approx \frac{1}{\sqrt{NP}}$ for small $P$

In [None]:
threshold = 4.0
# Use the survival function: 1 - cdf(...) loses precision through cancellation
# as the tail probability approaches machine epsilon.
true_prob = stats.norm.sf(threshold)
print(f"True P(X > {threshold}): {true_prob:.6e}")
print(f"Expected samples per event: {1/true_prob:.0f}")

# Sample size for a target relative error, from the small-P approximation
# rel. error ≈ 1 / sqrt(N * P)  =>  N ≈ 1 / (rel_error**2 * P).
target_relative_error = 0.1
# Round up: truncating would report a sample size that falls short of the target.
required_n = int(np.ceil(1 / (target_relative_error**2 * true_prob)))
# Label is derived from the variable so it cannot drift from the computation.
print(f"\nSamples needed for {target_relative_error:.0%} relative error: {required_n:,}")

## 2. Pitfall #1: Underestimation Bias

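**Problem:** If no events are observed, $\hat{P} = 0$ and the plug-in standard error is also $0$, so the estimated relative error $\text{SE}/\hat{P}$ is undefined and the run reports the event as impossible. The estimator is unbiased on average, but for small $N$ most individual runs return exactly $0$.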

**Demo:** Run naive MC with insufficient samples

In [None]:
# Naive MC with varying sample sizes: one histogram of the estimate per N
sample_sizes = [1000, 10000, 100000, 1000000]
n_trials = 100

fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()

for ax, n in zip(axes, sample_sizes):
    # Every trial uses its own reproducible stream (seeds 42, 43, ...).
    estimates = []
    for trial in range(n_trials):
        draws = np.random.default_rng(42 + trial).standard_normal(n)
        estimates.append(np.mean(draws > threshold))

    # Summary statistics shown in the panel title.
    mean_est = np.mean(estimates)
    zero_count = estimates.count(0)
    # Relative error of the trial-averaged estimate, not of a single run.
    rel_error = abs(mean_est - true_prob) / true_prob

    # Histogram with the true value and the mean estimate marked.
    ax.hist(estimates, bins=30, alpha=0.7, edgecolor='black')
    ax.axvline(true_prob, color='red', linestyle='--', linewidth=2, label='True value')
    ax.axvline(mean_est, color='green', linestyle='-', linewidth=2, label='Mean estimate')

    title_lines = (
        f'N = {n:,}',
        f'Zero estimates: {zero_count}/{n_trials}',
        f'Rel. Error: {rel_error:.2%}',
    )
    ax.set_title('\n'.join(title_lines), fontsize=10)
    ax.set_xlabel('Estimated Probability', fontsize=9)
    ax.set_ylabel('Frequency', fontsize=9)
    ax.legend(fontsize=8)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(reports_dir / '02_rare_event_naive_mc.png', dpi=150, bbox_inches='tight')
plt.show()

print("✓ Key observation: Many zero estimates for small N!")

## 3. Pitfall #2: Confidence Interval Invalidity

**Normal approximation:**
$$\hat{P} \pm 1.96 \sqrt{\frac{\hat{P}(1-\hat{P})}{N}}$$

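**Problem:** Fails when the expected number of events is small (rule of thumb: $N \cdot P < 5$), because the binomial count is then far from normal and the CLT-based interval is unreliable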

**Demo:** Check 95% CI coverage across trials

In [None]:
# Empirical coverage of the normal-approximation 95% CI under naive MC
n = 10000
covered = []

for trial in range(n_trials):
    rng = np.random.default_rng(42 + trial)
    p_hat = np.mean(rng.standard_normal(n) > threshold)

    # Wald interval: p_hat ± 1.96 * SE. When p_hat is 0 the interval
    # degenerates to [0, 0] and cannot contain the true probability.
    se = np.sqrt(p_hat * (1 - p_hat) / n)
    ci_lower, ci_upper = p_hat - 1.96 * se, p_hat + 1.96 * se

    covered.append(bool(ci_lower <= true_prob <= ci_upper))

# Fraction of trials whose interval contains the true probability.
coverage_count = sum(covered)
coverage = coverage_count / n_trials

print(f"95% CI Coverage: {coverage:.2%} (should be ~95%)")
print("❌ Coverage is poor for rare events with naive MC!")

## 4. Solution: Importance Sampling

**Idea:** Sample from $q(x)$ concentrated on $\{x: x > c\}$

**Shifted Gaussian proposal:** $q(x) = \mathcal{N}(c, 1)$

**Estimator:**
$$\hat{P}_{\text{IS}} = \frac{1}{N} \sum_{i=1}^N \mathbb{1}(X_i > c) \frac{p(X_i)}{q(X_i)}, \quad X_i \sim q$$

**Near-optimal shift:** $\mu^* \approx c$ (exponential tilting; asymptotically optimal among mean-shifted Gaussians as $c \to \infty$)

In [None]:
# Naive MC vs importance sampling at the same budget, from one seeded estimator
estimator = RareEventEstimator(seed=42)

n_samples = 10000
shared_args = {
    'distribution': 'normal',
    'threshold': threshold,
    'n_samples': n_samples,
}

# Keep naive before IS: both calls go through the same estimator instance
# (presumably sharing its random stream — confirm in RareEventEstimator).
naive_result = estimator.estimate_tail_probability(method='naive', **shared_args)
is_result = estimator.estimate_tail_probability(
    method='importance_sampling', **shared_args
)

print(f"True probability: {true_prob:.6e}\n")

# Naive MC summary (the trailing newline leaves a blank line before the IS summary).
naive_report = [
    f"Naive MC ({n_samples:,} samples):",
    f"  Estimate: {naive_result.probability:.6e}",
    f"  Rel. Error: {naive_result.relative_error:.2%}",
    f"  CI: [{naive_result.ci_lower:.6e}, {naive_result.ci_upper:.6e}]\n",
]
print("\n".join(naive_report))

# IS summary, including the variance-reduction and weight diagnostics.
is_report = [
    f"Importance Sampling ({n_samples:,} samples):",
    f"  Estimate: {is_result.probability:.6e}",
    f"  Rel. Error: {is_result.relative_error:.2%}",
    f"  CI: [{is_result.ci_lower:.6e}, {is_result.ci_upper:.6e}]",
    f"  VRF: {is_result.variance_reduction_factor:.2f}x",
    f"  Effective N: {is_result.effective_sample_size:.0f}",
]
print("\n".join(is_report))

print(f"\n✓ IS achieves {is_result.variance_reduction_factor:.0f}x variance reduction!")

## 5. Comparison: Relative Error vs Threshold

**Experiment:** Vary threshold $c$ (making event rarer) and compare methods

In [None]:
# Compare naive MC and importance sampling as the threshold (and rarity) grows
thresholds = [2.5, 3.0, 3.5, 4.0, 4.5, 5.0]
n_samples_comp = 50000
n_trials_comp = 50

results_comparison = {'naive': [], 'is': []}
true_probs = []

for c in thresholds:
    # Survival function instead of 1 - cdf: no cancellation in the far tail.
    true_p = stats.norm.sf(c)
    true_probs.append(true_p)

    naive_errors = []
    is_errors = []

    for trial in range(n_trials_comp):
        seed = 42 + trial
        est = RareEventEstimator(seed=seed)

        # Naive
        naive_res = est.estimate_tail_probability(
            'normal', c, n_samples_comp, method='naive'
        )
        # A zero estimate is a legitimate outcome with relative error exactly 1
        # (the divisor is true_p, never zero). Dropping those trials would bias
        # the naive median downward and leave an empty list (median = NaN) when
        # no trial observes the event at all.
        naive_errors.append(abs(naive_res.probability - true_p) / true_p)

        # IS
        is_res = est.estimate_tail_probability(
            'normal', c, n_samples_comp, method='importance_sampling'
        )
        is_errors.append(abs(is_res.probability - true_p) / true_p)

    # Median over trials; for naive MC this saturates at 1.0 (100%) once most
    # runs see no events.
    results_comparison['naive'].append(np.median(naive_errors))
    results_comparison['is'].append(np.median(is_errors))

# Plot
fig, ax = plt.subplots(figsize=(10, 6))

ax.semilogy(thresholds, results_comparison['naive'], 'o-', label='Naive MC',
            linewidth=2, markersize=10, color='red')
ax.semilogy(thresholds, results_comparison['is'], 's-', label='Importance Sampling',
            linewidth=2, markersize=10, color='blue')

# Add second y-axis for true probability
ax2 = ax.twinx()
ax2.semilogy(thresholds, true_probs, 'd--', label='True P(X>c)',
             linewidth=2, markersize=8, color='green', alpha=0.6)
ax2.set_ylabel('True Probability', fontsize=12)

ax.set_xlabel('Threshold c', fontsize=12)
ax.set_ylabel('Median Relative Error', fontsize=12)
ax.set_title('Rare Event Estimation: Naive vs Importance Sampling',
             fontsize=14, fontweight='bold')
ax.legend(loc='upper left', fontsize=10)
ax2.legend(loc='upper right', fontsize=10)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(reports_dir / '02_rare_event_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

print("\n✓ IS maintains low error as events become rarer!")

## 6. Adaptive Sampling

**Problem:** The best proposal is not known in advance: it depends on the conditional distribution of $X$ given $X > c$

**Solution:** Start with pilot sample, estimate optimal shift, then refine

**Algorithm:**
1. Pilot phase: $N_0$ samples from $\mathcal{N}(c, 1)$
2. Estimate $\hat{\mu} = \mathbb{E}[X | X > c]$
3. Main phase: Sample from $\mathcal{N}(\hat{\mu}, 1)$

In [None]:
# Adaptive sampling: pilot run followed by a main run, reusing the estimator
# created earlier in the notebook.
n_pilot, n_main = 1000, 9000

adaptive_result = estimator.adaptive_sampling(
    distribution='normal',
    threshold=threshold,
    n_pilot=n_pilot,
    n_main=n_main,
    target_relative_error=0.05,
)

# Assumes every iteration spends n_pilot + n_main draws — TODO confirm against
# RareEventEstimator.adaptive_sampling.
total_samples = adaptive_result.n_iterations * (n_pilot + n_main)

report = [
    "Adaptive Sampling:",
    f"  Estimate: {adaptive_result.probability:.6e}",
    f"  Rel. Error: {adaptive_result.relative_error:.2%}",
    f"  Iterations: {adaptive_result.n_iterations}",
    f"  Total samples: {total_samples:,}",
    f"  VRF: {adaptive_result.variance_reduction_factor:.2f}x",
]
print("\n".join(report))

print("\n✓ Adaptive method achieves target accuracy automatically!")

## 7. Exercise: Heavy-Tailed Distribution

**Problem:** Estimate $P(X > 5)$ for $X \sim t_3$ (Student-t with 3 df)

**Challenge:** Heavier tails than Gaussian → IS proposal needs adjustment

**Task:** Compare naive MC vs IS with $t_3$ shifted to center at threshold

In [None]:
# Exercise: Implement here
# Hint: Use scipy.stats.t for Student-t distribution
#   - stats.t(df).sf(x) gives the exact tail probability to compare against
#   - stats.t(df, loc=...) is a shifted proposal offering .rvs() and .logpdf()

# YOUR CODE HERE
pass

## Solution

In [None]:
# Solution: P(X > 5) for X ~ t_3, naive MC vs importance sampling
df = 3
threshold_t = 5.0
# Survival function instead of 1 - cdf: no cancellation in the tail.
true_prob_t = stats.t(df).sf(threshold_t)

print(f"True P(X > {threshold_t}) for t_3: {true_prob_t:.6e}\n")

# Naive MC
n_t = 50000
rng_t = np.random.default_rng(42)
samples_t = stats.t(df).rvs(size=n_t, random_state=rng_t)
p_naive_t = np.mean(samples_t > threshold_t)
# Plug-in binomial standard error of the naive estimate.
se_naive_t = np.sqrt(p_naive_t * (1 - p_naive_t) / n_t)

print(f"Naive MC ({n_t:,} samples):")
print(f"  Estimate: {p_naive_t:.6e}")
print(f"  Rel. Error: {abs(p_naive_t - true_prob_t) / true_prob_t:.2%}\n")

# IS with shifted t-distribution
# Proposal: t_3 shifted to center at threshold, so about half the draws exceed it
target_dist = stats.t(df)
proposal_dist = stats.t(df, loc=threshold_t)
samples_is_t = proposal_dist.rvs(size=n_t, random_state=rng_t)
indicators = (samples_is_t > threshold_t).astype(float)

# Importance weights p(x) / q(x), formed in log space for numerical stability
log_p = target_dist.logpdf(samples_is_t)
log_q = proposal_dist.logpdf(samples_is_t)
weights = np.exp(log_p - log_q)

# IS estimate and the variance of that estimate (sample variance / n).
weighted_hits = indicators * weights
p_is_t = np.mean(weighted_hits)
var_is = np.var(weighted_hits, ddof=1) / n_t
se_is_t = np.sqrt(var_is)

# Variance reduction factor = Var(naive) / Var(IS). The plug-in naive variance
# collapses to 0 when the naive run saw no exceedances, which would report a
# VRF of 0; fall back to the exact binomial variance in that case.
if p_naive_t > 0:
    var_naive_t = se_naive_t**2
else:
    var_naive_t = true_prob_t * (1 - true_prob_t) / n_t
vrf_t = var_naive_t / var_is if var_is > 0 else np.inf

print(f"Importance Sampling ({n_t:,} samples):")
print(f"  Estimate: {p_is_t:.6e}")
print(f"  Rel. Error: {abs(p_is_t - true_prob_t) / true_prob_t:.2%}")
print(f"  VRF: {vrf_t:.2f}x")

print("\n✓ IS effective even for heavy-tailed distributions!")

## Key Takeaways

### Pitfalls of Naive MC for Rare Events
1. **Zero estimates:** $\hat{P} = 0$ when no events are observed → the event looks impossible and the estimated relative error $\text{SE}/\hat{P}$ is undefined
2. **Invalid CIs:** Normal approximation fails when $N \cdot P < 5$
3. **Exponential cost:** Relative error $\propto 1/\sqrt{NP}$ → for $P = 10^{-6}$, need $N \sim 10^{10}$ for 1% error

### Solutions
1. **Importance sampling:** Shift proposal to rare event region
   - VRF scales with $1/P$ (exponential improvement!)
   - Near-optimal shift: $\mu^* \approx c$ for Gaussian tails (asymptotically optimal as $c \to \infty$)
2. **Adaptive sampling:** Learn optimal proposal from pilot sample
3. **Cross-entropy method:** Iteratively optimize proposal (not covered here)

### When to Use
- **Rare events:** $P < 10^{-3}$
- **Tail probabilities:** Risk analysis, safety engineering, finance
- **Failure probabilities:** Structural reliability, system safety

### Diagnostics
- **Relative error:** Should be $< 10\%$ for reliable estimates
- **Effective sample size:** Check $N_{\text{eff}} > 100$ for stable weights
- **CI coverage:** Verify with multiple trials

**Further reading:**
- Bucklew (2004): *Introduction to Rare Event Simulation*
- Rubinstein & Kroese (2016): *Simulation and the Monte Carlo Method*
- Asmussen & Glynn (2007): *Stochastic Simulation*