In [None]:
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from modules._import_helper import safe_import_from

# Import from our MC toolbox
MCIntegrator, convergence_analysis = safe_import_from(
    '05_simulation_monte_carlo.src.mc_integration',
    'MCIntegrator', 'convergence_analysis'
)

# Reproducibility: fix NumPy's legacy global random seed
np.random.seed(42)

# Folder for saved figures, one level above the current working directory
reports_dir = Path('..') / 'reports'
reports_dir.mkdir(exist_ok=True)

# Matplotlib defaults shared by every figure in this notebook
plt.style.use('default')
plt.rcParams.update({
    'figure.figsize': (10, 6),
    'font.size': 11,
})

## 1. Intuition: Why Monte Carlo Works

**The core idea:** To estimate an integral $I = \int_a^b f(x)dx$, we:
1. Sample $N$ random points $x_1, ..., x_N \sim \text{Uniform}[a,b]$
2. Compute the sample mean: $\hat{I} = (b-a) \cdot \frac{1}{N}\sum_{i=1}^N f(x_i)$

**Why it works:**
- **Law of Large Numbers (LLN):** $\hat{I} \to I$ as $N \to \infty$ (almost surely)
- **Central Limit Theorem (CLT):** $\sqrt{N}(\hat{I} - I) \xrightarrow{d} \mathcal{N}(0, (b-a)^2\sigma^2)$ as $N \to \infty$
  - Standard error: $SE = (b-a)\,\sigma / \sqrt{N}$ where $\sigma^2 = \text{Var}[f(X)]$ (on $[0,1]$ this is just $\sigma/\sqrt{N}$)
  - Error decreases as $O(N^{-1/2})$ **regardless of dimension**

**When MC wins:**
- High-dimensional integrals (curse of dimensionality affects grid methods)
- Complex domains (MC doesn't care about geometry)
- When you only need moderate accuracy (1-3 significant figures)

**Key insight:** MC error is **probabilistic**. We must always report confidence intervals!

## 2. Experiment 1: Simple 1D Integral

Let's estimate $I = \int_0^1 x^2 dx = \frac{1}{3}$ and verify:
1. The estimate converges to the true value
2. The 95% confidence interval has correct coverage
3. Error decreases as $O(N^{-1/2})$

In [None]:
# Integrand for Experiment 1
def f_simple(x):
    """Return x squared; accepts anything that supports ``**`` (scalars, NumPy arrays)."""
    squared = x**2
    return squared

# Exact answer and integration limits for the 1D test integral
true_value = 1 / 3
a = 0.0
b = 1.0

# Sweep over growing sample sizes, reusing one integrator instance (seed=42)
sample_sizes = [100, 500, 1000, 5000, 10000, 50000]
integrator = MCIntegrator(seed=42)

results = []
for n in sample_sizes:
    result = integrator.integrate(func=f_simple, a=a, b=b, n_samples=n)
    results.append(result)

    # One summary line per sample size: estimate, standard error, 95% interval
    ci_text = f"[{result.ci_lower:.6f}, {result.ci_upper:.6f}]"
    print(f"N={n:6d}: estimate={result.estimate:.6f}, SE={result.std_error:.6f}, 95% CI={ci_text}")

print(f"\nTrue value: {true_value:.6f}")

In [None]:
# Plot: Convergence with confidence bands
# Reads `results`, `sample_sizes`, `true_value` and `reports_dir` from earlier cells.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left: Estimate vs N with CI
estimates = [r.estimate for r in results]
ci_lower = [r.ci_lower for r in results]
ci_upper = [r.ci_upper for r in results]

ax1.plot(sample_sizes, estimates, 'o-', label='MC estimate', linewidth=2)
ax1.fill_between(sample_sizes, ci_lower, ci_upper, alpha=0.3, label='95% CI')
ax1.axhline(true_value, color='red', linestyle='--', linewidth=2, label='True value')
# The sample sizes span almost three orders of magnitude; on a linear axis the
# small-N points pile up at the left edge, so use a log x-axis here as well.
ax1.set_xscale('log')
ax1.set_xlabel('Number of samples (N)')
ax1.set_ylabel('Estimate')
ax1.set_title('Convergence with Confidence Intervals')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Right: Error vs N in log-log (should be -1/2 slope)
errors = [abs(r.estimate - true_value) for r in results]
ax2.loglog(sample_sizes, errors, 'o-', label='Absolute error', linewidth=2)
# Reference line: O(N^{-1/2}), anchored at the first observed error
ref_line = errors[0] * (np.array(sample_sizes) / sample_sizes[0])**(-0.5)
ax2.loglog(sample_sizes, ref_line, 'k--', alpha=0.5, label='$O(N^{-1/2})$ reference')
ax2.set_xlabel('Number of samples (N)')
ax2.set_ylabel('Absolute error')
ax2.set_title('Convergence Rate (Log-Log)')
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(reports_dir / '01_simple_convergence.png', dpi=150, bbox_inches='tight')
plt.show()

print("✓ Plot saved to reports/01_simple_convergence.png")

**Observations:**
1. The estimate converges to 0.333... as N increases
2. The confidence interval shrinks with more samples
3. Error follows $O(N^{-1/2})$ in log-log plot (parallel to reference line)

**Critical point:** The error is **stochastic**. If we repeat with a different seed, we get a different trajectory, but the same average behavior.

## 3. Checking CI Coverage

**Question:** If we claim "95% confidence interval", does the true value fall inside the CI 95% of the time?

Let's verify by running many independent trials:

In [None]:
# Empirical coverage check: repeat the estimate with a different seed per trial
# and count how often the reported 95% CI contains the true value.
n_trials = 100
n_samples = 5000
nominal_coverage = 95.0  # percent, the level the CI claims
coverage_count = 0

for trial in range(n_trials):
    integrator = MCIntegrator(seed=42 + trial)  # Different seed each trial
    result = integrator.integrate(func=f_simple, a=a, b=b, n_samples=n_samples)

    # Check if true value is in CI
    if result.ci_lower <= true_value <= result.ci_upper:
        coverage_count += 1

coverage = 100 * coverage_count / n_trials

# The observed coverage is itself a binomial proportion, so it fluctuates by
# about sqrt(p(1-p)/n_trials) around the nominal level (~2.2 points for 100
# trials). A fixed 93-97% window is tighter than that noise and would flag a
# perfectly calibrated CI roughly a quarter of the time, so accept anything
# within two binomial standard errors instead.
p_nominal = nominal_coverage / 100
coverage_se = 100 * np.sqrt(p_nominal * (1 - p_nominal) / n_trials)
tolerance = 2 * coverage_se

print(f"CI coverage over {n_trials} trials: {coverage:.1f}%")
print(f"Expected: ~95% (±{tolerance:.1f} points of sampling noise with {n_trials} trials)")
if abs(coverage - nominal_coverage) <= tolerance:
    print("\n✓ Confidence intervals are well-calibrated")
else:
    print("\n⚠ CI may be miscalibrated")

## 4. Experiment 2: Higher-Dimensional Integral

**MC advantage:** Error rate $O(N^{-1/2})$ is **dimension-independent**.

Grid methods (e.g. the trapezoidal rule) have error $O(N^{-2/d})$ in $d$ dimensions → the number of points needed for a fixed accuracy grows exponentially with $d$!

**Example:** Estimate volume of unit sphere in 5D:
$$I = \int_{[-1,1]^5} \mathbb{1}_{\{x_1^2 + \cdots + x_5^2 \leq 1\}} \, dx = \frac{\pi^{5/2}}{\Gamma(7/2)} = \frac{8\pi^2}{15} \approx 5.264$$

In [None]:
# Indicator of the unit ball, evaluated row by row
def f_sphere(x):
    """Return 1.0 for each row of ``x`` whose squared norm is <= 1, else 0.0.

    ``x`` is an (n_samples, 5) array; the result is a float array with one
    entry per row.
    """
    squared_radius = (x**2).sum(axis=1)
    inside = squared_radius <= 1.0
    return inside.astype(float)

# True volume of the unit ball in `dim` dimensions: pi^(d/2) / Gamma(d/2 + 1).
# (No 2**dim factor here: that is the volume of the bounding cube [-1, 1]^d,
# which belongs to the MC estimator, not to the exact answer.)
from scipy.special import gamma
dim = 5
true_volume = np.pi**(dim/2) / gamma(dim/2 + 1)  # = 8*pi^2/15 ≈ 5.264 for dim = 5

# Monte Carlo estimate over the bounding cube [-1, 1]^dim
integrator = MCIntegrator(seed=42)
bounds = [(-1, 1) for _ in range(dim)]
result = integrator.integrate_multidim(func=f_sphere, bounds=bounds, n_samples=100000)

# Compare the estimate against the closed-form volume
rel_error_pct = abs(result.estimate - true_volume)/true_volume * 100
inside_ci = result.ci_lower <= true_volume <= result.ci_upper

print(f"True volume: {true_volume:.6f}")
print(f"MC estimate: {result.estimate:.6f} ± {result.std_error:.6f}")
print(f"95% CI: [{result.ci_lower:.6f}, {result.ci_upper:.6f}]")
print(f"Relative error: {rel_error_pct:.2f}%")
print(f"\n✓ True value within CI: {inside_ci}")

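**Key takeaway:** With 100k samples, we get ~1% accuracy in 5D.
A grid method that is only first-order accurate on this discontinuous integrand (error $O(N^{-1/d})$) would need about $(100000)^{5/2} \approx 3 \times 10^{12}$ points to match this accuracy!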

**Rule of thumb:** MC is preferred when $d \gtrsim 4$ and moderate accuracy suffices.

## 5. Convergence Analysis Across Multiple Runs

Instead of a single trajectory, let's see the **average error behavior** across many runs:

In [None]:
# Repeat the 1D experiment several times per sample size and collect error statistics
results_conv = convergence_analysis(
    func=f_simple,
    a=0.0,
    b=1.0,
    true_value=true_value,
    n_samples_list=[100, 500, 1000, 5000, 10000, 50000],
    n_runs=20,  # independent runs per sample size
    seed=42,
)

# Pull the series out once as arrays so the band arithmetic below stays readable
N = np.array(results_conv['n_samples'])
mean_err = np.array(results_conv['mean_error'])
err_spread = np.array(results_conv['std_error'])

fig, ax = plt.subplots(figsize=(10, 6))
ax.loglog(N, mean_err, 'o-', linewidth=2, markersize=8, label='Mean absolute error')
ax.fill_between(N, mean_err - err_spread, mean_err + err_spread, alpha=0.3, label='±1 std dev')

# N^(-1/2) guide line anchored at the first data point
ref = mean_err[0] * (N / N[0])**(-0.5)
ax.loglog(N, ref, 'k--', alpha=0.5, label='$O(N^{-1/2})$')

ax.set(
    xlabel='Number of samples (N)',
    ylabel='Mean absolute error',
    title='Convergence: Average Over 20 Independent Runs',
)
ax.legend()
ax.grid(True, alpha=0.3)
plt.savefig(reports_dir / '01_convergence_analysis.png', dpi=150, bbox_inches='tight')
plt.show()

print("✓ Plot saved to reports/01_convergence_analysis.png")

## 6. Common Pitfalls

### Pitfall 1: Confusing Variance with Standard Error
- **Variance** $\sigma^2 = \text{Var}[f(X)]$ measures spread of $f$ values
- **Standard error** $SE = \sigma/\sqrt{N}$ measures uncertainty in the **mean estimate**
- SE decreases with N; variance does NOT

In [None]:
# Demonstration: scaling the integrand changes its spread, and with it the SE
def high_variance_func(x):
    """Return 100 * x**2: same shape as f_simple, scaled by a factor of 100.

    Scaling by 100 multiplies the standard deviation by 100 and the
    variance by 100**2 = 10,000.
    """
    return 100 * x**2

n = 10000
integrator = MCIntegrator(seed=42)
# Both integrals use the same sample count, so any difference in SE comes from
# the integrand's spread, not from N.
r1 = integrator.integrate(f_simple, 0, 1, n)
r2 = integrator.integrate(high_variance_func, 0, 1, n)

print("Function with normal variance:")
print(f"  Estimate: {r1.estimate:.4f} ± {r1.std_error:.4f}")
print("\nFunction scaled by 100 (100x std dev, 10,000x variance):")
print(f"  Estimate: {r2.estimate:.4f} ± {r2.std_error:.4f}")
print(f"\n⚠ Higher variance → larger SE → wider CI")
print(f"But estimate is still unbiased!")

### Pitfall 2: Reusing the Same Random Seed

**Never** reuse the same seed for independent experiments!

In [None]:
# Anti-pattern: every repetition is seeded with the same value
bad_results = []
for _ in range(5):
    integrator = MCIntegrator(seed=42)  # ⚠ identical seed on every pass
    r = integrator.integrate(f_simple, 0, 1, 1000)
    bad_results += [r.estimate]

bad_spread = np.std(bad_results)
print("With same seed (WRONG):")
print(f"  Estimates: {bad_results}")
print(f"  Std dev: {bad_spread:.6f}  ← ZERO variance!")

# Recommended: give each repetition its own seed
good_results = []
for i in range(5):
    integrator = MCIntegrator(seed=42 + i)  # ✓ seed varies with the loop index
    r = integrator.integrate(f_simple, 0, 1, 1000)
    good_results += [r.estimate]

good_spread = np.std(good_results)
good_formatted = [f'{x:.6f}' for x in good_results]
print("\nWith different seeds (CORRECT):")
print(f"  Estimates: {good_formatted}")
print(f"  Std dev: {good_spread:.6f}  ← Shows natural variation")

### Pitfall 3: Ignoring Bias from Poor Sampling

MC is **unbiased** only if samples are truly uniform (or match the required distribution).

In [None]:
# Compare a plain sample mean under uniform vs. non-uniform sampling
n = 10000
np.random.seed(42)

# Reference: points drawn uniformly on [0, 1]
x_uniform = np.random.uniform(low=0, high=1, size=n)
estimate_correct = f_simple(x_uniform).mean()

# Mistake: points drawn from Beta(0.5, 0.5), which piles mass near 0 and 1,
# while the estimator still averages as if the points were uniform
x_biased = np.random.beta(a=0.5, b=0.5, size=n)
estimate_biased = f_simple(x_biased).mean()

bias = abs(estimate_biased - true_value)

print(f"True value: {true_value:.6f}")
print(f"Uniform samples: {estimate_correct:.6f}  ✓")
print(f"Biased samples:  {estimate_biased:.6f}  ✗ (systematically wrong)")
print(f"\n⚠ Bias: {bias:.6f}")

## 7. Key Takeaways

✓ **MC error is $O(N^{-1/2})$, dimension-independent** → ideal for high-D integrals

✓ **Always report uncertainty** (SE or CI) → MC estimates are random!

✓ **Convergence diagnostics:** Plot error vs N on log-log to verify -1/2 slope

✓ **CI coverage checks:** Run multiple trials to verify calibration

✗ **Pitfalls:** Confusing variance with SE, reusing seeds, biased sampling

## 8. Exercises

**Exercise 1:** Estimate $\int_0^\pi \sin(x) dx = 2$ using MC. Report estimate with 95% CI.

**Exercise 2:** Estimate the area under $e^{-x^2}$ from 0 to 1. Compare with numerical integration (scipy.integrate.quad).

**Exercise 3:** Verify the $O(N^{-1/2})$ scaling for a 3D integral of your choice.

**Exercise 4:** What happens if you use $N=100$ samples and repeat the estimate 1000 times? Plot the histogram of estimates and verify it's approximately Gaussian (CLT).

**Exercise 5:** For the 5D sphere problem, plot how many samples you need to achieve 1%, 0.1%, and 0.01% relative error (approximately).

**Exercise 6:** Create an integral where naive MC is inefficient (hint: integrand is nearly zero almost everywhere). We'll fix this with variance reduction in the next notebook!

In [None]:
# Your solutions here

---
## Solutions (Spoilers Below!)

<details>
<summary>Click to reveal solutions</summary>

```python
# Exercise 1
integrator = MCIntegrator(seed=42)
result = integrator.integrate(func=np.sin, a=0, b=np.pi, n_samples=10000)
print(f"Estimate: {result.estimate:.6f} ± {result.std_error:.6f}")
print(f"95% CI: [{result.ci_lower:.6f}, {result.ci_upper:.6f}]")
print(f"True value: 2.0")

# Exercise 2
from scipy.integrate import quad
f = lambda x: np.exp(-x**2)
mc_result = integrator.integrate(func=f, a=0, b=1, n_samples=50000)
quad_result, _ = quad(f, 0, 1)
print(f"MC: {mc_result.estimate:.6f}")
print(f"Quad: {quad_result:.6f}")

# Exercise 4
estimates = []
for i in range(1000):
    integrator = MCIntegrator(seed=42 + i)
    r = integrator.integrate(f_simple, 0, 1, 100)
    estimates.append(r.estimate)
plt.hist(estimates, bins=30, density=True, alpha=0.7)
plt.axvline(1/3, color='red', linestyle='--', label='True value')
plt.xlabel('Estimate')
plt.ylabel('Density')
plt.legend()
plt.title('Distribution of Estimates (CLT verification)')
plt.show()
```

</details>

---
**Next notebook:** [02_variance_reduction_importance_sampling.ipynb](02_variance_reduction_importance_sampling.ipynb) - Learn how to make MC 10-1000x more efficient!