In [None]:
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import sys
sys.path.insert(0, str(Path.cwd().parent.parent.parent))

from modules._import_helper import safe_import_from

(MCIntegrator, ImportanceSampler, ControlVariates,
 antithetic_sampling) = safe_import_from(
    '05_simulation_monte_carlo.src',
    'MCIntegrator', 'ImportanceSampler', 'ControlVariates', 'antithetic_sampling'
)

# Plot styling shared by every figure in this notebook
plt.style.use('seaborn-v0_8-darkgrid')

# Saved figures go to a 'reports' directory that sits beside the current
# working directory (created on first run, reused afterwards)
reports_dir = Path.cwd().parent.joinpath('reports')
reports_dir.mkdir(exist_ok=True)

print("✓ Imports successful")

## 1. Problem Setup: Estimating $\mathbb{E}[e^{-X^2}]$ for $X \sim \mathcal{N}(0,1)$

**Why this example?** True value is known: $\mathbb{E}[e^{-X^2}] = \frac{1}{\sqrt{3}} \approx 0.5774$

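**Challenge:** The integrand $e^{-x^2}$ is concentrated near $x = 0$, while $\mathcal{N}(0,1)$ places many draws in the tails where $f(x) \approx 0$ → those samples add variance without adding information.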

In [None]:
# Target function
def target_func(x):
    """Integrand f(x) = exp(-x^2) whose expectation under N(0, 1) is estimated.

    Works elementwise on NumPy arrays as well as on scalars.
    """
    neg_square = -(x ** 2)
    return np.exp(neg_square)

# Closed-form reference value used to measure every estimator's true error:
#   E[exp(-X^2)] = (1/sqrt(2*pi)) * integral exp(-3*x^2/2) dx
#                = (1/sqrt(2*pi)) * sqrt(2*pi/3) = 1/sqrt(3)
TRUE_VALUE = 1.0 / np.sqrt(3)
print(f"True value: {TRUE_VALUE:.6f}")

## 2. Naive Monte Carlo (Baseline)

$$\hat{\mu}_{\text{naive}} = \frac{1}{N} \sum_{i=1}^N f(X_i), \quad X_i \sim p(x)$$

**Variance:** $\text{Var}[\hat{\mu}] = \frac{\sigma^2}{N}$ where $\sigma^2 = \text{Var}[f(X)]$

In [None]:
# Baseline: plain Monte Carlo average of f(X) over i.i.d. draws X ~ N(0, 1)
n_samples = 10000
rng = np.random.default_rng(42)

samples = rng.standard_normal(size=n_samples)
values = target_func(samples)

# Point estimate and its standard error (sample std with ddof=1, over sqrt(N))
naive_estimate = values.mean()
naive_std = values.std(ddof=1) / np.sqrt(n_samples)

print(f"Naive MC:")
print(f"  Estimate: {naive_estimate:.6f}")
print(f"  Std Error: {naive_std:.6f}")
print(f"  True Error: {abs(naive_estimate - TRUE_VALUE):.6f}")

## 3. Importance Sampling

**Idea:** Sample from $q(x)$ instead of $p(x)$, reweight:
$$\hat{\mu}_{\text{IS}} = \frac{1}{N} \sum_{i=1}^N f(X_i) \frac{p(X_i)}{q(X_i)}, \quad X_i \sim q(x)$$

**Optimal $q^*$:** Proportional to $|f(x)|p(x)$

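**Here:** Use $q(x) = \mathcal{N}(0, \sigma_q^2)$ with variance $\sigma_q^2 = 0.5$ (narrower than $p$, so samples concentrate where $f$ is large). For reference, the optimal choice $q^*(x) \propto e^{-x^2} e^{-x^2/2} = e^{-3x^2/2}$ is $\mathcal{N}(0, 1/3)$.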

In [None]:
# Importance sampling with narrower Gaussian
from scipy import stats

def proposal_sampler(n, rng):
    """Draw ``n`` samples from the proposal q = N(0, variance 0.5).

    ``rng`` is the NumPy random generator supplied by the caller.
    """
    proposal_std = np.sqrt(0.5)
    return rng.normal(loc=0, scale=proposal_std, size=n)

def weight_func(x):
    """Importance weight p(x) / q(x) with p = N(0, 1) and q = N(0, variance 0.5).

    The ratio is formed in log space and exponentiated once at the end.
    """
    log_target = stats.norm.logpdf(x, loc=0, scale=1)
    log_proposal = stats.norm.logpdf(x, loc=0, scale=np.sqrt(0.5))
    log_ratio = log_target - log_proposal
    return np.exp(log_ratio)

# Wire the integrand, proposal and weight function into the sampler
is_sampler = ImportanceSampler(
    target_func,
    proposal_sampler,
    weight_func,
    seed=42,
)

# Same sample budget as the naive baseline so the results are comparable
is_result = is_sampler.estimate(n_samples)

print(f"\nImportance Sampling:")
print(f"  Estimate: {is_result.estimate:.6f}")
print(f"  Std Error: {is_result.std_error:.6f}")
print(f"  True Error: {abs(is_result.estimate - TRUE_VALUE):.6f}")
print(f"  VRF: {is_result.variance_reduction_factor:.2f}x")
print(f"  Effective N: {is_result.effective_sample_size:.0f}")

## 4. Control Variates

**Idea:** Use correlated variable $g(X)$ with known mean $\mu_g$:
$$\hat{\mu}_{\text{CV}} = \hat{\mu}_f - c(\hat{\mu}_g - \mu_g)$$

**Optimal $c^*$:** $c^* = \frac{\text{Cov}[f(X), g(X)]}{\text{Var}[g(X)]}$

**Variance reduction:** At the optimal coefficient $c = c^*$, $\text{Var}[\hat{\mu}_{\text{CV}}] = \text{Var}[\hat{\mu}_f](1 - \rho^2)$ where $\rho = \text{Corr}[f,g]$

**Here:** Use $g(x) = x^2$ (negatively correlated with $e^{-x^2}$, known mean $\mathbb{E}[X^2] = 1$)

In [None]:
# Control variates
def control_func(x):
    """Control variate g(x) = x^2, applied elementwise to arrays or scalars."""
    squared = x ** 2
    return squared

# Estimator that corrects the sample mean of f using the control g(x) = x^2
cv = ControlVariates(
    target_func,
    control_func,
    1.0,  # E[X^2] for X ~ N(0,1)
    seed=42,
)

# Same sample budget as the naive baseline so the results are comparable
cv_result = cv.estimate(n_samples)

print(f"\nControl Variates:")
print(f"  Estimate: {cv_result.estimate:.6f}")
print(f"  Std Error: {cv_result.std_error:.6f}")
print(f"  True Error: {abs(cv_result.estimate - TRUE_VALUE):.6f}")
print(f"  VRF: {cv_result.variance_reduction_factor:.2f}x")
print(f"  Optimal c: {cv_result.control_coefficient:.4f}")
print(f"  Correlation: {cv_result.correlation:.4f}")

## 5. Antithetic Sampling

**Idea:** Generate pairs $(X, -X)$ to induce negative correlation
$$\hat{\mu}_{\text{AS}} = \frac{1}{2N} \sum_{i=1}^N [f(X_i) + f(-X_i)]$$

**Works when:** $f$ is monotonic (variance reduced if $f(x)$ and $f(-x)$ negatively correlated)

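**Here:** $e^{-x^2}$ is symmetric, so $f(-X) = f(X)$ and each pair is perfectly *positively* correlated → no benefit expected (the second evaluation of every pair carries no new information)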

In [None]:
# Antithetic sampling
# The integrand is even (f(x) == f(-x)), so this cell is a sanity check that
# the method brings no variance reduction here rather than a demonstration
# of a gain.
# NOTE(review): presumably antithetic_sampling evaluates func on both x and -x
# for each draw from `sampler` — confirm against its implementation.
as_result = antithetic_sampling(
    func=target_func,
    sampler=lambda n, rng: rng.standard_normal(n),
    n_samples=n_samples // 2,  # Half since we generate pairs
    seed=42
)

# Report the same metrics as the other methods for side-by-side comparison
print(f"\nAntithetic Sampling:")
print(f"  Estimate: {as_result.estimate:.6f}")
print(f"  Std Error: {as_result.std_error:.6f}")
print(f"  True Error: {abs(as_result.estimate - TRUE_VALUE):.6f}")
print(f"  VRF: {as_result.variance_reduction_factor:.2f}x")

## 6. Comparison: Error vs Sample Size

**Theory:** $\text{RMSE} \propto 1/\sqrt{N}$ for naive MC

**VR methods:** Shift curve down (same slope, lower intercept)

In [None]:
# Empirical RMSE of each estimator as a function of the sample size
sample_sizes = [100, 500, 1000, 2000, 5000, 10000]
n_trials = 50

# One RMSE per entry of sample_sizes, keyed by the label used in the legend
results = {
    'Naive': [],
    'Importance Sampling': [],
    'Control Variates': [],
}

for n in sample_sizes:
    # Squared errors for this sample size, one entry per trial and method
    sq_errors = {method: [] for method in results}

    for trial in range(n_trials):
        # Every method gets the same seed within a trial
        seed = 42 + trial

        # Naive
        draws = np.random.default_rng(seed).standard_normal(n)
        naive_est = np.mean(target_func(draws))
        sq_errors['Naive'].append((naive_est - TRUE_VALUE)**2)

        # IS
        is_res = ImportanceSampler(
            target_func, proposal_sampler, weight_func, seed=seed
        ).estimate(n)
        sq_errors['Importance Sampling'].append((is_res.estimate - TRUE_VALUE)**2)

        # CV
        cv_res = ControlVariates(target_func, control_func, 1.0, seed=seed).estimate(n)
        sq_errors['Control Variates'].append((cv_res.estimate - TRUE_VALUE)**2)

    # Root of the mean squared error across trials
    for method, errs in sq_errors.items():
        results[method].append(np.sqrt(np.mean(errs)))

# Plot: log-log so that 1/sqrt(N) convergence appears as a straight line
fig, ax = plt.subplots(figsize=(10, 6))

for method, rmse_values in results.items():
    ax.loglog(sample_sizes, rmse_values, 'o-', label=method, linewidth=2, markersize=8)

# Reference: 1/sqrt(N) scaling (the 0.2 prefactor only positions the guide line)
ref_line = 0.2 * np.array(sample_sizes)**(-0.5)
ax.loglog(sample_sizes, ref_line, 'k--', alpha=0.5, label=r'$1/\sqrt{N}$')

ax.set_xlabel('Number of Samples', fontsize=12)
ax.set_ylabel('RMSE', fontsize=12)
ax.set_title('Variance Reduction Effectiveness', fontsize=14, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(reports_dir / '01_variance_reduction_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

print("\n✓ Figure saved to reports/")

## 7. Exercise: Apply to Tail Expectation

**Problem:** Estimate $\mathbb{E}[X | X > 2]$ for $X \sim \mathcal{N}(0,1)$

**Challenge:** Only ~2.3% of samples satisfy $X > 2$ → very high naive variance

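**Task:** Implement importance sampling with $q(x) = \mathcal{N}(3, 1)$ truncated to $x > 2$. The weights $p(x)/q(x)$ must use the *truncated* density, i.e. the $\mathcal{N}(3,1)$ density divided by $P(Y > 2)$ for $Y \sim \mathcal{N}(3,1)$.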

In [None]:
# Exercise: Implement here
# Hint: Use conditional expectation E[X|X>2] = E[X*I(X>2)] / P(X>2)
# Hint: The weight p(x)/q(x) must use the density of the proposal you actually
#       sample from; a truncated proposal needs its renormalised density.

# YOUR CODE HERE
pass

## Solution

In [None]:
# Solution
threshold = 2.0
n_naive = 50000  # sample budget for the naive estimator
n_is = 10000     # sample budget for importance sampling

target_dist = stats.norm(0, 1)
# sf(x) = 1 - cdf(x), computed without the subtraction's cancellation error
p_tail = target_dist.sf(threshold)  # P(X > 2)
true_conditional = target_dist.expect(lambda x: x, lb=threshold) / p_tail

print(f"True E[X|X>2]: {true_conditional:.6f}")
print(f"P(X>2): {p_tail:.6f}\n")

# Naive MC: keep only the draws that land in the tail and average them
rng_ex = np.random.default_rng(42)
samples_ex = rng_ex.standard_normal(n_naive)
tail_samples = samples_ex[samples_ex > threshold]
naive_conditional = np.mean(tail_samples) if len(tail_samples) > 0 else np.nan

print(f"Naive MC (50k samples):")
print(f"  Tail samples: {len(tail_samples)}")
print(f"  Estimate: {naive_conditional:.6f}")
print(f"  Error: {abs(naive_conditional - true_conditional):.6f}\n")

# IS with shifted Gaussian: N(3, 1) truncated to x > threshold.
# A single frozen distribution provides both the sampler and the density used
# in the weights, so the two cannot drift apart. Previously the sampler
# rejected draws <= threshold (returning fewer than n samples) while the
# weights divided by the untruncated N(3, 1) density.
proposal_loc, proposal_scale = 3.0, 1.0
proposal_dist = stats.truncnorm(
    (threshold - proposal_loc) / proposal_scale,  # lower bound in standardised units
    np.inf,
    loc=proposal_loc,
    scale=proposal_scale,
)


def proposal_tail(n, rng):
    """Draw exactly ``n`` samples from N(3, 1) truncated to x > threshold."""
    return proposal_dist.rvs(size=n, random_state=rng)


def weight_tail(x):
    """Importance weight p(x) / q(x), with q the truncated proposal density."""
    log_p = target_dist.logpdf(x)
    log_q = proposal_dist.logpdf(x)
    return np.exp(log_p - log_q)


def target_tail(x):
    """Integrand X * I(X > threshold), whose mean is the numerator of E[X | X > 2]."""
    return x * (x > threshold)  # X * I(X>2)


is_tail = ImportanceSampler(
    target_func=target_tail,
    proposal_sampler=proposal_tail,
    weight_func=weight_tail,
    seed=42
)

is_tail_result = is_tail.estimate(n_samples=n_is)
# NOTE(review): assumes .estimate is the plain average of f(x) * w(x), i.e. an
# estimate of E[X * I(X > 2)]; confirm against ImportanceSampler.
is_conditional = is_tail_result.estimate / p_tail

naive_error = abs(naive_conditional - true_conditional)
is_error = abs(is_conditional - true_conditional)
# Report the measured ratio rather than a hard-coded claim
error_ratio = naive_error / is_error if is_error > 0 else np.inf

print(f"Importance Sampling (10k samples):")
print(f"  Estimate: {is_conditional:.6f}")
print(f"  Error: {is_error:.6f}")
print(f"  VRF: {is_tail_result.variance_reduction_factor:.2f}x")
print(f"\nNaive/IS absolute-error ratio: {error_ratio:.2f}x "
      f"(IS used {n_naive // n_is}x fewer samples)")

## Key Takeaways

1. **Variance reduction factor (VRF):** Quantifies speedup (VRF=10 → 10x fewer samples for same accuracy)
2. **Importance sampling:** Best when target is concentrated (tails, peaks)
3. **Control variates:** Best when correlated control function exists with known mean
4. **Antithetic sampling:** Best for monotonic functions (symmetric functions show no benefit)
5. **Cost-benefit:** Small overhead (computing weights, coefficients) pays off even for N~1000

**When to use:**
- IS: Rare events, tail probabilities, peaked distributions
- CV: Financial derivatives (use simpler option as control), correlated systems
- Antithetic: Asian options, path-dependent problems with monotonicity

**Further reading:**
- Owen (2013): *Monte Carlo Theory, Methods and Examples*
- Glasserman (2003): *Monte Carlo Methods in Financial Engineering*