In [None]:
import math
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt

from modules._import_helper import safe_import_from

# Variance-reduction helpers, resolved by dotted path through the project's
# import helper (presumably because the package directory name starts with a
# digit and therefore cannot be written in a plain `import` statement).
variance_reduction_module = '05_simulation_monte_carlo.src.variance_reduction'
ControlVariates, antithetic_sampling = safe_import_from(
    variance_reduction_module, 'ControlVariates', 'antithetic_sampling'
)

# Reproducibility: seed the global NumPy generator once, up front
np.random.seed(42)

# Output directory for saved figures (created on first run, reused afterwards)
reports_dir = Path('..') / 'reports'
reports_dir.mkdir(exist_ok=True)

# Plot appearance shared by every figure in this notebook
plt.style.use('default')
plt.rcParams.update({
    'figure.figsize': (12, 5),
    'font.size': 11,
})

## 1. Control Variates: Core Idea

**Setup:** Want to estimate $\mu = E[Y]$, but $Y$ has high variance.

**Key insight:** If we know a correlated variable $X$ with **known mean** $E[X] = \mu_X$, we can construct:

$$Y_c = Y - c(X - \mu_X)$$

**Properties:**
- $E[Y_c] = E[Y] = \mu$ (unbiased!)
- $\text{Var}[Y_c] = \text{Var}[Y] + c^2\text{Var}[X] - 2c\text{Cov}[X,Y]$
- **Optimal coefficient:** $c^* = \frac{\text{Cov}[X,Y]}{\text{Var}[X]} = \rho_{XY} \frac{\sigma_Y}{\sigma_X}$

$$\text{Var}[Y_c^*] = \text{Var}[Y](1 - \rho_{XY}^2)$$

**Variance reduction:** $VRF = \frac{1}{1 - \rho^2}$
- $\rho = 0.5 \to VRF = 1.33x$
- $\rho = 0.9 \to VRF = 5.26x$
- $\rho = 0.99 \to VRF = 50x$!

**When to use CV:**
- You can identify a control $X$ with known mean
- $X$ and $Y$ are correlated (ideally $|\rho| > 0.5$)
- Simpler than IS when good controls exist

## 2. Example: Asian Option Pricing

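**Problem:** Price an Asian call option, whose payoff depends on the average stock price over $n$ monitoring dates $t_1 < \dots < t_n = T$ (the constant discount factor $e^{-rT}$ is omitted throughout; it rescales target and control equally):

$$V = E\left[\max\left(\frac{1}{n}\sum_{i=1}^{n} S_{t_i} - K, 0\right)\right]$$

**Control variate:** Use the geometric mean $G = \left(\prod_{i=1}^{n} S_{t_i}\right)^{1/n}$. Under GBM, $G$ is log-normal, so the payoff $\max(G - K, 0)$ has a **known** analytical expectation!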

Let's simulate this:

In [None]:
# Simulate geometric Brownian motion paths
def simulate_stock_paths(S0, r, sigma, T, n_steps, n_paths, seed=42):
    """
    Simulate stock price paths under geometric Brownian motion (GBM).

    Each step applies the exact log-normal update
    ``S_{t+dt} = S_t * exp((r - sigma**2 / 2) * dt + sigma * dW)`` with
    ``dW ~ N(0, dt)`` on an equally spaced grid of ``n_steps`` intervals.

    Parameters
    ----------
    S0 : float
        Initial price; it fills column 0 of the result.
    r : float
        Drift rate of the price process.
    sigma : float
        Volatility.
    T : float
        Time horizon covered by the path.
    n_steps : int
        Number of time steps (must be non-zero, since ``dt = T / n_steps``).
    n_paths : int
        Number of independent paths.
    seed : int or None, default 42
        Seed for a generator that is private to this call. The draws are
        identical to what ``np.random.seed(seed)`` followed by
        ``np.random.randn`` would give, but the global NumPy random state is
        left untouched, so calling this function no longer changes the random
        numbers drawn elsewhere in the notebook.

    Returns: (n_paths, n_steps+1) array of prices
    """
    # Private legacy generator: same stream as the global one for this seed,
    # without the hidden side effect of reseeding np.random for everyone else.
    rng = np.random.RandomState(seed)
    dt = T / n_steps

    # Brownian increments, one row per path
    dW = rng.randn(n_paths, n_steps) * np.sqrt(dt)

    # Per-step log-return: deterministic drift plus stochastic diffusion
    drift = (r - 0.5 * sigma**2) * dt
    diffusion = sigma * dW

    # Cumulative log-returns; the prepended zero column makes every path start at S0
    log_paths = np.cumsum(drift + diffusion, axis=1)
    paths = S0 * np.exp(np.hstack([np.zeros((n_paths, 1)), log_paths]))

    return paths

# Parameters
S0 = 100  # Initial stock price
K = 100   # Strike price
r = 0.05  # Risk-free rate
sigma = 0.2  # Volatility
T = 1.0   # Maturity
n_steps = 50
n_paths = 10000


def geometric_asian_call_mean(S0, K, r, sigma, T, n_steps):
    """
    Exact (undiscounted) expectation E[max(G - K, 0)] for a discretely
    monitored geometric-average Asian call under GBM.

    G is the geometric mean of the prices at the equally spaced dates
    t_i = i * T / n_steps, i = 1..n_steps. Under GBM, log G is normal with

        mean = log(S0) + (r - sigma^2/2) * T * (n + 1) / (2n)
        var  = sigma^2 * T * (n + 1) * (2n + 1) / (6 n^2)

    so the expectation has the usual Black-Scholes-like closed form
    exp(mean + var/2) * Phi(d1) - K * Phi(d2).

    Parameters
    ----------
    S0, K, r, sigma, T : float
        Initial price, strike, drift rate, volatility (must be > 0) and maturity.
    n_steps : int
        Number of monitoring dates (t = 0 is not one of them).

    Returns
    -------
    float
        The undiscounted expected payoff, matching the undiscounted Monte Carlo
        payoffs computed below.
    """
    n = n_steps
    mean_log = math.log(S0) + (r - 0.5 * sigma**2) * T * (n + 1) / (2 * n)
    var_log = sigma**2 * T * (n + 1) * (2 * n + 1) / (6 * n**2)
    std_log = math.sqrt(var_log)

    d2 = (mean_log - math.log(K)) / std_log
    d1 = d2 + std_log

    def norm_cdf(x):
        # Standard normal CDF via the error function (no scipy dependency needed)
        return 0.5 * (1.0 + math.erf(x / math.sqrt(2.0)))

    return math.exp(mean_log + 0.5 * var_log) * norm_cdf(d1) - K * norm_cdf(d2)


# Simulate
paths = simulate_stock_paths(S0, r, sigma, T, n_steps, n_paths)

# Column 0 is the deterministic starting price S0 at t = 0. The averages run
# over the n_steps monitoring dates t_1..t_n only, as in the formula above.
monitored_prices = paths[:, 1:]

# Asian call payoffs (undiscounted)
arithmetic_mean = np.mean(monitored_prices, axis=1)
payoff_arithmetic = np.maximum(arithmetic_mean - K, 0)

# Control: geometric mean
geometric_mean = np.exp(np.mean(np.log(monitored_prices), axis=1))
payoff_geometric = np.maximum(geometric_mean - K, 0)

# Known expectation of the control, from the closed-form formula.
# It must NOT be estimated from the same samples: plugging in the sample mean
# makes the control-variate correction average to exactly zero, so the CV
# estimate would coincide with the naive one and any reported variance
# reduction would be an illusion.
E_geometric = geometric_asian_call_mean(S0, K, r, sigma, T, n_steps)

print(f"Arithmetic Asian call (target): {np.mean(payoff_arithmetic):.4f}")
print(f"Geometric Asian call (control): {E_geometric:.4f}")
print(f"Geometric Asian call (sample mean, sanity check): {np.mean(payoff_geometric):.4f}")
print(f"Correlation: {np.corrcoef(payoff_arithmetic, payoff_geometric)[0,1]:.4f}")

In [None]:
# Baseline: plain Monte Carlo mean of the arithmetic payoff and its standard error
naive_estimate = payoff_arithmetic.mean()
naive_se = payoff_arithmetic.std(ddof=1) / np.sqrt(payoff_arithmetic.shape[0])

# Control-variate estimate: the geometric payoff acts as the control,
# with E_geometric supplied as its mean
cv = ControlVariates(seed=42)
result_cv = cv.estimate(
    target_samples=payoff_arithmetic,
    control_samples=payoff_geometric,
    control_mean=E_geometric,
)

# Side-by-side report, one entry per output line
report_lines = [
    "\n=== Results ===",
    f"Naive MC: {naive_estimate:.6f} ± {naive_se:.6f}",
    f"Control Variates: {result_cv.estimate:.6f} ± {result_cv.std_error:.6f}",
    f"\nOptimal coefficient: c* = {result_cv.optimal_coef:.4f}",
    f"Variance Reduction Factor: {result_cv.variance_reduction_factor:.2f}x",
    f"\n✓ CV is {result_cv.variance_reduction_factor:.1f}x more efficient!",
]
print(*report_lines, sep="\n")

## 3. Visualizing Control Variates

Let's see the correlation structure and how CV exploits it:

In [None]:
# Quantities shared by both panels
rho_example = np.corrcoef(payoff_arithmetic, payoff_geometric)[0, 1]
c_opt = result_cv.optimal_coef

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))

# --- Left panel: target vs control samples, with the line of slope c* ---
ax1.scatter(payoff_geometric, payoff_arithmetic, alpha=0.3, s=10)
x_line = np.linspace(payoff_geometric.min(), payoff_geometric.max(), 100)
y_line = naive_estimate + c_opt * (x_line - E_geometric)
ax1.plot(
    x_line, y_line, 'r-',
    linewidth=2,
    label=f'CV adjustment (c*={c_opt:.3f})',
)
ax1.set_xlabel('Control (Geometric payoff)')
ax1.set_ylabel('Target (Arithmetic payoff)')
ax1.set_title(f'Correlation: ρ = {rho_example:.3f}')
ax1.legend()
ax1.grid(True, alpha=0.3)

# --- Right panel: theoretical VRF curve, with this example marked ---
correlations = np.linspace(0, 0.99, 50)
vrf_theoretical = 1 / (1 - correlations**2)
ax2.plot(correlations, vrf_theoretical, linewidth=2)
ax2.axhline(1, color='gray', linestyle='--', alpha=0.5, label='No improvement')
ax2.axvline(rho_example, color='red', linestyle='--', alpha=0.7, label='Our example')
ax2.set_xlabel('Correlation |ρ|')
ax2.set_ylabel('Variance Reduction Factor')
ax2.set_title('VRF = 1/(1-ρ²)')
ax2.set_yscale('log')  # the curve diverges as |ρ| → 1, so use a log axis
ax2.legend()
ax2.grid(True, alpha=0.3)

# Lay out, persist to the reports directory, then render inline
plt.tight_layout()
figure_path = reports_dir / '03_control_variates.png'
plt.savefig(figure_path, dpi=150, bbox_inches='tight')
plt.show()

print("✓ Plot saved to reports/03_control_variates.png")

**Key insight:** The stronger the correlation, the larger the VRF. Even $\rho = 0.7$ gives 2x improvement!

## 4. Antithetic Sampling

**Idea:** For **monotone functions**, generate pairs of negatively correlated samples.

**Method:**
1. Generate $U \sim \text{Uniform}[0,1]$
2. Also use $1-U$ (antithetic pair)
3. Average: $\hat{\mu} = \frac{1}{2}[f(U) + f(1-U)]$

**Why it works:**
- $\text{Cov}[f(U), f(1-U)] < 0$ for monotone $f$
- Variance of average is reduced!
- **Free** variance reduction (no extra function evaluations per effective sample)

**When to use:**
- Function is monotone (or odd, when pairing $Z$ with $-Z$); even functions gain nothing
- Easy to generate antithetic pairs (e.g., $-Z$ for Gaussian $Z$)
- Complementary to other methods

In [None]:
# Worked example: E[exp(Z)] with Z ~ N(0,1), whose closed form is exp(1/2)
true_value = np.exp(0.5)

def target_func(z):
    """Integrand exp(z); monotone increasing in z."""
    return np.exp(z)

# --- Plain Monte Carlo on n_samples independent draws ---
n_samples = 5000
np.random.seed(42)
z_naive = np.random.randn(n_samples)
naive_values = target_func(z_naive)
estimate_naive = naive_values.mean()
se_naive = naive_values.std(ddof=1) / np.sqrt(n_samples)

# --- Antithetic sampling: half as many draws, each paired with its mirror -Z ---
result_antithetic = antithetic_sampling(
    target_func=target_func,
    sampler=lambda n: np.random.randn(n),
    antithetic_func=lambda z: -z,
    n_samples=n_samples // 2,
    seed=42,
)

summary_lines = [
    f"True value: {true_value:.6f}\n",
    f"Naive MC: {estimate_naive:.6f} ± {se_naive:.6f}",
    f"Antithetic: {result_antithetic.estimate:.6f} ± {result_antithetic.std_error:.6f}",
    f"\nVariance Reduction Factor: {result_antithetic.variance_reduction_factor:.2f}x",
    f"\n✓ Antithetic sampling: {result_antithetic.variance_reduction_factor:.1f}x improvement for free!",
]
print(*summary_lines, sep="\n")

## 5. Comparing All Three Methods

Let's compare naive MC, CV, and antithetic on the same problem:

In [None]:
# Benchmark problem: E[(X+1)²] with X ~ N(0,1).
# Expanding the square: E[X²] + 2E[X] + 1 = 1 + 0 + 1 = 2.
true_val = 2.0

def target(x):
    """Integrand (x + 1)² whose mean is being estimated."""
    return (x + 1)**2

def control(x):
    """Control variate x²; its mean under N(0,1) is 1."""
    return x**2

n = 10000
np.random.seed(42)

# --- Method 1: plain Monte Carlo ---
x_naive = np.random.randn(n)
y_naive = target(x_naive)
est_naive = y_naive.mean()
se_naive = y_naive.std(ddof=1) / np.sqrt(n)

# --- Method 2: control variates, reusing the same draws ---
c_samples = control(x_naive)
cv_obj = ControlVariates(seed=42)
result_cv2 = cv_obj.estimate(y_naive, c_samples, control_mean=1.0)

# --- Method 3: antithetic pairs (X, -X), n/2 pairs ---
result_anti = antithetic_sampling(
    target_func=target,
    sampler=lambda n: np.random.randn(n),
    antithetic_func=lambda x: -x,
    n_samples=n // 2,
    seed=42,
)

# Summary table: header, rule, one row per method, then the reference value
table_rows = [
    f"{'Method':<20} {'Estimate':<12} {'Std Error':<12} {'VRF':<8}",
    "-" * 52,
    f"{'Naive MC':<20} {est_naive:<12.6f} {se_naive:<12.6f} {'1.0':<8}",
    f"{'Control Variates':<20} {result_cv2.estimate:<12.6f} {result_cv2.std_error:<12.6f} {result_cv2.variance_reduction_factor:<8.2f}",
    f"{'Antithetic':<20} {result_anti.estimate:<12.6f} {result_anti.std_error:<12.6f} {result_anti.variance_reduction_factor:<8.2f}",
    f"\nTrue value: {true_val}",
]
print(*table_rows, sep="\n")

## 6. Pitfalls

### Pitfall 1: Using Test Data to Estimate Optimal Coefficient

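**Caveat:** Estimating $c^*$ from the same samples that are then used to estimate $\mu$ introduces a small **bias** of order $O(1/n)$. For large $n$ this is negligible next to the $O(1/\sqrt{n})$ standard error; if strict unbiasedness matters, estimate $c^*$ on a separate pilot sample.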

In [None]:
# Two valid ways to obtain c*:
#   (1) estimate it from the same samples used for the final estimate, which
#       costs a bias of order 1/n that is dominated by the 1/sqrt(n)
#       standard error, or
#   (2) estimate it on a separate pilot sample and apply it to fresh samples,
#       which is exactly unbiased but spends part of the budget on the pilot.
# The messages below describe option (1) and point to (2) as the strict alternative.
print("✓ Our ControlVariates class does this correctly:")
print("  - Computes c* from the provided samples")
print("  - Uses same samples for final estimate (bias is O(1/n), negligible next to the O(1/sqrt(n)) standard error)\n")
print("⚠ If you need a strictly unbiased estimate: use 20% for pilot, 80% for estimation")
print("  (But this is usually not necessary with our approach)")

### Pitfall 2: Choosing a Poor Control

CV only helps noticeably if $|\rho| > 0.5$ (roughly), since the variance shrinks by the factor $1 - \rho^2$. Uncorrelated controls don't hurt much, but they don't help either!

In [None]:
# Demonstration of a useless control: a variable independent of the target
n = 5000
np.random.seed(42)
x = np.random.randn(n)
y = np.square(x + 1)               # target samples (x + 1)²
bad_control = np.random.randn(n)   # fresh draws, unrelated to x

cv_bad = ControlVariates(seed=42)
result_bad = cv_bad.estimate(y, bad_control, control_mean=0.0)

# Empirical correlation between target and control, for the report
rho_bad = np.corrcoef(y, bad_control)[0, 1]

diagnostics = [
    f"Correlation with bad control: {rho_bad:.4f}",
    f"VRF: {result_bad.variance_reduction_factor:.4f}",
    "\n⚠ VRF ≈ 1 means no improvement (and no harm)",
    "✓ Always check correlation before using CV",
]
print(*diagnostics, sep="\n")

### Pitfall 3: Antithetic Pairs for Non-Monotone Functions

Antithetic sampling can **increase** variance for some functions!

In [None]:
def nonmonotone(x):
    """
    sin(x): non-monotone, yet odd.

    With the mirror pair (Z, -Z), sin(-Z) = -sin(Z), so
    Cov[sin(Z), sin(-Z)] = -Var[sin(Z)] < 0. Antithetic pairs therefore still
    help here; being non-monotone is not by itself a counter-example.
    """
    return np.sin(x)


def even_func(x):
    """
    x²: an even function, and the real counter-example.

    Since (-Z)² = Z², both members of every antithetic pair evaluate to the
    same value, so pairing yields no reduction.
    """
    return x**2


# Run the antithetic estimator on the even function
result_even = antithetic_sampling(
    target_func=even_func,
    sampler=lambda n: np.random.randn(n),
    antithetic_func=lambda z: -z,
    n_samples=2500,
    seed=42,
)

even_report = [
    "Antithetic on x²:",
    f"VRF: {result_even.variance_reduction_factor:.4f}",
    "\n⚠ VRF ≈ 1 for even functions (no improvement)",
    "✓ Antithetic works best for monotone or odd functions",
]
print(*even_report, sep="\n")

## 7. Key Takeaways

**Control Variates:**
- ✓ Exploits correlation: $VRF = 1/(1-\rho^2)$
- ✓ Works when you have a control with known mean
- ✓ Optimal coefficient: $c^* = \text{Cov}[X,Y] / \text{Var}[X]$
- ✗ Need $|\rho| > 0.5$ for significant gains

**Antithetic Sampling:**
- ✓ Free variance reduction (no extra evaluations)
- ✓ Works for monotone/odd functions
- ✓ Easy to implement ($-Z$ for Gaussian, $1-U$ for uniform)
- ✗ No benefit for even functions

**Comparison:**
- **CV:** Best when strong control available (ρ > 0.7)
- **IS:** Best for rare events, tail probabilities
- **Antithetic:** Always try it (low cost, often 1.5-2x gain)
- **Combining methods:** You can use CV + antithetic together!

## 8. Exercises

**Exercise 1:** Estimate $E[e^X]$ for $X \sim \mathcal{N}(0,1)$ using CV with control $X$ (known mean = 0). What VRF do you achieve?

**Exercise 2:** Implement **multiple control variates**: use both $X$ and $X^2$ as controls. Does this improve over single CV?

**Exercise 3:** For Asian option pricing (arithmetic-average payoff as the target), compare the VRF obtained with two different controls: (a) the geometric-mean payoff, (b) the final price $S_T$.

**Exercise 4:** Show that antithetic sampling gives an estimator with **zero variance** (i.e. an infinite VRF) for $f(U) = U$ with $U \sim \text{Uniform}[0,1]$.

**Exercise 5:** Estimate $\int_0^1 e^x dx$ using CV with control $\int_0^1 x dx = 0.5$. What's the correlation?

**Exercise 6:** **Challenge:** Combine IS + CV: Use IS with a shifted Gaussian, then apply CV with the unshifted Gaussian as control. Measure total VRF.

In [None]:
# Your solutions here

---
## Solutions

<details>
<summary>Click to reveal</summary>

```python
# Exercise 1
np.random.seed(42)
n = 10000
x = np.random.randn(n)
y = np.exp(x)
cv_ex1 = ControlVariates(seed=42)
result_ex1 = cv_ex1.estimate(y, x, control_mean=0.0)
print(f"VRF: {result_ex1.variance_reduction_factor:.2f}x")
# Note: Corr[exp(X), X] = 1/sqrt(e - 1) ≈ 0.76, so VRF = 1/(1 - ρ²) ≈ 2.4x

# Exercise 4 (Proof)
# For f(U) = U:
# Naive: Var[U] = 1/12
# Antithetic: Var[(U + (1-U))/2] = Var[1/2] = 0  (perfect cancellation!)
# Actually VRF = infinity for this degenerate case
# For linear functions, antithetic gives perfect cancellation

# Exercise 5
from scipy.integrate import quad
true_int = np.exp(1) - 1
n = 10000
u = np.random.uniform(0, 1, n)
y_target = np.exp(u)  # Integrand values
y_control = u  # Control values
cv_ex5 = ControlVariates(seed=42)
result_ex5 = cv_ex5.estimate(y_target, y_control, control_mean=0.5)
print(f"Correlation: {np.corrcoef(y_target, y_control)[0,1]:.4f}")
print(f"VRF: {result_ex5.variance_reduction_factor:.2f}x")
```

</details>

---
**Next:** [04_rare_event_probability_estimation.ipynb](04_rare_event_probability_estimation.ipynb) - Advanced techniques for P < 10^-6!