In [7]:
import numpy as np
from scipy.stats import norm, qmc
import time

# =======================================
# 1. Hull-White short-rate path simulator
# =======================================

def simulate_hull_white_paths(
    n_paths,
    n_steps,
    T,
    r0,
    a,
    sigma,
    theta_func=None,
    use_sobol=False,
    antithetic=False,
    seed=None,
):
    """
    Simulate short-rate paths under the one-factor Hull-White model using Euler-Maruyama:

        dr_t = [theta(t) - a r_t] dt + sigma dW_t
    """
    dt = T / n_steps
    t_grid = np.linspace(0.0, T, n_steps + 1)

    if theta_func is None:
        def theta_func(t):
            return a * r0

    if use_sobol:
        sobol_engine = qmc.Sobol(d=n_steps, scramble=True, seed=seed)
        u = sobol_engine.random(n_paths)
        eps = np.finfo(float).eps
        u = np.clip(u, eps, 1 - eps)
        Z = norm.ppf(u)
    else:
        rng = np.random.default_rng(seed)
        if antithetic:
            n_half = (n_paths + 1) // 2
            Z_half = rng.standard_normal(size=(n_half, n_steps))
            Z = np.vstack([Z_half, -Z_half])[:n_paths, :]
        else:
            Z = rng.standard_normal(size=(n_paths, n_steps))

    r_paths = np.zeros((n_paths, n_steps + 1))
    r_paths[:, 0] = r0

    for i in range(n_steps):
        t = t_grid[i]
        theta_t = theta_func(t)
        r_t = r_paths[:, i]
        dr = (theta_t - a * r_t) * dt + sigma * np.sqrt(dt) * Z[:, i]
        r_paths[:, i + 1] = r_t + dr

    return t_grid, r_paths


# ========================================
# 2. Discount factors, bonds, swaps, rates
# ========================================

def compute_discount_factors(r_paths, dt):
    integrals = np.cumsum(r_paths[:, :-1] * dt, axis=1)
    integrals = np.hstack([np.zeros((r_paths.shape[0], 1)), integrals])
    disc = np.exp(-integrals)
    return disc


def bond_price_on_paths(disc, idx_t, idx_T):
    return disc[:, idx_T] / disc[:, idx_t]


def swap_value_on_paths(disc, t_grid, K, pay_indices, idx_t):
    """
    Payer swap value at time index idx_t along each path:

        V_swap(t) = P(t,T_0) - P(t,T_N) - K * sum_i Delta_i P(t,T_i)
    """
    pay_indices = np.asarray(pay_indices, dtype=int)
    idx_T0 = pay_indices[0]
    idx_TN = pay_indices[-1]

    P_tT0 = bond_price_on_paths(disc, idx_t, idx_T0)
    P_tTN = bond_price_on_paths(disc, idx_t, idx_TN)
    P_tTi = disc[:, pay_indices] / disc[:, [idx_t]]

    T_pay = t_grid[pay_indices]
    Delta = np.empty_like(T_pay)
    Delta[0] = T_pay[0] - t_grid[idx_t]
    Delta[1:] = T_pay[1:] - T_pay[:-1]

    fixed_leg = K * np.dot(P_tTi, Delta)
    V_swap = P_tT0 - P_tTN - fixed_leg
    return V_swap


def forward_swap_rate_on_paths(disc, t_grid, pay_indices, idx_t):
    """
    Forward par swap rate at time index idx_t along each path.
    """
    pay_indices = np.asarray(pay_indices, dtype=int)
    idx_T0 = pay_indices[0]
    idx_TN = pay_indices[-1]

    P_tT0 = bond_price_on_paths(disc, idx_t, idx_T0)
    P_tTN = bond_price_on_paths(disc, idx_t, idx_TN)
    P_tTi = disc[:, pay_indices] / disc[:, [idx_t]]

    T_pay = t_grid[pay_indices]
    Delta = np.empty_like(T_pay)
    Delta[0] = T_pay[0] - t_grid[idx_t]
    Delta[1:] = T_pay[1:] - T_pay[:-1]

    denom = np.dot(P_tTi, Delta)
    denom = np.where(denom == 0.0, np.nan, denom)
    F = (P_tT0 - P_tTN) / denom
    return F


# =======================================
# 3. LSM Bermudan swaption pricer
# =======================================

def bermudan_pathwise_pv(
    K,
    exercise_indices,
    pay_indices,
    t_grid,
    r_paths,
):
    """
    LSM Bermudan pricer returning pathwise discounted payoffs X_i.
    """
    n_paths, _ = r_paths.shape
    dt = t_grid[1] - t_grid[0]

    disc = compute_discount_factors(r_paths, dt)
    exercise_indices = np.asarray(exercise_indices, dtype=int)
    pay_indices = np.asarray(pay_indices, dtype=int)

    n_ex = len(exercise_indices)
    swap_values = np.zeros((n_paths, n_ex))
    swap_rates = np.zeros((n_paths, n_ex))
    for j, idx_t in enumerate(exercise_indices):
        swap_values[:, j] = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_t)
        swap_rates[:, j] = forward_swap_rate_on_paths(disc, t_grid, pay_indices, idx_t)

    payoffs = np.maximum(swap_values, 0.0)

    cashflow = np.zeros(n_paths)
    exercise_time_idx = np.full(n_paths, exercise_indices[-1], dtype=int)

    last_ex_idx = exercise_indices[-1]
    last_ex_pos = n_ex - 1
    exercise_now = payoffs[:, last_ex_pos] > 0.0
    cashflow[exercise_now] = payoffs[exercise_now, last_ex_pos]
    exercise_time_idx[exercise_now] = last_ex_idx

    for ex_pos in range(n_ex - 2, -1, -1):
        idx_t = exercise_indices[ex_pos]

        alive = exercise_time_idx > idx_t
        if not np.any(alive):
            continue

        alive_idx = np.where(alive)[0]

        df_ex = disc[alive_idx, exercise_time_idx[alive_idx]]
        df_t = disc[alive_idx, idx_t]
        cont_values = cashflow[alive_idx] * df_ex / df_t

        itm_mask = payoffs[alive_idx, ex_pos] > 0.0
        if np.sum(itm_mask) >= 3:
            Xr = swap_rates[alive_idx, ex_pos][itm_mask]
            Yr = cont_values[itm_mask]
            A = np.vstack([np.ones_like(Xr), Xr, Xr ** 2]).T
            beta, *_ = np.linalg.lstsq(A, Yr, rcond=None)
            X_all = swap_rates[alive_idx, ex_pos]
            A_all = np.vstack([np.ones_like(X_all), X_all, X_all ** 2]).T
            C_hat = A_all @ beta
        else:
            C_hat = np.full_like(cont_values, np.mean(cont_values) if cont_values.size > 0 else 0.0)

        immediate = payoffs[alive_idx, ex_pos]
        exercise_decision = immediate > C_hat

        exercise_paths = alive_idx[exercise_decision]
        cashflow[exercise_paths] = immediate[exercise_decision]
        exercise_time_idx[exercise_paths] = idx_t

    df_0 = disc[np.arange(n_paths), exercise_time_idx]
    pv = cashflow * df_0
    return pv


def price_bermudan_swaption_lsm(
    K,
    exercise_indices,
    pay_indices,
    t_grid,
    r_paths,
):
    pv = bermudan_pathwise_pv(K, exercise_indices, pay_indices, t_grid, r_paths)
    return np.mean(pv)


# =======================================
# 4. European swaption under Hull–White (MC)
# =======================================

def price_european_swaption_hw_mc(
    K,
    exercise_index,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_paths,
    n_steps,
    theta_func=None,
    seed=None,
):
    """
    European payer swaption under Hull–White, via Monte Carlo:

        payoff = max(V_swap(T0), 0) discounted back to time 0.
    """
    t_grid, r_paths = simulate_hull_white_paths(
        n_paths=n_paths,
        n_steps=n_steps,
        T=T,
        r0=r0,
        a=a,
        sigma=sigma,
        theta_func=theta_func,
        use_sobol=False,
        antithetic=False,
        seed=seed,
    )
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths, dt)

    idx_T0 = exercise_index
    V_swap_T0 = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_T0)
    payoff = np.maximum(V_swap_T0, 0.0)

    df_T0_paths = disc[:, idx_T0]
    Y = payoff * df_T0_paths
    return np.mean(Y)


def estimate_hw_cv_statistics(
    K,
    exercise_indices,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_paths,
    n_steps,
    theta_func=None,
    seed=None,
):
    """
    Optional diagnostics: estimates rho, rho^2, beta* on a large pilot sample.
    Not used for CV pricing when beta is estimated per run.
    """
    t_grid, r_paths = simulate_hull_white_paths(
        n_paths=n_paths,
        n_steps=n_steps,
        T=T,
        r0=r0,
        a=a,
        sigma=sigma,
        theta_func=theta_func,
        use_sobol=False,
        antithetic=False,
        seed=seed,
    )
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths, dt)

    X = bermudan_pathwise_pv(K, exercise_indices, pay_indices, t_grid, r_paths)

    idx_T0 = exercise_indices[0]
    V_swap_T0 = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_T0)
    Y = np.maximum(V_swap_T0, 0.0) * disc[:, idx_T0]

    var_X = np.var(X, ddof=1)
    var_Y = np.var(Y, ddof=1)
    cov_XY = np.cov(X, Y, ddof=1)[0, 1]

    rho_hat = cov_XY / np.sqrt(var_X * var_Y)
    rho2_hat = rho_hat ** 2
    beta_star = cov_XY / var_Y if var_Y > 0 else 0.0
    vrf_theoretical = 1.0 / (1.0 - rho2_hat) if rho2_hat < 1.0 else np.inf
    return rho_hat, rho2_hat, beta_star, vrf_theoretical


# =======================================
# 5. Pricing wrappers
# =======================================

def price_bermudan_standard_mc(
    K, exercise_indices, pay_indices, T, r0, a, sigma, n_paths, n_steps,
    theta_func=None, seed=None
):
    t_grid, r_paths = simulate_hull_white_paths(
        n_paths=n_paths, n_steps=n_steps, T=T, r0=r0, a=a, sigma=sigma,
        theta_func=theta_func, use_sobol=False, antithetic=False, seed=seed
    )
    return price_bermudan_swaption_lsm(K, exercise_indices, pay_indices, t_grid, r_paths)


def price_bermudan_antithetic(
    K, exercise_indices, pay_indices, T, r0, a, sigma, n_paths, n_steps,
    theta_func=None, seed=None
):
    t_grid, r_paths = simulate_hull_white_paths(
        n_paths=n_paths, n_steps=n_steps, T=T, r0=r0, a=a, sigma=sigma,
        theta_func=theta_func, use_sobol=False, antithetic=True, seed=seed
    )
    return price_bermudan_swaption_lsm(K, exercise_indices, pay_indices, t_grid, r_paths)


def price_bermudan_sobol_qmc(
    K, exercise_indices, pay_indices, T, r0, a, sigma, n_paths, n_steps,
    theta_func=None, seed=None
):
    t_grid, r_paths = simulate_hull_white_paths(
        n_paths=n_paths, n_steps=n_steps, T=T, r0=r0, a=a, sigma=sigma,
        theta_func=theta_func, use_sobol=True, antithetic=False, seed=seed
    )
    return price_bermudan_swaption_lsm(K, exercise_indices, pay_indices, t_grid, r_paths)


def price_bermudan_with_cv_hw_dynamic_beta(
    K,
    exercise_indices,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_paths,
    n_steps,
    mu_Y,
    theta_func=None,
    seed=None,
    beta_split=0.25,
):
    """
    Returns (price_cv, beta_hat) so beta can be shown for each run/seed.
    """
    t_grid, r_paths = simulate_hull_white_paths(
        n_paths=n_paths,
        n_steps=n_steps,
        T=T,
        r0=r0,
        a=a,
        sigma=sigma,
        theta_func=theta_func,
        use_sobol=False,
        antithetic=False,
        seed=seed,
    )
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths, dt)

    X = bermudan_pathwise_pv(K, exercise_indices, pay_indices, t_grid, r_paths)

    idx_T0 = exercise_indices[0]
    V_swap_T0 = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_T0)
    Y = np.maximum(V_swap_T0, 0.0) * disc[:, idx_T0]

    n_beta = int(np.ceil(beta_split * n_paths))
    n_beta = max(20, min(n_beta, n_paths - 20))

    Xb, Yb = X[:n_beta], Y[:n_beta]
    Xa, Ya = X[n_beta:], Y[n_beta:]

    var_Yb = np.var(Yb, ddof=1)
    if var_Yb <= 0.0 or not np.isfinite(var_Yb):
        beta_hat = 0.0
    else:
        beta_hat = np.cov(Xb, Yb, ddof=1)[0, 1] / var_Yb

    price_mc = np.mean(Xa)
    Y_bar = np.mean(Ya)
    price_cv = price_mc + beta_hat * (mu_Y - Y_bar)

    return price_cv, beta_hat


# =======================================
# 6. Variance estimation helpers (with timing)
# =======================================

def estimate_variance_for_method(method, M, R, base_kwargs, collect_beta=False):
    """
    Run method R times with different seeds.
    If collect_beta=True, the method must return (price, beta).
    """
    prices = np.zeros(R)
    betas = np.zeros(R) if collect_beta else None

    start = time.perf_counter()
    for r in range(R):
        seed = 1000 + r
        kwargs = base_kwargs.copy()
        kwargs.update({"n_paths": M, "seed": seed})

        if collect_beta:
            prices[r], betas[r] = method(**kwargs)
        else:
            prices[r] = method(**kwargs)

    elapsed = time.perf_counter() - start
    mean = np.mean(prices)
    var = np.var(prices, ddof=1)
    se = np.sqrt(var)

    if collect_beta:
        return mean, var, se, elapsed, betas
    return mean, var, se, elapsed


def estimate_variances_all_methods(
    M,
    R,
    K,
    exercise_indices,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_steps,
    mu_Y,
    theta_func=None,
):
    base_common = dict(
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        T=T,
        r0=r0,
        a=a,
        sigma=sigma,
        n_steps=n_steps,
        theta_func=theta_func,
    )

    mean_std, var_std, se_std, t_std = estimate_variance_for_method(
        price_bermudan_standard_mc, M, R, base_common
    )

    mean_anti, var_anti, se_anti, t_anti = estimate_variance_for_method(
        price_bermudan_antithetic, M, R, base_common
    )

    mean_sob, var_sob, se_sob, t_sob = estimate_variance_for_method(
        price_bermudan_sobol_qmc, M, R, base_common
    )

    base_cv = base_common.copy()
    base_cv.update(dict(mu_Y=mu_Y))
    mean_cv, var_cv, se_cv, t_cv, betas_cv = estimate_variance_for_method(
        price_bermudan_with_cv_hw_dynamic_beta, M, R, base_cv, collect_beta=True
    )

    return {
        "standard":   (mean_std,  var_std,  se_std, t_std),
        "antithetic": (mean_anti, var_anti, se_anti, t_anti),
        "sobol":      (mean_sob,  var_sob,  se_sob, t_sob),
        "control":    (mean_cv,   var_cv,   se_cv,  t_cv, betas_cv),
    }




In [8]:
# =======================================
# 7. Main experiment
# =======================================

if __name__ == "__main__":
    # Contract and model
    T_swaption = 5.0
    T_swap_maturity = 10.0
    dt = 1.0 / 52.0
    n_steps = int(T_swap_maturity / dt)

    r0 = 0.02
    a = 0.1
    sigma = 0.01
    K = 0.02

    t_grid = np.linspace(0.0, T_swap_maturity, n_steps + 1)
    exercise_times = np.arange(T_swaption, T_swap_maturity + 1e-12, 0.25)
    exercise_indices = [int(round(t / dt)) for t in exercise_times]
    pay_indices = exercise_indices

    # --- European HW price mu_Y (high-precision MC) ---
    n_paths_eur = 200_000
    seed_eur = 42
    mu_Y = price_european_swaption_hw_mc(
        K=K,
        exercise_index=exercise_indices[0],
        pay_indices=pay_indices,
        T=T_swap_maturity,
        r0=r0,
        a=a,
        sigma=sigma,
        n_paths=n_paths_eur,
        n_steps=n_steps,
        theta_func=None,
        seed=seed_eur,
    )
    print(f"European payer swaption HW price (mu_Y) ≈ {mu_Y:.6f}\n")

    # Optional diagnostics: large-sample rho and beta* (not used for pricing now)
    n_paths_stat = 200_000
    rho_hat, rho2_hat, beta_star, vrf_theoretical = estimate_hw_cv_statistics(
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        T=T_swap_maturity,
        r0=r0,
        a=a,
        sigma=sigma,
        n_paths=n_paths_stat,
        n_steps=n_steps,
        theta_func=None,
        seed=123,
    )
    theoretical_red = 100.0 * rho2_hat
    print(f"Hull–White CV correlation ρ ≈ {rho_hat:.4f}")
    print(f"ρ² ≈ {rho2_hat:.4f}  (theoretical fraction of variance removed)")
    print(f"Pilot β* ≈ {beta_star:.4f}")
    print(f"Theoretical VRF (optimal β, pilot) ≈ {vrf_theoretical:.2f}")
    print(f"Theoretical variance reduction (pilot) ≈ {theoretical_red:.1f}%\n")

    # --- Variance comparison ---
    Ms = [4_096, 8_192, 16_384, 32_768, 65_536, 131_072]
    R = 30

    for M in Ms:
        res = estimate_variances_all_methods(
            M=M,
            R=R,
            K=K,
            exercise_indices=exercise_indices,
            pay_indices=pay_indices,
            T=T_swap_maturity,
            r0=r0,
            a=a,
            sigma=sigma,
            n_steps=n_steps,
            mu_Y=mu_Y,
            theta_func=None,
        )

        mean_std, var_std, se_std, t_std = res["standard"]
        mean_anti, var_anti, se_anti, t_anti = res["antithetic"]
        mean_sob, var_sob, se_sob, t_sob = res["sobol"]
        mean_cv, var_cv, se_cv, t_cv, betas_cv = res["control"]

        vrf_anti = var_std / var_anti
        vrf_sob  = var_std / var_sob
        vrf_cv   = var_std / var_cv

        red_anti = 100.0 * (1.0 - 1.0 / vrf_anti)
        red_sob  = 100.0 * (1.0 - 1.0 / vrf_sob)
        red_cv   = 100.0 * (1.0 - 1.0 / vrf_cv)

        print(f"M = {M:6d}")
        print(f"  Standard MC      : mean={mean_std:.6f},  SE={se_std:.6f},  "
              f"time={t_std:.2f}s")
        print(f"  Antithetic MC    : mean={mean_anti:.6f}, SE={se_anti:.6f}, "
              f"VRF={vrf_anti:.2f}, variance red.={red_anti:.1f}%, "
              f"time={t_anti:.2f}s")
        print(f"  Sobol QMC        : mean={mean_sob:.6f},  SE={se_sob:.6f},  "
              f"VRF={vrf_sob:.2f}, variance red.={red_sob:.1f}%, "
              f"time={t_sob:.2f}s")

        print(f"  HW CV (β per run): mean={mean_cv:.6f},   SE={se_cv:.6f},   "
              f"VRF={vrf_cv:.2f}, variance red.={red_cv:.1f}%, "
              f"time={t_cv:.2f}s")

        # Show beta values for this M (30 seeds)
        print(f"    β summary: mean={betas_cv.mean():.4f}, std={betas_cv.std(ddof=1):.4f}, "
              f"min={betas_cv.min():.4f}, max={betas_cv.max():.4f}")
        print(f"    β values : {np.array2string(betas_cv, precision=4, separator=', ')}")
        print()


European payer swaption HW price (mu_Y) ≈ 0.027921

Hull–White CV correlation ρ ≈ 0.8649
ρ² ≈ 0.7481  (theoretical fraction of variance removed)
Pilot β* ≈ 1.2478
Theoretical VRF (optimal β, pilot) ≈ 3.97
Theoretical variance reduction (pilot) ≈ 74.8%

M =   4096
  Standard MC      : mean=0.085009,  SE=0.000928,  time=8.16s
  Antithetic MC    : mean=0.084941, SE=0.000391, VRF=5.62, variance red.=82.2%, time=7.78s
  Sobol QMC        : mean=0.084930,  SE=0.000304,  VRF=9.30, variance red.=89.2%, time=11.93s
  HW CV (β per run): mean=0.085233,   SE=0.000644,   VRF=2.08, variance red.=51.8%, time=8.95s
    β summary: mean=1.2605, std=0.0200, min=1.2083, max=1.2939
    β values : [1.2891, 1.2884, 1.271 , 1.2778, 1.2632, 1.2939, 1.2187, 1.2777, 1.252 ,
 1.2598, 1.2747, 1.263 , 1.2531, 1.2754, 1.2675, 1.2304, 1.2083, 1.2574,
 1.2807, 1.2671, 1.2636, 1.2476, 1.2581, 1.2392, 1.2362, 1.2721, 1.2558,
 1.2713, 1.2493, 1.252 ]

M =   8192
  Standard MC      : mean=0.084838,  SE=0.000652,  time=13.1

In [10]:
import numpy as np
from scipy.stats import norm, qmc
import time

# ==========================================================
# 1) Hull–White short-rate path simulator (same as yours)
# ==========================================================

def simulate_hull_white_paths(
    n_paths,
    n_steps,
    T,
    r0,
    a,
    sigma,
    theta_func=None,
    use_sobol=False,
    antithetic=False,
    seed=None,
):
    """
    Simulate short-rate paths under one-factor Hull–White (Euler):
        dr_t = [theta(t) - a r_t] dt + sigma dW_t
    """
    dt = T / n_steps
    t_grid = np.linspace(0.0, T, n_steps + 1)

    if theta_func is None:
        def theta_func(t):
            return a * r0

    if use_sobol:
        sobol_engine = qmc.Sobol(d=n_steps, scramble=True, seed=seed)
        u = sobol_engine.random(n_paths)
        eps = np.finfo(float).eps
        u = np.clip(u, eps, 1 - eps)
        Z = norm.ppf(u)
    else:
        rng = np.random.default_rng(seed)
        if antithetic:
            n_half = (n_paths + 1) // 2
            Z_half = rng.standard_normal(size=(n_half, n_steps))
            Z = np.vstack([Z_half, -Z_half])[:n_paths, :]
        else:
            Z = rng.standard_normal(size=(n_paths, n_steps))

    r_paths = np.zeros((n_paths, n_steps + 1))
    r_paths[:, 0] = r0

    for i in range(n_steps):
        t = t_grid[i]
        theta_t = theta_func(t)
        r_t = r_paths[:, i]
        dr = (theta_t - a * r_t) * dt + sigma * np.sqrt(dt) * Z[:, i]
        r_paths[:, i + 1] = r_t + dr

    return t_grid, r_paths


# ==========================================================
# 2) Discount factors and swap primitives (same as yours)
# ==========================================================

def compute_discount_factors(r_paths, dt):
    integrals = np.cumsum(r_paths[:, :-1] * dt, axis=1)
    integrals = np.hstack([np.zeros((r_paths.shape[0], 1)), integrals])
    disc = np.exp(-integrals)
    return disc


def bond_price_on_paths(disc, idx_t, idx_T):
    return disc[:, idx_T] / disc[:, idx_t]


def swap_value_on_paths(disc, t_grid, K, pay_indices, idx_t):
    """
    Payer swap value at time index idx_t:
      V_swap(t) = P(t,T0) - P(t,TN) - K * sum_i Delta_i P(t,Ti)
    """
    pay_indices = np.asarray(pay_indices, dtype=int)
    idx_T0 = pay_indices[0]
    idx_TN = pay_indices[-1]

    P_tT0 = bond_price_on_paths(disc, idx_t, idx_T0)
    P_tTN = bond_price_on_paths(disc, idx_t, idx_TN)
    P_tTi = disc[:, pay_indices] / disc[:, [idx_t]]

    T_pay = t_grid[pay_indices]
    Delta = np.empty_like(T_pay)
    Delta[0] = T_pay[0] - t_grid[idx_t]
    Delta[1:] = T_pay[1:] - T_pay[:-1]

    fixed_leg = K * np.dot(P_tTi, Delta)
    V_swap = P_tT0 - P_tTN - fixed_leg
    return V_swap


def forward_swap_rate_on_paths(disc, t_grid, pay_indices, idx_t):
    """
    Forward par swap rate at time index idx_t.
    """
    pay_indices = np.asarray(pay_indices, dtype=int)
    idx_T0 = pay_indices[0]
    idx_TN = pay_indices[-1]

    P_tT0 = bond_price_on_paths(disc, idx_t, idx_T0)
    P_tTN = bond_price_on_paths(disc, idx_t, idx_TN)
    P_tTi = disc[:, pay_indices] / disc[:, [idx_t]]

    T_pay = t_grid[pay_indices]
    Delta = np.empty_like(T_pay)
    Delta[0] = T_pay[0] - t_grid[idx_t]
    Delta[1:] = T_pay[1:] - T_pay[:-1]

    denom = np.dot(P_tTi, Delta)
    denom = np.where(denom == 0.0, np.nan, denom)
    F = (P_tT0 - P_tTN) / denom
    return F


# ==========================================================
# 3) European swaption under Hull–White (MC) (same as yours)
# ==========================================================

def price_european_swaption_hw_mc(
    K,
    exercise_index,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_paths,
    n_steps,
    theta_func=None,
    seed=None,
):
    t_grid, r_paths = simulate_hull_white_paths(
        n_paths=n_paths, n_steps=n_steps, T=T, r0=r0, a=a, sigma=sigma,
        theta_func=theta_func, use_sobol=False, antithetic=False, seed=seed
    )
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths, dt)

    idx_T0 = exercise_index
    V_swap_T0 = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_T0)
    payoff = np.maximum(V_swap_T0, 0.0)

    Y = payoff * disc[:, idx_T0]
    return float(np.mean(Y))


# ==========================================================
# 4) LSM policy training (TRAIN SET ONLY)
# ==========================================================

def fit_lsm_policy_coeffs(
    K,
    exercise_indices,
    pay_indices,
    t_grid,
    r_paths_train,
):
    """
    Fit continuation-value regressions on a training sample only.
    Returns coefficients b_j for each exercise date j (except last).
    Basis: [1, S, S^2], with S = forward swap rate.
    """
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths_train, dt)

    exercise_indices = np.asarray(exercise_indices, dtype=int)
    pay_indices = np.asarray(pay_indices, dtype=int)

    n_paths = r_paths_train.shape[0]
    n_ex = len(exercise_indices)

    # Precompute swap value and swap rate on training paths
    swap_values = np.zeros((n_paths, n_ex))
    swap_rates = np.zeros((n_paths, n_ex))
    for j, idx_t in enumerate(exercise_indices):
        swap_values[:, j] = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_t)
        swap_rates[:, j] = forward_swap_rate_on_paths(disc, t_grid, pay_indices, idx_t)

    payoffs = np.maximum(swap_values, 0.0)

    cashflow = np.zeros(n_paths)
    exercise_time_idx = np.full(n_paths, exercise_indices[-1], dtype=int)

    # Last exercise date
    last_idx = exercise_indices[-1]
    last_pos = n_ex - 1
    ex_now = payoffs[:, last_pos] > 0.0
    cashflow[ex_now] = payoffs[ex_now, last_pos]
    exercise_time_idx[ex_now] = last_idx

    coeffs = [None] * n_ex  # coeffs[last_pos] remains None

    # Backward induction: fit continuation regression at each date
    for ex_pos in range(n_ex - 2, -1, -1):
        idx_t = exercise_indices[ex_pos]
        alive = exercise_time_idx > idx_t
        if not np.any(alive):
            coeffs[ex_pos] = None
            continue

        alive_idx = np.where(alive)[0]
        df_ex = disc[alive_idx, exercise_time_idx[alive_idx]]
        df_t = disc[alive_idx, idx_t]
        cont_values = cashflow[alive_idx] * df_ex / df_t

        itm = payoffs[alive_idx, ex_pos] > 0.0
        if np.sum(itm) >= 3:
            Xr = swap_rates[alive_idx, ex_pos][itm]
            Yr = cont_values[itm]
            A = np.vstack([np.ones_like(Xr), Xr, Xr**2]).T
            b, *_ = np.linalg.lstsq(A, Yr, rcond=None)
            coeffs[ex_pos] = b

            X_all = swap_rates[alive_idx, ex_pos]
            A_all = np.vstack([np.ones_like(X_all), X_all, X_all**2]).T
            C_hat = A_all @ b
        else:
            coeffs[ex_pos] = None
            C_hat = np.full_like(cont_values, np.mean(cont_values) if cont_values.size else 0.0)

        immediate = payoffs[alive_idx, ex_pos]
        exercise = immediate > C_hat

        ex_paths = alive_idx[exercise]
        cashflow[ex_paths] = immediate[exercise]
        exercise_time_idx[ex_paths] = idx_t

    return coeffs


# ==========================================================
# 5) Policy evaluation (PRICE SET ONLY) -> pathwise PVs X_i
# ==========================================================

def evaluate_policy_pathwise_pv(
    K,
    exercise_indices,
    pay_indices,
    t_grid,
    r_paths_price,
    coeffs,
):
    """
    Apply a FIXED policy (coeffs from training) on pricing paths.
    Returns pathwise discounted PVs X_i (this is your textbook MC sample).
    """
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths_price, dt)

    exercise_indices = np.asarray(exercise_indices, dtype=int)
    pay_indices = np.asarray(pay_indices, dtype=int)

    n_paths = r_paths_price.shape[0]
    n_ex = len(exercise_indices)

    # Precompute swap values/rates on pricing paths
    swap_values = np.zeros((n_paths, n_ex))
    swap_rates = np.zeros((n_paths, n_ex))
    for j, idx_t in enumerate(exercise_indices):
        swap_values[:, j] = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_t)
        swap_rates[:, j] = forward_swap_rate_on_paths(disc, t_grid, pay_indices, idx_t)

    payoffs = np.maximum(swap_values, 0.0)

    cashflow = np.zeros(n_paths)
    exercise_time_idx = np.full(n_paths, exercise_indices[-1], dtype=int)

    # Last date
    last_idx = exercise_indices[-1]
    last_pos = n_ex - 1
    ex_now = payoffs[:, last_pos] > 0.0
    cashflow[ex_now] = payoffs[ex_now, last_pos]
    exercise_time_idx[ex_now] = last_idx

    for ex_pos in range(n_ex - 2, -1, -1):
        idx_t = exercise_indices[ex_pos]
        alive = exercise_time_idx > idx_t
        if not np.any(alive):
            continue

        alive_idx = np.where(alive)[0]
        df_ex = disc[alive_idx, exercise_time_idx[alive_idx]]
        df_t = disc[alive_idx, idx_t]
        cont_values = cashflow[alive_idx] * df_ex / df_t

        immediate = payoffs[alive_idx, ex_pos]

        b = coeffs[ex_pos]
        if b is None:
            C_hat = np.full_like(cont_values, np.mean(cont_values) if cont_values.size else 0.0)
        else:
            S = swap_rates[alive_idx, ex_pos]
            A = np.vstack([np.ones_like(S), S, S**2]).T
            C_hat = A @ b

        exercise = immediate > C_hat
        ex_paths = alive_idx[exercise]
        cashflow[ex_paths] = immediate[exercise]
        exercise_time_idx[ex_paths] = idx_t

    X = cashflow * disc[np.arange(n_paths), exercise_time_idx]
    return X


def european_pathwise_pv_on_price_paths(
    K,
    exercise_indices,
    pay_indices,
    t_grid,
    r_paths_price,
):
    """
    Compute European swaption pathwise PVs Y_i on the pricing paths.
    """
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths_price, dt)

    idx_T0 = int(exercise_indices[0])
    V_swap_T0 = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_T0)
    payoff = np.maximum(V_swap_T0, 0.0)
    Y = payoff * disc[:, idx_T0]
    return Y


# ==========================================================
# 6) Pricing set generator for method comparisons
# ==========================================================

def simulate_pricing_paths(
    n_paths,
    n_steps,
    T,
    r0,
    a,
    sigma,
    theta_func=None,
    method="standard",  # "standard" | "antithetic" | "sobol"
    seed=None,
):
    use_sobol = (method == "sobol")
    antithetic = (method == "antithetic")
    return simulate_hull_white_paths(
        n_paths=n_paths, n_steps=n_steps, T=T, r0=r0, a=a, sigma=sigma,
        theta_func=theta_func, use_sobol=use_sobol, antithetic=antithetic, seed=seed
    )


def mean_and_within_se(samples):
    """
    Textbook within-run SE for a sample mean: sd(samples)/sqrt(M).
    """
    s = np.asarray(samples, dtype=float)
    m = float(np.mean(s))
    se = float(np.std(s, ddof=1) / np.sqrt(s.size))
    return m, se


# ==========================================================
# 7) ONE RUN: train policy, simulate pricing paths, compute estimators
# ==========================================================

def one_run_train_price(
    *,
    K,
    exercise_indices,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_steps,
    n_train,
    n_price,
    mu_Y,
    beta_fixed=None,         # if None: estimate beta on pricing set
    theta_func=None,
    seed=None,
    pricing_method="standard"  # "standard" | "antithetic" | "sobol"
):
    """
    Returns a dictionary with:
      - price_std, se_std_within
      - price_cv,  se_cv_within, beta_used
    where estimators are textbook sample means over pricing paths.
    """
    # ---- Train policy (always pseudo-random for clarity / stability) ----
    t_grid, r_train = simulate_hull_white_paths(
        n_paths=n_train, n_steps=n_steps, T=T, r0=r0, a=a, sigma=sigma,
        theta_func=theta_func, use_sobol=False, antithetic=False,
        seed=None if seed is None else seed + 999_999
    )
    coeffs = fit_lsm_policy_coeffs(K, exercise_indices, pay_indices, t_grid, r_train)

    # ---- Pricing paths (method-specific) ----
    t_grid, r_price = simulate_pricing_paths(
        n_paths=n_price, n_steps=n_steps, T=T, r0=r0, a=a, sigma=sigma,
        theta_func=theta_func, method=pricing_method, seed=seed
    )

    # Pathwise Bermudan PVs X_i using fixed policy
    X = evaluate_policy_pathwise_pv(K, exercise_indices, pay_indices, t_grid, r_price, coeffs)
    price_std, se_std_within = mean_and_within_se(X)

    # Pathwise European PVs Y_i (same pricing paths)
    Y = european_pathwise_pv_on_price_paths(K, exercise_indices, pay_indices, t_grid, r_price)

    # Beta choice for classical CV
    if beta_fixed is None:
        varY = np.var(Y, ddof=1)
        beta_used = 0.0 if varY <= 0 or not np.isfinite(varY) else (np.cov(X, Y, ddof=1)[0, 1] / varY)
    else:
        beta_used = float(beta_fixed)

    Z = X + beta_used * (mu_Y - Y)   # pathwise classical CV samples
    price_cv, se_cv_within = mean_and_within_se(Z)

    return {
        "price_std": price_std,
        "se_std_within": se_std_within,
        "price_cv": price_cv,
        "se_cv_within": se_cv_within,
        "beta_used": beta_used,
    }


# ==========================================================
# 8) Benchmark harness (apples-to-apples)
# ==========================================================

def benchmark_apples_to_apples(
    *,
    Ms,
    R,
    K,
    exercise_indices,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_steps,
    n_train,
    mu_Y,
    beta_fixed=None,     # set for "textbook fixed beta"; or None for per-run beta
    theta_func=None,
):
    """
    For each M, compare:
      - Standard (policy-eval) with pseudo-random pricing paths
      - Antithetic (policy-eval) with antithetic pricing paths
      - Sobol QMC (policy-eval) with scrambled Sobol pricing paths
      - Classical CV (policy-eval + pathwise CV) for each pricing method
        (here CV is shown under the same pricing generator as standard;
         you can also run CV on antithetic/sobol by changing pricing_method)
    """
    methods = ["standard", "antithetic", "sobol"]

    for M in Ms:
        print(f"\nM (pricing paths) = {M:,} | training paths = {n_train:,} | R = {R}")

        # Collect across-run results per pricing generator
        results = {}

        for pm in methods:
            prices_std = np.zeros(R)
            prices_cv  = np.zeros(R)
            se_within_std = np.zeros(R)
            se_within_cv  = np.zeros(R)
            betas = np.zeros(R)

            start = time.perf_counter()
            for r in range(R):
                seed = 1000 + r
                out = one_run_train_price(
                    K=K,
                    exercise_indices=exercise_indices,
                    pay_indices=pay_indices,
                    T=T,
                    r0=r0,
                    a=a,
                    sigma=sigma,
                    n_steps=n_steps,
                    n_train=n_train,
                    n_price=M,
                    mu_Y=mu_Y,
                    beta_fixed=beta_fixed,
                    theta_func=theta_func,
                    seed=seed,
                    pricing_method=pm,
                )
                prices_std[r] = out["price_std"]
                prices_cv[r]  = out["price_cv"]
                se_within_std[r] = out["se_std_within"]
                se_within_cv[r]  = out["se_cv_within"]
                betas[r] = out["beta_used"]

            elapsed = time.perf_counter() - start

            results[pm] = {
                "std_mean": float(np.mean(prices_std)),
                "std_se_across": float(np.std(prices_std, ddof=1)),
                "std_se_within_avg": float(np.mean(se_within_std)),
                "std_var_across": float(np.var(prices_std, ddof=1)),

                "cv_mean": float(np.mean(prices_cv)),
                "cv_se_across": float(np.std(prices_cv, ddof=1)),
                "cv_se_within_avg": float(np.mean(se_within_cv)),
                "cv_var_across": float(np.var(prices_cv, ddof=1)),

                "beta_mean": float(np.mean(betas)),
                "beta_std": float(np.std(betas, ddof=1)),
                "time": elapsed,
            }

        # Print: for each pricing generator, show standard and CV, plus VRF within that generator
        for pm in methods:
            row = results[pm]
            base_var = row["std_var_across"]
            vrf_cv = base_var / row["cv_var_across"] if row["cv_var_across"] > 0 else np.nan
            red_cv = 100.0 * (1.0 - 1.0 / vrf_cv) if np.isfinite(vrf_cv) and vrf_cv > 0 else np.nan

            label = {"standard": "Pseudo-random", "antithetic": "Antithetic", "sobol": "Scrambled Sobol"}[pm]

            print(f"  Pricing generator: {label}")
            print(f"    Standard mean={row['std_mean']:.6f} | "
                  f"SE_within≈{row['std_se_within_avg']:.6f} | SE_across={row['std_se_across']:.6f}")
            print(f"    Classical CV mean={row['cv_mean']:.6f} | "
                  f"SE_within≈{row['cv_se_within_avg']:.6f} | SE_across={row['cv_se_across']:.6f} | "
                  f"VRF={vrf_cv:.2f} (red={red_cv:.1f}%)")
            print(f"    beta: mean={row['beta_mean']:.4f}, std={row['beta_std']:.4f} | time={row['time']:.2f}s")


# ==========================================================
# 9) Main experiment (textbook-style apples-to-apples)
# ==========================================================

if __name__ == "__main__":
    # Contract and model
    T_swaption = 5.0
    T_swap_maturity = 10.0
    dt = 1.0 / 52.0
    n_steps = int(T_swap_maturity / dt)

    r0 = 0.02
    a = 0.1
    sigma = 0.01
    K = 0.02

    t_grid = np.linspace(0.0, T_swap_maturity, n_steps + 1)
    exercise_times = np.arange(T_swaption, T_swap_maturity + 1e-12, 0.25)
    exercise_indices = [int(round(t / dt)) for t in exercise_times]
    pay_indices = exercise_indices

    # European expectation mu_Y for control variate (high-precision MC)
    mu_Y = price_european_swaption_hw_mc(
        K=K,
        exercise_index=exercise_indices[0],
        pay_indices=pay_indices,
        T=T_swap_maturity,
        r0=r0,
        a=a,
        sigma=sigma,
        n_paths=200_000,
        n_steps=n_steps,
        theta_func=None,
        seed=42,
    )
    print(f"European payer swaption HW price (mu_Y) ≈ {mu_Y:.6f}")

    # If you want "most textbook": fix beta once (pilot), then reuse beta_fixed for all runs.
    # Easiest pilot: estimate beta on one big pricing sample using trained policy.
    # Set beta_fixed=None to estimate beta each run (less textbook, still fine).
    beta_fixed = None

    Ms = [4_096, 8_192, 16_384, 32_768, 65_536, 131_072]
    R = 30
    n_train = 20_000  # fixed training size for fairness

    benchmark_apples_to_apples(
        Ms=Ms,
        R=R,
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        T=T_swap_maturity,
        r0=r0,
        a=a,
        sigma=sigma,
        n_steps=n_steps,
        n_train=n_train,
        mu_Y=mu_Y,
        beta_fixed=beta_fixed,
        theta_func=None,
    )


European payer swaption HW price (mu_Y) ≈ 0.027921

M (pricing paths) = 4,096 | training paths = 20,000 | R = 30
  Pricing generator: Pseudo-random
    Standard mean=0.084790 | SE_within≈0.000882 | SE_across=0.000841
    Classical CV mean=0.085056 | SE_within≈0.000441 | SE_across=0.000495 | VRF=2.89 (red=65.4%)
    beta: mean=1.2513, std=0.0099 | time=33.56s
  Pricing generator: Antithetic
    Standard mean=0.084794 | SE_within≈0.000880 | SE_across=0.000282
    Classical CV mean=0.085182 | SE_within≈0.000441 | SE_across=0.000560 | VRF=0.25 (red=-293.8%)
    beta: mean=1.2500, std=0.0094 | time=31.88s
  Pricing generator: Scrambled Sobol
    Standard mean=0.084725 | SE_within≈0.000883 | SE_across=0.000174
    Classical CV mean=0.085015 | SE_within≈0.000441 | SE_across=0.000310 | VRF=0.32 (red=-215.6%)
    beta: mean=1.2518, std=0.0092 | time=39.83s

M (pricing paths) = 8,192 | training paths = 20,000 | R = 30
  Pricing generator: Pseudo-random
    Standard mean=0.084727 | SE_within≈0.00

In [11]:
import numpy as np
from scipy.stats import norm, qmc
import time

# ==========================================================
# 1) Hull–White short-rate path simulator
# ==========================================================

def simulate_hull_white_paths(
    n_paths,
    n_steps,
    T,
    r0,
    a,
    sigma,
    theta_func=None,
    use_sobol=False,
    antithetic=False,
    seed=None,
):
    """
    Simulate short-rate paths under one-factor Hull–White (Euler):
        dr_t = [theta(t) - a r_t] dt + sigma dW_t
    """
    dt = T / n_steps
    t_grid = np.linspace(0.0, T, n_steps + 1)

    if theta_func is None:
        def theta_func(t):
            return a * r0

    if use_sobol:
        sobol_engine = qmc.Sobol(d=n_steps, scramble=True, seed=seed)
        u = sobol_engine.random(n_paths)
        eps = np.finfo(float).eps
        u = np.clip(u, eps, 1 - eps)
        Z = norm.ppf(u)
    else:
        rng = np.random.default_rng(seed)
        if antithetic:
            n_half = (n_paths + 1) // 2
            Z_half = rng.standard_normal(size=(n_half, n_steps))
            Z = np.vstack([Z_half, -Z_half])[:n_paths, :]
        else:
            Z = rng.standard_normal(size=(n_paths, n_steps))

    r_paths = np.zeros((n_paths, n_steps + 1))
    r_paths[:, 0] = r0

    for i in range(n_steps):
        t = t_grid[i]
        theta_t = theta_func(t)
        r_t = r_paths[:, i]
        dr = (theta_t - a * r_t) * dt + sigma * np.sqrt(dt) * Z[:, i]
        r_paths[:, i + 1] = r_t + dr

    return t_grid, r_paths


# ==========================================================
# 2) Discount factors and swap primitives
# ==========================================================

def compute_discount_factors(r_paths, dt):
    integrals = np.cumsum(r_paths[:, :-1] * dt, axis=1)
    integrals = np.hstack([np.zeros((r_paths.shape[0], 1)), integrals])
    disc = np.exp(-integrals)
    return disc


def bond_price_on_paths(disc, idx_t, idx_T):
    return disc[:, idx_T] / disc[:, idx_t]


def swap_value_on_paths(disc, t_grid, K, pay_indices, idx_t):
    """
    Payer swap value at time index idx_t:
      V_swap(t) = P(t,T0) - P(t,TN) - K * sum_i Delta_i P(t,Ti)
    """
    pay_indices = np.asarray(pay_indices, dtype=int)
    idx_T0 = pay_indices[0]
    idx_TN = pay_indices[-1]

    P_tT0 = bond_price_on_paths(disc, idx_t, idx_T0)
    P_tTN = bond_price_on_paths(disc, idx_t, idx_TN)
    P_tTi = disc[:, pay_indices] / disc[:, [idx_t]]

    T_pay = t_grid[pay_indices]
    Delta = np.empty_like(T_pay)
    Delta[0] = T_pay[0] - t_grid[idx_t]
    Delta[1:] = T_pay[1:] - T_pay[:-1]

    fixed_leg = K * np.dot(P_tTi, Delta)
    V_swap = P_tT0 - P_tTN - fixed_leg
    return V_swap


def forward_swap_rate_on_paths(disc, t_grid, pay_indices, idx_t):
    """
    Forward par swap rate at time index idx_t.
    """
    pay_indices = np.asarray(pay_indices, dtype=int)
    idx_T0 = pay_indices[0]
    idx_TN = pay_indices[-1]

    P_tT0 = bond_price_on_paths(disc, idx_t, idx_T0)
    P_tTN = bond_price_on_paths(disc, idx_t, idx_TN)
    P_tTi = disc[:, pay_indices] / disc[:, [idx_t]]

    T_pay = t_grid[pay_indices]
    Delta = np.empty_like(T_pay)
    Delta[0] = T_pay[0] - t_grid[idx_t]
    Delta[1:] = T_pay[1:] - T_pay[:-1]

    denom = np.dot(P_tTi, Delta)
    denom = np.where(denom == 0.0, np.nan, denom)
    F = (P_tT0 - P_tTN) / denom
    return F


# ==========================================================
# 3) European swaption under Hull–White (MC)
# ==========================================================

def price_european_swaption_hw_mc(
    K,
    exercise_index,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_paths,
    n_steps,
    theta_func=None,
    seed=None,
):
    t_grid, r_paths = simulate_hull_white_paths(
        n_paths=n_paths, n_steps=n_steps, T=T, r0=r0, a=a, sigma=sigma,
        theta_func=theta_func, use_sobol=False, antithetic=False, seed=seed
    )
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths, dt)

    idx_T0 = exercise_index
    V_swap_T0 = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_T0)
    payoff = np.maximum(V_swap_T0, 0.0)

    Y = payoff * disc[:, idx_T0]
    return float(np.mean(Y))


# ==========================================================
# 4) LSM policy training (TRAIN SET ONLY)
# ==========================================================

def fit_lsm_policy_coeffs(
    K,
    exercise_indices,
    pay_indices,
    t_grid,
    r_paths_train,
):
    """
    Fit continuation-value regressions on a training sample only.
    Returns coefficients b_j for each exercise date j (except last).
    Basis: [1, S, S^2], with S = forward swap rate.
    """
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths_train, dt)

    exercise_indices = np.asarray(exercise_indices, dtype=int)
    pay_indices = np.asarray(pay_indices, dtype=int)

    n_paths = r_paths_train.shape[0]
    n_ex = len(exercise_indices)

    # Precompute swap value and swap rate on training paths
    swap_values = np.zeros((n_paths, n_ex))
    swap_rates = np.zeros((n_paths, n_ex))
    for j, idx_t in enumerate(exercise_indices):
        swap_values[:, j] = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_t)
        swap_rates[:, j] = forward_swap_rate_on_paths(disc, t_grid, pay_indices, idx_t)

    payoffs = np.maximum(swap_values, 0.0)

    cashflow = np.zeros(n_paths)
    exercise_time_idx = np.full(n_paths, exercise_indices[-1], dtype=int)

    # Last exercise date
    last_idx = exercise_indices[-1]
    last_pos = n_ex - 1
    ex_now = payoffs[:, last_pos] > 0.0
    cashflow[ex_now] = payoffs[ex_now, last_pos]
    exercise_time_idx[ex_now] = last_idx

    coeffs = [None] * n_ex  # coeffs[last_pos] remains None

    # Backward induction: fit continuation regression at each date
    for ex_pos in range(n_ex - 2, -1, -1):
        idx_t = exercise_indices[ex_pos]
        alive = exercise_time_idx > idx_t
        if not np.any(alive):
            coeffs[ex_pos] = None
            continue

        alive_idx = np.where(alive)[0]
        df_ex = disc[alive_idx, exercise_time_idx[alive_idx]]
        df_t = disc[alive_idx, idx_t]
        cont_values = cashflow[alive_idx] * df_ex / df_t

        itm = payoffs[alive_idx, ex_pos] > 0.0
        if np.sum(itm) >= 3:
            Xr = swap_rates[alive_idx, ex_pos][itm]
            Yr = cont_values[itm]
            A = np.vstack([np.ones_like(Xr), Xr, Xr**2]).T
            b, *_ = np.linalg.lstsq(A, Yr, rcond=None)
            coeffs[ex_pos] = b

            X_all = swap_rates[alive_idx, ex_pos]
            A_all = np.vstack([np.ones_like(X_all), X_all, X_all**2]).T
            C_hat = A_all @ b
        else:
            coeffs[ex_pos] = None
            C_hat = np.full_like(cont_values, np.mean(cont_values) if cont_values.size else 0.0)

        immediate = payoffs[alive_idx, ex_pos]
        exercise = immediate > C_hat

        ex_paths = alive_idx[exercise]
        cashflow[ex_paths] = immediate[exercise]
        exercise_time_idx[ex_paths] = idx_t

    return coeffs


# ==========================================================
# 5) Policy evaluation (PRICE SET ONLY) -> pathwise PVs X_i
# ==========================================================

def evaluate_policy_pathwise_pv(
    K,
    exercise_indices,
    pay_indices,
    t_grid,
    r_paths_price,
    coeffs,
):
    """
    Apply a FIXED policy (coeffs from training) on pricing paths.
    Returns pathwise discounted PVs X_i.
    """
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths_price, dt)

    exercise_indices = np.asarray(exercise_indices, dtype=int)
    pay_indices = np.asarray(pay_indices, dtype=int)

    n_paths = r_paths_price.shape[0]
    n_ex = len(exercise_indices)

    # Precompute swap values/rates on pricing paths
    swap_values = np.zeros((n_paths, n_ex))
    swap_rates = np.zeros((n_paths, n_ex))
    for j, idx_t in enumerate(exercise_indices):
        swap_values[:, j] = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_t)
        swap_rates[:, j] = forward_swap_rate_on_paths(disc, t_grid, pay_indices, idx_t)

    payoffs = np.maximum(swap_values, 0.0)

    cashflow = np.zeros(n_paths)
    exercise_time_idx = np.full(n_paths, exercise_indices[-1], dtype=int)

    # Last date
    last_idx = exercise_indices[-1]
    last_pos = n_ex - 1
    ex_now = payoffs[:, last_pos] > 0.0
    cashflow[ex_now] = payoffs[ex_now, last_pos]
    exercise_time_idx[ex_now] = last_idx

    for ex_pos in range(n_ex - 2, -1, -1):
        idx_t = exercise_indices[ex_pos]
        alive = exercise_time_idx > idx_t
        if not np.any(alive):
            continue

        alive_idx = np.where(alive)[0]
        df_ex = disc[alive_idx, exercise_time_idx[alive_idx]]
        df_t = disc[alive_idx, idx_t]
        cont_values = cashflow[alive_idx] * df_ex / df_t

        immediate = payoffs[alive_idx, ex_pos]

        b = coeffs[ex_pos]
        if b is None:
            C_hat = np.full_like(cont_values, np.mean(cont_values) if cont_values.size else 0.0)
        else:
            S = swap_rates[alive_idx, ex_pos]
            A = np.vstack([np.ones_like(S), S, S**2]).T
            C_hat = A @ b

        exercise = immediate > C_hat
        ex_paths = alive_idx[exercise]
        cashflow[ex_paths] = immediate[exercise]
        exercise_time_idx[ex_paths] = idx_t

    X = cashflow * disc[np.arange(n_paths), exercise_time_idx]
    return X


def european_pathwise_pv_on_price_paths(
    K,
    exercise_indices,
    pay_indices,
    t_grid,
    r_paths_price,
):
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths_price, dt)

    idx_T0 = int(exercise_indices[0])
    V_swap_T0 = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_T0)
    payoff = np.maximum(V_swap_T0, 0.0)
    Y = payoff * disc[:, idx_T0]
    return Y


# ==========================================================
# 6) Pricing-set generator for method comparisons
# ==========================================================

def simulate_pricing_paths(
    n_paths,
    n_steps,
    T,
    r0,
    a,
    sigma,
    theta_func=None,
    method="standard",  # "standard" | "antithetic" | "sobol"
    seed=None,
):
    use_sobol = (method == "sobol")
    antithetic = (method == "antithetic")
    return simulate_hull_white_paths(
        n_paths=n_paths,
        n_steps=n_steps,
        T=T,
        r0=r0,
        a=a,
        sigma=sigma,
        theta_func=theta_func,
        use_sobol=use_sobol,
        antithetic=antithetic,
        seed=seed,
    )


# ==========================================================
# 7) One run: PRICING ONLY (policy + beta are fixed)
# ==========================================================

def one_run_pricing_only(
    *,
    K,
    exercise_indices,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_steps,
    n_price,
    mu_Y,
    policy_coeffs,
    beta_star,
    theta_func=None,
    seed=None,
    pricing_method="standard",
):
    t_grid, r_price = simulate_pricing_paths(
        n_paths=n_price,
        n_steps=n_steps,
        T=T,
        r0=r0,
        a=a,
        sigma=sigma,
        theta_func=theta_func,
        method=pricing_method,
        seed=seed,
    )

    X = evaluate_policy_pathwise_pv(
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        t_grid=t_grid,
        r_paths_price=r_price,
        coeffs=policy_coeffs,
    )
    Y = european_pathwise_pv_on_price_paths(
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        t_grid=t_grid,
        r_paths_price=r_price,
    )

    price_std = float(np.mean(X))
    Z = X + beta_star * (mu_Y - Y)
    price_cv = float(np.mean(Z))

    return price_std, price_cv


# ==========================================================
# 8) Benchmark harness (textbook apples-to-apples)
# ==========================================================

def benchmark_textbook(
    *,
    Ms,
    R,
    K,
    exercise_indices,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_steps,
    mu_Y,
    policy_coeffs,
    beta_star,
    theta_func=None,
):
    methods = ["standard", "antithetic", "sobol"]

    for M in Ms:
        print(f"\nM (pricing paths) = {M:,} | R = {R}")
        for pm in methods:
            prices_std = np.zeros(R)
            prices_cv = np.zeros(R)

            start = time.perf_counter()
            for r in range(R):
                seed = 1000 + r
                p_std, p_cv = one_run_pricing_only(
                    K=K,
                    exercise_indices=exercise_indices,
                    pay_indices=pay_indices,
                    T=T,
                    r0=r0,
                    a=a,
                    sigma=sigma,
                    n_steps=n_steps,
                    n_price=M,
                    mu_Y=mu_Y,
                    policy_coeffs=policy_coeffs,
                    beta_star=beta_star,
                    theta_func=theta_func,
                    seed=seed,
                    pricing_method=pm,
                )
                prices_std[r] = p_std
                prices_cv[r] = p_cv

            elapsed = time.perf_counter() - start

            var_std = float(np.var(prices_std, ddof=1))
            var_cv = float(np.var(prices_cv, ddof=1))
            vrf = var_std / var_cv if var_cv > 0 else np.nan
            red = 100.0 * (1.0 - 1.0 / vrf) if np.isfinite(vrf) and vrf > 0 else np.nan

            label = {"standard": "Pseudo-random", "antithetic": "Antithetic", "sobol": "Scrambled Sobol"}[pm]
            print(
                f"  {label:14s}: "
                f"Std mean={prices_std.mean():.6f}, SE_across={np.std(prices_std, ddof=1):.6f} | "
                f"CV mean={prices_cv.mean():.6f}, SE_across={np.std(prices_cv, ddof=1):.6f} | "
                f"VRF={vrf:.2f} (red={red:.1f}%) | time={elapsed:.2f}s"
            )


# ==========================================================
# 9) Main experiment (train once, beta once, pricing varies)
# ==========================================================

if __name__ == "__main__":
    # Contract and model
    T_swaption = 5.0
    T_swap_maturity = 10.0
    dt = 1.0 / 52.0
    n_steps = int(T_swap_maturity / dt)

    r0 = 0.02
    a = 0.1
    sigma = 0.01
    K = 0.02

    t_grid = np.linspace(0.0, T_swap_maturity, n_steps + 1)
    exercise_times = np.arange(T_swaption, T_swap_maturity + 1e-12, 0.25)
    exercise_indices = [int(round(t / dt)) for t in exercise_times]
    pay_indices = exercise_indices

    # --- mu_Y for CV (high-precision MC) ---
    mu_Y = price_european_swaption_hw_mc(
        K=K,
        exercise_index=exercise_indices[0],
        pay_indices=pay_indices,
        T=T_swap_maturity,
        r0=r0,
        a=a,
        sigma=sigma,
        n_paths=200_000,
        n_steps=n_steps,
        theta_func=None,
        seed=42,
    )
    print(f"European payer swaption HW price (mu_Y) ≈ {mu_Y:.6f}\n")

    # ======================================================
    # Train the LSM policy ONCE and freeze it
    # ======================================================
    TRAIN_SEED = 777
    n_train = 20_000

    t_grid_train, r_train = simulate_hull_white_paths(
        n_paths=n_train,
        n_steps=n_steps,
        T=T_swap_maturity,
        r0=r0,
        a=a,
        sigma=sigma,
        theta_func=None,
        use_sobol=False,
        antithetic=False,
        seed=TRAIN_SEED,
    )

    policy_coeffs = fit_lsm_policy_coeffs(
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        t_grid=t_grid_train,
        r_paths_train=r_train,
    )

    # ======================================================
    # Estimate beta* ONCE (pilot) and freeze it
    # ======================================================
    PILOT_SEED = 888
    n_pilot = 100_000

    t_grid_pilot, r_pilot = simulate_hull_white_paths(
        n_paths=n_pilot,
        n_steps=n_steps,
        T=T_swap_maturity,
        r0=r0,
        a=a,
        sigma=sigma,
        theta_func=None,
        use_sobol=False,
        antithetic=False,
        seed=PILOT_SEED,
    )

    X_pilot = evaluate_policy_pathwise_pv(
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        t_grid=t_grid_pilot,
        r_paths_price=r_pilot,
        coeffs=policy_coeffs,
    )

    Y_pilot = european_pathwise_pv_on_price_paths(
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        t_grid=t_grid_pilot,
        r_paths_price=r_pilot,
    )

    varY = np.var(Y_pilot, ddof=1)
    beta_star = float(np.cov(X_pilot, Y_pilot, ddof=1)[0, 1] / varY) if varY > 0 else 0.0
    print(f"Fixed pilot beta* ≈ {beta_star:.4f}\n")

    # --- Benchmark ---
    Ms = [4_096, 8_192, 16_384, 32_768, 65_536, 131_072]
    R = 30

    benchmark_textbook(
        Ms=Ms,
        R=R,
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        T=T_swap_maturity,
        r0=r0,
        a=a,
        sigma=sigma,
        n_steps=n_steps,
        mu_Y=mu_Y,
        policy_coeffs=policy_coeffs,
        beta_star=beta_star,
        theta_func=None,
    )


European payer swaption HW price (mu_Y) ≈ 0.027921

Fixed pilot beta* ≈ 1.2463


M (pricing paths) = 4,096 | R = 30
  Pseudo-random : Std mean=0.084720, SE_across=0.000801 | CV mean=0.084984, SE_across=0.000467 | VRF=2.94 (red=66.0%) | time=5.17s
  Antithetic    : Std mean=0.084725, SE_across=0.000211 | CV mean=0.085113, SE_across=0.000545 | VRF=0.15 (red=-567.0%) | time=8.44s
  Scrambled Sobol: Std mean=0.084656, SE_across=0.000150 | CV mean=0.084944, SE_across=0.000292 | VRF=0.26 (red=-279.0%) | time=12.30s

M (pricing paths) = 8,192 | R = 30
  Pseudo-random : Std mean=0.084659, SE_across=0.000578 | CV mean=0.084953, SE_across=0.000308 | VRF=3.52 (red=71.6%) | time=17.40s
  Antithetic    : Std mean=0.084706, SE_across=0.000149 | CV mean=0.084988, SE_across=0.000421 | VRF=0.13 (red=-693.9%) | time=17.52s
  Scrambled Sobol: Std mean=0.084708, SE_across=0.000116 | CV mean=0.084959, SE_across=0.000237 | VRF=0.24 (red=-320.7%) | time=21.91s

M (pricing paths) = 16,384 | R = 30
  Pseudo-ra

In [None]:
import numpy as np
from scipy.stats import norm, qmc
import time

# ==========================================================
# 1) Hull–White short-rate path simulator
# ==========================================================

def simulate_hull_white_paths(
    n_paths,
    n_steps,
    T,
    r0,
    a,
    sigma,
    theta_func=None,
    use_sobol=False,
    antithetic=False,
    seed=None,
):
    """
    Simulate short-rate paths under one-factor Hull–White (Euler):
        dr_t = [theta(t) - a r_t] dt + sigma dW_t
    """
    dt = T / n_steps
    t_grid = np.linspace(0.0, T, n_steps + 1)

    if theta_func is None:
        def theta_func(t):
            return a * r0

    if use_sobol:
        sobol_engine = qmc.Sobol(d=n_steps, scramble=True, seed=seed)
        u = sobol_engine.random(n_paths)
        eps = np.finfo(float).eps
        u = np.clip(u, eps, 1 - eps)
        Z = norm.ppf(u)
    else:
        rng = np.random.default_rng(seed)
        if antithetic:
            n_half = (n_paths + 1) // 2
            Z_half = rng.standard_normal(size=(n_half, n_steps))
            Z = np.vstack([Z_half, -Z_half])[:n_paths, :]
        else:
            Z = rng.standard_normal(size=(n_paths, n_steps))

    r_paths = np.zeros((n_paths, n_steps + 1))
    r_paths[:, 0] = r0

    for i in range(n_steps):
        t = t_grid[i]
        theta_t = theta_func(t)
        r_t = r_paths[:, i]
        dr = (theta_t - a * r_t) * dt + sigma * np.sqrt(dt) * Z[:, i]
        r_paths[:, i + 1] = r_t + dr

    return t_grid, r_paths


# ==========================================================
# 2) Discount factors and swap primitives
# ==========================================================

def compute_discount_factors(r_paths, dt):
    integrals = np.cumsum(r_paths[:, :-1] * dt, axis=1)
    integrals = np.hstack([np.zeros((r_paths.shape[0], 1)), integrals])
    disc = np.exp(-integrals)
    return disc


def bond_price_on_paths(disc, idx_t, idx_T):
    return disc[:, idx_T] / disc[:, idx_t]


def swap_value_on_paths(disc, t_grid, K, pay_indices, idx_t):
    """
    Payer swap value at time index idx_t:
      V_swap(t) = P(t,T0) - P(t,TN) - K * sum_i Delta_i P(t,Ti)
    """
    pay_indices = np.asarray(pay_indices, dtype=int)
    idx_T0 = pay_indices[0]
    idx_TN = pay_indices[-1]

    P_tT0 = bond_price_on_paths(disc, idx_t, idx_T0)
    P_tTN = bond_price_on_paths(disc, idx_t, idx_TN)
    P_tTi = disc[:, pay_indices] / disc[:, [idx_t]]

    T_pay = t_grid[pay_indices]
    Delta = np.empty_like(T_pay)
    Delta[0] = T_pay[0] - t_grid[idx_t]
    Delta[1:] = T_pay[1:] - T_pay[:-1]

    fixed_leg = K * np.dot(P_tTi, Delta)
    V_swap = P_tT0 - P_tTN - fixed_leg
    return V_swap


def forward_swap_rate_on_paths(disc, t_grid, pay_indices, idx_t):
    """
    Forward par swap rate at time index idx_t.
    """
    pay_indices = np.asarray(pay_indices, dtype=int)
    idx_T0 = pay_indices[0]
    idx_TN = pay_indices[-1]

    P_tT0 = bond_price_on_paths(disc, idx_t, idx_T0)
    P_tTN = bond_price_on_paths(disc, idx_t, idx_TN)
    P_tTi = disc[:, pay_indices] / disc[:, [idx_t]]

    T_pay = t_grid[pay_indices]
    Delta = np.empty_like(T_pay)
    Delta[0] = T_pay[0] - t_grid[idx_t]
    Delta[1:] = T_pay[1:] - T_pay[:-1]

    denom = np.dot(P_tTi, Delta)
    denom = np.where(denom == 0.0, np.nan, denom)
    F = (P_tT0 - P_tTN) / denom
    return F


# ==========================================================
# 3) European swaption under Hull–White (MC)
# ==========================================================

def price_european_swaption_hw_mc(
    K,
    exercise_index,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_paths,
    n_steps,
    theta_func=None,
    seed=None,
):
    t_grid, r_paths = simulate_hull_white_paths(
        n_paths=n_paths, n_steps=n_steps, T=T, r0=r0, a=a, sigma=sigma,
        theta_func=theta_func, use_sobol=False, antithetic=False, seed=seed
    )
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths, dt)

    idx_T0 = exercise_index
    V_swap_T0 = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_T0)
    payoff = np.maximum(V_swap_T0, 0.0)

    Y = payoff * disc[:, idx_T0]
    return float(np.mean(Y))


# ==========================================================
# 4) LSM policy training (TRAIN SET ONLY)
# ==========================================================

def fit_lsm_policy_coeffs(
    K,
    exercise_indices,
    pay_indices,
    t_grid,
    r_paths_train,
):
    """
    Fit continuation-value regressions on a training sample only.
    Returns coefficients b_j for each exercise date j (except last).
    Basis: [1, S, S^2], with S = forward swap rate.
    """
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths_train, dt)

    exercise_indices = np.asarray(exercise_indices, dtype=int)
    pay_indices = np.asarray(pay_indices, dtype=int)

    n_paths = r_paths_train.shape[0]
    n_ex = len(exercise_indices)

    swap_values = np.zeros((n_paths, n_ex))
    swap_rates = np.zeros((n_paths, n_ex))
    for j, idx_t in enumerate(exercise_indices):
        swap_values[:, j] = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_t)
        swap_rates[:, j] = forward_swap_rate_on_paths(disc, t_grid, pay_indices, idx_t)

    payoffs = np.maximum(swap_values, 0.0)

    cashflow = np.zeros(n_paths)
    exercise_time_idx = np.full(n_paths, exercise_indices[-1], dtype=int)

    last_idx = exercise_indices[-1]
    last_pos = n_ex - 1
    ex_now = payoffs[:, last_pos] > 0.0
    cashflow[ex_now] = payoffs[ex_now, last_pos]
    exercise_time_idx[ex_now] = last_idx

    coeffs = [None] * n_ex  # last stays None

    for ex_pos in range(n_ex - 2, -1, -1):
        idx_t = exercise_indices[ex_pos]
        alive = exercise_time_idx > idx_t
        if not np.any(alive):
            coeffs[ex_pos] = None
            continue

        alive_idx = np.where(alive)[0]
        df_ex = disc[alive_idx, exercise_time_idx[alive_idx]]
        df_t = disc[alive_idx, idx_t]
        cont_values = cashflow[alive_idx] * df_ex / df_t

        itm = payoffs[alive_idx, ex_pos] > 0.0
        if np.sum(itm) >= 3:
            S_itm = swap_rates[alive_idx, ex_pos][itm]
            Y_itm = cont_values[itm]
            A = np.vstack([np.ones_like(S_itm), S_itm, S_itm**2]).T
            b, *_ = np.linalg.lstsq(A, Y_itm, rcond=None)
            coeffs[ex_pos] = b

            S_all = swap_rates[alive_idx, ex_pos]
            A_all = np.vstack([np.ones_like(S_all), S_all, S_all**2]).T
            C_hat = A_all @ b
        else:
            coeffs[ex_pos] = None
            C_hat = np.full_like(cont_values, np.mean(cont_values) if cont_values.size else 0.0)

        immediate = payoffs[alive_idx, ex_pos]
        exercise = immediate > C_hat

        ex_paths = alive_idx[exercise]
        cashflow[ex_paths] = immediate[exercise]
        exercise_time_idx[ex_paths] = idx_t

    return coeffs


# ==========================================================
# 5) Policy evaluation (PRICE SET ONLY) -> pathwise PVs X_i
# ==========================================================

def evaluate_policy_pathwise_pv(
    K,
    exercise_indices,
    pay_indices,
    t_grid,
    r_paths_price,
    coeffs,
):
    """
    Apply a FIXED policy (coeffs from training) on pricing paths.
    Returns pathwise discounted PVs X_i.
    """
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths_price, dt)

    exercise_indices = np.asarray(exercise_indices, dtype=int)
    pay_indices = np.asarray(pay_indices, dtype=int)

    n_paths = r_paths_price.shape[0]
    n_ex = len(exercise_indices)

    swap_values = np.zeros((n_paths, n_ex))
    swap_rates = np.zeros((n_paths, n_ex))
    for j, idx_t in enumerate(exercise_indices):
        swap_values[:, j] = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_t)
        swap_rates[:, j] = forward_swap_rate_on_paths(disc, t_grid, pay_indices, idx_t)

    payoffs = np.maximum(swap_values, 0.0)

    cashflow = np.zeros(n_paths)
    exercise_time_idx = np.full(n_paths, exercise_indices[-1], dtype=int)

    last_idx = exercise_indices[-1]
    last_pos = n_ex - 1
    ex_now = payoffs[:, last_pos] > 0.0
    cashflow[ex_now] = payoffs[ex_now, last_pos]
    exercise_time_idx[ex_now] = last_idx

    for ex_pos in range(n_ex - 2, -1, -1):
        idx_t = exercise_indices[ex_pos]
        alive = exercise_time_idx > idx_t
        if not np.any(alive):
            continue

        alive_idx = np.where(alive)[0]
        df_ex = disc[alive_idx, exercise_time_idx[alive_idx]]
        df_t = disc[alive_idx, idx_t]
        cont_values = cashflow[alive_idx] * df_ex / df_t

        immediate = payoffs[alive_idx, ex_pos]

        b = coeffs[ex_pos]
        if b is None:
            C_hat = np.full_like(cont_values, np.mean(cont_values) if cont_values.size else 0.0)
        else:
            S = swap_rates[alive_idx, ex_pos]
            A = np.vstack([np.ones_like(S), S, S**2]).T
            C_hat = A @ b

        exercise = immediate > C_hat
        ex_paths = alive_idx[exercise]
        cashflow[ex_paths] = immediate[exercise]
        exercise_time_idx[ex_paths] = idx_t

    X = cashflow * disc[np.arange(n_paths), exercise_time_idx]
    return X


def european_pathwise_pv_on_price_paths(
    K,
    exercise_indices,
    pay_indices,
    t_grid,
    r_paths_price,
):
    dt = t_grid[1] - t_grid[0]
    disc = compute_discount_factors(r_paths_price, dt)

    idx_T0 = int(exercise_indices[0])
    V_swap_T0 = swap_value_on_paths(disc, t_grid, K, pay_indices, idx_T0)
    payoff = np.maximum(V_swap_T0, 0.0)
    Y = payoff * disc[:, idx_T0]
    return Y


# ==========================================================
# 6) Pricing-set generator for method comparisons
# ==========================================================

def simulate_pricing_paths(
    n_paths,
    n_steps,
    T,
    r0,
    a,
    sigma,
    theta_func=None,
    method="standard",  # "standard" | "antithetic" | "sobol"
    seed=None,
):
    use_sobol = (method == "sobol")
    antithetic = (method == "antithetic")
    return simulate_hull_white_paths(
        n_paths=n_paths,
        n_steps=n_steps,
        T=T,
        r0=r0,
        a=a,
        sigma=sigma,
        theta_func=theta_func,
        use_sobol=use_sobol,
        antithetic=antithetic,
        seed=seed,
    )


# ==========================================================
# 7) Helpers: effective samples + correlation
# ==========================================================

def effective_samples(X, Y, method):
    """
    For antithetic, work on pair-averaged observations to match the actual estimator.
    For standard/sobol, return as-is.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)

    if method != "antithetic":
        return X, Y

    n = X.shape[0]
    m = n // 2
    if m == 0:
        return X, Y

    Xp = 0.5 * (X[0:2*m:2] + X[1:2*m:2])
    Yp = 0.5 * (Y[0:2*m:2] + Y[1:2*m:2])

    if 2*m < n:
        Xp = np.concatenate([Xp, X[2*m:]])
        Yp = np.concatenate([Yp, Y[2*m:]])

    return Xp, Yp


def corr_xy(X, Y):
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    vx = np.var(X, ddof=1)
    vy = np.var(Y, ddof=1)
    if vx <= 0.0 or vy <= 0.0 or (not np.isfinite(vx)) or (not np.isfinite(vy)):
        return np.nan
    cxy = np.cov(X, Y, ddof=1)[0, 1]
    return float(cxy / np.sqrt(vx * vy))


# ==========================================================
# 8) One run: PRICING ONLY (policy + beta fixed)
#    -> returns separate timings for Std and incremental CV
# ==========================================================

def one_run_pricing_only(
    *,
    K,
    exercise_indices,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_steps,
    n_price,
    mu_Y,
    policy_coeffs,
    beta_star,
    theta_func=None,
    seed=None,
    pricing_method="standard",
):
    # Time "Std": simulation + Bermudan evaluation + mean
    t0 = time.perf_counter()
    t_grid, r_price = simulate_pricing_paths(
        n_paths=n_price,
        n_steps=n_steps,
        T=T,
        r0=r0,
        a=a,
        sigma=sigma,
        theta_func=theta_func,
        method=pricing_method,
        seed=seed,
    )

    X = evaluate_policy_pathwise_pv(
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        t_grid=t_grid,
        r_paths_price=r_price,
        coeffs=policy_coeffs,
    )
    price_std = float(np.mean(X))
    t_std = time.perf_counter() - t0

    # Time "CV": incremental cost given paths (compute Y + combine + mean)
    t1 = time.perf_counter()
    Y = european_pathwise_pv_on_price_paths(
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        t_grid=t_grid,
        r_paths_price=r_price,
    )
    Z = X + beta_star * (mu_Y - Y)
    price_cv = float(np.mean(Z))
    t_cv = time.perf_counter() - t1

    Xeff, Yeff = effective_samples(X, Y, pricing_method)
    rho_hat = corr_xy(Xeff, Yeff)

    return price_std, price_cv, rho_hat, t_std, t_cv


# ==========================================================
# 9) Pilot: estimate beta* per method and freeze it
# ==========================================================

def estimate_beta_star_for_method(
    *,
    method,
    K,
    exercise_indices,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_steps,
    n_pilot,
    policy_coeffs,
    theta_func=None,
    seed=None,
):
    t_grid_pilot, r_pilot = simulate_pricing_paths(
        n_paths=n_pilot,
        n_steps=n_steps,
        T=T,
        r0=r0,
        a=a,
        sigma=sigma,
        theta_func=theta_func,
        method=method,
        seed=seed,
    )

    X = evaluate_policy_pathwise_pv(
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        t_grid=t_grid_pilot,
        r_paths_price=r_pilot,
        coeffs=policy_coeffs,
    )
    Y = european_pathwise_pv_on_price_paths(
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        t_grid=t_grid_pilot,
        r_paths_price=r_pilot,
    )

    Xeff, Yeff = effective_samples(X, Y, method)

    varY = np.var(Yeff, ddof=1)
    beta_star = float(np.cov(Xeff, Yeff, ddof=1)[0, 1] / varY) if varY > 0 else 0.0
    rho = corr_xy(Xeff, Yeff)
    return beta_star, rho


# ==========================================================
# 10) Benchmark harness: method-specific beta* and timings
# ==========================================================

def benchmark_textbook(
    *,
    Ms,
    R,
    K,
    exercise_indices,
    pay_indices,
    T,
    r0,
    a,
    sigma,
    n_steps,
    mu_Y,
    policy_coeffs,
    beta_star_by_method,  # dict: method -> beta*
    theta_func=None,
):
    methods = ["standard", "antithetic", "sobol"]

    for M in Ms:
        print(f"\nM (pricing paths) = {M:,} | R = {R}")
        for pm in methods:
            beta_star = float(beta_star_by_method[pm])

            prices_std = np.zeros(R)
            prices_cv = np.zeros(R)
            rhos = np.zeros(R)

            t_std_runs = np.zeros(R)
            t_cv_runs = np.zeros(R)

            for r in range(R):
                seed = 1000 + r
                p_std, p_cv, rho_hat, t_std, t_cv = one_run_pricing_only(
                    K=K,
                    exercise_indices=exercise_indices,
                    pay_indices=pay_indices,
                    T=T,
                    r0=r0,
                    a=a,
                    sigma=sigma,
                    n_steps=n_steps,
                    n_price=M,
                    mu_Y=mu_Y,
                    policy_coeffs=policy_coeffs,
                    beta_star=beta_star,
                    theta_func=theta_func,
                    seed=seed,
                    pricing_method=pm,
                )
                prices_std[r] = p_std
                prices_cv[r] = p_cv
                rhos[r] = rho_hat
                t_std_runs[r] = t_std
                t_cv_runs[r] = t_cv

            var_std = float(np.var(prices_std, ddof=1))
            var_cv = float(np.var(prices_cv, ddof=1))

            rho_bar = float(np.nanmean(rhos))
            var_cv_pred = var_std * (1.0 - rho_bar**2) if np.isfinite(rho_bar) else np.nan

            vrf = var_std / var_cv if var_cv > 0 else np.nan
            red = 100.0 * (1.0 - 1.0 / vrf) if np.isfinite(vrf) and vrf > 0 else np.nan

            label = {"standard": "Pseudo-random", "antithetic": "Antithetic", "sobol": "Scrambled Sobol"}[pm]

            t_std_mean = float(np.mean(t_std_runs))
            t_cv_mean = float(np.mean(t_cv_runs))
            t_tot_mean = t_std_mean + t_cv_mean

            print(
                f"  {label:14s}: beta*={beta_star:.4f} | "
                f"Std mean={prices_std.mean():.6f}, SE_across={np.std(prices_std, ddof=1):.6f}, time_std={t_std_mean:.2f}s | "
                f"CV mean={prices_cv.mean():.6f}, SE_across={np.std(prices_cv, ddof=1):.6f}, time_cv={t_cv_mean:.2f}s | "
                f"rho≈{rho_bar:.4f} | "
                f"Var(CV) pred={var_cv_pred:.3e}, Var(CV) emp={var_cv:.3e} | "
                f"VRF={vrf:.2f} (red={red:.1f}%) | "
                f"time_total≈{t_tot_mean:.2f}s"
            )


# ==========================================================
# 11) Main experiment (train once, beta per method, pricing varies)
# ==========================================================

if __name__ == "__main__":
    # Contract and model
    T_swaption = 5.0
    T_swap_maturity = 10.0
    dt = 1.0 / 52.0
    n_steps = int(T_swap_maturity / dt)

    r0 = 0.02
    a = 0.1
    sigma = 0.01
    K = 0.02

    t_grid = np.linspace(0.0, T_swap_maturity, n_steps + 1)
    exercise_times = np.arange(T_swaption, T_swap_maturity + 1e-12, 0.25)
    exercise_indices = [int(round(t / dt)) for t in exercise_times]
    pay_indices = exercise_indices

    # --- mu_Y for CV (high-precision MC) ---
    mu_Y = price_european_swaption_hw_mc(
        K=K,
        exercise_index=exercise_indices[0],
        pay_indices=pay_indices,
        T=T_swap_maturity,
        r0=r0,
        a=a,
        sigma=sigma,
        n_paths=200_000,
        n_steps=n_steps,
        theta_func=None,
        seed=42,
    )
    print(f"European payer swaption HW price (mu_Y) ≈ {mu_Y:.6f}\n")

    # ======================================================
    # Train the LSM policy ONCE and freeze it (standard MC)
    # ======================================================
    TRAIN_SEED = 777
    n_train = 20_000

    t_grid_train, r_train = simulate_hull_white_paths(
        n_paths=n_train,
        n_steps=n_steps,
        T=T_swap_maturity,
        r0=r0,
        a=a,
        sigma=sigma,
        theta_func=None,
        use_sobol=False,
        antithetic=False,
        seed=TRAIN_SEED,
    )

    policy_coeffs = fit_lsm_policy_coeffs(
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        t_grid=t_grid_train,
        r_paths_train=r_train,
    )

    # ======================================================
    # Estimate beta* ONCE PER PRICING METHOD (pilot) and freeze
    # ======================================================
    n_pilot = 100_000
    PILOT_BASE_SEED = 888

    beta_star_by_method = {}
    rho_by_method = {}

    for j, method in enumerate(["standard", "antithetic", "sobol"]):
        beta_star, rho = estimate_beta_star_for_method(
            method=method,
            K=K,
            exercise_indices=exercise_indices,
            pay_indices=pay_indices,
            T=T_swap_maturity,
            r0=r0,
            a=a,
            sigma=sigma,
            n_steps=n_steps,
            n_pilot=n_pilot,
            policy_coeffs=policy_coeffs,
            theta_func=None,
            seed=PILOT_BASE_SEED + 10 * j,
        )
        beta_star_by_method[method] = beta_star
        rho_by_method[method] = rho

    print("Fixed pilot betas (by method):")
    print(f"  standard   beta* ≈ {beta_star_by_method['standard']:.4f} | rho≈{rho_by_method['standard']:.4f}")
    print(f"  antithetic beta* ≈ {beta_star_by_method['antithetic']:.4f} | rho≈{rho_by_method['antithetic']:.4f}")
    print(f"  sobol      beta* ≈ {beta_star_by_method['sobol']:.4f} | rho≈{rho_by_method['sobol']:.4f}\n")

    # --- Benchmark ---
    Ms = [4_096, 8_192, 16_384, 32_768, 65_536, 131_072, 262_144]
    R = 30

    benchmark_textbook(
        Ms=Ms,
        R=R,
        K=K,
        exercise_indices=exercise_indices,
        pay_indices=pay_indices,
        T=T_swap_maturity,
        r0=r0,
        a=a,
        sigma=sigma,
        n_steps=n_steps,
        mu_Y=mu_Y,
        policy_coeffs=policy_coeffs,
        beta_star_by_method=beta_star_by_method,
        theta_func=None,
    )


European payer swaption HW price (mu_Y) ≈ 0.027921



  sample = self._random(n, workers=workers)


Fixed pilot betas (by method):
  standard   beta* ≈ 1.2463 | rho≈0.8662
  antithetic beta* ≈ 1.2425 | rho≈0.8634
  sobol      beta* ≈ 1.2474 | rho≈0.8650


M (pricing paths) = 4,096 | R = 30
  Pseudo-random : beta*=1.2463 | Std mean=0.084720, SE_across=0.000801 | CV mean=0.084984, SE_across=0.000467 | rho≈0.8650 | Var(CV) pred=1.617e-07, Var(CV) emp=2.183e-07 | VRF=2.94 (red=66.0%) | time=4.16s
  Antithetic    : beta*=1.2425 | Std mean=0.084725, SE_across=0.000211 | CV mean=0.085112, SE_across=0.000543 | rho≈0.8648 | Var(CV) pred=1.123e-08, Var(CV) emp=2.953e-07 | VRF=0.15 (red=-563.4%) | time=4.65s
  Scrambled Sobol: beta*=1.2474 | Std mean=0.084656, SE_across=0.000150 | CV mean=0.084944, SE_across=0.000292 | rho≈0.8649 | Var(CV) pred=5.664e-09, Var(CV) emp=8.529e-08 | VRF=0.26 (red=-279.5%) | time=8.59s

M (pricing paths) = 8,192 | R = 30
  Pseudo-random : beta*=1.2463 | Std mean=0.084659, SE_across=0.000578 | CV mean=0.084953, SE_across=0.000308 | rho≈0.8648 | Var(CV) pred=8.428e-08

KeyboardInterrupt: 