# Monte Carlo Estimation of $\mathbb{E}\!\left[\lVert r_{t+1}\rVert^2\right]$ (squared norm of reference path) for Adam

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def estimate_path_norm_squared_stats_vectorized(
    shape,
    num_trials=100,
    num_steps=100,
    betas=(0.9, 0.999),
    eps=1e-8,
    smoothing=0.2,
    seed=None
):
    """Monte Carlo estimate of the squared norm of a smoothed, normalized Adam path.

    ``num_trials`` independent simulations are run in parallel along the
    leading array axis. At every step each trial draws a standard-normal
    "gradient", updates the bias-corrected Adam moment estimates, rescales
    the resulting Adam step to unit Euclidean norm (taken over all non-trial
    axes), and folds that unit step into an exponential moving average
    ``path`` that starts at zero.

    Parameters
    ----------
    shape : sequence of int
        Shape of a single sample. Any rank is supported, including ``()``.
    num_trials : int
        Number of independent simulations.
    num_steps : int
        Number of Adam steps simulated per trial.
    betas : tuple of float
        ``(beta1, beta2)`` decay rates of the first/second moment estimates.
    eps : float
        Constant added to ``sqrt(v_hat)`` in the Adam step denominator.
    smoothing : float
        Weight of the newest normalized step in the moving average.
    seed : int or None
        If given, samples come from a private ``RandomState(seed)`` and the
        global NumPy RNG is left untouched. If ``None``, samples are drawn
        from the global ``np.random`` state.

    Returns
    -------
    tuple
        ``(mean, std)`` of the squared path norm across trials after the
        last step (``std`` is NumPy's default population std, ``ddof=0``).
    """
    # A seeded call uses its own generator so the caller's global RNG state is
    # not clobbered. RandomState(seed) yields the same stream that
    # np.random.seed(seed) + np.random.randn would, so seeded results are
    # unchanged. Unseeded calls keep drawing from the global legacy RNG.
    rng = np.random if seed is None else np.random.RandomState(seed)

    beta1, beta2 = betas
    shape = tuple(shape)
    trials_shape = (num_trials,) + shape
    # Every axis except the leading trial axis belongs to one sample.
    sample_axes = tuple(range(1, len(trials_shape)))

    # Initialize states
    m = np.zeros(trials_shape)
    v = np.zeros(trials_shape)
    path = np.zeros(trials_shape)

    for t in range(1, num_steps + 1):
        # Vectorized sampling of Gaussian noise for all trials
        grad = rng.randn(*trials_shape)

        # Adam moment updates with bias correction
        m = beta1 * m + (1 - beta1) * grad
        v = beta2 * v + (1 - beta2) * grad**2

        m_hat = m / (1 - beta1 ** t)
        v_hat = v / (1 - beta2 ** t)
        adam_step = m_hat / (np.sqrt(v_hat) + eps)

        # Euclidean norm over all sample axes. Computed explicitly because
        # np.linalg.norm rejects axis tuples that are empty or longer than 2.
        norm = np.sqrt(np.sum(adam_step**2, axis=sample_axes, keepdims=True))
        # The clip guards against division by zero for an all-zero step.
        normalized_step = adam_step / np.clip(norm, a_min=1e-12, a_max=None)

        # Exponential average of the unit-norm steps
        path = (1 - smoothing) * path + smoothing * normalized_step

    # Squared norm of the final path, one value per trial
    path_squared_norms = np.sum(path**2, axis=sample_axes)
    return path_squared_norms.mean(), path_squared_norms.std()


In [None]:
# Print the estimated mean/std of the squared path norm for a few dimensions.
dims = [15, 42, 650]
for dim in dims:
    mean, std = estimate_path_norm_squared_stats_vectorized(
        shape=(dim,),
        num_trials=1000,
        num_steps=1000,
    )
    print(f"Dim.: {dim}, Mean: {mean:.4f}, Std: {std:.4f}")

In [None]:
def plot_path_norm_stats_vs_dimension(
    dims,
    num_trials=100,
    num_steps=100,
    betas=(0.9, 0.999),
    eps=1e-8,
    smoothing=0.2,
    seed=None
):
    """Plot the estimated squared path norm against the sample dimension.

    For every entry ``d`` of ``dims`` this calls
    ``estimate_path_norm_squared_stats_vectorized`` with ``shape=(d,)`` and
    the remaining arguments forwarded unchanged, then draws the means as a
    line with a shaded band of one standard deviation around it.

    Parameters
    ----------
    dims : iterable of int
        Dimensions to evaluate; used as x-coordinates in the given order.
    num_trials, num_steps, betas, eps, smoothing, seed
        Forwarded as-is to ``estimate_path_norm_squared_stats_vectorized``.
        Note that the same ``seed`` is passed for every dimension.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Materialize once: dims is iterated below and reused as the x-axis, which
    # would silently break for one-shot iterables such as generators.
    dims = list(dims)

    means = []
    stds = []

    for d in dims:
        mean, std = estimate_path_norm_squared_stats_vectorized(
            shape=(d,),
            num_trials=num_trials,
            num_steps=num_steps,
            betas=betas,
            eps=eps,
            smoothing=smoothing,
            seed=seed
        )
        means.append(mean)
        stds.append(std)

    # Convert to NumPy arrays so the band edges can be computed elementwise
    dims = np.array(dims)
    means = np.array(means)
    stds = np.array(stds)

    # Plot results
    plt.figure(figsize=(8, 5))
    # \mathrm (not \text) keeps the label parseable by older Matplotlib
    # mathtext and matches the y-axis label below.
    plt.plot(dims, means, label=r'Mean of $\|\mathrm{path}\|^2$')
    plt.fill_between(dims, means - stds, means + stds, alpha=0.3, label='±1 Std Dev')
    plt.xlabel('Dimension')
    plt.ylabel(r'$\mathbb{E}[\|\mathrm{path}\|^2]$')
    plt.title(f'Path Norm Squared vs Dimension (steps={num_steps}, trials={num_trials})')
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()


In [None]:
# Plot mean ± std of the squared path norm over the same set of dimensions.
dims = [15, 42, 650]
plot_path_norm_stats_vs_dimension(
    dims,
    num_trials=500,
    num_steps=1000,
)