In [5]:
import numpy as np

# ---------- parameters ----------
n_train = 50_000                    # number of training observations
n_test  = 50_000                    # number of held-out test observations


def mu_func(x):
    """Return the conditional mean of Y given X = x, namely ``50 - x``.

    Accepts a scalar or a NumPy array; arrays are handled elementwise.
    """
    return 50 - x


sigma2  = 4                         # conditional variance
xmin, xmax = 0.0, 30.0              # support of X

# ---------- data generation ----------
# A single seeded generator is created here so that reruns reproduce the same draws.
rng = np.random.default_rng(42)
# Covariates are uniform on [xmin, xmax); the training sample is drawn before
# the test sample, which fixes the order in which the random stream is consumed.
X_train = rng.uniform(low=xmin, high=xmax, size=n_train)
X_test = rng.uniform(low=xmin, high=xmax, size=n_test)

def sample_y(x):
    """Draw Y given X = x from a normal law with mean ``mu_func(x)`` and variance ``sigma2``.

    Consumes random numbers from the module-level generator ``rng``.
    """
    cond_mean = mu_func(x)
    cond_sd = np.sqrt(sigma2)   # rng.normal takes a standard deviation, not a variance
    return rng.normal(cond_mean, cond_sd)

# Responses are simulated for the training covariates first, then for the test covariates.
Y_train, Y_test = sample_y(X_train), sample_y(X_test)

# ---------- estimators ----------
def f_star(x):
    """Bayes predictor: forecast Y by its conditional mean at ``x``."""
    conditional_mean = mu_func(x)
    return conditional_mean

# Training-sample mean of Y: the only quantity the constant competitor uses.
ybar_train = np.mean(Y_train)

# Bayes-predictor forecasts at the held-out covariates.
Yhat_star_test = f_star(X_test)
def f_tilde(x):
    """Constant competitor: predict the training mean of Y for every input.

    Parameters
    ----------
    x : array_like
        Query points. Only the shape is used; the values are ignored.

    Returns
    -------
    numpy.ndarray
        Float array with the shape of ``x``, filled with ``ybar_train``.
    """
    # Force a float result: by default full_like inherits the dtype of ``x``,
    # so an integer-valued ``x`` would silently truncate the mean to an integer.
    return np.full_like(x, ybar_train, dtype=float)

# Constant-predictor forecasts at the held-out covariates.
Yhat_tilde_test = f_tilde(X_test)

# ---------- empirical risks ----------
# Mean squared prediction error of each predictor on the test sample.
mse_star  = np.mean((Y_test - Yhat_star_test) ** 2)
mse_tilde = np.mean((Y_test - Yhat_tilde_test) ** 2)

print(f"Test MSE for Bayes estimator  f*:   {mse_star:.4f}")
print(f"Test MSE for competitor      f~:   {mse_tilde:.4f}")
# Under squared loss the Bayes risk is the conditional variance, so report it
# from sigma2 instead of a hard-coded literal that goes stale if sigma2 changes.
print(f"\n(Theoretical Bayes risk is {sigma2:.1f})")


Test MSE for Bayes estimator  f*:   3.9882
Test MSE for competitor      f~:   79.0262

(Theoretical Bayes risk is 4.0)


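Because $f^*$ is the Bayes predictor, its risk is the conditional variance $\sigma^2 = 4$, and by the law of large numbers its empirical test risk converges to this Bayes risk (observed: 3.9882).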
The constant predictor has no access to $X$; consequently its risk is approximately $\operatorname{Var}(Y) = \sigma^2 + \operatorname{Var}[\mu(X)] = 4 + 30^2/12 = 79$ (observed: 79.0262), which is strictly larger than 4, so the simulation confirms the theory.