In [2]:
from tabulate import tabulate
import numpy as np
import scipy.stats

# Problem

Compare the risk of the MLE and the James-Stein estimator by simulation.
Try various values of $n$ and various vectors $\theta$.
Summarize your results.

# Solution

## The context
Recall that the model is $X_i \sim N(\theta_i, 1)$, independently,
for $\theta_1, ..., \theta_n$ given parameters.
In other words $X \sim N(\theta, I)$.
The loss is $L^2$-error-squared loss, namely
$$
L(\theta, \hat\theta)
= {||\theta - \hat\theta||}_2^2
= \sum_{i=1}^n {( \theta - \theta_i )}^2.
$$
The MLE is $\hat\theta_{MLE} = X$ while the James-Stein estimator is $\hat\theta_{JS} = s(X) X$ for
$$
s(X) = \max \left( 0, 1 - \frac{n-2}{\sum_{i=1}^n X_i^2} \right).
$$

## Setup of the simulation
For any parameter $\theta$ and any point estimator $\hat\theta$ we may estimate the risk
by *simulation*:
$$
R(\theta,\hat\theta)
= \mathbb{E}_\theta {||\theta-\hat\theta||}_2^2
= \int_{\mathbb{R}^n} {||\theta-\hat\theta(x)||}_2^2 f(x;\theta) dx
\sim \frac{1}{B} \sum_{j=1}^B {\left|\left|\theta-\hat\theta\left(X^{(j)}\right)\right|\right|}_2^2
=: R^*(\theta,\hat\theta)
$$
for $X^{(1)}, ..., X^{(B)} \sim N(\theta, I)$.
For completeness' sake note that we may expand the estimated risk further as
$$
R^*(\theta,\hat\theta)
= \frac{1}{B} \sum_{j=1}^B \sum_{i=1}^n {\left(\theta_i-\hat\theta_i\left(X^{(j)}\right)\right)}^2.
$$

In [3]:
# Produce the simulated random variables X^(1), ..., X^(B)
def simulation_sample(theta, B):
    """
    
    Returns a two-dimensional array of the form
    
       X^(1)
       ...
       X^(B)
       
    where each row is a random variable with distribution N(theta, I).
    
    The two-dimensional array returned is thus
    
       X^(1)_1  ...   X^(1)_n
       ...      ...   ...
       X^(B)_1  ...   X^(B)_n
    
    """
    n = len(theta)
    return scipy.stats.multivariate_normal.rvs(mean=theta, cov=np.identity(n), size=B)

In [4]:
# The maximum likelihood estimator
def mle(X):
    """
    
    Takes as input an array of the form
    
       X^(1)
       ...
       X^(B)
       
    where each row is a vector of the same length.
    
    The two-dimensional array input is thus
    
       X^(1)_1  ...   X^(1)_n
       ...      ...   ...
       X^(B)_1  ...   X^(B)_n
       
    The function returns the original array since
    the MLE of X^(i) is X^(i) itself.
    
    """
    return X

In [6]:
# The James-Stein estimator
def compute_js_est(X):
    """
    
    Takes as input an array of the form
    
       X^(1)
       ...
       X^(B)
       
    where each row is a vector of the same length.
    
    The two-dimensional array input is thus
    
       X^(1)_1  ...   X^(1)_n
       ...      ...   ...
       X^(B)_1  ...   X^(B)_n
       
    The function returns
    
       s[X^(1)] X^(1)
       ...
       s[X^(B)] X^(B)
       
    or
    
       s[X^(1)] X^(1)_1  ...   s[X^(1)] X^(1)_n
       ...               ...   ...
       s[X^(B)] X^(B)_1  ...   s[X^(B)] X^(B)_n
    
    """
    n = X.shape[1]
    return np.array([
        max(0, 1 - (n - 2)/np.square(Xj).sum())*Xj
        for Xj in X
    ])

In [7]:
# The simulation estimate of the risk
def risk_estimate(theta, estimator, B):
    
    # Simulated random variables X^(1), ..., X^(B)
    X = simulation_sample(theta, B)
    
    # Compute the risk estimate
    return np.square(estimator(X) - theta).sum()/B

### Sanity check
For an *unbiased* estimator $\theta$ of $\hat\theta$
the risk is related to the variance-covariance matrix of $\hat\theta$ via
$$
R(\theta, \hat\theta)
= \mathbb{E}_\theta {||\theta-\hat\theta||}_2^2
= \sum_{i=1}^n \mathbb{E}_\theta {(\theta_i - \hat\theta_i)}^2
= \sum_{i=1}^n \mathbb{V}_\theta (\hat\theta_i)
= \text{diag}\,\mathbb{V}_\theta (\hat\theta).
$$
In particular for the MLE $\hat\theta_{MLE} = X \sim N(\theta, I)$,
which is an unbiased estimator of $\theta$,
$$
R(\theta, \hat\theta_{MLE})
= \text{diag}\, \mathbb{V}_\theta (X)
= \text{diag}\, I
= n.
$$
We thus expect
$$
R^*(\theta, \hat\theta_{MLE}) \to n
\text{ as } B\to\infty.
$$

In [8]:
risk_est = risk_estimate(theta=np.zeros(17), estimator=mle, B=int(1e6))
print(
    f"When n=17 and B=10,000 the MLE risk estimate is {risk_est:.3f}."
)

When n=17 and B=10,000 the MLE risk estimate is 16.998.


## Computing both risks

In [9]:
def risks(theta, B):
    mle_risk_est = risk_estimate(theta, mle, B)
    js_risk_est = risk_estimate(theta, compute_js_est, B)
    return mle_risk_est, js_risk_est

def report_result(risks):
    mle_risk, js_risk = risks
    print(
        f"Risk of the MLE:                   {mle_risk:.3f}\n"
        f"Risk of the James-Stein estimator: {js_risk:.3f}"
    )
    
def run_experiment(n, Delta, B):
    print(
        "Experiment settings: "
        f"(n={n}; Delta={Delta}, B=10^{np.log10(B):.0f})"
    )
    report_result(risks(
        theta=np.random.uniform(low=-Delta, high=Delta, size=n),
        B=int(1e5)
    ))

## Run experiments
Remember that the theoretical value of the risk of the MLE is $n$.
(This can be used to gauge whether $B$ is sufficiently large to
ensure an accurate approximation of the risk of the James-Stein estimator.)

### n = 10

In [10]:
run_experiment(n=10, Delta=0.001, B=int(1e6))

Experiment settings: (n=10; Delta=0.001, B=10^6)
Risk of the MLE:                   9.998
Risk of the James-Stein estimator: 1.266


In [11]:
run_experiment(n=10, Delta=0.5, B=int(1e6))

Experiment settings: (n=10; Delta=0.5, B=10^6)
Risk of the MLE:                   9.998
Risk of the James-Stein estimator: 1.838


In [12]:
run_experiment(n=10, Delta=1, B=int(1e6))

Experiment settings: (n=10; Delta=1, B=10^6)
Risk of the MLE:                   9.996
Risk of the James-Stein estimator: 2.642


In [13]:
run_experiment(n=10, Delta=3, B=int(1e6))

Experiment settings: (n=10; Delta=3, B=10^6)
Risk of the MLE:                   10.017
Risk of the James-Stein estimator: 8.214


In [14]:
run_experiment(n=10, Delta=5, B=int(1e6))

Experiment settings: (n=10; Delta=5, B=10^6)
Risk of the MLE:                   9.999
Risk of the James-Stein estimator: 9.349


In [15]:
run_experiment(n=10, Delta=100, B=int(1e6))

Experiment settings: (n=10; Delta=100, B=10^6)
Risk of the MLE:                   10.027
Risk of the James-Stein estimator: 10.004


### n = 50

In [16]:
run_experiment(n=50, Delta=0.001, B=int(1e5))

Experiment settings: (n=50; Delta=0.001, B=10^5)
Risk of the MLE:                   49.973
Risk of the James-Stein estimator: 1.097


In [17]:
run_experiment(n=50, Delta=0.5, B=int(1e5))

Experiment settings: (n=50; Delta=0.5, B=10^5)
Risk of the MLE:                   49.947
Risk of the James-Stein estimator: 4.198


In [18]:
run_experiment(n=50, Delta=1, B=int(1e5))

Experiment settings: (n=50; Delta=1, B=10^5)
Risk of the MLE:                   49.989
Risk of the James-Stein estimator: 13.881


In [19]:
run_experiment(n=50, Delta=3, B=int(1e5))

Experiment settings: (n=50; Delta=3, B=10^5)
Risk of the MLE:                   50.024
Risk of the James-Stein estimator: 39.159


In [20]:
run_experiment(n=50, Delta=5, B=int(1e5))

Experiment settings: (n=50; Delta=5, B=10^5)
Risk of the MLE:                   50.001
Risk of the James-Stein estimator: 44.760


In [21]:
run_experiment(n=50, Delta=100, B=int(1e5))

Experiment settings: (n=50; Delta=100, B=10^5)
Risk of the MLE:                   50.006
Risk of the James-Stein estimator: 49.996


### n = 200

In [22]:
run_experiment(n=200, Delta=0.001, B=int(1e5))

Experiment settings: (n=200; Delta=0.001, B=10^5)
Risk of the MLE:                   200.040
Risk of the James-Stein estimator: 1.062


In [23]:
run_experiment(n=200, Delta=0.5, B=int(1e5))

Experiment settings: (n=200; Delta=0.5, B=10^5)
Risk of the MLE:                   200.042
Risk of the James-Stein estimator: 17.554


In [24]:
run_experiment(n=200, Delta=1, B=int(1e5))

Experiment settings: (n=200; Delta=1, B=10^5)
Risk of the MLE:                   199.881
Risk of the James-Stein estimator: 51.554


In [25]:
run_experiment(n=200, Delta=3, B=int(1e5))

Experiment settings: (n=200; Delta=3, B=10^5)
Risk of the MLE:                   199.900
Risk of the James-Stein estimator: 154.489


In [26]:
run_experiment(n=200, Delta=5, B=int(1e5))

Experiment settings: (n=200; Delta=5, B=10^5)
Risk of the MLE:                   200.070
Risk of the James-Stein estimator: 177.273


In [27]:
run_experiment(n=200, Delta=100, B=int(1e5))

Experiment settings: (n=200; Delta=100, B=10^5)
Risk of the MLE:                   199.972
Risk of the James-Stein estimator: 199.835


## Summarize experiments
Since the theoretical value of the risk of the MLE is $n$,
in this section we only compute the risk of the James-Stein operator (by simulation).

In [31]:
class Experiment():
    
    def __init__(self, n, delta, B):
        self.n = n
        self.delta = delta
        self.B = B
        self.has_run = False
        self.js_est = None
        
    def run(self):
        self.js_est = risk_estimate(
            theta=np.random.uniform(
                low=-self.delta,
                high=self.delta,
                size=self.n
            ),
            estimator=compute_js_est,
            B=B
        )
        self.has_run = True
        
# Auxiliary functions to tabulate the results

def table_row(delta, n_list, experiment_list):
    return (
        [
            f"Delta = {delta}",
        ]
        + [
            f"{experiment_list[n_list.index(n)][delta_list.index(delta)].js_est:.1f}"
            for n in n_list
        ]
    )
    
def full_table(n_list, delta_list, experiment_list):
    return (
        [
            ['MLE risk (theory)'] +  list(n_list),
        ]
        + [
            table_row(delta, n_list, experiment_list)
            for delta in delta_list
        ]
    )

In [32]:
# Parameters to run through
B = int(1e4)
n_list = (10, 50, 200)
delta_list = (0.001, 0.5, 1, 3, 5, 100)

# Populate the experiment list
experiment_list = [[Experiment(n, delta, B) for delta in delta_list] for n in n_list]

In [33]:
# Run all of the experiments
for experiment_sublist in experiment_list:
    for experiment in experiment_sublist:
        experiment.run()

In [34]:
# Table the results
print(tabulate(
    full_table(n_list, delta_list, experiment_list),
    headers = [''] +[f'n={n}' for n in n_list]
))

                     n=10    n=50    n=200
-----------------  ------  ------  -------
MLE risk (theory)    10      50      200
Delta = 0.001         1.3     1.1      1
Delta = 0.5           2.1     5.1     17.4
Delta = 1             4.3    12.1     55.7
Delta = 3             8.3    38.5    150.8
Delta = 5             9.4    45.1    179.5
Delta = 100          10      50.1    199.9


## Conclusion
When the parameters are concentrated near zero,
the James-Stein estimator has a much lower risk
(approaching one) than the MLE.
This remains true even for a large number of
samples/parameters.

As the parameters stray further away from zero,
however, the James-Stein estimator's risk increases
(albeit always bounded above by the
risk of the MLE, as predicted by theory).