In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import minimize

Example 3.7 already derived the Fisher information matrix for an iid sample $x_1, \ldots, x_n$ from $N(\mu, \sigma^2)$, so there is no need to repeat it here.

To check the quadratic approximation for the profile likelihood of $\mu$ and $\sigma^2$ we must plot and compare the profile likelihoods given in Example 3.10:

$$
L(\mu) \propto (\sigma_{\mu}^2)^{-n/2}
$$

where:

$$
\sigma_{\mu}^2 = \frac{1}{n} \sum_i (x_i - \mu)^2
$$

and:

$$
L(\sigma^2) \propto (\sigma^2)^{-n/2} \exp \{-n \hat{\sigma}^2 / (2 \sigma^2) \}
$$

where:

$$
\hat{\sigma}^2 = \frac{1}{n} \sum_{i=1}^n (x_i - \bar{x})^2
$$

against the quadratic approximations given by:

$$
\log L(\theta_i) \approx -\frac{1}{2}(I^{ii})^{-1} (\theta_i - \hat{\theta}_i)^2
$$

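where $\theta = (\mu, \sigma^2)$ and $I^{ii}$ is the $i$-th diagonal element of the inverse Fisher information matrix evaluated at the MLE.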

In [None]:
# Observed sample: ten measurements, listed in ascending order.
data = np.array([
    0.88, 1.07, 1.27, 1.54, 1.91,
    2.27, 3.84, 4.50, 4.64, 9.41,
])

In [None]:
# Maximum-likelihood estimates: the sample mean and the (1/n) sample variance.
n = data.shape[0]
mu_hat = data.mean()
sigma_hat_sq = (1 / n) * ((data - mu_hat)**2).sum()
# Show both estimates rounded to two decimals.
np.round(mu_hat, 2), np.round(sigma_hat_sq, 2)

In [None]:
# Fisher information for (mu, sigma^2) evaluated at the MLE. The matrix is
# diagonal, so only its diagonal entries are spelled out.
I = np.diag([n / sigma_hat_sq, n / (2 * sigma_hat_sq**2)])
I_inv = np.linalg.inv(I)
display(I)
display(I_inv)

In [None]:
def approx_likelihood(
    parameter: np.ndarray, parameter_mle: float, inv_fisher_info: float) -> np.ndarray:
    """Quadratic-approximation likelihood, scaled so its largest value is 1.

    Parameters
    ----------
    parameter : grid of parameter values at which to evaluate the approximation.
    parameter_mle : maximum-likelihood estimate the quadratic is centred on.
    inv_fisher_info : the parameter's diagonal entry of the inverse Fisher
        information matrix.

    Returns
    -------
    Likelihood values relative to the largest value found on the grid.
    """
    neg_log_like = np.asarray(
        approx_neg_log_likelihood(parameter, parameter_mle, inv_fisher_info),
        dtype=float)
    # Normalise in log space: exp(min(nll) - nll) equals like / max(like), but
    # it cannot degenerate into 0 / 0 when every exp(-nll) underflows to zero.
    return np.exp(np.min(neg_log_like) - neg_log_like)

def approx_neg_log_likelihood(
    parameter: float, parameter_mle: float, inv_fisher_info: float) -> float:
    """Negative log-likelihood under the quadratic approximation.

    Evaluates 0.5 * (1 / inv_fisher_info) * (parameter - parameter_mle)**2,
    i.e. the negated quadratic approximation of the log-likelihood around
    ``parameter_mle``.
    """
    curvature = 1 / inv_fisher_info
    squared_distance = (parameter - parameter_mle)**2
    return 0.5 * curvature * squared_distance

In [None]:
# Grid for mu spanning the observed data range.
mu = np.linspace(np.min(data), np.max(data), 100)
mu_approx_like = approx_likelihood(mu, mu_hat, I_inv[0, 0])
# Grid for sigma^2: square an evenly spaced, non-negative sigma grid so the
# values are strictly increasing. Squaring a grid that is symmetric about zero
# would visit every sigma^2 twice and trace the plotted curve back over itself.
sigma_sq = np.linspace(0, 3, 100)**2
# Keep sigma^2 away from zero, where the likelihood is numerically 0.
sigma_sq = sigma_sq[sigma_sq > 0.04]
sigma_sq_approx_like = approx_likelihood(sigma_sq, sigma_hat_sq, I_inv[1, 1])

In [None]:
def profile_likelihood_mu(mu: np.ndarray, data: np.ndarray) -> np.ndarray:
    """Profile likelihood of mu on a grid, scaled so its largest value is 1.

    Parameters
    ----------
    mu : grid of candidate values for mu.
    data : observed sample.

    Returns
    -------
    Array with one relative likelihood value per entry of ``mu``.
    """
    # Build a real array before doing arithmetic; normalising a plain Python
    # list only works by accident through NumPy's reflected scalar division.
    neg_log_like = np.array(
        [profile_neg_log_likelihood_mu(m, data) for m in mu], dtype=float)
    # Normalise in log space: exp(min(nll) - nll) equals like / max(like)
    # without first forming likelihood values that may underflow.
    return np.exp(np.min(neg_log_like) - neg_log_like)

def profile_neg_log_likelihood_mu(mu: float, data: np.ndarray) -> float:
    """Negative profile log-likelihood of mu, up to an additive constant.

    The profile likelihood is proportional to (sigma_mu^2)^(-n/2), where
    sigma_mu^2 = (1/n) * sum((x_i - mu)^2), so its negative log is
    (n/2) * log(sigma_mu^2).

    Parameters
    ----------
    mu : candidate value for the mean.
    data : observed sample.
    """
    n = len(data)
    sigma_mu_sq = (1 / n) * np.sum((data - mu)**2)
    # Take the log directly rather than forming sigma_mu_sq**(-n/2) first: for
    # large n that power underflows to 0 (or overflows) and the log of it is
    # then +/-inf.
    return (n / 2) * np.log(sigma_mu_sq)

In [None]:
# Evaluate the normalised profile likelihood of mu on the mu grid.
mu_profile_like = profile_likelihood_mu(mu=mu, data=data)

In [None]:
def profile_likelihood_sigma_sq(
    sigma_sq: np.ndarray, sigma_sq_hat: float, n: int) -> np.ndarray:
    """Profile likelihood of sigma^2 on a grid, scaled so its largest value is 1.

    Parameters
    ----------
    sigma_sq : grid of candidate values for sigma^2.
    sigma_sq_hat : maximum-likelihood estimate of sigma^2.
    n : sample size.

    Returns
    -------
    Array with one relative likelihood value per entry of ``sigma_sq``.
    """
    # Build a real array before doing arithmetic; normalising a plain Python
    # list only works by accident through NumPy's reflected scalar division.
    neg_log_like = np.array(
        [profile_neg_log_likelihood_sigma_sq(ss, sigma_sq_hat, n) for ss in sigma_sq],
        dtype=float)
    # Normalise in log space: exp(min(nll) - nll) equals like / max(like)
    # without first forming likelihood values that may underflow.
    return np.exp(np.min(neg_log_like) - neg_log_like)

def profile_neg_log_likelihood_sigma_sq(
    sigma_sq: float, sigma_hat_sq: float, n: int) -> float:
    """Negative profile log-likelihood of sigma^2, up to an additive constant.

    The profile likelihood is proportional to
    (sigma^2)^(-n/2) * exp(-n * sigma_hat^2 / (2 * sigma^2)), so its negative
    log is n * sigma_hat^2 / (2 * sigma^2) + (n/2) * log(sigma^2).

    Parameters
    ----------
    sigma_sq : candidate value for sigma^2.
    sigma_hat_sq : maximum-likelihood estimate of sigma^2.
    n : sample size.
    """
    # Work in log space: evaluating the exponential first underflows to 0 for
    # small sigma_sq, and the subsequent -log(0) would return inf.
    exponent_term = (n * sigma_hat_sq) / (2 * sigma_sq)
    power_term = (n / 2) * np.log(sigma_sq)
    return exponent_term + power_term

In [None]:
# Evaluate the normalised profile likelihood of sigma^2 on the sigma^2 grid.
sigma_sq_profile_like = profile_likelihood_sigma_sq(
    sigma_sq=sigma_sq, sigma_sq_hat=sigma_hat_sq, n=n)

In [None]:
def plot_likelihoods(parameter, profile_like, approx_like, xlabel=r'$\mu$') -> None:
    """Overlay a profile likelihood and its quadratic approximation.

    Parameters
    ----------
    parameter : grid of parameter values shared by both curves.
    profile_like : profile likelihood evaluated on ``parameter``.
    approx_like : quadratic-approximation likelihood evaluated on ``parameter``.
    xlabel : label for the x-axis. Defaults to the symbol for mu so existing
        calls are unchanged; pass the matching symbol for any other parameter.
    """
    plt.plot(parameter, approx_like, label='quadratic approx')
    plt.plot(parameter, profile_like, label='profile likelihood')
    plt.title('Likelihood')
    plt.xlabel(xlabel)
    plt.ylabel('Likelihood')
    plt.legend()

In [None]:
# Compare the two likelihood curves for mu.
plot_likelihoods(
    parameter=mu, profile_like=mu_profile_like, approx_like=mu_approx_like)

The quadratic approximation for the profile likelihood of $\mu$ looks good.

In [None]:
plot_likelihoods(sigma_sq, sigma_sq_profile_like, sigma_sq_approx_like)
# plot_likelihoods labels the x-axis with mu by default; this figure shows
# sigma^2, so relabel the axes it just drew on.
plt.xlabel(r'$\sigma^2$');

The quadratic approximation for the profile likelihood of $\sigma^2$ is poor except in the vicinity of the MLE. The approximation is worse at lower values of $\sigma^2$.

Now we are asked to check that the curvatures at the maximum correspond to the appropriate entries of the observed Fisher information matrix. To do this we numerically minimise the negative log of each of the four functions and compare the inverse Hessian reported at the optimum with the corresponding diagonal entry of the inverse observed Fisher information matrix. Below we can see that the values match up nicely.

In [None]:
# Minimise both negative log-likelihoods for mu from the same starting point.
x0 = 3.0
mu_mle_approx = minimize(approx_neg_log_likelihood, args=(mu_hat, I_inv[0, 0]), x0=x0)
# `args` must be a tuple: `(data)` is just `data`, so spell out the 1-tuple
# instead of relying on SciPy to wrap a bare array.
mu_mle_profile = minimize(profile_neg_log_likelihood_mu, args=(data,), x0=x0)

In [None]:
# The optimiser's inverse-Hessian estimate for each mu fit, next to the mu
# entry of the inverse Fisher information, all rounded to two decimals.
tuple(
    np.round(value, 2)
    for value in (
        mu_mle_approx.hess_inv[0][0],
        mu_mle_profile.hess_inv[0][0],
        I_inv[0, 0],
    )
)

In [None]:
# Minimise both negative log-likelihoods for sigma^2 from the same starting point.
x0 = 6.0
sigma_sq_mle_approx = minimize(
    fun=approx_neg_log_likelihood, x0=x0, args=(sigma_hat_sq, I_inv[1, 1]))
sigma_sq_mle_profile = minimize(
    fun=profile_neg_log_likelihood_sigma_sq, x0=x0, args=(sigma_hat_sq, n))

In [None]:
# The optimiser's inverse-Hessian estimate for each sigma^2 fit, next to the
# sigma^2 entry of the inverse Fisher information, rounded to two decimals.
tuple(
    np.round(value, 2)
    for value in (
        sigma_sq_mle_approx.hess_inv[0][0],
        sigma_sq_mle_profile.hess_inv[0][0],
        I_inv[1, 1],
    )
)