In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import probplot, tvar

In [None]:
# Values as printed in the book. Note: the accompanying R file has one of the
# 9's replaced by an 8, which does not match the book.
data = [
    82, 59, 59, 57, 39, 38, 22, 16, 10, 10, 10, 10, 10, 9,
    9, 8, 5, 5, 5, 4, 4, 2, 2, 1, 0.7, 0.4, 0.4,
]

In [None]:
def qq_plot(data: np.ndarray, title: str, ylabel: str, ylim: tuple) -> None:
    """Show a normal probability (Q-Q) plot of ``data``.

    The points are drawn by ``scipy.stats.probplot`` against the normal
    distribution with ``fit=False``, so no fitted line is added. The title,
    axis labels and y-limits that ``probplot`` leaves behind are then
    replaced by the ones given here, and the figure is displayed.

    Parameters
    ----------
    data : values to plot.
    title : text for the plot title.
    ylabel : text for the y-axis label.
    ylim : y-axis limits, passed on unchanged to the axes.
    """
    axes = plt.gca()
    probplot(data, dist='norm', fit=False, plot=axes)
    # These must come after probplot, which sets its own title and labels.
    axes.set_title(title)
    axes.set_xlabel('Quantiles of standard normal')
    axes.set_ylabel(ylabel)
    axes.set_ylim(ylim)
    plt.show()

In [None]:
# Panel (a): Q-Q plot of the untransformed values.
qq_plot(data, title='(a) Population data', ylabel='y', ylim=(-2, 85))

In [None]:
def log_likelihood(lambda_: np.ndarray, data: np.ndarray) -> np.ndarray:
    """Profile log-likelihood of the Box-Cox parameter, relative to its maximum.

    For each candidate ``lam`` the data are transformed with
    ``(data**lam - 1) / lam`` (or ``log(data)`` when ``lam == 0``, which is the
    limit of that expression), the normal variance is replaced by its
    maximum-likelihood estimate, and the value
    ``-n/2 * log(sigma_sq_hat) + (lam - 1) * sum(log(data))`` is recorded.

    Parameters
    ----------
    lambda_ : array-like
        Candidate transformation parameters. A scalar is treated as a
        one-element grid.
    data : array-like
        Observations. They need to be strictly positive for ``log(data)`` to
        be finite.

    Returns
    -------
    np.ndarray
        One value per candidate, shifted so that the largest entry is 0.
        An empty grid yields an empty array.
    """
    # Convert up front so plain lists and Python floats are accepted;
    # ``list ** float`` raises TypeError, and the list input previously only
    # worked because NumPy scalars coerce it on the fly.
    values = np.asarray(data, dtype=float)
    candidates = np.atleast_1d(np.asarray(lambda_, dtype=float))
    n = values.size

    # Loop-invariant pieces: the log-data and the Jacobian term's sum.
    log_values = np.log(values)
    sum_log_values = log_values.sum()

    log_like = []
    for lam in candidates:
        if lam == 0:
            # (y**lam - 1) / lam -> log(y) as lam -> 0; evaluating the formula
            # directly would give 0/0 = NaN and poison the maximum below.
            y_lambda = log_values
        else:
            y_lambda = (values**lam - 1) / lam
        # Maximum-likelihood variance estimate (divides by n, not n - 1).
        sigma_sq_hat = np.var(y_lambda)
        log_like.append((-n / 2) * np.log(sigma_sq_hat) + (lam - 1) * sum_log_values)

    log_like = np.array(log_like, dtype=float)
    if log_like.size == 0:
        return log_like
    # Normalise so the maximum of the curve sits at zero.
    return log_like - np.max(log_like)

In [None]:
# Evaluate the profile log-likelihood on an evenly spaced grid of candidate
# lambdas and report the grid point where it is largest.
# The book reports 0.12; the value found here looks correct when the
# log-likelihood curve is inspected.
# NOTE(review): the grid spacing is (1.5 - 0.01) / 39, about 0.038, so
# lambda_hat is only resolved to that precision even though two decimals are
# printed -- this may account for the difference from the book; confirm.
lambda_ = np.linspace(start=0.01, stop=1.5, num=40)
log_like = log_likelihood(lambda_, data)
lambda_hat = lambda_[log_like.argmax()]
print('lambda_hat = ', lambda_hat.round(2))

In [None]:
def plot_profile_likelihood(
    lambda_: np.ndarray,
    log_likelihood: np.ndarray,
    mle_lambda: float) -> None:
    """Plot a log-likelihood curve over lambda and mark one lambda value.

    Draws on the current pyplot figure and does not call ``plt.show()``.

    Parameters
    ----------
    lambda_ : x-values of the curve.
    log_likelihood : y-values of the curve, one per entry of ``lambda_``.
        Inside this function the name shadows the module-level
        ``log_likelihood`` function.
    mle_lambda : x-position of the thin vertical marker line.
    """
    plt.plot(lambda_, log_likelihood)
    plt.axvline(x=mle_lambda, linewidth=1)
    plt.xlabel(r'$\lambda$')
    plt.ylabel('Log-likelihood')
    plt.title(r'(b) Profile likelihood of $\lambda$')

In [None]:
# Panel (b): log-likelihood curve with the grid maximiser marked.
plot_profile_likelihood(
    lambda_=lambda_,
    log_likelihood=log_like,
    mle_lambda=lambda_hat,
)

In [None]:
# Panel (c): Q-Q plot of the square-root-transformed values, with the y-axis
# padded by 0.3 beyond the smallest and largest transformed value.
data_sqrt = np.sqrt(data)
qq_plot(
    data_sqrt,
    title='(c) Square-root transform',
    ylabel=r'$\sqrt{y}$',
    ylim=(data_sqrt.min() - 0.3, data_sqrt.max() + 0.3),
)

In [None]:
# Panel (d): Q-Q plot of the log-transformed values, with the y-axis padded
# by 0.3 beyond the smallest and largest transformed value.
data_log = np.log(data)
qq_plot(
    data_log,
    title='(d) Log-transform',
    ylabel=r'$\log{y}$',
    ylim=(data_log.min() - 0.3, data_log.max() + 0.3),
)