In [None]:
from typing import List, Tuple

import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import interp1d
from scipy.special import comb, xlog1py, xlogy

(a) Hardy-Weinberg is trinomial with probabilities $(\theta^2, 2 \theta (1-\theta), (1-\theta)^2)$. So the likelihood function is given by:

$$L(\theta) = {n_1 + n_2 + n_3 \choose n_1} {n_2 + n_3 \choose n_2} {n_3 \choose n_3} \theta^{2n_1} (2 \theta (1-\theta))^{n_2} (1-\theta)^{2n_3} $$

So the log-likelihood is given by:

$$\log L(\theta) = \text{constant} + 2n_1 \log \theta + n_2 \log \theta + n_2 \log (1-\theta) + 2n_3 \log (1-\theta) $$

Then the MLE of $\theta$ is given by the solution to the score equation:

\begin{align}
S(\theta) & = \frac{\partial}{\partial \theta} \log L(\theta) \\
& = \frac{2n_1 + n_2}{\theta} - \frac{n_2 + 2n_3}{1-\theta} = 0
\end{align}

Solving for $\theta$ we have that the MLE is given by:

$$\hat{\theta} = \frac{2n_1 + n_2}{2n_1 + 2n_2 + 2n_3}$$

And the observed Fisher information is given by:

\begin{align}
I(\theta) & = - \frac{\partial^2}{\partial \theta^2} \log L(\theta) \\
& = \frac{2n_1 + n_2}{\theta^2} + \frac{n_2 + 2n_3}{(1-\theta)^2}
\end{align}

(b) Given $n_1 = 125$, $n_2 = 34$ and $n_3 = 10$ we have:

In [None]:
def likelihood_hw(theta: np.ndarray, n1: int, n2: int, n3: int) -> np.ndarray:
    """Normalized Hardy-Weinberg likelihood evaluated on a grid of ``theta``.

    The genotype counts ``(n1, n2, n3)`` are trinomial with cell probabilities
    ``(theta**2, 2*theta*(1 - theta), (1 - theta)**2)``. The likelihood is
    evaluated at every entry of ``theta`` and rescaled so that its largest
    value over the supplied grid equals 1.

    The work is done on the log scale: the direct product
    ``theta**(2*n1) * ...`` underflows to 0 and the multinomial coefficient
    overflows to ``inf`` for large counts, which turns the normalized curve
    into ``nan``. Factors that do not depend on ``theta`` (the multinomial
    coefficient and ``2**n2``) cancel in the normalization and are omitted.

    Parameters
    ----------
    theta : array_like
        Candidate parameter values, each expected to lie in ``[0, 1]``.
    n1, n2, n3 : int
        Observed counts of the three genotypes (AA, Aa, aa).

    Returns
    -------
    numpy.ndarray
        ``L(theta) / max(L(theta))`` with the same shape as ``theta``.
    """
    theta = np.asarray(theta, dtype=float)
    with np.errstate(divide="ignore", invalid="ignore"):
        # xlogy / xlog1py return 0 when the count is 0, which matches the
        # convention 0**0 == 1 used by the direct power formula.
        log_like = xlogy(2 * n1 + n2, theta) + xlog1py(n2 + 2 * n3, -theta)
        # Subtracting the maximum before exponentiating keeps the peak at 1
        # and lets far-away grid points underflow harmlessly to 0.
        return np.exp(log_like - np.max(log_like))

def mle_hw(n1: int, n2: int, n3: int) -> float:
    """Closed-form maximum likelihood estimate of ``theta``.

    Solves the score equation of the Hardy-Weinberg likelihood, giving
    ``(2*n1 + n2) / (2*(n1 + n2 + n3))``.

    Parameters
    ----------
    n1, n2, n3 : int
        Observed counts of the three genotypes.

    Returns
    -------
    float
        The estimate ``theta_hat``.
    """
    numerator = 2*n1 + n2
    denominator = 2 * (n1 + n2 + n3)
    return numerator / denominator

def standard_error(theta_hat: float, n1: int, n2: int, n3: int) -> float:
    """Standard error of ``theta_hat`` from the observed Fisher information.

    The information is ``(2*n1 + n2)/theta_hat**2 + (n2 + 2*n3)/(1 - theta_hat)**2``
    and the standard error is its inverse square root.

    Parameters
    ----------
    theta_hat : float
        Point at which the information is evaluated.
    n1, n2, n3 : int
        Observed counts of the three genotypes.

    Returns
    -------
    float
        ``1 / sqrt(I(theta_hat))``.
    """
    info_from_theta = (2*n1 + n2) / theta_hat**2
    info_from_complement = (n2 + 2*n3) / (1 - theta_hat)**2
    return 1 / np.sqrt(info_from_theta + info_from_complement)
    

def plot_log_likelihood(theta: List[float], likelihood: List[float]):
    """Draw ``likelihood`` against ``theta`` on the current matplotlib axes.

    Despite the function's name, the values are plotted exactly as passed in
    and the y-axis is labelled 'Likelihood'.

    Parameters
    ----------
    theta : sequence of float
        Values for the x-axis.
    likelihood : sequence of float
        Values for the y-axis, one per entry of ``theta``.
    """
    axes = plt.gca()
    axes.plot(theta, likelihood)
    axes.set_title('Hardy-Weinberg law likelihood')
    axes.set_xlabel(r'$\theta$')
    axes.set_ylabel('Likelihood')

In [None]:
# Observed genotype counts from part (b).
n1 = 125
n2 = 34
n3 = 10

# Normalized likelihood on a grid covering the parameter space [0, 1].
theta = np.linspace(0, 1, 100)
likelihood = likelihood_hw(theta, n1, n2, n3)
plot_log_likelihood(theta, likelihood)

# Keep `mle` and `se` at full precision so that anything computed from them
# (the standard error itself, intervals in later cells) is not contaminated
# by rounding error; round only at the point of display.
mle = mle_hw(n1, n2, n3)
print(f"MLE = {round(mle, 2)}")
se = standard_error(mle, n1, n2, n3)
print(f"standard error = {round(se, 2)}")

(c) Comparing the 95% likelihood-based interval with the 95% Wald interval, we see that they agree to the two decimal places reported.

In [None]:
def likelihood_interval(theta: List[float],
                        likelihood: List[float],
                        cutoff: float,
                        decimals: int = 2) -> Tuple[float, float]:
    """Likelihood-based interval ``{theta : likelihood(theta) >= cutoff}``.

    The two end points are found by linearly interpolating ``theta`` as a
    function of ``likelihood`` separately on the rising branch (grid points up
    to the maximum) and on the falling branch (grid points from the maximum
    onwards).

    Parameters
    ----------
    theta : array_like
        Grid of parameter values.
    likelihood : array_like
        Likelihood at each grid point; assumed to rise to a single peak and
        then fall.
    cutoff : float
        Likelihood level at which the curve is cut.
    decimals : int, optional
        Number of decimal places the end points are rounded to (default 2).

    Returns
    -------
    tuple of float
        ``(lower, upper)`` end points of the interval.

    Raises
    ------
    ValueError
        Raised by ``interp1d`` when ``cutoff`` is outside the range of
        likelihood values on either branch, or when a branch has fewer than
        two grid points (e.g. the maximum sits on the edge of the grid).
    """
    theta = np.asarray(theta, dtype=float)
    likelihood = np.asarray(likelihood, dtype=float)
    peak = int(np.argmax(likelihood))
    # The peak belongs to BOTH branches. Leaving it out of the rising branch
    # makes any cutoff between likelihood[peak - 1] and the maximum fall
    # outside the interpolation range.
    rising = interp1d(likelihood[:peak + 1], theta[:peak + 1])
    falling = interp1d(likelihood[peak:], theta[peak:])
    # Convert to plain floats so the result matches the annotated return type
    # and prints cleanly inside a tuple.
    lower = float(np.round(rising(cutoff), decimals))
    upper = float(np.round(falling(cutoff), decimals))
    return (lower, upper)

In [None]:
# Likelihood cutoff used for the 95% likelihood interval.
c = 0.15
theta_interval = likelihood_interval(theta, likelihood, c)
print(f'95% likelihood interval (c = {c}) for θ is {theta_interval}')

In [None]:
# Wald interval: estimate plus/minus 1.96 standard errors, rounded for display.
wald_lower = round(mle - 1.96 * se, 2)
wald_upper = round(mle + 1.96 * se, 2)
print(f'95% Wald confidence interval for θ is {(wald_lower, wald_upper)}')

(d) With the first two categories pooled, the Hardy-Weinberg model is now binomial with probabilities $(1-(1-\theta)^2, (1-\theta)^2)$. So the likelihood function is given by:

$$L(\theta) = {n_1 + n_2 + n_3 \choose n_1 + n_2} {n_3 \choose n_3} (\theta (2-\theta))^{n_1 + n_2} (1-\theta)^{2n_3} $$

Note that, as above, the constant term does not depend on $\theta$ and can be dropped without changing the normalized likelihood. The steps above would then be repeated with this likelihood.