In [24]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

# Part (a)

The MLE for $\psi=p_2-p_1$ is $\hat \psi =\hat p_2 - \hat p_1$, where $\hat p_i = X_i/n$, $X_i$ is the number of patients with improvement in group $i$, and $n=50$ is the number of patients in each group (see Chapter 9, Exercise 7).

In [25]:
# Number of patients in each treatment group.
n = 50

In [26]:
# Number of patients with improvement in group 1.
X_1 = 30

In [27]:
# Number of patients with improvement in group 2.
X_2 = 40

In [28]:
# MLE of p_1: observed proportion of improved patients in group 1.
p_1_hat = X_1 / n

In [29]:
# MLE of p_2: observed proportion of improved patients in group 2.
p_2_hat = X_2 / n

In [30]:
# Plug-in MLE of psi = p_2 - p_1.
psi_hat = p_2_hat - p_1_hat

In [31]:
# Report the point estimate to four decimal places.
print(f'MLE for p_2 - p_1: {psi_hat:.4f}')

MLE for p_2 - p_1: 0.2000


We use the multiparameter Delta method to estimate the standard error. We have $\psi=g(p_1,p_2)=p_2-p_1$ and $\nabla g = (-1,1)^T$. Consider the log-likelihood $\ell = \sum_{i=1}^2 \log \left( \binom{n}{X_i} p_i^{X_i} (1-p_i)^{n-X_i}\right)$. Then $\partial^2 \ell / \partial p_i \partial p_j = 0$ for $i\neq j$ and
$$\frac{\partial^2 \ell}{\partial p_i^2} = -\frac{X_i}{p_i^2} - \frac{n-X_i}{(1-p_i)^2} \text{ and }
\mathbb{E}_{(p_1,p_2)}(\partial^2 \ell/ \partial p_i^2) = -\frac{np_i}{p_i^2} - \frac{n-np_i}{(1-p_i)^2} = -\frac{n}{p_i(1-p_i)}.$$ Thus, the Fisher information is
$$I(p_1,p_2)=n \begin{pmatrix} \frac{1}{p_1(1-p_1)} & 0 \\ 0 & \frac{1}{p_2(1-p_2)}\end{pmatrix} \text{ with inverse }
J(p_1,p_2)=\frac{1}{n} \begin{pmatrix} p_1(1-p_1) & 0 \\ 0 & p_2(1-p_2)\end{pmatrix}.$$
By the multiparameter delta method,
$$\hat{\mathrm{se}}(\hat \psi)=\frac{1}{\sqrt{n}}\sqrt{\hat p_1(1 - \hat p_1) + \hat p_2 (1 - \hat p_2)}.$$

In [32]:
# Delta-method standard error of psi_hat: sqrt(variance_sum / n), evaluated as
# n^(-1/2) * variance_sum^(1/2) with the plug-in estimates p_1_hat and p_2_hat.
variance_sum = p_1_hat * (1 - p_1_hat) + p_2_hat * (1 - p_2_hat)
se_hat = n ** -0.5 * variance_sum ** 0.5

In [33]:
# Report the delta-method standard error to four decimal places.
print(f'Estimated standard error using delta method: {se_hat:.4f}')

Estimated standard error using delta method: 0.0894


Now we use this to estimate a 90% confidence interval.

In [39]:
# Significance level; 1 - alpha = 0.90 is the confidence level of the interval.
alpha = 0.1

In [40]:
# Standard normal quantile at probability 1 - alpha/2, i.e. the critical value
# for a two-sided interval with total tail mass alpha.
z = norm.ppf(1 - alpha/2)

In [41]:
# Lower endpoint of the normal-based interval, using the delta-method standard error.
lower = psi_hat - z * se_hat

In [42]:
# Upper endpoint of the normal-based interval, using the delta-method standard error.
upper = psi_hat + z * se_hat

In [43]:
# Report the interval endpoints; the "90%" label is hard-coded and matches alpha = 0.1.
print(f'Estimated 90% confidence interval: ({lower:.4f}, {upper:.4f})')

Estimated 90% confidence interval: (0.0529, 0.3471)


# Part (b)

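Now we estimate the standard error using the parametric bootstrap. Each bootstrap sample consists of $n$ draws from each of $\mathrm{Bern}(\hat p_1)$ and $\mathrm{Bern}(\hat p_2)$; equivalently, we draw the improvement counts $X_i^* \sim \mathrm{Binomial}(n, \hat p_i)$ and recompute $\hat\psi^* = X_2^*/n - X_1^*/n$.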

In [46]:
# Number of bootstrap replicates.
B = 100_000

In [47]:
# Uninitialized length-B array that will hold one bootstrap replicate of psi_hat per entry.
bootstrap = np.empty(B)

In [48]:
# Parametric bootstrap, vectorized: draw all B replicates in one call per group
# instead of issuing 2 * B scalar RNG calls from a Python loop.
# A locally seeded Generator makes this cell reproducible on re-run without
# touching NumPy's global random state; the seed value itself is arbitrary.
rng = np.random.default_rng(0)

# Simulated improvement counts X_i* ~ Binomial(n, p_i_hat), one per replicate.
X_1_star = rng.binomial(n, p_1_hat, size=B)
X_2_star = rng.binomial(n, p_2_hat, size=B)

# Re-estimate the proportions on every simulated data set.
p_1_star = X_1_star / n
p_2_star = X_2_star / n

# Each entry is one bootstrap replicate psi* = p_2* - p_1*.
bootstrap = p_2_star - p_1_star

In [50]:
# Bootstrap standard error: root-mean-square deviation of the replicates.
# Deviations are taken about psi_hat (not the replicate average); under the
# parametric bootstrap model E[psi*] = p_2_hat - p_1_hat = psi_hat exactly.
squared_deviations = (bootstrap - psi_hat) ** 2
se_boot = np.sqrt(np.mean(squared_deviations))

In [51]:
# Report the bootstrap standard error to four decimal places.
print(f'Bootstrap standard error: {se_boot:.4f}')

Bootstrap standard error: 0.0893


In [53]:
# Lower endpoint of the normal-based interval using the bootstrap standard error.
# NOTE: rebinds `lower`, replacing the delta-method value computed in Part (a).
lower = psi_hat - z * se_boot

In [54]:
# Upper endpoint of the normal-based interval using the bootstrap standard error.
# NOTE: rebinds `upper`, replacing the delta-method value computed in Part (a).
upper = psi_hat + z * se_boot

In [55]:
# Report the bootstrap-based interval; the "90%" label is hard-coded and matches alpha = 0.1.
print(f'Estimated 90% confidence interval: ({lower:.4f}, {upper:.4f})')

Estimated 90% confidence interval: (0.0531, 0.3469)


# Part (c)

# Part (d)

# Part (e)