In [57]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm, beta

# Part (a)

The MLE for $\psi=p_2-p_1$ is $\hat \psi =\hat p_2 - \hat p_1$, where $\hat p_i = X_i/n$, $X_i$ is the number of patients with improvement in group $i$, and $n=50$ is the number of patients in each group (see Chapter 9, Exercise 7).

Now let's estimate the standard error and a confidence interval.

In [58]:
# Number of patients in each group (the same for both groups).
n = 50

In [59]:
# Number of patients showing improvement in group 1.
X_1 = 30

In [60]:
# Number of patients showing improvement in group 2.
X_2 = 40

In [61]:
# MLE of p_1: observed proportion of improvements in group 1.
p_1_hat = X_1 / n

In [62]:
# MLE of p_2: observed proportion of improvements in group 2.
p_2_hat = X_2 / n

In [63]:
# Plug-in MLE of psi = p_2 - p_1.
# NOTE: the name psi_hat is rebound to the log-odds ratio in Part (d).
psi_hat = p_2_hat - p_1_hat

In [64]:
# Report the MLE of the difference in improvement probabilities.
print(f'MLE for p_2 - p_1: {psi_hat:.4f}')

MLE for p_2 - p_1: 0.2000


We use the multiparameter Delta method to estimate the standard error. We have $\psi=g(p_1,p_2)=p_2-p_1$ and $\nabla g = (-1,1)^T$. Consider the log-likelihood $\ell = \sum_{i=1}^2 \log \left( \binom{n}{X_i} p_i^{X_i} (1-p_i)^{n-X_i}\right)$. Then $\partial^2 \ell / \partial p_i \partial p_j = 0$ for $i\neq j$ and
$$\frac{\partial^2 \ell}{\partial p_i^2} = -\frac{X_i}{p_i^2} - \frac{n-X_i}{(1-p_i)^2} \text{ and }
\mathbb{E}_{(p_1,p_2)}(\partial^2 \ell/ \partial p_i^2) = -\frac{n}{p_i(1-p_i)}.$$ Thus, the Fisher information is
$$I(p_1,p_2)=n \begin{pmatrix} \frac{1}{p_1(1-p_1)} & 0 \\ 0 & \frac{1}{p_2(1-p_2)}\end{pmatrix} \text{ with inverse }
J(p_1,p_2)=\frac{1}{n} \begin{pmatrix} p_1(1-p_1) & 0 \\ 0 & p_2(1-p_2)\end{pmatrix}.$$
By the multiparameter delta method,
$$\hat{\mathrm{se}}(\hat \psi)=\frac{1}{\sqrt{n}}\sqrt{\hat p_1(1 - \hat p_1) + \hat p_2 (1 - \hat p_2)}.$$

In [65]:
# Delta-method standard error of psi_hat:
#   sqrt(p_1(1 - p_1) + p_2(1 - p_2)) / sqrt(n), evaluated at the MLEs.
binomial_var_sum = p_1_hat * (1 - p_1_hat) + p_2_hat * (1 - p_2_hat)
se_hat = n**(-1/2) * binomial_var_sum**(1/2)

In [66]:
# Report the delta-method standard error of psi_hat.
print(f'Estimated standard error using delta method: {se_hat:.4f}')

Estimated standard error using delta method: 0.0894


Now we use this to estimate a 90% confidence interval.

In [67]:
# Significance level: 1 - alpha = 0.90 is the coverage of the intervals below.
alpha = 0.1

In [68]:
# Standard-normal quantile at 1 - alpha/2 (two-sided critical value).
# Reused for the normal-approximation intervals in Parts (b) and (d).
z = norm.ppf(1 - alpha/2)

In [69]:
# Lower endpoint of the normal-approximation interval psi_hat -/+ z * se_hat.
lower = psi_hat - z * se_hat

In [70]:
# Upper endpoint of the normal-approximation interval psi_hat -/+ z * se_hat.
upper = psi_hat + z * se_hat

In [71]:
# Report the delta-method interval. The "90%" in the label is hard-coded and
# matches alpha = 0.1; it will not follow a change to alpha.
print(f'Estimated 90% confidence interval: ({lower:.4f}, {upper:.4f})')

Estimated 90% confidence interval: (0.0529, 0.3471)


# Part (b)

Now we estimate the standard error using the parametric bootstrap. For each bootstrap sample we draw $X_i^* \sim \mathrm{Binomial}(n, \hat p_i)$ for each group, which is equivalent to summing $n$ draws from $\mathrm{Bern}(\hat p_i)$, and recompute $\hat\psi^* = X_2^*/n - X_1^*/n$.

In [72]:
# Number of parametric-bootstrap replications.
B = int(1e5)

In [73]:
# Preallocated storage for the B bootstrap replicates of psi = p_2 - p_1
# (filled in by the next cell).
bootstrap = np.empty(B)

In [74]:
# Parametric bootstrap, vectorised: draw all B replicate counts for each group
# in a single call instead of looping B times in Python.
# Seeding the global NumPy state makes these draws repeatable on re-run.
np.random.seed(0)
X_1_star = np.random.binomial(n, p_1_hat, size=B)
X_2_star = np.random.binomial(n, p_2_hat, size=B)

# Replicate MLEs of each proportion, then of psi = p_2 - p_1.
p_1_star = X_1_star / n
p_2_star = X_2_star / n

# Fill the preallocated array in place; a length mismatch with B raises.
bootstrap[:] = p_2_star - p_1_star

In [75]:
# Bootstrap standard error: standard deviation of the replicates about their
# own mean (1/B normalisation, i.e. ddof=0). Centring on the replicate mean
# rather than on psi_hat keeps this cell correct even if psi_hat has since
# been reassigned (it is rebound in Parts (d) and (e)).
se_boot = bootstrap.std()

In [76]:
# Report the bootstrap standard error (compare with the delta-method value).
print(f'Bootstrap standard error: {se_boot:.4f}')

Bootstrap standard error: 0.0895


In [77]:
# Lower endpoint of the normal interval, now with the bootstrap se.
# Relies on psi_hat and z as computed in Part (a).
lower = psi_hat - z * se_boot

In [78]:
# Upper endpoint of the normal interval, now with the bootstrap se.
# Relies on psi_hat and z as computed in Part (a).
upper = psi_hat + z * se_boot

In [79]:
# Report the bootstrap-based interval (label's "90%" is hard-coded; alpha = 0.1).
print(f'Estimated 90% confidence interval: ({lower:.4f}, {upper:.4f})')

Estimated 90% confidence interval: (0.0528, 0.3472)


# Part (c)

Now we estimate the posterior mean and a 90% posterior interval by sampling. Under a flat prior on $(p_1,p_2)$, the posterior distribution $p_i|x_i$ is $\mathrm{Beta}(x_i+1,n-x_i+1)$, independently for each group. Thus we sample from a $\mathrm{Beta}(x_i+1,n-x_i+1)$ for each $i$ and estimate the posterior mean of $\tau=p_2-p_1$ (the quantity called $\psi$ in Parts (a) and (b)) by transforming the two samples.

In [80]:
# Number of posterior draws per group (same value as the bootstrap B above).
B = int(1e5)

In [81]:
# B draws from the Beta(X_1 + 1, n - X_1 + 1) posterior of p_1.
P_1 = beta.rvs(a=X_1+1, b=n-X_1+1, size=B)

In [82]:
# B draws from the Beta(X_2 + 1, n - X_2 + 1) posterior of p_2.
P_2 = beta.rvs(a=X_2+1, b=n-X_2+1, size=B)

In [83]:
# Element-wise difference of the two sets of draws: draws of tau = p_2 - p_1.
Tau = P_2 - P_1

In [84]:
# Monte Carlo estimate of the posterior mean of tau.
tau_hat = Tau.mean()

In [85]:
# Report the sampled posterior mean of tau = p_2 - p_1.
print(f'Estimate of posterior mean from sampling: {tau_hat:.4f}')

Estimate of posterior mean from sampling: 0.1923


In [86]:
# alpha/2 empirical quantile of the tau draws: lower end of the equal-tailed
# posterior interval. Rebinds `lower` from Part (b).
lower = np.quantile(Tau, alpha / 2)

In [87]:
# (1 - alpha/2) empirical quantile of the tau draws: upper end of the
# equal-tailed posterior interval. Rebinds `upper` from Part (b).
upper = np.quantile(Tau, 1 - alpha / 2)

In [88]:
# Report the equal-tailed posterior interval built from the sampled quantiles
# (the label calls it a "confidence interval").
print(f'Estimated posterior confidence interval from sampling: ({lower:.4f}, {upper:.4f})')

Estimated posterior confidence interval from sampling: (0.0461, 0.3347)


# Part (d)

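Here $\psi = \log\left(\frac{p_1}{1-p_1} \div \frac{p_2}{1-p_2}\right)$ denotes the log-odds ratio rather than the difference $p_2-p_1$ from Part (a). By equivariance, the MLE of $\psi$ is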
$$\hat \psi = \log\left(\frac{\hat p_1}{1-\hat p_1} \div \frac{\hat p_2}{1-\hat p_2}\right)$$ 
where $\hat p_i = X_i / n$ is the MLE for $p_i$. Setting $g(p_1,p_2)=\psi$, we have $\nabla g = \left(\frac{1}{p_1(1-p_1)}, -\frac{1}{p_2(1-p_2)}\right)^T$. The Fisher information is
$$I(p_1,p_2)=n \begin{pmatrix} \frac{1}{p_1(1-p_1)} & 0 \\ 0 & \frac{1}{p_2(1-p_2)}\end{pmatrix} \text{ with inverse }
J(p_1,p_2)=\frac{1}{n} \begin{pmatrix} p_1(1-p_1) & 0 \\ 0 & p_2(1-p_2)\end{pmatrix}.$$ Thus, an estimate of the standard error is
$$\hat{\mathrm{se}} = \sqrt{(\hat \nabla g)^T \hat J (\hat \nabla g)} = \sqrt{\frac{1}{n}\left(\frac{1}{\hat p_1(1-\hat p_1)} + \frac{1}{\hat p_2(1-\hat p_2)}\right)}.$$
We compute this for the given values of $X_1,X_2$ below.

In [91]:
# MLE of the log-odds ratio: log-odds of group 1 minus log-odds of group 2,
# evaluated at the MLEs. This rebinds psi_hat (previously p_2_hat - p_1_hat).
log_odds_1 = np.log(p_1_hat / (1 - p_1_hat))
log_odds_2 = np.log(p_2_hat / (1 - p_2_hat))
psi_hat = log_odds_1 - log_odds_2

In [92]:
# Report the MLE of the log-odds ratio.
print(f'MLE of psi: {psi_hat:.4f}')

MLE of psi: -0.9808


In [93]:
# Delta-method standard error of the log-odds ratio: each group contributes
# 1 / (n * p_i * (1 - p_i)), evaluated at the MLEs. Rebinds se_hat from Part (a).
var_term_1 = 1 / (n * p_1_hat * (1 - p_1_hat))
var_term_2 = 1 / (n * p_2_hat * (1 - p_2_hat))
se_hat = (var_term_1 + var_term_2)**(1/2)

In [94]:
# Lower endpoint of the normal interval for the log-odds ratio, using the
# Part (d) psi_hat and se_hat with the critical value z from Part (a).
lower = psi_hat - z * se_hat

In [95]:
# Upper endpoint of the normal interval for the log-odds ratio, using the
# Part (d) psi_hat and se_hat with the critical value z from Part (a).
upper = psi_hat + z * se_hat

In [96]:
# Report the delta-method interval for the log-odds ratio
# (label's "90%" is hard-coded; alpha = 0.1).
print(f'90% confidence interval for psi: ({lower:.4f}, {upper:.4f})')

90% confidence interval for psi: (-1.7316, -0.2301)


# Part (e)

Finally, we estimate the posterior mean and a 90% posterior interval for $\psi$ by sampling. The posterior distribution $p_i|x_i$ is $\mathrm{Beta}(x_i+1,n-x_i+1)$. Thus we sample from a $\mathrm{Beta}(x_i+1,n-x_i+1)$ for each $i$ and estimate the posterior mean of $\psi=\log\left(\frac{p_1}{1-p_1} \div \frac{p_2}{1-p_2}\right)$ by transforming the two samples.

In [97]:
# Fresh draws from the Beta(X_1 + 1, n - X_1 + 1) posterior of p_1
# (overwrites the Part (c) sample).
P_1 = beta.rvs(a=X_1+1, b=n-X_1+1, size=B)

In [98]:
# Fresh draws from the Beta(X_2 + 1, n - X_2 + 1) posterior of p_2
# (overwrites the Part (c) sample).
P_2 = beta.rvs(a=X_2+1, b=n-X_2+1, size=B)

In [99]:
# Transform each pair of posterior draws into a draw of the log-odds ratio:
# log-odds of p_1 minus log-odds of p_2, element-wise.
log_odds_P_1 = np.log(P_1 / (1 - P_1))
log_odds_P_2 = np.log(P_2 / (1 - P_2))
Psi = log_odds_P_1 - log_odds_P_2

In [100]:
# Monte Carlo estimate of the posterior mean of psi.
# NOTE: overwrites the Part (d) MLE that was stored in psi_hat.
psi_hat = Psi.mean()

In [101]:
# Report the sampled posterior mean of the log-odds ratio.
print(f'Estimate of posterior mean from sampling: {psi_hat:.4f}')

Estimate of posterior mean from sampling: -0.9518


In [102]:
# Equal-tailed posterior interval for psi: both endpoints come from a single
# quantile call at levels alpha/2 and 1 - alpha/2.
lower, upper = np.quantile(Psi, [alpha / 2, 1 - alpha / 2])

print(f'Estimated posterior confidence interval from sampling: ({lower:.4f}, {upper:.4f})')

Estimated posterior confidence interval from sampling: (-1.6998, -0.2212)


We see that in all cases the point estimates and 90% intervals agree closely, whether obtained via the delta method, the parametric bootstrap, or posterior sampling.