
## Bootstrap Confidence Intervals (Simulation Example)

This section illustrates how bootstrap confidence intervals can outperform
classical CLT-based intervals when assumptions (normality, large $n$) fail.


In [None]:

from statistics import NormalDist

import matplotlib.pyplot as plt
import numpy as np

# Single named seed so every stochastic cell below is reproducible under
# Restart Kernel -> Run All, and the seed can be changed in one place.
SEED = 2026
rng = np.random.default_rng(SEED)



### Skewed population and small sample


In [None]:

# Size of the observed sample.
n = 15

# Simulated finite population of 10,000 exponential draws, followed by a
# sample of n of them taken without replacement.
population = rng.exponential(size=10_000, scale=1.0)
sample = rng.choice(population, n, replace=False)

# Display (sample mean, sample median).
sample.mean(), np.median(sample)



### Classical CLT confidence interval for the mean


In [None]:

# Significance level: the interval has nominal two-sided coverage 1 - alpha.
alpha = 0.05
# Standard-normal critical value derived from alpha (about 1.96 when
# alpha = 0.05), so changing alpha actually changes the interval.
z = NormalDist().inv_cdf(1 - alpha / 2)

xbar = np.mean(sample)
s = np.std(sample, ddof=1)  # ddof=1 gives the (n - 1)-denominator sample SD

# Half-width of the normal-approximation interval: z * s / sqrt(n).
margin = z * s / np.sqrt(n)
ci_clt = (xbar - margin, xbar + margin)

ci_clt



### Bootstrap percentile confidence interval


In [None]:

B = 10_000  # number of bootstrap resamples

# Draw B resamples of size n from the observed sample, with replacement,
# and record the mean of each one.
boot_means = np.empty(B)
for b in range(B):
    boot_sample = rng.choice(sample, size=n, replace=True)
    boot_means[b] = np.mean(boot_sample)

# Percentile interval at level 1 - alpha, reusing the alpha defined for the
# CLT interval so both intervals target the same nominal coverage.
# For alpha = 0.05 these are exactly the 2.5th and 97.5th percentiles.
lower_pct = 100 * alpha / 2
ci_boot = np.percentile(boot_means, [lower_pct, 100 - lower_pct])
ci_boot



### Bootstrap sampling distribution


In [None]:

# Histogram of the bootstrap means with the percentile-interval bounds and
# the observed sample mean marked. Each marker gets its own style and a
# legend entry so the three vertical lines can be told apart.
fig, ax = plt.subplots()
ax.hist(boot_means, bins=40, density=True, color="lightgray", edgecolor="white")

# Only the first bound carries a label so the legend lists the pair once.
ax.axvline(ci_boot[0], color="tab:red", linestyle="--",
           label="Bootstrap percentile CI bounds")
ax.axvline(ci_boot[1], color="tab:red", linestyle="--")
ax.axvline(xbar, color="black", label="Sample mean")

ax.set_title("Bootstrap Sampling Distribution of the Mean")
ax.set_xlabel("Bootstrap sample mean")
ax.set_ylabel("Density")
ax.legend()
plt.show()



### Ground truth comparison (instructor check)


In [None]:

# Mean of the full simulated population, used as the reference value.
true_mean = population.mean()

# Display (CLT interval contains true_mean, bootstrap interval contains true_mean).
(
    ci_clt[0] <= true_mean <= ci_clt[1],
    ci_boot[0] <= true_mean <= ci_boot[1],
)



### Bootstrap confidence interval for the median
(No simple CLT-based interval available)


In [None]:

# Bootstrap distribution of the sample median: B resamples of size n drawn
# with replacement from the observed sample.
boot_medians = np.empty(B)
for b in range(B):
    boot_sample = rng.choice(sample, size=n, replace=True)
    boot_medians[b] = np.median(boot_sample)

# Percentile interval at level 1 - alpha (the alpha defined for the CLT
# interval); for alpha = 0.05 these are exactly the 2.5th and 97.5th
# percentiles. The result is bound to a name so later cells can reuse it.
lower_pct = 100 * alpha / 2
ci_boot_median = np.percentile(boot_medians, [lower_pct, 100 - lower_pct])
ci_boot_median
