In [1]:
import numpy as np
import random
import scipy.stats

### Generate the data.

In [2]:
def generate_data(n=25, random_state=None):
    """Draw a sample from a Student's t distribution with 3 degrees of freedom.

    Parameters
    ----------
    n : int
        Number of observations to draw.
    random_state : None, int, or numpy random generator/state, optional
        Forwarded to SciPy's ``rvs``. Leave as ``None`` to use the global
        NumPy random state (the original behavior); pass a seed to make the
        sample reproducible.

    Returns
    -------
    numpy.ndarray
        One-dimensional array holding the ``n`` draws.
    """
    return scipy.stats.t(df=3).rvs(size=n, random_state=random_state)

### Estimate the statistic of interest.

In [3]:
def estimator(data):
    """Return the interquartile range of ``data`` divided by 1.34.

    Both quartiles are computed by ``np.quantile`` with its default
    interpolation.

    NOTE(review): 1.34 is presumably the rounded interquartile range of a
    standard normal, which would make this a robust scale estimate; confirm.
    """
    first_quartile, third_quartile = np.quantile(data, q=[0.25, 0.75])
    return (third_quartile - first_quartile)/1.34

##### Sanity check: when the sample size is sufficiently large, the estimate should be close to the true value.

In [4]:
# Population counterpart of `estimator`: the interquartile range of a
# Student's t distribution with 3 degrees of freedom, divided by 1.34.
true_distribution = scipy.stats.t(df=3)
upper_quartile = true_distribution.ppf(0.75)
lower_quartile = true_distribution.ppf(0.25)
true_value = (upper_quartile - lower_quartile)/1.34

# Apply the estimator to one large sample (100,000 draws) for comparison.
n = 100_000
estimate = estimator(generate_data(n=n))

summary = (
    f"The true value is: {true_value:.3f}.\n"
    f"The estimate is:   {estimate:.3f}."
)
print(summary)

The true value is: 1.142.
The estimate is:   1.148.


### Bootstrap replications
Now passing the *estimator* as an *argument* (instead of being hard-coded in).

In [5]:
def bootstrap_resample(data):
    """Draw one bootstrap sample from ``data``.

    Picks ``len(data)`` items from ``data`` with replacement using
    ``random.choices`` and returns them as a list.
    """
    sample_size = len(data)
    return random.choices(data, k=sample_size)

def bootstrap_replications(data=None, estimator=estimator, B=100):
    """Compute ``B`` bootstrap replications of ``estimator`` on ``data``.

    Parameters
    ----------
    data : sequence, optional
        Observed sample to resample from. When omitted, a fresh sample of
        25 observations is drawn with ``generate_data`` on each call.
    estimator : callable
        Called with one positional argument (a bootstrap resample, which is
        a list) and expected to return the statistic of interest.
    B : int
        Number of bootstrap replications.

    Returns
    -------
    list
        The ``B`` values of ``estimator``, one per bootstrap resample.
    """
    # A default of `generate_data(n=25)` in the signature would be evaluated
    # once, when the function is defined, and then shared by every call.
    # Resolving it here gives each call its own sample instead.
    if data is None:
        data = generate_data(n=25)
    return [estimator(bootstrap_resample(data)) for _ in range(B)]

### Automated production of the confidence intervals
Once again: now passing the *estimator* as an *argument* (instead of being hard-coded in).

In [6]:
def bootstrap_confidence_intervals(data=None, estimator=estimator, B=100, alpha=0.05):
    """Build Normal, pivotal and percentile bootstrap confidence intervals.

    Parameters
    ----------
    data : sequence, optional
        Observed sample. When omitted, a fresh sample of 50 observations is
        drawn with ``generate_data`` on each call.
    estimator : callable
        Called with one positional argument (the data, or a bootstrap
        resample of it) and expected to return the statistic of interest.
    B : int
        Number of bootstrap replications.
    alpha : float
        The intervals use the ``alpha/2`` and ``1 - alpha/2`` quantiles, so
        ``alpha=0.05`` corresponds to the 95% intervals.

    Returns
    -------
    dict
        Keys ``"normal"``, ``"pivotal"`` and ``"percentile"``, each mapped
        to a ``(lower_bound, upper_bound)`` tuple.
    """
    # A default of `generate_data(n=50)` in the signature would be evaluated
    # once, at definition time, and shared by every call; resolve it here.
    if data is None:
        data = generate_data(n=50)

    z = scipy.stats.norm.isf(alpha/2)

    # Positional call, as in bootstrap_replications: a keyword call
    # (`data=...`) would reject any estimator whose parameter has another
    # name, e.g. np.median.
    estimate = estimator(data)

    replicated_data = bootstrap_replications(data=data, estimator=estimator, B=B)
    se_boot_estimate = np.std(replicated_data)

    # Both tail quantiles of the replications, shared by the pivotal and
    # percentile intervals.
    lower_quantile, upper_quantile = np.quantile(
        replicated_data, q=[alpha/2, 1 - alpha/2]
    )

    lower_bound_normal = estimate - z*se_boot_estimate
    upper_bound_normal = estimate + z*se_boot_estimate

    # The pivotal interval reflects the replication quantiles around the
    # point estimate, so the upper quantile yields the lower bound.
    lower_bound_pivotal = 2*estimate - upper_quantile
    upper_bound_pivotal = 2*estimate - lower_quantile

    lower_bound_percentile = lower_quantile
    upper_bound_percentile = upper_quantile

    return {
        "normal": (lower_bound_normal, upper_bound_normal),
        "pivotal": (lower_bound_pivotal, upper_bound_pivotal),
        "percentile": (lower_bound_percentile, upper_bound_percentile)
    }

In [7]:
confidence_intervals = bootstrap_confidence_intervals(
    data=generate_data(n=25),
    estimator=estimator,
    B=int(1e3),
    alpha=0.05
)

# Unpack the bounds before formatting: indexing with double-quoted keys
# inside a double-quoted f-string is a SyntaxError before Python 3.12.
normal_lower, normal_upper = confidence_intervals["normal"]
pivotal_lower, pivotal_upper = confidence_intervals["pivotal"]
percentile_lower, percentile_upper = confidence_intervals["percentile"]

# The labels are padded so that the three intervals line up in the output.
print(
    "95% bootstrap Normal confidence interval:    "
    f"({normal_lower:.3f}, {normal_upper:.3f})\n"
    "95% bootstrap pivotal confidence interval:   "
    f"({pivotal_lower:.3f}, {pivotal_upper:.3f})\n"
    "95% bootstrap percentile confidence interval:"
    f"({percentile_lower:.3f}, {percentile_upper:.3f})\n"
)

95% bootstrap Normal confidence interval:    (0.062, 1.170)
95% bootstrap pivotal confidence interval:   (0.021, 0.955)
95% bootstrap percentile confidence interval:(0.277, 1.211)



### Empirical estimation of the coverage & length

In [129]:
# Population target for the coverage study: the interquartile range of a
# Student's t distribution with 3 degrees of freedom, divided by 1.34.
true_distribution = scipy.stats.t(df=3)
upper_quartile = true_distribution.ppf(0.75)
lower_quartile = true_distribution.ppf(0.25)
true_value = (upper_quartile - lower_quartile)/1.34

# Auxiliary functions used in the many-trial run below
def in_interval(number, interval):
    """Return whether ``number`` lies strictly inside ``interval``.

    ``interval`` is unpacked as ``(lower, upper)``; a value equal to either
    endpoint is treated as outside.
    """
    low, high = interval
    return low < number and number < high

def length_of_interval(interval):
    """Return the upper bound minus the lower bound of ``interval``.

    ``interval`` is unpacked as ``(lower, upper)``.
    """
    low, high = interval
    return high - low

# The three interval types returned by bootstrap_confidence_intervals.
methods = ("normal", "pivotal", "percentile")

# Per-method accumulators: number of intervals that contain the true value,
# and the length of every interval produced.
coverage_count = dict.fromkeys(methods, 0)
interval_lengths = {name: [] for name in methods}

trials = 500

# Each trial draws a fresh sample of 25 observations, builds the three
# bootstrap intervals, and records whether each one contains the true value
# along with its length.
for _ in range(trials):
    confidence_intervals = bootstrap_confidence_intervals(
        data=generate_data(n=25),
        estimator=estimator,
        B=1000,
        alpha=0.05
    )

    for method in methods:
        interval = confidence_intervals[method]
        coverage_count[method] += in_interval(true_value, interval)
        interval_lengths[method].append(length_of_interval(interval))

# Estimated coverage = fraction of trials whose interval contains the true
# value; estimated length = mean interval length over the trials.
coverage_estimate = {}
interval_length_estimate = {}
for method in methods:
    coverage_estimate[method] = coverage_count[method]/trials
    interval_length_estimate[method] = np.mean(interval_lengths[method])

# Build the report: a header followed by one line per method.
report = "Estimated coverages & lengths for all three types of bootstrap confidence intervals.\n"
for method in methods:
    report += (
        "["
        + method.capitalize()
        + f"] Coverage: {coverage_estimate[method]*100:.0f}%, "
        + f"estimated length: {interval_length_estimate[method]:.2f}.\n"
    )
print(report)

Estimated coverages & lengths for all three types of bootstrap confidence intervals.
[Normal] Coverage: 93%, estimated length: 1.32.
[Pivotal] Coverage: 84%, estimated length: 1.29.
[Percentile] Coverage: 97%, estimated length: 1.29.

