In [2]:
import numpy as np
import random
import scipy.stats

In [3]:
# Closed-form skewness of exp(Z) for Z standard Normal; used below as the reference value.
np.sqrt(np.e - 1) * (np.e + 2)

6.184877138632554

### Generating the data

In [4]:
def generate_data(n=50):
    """Draw ``n`` independent log-normal observations.

    Each observation is ``exp(Y)`` where ``Y`` is sampled with
    ``scipy.stats.norm.rvs`` using its default location and scale
    (i.e. a standard Normal).

    Parameters
    ----------
    n : int, default 50
        Number of observations to draw.

    Returns
    -------
    numpy.ndarray
        Array holding the ``n`` exponentiated Normal draws.
    """
    return np.exp(scipy.stats.norm.rvs(size=n))

### Plug-in estimator for the skewness

In [5]:
def skewness_estimator(data):
    """Return the sample skewness of ``data``.

    Thin wrapper around ``scipy.stats.skew`` called with its default
    settings; this is the statistic that every bootstrap routine in this
    notebook replicates.

    Parameters
    ----------
    data : array-like
        One-dimensional sample.

    Returns
    -------
    float
        Skewness of the sample as computed by ``scipy.stats.skew``.
    """
    sample_skewness = scipy.stats.skew(data)
    return sample_skewness

In [9]:
# Sanity check: with a very large sample the plug-in estimate should land
# close to the closed-form skewness.
true_skewness = (2 + np.e)*np.sqrt(np.e - 1)
estimate = skewness_estimator(generate_data(n=500_000))
print(
    f"The estimate for the skewness is {estimate:.2f}.\n"
    f"The true value of the skewness is {true_skewness:.2f}."
)

The estimate for the skewness is 6.25.
The true value of the skewness is 6.18.


### Bootstrap estimate of the estimator's standard error

In [10]:
def bootstrap_resample(data):
    """Draw one bootstrap resample of ``data``.

    Samples ``len(data)`` elements from ``data`` with replacement using
    ``random.choices`` (so the draw is governed by the ``random`` module's
    state, not NumPy's).

    Parameters
    ----------
    data : sequence
        Original sample.

    Returns
    -------
    list
        Resample with the same number of elements as ``data``.
    """
    sample_size = len(data)
    return random.choices(data, k=sample_size)

def skewness_estimator_replications(data, B=1000):
    """Compute ``B`` bootstrap replications of the skewness estimator.

    Each replication applies ``skewness_estimator`` to a fresh resample
    produced by ``bootstrap_resample``.

    Parameters
    ----------
    data : sequence
        Original sample.
    B : int, default 1000
        Number of bootstrap replications.

    Returns
    -------
    list
        The ``B`` replicated skewness estimates, in the order they were drawn.
    """
    replications = []
    for _ in range(B):
        resample = bootstrap_resample(data)
        replications.append(skewness_estimator(resample))
    return replications

def se_boot(data, B):
    """Bootstrap estimate of the standard error of the skewness estimator.

    Parameters
    ----------
    data : sequence
        Original sample.
    B : int
        Number of bootstrap replications.

    Returns
    -------
    float
        Standard deviation (``np.std`` with its default settings) of the
        ``B`` replicated skewness estimates.
    """
    replications = skewness_estimator_replications(data, B)
    return np.std(replications)

In [18]:
# Small-sample experiment: one data set of size 50, its bootstrap standard
# error from 100 replications, and its plug-in skewness estimate.
data = generate_data(50)
se_boot_estimate = se_boot(data, 100)
estimate = skewness_estimator(data)

print(
    f"The estimate for the skewness is {estimate:.2f}.\n"
    f"(The true value of the skewness is {true_skewness:.2f}.)\n\n"
    f"The bootstrap standard error estimate is {se_boot_estimate:.2f}."
)
    

The estimate for the skewness is 2.45.
(The true value of the skewness is 6.18.)

The bootstrap standard error estimate is 0.54.


### Bootstrap confidence intervals

#### Normal interval

In [22]:
# Normal-approximation bootstrap interval: estimate -/+ z * se_boot.
alpha = 0.05
n = 50
B = 100

# Upper alpha/2 point of the standard Normal distribution.
z = scipy.stats.norm.isf(alpha/2)

data = generate_data(n=n)
skewness_estimate = skewness_estimator(data=data)

# Run the bootstrap once and reuse the same standard error for both bounds.
# Calling se_boot separately for each bound would use two different random
# standard errors, so the interval would not be centred on the estimate.
se_boot_estimate = se_boot(data, B=B)

lower_bound = skewness_estimate - z*se_boot_estimate
upper_bound = skewness_estimate + z*se_boot_estimate

print(
    "A 95% bootstrap Normal confidence interval for the skewness is as follows: "
    f"({lower_bound:.3f}, {upper_bound:.3f})"
)

A 95% bootstrap Normal confidence interval for the skewness is as follows: (1.948, 5.963)


#### Pivotal interval

In [23]:
# Pivotal bootstrap interval: reflect the bootstrap quantiles around the
# point estimate, (2*estimate - q_{1-alpha/2}, 2*estimate - q_{alpha/2}).
alpha = 0.05
n = 50
B = 100

data = generate_data(n)
skewness_estimate = skewness_estimator(data)

# Bootstrap replications of the skewness estimate (not of the raw data).
replicated_data = skewness_estimator_replications(data, B)

# The upper quantile produces the lower bound and vice versa.
lower_bound, upper_bound = 2*skewness_estimate - np.quantile(
    replicated_data, q=[1 - alpha/2, alpha/2]
)

print(
    "A 95% bootstrap pivotal confidence interval for the skewness is as follows: "
    f"({lower_bound:.3f}, {upper_bound:.3f})"
)

A 95% bootstrap pivotal confidence interval for the skewness is as follows: (0.629, 1.763)


#### Percentile interval

In [26]:
# Percentile bootstrap interval: the alpha/2 and 1 - alpha/2 quantiles of the
# bootstrap replications are used directly as the bounds.
alpha = 0.05
n = 50
B = 100

data = generate_data(n)

# Bootstrap replications of the skewness estimate (not of the raw data).
replicated_data = skewness_estimator_replications(data, B)

lower_bound, upper_bound = np.quantile(replicated_data, q=[alpha/2, 1 - alpha/2])

print(
    "A 95% bootstrap percentile confidence interval for the skewness is as follows: "
    f"({lower_bound:.3f}, {upper_bound:.3f})"
)

A 95% bootstrap percentile confidence interval for the skewness is as follows: (0.983, 2.437)


### Automated production of the confidence intervals

In [163]:
def bootstrap_confidence_intervals(data=None, B=100, alpha=0.05):
    """Compute Normal, pivotal and percentile bootstrap intervals for the skewness.

    All three intervals are built from the same set of ``B`` bootstrap
    replications of ``skewness_estimator``.

    Parameters
    ----------
    data : sequence, optional
        Sample to analyse. When omitted, a fresh sample of size 50 is drawn
        with ``generate_data`` on every call. (A default written as
        ``data=generate_data(n=50)`` would be evaluated only once, when the
        function is defined, and then silently shared by all calls.)
    B : int, default 100
        Number of bootstrap replications.
    alpha : float, default 0.05
        One minus the nominal confidence level.

    Returns
    -------
    dict
        Maps ``"normal"``, ``"pivotal"`` and ``"percentile"`` to a
        ``(lower_bound, upper_bound)`` tuple.
    """
    if data is None:
        data = generate_data(n=50)

    # Upper alpha/2 point of the standard Normal distribution.
    z = scipy.stats.norm.isf(alpha/2)

    skewness_estimate = skewness_estimator(data=data)

    # Bootstrap replications of the estimator, shared by all three intervals.
    replicated_data = skewness_estimator_replications(data=data, B=B)
    se_boot_estimate = np.std(replicated_data)

    lower_quantile = np.quantile(replicated_data, q=alpha/2)
    upper_quantile = np.quantile(replicated_data, q=1-alpha/2)

    return {
        # Symmetric interval around the estimate.
        "normal": (
            skewness_estimate - z*se_boot_estimate,
            skewness_estimate + z*se_boot_estimate,
        ),
        # Quantiles reflected around the estimate: the upper quantile gives
        # the lower bound and vice versa.
        "pivotal": (
            2*skewness_estimate - upper_quantile,
            2*skewness_estimate - lower_quantile,
        ),
        # Quantiles of the replications used directly.
        "percentile": (lower_quantile, upper_quantile),
    }

In [166]:
confidence_intervals = bootstrap_confidence_intervals(data=generate_data(n=50), B=100, alpha=0.05)
# Dictionary keys use single quotes inside the f-strings: reusing the
# enclosing double quote is only valid syntax from Python 3.12 onwards.
print(
    "95% bootstrap Normal confidence interval for the skewness:    "
    f"({confidence_intervals['normal'][0]:.3f}, {confidence_intervals['normal'][1]:.3f})\n"
    "95% bootstrap pivotal confidence interval for the skewness:   "
    f"({confidence_intervals['pivotal'][0]:.3f}, {confidence_intervals['pivotal'][1]:.3f})\n"
    "95% bootstrap percentile confidence interval for the skewness:"
    f"({confidence_intervals['percentile'][0]:.3f}, {confidence_intervals['percentile'][1]:.3f})\n"
)

95% bootstrap Normal confidence interval for the skewness:    (1.324, 4.465)
95% bootstrap pivotal confidence interval for the skewness:   (2.233, 4.772)
95% bootstrap percentile confidence interval for the skewness:(1.016, 3.555)



### Empirical estimation of the coverage

In [167]:
# Number of independently simulated data sets used to estimate the coverage.
trials = 500

# Reference value that every interval is checked against.
true_skewness = np.sqrt(np.e - 1) * (np.e + 2)

# Helper for the coverage simulation below: strict interval membership test.
def in_interval(number, interval):
    """Return whether ``number`` lies strictly between the endpoints of ``interval``.

    ``interval`` is unpacked as ``(lower, upper)``; both comparisons are
    strict, so a value equal to either endpoint is reported as outside.
    """
    low, high = interval
    return low < number and number < high

# Number of trials in which each interval type contained the true skewness.
coverage_count = {
    "normal": 0,
    "pivotal": 0,
    "percentile": 0
}

# Run the trials, counting every time the true skewness is in each of the bootstrap intervals
for _ in range(trials):
    confidence_intervals = bootstrap_confidence_intervals(data=generate_data(n=50), B=100, alpha=0.05)

    for method in ("normal", "pivotal", "percentile"):
        coverage_count[method] += in_interval(true_skewness, confidence_intervals[method])

# Estimate the coverage of each bootstrap interval.
# This is built as a separate dict: binding coverage_estimate to
# coverage_count itself would make both names share one object, and the
# raw counts would be overwritten by the fractions.
coverage_estimate = {
    method: coverage_count[method]/trials
    for method in ("normal", "pivotal", "percentile")
}

In [168]:
# Report each estimated coverage as a whole-number percentage, one line per interval type.
print(
    "Estimated coverages for all three types of bootstrap confidence intervals.\n"
    + "".join(
        name.capitalize() + f" interval: {coverage_estimate[name]*100:.0f}%.\n"
        for name in ("normal", "pivotal", "percentile")
    )
)

Estimated coverages for all three types of bootstrap confidence intervals.
Normal interval: 12%.
Pivotal interval: 16%.
Percentile interval: 2%.

