In [78]:
import numpy as np
from scipy.stats import norm, t
import plotly.express as px
import plotly.graph_objects as go

### Task definition
- Let $X_1, \ldots, X_n \sim \text{Normal}(µ, 1)$
- Let $θ = e^µ$
- Let $\hat{θ} = e^{\overline{X}}$
### Requirements:
- Create a data set (using $µ = 5$) consisting of $n=100$ observations
- Use the bootstrap to get the se and 95 percent confidence interval for θ
- Plot a histogram of the bootstrap replications. This is an estimate of the distribution of $\hatθ$
- Compare this to the true sampling distribution of $\hatθ$

In [79]:
SEED = 42
# norm.rvs (with its default random_state) and np.random.choice both draw from
# NumPy's global RandomState, so seeding it once here makes every later cell
# reproducible under Restart Kernel -> Run All.
np.random.seed(SEED)


def T_of_F(sample_):
    """Plug-in estimator of theta = e^mu: the exponential of the sample mean."""
    return np.exp(np.mean(sample_))


mu = 5    # true mean of the data-generating Normal(mu, 1)
n = 100   # number of observations
sample = norm.rvs(loc=mu, scale=1, size=n)
px.histogram(sample, nbins=40, title='Sample Distribution')

In [80]:
# --- Point estimates -------------------------------------------------------
mu_hat = sample.mean()
theta_hat = T_of_F(sample)
print(f'mu_hat = {mu_hat}')
print(f'theta_hat = {theta_hat}')

bootstrap_repetitions = 1000


def _bootstrap(draw_resample):
    """Run the bootstrap for T_of_F.

    draw_resample: zero-argument callable returning one bootstrap sample.
    Returns (sorted array of bootstrap_repetitions replications, their std).
    """
    replications = np.sort(
        [T_of_F(draw_resample()) for _ in range(bootstrap_repetitions)]
    )
    return replications, replications.std()


# Parametric bootstrap: resample from the fitted model Normal(mu_hat, 1).
param_bootstrap_estimations, param_se_hat = _bootstrap(
    lambda: norm.rvs(loc=mu_hat, scale=1, size=len(sample))
)
print(f'param_se_hat = {param_se_hat}')

# Nonparametric bootstrap: resample the observed data with replacement.
nonparam_bootstrap_estimations, nonparam_se_hat = _bootstrap(
    lambda: np.random.choice(sample, size=len(sample), replace=True)
)
print(f'nonparam_se_hat = {nonparam_se_hat}')

# Delta method: g(mu) = e^mu has g'(mu) = e^mu and se(mean) = 1/sqrt(n)
# (unit variance), hence se(theta_hat) is approximately theta_hat / sqrt(n).
delta_se_hat = theta_hat / np.sqrt(n)
print(f'delta_se_hat = {delta_se_hat}')

# --- Normal-approximation (1 - alpha) confidence intervals ------------------
alpha = 0.05
z = norm.ppf(1 - alpha / 2)


def _normal_interval(se_hat):
    """Return (lower, upper) = theta_hat -/+ z * se_hat."""
    return theta_hat - se_hat * z, theta_hat + se_hat * z


param_normal_bounds = _normal_interval(param_se_hat)
nonparam_normal_bounds = _normal_interval(nonparam_se_hat)
delta_normal_bounds = _normal_interval(delta_se_hat)

# Format the two bounds individually: printing the tuple itself doubles the
# parentheses and, on NumPy >= 2, shows each bound as `np.float64(...)`.
for label, (lower, upper) in (
    ('Parametric', param_normal_bounds),
    ('Nonparametric', nonparam_normal_bounds),
    ('Delta', delta_normal_bounds),
):
    print(f'{label} CI: ({lower}, {upper})')

mu_hat = 5.019567446503926
theta_hat = 151.34582446792814
param_se_hat = 15.812275150639586
nonparam_se_hat = 16.17669822104704
delta_se_hat = 15.134582446792814
Pararmetric CI:((120.3543346590369, 182.33731427681937))
Nonpararmetric CI:((119.64007856590278, 183.0515703699535))
Delta CI:((121.68258795116213, 181.00906098469414))


In [81]:
# Distribution of the parametric-bootstrap replications of theta_hat.
# The figure is the cell's last expression, so the notebook displays it directly.
px.histogram(param_bootstrap_estimations,
             title = 'Histogram of the parametric bootstrap replications of theta')

In [82]:
# Distribution of the nonparametric-bootstrap replications of theta_hat.
# The figure is the cell's last expression, so the notebook displays it directly.
px.histogram(nonparam_bootstrap_estimations,
             title = 'Histogram of the nonparametric bootstrap replications of theta')

In [103]:
x_axis = np.linspace(100, 200, 500)


def theta_cdf(x):
    """True CDF of theta_hat = exp(sample mean).

    The sample mean is Normal(mu, 1/n), so P(theta_hat <= x) is the
    Normal(mu, 1/sqrt(n)) CDF evaluated at log(x). Uses `mu` and `n` from the
    setup cell instead of repeating their values; accepts scalars or arrays.
    """
    return norm.cdf(np.log(x), loc=mu, scale=1 / np.sqrt(n))


theta_real_cdf = theta_cdf(x_axis)
# CDF increments between neighbouring grid points, i.e. probability mass per
# grid step (not yet divided by the step width); a leading 0 keeps the array
# the same length as x_axis.
theta_real_pdf = np.append([0], np.diff(theta_real_cdf))
fig = px.line(x = x_axis, y = theta_real_pdf,
              title = 'True sampling distribution function for theta_hat')
fig.show()

In [104]:
# x positions used when plotting the histograms below.
# NOTE: 200 evenly spaced points including both endpoints - these are not the
# centres of the 200 histogram bins.
x_axis_bar = np.linspace(100, 200, num=200)

# Bin counts over 200 equal-width bins on [100, 200], rescaled so that each
# array sums to 1 (fraction of in-range replications per bin).
param_bootstrap_hist = np.histogram(
    param_bootstrap_estimations, bins=200, range=[100, 200]
)[0]
param_bootstrap_hist = param_bootstrap_hist / param_bootstrap_hist.sum()

nonparam_bootstrap_hist = np.histogram(
    nonparam_bootstrap_estimations, bins=200, range=[100, 200]
)[0]
nonparam_bootstrap_hist = nonparam_bootstrap_hist / nonparam_bootstrap_hist.sum()

In [105]:
# The three inputs are probabilities over intervals of different widths:
# the bootstrap arrays hold mass per histogram bin, theta_real_pdf holds mass
# per x_axis grid step. Dividing each by its own interval width turns them
# into densities, which is the only scale on which they can be compared.


def _bin_mass_to_density(bin_mass):
    """Return (bin centres, density) for equal-width bins spanning x_axis_bar.

    Assumes the histogram range equals [x_axis_bar[0], x_axis_bar[-1]], as in
    the cell that built the histograms.
    """
    edges = np.linspace(x_axis_bar[0], x_axis_bar[-1], len(bin_mass) + 1)
    centres = (edges[:-1] + edges[1:]) / 2
    return centres, bin_mass / np.diff(edges)


nonparam_x, nonparam_density = _bin_mass_to_density(nonparam_bootstrap_hist)
param_x, param_density = _bin_mass_to_density(param_bootstrap_hist)

# theta_real_pdf[i] (i >= 1) is the CDF increment over [x_axis[i-1], x_axis[i]];
# element 0 is only a padding zero, so drop it and plot at interval midpoints.
true_x = (x_axis[:-1] + x_axis[1:]) / 2
true_density = theta_real_pdf[1:] / np.diff(x_axis)

fig = go.Figure()
fig.add_trace(go.Scatter(
    x = nonparam_x, y = nonparam_density, opacity = 0.6,
    name = 'nonparam. bootstrap'
))
fig.add_trace(go.Scatter(
    x = param_x, y = param_density, opacity = 0.6,
    name = 'param. bootstrap'
))
fig.add_trace(go.Scatter(
    x = true_x, y = true_density, opacity = 0.8,
    name = 'true distribution'
))
fig.update_layout(
    title = 'Comparison: true dist. vs param vs nonparam bootstrap',
    xaxis_title = 'theta_hat',
    yaxis_title = 'density',
)
fig.show()