In [260]:
import numpy as np
import random
import scipy.stats

Load the data (entered by hand).

In [261]:
# Observations entered by hand, one (Y, Z) pair per row; this is the
# `[(Y1, Z1), (Y2, Z2), ...]` layout consumed by the functions below.
data = (
    (576, 3.39),
    (635, 3.30),
    (558, 2.81),
    (578, 3.03),
    (666, 3.44),
    (580, 3.07),
    (555, 3.00),
    (661, 3.43),
    (651, 3.36),
    (605, 3.13),
    (653, 3.12),
    (575, 2.74),
    (545, 2.76),
    (572, 2.88),
    # NOTE(review): 3.96 is well above every other second value (2.74-3.44);
    # verify against the original source that this is not a transcription error.
    (594, 3.96)
)

# Part 1: The correlation coefficient estimate

Construct the function, which, given data in the form `[(Y1, Z1), (Y2, Z2), ...]`,
returns the plug-in correlation coefficient.

In [262]:
def plugin_corr_coeff(data):
    """Return the plug-in estimate of the correlation coefficient.

    Parameters
    ----------
    data : sequence of pairs
        Observations in the form `[(Y1, Z1), (Y2, Z2), ...]`.

    Returns
    -------
    numpy.float64
        The plug-in covariance of Y and Z divided by the square root of the
        product of their plug-in variances. The result is `nan` when either
        variance is zero.
    """
    # `rowvar=False` treats each column as a variable (Y, then Z).
    # `ddof=0` normalises by n rather than n - 1, which is what makes these
    # the plug-in (empirical-distribution) moments of the sample.
    [
        [plugin_variance_y, plugin_covariance],
        [_, plugin_variance_z]
    ] = np.cov(data, rowvar=False, ddof=0)

    # Return the plug-in correlation coefficient.
    return plugin_covariance/np.sqrt(plugin_variance_y*plugin_variance_z)

Apply the function to the data at hand to obtain the correlation estimate.

In [279]:
# Evaluate the plug-in estimate on the observed sample, then report it.
corr_coeff_estimate = plugin_corr_coeff(data)
print(f"The correlation coefficient estimate is {corr_coeff_estimate:.3f}.")

The correlation coefficient estimate is 0.546.


# Part 2: The bootstrap estimate for the standard error

Given a list of $n$ samples, randomly choose with replacement $n$ of the original samples
(which means that duplicates are possible).

In [264]:
def bootstrap_resample(data):
    """Draw one bootstrap sample from `data`.

    Returns a list of `len(data)` items picked from `data` uniformly at random
    with replacement, so the same observation may appear more than once.
    """
    sample_size = len(data)
    return random.choices(data, k=sample_size)

Given data in the form `[(Y1, Z1), (Y2, Z2), ...]` and
the number `B` of bootstrap replications to perform,
the following function returns the plug-in estimate for the correlation coefficient
for each replication.

In [295]:
def plugin_corr_coff_replications(data, B):
    """Return `B` bootstrap replications of the plug-in correlation coefficient.

    Each replication is obtained by drawing a fresh bootstrap resample of
    `data` and evaluating `plugin_corr_coeff` on it. The result is a list of
    length `B`.
    """
    replications = []
    for _ in range(B):
        resample = bootstrap_resample(data)
        replications.append(plugin_corr_coeff(resample))
    return replications

Given data in the form `[(Y1, Z1), (Y2, Z2), ...]` and
the number `B` of bootstrap replications to perform,
the following function returns the bootstrap estimate for the standard error.

In [303]:
def se_boot(data, B):
    """Return the bootstrap estimate of the standard error.

    This is the standard deviation (numpy's default, i.e. normalised by the
    number of replications) of `B` bootstrap replications of the plug-in
    correlation coefficient computed from `data`.
    """
    replicated_estimates = plugin_corr_coff_replications(data, B)
    return np.std(replicated_estimates)

Compute the standard error estimate after one bootstrap iteration with 1000 replications.

In [302]:
# One bootstrap run with 1000 replications; the value varies between runs
# because the resampling is random.
standard_error_estimate = se_boot(data, int(1e3))
print(f"The bootstrap estimate for the standard error is {standard_error_estimate:.3f}.")

The bootstrap estimate for the standard error is 0.200.


# Part 3: The confidence intervals

## Part 3.1: The Normal interval

Note that we use the *inverse survival function* (`isf`) instead of the inverse CDF (`ppf`) of the standard normal distribution.
See the first bonus problem for an explanation as to why.

In [359]:
alpha = 0.05
B = int(1e3)

# Upper alpha/2 critical value of the standard normal distribution.
z = scipy.stats.norm.isf(alpha/2)

# Evaluate the point estimate and the bootstrap standard error exactly once.
# `se_boot` is random, so calling it separately for each bound would give the
# two bounds different standard errors and an interval that is not centred
# on the estimate.
corr_coeff_est = plugin_corr_coeff(data)
se_est = se_boot(data, B=B)

lower_bound = corr_coeff_est - z*se_est
upper_bound = corr_coeff_est + z*se_est

print(
    "A 95% bootstrap Normal confidence interval for the correlation coefficient is as follows: "
    f"({lower_bound:.3f}, {upper_bound:.3f})"
)

A 95% bootstrap Normal confidence interval for the correlation coefficient is as follows: (0.167, 0.924)


## Part 3.2: The pivotal interval

In [360]:
alpha = 0.05
B = int(1e3)

# Point estimate from the original sample.
corr_coeff_est = plugin_corr_coeff(data)

# Bootstrap replications of the estimator.
replicated_data = plugin_corr_coff_replications(data, B=B)

# The pivotal interval reflects the bootstrap quantiles about the point
# estimate: the upper quantile sets the lower bound and vice versa.
upper_quantile = np.quantile(replicated_data, 1-alpha/2)
lower_quantile = np.quantile(replicated_data, alpha/2)
lower_bound = 2*corr_coeff_est - upper_quantile
upper_bound = 2*corr_coeff_est - lower_quantile

interval_text = f"({lower_bound:.3f}, {upper_bound:.3f})"
print(
    "A 95% bootstrap pivotal confidence interval for the correlation coefficient is as follows: "
    + interval_text
)

A 95% bootstrap pivotal confidence interval for the correlation coefficient is as follows: (0.159, 0.891)


## Part 3.3: The percentile interval

In [364]:
alpha = 0.05
B = int(1e3)

# Bootstrap replications of the estimator.
replicated_data = plugin_corr_coff_replications(data, B=B)

# The percentile interval uses the alpha/2 and 1 - alpha/2 quantiles of the
# bootstrap replications directly as its end points.
lower_bound = np.quantile(replicated_data, alpha/2)
upper_bound = np.quantile(replicated_data, 1-alpha/2)

interval_text = f"({lower_bound:.3f}, {upper_bound:.3f})"
print(
    "A 95% bootstrap percentile confidence interval for the correlation coefficient is as follows: "
    + interval_text
)

A 95% bootstrap percentile confidence interval for the correlation coefficient is as follows: (0.175, 0.936)
