Repeat the previous exercise 1000 times.
Compare the coverage of the two confidence intervals for $\rho$.

In [2]:
from collections import namedtuple

import numpy as np
import random
import scipy.stats

In [3]:
# Record of the sample summaries produced by `estimates`:
# sample size, mean, variance, covariance and correlation.
Statistics_list = namedtuple(
    'Statistics_list',
    'n mean variance covariance correlation',
)

def generate_data(size=100, random_state=None):
    """
    Draw a sample from the bivariate normal distribution of the exercise.

    The distribution has mean (3, 8) and covariance matrix [[1, 1], [1, 2]],
    i.e. a true correlation of 1/sqrt(1*2) = 1/sqrt(2).

    Parameters
    ----------
    size : int, optional
        Number of observations to draw. Defaults to 100.
    random_state : None, int, numpy.random.RandomState or Generator, optional
        Forwarded unchanged to scipy. Pass a seed or a generator to obtain
        a reproducible sample; with the default ``None`` scipy falls back
        to NumPy's global random state.

    Returns
    -------
    numpy.ndarray
        One observation per row: shape ``(size, 2)`` when ``size > 1``
        (scipy squeezes length-one axes out of the result).
    """
    return scipy.stats.multivariate_normal.rvs(
        mean=[3, 8],
        cov=[[1, 1], [1, 2]],
        size=size,
        random_state=random_state,
    )
    
def estimates(data):
    """
    Summarise a two-dimensional sample (observations in rows, variables
    in columns) by its size, mean, variance, covariance and correlation.

    The variance and covariance come from ``np.cov`` with its default
    normalisation, i.e. they are divided by n-1 (the unbiased versions)
    rather than by n as a strict plug-in estimate would be. The
    correlation is unaffected by that choice, because the normalising
    factor cancels between numerator and denominator.

    Only the first two columns enter the covariance and the correlation.

    Returns a ``Statistics_list`` with fields
    (n, mean, variance, covariance, correlation).
    """
    n_obs, _n_cols = data.shape
    column_means = np.mean(data, axis=0)

    cov_matrix = np.cov(data, rowvar=False)
    variances = cov_matrix.diagonal()
    cross_cov = cov_matrix[0, 1]

    var_first, var_second = variances[0], variances[1]
    rho_hat = cross_cov/np.sqrt(var_first*var_second)

    return Statistics_list(n_obs, column_means, variances, cross_cov, rho_hat)

def fisher_interval(statistics_list, alpha):
    """
    Confidence interval for the correlation based on Fisher's transform.

    The estimated correlation is mapped through arctanh, a normal
    interval with standard error 1/sqrt(n-3) is built on that scale,
    and both end points are mapped back with tanh.

    Parameters
    ----------
    statistics_list : object exposing ``n`` and ``correlation``
        Sample size and estimated correlation.
    alpha : float
        Level of the interval; the nominal coverage is 1 - alpha.

    Returns
    -------
    tuple
        ``(lower, upper)`` bounds for the correlation.
    """
    spread = (statistics_list.n - 3) ** -0.5
    centre = np.arctanh(statistics_list.correlation)
    # Upper alpha/2 critical value of the standard normal distribution.
    critical_value = scipy.stats.norm.isf(alpha/2)
    margin = critical_value*spread

    lower = np.tanh(centre - margin)
    upper = np.tanh(centre + margin)
    return (lower, upper)
    
def bootstrap_resample(data, rng=None):
    """
    Draw one bootstrap resample: ``len(data)`` rows of ``data`` sampled
    uniformly with replacement.

    Parameters
    ----------
    data : array-like
        Observations, indexed along the first axis.
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, Python's global ``random``
        module is used, so ``random.seed(...)`` controls the draw.

    Returns
    -------
    numpy.ndarray
        A new array with as many rows as ``data``.
    """
    observations = np.asarray(data)
    n_rows = len(observations)
    if n_rows == 0:
        # Nothing to draw from; hand back an (empty) copy.
        return observations.copy()

    if rng is None:
        row_indices = random.choices(range(n_rows), k=n_rows)
    else:
        row_indices = rng.integers(0, n_rows, size=n_rows)

    # Draw row *indices* and gather them with a single fancy-indexing
    # operation, instead of collecting row views in a Python list and
    # stacking them again.
    return observations[np.asarray(row_indices, dtype=np.intp)]

def bootstrap_replications(data, B):
    """
    Compute B bootstrap replications of the estimated correlation.

    Each replication resamples ``data`` with ``bootstrap_resample`` and
    keeps the ``correlation`` field of ``estimates`` on that resample.
    Returns the replications as a NumPy array of length B.
    """
    replications = []
    for _ in range(B):
        resample = bootstrap_resample(data)
        replications.append(estimates(resample).correlation)
    return np.array(replications)
    
# A *percentile* bootstrap interval is used here rather than a
# normal-based one: the plug-in estimate of the correlation
# coefficient always lies between -1 and 1, so its distribution
# CANNOT be exactly normal.
def bootstrap_percentile_interval(data, B, alpha):
    """
    Percentile bootstrap confidence interval for the correlation.

    Generates B bootstrap replications of the estimated correlation and
    returns their alpha/2 and 1 - alpha/2 quantiles as ``(lower, upper)``.
    """
    replications = bootstrap_replications(data, B)
    quantile_levels = (alpha/2, 1-alpha/2)
    return tuple(
        np.quantile(replications, q=level) for level in quantile_levels
    )

def is_in_interval(number, interval):
    """
    Tell whether ``number`` lies strictly inside ``interval``.

    ``interval`` is a pair ``(lower, upper)``; both end points are
    excluded.
    """
    low, high = interval
    return low < number and number < high

def test_intervals(true_correlation, data, B, alpha):
    """
    Check whether each confidence interval built from ``data`` covers
    the true correlation.

    Builds the Fisher interval from ``estimates(data)`` and the
    percentile bootstrap interval from B resamples, both at level alpha.

    Returns a pair ``(fisher_covers, bootstrap_covers)``.
    """
    candidate_intervals = (
        fisher_interval(estimates(data), alpha),
        bootstrap_percentile_interval(data, B, alpha),
    )
    return tuple(
        is_in_interval(true_correlation, interval)
        for interval in candidate_intervals
    )

In [12]:
# Seed both random sources so that re-running the cell reproduces the same
# coverages: `random` drives the bootstrap resampling, and NumPy's global
# state is what scipy samples from when no `random_state` is supplied.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

trials = 1000
# Correlation of the sampling distribution: cov / sqrt(var_1 * var_2)
# with cov = 1, var_1 = 1 and var_2 = 2.
true_correlation = 1/np.sqrt(2)

# One row per trial: (covered by Fisher interval, covered by bootstrap interval).
result = np.array([
    test_intervals(
        true_correlation,
        generate_data(),
        B=int(1e3),
        alpha=0.05
    )
    for _ in range(trials)
])

# The column-wise mean of the boolean outcomes is the empirical coverage.
fisher_coverage, bootstrap_coverage = result.mean(axis=0)
print(
    f"Afer {trials} trials, the empirical coverages are as follow.\n"
    f"Fisher interval:    {fisher_coverage:5}\n"
    f"Bootstrap interval: {bootstrap_coverage:5}"
)

Afer 1000 trials, the empirical coverages are as follow.
Fisher interval:    0.954
Bootstrap interval: 0.938
