In [1]:
import numpy as np
from scipy.stats import norm, t

In [3]:
# Seed NumPy's global random number generator so the random draws made later
# in the notebook are reproducible from a fresh kernel.
np.random.seed(1)

In [4]:
# Simulation settings: sample size, true mean, and true standard deviation.
N, mu, sigma = 1000, 5, 2

# N draws from a normal distribution with mean `mu` and std `sigma`
# (standard-normal draws, scaled and shifted).
X = mu + sigma * np.random.randn(N)

In [85]:
# Z-confidence interval: 95% interval for the mean built from standard-normal
# quantiles and the sample standard deviation (ddof=1).

mu_hat = X.mean()
sigma_hat = X.std(ddof=1)

# Lower- and upper-tail critical values (2.5% and 97.5% quantiles).
z_left, z_right = (norm.ppf(q) for q in (0.025, 0.975))

# Interval endpoints: point estimate plus critical value times the standard error.
lower, upper = (mu_hat + z * sigma_hat / np.sqrt(N) for z in (z_left, z_right))

print(z_left, z_right)
print(lower, mu_hat, upper)

-1.9599639845400545 1.959963984540054
4.955959806754385 5.077624952319204 5.199290097884023


In [87]:
# t-confidence interval: same construction as the z interval, but the critical
# values come from Student's t distribution with N - 1 degrees of freedom.
mu_hat = X.mean()
sigma_hat = X.std(ddof=1)

# Lower- and upper-tail critical values (2.5% and 97.5% quantiles).
t_left, t_right = (t.ppf(q, df=N - 1) for q in (0.025, 0.975))

# Interval endpoints: point estimate plus critical value times the standard error.
lower, upper = (mu_hat + q * sigma_hat / np.sqrt(N) for q in (t_left, t_right))

print(t_left, t_right)
print(lower, mu_hat, upper)

# Because there are so many samples, the t confidence interval is very close
# to the z confidence interval.

-1.962341461133449 1.9623414611334487
4.9558122244324165 5.077624952319204 5.199437680205992


In [105]:
# Interpretation of confidence interval
# If we repeat this experiment many times, the 95% CI computed in each
# repetition should contain the true value about 95% of the time

def experiment():
    """Run one simulated trial and report whether its 95% t-interval covers ``mu``.

    Draws ``N`` fresh samples from a normal distribution with mean ``mu`` and
    standard deviation ``sigma`` (all three are read from the notebook's
    globals), builds the two-sided 95% t confidence interval for the mean,
    and checks whether the true mean lies strictly inside it.

    Returns:
        A truthy value if ``lower < mu < upper``, a falsy value otherwise.
    """
    # Use a local name that does not shadow the notebook-level sample `X`.
    sample = np.random.randn(N) * sigma + mu
    mu_hat = np.mean(sample)
    sigma_hat = np.std(sample, ddof=1)  # ddof=1: sample standard deviation

    # Critical values of Student's t with N - 1 degrees of freedom
    # (the upper quantile was previously computed twice).
    t_left = t.ppf(0.025, df=N - 1)
    t_right = t.ppf(0.975, df=N - 1)

    lower = mu_hat + t_left * sigma_hat / np.sqrt(N)
    upper = mu_hat + t_right * sigma_hat / np.sqrt(N)
    return lower < mu < upper


In [100]:
def multi_experiment(M):
    """Repeat ``experiment()`` ``M`` times and return the mean of its results.

    Since each trial reports whether the interval covered the true mean, the
    returned value is the fraction of trials in which it did.
    """
    outcomes = []
    for _ in range(M):
        outcomes.append(experiment())
    return np.mean(outcomes)

In [108]:
# Fraction of 1000 repeated experiments whose 95% interval contained the true
# mean; per the interpretation above this should come out close to 0.95.
multi_experiment(1000)

0.951