In [1]:
import numpy as np
from scipy.stats import norm, t

In [2]:
# Seed NumPy's legacy global RNG so the samples drawn below are reproducible.
SEED = 1
np.random.seed(SEED)

In [3]:
# Simulation setup: sample size and the true parameters of the normal population.
N = 1000
mu = 5
sigma = 2
# Draw N standard-normal values, then scale by sigma and shift by mu.
X = mu + sigma * np.random.standard_normal(N)

In [4]:
# z-based confidence interval for the mean (standard-normal quantiles)
interval = .95
left = (1 - interval) / 2    # probability mass left in the lower tail
right = left + interval      # cumulative probability at the upper end

mu_hat = X.mean()
sigma_hat = X.std(ddof=1)    # ddof=1: divide by N - 1
z_left, z_right = (norm.ppf(q) for q in (left, right))
root_n = np.sqrt(N)
lower = mu_hat + z_left * sigma_hat / root_n
upper = mu_hat + z_right * sigma_hat / root_n
for label, value in (
    ("lower bound:", lower),
    ("sample mean:", mu_hat),
    ("upper bound:", upper),
):
    print(label, value)

lower bound: 4.955959806754385
sample mean: 5.077624952319204
upper bound: 5.199290097884023


In [5]:
# t-based confidence interval for the mean (Student-t quantiles, N - 1 degrees of freedom)
interval = .95
left = (1 - interval) / 2    # probability mass left in the lower tail
right = left + interval      # cumulative probability at the upper end

mu_hat = X.mean()
sigma_hat = X.std(ddof=1)    # ddof=1: divide by N - 1
dof = N - 1
t_left = t.ppf(left, df=dof)
t_right = t.ppf(right, df=dof)
root_n = np.sqrt(N)
lower = mu_hat + t_left * sigma_hat / root_n
upper = mu_hat + t_right * sigma_hat / root_n
for label, value in (
    ("lower bound:", lower),
    ("sample mean:", mu_hat),
    ("upper bound:", upper),
):
    print(label, value)

lower bound: 4.9558122244324165
sample mean: 5.077624952319204
upper bound: 5.199437680205992


In [6]:
# Interpretation: if this experiment is repeated many times, the 95% confidence
# interval should contain the true mean about 95% of the time.
def experiment(n=None, mean=None, std=None, interval=0.95):
    """Draw one normal sample and check whether its t-interval covers the true mean.

    Parameters
    ----------
    n : int, optional
        Sample size; must be at least 2. Defaults to the notebook-level ``N``.
    mean : float, optional
        True mean of the sampled population. Defaults to the notebook-level ``mu``.
    std : float, optional
        True standard deviation of the sampled population. Defaults to the
        notebook-level ``sigma``.
    interval : float, optional
        Confidence level, strictly between 0 and 1. Defaults to 0.95.

    Returns
    -------
    bool-like
        Truthy when ``lower < mean < upper`` for the interval built from the sample.

    Raises
    ------
    ValueError
        If ``n < 2`` or ``interval`` is not strictly between 0 and 1.
    """
    # Fall back to the notebook configuration only for values the caller omitted.
    if n is None:
        n = N
    if mean is None:
        mean = mu
    if std is None:
        std = sigma

    if n < 2:
        raise ValueError("n must be at least 2 to estimate a standard deviation")
    if not 0 < interval < 1:
        raise ValueError("interval must be strictly between 0 and 1")

    # Derive the tail probabilities here instead of reading `left`/`right`
    # left behind by whichever earlier cell happened to run last.
    left_tail = (1 - interval) / 2
    right_tail = left_tail + interval

    sample = np.random.randn(n) * std + mean
    mu_hat = np.mean(sample)
    sigma_hat = np.std(sample, ddof=1)
    t_left = t.ppf(left_tail, df=n - 1)
    t_right = t.ppf(right_tail, df=n - 1)
    lower = mu_hat + t_left * sigma_hat / np.sqrt(n)
    upper = mu_hat + t_right * sigma_hat / np.sqrt(n)
    return lower < mean < upper

# Repeat the experiment 1000 times, recording for each run whether the
# interval contained mu, then report the fraction of runs that did.
exp_results = []
for _ in range(1000):
    exp_results.append(experiment())

coverage = sum(exp_results) / len(exp_results)
print("mu is in the 95% conf interval: ", coverage)

mu is in the 95% conf interval:  0.96
