## Confidence Interval

In [1]:
# Built-in library
import re
import json
from typing import Any, Dict, List, Optional, Union
import logging
import warnings

# Standard imports
import numpy as np
import pandas as pd
from rich import print

# import torch

# Visualization
import matplotlib.pyplot as plt


# Pandas settings: raise the display limits for rows, columns and column width
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 600

# NOTE(review): with no category given this filter ignores every warning for
# the rest of the session, numerical RuntimeWarnings included -- consider
# narrowing it to the specific warnings that are known to be noise.
warnings.filterwarnings("ignore")

# Black code formatter (Optional)
# NOTE(review): presumably requires the lab_black extension to be installed;
# confirm, or drop this line when the extension is unavailable.
%load_ext lab_black

# auto reload imports
%load_ext autoreload
%autoreload 2

In [2]:
from scipy.stats import norm, t
from statsmodels.stats.weightstats import ztest

In [3]:
# Fix NumPy's global seed so the sample drawn below is reproducible
SEED: int = 123
np.random.seed(SEED)

# Sample size and the true parameters of the sampled distribution
N: int = 1_000
mu: int = 5
sigma: int = 2

# Draw N points from a normal distribution with mean `mu` and standard
# deviation `sigma` by shifting and scaling standard-normal draws
X: np.ndarray = mu + sigma * np.random.randn(N)

### Z-Confidence Interval

$$ \left( \hat{\mu} - \frac{1.96 \, \hat{\sigma}}{\sqrt{N}},\ \hat{\mu} + \frac{1.96 \, \hat{\sigma}}{\sqrt{N}} \right) $$

$z_{left} \approx -1.96 $

$z_{right} \approx +1.96 $

In [4]:
# Calculate the z confidence interval
# Sample estimates: mean and standard deviation (N-1 divisor via ddof=1)
mu_hat: float = np.mean(X)
sigma_hat: float = np.std(X, ddof=1)

# Standard-normal quantiles at probabilities 0.025 and 0.975
z_left: float = norm.ppf(0.025)  # negative
z_right: float = norm.ppf(0.975)  # positive

# Both bounds share one formula: mu_hat + quantile * sigma_hat / sqrt(N)
lower, upper = (
    mu_hat + (q * sigma_hat) / np.sqrt(N) for q in (z_left, z_right)
)

In [5]:
# Show the point estimate and the z-interval bounds, one value per line
print(f"mu_hat: {mu_hat}\nlower: {lower}\nupper: {upper}")

# NOTE(review): mu_hat sits between lower and upper by construction (the
# bounds are mu_hat plus a negative / positive offset); the interval is
# presumably meant as a 95% range for the true mean -- confirm the wording.
print(f"The mean, mu_hat is between: lower= {lower} and upper={upper}")

### T-Confidence Interval

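$$ \left( \hat{\mu} + \frac{t_{left} \, \hat{\sigma}}{\sqrt{N}},\ \hat{\mu} + \frac{t_{right} \, \hat{\sigma}}{\sqrt{N}} \right) $$

where $t_{left} < 0$ and $t_{right} > 0$ are the 0.025 and 0.975 quantiles of the t-distribution with $N - 1$ degrees of freedom.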

In [6]:
# Calculate the t confidence interval
# Sample estimates: mean and standard deviation (N-1 divisor via ddof=1)
mu_hat: float = np.mean(X)
sigma_hat: float = np.std(X, ddof=1)

# Student-t quantiles at probabilities 0.025 and 0.975, N-1 degrees of freedom
t_left: float = t.ppf(0.025, df=N - 1)  # negative
t_right: float = t.ppf(0.975, df=N - 1)  # positive

# Both bounds share one formula: mu_hat + quantile * sigma_hat / sqrt(N)
lower, upper = (
    mu_hat + (q * sigma_hat) / np.sqrt(N) for q in (t_left, t_right)
)

In [7]:
# Show the point estimate and the t-interval bounds, one value per line
print(f"mu_hat: {mu_hat}\nlower: {lower}\nupper: {upper}")

# NOTE(review): mu_hat sits between lower and upper by construction (the
# bounds are mu_hat plus a negative / positive offset); the interval is
# presumably meant as a 95% range for the true mean -- confirm the wording.
print(f"The mean, mu_hat is between: lower= {lower} and upper={upper}")

In [8]:
def _t_confidence_experiment() -> bool:
    """Run one t-confidence-interval coverage trial.

    Draws a fresh sample of ``N`` points from a normal distribution with mean
    ``mu = 5`` and standard deviation ``sigma``, builds the two-sided t-interval
    (0.025 / 0.975 quantiles, ``N - 1`` degrees of freedom) around the sample
    mean, and reports whether the true mean lies strictly inside it.

    ``N`` and ``sigma`` are read from the notebook's global namespace, and the
    sample comes from NumPy's global random state, so each call consumes
    ``N`` new random draws.

    Returns:
        bool: True if ``lower < mu < upper``, otherwise False.
    """
    # True mean of the sampled distribution (local to this function)
    mu: int = 5
    # Shift and scale standard-normal draws to obtain N(mu, sigma^2) samples
    X: np.ndarray = np.random.randn(N) * sigma + mu

    mu_hat: float = np.mean(X)
    sigma_hat: float = np.std(X, ddof=1)  # divide by N-1 instead of by N
    t_left: float = t.ppf(0.025, df=N - 1)  # (-ve)
    t_right: float = t.ppf(0.975, df=N - 1)  # (+ve)

    # Calculate the lower and upper boundaries
    lower: float = mu_hat + (t_left * sigma_hat) / np.sqrt(N)
    upper: float = mu_hat + (t_right * sigma_hat) / np.sqrt(N)

    # Comparing an int with NumPy floats yields numpy.bool_; cast so the
    # returned value matches the annotated built-in bool.
    return bool(lower < mu < upper)


def multi_experiment(*, M: int) -> float:
    """Repeat the t-confidence-interval trial ``M`` times.

    Args:
        M: Number of independent trials to run (keyword-only).

    Returns:
        The mean of the per-trial boolean outcomes, i.e. the fraction of
        trials for which ``_t_confidence_experiment`` returned True.
    """
    outcomes: list = []
    for _ in range(M):
        outcomes.append(_t_confidence_experiment())
    return np.mean(outcomes)

In [9]:
# Number of repeated trials
M: int = 10_000

# Carry out the experiment M times
# The true mean, mu, should fall inside the interval about 95% of the time
multi_experiment(M=M)

0.9463