<a href="https://colab.research.google.com/github/gr3ybr0w/cookbook/blob/master/statistics/confidence_intervales.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from typing import Union, Tuple, List, Sequence
import numpy as np
from scipy import stats

In [2]:
def calculate_confidence_interval(
    data: Union[List[float], np.ndarray, Sequence[float]],
    confidence_level: float = 0.95
) -> Tuple[float, Tuple[float, float]]:
    """
    Calculate a two-sided Student's t confidence interval for the mean of a sample.

    The interval is ``mean ± t * s / sqrt(n)``, where ``s`` is the sample
    standard deviation (ddof=1) and ``t`` is the critical value of the
    t-distribution with ``n - 1`` degrees of freedom.

    Parameters:
    data (Union[List[float], np.ndarray, Sequence[float]]): Sample data. Multi-dimensional
        input is flattened, so every element counts as one observation.
    confidence_level (float): Confidence level (default: 0.95 for 95% confidence)

    Returns:
    Tuple[float, Tuple[float, float]]: A tuple containing:
        - sample mean (float)
        - confidence interval bounds (tuple of two floats: lower, upper)

    Raises:
    ValueError: If confidence_level is not strictly between 0 and 1
    ValueError: If data is empty
    ValueError: If data has fewer than two observations (the sample standard
        deviation and the t-distribution are undefined for n = 1)
    """
    if not 0 < confidence_level < 1:
        raise ValueError("Confidence level must be between 0 and 1")

    # Flatten so the observation count always matches the elements that
    # np.mean / np.std aggregate over (len() would only count the first axis).
    values = np.asarray(data, dtype=float).ravel()
    sample_size = values.size
    if sample_size == 0:
        raise ValueError("Data array cannot be empty")
    if sample_size < 2:
        # With n = 1 the ddof=1 standard deviation is 0/0 and there are zero
        # degrees of freedom; fail loudly rather than returning NaN bounds.
        raise ValueError(
            "At least two data points are required to compute a confidence interval"
        )

    sample_mean = float(np.mean(values))
    sample_std = float(np.std(values, ddof=1))  # ddof=1 for sample standard deviation

    # Standard error of the mean
    standard_error = sample_std / np.sqrt(sample_size)

    # The t-distribution is used instead of the normal distribution because the
    # population standard deviation is unknown and estimated from the sample.
    # Its critical value is larger than the corresponding z-score, giving wider
    # (more conservative) intervals; the difference matters most for small
    # samples and vanishes as the sample size grows.
    degrees_freedom = sample_size - 1
    t_critical = stats.t.ppf((1 + confidence_level) / 2, degrees_freedom)

    margin_of_error = float(t_critical * standard_error)

    lower_bound = sample_mean - margin_of_error
    upper_bound = sample_mean + margin_of_error

    return sample_mean, (lower_bound, upper_bound)

# Demonstration: executed only when this module is the entry point
if __name__ == "__main__":
    # Ten observations shared by both intervals below
    sample_data: List[float] = [23, 25, 21, 22, 20, 24, 23, 21, 22, 24]

    # 95% interval (the function's default level, passed explicitly here)
    mean, (ci_lower, ci_upper) = calculate_confidence_interval(sample_data, 0.95)
    print(f"Sample Mean: {mean:.2f}")
    print(f"95% Confidence Interval: ({ci_lower:.2f}, {ci_upper:.2f})")

    # 99% interval: a higher confidence level yields a wider interval
    mean, (ci_lower, ci_upper) = calculate_confidence_interval(sample_data, 0.99)
    print(f"99% Confidence Interval: ({ci_lower:.2f}, {ci_upper:.2f})")

Sample Mean: 22.50
95% Confidence Interval: (21.37, 23.63)
99% Confidence Interval: (20.88, 24.12)
