<a href="https://colab.research.google.com/github/castudil/exploratory-data-analysis/blob/main/S6%20Statistical%20Inference/Confidence_Interval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from scipy.stats import t
from statsmodels.stats import weightstats
import statsmodels.api as sm

# Load mtcars dataset
mtcars = sm.datasets.get_rdataset('mtcars').data

# Select "mpg" variable
mpg = mtcars['mpg']

# Calculate sample statistics
n = len(mpg)
mean = np.mean(mpg)
std = np.std(mpg, ddof=1)  # ddof=1 for sample standard deviation

# Set confidence level
alpha = 0.95

# Compute critical value (two-tailed)
t_critical = t.ppf(1 - (1 - alpha) / 2, df=n-1)

# Compute standard error of the mean
se = std / np.sqrt(n)

# Compute confidence interval
confidence_interval = weightstats.DescrStatsW(mpg).tconfint_mean(alpha=alpha, alternative='two-sided')

# Extract lower and upper bounds of confidence interval
lower_bound = confidence_interval[0]
upper_bound = confidence_interval[1]

# Print results
print("Sample Size (n):", n)
print("Sample Mean:", mean)
print("Sample Standard Deviation:", std)
print("Standard Error of the Mean:", se)
print("Confidence Level:", alpha)
print("Critical Value (t_critical):", t_critical)
print("Confidence Interval:", confidence_interval)
print("Lower Bound:", lower_bound)
print("Upper Bound:", upper_bound)


Sample Size (n): 32
Sample Mean: 20.090625000000003
Sample Standard Deviation: 6.026948052089104
Standard Error of the Mean: 1.0654239593728145
Confidence Level: 0.95
Critical Value (t_critical): 2.0395134463964077
Confidence Interval: (20.023272662902, 20.157977337098007)
Lower Bound: 20.023272662902
Upper Bound: 20.157977337098007
