# Chapter 4: Statistical Inference

### Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

### Simulate Two Sample Groups

In [None]:
# Seed NumPy's global random state so both samples are identical on every run.
np.random.seed(42)

# Draw the two samples back to back from the global random state: same spread
# (scale=10) and size (100), differing only in their centre (50 vs. 53).
# Group 1 is drawn first, so the random stream is consumed in a fixed order.
group1, group2 = (
    np.random.normal(loc=centre, scale=10, size=100) for centre in (50, 53)
)

### Visualize Distributions

In [None]:
# Overlay the two samples as semi-transparent histograms.
# Passing bins=15 to each call separately would derive different bin edges
# from each group's own range, so bar heights would not be comparable.
# Compute one set of 15 bins over the pooled data and reuse it for both.
shared_bins = np.histogram_bin_edges(np.concatenate([group1, group2]), bins=15)
plt.hist(group1, bins=shared_bins, alpha=0.5, label='Group 1')
plt.hist(group2, bins=shared_bins, alpha=0.5, label='Group 2')
plt.legend()
plt.title('Histogram of Two Groups')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.show()

### Descriptive Statistics

In [None]:
# Print the mean and standard deviation of each group.
# ddof=1 selects the sample (n - 1) standard deviation; np.std's default
# (ddof=0) is the population formula, which underestimates the spread when
# the data are a sample drawn from a larger population.
print(f'Group 1 Mean: {np.mean(group1):.2f}, Std: {np.std(group1, ddof=1):.2f}')
print(f'Group 2 Mean: {np.mean(group2):.2f}, Std: {np.std(group2, ddof=1):.2f}')

### Independent T-Test

In [None]:
# Two-sample independent t-test comparing the group means, run with SciPy's
# default settings; the result unpacks into the test statistic and p-value.
t_stat, p_val = stats.ttest_ind(a=group1, b=group2)

# Report both numbers rounded to three decimal places.
print(f'T-Statistic: {t_stat:.3f}, P-Value: {p_val:.3f}')

### Confidence Interval of the Mean (Group 1)

In [None]:
# 95% confidence interval for the population mean of Group 1.
# The standard error is estimated from the sample (stats.sem), so the
# population variance is unknown; the interval is therefore built from
# Student's t distribution with n - 1 degrees of freedom instead of the
# normal distribution, which would be slightly too narrow.
conf_int = stats.t.interval(
    0.95,
    df=len(group1) - 1,
    loc=np.mean(group1),
    scale=stats.sem(group1),
)
print(f'95% Confidence Interval: {conf_int}')

### Hypothesis Test Interpretation

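- If p < 0.05, reject the null hypothesis of equal means: the difference between the group means is statistically significant at the 5% level. Otherwise, fail to reject the null hypothesis.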
- The confidence interval gives a range of plausible values for the population mean.
- Always validate a test's assumptions (e.g. independent samples, approximately normal data, similar variances) before applying it.