In [None]:
# Setup
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

# Make the repository root importable so the shared `modules` package resolves.
# NOTE(review): parents[2] assumes the kernel's working directory sits exactly
# three levels below the repo root — confirm against the repo layout.
repo_root = Path().resolve().parents[2]
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

# Must come after the sys.path tweak above, otherwise `modules` is not found.
from modules._import_helper import safe_import_from

# Import seeding utilities
set_seed = safe_import_from('00_repo_standards.src.mlphys_core.seeding', 'set_seed')

# Seed once, up front, so Restart & Run All reproduces the same numbers.
# NOTE(review): presumably this seeds NumPy's global RNG, which the later
# cells draw from via np.random.normal — confirm in mlphys_core.seeding.
set_seed(42)

# Directory that receives every figure saved by this notebook.
# parents=True keeps the cell working if reports_dir is later pointed at a
# nested location whose parents do not exist yet.
reports_dir = Path("../reports")
reports_dir.mkdir(parents=True, exist_ok=True)

print("✅ Setup complete")

## 1. Intuition: Uncertainty in Physics Measurements

**Key concepts:**
- **Measurement noise**: When you measure a physical quantity (temperature, voltage, position), you never get the exact "true" value
- **Repeated measurements** reveal the distribution of possible values
- **Two types of uncertainty**:
  - **Aleatoric (irreducible)**: Inherent randomness in the measurement process (e.g., thermal noise in sensors)
  - **Epistemic (reducible)**: Uncertainty due to lack of knowledge (e.g., uncertain model parameters, insufficient data)
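- In physics labs, we report $\bar{x} \pm \text{SEM}$ (sample mean ± standard error of the mean, $\text{SEM} = s/\sqrt{n}$) — not $\pm\,\sigma$, which describes the spread of individual measurements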
- This notebook shows how to quantify and visualize both types

**Physics analogy**: Measuring the decay time of a particle:
- Aleatoric: Each particle's decay is inherently random
- Epistemic: With more measurements, we better estimate the mean decay time

## 2. Minimal Math: Sampling Distribution

**Setup:**
- True (unknown) parameter: $\theta_{\text{true}}$
- Noisy measurements: $x_i = \theta_{\text{true}} + \epsilon_i$, where $\epsilon_i \sim \mathcal{N}(0, \sigma^2)$
- Sample mean estimator: $\hat{\theta} = \frac{1}{n}\sum_{i=1}^n x_i$

**Key results:**
1. **Expectation**: $\mathbb{E}[\hat{\theta}] = \theta_{\text{true}}$ (unbiased)
2. **Variance**: $\text{Var}(\hat{\theta}) = \frac{\sigma^2}{n}$ (decreases with more data!)
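3. **Sampling distribution**: $\hat{\theta} \sim \mathcal{N}\left(\theta_{\text{true}}, \frac{\sigma^2}{n}\right)$ — exact for any $n$ when the noise is Gaussian (as assumed here); by the **Central Limit Theorem** it also holds approximately for large $n$ for any noise with finite variance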

**Interpretation:**
- $\sigma$ = aleatoric uncertainty (measurement noise)
- $\sigma / \sqrt{n}$ = epistemic uncertainty (standard error of the mean)

## 3. Implementation: Repeated Measurements Experiment

In [None]:
# Ground truth of the simulated experiment (e.g., a decay time in seconds)
true_value = 5.0  # the quantity every measurement is trying to recover
measurement_noise = 1.5  # std. dev. of the per-reading Gaussian error (aleatoric)

def make_measurement(n_measurements: int) -> np.ndarray:
    """Draw ``n_measurements`` noisy readings centred on ``true_value``.

    Each reading is ``true_value`` plus zero-mean Gaussian noise whose
    standard deviation is ``measurement_noise``. Samples come from NumPy's
    global random state, so results depend on how that state was seeded.

    Args:
        n_measurements: Number of readings to simulate.

    Returns:
        1-D array of length ``n_measurements`` holding the simulated readings.
    """
    offsets = np.random.normal(loc=0, scale=measurement_noise, size=n_measurements)
    return offsets + true_value

# Take measurements with different sample sizes
sample_sizes = [5, 20, 100, 500]
measurements_dict = {n: make_measurement(n) for n in sample_sizes}

# Summarise each sample: point estimate plus its standard error.
# `results` is keyed by sample size and is reused by the plotting cell below.
results = {}
for n, measurements in measurements_dict.items():
    mean_estimate = np.mean(measurements)
    # ddof=1 -> sample standard deviation; dividing by sqrt(n) converts the
    # spread of single readings into the uncertainty of their mean.
    std_error = np.std(measurements, ddof=1) / np.sqrt(n)
    results[n] = {
        'mean': mean_estimate,
        'sem': std_error,
        'measurements': measurements,
    }
    print(f"n={n:3d}: θ̂ = {mean_estimate:.3f} ± {std_error:.3f} (true: {true_value:.3f})")

# The standard error shrinks proportionally to 1/sqrt(n) (it does not grow
# with sqrt(n)), matching Var(theta_hat) = sigma^2 / n from the math section.
print("\n✅ Notice: Standard error decreases as 1/√n (epistemic uncertainty reduces)")
print(f"   But individual measurements still vary by ~{measurement_noise:.2f} (aleatoric)")

## 4. Experiments: Visualizing Uncertainty Reduction

In [None]:
# Experiment 1: Show sampling distributions for different sample sizes
z_95 = 1.96  # two-sided 95% critical value of the standard normal

# Two panels per row; the row count follows len(sample_sizes) so that editing
# the list of sample sizes cannot index past the end of the axes array.
n_cols = 2
n_rows = max(1, int(np.ceil(len(sample_sizes) / n_cols)))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 5 * n_rows), squeeze=False)
axes = axes.ravel()

for ax, n in zip(axes, sample_sizes):
    measurements = results[n]['measurements']
    mean_est = results[n]['mean']
    sem = results[n]['sem']

    # Histogram of measurements
    ax.hist(measurements, bins=20, alpha=0.6, color='steelblue',
            edgecolor='black', density=True, label='Measurements')

    # Gaussian density the measurements were drawn from, for comparison
    x_range = np.linspace(measurements.min() - 1, measurements.max() + 1, 200)
    true_dist = (1 / (measurement_noise * np.sqrt(2*np.pi))) * \
                np.exp(-0.5 * ((x_range - true_value) / measurement_noise)**2)
    ax.plot(x_range, true_dist, 'k--', linewidth=2, label=f'True dist (σ={measurement_noise})')

    # Mark true value, point estimate, and the normal-approximation 95% CI.
    # The vertical line is the estimate alone; the shaded band carries the
    # uncertainty, so the two legend entries are labelled separately.
    ax.axvline(true_value, color='green', linewidth=2, label='True value')
    ax.axvline(mean_est, color='red', linewidth=2, label='Estimate')
    ax.axvspan(mean_est - z_95*sem, mean_est + z_95*sem,
               alpha=0.2, color='red', label='95% CI')

    ax.set_xlabel('Measurement value')
    ax.set_ylabel('Density')
    ax.set_title(f'n = {n} measurements\n'
                 f'Estimate: {mean_est:.2f} ± {sem:.3f}')
    ax.legend(fontsize=8)
    ax.grid(alpha=0.3)

# Hide the spare panel left over when the number of sample sizes is odd.
for ax in axes[len(sample_sizes):]:
    ax.set_visible(False)

sampling_fig_path = reports_dir / '01_sampling_distributions.png'
plt.tight_layout()
plt.savefig(sampling_fig_path, dpi=150, bbox_inches='tight')
plt.show()

# Report the path actually written rather than a hand-typed approximation.
print(f"✅ Saved: {sampling_fig_path}")
# A single random sample can land closer or farther by chance, so the trend
# is stated as a tendency rather than a guarantee.
print("\n📊 Observation: As n increases, the estimate tends to get closer to the true value")
print("   and the confidence interval (red band) narrows.")

In [None]:
# Experiment 2: Standard Error vs Sample Size (epistemic uncertainty reduction)
n_range = np.arange(5, 501, 5)
n_experiments = 100  # independent repeats of the experiment per sample size

# Empirical SEM: for each n, repeat the whole experiment n_experiments times
# and measure how much the resulting sample means scatter.
standard_errors = []
for n in n_range:
    estimates = [np.mean(make_measurement(n)) for _ in range(n_experiments)]
    # ddof=1 for consistency with the sample standard deviations used in the
    # other cells; ddof=0 would slightly underestimate the spread.
    standard_errors.append(np.std(estimates, ddof=1))

# Theoretical prediction: σ / √n
theoretical_sem = measurement_noise / np.sqrt(n_range)

# Plot
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(n_range, standard_errors, 'o', markersize=4, alpha=0.6,
        label=f'Empirical SEM ({n_experiments} trials)')  # stays in sync with n_experiments
ax.plot(n_range, theoretical_sem, 'r-', linewidth=2,
        label=r'Theoretical: $\sigma / \sqrt{n}$')
ax.set_xlabel('Sample size (n)', fontsize=12)
ax.set_ylabel('Standard Error of Mean (SEM)', fontsize=12)
ax.set_title('Epistemic Uncertainty Decreases with More Data', fontsize=14)
ax.legend(fontsize=11)
ax.grid(alpha=0.3)
ax.set_xlim(0, 500)

epistemic_fig_path = reports_dir / '01_epistemic_uncertainty_reduction.png'
plt.tight_layout()
plt.savefig(epistemic_fig_path, dpi=150, bbox_inches='tight')
plt.show()

# Report the path actually written rather than a hand-typed approximation.
print(f"✅ Saved: {epistemic_fig_path}")
print("\n📊 Key insight: Epistemic uncertainty (SEM) ∝ 1/√n")
print("   To halve your uncertainty, you need 4× more data!")

## 5. Confidence Intervals vs Credible Intervals

**Two philosophies for quantifying uncertainty:**

### Frequentist Confidence Interval (CI)
- **Interpretation**: "If we repeated this experiment many times, 95% of constructed intervals would contain the true value"
- **The parameter is fixed**, the interval is random (changes with each dataset)
- Example: $\hat{\theta} \pm 1.96 \cdot \text{SE}$ (assumes normal distribution)

### Bayesian Credible Interval
- **Interpretation**: "There is a 95% probability that the true value lies in this interval"
- **The interval is fixed** (for a given dataset), the parameter has a probability distribution
- Requires: prior belief about $\theta$ + Bayes' rule to get posterior
- More natural for "degree of belief" statements

**When they agree:** With flat priors and lots of data, Bayesian credible intervals ≈ frequentist CIs

In [None]:
# Sanity Check: Verify CI coverage
# If we construct 95% CIs many times, ~95% should contain the true value

n_samples = 50
n_experiments = 1000
nominal_coverage = 0.95
# Normal critical value. Because sigma is estimated from each sample, the
# exact multiplier would be the t_{n-1} quantile (see Exercise 6), so the
# realised coverage is expected to sit slightly below the nominal 95%.
z_crit = 1.96

ci_lower_bounds = []
ci_upper_bounds = []

for _ in range(n_experiments):
    measurements = make_measurement(n_samples)
    mean_est = np.mean(measurements)
    sem = np.std(measurements, ddof=1) / np.sqrt(n_samples)

    # 95% CI (assumes normal distribution)
    ci_lower_bounds.append(mean_est - z_crit * sem)
    ci_upper_bounds.append(mean_est + z_crit * sem)

# One flag per experiment: did its interval capture the true value?
contains_truth = [lo <= true_value <= hi
                  for lo, hi in zip(ci_lower_bounds, ci_upper_bounds)]
coverage_count = sum(contains_truth)
coverage_rate = coverage_count / n_experiments

# The coverage estimate is itself a binomial proportion, so it fluctuates by
# about sqrt(p(1-p)/N) between runs. A fixed ±1% band is narrower than that
# noise at N=1000 and would warn spuriously; use three standard errors.
coverage_se = np.sqrt(nominal_coverage * (1 - nominal_coverage) / n_experiments)
tolerance = 3 * coverage_se
check_passed = abs(coverage_rate - nominal_coverage) <= tolerance

print(f"Empirical CI coverage: {coverage_rate:.1%} (expected: ~95%)")
# Only show the check mark when the check actually passed.
status = "✅ SANITY CHECK PASSED" if check_passed else "⚠️ SANITY CHECK WARNING"
print(f"{status} (Monte-Carlo tolerance: ±{tolerance:.1%})")

# Visualize a few confidence intervals
fig, ax = plt.subplots(figsize=(10, 6))
n_show = min(50, n_experiments)  # never ask for more intervals than were simulated

for i in range(n_show):
    ax.plot([ci_lower_bounds[i], ci_upper_bounds[i]], [i, i],
            color='green' if contains_truth[i] else 'red',
            linewidth=1.5, alpha=0.7)

ax.axvline(true_value, color='blue', linewidth=2, linestyle='--',
           label=f'True value = {true_value}')
ax.set_xlabel('Parameter value', fontsize=12)
ax.set_ylabel('Experiment number', fontsize=12)
ax.set_title(f'{n_show} Confidence Intervals (n={n_samples} each)\n'
             f'Green: Contains true value | Red: Misses true value', fontsize=13)
ax.legend(fontsize=11)
ax.grid(alpha=0.3, axis='x')

coverage_fig_path = reports_dir / '01_confidence_intervals_coverage.png'
plt.tight_layout()
plt.savefig(coverage_fig_path, dpi=150, bbox_inches='tight')
plt.show()

# Report the path actually written rather than a hand-typed approximation.
print(f"\n✅ Saved: {coverage_fig_path}")

## 6. Key Takeaways

✅ **Two types of uncertainty:**
- **Aleatoric**: Irreducible randomness (measurement noise, $\sigma$)
- **Epistemic**: Reducible with more data (parameter uncertainty, $\sigma/\sqrt{n}$)

✅ **Standard error of the mean** ($\text{SEM} = \sigma/\sqrt{n}$) quantifies epistemic uncertainty

✅ **Confidence vs. credible intervals**:
- Frequentist confidence interval: 95% of the intervals constructed from repeated experiments contain the true value
- Bayesian credible interval: 95% probability that the true value lies in the interval (see Notebook 02)

✅ **More data reduces epistemic uncertainty**, but aleatoric uncertainty remains

**Common pitfalls:**
- ❌ Confusing $\sigma$ (data spread) with $\text{SEM}$ (estimate uncertainty)
- ❌ Thinking "95% CI" means "95% chance true value is here" (that's Bayesian!)
- ❌ Ignoring finite-sample corrections (use $t$-distribution for small $n$)

## 7. Exercises

**Exercise 1:** What sample size $n$ is needed to achieve $\text{SEM} \leq 0.1$ if $\sigma = 2.0$?

**Exercise 2:** If you have $n=25$ measurements with $\bar{x} = 10.5$ and $s = 2.0$, compute the 95% confidence interval for the mean.

**Exercise 3:** Simulate 1000 experiments where you estimate a mean from $n=10$ samples. What fraction of 95% CIs contain the true mean?

**Exercise 4:** Generate data where aleatoric uncertainty is high ($\sigma=5$) and another where it's low ($\sigma=0.5$). For each, plot how epistemic uncertainty changes with $n \in [10, 100]$.

**Exercise 5:** Explain why $\text{SEM} \propto 1/\sqrt{n}$ means you get "diminishing returns" from collecting more data.

**Exercise 6:** Research: What is the $t$-distribution and when should you use $t_{n-1}$ critical values instead of $1.96$ for confidence intervals?

In [None]:
# Your solutions here

---

## Solutions

In [None]:
# Each solution lives in its own function so that its working variables
# (n, sem, sigma, n_samples, n_range, ...) stay local and do not overwrite the
# same-named globals used by the earlier cells. `stats` (scipy.stats) is
# imported with the other dependencies in the setup cell.

def solution_1(sigma: float = 2.0, target_sem: float = 0.1) -> None:
    """Print the smallest sample size whose SEM does not exceed ``target_sem``.

    Args:
        sigma: Standard deviation of a single measurement.
        target_sem: Largest acceptable standard error of the mean.
    """
    # SEM = σ/√n ≤ target  =>  n ≥ (σ/target)^2; n is a count, so round up.
    n_needed = int(np.ceil((sigma / target_sem) ** 2))
    print(f"Solution 1: n ≥ {n_needed} samples needed")
    print(f"   (Verify: SEM at n={n_needed} is {sigma / np.sqrt(n_needed):.3f})")


def solution_2(n: int = 25, mean: float = 10.5, std: float = 2.0) -> None:
    """Print the 95% t-based confidence interval for a sample mean.

    Args:
        n: Number of measurements.
        mean: Sample mean.
        std: Sample standard deviation.
    """
    sem = std / np.sqrt(n)
    # Two-tailed 95% interval -> 0.975 quantile, with n-1 degrees of freedom.
    t_critical = stats.t.ppf(0.975, df=n - 1)
    ci_lower = mean - t_critical * sem
    ci_upper = mean + t_critical * sem
    print(f"\nSolution 2: 95% CI = [{ci_lower:.2f}, {ci_upper:.2f}]")
    print(f"   (Using t-critical = {t_critical:.3f} for df={n - 1})")


def solution_3(true_param: float = 10.0, sigma: float = 2.0,
               n_exp: int = 1000, n_samples: int = 10) -> None:
    """Print the empirical coverage of t-based 95% CIs over repeated experiments.

    Args:
        true_param: Mean of the simulated Gaussian data.
        sigma: Standard deviation of the simulated Gaussian data.
        n_exp: Number of simulated experiments.
        n_samples: Number of samples drawn in each experiment.
    """
    set_seed(42)
    # The critical value depends only on n_samples, so compute it once.
    t_crit = stats.t.ppf(0.975, df=n_samples - 1)
    coverage = 0
    for _ in range(n_exp):
        data = np.random.normal(true_param, sigma, size=n_samples)
        mean_est = np.mean(data)
        sem = np.std(data, ddof=1) / np.sqrt(n_samples)
        if mean_est - t_crit * sem <= true_param <= mean_est + t_crit * sem:
            coverage += 1
    print(f"\nSolution 3: Coverage = {coverage / n_exp:.1%} (expected ~95%)")


def solution_4(sigma_high: float = 5.0, sigma_low: float = 0.5) -> None:
    """Plot the theoretical SEM against sample size for two noise levels.

    The curves are the closed-form sigma / sqrt(n), so no random numbers are
    drawn and no seeding is needed. The figure is saved under ``reports_dir``.

    Args:
        sigma_high: Standard deviation for the high-noise curve.
        sigma_low: Standard deviation for the low-noise curve.
    """
    n_range = np.arange(10, 101, 5)
    sem_high = sigma_high / np.sqrt(n_range)
    sem_low = sigma_low / np.sqrt(n_range)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(n_range, sem_high, 'o-', label=f'High aleatoric (σ={sigma_high})', linewidth=2)
    ax.plot(n_range, sem_low, 's-', label=f'Low aleatoric (σ={sigma_low})', linewidth=2)
    ax.set_xlabel('Sample size (n)', fontsize=12)
    ax.set_ylabel('Epistemic uncertainty (SEM)', fontsize=12)
    ax.set_title('Solution 4: Epistemic Uncertainty vs Sample Size', fontsize=13)
    ax.legend(fontsize=11)
    ax.grid(alpha=0.3)
    plt.tight_layout()
    plt.savefig(reports_dir / '01_ex4_uncertainty_comparison.png', dpi=150, bbox_inches='tight')
    plt.show()
    print("\n✅ Solution 4 plot saved")


def solution_5() -> None:
    """Print why SEM ∝ 1/√n implies diminishing returns from more data."""
    print("\nSolution 5: Diminishing returns explanation:")
    print("   To reduce SEM from 1.0 to 0.5 (50% reduction): need 4× more data")
    print("   To reduce SEM from 0.5 to 0.25 (another 50%): need another 4× (16× total)")
    print("   Each additional doubling of precision requires 4× more effort!")


def solution_6() -> None:
    """Print when to prefer t critical values over the normal 1.96."""
    print("\nSolution 6: t-distribution usage:")
    print("   Use t-distribution when:")
    print("   - Sample size is small (n < 30 rule of thumb)")
    print("   - Population σ is unknown (estimated from sample)")
    print("   - Accounts for extra uncertainty from estimating σ")
    print(f"   Example: n=10 => t_0.975,9 = {stats.t.ppf(0.975, 9):.3f} vs z_0.975 = 1.960")
    print(f"            n=100 => t_0.975,99 = {stats.t.ppf(0.975, 99):.3f} (approaches 1.96)")


solution_1()
solution_2()
solution_3()
solution_4()
solution_5()
solution_6()

---

**Next:** [02_bayesian_linear_regression_uq.ipynb](02_bayesian_linear_regression_uq.ipynb) - Learn how Bayesian methods naturally quantify parameter uncertainty