In [None]:
# Setup
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from modules._import_helper import safe_import_from

# Resolve the project helpers through the repo's import shim.
set_seed = safe_import_from('00_repo_standards.src.mlphys_core', 'set_seed')
(
    constant_velocity_model,
    position_observation_model,
) = safe_import_from(
    '04_time_series_state_space.src.kalman',
    'constant_velocity_model',
    'position_observation_model',
)

# Global plotting style and seeding for the whole notebook.
plt.style.use('default')
set_seed(42)

# Every figure / table produced below is written under this directory.
output_dir = Path('modules/04_time_series_state_space/reports/nb01_noise_models')
output_dir.mkdir(parents=True, exist_ok=True)

print("‚úì Setup complete")

## 3. Simulation: Ground Truth System

Let's simulate the true system with known noise parameters.

In [None]:
def simulate_tracking_system(dt, n_steps, process_noise_std, obs_noise_std, seed=42):
    """
    Simulate a constant-velocity tracking system.

    Starting from position 0 with velocity 1 m/s, the state is propagated
    ``n_steps`` times as ``x <- F @ x + w`` with ``w ~ N(0, Q)``. After every
    propagation a noisy position reading ``z = H @ x + v`` is recorded, with
    ``v ~ N(0, obs_noise_std**2)``.

    Args:
        dt: Time step (seconds)
        n_steps: Number of timesteps
        process_noise_std: Standard deviation of process noise
        obs_noise_std: Standard deviation of measurement noise
        seed: Random seed for reproducibility

    Returns:
        times: Time vector ``np.arange(n_steps) * dt``. Entry ``k`` labels the
            k-th recorded sample; the first recorded sample is the state
            after one propagation step, not the initial state itself.
        true_states: True state trajectory, shape (n_steps, 2). The second
            axis is kept even when ``n_steps == 0``.
        observations: Noisy position measurements, shape (n_steps,)
    """
    rng = np.random.default_rng(seed)

    # System matrices. F and Q are used below as the transition matrix and the
    # process-noise covariance; H maps the state to the measured quantity.
    F, Q = constant_velocity_model(dt, process_noise_std)
    # The returned measurement covariance is not needed here: the scalar
    # measurement noise is drawn directly from obs_noise_std.
    H, _ = position_observation_model(obs_noise_std)

    state_history = []
    measurement_history = []

    # Initial state: at the origin, moving at 1 m/s
    x = np.array([0.0, 1.0])  # position=0, velocity=1 m/s
    zero_mean = np.zeros(2)

    for _ in range(n_steps):
        # True dynamics with process noise
        w = rng.multivariate_normal(zero_mean, Q)
        x = F @ x + w
        state_history.append(x.copy())

        # Noisy measurement of the propagated state
        v = rng.normal(0, obs_noise_std)
        z = H @ x + v
        measurement_history.append(z[0])

    times = np.arange(n_steps) * dt
    # reshape(-1, 2) keeps the documented (n_steps, 2) layout for an empty run,
    # so callers can always slice true_states[:, 0] / true_states[:, 1].
    true_states = np.array(state_history, dtype=float).reshape(-1, 2)
    observations = np.array(measurement_history, dtype=float)

    return times, true_states, observations

# Baseline configuration: 10 Hz sampling, low process noise, moderate
# measurement noise. Later cells read these names.
dt = 0.1
n_steps = 100
process_noise = 0.02
obs_noise = 0.15

times, true_states, observations = simulate_tracking_system(
    dt=dt,
    n_steps=n_steps,
    process_noise_std=process_noise,
    obs_noise_std=obs_noise,
    seed=42,
)

# Short sanity report on what was just simulated.
true_position = true_states[:, 0]
print(f"Simulated {n_steps} steps at dt={dt}s")
print(f"True trajectory range: position [{true_position.min():.2f}, {true_position.max():.2f}]")
print(f"Observation noise std: {obs_noise:.3f}")

### Visualization: True State vs Noisy Measurements

In [None]:
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))
ax_pos, ax_vel = axes

# Top panel: true position with the noisy position readings on top of it.
ax_pos.plot(times, true_states[:, 0], 'b-', linewidth=2, label='True Position')
ax_pos.scatter(times, observations, c='red', s=20, alpha=0.6, label='Noisy Measurements', zorder=3)
ax_pos.set_title(f'Constant Velocity Tracking (process_noise={process_noise}, obs_noise={obs_noise})', fontsize=13)
ax_pos.set_ylabel('Position (m)', fontsize=12)

# Bottom panel: velocity, which is never measured directly.
ax_vel.plot(times, true_states[:, 1], 'g-', linewidth=2, label='True Velocity (unobserved)')
ax_vel.axhline(1.0, color='gray', linestyle='--', alpha=0.5, label='Nominal velocity')
ax_vel.set_xlabel('Time (s)', fontsize=12)
ax_vel.set_ylabel('Velocity (m/s)', fontsize=12)

# Shared cosmetics for both panels.
for panel in (ax_pos, ax_vel):
    panel.legend(loc='upper left', fontsize=11)
    panel.grid(True, alpha=0.3)

fig.tight_layout()
fig.savefig(output_dir / 'baseline_simulation.png', dpi=120, bbox_inches='tight')
plt.show()

print("\nüìä Observation: Measurements are noisy, velocity is hidden!")

## 4. Noise Regimes: Parameter Sweep

**Experiment:** How do different noise levels affect observability?

We'll test 4 regimes:
1. **Low noise**: Easy estimation problem
2. **High measurement noise**: Hard to see true state
3. **High process noise**: System is unpredictable
4. **High both**: Worst case

In [None]:
# Define noise regimes (process / observation noise standard deviations)
regimes = [
    {"name": "Low Noise", "process": 0.01, "obs": 0.05},
    {"name": "High Measurement Noise", "process": 0.01, "obs": 0.5},
    {"name": "High Process Noise", "process": 0.2, "obs": 0.05},
    {"name": "High Both", "process": 0.2, "obs": 0.5},
]

# Simulate all regimes.
# Per-regime outputs are bound to loop-specific names so the baseline
# `times` / `true_states` / `observations` created by the baseline cell are
# not overwritten; re-running the baseline plot therefore still shows the
# baseline data.
results = []
for regime in regimes:
    _, regime_states, regime_obs = simulate_tracking_system(
        dt, n_steps, regime["process"], regime["obs"], seed=42
    )

    # Metrics: mean absolute measurement error on position, and spread of
    # the (unobserved) velocity over the run.
    obs_error = np.abs(regime_obs - regime_states[:, 0])
    velocity_std = np.std(regime_states[:, 1])

    results.append({
        "Regime": regime["name"],
        "Process œÉ": regime["process"],
        "Obs œÉ": regime["obs"],
        "Mean Obs Error": np.mean(obs_error),
        "Velocity Std": velocity_std,
    })

# Display table (pandas is imported with the other dependencies in the setup cell)
df_regimes = pd.DataFrame(results)
print("\n" + "="*70)
print("NOISE REGIMES: Impact on System Behavior")
print("="*70)
print(df_regimes.to_string(index=False))
print("="*70)

# Save table
df_regimes.to_csv(output_dir / 'noise_regimes_table.csv', index=False)

print("\nüí° Key Insight:")
print("   - High obs noise ‚Üí measurements unreliable (high mean error)")
print("   - High process noise ‚Üí velocity fluctuates (high std)")
print("   - Both high ‚Üí estimation becomes very difficult")

### Visual Comparison Across Regimes

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
axes = axes.flatten()

# One panel per regime. Results are kept in regime-specific names so the
# baseline `times` / `true_states` / `observations` are left untouched.
for ax, regime in zip(axes, regimes):
    regime_times, regime_states, regime_obs = simulate_tracking_system(
        dt, n_steps, regime["process"], regime["obs"], seed=42
    )

    ax.plot(regime_times, regime_states[:, 0], 'b-', linewidth=2, label='True', alpha=0.8)
    ax.scatter(regime_times, regime_obs, c='red', s=15, alpha=0.5, label='Observations')
    ax.set_xlabel('Time (s)', fontsize=11)
    ax.set_ylabel('Position (m)', fontsize=11)
    ax.set_title(f"{regime['name']}\n(œÉ_w={regime['process']}, œÉ_v={regime['obs']})", fontsize=12)
    ax.legend(loc='upper left', fontsize=10)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(output_dir / 'noise_regimes_comparison.png', dpi=120, bbox_inches='tight')
plt.show()

## 5. Diagnostic: Signal-to-Noise Ratio (SNR)

A useful metric to quantify problem difficulty:

$$
\text{SNR} = \frac{\text{Var}(\text{signal})}{\text{Var}(\text{noise})}
$$

Higher SNR → easier estimation problem.

In [None]:
def compute_snr(true_signal, observations):
    """Return Var(true_signal) / Var(observations - true_signal).

    The residual ``observations - true_signal`` is treated as the noise.
    When its variance is not strictly positive the ratio is reported as
    ``np.inf`` instead of dividing by zero.
    """
    residual = observations - true_signal
    noise_var = np.var(residual)
    if noise_var > 0:
        return np.var(true_signal) / noise_var
    return np.inf

# Compute SNR for each regime.
# Regime-specific names are used for the simulation outputs so the baseline
# `times` / `true_states` / `observations` are not overwritten by this loop.
snr_results = []
for regime in regimes:
    _, regime_states, regime_obs = simulate_tracking_system(
        dt, n_steps, regime["process"], regime["obs"], seed=42
    )
    snr = compute_snr(regime_states[:, 0], regime_obs)
    snr_results.append({"Regime": regime["name"], "SNR": snr, "SNR (dB)": 10 * np.log10(snr)})

df_snr = pd.DataFrame(snr_results)
print("\n" + "="*50)
print("Signal-to-Noise Ratio Analysis")
print("="*50)
print(df_snr.to_string(index=False))
print("="*50)
print("\nüìà Higher SNR = easier estimation problem")

# Save
df_snr.to_csv(output_dir / 'snr_analysis.csv', index=False)

## 6. Common Pitfalls

1. **Confusing noise types**: Process noise ≠ measurement noise
   - Process noise affects state evolution (unobservable)
   - Measurement noise affects observations (what we see)

2. **Wrong covariance units**: Q and R must have correct dimensions
   - Q: [state_dim × state_dim]
   - R: [obs_dim × obs_dim]

3. **Ignoring time step**: Discrete-time covariance scales with dt
   - For continuous-time noise $\sigma$, discrete $Q \approx \sigma^2 \cdot dt$

4. **Not validating simulations**: Always plot true vs observed!

5. **Forgetting seeds**: Non-reproducible experiments are useless

---

## 7. Key Takeaways

✅ **Process noise** makes dynamics unpredictable (affects prediction)  
✅ **Measurement noise** makes observations unreliable (affects correction)  
✅ **SNR** quantifies estimation difficulty  
✅ Always simulate with **known ground truth** for validation  
✅ **Reproducibility** via seeds is non-negotiable  

---

## 8. Exercises

Complete these exercises in the cells below. Solutions are at the end.

### Exercise 1: Predict Behavior (Qualitative)

**Task:** Without running code, predict what happens when:
1. Process noise → 0 but measurement noise stays high
2. Measurement noise → 0 but process noise stays high
3. Both → 0 (perfect case)

Write your predictions below:

**Your answer:**

1. Process noise → 0:
   - [Your prediction here]

2. Measurement noise → 0:
   - [Your prediction here]

3. Both → 0:
   - [Your prediction here]

### Exercise 2: Implement SNR Sweep

**Task:** Create a plot showing SNR vs measurement noise (keeping process noise fixed).
- Fix process_noise = 0.02
- Sweep obs_noise from 0.01 to 1.0 (logarithmic scale)
- Plot SNR (dB) vs obs_noise

In [None]:
# Your code here
# Hint: Use np.logspace for logarithmic sweep
# Hint: Loop over noise values, simulate, compute SNR

### Exercise 3: Non-Gaussian Noise

**Task:** Modify the simulation to use **uniform noise** instead of Gaussian.
- Keep the same variance as original Gaussian noise
- Compare visually: do observations look different?
- Question: Will Kalman filter still work optimally? Why/why not?

In [None]:
# Your code here
# Hint: rng.uniform(low, high, size) for uniform noise
# Hint: Uniform variance = (high - low)² / 12

### Exercise 4: Correlated Noise

**Task:** What if process noise in position and velocity are correlated?
- Modify Q to have off-diagonal terms: Q[0,1] = Q[1,0] = 0.5 * Q[0,0]
- Simulate and visualize
- Does the trajectory look different?

In [None]:
# Your code here

---

## 9. Solutions

### Solution 1: Qualitative Predictions

1. **Process noise → 0, high measurement noise:**
   - True trajectory is perfectly smooth (deterministic dynamics)
   - But observations are very noisy
   - Estimation can exploit smoothness to filter out noise
   - **Good scenario for filtering!**

2. **Measurement noise → 0, high process noise:**
   - Observations are perfect (we see true state)
   - But true state is erratic/unpredictable
   - No estimation needed (measurements are perfect)
   - But prediction is hard (high uncertainty)

3. **Both → 0 (perfect case):**
   - Deterministic dynamics + perfect measurements
   - No uncertainty at all
   - Trivial estimation problem

### Solution 2: SNR Sweep

In [None]:
# Solution
obs_noise_values = np.logspace(-2, 0, 20)  # 0.01 to 1.0
snr_values = []
process_noise_fixed = 0.02

# The loop variable is deliberately NOT called `obs_noise`: that name holds
# the baseline measurement-noise setting, which the baseline plot title reads.
# The simulation outputs likewise get sweep-specific names so the baseline
# `times` / `true_states` / `observations` are left untouched.
for sigma_v in obs_noise_values:
    _, sweep_states, sweep_obs = simulate_tracking_system(
        dt, n_steps, process_noise_fixed, sigma_v, seed=42
    )
    snr = compute_snr(sweep_states[:, 0], sweep_obs)
    snr_values.append(10 * np.log10(snr))  # Convert to dB

plt.figure(figsize=(10, 5))
plt.semilogx(obs_noise_values, snr_values, 'o-', linewidth=2, markersize=6)
plt.xlabel('Measurement Noise œÉ_v', fontsize=12)
plt.ylabel('SNR (dB)', fontsize=12)
plt.title('Signal-to-Noise Ratio vs Measurement Noise', fontsize=13)
plt.grid(True, alpha=0.3, which='both')
plt.tight_layout()
plt.savefig(output_dir / 'ex2_snr_sweep.png', dpi=120, bbox_inches='tight')
plt.show()

print("‚úì As measurement noise increases, SNR decreases (harder problem)")

### Solution 3: Non-Gaussian (Uniform) Noise

In [None]:
# Solution
def simulate_with_uniform_noise(dt, n_steps, process_noise_std, obs_noise_std, seed=42):
    """
    Simulate the constant-velocity system with bounded, uniform-based noise.

    Same recursion as ``simulate_tracking_system`` (start at position 0,
    velocity 1; ``x <- F @ x + w``; ``z = H @ x + v``), but the Gaussian draws
    are replaced by draws with the *same second moments*:

    * Process noise: ``w = L @ u`` where the entries of ``u`` are independent
      ``Uniform(-sqrt(3), sqrt(3))`` (unit variance) and ``L @ L.T == Q``, so
      ``Cov(w) == Q``, the covariance the Gaussian simulator samples from.
      In general the components of ``w`` are linear mixes of uniforms
      (bounded, non-Gaussian), matched to ``Q`` in covariance only.
    * Measurement noise: ``v ~ Uniform(-a, a)`` with ``a = obs_noise_std * sqrt(3)``,
      i.e. variance ``obs_noise_std**2``.

    Args:
        dt: Time step (seconds)
        n_steps: Number of timesteps
        process_noise_std: Standard deviation of process noise (passed to
            ``constant_velocity_model`` to build ``Q``)
        obs_noise_std: Standard deviation of measurement noise
        seed: Random seed for reproducibility

    Returns:
        times: ``np.arange(n_steps) * dt``
        true_states: shape (n_steps, 2), also when ``n_steps == 0``
        observations: shape (n_steps,)
    """
    rng = np.random.default_rng(seed)

    F, Q = constant_velocity_model(dt, process_noise_std)
    H, _ = position_observation_model(obs_noise_std)

    # Var(Uniform(-a, a)) = a^2 / 3, so a = sqrt(3) gives unit variance.
    unit_half_width = np.sqrt(3.0)
    obs_half_width = obs_noise_std * unit_half_width

    # Factor Q = L L^T through a symmetric eigendecomposition. Unlike a
    # Cholesky factorisation this also works when Q is singular; tiny negative
    # eigenvalues caused by round-off are clipped to zero.
    eigvals, eigvecs = np.linalg.eigh(Q)
    Q_factor = eigvecs * np.sqrt(np.clip(eigvals, 0.0, None))
    noise_dim = Q_factor.shape[1]

    state_history = []
    measurement_history = []
    x = np.array([0.0, 1.0])  # position=0, velocity=1 m/s

    for _ in range(n_steps):
        # Bounded process noise with covariance Q
        u = rng.uniform(-unit_half_width, unit_half_width, size=noise_dim)
        x = F @ x + Q_factor @ u
        state_history.append(x.copy())

        # Uniform measurement noise with variance obs_noise_std**2
        v = rng.uniform(-obs_half_width, obs_half_width)
        z = H @ x + v
        measurement_history.append(z[0])

    times = np.arange(n_steps) * dt
    # reshape(-1, 2) keeps the (n_steps, 2) layout even for an empty run.
    true_states = np.array(state_history, dtype=float).reshape(-1, 2)
    observations = np.array(measurement_history, dtype=float)
    return times, true_states, observations

# Run both simulators with identical settings so only the noise shape differs.
times_g, states_g, obs_g = simulate_tracking_system(dt, n_steps, 0.02, 0.15, seed=42)
times_u, states_u, obs_u = simulate_with_uniform_noise(dt, n_steps, 0.02, 0.15, seed=42)


def _draw_truth_vs_observations(ax, panel_title, t, states, obs):
    """Draw the true position line and the observation scatter on one axis."""
    ax.plot(t, states[:, 0], 'b-', linewidth=2, label='True')
    ax.scatter(t, obs, c='red', s=15, alpha=0.6, label='Observations')
    ax.set_title(panel_title, fontsize=12)
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Position (m)')
    ax.legend()
    ax.grid(True, alpha=0.3)


fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left: Gaussian noise. Right: uniform noise.
_draw_truth_vs_observations(axes[0], 'Gaussian Noise', times_g, states_g, obs_g)
_draw_truth_vs_observations(axes[1], 'Uniform Noise (same variance)', times_u, states_u, obs_u)

fig.tight_layout()
fig.savefig(output_dir / 'ex3_uniform_noise.png', dpi=120, bbox_inches='tight')
plt.show()

print("\nüí° Kalman Filter assumes Gaussian noise for optimality.")
print("   With uniform noise, KF is suboptimal but often still works well.")
print("   For highly non-Gaussian noise, consider particle filters!")

### Solution 4: Correlated Noise

In [None]:
# Solution
def simulate_with_correlated_noise(dt, n_steps, process_noise_std, obs_noise_std, correlation=0.5, seed=42):
    """
    Simulate the constant-velocity system with modified off-diagonal process noise.

    The process-noise covariance returned by ``constant_velocity_model`` is
    copied and its off-diagonal entries are overwritten with
    ``correlation * Q[0, 0]``. The rest of the recursion matches
    ``simulate_tracking_system``: start at position 0, velocity 1;
    ``x <- F @ x + w`` with ``w ~ N(0, Q_corr)``; ``z = H @ x + v`` with
    ``v ~ N(0, obs_noise_std**2)``.

    Note:
        ``correlation`` scales ``Q[0, 0]``; it is not a Pearson correlation
        coefficient. The resulting coefficient between the two noise
        components is ``correlation * sqrt(Q[0, 0] / Q[1, 1])``, which equals
        ``correlation`` only when ``Q[0, 0] == Q[1, 1]``. Any off-diagonal
        value already present in ``Q`` is replaced, not added to.

    Args:
        dt: Time step (seconds)
        n_steps: Number of timesteps
        process_noise_std: Standard deviation of process noise
        obs_noise_std: Standard deviation of measurement noise
        correlation: Factor applied to ``Q[0, 0]`` to form ``Q[0, 1]`` and ``Q[1, 0]``
        seed: Random seed for reproducibility

    Returns:
        times: ``np.arange(n_steps) * dt``
        true_states: shape (n_steps, 2), also when ``n_steps == 0``
        observations: shape (n_steps,)
    """
    rng = np.random.default_rng(seed)

    F, Q = constant_velocity_model(dt, process_noise_std)
    # The returned measurement covariance is not needed: v is drawn from
    # obs_noise_std directly.
    H, _ = position_observation_model(obs_noise_std)

    # Symmetric overwrite of the position/velocity cross terms.
    Q_corr = Q.copy()
    Q_corr[0, 1] = Q_corr[1, 0] = correlation * Q[0, 0]

    state_history = []
    measurement_history = []
    x = np.array([0.0, 1.0])  # position=0, velocity=1 m/s
    zero_mean = np.zeros(2)

    for _ in range(n_steps):
        w = rng.multivariate_normal(zero_mean, Q_corr)
        x = F @ x + w
        state_history.append(x.copy())

        v = rng.normal(0, obs_noise_std)
        z = H @ x + v
        measurement_history.append(z[0])

    times = np.arange(n_steps) * dt
    # reshape(-1, 2) keeps the (n_steps, 2) layout even for an empty run.
    true_states = np.array(state_history, dtype=float).reshape(-1, 2)
    observations = np.array(measurement_history, dtype=float)
    return times, true_states, observations

# Same settings for both runs; only the process-noise cross terms differ.
times_uc, states_uc, obs_uc = simulate_tracking_system(dt, n_steps, 0.05, 0.1, seed=42)
times_c, states_c, obs_c = simulate_with_correlated_noise(dt, n_steps, 0.05, 0.1, correlation=0.7, seed=42)


def _draw_state_column(ax_position, ax_velocity, column_label, t, states):
    """Fill one figure column: position on the top axis, velocity on the bottom."""
    ax_position.plot(t, states[:, 0], 'b-', linewidth=2)
    ax_position.set_ylabel('Position (m)')
    ax_position.set_title(f'{column_label}: Position', fontsize=12)
    ax_position.grid(True, alpha=0.3)

    ax_velocity.plot(t, states[:, 1], 'g-', linewidth=2)
    ax_velocity.set_xlabel('Time (s)')
    ax_velocity.set_ylabel('Velocity (m/s)')
    ax_velocity.set_title(f'{column_label}: Velocity', fontsize=12)
    ax_velocity.grid(True, alpha=0.3)


fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Left column: output of simulate_tracking_system.
_draw_state_column(axes[0, 0], axes[1, 0], 'Uncorrelated Noise', times_uc, states_uc)
# Right column: output of simulate_with_correlated_noise.
_draw_state_column(axes[0, 1], axes[1, 1], 'Correlated Noise (œÅ=0.7)', times_c, states_c)

fig.tight_layout()
fig.savefig(output_dir / 'ex4_correlated_noise.png', dpi=120, bbox_inches='tight')
plt.show()

print("\nüí° Correlated noise: disturbances in position and velocity happen together.")
print("   Example: Wind gust affects both position and velocity simultaneously.")
print("   Kalman filter can handle this if Q is correctly modeled!")

---

## Summary Report

In [None]:
# Write summary
summary = f"""
# Notebook 01: Noise Models and Simulation - Summary

**Date:** {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}

## Key Results

1. **Noise Regimes Tested:** {len(regimes)}
   - SNR range: {df_snr['SNR (dB)'].min():.1f} to {df_snr['SNR (dB)'].max():.1f} dB

2. **Main Findings:**
   - High measurement noise ‚Üí unreliable observations
   - High process noise ‚Üí unpredictable dynamics
   - SNR quantifies problem difficulty

3. **Outputs Generated:**
   - baseline_simulation.png
   - noise_regimes_comparison.png
   - noise_regimes_table.csv
   - snr_analysis.csv

## Next Steps

‚Üí Notebook 02: Apply Kalman Filter to these noisy measurements!
"""

# The report contains non-ASCII characters. Without an explicit encoding the
# platform default is used, which can raise UnicodeEncodeError or produce a
# file that other tools decode incorrectly, so UTF-8 is requested explicitly.
summary_path = output_dir / 'summary.md'
summary_path.write_text(summary, encoding='utf-8')

print("\n" + "="*60)
print("‚úì Notebook 01 Complete!")
print("="*60)
print(f"Outputs saved to: {output_dir}")
print("\nFiles created:")
# List everything currently in the output directory, in name order.
for output_file in sorted(output_dir.glob('*')):
    print(f"  - {output_file.name}")