In [None]:
# Setup
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from modules._import_helper import safe_import_from

# Project helpers are resolved through the repository's `safe_import_from`
# helper rather than a plain `import` statement.
# NOTE(review): presumably because the package directories start with digits
# ('00_...', '04_...') and are not valid Python identifiers — confirm.
set_seed = safe_import_from('00_repo_standards.src.mlphys_core', 'set_seed')
(RollingWindowBacktest, naive_forecast, moving_average_forecast) = safe_import_from(
    '04_time_series_state_space.src.forecasting',
    'RollingWindowBacktest', 'naive_forecast', 'moving_average_forecast'
)

# Seed through the project helper and reset matplotlib to its default style.
# NOTE(review): which RNGs `set_seed` covers is not visible here — verify.
set_seed(42)
plt.style.use('default')

# Every figure, CSV and the summary written later in this notebook lands in
# this directory. The path is relative to the current working directory.
output_dir = Path('modules/04_time_series_state_space/reports/nb04_forecasting')
output_dir.mkdir(parents=True, exist_ok=True)

print("‚úì Setup complete")

## 3. Generate Synthetic Time Series

Create realistic data: trend + seasonality + noise

In [None]:
def generate_synthetic_timeseries(
    n_points=500,
    trend_coef=0.05,
    seasonal_period=50,
    seasonal_amplitude=2.0,
    noise_level=0.3,
    seed=42,
):
    """Build a synthetic series as linear trend + sinusoidal season + Gaussian noise.

    Parameters
    ----------
    n_points : int
        Number of samples; the time axis is ``0 .. n_points - 1``.
    trend_coef : float
        Slope of the linear trend per time step.
    seasonal_period : float
        Period of the sine component, in time steps.
    seasonal_amplitude : float
        Peak amplitude of the sine component.
    noise_level : float
        Standard deviation of the zero-mean Gaussian noise.
    seed : int
        Seed for the local ``numpy`` generator, so repeated calls match.

    Returns
    -------
    tuple
        ``(t, y, trend, seasonality)`` where ``y = trend + seasonality + noise``.
        The noise term itself is not returned.
    """
    t = np.arange(n_points)

    # Deterministic components.
    trend = trend_coef * t
    phase = 2 * np.pi * t / seasonal_period
    seasonality = seasonal_amplitude * np.sin(phase)

    # Stochastic component, drawn from a generator private to this call.
    noise = np.random.default_rng(seed).normal(0, noise_level, n_points)

    return t, trend + seasonality + noise, trend, seasonality

# Build the demo series once; the fixed seed keeps the figure reproducible.
t, y, trend, seasonality = generate_synthetic_timeseries(seed=42, n_points=500)

# Two stacked panels: observed vs. noise-free signal on top, components below.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 8))
signal_ax, component_ax = axes

signal_ax.plot(t, y, 'b-', label='Observed', linewidth=1, alpha=0.7)
signal_ax.plot(t, trend + seasonality, 'r--', label='True (no noise)', linewidth=2)
signal_ax.set_title('Synthetic Time Series (Trend + Seasonality + Noise)', fontsize=13)
signal_ax.set_ylabel('Value', fontsize=12)

component_ax.plot(t, trend, 'g-', label='Trend', linewidth=2)
component_ax.plot(t, seasonality, 'orange', label='Seasonality', linewidth=2)
component_ax.set_xlabel('Time', fontsize=12)
component_ax.set_ylabel('Component', fontsize=12)

# Cosmetics shared by both panels.
for panel in axes:
    panel.legend(fontsize=11)
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(output_dir / 'synthetic_timeseries.png', dpi=120, bbox_inches='tight')
plt.show()

print(f"Generated {len(y)} points with trend and seasonal pattern")

## 4. Baseline Models

Implement two simple forecasting methods:
1. **Naive forecast**: $\hat{y}_{t+h} = y_t$ (last value)
2. **Moving average**: $\hat{y}_{t+h} = \frac{1}{w}\sum_{i=0}^{w-1} y_{t-i}$

In [None]:
# Smoke-test both baseline forecasters on a short prefix of the series before
# running the full backtest: train on the first 50 points, forecast 10 ahead.
y_train_test = y[:50]
h_test = 10

# Naive: the printed forecast values should all equal the last training value
# shown on the second line.
forecast_naive = naive_forecast(y_train_test, h_test)
print(f"Naive forecast: {forecast_naive[:5]}")
print(f"  (repeats last value: {y_train_test[-1]:.2f})")

# Moving average with a 5-point window.
# NOTE(review): assumes the forecast is the flat mean of the last `window`
# training points, as the markdown formula states — confirm in forecasting.py.
forecast_ma = moving_average_forecast(y_train_test, h_test, window=5)
print(f"\nMoving average (w=5): {forecast_ma[:5]}")
print(f"  (averages last 5 values)")

## 5. Rolling Window Backtesting

Proper evaluation using `RollingWindowBacktest`.

In [None]:
# Window geometry for the backtest.
# NOTE(review): presumably each fold trains on `train_size` points, is scored
# on the next `test_size`, and fold origins are `step_size` apart — confirm
# against RollingWindowBacktest.
train_size, test_size, step_size = 100, 20, 10

backtest = RollingWindowBacktest(train_size=train_size, test_size=test_size, step_size=step_size)

# Count of window positions that fit entirely inside the series.
expected_folds = (len(y) - train_size - test_size) // step_size + 1

print("Backtesting setup:")
print(f"  Train size: {train_size}")
print(f"  Test size: {test_size}")
print(f"  Step size: {step_size}")
print(f"  Expected folds: {expected_folds}")

In [None]:
# Score the naive baseline on every fold, then report mean and spread across folds.
results_naive = backtest.run(y, naive_forecast, verbose=True)

naive_metrics = results_naive['metrics']
naive_rule = "=" * 50

print("\n" + naive_rule)
print("NAIVE FORECAST RESULTS")
print(naive_rule)
print(f"Mean MAE:  {naive_metrics['mean_mae']:.4f} ¬± {naive_metrics['std_mae']:.4f}")
print(f"Mean RMSE: {naive_metrics['mean_rmse']:.4f} ¬± {naive_metrics['std_rmse']:.4f}")
print(f"Mean MAPE: {naive_metrics['mean_mape']:.2f}% ¬± {naive_metrics['std_mape']:.2f}%")
print(naive_rule)

In [None]:
# Moving-average baseline. The backtest calls forecasters as f(y_train, h), so
# the window length is bound through a closure; `ma_window` is looked up when
# the lambda is called, not when it is defined.
ma_window = 10
forecast_ma_fn = lambda y_train, h: moving_average_forecast(y_train, h, ma_window)

results_ma = backtest.run(y, forecast_ma_fn, verbose=True)

ma_metrics = results_ma['metrics']
ma_rule = "=" * 50

print("\n" + ma_rule)
print(f"MOVING AVERAGE (window={ma_window}) RESULTS")
print(ma_rule)
print(f"Mean MAE:  {ma_metrics['mean_mae']:.4f} ¬± {ma_metrics['std_mae']:.4f}")
print(f"Mean RMSE: {ma_metrics['mean_rmse']:.4f} ¬± {ma_metrics['std_rmse']:.4f}")
print(f"Mean MAPE: {ma_metrics['mean_mape']:.2f}% ¬± {ma_metrics['std_mape']:.2f}%")
print(ma_rule)

## 6. Visualize Backtest Results

In [None]:
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 10))
forecast_ax, metric_ax = axes

# Top panel: the full series with the naive forecasts of the first five folds.
forecast_ax.plot(t, y, 'k-', linewidth=1, alpha=0.5, label='Actual')
for fold_number, fold in enumerate(results_naive['folds'][:5], start=1):
    forecast_ax.plot(
        t[fold['test_indices']],
        fold['forecasts'],
        'o-',
        markersize=4,
        alpha=0.7,
        label=f'Fold {fold_number}',
    )
forecast_ax.set_title('Rolling Window Forecasts (Naive, first 5 folds)', fontsize=13)
forecast_ax.set_ylabel('Value', fontsize=12)
forecast_ax.legend(fontsize=9, ncol=2)
forecast_ax.grid(True, alpha=0.3)

# Bottom panel: per-fold MAE for both baselines, dashed lines mark the means.
fold_nums = np.arange(len(results_naive['mae']))
mae_series = (
    ('Naive MAE', results_naive, 'b-o', 'blue'),
    ('MA MAE', results_ma, 'r-s', 'red'),
)
for series_label, series_results, line_fmt, mean_colour in mae_series:
    metric_ax.plot(fold_nums, series_results['mae'], line_fmt,
                   linewidth=2, markersize=6, label=series_label)
for series_label, series_results, line_fmt, mean_colour in mae_series:
    metric_ax.axhline(series_results['metrics']['mean_mae'],
                      color=mean_colour, linestyle='--', alpha=0.5)
metric_ax.set_title('Metrics Across Folds', fontsize=13)
metric_ax.set_xlabel('Fold', fontsize=12)
metric_ax.set_ylabel('MAE', fontsize=12)
metric_ax.legend(fontsize=11)
metric_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(output_dir / 'backtest_results.png', dpi=120, bbox_inches='tight')
plt.show()

## 7. Common Pitfall: Data Leakage (DEMONSTRATION)

**Wrong way**: fit preprocessing (here, the normalization statistics) on the entire dataset, then split into train and test

In [None]:
# --- WRONG: statistics taken over the whole series include the future ------
full_mean, full_std = y.mean(), y.std()
y_normalized_wrong = (y - full_mean) / full_std  # Uses future data!

# Splitting afterwards is too late: both halves already share those statistics.
split_point = int(0.8 * len(y))
y_train_wrong, y_test_wrong = (
    y_normalized_wrong[:split_point],
    y_normalized_wrong[split_point:],
)

print("‚ö†Ô∏è WRONG APPROACH (with leakage):")
print(f"   Normalized using mean={full_mean:.2f}, std={full_std:.2f} from ALL data")
print("   This includes test set statistics!")

# --- CORRECT: split first, then fit the statistics on the training part ----
y_train_correct, y_test_correct = y[:split_point], y[split_point:]
train_mean, train_std = y_train_correct.mean(), y_train_correct.std()

# The test segment is transformed with the TRAIN statistics, never its own.
y_train_normalized = (y_train_correct - train_mean) / train_std
y_test_normalized = (y_test_correct - train_mean) / train_std

print("\n‚úÖ CORRECT APPROACH (no leakage):")
print(f"   Normalized using mean={train_mean:.2f}, std={train_std:.2f} from TRAIN data only")
print("   Applied same transform to test set")

print("\nüí° Key rule: NEVER use test set statistics in training!")

## 8. Comparison Table

In [None]:
import pandas as pd

# One row per method, built from the fold-averaged metrics of each backtest.
comparison = pd.DataFrame([
    {
        "Method": method_label,
        "MAE": method_results['metrics']['mean_mae'],
        "RMSE": method_results['metrics']['mean_rmse'],
        "MAPE (%)": method_results['metrics']['mean_mape'],
    }
    for method_label, method_results in (
        ("Naive", results_naive),
        (f"Moving Avg (w={ma_window})", results_ma),
    )
])

table_rule = "=" * 60
print("\n" + table_rule)
print("FORECASTING METHODS COMPARISON")
print(table_rule)
print(comparison.to_string(index=False))
print(table_rule)

comparison.to_csv(output_dir / 'forecast_comparison.csv', index=False)

# Relative MAE gain of the moving average over the naive baseline, if any.
mae_naive = results_naive['metrics']['mean_mae']
mae_ma = results_ma['metrics']['mean_mae']
if mae_ma < mae_naive:
    improvement = (mae_naive - mae_ma) / mae_naive * 100
    print(f"\n‚ú® Moving average improves MAE by {improvement:.1f}% over naive")
else:
    print("\n‚ö†Ô∏è Naive forecast is better (or data is too noisy for MA)")

## 9. Uncertainty Estimation: Bootstrap

How confident are we in these metrics?

In [None]:
def bootstrap_metric(errors, n_bootstrap=1000, seed=42):
    """Percentile-bootstrap 95% interval for the mean absolute error.

    Parameters
    ----------
    errors : array-like
        Signed forecast errors; absolute values are taken internally.
    n_bootstrap : int
        Number of resamples drawn with replacement.
    seed : int
        Seed for the local ``numpy`` generator.

    Returns
    -------
    numpy.ndarray
        Two values: the 2.5th and 97.5th percentiles of the resampled MAEs.
    """
    rng = np.random.default_rng(seed)
    n_obs = len(errors)

    # Each resample has the same size as the input and is drawn with replacement.
    resampled_maes = [
        np.mean(np.abs(rng.choice(errors, size=n_obs, replace=True)))
        for _ in range(n_bootstrap)
    ]

    return np.percentile(resampled_maes, [2.5, 97.5])

# Collect all errors from naive forecast
all_errors_naive = []
for fold in results_naive['folds']:
    actuals = y[fold['test_indices']]
    forecasts = fold['forecasts']
    all_errors_naive.extend(actuals - forecasts)

all_errors_naive = np.array(all_errors_naive)

# Bootstrap CI
ci_lower, ci_upper = bootstrap_metric(all_errors_naive, n_bootstrap=1000)

print(f"\nNaive Forecast MAE: {np.mean(np.abs(all_errors_naive)):.4f}")
print(f"95% Bootstrap CI: [{ci_lower:.4f}, {ci_upper:.4f}]")
print("\nüí° Confidence interval shows uncertainty in our metric estimate.")

## 10. Key Takeaways

✅ **Rolling window backtesting** simulates production deployment  
✅ **Never shuffle** time series data!  
✅ **Data leakage** occurs when test info leaks into training (normalization, feature engineering)  
✅ **Multiple folds** give robust metric estimates  
✅ **Baseline models** (naive, MA) establish performance floor  
✅ **Bootstrap** quantifies metric uncertainty  

---

## 11. Exercises

### Exercise 1: Implement Exponential Smoothing

**Task:** Implement simple exponential smoothing:
$$\hat{y}_{t+1} = \alpha y_t + (1-\alpha) \hat{y}_t$$

Compare with naive and MA. Tune $\alpha \in \{0.1, 0.5, 0.9\}$.

In [None]:
# Your code here

### Exercise 2: Expanding Window

**Task:** Modify backtesting to use **expanding window** (train set grows each fold).
Does this improve performance?

In [None]:
# Your code here

### Exercise 3: Residual Analysis

**Task:** Plot residuals (actual - forecast) histogram. Are they Gaussian? Any patterns?

In [None]:
# Your code here

---

## 12. Solutions

### Solution 1: Exponential Smoothing

In [None]:
# Solution
def exponential_smoothing_forecast(y_train, h, alpha=0.3):
    """Flat h-step forecast from simple exponential smoothing.

    Parameters
    ----------
    y_train : sequence of float
        Training observations in time order; must be non-empty.
    h : int
        Forecast horizon (length of the returned array).
    alpha : float
        Smoothing factor: weight given to each new observation.

    Returns
    -------
    numpy.ndarray
        Array of length ``h`` filled with the final smoothed level.
    """
    # Seed the level with the first observation, then fold in every point
    # (the first one included) with weight alpha.
    level = y_train[0]
    carry = 1 - alpha
    for observation in y_train:
        level = alpha * observation + carry * level

    # Simple exponential smoothing has no trend term, so the forecast is flat.
    return np.full(h, level)

# Test different alphas
alphas = [0.1, 0.3, 0.5, 0.7, 0.9]
results_es = []

for alpha in alphas:
    forecast_fn = lambda y_train, h: exponential_smoothing_forecast(y_train, h, alpha)
    results = backtest.run(y, forecast_fn, verbose=False)
    results_es.append({
        "alpha": alpha,
        "MAE": results['metrics']['mean_mae'],
        "RMSE": results['metrics']['mean_rmse'],
    })

df_es = pd.DataFrame(results_es)
print("\n" + "="*50)
print("EXPONENTIAL SMOOTHING RESULTS")
print("="*50)
print(df_es.to_string(index=False))
print("="*50)

best_alpha = df_es.loc[df_es['MAE'].idxmin(), 'alpha']
print(f"\n‚úì Best alpha: {best_alpha}")

### Solution 2: Expanding Window

In [None]:
# Solution: Expanding window backtesting
def expanding_window_backtest(y, forecast_fn, initial_train_size, test_size, step_size):
    """Backtest a forecaster with an expanding (growing) training window.

    Fold k trains on ``y[:train_end]`` and is scored on
    ``y[train_end:train_end + test_size]`` (both half-open), with ``train_end``
    starting at ``initial_train_size`` and growing by ``step_size`` per fold.

    Parameters
    ----------
    y : sequence of float
        The full series in time order.
    forecast_fn : callable
        Called as ``forecast_fn(y_train, test_size)``; must return
        ``test_size`` predictions.
    initial_train_size : int
        Length of the first training window.
    test_size : int
        Number of points forecast and scored per fold.
    step_size : int
        How far the training window grows between folds; must be positive.

    Returns
    -------
    dict
        ``{"mean_mae": ..., "mean_rmse": ...}`` averaged over folds. Both are
        NaN when the series is too short for even one fold.

    Raises
    ------
    ValueError
        If ``step_size`` is not positive (the window would never advance).
    """
    # A non-positive step never moves the window forward, so the fold loop
    # would never terminate.
    if step_size <= 0:
        raise ValueError(f"step_size must be positive, got {step_size}")

    mae_list = []
    rmse_list = []

    # Last valid train_end is len(y) - test_size, hence the +1 on the stop.
    for train_end in range(initial_train_size, len(y) - test_size + 1, step_size):
        y_train = y[:train_end]
        y_test = y[train_end:train_end + test_size]

        y_pred = forecast_fn(y_train, test_size)

        # Coerce both sides so plain lists work as well as arrays.
        residuals = np.asarray(y_test) - np.asarray(y_pred)
        mae_list.append(np.mean(np.abs(residuals)))
        rmse_list.append(np.sqrt(np.mean(residuals ** 2)))

    # No fold fits: return NaN explicitly instead of averaging an empty list,
    # which emits a RuntimeWarning.
    if not mae_list:
        return {"mean_mae": np.nan, "mean_rmse": np.nan}

    return {"mean_mae": np.mean(mae_list), "mean_rmse": np.mean(rmse_list)}

# Run expanding window
results_expanding = expanding_window_backtest(
    y, naive_forecast, initial_train_size=100, test_size=20, step_size=10
)

print("\n" + "="*50)
print("EXPANDING WINDOW vs ROLLING WINDOW")
print("="*50)
print(f"Rolling  MAE: {results_naive['metrics']['mean_mae']:.4f}")
print(f"Expanding MAE: {results_expanding['mean_mae']:.4f}")
print("="*50)

if results_expanding['mean_mae'] < results_naive['metrics']['mean_mae']:
    print("\n‚úì Expanding window improves performance (more training data!)")
else:
    print("\n‚ö†Ô∏è Rolling window is better (recent data more relevant)")

### Solution 3: Residual Analysis

In [None]:
# Solution: Analyze residuals
# Uses `all_errors_naive` (pooled actual - forecast residuals of the naive
# backtest) built in the bootstrap section; that cell must have run first.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Histogram of residuals, with a reference line at zero error
axes[0].hist(all_errors_naive, bins=30, edgecolor='black', alpha=0.7)
axes[0].axvline(0, color='red', linestyle='--', linewidth=2, label='Zero error')
axes[0].set_xlabel('Residual (actual - forecast)', fontsize=12)
axes[0].set_ylabel('Frequency', fontsize=12)
axes[0].set_title('Residual Distribution', fontsize=13)
axes[0].legend(fontsize=11)
axes[0].grid(True, alpha=0.3)

# Q-Q plot (check Gaussianity): probplot draws directly into the right panel
from scipy import stats
stats.probplot(all_errors_naive, dist="norm", plot=axes[1])
# Set the title after probplot so it is the one shown on the panel
axes[1].set_title('Q-Q Plot (vs Normal)', fontsize=13)
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(output_dir / 'ex3_residual_analysis.png', dpi=120, bbox_inches='tight')
plt.show()

# Test normality
# NOTE(review): Shapiro-Wilk's null hypothesis is normality, so p > 0.05 means
# "no evidence against Gaussian", not proof of it. Residuals pooled across
# folds are also unlikely to be independent — read the p-value as indicative.
_, p_value = stats.shapiro(all_errors_naive[:5000])  # Shapiro-Wilk test (max 5000 samples)
print(f"\nShapiro-Wilk test p-value: {p_value:.4f}")
if p_value > 0.05:
    print("‚úì Residuals are approximately Gaussian (p > 0.05)")
else:
    print("‚ö†Ô∏è Residuals are NOT Gaussian (p < 0.05)")

# A mean far from zero indicates a systematically biased forecaster
print(f"\nResidual statistics:")
print(f"  Mean: {np.mean(all_errors_naive):.4f} (should be ~0)")
print(f"  Std:  {np.std(all_errors_naive):.4f}")

---

## Summary Report

In [None]:
# Markdown report assembled from results computed in earlier cells
# (results_naive, results_ma, ma_window); `pd` must already be imported by the
# comparison-table cell. The timestamp is the local time at execution.
summary = f"""
# Notebook 04: Forecasting & Backtesting - Summary

**Date:** {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}

## Key Results

1. **Baseline Methods:**
   - Naive: MAE = {results_naive['metrics']['mean_mae']:.4f}
   - Moving Average (w={ma_window}): MAE = {results_ma['metrics']['mean_mae']:.4f}

2. **Backtesting:**
   - {len(results_naive['folds'])} folds evaluated
   - Rolling window approach (no data leakage)

3. **Data Leakage:**
   - Demonstrated wrong (leakage) vs correct (no leakage) normalization

## Outputs
   - synthetic_timeseries.png
   - backtest_results.png
   - forecast_comparison.csv

## Next Steps

‚Üí Apply these principles to real datasets!
‚Üí Consider more advanced models (ARIMA, Prophet, LSTMs) with same backtesting framework
"""

# Write the report as UTF-8 explicitly. The summary text contains non-ASCII
# characters, and without `encoding=` Python falls back to the platform's
# locale encoding, which can raise UnicodeEncodeError (e.g. cp1252 on Windows).
with open(output_dir / 'summary.md', 'w', encoding='utf-8') as f:
    f.write(summary)

print("\n" + "="*60)
print("‚úì Notebook 04 Complete!")
print("="*60)
print(f"Outputs saved to: {output_dir}")