# TSA Chapter 0: Forecast Evaluation

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_Ch0/TSA_ch0_forecast_eval/TSA_ch0_forecast_eval.ipynb)

This notebook demonstrates forecast evaluation:
- Error metrics: MAE, RMSE, MAPE, sMAPE
- Residual diagnostics
- Cross-validation with expanding window
- Train/validation/test split for time series
- Forecast comparison across methods

In [None]:
!pip install matplotlib numpy scipy statsmodels pandas -q

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from matplotlib.patches import Patch
import statsmodels.api as sm

In [None]:
# Shared look-and-feel for every chart in this notebook.
# The insertion order of COLORS matters: it also defines the default line
# colour cycle installed further down.
COLORS = dict(
    blue='#1A3A6E',
    red='#DC3545',
    green='#2E7D32',
    orange='#E67E22',
    gray='#666666',
    purple='#8E44AD',
)

# Transparent canvas, both on screen and in exported files.
plt.rcParams['axes.facecolor'] = 'none'
plt.rcParams['figure.facecolor'] = 'none'
plt.rcParams['savefig.transparent'] = True

# Minimal frame: only the left and bottom spines, thin dark edges, no grid.
plt.rcParams['axes.spines.top'] = False
plt.rcParams['axes.spines.right'] = False
plt.rcParams['axes.edgecolor'] = '#333333'
plt.rcParams['axes.linewidth'] = 0.8
plt.rcParams['axes.grid'] = False

# Typography.
plt.rcParams['font.size'] = 10
plt.rcParams['axes.titlesize'] = 11
plt.rcParams['axes.labelsize'] = 10
plt.rcParams['legend.fontsize'] = 8
plt.rcParams['xtick.labelsize'] = 8
plt.rcParams['ytick.labelsize'] = 8

# Lines: default width and the palette-driven colour cycle.
plt.rcParams['lines.linewidth'] = 1.5
plt.rcParams['axes.prop_cycle'] = plt.cycler('color', [*COLORS.values()])

# Fix the global NumPy RNG so the synthetic series are reproducible.
np.random.seed(42)

# Shared charts folder for the lecture, resolved relative to the parent of the
# current working directory. It is not guaranteed to exist (e.g. on Colab).
CHARTS_DIR = os.path.join(os.path.dirname(os.path.abspath('.')), '..', '..', 'charts')

def save_chart(fig, name):
    """Save ``fig`` as PDF and PNG, locally and (best-effort) in ``CHARTS_DIR``.

    Parameters
    ----------
    fig : object with a ``savefig(path, **kwargs)`` method
        The figure to export (a matplotlib Figure in this notebook).
    name : str
        File stem without extension; ``.pdf`` and ``.png`` are appended.

    Notes
    -----
    The two files written to the current working directory are mandatory:
    any error raised while writing them propagates to the caller.
    The two copies written under ``CHARTS_DIR`` are optional. Only filesystem
    errors (``OSError``, e.g. the directory does not exist) are ignored there;
    any other exception is allowed to surface instead of being hidden.
    """
    savefig_kwargs = dict(bbox_inches='tight', transparent=True, dpi=150)
    extensions = ('pdf', 'png')

    for ext in extensions:
        fig.savefig(f'{name}.{ext}', **savefig_kwargs)

    # Also save to main charts directory for the lecture
    charts_path = os.path.join(CHARTS_DIR, str(name))
    try:
        for ext in extensions:
            fig.savefig(f'{charts_path}.{ext}', **savefig_kwargs)
    except OSError:
        pass  # Skip if running on Colab without the charts dir
    print(f'Saved: {name}.pdf + .png')

def add_legend_below(ax, ncol=3):
    """Attach a frameless legend centred underneath ``ax``.

    Parameters
    ----------
    ax : object with a ``legend(**kwargs)`` method (a matplotlib Axes here).
    ncol : int, default 3
        Number of columns the legend entries are laid out in.
    """
    placement = {
        'loc': 'upper center',            # anchor point on the legend box
        'bbox_to_anchor': (0.5, -0.12),   # horizontally centred, just below the axes
        'ncol': ncol,
        'frameon': False,
    }
    ax.legend(**placement)

In [None]:
# Chart: ch1_forecast_eval
# Actual vs one-step-ahead forecast (top panel) with the forecast errors (bottom panel)
np.random.seed(42)
fig, axes = plt.subplots(2, 1, figsize=(10, 5), gridspec_kw={'height_ratios': [2, 1]})

n_obs = 60
t = np.arange(n_obs)
# Synthetic series: level + linear trend + 12-period seasonality + Gaussian noise
y = 50 + 0.3 * t + 8 * np.sin(2*np.pi*t/12) + np.random.randn(n_obs) * 2

# One-step-ahead exponential smoothing forecast.
# The forecast for period i may only use observations up to i-1. Feeding y[i]
# into its own forecast would leak the value being predicted and make the
# residuals below look smaller than a real forecast could achieve.
alpha = 0.3
y_hat = np.zeros(n_obs)
y_hat[0] = y[0]  # no history yet: start the recursion at the first observation
for i in range(1, n_obs):
    y_hat[i] = alpha * y[i-1] + (1 - alpha) * y_hat[i-1]

# Forecast errors e_t = X_t - X_hat_t
residuals = y - y_hat

axes[0].plot(t, y, color=COLORS['blue'], linewidth=1.5, label='Actual ($X_t$)')
axes[0].plot(t, y_hat, color=COLORS['red'], linewidth=1.5, linestyle='--', label='Forecast ($\\hat{X}_t$)')
axes[0].set_title('Actual vs Forecast', fontweight='bold')
axes[0].set_ylabel('Value')
add_legend_below(axes[0], ncol=2)

# Green bars: under-forecast (positive error); red bars: over-forecast
axes[1].bar(t, residuals, color=np.where(residuals > 0, COLORS['green'], COLORS['red']),
            alpha=0.6, width=0.8)
axes[1].axhline(y=0, color='black', linewidth=0.5)
axes[1].set_title('Residuals: $e_t = X_t - \\hat{X}_t$', fontweight='bold')
axes[1].set_xlabel('Time ($t$)')
axes[1].set_ylabel('$e_t$')

# Leave a small bottom margin for the legend placed below the top panel
fig.tight_layout(rect=[0, 0.02, 1, 1])
save_chart(fig, 'ch1_forecast_eval')
plt.show()

In [None]:
# Chart: forecast_accuracy_metrics
# Comparison of SES, Holt, Holt-Winters with error metrics
np.random.seed(42)
fig, axes = plt.subplots(1, 2, figsize=(10, 4))

n_obs = 36
t = np.arange(n_obs)
# Synthetic series: level + linear trend + 12-period seasonality + Gaussian noise
y = 50 + 0.4 * t + 10 * np.sin(2*np.pi*t/12) + np.random.randn(n_obs) * 2

# Three methods
# SES: genuine one-step-ahead recursion. The forecast for period i uses only
# observations up to i-1; using y[i] would leak the target into its own
# forecast and unfairly lower the SES error bars in the right-hand panel.
alpha = 0.3
ses_pred = np.zeros(n_obs)
ses_pred[0] = y[0]  # no history yet: start the recursion at the first observation
for i in range(1, n_obs):
    ses_pred[i] = alpha * y[i-1] + (1 - alpha) * ses_pred[i-1]

# Holt and Holt-Winters are simulated stand-ins, not fitted models: they are
# drawn around the known trend (Holt) and trend + seasonality (H-W) with small noise.
holt_pred = 50 + 0.4 * t + np.random.randn(n_obs) * 1
hw_pred = 50 + 0.4 * t + 10 * np.sin(2*np.pi*t/12) + np.random.randn(n_obs) * 0.5

axes[0].plot(t, y, color=COLORS['gray'], linewidth=1.0, label='Actual')
axes[0].plot(t, ses_pred, color=COLORS['blue'], linewidth=1.2, linestyle='--', label='SES')
axes[0].plot(t, holt_pred, color=COLORS['green'], linewidth=1.2, linestyle='--', label='Holt')
axes[0].plot(t, hw_pred, color=COLORS['red'], linewidth=1.2, linestyle='--', label='H-W')
axes[0].set_title('Forecast Comparison', fontweight='bold')
axes[0].set_xlabel('Time')
axes[0].set_ylabel('$X_t$')
add_legend_below(axes[0], ncol=4)

# Metrics comparison (same order as `methods`)
methods = ['SES', 'Holt', 'Holt-Winters']
predictions = [ses_pred, holt_pred, hw_pred]
rmse = [np.sqrt(np.mean((y - pred)**2)) for pred in predictions]
mae = [np.mean(np.abs(y - pred)) for pred in predictions]

# Grouped bars: RMSE to the left of each tick, MAE to the right
x_pos = np.arange(len(methods))
width = 0.35
axes[1].bar(x_pos - width/2, rmse, width, color=COLORS['blue'], alpha=0.7, label='RMSE')
axes[1].bar(x_pos + width/2, mae, width, color=COLORS['orange'], alpha=0.7, label='MAE')

axes[1].set_title('Error Metrics', fontweight='bold')
axes[1].set_xticks(x_pos)
axes[1].set_xticklabels(methods)
axes[1].set_ylabel('Error')
add_legend_below(axes[1], ncol=2)

# Leave a small bottom margin for the legends placed below the axes
fig.tight_layout(rect=[0, 0.02, 1, 1])
save_chart(fig, 'forecast_accuracy_metrics')
plt.show()

In [None]:
# Chart: residual_diagnostics
# 2x2 diagnostic panel: time plot, histogram, ACF and normal Q-Q plot
np.random.seed(42)
fig, axes = plt.subplots(2, 2, figsize=(8, 5.5))
(ax_time, ax_hist), (ax_acf, ax_qq) = axes

# Simulated residuals: 100 Gaussian draws with standard deviation 2
residuals = np.random.randn(100) * 2

# --- Top-left: residuals over time, with a zero reference line
ax_time.plot(range(100), residuals, color=COLORS['blue'], linewidth=0.8)
ax_time.axhline(y=0, color='black', linewidth=0.5)
ax_time.set_title('Residuals vs Time', fontweight='bold', fontsize=9)
ax_time.set(xlabel='Time', ylabel='$e_t$')

# --- Top-right: density histogram overlaid with the N(0, 2) density
ax_hist.hist(residuals, bins=20, color=COLORS['blue'], alpha=0.7, density=True, edgecolor='white')
x_range = np.linspace(-6, 6, 100)
ax_hist.plot(x_range, stats.norm.pdf(x_range, 0, 2), color=COLORS['red'], linewidth=1.5)
ax_hist.set_title('Residual Histogram', fontweight='bold', fontsize=9)
ax_hist.set(xlabel='$e_t$', ylabel='Density')

# --- Bottom-left: sample autocorrelation for lags 0..20
n = len(residuals)
r_centered = residuals - np.mean(residuals)
total_ss = np.sum(r_centered**2)  # lag-0 sum of squares, shared by every lag
acf_vals = [1.0] + [
    np.sum(r_centered[:n-lag] * r_centered[lag:]) / total_ss
    for lag in range(1, 21)
]

ax_acf.bar(range(21), acf_vals, color=COLORS['blue'], alpha=0.7, width=0.6)
ax_acf.axhline(y=0, color='black', linewidth=0.5)
# +/- 1.96/sqrt(n) reference band (the usual approximate 95% limits for white noise)
ci = 1.96 / np.sqrt(n)
for bound in (ci, -ci):
    ax_acf.axhline(y=bound, color=COLORS['red'], linestyle='--', linewidth=0.8)
ax_acf.set_title('Residual ACF', fontweight='bold', fontsize=9)
ax_acf.set(xlabel='Lag', ylabel='Autocorrelation')

# --- Bottom-right: normal Q-Q plot; probplot draws the points first, then the fit line
stats.probplot(residuals, dist='norm', plot=ax_qq)
qq_points, qq_line = ax_qq.get_lines()[0], ax_qq.get_lines()[1]
qq_points.set(color=COLORS['blue'], markersize=3, alpha=0.7)
qq_line.set(color=COLORS['red'], linewidth=1.5)
ax_qq.set_title('Q-Q Plot (Normality)', fontweight='bold', fontsize=9)

fig.tight_layout()
save_chart(fig, 'residual_diagnostics')
plt.show()

In [None]:
# Chart: cross_validation_forecast
# Time series cross-validation with expanding window
np.random.seed(42)
fig, ax = plt.subplots(figsize=(10, 4))

n_total = 50    # length of the series
min_train = 20  # size of the first training window
h = 3           # forecast horizon
step = 4        # how far the forecast origin advances between folds

# Every forecast origin whose test window [start, start + h) still fits inside
# the series. The upper bound is inclusive (hence the +1): start = n_total - h
# is the last origin with a complete test window.
fold_starts = list(range(min_train, n_total - h + 1, step))

# Vertical position of each fold. Bars and tick marks are both derived from
# this single list so they can never disagree on the number of folds.
fold_levels = [fold_i * 1.5 for fold_i in range(len(fold_starts))]

for y_level, start in zip(fold_levels, fold_starts):
    # Training data (blue): everything observed before the forecast origin
    ax.barh(y_level, start, left=0, height=1, color=COLORS['blue'], alpha=0.6)

    # Forecast horizon (red): the h periods right after the origin
    ax.barh(y_level, h, left=start, height=1, color=COLORS['red'], alpha=0.6)

# Legend (proxy patches, because the bars themselves carry no labels)
legend_elements = [Patch(facecolor=COLORS['blue'], alpha=0.6, label='Training'),
                   Patch(facecolor=COLORS['red'], alpha=0.6, label='Test (Forecast)')]

ax.set_title('Rolling Origin Cross-Validation (Time Series CV)', fontweight='bold')
ax.set_xlabel('Time ($t$)')
ax.set_ylabel('Fold')
ax.set_yticks(fold_levels)
ax.set_yticklabels([f'Fold {fold_i+1}' for fold_i in range(len(fold_starts))])
ax.legend(handles=legend_elements, loc='upper center', bbox_to_anchor=(0.5, -0.12),
          ncol=2, frameon=False)

fig.tight_layout()
save_chart(fig, 'cross_validation_forecast')
plt.show()

In [None]:
# Chart: train_test_validation
# One horizontal bar split chronologically into train / validation / test
fig, ax = plt.subplots(figsize=(8, 2.5))

# (left edge, width, palette key, bar label, in-bar caption, caption font size)
segments = [
    (0, 70, 'blue', 'Training (70%)', 'Training\n(70%)', 10),
    (70, 15, 'green', 'Validation (15%)', 'Valid.\n(15%)', 9),
    (85, 15, 'red', 'Test (15%)', 'Test\n(15%)', 9),
]

# Draw the three coloured segments first ...
for left, width, colour_key, bar_label, _, _ in segments:
    ax.barh(0, width, left=left, height=0.6, color=COLORS[colour_key], alpha=0.7, label=bar_label)

# ... then centre a white caption inside each one
for left, width, _, _, caption, caption_size in segments:
    ax.text(left + width / 2, 0, caption, ha='center', va='center',
            fontweight='bold', fontsize=caption_size, color='white')

ax.set_xlim(0, 100)
ax.set_ylim(-0.5, 0.5)
ax.set_xlabel('Time (% of data)')
ax.set_yticks([])
ax.set_title('Train / Validation / Test Split', fontweight='bold')

# Left-to-right arrow under the bar, with a note that the split follows time order
arrow_style = dict(arrowstyle='->', color=COLORS['gray'], lw=1.5)
ax.annotate('', xy=(100, -0.4), xytext=(0, -0.4), arrowprops=arrow_style)
ax.text(50, -0.42, 'Chronological order', ha='center', va='top', fontsize=8, color=COLORS['gray'])

fig.tight_layout()
save_chart(fig, 'train_test_validation')
plt.show()

In [None]:
# Chart: real_data_forecast_comparison
# Comparison of SES, Holt, Holt-Winters on real AirPassengers data
# The three "methods" below are simple closed-form stand-ins (last value,
# last value + linear trend, seasonal naive + trend), not fitted smoothing models.
try:
    air_df = sm.datasets.get_rdataset('AirPassengers').data
    y_air = air_df['value'].values.astype(float)
    print(f'AirPassengers loaded: {len(y_air)} observations')
except Exception:
    # Deliberately broad: any failure to obtain the dataset switches to a
    # synthetic series (quadratic trend x multiplicative seasonality x 3% noise).
    np.random.seed(42)
    t_ap = np.arange(144)
    trend_ap = 100 + 1.8 * t_ap + 0.005 * t_ap**2
    sf_ap = 1 + 0.15 * np.sin(2 * np.pi * t_ap / 12) + 0.08 * np.cos(4 * np.pi * t_ap / 12)
    y_air = trend_ap * sf_ap * (1 + np.random.randn(144) * 0.03)
    print('Using synthetic fallback')

n_air = len(y_air)
train_end = n_air - 24  # Last 2 years for test
horizon = n_air - train_end
t_all = np.arange(n_air)

fig, ax = plt.subplots(figsize=(8, 4.5))

ax.plot(t_all[:train_end], y_air[:train_end], color=COLORS['blue'], linewidth=1.2, label='Training')
ax.plot(t_all[train_end:], y_air[train_end:], color=COLORS['gray'], linewidth=1.5, linestyle='-', label='Test (Actual)')

# SES forecast (flat): repeat the last training observation
ses_fc = np.full(horizon, y_air[train_end - 1])
ax.plot(t_all[train_end:], ses_fc, color=COLORS['green'], linewidth=1.2, linestyle='--', label='SES')

# Holt forecast (linear trend)
# Per-month slope measured over the last 12 training months (same calendar
# month one year apart, so seasonality does not distort it).
slope = (y_air[train_end - 1] - y_air[train_end - 13]) / 12
holt_fc = y_air[train_end - 1] + slope * np.arange(1, horizon + 1)
ax.plot(t_all[train_end:], holt_fc, color=COLORS['orange'], linewidth=1.2, linestyle='--', label='Holt')

# Holt-Winters forecast (seasonal naive + trend)
# Step k (0-based) reuses the value of the same calendar month from the last
# training year. That value lies 12 * (k // 12 + 1) months before the target,
# so the trend adjustment must span exactly that gap (12 months for the first
# forecast year, 24 for the second), not merely k + 1 months.
last_year = y_air[train_end - 12:train_end]
steps = np.arange(horizon)
months_since_source = 12 * (steps // 12 + 1)
hw_fc = last_year[steps % 12] + slope * months_since_source
ax.plot(t_all[train_end:], hw_fc, color=COLORS['red'], linewidth=1.5, linestyle='--', label='Holt-Winters')

# Dotted vertical line marks the train/test boundary
ax.axvline(x=train_end - 0.5, color=COLORS['gray'], linestyle=':', linewidth=0.8)
ax.set_title('Forecast Comparison on AirPassengers', fontweight='bold')
ax.set_xlabel('Time (Months since Jan 1949)')
ax.set_ylabel('Passengers (thousands)')
add_legend_below(ax, ncol=5)

fig.tight_layout()
save_chart(fig, 'real_data_forecast_comparison')
plt.show()

In [None]:
# Chart: multiple_series_comparison
# Three synthetic series side by side, each titled with the method suited to it
np.random.seed(42)
fig, axes = plt.subplots(1, 3, figsize=(10, 3.5))

t = np.arange(60)

# Generate the series in this fixed order so the random draws stay reproducible.
# Series 1: with trend + seasonality
y1 = 50 + 0.5 * t + 12 * np.sin(2*np.pi*t/12) + np.random.randn(60) * 2
# Series 2: just trend
y2 = 100 + 0.8 * t + np.random.randn(60) * 3
# Series 3: stationary
y3 = 50 + np.random.randn(60) * 4

# (series, palette key, panel title), listed left to right
panels = [
    (y1, 'blue', 'Series with Trend + Seasonality\n(Holt-Winters)'),
    (y2, 'green', 'Series with Trend, No Seasonality\n(Holt)'),
    (y3, 'orange', 'Stationary Series\n(SES)'),
]

for panel_ax, (series, colour_key, panel_title) in zip(axes, panels):
    panel_ax.plot(t, series, color=COLORS[colour_key], linewidth=1.2)
    panel_ax.set_title(panel_title, fontweight='bold', fontsize=9)
    panel_ax.set_xlabel('Time')

# Only the left-most panel carries the y-axis label
axes[0].set_ylabel('$X_t$')

fig.suptitle('Different Series Require Different Models', fontweight='bold', fontsize=11, y=1.02)
fig.tight_layout()
save_chart(fig, 'multiple_series_comparison')
plt.show()