# TSA Chapter 10: SARIMA Rolling Forecast

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_ch10/TSA_ch10_sarima_forecast/TSA_ch10_sarima_forecast.ipynb)

This notebook demonstrates:
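- Performing rolling one-step-ahead SARIMA forecasts of the U.S. unemployment rate (FRED series `UNRATE`) with a train/validation/test split. The model is fitted with statsmodels' `SARIMAX` class using a non-seasonal ARIMA(1,1,1) order (no seasonal terms) and is re-estimated before every forecast.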

In [None]:
!pip install pandas-datareader statsmodels matplotlib numpy pandas -q

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from statsmodels.tsa.statespace.sarimax import SARIMAX
import pandas_datareader as pdr
import os, warnings
warnings.filterwarnings('ignore')

In [None]:
# Color scheme and style configuration
# Named hex colours used by the charts in this notebook.
COLORS = dict(
    blue='#1A3A6E',
    red='#DC3545',
    green='#2E7D32',
    orange='#E67E22',
    gray='#666666',
    purple='#8E44AD',
)

# Global matplotlib defaults applied to every figure created after this cell.
plt.rcParams.update({
    # Transparent backgrounds, on screen and in saved files
    'figure.facecolor': 'none',
    'axes.facecolor': 'none',
    'savefig.facecolor': 'none',
    'savefig.transparent': True,
    # Axes frame: left/bottom spines only, no grid
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': False,
    'axes.linewidth': 0.6,
    # Font sizes
    'font.size': 10,
    'axes.titlesize': 12,
    'axes.labelsize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 9,
    # Resolution and default line weight
    'figure.dpi': 150,
    'lines.linewidth': 1.2,
    # Legends without a visible box
    'legend.facecolor': 'none',
    'legend.framealpha': 0,
    'legend.edgecolor': 'none',
})

def save_chart(fig, name):
    """Save ``fig`` as ``<name>.pdf`` and ``<name>.png`` in the working directory.

    A second copy of both files is written to ``../../../charts`` when that
    directory exists. The mirror copy is best-effort: if it fails, the reason
    is printed and the function still completes, because the primary files
    have already been written.

    Parameters
    ----------
    fig : object exposing ``savefig(path, **kwargs)`` (e.g. a matplotlib Figure)
        The figure to write.
    name : str
        File stem, without extension.

    Returns
    -------
    None
    """
    save_kwargs = dict(bbox_inches='tight', transparent=True, dpi=150)
    extensions = ('pdf', 'png')

    # Primary output: errors here are real problems and are allowed to propagate.
    for ext in extensions:
        fig.savefig(f'{name}.{ext}', **save_kwargs)

    # Optional mirror into a shared charts directory three levels up.
    charts_dir = os.path.join('..', '..', '..', 'charts')
    if os.path.isdir(charts_dir):
        charts_path = os.path.join(charts_dir, name)
        try:
            for ext in extensions:
                fig.savefig(f'{charts_path}.{ext}', **save_kwargs)
        except Exception as exc:
            # Keep the best-effort contract, but say what went wrong instead
            # of discarding the error silently.
            print(f'Warning: could not copy {name} to {charts_dir}: {exc}')
    print(f'Saved: {name}.pdf + .png')

def legend_outside(ax, ncol=3, y=-0.18):
    """Attach a frameless, horizontally centred legend to ``ax``.

    The legend's upper-centre point is anchored at ``(0.5, y)``; with the
    default negative ``y`` this is intended to sit below the plotting area.

    Parameters
    ----------
    ax : object exposing ``legend(**kwargs)`` (e.g. a matplotlib Axes)
    ncol : int, default 3
        Number of legend columns.
    y : float, default -0.18
        Vertical coordinate of the anchor point.

    Returns
    -------
    None
    """
    anchor_point = (0.5, y)
    legend_options = {
        'loc': 'upper center',
        'bbox_to_anchor': anchor_point,
        'ncol': ncol,
        'frameon': False,
    }
    ax.legend(**legend_options)

In [None]:
# Download the U.S. unemployment rate (FRED series UNRATE)
unemp = pdr.get_data_fred('UNRATE', start='2010-01-01', end='2025-01-15')
unemp_series = unemp['UNRATE']
print(f'Data: {len(unemp_series)} observations')

# Chronological 70% / 20% / 10% split; every boundary date is inclusive
train_end = '2020-06-01'
val_start = '2020-07-01'
val_end = '2023-06-01'
test_start = '2023-07-01'

# Boolean masks over the observation dates, one per segment
obs_dates = unemp_series.index
in_train = obs_dates <= train_end
in_val = (obs_dates >= val_start) & (obs_dates <= val_end)
in_test = obs_dates >= test_start
in_train_val = obs_dates <= val_end  # training and validation combined

train_data = unemp_series[in_train]
val_data = unemp_series[in_val]
test_data = unemp_series[in_test]
train_val_data = unemp_series[in_train_val]

print(f'Train: {len(train_data)}, Val: {len(val_data)}, Test: {len(test_data)}')

# Rolling one-step-ahead forecast on test set
# For each test date the model is re-estimated on all observations strictly
# before that date and asked for a single-step forecast with its interval.
all_data = unemp_series.copy()
test_indices = test_data.index
n_test_fc = len(test_data)

rolling_forecasts = np.zeros(n_test_fc)
rolling_ci_lower = np.zeros(n_test_fc)
rolling_ci_upper = np.zeros(n_test_fc)
failed_fits = []  # test dates where estimation failed and the fallback was used

print(f'Computing {n_test_fc} rolling forecasts...')
for i, forecast_date in enumerate(test_indices):
    # Expanding window: never includes the value being forecast
    history = all_data[all_data.index < forecast_date]
    try:
        m = SARIMAX(history, order=(1,1,1), enforce_stationarity=False, enforce_invertibility=False)
        r = m.fit(disp=False, maxiter=100)
        fc = r.get_forecast(steps=1)
        ci = fc.conf_int()  # default confidence level of conf_int()
        point = fc.predicted_mean.iloc[0]
        lower, upper = ci.iloc[0, 0], ci.iloc[0, 1]
    except Exception as exc:
        # `except Exception` (not a bare `except:`) so that interrupting the
        # kernel still stops the loop. On failure fall back to a naive
        # "last observed value" forecast with an ad-hoc +/- 1 band, and
        # record it so the substitution is visible to the reader.
        failed_fits.append(forecast_date)
        print(f'  fit failed for {forecast_date}: {exc!r}; using last observed value')
        point = history.iloc[-1]
        lower, upper = point - 1, point + 1
    rolling_forecasts[i] = point
    rolling_ci_lower[i] = lower
    rolling_ci_upper[i] = upper

if failed_fits:
    print(f'{len(failed_fits)} of {n_test_fc} forecasts used the naive fallback')

# Root-mean-squared error of the one-step forecasts over the test set
test_rmse = np.sqrt(np.mean((test_data.values - rolling_forecasts)**2))

# Plot the three data segments together with the rolling test-set forecasts
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(train_data.index, train_data.values, color=COLORS['blue'], lw=1.5, label='Training (70%)')
ax.plot(val_data.index, val_data.values, color=COLORS['purple'], lw=1.5, label='Validation (20%)')
ax.plot(test_data.index, test_data.values, color=COLORS['green'], lw=2, label='Test (10%)')
ax.plot(test_data.index, rolling_forecasts, color=COLORS['red'], lw=2, ls='--',
        label='ARIMA Rolling Forecast')
# Shaded band between the per-step interval bounds collected in the forecast loop
ax.fill_between(test_data.index, rolling_ci_lower, rolling_ci_upper, color=COLORS['red'], alpha=0.15)
# Split markers are derived from the split variables, so they stay in sync
# with the data split instead of repeating the dates as literals.
ax.axvline(x=pd.Timestamp(val_start), color='gray', ls='--', alpha=0.7)
ax.axvline(x=pd.Timestamp(test_start), color='black', ls='--', alpha=0.7)
ax.text(0.02, 0.95, f'Test RMSE = {test_rmse:.2f}', transform=ax.transAxes,
        fontsize=11, va='top', fontweight='bold', color=COLORS['red'],
        bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
ax.set_title('ARIMA: Rolling One-Step-Ahead Forecast', fontweight='bold', fontsize=13)
ax.set_xlabel('Date')
ax.set_ylabel('Unemployment Rate (%)')
# Reuse the notebook's legend helper; the legend font size (9) comes from the
# 'legend.fontsize' entry set in the style configuration cell.
legend_outside(ax, ncol=4, y=-0.1)
fig.tight_layout()
save_chart(fig, 'sarima_forecast')
plt.show()