TSA_ch3_case_rolling_forecast
=============================
Case study: Rolling 1-step-ahead forecast of (log) US Real GDP using ARIMA(1,1,1).
Uses a train/validation/test (70/15/15) split; the model is refit on an
expanding window at every step across the test set. Shows actual vs.
forecasted values with 95% confidence intervals.

Data Source: FRED via pandas_datareader (GDPC1)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as pdr
from statsmodels.tsa.arima.model import ARIMA
import warnings
warnings.filterwarnings('ignore')

# ---------------------------------------------------------------------------
# Chart style settings
# ---------------------------------------------------------------------------
# All matplotlib defaults for this script are applied in one call, grouped
# by what they affect. Every background is 'none' and savefig.transparent is
# on, so figures are written without a filled canvas.
plt.rcParams.update({
    # Backgrounds
    'figure.facecolor': 'none',
    'axes.facecolor': 'none',
    'savefig.facecolor': 'none',
    'savefig.transparent': True,
    # Axes furniture: no grid, no top/right spines
    'axes.grid': False,
    'axes.spines.top': False,
    'axes.spines.right': False,
    # Fonts and text sizes
    'font.family': 'sans-serif',
    'font.sans-serif': ['Helvetica', 'Arial', 'DejaVu Sans'],
    'font.size': 11,
    'axes.labelsize': 11,
    'axes.titlesize': 13,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    # Legend: small text, no box
    'legend.fontsize': 9,
    'legend.facecolor': 'none',
    'legend.framealpha': 0,
    'legend.edgecolor': 'none',
    # Lines
    'lines.linewidth': 1.5,
})

# Colors
BLUE, RED, GREEN = '#1A3A6E', '#DC3545', '#2E7D32'

# ---------------------------------------------------------------------------
# Load data
# ---------------------------------------------------------------------------
# Fetch the GDPC1 series for 1960-01-01 .. 2024-09-30, drop missing
# observations, and take the natural log; everything downstream works on
# the logged series.
gdp = pdr.get_data_fred('GDPC1', start='1960-01-01', end='2024-09-30')
gdp_data = gdp.loc[:, 'GDPC1'].dropna()
log_gdp = np.log(gdp_data)

# Define train/validation/test split (70/15/15)
# Cut points are positional and truncated by int(), so the test slice takes
# whatever is left after the first 85% of observations.
n = len(log_gdp)
train_end_r, val_end_r = (int(n * cum_share) for cum_share in (0.70, 0.85))

# Consecutive edge pairs give the three contiguous, non-overlapping slices.
split_edges = (0, train_end_r, val_end_r, n)
train_data, val_data, test_data = (
    log_gdp.iloc[start:stop]
    for start, stop in zip(split_edges[:-1], split_edges[1:])
)

# Rolling 1-step ahead forecasts with CI (on test set only)
# One entry per test observation is collected in five parallel lists, which
# the plotting and RMSE code further down read directly.
rolling_forecasts, rolling_lower, rolling_upper = [], [], []
rolling_dates, rolling_actuals = [], []

for i, (date, actual) in enumerate(zip(test_data.index, test_data.values)):
    # Expanding window: the history ends just before test observation i, so
    # the value being forecast is never part of the data the model is fit on.
    current_train = log_gdp.iloc[:val_end_r + i]

    # The model is re-estimated from scratch on every step.
    # NOTE(review): no `trend` argument is passed; with d=1 this presumably
    # means no drift term is estimated - confirm that is intended for a
    # trending series such as log GDP.
    model = ARIMA(current_train, order=(1, 1, 1))
    fit_roll = model.fit()

    fc = fit_roll.get_forecast(steps=1)
    # conf_int() is called with its default level; the plot labels the
    # resulting band '95% CI'. Column 0 is the lower bound, column 1 the upper.
    ci = fc.conf_int()

    rolling_dates.append(date)
    rolling_actuals.append(actual)
    rolling_forecasts.append(fc.predicted_mean.values[0])
    rolling_lower.append(ci.iloc[0, 0])
    rolling_upper.append(ci.iloc[0, 1])

# Plot
fig, ax = plt.subplots(figsize=(14, 6))

# The validation and test segments are each prefixed with the last point of
# the preceding segment so the coloured lines join without a gap.
val_conn = pd.concat([train_data.iloc[[-1]], val_data])
test_conn = pd.concat([val_data.iloc[[-1]], test_data])

# Actual series, drawn in chronological order (this also fixes legend order).
for segment, colour, label in (
    (train_data, BLUE, 'Training (70%)'),
    (val_conn, GREEN, 'Validation (15%)'),
    (test_conn, 'purple', 'Test (15%)'),
):
    ax.plot(segment.index, segment.values, color=colour, linewidth=2,
            label=label)

# Rolling forecasts as a dashed line, with the interval as a shaded band
ax.plot(rolling_dates, rolling_forecasts, color=RED, linewidth=2,
        linestyle='--', label='Rolling Forecast')
ax.fill_between(rolling_dates, rolling_lower, rolling_upper,
                color=RED, alpha=0.15, label='95% CI')

# Dotted vertical markers at the last training and last validation dates
for boundary in (train_data.index[-1], val_data.index[-1]):
    ax.axvline(x=boundary, color='gray', linestyle=':', linewidth=1.5)

# Title, axis labels, and a frameless legend placed below the axes
ax.set_title('US Real GDP: Rolling 1-Step Ahead Forecast with Train/Val/Test Split',
             fontweight='bold', fontsize=13)
ax.set_xlabel('Date')
ax.set_ylabel('Log(GDP)')
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.12),
          ncol=6, frameon=False, fontsize=9)

# Calculate test RMSE
# Errors are actual minus forecast on the logged series, so the RMSE is in
# log units.
forecast_errors = np.asarray(rolling_actuals) - np.asarray(rolling_forecasts)
rmse = np.sqrt(np.mean(forecast_errors**2))

# Annotate the top-left corner of the axes (transAxes coordinates).
rmse_label = f'ARIMA(1,1,1)\nTest RMSE: {rmse:.6f}'
ax.text(0.02, 0.98, rmse_label,
        transform=ax.transAxes, fontsize=9, verticalalignment='top',
        style='italic', color='gray')

fig.tight_layout()

# Write the same figure as PDF and PNG with identical export settings,
# then display it.
save_options = dict(bbox_inches='tight', dpi=200, transparent=True)
for out_file in ('ch3_case_rolling_forecast.pdf',
                 'ch3_case_rolling_forecast.png'):
    plt.savefig(out_file, **save_options)
plt.show()
