# Chapter 3: ARIMA Models - Case Study
## US Real GDP Forecasting (FRED: GDPC1)

This notebook reproduces all case study results from Chapter 3.

**Data Source**: Federal Reserve Economic Data (FRED)  
**Series**: GDPC1 - Real Gross Domestic Product  
**Frequency**: Quarterly, Seasonally Adjusted

In [None]:
# Required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as pdr
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# Plot style
plt.rcParams['figure.figsize'] = (12, 5)
plt.rcParams['axes.spines.top'] = False
plt.rcParams['axes.spines.right'] = False

BLUE = '#1A3A6E'
GREEN = '#2E7D32'
RED = '#DC3545'
ORANGE = '#FF8C00'

## Step 1: Load Data from FRED

In [None]:
# Download US Real GDP from FRED
gdp = pdr.get_data_fred('GDPC1', start='1960-01-01', end='2024-09-30')
gdp_data = gdp['GDPC1'].dropna()
log_gdp = np.log(gdp_data)

print(f"Loaded {len(gdp_data)} quarterly observations")
print(f"Period: {gdp_data.index[0].strftime('%Y-Q%q')} to {gdp_data.index[-1].strftime('%Y-Q%q')}")
print(f"\nFirst 5 observations:")
gdp_data.head()

In [None]:
# Plot raw data
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(gdp_data.index, gdp_data.values, color=BLUE, linewidth=1.5)
ax.set_title('US Real GDP (FRED: GDPC1)', fontweight='bold', fontsize=14)
ax.set_xlabel('Date')
ax.set_ylabel('Billions of Chained 2017 Dollars')
ax.text(0.02, 0.98, 'Source: Federal Reserve Economic Data (FRED)',
        transform=ax.transAxes, fontsize=9, verticalalignment='top', style='italic', color='gray')
plt.tight_layout()
plt.show()

## Step 2: Unit Root Testing (ADF Test)

In [None]:
# ADF test on levels
adf_level = adfuller(log_gdp, maxlag=8, regression='ct')
print("ADF Test on Log GDP (Levels):")
print(f"  Test Statistic: {adf_level[0]:.4f}")
print(f"  p-value: {adf_level[1]:.4f}")
print(f"  Critical Values: 1%: {adf_level[4]['1%']:.2f}, 5%: {adf_level[4]['5%']:.2f}, 10%: {adf_level[4]['10%']:.2f}")
print(f"  Conclusion: {'Reject H0 (Stationary)' if adf_level[1] < 0.05 else 'Cannot Reject H0 (Unit Root Present)'}")

In [None]:
# ADF test on first difference
diff_gdp = log_gdp.diff().dropna()
adf_diff = adfuller(diff_gdp, maxlag=8, regression='c')
print("\nADF Test on GDP Growth (First Difference):")
print(f"  Test Statistic: {adf_diff[0]:.4f}")
print(f"  p-value: {adf_diff[1]:.6f}")
print(f"  Conclusion: {'Reject H0 (Stationary)' if adf_diff[1] < 0.05 else 'Cannot Reject H0 (Unit Root Present)'}")
print(f"\n=> GDP is I(1), use d=1 in ARIMA model")

In [None]:
# Visualize levels vs differenced
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

axes[0].plot(log_gdp.index, log_gdp.values, color=BLUE, linewidth=1.5)
axes[0].set_title('Log GDP (Levels) - Non-Stationary', fontweight='bold')
axes[0].set_xlabel('Date')
axes[0].set_ylabel('Log(GDP)')

axes[1].plot(diff_gdp.index, diff_gdp.values * 100, color=GREEN, linewidth=1.5)
axes[1].axhline(y=0, color='gray', linestyle='--', linewidth=0.8)
axes[1].set_title('GDP Growth Rate (Differenced) - Stationary', fontweight='bold')
axes[1].set_xlabel('Date')
axes[1].set_ylabel('Percent Change')

plt.tight_layout()
plt.show()

## Step 3: ACF/PACF Analysis

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(14, 8))

# ACF/PACF of levels
plot_acf(log_gdp, lags=20, ax=axes[0, 0], color=BLUE)
axes[0, 0].set_title('ACF: Log GDP (Levels)', fontweight='bold')

plot_pacf(log_gdp, lags=20, ax=axes[0, 1], color=BLUE)
axes[0, 1].set_title('PACF: Log GDP (Levels)', fontweight='bold')

# ACF/PACF of differenced series
plot_acf(diff_gdp, lags=20, ax=axes[1, 0], color=GREEN)
axes[1, 0].set_title('ACF: GDP Growth (Differenced)', fontweight='bold')

plot_pacf(diff_gdp, lags=20, ax=axes[1, 1], color=GREEN)
axes[1, 1].set_title('PACF: GDP Growth (Differenced)', fontweight='bold')

plt.tight_layout()
plt.show()

print("Identification from ACF/PACF of differenced series:")
print("  - ACF: Significant spike at lag 1, then cuts off => suggests MA(1)")
print("  - PACF: Significant spike at lag 1, decays => suggests AR(1)")
print("  - Candidate models: ARIMA(1,1,0), ARIMA(0,1,1), ARIMA(1,1,1)")

## Step 4: Model Comparison (AIC/BIC)

In [None]:
# Compare multiple ARIMA models
models = {
    'ARIMA(0,1,0)': (0, 1, 0),
    'ARIMA(1,1,0)': (1, 1, 0),
    'ARIMA(0,1,1)': (0, 1, 1),
    'ARIMA(1,1,1)': (1, 1, 1),
    'ARIMA(2,1,1)': (2, 1, 1),
}

results = []
for name, order in models.items():
    model = ARIMA(log_gdp, order=order)
    fit = model.fit()
    results.append({
        'Model': name,
        'AIC': fit.aic,
        'BIC': fit.bic,
        'Log-Lik': fit.llf
    })

df_results = pd.DataFrame(results)
print("Model Comparison:")
print(df_results.to_string(index=False))
print(f"\nBest model by AIC: {df_results.loc[df_results['AIC'].idxmin(), 'Model']}")

In [None]:
# Plot model comparison
fig, ax = plt.subplots(figsize=(10, 5))
x = np.arange(len(df_results))
width = 0.35

ax.bar(x - width/2, df_results['AIC'], width, label='AIC', color=BLUE)
ax.bar(x + width/2, df_results['BIC'], width, label='BIC', color=ORANGE)

ax.set_xlabel('Model')
ax.set_ylabel('Information Criterion (lower is better)')
ax.set_title('ARIMA Model Comparison: US Real GDP', fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(df_results['Model'])
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.12), ncol=2, frameon=False)

plt.tight_layout()
plt.show()

## Step 5: Fit Best Model and Diagnostics

In [None]:
# Fit ARIMA(1,1,1)
best_model = ARIMA(log_gdp, order=(1, 1, 1))
best_fit = best_model.fit()
print(best_fit.summary())

In [None]:
# Diagnostic plots
residuals = best_fit.resid

fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# Residuals over time
axes[0, 0].plot(residuals.index, residuals.values, color=BLUE, linewidth=0.8)
axes[0, 0].axhline(y=0, color='red', linestyle='--', linewidth=1)
axes[0, 0].set_title('Residuals Over Time', fontweight='bold')
axes[0, 0].set_xlabel('Date')

# Histogram
axes[0, 1].hist(residuals, bins=30, color=BLUE, edgecolor='white', density=True)
axes[0, 1].set_title('Residual Distribution', fontweight='bold')

# ACF of residuals
plot_acf(residuals, lags=20, ax=axes[1, 0], color=BLUE)
axes[1, 0].set_title('ACF of Residuals', fontweight='bold')

# Q-Q plot
res_standardized = (residuals - residuals.mean()) / residuals.std()
(osm, osr), (slope, intercept, r) = stats.probplot(res_standardized, dist="norm")
axes[1, 1].scatter(osm, osr, color=BLUE, s=20, alpha=0.7)
line_min, line_max = min(osm.min(), osr.min()), max(osm.max(), osr.max())
axes[1, 1].plot([line_min, line_max], [line_min, line_max], color=RED, linewidth=1.5, linestyle='--')
axes[1, 1].set_xlabel('Theoretical Quantiles')
axes[1, 1].set_ylabel('Sample Quantiles')
axes[1, 1].set_title('Q-Q Plot', fontweight='bold')

plt.suptitle('ARIMA(1,1,1) Diagnostic Plots', fontweight='bold', fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

In [None]:
# Normality tests
jb_stat, jb_pval = stats.jarque_bera(residuals)
skew = stats.skew(residuals)
kurt = stats.kurtosis(residuals)

print("Residual Normality Analysis:")
print(f"  Skewness: {skew:.3f} (normal = 0)")
print(f"  Excess Kurtosis: {kurt:.3f} (normal = 0)")
print(f"  Jarque-Bera stat: {jb_stat:.2f}, p-value: {jb_pval:.6f}")
print(f"\nConclusion: Residuals are NOT normally distributed")
print("  (due to COVID-19 outlier in 2020Q2)")

## Step 6: Train/Validation/Test Split (70%/15%/15%)

In [None]:
# Define train/val/test split
n = len(log_gdp)
train_end = int(n * 0.70)
val_end = int(n * 0.85)

train_data = log_gdp.iloc[:train_end]
val_data = log_gdp.iloc[train_end:val_end]
test_data = log_gdp.iloc[val_end:]

print(f"Total observations: {n}")
print(f"Training (70%): {len(train_data)} obs ({train_data.index[0].strftime('%Y-Q%q')} to {train_data.index[-1].strftime('%Y-Q%q')})")
print(f"Validation (15%): {len(val_data)} obs ({val_data.index[0].strftime('%Y-Q%q')} to {val_data.index[-1].strftime('%Y-Q%q')})")
print(f"Test (15%): {len(test_data)} obs ({test_data.index[0].strftime('%Y-Q%q')} to {test_data.index[-1].strftime('%Y-Q%q')})")

In [None]:
# Fit model on training data and forecast
model_train = ARIMA(train_data, order=(1, 1, 1))
fit_train = model_train.fit()

# Forecast for val + test + 8 quarters ahead
forecast_steps = len(val_data) + len(test_data) + 8
forecast = fit_train.get_forecast(steps=forecast_steps)
forecast_mean = forecast.predicted_mean
forecast_ci = forecast.conf_int()

# Create forecast dates
forecast_dates = pd.date_range(start=train_data.index[-1], periods=forecast_steps + 1, freq='Q')[1:]

In [None]:
# Plot forecast with train/val/test split
fig, ax = plt.subplots(figsize=(14, 6))

# Training data
ax.plot(train_data.iloc[-30:].index, train_data.iloc[-30:].values,
        color=BLUE, linewidth=2, label='Training (70%)')

# Validation data - connect from training
val_conn = pd.concat([train_data.iloc[[-1]], val_data])
ax.plot(val_conn.index, val_conn.values, color=GREEN, linewidth=2, label='Validation (15%)')

# Test data - connect from validation
test_conn = pd.concat([val_data.iloc[[-1]], test_data])
ax.plot(test_conn.index, test_conn.values, color='purple', linewidth=2, label='Test (15%)')

# Forecast - connect from training
forecast_conn = pd.concat([pd.Series([train_data.iloc[-1]], index=[train_data.index[-1]]), forecast_mean])
ax.plot(forecast_conn.index, forecast_conn.values, color=RED, linewidth=2, linestyle='--', label='Forecast')
ax.fill_between(forecast_dates, forecast_ci.iloc[:, 0], forecast_ci.iloc[:, 1],
                color=RED, alpha=0.15, label='95% CI')

# Mark splits
ax.axvline(x=train_data.index[-1], color='gray', linestyle=':', linewidth=1.5)
ax.axvline(x=val_data.index[-1], color='gray', linestyle=':', linewidth=1.5)

ax.set_title('US Real GDP: ARIMA Out-of-Sample Forecast (70%/15%/15%)', fontweight='bold', fontsize=14)
ax.set_xlabel('Date')
ax.set_ylabel('Log(GDP)')
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.12), ncol=6, frameon=False)

# Calculate test RMSE
test_forecast = forecast_mean.iloc[len(val_data):len(val_data)+len(test_data)]
test_rmse = np.sqrt(np.mean((test_data.values - test_forecast.values)**2))
print(f"Test RMSE: {test_rmse:.6f}")

plt.tight_layout()
plt.show()

## Step 7: Rolling 1-Step Ahead Forecast with 95% CI

In [None]:
# Rolling 1-step ahead forecasts on test set
rolling_forecasts = []
rolling_upper = []
rolling_lower = []
rolling_dates = []
rolling_actuals = []

print("Computing rolling forecasts...")
for i in range(len(test_data)):
    # Expanding window: use all data up to current point
    current_train = log_gdp.iloc[:val_end + i]
    model = ARIMA(current_train, order=(1, 1, 1))
    fit_roll = model.fit()
    fc = fit_roll.get_forecast(steps=1)
    
    rolling_forecasts.append(fc.predicted_mean.values[0])
    ci = fc.conf_int()
    rolling_lower.append(ci.iloc[0, 0])
    rolling_upper.append(ci.iloc[0, 1])
    rolling_dates.append(test_data.index[i])
    rolling_actuals.append(test_data.iloc[i])
    
    if (i + 1) % 10 == 0:
        print(f"  Completed {i + 1}/{len(test_data)} forecasts")

print("Done!")

In [None]:
# Plot rolling forecast
fig, ax = plt.subplots(figsize=(14, 6))

# Training data
ax.plot(train_data.index, train_data.values, color=BLUE, linewidth=2, label='Training (70%)')

# Validation data - connect
val_conn = pd.concat([train_data.iloc[[-1]], val_data])
ax.plot(val_conn.index, val_conn.values, color=GREEN, linewidth=2, label='Validation (15%)')

# Test data - connect
test_conn = pd.concat([val_data.iloc[[-1]], test_data])
ax.plot(test_conn.index, test_conn.values, color='purple', linewidth=2, label='Test (15%)')

# Rolling forecasts with CI
ax.plot(rolling_dates, rolling_forecasts, color=RED, linewidth=2, linestyle='--', label='Rolling Forecast')
ax.fill_between(rolling_dates, rolling_lower, rolling_upper, color=RED, alpha=0.15, label='95% CI')

# Mark splits
ax.axvline(x=train_data.index[-1], color='gray', linestyle=':', linewidth=1.5)
ax.axvline(x=val_data.index[-1], color='gray', linestyle=':', linewidth=1.5)

ax.set_title('US Real GDP: Rolling 1-Step Ahead Forecast (70%/15%/15%)', fontweight='bold', fontsize=14)
ax.set_xlabel('Date')
ax.set_ylabel('Log(GDP)')
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.12), ncol=6, frameon=False)

# Calculate RMSE
rmse = np.sqrt(np.mean((np.array(rolling_actuals) - np.array(rolling_forecasts))**2))
mae = np.mean(np.abs(np.array(rolling_actuals) - np.array(rolling_forecasts)))

print(f"Rolling Forecast Performance on Test Set:")
print(f"  RMSE: {rmse:.6f}")
print(f"  MAE: {mae:.6f}")

plt.tight_layout()
plt.show()

## Summary

### Key Findings:
1. **US Real GDP is I(1)**: ADF test fails to reject unit root on levels, rejects on first difference
2. **Best Model**: ARIMA(1,1,1) selected by AIC
3. **Residuals**: Not normally distributed due to COVID-19 outlier (2020Q2)
4. **Forecast Evaluation**: Rolling 1-step ahead forecasts with 95% confidence intervals

### Data Source:
- **FRED Series**: GDPC1 (Real Gross Domestic Product)
- **Frequency**: Quarterly, Seasonally Adjusted
- **Units**: Billions of Chained 2017 Dollars