# TSA Chapter 3: Case Study: US GDP ARIMA Forecasting

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_Ch3/TSA_ch3_case_study/TSA_ch3_case_study.ipynb)

This notebook demonstrates:
- Complete Box-Jenkins ARIMA case study on US GDP: identification, estimation, diagnostics, 8-step forecast with CI.


In [None]:
!pip install matplotlib numpy scipy statsmodels pandas -q

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.tsa.stattools import acf, pacf, adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox

# Shared look-and-feel for every figure in this notebook.

# Named palette (hex codes) so charts can refer to colors by role.
COLORS = {
    'blue': '#1A3A6E',
    'red': '#DC3545',
    'green': '#2E7D32',
    'orange': '#E67E22',
    'gray': '#666666',
    'purple': '#8E44AD',
}

plt.rcParams.update({
    # Transparent backgrounds, both on screen and in saved files
    'axes.facecolor': 'none',
    'figure.facecolor': 'none',
    'savefig.transparent': True,
    # Minimal frame: no top/right spines, no grid
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': False,
    # Font sizes
    'font.size': 9,
    'axes.titlesize': 10,
    'axes.labelsize': 9,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'legend.fontsize': 8,
    # Resolution and line styling
    'figure.dpi': 150,
    'lines.linewidth': 1.2,
    'axes.edgecolor': '#333333',
    'axes.linewidth': 0.8,
})

# Fixed seed so any NumPy random draws are reproducible across runs.
np.random.seed(42)

def save_chart(fig, name):
    """Export a figure as both PDF and PNG and print a confirmation.

    Parameters
    ----------
    fig : object exposing ``savefig(path, **options)``
        The figure to export.
    name : str
        Output path without extension; ``.pdf`` and ``.png`` are appended.
    """
    # Same export options for both formats: tight bounding box,
    # transparent background, 150 dpi.
    export_options = dict(bbox_inches='tight', transparent=True, dpi=150)
    for extension in ('pdf', 'png'):
        fig.savefig(f'{name}.{extension}', **export_options)
    print(f'Saved: {name}.pdf + .png')

In [None]:
# Load quarterly real US GDP.
# Preferred source: FRED series GDPC1 (requires network access and the
# optional pandas-datareader package). If that is unavailable for any reason,
# fall back to the `macrodata` sample dataset bundled with statsmodels.
from statsmodels.datasets import macrodata

try:
    # Imported inside the try so that a missing package also triggers the
    # fallback instead of aborting the notebook.
    import pandas_datareader as pdr

    gdp = pdr.get_data_fred('GDPC1', start='1960-01-01', end='2024-09-30')
    gdp_vals = gdp['GDPC1'].dropna().values
except Exception as exc:
    # Deliberately broad: the download is best-effort (package missing,
    # no network, service error, ...). Report it rather than failing silently.
    print(f'FRED download unavailable ({type(exc).__name__}); '
          'using statsmodels macrodata instead.')
    gdp_vals = macrodata.load_pandas().data['realgdp'].values

# Work in logs so that first differences approximate quarterly growth rates.
log_gdp = np.log(gdp_vals)
diff_gdp = np.diff(log_gdp)

# Step 1: Raw data and ADF test

In [None]:
# Box-Jenkins workflow on log GDP, drawn on a single 2x3 figure:
# top row = identification (levels, ACF, PACF of the differenced series),
# bottom row = residual diagnostics and the out-of-sample forecast.
fig, axes = plt.subplots(2, 3, figsize=(16, 9))

# Step 1: series in levels, annotated with an ADF unit-root test
axes[0, 0].plot(log_gdp, color=COLORS['blue'], linewidth=1.2)
axes[0, 0].set_title('Log GDP (Levels)', fontweight='bold')
adf_level = adfuller(log_gdp)
adf_pvalue = adf_level[1]
# The ADF null hypothesis is a unit root, so a large p-value means the
# series cannot be called stationary. Derive the label from the test
# instead of hard-coding it.
adf_verdict = 'Non-stationary' if adf_pvalue > 0.05 else 'Stationary'
axes[0, 0].text(0.02, 0.98, f'ADF p={adf_pvalue:.4f}\n{adf_verdict}',
                transform=axes[0, 0].transAxes, fontsize=8, va='top',
                bbox=dict(boxstyle='round', facecolor='lightyellow'))

# Step 2: ACF/PACF of the differenced series (GDP growth)
plot_acf(diff_gdp, lags=20, ax=axes[0, 1])
axes[0, 1].set_title('ACF: GDP Growth', fontweight='bold')
plot_pacf(diff_gdp, lags=20, ax=axes[0, 2])
axes[0, 2].set_title('PACF: GDP Growth', fontweight='bold')

# Step 3: Fit ARIMA(1,1,1)
d = 1
model = ARIMA(log_gdp, order=(1, d, 1)).fit()
# The first `d` residuals of an integrated model come from initialising the
# differencing state, not from a genuine one-step-ahead error; they are on
# the scale of the level itself and would swamp the diagnostics below.
resid = model.resid[d:]

# Step 4: residual diagnostics
axes[1, 0].plot(resid, color=COLORS['blue'], linewidth=0.8)
axes[1, 0].axhline(0, color='red', linestyle='--')
axes[1, 0].set_title('Residuals', fontweight='bold')

plot_acf(resid, lags=20, ax=axes[1, 1])
axes[1, 1].set_title('Residual ACF', fontweight='bold')

# Step 5: h-step-ahead forecast with confidence band
h = 8
forecast = model.get_forecast(steps=h)
fc_mean = forecast.predicted_mean
# conf_int() returns a DataFrame or an ndarray depending on the input type;
# np.asarray gives uniform positional access to the lower/upper columns.
fc_ci = np.asarray(forecast.conf_int())
ci_lo, ci_hi = fc_ci[:, 0], fc_ci[:, 1]

n = len(log_gdp)
# Show only the most recent observations next to the forecast
# (at most 30, fewer if the series is shorter).
window = min(30, n)
axes[1, 2].plot(range(n - window, n), log_gdp[n - window:],
                color=COLORS['blue'], linewidth=1.5, label='Observed')
fc_idx = range(n, n + h)
axes[1, 2].plot(fc_idx, fc_mean, color=COLORS['red'], linewidth=2,
                linestyle='--', label='Forecast')
axes[1, 2].fill_between(fc_idx, ci_lo, ci_hi, color=COLORS['red'],
                        alpha=0.2, label='95% CI')
axes[1, 2].set_title(f'{h}-Step Forecast', fontweight='bold')
axes[1, 2].legend(fontsize=7)

plt.suptitle('ARIMA Case Study: US GDP', fontweight='bold', fontsize=14)
plt.tight_layout()
save_chart(fig, 'ch3_case_study')
plt.show()

# With d > 0 the model has no constant by default, so `params` is ordered
# [ar.L1, ma.L1, sigma2]; positional indices 1 and 2 would report the MA
# coefficient and the innovation variance. Read the coefficients by role.
print("ARIMA(1,1,1) Results:")
print(f"  AR(1): {model.arparams[0]:.4f}")
print(f"  MA(1): {model.maparams[0]:.4f}")
print(f"  AIC: {model.aic:.2f}, BIC: {model.bic:.2f}")