# TSA Chapter 10: SARIMA Model Diagnostics

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_ch10/TSA_ch10_sarima_diagnostics/TSA_ch10_sarima_diagnostics.ipynb)

This notebook demonstrates:
- Checking residual diagnostics (standardised residuals, histogram, ACF, Q-Q plot, and a Ljung-Box test) of a SARIMA model fitted to unemployment data (FRED series `UNRATE`).

In [None]:
!pip install pandas-datareader statsmodels scipy matplotlib numpy pandas -q

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.stats.diagnostic import acorr_ljungbox
from scipy import stats
import pandas_datareader as pdr
import os, warnings
warnings.filterwarnings('ignore')

In [None]:
# Shared plotting palette (colour name -> hex code) used by the charts below.
COLORS = dict(
    blue='#1A3A6E',
    red='#DC3545',
    green='#2E7D32',
    orange='#E67E22',
    gray='#666666',
    purple='#8E44AD',
)

# Global matplotlib defaults applied to every figure created afterwards.
plt.rcParams.update({
    # Transparent backgrounds, on screen and in saved files.
    'figure.facecolor': 'none',
    'axes.facecolor': 'none',
    'savefig.facecolor': 'none',
    'savefig.transparent': True,
    # Minimal axes: no top/right spines, no grid.
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': False,
    # Font sizes.
    'font.size': 10,
    'axes.titlesize': 12,
    'axes.labelsize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 9,
    # Resolution and line widths.
    'figure.dpi': 150,
    'lines.linewidth': 1.2,
    'axes.linewidth': 0.6,
    # Legends without a visible box.
    'legend.facecolor': 'none',
    'legend.framealpha': 0,
    'legend.edgecolor': 'none',
})

def save_chart(fig, name):
    """Save *fig* as ``<name>.pdf`` and ``<name>.png``.

    The two files are always written relative to the current working
    directory. A second copy of each is then written, best-effort, under
    ``../../../charts/``; if that location cannot be written the failure is
    reported on stdout instead of being swallowed silently, and the function
    still completes normally.

    Args:
        fig: Object exposing a matplotlib-style ``savefig(path, **kwargs)``.
        name: Base file name (no extension) used for every output file.
    """
    save_kwargs = {'bbox_inches': 'tight', 'transparent': True, 'dpi': 150}
    extensions = ('pdf', 'png')

    # Primary outputs: failures here are real errors and must propagate.
    for ext in extensions:
        fig.savefig(f'{name}.{ext}', **save_kwargs)

    # Mirror copy: optional, so only path/filesystem problems are tolerated.
    # The same figure was just saved successfully above, so anything other
    # than an OSError here would indicate a genuine bug worth surfacing.
    charts_path = os.path.join('..', '..', '..', 'charts', name)
    try:
        for ext in extensions:
            fig.savefig(f'{charts_path}.{ext}', **save_kwargs)
    except OSError as exc:
        print(f'Note: could not mirror chart to {charts_path}: {exc}')
    print(f'Saved: {name}.pdf + .png')

def legend_outside(ax, ncol=3, y=-0.18):
    """Attach a frameless, multi-column legend to *ax*.

    The legend's upper-centre point is anchored at ``(0.5, y)``; with the
    negative default for *y* this sits below the plot area (assuming
    matplotlib's default axes-fraction anchor coordinates).

    Args:
        ax: Object exposing a matplotlib-style ``legend(**kwargs)``.
        ncol: Number of legend columns.
        y: Vertical anchor position passed in ``bbox_to_anchor``.
    """
    anchor = (0.5, y)
    legend_options = {
        'loc': 'upper center',
        'bbox_to_anchor': anchor,
        'ncol': ncol,
        'frameon': False,
    }
    ax.legend(**legend_options)

In [None]:
# Download unemployment data (FRED series UNRATE)
unemp = pdr.get_data_fred('UNRATE', start='2010-01-01', end='2025-01-15')
unemp_series = unemp['UNRATE']
print(f'Data: {len(unemp_series)} observations')

# 70% / 20% / 10% split (boundaries are inclusive date labels)
train_end = '2020-06-01'
val_start = '2020-07-01'
val_end = '2023-06-01'
test_start = '2023-07-01'

train_data = unemp_series[unemp_series.index <= train_end]
val_data = unemp_series[(unemp_series.index >= val_start) & (unemp_series.index <= val_end)]
test_data = unemp_series[unemp_series.index >= test_start]
# Train and validation combined: everything up to the end of the validation window.
train_val_data = unemp_series[unemp_series.index <= val_end]

print(f'Train: {len(train_data)}, Val: {len(val_data)}, Test: {len(test_data)}')

# Fit SARIMA on train+val data
model = SARIMAX(train_val_data, order=(1,1,1), seasonal_order=(1,0,1,12),
                enforce_stationarity=False, enforce_invertibility=False)
result = model.fit(disp=False)

# The earliest residuals come from the diffuse state initialisation, not from
# genuine one-step-ahead forecasts (with d=1 the very first one is roughly the
# raw level of the series). Keeping them inflates the residual standard
# deviation and distorts every diagnostic, so drop them using the same rule
# statsmodels' own `plot_diagnostics` applies.
n_burn = int(max(result.loglikelihood_burn, getattr(result, 'nobs_diffuse', 0)))
residuals = result.resid.iloc[n_burn:].dropna()
# Standardise to zero mean / unit variance for the plots against N(0, 1).
std_resid = (residuals - residuals.mean()) / residuals.std()

# 2x2 diagnostic panel: residual series, histogram, ACF, Q-Q plot.
fig, axes = plt.subplots(2, 2, figsize=(10, 6))

# Top-left: standardised residuals over time with +/-2 reference lines.
axes[0,0].plot(std_resid.index, std_resid.values, color=COLORS['blue'], lw=0.8)
axes[0,0].axhline(0, color='black', lw=0.5)
axes[0,0].axhline(2, color=COLORS['red'], ls='--', alpha=0.5)
axes[0,0].axhline(-2, color=COLORS['red'], ls='--', alpha=0.5)
axes[0,0].set_title('Standardized Residuals', fontweight='bold')

# Top-right: density histogram with the standard normal pdf overlaid.
axes[0,1].hist(std_resid.values, bins=20, density=True, color=COLORS['blue'], alpha=0.7, edgecolor='white')
x_norm = np.linspace(-4, 4, 100)
axes[0,1].plot(x_norm, stats.norm.pdf(x_norm), color=COLORS['red'], lw=2)
axes[0,1].set_title('Distribution vs Normal', fontweight='bold')

# Bottom-left: residual autocorrelation up to lag 20 with 95% bands.
plot_acf(residuals.values, ax=axes[1,0], lags=20, alpha=0.05)
axes[1,0].set_title('ACF of Residuals', fontweight='bold')

# Bottom-right: normal Q-Q plot; probplot draws the points as line 0 and the
# fitted reference line as line 1, which are recoloured to match the palette.
stats.probplot(std_resid.values, dist='norm', plot=axes[1,1])
axes[1,1].get_lines()[0].set_markerfacecolor(COLORS['blue'])
axes[1,1].get_lines()[0].set_markeredgecolor(COLORS['blue'])
axes[1,1].get_lines()[1].set_color(COLORS['red'])
axes[1,1].set_title('Q-Q Plot', fontweight='bold')

# Ljung-Box test at lag 20; its p-value is reported in the figure title.
# The estimated AR/MA coefficients (non-seasonal and seasonal) consume degrees
# of freedom, so their count is passed as model_df; otherwise the p-value is
# biased upwards and the test is too lenient.
n_arma_params = (model.order[0] + model.order[2]
                 + model.seasonal_order[0] + model.seasonal_order[2])
lb_test = acorr_ljungbox(residuals, lags=[20], model_df=n_arma_params, return_df=True)
fig.suptitle(f'SARIMA Diagnostics | Ljung-Box p = {lb_test["lb_pvalue"].iloc[0]:.2f}',
             fontweight='bold', fontsize=12, y=1.02)
fig.tight_layout()
save_chart(fig, 'sarima_diagnostics')
plt.show()