# TSA Chapter 3: Python Lab: Box-Jenkins Methodology

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_Ch3/TSA_ch3_python_box_jenkins/TSA_ch3_python_box_jenkins.ipynb)

This notebook demonstrates:
- Complete Box-Jenkins workflow: stationarity test, differencing, ACF/PACF, model comparison, residual diagnostics.


In [None]:
!pip install matplotlib numpy scipy statsmodels pandas -q

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.tsa.stattools import acf, pacf, adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox

# Style configuration
# Named hex colours shared by the lab's figures.
COLORS = {
    'blue': '#1A3A6E',
    'red': '#DC3545',
    'green': '#2E7D32',
    'orange': '#E67E22',
    'gray': '#666666',
    'purple': '#8E44AD',
}

# Global matplotlib defaults applied to every figure created afterwards.
plt.rcParams.update({
    # Transparent backgrounds (on screen and in saved files)
    'axes.facecolor': 'none',
    'figure.facecolor': 'none',
    'savefig.transparent': True,
    # Minimal frame: no top/right spines, no grid
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': False,
    # Font sizes
    'font.size': 9,
    'axes.titlesize': 10,
    'axes.labelsize': 9,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'legend.fontsize': 8,
    # Resolution, line and frame weights
    'figure.dpi': 150,
    'lines.linewidth': 1.2,
    'axes.edgecolor': '#333333',
    'axes.linewidth': 0.8,
})

# Fix NumPy's global random state so repeated runs draw the same numbers.
np.random.seed(42)

def save_chart(fig, name):
    """Save *fig* as ``<name>.pdf`` and ``<name>.png`` and print a notice.

    Both files are written with a tight bounding box, a transparent
    background and 150 dpi.

    Args:
        fig: Object exposing ``savefig`` (used here with a matplotlib figure).
        name: Output path without extension.
    """
    export_opts = dict(bbox_inches='tight', transparent=True, dpi=150)
    fig.savefig(f'{name}.pdf', **export_opts)
    fig.savefig(f'{name}.png', **export_opts)
    print(f'Saved: {name}.pdf + .png')

In [None]:
# Simulate ARIMA(1,1,1)
# ArmaProcess takes the lag polynomials including the lag-0 coefficient 1;
# the AR polynomial is passed with negated sign, so these arrays describe
# an AR coefficient of 0.6 and an MA coefficient of 0.3.
ar = np.array([1, -0.6])
ma = np.array([1, 0.3])
stationary = ArmaProcess(ar, ma).generate_sample(nsample=250)
# Integrate the stationary ARMA(1,1) draw once to obtain the I(1) series.
data = np.cumsum(stationary)

print("Box-Jenkins Methodology:")
print("  Step 1: Plot data, check stationarity")
# adfuller returns a tuple; [0] is the test statistic and [1] the p-value.
adf = adfuller(data)
print(f"  ADF on levels: stat={adf[0]:.2f}, p={adf[1]:.4f}")

# First difference of the series, re-tested for a unit root.
diff_data = np.diff(data)
adf_diff = adfuller(diff_data)
print(f"  Step 2: ADF on diff: stat={adf_diff[0]:.2f}, p={adf_diff[1]:.4f}")

print("  Step 3: Examine ACF/PACF of differenced series")
print("  Step 4: Fit candidate models")

# Candidate specifications: display label -> (p, d, q).
models = {'ARIMA(1,1,0)': (1,1,0), 'ARIMA(0,1,1)': (0,1,1),
          'ARIMA(1,1,1)': (1,1,1), 'ARIMA(2,1,1)': (2,1,1)}

# Keep every fitted result so the chosen model does not have to be
# estimated a second time after the comparison loop.
fits = {}
for name, order in models.items():
    fit = ARIMA(data, order=order).fit()
    fits[name] = fit
    print(f"  {name}: AIC={fit.aic:.1f}, BIC={fit.bic:.1f}")

# Reuse the ARIMA(1,1,1) fit from the loop: this is the same result object
# whose AIC/BIC were printed above, and it avoids a redundant re-estimation.
best = fits['ARIMA(1,1,1)']

In [None]:
# 2x2 summary figure: level series, ACF/PACF of the differenced series,
# and the residual ACF of the fitted ARIMA(1,1,1).
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
axes[0, 0].plot(data, color=COLORS['blue'], linewidth=1)
axes[0, 0].set_title('Original Data (I(1))', fontweight='bold')

plot_acf(diff_data, lags=20, ax=axes[0, 1])
axes[0, 1].set_title('ACF: Differenced', fontweight='bold')

plot_pacf(diff_data, lags=20, ax=axes[1, 0])
axes[1, 0].set_title('PACF: Differenced', fontweight='bold')

# The model has d=1, so the state-space filter starts from a diffuse initial
# state and the first residual is not a genuine one-step-ahead forecast
# error. Drop it so it cannot distort the residual autocorrelations.
model_resid = np.asarray(best.resid)[1:]
plot_acf(model_resid, lags=20, ax=axes[1, 1])
axes[1, 1].set_title('Residual ACF (ARIMA(1,1,1))', fontweight='bold')

fig.suptitle('Box-Jenkins Methodology', fontweight='bold', fontsize=13)
fig.tight_layout()
save_chart(fig, 'sem3_box_jenkins')
plt.show()