# TSA Chapter 3: ARIMA Model Selection using AIC/BIC

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_ch3/TSA_ch3_case_model_comparison/TSA_ch3_case_model_comparison.ipynb)

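This notebook demonstrates:
- Fitting several candidate ARIMA(p,1,q) specifications to the log of US Real GDP (FRED series `GDPC1`).
- Comparing the candidates by AIC and BIC in a bar chart and marking the specification with the lowest AIC as the best model.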

In [None]:
!pip install numpy pandas matplotlib statsmodels pandas-datareader -q

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader as pdr
from statsmodels.tsa.arima.model import ARIMA

In [None]:
# Chart style settings - Nature journal quality
plt.rcParams.update({
    # Transparent backgrounds for the figure, the axes and saved files.
    'figure.facecolor': 'none',
    'axes.facecolor': 'none',
    'savefig.facecolor': 'none',
    'axes.grid': False,
    # Typography.
    'font.family': 'sans-serif',
    'font.sans-serif': ['Helvetica', 'Arial', 'DejaVu Sans'],
    'font.size': 8,
    'axes.labelsize': 9,
    'axes.titlesize': 10,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    # Frameless, transparent legend.
    'legend.fontsize': 8,
    'legend.facecolor': 'none',
    'legend.framealpha': 0,
    # Only the left and bottom spines are drawn; thin lines throughout.
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.linewidth': 0.5,
    'lines.linewidth': 0.75,
})

# Colour palette used by the plotting code.
BLUE, ORANGE, RED = '#1A3A6E', '#FF8C00', '#DC3545'

# Figure finalisation (tight_layout / savefig / show) is deliberately not
# done in this cell: no figure has been drawn at this point, so calling it
# here would only lay out, save and show an empty canvas.

In [None]:
# --- Get data ----------------------------------------------------------------
# US real GDP from FRED (series GDPC1); the models are fitted to its log.
gdp = pdr.get_data_fred('GDPC1', start='1960-01-01', end='2024-09-30')
gdp_data = gdp['GDPC1'].dropna()
log_gdp = np.log(gdp_data)

# --- Fit multiple models -----------------------------------------------------
# Candidate (p, d, q) orders. Every candidate uses d=1, so the only thing
# that varies across models is the AR/MA structure.
models = {
    'ARIMA(0,1,0)': (0, 1, 0),
    'ARIMA(1,1,0)': (1, 1, 0),
    'ARIMA(0,1,1)': (0, 1, 1),
    'ARIMA(1,1,1)': (1, 1, 1),
    'ARIMA(2,1,1)': (2, 1, 1),
}

# Collect one record per model and build the frame once at the end.
results = []
for name, order in models.items():
    model = ARIMA(log_gdp, order=order)
    fit = model.fit()
    results.append({'Model': name, 'AIC': fit.aic, 'BIC': fit.bic})
df_results = pd.DataFrame(results)

# --- Plot --------------------------------------------------------------------
# Grouped bars: AIC to the left and BIC to the right of each tick.
# BLUE / ORANGE / RED are expected to be defined by the style-settings cell.
fig, ax = plt.subplots(figsize=(10, 5))
x = np.arange(len(df_results))
width = 0.35
ax.bar(x - width/2, df_results['AIC'], width, label='AIC', color=BLUE)
ax.bar(x + width/2, df_results['BIC'], width, label='BIC', color=ORANGE)
ax.set_xlabel('Model')
ax.set_ylabel('Information Criterion')
ax.set_title('ARIMA Model Comparison: US Real GDP', fontweight='bold', fontsize=14)
ax.set_xticks(x)
ax.set_xticklabels(df_results['Model'], rotation=15)
# Legend is placed below the axes so it does not cover the bars.
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2, frameon=False)

# --- Mark best model ---------------------------------------------------------
# idxmin() returns an index label; df_results has the default 0..n-1 index,
# so the label is also the bar position on the x axis.
best_idx = df_results['AIC'].idxmin()
ax.annotate('Best', xy=(best_idx - width/2, df_results.loc[best_idx, 'AIC']),
            xytext=(best_idx - width/2, df_results.loc[best_idx, 'AIC'] - 30),
            ha='center', fontsize=10, color=RED, fontweight='bold',
            arrowprops=dict(arrowstyle='->', color=RED))
print(f"\nBest model by AIC: {df_results.loc[best_idx, 'Model']}")

# --- Finalise and save -------------------------------------------------------
# Must run after everything has been drawn, otherwise an empty figure is
# laid out and written to disk.
plt.tight_layout()
plt.savefig('ch3_case_model_comparison.pdf', dpi=300, bbox_inches='tight')
plt.show()