## ARMA Modeling: Model Selection

**Functions**

`sm.tsa.SARIMAX`

### Exercise 44
Perform a model selection exercise on the term premium using:

1. General-to-Specific
2. Specific-to-General
3. Minimizing an Information Criterion

In [1]:
import pandas as pd
# Read the object stored under the "term_premium" key of the HDF5 file and
# keep its TERM column as the series that every model below is fitted to.
data = pd.read_hdf("data/term-premium.h5", key="term_premium")
term = data["TERM"]

In [2]:
import statsmodels.tsa.api as tsa

# Candidate (AR, MA) order pairs: every combination with both orders in 0..4,
# AR varying slowest so the fits run in the same sequence as a nested loop.
orders = [(ar, ma) for ar in range(5) for ma in range(5)]

# Fit each ARMA specification (with a constant) and record its AIC and BIC.
criteria = []
for ar, ma in orders:
    print(f"AR: {ar}, MA: {ma}")
    mod = tsa.SARIMAX(term, order=(ar, 0, ma), trend="c")
    res = mod.fit()
    criteria.append((res.aic, res.bic))

# One row per order pair, indexed by (AR, MA), one column per criterion.
ic = pd.DataFrame(
    criteria,
    index=pd.MultiIndex.from_tuples(orders, names=["AR", "MA"]),
    columns=["AIC", "BIC"],
)
ic

AR: 0, MA: 0
AR: 0, MA: 1
AR: 0, MA: 2


  warn('Non-invertible starting MA parameters found.'


AR: 0, MA: 3


AR: 0, MA: 4


AR: 1, MA: 0
AR: 1, MA: 1


AR: 1, MA: 2


AR: 1, MA: 3


AR: 1, MA: 4


AR: 2, MA: 0


AR: 2, MA: 1


AR: 2, MA: 2




AR: 2, MA: 3


AR: 2, MA: 4


AR: 3, MA: 0


AR: 3, MA: 1


AR: 3, MA: 2


  warn('Non-stationary starting autoregressive parameters'


AR: 3, MA: 3




AR: 3, MA: 4




AR: 4, MA: 0


AR: 4, MA: 1


AR: 4, MA: 2




AR: 4, MA: 3




AR: 4, MA: 4




Unnamed: 0_level_0,Unnamed: 1_level_0,AIC,BIC
AR,MA,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,2398.226016,2407.592738
0,1,1436.855561,1450.905644
0,2,883.63159,902.365034
0,3,544.623738,568.040543
0,4,383.463622,411.563787
1,0,96.402596,110.452679
1,1,-7.950289,10.783154
1,2,-10.58469,12.832115
1,3,-10.454327,17.645839
1,4,-11.489865,21.293661


In [3]:
# Rank the candidate table by each criterion in ascending order, so the
# model with the smallest value ends up in the first row.
aic = ic.sort_values(by="AIC")
bic = ic.sort_values(by="BIC")

# Each index entry is an (AR, MA) pair; report the top-ranked pair per criterion.
ar, ma = aic.index[0]
print(f"AIC selects AR {ar}, MA {ma}")
ar, ma = bic.index[0]
print(f"BIC selects AR {ar}, MA {ma}")

AIC selects AR 3, MA 3
BIC selects AR 1, MA 1


In [4]:
# Fit the largest specification considered, ARMA(4, 4) with a constant, and
# display the parameter t-statistics (presumably the starting point of the
# general-to-specific search).
res = tsa.SARIMAX(
    endog=term,
    order=(4, 0, 4),
    trend="c",
).fit()
res.tvalues



intercept     2.897152
ar.L1         3.318498
ar.L2        -1.977173
ar.L3         1.477155
ar.L4         3.352166
ma.L1         4.698840
ma.L2         6.443408
ma.L3         4.758338
ma.L4         1.258880
sigma2       36.891911
dtype: float64

In [5]:
# Fit ARMA(4, 3) with a constant and show the full estimation summary.
# The result is kept as `gts_res` -- presumably the model chosen by the
# general-to-specific search.
gts_res = tsa.SARIMAX(
    endog=term,
    order=(4, 0, 3),
    trend="c",
).fit()
gts_res.summary()



0,1,2,3
Dep. Variable:,TERM,No. Observations:,799.0
Model:,"SARIMAX(4, 0, 3)",Log Likelihood,17.177
Date:,"Thu, 21 Nov 2019",AIC,-16.354
Time:,13:22:16,BIC,25.796
Sample:,04-01-1953,HQIC,-0.161
,- 10-01-2019,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.1111,0.041,2.739,0.006,0.032,0.191
ar.L1,-0.1830,0.045,-4.075,0.000,-0.271,-0.095
ar.L2,0.0267,0.044,0.603,0.547,-0.060,0.113
ar.L3,0.8225,0.044,18.746,0.000,0.736,0.908
ar.L4,0.2080,0.055,3.764,0.000,0.100,0.316
ma.L1,1.5301,0.043,35.572,0.000,1.446,1.614
ma.L2,1.4769,0.060,24.803,0.000,1.360,1.594
ma.L3,0.5999,0.048,12.423,0.000,0.505,0.695
sigma2,0.0567,0.002,37.460,0.000,0.054,0.060

0,1,2,3
Ljung-Box (Q):,66.17,Jarque-Bera (JB):,3654.12
Prob(Q):,0.01,Prob(JB):,0.0
Heteroskedasticity (H):,0.71,Skew:,0.73
Prob(H) (two-sided):,0.0,Kurtosis:,13.37


In [6]:
# Fit a small model, AR(1) with a constant, and display the parameter
# t-statistics (presumably the first step of the specific-to-general search).
res = tsa.SARIMAX(
    endog=term,
    order=(1, 0, 0),
    trend="c",
).fit()
res.tvalues

intercept      2.857747
ar.L1        152.121330
sigma2        57.164240
dtype: float64

In [7]:
# Fit the other one-lag candidate, MA(1) with a constant, and display the
# parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(0, 0, 1),
    trend="c",
).fit()
res.tvalues

intercept    23.894225
ma.L1        73.711193
sigma2       21.465810
dtype: float64

In [8]:
# Add a second autoregressive lag: fit AR(2) with a constant and display the
# parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(2, 0, 0),
    trend="c",
).fit()
res.tvalues

intercept     4.048361
ar.L1        92.469352
ar.L2       -22.123546
sigma2       49.485291
dtype: float64

In [9]:
# Fit ARMA(1, 1) with a constant and display the parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(1, 0, 1),
    trend="c",
).fit()
res.tvalues


intercept      3.848884
ar.L1        115.322300
ma.L1         33.759748
sigma2        42.393718
dtype: float64

In [10]:
# Fit ARMA(2, 1) with a constant and display the parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(2, 0, 1),
    trend="c",
).fit()
res.tvalues

intercept     3.534380
ar.L1        18.474449
ar.L2         5.992263
ma.L1        16.806295
sigma2       38.051619
dtype: float64

In [11]:
# Fit ARMA(1, 2) with a constant and display the parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(1, 0, 2),
    trend="c",
).fit()
res.tvalues


intercept      3.508203
ar.L1        115.414667
ma.L1         25.930155
ma.L2         -3.632794
sigma2        37.274493
dtype: float64

In [12]:
# Extend ARMA(2, 1) by one autoregressive lag: fit ARMA(3, 1) with a constant
# and display the parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(3, 0, 1),
    trend="c",
).fit()
res.tvalues

intercept     3.595492
ar.L1        11.906577
ar.L2         4.737203
ar.L3        -1.857921
ma.L1        12.721134
sigma2       37.367358
dtype: float64

In [13]:
# Extend ARMA(2, 1) by one moving-average lag: fit ARMA(2, 2) with a constant
# and display the parameter t-statistics.
res = tsa.SARIMAX(
    endog=term,
    order=(2, 0, 2),
    trend="c",
).fit()
res.tvalues



intercept     3.099080
ar.L1         2.107717
ar.L2         3.015378
ma.L1         4.902334
ma.L2         1.639336
sigma2       37.549378
dtype: float64

In [14]:
# Fit ARMA(2, 1) with a constant and show the full estimation summary.
# The result is kept as `stg_res` -- presumably the model chosen by the
# specific-to-general search.
stg_res = tsa.SARIMAX(
    endog=term,
    order=(2, 0, 1),
    trend="c",
).fit()
stg_res.summary()

0,1,2,3
Dep. Variable:,TERM,No. Observations:,799.0
Model:,"SARIMAX(2, 0, 1)",Log Likelihood,11.086
Date:,"Thu, 21 Nov 2019",AIC,-12.171
Time:,13:22:21,BIC,11.245
Sample:,04-01-1953,HQIC,-3.175
,- 10-01-2019,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0492,0.014,3.534,0.000,0.022,0.077
ar.L1,0.7163,0.039,18.474,0.000,0.640,0.792
ar.L2,0.2310,0.039,5.992,0.000,0.155,0.307
ma.L1,0.6235,0.037,16.806,0.000,0.551,0.696
sigma2,0.0567,0.001,38.052,0.000,0.054,0.060

0,1,2,3
Ljung-Box (Q):,81.18,Jarque-Bera (JB):,3043.23
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,0.73,Skew:,0.59
Prob(H) (two-sided):,0.01,Kurtosis:,12.49
