## ARMA Model Selection

**Functions**

`sm.tsa.SARIMAX`

### Exercise 44
Perform a model selection exercise on the term premium using

1. General-to-Specific
2. Specific-to-General
3. Minimizing an Information Criterion

In [1]:
import pandas as pd

# Read the "term_premium" table from the HDF5 store and pull out its TERM column,
# which is the series every model below is fitted to.
data = pd.read_hdf("data/term-premium.h5", key="term_premium")
term = data["TERM"]

In [2]:
import statsmodels.tsa.api as tsa

# Every (AR, MA) lag pair with both orders running from 0 through 4,
# in the same AR-major order as a nested loop would visit them.
orders = [(p, q) for p in range(5) for q in range(5)]

ic = {}
for ar, ma in orders:
    print(f"AR: {ar}, MA: {ma}")
    mod = tsa.SARIMAX(term, order=(ar, 0, ma), trend="c")
    res = mod.fit()
    # Keep both criteria so either one can be used to rank the candidates.
    ic[(ar, ma)] = [res.aic, res.bic]

# The dict keys become columns, so transpose to get one row per (AR, MA) pair
# and one column per criterion, then label the two index levels.
ic = pd.DataFrame(ic, index=["AIC", "BIC"]).T.rename_axis(index=["AR", "MA"])
ic

AR: 0, MA: 0
AR: 0, MA: 1
AR: 0, MA: 2


  warn('Non-invertible starting MA parameters found.'


AR: 0, MA: 3


AR: 0, MA: 4


AR: 1, MA: 0
AR: 1, MA: 1


AR: 1, MA: 2


AR: 1, MA: 3


AR: 1, MA: 4


AR: 2, MA: 0


AR: 2, MA: 1


AR: 2, MA: 2


AR: 2, MA: 3


AR: 2, MA: 4


AR: 3, MA: 0


AR: 3, MA: 1


AR: 3, MA: 2


  warn('Non-stationary starting autoregressive parameters'


AR: 3, MA: 3




AR: 3, MA: 4




AR: 4, MA: 0


AR: 4, MA: 1


AR: 4, MA: 2


AR: 4, MA: 3




AR: 4, MA: 4




Unnamed: 0_level_0,Unnamed: 1_level_0,AIC,BIC
AR,MA,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,2395.585046,2404.949263
0,1,1435.400654,1449.44698
0,2,882.868033,901.596467
0,3,544.8293,568.239843
0,4,383.919042,412.011694
1,0,96.846257,110.892583
1,1,-7.122076,11.606359
1,2,-9.717014,13.693529
1,3,-9.598418,18.494233
1,4,-10.655067,22.119693


In [3]:
# Rank the candidate models by each criterion (ascending, so the smallest
# value — the preferred model — is the first row).
aic = ic.sort_values(by="AIC")
bic = ic.sort_values(by="BIC")

# The index of the first row is the (AR, MA) pair chosen by that criterion.
ar, ma = aic.index[0]
print(f"AIC selects AR {ar}, MA {ma}")

ar, ma = bic.index[0]
print(f"BIC selects AR {ar}, MA {ma}")

AIC selects AR 3, MA 3
BIC selects AR 1, MA 1


In [4]:
# AR order 4, no differencing, MA order 4, with a constant; show the
# `tvalues` of the fitted parameters.
res = tsa.SARIMAX(
    endog=term,
    order=(4, 0, 4),
    trend="c",
).fit()
res.tvalues



intercept     3.046521
ar.L1         3.057986
ar.L2        -1.659928
ar.L3         1.029939
ar.L4         3.971591
ma.L1         5.319255
ma.L2         6.439103
ma.L3         5.190770
ma.L4         1.889466
sigma2       36.578664
dtype: float64

In [5]:
# AR order 4, no differencing, MA order 3, with a constant; kept under its own
# name and displayed as a full summary table.
gts_res = tsa.SARIMAX(
    endog=term,
    order=(4, 0, 3),
    trend="c",
).fit()
gts_res.summary()



0,1,2,3
Dep. Variable:,TERM,No. Observations:,798.0
Model:,"SARIMAX(4, 0, 3)",Log Likelihood,16.848
Date:,"Sun, 27 Oct 2019",AIC,-15.696
Time:,14:32:59,BIC,26.442
Sample:,04-01-1953,HQIC,0.493
,- 09-01-2019,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.1409,0.040,3.555,0.000,0.063,0.219
ar.L1,-0.1645,0.042,-3.921,0.000,-0.247,-0.082
ar.L2,0.0155,0.042,0.373,0.709,-0.066,0.097
ar.L3,0.8057,0.042,18.989,0.000,0.723,0.889
ar.L4,0.1934,0.052,3.719,0.000,0.091,0.295
ma.L1,1.5234,0.040,38.330,0.000,1.446,1.601
ma.L2,1.4722,0.055,26.686,0.000,1.364,1.580
ma.L3,0.6059,0.044,13.687,0.000,0.519,0.693
sigma2,0.0558,0.001,38.450,0.000,0.053,0.059

0,1,2,3
Ljung-Box (Q):,65.07,Jarque-Bera (JB):,3315.11
Prob(Q):,0.01,Prob(JB):,0.0
Heteroskedasticity (H):,0.71,Skew:,0.67
Prob(H) (two-sided):,0.0,Kurtosis:,12.9


In [6]:
# AR order 1, no differencing, no MA terms, with a constant; show the
# `tvalues` of the fitted parameters.
res = tsa.SARIMAX(
    endog=term,
    order=(1, 0, 0),
    trend="c",
).fit()
res.tvalues

intercept      2.812309
ar.L1        152.029969
sigma2        57.117075
dtype: float64

In [7]:
# No AR terms, no differencing, MA order 1, with a constant; show the
# `tvalues` of the fitted parameters.
res = tsa.SARIMAX(
    endog=term,
    order=(0, 0, 1),
    trend="c",
).fit()
res.tvalues

intercept    23.898218
ma.L1        73.808712
sigma2       21.454255
dtype: float64

In [8]:
# AR order 2, no differencing, no MA terms, with a constant; show the
# `tvalues` of the fitted parameters.
res = tsa.SARIMAX(
    endog=term,
    order=(2, 0, 0),
    trend="c",
).fit()
res.tvalues

intercept     4.003844
ar.L1        92.419570
ar.L2       -22.096011
sigma2       49.437148
dtype: float64

In [9]:
# AR order 1, no differencing, MA order 1, with a constant; show the
# `tvalues` of the fitted parameters.
res = tsa.SARIMAX(
    endog=term,
    order=(1, 0, 1),
    trend="c",
).fit()
res.tvalues


intercept      3.815046
ar.L1        115.261685
ma.L1         33.671934
sigma2        42.348194
dtype: float64

In [10]:
# AR order 2, no differencing, MA order 1, with a constant; show the
# `tvalues` of the fitted parameters.
res = tsa.SARIMAX(
    endog=term,
    order=(2, 0, 1),
    trend="c",
).fit()
res.tvalues

intercept     3.504623
ar.L1        18.431356
ar.L2         5.979371
ma.L1        16.751872
sigma2       37.997459
dtype: float64

In [11]:
# AR order 1, no differencing, MA order 2, with a constant; show the
# `tvalues` of the fitted parameters.
res = tsa.SARIMAX(
    endog=term,
    order=(1, 0, 2),
    trend="c",
).fit()
res.tvalues


intercept      3.479535
ar.L1        115.330749
ma.L1         25.889922
ma.L2         -3.615222
sigma2        37.222985
dtype: float64

In [12]:
# AR order 3, no differencing, MA order 1, with a constant; show the
# `tvalues` of the fitted parameters.
res = tsa.SARIMAX(
    endog=term,
    order=(3, 0, 1),
    trend="c",
).fit()
res.tvalues

intercept     3.565521
ar.L1        11.858124
ar.L2         4.739579
ar.L3        -1.867073
ma.L1        12.691501
sigma2       37.314317
dtype: float64

In [13]:
# AR order 2, no differencing, MA order 2, with a constant; show the
# `tvalues` of the fitted parameters.
res = tsa.SARIMAX(
    endog=term,
    order=(2, 0, 2),
    trend="c",
).fit()
res.tvalues

intercept     3.080505
ar.L1         2.101384
ar.L2         3.021012
ma.L1         4.905719
ma.L2         1.644712
sigma2       37.494918
dtype: float64

In [14]:
# AR order 2, no differencing, MA order 1, with a constant; kept under its own
# name and displayed as a full summary table.
stg_res = tsa.SARIMAX(
    endog=term,
    order=(2, 0, 1),
    trend="c",
).fit()
stg_res.summary()

0,1,2,3
Dep. Variable:,TERM,No. Observations:,798.0
Model:,"SARIMAX(2, 0, 1)",Log Likelihood,10.653
Date:,"Sun, 27 Oct 2019",AIC,-11.305
Time:,14:33:04,BIC,12.105
Sample:,04-01-1953,HQIC,-2.311
,- 09-01-2019,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0489,0.014,3.505,0.000,0.022,0.076
ar.L1,0.7164,0.039,18.431,0.000,0.640,0.793
ar.L2,0.2310,0.039,5.979,0.000,0.155,0.307
ma.L1,0.6232,0.037,16.752,0.000,0.550,0.696
sigma2,0.0568,0.001,37.997,0.000,0.054,0.060

0,1,2,3
Ljung-Box (Q):,81.34,Jarque-Bera (JB):,3040.48
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,0.73,Skew:,0.59
Prob(H) (two-sided):,0.01,Kurtosis:,12.49
