## ARMA Modeling: Model Selection

**Functions**

`sm.tsa.SARIMAX`

### Exercise 44
Perform a model selection exercise on the term premium using

1. General-to-Specific
2. Specific-to-General
3. Minimizing an Information Criterion

In [1]:
import pandas as pd
# Read the "term_premium" object from the HDF store and keep only its TERM
# column, which is the series every model below is fitted to.
data = pd.read_hdf("data/term-premium.h5", key="term_premium")
term = data.loc[:, "TERM"]

In [2]:
import statsmodels.tsa.api as tsa

# Fit every ARMA(ar, ma) specification with ar, ma in 0..4 (constant included)
# and record the AIC and BIC of each fit, keyed by its (ar, ma) pair.
criteria = {}
for ar in range(5):
    for ma in range(5):
        print(f"AR: {ar}, MA: {ma}")
        fitted = tsa.SARIMAX(term, order=(ar, 0, ma), trend="c").fit()
        criteria[(ar, ma)] = [fitted.aic, fitted.bic]

# One row per (AR, MA) pair, one column per information criterion.
ic = (
    pd.DataFrame(criteria, index=["AIC", "BIC"])
    .T
    .rename_axis(index=["AR", "MA"])
)
ic

AR: 0, MA: 0
AR: 0, MA: 1


  warn('Non-invertible starting MA parameters found.'


AR: 0, MA: 2


AR: 0, MA: 3


AR: 0, MA: 4


AR: 1, MA: 0


AR: 1, MA: 1


AR: 1, MA: 2


AR: 1, MA: 3


AR: 1, MA: 4


AR: 2, MA: 0


AR: 2, MA: 1


AR: 2, MA: 2


AR: 2, MA: 3


AR: 2, MA: 4


AR: 3, MA: 0


AR: 3, MA: 1


AR: 3, MA: 2


  warn('Non-stationary starting autoregressive parameters'


AR: 3, MA: 3




AR: 3, MA: 4




AR: 4, MA: 0


AR: 4, MA: 1


AR: 4, MA: 2


AR: 4, MA: 3




AR: 4, MA: 4




Unnamed: 0_level_0,Unnamed: 1_level_0,AIC,BIC
AR,MA,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,2403.038892,2412.410614
0,1,1439.107108,1453.164691
0,2,884.155933,902.899377
0,3,544.541198,567.970503
0,4,382.934321,411.049486
1,0,94.865366,108.922949
1,1,-9.947408,8.796035
1,2,-12.541291,10.888013
1,3,-12.410547,15.704619
1,4,-13.472772,19.328254


In [3]:
# Rank the fitted specifications in ascending order of each criterion, so the
# first row of each ranking is the model that minimises it.
aic = ic.sort_values(by="AIC")
bic = ic.sort_values(by="BIC")

# The index entries are (AR, MA) pairs; report the top one of each ranking.
ar, ma = aic.index[0]
print(f"AIC selects AR {ar}, MA {ma}")
ar, ma = bic.index[0]
print(f"BIC selects AR {ar}, MA {ma}")

AIC selects AR 3, MA 3
BIC selects AR 1, MA 1


In [4]:
# Largest specification in the grid, ARMA(4, 4) with a constant; display the
# test statistic reported for each estimated parameter.
mod = tsa.SARIMAX(endog=term, order=(4, 0, 4), trend="c")
res = mod.fit()
res.tvalues



intercept     3.214325
ar.L1         3.236575
ar.L2        -1.886639
ar.L3         1.086879
ar.L4         4.377804
ma.L1         5.801864
ma.L2         7.695099
ma.L3         5.852369
ma.L4         2.110348
sigma2       36.649447
dtype: float64

In [5]:
# ARMA(4, 3) with a constant, i.e. the ARMA(4, 4) specification with one fewer
# MA lag; the fit is kept as gts_res and its full summary table is shown.
mod = tsa.SARIMAX(endog=term, order=(4, 0, 3), trend="c")
gts_res = mod.fit()
gts_res.summary()



0,1,2,3
Dep. Variable:,TERM,No. Observations:,801.0
Model:,"SARIMAX(4, 0, 3)",Log Likelihood,16.879
Date:,"Fri, 24 Jan 2020",AIC,-15.759
Time:,23:56:50,BIC,26.414
Sample:,04-01-1953,HQIC,0.441
,- 12-01-2019,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.1325,0.037,3.542,0.000,0.059,0.206
ar.L1,-0.1396,0.091,-1.526,0.127,-0.319,0.040
ar.L2,0.1395,0.088,1.585,0.113,-0.033,0.312
ar.L3,0.7215,0.090,7.997,0.000,0.545,0.898
ar.L4,0.1369,0.063,2.186,0.029,0.014,0.260
ma.L1,1.4947,0.089,16.728,0.000,1.320,1.670
ma.L2,1.3105,0.126,10.374,0.000,1.063,1.558
ma.L3,0.5015,0.067,7.448,0.000,0.370,0.633
sigma2,0.0559,0.001,38.442,0.000,0.053,0.059

0,1,2,3
Ljung-Box (Q):,67.26,Jarque-Bera (JB):,3427.32
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,0.71,Skew:,0.67
Prob(H) (two-sided):,0.01,Kurtosis:,13.05


In [6]:
# AR(1) with a constant; display the per-parameter test statistics.
mod = tsa.SARIMAX(endog=term, order=(1, 0, 0), trend="c")
res = mod.fit()
res.tvalues

intercept      2.899870
ar.L1        152.442652
sigma2        57.298373
dtype: float64

In [7]:
# MA(1) with a constant; display the per-parameter test statistics.
mod = tsa.SARIMAX(endog=term, order=(0, 0, 1), trend="c")
res = mod.fit()
res.tvalues

intercept    23.909363
ma.L1        73.856571
sigma2       21.508055
dtype: float64

In [8]:
# AR(2) with a constant; display the per-parameter test statistics.
mod = tsa.SARIMAX(endog=term, order=(2, 0, 0), trend="c")
res = mod.fit()
res.tvalues

intercept     4.070461
ar.L1        92.708278
ar.L2       -22.192438
sigma2       49.614191
dtype: float64

In [9]:
# ARMA(1, 1) with a constant; display the per-parameter test statistics.
mod = tsa.SARIMAX(endog=term, order=(1, 0, 1), trend="c")
res = mod.fit()
res.tvalues


intercept      3.874115
ar.L1        115.592148
ma.L1         33.848390
sigma2        42.504245
dtype: float64

In [10]:
# ARMA(2, 1) with a constant (one more AR lag than ARMA(1, 1)); display the
# per-parameter test statistics.
mod = tsa.SARIMAX(endog=term, order=(2, 0, 1), trend="c")
res = mod.fit()
res.tvalues

intercept     3.568499
ar.L1        18.515267
ar.L2         5.963615
ma.L1        16.792105
sigma2       38.142901
dtype: float64

In [11]:
# ARMA(1, 2) with a constant (one more MA lag than ARMA(1, 1)); display the
# per-parameter test statistics.
mod = tsa.SARIMAX(endog=term, order=(1, 0, 2), trend="c")
res = mod.fit()
res.tvalues


intercept      3.540617
ar.L1        115.599079
ma.L1         26.011213
ma.L2         -3.616864
sigma2        37.369782
dtype: float64

In [12]:
# ARMA(3, 1) with a constant (one more AR lag than ARMA(2, 1)); display the
# per-parameter test statistics.
mod = tsa.SARIMAX(endog=term, order=(3, 0, 1), trend="c")
res = mod.fit()
res.tvalues

intercept     3.624126
ar.L1        11.922279
ar.L2         4.742558
ar.L3        -1.870700
ma.L1        12.733144
sigma2       37.460436
dtype: float64

In [13]:
# ARMA(2, 2) with a constant (one more MA lag than ARMA(2, 1)); display the
# per-parameter test statistics.
mod = tsa.SARIMAX(endog=term, order=(2, 0, 2), trend="c")
res = mod.fit()
res.tvalues

intercept     3.121539
ar.L1         2.099811
ar.L2         3.021153
ma.L1         4.909944
ma.L2         1.650082
sigma2       37.641998
dtype: float64

In [14]:
# Refit ARMA(2, 1) with a constant, keep the result as stg_res and show its
# full summary table.
mod = tsa.SARIMAX(endog=term, order=(2, 0, 1), trend="c")
stg_res = mod.fit()
stg_res.summary()

0,1,2,3
Dep. Variable:,TERM,No. Observations:,801.0
Model:,"SARIMAX(2, 0, 1)",Log Likelihood,12.058
Date:,"Fri, 24 Jan 2020",AIC,-14.116
Time:,23:57:08,BIC,9.314
Sample:,04-01-1953,HQIC,-5.116
,- 12-01-2019,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0495,0.014,3.568,0.000,0.022,0.077
ar.L1,0.7174,0.039,18.515,0.000,0.641,0.793
ar.L2,0.2297,0.039,5.964,0.000,0.154,0.305
ma.L1,0.6227,0.037,16.792,0.000,0.550,0.695
sigma2,0.0566,0.001,38.143,0.000,0.054,0.059

0,1,2,3
Ljung-Box (Q):,81.45,Jarque-Bera (JB):,3065.73
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,0.72,Skew:,0.58
Prob(H) (two-sided):,0.01,Kurtosis:,12.51
