## ARMA Modeling: Model Selection

**Functions**

`sm.tsa.SARIMAX`

### Exercise 68
Perform a model selection exercise on the term premium using

1. General-to-Specific
2. Specific-to-General
3. Minimizing an Information Criterion

In [1]:
import pandas as pd
# Read the "term_premium" table from the HDF5 store and keep the TERM
# column, which is the series modelled in the rest of the notebook.
data = pd.read_hdf(
    "data/term-premium.h5",
    key="term_premium",
)
term = data["TERM"]

In [2]:
import statsmodels.tsa.api as tsa

# Every (AR order, MA order) pair with both orders in 0..4, AR order varying
# slowest so the fits run in the same sequence as a nested loop would.
order_grid = [(p, q) for p in range(5) for q in range(5)]

# Fit each ARMA(p, q) with a constant and store [AIC, BIC] keyed by (p, q).
criteria = {}
for ar, ma in order_grid:
    print(f"AR: {ar}, MA: {ma}")
    mod = tsa.SARIMAX(term, order=(ar, 0, ma), trend="c")
    res = mod.fit()
    criteria[(ar, ma)] = [res.aic, res.bic]

# The dict keys become MultiIndex columns; transpose so that each model is a
# row and the two criteria are the columns, then label the index levels.
ic = pd.DataFrame(criteria, index=["AIC", "BIC"]).T
ic.index.names = ["AR", "MA"]
ic

AR: 0, MA: 0
AR: 0, MA: 1


  warn('Non-invertible starting MA parameters found.'


AR: 0, MA: 2


AR: 0, MA: 3


AR: 0, MA: 4


AR: 1, MA: 0


AR: 1, MA: 1


AR: 1, MA: 2


AR: 1, MA: 3


AR: 1, MA: 4


AR: 2, MA: 0


AR: 2, MA: 1


AR: 2, MA: 2




AR: 2, MA: 3


AR: 2, MA: 4


AR: 3, MA: 0


AR: 3, MA: 1


AR: 3, MA: 2


  warn('Non-stationary starting autoregressive parameters'


AR: 3, MA: 3




AR: 3, MA: 4




AR: 4, MA: 0


AR: 4, MA: 1


AR: 4, MA: 2




AR: 4, MA: 3




AR: 4, MA: 4




Unnamed: 0_level_0,Unnamed: 1_level_0,AIC,BIC
AR,MA,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,2405.496998,2414.871216
0,1,1440.283144,1454.34447
0,2,884.990373,903.738808
0,3,544.443984,567.879527
0,4,382.674377,410.797028
1,0,94.1188,108.180126
1,1,-10.735286,8.013149
1,2,-13.368616,10.066927
1,3,-13.219488,14.903164
1,4,-14.348534,18.461226


In [3]:
# Order the candidate models by each criterion (ascending, so the first row
# of each sorted table is the model with the smallest value).
aic = ic.sort_values(by="AIC")
bic = ic.sort_values(by="BIC")

# The index entries are (AR, MA) pairs; report the top-ranked pair for each.
ar, ma = aic.index[0]
print(f"AIC selects AR {ar}, MA {ma}")

ar, ma = bic.index[0]
print(f"BIC selects AR {ar}, MA {ma}")

AIC selects AR 3, MA 3
BIC selects AR 1, MA 1


In [4]:
# ARMA(4, 4) with a constant: fit it and display the t-statistic of every
# estimated parameter.
# NOTE(review): presumably the starting point of the General-to-Specific
# search -- confirm against the exercise text.
mod = tsa.SARIMAX(term, order=(4, 0, 4), trend="c")
res = mod.fit()
res.tvalues



intercept     2.955696
ar.L1         3.042959
ar.L2        -2.606270
ar.L3         2.774847
ar.L4         2.314833
ma.L1         4.724951
ma.L2         6.487431
ma.L3         3.533640
ma.L4         0.475688
sigma2       37.470481
dtype: float64

In [5]:
# ARMA(4, 3) with a constant; show the full estimation summary.
# NOTE(review): the variable name suggests this is the model retained by
# the General-to-Specific search -- confirm.
mod = tsa.SARIMAX(term, order=(4, 0, 3), trend="c")
gts_res = mod.fit()
gts_res.summary()

  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'




0,1,2,3
Dep. Variable:,TERM,No. Observations:,802.0
Model:,"SARIMAX(4, 0, 3)",Log Likelihood,17.867
Date:,"Wed, 26 Feb 2020",AIC,-17.734
Time:,15:37:49,BIC,24.45
Sample:,04-01-1953,HQIC,-1.531
,- 01-01-2020,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.1307,0.039,3.343,0.001,0.054,0.207
ar.L1,-0.1825,0.056,-3.268,0.001,-0.292,-0.073
ar.L2,0.0666,0.057,1.159,0.246,-0.046,0.179
ar.L3,0.7767,0.059,13.199,0.000,0.661,0.892
ar.L4,0.1967,0.056,3.482,0.000,0.086,0.307
ma.L1,1.5319,0.054,28.505,0.000,1.427,1.637
ma.L2,1.4260,0.079,18.098,0.000,1.272,1.580
ma.L3,0.5820,0.052,11.142,0.000,0.480,0.684
sigma2,0.0556,0.001,38.862,0.000,0.053,0.058

0,1,2,3
Ljung-Box (Q):,66.37,Jarque-Bera (JB):,3429.63
Prob(Q):,0.01,Prob(JB):,0.0
Heteroskedasticity (H):,0.71,Skew:,0.68
Prob(H) (two-sided):,0.01,Kurtosis:,13.04


In [6]:
# AR(1) with a constant: fit and display the parameter t-statistics.
# NOTE(review): presumably the first Specific-to-General candidate.
mod = tsa.SARIMAX(term, order=(1, 0, 0), trend="c")
res = mod.fit()
res.tvalues

intercept      2.883430
ar.L1        152.605980
sigma2        57.358547
dtype: float64

In [7]:
# MA(1) with a constant: fit and display the parameter t-statistics.
mod = tsa.SARIMAX(term, order=(0, 0, 1), trend="c")
res = mod.fit()
res.tvalues

intercept    23.913356
ma.L1        74.086670
sigma2       21.525640
dtype: float64

In [8]:
# AR(2) with a constant: fit and display the parameter t-statistics.
mod = tsa.SARIMAX(term, order=(2, 0, 0), trend="c")
res = mod.fit()
res.tvalues

intercept     4.045848
ar.L1        92.797712
ar.L2       -22.200269
sigma2       49.656638
dtype: float64

In [9]:
# ARMA(1, 1) with a constant: fit and display the parameter t-statistics.
mod = tsa.SARIMAX(term, order=(1, 0, 1), trend="c")
res = mod.fit()
res.tvalues


intercept      3.852726
ar.L1        115.712281
ma.L1         33.880397
sigma2        42.543286
dtype: float64

In [10]:
# ARMA(2, 1) with a constant: fit and display the parameter t-statistics.
mod = tsa.SARIMAX(term, order=(2, 0, 1), trend="c")
res = mod.fit()
res.tvalues

intercept     3.548229
ar.L1        18.545084
ar.L2         5.997806
ma.L1        16.851933
sigma2       38.186006
dtype: float64

In [11]:
# ARMA(1, 2) with a constant: fit and display the parameter t-statistics.
mod = tsa.SARIMAX(term, order=(1, 0, 2), trend="c")
res = mod.fit()
res.tvalues


intercept      3.519934
ar.L1        115.775070
ma.L1         26.029376
ma.L2         -3.637821
sigma2        37.408190
dtype: float64

In [12]:
# ARMA(3, 1) with a constant: fit and display the parameter t-statistics.
mod = tsa.SARIMAX(term, order=(3, 0, 1), trend="c")
res = mod.fit()
res.tvalues

intercept     3.605195
ar.L1        11.946150
ar.L2         4.756587
ar.L3        -1.870262
ma.L1        12.769701
sigma2       37.499575
dtype: float64

In [13]:
# ARMA(2, 2) with a constant: fit and display the parameter t-statistics.
mod = tsa.SARIMAX(term, order=(2, 0, 2), trend="c")
res = mod.fit()
res.tvalues



intercept     3.111459
ar.L1         2.107424
ar.L2         3.033841
ma.L1         4.929156
ma.L2         1.654634
sigma2       37.678138
dtype: float64

In [14]:
# ARMA(2, 1) with a constant; show the full estimation summary.
# NOTE(review): the variable name suggests this is the model retained by
# the Specific-to-General search -- confirm.
mod = tsa.SARIMAX(term, order=(2, 0, 1), trend="c")
stg_res = mod.fit()
stg_res.summary()

0,1,2,3
Dep. Variable:,TERM,No. Observations:,802.0
Model:,"SARIMAX(2, 0, 1)",Log Likelihood,12.476
Date:,"Wed, 26 Feb 2020",AIC,-14.952
Time:,15:37:56,BIC,8.484
Sample:,04-01-1953,HQIC,-5.95
,- 01-01-2020,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0492,0.014,3.548,0.000,0.022,0.076
ar.L1,0.7167,0.039,18.545,0.000,0.641,0.792
ar.L2,0.2305,0.038,5.998,0.000,0.155,0.306
ma.L1,0.6233,0.037,16.852,0.000,0.551,0.696
sigma2,0.0565,0.001,38.186,0.000,0.054,0.059

0,1,2,3
Ljung-Box (Q):,81.45,Jarque-Bera (JB):,3077.17
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,0.73,Skew:,0.59
Prob(H) (two-sided):,0.01,Kurtosis:,12.52
