### ARMAモデル，予測

In [None]:
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

import statsmodels.api as sm
from statsmodels.tsa.arima_process import arma_generate_sample
from statsmodels.tsa.arima_model import ARMA

from scipy import stats

# Switch for figure export: when True, the plotting cells also save their
# figure to a PNG file via plt.savefig before showing it.
FLAG_fig = False

# Seed NumPy's global random generator so the simulated series is reproducible.
np.random.seed(123)

In [None]:
# Sample counts: training length, hold-out (test) length, and their total.
nobs, nobs_test = 1000, 100
nobs_all = nobs + nobs_test

# AR and MA lag-polynomial coefficients handed to arma_generate_sample;
# both lists start with the zero-lag coefficient.
ar = [1, -1.5, 0.7]
ma = [1.0, 0.6]


def dist(n):
    """Return n draws from the standard normal distribution (n is the dummy argument)."""
    return np.random.randn(n)

# Alternative innovation source: Student's t distribution with 3 degrees of freedom.
#dist = lambda n: np.random.standard_t(3, size=n)

# Simulate the full ARMA series (training + test samples) with unit noise scale
# and a burn-in of 500 samples.
y_all = arma_generate_sample(ar, ma, nobs_all, sigma=1, distrvs=dist, burnin=500)

# The split of y_all into the training part (first nobs samples) and the test
# part (the remainder) is done in the following cells, as dated pandas Series.

pandas.date_range ドキュメント<br>
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.date_range.html<br>
使い方例：http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases<br>
ここの Offset Aliases にfreqの与え方が示されている<br>

トレーニングデータの作成

In [None]:
# Daily DatetimeIndex covering the training span, beginning on 2000-01-01.
index = pd.date_range(start='1/1/2000', periods=nobs, freq='D')

# Training series: the first nobs simulated samples placed on that index.
y = pd.Series(data=y_all[:nobs], index=index)

# Preview both ends of the series and confirm the index type.
for preview in (y.head(), y.tail(), type(index)):
    print(preview)

テストデータの作成<br>yの最後の時刻（DateTime）の翌日を開始時刻としている。このテストデータは，ノートブックの最後にある予測結果のプロットで用いる。

In [None]:
# Build the hold-out (test) series so that it begins exactly one day after the
# last training timestamp. Deriving the start from `y` (instead of hard-coding
# the date) keeps training and test data contiguous when `nobs` or the
# training start date is changed. With nobs = 1000 this is 2002-09-27.
test_start = y.index[-1] + pd.Timedelta(days=1)
index_tst = pd.date_range(test_start, periods=nobs_test, freq='D')

# The samples after the first nobs form the test data; pd.Series raises if
# their count does not match the nobs_test-long index.
y_test = pd.Series(y_all[nobs:], index=index_tst)
print(y_test.head())
print(y_test.tail())

In [None]:
# Line plot of the training series on a wide canvas.
y.plot(figsize=(16, 4))

# Optionally write the figure to disk before it is shown.
if FLAG_fig:
    plt.savefig('fig_ARMA_Prediction_y.png')
plt.show()

statsmodels.graphics.tsaplots.plot_acf ドキュメント<br>
http://www.statsmodels.org/dev/generated/statsmodels.graphics.tsaplots.plot_acf.html  
statsmodels.graphics.tsaplots.plot_pacf ドキュメント  
http://www.statsmodels.org/dev/generated/statsmodels.graphics.tsaplots.plot_pacf.html

In [None]:
# Single-axes figure that hosts the autocorrelation plot.
fig, ax = plt.subplots(figsize=(12, 3))

# Autocorrelation function of the training series up to lag 20;
# plot_acf returns the figure it drew on.
fig = sm.graphics.tsa.plot_acf(y, lags=20, ax=ax)

# Optionally write the figure to disk before it is shown.
if FLAG_fig:
    plt.savefig('fig_ARMA_Prediction_acf.png')
plt.show()

In [None]:
# Fit an ARMA(2,1) model to the training series; trend='nc' requests a fit
# without a constant term.
# NOTE(review): sm.tsa.ARMA is the legacy arima_model API, which recent
# statsmodels releases no longer provide — confirm the pinned statsmodels version.
arma_model = sm.tsa.ARMA(y, order=(2, 1))
arma_result = arma_model.fit(trend='nc')

# Text report of the fitted model.
print(arma_result.summary())

scipy.stats.normaltest　ドキュメント  
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.normaltest.html

In [None]:
# Residual sequence of the fitted model, plotted over time.
resid = arma_result.resid
resid.plot(figsize=(12, 4))

# Optionally write the figure to disk before it is shown.
if FLAG_fig:
    plt.savefig('fig_ARMA_Prediction_resid.png')
plt.show()

In [None]:
# Normality test (scipy.stats.normaltest) on the residuals; the printed
# result holds the test statistic and its p-value.
normality = stats.normaltest(resid)
print(normality)

statsmodels.graphics.tsaplots.plot_acf <br>
http://www.statsmodels.org/dev/generated/statsmodels.graphics.tsaplots.plot_acf.html

In [None]:
# Significance level used for the confidence band of the ACF plot.
sig_val = 0.05

# Single-axes figure that hosts the residual autocorrelation plot.
fig = plt.figure(figsize=(12, 3))
ax1 = fig.add_subplot(1, 1, 1)

# Autocorrelation of the residuals (as a plain NumPy array) up to lag 20.
resid_values = resid.values.squeeze()
fig = sm.graphics.tsa.plot_acf(resid_values, lags=20, alpha=sig_val, ax=ax1)

# Optionally write the figure to disk before it is shown.
if FLAG_fig:
    plt.savefig('fig_ARMA_Prediction_acf_resid.png')
plt.show()

Ref:<br>
Autoregressive Moving Average (ARMA): Artificial data https://www.statsmodels.org/dev/examples/notebooks/generated/tsa_arma_1.html

In [None]:
# Compare model predictions with the held-out data on one set of axes.
fig, ax = plt.subplots(figsize=(12,4))

# The start date lies inside the training span and the end date lies beyond it,
# so the plotted range covers both in-sample and out-of-sample periods.
fig = arma_result.plot_predict(start='2002-07-31', end='2002-10-31', ax=ax)

# Overlay the actual test values. Passing ax explicitly (rather than relying on
# pyplot's implicit "current axes") guarantees they land on the same axes as
# the predictions; .loc makes the label-based date slicing explicit.
y_test.loc['2002-09-27':'2002-10-31'].plot(ax=ax, color='m', label='real')
legend = ax.legend(loc='upper right')

# Optionally write the figure to disk before it is shown.
if FLAG_fig:
    plt.savefig('fig_ARMA_Prediction_predict.png')
plt.show()