### Statistical Method (AR & ARIMA)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

from math import sqrt
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima.model import ARIMA
import pmdarima as pm # auto arima
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit

In [None]:
# Location of the cleaned dataset, relative to the notebook.
file = "../datasets/clean/df_41017.csv"

# Read the CSV with its first column as the index, then convert that index to
# datetimes so the frame can be sliced by date further down.
df = (
    pd.read_csv(file, index_col=0)
    .pipe(lambda frame: frame.set_axis(pd.to_datetime(frame.index), axis=0))
)

In [None]:
# Chronological hold-out split: rows before the cutoff are used for fitting,
# rows from the cutoff onwards are held out for evaluation.
split_date = pd.to_datetime("2016/01/01")
train = df[df.index < split_date]
# `>=` rather than `>`: an observation stamped exactly at the cutoff must land in
# the test set. With `>` it was dropped from both sets, leaving a gap so that the
# first forecast step no longer corresponded to the first test row.
test = df[df.index >= split_date]
# Report the number of rows (time steps) in each partition.
print(f"Train size: {len(train)}\nTest size: {len(test)}")

#### Autoregressive

In [None]:
# Fit an autoregressive model on the training data with lags=5.
ar_lags = 5
ar_model = AutoReg(train, lags=ar_lags).fit()

# Last expression of the cell: show the fitted model's summary.
ar_model.summary()

In [None]:
# Out-of-sample AR forecast spanning the whole test period.
# Positions are counted in rows (time steps): the first forecast sits directly
# after the last training row. len() is used instead of .size because .size counts
# rows x columns and only equals the row count for a single-column frame; it also
# matches the len(test) horizon used by the ARIMA forecasts in this notebook.
forecast_start = len(train)
forecast_end = forecast_start + len(test) - 1  # `end` is inclusive, hence the -1
ar_predict = ar_model.predict(start=forecast_start, end=forecast_end)
print(ar_predict)

In [None]:
# Compare the AR forecast with the held-out observations and report the test MSE.
# The forecast is drawn against the test timestamps explicitly so both curves share
# one x-axis even if the forecast carries a positional (integer) index.
# NOTE(review): statsmodels may return such an index when the training index has no
# usable frequency - confirm against the actual output of the forecast cell.
fig, ax = plt.subplots()
ax.plot(test, label="Observed (test)")
ax.plot(test.index, np.asarray(ar_predict), label="AR forecast")
ax.set(title="AR forecast vs. observed test data", xlabel="Date")
ax.legend()
print(f"MSE of AR: {mean_squared_error(test,ar_predict)}")
plt.show()

#### ARIMA
Order = `(p, d, q)`, where:
- `p` = order of the AR (autoregressive) term
- `d` = order of differencing
- `q` = order of the MA (moving-average) term

In [None]:
# Fit an ARIMA model on the training data with order (p, d, q) = (5, 0, 7).
arima_order = (5, 0, 7)
arima_model = ARIMA(train, order=arima_order).fit()

In [None]:
# Show the fitted ARIMA model's summary as this cell's output.
arima_model.summary()

In [None]:
# Forecast one step per held-out row, starting right after the training data.
arima_horizon = len(test)
arima_predict = arima_model.forecast(steps=arima_horizon)
print(arima_predict)

In [None]:
# Compare the ARIMA forecast with the held-out observations and report the test MSE.
# The forecast is drawn against the test timestamps explicitly so both curves share
# one x-axis even if the forecast carries a positional (integer) index.
# NOTE(review): statsmodels may return such an index when the training index has no
# usable frequency - confirm against the actual output of the forecast cell.
fig, ax = plt.subplots()
ax.plot(test, label="Observed (test)")
ax.plot(test.index, np.asarray(arima_predict), label="ARIMA forecast")
ax.set(title="ARIMA forecast vs. observed test data", xlabel="Date")
ax.legend()
print(f"MSE of ARIMA: {mean_squared_error(test,arima_predict)}")
plt.show()

Using auto ARIMA (`pmdarima.auto_arima`) to search for the `(p, d, q)` order automatically:

In [None]:
# Needs the `pmdarima` package (imported as `pm`). If it is missing, install it
# first, e.g. `%pip install pmdarima` (formerly published as `pyramid-arima`).

# Settings for the automatic order search, collected in one place.
auto_arima_options = dict(
    start_p=1,                # AR order the search starts from
    start_q=1,                # MA order the search starts from
    test='adf',               # unit-root test used to choose d
    max_p=5,                  # upper bound on the AR order
    max_q=7,                  # upper bound on the MA order
    m=1,                      # seasonal period; 1 = non-seasonal
    d=None,                   # not fixed: left to the search
    seasonal=False,           # non-seasonal model only
    start_P=0,
    D=0,
    trace=True,               # print each candidate model while searching
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True,            # stepwise search
)
autoarima_model = pm.auto_arima(train, **auto_arima_options)

# Last expression of the cell: show the summary of the selected model.
autoarima_model.summary()

In [None]:
# Draw the selected auto-ARIMA model's diagnostic plots with figsize=(10, 6).
autoarima_model.plot_diagnostics(figsize=(10,6))
# Tidy subplot spacing on the current figure, then render it.
plt.tight_layout()
plt.show()

In [None]:
# Forecast one value per held-out row with the automatically selected model.
autoarima_horizon = len(test)
autoarima_predict = autoarima_model.predict(n_periods=autoarima_horizon)

# Bare last expression: the cell output is the auto-ARIMA MSE on the test set.
mean_squared_error(test, autoarima_predict)

----

In [None]:
# train_swh = train.swh.values
# train_rows = train_swh.shape[0] - 6
# train_df = np.zeros((train_rows+1, 6))
# for idx, start in enumerate(range(0, len(train_swh) - 5)):
#     row = train_swh[start:start+6]
#     train_df[idx, :] = row

In [None]:
# train_df.shape

In [None]:
# test_swh = test.swh.values
# test_rows = test_swh.shape[0] - 6
# test_df = np.zeros((test_rows+1,6))
# for idx, start in enumerate(range(0, len(test_swh) - 5)):
#     row = test_swh[start:start+6]
#     test_df[idx, :] = row

In [None]:
# test_df.shape

In [None]:
# X = train_df[:, :5]
# y = train_df[:, 5]

In [None]:
# # time series train test split

# tscv = TimeSeriesSplit(n_splits=3)

# for train_index, test_index in tscv.split(train_df):

#     X_train, X_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]