# ARIMA Experimentation

## Setup: Imports, etc.

In [None]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
import util

# Configure Bokeh so that show() renders plots inline in the notebook.
output_notebook()

## Generate Data

In [None]:
# Number of days to generate data for.
# NOTE(review): only DAYS = 1 has been exercised so far; before raising it,
# check that no other cell still hard-codes the single-day series length.
DAYS = 1

# Number of leading observations used to fit the very first model.
TRAIN_SIZE = 100

# How many steps ahead the multi-step forecast looks.
HORIZON = 5

# generate a numpy array of raw data first
d = util.gen_data(days=DAYS)

# Column 1 holds the series being modelled (column 0 is presumably the time
# axis -- confirm against util.gen_data).
values = d[:, 1]
n_points = len(values)
train, test = values[:TRAIN_SIZE], values[TRAIN_SIZE:]

# `history` grows by one true observation per iteration (walk-forward).
history = list(train)

# Both prediction series are padded with the true values for the points that
# can never be forecast, so that index i always refers to the same sample as
# values[i].  The HORIZON-step series needs HORIZON - 1 extra padding points
# because its first forecast lands that much later.
first_multi_step = TRAIN_SIZE + HORIZON - 1
predictions = list(train)
predictions5 = list(values[:first_multi_step])

# Walk forward over the test set: refit on everything seen so far, forecast,
# then reveal the true observation.
for obs in tqdm(test):
    model_fit = ARIMA(history, order=(5, 1, 0)).fit()

    # A single multi-step forecast provides both horizons: element 0 is the
    # one-step-ahead prediction, the last element is HORIZON steps ahead.
    forecast = model_fit.forecast(steps=HORIZON)
    predictions.append(forecast[0])
    predictions5.append(forecast[HORIZON - 1])

    history.append(obs)

# The final HORIZON - 1 multi-step forecasts point past the end of the data;
# drop them so the series has the same length as the observations.
predictions5 = predictions5[:n_points]

# Score only the points that were actually forecast.  Including the padding
# (where prediction == truth by construction) would dilute the error with
# zeros and under-report the test MSE.
error = mean_squared_error(test, predictions[TRAIN_SIZE:])
print(f'Test MSE (one time step)   : {error:.3f}')

error5 = mean_squared_error(
    values[first_multi_step:], predictions5[first_multi_step:]
)
print(f'Test MSE (five time steps) : {error5:.3f}')

## Plot Results

In [None]:
p = figure(min_width=900)

# Plot every series against its own sample index instead of a hard-coded
# 0..1439 range, so the x and y columns always have matching lengths even
# when the generated series is not exactly 1440 points long.
series = (
    (history, 'green'),      # true observations
    (predictions, 'red'),    # one-step-ahead forecasts (seeded with train)
    (predictions5, 'pink'),  # five-step-ahead forecasts (seeded with truth)
)
for ys, colour in series:
    p.line(x=np.arange(len(ys)), y=ys, color=colour)

show(p)