# time series forecasting

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [2]:
# Load the input data. This frame is later passed to daily_forecast, which
# reads its `y` column (cast to float) and its `ds` column.
df = pd.read_csv('df1.csv')

# ARIMA model

In [3]:
# evaluate an ARIMA model for a given order (p,d,q)
def evaluate_arima_model(X, arima_order, train_fraction=0.70):
    """Walk-forward validate one ARIMA order and return its mean squared error.

    The leading ``train_fraction`` of ``X`` seeds the history. For each of the
    remaining observations a model is refitted on the history, a one-step
    forecast is recorded, and the true observation is appended to the history.

    Parameters
    ----------
    X : sequence of numbers
        Observations in time order; must support ``len`` and slicing.
    arima_order : tuple
        ``(p, d, q)`` order passed to ``ARIMA``.
    train_fraction : float, optional
        Share of ``X`` used as the initial history (default 0.70).

    Returns
    -------
    float
        Mean squared error between held-out observations and forecasts.

    Raises
    ------
    ValueError
        If the split leaves no held-out observations to score.
    """
    train_size = int(len(X) * train_fraction)
    train, test = X[0:train_size], X[train_size:]
    if len(test) == 0:
        raise ValueError('no held-out observations left to evaluate the model on')
    history = list(train)

    predictions = []
    for t in range(len(test)):
        model = ARIMA(history, order=arima_order)
        model_fit = model.fit(disp=0)
        yhat = model_fit.forecast()[0]
        predictions.append(yhat)
        # Feed the true value back so the next forecast is one step ahead.
        history.append(test[t])

    # mean_squared_error is not imported anywhere in this notebook, so the
    # score is computed with numpy. ravel() flattens both sides in case each
    # forecast came back as a one-element array rather than a scalar.
    actual = np.asarray(test, dtype=float).ravel()
    forecast = np.asarray(predictions, dtype=float).ravel()
    return float(np.mean((actual - forecast) ** 2))

In [4]:
def evaluate_models(dataset, p_values, d_values, q_values):
    """Grid-search ARIMA orders and report the one with the lowest MSE.

    Every ``(p, d, q)`` combination is scored with ``evaluate_arima_model``.
    An order whose evaluation raises is reported and skipped, so one bad
    configuration does not stop the search.

    Parameters
    ----------
    dataset : array-like with ``astype``
        Observations in time order; cast to ``float32`` before scoring.
    p_values, d_values, q_values : iterable of int
        Candidate values for each component of the order.

    Returns
    -------
    tuple or None
        The best ``(p, d, q)`` found, or ``None`` if every candidate failed.
    """
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                print('order', order)
                try:
                    mse = evaluate_arima_model(dataset, order)
                except Exception as exc:
                    # Catch Exception rather than everything so that
                    # KeyboardInterrupt still stops the search, and say why
                    # the order was dropped instead of hiding it.
                    print('ARIMA%s skipped: %r' % (order, exc))
                    continue
                if mse < best_score:
                    best_score, best_cfg = mse, order
    print('Best ARIMA%s MSE=%.3f' % (best_cfg, best_score))
    return best_cfg

In [5]:
def daily_forecast(df_new, p_values, d_values, q_values, best_order, train_fraction=0.66):
    """Produce walk-forward one-step ARIMA forecasts for the tail of a series.

    The leading ``train_fraction`` of ``df_new.y`` seeds the history. For each
    remaining observation a model with ``best_order`` is refitted on the
    history, its one-step forecast is stored and logged next to the true
    value, and the true value is appended to the history.

    Parameters
    ----------
    df_new : object with ``y`` and ``ds`` attributes (e.g. a DataFrame)
        ``y`` holds the observations (cast to float), ``ds`` the matching
        time stamps.
    p_values, d_values, q_values : iterable
        Accepted for interface compatibility; this function does not read them.
    best_order : sequence
        ``(p, d, q)`` order passed to ``ARIMA``.
    train_fraction : float, optional
        Share of the series used as the initial history (default 0.66).

    Returns
    -------
    tuple
        ``(train, test, traintime, testtime, predictions)`` where
        ``predictions`` is a list holding, for each test observation, the
        first element of ``forecast()`` exactly as the fitted model returned it.
    """
    X = df_new.y.astype('float').values
    Xtime = df_new.ds.values
    print('test')
    print('best order:', best_order)
    train_size = int(len(X) * train_fraction)
    train, test = X[0:train_size], X[train_size:]
    traintime, testtime = Xtime[0:train_size], Xtime[train_size:]
    history = list(train)

    predictions = []
    for t in range(len(test)):
        model = ARIMA(history, order=best_order)
        model_fit = model.fit(disp=0)
        yhat = model_fit.forecast()[0]
        predictions.append(yhat)
        history.append(test[t])

        # Scalars for logging only: yhat may be a one-element array, and the
        # value stored in `predictions` is left exactly as it was returned.
        predicted = float(np.ravel(yhat)[0])
        expected = float(test[t])
        # A zero (or non-finite) expected value has no percentage error;
        # converting inf/nan with int() would abort the whole forecast run.
        ratio = abs(predicted - expected) / abs(expected) if expected else float('nan')
        if np.isfinite(ratio):
            print('predicted=%f, expected=%f, error=%i percentage' % (predicted, expected, int(ratio * 100)))
        else:
            print('predicted=%f, expected=%f, error=n/a' % (predicted, expected))

    return train, test, traintime, testtime, predictions

In [6]:
# Candidate (p, d, q) values, named to match the arguments of evaluate_models.
# An earlier, wider grid for p was [0, 1, 2, 4, 6, 8, 10].
p_values = [5, 7, 9]
d_values = range(3)
q_values = range(3)

In [7]:
# ARIMA order used for this forecast run.
best_order = [7, 0, 1]
train_1, test_1, traintime_1, testtime_1, prediction_1 = daily_forecast(df, p_values, d_values, q_values, best_order)
# Store the forecasts under their own name. Rebinding `df` here would discard
# the loaded input frame, so re-running this cell would fail inside
# daily_forecast (the predictions frame has no `y` / `ds` columns).
forecast_df = pd.DataFrame(prediction_1)
forecast_df.to_csv('forecasting.csv')

test
best order: [7, 0, 1]
predicted=35754.182054, expected=37707.000000, error=5 percentage
predicted=37422.016430, expected=34559.000000, error=8 percentage
predicted=35462.736925, expected=26883.000000, error=31 percentage
predicted=30339.965137, expected=26399.000000, error=14 percentage
predicted=28457.393918, expected=26294.000000, error=8 percentage
predicted=29132.567915, expected=27389.000000, error=6 percentage
predicted=30643.410934, expected=28900.000000, error=6 percentage
predicted=31389.170456, expected=34892.000000, error=10 percentage
predicted=34361.482209, expected=34887.000000, error=1 percentage
predicted=35085.282004, expected=27854.000000, error=25 percentage
predicted=31233.036679, expected=28164.000000, error=10 percentage
predicted=29645.998483, expected=29774.000000, error=0 percentage
predicted=30849.711338, expected=45165.000000, error=31 percentage
predicted=40413.236574, expected=72445.000000, error=44 percentage
predicted=59050.229512, expected=79293.000