# Machine Learning - Configure ARIMA 

In [1]:
import pandas as pd
import numpy as np

import warnings

from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima_model import ARIMA

import matplotlib.pyplot as plt
%matplotlib inline

## Configure ARIMA

The ARIMA model for time series analysis and forecasting can be tricky to configure. There are 3 parameters that require estimation by iterative trial and error from reviewing diagnostic plots and using 40-year-old heuristic rules.

We can automate the process of evaluating a large number of hyperparameters for the ARIMA model by using a grid search procedure.

### Dataset Description

In [2]:
# load dataset
data = pd.read_csv('data/slo_weather_history.csv', index_col=0)

# display first few rows
data.head()

Unnamed: 0_level_0,dew_point_f_avg,dew_point_f_high,dew_point_f_low,events,humidity_%_avg,humidity_%_high,humidity_%_low,precip_in_sum,sea_level_press_in_avg,sea_level_press_in_high,sea_level_press_in_low,temp_f_avg,temp_f_high,temp_f_low,visibility_mi_avg,visibility_mi_high,visibility_mi_low,wind_gust_mph_high,wind_mph_avg,wind_mph_high
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2012-01-01,44.0,50.0,34.0,Fog,80.0,100.0,25.0,0.0,30.15,30.23,30.08,56.0,73.0,39.0,6.0,10.0,0.0,0.0,1.0,8.0
2012-01-02,47.0,52.0,43.0,Fog,93.0,100.0,63.0,0.0,30.23,30.3,30.19,52.0,63.0,42.0,4.0,10.0,0.0,0.0,3.0,14.0
2012-01-03,43.0,50.0,37.0,Fog,85.0,100.0,32.0,0.01,30.24,30.28,30.17,58.0,77.0,39.0,6.0,10.0,0.0,0.0,2.0,10.0
2012-01-04,42.0,47.0,37.0,,69.0,96.0,33.0,0.0,30.24,30.3,30.2,56.0,73.0,39.0,10.0,10.0,8.0,0.0,1.0,9.0
2012-01-05,42.0,51.0,36.0,,66.0,93.0,23.0,0.0,30.15,30.22,30.09,60.0,78.0,42.0,10.0,10.0,7.0,22.0,4.0,18.0


### Develop Model

The data doens't have a strong seasonal component, but we decided to neutralize it and make it stationary by taking the seasonal difference. That is, we can take the observation for a day and subtract the observation from the same day one year ago.

We can invert this operation by adding the value of the observation one year ago. We will need to do this to any forecasts made by a model trained on the seasonally adjusted data.

In [3]:
# create a differenced series
def difference(data, interval=1):
    diff = list()
    for i in range(interval, len(data)):
        value = data[i] - data[i - interval]
        diff.append(value)
    return np.array(diff)

In [4]:
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    return yhat + history[-interval]

In [5]:
# seasonal difference
X = data['temp_f_low'].values
days_in_year = 365
differenced = difference(X, days_in_year)

In [6]:
# evaluate an ARIMA model for a given order (p,d,q)
def evaluate_arima_model(X, arima_order):
    # prepare training dataset
    train_size = len(X) - 7
    train, test = X[0:train_size], X[train_size:]
    history = [x for x in train]
    
    # make predictions
    predictions = []
    
    for t in range(len(test)):
        model = ARIMA(history, order=arima_order)
        model_fit = model.fit(disp=0)
        yhat = model_fit.forecast()[0]
        
        # invert the differenced forecast to something usable
        yhat = inverse_difference(history, yhat, 365)
        
        predictions.append(yhat)
        history.append(test[t])
    
    # calculate out of sample error
    error = np.sqrt(mean_squared_error(test, predictions))
    
    return error

In [7]:
# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p,d,q)
                try:
                    rmse = evaluate_arima_model(dataset, order)
                    if rmse < best_score:
                        best_score, best_cfg = rmse, order
                    print('ARIMA%s RMSE=%.3f' % (order, rmse))
                except:
                    continue
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))

In [8]:
# evaluate parameters
p_values = range(0, 10)
d_values = range(0, 10)
q_values = range(0, 10)

warnings.filterwarnings("ignore")

evaluate_models(differenced, p_values, d_values, q_values)

ARIMA(0, 0, 0) RMSE=7.617
ARIMA(0, 0, 1) RMSE=6.928
ARIMA(0, 0, 2) RMSE=6.736
ARIMA(0, 0, 3) RMSE=6.429
ARIMA(0, 0, 4) RMSE=6.300
ARIMA(0, 0, 5) RMSE=6.336
ARIMA(0, 0, 6) RMSE=6.284
ARIMA(0, 0, 7) RMSE=6.260
ARIMA(0, 0, 8) RMSE=6.262
ARIMA(0, 0, 9) RMSE=6.258
ARIMA(0, 1, 0) RMSE=6.449
ARIMA(0, 1, 1) RMSE=5.781
ARIMA(0, 1, 2) RMSE=5.960
ARIMA(0, 1, 3) RMSE=6.104
ARIMA(0, 1, 4) RMSE=6.083
ARIMA(0, 1, 5) RMSE=6.053
ARIMA(0, 2, 0) RMSE=9.668
ARIMA(0, 2, 1) RMSE=6.448
ARIMA(1, 0, 0) RMSE=6.345
ARIMA(1, 0, 1) RMSE=6.198
ARIMA(1, 0, 2) RMSE=6.198
ARIMA(1, 0, 3) RMSE=6.196
ARIMA(1, 0, 4) RMSE=6.143
ARIMA(1, 0, 5) RMSE=6.107
ARIMA(1, 0, 6) RMSE=6.155
ARIMA(1, 0, 7) RMSE=6.151
ARIMA(1, 1, 0) RMSE=5.920
ARIMA(1, 1, 1) RMSE=6.184
ARIMA(1, 1, 2) RMSE=6.115
ARIMA(1, 1, 3) RMSE=6.115
ARIMA(1, 1, 4) RMSE=6.085
ARIMA(1, 1, 5) RMSE=6.093
ARIMA(1, 1, 6) RMSE=6.110
ARIMA(1, 1, 7) RMSE=6.048
ARIMA(1, 1, 8) RMSE=6.124
ARIMA(1, 1, 9) RMSE=6.016
ARIMA(1, 2, 0) RMSE=7.251
ARIMA(2, 0, 0) RMSE=6.208
ARIMA(2, 0, 

__Results from previous run__

ARIMA(0, 0, 0) RMSE=7.617
ARIMA(0, 0, 1) RMSE=6.928
ARIMA(0, 0, 2) RMSE=6.736
ARIMA(0, 0, 3) RMSE=6.429
ARIMA(0, 0, 4) RMSE=6.300
ARIMA(0, 0, 5) RMSE=6.336
ARIMA(0, 0, 6) RMSE=6.284
ARIMA(0, 0, 7) RMSE=6.260
ARIMA(0, 0, 8) RMSE=6.262
ARIMA(0, 0, 9) RMSE=6.258
ARIMA(0, 1, 0) RMSE=6.449
ARIMA(0, 1, 1) RMSE=5.781
ARIMA(0, 1, 2) RMSE=5.960
ARIMA(0, 1, 3) RMSE=6.104
ARIMA(0, 1, 4) RMSE=6.083
ARIMA(0, 1, 5) RMSE=6.053
ARIMA(0, 2, 0) RMSE=9.668
ARIMA(0, 2, 1) RMSE=6.448
ARIMA(1, 0, 0) RMSE=6.345
ARIMA(1, 0, 1) RMSE=6.198
ARIMA(1, 0, 2) RMSE=6.198
ARIMA(1, 0, 3) RMSE=6.196
ARIMA(1, 0, 4) RMSE=6.143
ARIMA(1, 0, 5) RMSE=6.107
ARIMA(1, 0, 6) RMSE=6.155
ARIMA(1, 0, 7) RMSE=6.151
ARIMA(1, 1, 0) RMSE=5.920
ARIMA(1, 1, 1) RMSE=6.184
ARIMA(1, 1, 2) RMSE=6.115
ARIMA(1, 1, 3) RMSE=6.115
ARIMA(1, 1, 4) RMSE=6.085
ARIMA(1, 1, 5) RMSE=6.093
ARIMA(1, 1, 6) RMSE=6.110
ARIMA(1, 1, 7) RMSE=6.048
ARIMA(1, 1, 8) RMSE=6.124
ARIMA(1, 1, 9) RMSE=6.016
ARIMA(1, 2, 0) RMSE=7.251
ARIMA(2, 0, 0) RMSE=6.208
ARIMA(2, 0, 1) RMSE=6.198
ARIMA(2, 1, 0) RMSE=5.906
ARIMA(2, 1, 1) RMSE=6.116
ARIMA(2, 1, 2) RMSE=6.207
ARIMA(2, 1, 3) RMSE=6.138
ARIMA(2, 1, 4) RMSE=6.134
ARIMA(2, 1, 6) RMSE=6.137
ARIMA(2, 1, 7) RMSE=6.070
ARIMA(2, 1, 8) RMSE=6.211
ARIMA(2, 1, 9) RMSE=6.004
ARIMA(2, 2, 0) RMSE=6.445
ARIMA(3, 0, 0) RMSE=6.195
ARIMA(3, 0, 1) RMSE=6.227
ARIMA(3, 0, 2) RMSE=6.233
ARIMA(3, 1, 0) RMSE=5.964
ARIMA(3, 1, 1) RMSE=6.098
ARIMA(3, 1, 2) RMSE=6.131
ARIMA(3, 1, 3) RMSE=6.149
ARIMA(3, 1, 6) RMSE=6.146
ARIMA(3, 1, 8) RMSE=6.118
ARIMA(3, 2, 0) RMSE=6.243
ARIMA(4, 0, 0) RMSE=6.187
ARIMA(4, 0, 1) RMSE=6.230
ARIMA(4, 0, 2) RMSE=6.176
ARIMA(4, 0, 3) RMSE=6.282
ARIMA(4, 0, 4) RMSE=6.160
ARIMA(4, 1, 0) RMSE=6.012
ARIMA(4, 1, 1) RMSE=6.083
ARIMA(4, 1, 2) RMSE=6.141
ARIMA(4, 1, 3) RMSE=6.162
ARIMA(4, 1, 5) RMSE=6.081
ARIMA(4, 1, 6) RMSE=6.109
ARIMA(4, 2, 0) RMSE=6.494
ARIMA(5, 0, 0) RMSE=6.191
ARIMA(5, 0, 1) RMSE=6.113
ARIMA(5, 0, 2) RMSE=6.173
ARIMA(5, 0, 3) RMSE=6.222
ARIMA(5, 0, 4) RMSE=6.226
ARIMA(5, 1, 0) RMSE=5.901
ARIMA(5, 1, 1) RMSE=6.102
ARIMA(5, 1, 3) RMSE=6.068
ARIMA(5, 2, 0) RMSE=6.271
ARIMA(6, 0, 0) RMSE=6.186
ARIMA(6, 0, 1) RMSE=6.121
ARIMA(6, 0, 2) RMSE=6.164
ARIMA(6, 0, 3) RMSE=6.205
ARIMA(6, 0, 4) RMSE=6.132
ARIMA(6, 0, 5) RMSE=6.185
ARIMA(6, 0, 6) RMSE=6.296
ARIMA(6, 1, 0) RMSE=5.950
ARIMA(6, 1, 1) RMSE=6.111
ARIMA(6, 2, 0) RMSE=6.279
ARIMA(6, 2, 1) RMSE=5.944
ARIMA(7, 0, 0) RMSE=6.188
ARIMA(7, 0, 1) RMSE=6.200
ARIMA(7, 0, 2) RMSE=6.165
ARIMA(7, 0, 4) RMSE=6.097
ARIMA(7, 0, 5) RMSE=6.298
ARIMA(7, 0, 6) RMSE=6.394
ARIMA(7, 1, 0) RMSE=5.933
ARIMA(7, 1, 1) RMSE=6.126
ARIMA(7, 1, 3) RMSE=6.208
ARIMA(7, 2, 0) RMSE=6.291
ARIMA(7, 2, 1) RMSE=5.916
ARIMA(8, 0, 0) RMSE=6.188
ARIMA(8, 0, 1) RMSE=6.148
ARIMA(8, 0, 2) RMSE=6.164
ARIMA(8, 0, 3) RMSE=6.175
ARIMA(8, 0, 4) RMSE=5.998
ARIMA(8, 0, 5) RMSE=6.287
ARIMA(8, 0, 6) RMSE=6.287
ARIMA(8, 0, 7) RMSE=6.155
ARIMA(8, 1, 0) RMSE=5.887
ARIMA(8, 1, 1) RMSE=6.120
ARIMA(8, 1, 3) RMSE=6.354
ARIMA(8, 2, 0) RMSE=6.202
ARIMA(8, 2, 1) RMSE=5.878
ARIMA(9, 0, 0) RMSE=6.153
ARIMA(9, 0, 1) RMSE=6.152
ARIMA(9, 0, 2) RMSE=6.154
ARIMA(9, 0, 3) RMSE=6.061
ARIMA(9, 0, 4) RMSE=6.048
ARIMA(9, 0, 5) RMSE=5.886
ARIMA(9, 0, 6) RMSE=6.263
ARIMA(9, 0, 7) RMSE=5.959
ARIMA(9, 0, 8) RMSE=6.311
ARIMA(9, 1, 0) RMSE=5.903
ARIMA(9, 1, 1) RMSE=6.095
ARIMA(9, 1, 3) RMSE=5.978
ARIMA(9, 2, 0) RMSE=6.159
ARIMA(9, 2, 1) RMSE=5.894
Best ARIMA(0, 1, 1) RMSE=5.781

