In [2]:
# Chapter 26.4 Shampoo Sales Case Study
import warnings
from math import sqrt
import pandas as pd
from pandas import datetime
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

# evaluate an ARIMA model for a given order (p, d, q)

def evaluate_arima_model(X, arima_order, train_fraction=0.66):
    """Return the walk-forward RMSE of an ARIMA model with the given order.

    The leading ``train_fraction`` of ``X`` seeds the history. For every
    remaining observation a fresh ARIMA model is fitted on the history, a
    one-step forecast is recorded, and the true observation is then appended
    to the history before the next step.

    Args:
        X: Sliceable sequence of observations in time order.
        arima_order: ``(p, d, q)`` tuple passed to ``ARIMA`` as ``order``.
        train_fraction: Share of ``X`` used as the initial history
            (defaults to 0.66, the split used previously).

    Returns:
        Root mean squared error between the held-out observations and the
        one-step forecasts.

    Raises:
        Whatever ``ARIMA``/``fit``/``mean_squared_error`` raise for an
        unfittable order or an empty hold-out set; callers such as a grid
        search are expected to handle those.
    """
    # prepare training dataset
    train_size = int(len(X) * train_fraction)
    train, test = X[:train_size], X[train_size:]
    history = list(train)
    # make predictions
    predictions = []
    # Iterate over the observations themselves instead of test[t]: integer
    # lookups with [] are label-based on a pandas Series whose index is not
    # integer positions, whereas iteration is always positional.
    for observed in test:
        model_fit = ARIMA(history, order=arima_order).fit()
        predictions.append(model_fit.forecast()[0])
        # the true value (not the forecast) extends the history
        history.append(observed)
    # calculate out-of-sample error
    return sqrt(mean_squared_error(test, predictions))

# evaluate combinations of p, d, and q values for an ARIMA model

def evaluate_models(dataset, p_values, d_values, q_values):
    """Grid-search ARIMA orders and print the best-scoring configuration.

    Every ``(p, d, q)`` combination is scored with ``evaluate_arima_model``.
    Each time a combination beats the best RMSE seen so far it is printed,
    and the overall best is printed once the search finishes.

    Args:
        dataset: Array of observations; it is cast to ``float32`` before
            evaluation, so it must provide ``astype``.
        p_values: Iterable of autoregressive orders to try.
        d_values: Iterable of differencing orders to try.
        q_values: Iterable of moving-average orders to try.

    Returns:
        None. Results are reported on stdout only.
    """
    # function-scope import so the block does not depend on file-level edits
    from itertools import product

    dataset = dataset.astype('float32')
    best_score, best_cfg = float('inf'), None
    # product() walks the combinations in the same order as nested
    # p -> d -> q loops would.
    for order in product(p_values, d_values, q_values):
        try:
            rmse = evaluate_arima_model(dataset, order)
        except Exception:
            # Best-effort search: skip orders that fail to fit. Catching
            # Exception (not a bare except) keeps Ctrl-C / SystemExit able
            # to stop a long-running search.
            continue
        if rmse < best_score:
            best_score, best_cfg = rmse, order
            print('ARIMA%s RMSE=%.3f' % (best_cfg, best_score))
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))

# load data

def parser(x):
    """Convert a ``'<year digit>-<month>'`` label into a ``datetime``.

    The label is prefixed with ``'190'`` to form a four-digit year, so
    ``'1-01'`` becomes January 1901.

    Args:
        x: String such as ``'1-01'`` (single year digit, dash, month).

    Returns:
        The ``datetime`` at the start of that month.

    Raises:
        ValueError: If ``'190' + x`` does not match ``'%Y-%m'``.
    """
    # Imported here so the function uses the standard-library class rather
    # than the `pandas.datetime` alias, which pandas 2.0 removed.
    from datetime import datetime
    return datetime.strptime('190' + x, '%Y-%m')
# The `squeeze` keyword was removed from read_csv in pandas 2.0 and
# `date_parser` is deprecated, so read the raw table first, collapse the
# single data column into a Series, then convert the index labels with parser().
series = pd.read_csv('shampoo-sales.csv', header=0, index_col=0).squeeze('columns')
series.index = series.index.map(parser)

# evaluate parameters

# candidate orders for each ARIMA component
p_values = [0, 1, 2, 4, 6, 8, 10]
d_values = range(3)
q_values = range(3)

# ignore every warning from here on, then run the grid search on the raw values
warnings.filterwarnings('ignore')
evaluate_models(
    dataset=series.values,
    p_values=p_values,
    d_values=d_values,
    q_values=q_values,
)

ARIMA(0, 0, 0) RMSE=228.966
ARIMA(0, 0, 1) RMSE=195.596
ARIMA(0, 0, 2) RMSE=154.886
ARIMA(0, 1, 0) RMSE=133.156
ARIMA(0, 1, 1) RMSE=104.077
ARIMA(0, 1, 2) RMSE=68.345
ARIMA(0, 1, 2) RMSE=68.345
ARIMA(0, 1, 2) RMSE=68.345
ARIMA(0, 1, 2) RMSE=68.345
ARIMA(0, 1, 2) RMSE=68.345
ARIMA(0, 1, 2) RMSE=68.345
ARIMA(0, 1, 2) RMSE=68.345
ARIMA(0, 1, 2) RMSE=68.345
ARIMA(0, 1, 2) RMSE=68.345
ARIMA(0, 1, 2) RMSE=68.345
ARIMA(0, 1, 2) RMSE=68.345
ARIMA(0, 1, 2) RMSE=68.345
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.511
ARIMA(1, 2, 2) RMSE=65.