In [18]:
# grid search sarima hyperparameters for daily female dataset
import pandas as pd

import joblib
import multiprocessing as mp


from math import sqrt
from multiprocessing import cpu_count
from joblib import Parallel
from joblib import delayed
from warnings import catch_warnings
from warnings import filterwarnings
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
from pandas import read_csv

In [19]:
test_data = pd.read_csv("../../data/test.csv",delimiter=";", decimal=",")
test_data.head(10).append(test_data.tail(10))

Unnamed: 0,Index,Val_1,Val_2,Val_3
0,1,26.95,10.771,22.77
1,2,46.662,10.195,22.66
2,3,30.47,12.611,13.53
3,4,24.794,14.682,18.249
4,5,35.849,14.277,23.727
5,6,42.119,10.278,32.34
6,7,48.004,12.426,28.391
7,8,39.413,12.568,30.624
8,9,26.873,14.962,19.613
9,10,22.352,14.336,17.204


In [20]:
val_1_data = test_data["Val_1"]
val_2_data = test_data["Val_2"]
val_3_data = test_data["Val_3"]

In [21]:
# one-step sarima forecast
def sarima_forecast(history, config):
    order, sorder, trend = config
    # define model
    model = SARIMAX(history, order=order, seasonal_order=sorder, trend=trend, enforce_stationarity=False, enforce_invertibility=False)
    # fit model
    model_fit = model.fit(disp=False)
    # make one step forecast
    yhat = model_fit.predict(len(history), len(history))
    return yhat[0]


In [22]:
# root mean squared error or rmse
def measure_rmse(actual, predicted):
    return sqrt(mean_squared_error(actual, predicted))

In [23]:
def measure_mape(val_1_data, n_test):
    abs_err = np.divide(np.abs(np.subtract(val_1_data[:24], val_1_data[-n_test:])),val_1_data[-n_test:])
    ape = np.multiply(abs_err, np.full(shape=abs_err.shape, fill_value=100))
    shift_adjusted_mape = np.mean(ape, axis=0)
    print("MAPE with adjusted time series: %s" % np.array2string(shift_adjusted_mape))

In [24]:
# walk-forward validation for univariate data
def walk_forward_validation(val_1_data, n_test, cfg):
    predictions = list()
    # split dataset
    train, test = train_test_split(val_1_data, n_test)
    # seed history with training dataset
    history = [x for x in train]
    # step over each time-step in the test set
    for i in range(len(test)):
        # fit model and make forecast for history
        yhat = sarima_forecast(history, cfg)
        # store forecast in list of predictions
        predictions.append(yhat)
        # add actual observation to history for the next loop
        history.append(test[i])
    # estimate prediction error
    error = measure_rmse(test, predictions)
    error2 = measure_mape(test, n_test)
    return error

In [25]:
# score a model, return None on failure
def score_model(val_1_data, n_test, cfg, debug=False):
    result = None
    # convert config to a key
    key = str(cfg)
    # show all warnings and fail on exception if debugging
    if debug:
        result = walk_forward_validation(val_1_data, n_test, cfg)
    else:
        # one failure during model validation suggests an unstable config
        try:
            # never show warnings when grid searching, too noisy
            with catch_warnings():
                filterwarnings("ignore")
                result = walk_forward_validation(val_1_data, n_test, cfg)
        except:
            error = None
    # check for an interesting result
    if result is not None:
        print(' > Model[%s] %.3f' % (key, result))
    return (key, result)

In [26]:
# grid search configs
def grid_search(val_1_data, cfg_list, n_test, parallel=True):
    scores = None
    if parallel:
        # execute configs in parallel
        executor = joblib.Parallel(n_jobs=mp.cpu_count(), backend='multiprocessing')
        tasks = (delayed(score_model)(val_1_data, n_test, cfg) for cfg in cfg_list)
        scores = executor(tasks)
    else:
        scores = [score_model(val_1_data, n_test, cfg) for cfg in cfg_list]
    # remove empty results
    scores = [r for r in scores if r[1] != None]
    # sort configs by error, asc
    scores.sort(key=lambda tup: tup[1])
    return scores


In [27]:
# create a set of sarima configs to try
def sarima_configs(seasonal=[0]):
    models = list()
    # define config lists
    p_params = [0, 1, 2]
    d_params = [0, 1]
    q_params = [0, 1, 2]
    t_params = ['n','c','t','ct']
    P_params = [0, 1, 2]
    D_params = [0, 1]
    Q_params = [0, 1, 2]
    m_params = seasonal
    # create config instances
    for p in p_params:
        for d in d_params:
            for q in q_params:
                for t in t_params:
                    for P in P_params:
                        for D in D_params:
                            for Q in Q_params:
                                for m in m_params:
                                    cfg = [(p,d,q), (P,D,Q,m), t]
                                    models.append(cfg)
    return models

In [31]:
if __name__ == '__main__':
    # load dataset
    print(val_1_data.shape)
    # data split
    n_test = 165
    # model configs
    cfg_list = sarima_configs()
    # grid search
    scores = grid_search(val_1_data, cfg_list, n_test)
    print('done')
    # list top 3 configs
    print('scores', scores)
    for cfg, error in scores[:3]:
        print("hello")
        print(cfg, error)

(43,)
done
scores []
