In [1]:
# grid search sarima hyperparameters for monthly mean temp dataset
from math import sqrt
from multiprocessing import cpu_count
from joblib import Parallel
from joblib import delayed
from warnings import catch_warnings
from warnings import filterwarnings
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import pandas as pd

# one-step sarima forecast
def sarima_forecast(history, config):
    order, sorder, trend = config
    # define model
    model = SARIMAX(history, order=order, seasonal_order=sorder, trend=trend,
    enforce_stationarity=False, enforce_invertibility=False)
    # fit model
    model_fit = model.fit(disp=False)
    # make one step forecast
    yhat = model_fit.predict(len(history), len(history))
    return yhat[0]

# root mean squared error or rmse
def measure_rmse(actual, predicted):
    return sqrt(mean_squared_error(actual, predicted))

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    return data[:-n_test], data[-n_test:]

In [2]:
# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
    predictions = list()
    # split dataset
    train, test = train_test_split(data, n_test)
    # seed history with training dataset
    history = [x for x in train]
    # step over each time-step in the test set
    for i in range(len(test)):
        # fit model and make forecast for history
        yhat = sarima_forecast(history, cfg)
        # store forecast in list of predictions
        predictions.append(yhat)
        # add actual observation to history for the next loop
        history.append(test[i])
        # estimate prediction error
        error = measure_rmse(test, predictions)
    return error

# score a model, return None on failure
def score_model(data, n_test, cfg, debug=False):
    result = None
    # convert config to a key
    key = str(cfg)
    print('trying ', key)
    # show all warnings and fail on exception if debugging
    if debug:
        result = walk_forward_validation(data, n_test, cfg)
    else:
        # one failure during model validation suggests an unstable config
        try:
            # never show warnings when grid searching, too noisy
            with catch_warnings():
                filterwarnings("ignore")
            result = walk_forward_validation(data, n_test, cfg)
        except:
            error = None

    # check for an interesting result
    if result is not None:
        print(' > Model[%s] %.3f' % (key, result))
    return (key, result)

# grid search configs
def grid_search(data, cfg_list, n_test, parallel=True):
    scores = None
    if parallel:
       # execute configs in parallel
        executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
        tasks = (delayed(score_model)(data, n_test, cfg) for cfg in cfg_list)
        scores = executor(tasks)
    else:
        scores = [score_model(data, n_test, cfg) for cfg in cfg_list]
        # remove empty results
        scores = [r for r in scores if r[1] != None]
        # sort configs by error, asc
        scores.sort(key=lambda tup: tup[1])
    return scores

In [3]:
# create a set of sarima configs to try
def sarima_configs(seasonal=[0]):
    models = list()
    # define config lists
    p_params = [0, 1,] #2
    d_params = [0, 1]
    q_params = [0, 1 ] #2
    t_params = ['n']
    P_params = [0, 1 ] #2
    D_params = [0, 1]
    Q_params = [0, 1] #2
    m_params = seasonal

    # create config instances
    for p in p_params:
        for d in d_params:
            for q in q_params:
                for t in t_params:
                    for P in P_params:
                        for D in D_params:
                            for Q in Q_params:
                                cfg = [(p,d,q), (P,D,Q,24), t]
                                models.append(cfg)
    return models;                            

In [5]:
if __name__ == '__main__':
    # load dataset
    series = pd.read_csv('/Users/alket/Desktop/dati/new_data_backfill_forwfill.csv', index_col = 0, header=0, 
                         parse_dates=True)
    data = series.iloc[::4,:]

    data = series['nr_people'].values
    data = data[:12]
    print(len(data))
    # data split
    n_test = 3
    # model configs
    cfg_list = sarima_configs(seasonal=24)
    print(cfg_list)
    # grid search
    scores = grid_search(data, cfg_list, n_test)
    print('done')
    # list top 3 configs
    for cfg, error in scores[:3]:
        print(cfg, error)

12
[[(0, 0, 0), (0, 0, 0, 24), 'n'], [(0, 0, 0), (0, 0, 1, 24), 'n'], [(0, 0, 0), (0, 1, 0, 24), 'n'], [(0, 0, 0), (0, 1, 1, 24), 'n'], [(0, 0, 0), (1, 0, 0, 24), 'n'], [(0, 0, 0), (1, 0, 1, 24), 'n'], [(0, 0, 0), (1, 1, 0, 24), 'n'], [(0, 0, 0), (1, 1, 1, 24), 'n'], [(0, 0, 1), (0, 0, 0, 24), 'n'], [(0, 0, 1), (0, 0, 1, 24), 'n'], [(0, 0, 1), (0, 1, 0, 24), 'n'], [(0, 0, 1), (0, 1, 1, 24), 'n'], [(0, 0, 1), (1, 0, 0, 24), 'n'], [(0, 0, 1), (1, 0, 1, 24), 'n'], [(0, 0, 1), (1, 1, 0, 24), 'n'], [(0, 0, 1), (1, 1, 1, 24), 'n'], [(0, 1, 0), (0, 0, 0, 24), 'n'], [(0, 1, 0), (0, 0, 1, 24), 'n'], [(0, 1, 0), (0, 1, 0, 24), 'n'], [(0, 1, 0), (0, 1, 1, 24), 'n'], [(0, 1, 0), (1, 0, 0, 24), 'n'], [(0, 1, 0), (1, 0, 1, 24), 'n'], [(0, 1, 0), (1, 1, 0, 24), 'n'], [(0, 1, 0), (1, 1, 1, 24), 'n'], [(0, 1, 1), (0, 0, 0, 24), 'n'], [(0, 1, 1), (0, 0, 1, 24), 'n'], [(0, 1, 1), (0, 1, 0, 24), 'n'], [(0, 1, 1), (0, 1, 1, 24), 'n'], [(0, 1, 1), (1, 0, 0, 24), 'n'], [(0, 1, 1), (1, 0, 1, 24), 'n'], [(0, 1



trying  [(1, 1, 1), (1, 1, 1, 24), 'n']
trying  [(1, 0, 1), (0, 0, 1, 24), 'n']
trying  [(1, 0, 1), (0, 1, 0, 24), 'n']
trying  [(1, 0, 1), (0, 1, 1, 24), 'n']
trying  [(1, 0, 1), (1, 0, 0, 24), 'n']
trying  [(1, 0, 1), (1, 0, 1, 24), 'n']
trying  [(1, 0, 1), (1, 1, 0, 24), 'n']
trying  [(1, 0, 1), (1, 1, 1, 24), 'n']
trying  [(1, 1, 0), (0, 0, 0, 24), 'n']
trying  [(1, 1, 0), (0, 0, 1, 24), 'n']
trying  [(1, 1, 0), (0, 1, 0, 24), 'n']
trying  [(1, 1, 0), (0, 1, 1, 24), 'n']
trying  [(1, 1, 0), (1, 0, 0, 24), 'n']
trying  [(1, 1, 0), (1, 0, 1, 24), 'n']
trying  [(1, 1, 0), (1, 1, 0, 24), 'n']
trying  [(1, 1, 0), (1, 1, 1, 24), 'n']


Process ForkPoolWorker-16:
Process ForkPoolWorker-9:
Process ForkPoolWorker-14:
Process ForkPoolWorker-12:
Process ForkPoolWorker-10:
Process ForkPoolWorker-15:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/alket/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Users/alket/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Users/alket/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Users/alket/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Users/alket/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/Users/alket/anaconda3/lib/python3.7/multiprocessing/pro

KeyboardInterrupt: 