In [1]:
import pandas as pd
import numpy as np
import os


# grid search holt winter's exponential smoothing
from math import sqrt
from multiprocessing import cpu_count
from joblib import Parallel
from joblib import delayed
from warnings import catch_warnings
from warnings import filterwarnings
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error
from numpy import array

In [2]:
# Load the three splits; the first CSV column becomes the row index.
train_df = pd.read_csv('data/train_fil_3.csv', index_col=0)
validation_df = pd.read_csv('data/validation_fil_3.csv', index_col=0)
test_df = pd.read_csv('data/test_fil_3.csv', index_col=0)

# read_csv yields string column labels; cast them to int so that columns
# can be selected with integer keys.
for df in (train_df, validation_df, test_df):
    df.columns = list(map(int, df.columns))

In [3]:
#function to calculate MAPE for all observations where y_true is not 0
def mape(y_true, y_predict):
    '''Return the mean absolute percentage error (MAPE), in percent.

    Observations whose true value is 0 contribute an error of 0; they are
    still counted in the mean.

    Parameters:
        y_true: 1-D sequence (list, NumPy array, pandas Series) of actual values.
        y_predict: sequence of predictions with the same length as y_true.

    Returns:
        The mean of |y_true - y_predict| / |y_true| * 100, or NaN when the
        inputs are empty.

    Raises:
        ValueError: if y_true and y_predict differ in shape.
    '''
    # Work on positional NumPy arrays: indexing a pandas Series with
    # 0..n-1 is label-based and fails (or picks the wrong rows) when its
    # index does not start at 0.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_predict, dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError('y_true and y_predict must have the same shape, got %s and %s'
                         % (actual.shape, predicted.shape))
    if actual.size == 0:
        return np.nan

    pct_errors = np.zeros(actual.shape, dtype=float)
    nonzero = actual != 0
    # Divide by |actual| so that negative true values do not produce
    # negative "absolute" percentage errors.
    pct_errors[nonzero] = (np.abs(actual[nonzero] - predicted[nonzero])
                           / np.abs(actual[nonzero]) * 100)
    return np.mean(pct_errors)

def median_pe(y_true, y_predict):
    '''Return the median absolute percentage error, in percent.

    Observations whose true value is 0 contribute an error of 0; they are
    still included when taking the median.

    Parameters:
        y_true: 1-D sequence (list, NumPy array, pandas Series) of actual values.
        y_predict: sequence of predictions with the same length as y_true.

    Returns:
        The median of |y_true - y_predict| / |y_true| * 100, or NaN when the
        inputs are empty.

    Raises:
        ValueError: if y_true and y_predict differ in shape.
    '''
    # Work on positional NumPy arrays: indexing a pandas Series with
    # 0..n-1 is label-based and fails (or picks the wrong rows) when its
    # index does not start at 0.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_predict, dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError('y_true and y_predict must have the same shape, got %s and %s'
                         % (actual.shape, predicted.shape))
    if actual.size == 0:
        return np.nan

    pct_errors = np.zeros(actual.shape, dtype=float)
    nonzero = actual != 0
    # Divide by |actual| so that negative true values do not produce
    # negative "absolute" percentage errors.
    pct_errors[nonzero] = (np.abs(actual[nonzero] - predicted[nonzero])
                           / np.abs(actual[nonzero]) * 100)
    return np.median(pct_errors)

In [4]:
# one-step Holt-Winters Exponential Smoothing forecast
def exp_smoothing_forecast(history, config):
	"""Fit an ExponentialSmoothing model on `history` and forecast one step ahead.

	`config` is a 6-item sequence unpacked, in order, into the values passed
	as trend, damped, seasonal, seasonal_periods (model constructor) and
	use_boxcox, remove_bias (fit).  Returns the single predicted value for
	the position immediately after the last observation.
	"""
	trend, damped, seasonal, periods, use_boxcox, remove_bias = config
	series = array(history)
	n_obs = len(series)
	fitted = ExponentialSmoothing(
		series,
		trend=trend,
		damped=damped,
		seasonal=seasonal,
		seasonal_periods=periods,
	).fit(optimized=True, use_boxcox=use_boxcox, remove_bias=remove_bias)
	# start == end == n_obs: predict only the first point past the history
	return fitted.predict(n_obs, n_obs)[0]

# walk-forward validation for univariate data
def walk_forward_validation(train, test, cfg):
	"""Walk-forward validation of one config on a univariate series.

	For every observation in `test`, a model is fitted on everything seen so
	far (`train` plus the test observations already visited), a one-step
	forecast is made, and the true observation is then appended to the
	history.

	Parameters:
		train: iterable of observations used to seed the history.
		test: iterable of observations to forecast one step at a time.
		cfg: model configuration forwarded to exp_smoothing_forecast.

	Returns:
		The MAPE (as computed by `mape`) of the one-step forecasts against
		the values in `test`.
	"""
	# Materialise both inputs as plain lists so every access is positional.
	# `test[i]` on a pandas Series is a label lookup and breaks when the
	# index is not 0..n-1.
	history = list(train)
	actuals = list(test)
	predictions = []
	for actual in actuals:
		# fit on the history so far and forecast the next point
		predictions.append(exp_smoothing_forecast(history, cfg))
		# reveal the true observation for the next iteration
		history.append(actual)
	return mape(actuals, predictions)
 
# score a model, return None on failure
def score_model(train, test, cfg, debug=False):
	"""Score one configuration with walk-forward validation.

	Parameters:
		train, test: series forwarded to walk_forward_validation.
		cfg: model configuration; its str() is used as the result key.
		debug: when True, warnings are shown and any exception propagates.
			When False, warnings are silenced and a failing config yields
			a result of None.

	Returns:
		A (key, result) tuple where key is str(cfg) and result is the
		validation error, or None if the config failed.
	"""
	key = str(cfg)
	result = None
	if debug:
		# show all warnings and fail on exception
		result = walk_forward_validation(train, test, cfg)
	else:
		# one failure during model validation suggests an unstable config
		try:
			# never show warnings when grid searching, too noisy
			with catch_warnings():
				filterwarnings("ignore")
				result = walk_forward_validation(train, test, cfg)
		except Exception:
			# Catch Exception only: a bare except would also swallow
			# KeyboardInterrupt/SystemExit and make the search
			# impossible to stop.
			result = None
	# report only configs that produced a score
	if result is not None:
		print(' > Model[%s] %.3f' % (key, result))
	return (key, result)
 
# grid search configs
def grid_search(train, test, cfg_list, parallel=True):
	"""Score every config in `cfg_list` and return the results, best first.

	Parameters:
		train, test: series forwarded to score_model for each config.
		cfg_list: iterable of model configurations.
		parallel: when True, configs are scored through joblib's Parallel
			with one job per CPU; otherwise they are scored sequentially.

	Returns:
		A list of (key, error) tuples sorted by ascending error.  Configs
		whose result is None (failed) are dropped; NaN errors are kept but
		placed after all finite ones.
	"""
	from math import isnan

	if parallel:
		# execute configs in parallel
		executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
		tasks = (delayed(score_model)(train, test, cfg) for cfg in cfg_list)
		scores = executor(tasks)
	else:
		scores = [score_model(train, test, cfg) for cfg in cfg_list]
	# remove empty results
	scores = [r for r in scores if r[1] is not None]

	def _sort_key(tup):
		# NaN compares False against everything, which would leave the
		# sort order undefined; push NaN scores to the end instead.
		err = tup[1]
		return (True, 0.0) if isnan(err) else (False, err)

	# sort configs by error, asc
	scores.sort(key=_sort_key)
	return scores
 
# create a set of exponential smoothing configs to try
def exp_smoothing_configs(seasonal=[None]):
	"""Build the full grid of exponential smoothing configs to try.

	Each config is a list [t, d, s, p, b, r]; `seasonal` supplies the
	candidate values for p.  The grid is the cartesian product of all
	option lists, with the rightmost entry varying fastest.
	"""
	trend_options = ['add', 'mul', None]
	damped_options = [True, False]
	seasonal_options = ['add', 'mul', None]
	period_options = seasonal
	boxcox_options = [True, False]
	bias_options = [True, False]
	return [
		[trend, damped, season, period, boxcox, bias]
		for trend in trend_options
		for damped in damped_options
		for season in seasonal_options
		for period in period_options
		for boxcox in boxcox_options
		for bias in bias_options
	]
 
# Driver: build the config grid, grid-search it, and print the three
# configs with the lowest validation error.
if __name__ == '__main__':
	# model configs: try seasonal periods of 7 and 365
	cfg_list = exp_smoothing_configs(seasonal=[7,365])
	# grid search (grid_search runs in parallel by default)
	# NOTE(review): training uses column 6 but validation uses column 5 --
	# confirm that scoring against a different column is intended.
	scores = grid_search(train_df[6], validation_df[5], cfg_list)
	print('done')
	# list top 3 configs (scores is sorted by ascending error)
	for cfg, error in scores[:3]:
		print(cfg, error)

 > Model[['add', True, 'add', 7, False, False]] 196.298
 > Model[['add', True, 'add', 7, False, True]] 193.649
 > Model[['add', True, 'add', 365, False, False]] 993.021
 > Model[['add', True, 'add', 365, False, True]] 992.330
 > Model[['add', True, None, 7, False, True]] 296.995
 > Model[['add', True, None, 365, False, True]] 296.995
 > Model[['add', False, 'add', 7, False, True]] 194.952
 > Model[['add', False, 'add', 7, False, False]] 197.830
 > Model[['add', True, None, 7, False, False]] 326.842
 > Model[['add', True, None, 365, False, False]] 326.842
 > Model[['add', False, None, 7, False, True]] 140.505
 > Model[['add', False, None, 7, False, False]] 151.670
 > Model[['add', False, None, 365, False, True]] 140.505
 > Model[['add', False, None, 365, False, False]] 151.670
 > Model[[None, False, 'add', 7, False, True]] 196.062
 > Model[[None, False, 'add', 7, False, False]] 198.858
 > Model[[None, False, None, 7, False, True]] 303.340
 > Model[[None, False, None, 7, False, False]] 3

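Best Holt-Winters parameters (each entry is [trend, damped, seasonal, seasonal_periods, use_boxcox, remove_bias] followed by its MAPE in % on the validation series):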
    * ['add', False, None, 7, False, True] 140.50459808989422
    * ['add', False, None, 365, False, True] 140.50459808989422
    * ['add', False, None, 7, False, False] 151.66966554579284