In [1]:
# load dataset
import os
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import simplejson as json
import numpy as np

# Read the raw CSV export into a DataFrame and preview its first five rows.
csv_path = "/users/akuppam/documents/Hprog/Py/rnb1015_2.csv"
df = pd.read_csv(csv_path)
df.head(5)

Unnamed: 0,date,region,marketing,visits,br,inq,gb,cb,nb,ss,ts,listings
0,1/1/16,AMR,NotPaid,1256288,6043,66022,6071,571,5500,485968.08,144695.8942,10242216
1,1/1/16,AMR,Paid,189297,1092,11806,743,2,741,0.0,14138.8096,2133795
2,1/1/16,UK,NotPaid,201797,435,9662,436,33,403,72698.92472,8317.105056,3561696
3,1/1/16,UK,Paid,55934,88,2749,61,1,60,0.0,815.336426,890424
4,1/1/16,CE,NotPaid,200786,573,9250,435,31,404,40490.73159,12016.78241,3062480


In [2]:
# Re-read the raw file so that re-running this cell always starts from the
# unfiltered data instead of filtering an already-filtered frame.
df = pd.read_csv("/users/akuppam/documents/Hprog/Py/rnb1015_2.csv")
# Keep only the UK / NotPaid segment.
df = df.loc[(df['region'] == 'UK') & (df['marketing'] == 'NotPaid')]
# Drop rows whose nb is negative (rows where nb is missing are kept by this mask).
df = df[~(df['nb'] < 0)]
# Index by parsed date so the target series is time-indexed.
df.index = pd.to_datetime(df.date)
cols = ["nb"]
y = df[cols]
y.head()

Unnamed: 0_level_0,nb
date,Unnamed: 1_level_1
2016-01-01,403
2016-01-02,451
2016-01-03,682
2016-01-04,672
2016-01-05,705


In [3]:
# Legend for the six-item parameter list [t, d, s, p, b, r] that the
# grid-search code later in this notebook unpacks and passes to
# ExponentialSmoothing / fit.
# NOTE(review): the values shown look like one example configuration — confirm.
# Being the last expression of the cell, the string is echoed as the cell output.
'''
t Trend: Additive
d Damped: False
s Seasonal: Additive
p Seasonal Periods: 12
b Box-Cox Transform: False
r Remove Bias: True
'''

'\nt Trend: Additive\nd Damped: False\ns Seasonal: Additive\np Seasonal Periods: 12\nb Box-Cox Transform: False\nr Remove Bias: True\n'

In [6]:
# going back to the original code
# make changes to params
# experiment with n_test
# finalize this
from math import sqrt
from multiprocessing import cpu_count
from joblib import Parallel
from joblib import delayed
from warnings import catch_warnings
from warnings import filterwarnings
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error
from pandas import read_csv
from numpy import array
 
# one-step Holt Winter’s Exponential Smoothing forecast
def exp_smoothing_forecast(history, hw_params):
	"""Fit Holt-Winters on `history` and return the forecast for the next step.

	history: sequence of past observations (converted to a numpy array).
	hw_params: six-item sequence unpacked as (trend, damped, seasonal,
		seasonal_periods, use_boxcox, remove_bias).
	Returns the first element of the one-step prediction.
	"""
	trend, damped, seasonal, periods, boxcox, debias = hw_params
	observed = array(history)
	fitted = ExponentialSmoothing(
		observed,
		trend=trend,
		damped=damped,
		seasonal=seasonal,
		seasonal_periods=periods,
	).fit(optimized=True, use_boxcox=boxcox, remove_bias=debias)
	# start == end == len(observed) asks for exactly one step past the data
	next_step = len(observed)
	return fitted.predict(next_step, next_step)[0]
 
# root mean squared error or rmse
def measure_rmse(actual, predicted):
	"""Return the square root of the mean squared error of `predicted` vs `actual`."""
	mse = mean_squared_error(actual, predicted)
	return sqrt(mse)
 
# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
	"""Split a univariate series into (train, test).

	data: any sliceable sequence with a length (list, numpy array, ...).
	n_test: number of trailing observations to hold out; must be >= 0.
	Returns a tuple (train, test) where test is the last n_test items.
	If n_test exceeds len(data), train is empty and test is all of data.
	Raises ValueError when n_test is negative.
	"""
	if n_test < 0:
		raise ValueError('n_test must be non-negative, got %r' % (n_test,))
	# Slice at an explicit index: with negative slicing, n_test == 0 would give
	# data[:-0] (empty) and data[-0:] (everything), i.e. the split inverted.
	split_at = max(len(data) - n_test, 0)
	return data[:split_at], data[split_at:]
 
# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, params):
	"""Walk-forward validation: forecast each held-out step, then reveal it.

	The last n_test observations are held out. For each of them the model is
	re-fitted on everything seen so far, a one-step forecast is recorded, and
	the true value is appended to the history. Returns the RMSE of the
	forecasts against the held-out values.
	"""
	train, test = train_test_split(data, n_test)
	# the history starts as a copy of the training part and grows by one per step
	history = list(train)
	predictions = []
	for step in range(len(test)):
		predictions.append(exp_smoothing_forecast(history, params))
		history.append(test[step])
	return measure_rmse(test, predictions)
 
# score a model, return None on failure
def score_model(data, n_test, params, debug=False):
	"""Score one parameter set with walk-forward validation.

	data: the series to validate on.
	n_test: number of trailing observations held out for validation.
	params: the parameter set; also used unchanged as the result key.
	debug: when true, warnings are shown and any exception propagates;
		otherwise warnings are silenced and a failing configuration
		yields a result of None.
	Returns a tuple (key, result) where result is the error or None.
	"""
	key = params
	result = None
	if debug:
		result = walk_forward_validation(data, n_test, params)
	else:
		# one failure during model validation suggests unstable params
		try:
			# never show warnings when grid searching
			with catch_warnings():
				filterwarnings("ignore")
				result = walk_forward_validation(data, n_test, params)
		except Exception:
			# Catch Exception rather than using a bare except so that
			# KeyboardInterrupt / SystemExit can still stop a long search.
			result = None
	# report only configurations that produced a score
	if result is not None:
		print(' > Model[%s] %.3f' % (key, result))
	return (key, result)
 
# grid search params
def grid_search(data, params_list, n_test, parallel=True):
	"""Score every parameter set and return the successful ones, best first.

	data: the series passed to score_model.
	params_list: iterable of parameter sets to evaluate.
	n_test: number of trailing observations held out for validation.
	parallel: when true, evaluate with joblib across cpu_count() processes;
		otherwise evaluate sequentially in this process.
	Returns a list of (params, error) tuples sorted by error ascending, with
	entries whose error is None removed.
	"""
	if parallel:
		# execute configs in parallel
		executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
		tasks = (delayed(score_model)(data, n_test, params) for params in params_list)
		scores = executor(tasks)
	else:
		scores = [score_model(data, n_test, params) for params in params_list]
	# drop failed configs; an identity check is the reliable test for None
	scores = [entry for entry in scores if entry[1] is not None]
	# sort params by error, asc
	scores.sort(key=lambda entry: entry[1])
	return scores
 
# create a set of exponential smoothing params
def exp_smoothing_params(seasonal=[None]):
	"""Build every combination of exponential-smoothing settings to try.

	seasonal: iterable of candidate seasonal-period values.
	Returns a list of six-item lists [t, d, s, p, b, r], enumerated with the
	trend option varying slowest and the remove-bias flag varying fastest.
	"""
	trend_options = ['add', 'mul', None]
	damped_options = [True, False]
	seasonal_options = ['add', 'mul', None]
	boxcox_options = [True, False]
	bias_options = [True, False]
	return [
		[t, d, s, p, b, r]
		for t in trend_options
		for d in damped_options
		for s in seasonal_options
		for p in seasonal
		for b in boxcox_options
		for r in bias_options
	]
 
if __name__ == '__main__':
	# load dataset: index the frame by parsed date and take the nb column
	df.index = pd.to_datetime(df.date)
	cols = ["nb"]
	y = df[cols]
	series_y = y['nb']
	data = series_y.values
	# number of trailing observations held out for walk-forward validation
	n_test = 200
	# model params: every combination, for each candidate seasonal period
	params_list = exp_smoothing_params(seasonal=[1,7,365])
	# grid search
	scores = grid_search(data, params_list, n_test)
	print('done')
	# Fail with a clear message if every configuration failed, instead of a
	# NameError on `params` further down.
	if not scores:
		raise RuntimeError('grid search produced no valid model configuration')
	# Report the best configuration (scores is sorted by error, ascending).
	# `params` is bound explicitly because the code after this block dumps it
	# to JSON.
	params, error = scores[0]
	print(params, error)

# Persist the selected configuration so it can be reloaded without repeating
# the grid search.
with open('model_params.json', 'w') as params_file:
    json.dump(params, params_file)

with open('model_params.json') as json_file:
    out_params = json.load(json_file)

# The list holds six items in the order [t, d, s, p, b, r].
t, d, s, p, b, r = out_params

# Refit on the full series with the selected configuration.
model = ExponentialSmoothing(series_y, trend=t, damped=d, seasonal=s, seasonal_periods=p)
model_fit = model.fit(optimized=True, use_boxcox=b, remove_bias=r)
predictions = model_fit.fittedvalues
yhat = model_fit.forecast(442)
# In-sample fitted values followed by the forecast. pd.concat is used because
# Series.append was deprecated and then removed in pandas 2.0.
series_hw = pd.concat([predictions, yhat])

 > Model[[None, False, 'add', 7, True, True]] 82.008
 > Model[['add', False, None, 7, True, True]] 117.334
 > Model[[None, False, 'add', 7, True, False]] 81.934
 > Model[['add', False, None, 7, True, False]] 117.320
 > Model[['add', False, None, 365, True, True]] 117.334
 > Model[['add', False, None, 365, True, False]] 117.320
 > Model[[None, False, 'add', 365, True, True]] 138.680
 > Model[['mul', False, 'add', 7, True, True]] 82.068
 > Model[[None, False, 'add', 365, True, False]] 138.282
 > Model[[None, False, 'mul', 7, True, True]] 81.620
 > Model[[None, False, 'mul', 7, True, False]] 81.552
 > Model[['mul', False, 'add', 7, True, False]] 82.027
 > Model[[None, False, 'mul', 365, True, True]] 140.944
 > Model[[None, False, 'mul', 365, True, False]] 140.684
 > Model[[None, False, None, 1, True, True]] 117.289
 > Model[[None, False, None, 1, True, False]] 117.228
 > Model[[None, False, None, 7, True, True]] 117.289
 > Model[[None, False, None, 7, True, False]] 117.228
 > Model[[None,

