### Exponential Smoothing





In [1]:
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from random import random
import matplotlib.pyplot as plt 
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
import pandas as pd
import numpy as np
from pandas import datetime
from random import random
from pandas.plotting import lag_plot
from pandas.plotting import autocorrelation_plot
from statsmodels.graphics.tsaplots import plot_acf
from sklearn.metrics import mean_squared_error
from math import sqrt
from statsmodels.tsa.ar_model import AR
import warnings
warnings.filterwarnings('ignore')

# imports for the grid search over ETS (Holt-Winters exponential smoothing) hyperparameters
from math import sqrt
from multiprocessing import cpu_count
from joblib import Parallel
from joblib import delayed
from warnings import catch_warnings
from warnings import filterwarnings
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error
from pandas import read_csv
from numpy import array

In [3]:
# Load the dataset from a hard-coded local path; the first CSV column is used as the index.
data = pd.read_csv('/Users/alket/Desktop/dati/new_data_backfill_forwfill.csv',index_col = 0)

# Group the rows by their 'cell_num' value and keep only the rows of cell '488-1264'.
gbc = data.groupby(by = data['cell_num'])
cell_1 = gbc.get_group('488-1264')
# 'nr_people' column of that cell.
# NOTE(review): presumably the number of people observed per time step - confirm.
series1 = cell_1['nr_people']

In [4]:
# one-step Holt Winters Exponential Smoothing forecast
def exp_smoothing_forecast(history, config):
    """Fit a Holt-Winters model on ``history`` and forecast the next step.

    Args:
        history: sequence of past observations; converted to a numpy array.
        config: six-item sequence ``[trend, damped, seasonal,
            seasonal_periods, use_boxcox, remove_bias]``.

    Returns:
        The single forecast value for the step right after ``history``.
    """
    trend, damped, seasonal, periods, use_boxcox, remove_bias = config
    observations = array(history)
    # NOTE(review): newer statsmodels releases appear to rename ``damped`` to
    # ``damped_trend`` and take ``use_boxcox`` in the constructor - confirm
    # against the installed version before upgrading.
    fitted = ExponentialSmoothing(
        observations,
        trend=trend,
        damped=damped,
        seasonal=seasonal,
        seasonal_periods=periods,
    ).fit(optimized=True, use_boxcox=use_boxcox, remove_bias=remove_bias)
    # start == end == len(observations): request exactly one step
    next_step = len(observations)
    forecast = fitted.predict(next_step, next_step)
    return forecast[0]

# root mean squared error or rmse
def measure_rmse(actual, predicted):
    """Return the root mean squared error between ``actual`` and ``predicted``."""
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    """Split a univariate dataset into a leading train part and a trailing test part.

    Args:
        data: sliceable sequence with a length (list, numpy array, ...).
        n_test: number of trailing observations to hold out for testing.

    Returns:
        A ``(train, test)`` tuple of slices of ``data``. When ``n_test`` is at
        least ``len(data)`` the train part is empty and the test part is all
        of ``data``.

    Raises:
        ValueError: if ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError('n_test must be non-negative, got %r' % (n_test,))
    # Use an explicit split index instead of data[:-n_test]: with n_test == 0
    # the negative-slice form yields an empty train set and puts every
    # observation into the test set.
    split = max(len(data) - n_test, 0)
    return data[:split], data[split:]

In [5]:
# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
    """Walk-forward validation of one exponential smoothing config.

    The last ``n_test`` observations are held out. For each of them, a model
    is fitted on everything seen so far, a one-step forecast is recorded, and
    the true observation is then appended to the history.

    Args:
        data: univariate sequence of observations.
        n_test: number of trailing observations used for testing.
        cfg: model configuration passed through to ``exp_smoothing_forecast``.

    Returns:
        RMSE between the held-out observations and their one-step forecasts.
    """
    train, test = train_test_split(data, n_test)
    # seed history with the training observations
    history = list(train)
    predictions = []
    # Iterate over the observations themselves rather than test[i]: integer
    # lookups on a pandas Series are label-based, and a tail slice does not
    # carry the labels 0..n-1.
    for actual in test:
        predictions.append(exp_smoothing_forecast(history, cfg))
        # reveal the true value before forecasting the next step
        history.append(actual)
    return measure_rmse(test, predictions)



# score a model, return None on failure
def score_model(data, n_test, cfg, debug=False):
    """Score one config with walk-forward validation.

    Args:
        data: univariate sequence of observations.
        n_test: number of trailing observations used for testing.
        cfg: model configuration; ``str(cfg)`` is used as the result key.
        debug: when True, warnings are shown and any exception propagates.

    Returns:
        ``(key, result)`` where ``result`` is the RMSE, or ``None`` if the
        config failed during validation (only possible when ``debug`` is False).
    """
    key = str(cfg)
    if debug:
        # show all warnings and fail on exception
        result = walk_forward_validation(data, n_test, cfg)
    else:
        # one failure during model validation suggests an unstable config
        try:
            # never show warnings when grid searching, too noisy
            with catch_warnings():
                filterwarnings("ignore")
                result = walk_forward_validation(data, n_test, cfg)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still stop a long-running search.
            result = None
    # report only configs that produced a score
    if result is not None:
        print(' > Model[%s] %.3f' % (key, result))
    return (key, result)


# grid search configs
def grid_search(data, cfg_list, n_test, parallel=True):
    """Score every config in ``cfg_list`` and rank the ones that succeeded.

    Args:
        data: univariate sequence of observations.
        cfg_list: iterable of model configurations.
        n_test: number of trailing observations used for testing.
        parallel: when True, configs are scored in worker processes (one job
            per CPU) through joblib; otherwise they are scored sequentially.

    Returns:
        List of ``(key, rmse)`` tuples sorted by ascending RMSE. Configs whose
        score is ``None`` are dropped.
    """
    if parallel:
        # execute configs in parallel
        executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
        tasks = (delayed(score_model)(data, n_test, cfg) for cfg in cfg_list)
        scores = executor(tasks)
    else:
        scores = [score_model(data, n_test, cfg) for cfg in cfg_list]
    # remove empty results
    scores = [entry for entry in scores if entry[1] is not None]
    # sort configs by error, asc (stable, so ties keep their input order)
    scores.sort(key=lambda entry: entry[1])
    return scores

# create a set of exponential smoothing configs to try
def exp_smoothing_configs(seasonal=None):
    """Build the grid of exponential smoothing configs to try.

    Args:
        seasonal: iterable of candidate seasonal periods. Defaults to
            ``[None]`` (a single "no seasonal period" candidate).

    Returns:
        List of configs, each a list ``[trend, damped, seasonal,
        seasonal_periods, use_boxcox, remove_bias]``. The order is that of
        nested loops over the parameter lists in that sequence, with the
        last parameter varying fastest.
    """
    from itertools import product

    # None sentinel instead of a mutable list default shared across calls
    p_params = [None] if seasonal is None else list(seasonal)
    t_params = ['add', 'mul', None]
    d_params = [True, False]
    s_params = ['add', 'mul', None]
    b_params = [True, False]
    r_params = [True, False]
    grid = product(t_params, d_params, s_params, p_params, b_params, r_params)
    return [list(combo) for combo in grid]

### Trasforma dati 

In [6]:
def _two_digits(value):
    """Render a numeric hour/minute value (e.g. 5.0 or '5.0') as 'HH'/'MM'.

    The fractional part is discarded and the integer part is left-padded with
    a zero. The previous string slicing turned a minute value of 5 into '50'
    and failed on values without a decimal point.
    Raises ValueError/TypeError if ``value`` is not a finite number.
    """
    return '%02d' % int(float(value))


serie_dati = cell_1
counter = 0
dict2data = {}
error_list = []

print(counter)
counter +=1
dates4dec = []
cell_values = []

# Build one 'date HH:MM:00' timestamp string per row, alongside its
# 'nr_people' value, so both lists stay aligned by position.
for index, row in serie_dati.iterrows():
    data_f = '%s %s:%s:00' % (
        row['date'],
        _two_digits(row['hours']),
        _two_digits(row['minutes']),
    )
    cell_values.append(row['nr_people'])
    dates4dec.append(data_f)

0


In [7]:
# Two-column frame: 'ds' holds the timestamp strings, 'y' the observed values.
dict_i = {'ds': dates4dec, 'y': cell_values}
data4deco = pd.DataFrame(dict_i)
# preview the first rows
data4deco.head()

Unnamed: 0,ds,y
0,2017-04-16 00:00:00,394.0
1,2017-04-16 00:15:00,396.0
2,2017-04-16 00:30:00,387.0
3,2017-04-16 00:45:00,375.0
4,2017-04-16 01:00:00,365.0


In [8]:
# Grid-search driver: score every exponential smoothing config on the 'y'
# column of data4deco and keep the three best ones in data2dict.
if __name__ == '__main__':
    
    # maps str(config) -> RMSE for the best configs found
    data2dict = {}
    
    # load dataset
    #series = read_csv('../../data/monthly-mean-temp.csv', header=0, index_col=0) 
    #data = series.values
    data = data4deco['y']
    print(type(data))
    
    # promote the Series to a one-column DataFrame
    data = data.to_frame()
    print(type(data))
    
    # underlying numpy values; one column, so rows are single-element arrays
    data = data.values
    print(data[0:10])
    # data split: number of trailing observations held out for testing
    n_test = 12

    # model configs: candidate seasonal periods are 0 and 96
    # NOTE(review): 96 presumably is one day of 15-minute steps - confirm.
    cfg_list = exp_smoothing_configs(seasonal=[0,96])

    # grid search on the single column, passed as a 1-D array
    scores = grid_search(data[:,0], cfg_list, n_test)
    print('done')

    # list top 3 configs (scores is sorted by ascending error)
    for cfg, error in scores[:3]:
        print(cfg, error)
        data2dict[cfg]= error
        

<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
[[394.]
 [396.]
 [387.]
 [375.]
 [365.]
 [357.]
 [349.]
 [340.]
 [332.]
 [321.]]
 > Model[['add', True, 'add', 96, False, True]] 9.942
 > Model[['add', True, 'add', 96, False, False]] 9.942
 > Model[['add', True, 'add', 96, True, True]] 9.487
 > Model[['add', True, 'add', 96, True, False]] 9.503
 > Model[[None, False, 'add', 96, True, True]] 9.488
 > Model[['add', True, 'mul', 96, True, True]] 9.169
 > Model[['add', True, 'mul', 96, True, False]] 9.204
 > Model[[None, False, 'add', 96, True, False]] 9.504
 > Model[['add', True, None, 0, True, True]] 8.302
 > Model[[None, False, 'add', 96, False, True]] 9.964
 > Model[[None, False, 'add', 96, False, False]] 9.963
 > Model[['add', True, None, 0, True, False]] 8.290
 > Model[[None, False, 'mul', 96, True, True]] 9.168
 > Model[[None, False, 'mul', 96, True, False]] 9.203
 > Model[['add', True, None, 0, False, True]] 8.343
 > Model[[None, False, 'mul', 96, False, Tru

Process ForkPoolWorker-6:
Process ForkPoolWorker-5:
Process ForkPoolWorker-1:
Process ForkPoolWorker-4:
Process ForkPoolWorker-2:
Process ForkPoolWorker-3:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):


KeyboardInterrupt: 

In [32]:
# Display the best configs collected by the grid-search cell.
# NOTE(review): shown empty below, presumably because the search above was
# interrupted before any result was stored - confirm by re-running.
data2dict

{}