In [1]:
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from random import random
import matplotlib.pyplot as plt 
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
import pandas as pd
import numpy as np
from pandas import datetime
from random import random
from pandas.plotting import lag_plot
from pandas.plotting import autocorrelation_plot
from statsmodels.graphics.tsaplots import plot_acf
from sklearn.metrics import mean_squared_error
from math import sqrt
from statsmodels.tsa.ar_model import AR
import warnings
warnings.filterwarnings('ignore')

# grid search ets hyperparameters for monthly mean temp dataset
from math import sqrt
from multiprocessing import cpu_count
from joblib import Parallel
from joblib import delayed
from warnings import catch_warnings
from warnings import filterwarnings
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error
from pandas import read_csv
from numpy import array


# one-step Holt Winters Exponential Smoothing forecast
def exp_smoothing_forecast(history, config):
    """Fit a Holt-Winters model on ``history`` and forecast one step ahead.

    ``config`` is a six-item sequence unpacked, in order, as
    (trend, damped, seasonal, seasonal_periods, use_boxcox, remove_bias).
    The first four go to the ``ExponentialSmoothing`` constructor, the last
    two to ``fit``.

    Returns the first element of the prediction made for index
    ``len(history)``.
    """
    trend, damped, seasonal, periods, use_boxcox, remove_bias = config
    series = array(history)
    n_obs = len(series)
    fitted = ExponentialSmoothing(
        series,
        trend=trend,
        damped=damped,
        seasonal=seasonal,
        seasonal_periods=periods,
    ).fit(optimized=True, use_boxcox=use_boxcox, remove_bias=remove_bias)
    # start == end == n_obs: request a single value just past the history
    forecast = fitted.predict(n_obs, n_obs)
    return forecast[0]

# root mean squared error or rmse
def measure_rmse(actual, predicted):
    """Return the square root of ``mean_squared_error(actual, predicted)``."""
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    """Split ``data`` into a leading train part and a trailing test part.

    The last ``n_test`` items form the test part; everything before them is
    the train part. ``data`` only needs to support ``len()`` and slicing.

    An explicit split index is used instead of negative slicing because
    ``data[:-0]`` is empty and ``data[-0:]`` is the whole sequence, which
    would swap the two parts when ``n_test`` is 0. If ``n_test`` exceeds
    ``len(data)`` the train part is empty and the test part is all of
    ``data``.

    Raises:
        ValueError: if ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError('n_test must be non-negative, got %r' % (n_test,))
    split_at = max(len(data) - n_test, 0)
    return data[:split_at], data[split_at:]


# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
    """Return the RMSE of one-step forecasts over the held-out tail of ``data``.

    ``data`` is split with ``train_test_split(data, n_test)``. For each test
    observation, in order, a forecast is produced by
    ``exp_smoothing_forecast(history, cfg)`` and the true observation is then
    appended to ``history``, so every forecast sees all earlier actual values.

    The test values are iterated directly rather than looked up as
    ``test[i]``, so the loop does not depend on how the container interprets
    integer keys.
    """
    train, test = train_test_split(data, n_test)
    # history starts as a copy of the training part and grows by one
    # observed value per step
    history = list(train)
    predictions = []
    for observed in test:
        predictions.append(exp_smoothing_forecast(history, cfg))
        history.append(observed)
    return measure_rmse(test, predictions)



# score a model, return None on failure
def score_model(data, n_test, cfg, debug=False):
    """Score one config and return ``(key, result)``.

    ``key`` is ``str(cfg)``. ``result`` is whatever
    ``walk_forward_validation(data, n_test, cfg)`` returns, or ``None`` if
    that call raised.

    With ``debug=True`` the validation runs unguarded: warnings are shown and
    any exception propagates to the caller. Otherwise warnings are suppressed
    and an exception marks the config as failed (``result`` is ``None``).
    """
    key = str(cfg)
    if debug:
        return (key, walk_forward_validation(data, n_test, cfg))

    result = None
    # one failure during model validation suggests an unstable config
    try:
        # never show warnings when grid searching, too noisy
        with catch_warnings():
            filterwarnings("ignore")
            result = walk_forward_validation(data, n_test, cfg)
    except Exception:
        # Exception, not a bare except: KeyboardInterrupt and SystemExit
        # must still be able to stop a long grid search.
        result = None
    return (key, result)


# grid search configs
def grid_search(data, cfg_list, n_test, parallel=True):
    """Score every config in ``cfg_list`` and return the successful ones.

    Each config is scored with ``score_model(data, n_test, cfg)``. With
    ``parallel=True`` the calls are dispatched through joblib using the
    'multiprocessing' backend with one job per CPU; otherwise they run
    sequentially in this process.

    Returns a list of ``(key, error)`` tuples sorted by ascending error.
    Entries whose error is ``None`` (failed configs) are dropped.
    """
    if parallel:
        # execute configs in parallel
        executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
        tasks = (delayed(score_model)(data, n_test, cfg) for cfg in cfg_list)
        scores = executor(tasks)
    else:
        scores = [score_model(data, n_test, cfg) for cfg in cfg_list]
    # remove empty results
    scores = [entry for entry in scores if entry[1] is not None]
    # sort configs by error, asc
    scores.sort(key=lambda entry: entry[1])
    return scores

# create a set of exponential smoothing configs to try
def exp_smoothing_configs(seasonal=(None,)):
    """Return every combination of exponential smoothing settings to try.

    Each config is a list ``[t, d, s, p, b, r]`` in the order that
    ``exp_smoothing_forecast`` unpacks it: trend, damped, seasonal,
    seasonal_periods, use_boxcox, remove_bias.

    ``seasonal`` is the iterable of candidate seasonal period values. The
    default is an immutable tuple holding only ``None``, so the default
    object cannot be shared and mutated across calls.

    Configs are ordered with trend varying slowest and remove_bias fastest.
    """
    from itertools import product

    # define config lists
    t_params = ['add', 'mul', None]
    d_params = [True, False]
    s_params = ['add', 'mul', None]
    p_params = seasonal
    b_params = [True, False]
    r_params = [True, False]
    # product() varies its last argument fastest, which gives the same order
    # as nesting the loops with trend outermost
    combos = product(t_params, d_params, s_params, p_params, b_params, r_params)
    return [list(combo) for combo in combos]


In [2]:
if __name__ == '__main__':
    # Everything that reads data, runs the search or writes results stays
    # under this guard. grid_search uses a multiprocessing backend, and
    # worker processes may re-import this module; code outside the guard
    # would then run in every worker (truncating the output file and failing
    # on the undefined data2dict).

    # best config string per cell, keyed by cell number
    data2dict = {}

    data = pd.read_csv('/Users/alket/Desktop/dati/new_data_backfill_forwfill.csv', index_col=0)

    # number of trailing observations held out for walk-forward validation
    n_test = 12
    # the config grid does not depend on the cell, so build it once
    cfg_list = exp_smoothing_configs()

    for cell_num, df_i in data.groupby(by=data['cell_num']):
        week = df_i
        print('week = ', len(week))

        # keep every 4th row, then only the first 168 of those
        week_red = week.iloc[::4, :]
        print('week reduced = ', len(week_red))
        week_red = week_red[:168]
        final_series = week_red['nr_people'].values
        print('final length ', len(final_series))
        # NOTE: this is the row count of the full dataset, not of this cell
        print(len(data))

        # grid search
        scores = grid_search(final_series, cfg_list, n_test)

        # keep only the best (lowest error) config; nothing is recorded for
        # a cell when every config failed
        for cfg, error in scores[:1]:
            configuration = cfg + ',' + str(error)
            print(cell_num, configuration)
            data2dict[cell_num] = configuration

    # One line per cell: <cell_num>:<config>,<error>
    # config fields: trend, damped, seasonal, seasonal_periods, Box-Cox, remove_bias
    # e.g. 486-1252:[None, False, None, 0, False, False],0.8660254037844386
    with open('BestETS_config_parametres.csv', 'w') as f:
        for key, value in data2dict.items():
            f.write('%s:%s\n' % (key, value))

    print('done')

week =  11808
week reduced =  2952
final length  168
2609568
486-1252 ['mul', True, None, None, False, True],2.915592748982327
week =  11808
week reduced =  2952
final length  168
2609568
486-1253 ['mul', True, None, None, True, True],3.2107662668349994
week =  11808
week reduced =  2952
final length  168
2609568
486-1254 ['mul', True, None, None, True, True],3.4101502458130963
week =  11808
week reduced =  2952
final length  168
2609568
486-1255 ['mul', True, None, None, True, True],3.1682062881637596
week =  11808
week reduced =  2952
final length  168
2609568
486-1256 ['mul', True, None, None, True, True],3.4381780287105923
week =  11808
week reduced =  2952
final length  168
2609568
486-1257 ['add', True, None, None, True, True],2.92318285562195
week =  11808
week reduced =  2952
final length  168
2609568
486-1258 ['add', True, None, None, True, True],2.9317843425449666
week =  11808
week reduced =  2952
final length  168
2609568
486-1259 ['add', True, None, None, False, False],3.2

489-1265 ['add', True, None, None, True, False],10.049706412215379
week =  11808
week reduced =  2952
final length  168
2609568
489-1266 ['add', True, None, None, False, False],14.813955385937623
week =  11808
week reduced =  2952
final length  168
2609568
489-1267 ['add', True, None, None, False, False],5.316371660629688
week =  11808
week reduced =  2952
final length  168
2609568
489-1268 ['mul', True, None, None, False, False],7.114156114522284
week =  11808
week reduced =  2952
final length  168
2609568
490-1252 ['add', True, None, None, False, False],14.778690331927434
week =  11808
week reduced =  2952
final length  168
2609568
490-1253 ['mul', True, None, None, True, True],6.221357925504476
week =  11808
week reduced =  2952
final length  168
2609568
490-1254 ['mul', True, None, None, True, True],28.089440632400482
week =  11808
week reduced =  2952
final length  168
2609568
490-1255 ['mul', True, None, None, True, True],8.07875881296418
week =  11808
week reduced =  2952
final 

493-1262 [None, False, None, None, False, False],1.3228756555322954
week =  11808
week reduced =  2952
final length  168
2609568
493-1263 [None, False, None, None, True, False],1.2909944487358023
week =  11808
week reduced =  2952
final length  168
2609568
493-1264 [None, False, None, None, True, False],1.1902380714237975
week =  11808
week reduced =  2952
final length  168
2609568
493-1265 [None, False, None, None, True, False],1.1902380714238079
week =  11808
week reduced =  2952
final length  168
2609568
493-1266 ['mul', False, None, None, True, True],0.40762783895988736
week =  11808
week reduced =  2952
final length  168
2609568
493-1267 ['add', True, None, None, False, False],2.173791167812808
week =  11808
week reduced =  2952
final length  168
2609568
493-1268 [None, False, None, None, True, False],3.5118845842842363
week =  11808
week reduced =  2952
final length  168
2609568
494-1252 ['mul', False, None, None, False, False],1.8455773035690777
week =  11808
week reduced =  295

497-1258 ['add', True, None, None, False, True],3.0568452641988584
week =  11808
week reduced =  2952
final length  168
2609568
497-1259 ['mul', False, None, None, False, False],4.7159997450213735
week =  11808
week reduced =  2952
final length  168
2609568
497-1260 ['mul', True, None, None, False, False],2.5684915516264826
week =  11808
week reduced =  2952
final length  168
2609568
497-1261 [None, False, None, None, True, False],0.5807431085828582
week =  11808
week reduced =  2952
final length  168
2609568
497-1262 ['add', True, None, None, False, True],0.6439873758816783
week =  11808
week reduced =  2952
final length  168
2609568
497-1263 ['mul', False, None, None, True, False],0.6859224534332449
week =  11808
week reduced =  2952
final length  168
2609568
497-1264 ['mul', True, None, None, False, True],0.48763914455805174
week =  11808
week reduced =  2952
final length  168
2609568
497-1265 [None, False, None, None, True, False],0.40561399763649153
week =  11808
week reduced =  2