In [15]:
import datetime
import time
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.optimizers import SGD
from sklearn.preprocessing import MinMaxScaler
from keras import metrics


from statsmodels.compat.pandas import deprecate_kwarg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
# additive decompose a contrived additive time series
from random import randrange
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose 

# the main library has a small set of functionality
from stldecompose import decompose, forecast
from stldecompose.forecast_funcs import (naive,
                                         drift, 
                                         mean, 
                                         seasonal_naive)
%load_ext autoreload
%autoreload 2

# helper functions that prepare the data and compute the forecasts
def split_sequence(sequence, n_steps_in, n_steps_out):
    """Cut a sequence into overlapping (input, output) windows.

    Each window starts one position after the previous one and consists of
    ``n_steps_in`` input values immediately followed by ``n_steps_out``
    output values. Only windows that fit completely inside ``sequence``
    are produced.

    Args:
        sequence: indexable, sliceable sequence of values.
        n_steps_in: number of values in each input window.
        n_steps_out: number of values in each output window.

    Returns:
        A tuple ``(X, y)`` of numpy arrays built from the input windows and
        the matching output windows.
    """
    length = len(sequence)
    span = n_steps_in + n_steps_out
    # A window starting at `first` is complete only while first + span <= length;
    # start positions are additionally limited to valid indices of the sequence.
    n_windows = min(length, length - span + 1)

    inputs = [sequence[first:first + n_steps_in] for first in range(n_windows)]
    targets = [sequence[first + n_steps_in:first + span] for first in range(n_windows)]
    return np.array(inputs), np.array(targets)


def get_forcast_per_component(series, st_in, st_out, train_test_size):
    """Train a small dense network on one series and forecast its held-out windows.

    The series is cut into sliding windows with ``split_sequence``. The first
    ``train_test_size`` windows are used to fit the model; every remaining
    window is then predicted.

    Args:
        series: sequence of values to model.
        st_in: number of past steps fed to the network per sample.
        st_out: number of future steps predicted per sample.
        train_test_size: number of leading windows used for training.

    Returns:
        Numpy array with one row of ``st_out`` predictions (rounded to one
        decimal) per held-out window, or an empty array when no window is
        left after the training split.
    """
    # split into samples
    X, y = split_sequence(series, st_in, st_out)

    train_X, test_X = X[:train_test_size], X[train_test_size:]
    train_y = y[:train_test_size]

    # define model: one hidden layer, linear output with st_out units
    model = Sequential()
    model.add(Dense(500, activation='relu', input_dim=st_in))
    model.add(Dense(st_out))

    # 'accuracy' is a classification metric and carries no meaning for this
    # MSE regression, so only the mean absolute error is tracked.
    model.compile(optimizer='adam', loss='mse', metrics=[metrics.mae])

    # fit model
    model.fit(train_X, train_y, batch_size=64, epochs=50, verbose=0)

    # Nothing to forecast: return an empty array rather than calling
    # predict() on an empty batch.
    if len(test_X) == 0:
        return np.array([])

    # Predict all held-out windows in a single batched call instead of one
    # predict() call per window.
    yhat = model.predict(test_X.reshape(-1, st_in), verbose=0)
    return np.around(yhat, decimals=1)


def _two_digits(value):
    """Return a clock field such as 7.0 or '15.0' as a zero-padded string ('07', '15')."""
    return '%02d' % int(float(value))


# load the data
data = pd.read_csv('/Users/alket/Desktop/dati/new_data_backfill_forwfill.csv', index_col=0,
                   header=0, parse_dates=True)

# group the rows by cell
agg_by_cell = data.groupby(by=['cell_num'])

# progress counter and per-cell error statistics
counter = 0
dict2data = {}

# settings shared by every cell (loop invariant)
decompfreq = 96  # seasonal period passed to the decomposition
# Number of values dropped at each end of every component; the decomposition
# leaves NaN there, so only the non-NaN middle part is kept.
start = decompfreq // 2
n_steps_in, n_steps_out = 26, 25
train_test_size = 9000

# iterate over all cells
for cell, kk in agg_by_cell:
    print(counter)
    counter += 1

    # Build one 'date HH:MM:00' timestamp string per row. Hours and minutes are
    # zero-padded on the left, so a one-digit minute such as 5 becomes '05'.
    timestamps = [
        '%s %s:%s:00' % (date, _two_digits(hour), _two_digits(minute))
        for date, hour, minute in zip(kk['date'], kk['hours'], kk['minutes'])
    ]

    data4deco = pd.DataFrame({'ds': pd.to_datetime(timestamps),
                              'y': kk['nr_people'].values})
    data4deco = data4deco.set_index('ds')

    # decompose the series into trend, seasonal and residual components
    # NOTE(review): recent statsmodels releases renamed `freq` to `period`;
    # switch the keyword if the installed version rejects `freq` - TODO confirm.
    decomp = seasonal_decompose(data4deco['y'], freq=decompfreq, model='additive')

    trend = decomp.trend.values
    seasonal = decomp.seasonal.values
    residual = decomp.resid.values

    # keep only the middle part of each component (see `start` above)
    data_trend = trend[start:-start]
    data_residual = residual[start:-start]
    data_seasonal = seasonal[start:-start]

    # Without at least one window beyond the training split there is nothing
    # to evaluate; skip the cell instead of failing on empty arrays below.
    n_windows = len(data_trend) - n_steps_in - n_steps_out + 1
    if n_windows <= train_test_size:
        print('Skipping cell', cell, '- not enough samples for a test set')
        continue

    # forecast every component separately
    forcasted_trend = get_forcast_per_component(data_trend, n_steps_in, n_steps_out, train_test_size)
    forcasted_residual = get_forcast_per_component(data_residual, n_steps_in, n_steps_out, train_test_size)
    forcasted_season = get_forcast_per_component(data_seasonal, n_steps_in, n_steps_out, train_test_size)

    # combine the component forecasts (additive model)
    final_prediction = forcasted_trend + forcasted_residual + forcasted_season

    # window the original series the same way
    X, y = split_sequence(kk['nr_people'].values, n_steps_in, n_steps_out)

    # Drop `start` windows at each end so that the windows line up with the
    # ones built from the trimmed components.
    X = X[start:-start]
    y = y[start:-start]

    # train/test split of the original series
    train_X, test_X = X[:train_test_size], X[train_test_size:]
    train_y, test_y = y[:train_test_size], y[train_test_size:]

    # the held-out targets are the expected values
    expected = test_y

    # absolute error between expected and predicted values
    difference = np.abs(expected - final_prediction)

    # flatten to one absolute error per predicted step
    mean_error = difference.ravel()
    print('Mean error', np.mean(mean_error))

    # store min, quartiles and max of the absolute error for this cell
    per25, per50, per75 = np.percentile(mean_error, [25, 50, 75])
    dict2data[cell] = [np.amin(mean_error), per25, per50, per75, np.amax(mean_error)]


# write one 'cell:[min, q25, q50, q75, max]' line per cell
with open('error_data_4_MLP_with_Additive_Decomposition_25.csv', 'w') as f:
    for key, value in dict2data.items():
        f.write('%s:%s\n' % (key, value))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
0
Mean error 1.714109708068068
1
Mean error 1.9581156977847678
2
Mean error 2.029245667059918
3
Mean error 2.1679534098727644
4
Mean error 2.136533416759511
5
Mean error 1.7132321451303207
6
Mean error 1.5667663842336654
7
Mean error 1.5937490931710832
8
Mean error 1.5514711033214221
9
Mean error 1.5475973000014125
10
Mean error 1.5267918832780363
11
Mean error 17.188029941756593
12
Mean error 19.46611418164466
13
Mean error 11.655484612032849
14
Mean error 4.03734787010233
15
Mean error 0.7643471464333366
16
Mean error 0.8251314571331893
17
Mean error 1.7777325411939155
18
Mean error 2.269356878697648
19
Mean error 2.080276486907661
20
Mean error 2.0849812294252827
21
Mean error 2.1156408748153814
22
Mean error 2.099338835638148
23
Mean error 1.759033783383158
24
Mean error 1.685066883737749
25
Mean error 1.61388880299053
26
Mean error 1.605595816448163
27
Mean error 2.6471930657608183
28
Mean erro