In [12]:
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import metrics

from statsmodels.compat.pandas import deprecate_kwarg
import pandas as pd
import numpy as np


# the main library has a small set of functionality
from statsmodels.tsa.seasonal import seasonal_decompose

In [13]:
# split a univariate sequence into samples
def split_sequence(sequence, n_steps_in, n_steps_out):
    """Cut a univariate sequence into sliding (input, output) window pairs.

    Window ``k`` uses ``sequence[k:k + n_steps_in]`` as input and the
    ``n_steps_out`` values that immediately follow as output. Only windows
    that fit entirely inside ``sequence`` are produced.

    Returns:
        A tuple ``(X, y)`` of numpy arrays built from the collected input
        and output windows. When no window fits, both arrays are empty.
    """
    total = len(sequence)
    span = n_steps_in + n_steps_out
    # One candidate start per element at most; stop at the last start whose
    # input + output span still ends inside the sequence.
    n_windows = min(total, total - span + 1)

    inputs = [sequence[start:start + n_steps_in] for start in range(n_windows)]
    targets = [sequence[start + n_steps_in:start + span] for start in range(n_windows)]
    return np.array(inputs), np.array(targets)

def get_forcast_per_component(series, st_in, st_out, train_test_size, epochs=150):
    """Fit a small 1-D CNN on one series and forecast its hold-out windows.

    The series is cut into sliding windows with ``split_sequence``; the first
    ``train_test_size`` windows are used for training and the remaining ones
    are forecast.

    Args:
        series: One-dimensional sequence of values to model.
        st_in: Number of time steps fed to the network per sample.
        st_out: Number of time steps predicted per sample.
        train_test_size: Number of leading windows used for training.
        epochs: Number of training epochs (defaults to the original 150).

    Returns:
        Array of shape ``(n_test_windows, st_out)`` with the predictions
        rounded to one decimal. Empty (``(0, st_out)``) when no window is
        left for testing.

    Raises:
        ValueError: If ``series`` is too short to yield a single window.
    """
    # split into samples
    X, y = split_sequence(series, st_in, st_out)
    if len(X) == 0:
        # Fail with a clear message instead of an IndexError on X.shape[1].
        raise ValueError(
            "series is too short for st_in=%s and st_out=%s: no window fits"
            % (st_in, st_out)
        )

    # Conv1D expects (samples, time steps, features); the series is univariate.
    n_features = 1
    X = X.reshape((X.shape[0], X.shape[1], n_features))

    train_X, test_X = X[:train_test_size], X[train_test_size:]
    train_y = y[:train_test_size]

    # define model
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(st_in, n_features)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(50, activation='relu'))
    model.add(Dense(st_out))
    # Only MAE is tracked: classification accuracy is not meaningful for a
    # regression target.
    model.compile(optimizer='adam', loss='mse', metrics=[metrics.mae])

    # fit model
    model.fit(train_X, train_y, epochs=epochs, verbose=0)

    # Nothing to forecast: return an empty array that keeps the 2-D layout.
    if len(test_X) == 0:
        return np.empty((0, st_out))

    # Predict every hold-out window in one batched call instead of invoking
    # model.predict once per sample.
    predicted = model.predict(test_X, verbose=0)
    return np.around(predicted, decimals=1)


In [16]:
# Load the transactions and build the series of transaction counts per
# distinct `date` value.
trs = pd.read_csv("data/transactions.csv")
trs['date'] = pd.to_datetime(trs['date'])

# Order the counts chronologically (by the date index). Sorting by the count
# values would destroy the time ordering that the decomposition relies on.
# The column is named explicitly so the lookup below does not depend on how
# the installed pandas version names the result of value_counts().
t2day = trs['date'].value_counts().sort_index().to_frame(name='date')
series = t2day['date']

In [25]:
# Decompose the series into trend, seasonal and residual components using a
# multiplicative model with a seasonal period of 7 observations.
# `period` replaces the deprecated `freq` keyword of seasonal_decompose.
dati_scomp = seasonal_decompose(series, model='multiplicative', period=7)

# Plain numpy views of each component for the forecasting step.
trend = dati_scomp.trend.values
seasonal = dati_scomp.seasonal.values
residual = dati_scomp.resid.values

  dati_scomp = seasonal_decompose(series, model = 'multiplicative', freq = 7)


In [29]:
# Drop the positions where any component is NaN, using ONE shared mask.
# Filtering each array with its own mask would leave arrays of different
# lengths whenever the NaNs are not at the same positions in every component,
# and element i of one component would no longer match element i of another.
valid = ~(np.isnan(trend) | np.isnan(seasonal) | np.isnan(residual))
trend = trend[valid]
seasonal = seasonal[valid]
residual = residual[valid]

In [None]:
# set the forecast window sizes and call get_forcast_per_component on each component
# Each sample uses n_steps_in observations to predict the next n_steps_out.
n_steps_in, n_steps_out = 26, 26
train_test_size = 9000

# Keep only the positions where every decomposition component is defined, so
# that the three components and the observed series stay index-aligned.
raw_trend = np.asarray(dati_scomp.trend, dtype=float)
raw_seasonal = np.asarray(dati_scomp.seasonal, dtype=float)
raw_residual = np.asarray(dati_scomp.resid, dtype=float)
defined = ~(np.isnan(raw_trend) | np.isnan(raw_seasonal) | np.isnan(raw_residual))
observed = np.asarray(series, dtype=float)[defined]

forcasted_trend = get_forcast_per_component(raw_trend[defined], n_steps_in, n_steps_out, train_test_size)
forcasted_residual = get_forcast_per_component(raw_residual[defined], n_steps_in, n_steps_out, train_test_size)
forcasted_season = get_forcast_per_component(raw_seasonal[defined], n_steps_in, n_steps_out, train_test_size)

# Combine the component forecasts. The decomposition is multiplicative
# (model='multiplicative'), so the components are recombined by multiplication.
final_prediction = forcasted_trend * forcasted_residual * forcasted_season

# Window the observed series exactly like the components were windowed.
X, y = split_sequence(observed, n_steps_in, n_steps_out)

# Train/test split of the observed series (same boundary as the components).
train_X, test_X = X[:train_test_size], X[train_test_size:]
train_y, test_y = y[:train_test_size], y[train_test_size:]

# The expected values are the hold-out targets.
expected = test_y
if expected.size == 0:
    raise ValueError(
        "no hold-out windows: train_test_size=%d leaves nothing to evaluate "
        "out of %d windows" % (train_test_size, len(y))
    )

# Absolute error between expected and predicted values.
difference = np.abs(expected - final_prediction)

# Flatten to one error per predicted value and report the mean.
mean_error = difference.ravel()
print('Mean error', np.mean(mean_error))

# Five-number summary of the absolute errors.
minimum = np.amin(mean_error)
per25, per50, per75 = np.percentile(mean_error, [25, 50, 75])
maximum = np.amax(mean_error)
l5i = [minimum, per25, per50, per75, maximum]

# Result tables keyed by series label.
# NOTE(review): the original keyed these by an undefined name `cell` and never
# created the dictionaries; a single label is used here -- confirm the key.
series_label = 'transactions'
dict2data = {series_label: l5i}

# Mean absolute percentage error.
# NOTE(review): assumes `expected` contains no zeros -- confirm for this data.
MAPE = np.mean(np.abs(100 * (difference / expected)))
dict2MAPE = {series_label: MAPE}
    
# Dump the error-summary table to disk, one "key:value" line per entry.
mae_path = 'MAE_error_data_4_CNN_with_STL_Decomposition_in_26_out_26_period_96.csv'
with open(mae_path, 'w') as mae_file:
    mae_file.writelines(
        '%s:%s\n' % (label, summary) for label, summary in dict2data.items()
    )

# Same layout for the MAPE table.
mape_path = 'MAPE_error_data_4_CNN_with_STL_Decomposition_in_26_out_26_period_96.csv'
with open(mape_path, 'w') as mape_file:
    mape_file.writelines(
        '%s:%s\n' % (label, score) for label, score in dict2MAPE.items()
    )