In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import math
from pyFTS.benchmarks import Measures

ModuleNotFoundError: No module named 'pyFTS'

In [None]:
# Silence DeprecationWarning messages for the rest of the session
import warnings 
warnings.filterwarnings("ignore", category=DeprecationWarning)

## Auxiliary Functions

In [None]:
def normalize(df):
    """Min-max scale ``df`` using its own minimum and maximum.

    Computes ``(df - df.min()) / (df.max() - df.min())``. For a DataFrame the
    statistics are whatever ``df.min()`` / ``df.max()`` return, i.e. one pair
    per column.
    """
    lower = df.min()
    spread = df.max() - lower
    return (df - lower) / spread

In [None]:
def denormalize(norm, _min, _max):
    """Map min-max scaled values back to their original range.

    Every element ``n`` of ``norm`` becomes ``n * (_max - _min) + _min``.
    The result is always a plain list.
    """
    restored = []
    for scaled in norm:
        restored.append(scaled * (_max - _min) + _min)
    return restored

In [None]:
def save_obj(obj, name ):
    """Pickle ``obj`` to ``results/<name>.pkl`` with the highest pickle protocol.

    The ``results/`` directory is not created here, so it has to exist already.
    """
    target = 'results/' + name + '.pkl'
    with open(target, 'wb') as handle:
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)

def load_obj(name ):
    """Return the object stored in ``results/<name>.pkl``.

    Unpickling can execute arbitrary code, so only load files that were
    produced by a trusted source.
    """
    source = 'results/' + name + '.pkl'
    with open(source, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

## Load Dataset

In [None]:
# Station whose series is forecast and scored throughout the notebook
target_station = 'DHHL_3'

# All neighbor stations with residual correlation greater than .90.
# The list also contains the target station itself; it selects the input
# columns for the multivariate models (VAR, clustered multivariate FTS).
neighbor_stations_90 = ['DHHL_3',  'DHHL_4','DHHL_5','DHHL_10','DHHL_11','DHHL_9','DHHL_2', 'DHHL_6','DHHL_7','DHHL_8']

In [7]:
# Load the raw frame plus two companion frames; judging by the file names these
# are presumably the SSA "clean" and "residual" components of the same data — TODO confirm.
# Paths are relative to the notebook's working directory. Unpickling executes
# arbitrary code, so only load pickle files from a trusted source.
df = pd.read_pickle("df_oahu.pkl")
df_ssa_clean = pd.read_pickle("df_ssa_clean.pkl")
df_ssa_residual = pd.read_pickle("df_ssa_residual.pkl")

In [8]:
# Remove columns with many corrupted or missing values.
# NOTE: the frames are modified in place, so this cell is not idempotent —
# running it a second time raises KeyError because the columns are already gone.
df.drop(columns=['AP_1', 'AP_7'], inplace=True)
df_ssa_clean.drop(columns=['AP_1', 'AP_7'], inplace=True)
df_ssa_residual.drop(columns=['AP_1', 'AP_7'], inplace=True)

In [9]:
# Get data from the interval of interest: 2010-06 (inclusive) up to 2010-08 (exclusive).
# The boolean mask is built from df.index and reused positionally for the two
# SSA frames — assumes all three frames share the same index; TODO confirm.
interval = ((df.index >= '2010-06') & (df.index < '2010-08'))
df = df.loc[interval]
df_ssa_clean = df_ssa_clean.loc[interval]
df_ssa_residual = df_ssa_residual.loc[interval]

In [10]:
# Normalize data

# Save the target station's min/max so forecasts can be denormalized later
# (min_clean/max_clean and min_residual/max_residual are read by get_final_forecast)
min_raw = df[target_station].min()
min_clean = df_ssa_clean[target_station].min()
min_residual = df_ssa_residual[target_station].min()

max_raw = df[target_station].max()
max_clean = df_ssa_clean[target_station].max()
max_residual = df_ssa_residual[target_station].max()

# Perform min-max normalization of the two SSA frames; the raw frame `df`
# is left in its original units
norm_df_ssa_clean = normalize(df_ssa_clean)
norm_df_ssa_residual = normalize(df_ssa_residual)

## Create Nested cross-validation Indexes
Month Forward-chaining

C. Bergmeir and J. M. Benítez. On the use of cross-validation for time series predictor evaluation. Inf. Sci., 191:192–213, May 2012. ISSN 0020–0255. doi: 10.1016/j.ins.2011.12.028. URL http://dx.doi.org/10.1016/j.ins.2011.12.028.

L. J. Tashman. Out-of-sample tests of forecasting accuracy: an analysis and review. International Journal of Forecasting, 16(4):437–450, 2000. URL https://ideas.repec.org/a/eee/intfor/v16y2000i4p437-450.html.

## Rolling window

Training: 4 weeks
Validation: 1 week
Test: 1 week

Rolling daily

In [11]:
import datetime

In [12]:
def getRollingWindow(index):
    """Return the date bounds of one train/validation/test split.

    Starting at ``index`` the split covers 28 days of training, the next
    7 days of validation and the following 7 days of test.

    Returns a 6-tuple of ``'YYYY-MM-DD'`` strings:
    ``(train_start, train_end, validation_start, validation_end,
    test_start, test_end)``; every bound is inclusive.
    """
    # Day offsets from `index` for the six bounds, in return order
    day_offsets = (0, 27, 28, 34, 35, 41)
    return tuple(
        (index + datetime.timedelta(days=offset)).strftime('%Y-%m-%d')
        for offset in day_offsets
    )

In [13]:
def calculate_rolling_error(cv_name, df, forecasts, order_list):
    """Score every rolling-window forecast against its observed test week.

    Split ``i`` uses the test window returned by ``getRollingWindow`` for
    ``df.index[0] + 7 * i`` days. The first ``order_list[i]`` observations of
    the window are skipped and the last forecast value is dropped before the
    two sequences are passed to ``Measures.rmse``, ``Measures.smape`` and
    ``Measures.UStatistic``.

    Parameters
    ----------
    cv_name : str
        Output path without extension; results go to ``cv_name + ".csv"``.
    df : pandas.DataFrame
        Observed data containing the ``target_station`` column, sliced by
        date strings.
    forecasts : sequence
        One forecast sequence per split.
    order_list : sequence of int
        Model order used for each split (same length as ``forecasts``).

    Returns
    -------
    pandas.DataFrame
        One row per split with columns ``Split``, ``RMSE``, ``SMAPE``, ``U``.
        (Earlier versions returned ``None``; the CSV side effect is unchanged.)
    """
    columns = ['Split', 'RMSE', 'SMAPE', 'U']
    records = []
    cv_results = pd.DataFrame(columns=columns)

    index = df.index[0]

    for i, yhat in enumerate(forecasts):
        order = order_list[i]

        # Only the test bounds of the split are needed here
        test_start, test_end = getRollingWindow(index)[4:]
        observed = df[test_start : test_end][target_station].iloc[order:]
        predicted = yhat[:-1]

        records.append({
            'Split': index.strftime('%Y-%m-%d'),
            'RMSE': Measures.rmse(observed, predicted),
            'SMAPE': Measures.smape(observed, predicted),
            'U': Measures.UStatistic(observed, predicted),
        })

        # Rebuild from the record list (DataFrame.append no longer exists in
        # pandas 2.x) and rewrite the CSV after every split so partial
        # results survive an interrupted run.
        cv_results = pd.DataFrame(records, columns=columns)
        cv_results.to_csv(cv_name+".csv")

        index = index + datetime.timedelta(days=7)

    return cv_results

In [14]:
def reconstruct_ssa_series(clean, residual):
    """Add two component sequences element by element.

    Elements are paired with ``zip``, so the result is as long as the shorter
    input. Each output value is ``residual[i] + clean[i]``; a list is returned.
    """
    combined = []
    for residual_value, clean_value in zip(residual, clean):
        combined.append(residual_value + clean_value)
    return combined

In [15]:
def get_final_forecast(forecasts_clean, forecasts_residual, order_list_clean, order_list_residual):
    """Combine per-split forecasts of the two SSA components.

    For every split both forecasts are denormalized with the module-level
    ``min_*`` / ``max_*`` values of their component. The forecast produced by
    the lower-order model is trimmed at the front by the order difference, and
    the two sequences are then added element-wise.

    Returns ``(forecasts_final, order_list)`` where ``order_list[i]`` is the
    larger of the two model orders of split ``i``.
    """
    combined_forecasts, combined_orders = [], []

    for split in range(len(forecasts_clean)):
        clean_values = denormalize(forecasts_clean[split], min_clean, max_clean)
        residual_values = denormalize(forecasts_residual[split], min_residual, max_residual)

        clean_order = order_list_clean[split]
        residual_order = order_list_residual[split]
        common_order = max(clean_order, residual_order)

        # Leading values to discard from each component forecast
        clean_skip = common_order - clean_order
        residual_skip = common_order - residual_order

        combined_forecasts.append(
            reconstruct_ssa_series(clean_values[clean_skip:], residual_values[residual_skip:])
        )
        combined_orders.append(common_order)

    return combined_forecasts, combined_orders

## Check data

In [None]:
# Compute the split bounds for a window starting 19 weeks after the first sample.
# NOTE(review): df was restricted to 2010-06 .. 2010-07 earlier in the notebook, so this
# offset appears to fall outside the loaded interval — confirm the intended start date.
train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(df.index[0]  + datetime.timedelta(weeks=19))

In [None]:
# Plot the raw DHHL_3 series over the selected test window
plt.plot(df[test_start : test_end].DHHL_3)
plt.show()

In [None]:
# Smoke test of a single SARIMA forecast on the (non-normalized) clean component.
# NOTE: sarima_forecast is defined further down, in the SARIMA section; on a fresh
# kernel that cell has to be run before this one.
arima_params = (2, 1, 2)
sarima_params = (1, 1, 1, 61)
fcst_clean = sarima_forecast(df_ssa_clean[train_start : train_end].DHHL_3, df_ssa_clean[test_start : test_end].DHHL_3, arima_params, sarima_params)

In [None]:
# Overlay the observed clean component and the SARIMA forecast for the test window
plt.plot(df_ssa_clean[test_start : test_end].DHHL_3.values)
plt.plot(fcst_clean)
plt.show()

## Persistence

In [None]:
def persistence_forecast(train, test, step):
    """Naive baseline: hold every ``step``-th test value for ``step`` points.

    ``train`` is accepted only so the signature matches the other forecasters;
    it is not used. The returned list has ``ceil(len(test) / step) * step``
    entries, so it can be longer than ``test`` when ``step`` does not divide
    ``len(test)``.
    """
    return [
        held_value
        for position in np.arange(0, len(test), step)
        for held_value in [test.iloc[position]] * step
    ]

In [None]:
def rolling_cv_persistence(df, step):
    """Run the persistence baseline over all rolling-window splits of ``df``.

    Splits start at ``df.index[0]`` and advance by 7 days; a new split is
    started as long as the previous split's test window ends before the last
    date in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Date-indexed data containing the ``target_station`` column.
    step : int
        Number of points each observed value is held for
        (see ``persistence_forecast``).

    Returns
    -------
    (forecasts, lags_list)
        One forecast list per split, and a list holding the order ``1`` for
        every split.
    """
    forecasts = []
    lags_list = []

    limit = df.index[-1].strftime('%Y-%m-%d')

    # The empty string sorts before any date, so the loop runs at least once
    test_end = ""
    index = df.index[0]

    while test_end < limit:
        print("Index: ", index.strftime('%Y-%m-%d'))

        train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(index)
        index = index + datetime.timedelta(days=7)

        # Nothing is tuned for this baseline, so the training and validation
        # weeks are merged. pd.concat replaces DataFrame.append, which was
        # removed in pandas 2.0.
        train = pd.concat([df[train_start : train_end], df[validation_start : validation_end]])
        test = df[test_start : test_end]

        forecasts.append(persistence_forecast(train[target_station], test[target_station], step))
        lags_list.append(1)

    return forecasts, lags_list

In [None]:
# Run the persistence baseline separately on the two normalized SSA component frames
forecasts_clean, order_list_clean = rolling_cv_persistence(norm_df_ssa_clean, 1)
forecasts_residual, order_list_residual = rolling_cv_persistence(norm_df_ssa_residual, 1)

In [None]:
# Denormalize both component forecasts and add them up, split by split
forecasts_final, order_list = get_final_forecast(forecasts_clean, forecasts_residual, order_list_clean, order_list_residual)

In [None]:
# Score against the raw series; writes rolling_cv_oahu_persistence.csv
calculate_rolling_error("rolling_cv_oahu_persistence", df, forecasts_final, order_list)

## SARIMA

In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from itertools import product
import sys

In [None]:
def evaluate_SARIMA_models(test_name, train, validation, parameters_list, period_length):
    """Grid-search SARIMA orders by RMSE on the validation set.

    Parameters
    ----------
    test_name : str
        Output path without extension; the table of evaluated configurations
        is rewritten to ``test_name + ".csv"`` after every successful fit.
    train, validation : pandas.Series
        Training and validation data, passed on to ``sarima_forecast``.
    parameters_list : iterable of 6-tuples
        ``(p, d, q, P, D, Q)`` candidates.
    period_length : int
        Seasonal period appended to ``(P, D, Q)``.

    Returns
    -------
    tuple or None
        ``(arima_order, sarima_order)`` with the lowest RMSE, or ``None`` when
        no candidate could be evaluated.

    Notes
    -----
    The CSV columns are now ``Parameters`` and ``RMSE``. Previously the frame
    was declared with an ``Order`` column while rows were stored under
    ``Parameters``, which left ``Order`` permanently empty.
    """
    columns = ['Parameters', 'RMSE']
    records = []
    best_score, best_cfg = float("inf"), None

    for param in parameters_list:
        arima_order = (param[0],param[1],param[2])
        sarima_order = (param[3],param[4],param[5],period_length)
        print('Testing SARIMA%s %s ' % (str(arima_order),str(sarima_order)))
        try:
            fcst = sarima_forecast(train, validation, arima_order, sarima_order)
            rmse = Measures.rmse(validation.values, fcst)

            if rmse < best_score:
                best_score, best_cfg = rmse, (arima_order, sarima_order)

            print('SARIMA%s %s RMSE=%.3f' % (str(arima_order),str(sarima_order),rmse))
            records.append({'Parameters' : str(param) ,'RMSE' : rmse})
            pd.DataFrame(records, columns=columns).to_csv(test_name+".csv")
        except Exception:
            # A failing configuration is reported and skipped. `Exception`
            # (rather than a bare except) lets KeyboardInterrupt and
            # SystemExit stop a long grid search.
            print(sys.exc_info())
            print('Invalid model%s %s ' % (str(arima_order),str(sarima_order)))
            continue

    print('Best SARIMA(%s) RMSE=%.3f' % (best_cfg, best_score))
    return best_cfg

In [None]:
def OLD_sarima_forecast(train, test, arima_order, sarima_order):
    """Earlier SARIMA forecaster, kept for reference.

    Fits a SARIMAX model (with constant trend) on ``train``, then applies the
    fitted parameters to ``train`` followed by ``test`` via ``filter`` and
    returns the in-sample predictions that fall on the ``test`` part.

    Parameters
    ----------
    train, test : pandas.Series
    arima_order : tuple
        ``(p, d, q)``.
    sarima_order : tuple
        ``(P, D, Q, s)``.
    """
    # pd.concat replaces Series.append, which was removed in pandas 2.0
    whole_data = pd.concat([train, test])

    # NOTE(review): `disp` is normally an argument of fit(), not of the model
    # constructor — confirm whether it has any effect here.
    training_mod = SARIMAX(train.values, order=arima_order, seasonal_order=sarima_order, disp=True, trend='c')
    training_res = training_mod.fit()

    mod = SARIMAX(whole_data.values, order=arima_order, seasonal_order=sarima_order, trend='c')
    res = mod.filter(training_res.params)

    insample = res.predict()

    # Keep only the predictions aligned with the test observations
    return insample[len(whole_data) - len(test):]

In [None]:
def sarima_forecast(train, test, arima_order, sarima_order):
    """Forecast ``test`` with a SARIMAX model fitted once on recent history.

    The model is fitted on the last ``5 * sarima_order[3]`` training values.
    The test series is then walked in blocks: the newly observed values are
    appended to a sliding history window, a new SARIMAX model is built on that
    window, initialised with the previous predicted state and covariance, run
    through ``smooth`` with the already estimated parameters (no refit), and
    asked for a 5-step forecast.

    Returns a list with at most ``len(test)`` forecast values.
    """

    predictions = []
    # History window: five seasonal periods
    window_size = sarima_order[3] * 5
    # Forecast horizon per iteration
    step = 5
    
    history = list(train.iloc[-window_size:])

    print("Fitting model at:", datetime.datetime.now())
    model = SARIMAX(history, order=arima_order, seasonal_order=sarima_order,enforce_invertibility=False,enforce_stationarity=False)
    model_fit = model.fit(disp=True,enforce_invertibility=False,enforce_stationarity=False, maxiter=100)

    # Save the estimated parameters and the last predicted state / covariance
    est_params = model_fit.params
    est_state = model_fit.predicted_state[:, -1]
    est_state_cov = model_fit.predicted_state_cov[:, :, -1]

    # Start of the slice of test observations not yet added to the history
    st = 0
        
    print("Forecasting at:", datetime.datetime.now())
    # t runs 1, 6, 11, ...: the first iteration reveals one observation,
    # every later iteration reveals the next `step` observations
    for t in np.arange(1,len(test)+1,step):
        obs = test.iloc[st:t].values
        history.extend(obs)
        # Keep the window at a fixed length
        history = history[-window_size:]
        
        mod_updated = SARIMAX(history, order=arima_order, seasonal_order=sarima_order,enforce_invertibility=False,enforce_stationarity=False)
        # Start the filter from the state carried over from the previous model
        mod_updated.initialize_known(est_state, est_state_cov)
        mod_frcst = mod_updated.smooth(est_params)
        
        yhat = mod_frcst.forecast(step)   
        predictions.extend(yhat)
            
        # Carry parameters and state over to the next iteration
        est_params = mod_frcst.params
        est_state = mod_frcst.predicted_state[:, -1]
        est_state_cov = mod_frcst.predicted_state_cov[:, :, -1]
            
        st = t
    print("Forecasting complete at:", datetime.datetime.now())
                
    # The last block may overshoot the end of the test set
    return predictions[:len(test)]

In [None]:
def rolling_cv_SARIMA(df, step):
    """Run SARIMA over all rolling-window splits of ``df``.

    The model configuration is fixed at ARIMA ``(2, 1, 2)`` with seasonal
    order ``(1, 1, 1, 61)``; no per-split grid search is performed. To tune
    per split, call ``evaluate_SARIMA_models`` on the training and validation
    windows before forecasting.

    Parameters
    ----------
    df : pandas.DataFrame
        Date-indexed data containing the ``target_station`` column.
    step : int
        Unused; kept so all ``rolling_cv_*`` functions share one signature.

    Returns
    -------
    (forecasts, lags_list)
        One forecast list per split, and a list holding the order ``1`` for
        every split.
    """
    # Seasonal period of 61 samples, i.e. one day of data from 5:00 to 20:00
    # (per the note in the original code).
    arima_params = (2, 1, 2)
    sarima_params = (1, 1, 1, 61)

    forecasts = []
    lags_list = []

    limit = df.index[-1].strftime('%Y-%m-%d')

    # The empty string sorts before any date, so the loop runs at least once
    test_end = ""
    index = df.index[0]

    while test_end < limit:
        print("Index: ", index.strftime('%Y-%m-%d'))

        train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(index)
        index = index + datetime.timedelta(days=7)

        # No tuning, so training and validation weeks are merged. pd.concat
        # replaces DataFrame.append, which was removed in pandas 2.0.
        train = pd.concat([df[train_start : train_end], df[validation_start : validation_end]])
        test = df[test_start : test_end]

        yhat = sarima_forecast(train[target_station], test[target_station], arima_params, sarima_params)

        lags_list.append(1)
        forecasts.append(yhat)

    return forecasts, lags_list

In [None]:
# Run SARIMA separately on the two normalized SSA component frames
forecasts_clean, order_list_clean = rolling_cv_SARIMA(norm_df_ssa_clean, 1)
forecasts_residual, order_list_residual = rolling_cv_SARIMA(norm_df_ssa_residual, 1)

In [None]:
# Denormalize both component forecasts and add them up, split by split
forecasts_final, order_list = get_final_forecast(forecasts_clean, forecasts_residual, order_list_clean, order_list_residual)

In [None]:
# Score against the raw series; writes rolling_cv_oahu_sarima.csv
calculate_rolling_error("rolling_cv_oahu_sarima", df, forecasts_final, order_list)

## Vector Autoregressive

In [None]:
from statsmodels.tsa.api import VAR, DynamicVAR

In [None]:
def evaluate_VAR_models(test_name, train, validation,target, maxlags_list):
    """Select a VAR model by one-step-ahead RMSE on the validation set.

    For every ``maxlags`` candidate a VAR is fitted on ``train`` with AIC lag
    selection. One-step forecasts are produced by sliding a window of the
    selected lag order over ``validation``, and the ``target`` column is
    scored with ``Measures.rmse``.

    Parameters
    ----------
    test_name : str
        Output path without extension; the table of evaluated models is
        rewritten to ``test_name + ".csv"`` after every candidate.
    train, validation : pandas.DataFrame
        Multivariate training and validation data with identical columns.
    target : str
        Column used for scoring.
    maxlags_list : iterable of int
        Upper bounds for the lag order.

    Returns
    -------
    The fitted results object with the lowest RMSE, or ``None`` when
    ``maxlags_list`` is empty.
    """
    columns = ['Order', 'RMSE']
    records = []
    best_score, best_cfg, best_model = float("inf"), None, None

    for lgs in maxlags_list:
        results = VAR(train).fit(maxlags=lgs, ic='aic')

        # Lag order actually chosen by the information criterion
        order = results.k_ar
        forecast = []

        for i in range(len(validation)-order):
            forecast.extend(results.forecast(validation.values[i:i+order],1))

        forecast_df = pd.DataFrame(columns=validation.columns, data=forecast)
        rmse = Measures.rmse(validation[target].iloc[order:], forecast_df[target].values)

        if rmse < best_score:
            best_score, best_cfg, best_model = rmse, order, results

        print('VAR (%s)  RMSE=%.3f' % (str(order),rmse))

        # Record list instead of DataFrame.append (removed in pandas 2.0)
        records.append({'Order' : str(order) ,'RMSE' : rmse})
        pd.DataFrame(records, columns=columns).to_csv(test_name+".csv")

    print('Best VAR(%s) RMSE=%.3f' % (best_cfg, best_score))
    return best_model

In [None]:
def var_forecast(train, test, target, order, step):
    """Fit a VAR on ``train`` and forecast the ``target`` column over ``test``.

    A window of the fitted lag order slides over ``test`` in increments of
    ``step``; each window yields a ``step``-ahead forecast. Returns the
    forecast values of the ``target`` column as an array.
    """
    fitted = VAR(train.values).fit(maxlags=order)
    n_lags = fitted.k_ar
    print("Lag order:" + str(n_lags))

    rows = []
    last_start = len(test) - n_lags + 1
    for start in np.arange(0, last_start, step):
        window = test.values[start:start + n_lags]
        rows.extend(fitted.forecast(window, step))

    predicted = pd.DataFrame(columns=test.columns, data=rows)
    return predicted[target].values

In [None]:
def rolling_cv_var(df, step):
    """Run VAR with per-split lag selection over all rolling-window splits.

    For each split the lag order is chosen on the validation week with
    ``evaluate_VAR_models`` (candidates 1, 2, 4, 6, 8, 10, 20, 40). A VAR of
    that order is then fitted on training plus validation data and used to
    forecast the test week. Only the ``neighbor_stations_90`` columns are used.

    Parameters
    ----------
    df : pandas.DataFrame
        Date-indexed data containing the ``neighbor_stations_90`` columns.
    step : int
        Forecast horizon and window increment passed to ``var_forecast``.

    Returns
    -------
    (forecasts, order_list)
        Per split: the target-station forecast and the lag order used.
    """
    maxlags_list = [1,2,4,6,8,10,20,40]
    forecasts = []
    order_list = []

    limit = df.index[-1].strftime('%Y-%m-%d')

    # The empty string sorts before any date, so the loop runs at least once
    test_end = ""
    index = df.index[0]

    while test_end < limit:
        print("Index: ", index.strftime('%Y-%m-%d'))

        train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(index)
        index = index + datetime.timedelta(days=7)

        train = df[train_start : train_end]
        validation = df[validation_start : validation_end]
        test = df[test_start : test_end]

        # Perform grid search
        best_model = evaluate_VAR_models("nested_test_var_oahu", train[neighbor_stations_90], validation[neighbor_stations_90],target_station, maxlags_list)

        # Refit on train + validation for the test forecast. pd.concat
        # replaces DataFrame.append, which was removed in pandas 2.0.
        train = pd.concat([train, validation])
        order = best_model.k_ar
        yhat = var_forecast(train[neighbor_stations_90], test[neighbor_stations_90], target_station, order, step)

        order_list.append(order)
        forecasts.append(yhat)

    return forecasts, order_list

In [None]:
# Run VAR separately on the two normalized SSA component frames
forecasts_clean, order_list_clean = rolling_cv_var(norm_df_ssa_clean, 1)
forecasts_residual, order_list_residual = rolling_cv_var(norm_df_ssa_residual, 1)

In [None]:
# Denormalize both component forecasts and add them up, split by split
forecasts_final, order_list = get_final_forecast(forecasts_clean, forecasts_residual, order_list_clean, order_list_residual)

In [None]:
# Score against the raw series; writes rolling_cv_oahu_var.csv
calculate_rolling_error("rolling_cv_oahu_var", df, forecasts_final, order_list)

In [None]:
# Plot observed vs. predicted values for the test week of the first split.
# Uses whatever `forecasts_final` / `order_list` currently hold, i.e. the results
# of the most recently executed model section.
train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(df.index[0])
y_pred_oahu = forecasts_final[0]
y_obs_oahu = df[test_start : test_end].DHHL_3.values

#x_date = pd.date_range("00:00", "24:00", freq="10min").strftime('%H:%M')

#xn = np.arange(len(x_date))
# One x position per plotted point; the last forecast value is dropped below, hence the -1
xn = np.arange(0,len(y_pred_oahu)-1)

fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(111)
# Skip the first `order` observations and drop the final forecast so that both
# curves have the same length (same alignment as in calculate_rolling_error)
plt.plot(xn, y_obs_oahu[order_list[0]:], label='Observed')
plt.plot(xn, y_pred_oahu[:-1], color ='orange', label='Predicted')
ax.set_xlabel('Time')
ax.set_ylabel('Irradiance [W/m2]')
ax.legend(loc='best')
# Hard-coded tick positions and day labels.
# NOTE(review): they match a first test week of 06-12 Jul; update both lists if the
# data interval or sampling changes.
ticks = [10,70,130,190,250,310,370]
ax.set_xticks(ticks)

ax.set_xticklabels(['06-Jul','07-Jul','08-Jul','09-Jul','10-Jul','11-Jul','12-Jul'])
plt.show()
fig.savefig("plot_oahu_ssa", bbox_inches='tight')

## High Order FTS

In [None]:
from pyFTS.partitioners import Grid, Entropy, Util as pUtil
from pyFTS.models import hofts

In [3]:
def evaluate_hofts_models(test_name, train, validation, partitioners_list, order_list, partitions_list):
    """Grid-search High Order FTS settings by RMSE on the validation set.

    Every combination of partitioner class, model order and number of
    partitions is fitted on ``train`` and scored on ``validation``. The first
    ``order`` validation values are skipped and the last forecast is dropped
    before computing the RMSE.

    Parameters
    ----------
    test_name : str
        Output path without extension; the table of evaluated configurations
        is rewritten to ``test_name + ".csv"`` after every fit.
    train, validation : pandas.Series
    partitioners_list : iterable
        Partitioner classes, called as ``cls(data=..., npart=...)``.
    order_list : iterable of int
    partitions_list : iterable of int

    Returns
    -------
    tuple or None
        ``(order, npartitions, partitioner)`` with the lowest RMSE, or
        ``None`` when the grid is empty.
    """
    columns = ['Partitioner','Partitions','Order','RMSE']
    records = []
    best_score, best_cfg = float("inf"), None

    for _partitioner in partitioners_list:
        for _order in order_list:
            for npartitions in partitions_list:
                print('HOFTS %s - %s - %s' % (str(_partitioner), npartitions, str(_order)))
                fuzzy_sets = _partitioner(data=train.values, npart=npartitions)
                model_simple_hofts = hofts.HighOrderFTS(order=_order)

                model_simple_hofts.fit(train.values, order=_order, partitioner=fuzzy_sets)

                forecast = model_simple_hofts.predict(validation.values)
                rmse = Measures.rmse(validation.iloc[_order:], forecast[:-1])

                if rmse < best_score:
                    best_score, best_cfg = rmse, (_order,npartitions,_partitioner)

                print('HOFTS %s - %s - %s  RMSE=%.3f' % (str(_partitioner), npartitions, str(_order),rmse))

                # Record list instead of DataFrame.append (removed in pandas 2.0)
                records.append({'Partitioner':str(_partitioner), 'Partitions':npartitions, 'Order' : str(_order) ,'RMSE' : rmse})
                pd.DataFrame(records, columns=columns).to_csv(test_name+".csv")

    print('Best HOFTS(%s) RMSE=%.3f' % (best_cfg, best_score))

    return best_cfg

In [None]:
def hofts_forecast(train_df, test_df, _order, _partitioner, _npartitions):
    """Fit a High Order FTS model on ``train_df`` and forecast ``test_df``.

    Parameters
    ----------
    train_df, test_df : pandas.Series
    _order : int
        Model order.
    _partitioner : callable
        Partitioner class, called as ``cls(data=..., npart=...)``.
    _npartitions : int
        Number of partitions.

    Returns
    -------
    The output of the model's ``predict`` for ``test_df.values``.
    """
    fuzzy_sets = _partitioner(data=train_df.values, npart=_npartitions)

    # Pass the order to the constructor as well, exactly as
    # evaluate_hofts_models does, so the final model is built the same way as
    # the model that was selected on the validation set.
    model_simple_hofts = hofts.HighOrderFTS(order=_order)
    model_simple_hofts.fit(train_df.values, order=_order, partitioner=fuzzy_sets)

    return model_simple_hofts.predict(test_df.values)

In [None]:
def rolling_cv_hofts(df, step):
    """Run High Order FTS with per-split tuning over all rolling-window splits.

    For each split the partitioner (grid or entropy), the order (1 or 2) and
    the number of partitions (10 to 90 in steps of 10) are selected on the
    validation week with ``evaluate_hofts_models``. The model is then refitted
    on training plus validation data to forecast the test week.

    Splits advance by 7 days. This function previously advanced by 1 day,
    while ``calculate_rolling_error`` and the other ``rolling_cv_*`` functions
    advance by 7 days, so forecast ``i`` was scored against a different test
    week than the one it was produced for.

    Parameters
    ----------
    df : pandas.DataFrame
        Date-indexed data containing the ``target_station`` column.
    step : int
        Unused; kept so all ``rolling_cv_*`` functions share one signature.

    Returns
    -------
    (forecasts, order_list)
        Per split: the forecast and the model order used.
    """
    partitioners_list = [Grid.GridPartitioner, Entropy.EntropyPartitioner]
    eval_order_list = np.arange(1,3)
    partitions_list = np.arange(10,100,10)

    forecasts = []
    order_list = []

    limit = df.index[-1].strftime('%Y-%m-%d')

    # The empty string sorts before any date, so the loop runs at least once
    test_end = ""
    index = df.index[0]

    while test_end < limit:
        print("Index: ", index.strftime('%Y-%m-%d'))

        train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(index)
        # Must match the 7-day stride used by calculate_rolling_error
        index = index + datetime.timedelta(days=7)

        train = df[train_start : train_end]
        validation = df[validation_start : validation_end]
        test = df[test_start : test_end]

        # Perform grid search
        (order,nparts,partitioner) = evaluate_hofts_models("nested_eval_hofts_oahu", train[target_station], validation[target_station], partitioners_list, eval_order_list, partitions_list)

        # Refit on train + validation for the test forecast. pd.concat
        # replaces DataFrame.append, which was removed in pandas 2.0.
        train = pd.concat([train, validation])

        # Perform forecast
        yhat = hofts_forecast(train[target_station], test[target_station], order, partitioner, nparts)

        order_list.append(order)
        forecasts.append(yhat)

    return forecasts, order_list

In [None]:
# Run High Order FTS separately on the two normalized SSA component frames
forecasts_clean, order_list_clean = rolling_cv_hofts(norm_df_ssa_clean, 1)
forecasts_residual, order_list_residual = rolling_cv_hofts(norm_df_ssa_residual, 1)

In [None]:
# Denormalize both component forecasts and add them up, split by split
forecasts_final, order_list = get_final_forecast(forecasts_clean, forecasts_residual, order_list_clean, order_list_residual)

In [None]:
# Score against the raw series; writes rolling_cv_oahu_hofts.csv
calculate_rolling_error("rolling_cv_oahu_hofts", df, forecasts_final, order_list)

## Conditional Variance FTS

In [None]:
from pyFTS.models.nonstationary import cvfts
from pyFTS.models.nonstationary import partitioners as nspartitioners

In [None]:
def evaluate_cvfts_models(test_name, train, validation, partitions_list):
    """Select the number of partitions for Conditional Variance FTS by RMSE.

    For each candidate a degree-2 polynomial non-stationary partitioner is
    built on top of a grid partitioner, a ``ConditionalVarianceFTS`` model is
    fitted on ``train``, and the validation forecast is scored. The first
    validation value is skipped and the last forecast is dropped before
    computing the RMSE.

    Parameters
    ----------
    test_name : str
        Output path without extension; the table of evaluated configurations
        is rewritten to ``test_name + ".csv"`` after every fit.
    train, validation : pandas.Series
    partitions_list : iterable of int

    Returns
    -------
    The number of partitions with the lowest RMSE, or ``None`` when
    ``partitions_list`` is empty.
    """
    columns = ['Partitions','RMSE']
    records = []
    best_score, best_cfg = float("inf"), None

    for npartitions in partitions_list:

        fuzzy_sets =  nspartitioners.PolynomialNonStationaryPartitioner(data=train.values, part=Grid.GridPartitioner(data=train.values, npart=npartitions), degree=2)

        model_cvfts = cvfts.ConditionalVarianceFTS()
        model_cvfts.fit(train.values, parameters=1, partitioner=fuzzy_sets, num_batches=1000)

        forecast = model_cvfts.predict(validation.values)
        rmse = Measures.rmse(validation.iloc[1:], forecast[:-1])

        if rmse < best_score:
            best_score, best_cfg = rmse, npartitions

        print('CVFTS %s -  RMSE=%.3f' % (npartitions, rmse))

        # Record list instead of DataFrame.append (removed in pandas 2.0)
        records.append({'Partitions':npartitions, 'RMSE' : rmse})
        pd.DataFrame(records, columns=columns).to_csv(test_name+".csv")

    print('Best CVFTS(%s) RMSE=%.3f' % (best_cfg, best_score))

    return best_cfg

In [None]:
def cvfts_forecast(train, test, _partitions):
    """Fit a Conditional Variance FTS model on ``train`` and forecast ``test``.

    The partitioner is a degree-2 polynomial non-stationary partitioner built
    on a grid partitioner with ``_partitions`` partitions. Returns the output
    of the model's ``predict`` for ``test.values``.
    """
    partitioner = nspartitioners.PolynomialNonStationaryPartitioner(
        data=train.values,
        part=Grid.GridPartitioner(data=train.values, npart=_partitions),
        degree=2,
    )

    model = cvfts.ConditionalVarianceFTS()
    model.fit(train.values, parameters=1, partitioner=partitioner, num_batches=1000)

    return model.predict(test.values)

In [None]:
def rolling_cv_cvfts(df, step):
    """Run Conditional Variance FTS over all rolling-window splits of ``df``.

    For each split the number of partitions (80 or 90) is selected on the
    validation week with ``evaluate_cvfts_models``. The model is then refitted
    on training plus validation data to forecast the test week.

    Parameters
    ----------
    df : pandas.DataFrame
        Date-indexed data containing the ``target_station`` column.
    step : int
        Unused; kept so all ``rolling_cv_*`` functions share one signature.

    Returns
    -------
    (forecasts, order_list)
        One forecast per split, and a list holding the order ``1`` for every
        split.
    """
    partitions_list = np.arange(80,100,10)

    forecasts = []
    # This list was previously never initialised here, so the function either
    # raised NameError or appended to (and returned) a global `order_list`
    # left over from an earlier cell.
    order_list = []

    limit = df.index[-1].strftime('%Y-%m-%d')

    # The empty string sorts before any date, so the loop runs at least once
    test_end = ""
    index = df.index[0]

    while test_end < limit:
        print("Index: ", index.strftime('%Y-%m-%d'))

        train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(index)
        index = index + datetime.timedelta(days=7)

        train = df[train_start : train_end]
        validation = df[validation_start : validation_end]
        test = df[test_start : test_end]

        # Perform grid search
        nparts = evaluate_cvfts_models("nested_eval_cvfts_oahu", train[target_station], validation[target_station], partitions_list)

        # Refit on train + validation for the test forecast. pd.concat
        # replaces DataFrame.append, which was removed in pandas 2.0.
        train = pd.concat([train, validation])

        # Perform forecast
        yhat = cvfts_forecast(train[target_station], test[target_station],nparts)

        order_list.append(1)
        forecasts.append(yhat)

    return forecasts, order_list

In [None]:
# Run Conditional Variance FTS separately on the two normalized SSA component frames
forecasts_clean, order_list_clean = rolling_cv_cvfts(norm_df_ssa_clean, 1)
forecasts_residual, order_list_residual = rolling_cv_cvfts(norm_df_ssa_residual, 1)

In [None]:
# Denormalize both component forecasts and add them up, split by split
forecasts_final, order_list = get_final_forecast(forecasts_clean, forecasts_residual, order_list_clean, order_list_residual)

In [None]:
# Score against the raw series; writes rolling_cv_oahu_cvfts.csv
calculate_rolling_error("rolling_cv_oahu_cvfts", df, forecasts_final, order_list)

## Clustered Multivariate FTS

In [None]:
!pip install -U git+https://github.com/cseveriano/spatio-temporal-forecasting

In [32]:
# Development helper: reload the installed `sthofts` module after it has changed.
# NOTE(review): `sthofts` is only imported in the following cell, so on a fresh kernel
# this cell raises NameError unless that import has been run first.
# The `imp` module is deprecated and was removed in Python 3.12; `importlib.reload`
# is its replacement.
import imp
imp.reload(sthofts)

<module 'models.sthofts' from 'C:\\Users\\cseve\\Anaconda3\\lib\\site-packages\\models\\sthofts.py'>

In [33]:
from models import KMeansPartitioner
from models import sthofts

In [34]:
def cmvfts_forecast(train_df, test_df, target, _order, npartitions):
    """Fit the clustered multivariate FTS model and forecast one column.

    A ``KMeansPartitioner`` with ``npartitions`` partitions is built on
    ``train_df``, a ``SpatioTemporalHighOrderFTS`` model of order ``_order``
    is fitted, and ``test_df`` is forecast. Timestamps are printed before each
    stage. Returns the forecast values of the ``target`` column as an array.
    """
    print("KMean Partition at:", datetime.datetime.now())
    clusters = KMeansPartitioner.KMeansPartitioner(
        data=train_df.values,
        npart=npartitions,
        batch_size=1000,
        init_size=npartitions*3,
    )

    model = sthofts.SpatioTemporalHighOrderFTS()

    print("CMVFTS fit at:", datetime.datetime.now())
    model.fit(train_df.values, order=_order, partitioner=clusters)

    print("CMVFTS prediction at:", datetime.datetime.now())
    raw_forecast = model.predict(test_df.values)

    # Label the forecast columns like the input so the target can be picked by name
    labelled = pd.DataFrame(data=raw_forecast, columns=test_df.columns)
    return labelled[target].values

In [35]:
def evaluate_cmvfts_models(test_name, train, validation, order_list, partitions_list):
    """Grid-search order and partition count for the clustered multivariate FTS.

    Every combination is fitted on ``train`` via ``cmvfts_forecast`` and
    scored on the ``target_station`` column of ``validation``. The first
    ``order`` validation values are skipped and the last forecast is dropped
    before computing the RMSE.

    Parameters
    ----------
    test_name : str
        Output path without extension; the table of evaluated configurations
        is rewritten to ``test_name + ".csv"`` after every fit.
    train, validation : pandas.DataFrame
        Multivariate data containing the ``target_station`` column.
    order_list : iterable of int
    partitions_list : iterable of int

    Returns
    -------
    tuple or None
        ``(order, npartitions)`` with the lowest RMSE, or ``None`` when the
        grid is empty.
    """
    columns = ['Partitions','Order','RMSE']
    records = []
    best_score, best_cfg = float("inf"), None

    for _order in order_list:
        for npartitions in partitions_list:

            forecast = cmvfts_forecast(train, validation, target_station, _order, npartitions)
            rmse = Measures.rmse(validation[target_station].iloc[_order:], forecast[:-1])

            if rmse < best_score:
                best_score, best_cfg = rmse, (_order,npartitions)

            print('CMVFTS %s - %s  RMSE=%.3f' % (npartitions, str(_order),rmse))

            # Record list instead of DataFrame.append (removed in pandas 2.0)
            records.append({'Partitions':npartitions, 'Order' : str(_order) ,'RMSE' : rmse})
            pd.DataFrame(records, columns=columns).to_csv(test_name+".csv")

    print('Best CMVFTS(%s) RMSE=%.3f' % (best_cfg, best_score))

    return best_cfg

In [36]:
def rolling_cv_cmvfts(df, step):
    """Run the clustered multivariate FTS over all rolling-window splits.

    The search grid is currently a single configuration (order 2, 80
    partitions). It is still passed through ``evaluate_cmvfts_models`` so the
    validation RMSE of every split is logged. The model is then refitted on
    training plus validation data to forecast the test week. Only the
    ``neighbor_stations_90`` columns are used.

    Parameters
    ----------
    df : pandas.DataFrame
        Date-indexed data containing the ``neighbor_stations_90`` columns.
    step : int
        Unused; kept so all ``rolling_cv_*`` functions share one signature.

    Returns
    -------
    (forecasts, order_list)
        Per split: the target-station forecast and the model order used.
    """
    # Widen these lists to search more configurations,
    # e.g. orders 1-2 or 80-100 partitions.
    eval_order_list = [2]
    partitions_list = [80]

    forecasts = []
    order_list = []

    limit = df.index[-1].strftime('%Y-%m-%d')

    # The empty string sorts before any date, so the loop runs at least once
    test_end = ""
    index = df.index[0]

    while test_end < limit:
        print("Index: ", index.strftime('%Y-%m-%d'))

        train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(index)
        index = index + datetime.timedelta(days=7)

        train = df[train_start : train_end]
        validation = df[validation_start : validation_end]
        test = df[test_start : test_end]

        # Perform grid search
        (order,nparts) = evaluate_cmvfts_models("nested_eval_cmvfts_oahu", train[neighbor_stations_90], validation[neighbor_stations_90], eval_order_list, partitions_list)

        # Refit on train + validation for the test forecast. pd.concat
        # replaces DataFrame.append, which was removed in pandas 2.0.
        train = pd.concat([train, validation])

        # Perform forecast
        yhat = cmvfts_forecast(train[neighbor_stations_90], test[neighbor_stations_90],target_station, order, nparts)

        order_list.append(order)
        forecasts.append(yhat)

    return forecasts, order_list

In [None]:
# Run the clustered multivariate FTS separately on the two normalized SSA component frames.
# Long-running: each fit/predict stage prints a timestamp.
forecasts_clean, order_list_clean = rolling_cv_cmvfts(norm_df_ssa_clean, 1)
forecasts_residual, order_list_residual = rolling_cv_cmvfts(norm_df_ssa_residual, 1)

Index:  2010-06-01
KMean Partition at: 2018-08-17 10:27:38.086219
CMVFTS fit at: 2018-08-17 10:27:38.335553
CMVFTS prediction at: 2018-08-17 10:34:38.660364
New forecast with membership
CMVFTS 80 - 2  RMSE=0.018
Best CMVFTS((2, 80)) RMSE=0.018
KMean Partition at: 2018-08-17 10:36:22.035763
CMVFTS fit at: 2018-08-17 10:36:22.158439
CMVFTS prediction at: 2018-08-17 10:46:23.265118
New forecast with membership
Index:  2010-06-08
KMean Partition at: 2018-08-17 10:47:31.254263
CMVFTS fit at: 2018-08-17 10:47:31.361915
CMVFTS prediction at: 2018-08-17 10:54:26.039490
New forecast with membership
CMVFTS 80 - 2  RMSE=0.017
Best CMVFTS((2, 80)) RMSE=0.017
KMean Partition at: 2018-08-17 10:55:34.475596
CMVFTS fit at: 2018-08-17 10:55:34.626190
CMVFTS prediction at: 2018-08-17 11:03:54.567306
New forecast with membership
Index:  2010-06-15
KMean Partition at: 2018-08-17 11:05:01.862509
CMVFTS fit at: 2018-08-17 11:05:02.001321


In [24]:
# Denormalize and add the two component forecasts, then score them against the raw series;
# writes rolling_cv_oahu_ssa_cmvfts-TEST-nomembership.csv
forecasts_final, order_list = get_final_forecast(forecasts_clean, forecasts_residual, order_list_clean, order_list_residual)
calculate_rolling_error("rolling_cv_oahu_ssa_cmvfts-TEST-nomembership", df, forecasts_final, order_list)

In [26]:
# Date bounds of the first split (window starting at the first sample)
train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(df.index[0])

In [29]:
# Inspect the observed DHHL_3 values of the first test week (displays the full array)
df[test_start : test_end].DHHL_3.values

array([0.00000000e+00, 0.00000000e+00, 1.79391160e-01, 2.12541840e+00,
       6.94642440e+00, 2.02229386e+01, 2.48435741e+01, 5.18624013e+01,
       1.24123383e+02, 1.17993704e+02, 1.41461509e+02, 2.60756724e+02,
       4.20528198e+02, 4.68190023e+02, 4.82245572e+02, 5.97745613e+02,
       6.52623933e+02, 6.66029313e+02, 5.55528980e+02, 7.41387598e+02,
       4.46944939e+02, 6.11650233e+02, 4.28221159e+02, 3.77150701e+02,
       6.42719262e+02, 5.68548517e+02, 4.95615063e+02, 5.92335313e+02,
       2.46854434e+02, 3.17784821e+02, 7.10079530e+02, 4.11159989e+02,
       9.15244491e+02, 6.23574118e+02, 5.96034033e+02, 3.99511503e+02,
       9.26209347e+02, 9.05101853e+02, 8.82787867e+02, 8.55001248e+02,
       1.00219136e+03, 9.21208182e+02, 8.14409070e+02, 7.47384484e+02,
       6.77385716e+02, 6.15338756e+02, 5.55774688e+02, 5.16017944e+02,
       3.67251260e+02, 3.74723090e+02, 3.29060077e+02, 1.73392541e+02,
       9.06996940e+01, 3.23340144e+01, 8.07244078e+01, 4.73759467e+01,
      

In [31]:
# Overlay observed values and the combined forecast for the first split.
# The two curves are plotted as-is, without the order offset applied when scoring.
plt.plot(df[test_start : test_end].DHHL_3.values)
plt.plot(forecasts_final[0])
plt.show()

## LSTM - Multivariate

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.constraints import maxnorm

In [None]:
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a time series as a supervised-learning table.

    Parameters
    ----------
    data : list or 2-D array-like / DataFrame
        Observations, one row per time step. A plain list is treated as a
        single variable.
    n_in : int
        Number of lagged steps ``(t-n_in, ..., t-1)`` used as inputs.
    n_out : int
        Number of steps ``(t, ..., t+n_out-1)`` used as outputs.
    dropnan : bool
        Drop the rows that contain NaN introduced by shifting.

    Returns
    -------
    pandas.DataFrame
        Columns named ``var<j>(t-i)``, ``var<j>(t)`` and ``var<j>(t+i)``,
        ordered from the oldest lag to the furthest lead.
    """
    n_vars = data.shape[1] if type(data) is not list else 1
    frame = pd.DataFrame(data)
    var_ids = range(1, n_vars + 1)

    shifted, labels = [], []

    # Inputs: oldest lag first
    for lag in range(n_in, 0, -1):
        shifted.append(frame.shift(lag))
        labels.extend('var%d(t-%d)' % (var_id, lag) for var_id in var_ids)

    # Outputs: current step, then the leads
    for lead in range(n_out):
        shifted.append(frame.shift(-lead))
        if lead:
            labels.extend('var%d(t+%d)' % (var_id, lead) for var_id in var_ids)
        else:
            labels.extend('var%d(t)' % var_id for var_id in var_ids)

    supervised = pd.concat(shifted, axis=1)
    supervised.columns = labels

    if dropnan:
        supervised = supervised.dropna()
    return supervised

In [None]:
def lstm_multi_forecast(train_df, test_df, _order, _steps, _neurons, _epochs):
    """Train a single-layer LSTM on ``train_df`` and predict over ``test_df``.

    Both frames are reframed with ``series_to_supervised`` using ``_order``
    lags and ``_steps`` forecast steps. The network input is the lagged block
    reshaped to ``(samples, _order, n_features)``; the training target is the
    first column of the last forecast step (``var1(t)`` when ``_steps == 1``),
    i.e. the first column of ``train_df``.

    Args:
        train_df: DataFrame used to fit the network.
        test_df: DataFrame to forecast; presumably has the same columns, in
            the same order, as ``train_df`` - confirm at the call site.
        _order: number of lagged time steps fed to the network.
        _steps: number of forecast steps passed to ``series_to_supervised``.
        _neurons: number of LSTM units.
        _epochs: number of training epochs.

    Returns:
        list: one prediction per supervised row of ``test_df``. Rows whose
        lag/lead window is incomplete are dropped by the reframing, so the
        list is shorter than ``test_df``.
    """
    nfeat = len(train_df.columns)
    nobs = _order * nfeat  # width of the lagged-input block

    train_reshaped_df = series_to_supervised(train_df, n_in=_order, n_out=_steps)
    train_X = train_reshaped_df.iloc[:, :nobs].values
    train_Y = train_reshaped_df.iloc[:, -nfeat].values
    train_X = train_X.reshape((train_X.shape[0], _order, nfeat))

    # Only the inputs of the test frame are needed; its targets are never used.
    test_reshaped_df = series_to_supervised(test_df, n_in=_order, n_out=_steps)
    test_X = test_reshaped_df.iloc[:, :nobs].values
    test_X = test_X.reshape((test_X.shape[0], _order, nfeat))

    # design network
    model = Sequential()
    model.add(LSTM(_neurons, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')

    # fit network (shuffle=False: samples are presented in their original order)
    model.fit(train_X, train_Y, epochs=_epochs, batch_size=1000, verbose=False, shuffle=False)

    forecast = model.predict(test_X)

    # predict() yields one single-element row per sample; flatten to a list
    return [f[0] for f in forecast]

In [None]:
def OLD_evaluate_lstm_models(test_name, train, validation, order_list, neurons_list, epochs_list):
    """Grid-search LSTM settings by RMSE on the validation set (older variant).

    For every (order, neurons, epochs) combination the model is fitted via
    ``lstm_multi_forecast`` and scored against the ``target_station`` column
    of ``validation``. After each combination the results collected so far
    are written to ``<test_name>.csv``.

    Args:
        test_name: path prefix of the CSV results file.
        train: training DataFrame.
        validation: validation DataFrame; must contain ``target_station``.
        order_list: candidate lag orders.
        neurons_list: candidate LSTM unit counts.
        epochs_list: candidate epoch counts.

    Returns:
        tuple or None: the best ``(order, neurons, epochs)``, or ``None`` when
        no combination was evaluated.
    """
    result_columns = ['Order', 'Neurons', 'Epochs', 'RMSE']
    # Collect plain records and build the frame from them; DataFrame.append
    # was removed in pandas 2.0.
    records = []
    best_score, best_cfg = float("inf"), None

    for _order in order_list:
        for _neurons in neurons_list:
            for _epochs in epochs_list:
                forecast = lstm_multi_forecast(train, validation, _order, 1, _neurons, _epochs)

                # The first _order observations have no forecast (they only serve as lags).
                obs = validation[target_station].values
                rmse = Measures.rmse(obs[_order:], forecast)

                if rmse < best_score:
                    best_score, best_cfg = rmse, (_order, _neurons, _epochs)

                records.append({'Order': str(_order), 'Neurons': str(_neurons), 'Epochs': str(_epochs), 'RMSE': rmse})
                print('LSTM %s - %s - %s  RMSE=%.3f' % (str(_order), _neurons, str(_epochs), rmse))
                # Rewrite the CSV after every run so partial results survive an interruption.
                pd.DataFrame(records, columns=result_columns).to_csv(test_name + ".csv")

    print('Best LSTM(%s) RMSE=%.3f' % (best_cfg, best_score))

    return best_cfg

In [None]:
def evaluate_multivariate_lstm_models(test_name, train_df, validation_df, neurons_list, order_list, epochs_list):
    """Grid-search single-layer LSTM settings by RMSE on the validation frame.

    For every (neurons, order, epochs) combination both frames are reframed
    with ``series_to_supervised`` (one forecast step), a one-layer LSTM with a
    single-unit dense output is fitted on the training frame, and the RMSE of
    its predictions on the validation frame is recorded. The target is the
    first column of the frames at time ``t``. After each combination the
    results collected so far are written to ``<test_name>.csv``.

    Args:
        test_name: path prefix of the CSV results file.
        train_df: training DataFrame.
        validation_df: validation DataFrame; presumably has the same columns
            as ``train_df`` - confirm at the call site.
        neurons_list: candidate LSTM unit counts.
        order_list: candidate lag orders.
        epochs_list: candidate epoch counts.

    Returns:
        tuple or None: the best ``(neurons, order, epochs)``, or ``None`` when
        no combination was evaluated.
    """
    result_columns = ['Neurons', 'Order', 'Epochs', 'RMSE']
    # Collect plain records and build the frame from them; DataFrame.append
    # was removed in pandas 2.0.
    records = []
    best_score, best_cfg = float("inf"), None

    nfeat = len(train_df.columns)
    nsteps = 1

    for _neurons in neurons_list:
        for _order in order_list:
            for epochs in epochs_list:
                nobs = nfeat * _order  # width of the lagged-input block

                train_reshaped_df = series_to_supervised(train_df, n_in=_order, n_out=nsteps)
                train_X = train_reshaped_df.iloc[:, :nobs].values
                train_Y = train_reshaped_df.iloc[:, -nfeat].values
                train_X = train_X.reshape((train_X.shape[0], _order, nfeat))

                val_reshaped_df = series_to_supervised(validation_df, n_in=_order, n_out=nsteps)
                validation_X = val_reshaped_df.iloc[:, :nobs].values
                validation_Y = val_reshaped_df.iloc[:, -nfeat].values
                validation_X = validation_X.reshape((validation_X.shape[0], _order, nfeat))

                # design network
                model = Sequential()
                model.add(LSTM(_neurons, input_shape=(train_X.shape[1], train_X.shape[2])))
                model.add(Dense(1))
                model.compile(loss='mae', optimizer='adam')

                # fit network
                model.fit(train_X, train_Y, epochs=epochs, batch_size=1000, verbose=False, shuffle=False)
                forecast = model.predict(validation_X)
                fcst = [f[0] for f in forecast]

                rmse = Measures.rmse(validation_Y, fcst)

                params = (_neurons, _order, epochs)
                if rmse < best_score:
                    best_score, best_cfg = rmse, params

                records.append({'Neurons': _neurons, 'Order': _order, 'Epochs': epochs, 'RMSE': rmse})
                print('LSTM %s  RMSE=%.3f' % (params, rmse))
                # Rewrite the CSV after every run so partial results survive an interruption.
                pd.DataFrame(records, columns=result_columns).to_csv(test_name + ".csv")

    print('Best LSTM(%s) RMSE=%.3f' % (best_cfg, best_score))
    return best_cfg

In [None]:
def rolling_cv_lstm_multi(df, step):
    """Rolling-window evaluation of the multivariate LSTM.

    Starting at the first timestamp of ``df``, repeatedly asks
    ``getRollingWindow`` for train/validation/test boundaries, advances the
    window origin by 7 days, grid-searches the LSTM configuration on the
    train/validation split, refits on train + validation and forecasts the
    test split, always using the ``neighbor_stations_90`` columns.

    Args:
        df: DataFrame whose index entries support ``strftime`` and which
            contains the ``neighbor_stations_90`` columns.
        step: unused; kept so existing calls keep working.

    Returns:
        tuple: ``(forecasts, lags_list)`` - one forecast list and one selected
        lag order per window.
    """
    # Quick single-point grid currently in effect. The wider grid that these
    # values override was: neurons np.arange(50, 110, 50), order
    # np.arange(2, 4), epochs [100].
    neurons_list = [5]
    order_list = [2]
    epochs_list = [1]

    lags_list = []
    forecasts = []

    limit = df.index[-1].strftime('%Y-%m-%d')

    # NOTE(review): test_end is compared with a 'YYYY-MM-DD' string, so
    # getRollingWindow presumably returns date strings - confirm.
    test_end = ""
    index = df.index[0]

    while test_end < limit:
        print("Index: ", index.strftime('%Y-%m-%d'))

        train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(index)
        index = index + datetime.timedelta(days=7)

        train = df[train_start : train_end]
        validation = df[validation_start : validation_end]
        test = df[test_start : test_end]

        # Perform grid search
        (_neurons, _order, epochs) = evaluate_multivariate_lstm_models("nested_eval_lstm_multi_oahu", train[neighbor_stations_90], validation[neighbor_stations_90], neurons_list, order_list, epochs_list)

        # Concat train & validation for test
        # (DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.)
        train = pd.concat([train, validation])

        # Perform forecast
        yhat = lstm_multi_forecast(train[neighbor_stations_90], test[neighbor_stations_90], _order, 1, _neurons, epochs)

        yhat.append(0)  # pad with a trailing 0 to keep the metrics-vector format

        lags_list.append(_order)
        forecasts.append(yhat)

    return forecasts, lags_list

In [None]:
# Run the rolling multivariate-LSTM evaluation once per SSA component frame.
# The second argument (step) is not used by rolling_cv_lstm_multi.
# Note: forecasts_clean / forecasts_residual are reassigned by every model
# section of this notebook, so they reflect whichever section ran last.
forecasts_clean, order_list_clean = rolling_cv_lstm_multi(norm_df_ssa_clean, 1)
forecasts_residual, order_list_residual = rolling_cv_lstm_multi(norm_df_ssa_residual, 1)

In [None]:
# Pass both component forecasts and their selected lag orders to
# get_final_forecast (defined outside this section).
# NOTE(review): presumably it recombines the clean and residual components
# into one forecast per window - confirm against its definition.
forecasts_final, order_list = get_final_forecast(forecasts_clean, forecasts_residual, order_list_clean, order_list_residual)

In [None]:
# Score the multivariate-LSTM forecasts against the un-normalized frame df.
# NOTE(review): calculate_rolling_error is defined outside this section; the
# first argument looks like the name under which results are stored - confirm.
calculate_rolling_error("rolling_cv_oahu_lstm_multi", df, forecasts_final, order_list)

## LSTM Univariate

In [None]:
def rolling_cv_lstm_uni(df, step):
    """Rolling-window evaluation of the univariate LSTM.

    Starting at the first timestamp of ``df``, repeatedly asks
    ``getRollingWindow`` for train/validation/test boundaries, advances the
    window origin by 7 days, grid-searches the LSTM configuration on the
    train/validation split, refits on train + validation and forecasts the
    test split, using only the ``target_station`` column.

    Args:
        df: DataFrame whose index entries support ``strftime`` and which
            contains the ``target_station`` column.
        step: unused; kept so existing calls keep working.

    Returns:
        tuple: ``(forecasts, lags_list)`` - one forecast list and one selected
        lag order per window.
    """
    # Quick single-point grid currently in effect. The wider grid that these
    # values override was: neurons np.arange(50, 110, 50), order
    # np.arange(2, 4), epochs [100].
    neurons_list = [5]
    order_list = [2]
    epochs_list = [1]

    lags_list = []
    forecasts = []

    limit = df.index[-1].strftime('%Y-%m-%d')

    # NOTE(review): test_end is compared with a 'YYYY-MM-DD' string, so
    # getRollingWindow presumably returns date strings - confirm.
    test_end = ""
    index = df.index[0]

    while test_end < limit:
        print("Index: ", index.strftime('%Y-%m-%d'))

        train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(index)
        index = index + datetime.timedelta(days=7)

        train = df[train_start : train_end]
        validation = df[validation_start : validation_end]
        test = df[test_start : test_end]

        # Perform grid search. The log gets its own "_uni_" file name so it
        # does not overwrite the multivariate LSTM grid-search log.
        (_neurons, _order, epochs) = evaluate_multivariate_lstm_models("nested_eval_lstm_uni_oahu", train[[target_station]], validation[[target_station]], neurons_list, order_list, epochs_list)

        # Concat train & validation for test
        # (DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.)
        train = pd.concat([train, validation])

        # Perform forecast
        yhat = lstm_multi_forecast(train[[target_station]], test[[target_station]], _order, 1, _neurons, epochs)

        yhat.append(0)  # pad with a trailing 0 to keep the metrics-vector format

        lags_list.append(_order)
        forecasts.append(yhat)

    return forecasts, lags_list

In [None]:
# Run the rolling univariate-LSTM evaluation once per SSA component frame.
# The second argument (step) is not used by rolling_cv_lstm_uni.
# Note: this reassigns forecasts_clean / forecasts_residual, replacing the
# values produced by any model section run before it.
forecasts_clean, order_list_clean = rolling_cv_lstm_uni(norm_df_ssa_clean, 1)
forecasts_residual, order_list_residual = rolling_cv_lstm_uni(norm_df_ssa_residual, 1)

In [None]:
# Pass both component forecasts and their selected lag orders to
# get_final_forecast (defined outside this section).
# NOTE(review): presumably it recombines the clean and residual components
# into one forecast per window - confirm against its definition.
forecasts_final, order_list = get_final_forecast(forecasts_clean, forecasts_residual, order_list_clean, order_list_residual)

In [None]:
# Score the univariate-LSTM forecasts against the un-normalized frame df.
# NOTE(review): calculate_rolling_error is defined outside this section; the
# first argument looks like the name under which results are stored - confirm.
calculate_rolling_error("rolling_cv_oahu_lstm_uni", df, forecasts_final, order_list)

## MLP Multivariate

In [None]:
def mlp_multi_forecast(train_df, test_df, _order, _steps, _neurons, _epochs):
    """Train the MLP from ``designMLPNetwork`` on ``train_df`` and predict over ``test_df``.

    Both frames are reframed with ``series_to_supervised`` using ``_order``
    lags and ``_steps`` forecast steps. The network input is the flat block of
    lagged columns; the training target is the first column of the last
    forecast step (``var1(t)`` when ``_steps == 1``), i.e. the first column of
    ``train_df``.

    Args:
        train_df: DataFrame used to fit the network.
        test_df: DataFrame to forecast; presumably has the same columns, in
            the same order, as ``train_df`` - confirm at the call site.
        _order: number of lagged time steps used as input.
        _steps: number of forecast steps passed to ``series_to_supervised``.
        _neurons: width of each hidden layer.
        _epochs: number of training epochs.

    Returns:
        list: one prediction per supervised row of ``test_df``. Rows whose
        lag/lead window is incomplete are dropped by the reframing, so the
        list is shorter than ``test_df``.
    """
    nfeat = len(train_df.columns)
    nobs = _order * nfeat  # width of the lagged-input block

    train_reshaped_df = series_to_supervised(train_df, n_in=_order, n_out=_steps)
    train_X = train_reshaped_df.iloc[:, :nobs].values
    train_Y = train_reshaped_df.iloc[:, -nfeat].values

    # Only the inputs of the test frame are needed; its targets are never used.
    test_reshaped_df = series_to_supervised(test_df, n_in=_order, n_out=_steps)
    test_X = test_reshaped_df.iloc[:, :nobs].values

    # design network
    model = designMLPNetwork(_neurons, train_X.shape[1])

    # fit network (shuffle=False: samples are presented in their original order)
    model.fit(train_X, train_Y, epochs=_epochs, batch_size=1000, verbose=False, shuffle=False)

    forecast = model.predict(test_X)

    # predict() yields one single-element row per sample; flatten to a list
    return [f[0] for f in forecast]

In [None]:
def evaluate_multivariate_mlp_models(test_name, train_df, validation_df, neurons_list, order_list, epochs_list):
    """Grid-search MLP settings by RMSE on the validation frame.

    For every (neurons, order, epochs) combination both frames are reframed
    with ``series_to_supervised`` (one forecast step), the network returned by
    ``designMLPNetwork`` is fitted on the training frame, and the RMSE of its
    predictions on the validation frame is recorded. The target is the first
    column of the frames at time ``t``. After each combination the results
    collected so far are written to ``<test_name>.csv``.

    Args:
        test_name: path prefix of the CSV results file.
        train_df: training DataFrame.
        validation_df: validation DataFrame; presumably has the same columns
            as ``train_df`` - confirm at the call site.
        neurons_list: candidate hidden-layer widths.
        order_list: candidate lag orders.
        epochs_list: candidate epoch counts.

    Returns:
        tuple or None: the best ``(neurons, order, epochs)``, or ``None`` when
        no combination was evaluated.
    """
    result_columns = ['Neurons', 'Order', 'Epochs', 'RMSE']
    # Collect plain records and build the frame from them; DataFrame.append
    # was removed in pandas 2.0.
    records = []
    best_score, best_cfg = float("inf"), None

    nfeat = len(train_df.columns)
    nsteps = 1

    for _neurons in neurons_list:
        for _order in order_list:
            for epochs in epochs_list:
                nobs = nfeat * _order  # width of the lagged-input block

                train_reshaped_df = series_to_supervised(train_df, n_in=_order, n_out=nsteps)
                train_X = train_reshaped_df.iloc[:, :nobs].values
                train_Y = train_reshaped_df.iloc[:, -nfeat].values

                val_reshaped_df = series_to_supervised(validation_df, n_in=_order, n_out=nsteps)
                validation_X = val_reshaped_df.iloc[:, :nobs].values
                validation_Y = val_reshaped_df.iloc[:, -nfeat].values

                model = designMLPNetwork(_neurons, train_X.shape[1])

                # fit network
                model.fit(train_X, train_Y, epochs=epochs, batch_size=1000, verbose=False, shuffle=False)
                forecast = model.predict(validation_X)
                fcst = [f[0] for f in forecast]

                rmse = Measures.rmse(validation_Y, fcst)

                params = (_neurons, _order, epochs)
                if rmse < best_score:
                    best_score, best_cfg = rmse, params

                records.append({'Neurons': _neurons, 'Order': _order, 'Epochs': epochs, 'RMSE': rmse})
                # Progress line labelled MLP, matching the summary line below.
                print('MLP %s  RMSE=%.3f' % (params, rmse))
                # Rewrite the CSV after every run so partial results survive an interruption.
                pd.DataFrame(records, columns=result_columns).to_csv(test_name + ".csv")

    print('Best MLP(%s) RMSE=%.3f' % (best_cfg, best_score))
    return best_cfg

In [None]:
def designMLPNetwork(neurons, shape):
    """Build and compile the MLP used by the MLP forecasting functions.

    The network has three ReLU hidden layers of ``neurons`` units each and a
    single-unit output layer. It is compiled with mean-squared-error loss and
    the Adam optimizer.

    Args:
        neurons: number of units in each hidden layer.
        shape: number of input features (passed as ``input_dim``).

    Returns:
        The compiled ``Sequential`` model.
    """
    net = Sequential()
    # The first hidden layer also declares the input width.
    net.add(Dense(neurons, activation='relu', input_dim=shape))
    for _ in range(2):
        net.add(Dense(neurons, activation='relu'))
    net.add(Dense(1))
    net.compile(optimizer='adam', loss='mean_squared_error')
    return net

In [None]:
def rolling_cv_mlp_multi(df, step):
    """Rolling-window evaluation of the multivariate MLP.

    Starting at the first timestamp of ``df``, repeatedly asks
    ``getRollingWindow`` for train/validation/test boundaries, advances the
    window origin by 7 days, grid-searches the MLP configuration on the
    train/validation split, refits on train + validation and forecasts the
    test split, always using the ``neighbor_stations_90`` columns.

    Args:
        df: DataFrame whose index entries support ``strftime`` and which
            contains the ``neighbor_stations_90`` columns.
        step: unused; kept so existing calls keep working.

    Returns:
        tuple: ``(forecasts, lags_list)`` - one forecast list and one selected
        lag order per window.
    """
    # Single-point grid currently in effect. The wider grid that these values
    # override was: neurons np.arange(50, 110, 50), order np.arange(2, 4),
    # epochs [100].
    neurons_list = [50]
    order_list = [4]
    epochs_list = [500]

    lags_list = []
    forecasts = []

    limit = df.index[-1].strftime('%Y-%m-%d')

    # NOTE(review): test_end is compared with a 'YYYY-MM-DD' string, so
    # getRollingWindow presumably returns date strings - confirm.
    test_end = ""
    index = df.index[0]

    while test_end < limit:
        print("Index: ", index.strftime('%Y-%m-%d'))

        train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(index)
        index = index + datetime.timedelta(days=7)

        train = df[train_start : train_end]
        validation = df[validation_start : validation_end]
        test = df[test_start : test_end]

        # Perform grid search
        (_neurons, _order, epochs) = evaluate_multivariate_mlp_models("nested_eval_mlp_multi_oahu", train[neighbor_stations_90], validation[neighbor_stations_90], neurons_list, order_list, epochs_list)

        # Concat train & validation for test
        # (DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.)
        train = pd.concat([train, validation])

        # Perform forecast
        yhat = mlp_multi_forecast(train[neighbor_stations_90], test[neighbor_stations_90], _order, 1, _neurons, epochs)

        yhat.append(0)  # pad with a trailing 0 to keep the metrics-vector format

        lags_list.append(_order)
        forecasts.append(yhat)

    return forecasts, lags_list

In [None]:
# Run the rolling multivariate-MLP evaluation once per SSA component frame.
# The second argument (step) is not used by rolling_cv_mlp_multi.
# Note: this reassigns forecasts_clean / forecasts_residual, replacing the
# values produced by any model section run before it.
forecasts_clean, order_list_clean = rolling_cv_mlp_multi(norm_df_ssa_clean, 1)
forecasts_residual, order_list_residual = rolling_cv_mlp_multi(norm_df_ssa_residual, 1)

In [None]:
# Pass both component forecasts and their selected lag orders to
# get_final_forecast, then score the result against the un-normalized frame df.
# NOTE(review): get_final_forecast and calculate_rolling_error are defined
# outside this section; presumably the former recombines the clean and
# residual components and the latter stores results under the given name - confirm.
forecasts_final, order_list = get_final_forecast(forecasts_clean, forecasts_residual, order_list_clean, order_list_residual)
calculate_rolling_error("rolling_cv_oahu_mlp_multi", df, forecasts_final, order_list)

## MLP Univariate

In [None]:
def rolling_cv_mlp_uni(df, step):
    """Rolling-window evaluation of the univariate MLP.

    Starting at the first timestamp of ``df``, repeatedly asks
    ``getRollingWindow`` for train/validation/test boundaries, advances the
    window origin by 7 days, grid-searches the MLP configuration on the
    train/validation split, refits on train + validation and forecasts the
    test split, using only the ``target_station`` column.

    Args:
        df: DataFrame whose index entries support ``strftime`` and which
            contains the ``target_station`` column.
        step: unused; kept so existing calls keep working.

    Returns:
        tuple: ``(forecasts, lags_list)`` - one forecast list and one selected
        lag order per window.
    """
    # Single-point grid: one configuration is evaluated per window.
    neurons_list = [50]
    order_list = [4]
    epochs_list = [500]

    lags_list = []
    forecasts = []

    limit = df.index[-1].strftime('%Y-%m-%d')

    # NOTE(review): test_end is compared with a 'YYYY-MM-DD' string, so
    # getRollingWindow presumably returns date strings - confirm.
    test_end = ""
    index = df.index[0]

    while test_end < limit:
        print("Index: ", index.strftime('%Y-%m-%d'))

        train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(index)
        index = index + datetime.timedelta(days=7)

        train = df[train_start : train_end]
        validation = df[validation_start : validation_end]
        test = df[test_start : test_end]

        # Perform grid search
        (_neurons, _order, epochs) = evaluate_multivariate_mlp_models("nested_eval_mlp_uni_oahu", train[[target_station]], validation[[target_station]], neurons_list, order_list, epochs_list)

        # Concat train & validation for test
        # (DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.)
        train = pd.concat([train, validation])

        # Perform forecast
        yhat = mlp_multi_forecast(train[[target_station]], test[[target_station]], _order, 1, _neurons, epochs)

        yhat.append(0)  # pad with a trailing 0 to keep the metrics-vector format

        lags_list.append(_order)
        forecasts.append(yhat)

    return forecasts, lags_list

In [None]:
# Run the rolling univariate-MLP evaluation once per SSA component frame.
# The second argument (step) is not used by rolling_cv_mlp_uni.
# Note: this reassigns forecasts_clean / forecasts_residual, replacing the
# values produced by any model section run before it.
forecasts_clean, order_list_clean = rolling_cv_mlp_uni(norm_df_ssa_clean, 1)
forecasts_residual, order_list_residual = rolling_cv_mlp_uni(norm_df_ssa_residual, 1)

In [None]:
# Pass both component forecasts and their selected lag orders to
# get_final_forecast, then score the result against the un-normalized frame df.
# NOTE(review): get_final_forecast and calculate_rolling_error are defined
# outside this section; presumably the former recombines the clean and
# residual components and the latter stores results under the given name - confirm.
forecasts_final, order_list = get_final_forecast(forecasts_clean, forecasts_residual, order_list_clean, order_list_residual)
calculate_rolling_error("rolling_cv_oahu_mlp_uni", df, forecasts_final, order_list)