In [1]:
from sklearn.cluster import MeanShift, estimate_bandwidth

import pandas as pd
import numpy as np
from models import KMeansPartitioner
from sklearn import preprocessing
from pyFTS.partitioners import Grid, Util as pUtil
from pyFTS.models import hofts

from models import sthofts
import matplotlib.pyplot as plt
import datetime
import dispy
import math
from sklearn.metrics import mean_squared_error

In [2]:
!pip install -U git+https://github.com/petroniocandido/pyFTS

Collecting git+https://github.com/petroniocandido/pyFTS
  Cloning https://github.com/petroniocandido/pyFTS to /private/var/folders/13/t7d8w0nd0hv6w9_p2rntvym00000gr/T/pip-req-build-qwajmn_5
Building wheels for collected packages: pyFTS
  Running setup.py bdist_wheel for pyFTS ... [?25ldone
[?25h  Stored in directory: /private/var/folders/13/t7d8w0nd0hv6w9_p2rntvym00000gr/T/pip-ephem-wheel-cache-___riyly/wheels/84/d7/1e/a333c7128f25b347640740859808db094c4478e98663cd2297
Successfully built pyFTS
Installing collected packages: pyFTS
  Found existing installation: pyFTS 1.2.2
    Uninstalling pyFTS-1.2.2:
      Successfully uninstalled pyFTS-1.2.2
Successfully installed pyFTS-1.2.2


## Auxiliary Functions

In [3]:
def normalize(df):
    mindf = df.min()
    maxdf = df.max()
    return (df-mindf)/(maxdf-mindf)

In [4]:
def split_data(df, df_clean, df_residual, interval):
    sample_df = df.loc[interval]
    residual_sample_df = df_residual.loc[interval]
    clean_sample_df = df_clean.loc[interval]

    norm_residual_sample_df = normalize(residual_sample_df)
    norm_clean_sample_df = normalize(clean_sample_df)


    week = (sample_df.index.day - 1) // 7 + 1

    # PARA OS TESTES:
    # 2 SEMANAS PARA TREINAMENTO
    train_df = sample_df.loc[week <= 2]
    train_residual_df = norm_residual_sample_df.loc[week <= 2]
    train_clean_df = norm_clean_sample_df.loc[week <= 2]

    # 1 SEMANA PARA VALIDACAO
    validation_df = sample_df.loc[week == 3]
    validation_residual_df = norm_residual_sample_df.loc[week == 3]
    validation_clean_df = norm_clean_sample_df.loc[week == 3]

    # 1 SEMANA PARA TESTES
    test_df = sample_df.loc[week > 3]
    test_residual_df = norm_residual_sample_df.loc[week > 3]
    test_clean_df = norm_clean_sample_df.loc[week > 3]
    
    return (train_df, train_clean_df, train_residual_df, validation_df, validation_clean_df, validation_residual_df, test_df, test_clean_df, test_residual_df)

## Loading Data

In [5]:
df = pd.read_pickle("df_wind_speed.pkl")
df_ssa_clean = pd.read_pickle("df_wind_speed_ssa_clean.pkl")
df_ssa_residual = pd.read_pickle("df_wind_speed_ssa_residual.pkl")

Entire data split by season 

In [6]:
interval = ((df.index >= '2017-05') & (df.index <= '2017-06'))
(train_df, train_clean_df, train_residual_df, 
 validation_df, validation_clean_df, validation_residual_df, 
 test_df, test_clean_df, test_residual_df) = split_data(df, df_ssa_clean, df_ssa_residual, interval)

In [7]:
target_station = 'WTG01'

In [8]:
#All neighbor stations with residual correlation greater than .90
neighbor_stations_90 = ['WTG01','WTG02','WTG03','WTG05','WTG06']

## Parameter Tuning

## SARIMA

In [13]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from itertools import product

  from pandas.core import datetools


In [14]:
# evaluate an ARIMA model for a given order (p,d,q)
from statsmodels.tsa.statespace.sarimax import SARIMAX
import math
from sklearn.metrics import mean_squared_error

def evaluate_sarima_model(train, validation, arima_order, sarima_order):
    
    whole_data = train.append(validation)
    test_data = validation
    
    training_mod = SARIMAX(whole_data.values, order=arima_order, seasonal_order=sarima_order, disp=True)
    training_res = training_mod.fit()
    
    mod = SARIMAX(whole_data.values, order=arima_order, seasonal_order=sarima_order)
    res = mod.filter(training_res.params)
    
    insample = res.predict()
    wlen = len(whole_data)
    tlen = len(test_data)

    predictions = insample[wlen-tlen:]    
    # calculate out of sample error
    error = math.sqrt(mean_squared_error(validation, predictions))
    return error

In [15]:
# evaluate combinations of p, d and q values for an ARIMA model
import sys
def evaluate_sarima_models(test_name, train_df, validation_df, parameters_list, period_length):

    sarima_results = pd.DataFrame(columns=['Order','RMSE'])
    best_score, best_cfg = float("inf"), None

    for param in parameters_list:
        arima_order = (param[0],param[1],param[2])
        sarima_order = (param[3],param[4],param[5],period_length)
        print('Testing SARIMA%s %s ' % (str(arima_order),str(sarima_order)))
        try:
            rmse = evaluate_sarima_model(train_df, validation_df, arima_order, sarima_order)
            if rmse < best_score:
                best_score, best_cfg = rmse, param

            res = {'Order' : str(param) ,'RMSE' : rmse}
            print('SARIMA%s %s RMSE=%.3f' % (str(arima_order),str(sarima_order),rmse))
            sarima_results = sarima_results.append(res, ignore_index=True)
            sarima_results.to_csv(test_name+".csv")
        except:
            print(sys.exc_info())
            print('Invalid model%s %s ' % (str(arima_order),str(sarima_order)))
            continue
    print('Best SARIMA%s RMSE=%.3f' % (best_cfg, best_score))

In [None]:
p_values = [0,1,2]
d_values = [0,1]
q_values = [0,1,2]
P_values = [0,1]
D_Values = [0,1]
Q_Values = [0,1]

parameters = product(p_values, d_values, q_values, P_values, D_Values, Q_Values)
parameters_list = list(parameters)
period_length = 144 #de 00:00 as 23:50
evaluate_sarima_models("wind_sarima-1-clean",train_clean_df[target_station], validation_clean_df[target_station], parameters_list, period_length)

Testing SARIMA(0, 0, 0) (0, 0, 0, 144) 
(<class 'ValueError'>, ValueError('Number of states in statespace model must be a positive number.',), <traceback object at 0x112f31948>)
Invalid model(0, 0, 0) (0, 0, 0, 144) 
Testing SARIMA(0, 0, 0) (0, 0, 1, 144) 
SARIMA(0, 0, 0) (0, 0, 1, 144) RMSE=0.364
Testing SARIMA(0, 0, 0) (0, 1, 0, 144) 
(<class 'ValueError'>, ValueError('could not broadcast input array from shape (0,0) into shape (144,144)',), <traceback object at 0x112f0ac08>)
Invalid model(0, 0, 0) (0, 1, 0, 144) 
Testing SARIMA(0, 0, 0) (0, 1, 1, 144) 


In [None]:
evaluate_sarima_models("wind_sarima-1-residual",train_residual_df[target_station], validation_residual_df[target_station], parameters_list, period_length)

## Vector Autorregressive - VAR

In [9]:
from statsmodels.tsa.api import VAR, DynamicVAR

  from pandas.core import datetools


In [10]:
def evaluate_VAR_models(test_name, train, validation,target, maxlags_list):
    var_results = pd.DataFrame(columns=['Order','RMSE'])
    best_score, best_cfg = float("inf"), None
    
    for lgs in maxlags_list:
        model = VAR(train)
        results = model.fit(maxlags=lgs, ic='aic')
        
        order = results.k_ar
        forecast = []

        for i in range(len(validation)-order) :
            forecast.extend(results.forecast(validation.values[i:i+order],1))

        forecast_df = pd.DataFrame(columns=validation.columns, data=forecast)
        rmse = math.sqrt(mean_squared_error(forecast_df[target].values, validation[target].iloc[order:]))

        if rmse < best_score:
            best_score, best_cfg = rmse, order

        res = {'Order' : str(order) ,'RMSE' : rmse}
        print('VAR (%s)  RMSE=%.3f' % (str(order),rmse))
        var_results = var_results.append(res, ignore_index=True)
        var_results.to_csv(test_name+".csv")
        
    print('Best VAR(%s) RMSE=%.3f' % (best_cfg, best_score))


In [11]:
maxlags_list = [1,2,4,6,8,10,20,40]
evaluate_VAR_models("wind_var_clean", train_clean_df[neighbor_stations_90], validation_clean_df[neighbor_stations_90],target_station, maxlags_list)
evaluate_VAR_models("wind_var_residual", train_residual_df[neighbor_stations_90], validation_residual_df[neighbor_stations_90],target_station, maxlags_list)

VAR (1)  RMSE=0.012
VAR (2)  RMSE=0.004
VAR (4)  RMSE=0.003
VAR (6)  RMSE=0.003
VAR (8)  RMSE=0.003
VAR (10)  RMSE=0.003
VAR (20)  RMSE=0.002
VAR (39)  RMSE=0.003
Best VAR(20) RMSE=0.002
VAR (1)  RMSE=0.089
VAR (2)  RMSE=0.087
VAR (4)  RMSE=0.085
VAR (6)  RMSE=0.085
VAR (8)  RMSE=0.084
VAR (10)  RMSE=0.084
VAR (20)  RMSE=0.083
VAR (26)  RMSE=0.084
Best VAR(20) RMSE=0.083


## High Order Fuzzy Time Series

In [85]:
!pip install -U git+https://github.com/petroniocandido/pyFTS

Collecting git+https://github.com/petroniocandido/pyFTS
  Cloning https://github.com/petroniocandido/pyFTS to /private/var/folders/13/t7d8w0nd0hv6w9_p2rntvym00000gr/T/pip-req-build-4e3y8zbt
Building wheels for collected packages: pyFTS
  Running setup.py bdist_wheel for pyFTS ... [?25ldone
[?25h  Stored in directory: /private/var/folders/13/t7d8w0nd0hv6w9_p2rntvym00000gr/T/pip-ephem-wheel-cache-_3kkffbn/wheels/84/d7/1e/a333c7128f25b347640740859808db094c4478e98663cd2297
Successfully built pyFTS
Installing collected packages: pyFTS
  Found existing installation: pyFTS 1.2.2
    Uninstalling pyFTS-1.2.2:
      Successfully uninstalled pyFTS-1.2.2
Successfully installed pyFTS-1.2.2


In [12]:
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
from pyFTS.partitioners import Grid, Entropy, Util as pUtil
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models import hofts
from pyFTS.common import Transformations
tdiff = Transformations.Differential(1)

In [13]:
def evaluate_hofts_models(test_name, train, validation, partitioners_list, order_list, partitions_list):
    
    hofts_results = pd.DataFrame(columns=['Partitioner','Partitions','Order','RMSE'])
    best_score, best_cfg = float("inf"), None


    for _partitioner in partitioners_list:
        for _order in order_list:
            for npartitions in partitions_list:
                fuzzy_sets = _partitioner(data=train.values, npart=npartitions)
                model_simple_hofts = hofts.HighOrderFTS()

                #model_simple_hofts.append_transformation(Transformations.Differential(1))
                model_simple_hofts.fit(train.values, order=_order, partitioner=fuzzy_sets)

                forecast = model_simple_hofts.predict(validation.values)

                rmse = math.sqrt(mean_squared_error(validation.iloc[_order:], forecast[:-1]))

                if rmse < best_score:
                    best_score, best_cfg = rmse, (_order,npartitions,_partitioner)

                res = {'Partitioner':str(_partitioner), 'Partitions':npartitions, 'Order' : str(_order) ,'RMSE' : rmse}
                print('HOFTS %s - %s - %s  RMSE=%.3f' % (str(_partitioner), npartitions, str(_order),rmse))
                hofts_results = hofts_results.append(res, ignore_index=True)
                hofts_results.to_csv(test_name+".csv")

    print('Best HOFTS(%s) RMSE=%.3f' % (best_cfg, best_score))

In [14]:
partitioners_list = [Grid.GridPartitioner, Entropy.EntropyPartitioner]
order_list = np.arange(1,5)
partitions_list = np.arange(10,100,10)

evaluate_hofts_models("hofts_wind_clean", train_clean_df[target_station], validation_clean_df[target_station], partitioners_list, order_list, partitions_list)
evaluate_hofts_models("hofts_wind_residual", train_residual_df[target_station], validation_residual_df[target_station], partitioners_list, order_list, partitions_list)

HOFTS <class 'pyFTS.partitioners.Grid.GridPartitioner'> - 10 - 1  RMSE=0.077
HOFTS <class 'pyFTS.partitioners.Grid.GridPartitioner'> - 20 - 1  RMSE=0.048
HOFTS <class 'pyFTS.partitioners.Grid.GridPartitioner'> - 30 - 1  RMSE=0.037
HOFTS <class 'pyFTS.partitioners.Grid.GridPartitioner'> - 40 - 1  RMSE=0.029
HOFTS <class 'pyFTS.partitioners.Grid.GridPartitioner'> - 50 - 1  RMSE=0.026
HOFTS <class 'pyFTS.partitioners.Grid.GridPartitioner'> - 60 - 1  RMSE=0.021
HOFTS <class 'pyFTS.partitioners.Grid.GridPartitioner'> - 70 - 1  RMSE=0.022
HOFTS <class 'pyFTS.partitioners.Grid.GridPartitioner'> - 80 - 1  RMSE=0.020
HOFTS <class 'pyFTS.partitioners.Grid.GridPartitioner'> - 90 - 1  RMSE=0.018
HOFTS <class 'pyFTS.partitioners.Grid.GridPartitioner'> - 10 - 2  RMSE=0.074
HOFTS <class 'pyFTS.partitioners.Grid.GridPartitioner'> - 20 - 2  RMSE=0.041
HOFTS <class 'pyFTS.partitioners.Grid.GridPartitioner'> - 30 - 2  RMSE=0.031
HOFTS <class 'pyFTS.partitioners.Grid.GridPartitioner'> - 40 - 2  RMSE=0.023

## High Order NonStationary FTS

In [28]:
from pyFTS.models.nonstationary import cvfts
from pyFTS.models.nonstationary import partitioners as nspartitioners

In [102]:
import importlib
importlib.reload(honsfts)

<module 'pyFTS.models.nonstationary.honsfts' from '/Users/cseveriano/anaconda3/lib/python3.6/site-packages/pyFTS/models/nonstationary/honsfts.py'>

In [27]:
def evaluate_honsfts_models(test_name, train, validation, partitioners_list, order_list, partitions_list):
    
    honsfts_results = pd.DataFrame(columns=['Partitioner','Partitions','Order','RMSE'])
    best_score, best_cfg = float("inf"), None


    for _partitioner in partitioners_list:
        for _order in order_list:
            for npartitions in partitions_list:
                    fuzzy_sets =  nspartitioners.PolynomialNonStationaryPartitioner(data=train.values, part=_partitioner(data=train.values, npart=npartitions), degree=2)
                    
                    model_cvfts = cvfts.ConditionalVarianceFTS()
                    
                    model_cvfts.fit(train.values, order=_order, parameters=1, partitioner=fuzzy_sets)

                    forecast = model_cvfts.predict(validation.values)

                    rmse = math.sqrt(mean_squared_error(validation.iloc[_order:], forecast[:-1]))
                    params = (_order,npartitions,_partitioner)
                    if rmse < best_score:
                        best_score, best_cfg = rmse, params

                    res = {'Partitioner':str(_partitioner), 'Partitions':npartitions, 'Order' : str(_order) ,'RMSE' : rmse}
                    print('CVFTS %s  RMSE=%.3f' % (params,rmse))
                    honsfts_results = honsfts_results.append(res, ignore_index=True)
                    honsfts_results.to_csv(test_name+".csv")

    print('Best CVFTS(%s) RMSE=%.3f' % (best_cfg, best_score))

In [33]:
partitioners_list = [Grid.GridPartitioner, Entropy.EntropyPartitioner]
order_list = np.arange(1,2)
partitions_list = np.arange(80,110,10)

evaluate_honsfts_models("cvfts_wind_clean", train_clean_df[target_station], validation_clean_df[target_station], partitioners_list, order_list, partitions_list)
evaluate_honsfts_models("cvfts_wind_residual", train_residual_df[target_station], validation_residual_df[target_station], partitioners_list, order_list, partitions_list)



CVFTS (1, 80, <class 'pyFTS.partitioners.Grid.GridPartitioner'>)  RMSE=0.096
CVFTS (1, 90, <class 'pyFTS.partitioners.Grid.GridPartitioner'>)  RMSE=0.096
CVFTS (1, 100, <class 'pyFTS.partitioners.Grid.GridPartitioner'>)  RMSE=0.096
CVFTS (1, 80, <class 'pyFTS.partitioners.Entropy.EntropyPartitioner'>)  RMSE=0.091
CVFTS (1, 90, <class 'pyFTS.partitioners.Entropy.EntropyPartitioner'>)  RMSE=0.091
CVFTS (1, 100, <class 'pyFTS.partitioners.Entropy.EntropyPartitioner'>)  RMSE=0.090
Best CVFTS((1, 100, <class 'pyFTS.partitioners.Entropy.EntropyPartitioner'>)) RMSE=0.090


## Long Short-term Memory LSTM

In [35]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

Using TensorFlow backend.


## Long Short-term Memory LSTM - Multivariate

In [34]:
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [46]:
def evaluate_multivariate_lstm_models(test_name, train_df, validation_df, neurons_list, order_list, epochs_list):
    
    lstm_results = pd.DataFrame(columns=['Neurons','Order','Epochs','RMSE'])
    best_score, best_cfg = float("inf"), None
    
    nfeat = len(train_df.columns)
    nsteps = 1
    
    for _neurons in neurons_list:
        for _order in order_list:
            for epochs in epochs_list:
                    
                    nobs = nfeat * _order
                    
                    train_reshaped_df = series_to_supervised(train_df, n_in=_order, n_out=nsteps)
                    train_X, train_Y = train_reshaped_df.iloc[:,:nobs].values, train_reshaped_df.iloc[:,-nfeat].values
                    train_X = train_X.reshape((train_X.shape[0], _order, nfeat))                    
                    
                    val_reshaped_df = series_to_supervised(validation_df, n_in=_order, n_out=nsteps)
                    validation_X, validation_Y = val_reshaped_df.iloc[:,:nobs].values, val_reshaped_df.iloc[:,-nfeat].values
                    validation_X = validation_X.reshape((validation_X.shape[0], _order, nfeat))
                    
                    # design network
                    model = Sequential()
                    model.add(LSTM(_neurons, input_shape=(train_X.shape[1], train_X.shape[2])))
                    model.add(Dense(1))
                    model.compile(loss='mae', optimizer='adam')
                    
                    # fit network
                    history = model.fit(train_X, train_Y, epochs=epochs, batch_size=72, verbose=False, shuffle=False)
                    forecast = model.predict(validation_X)
                    
                    rmse = math.sqrt(mean_squared_error(validation_Y, forecast))
                    params = (_neurons, _order,epochs)
                    if rmse < best_score:
                        best_score, best_cfg = rmse, params

                    res = {'Neurons':_neurons, 'Order':_order, 'Epochs' : epochs ,'RMSE' : rmse}
                    print('LSTM %s  RMSE=%.3f' % (params,rmse))
                    lstm_results = lstm_results.append(res, ignore_index=True)
                    lstm_results.to_csv(test_name+".csv")

    print('Best LSTM(%s) RMSE=%.3f' % (best_cfg, best_score))

In [45]:
neurons_list = np.arange(10,110,10)
order_list = np.arange(1,5)
epochs_list = [100]

evaluate_multivariate_lstm_models("lstm_multi_wind_clean", train_clean_df[neighbor_stations_90], validation_clean_df[neighbor_stations_90], neurons_list, order_list, epochs_list)
evaluate_multivariate_lstm_models("lstm_multi_wind_residual", train_residual_df[neighbor_stations_90], validation_residual_df[neighbor_stations_90], neurons_list, order_list, epochs_list)

LSTM (10, 1, 100)  RMSE=0.021
LSTM (10, 2, 100)  RMSE=0.026
LSTM (10, 3, 100)  RMSE=0.029
LSTM (10, 4, 100)  RMSE=0.030
LSTM (20, 1, 100)  RMSE=0.015
LSTM (20, 2, 100)  RMSE=0.026
LSTM (20, 3, 100)  RMSE=0.029
LSTM (20, 4, 100)  RMSE=0.030
LSTM (30, 1, 100)  RMSE=0.015
LSTM (30, 2, 100)  RMSE=0.016
LSTM (30, 3, 100)  RMSE=0.029
LSTM (30, 4, 100)  RMSE=0.017
LSTM (40, 1, 100)  RMSE=0.019
LSTM (40, 2, 100)  RMSE=0.019
LSTM (40, 3, 100)  RMSE=0.013
LSTM (40, 4, 100)  RMSE=0.011
LSTM (50, 1, 100)  RMSE=0.016
LSTM (50, 2, 100)  RMSE=0.017
LSTM (50, 3, 100)  RMSE=0.012
LSTM (50, 4, 100)  RMSE=0.011
LSTM (60, 1, 100)  RMSE=0.015
LSTM (60, 2, 100)  RMSE=0.019
LSTM (60, 3, 100)  RMSE=0.017
LSTM (60, 4, 100)  RMSE=0.011
LSTM (70, 1, 100)  RMSE=0.014
LSTM (70, 2, 100)  RMSE=0.018
LSTM (70, 3, 100)  RMSE=0.013
LSTM (70, 4, 100)  RMSE=0.010
LSTM (80, 1, 100)  RMSE=0.016
LSTM (80, 2, 100)  RMSE=0.015
LSTM (80, 3, 100)  RMSE=0.016
LSTM (80, 4, 100)  RMSE=0.011
LSTM (90, 1, 100)  RMSE=0.016
LSTM (90, 

In [48]:
neurons_list = np.arange(10,110,10)
order_list = np.arange(1,5)
epochs_list = [100]

evaluate_multivariate_lstm_models("lstm_uni_wind_clean", train_clean_df[[target_station]], validation_clean_df[[target_station]], neurons_list, order_list, epochs_list)
evaluate_multivariate_lstm_models("lstm_uni_wind_residual", train_residual_df[[target_station]], validation_residual_df[[target_station]], neurons_list, order_list, epochs_list)

LSTM (10, 1, 100)  RMSE=0.012
LSTM (10, 2, 100)  RMSE=0.018
LSTM (10, 3, 100)  RMSE=0.023
LSTM (10, 4, 100)  RMSE=0.022
LSTM (20, 1, 100)  RMSE=0.012
LSTM (20, 2, 100)  RMSE=0.019
LSTM (20, 3, 100)  RMSE=0.024
LSTM (20, 4, 100)  RMSE=0.026
LSTM (30, 1, 100)  RMSE=0.013
LSTM (30, 2, 100)  RMSE=0.019
LSTM (30, 3, 100)  RMSE=0.022
LSTM (30, 4, 100)  RMSE=0.023
LSTM (40, 1, 100)  RMSE=0.013
LSTM (40, 2, 100)  RMSE=0.019
LSTM (40, 3, 100)  RMSE=0.021
LSTM (40, 4, 100)  RMSE=0.022
LSTM (50, 1, 100)  RMSE=0.013
LSTM (50, 2, 100)  RMSE=0.019
LSTM (50, 3, 100)  RMSE=0.022
LSTM (50, 4, 100)  RMSE=0.018
LSTM (60, 1, 100)  RMSE=0.013
LSTM (60, 2, 100)  RMSE=0.019
LSTM (60, 3, 100)  RMSE=0.022
LSTM (60, 4, 100)  RMSE=0.017
LSTM (70, 1, 100)  RMSE=0.013
LSTM (70, 2, 100)  RMSE=0.019
LSTM (70, 3, 100)  RMSE=0.023
LSTM (70, 4, 100)  RMSE=0.021
LSTM (80, 1, 100)  RMSE=0.013
LSTM (80, 2, 100)  RMSE=0.019
LSTM (80, 3, 100)  RMSE=0.022
LSTM (80, 4, 100)  RMSE=0.017
LSTM (90, 1, 100)  RMSE=0.013
LSTM (90, 

## Multi Layer Perceptron 

In [51]:
def evaluate_multivariate_mlp_models(test_name, train_df, validation_df, neurons_list, order_list, epochs_list):
    
    lstm_results = pd.DataFrame(columns=['Neurons','Order','Epochs','RMSE'])
    best_score, best_cfg = float("inf"), None
    
    nfeat = len(train_df.columns)
    nsteps = 1
    
    for _neurons in neurons_list:
        for _order in order_list:
            for epochs in epochs_list:
                    
                    nobs = nfeat * _order
                    
                    train_reshaped_df = series_to_supervised(train_df, n_in=_order, n_out=nsteps)
                    train_X, train_Y = train_reshaped_df.iloc[:,:nobs].values, train_reshaped_df.iloc[:,-nfeat].values
                    
                    val_reshaped_df = series_to_supervised(validation_df, n_in=_order, n_out=nsteps)
                    validation_X, validation_Y = val_reshaped_df.iloc[:,:nobs].values, val_reshaped_df.iloc[:,-nfeat].values
                    
                    
                    # design network
                    model = Sequential()
                    model.add(Dense(_neurons, activation='relu', input_dim=train_X.shape[1]))
                    model.add(Dense(1))
                    model.compile(loss='mean_squared_error', optimizer='adam')
                    
                    # fit network
                    history = model.fit(train_X, train_Y, epochs=epochs, batch_size=72, verbose=False, shuffle=False)
                    forecast = model.predict(validation_X)
                    
                    rmse = math.sqrt(mean_squared_error(validation_Y, forecast))
                    params = (_neurons, _order,epochs)
                    if rmse < best_score:
                        best_score, best_cfg = rmse, params

                    res = {'Neurons':_neurons, 'Order':_order, 'Epochs' : epochs ,'RMSE' : rmse}
                    print('MLP %s  RMSE=%.3f' % (params,rmse))
                    lstm_results = lstm_results.append(res, ignore_index=True)
                    lstm_results.to_csv(test_name+".csv")

    print('Best MLP(%s) RMSE=%.3f' % (best_cfg, best_score))

## MLP - Multivariate

In [52]:
neurons_list = np.arange(10,110,10)
order_list = np.arange(1,5)
epochs_list = [100]

evaluate_multivariate_mlp_models("mlp_multi_wind_clean", train_clean_df[neighbor_stations_90], validation_clean_df[neighbor_stations_90], neurons_list, order_list, epochs_list)
evaluate_multivariate_mlp_models("mlp_multi_wind_residual", train_residual_df[neighbor_stations_90], validation_residual_df[neighbor_stations_90], neurons_list, order_list, epochs_list)

MLP (10, 1, 100)  RMSE=0.017
MLP (10, 2, 100)  RMSE=0.017
MLP (10, 3, 100)  RMSE=0.013
MLP (10, 4, 100)  RMSE=0.028
MLP (20, 1, 100)  RMSE=0.024
MLP (20, 2, 100)  RMSE=0.013
MLP (20, 3, 100)  RMSE=0.013
MLP (20, 4, 100)  RMSE=0.019
MLP (30, 1, 100)  RMSE=0.015
MLP (30, 2, 100)  RMSE=0.012
MLP (30, 3, 100)  RMSE=0.013
MLP (30, 4, 100)  RMSE=0.021
MLP (40, 1, 100)  RMSE=0.018
MLP (40, 2, 100)  RMSE=0.017
MLP (40, 3, 100)  RMSE=0.013
MLP (40, 4, 100)  RMSE=0.012
MLP (50, 1, 100)  RMSE=0.013
MLP (50, 2, 100)  RMSE=0.017
MLP (50, 3, 100)  RMSE=0.012
MLP (50, 4, 100)  RMSE=0.014
MLP (60, 1, 100)  RMSE=0.017
MLP (60, 2, 100)  RMSE=0.015
MLP (60, 3, 100)  RMSE=0.012
MLP (60, 4, 100)  RMSE=0.014
MLP (70, 1, 100)  RMSE=0.016
MLP (70, 2, 100)  RMSE=0.011
MLP (70, 3, 100)  RMSE=0.011
MLP (70, 4, 100)  RMSE=0.012
MLP (80, 1, 100)  RMSE=0.014
MLP (80, 2, 100)  RMSE=0.019
MLP (80, 3, 100)  RMSE=0.011
MLP (80, 4, 100)  RMSE=0.013
MLP (90, 1, 100)  RMSE=0.017
MLP (90, 2, 100)  RMSE=0.018
MLP (90, 3, 10

## MLP - Univariate

In [53]:
neurons_list = np.arange(10,110,10)
order_list = np.arange(1,5)
epochs_list = [100]

evaluate_multivariate_mlp_models("mlp_uni_wind_clean", train_clean_df[[target_station]], validation_clean_df[[target_station]], neurons_list, order_list, epochs_list)
evaluate_multivariate_mlp_models("mlp_uni_wind_residual", train_residual_df[[target_station]], validation_residual_df[[target_station]], neurons_list, order_list, epochs_list)

MLP (10, 1, 100)  RMSE=0.012
MLP (10, 2, 100)  RMSE=0.016
MLP (10, 3, 100)  RMSE=0.012
MLP (10, 4, 100)  RMSE=0.032
MLP (20, 1, 100)  RMSE=0.012
MLP (20, 2, 100)  RMSE=0.016
MLP (20, 3, 100)  RMSE=0.019
MLP (20, 4, 100)  RMSE=0.029
MLP (30, 1, 100)  RMSE=0.012
MLP (30, 2, 100)  RMSE=0.017
MLP (30, 3, 100)  RMSE=0.023
MLP (30, 4, 100)  RMSE=0.011
MLP (40, 1, 100)  RMSE=0.012
MLP (40, 2, 100)  RMSE=0.016
MLP (40, 3, 100)  RMSE=0.011
MLP (40, 4, 100)  RMSE=0.011
MLP (50, 1, 100)  RMSE=0.012
MLP (50, 2, 100)  RMSE=0.020
MLP (50, 3, 100)  RMSE=0.017
MLP (50, 4, 100)  RMSE=0.010
MLP (60, 1, 100)  RMSE=0.012
MLP (60, 2, 100)  RMSE=0.015
MLP (60, 3, 100)  RMSE=0.011
MLP (60, 4, 100)  RMSE=0.009
MLP (70, 1, 100)  RMSE=0.012
MLP (70, 2, 100)  RMSE=0.014
MLP (70, 3, 100)  RMSE=0.011
MLP (70, 4, 100)  RMSE=0.011
MLP (80, 1, 100)  RMSE=0.012
MLP (80, 2, 100)  RMSE=0.012
MLP (80, 3, 100)  RMSE=0.015
MLP (80, 4, 100)  RMSE=0.008
MLP (90, 1, 100)  RMSE=0.012
MLP (90, 2, 100)  RMSE=0.016
MLP (90, 3, 10