In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import math
from sklearn.metrics import mean_squared_error

# Funções Auxiliares

In [2]:
def normalize(df):
    mindf = df.min()
    maxdf = df.max()
    return (df-mindf)/(maxdf-mindf)

In [3]:
def denormalize(norm, _min, _max):
    return [(n * (_max-_min)) + _min for n in norm]

In [4]:
def split_data(df, df_clean, df_residual, interval):
    sample_df = df.loc[interval]
    residual_sample_df = df_residual.loc[interval]
    clean_sample_df = df_clean.loc[interval]

    week = (sample_df.index.day - 1) // 7 + 1

    # PARA OS TESTES:
    # 2 SEMANAS PARA TREINAMENTO
    train_df = sample_df.loc[week <= 2]
    train_residual_df = residual_sample_df.loc[week <= 2]
    train_clean_df = clean_sample_df.loc[week <= 2]

    # 1 SEMANA PARA VALIDACAO
    validation_df = sample_df.loc[week == 3]
    validation_residual_df = residual_sample_df.loc[week == 3]
    validation_clean_df = clean_sample_df.loc[week == 3]

    # 1 SEMANA PARA TESTES
    test_df = sample_df.loc[week > 3]
    test_residual_df = residual_sample_df.loc[week > 3]
    test_clean_df = clean_sample_df.loc[week > 3]
    
    return (train_df, train_clean_df, train_residual_df, validation_df, validation_clean_df, validation_residual_df, test_df, test_clean_df, test_residual_df)

In [5]:
def calculate_rmse(test, forecast, order, step):
    rmse = math.sqrt(mean_squared_error(test.iloc[(order):], forecast[:-step]))
    print("RMSE : "+str(rmse))
    return rmse

In [6]:
def reconstruct_ssa_series(clean, residual):
    return [r + c for r, c in zip(residual,clean)]

In [7]:
def save_obj(obj, name ):
    with open('results/'+ name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name ):
    with open('results/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [8]:
def difference(raw_df, interval=1):
    df_diff = pd.DataFrame(columns=raw_df.columns, index=raw_df.index[1:])
    
    for col in raw_df.columns:
        raw_array = raw_df[col]
        diff = []
        for i in range(interval, len(raw_array)):
            value = raw_array[i] - raw_array[i - interval]
            diff.append(value)
        
        df_diff[col] = diff
    return df_diff

In [9]:
def inverse_difference(raw_series, diff_series):
    inverted = []
    for i in range(len(diff_series)):
        interval = len(raw_series)-i
        value = diff_series[i] + raw_series[-interval]
        inverted.append(value)
        
    return inverted

# Load Dataset
Split the data into train, validation and test subsets

In [10]:
#Set target and input variables 
target_station = 'DHHL_3'

#All neighbor stations with residual correlation greater than .90
neighbor_stations_90 = ['DHHL_3',  'DHHL_4','DHHL_5','DHHL_10','DHHL_11','DHHL_9','DHHL_2', 'DHHL_6','DHHL_7','DHHL_8']

In [11]:
df = pd.read_pickle("df_oahu.pkl")
df_ssa_clean = pd.read_pickle("df_ssa_clean.pkl")
df_ssa_residual = pd.read_pickle("df_ssa_residual.pkl")

In [12]:
## Remove columns with many corrupted or missing values
df.drop(columns=['AP_1', 'AP_7'], inplace=True)
df_ssa_clean.drop(columns=['AP_1', 'AP_7'], inplace=True)
df_ssa_residual.drop(columns=['AP_1', 'AP_7'], inplace=True)

In [13]:
#Normalize Data

# Save Min-Max for Denorm
min_raw = df[target_station].min()
min_clean = df_ssa_clean[target_station].min()
min_residual = df_ssa_residual[target_station].min()

max_raw = df[target_station].max()
max_clean = df_ssa_clean[target_station].max()
max_residual = df_ssa_residual[target_station].max()

# Perform Normalization
norm_df_ssa_clean = normalize(df_ssa_clean)
norm_df_ssa_residual = normalize(df_ssa_residual)

In [14]:
# Split data
interval = ((df.index >= '2010-06') & (df.index < '2011-06'))
#interval = ((df.index >= '2010-11') & (df.index <= '2010-12'))

(train_df, train_clean_df, train_residual_df, 
 validation_df, validation_clean_df, validation_residual_df, 
 test_df, test_clean_df, test_residual_df) = split_data(df, norm_df_ssa_clean, norm_df_ssa_residual, interval)

## Forecasting with SSA Decomposition

For each dataset, all the time series were decomposed in 2 components (trend plus harmonic and residual) and the 2 resulting datasets were used for different configurations of each model.

## Persistence

In [56]:
def persistence_forecast(train, test, step):
    predictions = []
    
    for t in np.arange(0,len(test), step):
        yhat = [test.iloc[t]]  * step
        predictions.extend(yhat)
        
    return predictions

In [62]:
step = 3
persistence_order = 1

forecast_clean = persistence_forecast(train_clean_df[target_station], test_clean_df[target_station],step)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

forecast_residual = persistence_forecast(train_residual_df[target_station], test_residual_df[target_station],step)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

In [65]:
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
rmse = calculate_rmse(test_df[target_station], final_forecast, persistence_order, step-1)

RMSE : 146.86289269429628


In [66]:
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="oahu_persistence_3")

## SARIMA

In [29]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
import math
from sklearn.metrics import mean_squared_error

In [186]:
def old_sarima_forecast(train, test, arima_order, sarima_order, step):
    window_size = sarima_order[3] * 3
    history = list(train[-window_size:].values)
    predictions = []
    
    for t in np.arange(1,len(test)+1,step):
        print("Sample: "+str(t))
        obs = test.iloc[:t].values
        history.extend(obs)
        history = history[-window_size:]
        print("Histlen: "+str(len(history)))
        
        model = SARIMAX(history, order=arima_order, seasonal_order=sarima_order,enforce_invertibility=False,enforce_stationarity=False)
        model_fit = model.fit(disp=True,enforce_invertibility=False, method='powell', maxiter=200)
        yhat = model_fit.forecast(steps=step)[0:step]
        print("Forecast: "+str(yhat))
        predictions.extend(yhat)
    
    return predictions

In [187]:
def old_sarima_forecast(train, test, arima_order, sarima_order, step):

    predictions = []
    
    for date in train.index.to_period('M').unique():
        print("Predicting : "+str(date))
        history = list(train[str(date)])
        test_steps = len(test[str(date)])
        print("Number of steps : "+str(test_steps))
        
        model = SARIMAX(history, order=arima_order, seasonal_order=sarima_order,enforce_invertibility=False,enforce_stationarity=False)
        model_fit = model.fit(disp=True,enforce_invertibility=False, method='powell', maxiter=200)
        yhat = model_fit.forecast(test_steps)      
        
        predictions.extend(yhat)
    
    return predictions

In [40]:
def old_sarima_forecast(train, test, arima_order, sarima_order, step):

    predictions = []
    window_size = sarima_order[3] * 5
    print("Window size : "+str(window_size))
    
    for date in train.index.to_period('M').unique():
        
        history = list(train[str(date)].iloc[-window_size:])
        print("Predicting : "+str(date))
        
        st = 0
        for t in np.arange(1,len(test)+1,step):
            print("Sample: "+str(t))
            obs = test.iloc[st:t].values
            history.extend(obs)
            history = history[-window_size:]
            print("Histlen: "+str(len(history)))
        
            model = SARIMAX(history, order=arima_order, seasonal_order=sarima_order,enforce_invertibility=False,enforce_stationarity=False)
            model_fit = model.fit(disp=True,enforce_invertibility=False,  maxiter=200)
            yhat = model_fit.forecast(step)      
            st = t
            predictions.extend(yhat)
    
    return predictions

In [46]:
def sarima_forecast(train, test, arima_order, sarima_order, step):

    predictions = []
    window_size = sarima_order[3] * 5
    print("Window size : "+str(window_size))
    
    for date in train.index.to_period('M').unique():
        
        history = list(train[str(date)].iloc[-window_size:])
        
        model = SARIMAX(history, order=arima_order, seasonal_order=sarima_order,enforce_invertibility=False,enforce_stationarity=False)
        model_fit = model.fit(disp=True,enforce_invertibility=False,  method='powell', maxiter=200)
        
        #save the state parameter
        est_params = model_fit.params
        est_state = model_fit.predicted_state[:, -1]
        est_state_cov = model_fit.predicted_state_cov[:, :, -1]

        print("Predicting : "+str(date))
        
        st = 0
        test_date = test[str(date)]
        
        for t in np.arange(1,len(test_date)+1,step):
            print("Sample: "+str(t))
            obs = test_date.iloc[st:t].values
            history.extend(obs)
            history = history[-window_size:]
            
            mod_updated = SARIMAX(history, order=arima_order, seasonal_order=sarima_order,enforce_invertibility=False,enforce_stationarity=False)
            mod_updated.initialize_known(est_state, est_state_cov)
            mod_frcst = mod_updated.smooth(est_params)

        
            yhat = mod_frcst.forecast(step)   
            predictions.extend(yhat)
            
            est_params = mod_frcst.params
            est_state = mod_frcst.predicted_state[:, -1]
            est_state_cov = mod_frcst.predicted_state_cov[:, :, -1]
            
            st = t
                
    return predictions

In [47]:
#Clean - SARIMA(2, 1, 2, 1, 1, 1)
#Residual - SARIMA(2, 0, 1, 1, 1, 1)
order = 1
step = 1
arima_order_clean = (2, 1, 2)
sarima_order_clean = (1, 1, 1, 61)
forecast_clean = sarima_forecast(train_clean_df[target_station], test_clean_df[target_station], arima_order_clean, sarima_order_clean, step)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

Window size : 305
Optimization terminated successfully.
         Current function value: 20.126977
         Iterations: 1
         Function evaluations: 92
Predicting : 2010-06
Sample: 1
Sample: 2
Sample: 3
Sample: 4
Sample: 5
Sample: 6
Sample: 7
Sample: 8
Sample: 9
Sample: 10
Sample: 11
Sample: 12
Sample: 13
Sample: 14
Sample: 15
Sample: 16
Sample: 17
Sample: 18
Sample: 19
Sample: 20
Sample: 21
Sample: 22
Sample: 23
Sample: 24
Sample: 25
Sample: 26
Sample: 27
Sample: 28
Sample: 29
Sample: 30
Sample: 31
Sample: 32
Sample: 33
Sample: 34
Sample: 35
Sample: 36
Sample: 37
Sample: 38
Sample: 39
Sample: 40
Sample: 41
Sample: 42
Sample: 43
Sample: 44
Sample: 45
Sample: 46
Sample: 47
Sample: 48
Sample: 49
Sample: 50
Sample: 51
Sample: 52
Sample: 53
Sample: 54
Sample: 55
Sample: 56
Sample: 57
Sample: 58
Sample: 59
Sample: 60
Sample: 61
Sample: 62
Sample: 63
Sample: 64
Sample: 65
Sample: 66
Sample: 67
Sample: 68
Sample: 69
Sample: 70
Sample: 71
Sample: 72
Sample: 73
Sample: 74
Sample: 75
Sample:

In [48]:
arima_order_residual = (2, 0, 1)
sarima_order_residual = (1, 1, 1, 61)
forecast_residual = sarima_forecast(train_residual_df[target_station], test_residual_df[target_station], arima_order_residual, sarima_order_residual,step)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

Window size : 305
Optimization terminated successfully.
         Current function value: -0.908377
         Iterations: 7
         Function evaluations: 736
Predicting : 2010-06
Sample: 1
Sample: 2
Sample: 3
Sample: 4
Sample: 5
Sample: 6
Sample: 7
Sample: 8
Sample: 9
Sample: 10
Sample: 11
Sample: 12
Sample: 13
Sample: 14
Sample: 15
Sample: 16
Sample: 17
Sample: 18
Sample: 19
Sample: 20
Sample: 21
Sample: 22
Sample: 23
Sample: 24
Sample: 25
Sample: 26
Sample: 27
Sample: 28
Sample: 29
Sample: 30
Sample: 31
Sample: 32
Sample: 33
Sample: 34
Sample: 35
Sample: 36
Sample: 37
Sample: 38
Sample: 39
Sample: 40
Sample: 41
Sample: 42
Sample: 43
Sample: 44
Sample: 45
Sample: 46
Sample: 47
Sample: 48
Sample: 49
Sample: 50
Sample: 51
Sample: 52
Sample: 53
Sample: 54
Sample: 55
Sample: 56
Sample: 57
Sample: 58
Sample: 59
Sample: 60
Sample: 61
Sample: 62
Sample: 63
Sample: 64
Sample: 65
Sample: 66
Sample: 67
Sample: 68
Sample: 69
Sample: 70
Sample: 71
Sample: 72
Sample: 73
Sample: 74
Sample: 75
Sample

In [51]:
len(test_df[target_station])

6893

In [52]:
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
rmse = calculate_rmse(test_df[target_station], final_forecast, order, step)

RMSE : 94.39462176062362


In [194]:
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}

RMSE : 178.31337062654006


In [199]:
save_obj(result, name="oahu_sarima_1")

## Vector Autoregressive - VAR

In [67]:
from statsmodels.tsa.api import VAR, DynamicVAR

In [68]:
def var_forecast(train, test, target, order, step):
    model = VAR(train.values)
    results = model.fit(maxlags=order)
    lag_order = results.k_ar
    print("Lag order:" + str(lag_order))
    forecast = []

    for i in np.arange(0,len(test)-lag_order+1,step) :
        forecast.extend(results.forecast(test.values[i:i+lag_order],step))

    forecast_df = pd.DataFrame(columns=test.columns, data=forecast)
    return forecast_df[target].values

In [153]:
# Clean = VAR(2)
# Residual = VAR(4)

var_order = 4
step = 3

forecast_clean = var_forecast(train_clean_df[neighbor_stations_90], test_clean_df[neighbor_stations_90], target_station, var_order, step)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

Lag order:4


In [154]:
forecast_residual = var_forecast(train_residual_df[neighbor_stations_90], test_residual_df[neighbor_stations_90], target_station, var_order, step)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

Lag order:4


In [156]:
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
rmse = calculate_rmse(test_df[target_station], final_forecast, var_order, step-1)

RMSE : 77.80076224477287


In [81]:
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="oahu_var_3")

## VAR Univariate Test

In [20]:
test_clean_df.columns

Index(['DHHL_3', 'DHHL_4', 'DHHL_5', 'DHHL_10', 'DHHL_11', 'DHHL_9', 'DHHL_2',
       'DHHL_1', 'AP_6', 'AP_5', 'AP_4', 'DHHL_6', 'DHHL_7', 'DHHL_8'],
      dtype='object')

In [21]:
# Clean = VAR(2)
# Residual = VAR(4)

var_order = 4
step = 1
test_var = ['DHHL_3', 'DHHL_4', 'DHHL_5', 'DHHL_10', 'DHHL_11', 'DHHL_9', 'DHHL_2',
       'DHHL_1', 'AP_6', 'AP_5', 'AP_4', 'DHHL_6', 'DHHL_7', 'DHHL_8']

forecast_clean = var_forecast(train_clean_df[test_var], test_clean_df[test_var], target_station, var_order, step)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

forecast_residual = var_forecast(train_residual_df[test_var], test_residual_df[test_var], target_station, var_order, step)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
rmse = calculate_rmse(test_df[target_station], final_forecast, var_order, step)

Lag order:4
Lag order:4
RMSE : 76.0418195515108


## Long Short Term Memory - LSTM

In [16]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

Using TensorFlow backend.


In [20]:
from keras.layers import Dropout
from keras.constraints import maxnorm

## Multivariate LSTM

In [17]:
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [27]:
def lstm_multi_forecast(train_df, validation_df, test_df, _order, _steps, _neurons, _epochs):

    
    nfeat = len(train_df.columns)
    nlags = _order
    nsteps = _steps
    nobs = nlags * nfeat
    
    train_reshaped_df = series_to_supervised(train_df, n_in=nlags, n_out=nsteps)
    train_X, train_Y = train_reshaped_df.iloc[:,:nobs].values, train_reshaped_df.iloc[:,-nfeat].values
    train_X = train_X.reshape((train_X.shape[0], nlags, nfeat))

    validation_reshaped_df = series_to_supervised(validation_df, n_in=nlags, n_out=nsteps)
    validation_X, validation_Y = validation_reshaped_df.iloc[:,:nobs].values, validation_reshaped_df.iloc[:,-nfeat].values
    validation_X = validation_X.reshape((validation_X.shape[0], nlags, nfeat))

    test_reshaped_df = series_to_supervised(test_df, n_in=nlags, n_out=nsteps)
    test_X, test_Y = test_reshaped_df.iloc[:,:nobs].values, test_reshaped_df.iloc[:,-nfeat].values
    test_X = test_X.reshape((test_X.shape[0], nlags, nfeat))
    
    # design network
    model = Sequential()
    model.add(LSTM(_neurons, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')

    # design network
    model = Sequential()
    model.add(LSTM(_neurons, return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2]), kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(LSTM(_neurons, return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2]), kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(LSTM(_neurons, return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2]), kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(LSTM(_neurons, input_shape=(train_X.shape[1], train_X.shape[2]), kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
    model.compile(loss='mae', optimizer='adam')

    # fit network
    model.fit(train_X, train_Y, epochs=_epochs, batch_size=72, verbose=False, shuffle=False)
    
    # predict validation
    model.predict(validation_X)
    
    forecast = model.predict(test_X)
        
    return forecast

In [28]:
neurons = 50
lstm_order = 2
epochs = 100
steps = 1

forecast_clean = lstm_multi_forecast(train_clean_df[neighbor_stations_90], validation_clean_df[neighbor_stations_90], test_clean_df[neighbor_stations_90], lstm_order, steps, neurons, epochs)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

In [29]:
forecast_residual = lstm_multi_forecast(train_residual_df[neighbor_stations_90], validation_residual_df[neighbor_stations_90], test_residual_df[neighbor_stations_90], lstm_order, steps, neurons, epochs)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

In [30]:
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
final_forecast.extend(np.zeros(steps)) ## para manter o mesmo tamanho dos demais

In [31]:
len(final_forecast)

6892

In [33]:
rmse = calculate_rmse(test_df[target_station], final_forecast, lstm_order, steps)

RMSE : 79.46864832232241


In [115]:
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="oahu_lstm_multi_3")

## LSTM - Univariate

In [124]:
neurons = 50
lstm_order = 2
epochs = 100
steps = 3

forecast_clean = lstm_multi_forecast(train_clean_df[[target_station]], test_clean_df[[target_station]], lstm_order, steps, neurons, epochs)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

forecast_residual = lstm_multi_forecast(train_residual_df[[target_station]], test_residual_df[[target_station]], lstm_order, steps, neurons, epochs)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

In [125]:
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
final_forecast.extend(np.zeros(steps)) ## para manter o mesmo tamanho dos demais

In [126]:
rmse = calculate_rmse(test_df[target_station], final_forecast, lstm_order, steps)

RMSE : 99.64093376934977


In [131]:
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="oahu_lstm_uni_3")

## Multi Layer Perceptron - MLP

In [127]:
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [128]:
def mlp_forecast(train_df, test_df, _order, _steps, _neurons, _epochs):

    
    nfeat = len(train_df.columns)
    nlags = _order
    nsteps = _steps
    nobs = nlags * nfeat
    
    train_reshaped_df = series_to_supervised(train_df, n_in=nlags, n_out=nsteps)
    train_X, train_Y = train_reshaped_df.iloc[:,:nobs].values, train_reshaped_df.iloc[:,-nfeat].values
    
    test_reshaped_df = series_to_supervised(test_df, n_in=nlags, n_out=nsteps)
    test_X, test_Y = test_reshaped_df.iloc[:,:nobs].values, test_reshaped_df.iloc[:,-nfeat].values
    
    # design network
    model = Sequential()
    model.add(Dense(neurons, activation='relu', input_dim=train_X.shape[1]))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    # fit network
    history = model.fit(train_X, train_Y, epochs=_epochs, batch_size=72, verbose=False, shuffle=False)   

    forecast = model.predict(test_X)
        
    return forecast

In [144]:
neurons = 50
mlp_order = 2
epochs = 100
steps = 3

forecast_clean = mlp_forecast(train_clean_df[neighbor_stations_90], test_clean_df[neighbor_stations_90], mlp_order, steps, neurons, epochs)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

In [145]:
forecast_residual = mlp_forecast(train_residual_df[neighbor_stations_90], test_residual_df[neighbor_stations_90], mlp_order, steps, neurons, epochs)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

In [146]:
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
final_forecast.extend(np.zeros(steps))  ## para manter o mesmo tamanho dos demais

In [147]:
rmse = calculate_rmse(test_df[target_station], final_forecast, mlp_order, steps-2)

RMSE : 102.37108952850684


In [148]:
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="oahu_mlp_multi_3")

## Univariate MLP

In [151]:
neurons = 50
mlp_order = 4
epochs = 500
steps = 3

forecast_clean = mlp_forecast(train_clean_df[[target_station]], test_clean_df[[target_station]], mlp_order, steps, neurons, epochs)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

forecast_residual = mlp_forecast(train_residual_df[[target_station]], test_residual_df[[target_station]], mlp_order, steps, neurons, epochs)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
final_forecast.extend(np.zeros(steps)) ## para manter o mesmo tamanho dos demais

rmse = calculate_rmse(test_df[target_station], final_forecast, mlp_order, steps-2)

RMSE : 106.03522514780022


In [152]:
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="oahu_mlp_uni_3")

## High Order FTS

In [25]:
from pyFTS.partitioners import Grid, Entropy, Util as pUtil
from pyFTS.models import hofts
from pyFTS.common import Transformations

In [457]:
def hofts_forecast(train_df, test_df, _order, _partitioner, _npartitions):
    
    fuzzy_sets = _partitioner(data=train_df.values, npart=_npartitions)
    model_simple_hofts = hofts.HighOrderFTS()
    

    model_simple_hofts.fit(train_df.values, order=_order, partitioner=fuzzy_sets)

    
    forecast = model_simple_hofts.predict(test_df.values)

    return forecast

In [458]:
hofts_order = 2
partitioner = Entropy.EntropyPartitioner
nparts = 90


forecast_clean = hofts_forecast(train_clean_df[target_station], test_clean_df[target_station], hofts_order, partitioner, nparts)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

In [459]:
forecast_residual = hofts_forecast(train_residual_df[target_station], test_residual_df[target_station], hofts_order, partitioner, nparts)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

In [460]:
step = 1
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
rmse = calculate_rmse(test_df[target_station], final_forecast, hofts_order, step)

RMSE : 85.65272570879797


In [461]:
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="oahu_hofts_1")

## Clustered Multivariate

In [300]:
from models import KMeansPartitioner
from models import sthofts

In [301]:
import importlib
importlib.reload(sthofts)

<module 'models.sthofts' from '/Users/cseveriano/Google Drive/Doutorado/Codes/spatio-temporal-forecasting/src/models/sthofts.py'>

In [303]:
def sthofts_forecast(train_df, test_df, target, _order, npartitions):
    
    _partitioner = KMeansPartitioner.KMeansPartitioner(data=train_df.values, npart=npartitions, batch_size=1000, init_size=npartitions*3)
    model_sthofts = sthofts.SpatioTemporalHighOrderFTS()
    
    model_sthofts.fit(train_df.values,  num_batches=100, order=_order, partitioner=_partitioner)
    forecast = model_sthofts.predict(test_df.values)
    forecast_df = pd.DataFrame(data=forecast, columns=test_df.columns)
    return forecast_df[target].values

In [314]:
sthofts_order = 2
nparts = 20


forecast_clean = sthofts_forecast(train_clean_df[neighbor_stations_90], test_clean_df[neighbor_stations_90], target_station, sthofts_order, nparts)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

[ 20:56:13] Start training
[ 20:56:13] Starting batch 1
[ 20:56:14] Finish batch 1
[ 20:56:14] Starting batch 2
[ 20:56:15] Finish batch 2
[ 20:56:15] Starting batch 3
[ 20:56:16] Finish batch 3
[ 20:56:16] Starting batch 4
[ 20:56:17] Finish batch 4
[ 20:56:17] Starting batch 5
[ 20:56:18] Finish batch 5
[ 20:56:18] Starting batch 6
[ 20:56:19] Finish batch 6
[ 20:56:19] Starting batch 7
[ 20:56:20] Finish batch 7
[ 20:56:20] Starting batch 8
[ 20:56:21] Finish batch 8
[ 20:56:21] Starting batch 9
[ 20:56:22] Finish batch 9
[ 20:56:22] Starting batch 10
[ 20:56:23] Finish batch 10
[ 20:56:23] Starting batch 11
[ 20:56:24] Finish batch 11
[ 20:56:24] Starting batch 12
[ 20:56:25] Finish batch 12
[ 20:56:25] Starting batch 13
[ 20:56:26] Finish batch 13
[ 20:56:26] Starting batch 14
[ 20:56:27] Finish batch 14
[ 20:56:27] Starting batch 15
[ 20:56:28] Finish batch 15
[ 20:56:28] Starting batch 16
[ 20:56:29] Finish batch 16
[ 20:56:29] Starting batch 17
[ 20:56:30] Finish batch 17
[ 20:

In [315]:
forecast_residual = sthofts_forecast(train_residual_df[neighbor_stations_90], test_residual_df[neighbor_stations_90], target_station, sthofts_order, nparts)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

[ 20:58:38] Start training
[ 20:58:38] Starting batch 1
[ 20:58:39] Finish batch 1
[ 20:58:39] Starting batch 2
[ 20:58:40] Finish batch 2
[ 20:58:40] Starting batch 3
[ 20:58:41] Finish batch 3
[ 20:58:41] Starting batch 4
[ 20:58:41] Finish batch 4
[ 20:58:41] Starting batch 5
[ 20:58:42] Finish batch 5
[ 20:58:42] Starting batch 6
[ 20:58:43] Finish batch 6
[ 20:58:43] Starting batch 7
[ 20:58:45] Finish batch 7
[ 20:58:45] Starting batch 8
[ 20:58:46] Finish batch 8
[ 20:58:46] Starting batch 9
[ 20:58:47] Finish batch 9
[ 20:58:47] Starting batch 10
[ 20:58:48] Finish batch 10
[ 20:58:48] Starting batch 11
[ 20:58:49] Finish batch 11
[ 20:58:49] Starting batch 12
[ 20:58:49] Finish batch 12
[ 20:58:49] Starting batch 13
[ 20:58:50] Finish batch 13
[ 20:58:50] Starting batch 14
[ 20:58:51] Finish batch 14
[ 20:58:51] Starting batch 15
[ 20:58:52] Finish batch 15
[ 20:58:52] Starting batch 16
[ 20:58:53] Finish batch 16
[ 20:58:53] Starting batch 17
[ 20:58:54] Finish batch 17
[ 20:

In [316]:
step = 1
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
rmse = calculate_rmse(test_df[target_station], final_forecast, sthofts_order, step)

RMSE : 95.6574336871836


In [317]:
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="oahu_sthofts_1")

## Conditional Variance FTS

In [22]:
from pyFTS.models.nonstationary import cvfts
from pyFTS.models.nonstationary import partitioners as nspartitioners

In [23]:
def cvfts_forecast(train, test, _partitioner,_partitions):
    
    fuzzy_sets =  nspartitioners.PolynomialNonStationaryPartitioner(data=train.values, part=_partitioner(data=train.values, npart=_partitions), degree=2)
                    
    model_cvfts = cvfts.ConditionalVarianceFTS()
    model_cvfts.fit(train.values, parameters=1, partitioner=fuzzy_sets)

    forecast = model_cvfts.predict(test.values)

    return forecast

In [26]:
partitioner = Grid.GridPartitioner
nparts = 90


forecast_clean = cvfts_forecast(train_clean_df[target_station], test_clean_df[target_station], partitioner, nparts)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

forecast_residual = cvfts_forecast(train_residual_df[target_station], test_residual_df[target_station], partitioner, nparts)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

In [27]:
step = 1
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
rmse = calculate_rmse(test_df[target_station], final_forecast, 1, step)

RMSE : 93.53504176386768


  


In [28]:
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="oahu_cvfts_1")