In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

In [3]:
import math
from sklearn.metrics import mean_squared_error

# Helper Functions

In [4]:
def normalize(df):
    """Min-max scale ``df`` using its own minimum and maximum.

    Args:
        df: pandas object supporting ``min()``/``max()`` and arithmetic
            (for a DataFrame the extrema are taken per column).

    Returns:
        ``(df - min) / (max - min)``, same type and labels as ``df``.

    NOTE(review): a constant column has a zero range, so the division
    produces NaN/inf for it.
    """
    lower = df.min()
    span = df.max() - lower
    return (df - lower) / span

In [5]:
def denormalize(norm, _min, _max):
    """Map min-max scaled values back to the original scale.

    Args:
        norm: iterable of scaled values.
        _min: minimum used when the data were scaled.
        _max: maximum used when the data were scaled.

    Returns:
        A list with ``value * (_max - _min) + _min`` for each element of ``norm``.
    """
    span = _max - _min
    restored = []
    for value in norm:
        restored.append((value * span) + _min)
    return restored

In [6]:
def split_data(df, df_clean, df_residual, interval):
    """Split the raw, clean and residual frames by week-of-month.

    The three frames are first restricted to ``interval`` via ``.loc``. Each
    remaining row is assigned a week number ``(day_of_month - 1) // 7 + 1``
    taken from the raw frame's index, and the rows are distributed as:

    * weeks 1-2 -> training
    * week 3    -> validation
    * weeks > 3 -> test (this is every day from the 22nd onwards, so it can
      be longer than seven days)

    Args:
        df: raw frame; its index must expose ``.day``.
        df_clean: clean-component frame, indexed like ``df``.
        df_residual: residual-component frame, indexed like ``df``.
        interval: row selector accepted by ``.loc`` (the notebook passes a boolean mask).

    Returns:
        Tuple ``(train, train_clean, train_residual, validation,
        validation_clean, validation_residual, test, test_clean, test_residual)``.
    """
    raw_sel = df.loc[interval]
    residual_sel = df_residual.loc[interval]
    clean_sel = df_clean.loc[interval]

    # Week of the month (1-based), derived from the raw frame's index only.
    week_of_month = (raw_sel.index.day - 1) // 7 + 1

    parts = []
    # Mask order is train, validation, test; frame order is raw, clean, residual.
    for mask in (week_of_month <= 2, week_of_month == 3, week_of_month > 3):
        for frame in (raw_sel, clean_sel, residual_sel):
            parts.append(frame.loc[mask])

    return tuple(parts)

In [7]:
def calculate_rmse(test, forecast, order, step):
    """Print and return the RMSE between observations and an aligned forecast.

    The first ``order`` observations are skipped (``test.iloc[order:]``) and the
    last ``step`` forecasts are dropped (``forecast[:-step]``) before the two
    sequences are compared.

    Args:
        test: observed values; must support ``.iloc`` slicing.
        forecast: sliceable sequence of predictions.
        order: number of leading observations to skip.
        step: number of trailing forecasts to discard.

    Returns:
        The root mean squared error as a float.
    """
    observed = test.iloc[order:]
    predicted = forecast[:-step]
    rmse = math.sqrt(mean_squared_error(observed, predicted))
    print("RMSE : "+str(rmse))
    return rmse

In [8]:
def reconstruct_ssa_series(clean, residual):
    """Recombine two SSA components by element-wise addition.

    Args:
        clean: iterable with the clean-component values.
        residual: iterable with the residual-component values.

    Returns:
        List of ``residual[i] + clean[i]``; pairing stops at the shorter input.
    """
    combined = []
    for res_value, clean_value in zip(residual, clean):
        combined.append(res_value + clean_value)
    return combined

In [9]:
def save_obj(obj, name ):
    """Pickle ``obj`` to ``results/<name>.pkl`` (relative to the working directory).

    The target directory is created when it does not exist, so a missing
    ``results/`` folder no longer discards a finished experiment.

    Args:
        obj: any picklable object.
        name: file name without extension.
    """
    import os  # local import: the notebook's import cell does not bring in os

    path = 'results/'+ name + '.pkl'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name ):
    """Load the object pickled at ``results/<name>.pkl``.

    Args:
        name: file name without extension.

    Returns:
        The unpickled object.

    NOTE(review): unpickling can execute arbitrary code; only load files
    written by a trusted source.
    """
    path = 'results/' + name + '.pkl'
    with open(path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

In [10]:
def difference(raw_df, interval=1):
    """Return the lag-``interval`` difference of every column of ``raw_df``.

    Output row ``k`` holds ``raw_df.iloc[k + interval] - raw_df.iloc[k]`` and
    carries the index label of the later of the two rows, so the first
    ``interval`` rows of ``raw_df`` have no counterpart in the result.

    Args:
        raw_df: DataFrame to difference.
        interval: positive lag, in rows, between the subtracted values.

    Returns:
        DataFrame with the columns of ``raw_df`` and index ``raw_df.index[interval:]``.

    Raises:
        ValueError: if ``interval`` is smaller than 1.
    """
    if interval < 1:
        raise ValueError("interval must be a positive integer")

    # Skip `interval` labels (not always one) so the index matches the number of differences.
    df_diff = pd.DataFrame(columns=raw_df.columns, index=raw_df.index[interval:])

    for col in raw_df.columns:
        # Slice the underlying array: purely positional, whatever the index labels are.
        values = raw_df[col].values
        df_diff[col] = values[interval:] - values[:-interval]
    return df_diff

In [11]:
def inverse_difference(raw_series, diff_series):
    """Undo a differencing step by adding back values of the raw series.

    Element ``i`` of the result is ``diff_series[i]`` plus the raw value read
    at offset ``-(len(raw_series) - i)``.

    Args:
        raw_series: indexable sequence of original values.
        diff_series: indexable sequence of differences.

    Returns:
        List with one restored value per element of ``diff_series``.
    """
    raw_length = len(raw_series)
    return [
        diff_series[pos] + raw_series[-(raw_length - pos)]
        for pos in range(len(diff_series))
    ]

# Load Dataset
Split the data into train, validation and test subsets

In [12]:
# Target station: the column that every model below forecasts and is scored on.
target_station = 'WTG01'

# Inputs of the multivariate models: all neighbor stations with residual correlation
# greater than .90. The target station itself is the first entry of the list.
neighbor_stations_90 = ['WTG01','WTG02','WTG03','WTG05','WTG06']

In [13]:
# Load the raw wind-speed frame and its two SSA components (clean and residual)
# from pickle files in the working directory.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle("df_wind_speed.pkl")
df_ssa_clean = pd.read_pickle("df_wind_speed_ssa_clean.pkl")
df_ssa_residual = pd.read_pickle("df_wind_speed_ssa_residual.pkl")

In [12]:
# Normalize data

# Keep the target station's min/max of each frame so that forecasts can be mapped
# back to the original scale with denormalize().
min_raw = df[target_station].min()
min_clean = df_ssa_clean[target_station].min()
min_residual = df_ssa_residual[target_station].min()

max_raw = df[target_station].max()
max_clean = df_ssa_clean[target_station].max()
max_residual = df_ssa_residual[target_station].max()

# Min-max scale the two SSA component frames; the raw frame df is left unscaled.
# NOTE(review): the extrema are computed over the full frames, i.e. before the
# train/validation/test split — confirm this is intended.
norm_df_ssa_clean = normalize(df_ssa_clean)
norm_df_ssa_residual = normalize(df_ssa_residual)

In [13]:
# Split data
# Boolean mask over df's index selecting the period used in the experiments.
interval = ((df.index >= '2017-05') & (df.index <= '2018-05'))
# Alternative period, kept for reference:
#interval = ((df.index >= '2010-11') & (df.index <= '2010-12'))

# Raw data keeps its original scale; the clean and residual components are the
# normalized frames.
(train_df, train_clean_df, train_residual_df, 
 validation_df, validation_clean_df, validation_residual_df, 
 test_df, test_clean_df, test_residual_df) = split_data(df, norm_df_ssa_clean, norm_df_ssa_residual, interval)

In [15]:
# Number of rows recorded on a single day. Row selection by date string goes
# through .loc; using [] with a date string on a DataFrame is deprecated.
len(df.loc['2017-06-01'])

144

## Forecasting with SSA Decomposition

For each dataset, every time series was decomposed into two components (trend plus harmonic, and residual), and the two resulting datasets were used for different configurations of each model.

## Persistence

In [14]:
def persistence_forecast(train, test, step):
    """Naive forecast that repeats an observed test value ``step`` times.

    The test series is visited at positions ``0, step, 2*step, ...``; the value
    found at each position is emitted ``step`` times.

    Args:
        train: unused; kept so all forecasters share the same call shape.
        test: series supporting ``len()`` and ``.iloc``.
        step: stride through ``test`` and number of repetitions per value.

    Returns:
        Flat list of repeated values.
    """
    forecast = []
    for start in np.arange(0, len(test), step):
        last_seen = test.iloc[start]
        forecast += [last_seen] * step
    return forecast

In [15]:
# One-step-ahead setting; persistence_order is the `order` later handed to calculate_rmse.
step = 1
persistence_order = 1

# Persistence forecast of the clean component, mapped back to the original scale.
forecast_clean = persistence_forecast(train_clean_df[target_station], test_clean_df[target_station],step)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

# Same for the residual component.
forecast_residual = persistence_forecast(train_residual_df[target_station], test_residual_df[target_station],step)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

In [18]:
# Sum the two component forecasts and score the result against the raw test series.
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
rmse = calculate_rmse(test_df[target_station], final_forecast, persistence_order, step)

RMSE : 0.6324497271451677


In [19]:
# Store the score together with the combined and per-component forecasts.
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="wind_persistence_1")

## SARIMA

In [23]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

  from pandas.core import datetools


In [187]:
def sarima_forecast(train, test, arima_order, sarima_order, step):
    """Rolling SARIMA forecast, fitted once per calendar month and updated with test data.

    For every month found in ``train.index`` a SARIMAX model is fitted on the
    last ``window_size`` training values of that month, where ``window_size``
    is five times ``sarima_order[3]``. The function then walks through that
    month's test observations: each batch is appended to a fixed-length
    history, a new SARIMAX model is built on that history, initialised with
    the previously stored state, smoothed with the stored parameters, and
    asked for a ``step``-ahead forecast.

    NOTE(review): a later cell defines another function with this same name;
    whichever of the two cells ran last is the one in effect.

    Args:
        train: training series; its index must support ``to_period('M')`` and
            month selection by string — presumably a pandas Series with a
            DatetimeIndex, confirm against the caller.
        test: test series, selected per month the same way.
        arima_order: passed to SARIMAX as ``order``.
        sarima_order: passed to SARIMAX as ``seasonal_order``; element 3 also
            sizes the history window.
        step: forecast horizon per update and stride through the test data.

    Returns:
        Flat list with all forecasts, in the order they were produced.
    """

    predictions = []
    # History window: five times the value at sarima_order[3].
    window_size = sarima_order[3] * 5
    
    for date in train.index.to_period('M').unique():
        
        # Only the tail of this month's training data is used for fitting.
        history = list(train[str(date)].iloc[-window_size:])
        
        model = SARIMAX(history, order=arima_order, seasonal_order=sarima_order,enforce_invertibility=False,enforce_stationarity=False)
        model_fit = model.fit(disp=True,enforce_invertibility=False,  method='powell', maxiter=200)
        
        # Save the fitted parameters and the last predicted state/covariance.
        est_params = model_fit.params
        est_state = model_fit.predicted_state[:, -1]
        est_state_cov = model_fit.predicted_state_cov[:, :, -1]

        print("Predicting : "+str(date))
        
        st = 0
        test_date = test[str(date)]
        
        for t in np.arange(1,len(test_date)+1,step):
            # New observations since the previous update (a single one on the first pass).
            obs = test_date.iloc[st:t].values
            history.extend(obs)
            # Keep the history at a fixed length.
            history = history[-window_size:]
            
            # Rebuild the model on the updated history and reuse the stored
            # state and parameters instead of calling fit() again.
            mod_updated = SARIMAX(history, order=arima_order, seasonal_order=sarima_order,enforce_invertibility=False,enforce_stationarity=False)
            mod_updated.initialize_known(est_state, est_state_cov)
            mod_frcst = mod_updated.smooth(est_params)

        
            yhat = mod_frcst.forecast(step)   
            predictions.extend(yhat)
            
            # Carry parameters and state over to the next update.
            est_params = mod_frcst.params
            est_state = mod_frcst.predicted_state[:, -1]
            est_state_cov = mod_frcst.predicted_state_cov[:, :, -1]
            
            st = t
                
    return predictions

In [None]:
def sarima_forecast(train, test, arima_order, sarima_order, step):
    """Fit one SARIMAX model per month and forecast that month's whole test span at once.

    For each month in ``train.index`` the model is fitted on all training
    values of that month and asked for as many steps as there are test rows
    in the same month.

    NOTE(review): this re-uses the name of a function defined in an earlier
    cell, so it replaces that definition once this cell runs.

    Args:
        train: training series; its index must support ``to_period('M')`` and
            month selection by string.
        test: test series, selected per month the same way.
        arima_order: passed to SARIMAX as ``order``.
        sarima_order: passed to SARIMAX as ``seasonal_order``.
        step: not used by this variant.

    Returns:
        Flat list with the forecasts of all months, in month order.
    """
    predictions = []

    months = train.index.to_period('M').unique()
    for month in months:
        month_key = str(month)
        print("Predicting : "+month_key)
        history = list(train[month_key])
        test_steps = len(test[month_key])
        print("Number of steps : "+str(test_steps))

        month_model = SARIMAX(
            history,
            order=arima_order,
            seasonal_order=sarima_order,
            enforce_invertibility=False,
            enforce_stationarity=False,
        )
        fitted = month_model.fit(disp=True, enforce_invertibility=False, method='powell', maxiter=200)

        # Forecast the full test span of this month in a single call.
        predictions.extend(fitted.forecast(test_steps))

    return predictions

In [188]:
# Model orders used per component:
#   Clean    - SARIMA(2, 1, 2, 1, 1, 1)
#   Residual - SARIMA(2, 0, 1, 1, 1, 1)
# `order` is the number of leading observations calculate_rmse skips; `step` the horizon.
order = 1
step = 1
arima_order_clean = (2, 1, 2)
# Fourth element (61) is also what sizes the rolling history window in sarima_forecast.
sarima_order_clean = (1, 1, 1, 61)
forecast_clean = sarima_forecast(train_clean_df[target_station], test_clean_df[target_station], arima_order_clean, sarima_order_clean, step)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

Predicting : 2010-06
Number of steps : 549
Optimization terminated successfully.
         Current function value: -3.657188
         Iterations: 3
         Function evaluations: 331
Predicting : 2010-07
Number of steps : 610
Optimization terminated successfully.
         Current function value: -3.466340
         Iterations: 5
         Function evaluations: 539
Predicting : 2010-08
Number of steps : 610
Optimization terminated successfully.
         Current function value: -3.581201
         Iterations: 4
         Function evaluations: 464
Predicting : 2010-09
Number of steps : 549
Optimization terminated successfully.
         Current function value: -3.512425
         Iterations: 5
         Function evaluations: 553
Predicting : 2010-10
Number of steps : 610
Optimization terminated successfully.
         Current function value: -3.685367
         Iterations: 3
         Function evaluations: 323
Predicting : 2010-11
Number of steps : 549
Optimization terminated successfully.
         

In [189]:
# SARIMA forecast of the residual component, mapped back to the original scale.
arima_order_residual = (2, 0, 1)
sarima_order_residual = (1, 1, 1, 61)
forecast_residual = sarima_forecast(train_residual_df[target_station], test_residual_df[target_station], arima_order_residual, sarima_order_residual,step)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

Predicting : 2010-06
Number of steps : 549
Optimization terminated successfully.
         Current function value: -1.139909
         Iterations: 4
         Function evaluations: 303
Predicting : 2010-07
Number of steps : 610
Optimization terminated successfully.
         Current function value: -0.954949
         Iterations: 6
         Function evaluations: 470
Predicting : 2010-08
Number of steps : 610
Optimization terminated successfully.
         Current function value: -1.135208
         Iterations: 5
         Function evaluations: 369
Predicting : 2010-09
Number of steps : 549
Optimization terminated successfully.
         Current function value: -1.059952
         Iterations: 4
         Function evaluations: 298
Predicting : 2010-10
Number of steps : 610
Optimization terminated successfully.
         Current function value: -1.192249
         Iterations: 4
         Function evaluations: 346
Predicting : 2010-11
Number of steps : 549
Optimization terminated successfully.
         

In [190]:
# Sum the two component forecasts and score them against the raw test series.
# NOTE(review): the RMSE recorded for this cell (~178) is orders of magnitude above
# the other models in this notebook (~0.4-0.7); the SARIMA forecasts deserve a check.
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
rmse = calculate_rmse(test_df[target_station], final_forecast, order, step)

RMSE : 178.31337062654006


178.31337062654006

In [194]:
# Bundle the score with the combined and per-component forecasts.
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}

RMSE : 178.31337062654006


In [199]:
# Persist the SARIMA result bundle.
save_obj(result, name="wind_sarima_1")

## Vector Autoregressive - VAR

In [24]:
from statsmodels.tsa.api import VAR, DynamicVAR

In [25]:
def var_forecast(train, test, target, order, step):
    """Fit a VAR model on ``train`` and forecast ``target`` over sliding test windows.

    The model is fitted once with ``maxlags=order``. A window of ``k_ar`` test
    rows is then moved over the test frame in strides of ``step``, and each
    window is used to forecast ``step`` rows ahead.

    Args:
        train: DataFrame with one column per series, used for fitting.
        test: DataFrame with the same columns, used as forecast input.
        target: name of the column to return.
        order: value passed as ``maxlags`` to the fit.
        step: forecast horizon and window stride.

    Returns:
        Array with the forecasts of the ``target`` column.
    """
    fitted = VAR(train.values).fit(maxlags=order)
    lag_order = fitted.k_ar
    print("Lag order:" + str(lag_order))

    test_values = test.values
    rows = []
    for start in np.arange(0, len(test) - lag_order + 1, step):
        window = test_values[start:start + lag_order]
        rows.extend(fitted.forecast(window, step))

    # Re-attach the column names so the target can be picked by label.
    forecast_df = pd.DataFrame(columns=test.columns, data=rows)
    return forecast_df[target].values

In [26]:
# Clean = VAR(2)
# Residual = VAR(4)
# NOTE(review): the note above lists order 2 for the clean component, but
# var_order = 4 is what is passed for the clean forecast here — confirm which is intended.

var_order = 4
step = 1

# VAR forecast of the clean component from all neighbor stations, mapped back to the original scale.
forecast_clean = var_forecast(train_clean_df[neighbor_stations_90], test_clean_df[neighbor_stations_90], target_station, var_order, step)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

Lag order:4


In [27]:
# VAR forecast of the residual component, mapped back to the original scale.
forecast_residual = var_forecast(train_residual_df[neighbor_stations_90], test_residual_df[neighbor_stations_90], target_station, var_order, step)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

Lag order:4


In [28]:
# Sum the two component forecasts and score them; the first var_order observations are skipped.
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
rmse = calculate_rmse(test_df[target_station], final_forecast, var_order, step)

RMSE : 0.42108261396600716


In [29]:
# Store the score together with the combined and per-component forecasts.
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="wind_var_1")

## Long Short Term Memory - LSTM

In [30]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

Using TensorFlow backend.


## Multivariate LSTM

In [31]:
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a (multi)variate series as a supervised-learning table.

    The result has one block of columns per lag ``t-n_in, ..., t-1`` followed
    by one block per lead ``t, t+1, ..., t+n_out-1``; each block holds every
    variable of ``data``, named ``var<j>(t-<i>)``, ``var<j>(t)`` or
    ``var<j>(t+<i>)``.

    Args:
        data: anything ``pd.DataFrame`` accepts (DataFrame, Series, 1-D or
            2-D array, list). The number of variables is taken from the
            resulting frame, so 1-D inputs and lists of rows are handled too.
        n_in: number of lagged blocks used as inputs.
        n_out: number of lead blocks used as outputs.
        dropnan: drop the rows made incomplete by shifting.

    Returns:
        DataFrame with ``(n_in + n_out) * n_vars`` columns.
    """
    df = pd.DataFrame(data)
    # Count variables on the frame itself instead of guessing from the input type.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for lead in range(n_out):
        cols.append(df.shift(-lead))
        if lead == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, lead) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg = agg.dropna()
    return agg

In [32]:
def lstm_multi_forecast(train_df, test_df, _order, _steps, _neurons, _epochs):
    """Train a single-layer LSTM on lagged inputs and predict over the test frame.

    Both frames are reframed with ``series_to_supervised``. The first
    ``_order * n_features`` columns become the inputs, reshaped to
    ``(samples, _order, n_features)``; the training target is the column at
    position ``-n_features`` of the reframed table.

    Args:
        train_df: DataFrame with one column per input series.
        test_df: DataFrame with the same columns.
        _order: number of lags fed to the network.
        _steps: number of lead blocks requested from ``series_to_supervised``.
        _neurons: number of LSTM units.
        _epochs: number of training epochs.

    Returns:
        The output of ``model.predict`` on the reframed test inputs.

    NOTE(review): no random seed is set here, so repeated runs can give
    different forecasts.
    """
    nfeat = len(train_df.columns)
    nobs = _order * nfeat

    # Training tensors.
    train_frame = series_to_supervised(train_df, n_in=_order, n_out=_steps)
    train_X = train_frame.iloc[:, :nobs].values
    train_Y = train_frame.iloc[:, -nfeat].values
    train_X = train_X.reshape((train_X.shape[0], _order, nfeat))

    # Test tensors; the test target is extracted but not used afterwards.
    test_frame = series_to_supervised(test_df, n_in=_order, n_out=_steps)
    test_X = test_frame.iloc[:, :nobs].values
    test_Y = test_frame.iloc[:, -nfeat].values
    test_X = test_X.reshape((test_X.shape[0], _order, nfeat))

    # design network
    model = Sequential()
    model.add(LSTM(_neurons, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')

    # fit network
    model.fit(train_X, train_Y, epochs=_epochs, batch_size=72, verbose=False, shuffle=False)

    return model.predict(test_X)

In [33]:
# Hyper-parameters of the multivariate LSTM.
neurons = 100
lstm_order = 4
epochs = 100
steps = 1

# LSTM forecast of the clean component from all neighbor stations, mapped back to the original scale.
forecast_clean = lstm_multi_forecast(train_clean_df[neighbor_stations_90], test_clean_df[neighbor_stations_90], lstm_order, steps, neurons, epochs)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

In [34]:
# LSTM forecast of the residual component, mapped back to the original scale.
forecast_residual = lstm_multi_forecast(train_residual_df[neighbor_stations_90], test_residual_df[neighbor_stations_90], lstm_order, steps, neurons, epochs)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

In [35]:
# Sum the two component forecasts.
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
# Pad with a dummy value to keep the same length as the other models' forecasts;
# calculate_rmse discards the last `step` entries, which removes it when step is 1.
final_forecast.append(0) ## to keep the same length as the others

In [36]:
# Score with this section's own `steps` setting rather than a `step` value
# left over from an earlier section.
rmse = calculate_rmse(test_df[target_station], final_forecast, lstm_order, steps)

RMSE : 0.4330875880143308


In [37]:
# Store the score together with the combined and per-component forecasts.
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="wind_lstm_multi_1")

## LSTM - Univariate

In [38]:
# Hyper-parameters of the univariate LSTM (target station only).
neurons = 50
lstm_order = 4
epochs = 100
steps = 1

# Clean component.
forecast_clean = lstm_multi_forecast(train_clean_df[[target_station]], test_clean_df[[target_station]], lstm_order, steps, neurons, epochs)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

# Residual component.
forecast_residual = lstm_multi_forecast(train_residual_df[[target_station]], test_residual_df[[target_station]], lstm_order, steps, neurons, epochs)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
# Pad with a dummy value; calculate_rmse discards the last `steps` entries.
final_forecast.append(0) ## to keep the same length as the others

# Score with the `steps` defined in this cell, not a `step` left over from another section.
rmse = calculate_rmse(test_df[target_station], final_forecast, lstm_order, steps)

RMSE : 0.4381514919938163


In [39]:
# Store the score together with the combined and per-component forecasts.
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="wind_lstm_uni_1")

## Multi Layer Perceptron - MLP

In [40]:
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a (multi)variate series as a supervised-learning table.

    The result has one block of columns per lag ``t-n_in, ..., t-1`` followed
    by one block per lead ``t, t+1, ..., t+n_out-1``; each block holds every
    variable of ``data``, named ``var<j>(t-<i>)``, ``var<j>(t)`` or
    ``var<j>(t+<i>)``.

    NOTE(review): a function with this name is already defined in the
    Multivariate LSTM section; this cell re-declares it and could be removed.

    Args:
        data: anything ``pd.DataFrame`` accepts (DataFrame, Series, 1-D or
            2-D array, list). The number of variables is taken from the
            resulting frame, so 1-D inputs and lists of rows are handled too.
        n_in: number of lagged blocks used as inputs.
        n_out: number of lead blocks used as outputs.
        dropnan: drop the rows made incomplete by shifting.

    Returns:
        DataFrame with ``(n_in + n_out) * n_vars`` columns.
    """
    df = pd.DataFrame(data)
    # Count variables on the frame itself instead of guessing from the input type.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for lead in range(n_out):
        cols.append(df.shift(-lead))
        if lead == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, lead) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg = agg.dropna()
    return agg

In [41]:
def mlp_forecast(train_df, test_df, _order, _steps, _neurons, _epochs):
    """Train a one-hidden-layer MLP on lagged inputs and predict over the test frame.

    Both frames are reframed with ``series_to_supervised``. The first
    ``_order * n_features`` columns become the flat input vector; the training
    target is the column at position ``-n_features`` of the reframed table.

    Args:
        train_df: DataFrame with one column per input series.
        test_df: DataFrame with the same columns.
        _order: number of lags fed to the network.
        _steps: number of lead blocks requested from ``series_to_supervised``.
        _neurons: width of the hidden layer.
        _epochs: number of training epochs.

    Returns:
        The output of ``model.predict`` on the reframed test inputs.

    NOTE(review): no random seed is set here, so repeated runs can give
    different forecasts.
    """
    nfeat = len(train_df.columns)
    nlags = _order
    nsteps = _steps
    nobs = nlags * nfeat

    train_reshaped_df = series_to_supervised(train_df, n_in=nlags, n_out=nsteps)
    train_X, train_Y = train_reshaped_df.iloc[:,:nobs].values, train_reshaped_df.iloc[:,-nfeat].values

    test_reshaped_df = series_to_supervised(test_df, n_in=nlags, n_out=nsteps)
    test_X = test_reshaped_df.iloc[:,:nobs].values

    # design network
    model = Sequential()
    # Size the hidden layer from the _neurons argument, not from a notebook-level
    # variable, so the result does not depend on leftover kernel state.
    model.add(Dense(_neurons, activation='relu', input_dim=train_X.shape[1]))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    # fit network
    model.fit(train_X, train_Y, epochs=_epochs, batch_size=72, verbose=False, shuffle=False)

    forecast = model.predict(test_X)

    return forecast

In [42]:
# Hyper-parameters of the multivariate MLP.
neurons = 90
mlp_order = 3
epochs = 100
steps = 1

# MLP forecast of the clean component from all neighbor stations, mapped back to the original scale.
forecast_clean = mlp_forecast(train_clean_df[neighbor_stations_90], test_clean_df[neighbor_stations_90], mlp_order, steps, neurons, epochs)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

In [43]:
# MLP forecast of the residual component, mapped back to the original scale.
forecast_residual = mlp_forecast(train_residual_df[neighbor_stations_90], test_residual_df[neighbor_stations_90], mlp_order, steps, neurons, epochs)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

In [44]:
# Sum the two component forecasts.
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
# Pad with a dummy value to keep the same length as the other models' forecasts;
# calculate_rmse discards the last `steps` entries, which removes it when steps is 1.
final_forecast.append(0) ## to keep the same length as the others

In [45]:
# Score against the raw test series; the first mlp_order observations are skipped.
rmse = calculate_rmse(test_df[target_station], final_forecast, mlp_order, steps)

RMSE : 0.43929870532978144


In [46]:
# Store the score together with the combined and per-component forecasts.
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="wind_mlp_multi_1")

## Univariate MLP

In [47]:
# Hyper-parameters of the univariate MLP (target station only).
neurons = 50
mlp_order = 4
epochs = 100
steps = 1

# Clean component.
forecast_clean = mlp_forecast(train_clean_df[[target_station]], test_clean_df[[target_station]], mlp_order, steps, neurons, epochs)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

# Residual component.
forecast_residual = mlp_forecast(train_residual_df[[target_station]], test_residual_df[[target_station]], mlp_order, steps, neurons, epochs)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
# Pad with a dummy value; calculate_rmse discards the last `steps` entries.
final_forecast.append(0) ## to keep the same length as the others

rmse = calculate_rmse(test_df[target_station], final_forecast, mlp_order, steps)

RMSE : 0.4424553425781263


In [48]:
# Store the score together with the combined and per-component forecasts.
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="wind_mlp_uni_1")

## High Order FTS

In [49]:
from pyFTS.partitioners import Grid, Entropy, Util as pUtil
from pyFTS.models import hofts
from pyFTS.common import Transformations

In [50]:
def hofts_forecast(train_df, test_df, _order, _partitioner, _npartitions):
    """Fit a pyFTS ``HighOrderFTS`` model on the training values and predict the test values.

    Args:
        train_df: pandas object whose ``.values`` are used for partitioning and fitting.
        test_df: pandas object whose ``.values`` are passed to ``predict``.
        _order: model order passed to ``fit``.
        _partitioner: partitioner class/callable accepting ``data`` and ``npart``.
        _npartitions: number of partitions requested from the partitioner.

    Returns:
        Whatever ``HighOrderFTS.predict`` returns for the test values.
    """
    # Build the partitions from the training data only.
    partitions = _partitioner(data=train_df.values, npart=_npartitions)

    model = hofts.HighOrderFTS()
    model.fit(train_df.values, order=_order, partitioner=partitions)

    return model.predict(test_df.values)

In [51]:
# Settings of the high-order FTS model: order, partitioner class and number of partitions.
hofts_order = 3
partitioner = Grid.GridPartitioner
nparts = 80


# Forecast of the clean component (target station only), mapped back to the original scale.
forecast_clean = hofts_forecast(train_clean_df[target_station], test_clean_df[target_station], hofts_order, partitioner, nparts)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

In [52]:
# Forecast of the residual component, mapped back to the original scale.
forecast_residual = hofts_forecast(train_residual_df[target_station], test_residual_df[target_station], hofts_order, partitioner, nparts)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

In [53]:
# Sum the two component forecasts and score them; the first hofts_order observations are skipped.
step = 1
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
rmse = calculate_rmse(test_df[target_station], final_forecast, hofts_order, step)

RMSE : 0.5398536727385147


In [54]:
# Store the score together with the combined and per-component forecasts.
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="wind_hofts_1")

## Clustered Multivariate

In [55]:
from models import KMeansPartitioner
from models import sthofts

In [301]:
# Development aid: re-import the local sthofts module so edits to its source are
# picked up without restarting the kernel.
import importlib
importlib.reload(sthofts)

<module 'models.sthofts' from '/Users/cseveriano/Google Drive/Doutorado/Codes/spatio-temporal-forecasting/src/models/sthofts.py'>

In [56]:
def sthofts_forecast(train_df, test_df, target, _order, npartitions):
    """Fit the project's ``SpatioTemporalHighOrderFTS`` model and forecast one column.

    A ``KMeansPartitioner`` is built on the training values and handed to the
    model's ``fit``; predictions for the test values are put back into a frame
    with the test columns so the target can be selected by name.

    Args:
        train_df: DataFrame with one column per station, used for fitting.
        test_df: DataFrame with the same columns, used for prediction.
        target: name of the column to return.
        _order: model order passed to ``fit``.
        npartitions: number of partitions; also sizes ``init_size`` (three times this value).

    Returns:
        Array with the predictions of the ``target`` column.
    """
    kmeans_partitioner = KMeansPartitioner.KMeansPartitioner(
        data=train_df.values,
        npart=npartitions,
        batch_size=1000,
        init_size=npartitions*3,
    )

    model = sthofts.SpatioTemporalHighOrderFTS()
    model.fit(train_df.values, dump = 'time', num_batches=100, order=_order, partitioner=kmeans_partitioner)

    predicted = model.predict(test_df.values)
    predicted_df = pd.DataFrame(data=predicted, columns=test_df.columns)
    return predicted_df[target].values

In [57]:
# Settings of the clustered multivariate FTS model: order and number of partitions.
sthofts_order = 4
nparts = 20


# Forecast of the clean component from all neighbor stations, mapped back to the original scale.
forecast_clean = sthofts_forecast(train_clean_df[neighbor_stations_90], test_clean_df[neighbor_stations_90], target_station, sthofts_order, nparts)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

[ 16:05:30] Start training
[ 16:05:30] Starting batch 1
[ 16:05:32] Finish batch 1
[ 16:05:32] Starting batch 2
[ 16:05:35] Finish batch 2
[ 16:05:35] Starting batch 3
[ 16:05:38] Finish batch 3
[ 16:05:38] Starting batch 4
[ 16:05:41] Finish batch 4
[ 16:05:41] Starting batch 5
[ 16:05:44] Finish batch 5
[ 16:05:44] Starting batch 6
[ 16:05:47] Finish batch 6
[ 16:05:47] Starting batch 7
[ 16:05:50] Finish batch 7
[ 16:05:50] Starting batch 8
[ 16:05:53] Finish batch 8
[ 16:05:53] Starting batch 9
[ 16:05:56] Finish batch 9
[ 16:05:56] Starting batch 10
[ 16:05:59] Finish batch 10
[ 16:05:59] Starting batch 11
[ 16:06:02] Finish batch 11
[ 16:06:02] Starting batch 12
[ 16:06:04] Finish batch 12
[ 16:06:04] Starting batch 13
[ 16:06:07] Finish batch 13
[ 16:06:07] Starting batch 14
[ 16:06:10] Finish batch 14
[ 16:06:10] Starting batch 15
[ 16:06:13] Finish batch 15
[ 16:06:13] Starting batch 16
[ 16:06:16] Finish batch 16
[ 16:06:16] Starting batch 17
[ 16:06:19] Finish batch 17
[ 16:

In [58]:
# Forecast of the residual component, mapped back to the original scale.
forecast_residual = sthofts_forecast(train_residual_df[neighbor_stations_90], test_residual_df[neighbor_stations_90], target_station, sthofts_order, nparts)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

[ 16:12:52] Start training
[ 16:12:52] Starting batch 1
[ 16:12:59] Finish batch 1
[ 16:12:59] Starting batch 2
[ 16:13:06] Finish batch 2
[ 16:13:06] Starting batch 3
[ 16:13:13] Finish batch 3
[ 16:13:13] Starting batch 4
[ 16:13:21] Finish batch 4
[ 16:13:21] Starting batch 5
[ 16:13:26] Finish batch 5
[ 16:13:26] Starting batch 6
[ 16:13:33] Finish batch 6
[ 16:13:33] Starting batch 7
[ 16:13:41] Finish batch 7
[ 16:13:41] Starting batch 8
[ 16:13:47] Finish batch 8
[ 16:13:47] Starting batch 9
[ 16:13:53] Finish batch 9
[ 16:13:53] Starting batch 10
[ 16:13:58] Finish batch 10
[ 16:13:58] Starting batch 11
[ 16:14:06] Finish batch 11
[ 16:14:06] Starting batch 12
[ 16:14:13] Finish batch 12
[ 16:14:13] Starting batch 13
[ 16:14:21] Finish batch 13
[ 16:14:21] Starting batch 14
[ 16:14:29] Finish batch 14
[ 16:14:29] Starting batch 15
[ 16:14:35] Finish batch 15
[ 16:14:35] Starting batch 16
[ 16:14:42] Finish batch 16
[ 16:14:42] Starting batch 17
[ 16:14:48] Finish batch 17
[ 16:

In [59]:
# Sum the two component forecasts and score them; the first sthofts_order observations are skipped.
step = 1
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
rmse = calculate_rmse(test_df[target_station], final_forecast, sthofts_order, step)

RMSE : 0.7407508188869937


In [60]:
# Store the score together with the combined and per-component forecasts.
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="wind_sthofts_1")

## Conditional Variance FTS - CVFTS

In [61]:
from pyFTS.models.nonstationary import cvfts
from pyFTS.models.nonstationary import partitioners as nspartitioners

In [65]:
def cvfts_forecast(train, test, _partitioner,_partitions):
    """Fit a pyFTS ``ConditionalVarianceFTS`` model on the training values and predict the test values.

    A base partitioner is built on the training values and wrapped in a
    ``PolynomialNonStationaryPartitioner`` of degree 2, which is then handed
    to the model's ``fit``.

    Args:
        train: pandas object whose ``.values`` are used for partitioning and fitting.
        test: pandas object whose ``.values`` are passed to ``predict``.
        _partitioner: partitioner class/callable accepting ``data`` and ``npart``.
        _partitions: number of partitions requested from the base partitioner.

    Returns:
        Whatever ``ConditionalVarianceFTS.predict`` returns for the test values.
    """
    base_partitioner = _partitioner(data=train.values, npart=_partitions)
    fuzzy_sets = nspartitioners.PolynomialNonStationaryPartitioner(
        data=train.values,
        part=base_partitioner,
        degree=2,
    )

    model = cvfts.ConditionalVarianceFTS()
    model.fit(train.values, parameters=1, partitioner=fuzzy_sets)

    return model.predict(test.values)

In [66]:
# Settings of the conditional-variance FTS model: base partitioner class and number of partitions.
partitioner = Grid.GridPartitioner
nparts = 90


# Clean component (target station only), mapped back to the original scale.
forecast_clean = cvfts_forecast(train_clean_df[target_station], test_clean_df[target_station], partitioner, nparts)
forecast_clean = denormalize(forecast_clean, min_clean, max_clean)

# Residual component.
forecast_residual = cvfts_forecast(train_residual_df[target_station], test_residual_df[target_station], partitioner, nparts)
forecast_residual = denormalize(forecast_residual, min_residual, max_residual)

In [67]:
# Sum the two component forecasts and score them; one leading observation is skipped.
step = 1
final_forecast = reconstruct_ssa_series(forecast_clean, forecast_residual)
rmse = calculate_rmse(test_df[target_station], final_forecast, 1, step)

RMSE : 0.5781277828227142


  


In [68]:
# Store the score together with the combined and per-component forecasts.
result = {'rmse': rmse, 'final': final_forecast, 'clean': forecast_clean, 'residual': forecast_residual}
save_obj(result, name="wind_cvfts_1")