In [2]:
from sklearn.cluster import MeanShift, estimate_bandwidth

import pandas as pd
import numpy as np
from models import KMeansPartitioner
from sklearn import preprocessing
from pyFTS.partitioners import Grid, Util as pUtil
from pyFTS.models import hofts

from models import sthofts
import matplotlib.pyplot as plt

# Funções Auxiliares

In [3]:
def normalized_rmse(targets, forecasts):
    """Return the RMSE of `forecasts` against `targets` as a percentage of the mean target.

    Plain Python lists are converted to NumPy arrays; any other input type is
    used as given. NaN entries are ignored by both the squared-error mean and
    the target mean (``np.nanmean``).
    """
    targets = np.array(targets) if isinstance(targets, list) else targets
    forecasts = np.array(forecasts) if isinstance(forecasts, list) else forecasts

    squared_error = (targets - forecasts) ** 2
    rmse = np.sqrt(np.nanmean(squared_error))
    return (rmse / np.nanmean(targets)) * 100

In [4]:
def normalize(df):
    """Min-max scale `df` using its own ``min()`` and ``max()``.

    Returns ``(df - min) / (max - min)``.
    """
    lower = df.min()
    value_range = df.max() - lower
    return (df - lower) / value_range

In [5]:
def denormalize(norm, _min, _max):
    """Map each value of `norm` back to the ``[_min, _max]`` scale.

    Returns a list with ``n * (_max - _min) + _min`` for every ``n`` in `norm`.
    """
    span = _max - _min
    return [(value * span) + _min for value in norm]

# Base de Dados
Montagem de casos de treinamento, validação e testes

In [6]:
# Load the three pickled DataFrames used throughout the notebook.
# NOTE(review): presumably df_ssa_clean / df_ssa_residual are the SSA "clean" and
# "residual" components of `df` — confirm against the notebook that produced them.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
df = pd.read_pickle("df_oahu.pkl")
df_ssa_clean = pd.read_pickle("df_ssa_clean.pkl")
df_ssa_residual = pd.read_pickle("df_ssa_residual.pkl")

In [8]:
# Restrict every dataset to a single month.
interval = '2010-11'

sample_df, residual_sample_df, clean_sample_df = (
    frame.loc[interval] for frame in (df, df_ssa_residual, df_ssa_clean)
)

# Min-max scale the two SSA components over the whole month.
norm_residual_sample_df = normalize(residual_sample_df)
norm_clean_sample_df = normalize(clean_sample_df)

# Week-of-month bucket: days 1-7 -> 1, 8-14 -> 2, 15-21 -> 3, 22-28 -> 4, 29+ -> 5.
week = (sample_df.index.day - 1) // 7 + 1

# FOR THE TESTS:
# weeks 1-2 are used for training
train_df, train_residual_df, train_clean_df = (
    frame.loc[week <= 2]
    for frame in (sample_df, norm_residual_sample_df, norm_clean_sample_df)
)

# week 3 is used for validation
validation_df, validation_residual_df, validation_clean_df = (
    frame.loc[week == 3]
    for frame in (sample_df, norm_residual_sample_df, norm_clean_sample_df)
)

# everything after week 3 is used for testing (this includes days 29 and later)
test_df, test_residual_df, test_clean_df = (
    frame.loc[week > 3]
    for frame in (sample_df, norm_residual_sample_df, norm_clean_sample_df)
)

# Avaliação de Modelos

## 1 - Spatio-temporal High Order FTS 

In [9]:
def sthofts_forecast(_order, npartitions, col, train, validation):
    """Fit a spatio-temporal high-order FTS on `train` and forecast `validation`.

    Parameters
    ----------
    _order : number of lags passed to the model (``nlags``).
    npartitions : number of partitions for the KMeans partitioner; the
        partitioner's ``init_size`` is three times this value.
    col : name of the column whose forecasts are returned.
    train, validation : objects exposing ``.values`` and ``.columns``
        (DataFrames in this notebook).

    Returns
    -------
    The forecast values for column `col`.
    """
    fuzzy_sets = KMeansPartitioner.KMeansPartitioner(data=train, npart=npartitions, batch_size=1000, init_size=npartitions*3)
    model_sthofts = sthofts.SpatioTemporalHighOrderFTS("FTS", nlags=_order, partitioner=fuzzy_sets)

    model_sthofts.fit(np.array(train.values), dump='time', num_batches=100)

    # Predict with the model fitted above. The original code referenced the
    # notebook-level `model_hofts` / `forecast_hofts` here, which either raised
    # NameError on a fresh kernel or silently returned a stale model's output.
    forecast_sthofts = model_sthofts.predict(np.array(validation.values))
    forecast_sthofts_df = pd.DataFrame(data=forecast_sthofts, columns=validation.columns)
    return forecast_sthofts_df[col].values

In [None]:
    # Candidate model orders (1, 2) and partition counts (10, 20, ..., 90).
    # NOTE(review): neither list is used in the visible cells — presumably intended for a grid search.
    eval_order_list = np.arange(1,3)
    partitions_list = np.arange(10,100,10)


In [None]:
# Parameters for the single-run evaluation. They were previously used here
# before any cell defined them, so this cell failed on a fresh kernel.
# The values mirror the ones this notebook assigns further down
# (_order = 6, k = 20 partitions for the spatio-temporal model, col = 'AP_1');
# adjust them here if a different configuration is wanted.
_order = 6
npartitions = 20
col = 'AP_1'

# Forecast the validation week directly from the raw (un-normalized) data.
forecast = sthofts_forecast(_order, npartitions, col, train_df, validation_df)

In [None]:
# Forecast the normalized residual and clean components separately.
norm_residual_forecast = sthofts_forecast(_order, npartitions, col, train_residual_df, validation_residual_df)
norm_clean_forecast = sthofts_forecast(_order, npartitions, col, train_clean_df, validation_clean_df)

# Map each component back to its original scale using the min/max of the
# un-normalized monthly sample for column `col`.
residual_forecast = denormalize(norm_residual_forecast, residual_sample_df[col].min(), residual_sample_df[col].max())
clean_forecast = denormalize(norm_clean_forecast, clean_sample_df[col].min(), clean_sample_df[col].max())

# The final forecast is the element-wise sum of both components.
forecast = [r + c for r, c in zip(residual_forecast,clean_forecast)]

In [None]:
y_est = forecast
# Read the observations from the configured target column rather than a
# hard-coded 'AP_1', so the metric always matches the column that was forecast.
y_obs = validation_df[col].values

# Forecast i is compared with observation (_order + i); the last forecast has
# no matching observation and is dropped.
#_nrmse = normalized_rmse(y_obs[(_order-1):], y_est)
_nrmse = normalized_rmse(y_obs[_order:], y_est[:-1])
print("nRMSE: ", _nrmse, "\n")

plt.figure(figsize=(20,10))
plt.plot(y_obs[_order:], label='observed')
plt.plot(y_est[:-1], label='forecast')
plt.legend()

In [None]:
# Manual (step-by-step) version of the spatio-temporal model, built on the
# raw training / validation values as NumPy arrays.
train = np.array(train_df.values)
validation = np.array(validation_df.values)

# Number of KMeans partitions.
k = 20

fuzzy_sets = KMeansPartitioner.KMeansPartitioner(data=train, npart=k, batch_size=1000, init_size=k*3)

# Number of lags given to the model.
_order = 6

model_hofts = sthofts.SpatioTemporalHighOrderFTS("FTS", nlags=_order, partitioner=fuzzy_sets)


In [None]:
# Fit the model on the training array.
# The commented-out calls below are alternative invocations that pass
# `distributed=True` and a list of node addresses.
model_hofts.fit(train, dump = 'time', num_batches=100)
#model_hofts.fit(train, dump = 'time', num_batches=100, distributed=True, nodes=['192.168.1.3','192.168.1.8'])
#model_hofts.fit(train, dump = 'time', num_batches=100, distributed=True, nodes=['192.168.1.3'])




In [None]:
# Forecast the validation array with the fitted model.
forecast_hofts = model_hofts.predict(validation)

In [None]:
# Wrap the forecasts in a DataFrame and rescale them with the min/max of the monthly sample.
# NOTE(review): `model_hofts` was fitted on `train_df`, which is not normalized, so this
# rescaling may not be appropriate — confirm. `denormalized_frcst_df` is not used in the
# visible cells.
norm_frcst_df = pd.DataFrame(data=forecast_hofts, columns=df.columns)
denormalized_frcst_df = (norm_frcst_df * (sample_df.max()-sample_df.min())) + sample_df.min()

In [None]:
# Forecasts as a DataFrame labelled with the columns of `df`.
forecast_hofts_df = pd.DataFrame(data=forecast_hofts, columns=df.columns)

In [None]:
# Number of forecasts produced for column `col`.
len(forecast_hofts_df[col].values)

In [None]:
# Observed vs. forecast values for column `col`.
y_obs = validation_df[col].values
y_est = forecast_hofts_df[col].values

# Forecast i is compared with observation (_order + i); the last forecast is dropped.
#_nrmse = normalized_rmse(y_obs[(_order-1):], y_est)
_nrmse = normalized_rmse(y_obs[_order:], y_est[:-1])
print("nRMSE: ", _nrmse, "\n")
plt.figure(figsize=(20,10))
plt.plot( y_obs[_order:])
plt.plot(y_est[:-1])

In [None]:

# Alternative alignment: forecast i is compared with observation (_order - 1 + i),
# keeping every forecast.
# NOTE(review): this differs by one step from the alignment used in the other
# evaluation cells — confirm which one is intended.
_nrmse = normalized_rmse(y_obs[(_order - 1):], y_est)
print("nRMSE: ", _nrmse, "\n")

# Simple HOFTS

In [None]:
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
from pyFTS.partitioners import Grid, Util as pUtil
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models import hofts
from pyFTS.common import Transformations
# Differential transformation built with argument 1 (presumably lag-1 differencing — confirm).
# NOTE(review): `tdiff` is not referenced by any active code in the visible cells.
tdiff = Transformations.Differential(1)

In [None]:
def hofts_forecast(_order, npartitions, col, train, validation):
    """Fit a univariate high-order FTS on ``train[col]`` and forecast ``validation[col]``.

    A grid partitioner with `npartitions` partitions is built from the training
    column, the model is fitted with ``order=_order``, and the model's
    predictions for the validation column are returned.
    """
    train_series = train[col].values
    partitioner = Grid.GridPartitioner(data=train_series, npart=npartitions)

    model = hofts.HighOrderFTS("FTS", partitioner=partitioner)
    # Optional step kept disabled, as in the original:
    #model.append_transformation(Transformations.Differential(1))
    model.fit(train_series, order=_order)

    return model.predict(validation[col].values)

In [None]:
# Model order, number of grid partitions and target column for the simple HOFTS run.
_order = 6
npartitions = 70
col = 'AP_1'


# Forecast the normalized residual and clean components separately.
norm_residual_forecast = hofts_forecast(_order, npartitions, col, train_residual_df, validation_residual_df)
norm_clean_forecast = hofts_forecast(_order, npartitions, col, train_clean_df, validation_clean_df)

# Map each component back to its original scale using the min/max of the
# un-normalized monthly sample for column `col`.
residual_forecast = denormalize(norm_residual_forecast, residual_sample_df[col].min(), residual_sample_df[col].max())
clean_forecast = denormalize(norm_clean_forecast, clean_sample_df[col].min(), clean_sample_df[col].max())

# The final forecast is the element-wise sum of both components.
forecast = [r + c for r, c in zip(residual_forecast,clean_forecast)]

In [None]:
#forecast = hofts_forecast(_order, npartitions, col, train_df, validation_df)

In [None]:
y_est = forecast
# Read the observations from the configured target column rather than a
# hard-coded 'AP_1', so the metric always matches the column that was forecast.
y_obs = validation_df[col].values

# Forecast i is compared with observation (_order + i); the last forecast has
# no matching observation and is dropped.
#_nrmse = normalized_rmse(y_obs[(_order-1):], y_est)
_nrmse = normalized_rmse(y_obs[_order:], y_est[:-1])
print("nRMSE: ", _nrmse, "\n")

In [None]:
# Plot observations against forecasts using the same alignment as the nRMSE above.
plt.figure(figsize=(20,10))
plt.plot( y_obs[_order:])
plt.plot(y_est[:-1])

# Alternative alignment (disabled):
#plt.plot( y_obs[_order-1:])
#plt.plot(y_est)

In [None]:
# Number of forecasts produced.
len(y_est)

# Persistence

In [None]:
def persistence_forecast(data, data_clean, order):
    """One-step-ahead persistence forecast scaled by a reference series.

    For every position ``k`` from `order` to ``len(data) - 1`` the forecast is
    ``data[k-1] / data_clean[k-1] * data_clean[k]``, i.e. the previous ratio
    between the measured and the reference value is assumed to persist.

    Parameters
    ----------
    data : iterable of measured values.
    data_clean : iterable of reference values, at least as long as `data`.
    order : index of the first forecast position; expected to be >= 1 because
        each forecast reads the element at ``k - 1``.

    Returns
    -------
    list with ``len(data) - order`` forecasts. Where the previous reference
    value is zero the ratio is undefined and the forecast is NaN, which
    NaN-aware metrics can skip, instead of raising ZeroDivisionError or
    producing an infinite value.
    """
    d = list(data)
    dc = list(data_clean)
    fcst = []
    for k in range(order, len(d)):
        irr = d[k - 1]
        irr_clean = dc[k - 1]
        irr_clean_nxt = dc[k]

        if irr_clean == 0:
            # Ratio undefined: emit NaN rather than dividing by zero.
            fcst.append(float('nan'))
            continue

        fcst.append((irr / irr_clean) * irr_clean_nxt)
    return fcst
        
    

In [None]:
# Display the raw validation values for column `col` (inspection only).
list(validation_df[col])

In [None]:


# Persistence baseline for the validation week.
# NOTE(review): `validation_df` holds raw values while `validation_clean_df` is sliced from
# the min-max-normalized clean component — confirm that mixing the two scales is intended.
fc = persistence_forecast(validation_df[col], validation_clean_df[col], _order)

In [None]:
# persistence_forecast starts at index `_order`, so forecast i lines up with observation (_order + i).
_nrmse = normalized_rmse(y_obs[(_order):], fc)
print("nRMSE: ", _nrmse, "\n")

In [None]:
# Plot observations against the persistence forecasts.
plt.figure(figsize=(20,10))
plt.plot( y_obs[_order:])
plt.plot(fc)


# Multivariate

In [None]:
def add_date(df):
    """Return a copy of `df` with an extra ``date`` column holding its index.

    The input frame is left unchanged.
    """
    return df.assign(date=df.index)

In [None]:
# Copies of the training frames with the index exposed as a `date` column.
train_mv = add_date(train_df)
train_residual_mv = add_date(train_residual_df)
train_clean_mv = add_date(train_clean_df)

# Same for the validation frames.
validation_mv = add_date(validation_df)
validation_residual_mv = add_date(validation_residual_df)
validation_clean_mv = add_date(validation_clean_df)

In [None]:
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime

#fig, axes = plt.subplots(nrows=2, ncols=1,figsize=[15,10])

# Partitioner-specific options for the time variable.
sp = {'seasonality': DateTime.minute_of_day}

# Time variable read from the `date` column, with 24 partitions.
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24, 
                          data=train_mv, partitioner_specific=sp)

# Target variable read from column `col`, with 50 grid partitions.
vavg = variable.Variable("Irradiance", data_label=col, partitioner=Grid.GridPartitioner, npart=50, 
                         data=train_mv) 
#vhour.partitioner.plot(axes[1])

#plt.tight_layout()


In [None]:
from pyFTS.models.multivariate import common, variable, mvfts

# Multivariate FTS using the time variable and the target variable.
model1 = mvfts.MVFTS("")

model1.append_variable(vhour)

model1.append_variable(vavg)

# The model forecasts the `vavg` variable.
model1.target_variable = vavg

model1.fit(train_mv)

In [None]:
# Forecast the validation week with the multivariate model.
forecast = model1.predict(validation_mv)

In [None]:
y_est = forecast
# Read the observations from the configured target column (the one `vavg`
# was built from) rather than a hard-coded 'AP_1'.
y_obs = validation_df[col].values

# Forecast i is compared with observation (i + 1); the last forecast has no
# matching observation and is dropped.
#_nrmse = normalized_rmse(y_obs[(_order-1):], y_est)
_nrmse = normalized_rmse(y_obs[1:], y_est[:-1])
print("nRMSE: ", _nrmse, "\n")
plt.figure(figsize=(20,10))
plt.plot(y_obs[1:], label='observed')
plt.plot(y_est[:-1], label='forecast')
plt.legend()

# SARIMA

In [None]:
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from itertools import product
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Initial approximation of parameters
ps = range(0, 2)
ds = range(0, 2)
qs = range(0, 2)
Ps = range(0, 2)
Ds = range(0, 2)
Qs = range(0, 2)
D=1
d=1
parameters = product(ps, ds, qs, Ps, Ds, Qs)
parameters_list = list(parameters)

print("Num combinations: ", len(parameters_list))
# Model Selection
results = []
best_aic = float("inf")
warnings.filterwarnings('ignore')
for param in parameters_list:
    print(" Testing combination: ", param)
    try:
        model = SARIMAX(train_df[col].values, order=(param[0], param[1], param[2]), seasonal_order=(param[3], param[4], param[5], 96)).fit(disp=-1)
    except ValueError:
        print('bad parameter combination:', param)
        continue
    aic = model.aic
    if aic < best_aic:
        best_model = model
        best_aic = aic
        best_param = param
    results.append([param, model.aic])

In [None]:
# Single fit with order (3, 1, 1) and seasonal order (1, 1, 1, 24).
# NOTE(review): the result overwrites `model` and is not used by the visible cells below.
model = SARIMAX(train_df[col].values, order=(3, 1, 1), seasonal_order=(1, 1, 1, 24)).fit()


In [None]:
# Estimate the SARIMA parameters on the training weeks only.
training_mod = SARIMAX(train_df[col].values, order=(6, 1, 1), seasonal_order=(1, 1, 1, 96))
training_res = training_mod.fit()

# Training and validation rows stacked in order. `DataFrame.append` was removed
# in pandas 2.0, so `pd.concat` is used to build the same frame.
whole_data = pd.concat([train_df, validation_df])
test_data = validation_df

# Run the same model specification over the full series with the parameters
# estimated above (no re-fitting), then take its in-sample predictions.
mod = SARIMAX(whole_data[col].values, order=(6, 1, 1), seasonal_order=(1, 1, 1, 96))
res = mod.filter(training_res.params)

insample = res.predict()
T = len(test_data)

In [None]:
# Keep only the predictions that correspond to the validation rows,
# i.e. the last `tlen` entries of the in-sample predictions.
wlen = len(whole_data)
tlen = len(test_data)

forecast = insample[wlen-tlen:]

In [None]:
y_est = forecast
# Read the observations from the configured target column (the one the SARIMA
# model was fitted on) rather than a hard-coded 'AP_1'.
y_obs = validation_df[col].values

# The sliced predictions line up one-to-one with the validation rows, so no shift is applied.
_nrmse = normalized_rmse(y_obs, y_est)
#_nrmse = normalized_rmse(y_obs[1:], y_est[:-1])
print("nRMSE: ", _nrmse, "\n")
plt.figure(figsize=(20,10))
plt.plot(y_obs, label='observed')
plt.plot(y_est, label='forecast')
plt.legend()