In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

from numpy.fft import fft, ifft
import os
import sys

In [2]:
## Mac
#os.chdir("/Users/cseveriano/spatio-temporal-forecasting/")

## Windows
os.chdir("C:\\Users\\cseve\\Google Drive\\Doutorado\\Codes\\spatio-temporal-forecasting")
#sys.path.append('C:\\Users\\cseve\\Google Drive\\Doutorado\\Codes\\spatio-temporal-forecasting\\src\\ext-libraries\\pyFTS')

################################

fln_df = pd.read_csv('data/processed/SONDA/FLN-15min.csv', sep=";", parse_dates=['date'], index_col='date')
fln_df = fln_df[(fln_df.index >= '2013-11-01') & (fln_df.index <= '2015-11-01')]
fln_df = fln_df.fillna(method='ffill')

joi_df = pd.read_csv('data/processed/SONDA/JOI-15min.csv', sep=";", parse_dates=['date'], index_col='date')
joi_df = joi_df[(joi_df.index >= '2013-11-01') & (joi_df.index <= '2015-11-01')]
joi_df = joi_df.fillna(method='ffill')

sbr_df = pd.read_csv('data/processed/SONDA/SBR-15min.csv', sep=";", parse_dates=['date'], index_col='date')
sbr_df = sbr_df[(sbr_df.index >= '2013-11-01') & (sbr_df.index <= '2015-11-01')]
sbr_df = sbr_df.fillna(method='ffill')

In [3]:
fln_df = fln_df.resample('H').mean()
joi_df = joi_df.resample('H').mean()
sbr_df = sbr_df.resample('H').mean()

In [4]:
def remove_periodic(X, df_index, detrending=True):
    rad = np.array(X)
    
    if detrending:
        det_rad = rad - np.average(rad)
    else:
        det_rad = rad
    
    det_rad_fft = fft(det_rad)

    # Get the power spectrum
    rad_ps = [np.abs(rd)**2 for rd in det_rad_fft]
    
    frequency_threshold = 0.1e12
    clean_rad_fft = [det_rad_fft[i] if rad_ps[i] > frequency_threshold else 0 
                     for i in range(len(det_rad_fft))]
    
    rad_series_clean = ifft(clean_rad_fft)
    rad_series_clean = [value.real for value in rad_series_clean]
    
    if detrending:
        rad_trends = rad_series_clean + np.average(rad)
    
    rad_clean_ts = pd.Series(rad_trends, index=df_index)
    
    rad_clean_ts[(rad_clean_ts.index.hour < 6) | (rad_clean_ts.index.hour > 20)] = 0
    
    return rad - rad_clean_ts.values, rad_clean_ts.values

In [5]:
fln_residual, fln_clean = remove_periodic(fln_df['glo_avg'],fln_df.index)
joi_residual, joi_clean = remove_periodic(joi_df['glo_avg'],joi_df.index)
sbr_residual, sbr_clean = remove_periodic(sbr_df['glo_avg'],sbr_df.index)

In [6]:
fln_df = pd.DataFrame(data={'glo_avg': fln_df.glo_avg.tolist(), 'residual': fln_residual, 'clean': fln_clean, 'date' : fln_df.index}, index=fln_df.index)
joi_df = pd.DataFrame(data={'glo_avg': joi_df.glo_avg.tolist(), 'residual': joi_residual, 'clean': joi_clean, 'date' : joi_df.index}, index=joi_df.index)
sbr_df = pd.DataFrame(data={'glo_avg': sbr_df.glo_avg.tolist(), 'residual': sbr_residual, 'clean': sbr_clean, 'date' : sbr_df.index}, index=sbr_df.index)

In [7]:
fln_train = fln_df[(fln_df.index >= '2013-11-01') & (fln_df.index <= '2014-10-31')]
fln_test = fln_df[(fln_df.index >= '2014-11-01') & (fln_df.index <= '2015-10-31')]

joi_train = joi_df[(joi_df.index >= '2013-11-01') & (joi_df.index <= '2014-10-31')]
joi_test = joi_df[(joi_df.index >= '2014-11-01') & (joi_df.index <= '2015-10-31')]

sbr_train = sbr_df[(sbr_df.index >= '2013-11-01') & (sbr_df.index <= '2014-10-31')]
sbr_test = sbr_df[(sbr_df.index >= '2014-11-01') & (sbr_df.index <= '2015-10-31')]

In [8]:
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
from pyFTS.partitioners import Grid, Util as pUtil
from pyFTS.models.multivariate import common, variable, mvfts

In [9]:
sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']}
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12, 
                           data=fln_train, partitioner_specific=sp)

sp = {'seasonality': DateTime.minute_of_day}
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24, 
                          data=fln_train, partitioner_specific=sp)

vavg = variable.Variable("Irradiance", data_label="glo_avg", partitioner=Grid.GridPartitioner, npart=60, 
                         data=fln_train) 

In [10]:
from pyFTS.models.multivariate import common, variable, mvfts

model1 = mvfts.MVFTS("")

model1.append_variable(vmonth)

model1.append_variable(vhour)

model1.append_variable(vavg)

model1.target_variable = vavg

model1.fit(fln_train, num_batches=200, save=True, batch_save=True, file_path='mvfts_sonda_fln_gloavg', batch_save_interval=10)

[ 13:12:02] Start training
[ 13:12:02] Starting batch 1
[ 13:12:02] Finish batch 1
[ 13:12:02] Starting batch 2
[ 13:12:02] Finish batch 2
[ 13:12:02] Starting batch 3
[ 13:12:02] Finish batch 3
[ 13:12:02] Starting batch 4
[ 13:12:03] Finish batch 4
[ 13:12:03] Starting batch 5
[ 13:12:03] Finish batch 5
[ 13:12:03] Starting batch 6
[ 13:12:03] Finish batch 6
[ 13:12:03] Starting batch 7
[ 13:12:03] Finish batch 7
[ 13:12:03] Starting batch 8
[ 13:12:03] Finish batch 8
[ 13:12:03] Starting batch 9
[ 13:12:03] Finish batch 9
[ 13:12:03] Starting batch 10
[ 13:12:04] Finish batch 10
[ 13:12:04] Starting batch 11
[ 13:12:04] Finish batch 11
[ 13:12:04] Starting batch 12
[ 13:12:04] Finish batch 12
[ 13:12:04] Starting batch 13
[ 13:12:04] Finish batch 13
[ 13:12:04] Starting batch 14
[ 13:12:05] Finish batch 14
[ 13:12:05] Starting batch 15
[ 13:12:05] Finish batch 15
[ 13:12:05] Starting batch 16
[ 13:12:05] Finish batch 16
[ 13:12:05] Starting batch 17
[ 13:12:05] Finish batch 17
[ 13:

In [20]:
from pyFTS.common import Util

model1 = Util.load_obj('mvfts_sonda_fln_gloavg')
model1.name = "MVFTS Normal"

In [21]:
forecasted1 = model1.predict(fln_test)

In [22]:
forecasted_final = forecasted1

In [14]:
forecasted_final = forecasted1 + fln_test.clean

In [23]:
from pyFTS.benchmarks import Measures

_rmse = Measures.rmse(fln_test.glo_avg.tolist(), forecasted1)

#forecasted1

print("RMSE: ", _rmse, "\n")

RMSE:  88.4437258762 



In [24]:
def normalized_rmse(targets, forecasts):
    if isinstance(targets, list):
        targets = np.array(targets)
    if isinstance(forecasts, list):
        forecasts = np.array(forecasts)
    return ((np.sqrt(np.nanmean((targets - forecasts) ** 2))) / np.nanmean(targets) ) * 100


In [25]:
_nrmse = normalized_rmse(fln_test.glo_avg.tolist(), forecasted1)
print("nRMSE: ", _nrmse, "\n")

nRMSE:  51.5096653512 



In [26]:
from pyFTS.models import hofts

train = fln_train.residual

fuzzy_sets = Grid.GridPartitioner(data=train, npart=100)
model_hofts = hofts.HighOrderFTS("FTS", partitioner=fuzzy_sets)
model_hofts.fit(train, order=6)

[ 16:26:55] Start training
[ 16:26:55] Starting batch 1
[ 16:27:12] Finish batch 1
[ 16:27:12] Starting batch 2
[ 16:27:30] Finish batch 2
[ 16:27:30] Starting batch 3
[ 16:27:47] Finish batch 3
[ 16:27:47] Starting batch 4
[ 16:28:08] Finish batch 4
[ 16:28:08] Starting batch 5
[ 16:28:25] Finish batch 5
[ 16:28:25] Starting batch 6
[ 16:28:42] Finish batch 6
[ 16:28:42] Starting batch 7
[ 16:29:00] Finish batch 7
[ 16:29:00] Starting batch 8
[ 16:29:21] Finish batch 8
[ 16:29:21] Starting batch 9
[ 16:29:38] Finish batch 9
[ 16:29:38] Starting batch 10
[ 16:29:54] Finish batch 10
[ 16:29:54] Starting batch 11
[ 16:29:54] Finish batch 11
[ 16:29:54] Finish training


In [28]:
forecast_hofts = model_hofts.predict(fln_test.residual)

In [30]:
forecasted_final = forecast_hofts + fln_test[5:].clean

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1,figsize=[20,5])

test = fln_test[5:100]
fcst = forecasted_final[:95]
ax.plot(test['date'].values, test['glo_avg'].values)
ax.plot(test['date'].values, fcst)

In [31]:
_nrmse = normalized_rmse(fln_test.glo_avg.tolist()[5:], forecasted_final)
print("nRMSE: ", _nrmse, "\n")

nRMSE:  12.6151151411 



## Vector Autoregressive (VAR)

In [33]:
import statsmodels.api as sm
from statsmodels.tsa.api import VAR, DynamicVAR

In [35]:
mdata = sm.datasets.macrodata.load_pandas().data
dates = mdata[['year', 'quarter']].astype(int).astype(str)
quarterly = dates["year"] + "Q" + dates["quarter"]
from statsmodels.tsa.base.datetools import dates_from_str
quarterly = dates_from_str(quarterly)
mdata = mdata[['realgdp','realcons','realinv']]
mdata.index = pd.DatetimeIndex(quarterly)
data = np.log(mdata).diff().dropna()

In [72]:
train_df = pd.DataFrame(data={'fln_res': fln_train.residual.tolist(), 'joi_res': joi_train.residual.tolist(), 'sbr_res': sbr_train.residual.tolist()}, index=fln_train.index)
test_df = pd.DataFrame(data={'fln_res': fln_test.residual.tolist(), 'joi_res': joi_test.residual.tolist(), 'sbr_res': sbr_test.residual.tolist()}, index=fln_test.index)

In [73]:
model = VAR(train_df)

In [106]:
order = 2
results = model.fit(order)
results.summary()

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Thu, 29, Mar, 2018
Time:                     23:14:07
--------------------------------------------------------------------
No. of Equations:         3.00000    BIC:                    25.7618
Nobs:                     8735.00    HQIC:                   25.7506
Log likelihood:          -149603.    FPE:                1.51646e+11
AIC:                      25.7448    Det(Omega_mle):     1.51282e+11
--------------------------------------------------------------------
Results for equation fln_res
                coefficient       std. error           t-stat            prob
-----------------------------------------------------------------------------
const             -0.074753         0.826255           -0.090           0.928
L1.fln_res         0.806166         0.011003           73.270           0.000
L1.joi_res         0.171834         0.012066           14.241         

In [111]:
results = model.fit(maxlags=24, ic='aic')

In [112]:
results.k_ar

24

In [114]:
order = results.k_ar

In [115]:
forecast = []
for i in range(len(test_df)-order) :
    forecast.append(results.forecast(test_df.values[i:i+order],1))

In [116]:
fcst = [item[0][0] for item in forecast]

In [117]:
obs = fln_test[order:].glo_avg.tolist()

In [118]:
fcst =  [sum(x) for x in zip(fcst, fln_test[order:].clean.tolist())]

In [120]:
_nrmse = normalized_rmse(obs, fcst)
print("nRMSE: ", _nrmse, "\n")

nRMSE:  44.0832659227 

