# Aux

In [None]:
import datetime
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
from sklearn.metrics import mean_squared_error
import asizeof
import metrics
import kfold
import time
import pickle

# Number of repetitions per method; also the number of seeds drawn further down.
numRuns = 1


def getRollingWindow(index, train_days=20, validation_days=6, test_days=6):
    """Return the date boundaries of one train/validation/test window.

    The three segments are laid out back to back, starting at ``index``.
    Each segment starts one day after the previous one ends, and its end
    date is ``<segment>_days`` days after its start date.

    Args:
        index: Start of the training segment; any object supporting
            ``+ datetime.timedelta`` and ``strftime`` (e.g. a
            ``datetime.datetime`` or a pandas ``Timestamp``).
        train_days: Days between the training start and end dates.
        validation_days: Days between the validation start and end dates.
        test_days: Days between the test start and end dates.

    Returns:
        A 6-tuple of ``'%Y-%m-%d'`` strings:
        ``(train_start, train_end, validation_start, validation_end,
        test_start, test_end)``.
    """
    one_day = datetime.timedelta(days=1)

    train_first = index
    train_last = train_first + datetime.timedelta(days=train_days)

    validation_first = train_last + one_day
    validation_last = validation_first + datetime.timedelta(days=validation_days)

    test_first = validation_last + one_day
    test_last = test_first + datetime.timedelta(days=test_days)

    boundaries = (train_first, train_last,
                  validation_first, validation_last,
                  test_first, test_last)
    return tuple(moment.strftime('%Y-%m-%d') for moment in boundaries)

# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a (multivariate) time series as a supervised-learning table.

    Each output row holds ``n_in`` lagged observations followed by ``n_out``
    current/future observations of every variable.

    Args:
        data: Sequence of observations: a list, a 1-D/2-D array, a Series
            or a DataFrame. Anything accepted by ``pd.DataFrame``.
        n_in: Number of lag steps (t-n_in ... t-1) used as inputs.
        n_out: Number of steps (t ... t+n_out-1) used as outputs.
        dropnan: If true, drop the rows made incomplete by the shifting.

    Returns:
        A DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)``
        and ``var<j>(t+<i>)``.
    """
    df = pd.DataFrame(data)
    # Count variables on the frame itself so that lists of rows and 1-D
    # arrays/Series get the right number of column names.
    n_vars = df.shape[1]

    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names.extend(f'var{j + 1}(t-{lag})' for j in range(n_vars))
    # forecast sequence (t, t+1, ... t+n)
    for lead in range(n_out):
        cols.append(df.shift(-lead))
        if lead == 0:
            names.extend(f'var{j + 1}(t)' for j in range(n_vars))
        else:
            names.extend(f'var{j + 1}(t+{lead})' for j in range(n_vars))

    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values introduced by the shifts
    if dropnan:
        agg = agg.dropna()
    return agg


# Derive one integer seed per run from a fixed master seed.
seed = 3025731418
seeds = np.random.mtrand.RandomState(seed).randint(0, 4294967296, size=numRuns)


In [1]:
import sys

# Print the interpreter's module search path.
print(sys.path)

['/home/bruno/Dropbox/PG INF/EMVFTS/notebooks/clean', '/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/home/bruno/tet/lib/python3.8/site-packages', '/home/bruno/tet/lib/python3.8/site-packages/IPython/extensions', '/home/bruno/.ipython']


 # Dataset

In [None]:
def normalize(df):
    """Min-max scale ``df`` using its own minimum and maximum.

    The minimum and maximum come from ``df.min()`` / ``df.max()``, so for a
    DataFrame every column is scaled independently.
    """
    lowest = df.min()
    span = df.max() - lowest
    return (df - lowest) / span

#df = pd.read_csv('https://query.data.world/s/wo5wryokqyg5uvbfqqij2mucgwly5u',  parse_dates=['datetime'], index_col=0)
# Load the data set, parsing 'datetime' as dates and using the first column as index.
df = pd.read_csv('solar_oahu_df.csv',  parse_dates=['datetime'], index_col=0)

# NOTE: scaling uses the min/max of the whole file, i.e. before the date filter below.
df = normalize(df)

# Split data: keep June 2010 (inclusive) up to June 2011 (exclusive).
interval = ((df.index >= '2010-06') & (df.index < '2011-06'))
df = df.loc[interval]

# Last available day; ISO 'YYYY-MM-DD' strings compare chronologically.
limit = df.index[-1].strftime('%Y-%m-%d')

test_end = ""
index = df.index[0]
batches = []             # raw frames: [initial train+validation, test_1, test_2, ...]
batches_supervised = []  # matching (X, Y) arrays in lagged/supervised form
_order = 2               # number of lagged observations used as input
_step = 1                # number of steps used as output
nobs = _order * len(df.columns)
output_index = -len(df.columns)*_step

# Slide the window forward one week at a time until the test segment
# reaches the end of the data.
while test_end < limit:

    #print("Index: ", index.strftime('%Y-%m-%d'))

    train_start, train_end, validation_start, validation_end, test_start, test_end = getRollingWindow(index)
    index = index + datetime.timedelta(days=7)

    train = df[train_start : train_end]
    validation = df[validation_start : validation_end]
    test = df[test_start : test_end]
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    train = pd.concat([train, validation])

    # Only the very first window contributes a training batch; every window
    # contributes its test segment as the next incremental batch.
    if not batches:
        batches.append(train)
        train_reshaped_df = series_to_supervised(train,n_in=_order,n_out=_step)
        train_X, train_Y = train_reshaped_df.iloc[:, :nobs].values, train_reshaped_df.iloc[:, output_index:].values
        batches_supervised.append((train_X,train_Y))

    batches.append(test)
    test_reshaped_df = series_to_supervised(test,n_in=_order,n_out=_step)
    test_X, test_Y = test_reshaped_df.iloc[:, :nobs].values, test_reshaped_df.iloc[:, output_index:].values
    batches_supervised.append((test_X,test_Y))
    

# Método eMVFTS

In [5]:
from spatiotemporal.models.clusteredmvfts.fts import evolvingclusterfts
#from ..source.emvfts.fts import evolvingclusterfts
from pyFTS.models.multivariate import granular
from spatiotemporal.util import benchmarks

from pyFTS.benchmarks import Measures
from spatiotemporal.data import loader
import importlib
import copy
import asizeof


# Leftover settings/accumulators: nothing later in this cell reads them
# (forecasts is re-created before the prediction loop).
step = 1
evolfts_order = 2
tend_evolving = []
mems_evolving = []
#file_evolving = open('file_evolving.pickle','wb')
#exp_name = "BENCHMARK-2_1-EVOLVING"
forecasts = []

# Hyper-parameters handed to EvolvingClusterFTS / fit / predict below.
_variance_limit = 0.001
_defuzzy = 'weighted'
_t_norm = 'threshold'
_membership_threshold = 0.6
_order = 2
_step = 1

# Wall-clock timer covering model construction, the initial fit and all predictions.
tstart = time.time()
model = evolvingclusterfts.EvolvingClusterFTS(variance_limit=_variance_limit, defuzzy=_defuzzy, t_norm=_t_norm,
                                              membership_threshold=_membership_threshold)


# Initial fit on the first (train + validation) batch.
model.fit(batches[0].values, order=_order, verbose=False)

# m1: model size after each batch; t1: construction+fit time, then one entry per batch.
m1 = []
t1 = [time.time() - tstart]

# One slot per test batch; every slot is overwritten in the loop below.
forecasts = [[]]*(len(batches)-1)
forecasts_emvfts_runs = []
for j in range(1,len(batches)):
    auxTime = time.time()
    forecast = model.predict(batches[j].values,steps_ahead=_step)
    t1.append(time.time()-auxTime)
    m1.append(asizeof.asizeof(model))
    forecast_df = pd.DataFrame(data=forecast, columns=batches[0].columns)
    forecasts[j-1] = forecast_df.values
    
# To save time (the method is deterministic, so it always returns the same outputs),
# the code runs once and replicates the result list by the number of requested runs.
# This keeps the same layout as the other techniques, so the same result-processing
# code can be used for all of them. This is not a hack!!!

tempo_emvfts_runs = [time.time() - tstart]*numRuns # Hack (workaround)
memoria_emvfts_runs = [asizeof.asizeof(model)]*numRuns # Hack (workaround)
forecasts_emvfts_runs = [forecasts]*numRuns # Hack (workaround)

# NOTE(review): the other techniques store (m1, t1) in their '*_m1t1.pickle' files,
# while this cell stores the per-run summary tuple -- confirm which payload is intended.
save_obj = (forecasts_emvfts_runs,tempo_emvfts_runs,memoria_emvfts_runs)
# save_obj = (m1,t1)

with open('solar_emvfts_m1t1.pickle','wb') as file:
    pickle.dump(save_obj,file)


ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Plot m1 (model sizes measured with asizeof, as filled by the cell above).
plt.plot(m1)
plt.show()

In [None]:
# Plot t1 (elapsed times in seconds, as filled by the cell above).
plt.plot(t1)
plt.show()

# Mondrian Forest

In [None]:
# from mondrianforest import MondrianForestRegressor
from mfr import MFR

# The first supervised batch is the training set; one model is needed per output column.
x,y = batches_supervised[0]
numModels = y.shape[1]

# Hyper-parameter grid: n_trees from 1 to 25.
paramNames = ['n_trees']
paramValues = [list(range(1,26))]

# Factory that builds an MFR from a parameter struct.
mfrlambda = lambda x={}: MFR(x)
# Placeholder with one slot per output (all slots alias the same empty list).
paramStructs = [[]]*numModels

# Cross-validated grid search, currently disabled.
# for i in range(0,numModels):
#     kfoldcv = kfold.KFold(5, mfrlambda, paramNames,paramValues,metrics.RMSE())
#     paramStructs[i],_ = kfoldcv.start(x,y[:,i].reshape(-1,1))

In [None]:
from mfr import MFR

# Load the stored hyper-parameters (fourth entry of the saved tuple, indexed
# per output column below). The file is opened in a context manager so the
# handle is closed.
# NOTE(review): pickle can execute arbitrary code -- only load trusted files.
with open('solar_mondrian.pickle','rb') as file:
    (_,_,_,paramStructs) = pickle.load(file)


# The Mondrian Forest implementation only accepts one output at a time,
# so one model is trained per output column.

forecasts_mondrian_runs = [[]]*numRuns
memoria_mondrian_runs = [[]]*numRuns
tempo_mondrian_runs = [[]]*numRuns

# t1: training times (initial fit, then each incremental update);
# m1: total size of all models after each of those steps.
t1 = []
m1 = []

for k in range(numRuns):
    tstart = time.time()
    x,y = batches_supervised[0]
    numModels = y.shape[1]
    models = []
    for i in range(numModels):
        # Build and train each model in the same iteration (original order),
        # in case constructing a seeded model touches shared random state.
        mfr_model = MFR(dict({'seed':seeds[k]},**paramStructs[i]))
        mfr_model.train(x,y[:,i])
        models.append(mfr_model)
    # Append instead of rebinding so t1 accumulates across runs like m1
    # (and like the SGDR / PAR cells).
    t1.append(time.time() - tstart)
    m1.append(sum(asizeof.asizeof(mdl) for mdl in models))

    forecasts = []
    for j in range(1,len(batches_supervised)):
        x,y = batches_supervised[j]
        # Test-then-train: predict the batch first, one column per model.
        forecast = [models[i].predict(x).reshape((-1,1)) for i in range(numModels)]
        forecasts.append(np.hstack(forecast))

        # Only the incremental update is timed.
        auxTime = time.time()
        for i in range(numModels):
            models[i].train(x,y[:,i])
        t1.append(time.time() - auxTime)
        m1.append(sum(asizeof.asizeof(mdl) for mdl in models))
    forecasts_mondrian_runs[k] = forecasts
    tempo_mondrian_runs[k] = time.time() - tstart
    memoria_mondrian_runs[k] = sum(asizeof.asizeof(mdl) for mdl in models)


# NOTE: the per-run summary tuple is immediately replaced by the per-batch
# traces; only (m1, t1) is written to disk.
save_obj = (forecasts_mondrian_runs,tempo_mondrian_runs,memoria_mondrian_runs,paramStructs)
save_obj = (m1,t1)

with open('solar_mondrian_m1t1.pickle','wb') as file:
    pickle.dump(save_obj,file)

In [None]:
# Plot m1 (summed model sizes measured with asizeof, as filled by the cell above).
plt.plot(m1)
plt.show()

In [None]:
# Plot t1 (training times in seconds, as filled by the cell above).
plt.plot(t1)
plt.show()

# OSRELM

In [None]:
import osrelm

# The first supervised batch is the training set used for the parameter search.
x,y = batches_supervised[0]

# Hyper-parameter grid: regularization 2**-20 .. 2**20 and a list of hidden-layer sizes.
paramNames = ['regularization_parameter','number_of_hidden_neurons',]
paramValues = [[2**x for x in range(-20,21)],[10,50,100,200,300,400,500,600,700,800,900,1000]]

# Factory that builds an OSRELM; the input size is read from the global x at call time.
osrelmlambda = lambda d={}: osrelm.OSRELM(dict({'number_of_input_neurons':x.shape[1]},**d))

# The search object is created, but the search itself is currently disabled.
kfoldcv = kfold.KFold(5, osrelmlambda, paramNames,paramValues,metrics.RMSE())
# paramStruct,_ = kfoldcv.start(x,y)


In [None]:
import osrelm

# Load the stored hyper-parameters (fourth entry of the saved tuple). The file
# is opened in a context manager so the handle is closed.
# NOTE(review): pickle can execute arbitrary code -- only load trusted files.
with open('solar_osrelm.pickle','rb') as file:
    (_,_,_,paramStruct) = pickle.load(file)

forecasts_osrelm_runs = [[]]*numRuns
memoria_osrelm_runs = [[]]*numRuns
tempo_osrelm_runs = [[]]*numRuns

# m1: model size after each incremental update;
# t1: training times (initial fit, then each incremental update).
m1 = []
t1 = []

for k in range(numRuns):
    tstart = time.time()
    x,y = batches_supervised[0]

    model = osrelm.OSRELM(dict({'number_of_input_neurons':x.shape[1],'seed':seeds[k]},**paramStruct))
    model.train(x,y)
    # Append instead of rebinding so t1 accumulates across runs like m1
    # (and like the SGDR / PAR cells).
    t1.append(time.time() - tstart)

    forecasts = []
    for j in range(1,len(batches_supervised)):
        x,y = batches_supervised[j]
        # Test-then-train: predict the batch before learning from it.
        forecasts.append(model.predict(x))
        # Only the incremental update is timed.
        auxTime = time.time()
        model.train(x,y)
        t1.append(time.time() - auxTime)
        m1.append(asizeof.asizeof(model))
    tempo_osrelm_runs[k] = time.time() - tstart
    forecasts_osrelm_runs[k] = forecasts
    memoria_osrelm_runs[k] = asizeof.asizeof(model)

# NOTE: the per-run summary tuple is immediately replaced by the per-batch
# traces; only (m1, t1) is written to disk.
save_obj = (forecasts_osrelm_runs,tempo_osrelm_runs,memoria_osrelm_runs,paramStruct)
save_obj = (m1,t1)

with open('solar_osrelm_m1t1.pickle','wb') as file:
    pickle.dump(save_obj,file)

In [None]:
# Plot m1 (model sizes measured with asizeof, as filled by the cell above).
plt.plot(m1)
plt.show()

In [None]:
# Plot t1 (training times in seconds, as filled by the cell above).
plt.plot(t1)
plt.show()

# SGDRegressor

In [None]:
# from sklearn.linear_model import SGDRegressor
from sgdr import SGDR

# The first supervised batch is the training set; one model is needed per output column.
x,y = batches_supervised[0]
numModels = y.shape[1]

# Hyper-parameter grid for the SGDR wrapper.
# NOTE(review): if SGDR forwards 'loss' to scikit-learn's SGDRegressor, newer releases
# renamed 'squared_loss' to 'squared_error' -- confirm against the installed version.
paramNames = ['loss','penalty','alpha','l1_ratio','learning_rate','eta0']
paramValues = [['squared_loss','huber','epsilon_insensitive'],['l2','l1','elasticnet'],\
               [2**x for x in range(-20,21)],[0.15,0.5,0.75],['constant','optimal','invscaling','adaptive'],[0.01,0.05,0.1]]

# Factory that builds an SGDR from a parameter struct.
sgdrlambda = lambda x={}: SGDR(x)
# Placeholder with one slot per output; each slot is replaced in the loop below.
paramStructs = [[]]*numModels

# A search object is built per output, but the search call is disabled,
# so every output gets an empty parameter struct.
for i in range(0,numModels):
    kfoldcv = kfold.KFold(5, sgdrlambda, paramNames,paramValues,metrics.RMSE())
    #paramStructs[i],_ = kfoldcv.start(x,y[:,i].reshape(-1,1))
    paramStructs[i] = {}



In [None]:
# Load the stored hyper-parameters (fourth entry of the saved tuple, indexed
# per output column below). The file is opened in a context manager so the
# handle is closed.
# NOTE(review): pickle can execute arbitrary code -- only load trusted files.
with open('solar_sgdr.pickle','rb') as file:
    (_,_,_,paramStructs) = pickle.load(file)

# The method's implementation only accepts one output at a time,
# so one model is trained per output column.
forecasts_sgdr_runs = [[]]*numRuns
memoria_sgdr_runs = [[]]*numRuns
tempo_sgdr_runs = [[]]*numRuns

# m1: total size of all models after each incremental update;
# t1: training times (initial fit, then each incremental update).
m1 = []
t1 = []
for k in range(numRuns):
    tstart = time.time()
    x,y = batches_supervised[0]
    numModels = y.shape[1]
    models = []
    for i in range(numModels):
        # Build and train each model in the same iteration (original order),
        # in case constructing a seeded model touches shared random state.
        sgdr_model = SGDR(dict({'seed':seeds[k]},**paramStructs[i]))
        sgdr_model.train(x,y[:,i])
        models.append(sgdr_model)
    t1.append(time.time() - tstart)

    forecasts = []
    for j in range(1,len(batches_supervised)):
        x,y = batches_supervised[j]
        # Test-then-train: predict the batch first, one column per model.
        forecast = [models[i].predict(x).reshape((-1,1)) for i in range(numModels)]
        forecasts.append(np.hstack(forecast))
        # Only the incremental update is timed.
        auxTime = time.time()
        for i in range(numModels):
            models[i].train(x,y[:,i])
        t1.append(time.time() - auxTime)
        m1.append(sum(asizeof.asizeof(mdl) for mdl in models))
    tempo_sgdr_runs[k] = time.time() - tstart
    memoria_sgdr_runs[k] = sum(asizeof.asizeof(mdl) for mdl in models)
    forecasts_sgdr_runs[k] = forecasts

# NOTE: the per-run summary tuple is immediately replaced by the per-batch
# traces; only (m1, t1) is written to disk.
save_obj = (forecasts_sgdr_runs,tempo_sgdr_runs,memoria_sgdr_runs,paramStructs)
save_obj = (m1,t1)

with open('solar_sgdr_m1t1.pickle','wb') as file:
    pickle.dump(save_obj,file)

In [None]:
# Plot m1 (summed model sizes measured with asizeof, as filled by the cell above).
plt.plot(m1)
plt.show()

In [None]:
# Plot t1 (training times in seconds, as filled by the cell above).
plt.plot(t1)
plt.show()

# Passive Aggressive Regressor

In [None]:
# from sklearn.linear_model import PassiveAggressiveRegressor
from par import PAR

# C - regularization - 1.0
# epsilon - threshold for updating - 0.1
# fit_intercept = True
# n_iter = epochs. 5
# loss = 'epsilon_insensitive' / 'squared_epsilon_insensitive'

# The first supervised batch is the training set; one model is needed per output column.
x,y = batches_supervised[0]
numModels = y.shape[1]

# Hyper-parameter grid: C from 2**-20 to 2**20 and the two loss names.
paramNames = ['C','loss']
paramValues = [[2**x for x in range(-20,21)],['epsilon_insensitive','squared_epsilon_insensitive']]

# Factory that builds a PAR from a parameter struct.
parlambda = lambda x={}: PAR(x)
# Placeholder with one slot per output (all slots alias the same empty list).
paramStructs = [[]]*numModels

# Cross-validated grid search, currently disabled.
# for i in range(0,numModels):
#     kfoldcv = kfold.KFold(5, parlambda, paramNames,paramValues,metrics.RMSE())
#     paramStructs[i],_ = kfoldcv.start(x,y[:,i].reshape(-1,1))



In [None]:
from par import PAR

# Load the stored hyper-parameters (fourth entry of the saved tuple, indexed
# per output column below). The file is opened in a context manager so the
# handle is closed.
# NOTE(review): pickle can execute arbitrary code -- only load trusted files.
with open('solar_par.pickle','rb') as file:
    (_,_,_,paramStructs) = pickle.load(file)

# The method's implementation only accepts one output at a time,
# so one model is trained per output column.
forecasts_par_runs = [[]]*numRuns
tempo_par_runs = [[]]*numRuns
memoria_par_runs = [[]]*numRuns

# m1: total size of all models after each incremental update;
# t1: training times (initial fit, then each incremental update).
m1 = []
t1 = []

for k in range(numRuns):
    tstart = time.time()
    x,y = batches_supervised[0]
    numModels = y.shape[1]
    models = []
    for i in range(numModels):
        # Build and train each model in the same iteration (original order),
        # in case constructing a seeded model touches shared random state.
        par_model = PAR(dict({'seed':seeds[k]},**paramStructs[i]))
        par_model.train(x,y[:,i])
        models.append(par_model)
    t1.append(time.time() - tstart)

    forecasts = []
    for j in range(1,len(batches_supervised)):
        x,y = batches_supervised[j]
        # Test-then-train: predict the batch first, one column per model.
        forecast = [models[i].predict(x).reshape((-1,1)) for i in range(numModels)]
        forecasts.append(np.hstack(forecast))
        # Only the incremental update is timed.
        auxTime = time.time()

        for i in range(numModels):
            models[i].train(x,y[:,i])
        t1.append(time.time() - auxTime)
        m1.append(sum(asizeof.asizeof(mdl) for mdl in models))
    tempo_par_runs[k] = time.time() - tstart
    forecasts_par_runs[k] = forecasts
    memoria_par_runs[k] = sum(asizeof.asizeof(mdl) for mdl in models)

# NOTE: the per-run summary tuple is immediately replaced by the per-batch
# traces; only (m1, t1) is written to disk.
save_obj = (forecasts_par_runs,tempo_par_runs,memoria_par_runs,paramStructs)
save_obj = (m1,t1)

with open('solar_par_m1t1.pickle','wb') as file:
    pickle.dump(save_obj,file)

In [None]:
# Plot m1 (summed model sizes measured with asizeof, as filled by the cell above).
plt.plot(m1)
plt.show()

In [None]:
# Plot t1 (training times in seconds, as filled by the cell above).
plt.plot(t1)
plt.show()