# Imports

In [None]:
import cudf
import cudf as pd
import numpy as np
import pandas
import shap
import seaborn as sns
import numpy
import cupy
import tensorflow
import os
import random

from cuml import train_test_split
from cuml import SVR
from cuml import RandomForestRegressor as CuRF
from cuml.metrics import mean_squared_error
from keras import Sequential
from keras.src.layers import Input, LSTM, Dense
from pyswarms.single import GlobalBestPSO
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import TimeSeriesSplit
from xgboost import XGBRegressor

from dask_cuda import LocalCUDACluster
from dask.distributed import Client

SEED = 100


def reset_seed(rnd_seed=SEED):
    """Re-seed every random number generator used in this notebook.

    Args:
        rnd_seed: Seed passed to ``random``, ``numpy``, ``cupy`` and
            ``tensorflow``. Defaults to the module-level ``SEED``.

    The ``PYTHONHASHSEED`` environment variable is always set to ``'0'``,
    regardless of ``rnd_seed``.
    """
    os.environ['PYTHONHASHSEED'] = '0'
    # Each entry is the seeding entry point of one RNG backend.
    seeders = (
        random.seed,
        numpy.random.seed,
        cupy.random.seed,
        tensorflow.random.set_seed,
    )
    for seed_backend in seeders:
        seed_backend(rnd_seed)


# Start a local Dask CUDA cluster and connect a distributed client to it.
cluster = LocalCUDACluster()
client = Client(cluster)
# Seed the RNGs handled by reset_seed() with its default seed (SEED).
reset_seed()


# Load Datasets

In [None]:
def _load_indexed_csv(path):
    """Read a ';'-separated CSV and index it by its parsed "data" column."""
    frame = pd.read_csv(path, sep=";", decimal=".", header=0)
    # Dates are stored as day/month/year strings.
    frame["data"] = pd.to_datetime(frame["data"], format="%d/%m/%Y")
    return frame.set_index("data")


df_water = _load_indexed_csv('./dataset/water.csv')
df_electricity = _load_indexed_csv('./dataset/electricity.csv')
df_climatic = _load_indexed_csv('./dataset/climatic.csv')



# Pré-Processamento
## Dados climáticos faltantes

In [None]:
# Fill each missing climatic value with the mean of the same column taken over
# all rows that share the row's month ("mes").
# NOTE(review): `df_climatic[df_climatic.isnull()]` indexes with a same-shaped
# boolean frame instead of selecting the rows that contain a null, so this loop
# presumably visits every row; `df_climatic.isnull().any(axis=1)` may be what
# was intended -- confirm against cuDF behaviour.
for index, row in df_climatic[df_climatic.isnull()].to_pandas().iterrows():
    # All rows whose month equals the month of the current row.
    df_mes = df_climatic[df_climatic["mes"] == df_climatic.at[index, "mes"]]
    for col in row.index:
        # The null check reads the original frame, not the masked `row`.
        if pandas.isnull(df_climatic.at[index, col]):
            # Column sum divided by the non-null count, i.e. the mean of the
            # non-null values (assumes sum() skips nulls -- TODO confirm).
            df_mes.at[index, col] = df_mes[col].sum() / df_mes[col][df_mes[col].isnull() == False].count()
            # Copy the imputed value back into the full climatic frame.
            df_climatic.at[index, col] = df_mes.at[index, col]

## Obtenção dos LAGS

In [None]:
# Add the values shifted by 1..12 rows of every lagged column as new
# "<column>_LAG_NN" features on both consumption frames.
for lag_col in ["consumo"]:
    for step in range(1, 13):
        lag_name = f"{lag_col}_LAG_{step:02d}"
        df_electricity[lag_name] = df_electricity[lag_col].shift(step)
        df_water[lag_name] = df_water[lag_col].shift(step)

## União dos dados climáticos aos dados de consumo

In [None]:
# Left-join the climatic data onto each consumption frame, then discard the
# "leitura" column.
merge_keys = ["data", "mes", "ano"]

df_water = df_water.merge(df_climatic, on=merge_keys, how="left").drop("leitura", axis=1)

df_electricity = df_electricity.merge(df_climatic, on=merge_keys, how="left").drop("leitura", axis=1)


## Criação das variáveis Dummy (mês e ano)

In [None]:
# Dummy-column names for the month and year indicator variables.
_MES_LABELS = {"1": "mes_JAN", "2": "mes_FEV", "3": "mes_MAR", "4": "mes_ABR", "5": "mes_MAI", "6": "mes_JUN",
               "7": "mes_JUL", "8": "mes_AGO", "9": "mes_SET", "10": "mes_OUT", "11": "mes_NOV", "12": "mes_DEZ"}
_ANO_LABELS = {"2017": "ano_2017", "2018": "ano_2018", "2019": "ano_2019", "2020": "ano_2020", "2021": "ano_2021",
               "2022": "ano_2022", "2023": "ano_2023", "2024": "ano_2024"}


def _calendar_dummies(frame):
    """Return (month dummies, year dummies) built from "mes" and "ano"."""
    meses = pd.get_dummies(frame["mes"].astype(int), prefix="", prefix_sep="", dtype=int)
    anos = pd.get_dummies(frame["ano"].astype(int), prefix="", prefix_sep="", dtype=int)
    return meses.rename(columns=_MES_LABELS), anos.rename(columns=_ANO_LABELS)


def _attach_dummies(frame, meses, anos):
    """Append the dummies, drop "mes"/"ano", cast to float32 and drop null rows."""
    widened = pd.concat([frame, meses, anos], axis=1)
    widened = widened.drop(["mes", "ano"], axis=1)
    return widened.astype("float32").dropna()


df_meses, df_anos = _calendar_dummies(df_electricity)
df_electricity = _attach_dummies(df_electricity, df_meses, df_anos)

df_meses, df_anos = _calendar_dummies(df_water)
df_water = _attach_dummies(df_water, df_meses, df_anos)

# Host-side copy of the electricity frame, shown as the cell output.
df_show = df_electricity.to_pandas()
df_show

# Análise de Correlações
## Eletricidade
### Correlação com os LAGS

In [None]:
# Correlation among all electricity columns whose name contains "consumo".
consumo_columns = df_electricity.to_pandas().filter(like="consumo").columns
consumo_frame = df_electricity[consumo_columns].dropna().to_pandas()
corr_matrix = consumo_frame.corr(numeric_only=True)

heatmap_options = {"cmap": "coolwarm", "center": 0, "annot": True, "fmt": '.0g'}
sns.heatmap(corr_matrix, **heatmap_options)

### Correlação com as variáveis climáticas

In [None]:
# Correlation of the electricity frame once the lag, month-dummy and
# year-dummy columns have been removed.
electricity_host = df_electricity.to_pandas()
electricity_remaining = df_electricity
for marker in ("_LAG_", "mes_", "ano_"):
    # Column names are always looked up on the full, unmodified frame.
    unwanted = electricity_host.filter(like=marker).columns
    electricity_remaining = electricity_remaining.drop(unwanted, axis=1)
corr_matrix = electricity_remaining.dropna().to_pandas().corr(numeric_only=True)

heatmap_options = {"cmap": "coolwarm", "center": 0, "annot": True, "fmt": '.1g'}
sns.heatmap(corr_matrix, **heatmap_options)

## Água
### Correlação com os LAGS

In [None]:
# Correlation among all water columns whose name contains "consumo".
consumo_columns = df_water.to_pandas().filter(like="consumo").columns
consumo_frame = df_water[consumo_columns].dropna().to_pandas()
corr_matrix = consumo_frame.corr(numeric_only=True)

heatmap_options = {"cmap": "coolwarm", "center": 0, "annot": True, "fmt": '.0g'}
sns.heatmap(corr_matrix, **heatmap_options)

### Correlação com as variáveis climáticas

In [None]:
# Correlation of the water frame once the lag, month-dummy and year-dummy
# columns have been removed.
water_host = df_water.to_pandas()
water_remaining = df_water
for marker in ("_LAG_", "mes_", "ano_"):
    # Column names are always looked up on the full, unmodified frame.
    unwanted = water_host.filter(like=marker).columns
    water_remaining = water_remaining.drop(unwanted, axis=1)
corr_matrix = water_remaining.dropna().to_pandas().corr(numeric_only=True)

heatmap_options = {"cmap": "coolwarm", "center": 0, "annot": True, "fmt": '.1g'}
sns.heatmap(corr_matrix, **heatmap_options)

## Correlação entre o Consumo de Eletricidade e de Água


In [None]:
# Inner-join the two consumption series on "data" and correlate them.
consumo_pair = pd.merge(
    left=df_electricity["consumo"],
    right=df_water["consumo"],
    on=["data"],
    how="inner",
    suffixes=[' electricity', ' water'],
)
corr_matrix = consumo_pair.dropna().to_pandas().corr(numeric_only=True)

heatmap_options = {"cmap": "coolwarm", "center": 0, "annot": True, "fmt": '.1g'}
sns.heatmap(corr_matrix, **heatmap_options)


# Análise dos SHAP Values
## Eletricidade
### Random Forest

In [None]:
# Fit a scikit-learn random forest on the electricity data (moved to host
# memory) and explain its predictions with SHAP.
df_electricity_copy = df_electricity.dropna().copy().to_pandas()

x_electricity = df_electricity_copy.drop("consumo", axis=1)
y_electricity = df_electricity_copy["consumo"]
# Seeded so the forest, and therefore the SHAP values, are reproducible
# across re-runs of this cell.
model_rf = RandomForestRegressor(random_state=SEED)
shap.initjs()

model_rf.fit(x_electricity, y_electricity)

explainer_rf = shap.Explainer(model_rf)
shap_rf = explainer_rf(x_electricity)

shap.plots.waterfall(shap_rf[0], max_display=10)
# The JavaScript force plot is only rendered when its return value is the
# cell's last expression; drawing it with matplotlib makes it show up here.
shap.plots.force(shap_rf[0], matplotlib=True)
shap.plots.bar(shap_rf)


### XGBoost

In [None]:
# Fit an XGBoost regressor on the electricity data (moved to host memory)
# and explain its predictions with SHAP.
df_electricity_copy = df_electricity.dropna().copy().to_pandas()

x_electricity = df_electricity_copy.drop("consumo", axis=1)
y_electricity = df_electricity_copy["consumo"]

# Seeded for consistency with the other models in this notebook.
model_xgb = XGBRegressor(objective='reg:squarederror', random_state=SEED)
shap.initjs()

model_xgb.fit(x_electricity, y_electricity)

explainer_xgb = shap.Explainer(model_xgb)
shap_xgb = explainer_xgb(x_electricity)

shap.plots.waterfall(shap_xgb[0], max_display=10)
# The JavaScript force plot is only rendered when its return value is the
# cell's last expression; drawing it with matplotlib makes it show up here.
shap.plots.force(shap_xgb[0], matplotlib=True)
shap.plots.bar(shap_xgb)

## Água
### Random Forest

In [None]:
# Fit a scikit-learn random forest on the water data (moved to host memory)
# and explain its predictions with SHAP.
df_water_copy = df_water.dropna().copy().to_pandas()

x_water = df_water_copy.drop("consumo", axis=1)
y_water = df_water_copy["consumo"]
# Seeded so the forest, and therefore the SHAP values, are reproducible
# across re-runs of this cell.
model_rf = RandomForestRegressor(random_state=SEED)
shap.initjs()

model_rf.fit(x_water, y_water)

explainer_rf = shap.Explainer(model_rf)
shap_rf = explainer_rf(x_water)

shap.plots.waterfall(shap_rf[0], max_display=10)
# The JavaScript force plot is only rendered when its return value is the
# cell's last expression; drawing it with matplotlib makes it show up here.
shap.plots.force(shap_rf[0], matplotlib=True)
shap.plots.bar(shap_rf)


### XGBoost

In [None]:
# Fit an XGBoost regressor on the water data (moved to host memory) and
# explain its predictions with SHAP.
df_water_copy = df_water.dropna().copy().to_pandas()

x_water = df_water_copy.drop("consumo", axis=1)
y_water = df_water_copy["consumo"]

# Seeded for consistency with the other models in this notebook.
model_xgb = XGBRegressor(objective='reg:squarederror', random_state=SEED)
shap.initjs()

model_xgb.fit(x_water, y_water)

explainer_xgb = shap.Explainer(model_xgb)
shap_xgb = explainer_xgb(x_water)

shap.plots.waterfall(shap_xgb[0], max_display=10)
# The JavaScript force plot is only rendered when its return value is the
# cell's last expression; drawing it with matplotlib makes it show up here.
shap.plots.force(shap_xgb[0], matplotlib=True)
shap.plots.bar(shap_xgb)

# Configuração dos Otimizadores
## Algoritmo Genético
### Indivíduos

In [None]:
class IndXGB:
    """One genetic-algorithm individual: XGBoost hyper-parameters and a fitness."""

    def __init__(self):
        # Fitness and booster start unset; the numeric genes start at zero.
        self.fitness, self.estimators, self.max_depth, self.booster = None, 0, 0, None

    def create_random(self):
        """Randomise every gene in place and return this individual."""
        for randomise_gene in (self.rand_estimators, self.rand_depth, self.rand_booster):
            randomise_gene()
        return self

    def rand_estimators(self):
        """Draw the number of estimators uniformly from 1..300 (inclusive)."""
        self.estimators = random.randrange(1, 301)

    def rand_depth(self):
        """Draw the maximum depth uniformly from 1..300 (inclusive)."""
        self.max_depth = random.randrange(1, 301)

    def rand_booster(self):
        """Pick one of the three booster names at random."""
        booster_names = ("gbtree", "gblinear", "dart")
        self.booster = random.choice(booster_names)


### Operações

In [59]:
class GAXGB:
    """Genetic algorithm that searches XGBoost hyper-parameters.

    Constructing an instance runs the whole search: a random initial
    population is evaluated, then one offspring is bred, evaluated and added
    per generation. ``population`` is kept sorted by ascending fitness, so
    ``population[0]`` is the best individual found so far. Individuals are
    never removed, so the population grows by one per generation.

    Args:
        dataset: Frame with a "consumo" target column; every other column is
            used as a feature.
        n_individuals: Size of the random initial population.
        n_generations: Number of offspring to breed.
        mutation_rate: Probability in [0, 1] that an offspring is mutated.
        seed: Seed passed to ``reset_seed`` and to the XGBoost models.
    """

    def __init__(self, dataset, n_individuals, n_generations, mutation_rate, seed=SEED):
        reset_seed(seed)
        self.seed = seed
        self.dataset = dataset
        self.n_individuals = n_individuals
        self.n_generations = n_generations
        self.mutation_rate = mutation_rate
        self.population = []
        self.init_pop()
        self.init_gen()

    def init_pop(self):
        """Create and evaluate the random initial population."""
        for _ in range(self.n_individuals):
            self.population.append(self.get_fitness(IndXGB().create_random()))
        # One stable sort at the end yields the same order as re-sorting
        # after every insertion.
        self.population.sort(key=lambda ind: ind.fitness)

    def init_gen(self):
        """Breed one offspring per generation from the best and a random parent."""
        for _ in range(self.n_generations):
            ind_a = self.population[0]
            ind_b = random.choice(self.population)
            ind_c = self.crossover(ind_a, ind_b)
            if random.uniform(0, 1) < self.mutation_rate:
                ind_c = self.mutation(ind_c)
            ind_c = self.get_fitness(ind_c)
            self.population.append(ind_c)
            # Keep the best individual at index 0 for the next generation.
            self.population.sort(key=lambda ind: ind.fitness)

    def mutation(self, ind):
        """Re-draw exactly one randomly chosen gene of ``ind`` and return it."""
        # Choose among the bound methods and call only the winner; calling
        # them inside the list would re-draw every gene.
        mutate_gene = random.choice([
            ind.rand_estimators,
            ind.rand_depth,
            ind.rand_booster,
        ])
        mutate_gene()
        return ind

    def crossover(self, ind_a, ind_b):
        """Return a new individual taking each gene from one parent at random."""
        ind = IndXGB()
        ind.estimators = random.choice([ind_a.estimators, ind_b.estimators])
        ind.max_depth = random.choice([ind_a.max_depth, ind_b.max_depth])
        ind.booster = random.choice([ind_a.booster, ind_b.booster])
        return ind

    def get_fitness(self, individual):
        """Train XGBoost with the individual's genes and store its fitness.

        The fitness is the mean squared error on the held-out split
        (``test_size=1``, no shuffling), truncated to an int; lower is
        better.

        Returns:
            The same ``individual`` with ``fitness`` set.
        """
        x_train, x_test, y_train, y_test = train_test_split(self.dataset.drop("consumo", axis=1),
                                                            self.dataset["consumo"],
                                                            test_size=1, shuffle=False)

        # Same updater choice as PSOXGB makes for the linear booster.
        updater = "coord_descent" if individual.booster == "gblinear" else None

        model = XGBRegressor(n_estimators=individual.estimators, max_depth=individual.max_depth,
                             booster=individual.booster, updater=updater,
                             device="cuda", random_state=self.seed,
                             # `verbosity` is the XGBoost parameter for silencing output.
                             verbosity=0)

        model.fit(x_train, y_train)
        individual.fitness = int(mean_squared_error(y_test, model.predict(x_test)).get())
        return individual

    def population_dataframe(self):
        """Return the population as a cuDF frame, one row per individual.

        Rows follow the order of ``population`` (ascending fitness).
        """
        # Build the columns once instead of concatenating one-row frames.
        return cudf.DataFrame({
            "N_estimators": [ind.estimators for ind in self.population],
            "Max_depth": [ind.max_depth for ind in self.population],
            "Booster": [ind.booster for ind in self.population],
            "Fitness": [ind.fitness for ind in self.population],
        })


## Enxame de Partículas
### Partículas

In [None]:
class PartXGB:
    """One evaluated PSO particle: XGBoost hyper-parameters and a fitness."""

    def __init__(self):
        # Fitness and booster stay unset until the particle is evaluated.
        self.fitness = None
        self.estimators = 0
        self.max_depth = 0
        self.booster = None


### Operações

In [82]:
class PSOXGB:
    """Particle swarm search over XGBoost hyper-parameters.

    Constructing an instance runs the whole optimisation. Every evaluated
    position is stored as a ``PartXGB`` in ``particles``, which is sorted by
    ascending fitness once the search finishes.

    Args:
        dataset: Frame with a "consumo" target column; every other column is
            used as a feature.
        n_particles: Number of particles in the swarm.
        n_iters: Number of optimisation iterations.
        seed: Seed passed to ``reset_seed``, the data split and XGBoost.
    """

    def __init__(self, dataset, n_particles, n_iters, seed=SEED):
        reset_seed(seed)
        self.seed = seed
        self.dataset = dataset
        self.n_particles = n_particles
        self.n_iters = n_iters
        self.particles = []
        self.BOOSTERS = ["gbtree", "gblinear", "dart"]
        self.run()

    def run(self):
        """Run the global-best PSO and sort the evaluated particles."""
        # Dimensions: (n_estimators, max_depth, index into BOOSTERS).
        lower_bound = np.array([1, 1, 0])
        upper_bound = np.array([300, 300, len(self.BOOSTERS) - 1])
        bounds = (lower_bound, upper_bound)

        options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
        optimizer = GlobalBestPSO(n_particles=self.n_particles,
                                  dimensions=3,
                                  options=options,
                                  bounds=bounds)

        optimizer.optimize(self.get_fitness, iters=self.n_iters)
        self.particles = sorted(self.particles, key=lambda a: a.fitness)

    def get_fitness(self, particles):
        """Evaluate every particle position of the swarm.

        Args:
            particles: Array of shape (n_particles, 3) with continuous
                positions; they are rounded to integers before use.

        Returns:
            A 1-D array with one cost per particle, the return type pyswarms
            documents for objective functions.
        """
        rounded = np.round(particles)
        costs = [self.objective_function(position) for position in rounded]
        return np.array(costs, dtype=float)

    def objective_function(self, particle_arr):
        """Train XGBoost for one position and return its fitness.

        The fitness is the mean squared error on the held-out split
        (``test_size=1``, no shuffling), truncated to an int; lower is
        better. The evaluated particle is appended to ``particles``.
        """
        reset_seed(self.seed)
        particle = PartXGB()
        particle.estimators = int(particle_arr[0])
        particle.max_depth = int(particle_arr[1])
        particle.booster = self.BOOSTERS[int(particle_arr[2])]

        # The linear booster is given an explicit updater; tree boosters
        # keep XGBoost's default.
        updater = "coord_descent" if particle.booster == "gblinear" else None

        x_train, x_test, y_train, y_test = train_test_split(self.dataset.drop("consumo", axis=1),
                                                            self.dataset["consumo"],
                                                            test_size=1, shuffle=False,
                                                            random_state=self.seed)
        model = XGBRegressor(device="cuda", random_state=self.seed,
                             n_estimators=particle.estimators,
                             max_depth=particle.max_depth, updater=updater,
                             booster=particle.booster, verbosity=0)
        model.fit(x_train, y_train)
        particle.fitness = int(mean_squared_error(y_test, model.predict(x_test)).get())

        self.particles.append(particle)
        return particle.fitness

    def particles_dataframe(self):
        """Return every evaluated particle as a cuDF frame, one row each.

        Rows follow the order of ``particles`` (ascending fitness after
        ``run``).
        """
        # Build the columns once instead of concatenating one-row frames.
        return cudf.DataFrame({
            "N_estimators": [part.estimators for part in self.particles],
            "Max_depth": [part.max_depth for part in self.particles],
            "Booster": [part.booster for part in self.particles],
            "Fitness": [part.fitness for part in self.particles],
        })



# Aplicação dos Otimizadores
## Random Forest
### Eletricidade


### Água

## XGBoost
### Eletricidade

In [84]:

# Small PSO run on the electricity data: 2 particles, 2 iterations, seed 2000.
pso_xgb = PSOXGB(df_electricity, n_particles=2, n_iters=2, seed=2000)
pso_xgb.particles_dataframe()


2024-06-15 16:30:15,858 - pyswarms.single.global_best - INFO - Optimize for 2 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
  feature_names = data.columns.format()
  feature_names = data.columns.format()
  feature_names = data.columns.format()
  feature_names = data.columns.format()
  feature_names = data.columns.format()
  feature_names = data.columns.format()
  feature_names = data.columns.format()
  feature_names = data.columns.format()
pyswarms.single.global_best: 100%|██████████|2/2, best_cost=4010.0 
2024-06-15 16:30:32,456 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 4010.0, best pos: [259.83607151  68.46491371   0.71714416]


Unnamed: 0,N_estimators,Max_depth,Booster,Fitness
0,260,68,gblinear,4010
0,99,209,gblinear,358967
0,99,209,gblinear,358967
0,268,61,gbtree,56027492


### Água

## SVR
### Eletricidade

### Água

## LSTM
### Eletricidade

### Água

## XGBoost
### Eletricidade

### Água

# Previsões
## Eletricidade
### 3 Passos à frente

In [None]:
# reset_seed()
# x_electricity = df_electricity.drop("consumo", axis=1)
# y_electricity = df_electricity["consumo"]
# 
# xgb_electricity = XGBRegressor()
# rf_electricity = CuRF(n_streams=1, n_bins=x_electricity.shape[1])
# svr_electricity = SVR()
# lstm_electricity = Sequential([
#     Input((x_electricity.shape[1], 1), batch_size=x_electricity.shape[1]),
#     LSTM(30, activation='relu', seed=SEED),
#     Dense(1),
# ])
# lstm_electricity.compile(loss='mse', metrics=['mean_absolute_error'])
# 
# x_train, x_test, y_train, y_test = train_test_split(x_electricity, y_electricity, test_size=3, shuffle=False)
# 
# cvs_electricity = pd.DataFrame()
# for i_train, i_test in TimeSeriesSplit(n_splits=12, test_size=1).split(x_train, y_train):
#     kx_train, kx_test = x_train.iloc[i_train].to_numpy(), x_train.iloc[i_test].to_numpy()
#     ky_train, ky_test = y_train.iloc[i_train].to_numpy(), y_train.iloc[i_test].to_numpy()
# 
#     xgb_electricity.fit(kx_train, ky_train)
#     rf_electricity.fit(kx_train, ky_train)
#     svr_electricity.fit(kx_train, ky_train)
#     lstm_electricity.fit(kx_train, ky_train, shuffle=False, verbose=False, epochs=1, batch_size=x_electricity.shape[1])
#     cvs_electricity = pd.concat([cvs_electricity, pd.DataFrame({
#         "XGB": mean_absolute_percentage_error(xgb_electricity.predict(kx_test), ky_test),
#         "RF": mean_absolute_percentage_error(rf_electricity.predict(kx_test), ky_test),
#         "SVR": mean_absolute_percentage_error(svr_electricity.predict(kx_test), ky_test),
#         "LSTM": mean_absolute_percentage_error(lstm_electricity.predict(kx_test), ky_test)
#     })])
# 
# pred_xgb_electricity = []
# for i_test in range(len(x_test)):
#     sx_test = x_test.iloc[[i_test]]
# 
#     for climatic_column in df_climatic.drop(["ano", "mes"], axis=1).columns:
#         sx_test.at[sx_test.index, climatic_column] = \
#             x_electricity.at[(sx_test.index - pd.DateOffset(years=1)), climatic_column].to_numpy()[0][0]
#     for lag in range(i_test + 1):
#         if lag == 0:
#             continue
#         sx_test['consumo_LAG_' + "{:02d}".format(lag)] = pred_xgb_electricity[-lag]
# 
#     pred_xgb_electricity.append(xgb_electricity.predict(sx_test.to_numpy())[0])
# 
# pred_rf_electricity = []
# for i_test in range(len(x_test)):
#     sx_test = x_test.iloc[[i_test]]
# 
#     for climatic_column in df_climatic.drop(["ano", "mes"], axis=1).columns:
#         sx_test.at[sx_test.index, climatic_column] = \
#             x_electricity.at[(sx_test.index - pd.DateOffset(years=1)), climatic_column].to_numpy()[0][0]
#     for lag in range(i_test + 1):
#         if lag == 0:
#             continue
#         sx_test['consumo_LAG_' + "{:02d}".format(lag)] = pred_rf_electricity[-lag]
# 
#     pred_rf_electricity.append(rf_electricity.predict(sx_test.to_numpy())[0])
# 
# pred_svr_electricity = []
# for i_test in range(len(x_test)):
#     sx_test = x_test.iloc[[i_test]]
# 
#     for climatic_column in df_climatic.drop(["ano", "mes"], axis=1).columns:
#         sx_test.at[sx_test.index, climatic_column] = \
#             x_electricity.at[(sx_test.index - pd.DateOffset(years=1)), climatic_column].to_numpy()[0][0]
#     for lag in range(i_test + 1):
#         if lag == 0:
#             continue
#         sx_test['consumo_LAG_' + "{:02d}".format(lag)] = pred_svr_electricity[-lag]
# 
#     pred_svr_electricity.append(svr_electricity.predict(sx_test.to_numpy())[0])
# 
# pred_lstm_electricity = []
# for i_test in range(len(x_test)):
#     sx_test = x_test.iloc[[i_test]]
# 
#     for climatic_column in df_climatic.drop(["ano", "mes"], axis=1).columns:
#         sx_test.at[sx_test.index, climatic_column] = \
#             x_electricity.at[(sx_test.index - pd.DateOffset(years=1)), climatic_column].to_numpy()[0][0]
#     for lag in range(i_test + 1):
#         if lag == 0:
#             continue
#         sx_test['consumo_LAG_' + "{:02d}".format(lag)] = pred_lstm_electricity[-lag]
# 
#     pred_lstm_electricity.append(lstm_electricity.predict(sx_test.to_numpy())[0])


### 6 Passos à frente

In [None]:
# reset_seed()
# x_electricity = df_electricity.drop("consumo", axis=1)
# y_electricity = df_electricity["consumo"]
# 
# xgb_electricity = XGBRegressor()
# rf_electricity = CuRF(n_streams=1, n_bins=x_electricity.shape[1])
# svr_electricity = SVR()
# lstm_electricity = Sequential([
#     Input((x_electricity.shape[1], 1), batch_size=x_electricity.shape[1]),
#     LSTM(30, activation='relu', seed=SEED),
#     Dense(1),
# ])
# lstm_electricity.compile(loss='mse', metrics=['mean_absolute_error'])
# 
# x_train, x_test, y_train, y_test = train_test_split(x_electricity, y_electricity, test_size=6, shuffle=False)
# 
# cvs_electricity = pd.DataFrame()
# for i_train, i_test in TimeSeriesSplit(n_splits=12, test_size=1).split(x_train, y_train):
#     kx_train, kx_test = x_train.iloc[i_train].to_numpy(), x_train.iloc[i_test].to_numpy()
#     ky_train, ky_test = y_train.iloc[i_train].to_numpy(), y_train.iloc[i_test].to_numpy()
# 
#     xgb_electricity.fit(kx_train, ky_train)
#     rf_electricity.fit(kx_train, ky_train)
#     svr_electricity.fit(kx_train, ky_train)
#     lstm_electricity.fit(kx_train, ky_train, shuffle=False, verbose=False, epochs=1, batch_size=x_electricity.shape[1])
#     cvs_electricity = pd.concat([cvs_electricity, pd.DataFrame({
#         "XGB": mean_absolute_percentage_error(xgb_electricity.predict(kx_test), ky_test),
#         "RF": mean_absolute_percentage_error(rf_electricity.predict(kx_test), ky_test),
#         "SVR": mean_absolute_percentage_error(svr_electricity.predict(kx_test), ky_test),
#         "LSTM": mean_absolute_percentage_error(lstm_electricity.predict(kx_test), ky_test)
#     })])
# 
# pred_xgb_electricity = []
# for i_test in range(len(x_test)):
#     sx_test = x_test.iloc[[i_test]]
# 
#     for climatic_column in df_climatic.drop(["ano", "mes"], axis=1).columns:
#         sx_test.at[sx_test.index, climatic_column] = \
#             x_electricity.at[(sx_test.index - pd.DateOffset(years=1)), climatic_column].to_numpy()[0][0]
#     for lag in range(i_test + 1):
#         if lag == 0:
#             continue
#         sx_test['consumo_LAG_' + "{:02d}".format(lag)] = pred_xgb_electricity[-lag]
# 
#     pred_xgb_electricity.append(xgb_electricity.predict(sx_test.to_numpy())[0])
# 
# pred_rf_electricity = []
# for i_test in range(len(x_test)):
#     sx_test = x_test.iloc[[i_test]]
# 
#     for climatic_column in df_climatic.drop(["ano", "mes"], axis=1).columns:
#         sx_test.at[sx_test.index, climatic_column] = \
#             x_electricity.at[(sx_test.index - pd.DateOffset(years=1)), climatic_column].to_numpy()[0][0]
#     for lag in range(i_test + 1):
#         if lag == 0:
#             continue
#         sx_test['consumo_LAG_' + "{:02d}".format(lag)] = pred_rf_electricity[-lag]
# 
#     pred_rf_electricity.append(rf_electricity.predict(sx_test.to_numpy())[0])
# 
# pred_svr_electricity = []
# for i_test in range(len(x_test)):
#     sx_test = x_test.iloc[[i_test]]
# 
#     for climatic_column in df_climatic.drop(["ano", "mes"], axis=1).columns:
#         sx_test.at[sx_test.index, climatic_column] = \
#             x_electricity.at[(sx_test.index - pd.DateOffset(years=1)), climatic_column].to_numpy()[0][0]
#     for lag in range(i_test + 1):
#         if lag == 0:
#             continue
#         sx_test['consumo_LAG_' + "{:02d}".format(lag)] = pred_svr_electricity[-lag]
# 
#     pred_svr_electricity.append(svr_electricity.predict(sx_test.to_numpy())[0])
# 
# pred_lstm_electricity = []
# for i_test in range(len(x_test)):
#     sx_test = x_test.iloc[[i_test]]
# 
#     for climatic_column in df_climatic.drop(["ano", "mes"], axis=1).columns:
#         sx_test.at[sx_test.index, climatic_column] = \
#             x_electricity.at[(sx_test.index - pd.DateOffset(years=1)), climatic_column].to_numpy()[0][0]
#     for lag in range(i_test + 1):
#         if lag == 0:
#             continue
#         sx_test['consumo_LAG_' + "{:02d}".format(lag)] = pred_lstm_electricity[-lag]
# 
#     pred_lstm_electricity.append(lstm_electricity.predict(sx_test.to_numpy())[0])


### 12 Passos à frente

In [None]:
# reset_seed()
# x_electricity = df_electricity.drop("consumo", axis=1)
# y_electricity = df_electricity["consumo"]
# 
# xgb_electricity = XGBRegressor()
# rf_electricity = CuRF(n_streams=1, n_bins=x_electricity.shape[1])
# svr_electricity = SVR()
# lstm_electricity = Sequential([
#     Input((x_electricity.shape[1], 1), batch_size=x_electricity.shape[1]),
#     LSTM(30, activation='relu', seed=SEED),
#     Dense(1),
# ])
# lstm_electricity.compile(loss='mse', metrics=['mean_absolute_error'])
# 
# x_train, x_test, y_train, y_test = train_test_split(x_electricity, y_electricity, test_size=12, shuffle=False)
# 
# cvs_electricity = pd.DataFrame()
# for i_train, i_test in TimeSeriesSplit(n_splits=12, test_size=1).split(x_train, y_train):
#     kx_train, kx_test = x_train.iloc[i_train].to_numpy(), x_train.iloc[i_test].to_numpy()
#     ky_train, ky_test = y_train.iloc[i_train].to_numpy(), y_train.iloc[i_test].to_numpy()
# 
#     xgb_electricity.fit(kx_train, ky_train)
#     rf_electricity.fit(kx_train, ky_train)
#     svr_electricity.fit(kx_train, ky_train)
#     lstm_electricity.fit(kx_train, ky_train, shuffle=False, verbose=False, epochs=1, batch_size=x_electricity.shape[1])
#     cvs_electricity = pd.concat([cvs_electricity, pd.DataFrame({
#         "XGB": mean_absolute_percentage_error(xgb_electricity.predict(kx_test), ky_test),
#         "RF": mean_absolute_percentage_error(rf_electricity.predict(kx_test), ky_test),
#         "SVR": mean_absolute_percentage_error(svr_electricity.predict(kx_test), ky_test),
#         "LSTM": mean_absolute_percentage_error(lstm_electricity.predict(kx_test), ky_test)
#     })])
# 
# pred_xgb_electricity = []
# for i_test in range(len(x_test)):
#     sx_test = x_test.iloc[[i_test]]
# 
#     for climatic_column in df_climatic.drop(["ano", "mes"], axis=1).columns:
#         sx_test.at[sx_test.index, climatic_column] = \
#             x_electricity.at[(sx_test.index - pd.DateOffset(years=1)), climatic_column].to_numpy()[0][0]
#     for lag in range(i_test + 1):
#         if lag == 0:
#             continue
#         sx_test['consumo_LAG_' + "{:02d}".format(lag)] = pred_xgb_electricity[-lag]
# 
#     pred_xgb_electricity.append(xgb_electricity.predict(sx_test.to_numpy())[0])
# 
# pred_rf_electricity = []
# for i_test in range(len(x_test)):
#     sx_test = x_test.iloc[[i_test]]
# 
#     for climatic_column in df_climatic.drop(["ano", "mes"], axis=1).columns:
#         sx_test.at[sx_test.index, climatic_column] = \
#             x_electricity.at[(sx_test.index - pd.DateOffset(years=1)), climatic_column].to_numpy()[0][0]
#     for lag in range(i_test + 1):
#         if lag == 0:
#             continue
#         sx_test['consumo_LAG_' + "{:02d}".format(lag)] = pred_rf_electricity[-lag]
# 
#     pred_rf_electricity.append(rf_electricity.predict(sx_test.to_numpy())[0])
# 
# pred_svr_electricity = []
# for i_test in range(len(x_test)):
#     sx_test = x_test.iloc[[i_test]]
# 
#     for climatic_column in df_climatic.drop(["ano", "mes"], axis=1).columns:
#         sx_test.at[sx_test.index, climatic_column] = \
#             x_electricity.at[(sx_test.index - pd.DateOffset(years=1)), climatic_column].to_numpy()[0][0]
#     for lag in range(i_test + 1):
#         if lag == 0:
#             continue
#         sx_test['consumo_LAG_' + "{:02d}".format(lag)] = pred_svr_electricity[-lag]
# 
#     pred_svr_electricity.append(svr_electricity.predict(sx_test.to_numpy())[0])
# 
# pred_lstm_electricity = []
# for i_test in range(len(x_test)):
#     sx_test = x_test.iloc[[i_test]]
# 
#     for climatic_column in df_climatic.drop(["ano", "mes"], axis=1).columns:
#         sx_test.at[sx_test.index, climatic_column] = \
#             x_electricity.at[(sx_test.index - pd.DateOffset(years=1)), climatic_column].to_numpy()[0][0]
#     for lag in range(i_test + 1):
#         if lag == 0:
#             continue
#         sx_test['consumo_LAG_' + "{:02d}".format(lag)] = pred_lstm_electricity[-lag]
# 
#     pred_lstm_electricity.append(lstm_electricity.predict(sx_test.to_numpy())[0])


## Água
### 3 Passos à frente

### 6 Passos à frente

### 12 Passos à frente