# Instalação de bibliotecas

In [None]:
# Install darts, upgrading it if already present (-U); the version is not pinned.
!pip install -U darts

# Importação de bibliotecas

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

import torch

from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.models import DLinearModel, LinearRegressionModel
from darts.models.filtering.moving_average_filter import MovingAverageFilter
from darts.metrics import mape
from darts.utils.statistics import check_seasonality, plot_acf, plot_residuals_analysis

from scipy import signal

# Definição de Constantes e Funções Auxiliares

## Definição de constantes

In [None]:
# strftime pattern (day/month/year) used when printing the train/validation date ranges.
DATE_FORMAT_STRING = '%d/%m/%Y'
# Figure size passed to plt.figure(figsize=...) by the plotting helpers in this notebook.
FIG_SIZE = (8,5)

## Anexando funções a TimeSeries

In [None]:
def TimeSeriesRatio(self):
    """Return the step-over-step ratio of this series as a new TimeSeries.

    Every value is divided by the value one step earlier. The first point,
    which has no predecessor, is dropped. The single output column is named
    "<first column>_ratio".
    """
    current = self.pd_series()
    # shift(1) aligns each value with its predecessor; [1:] drops the leading NaN.
    ratios = (current / current.shift(1))[1:]
    column_name = f"{self.columns[0]}_ratio"
    return TimeSeries.from_dataframe(ratios.to_frame(name=column_name))

def TimeSeriesDetrend(self):
    """Return this univariate series with its trend removed.

    The values are passed through scipy.signal.detrend with its default
    settings and re-attached to the original time index. The output column is
    named "<first column>_detrend".
    """
    column_name = f"{self.columns[0]}_detrend"
    frame = pd.DataFrame({
        'Data': self.time_index,
        column_name: signal.detrend(self.univariate_values()),
    })
    return TimeSeries.from_dataframe(frame, time_col='Data', value_cols=column_name)

def TimeSeriesScalerFitAndInverseTransform(self, original):
    """Undo scaling on this series using a Scaler fitted on `original`.

    A fresh darts Scaler is fitted on `original`, and its inverse transform is
    then applied to `self`. No previously fitted scaler object is reused.
    """
    reference_scaler = Scaler()
    reference_scaler.fit(original)
    restored = reference_scaler.inverse_transform(self)
    return restored

def TimeSeriesInverseDetrend(self, original):
    """Add back to this series the trend that detrend() removes from `original`.

    The trend line is computed as `original - original.detrend()`. When this
    series ends after the trend line does, the trend is extended with a
    one-lag LinearRegressionModel fitted on the trend line itself. The trend
    is then sliced to this series' time span and added to it.
    """
    trend_line = original - original.detrend()
    first_stamp, last_stamp = self.start_time(), self.end_time()

    own_last = self.time_index[-1].to_pydatetime()
    trend_last = trend_line.time_index[-1].to_pydatetime()
    if own_last > trend_last:
        extrapolator = LinearRegressionModel(lags=1)
        extrapolator.fit(trend_line)
        # Number of index points from the trend's end through this series' end,
        # plus 2 spare steps; any surplus is discarded by the slice below.
        horizon = calculate_dates_diff(trend_line.end_time(), last_stamp, self.freq.freqstr) + 2
        trend_line = trend_line.concatenate(extrapolator.predict(horizon))

    return self + trend_line[first_stamp:last_stamp]

def TimeSeriesFilter(self, filter = MovingAverageFilter(10)):
    """Split this univariate series into a filtered part and its residuals.

    Args:
        filter: object exposing ``filter(series)``. The default
            MovingAverageFilter(10) instance is created once, when the
            function is defined. (The parameter name shadows the builtin
            ``filter`` inside this function.)

    Returns:
        A ``(filtered, residuals)`` tuple of TimeSeries on this series' time
        index, with columns "<first column>_filtered" and
        "<first column>_residuals". Residuals are ``self - filtered``.
    """
    base_name = self.columns[0]
    smoothed = filter.filter(self)
    leftovers = self - smoothed

    def as_series(component, suffix):
        # Rebuild a single-column TimeSeries on the original time index.
        column_name = f"{base_name}_{suffix}"
        frame = pd.DataFrame({'Data': self.time_index, column_name: component.univariate_values()})
        return TimeSeries.from_dataframe(frame, time_col='Data', value_cols=column_name)

    return as_series(smoothed, 'filtered'), as_series(leftovers, 'residuals')

# Attach the helper functions defined above to the darts TimeSeries class so
# they can be called as methods on any series (e.g. `series.detrend()`).
TimeSeries.ratio = TimeSeriesRatio
TimeSeries.detrend = TimeSeriesDetrend
# Both fit_inverse_* helpers take a reference series as their argument.
TimeSeries.fit_inverse_transform = TimeSeriesScalerFitAndInverseTransform
TimeSeries.fit_inverse_detrend = TimeSeriesInverseDetrend
# NOTE(review): this sets a `filter` attribute on TimeSeries — confirm it does
# not replace an existing darts attribute of the same name.
TimeSeries.filter = TimeSeriesFilter

## Definição de Funções

In [None]:
def plot_trend_line(series, title = None):
    """Plot a series and its trend line together in a new figure.

    The trend line is `series - series.detrend()`. `title` is applied only
    when it is truthy.
    """
    plt.figure(figsize=FIG_SIZE)
    if title:
        plt.title(title)
    trend = series - series.detrend()
    series.plot()
    trend.plot(label = 'trend line')
    plt.show()

In [None]:
def plot_series(series, title = None):
    """Plot one TimeSeries, or several on shared axes, in a new figure.

    Args:
        series: a TimeSeries (or subclass instance), or a list/tuple of
            objects exposing ``plot()``.
        title: optional figure title; applied only when truthy.

    Any other `series` type still produces an empty figure, as before.
    """
    plt.figure(figsize=FIG_SIZE)
    if title:
        plt.title(title)
    # isinstance (rather than an exact type comparison) lets TimeSeries
    # subclasses and tuples be plotted instead of silently showing nothing.
    if isinstance(series, TimeSeries):
        series.plot()
    elif isinstance(series, (list, tuple)):
        for item in series:
            item.plot()
    plt.show()

In [None]:
# Sum of squared log errors.
def sle(y_true, y_pred):
    """Return sum(log(|pred / true|) ** 2) over aligned (true, pred) pairs.

    Args:
        y_true: object whose ``values()`` yields the actual values.
        y_pred: object whose ``values()`` yields the predicted values.
            Intended for univariate data; values are flattened to 1-D.

    Returns:
        The sum as a float (0.0 when no valid pair remains).

    Raises:
        IndexError: if `y_pred` has fewer values than `y_true`.
        ZeroDivisionError: if a retained actual value is zero.

    A pair is dropped when either side is NaN. Dropping NaNs from each side
    independently would shift the pairing whenever only one side has a NaN.
    Surplus predictions beyond len(y_true) are ignored.
    """
    actual = np.asarray(list(y_true.values()), dtype=float).ravel()
    predicted = np.asarray(list(y_pred.values()), dtype=float).ravel()

    if predicted.size < actual.size:
        raise IndexError("y_pred has fewer values than y_true")
    predicted = predicted[:actual.size]

    # Keep only positions where both the actual and predicted values are numbers.
    keep = ~(np.isnan(actual) | np.isnan(predicted))
    actual, predicted = actual[keep], predicted[keep]

    if np.any(actual == 0):
        raise ZeroDivisionError("y_true contains zero; the log-ratio is undefined")

    return float(np.sum(np.log(np.abs(predicted / actual)) ** 2))

In [None]:
# Evaluate a fitted model on a validation set for n time steps.
def _metric_or_inf(compute):
    """Run a zero-argument metric computation; return inf if it raises."""
    try:
        return compute()
    except Exception:
        return float('inf')


def eval_model(model, n, series, val_series, target_series = None, scaler = None, detrend = None, returned = (), historical = False, plot = True):
    """Forecast `n` steps, score against `val_series`, optionally plot, and return results.

    Args:
        model: object exposing ``predict(n=..., series=...)`` and, when
            `historical` is set, ``historical_forecasts(series)``.
        n: number of steps to forecast.
        series: series drawn as 'actual'; only used when `plot` is true.
        val_series: series the first ``len(val_series)`` predicted points are
            scored against.
        target_series: optional series passed to ``predict`` and
            ``historical_forecasts``; also the reference for training metrics.
        scaler: optional reference series. When given, the outputs go through
            ``fit_inverse_transform(scaler)``.
        detrend: optional reference series. When given, the outputs go through
            ``fit_inverse_detrend(detrend)``.
        returned: iterable of result names (case-insensitive): 'MAPE_VAL',
            'SLE_VAL', 'MAPE_TRAIN', 'SLE_TRAIN', 'PREDICT_VALUES',
            'HISTORICAL_VALUES'.
        historical: also compute historical forecasts and training metrics.
        plot: draw actual, forecast and (if any) historical series.

    Returns:
        A list with one entry per name in `returned`, in order. Unknown names,
        and training results when `historical` is false, yield None. Returns
        None if `returned` is not an iterable of strings.

    A metric that raises is reported as ``float('inf')``.
    """
    if target_series is not None:
        pred_series = model.predict(n=n, series=target_series)
    else:
        pred_series = model.predict(n=n)
    historical_series = model.historical_forecasts(target_series) if historical else None

    # Undo scaling first, then add the trend back.
    if scaler is not None:
        pred_series = pred_series.fit_inverse_transform(scaler)
        if historical:
            historical_series = historical_series.fit_inverse_transform(scaler)
        # target_series is optional; only transform it when it was supplied.
        if target_series is not None:
            target_series = target_series.fit_inverse_transform(scaler)
    if detrend is not None:
        pred_series = pred_series.fit_inverse_detrend(detrend)
        if historical:
            historical_series = historical_series.fit_inverse_detrend(detrend)
        if target_series is not None:
            target_series = target_series.fit_inverse_detrend(detrend)

    # Slicing happens inside the lambdas so a failure there also maps to inf.
    mape_val = _metric_or_inf(lambda: mape(val_series, pred_series[:len(val_series)]))
    sle_val = _metric_or_inf(lambda: sle(val_series, pred_series[:len(val_series)]))

    mape_train = None
    sle_train = None
    if historical:
        mape_train = _metric_or_inf(lambda: mape(target_series[-len(historical_series):], historical_series))
        sle_train = _metric_or_inf(lambda: sle(target_series[-len(historical_series):], historical_series))

    if plot:
        plt.figure(figsize=FIG_SIZE)
        series.plot(label='actual')
        pred_series.plot(label='forecast')
        if historical:
            historical_series.plot(label='historical')
            plt.title('MAPE: t{:.2f}%'.format(mape_train) + ' v{:.2f}%'.format(mape_val) + ' - SLE: t{:.2f}'.format(sle_train) + ' v{:.2f}'.format(sle_val))
        else:
            plt.title('MAPE: {:.2f}%'.format(mape_val) + ' - SLE: {:.2f}'.format(sle_val))
        plt.legend()
        plt.show()

    results = {
        'MAPE_VAL': mape_val,
        'SLE_VAL': sle_val,
        'MAPE_TRAIN': mape_train,
        'SLE_TRAIN': sle_train,
        'PREDICT_VALUES': pred_series,
        'HISTORICAL_VALUES': historical_series,
    }
    try:
        return [results.get(key.upper()) for key in returned]
    except (AttributeError, TypeError):
        # Non-string entries or a non-iterable `returned`: keep returning None.
        return None


In [None]:
def calculate_dates_diff(start, end, freq = 'D'):
    """Count the timestamps from `start` through `end` at frequency `freq`.

    Both endpoints are included when they fall on the frequency grid, so the
    result is a number of points, not a number of intervals. Returns 0 when
    `end` precedes `start`.
    """
    return pd.date_range(start, end, freq=freq).size

In [None]:
def set_pl_trainer_kwargs(**kwargs):
    """Return `kwargs` completed with accelerator settings for the current machine.

    - CUDA available: 'accelerator' defaults to "gpu" and 'devices' to -1;
      values supplied by the caller are kept.
    - CUDA unavailable: 'accelerator' is forced to "cpu".
    - Whenever the resulting accelerator is "cpu", any 'devices' entry is
      removed.
    """
    pl_trainer_kwargs = kwargs

    if torch.cuda.is_available():
        # setdefault fills in only the keys the caller did not provide.
        pl_trainer_kwargs.setdefault('accelerator', "gpu")
        pl_trainer_kwargs.setdefault('devices', -1)
    else:
        pl_trainer_kwargs['accelerator'] = "cpu"

    if pl_trainer_kwargs['accelerator'] == "cpu":
        pl_trainer_kwargs.pop('devices', None)

    return pl_trainer_kwargs

# Criando o dataset

## Leitura da Tabela de Dados

In [None]:
# Load the 'Dados' sheet of the workbook; the bare `df` below makes the
# notebook display the dataframe.
df = pd.read_excel('./data/Biodiesel.xlsx', sheet_name='Dados')
df

In [None]:
# Build one darts TimeSeries per value column of the dataframe, using the
# 'Data' column as the time index.
series_national = TimeSeries.from_dataframe(df, 'Data', 'Brasil')
# Step-over-step ratio of the national series (TimeSeries.ratio helper).
series_national_tx = series_national.ratio()
(
    series_north,
    series_south,
    series_southeast,
    series_midwest,
    series_northeast,
) = (
    TimeSeries.from_dataframe(df, 'Data', region_column)
    for region_column in ('Norte', 'Sul', 'Sudeste', 'Centro-Oeste', 'Nordeste')
)

plot_series([
    series_national,
    series_national_tx,
    series_north,
    series_south,
    series_southeast,
    series_midwest,
    series_northeast,
])

## Tratamento dos dados - Série Original

### Visualização dos dados

In [None]:
# Plot the national, ratio and regional series together in one figure
# (the same seven series plotted when they were created).
plot_series([
    series_national,
    series_national_tx,
    series_north,
    series_south,
    series_southeast,
    series_midwest,
    series_northeast
])

### Divisão dos Conjuntos de Treino e Validação

In [None]:
# Chronological split: the first 70% of the national series is used for
# training. The cut-off date found there is reused for every other series.
train_size_percent = 70
train_size = int(len(series_national) * train_size_percent/100)
split_date = series_national.time_index[train_size].strftime('%Y%m%d')
print(f"Treinamento: {series_national.time_index[0].strftime(DATE_FORMAT_STRING)} - {series_national.time_index[train_size - 1].strftime(DATE_FORMAT_STRING)}")
print(f"Validação  : {series_national.time_index[train_size].strftime(DATE_FORMAT_STRING)} - {series_national.time_index[-1].strftime(DATE_FORMAT_STRING)}")

# split_before puts the point at split_date into the validation half.
(
    (train_tx, val_tx),
    (train, val),
    (train_north, val_north),
    (train_south, val_south),
    (train_southeast, val_southeast),
    (train_midwest, val_midwest),
    (train_northeast, val_northeast),
) = (
    full_series.split_before(pd.Timestamp(split_date))
    for full_series in (
        series_national_tx,
        series_national,
        series_north,
        series_south,
        series_southeast,
        series_midwest,
        series_northeast,
    )
)

### Aplicando Scaler (MinMaxScaler: normalização de 0 a 1)

In [None]:
# Scale every series with parameters fitted on its own training split only.
# NOTE: the single `scaler` object is re-fitted for each group in turn, so
# after this cell it holds the fit for train_northeast.
scaler = Scaler()
(
    (train_tx_scaled, val_tx_scaled, series_national_tx_scaled),
    (train_scaled, val_scaled, series_national_scaled),
    (train_north_scaled, val_north_scaled, series_north_scaled),
    (train_south_scaled, val_south_scaled, series_south_scaled),
    (train_southeast_scaled, val_southeast_scaled, series_southeast_scaled),
    (train_midwest_scaled, val_midwest_scaled, series_midwest_scaled),
    (train_northeast_scaled, val_northeast_scaled, series_northeast_scaled),
) = (
    # Tuple items are evaluated left to right: fit on train, then transform.
    (scaler.fit_transform(train_part), scaler.transform(val_part), scaler.transform(full_part))
    for train_part, val_part, full_part in (
        (train_tx, val_tx, series_national_tx),
        (train, val, series_national),
        (train_north, val_north, series_north),
        (train_south, val_south, series_south),
        (train_southeast, val_southeast, series_southeast),
        (train_midwest, val_midwest, series_midwest),
        (train_northeast, val_northeast, series_northeast),
    )
)

plot_series([
    series_national_scaled,
    series_national_tx_scaled,
    series_north_scaled,
    series_south_scaled,
    series_southeast_scaled,
    series_midwest_scaled,
    series_northeast_scaled,
])

## Tratamento de Dados - Série sem tendência

In [None]:
# Transformando o Dataframe em uma serie temporal do darts
series_national_detrend = series_national.detrend()
series_national_tx_detrend = series_national_tx.detrend()
series_north_detrend = series_north.detrend()
series_south_detrend = series_south.detrend()
series_southeast_detrend = series_southeast.detrend()
series_midwest_detrend = series_midwest.detrend()
series_northeast_detrend = series_northeast.detrend()

### Visualização dos dados

In [None]:
# One figure per series, each showing the series together with its trend line.
plot_trend_line(series_national)
plot_trend_line(series_national_tx)
plot_trend_line(series_north)
plot_trend_line(series_south)
plot_trend_line(series_southeast)
plot_trend_line(series_midwest)
plot_trend_line(series_northeast)

In [None]:
# Plot all detrended series together in a single figure.
plot_series([
    series_national_tx_detrend,
    series_national_detrend,
    series_north_detrend,
    series_south_detrend,
    series_southeast_detrend,
    series_midwest_detrend,
    series_northeast_detrend
])

### Divisão dos Conjuntos de Treino e Validação

In [None]:
# Chronological split of the detrended series: the first 70% of the detrended
# national series is used for training, and its cut-off date is reused for
# every other detrended series.
train_size_percent_detrend = 70
train_size_detrend = int(len(series_national_detrend) * train_size_percent_detrend/100)
split_date_detrend = series_national_detrend.time_index[train_size_detrend].strftime('%Y%m%d')
# Report the boundaries with this cell's own train_size_detrend, so the printed
# dates always match the split applied below.
print(f"Treinamento: {series_national_detrend.time_index[0].strftime(DATE_FORMAT_STRING)} - {series_national_detrend.time_index[train_size_detrend - 1].strftime(DATE_FORMAT_STRING)}")
print(f"Validação  : {series_national_detrend.time_index[train_size_detrend].strftime(DATE_FORMAT_STRING)} - {series_national_detrend.time_index[-1].strftime(DATE_FORMAT_STRING)}")

train_tx_detrend, val_tx_detrend = series_national_tx_detrend.split_before(pd.Timestamp(split_date_detrend))
train_detrend, val_detrend = series_national_detrend.split_before(pd.Timestamp(split_date_detrend))
train_north_detrend, val_north_detrend = series_north_detrend.split_before(pd.Timestamp(split_date_detrend))
train_south_detrend, val_south_detrend = series_south_detrend.split_before(pd.Timestamp(split_date_detrend))
train_southeast_detrend, val_southeast_detrend = series_southeast_detrend.split_before(pd.Timestamp(split_date_detrend))
train_midwest_detrend, val_midwest_detrend = series_midwest_detrend.split_before(pd.Timestamp(split_date_detrend))
train_northeast_detrend, val_northeast_detrend = series_northeast_detrend.split_before(pd.Timestamp(split_date_detrend))

### Aplicando Scaler (MinMaxScaler: normalização de 0 a 1)

In [None]:
# Scale every detrended series with parameters fitted on its own training
# split only.
# NOTE: the single `scaler` object is re-fitted for each group in turn, so
# after this cell it holds the fit for train_northeast_detrend.
scaler = Scaler()
(
    (train_tx_detrend_scaled, val_tx_detrend_scaled, series_national_tx_detrend_scaled),
    (train_detrend_scaled, val_detrend_scaled, series_national_detrend_scaled),
    (train_north_detrend_scaled, val_north_detrend_scaled, series_north_detrend_scaled),
    (train_south_detrend_scaled, val_south_detrend_scaled, series_south_detrend_scaled),
    (train_southeast_detrend_scaled, val_southeast_detrend_scaled, series_southeast_detrend_scaled),
    (train_midwest_detrend_scaled, val_midwest_detrend_scaled, series_midwest_detrend_scaled),
    (train_northeast_detrend_scaled, val_northeast_detrend_scaled, series_northeast_detrend_scaled),
) = (
    # Tuple items are evaluated left to right: fit on train, then transform.
    (scaler.fit_transform(train_part), scaler.transform(val_part), scaler.transform(full_part))
    for train_part, val_part, full_part in (
        (train_tx_detrend, val_tx_detrend, series_national_tx_detrend),
        (train_detrend, val_detrend, series_national_detrend),
        (train_north_detrend, val_north_detrend, series_north_detrend),
        (train_south_detrend, val_south_detrend, series_south_detrend),
        (train_southeast_detrend, val_southeast_detrend, series_southeast_detrend),
        (train_midwest_detrend, val_midwest_detrend, series_midwest_detrend),
        (train_northeast_detrend, val_northeast_detrend, series_northeast_detrend),
    )
)

plot_series([
    series_national_detrend_scaled,
    series_national_tx_detrend_scaled,
    series_north_detrend_scaled,
    series_south_detrend_scaled,
    series_southeast_detrend_scaled,
    series_midwest_detrend_scaled,
    series_northeast_detrend_scaled,
])

# Treinando o Modelo