In [None]:
!pip install darts
!pip install tensorboard
!pip install torch torchvision

In [None]:
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import TFTModel, TCNModel
from darts.dataprocessing.transformers import Scaler
import matplotlib.pyplot as plt
from google.colab import drive
import os
import torch
import darts
from pytorch_lightning.callbacks import Callback
from pytorch_lightning.callbacks import EarlyStopping
from torchmetrics import MeanAbsolutePercentageError
import time
import datetime
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_percentage_error #MAPE
from sklearn.metrics import mean_absolute_error #MAE
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Select the torch device: CUDA when a GPU is visible, the CPU otherwise,
# and report which one was picked.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print(f"GPU ({torch.cuda.get_device_name(0)}) is available.")
else:
    print("GPU is not available. Using CPU.")

GPU is not available. Using CPU.


In [None]:
# Mount Google Drive (Colab only) so files under /content/gdrive can be read.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Display the current working directory of the kernel.
os.getcwd()

'/content'

In [None]:
# Absolute location of the input CSV on the mounted Google Drive; handed to read_data().
path = "/content/gdrive/MyDrive/Artigo TFT/Dados/tucurui.csv"

def read_data(path):
  """Load the CSV at ``path`` and return the rainfall and flow series.

  The file is read with ``;`` as delimiter and ``,`` as decimal mark, rows
  containing missing values are dropped, and only the tail of the table is
  kept as the study range.

  Parameters
  ----------
  path : str
      Location of the CSV file. It must contain the columns ``Data``
      (dates formatted as ``%d/%m/%Y``), ``UPH610010000`` and ``VazaoNatural``.

  Returns
  -------
  tuple
      ``(prec_ts, vazao_ts)``: daily ``TimeSeries`` built from the
      ``UPH610010000`` and ``VazaoNatural`` columns respectively.
  """
  # Read the raw table and discard incomplete rows.
  data = pd.read_csv(path, delimiter=';', decimal=',').dropna()

  # Restrict to the study range (roughly the past 3 years). The explicit copy
  # makes the column assignment below write to an independent frame instead of
  # to a slice of the frame returned by read_csv.
  # NOTE(review): the `-1` end bound leaves out the most recent row, so at most
  # 365*3 - 1 rows are kept -- confirm that this is intended.
  data = data.iloc[-365*3:-1, :].copy()

  # Parse the day/month/year strings into timestamps.
  current_date_format = "%d/%m/%Y"
  data['Data'] = pd.to_datetime(data['Data'], format=current_date_format)

  # Build the two daily series.
  # NOTE(review): missing dates are filled for the flow series but not for the
  # rainfall series -- confirm that the asymmetry is deliberate.
  prec = data[['Data', 'UPH610010000']]
  prec_ts = TimeSeries.from_dataframe(prec, time_col="Data", value_cols=['UPH610010000'],fill_missing_dates=False, freq='D')

  vazao = data[['Data', 'VazaoNatural']]
  vazao_ts = TimeSeries.from_dataframe(vazao, time_col="Data", value_cols=['VazaoNatural'],fill_missing_dates=True, freq='D')

  return prec_ts, vazao_ts

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Every error is divided by the matching ``y_true`` value, so the argument
    order matters and a zero in ``y_true`` means a division by zero.
    This definition replaces the scikit-learn function of the same name that
    is imported at the top of the notebook.
    """
    relative_error = (y_true - y_pred) / y_true
    return np.mean(np.abs(relative_error)) * 100

def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    """Return the symmetric MAPE in percent.

    Each term is twice the absolute error divided by the sum of the absolute
    values of the two inputs; the terms are averaged and multiplied by 100.
    """
    doubled_abs_error = 2 * np.abs(y_pred - y_true)
    magnitude_sum = np.abs(y_true) + np.abs(y_pred)
    return np.mean(doubled_abs_error / magnitude_sum) * 100

def nash_sutcliffe_efficiency(y_true, y_pred):
    """Return the Nash-Sutcliffe efficiency of ``y_pred`` against ``y_true``.

    Computed as one minus the ratio between the sum of squared errors and the
    sum of squared deviations of ``y_true`` from its own mean.
    """
    residual_ss = np.sum((y_true - y_pred) ** 2)
    total_ss = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1 - (residual_ss / total_ss)

def mean_absolute_error(y_true, y_pred):
    """Return the mean of the absolute differences between the two inputs.

    This definition replaces the scikit-learn function of the same name that
    is imported at the top of the notebook.
    """
    absolute_errors = np.abs(y_true - y_pred)
    return np.mean(absolute_errors)

def root_mean_squared_error(y_true, y_pred):
    """Return the root of the mean squared difference between the two inputs.

    Written with NumPy like the other metric helpers in this notebook, so it
    accepts the same kinds of inputs as they do and does not depend on
    scikit-learn being importable.
    """
    squared_errors = (y_true - y_pred) ** 2
    return np.sqrt(np.mean(squared_errors))

def split_data(train_test, train_val, prec_ts, vazao_ts):
  """Split the flow (target) and rainfall series into train, validation and test parts.

  Each series is first cut with ``split_before(train_test)``; the leading part
  is then cut again with ``split_before(train_val)``.

  Returns
  -------
  tuple
      ``(flow_train, flow_test, flow_val, prec_train, prec_test, prec_val)``
      -- note that the test part comes before the validation part.
  """
  def three_way(series):
    # The trailing piece is the test set; the leading piece becomes train + validation.
    head, test_part = series.split_before(train_test)
    train_part, val_part = head.split_before(train_val)
    return train_part, val_part, test_part

  flow_train, flow_val, flow_test = three_way(vazao_ts)
  prec_train, prec_val, prec_test = three_way(prec_ts)

  return flow_train, flow_test, flow_val, prec_train, prec_test, prec_val

def data_scaling():
    """Scale the flow and rainfall series with scalers fitted on the training parts.

    Takes no arguments: it reads the module-level names ``flow_train``,
    ``vazao_ts``, ``flow_val``, ``flow_test``, ``prec_train``, ``prec_ts``,
    ``prec_val`` and ``prec_test``, which must exist before the call.

    Returns
    -------
    tuple
        ``(transformer_flow, transformer_prec, trans_flow_train, trans_flow,
        trans_flow_val, trans_flow_test, trans_prec_train, trans_prec,
        trans_prec_val, trans_prec_test)``.
    """
    def fit_and_apply(train_part, full_series, val_part, test_part):
        # Fit on the training part only, then reuse the fitted scaler for the rest.
        scaler = Scaler()
        scaled_train = scaler.fit_transform(train_part)
        scaled_full = scaler.transform(full_series)
        scaled_val = scaler.transform(val_part)
        scaled_test = scaler.transform(test_part)
        return scaler, scaled_train, scaled_full, scaled_val, scaled_test

    # Flow (target) series first, rainfall series second.
    flow_scaler, *flow_scaled = fit_and_apply(flow_train, vazao_ts, flow_val, flow_test)
    prec_scaler, *prec_scaled = fit_and_apply(prec_train, prec_ts, prec_val, prec_test)

    return (flow_scaler, prec_scaler, *flow_scaled, *prec_scaled)

def build_samples_df(flow_test, start_date=datetime.datetime(2022, 12, 31), window_size=14, stride=1):
    """Cut ``flow_test`` into fixed-length windows and tabulate them.

    Window ``k`` starts ``k`` days after ``start_date`` and spans
    ``window_size`` consecutive days. Windows whose dates are not all present
    in the series are skipped.

    Parameters
    ----------
    flow_test : TimeSeries
        Series to cut into windows.
    start_date : datetime.datetime, optional
        Start of the first window. Defaults to 2022-12-31, the value that used
        to be hard-coded.
        NOTE(review): this presumably should match the first date of
        ``flow_test`` -- confirm against the data.
    window_size : int, optional
        Number of days per window (default 14).
    stride : int, optional
        Number of days between the starts of consecutive windows (default 1).

    Returns
    -------
    tuple
        ``(df_samples, samples)``: ``samples`` is the list of window
        ``TimeSeries``; ``df_samples`` has one row per window, indexed by the
        window's start date, with the window's values as columns. It is an
        empty DataFrame when no window qualifies.
    """
    # Convert the TimeSeries to a pandas DataFrame. darts is installed without
    # a version pin, so accept either spelling of the conversion method.
    to_frame = getattr(flow_test, "to_dataframe", None) or flow_test.pd_dataframe
    timeseries_df = to_frame()

    num_samples = len(timeseries_df) - window_size + 1

    samples = []
    rows = []
    for offset in range(0, num_samples, stride):
        sample_start = start_date + datetime.timedelta(days=offset)
        sample_dates = pd.date_range(start=sample_start, periods=window_size)
        # Keep only windows that are fully covered by the series.
        if not sample_dates.isin(timeseries_df.index).all():
            continue
        window = TimeSeries.from_dataframe(timeseries_df.loc[sample_dates])
        samples.append(window)

        # One row per window: values laid out horizontally, start date as index.
        row = pd.DataFrame(window.values()).T
        row['Date'] = window.start_time()
        rows.append(row.set_index('Date'))

    # Concatenate once instead of growing the frame inside the loop.
    df_samples = pd.concat(rows) if rows else pd.DataFrame()
    return df_samples, samples

def build_backtest_df(backtest, transformer_flow):
    """Tabulate a list of forecasts in the original (unscaled) units.

    Parameters
    ----------
    backtest : sequence
        Forecast series; each must provide ``start_time()``.
    transformer_flow : object
        Fitted scaler whose ``inverse_transform`` maps a forecast back to the
        original units and returns an object providing ``values()``.

    Returns
    -------
    pandas.DataFrame
        One row per forecast, indexed by the forecast's start date (index name
        ``Date``), with the forecast values as columns. Empty when
        ``backtest`` is empty.
    """
    rows = []
    for forecast in backtest:
        values = transformer_flow.inverse_transform(forecast).values()

        # Lay the forecast values out horizontally and key the row by start date.
        row = pd.DataFrame(values).T
        row['Date'] = forecast.start_time()
        rows.append(row.set_index('Date'))

    if not rows:
        return pd.DataFrame()
    # Concatenate once instead of growing the frame inside the loop.
    return pd.concat(rows)



# Load the rainfall and flow series.
prec_ts, vazao_ts = read_data(path)

# Split both series: 0.8 is the train+validation / test split point and 0.9
# the train / validation split point inside the leading part.
(flow_train, flow_test, flow_val,
 prec_train, prec_test, prec_val) = split_data(0.8, 0.9, prec_ts, vazao_ts)

# Scale every part; data_scaling() reads the names assigned just above.
(transformer_flow, transformer_prec,
 trans_flow_train, trans_flow, trans_flow_val, trans_flow_test,
 trans_prec_train, trans_prec, trans_prec_val, trans_prec_test) = data_scaling()

# Observed (unscaled) windows of the test series, used as the reference for the metrics.
df_samples, samples = build_samples_df(flow_test)

# Experiment grid: one entry per (model class, optimizer) pair. Every entry
# trains on the scaled flow series without covariates; "cov" is forwarded as
# `add_relative_index` when the model is built in the training loop.
configs = {
    name: {
        "model": model_cls,
        "target": trans_flow_train,
        "past_cov": None,
        "future_cov": None,
        "val_series": trans_flow_val,
        "val_past_cov": None,
        "val_future_cov": None,
        "optimizer": optimizer,
        "teste": trans_flow_test,
        "cov": relative_index,
    }
    for name, model_cls, optimizer, relative_index in (
        ("config 1", TCNModel, torch.optim.Adam, False),
        ("config 2", TCNModel, torch.optim.RMSprop, False),
        ("config 3", TCNModel, torch.optim.SGD, False),
        ("config 7", TFTModel, torch.optim.Adam, True),
        ("config 8", TFTModel, torch.optim.RMSprop, True),
        ("config 9", TFTModel, torch.optim.SGD, True),
    )
}


# Early-stopping callback that watches the validation loss: lower is better,
# an improvement must be at least 0.0001, and the patience is 50.
my_stopper = EarlyStopping(monitor="val_loss", mode='min', min_delta=0.0001, patience=50)

# Trainer options handed to every model as `pl_trainer_kwargs`.
# GPU options left disabled: "accelerator": "gpu", "devices": -1
pl_trainer_kwargs = dict(callbacks=[my_stopper])

# Filled by the training loop: configuration name -> metrics and training time.
metrics_dict = dict()

In [None]:
# Display the rainfall series returned by read_data() for a visual check.
prec_ts

In [None]:
# Settings shared by every configuration (the series are daily).
INPUT_CHUNK_LENGTH = 30   # days of history fed to the model
FORECAST_HORIZON = 14     # days predicted per forecast

for config, params in configs.items():
    model_class = params["model"]
    optimizer_cls = params["optimizer"]

    print(config)

    # Build the model. All configurations share the same hyper-parameters;
    # only TFT models additionally receive `add_relative_index`.
    model_kwargs = dict(
        input_chunk_length=INPUT_CHUNK_LENGTH,
        output_chunk_length=FORECAST_HORIZON,
        loss_fn=torch.nn.MSELoss(),
        likelihood=None,
        nr_epochs_val_period=1,
        pl_trainer_kwargs=pl_trainer_kwargs,
        optimizer_cls=optimizer_cls,
        optimizer_kwargs={"lr": 0.0005},
        log_tensorboard=True,
        save_checkpoints=True,
        force_reset=True,
        n_epochs=500,
    )
    if issubclass(model_class, TFTModel):
        model_kwargs["add_relative_index"] = params["cov"]
    model = model_class(**model_kwargs)

    inicio = time.perf_counter()

    # Train the model.
    model.fit(
        params["target"],
        past_covariates=params["past_cov"],
        future_covariates=params["future_cov"],
        val_series=params["val_series"],
        val_past_covariates=params["val_past_cov"],
        val_future_covariates=params["val_future_cov"]
    )

    fim = time.perf_counter()
    tempo = fim - inicio

    print(f"Training time for config {config}: {tempo} seconds")

    # Run a backtest over the test series for later validation.
    backtest = model.historical_forecasts(series=params["teste"],
                                          future_covariates=params["future_cov"],
                                          past_covariates=params["past_cov"],
                                          forecast_horizon=FORECAST_HORIZON,
                                          stride=1,
                                          retrain=False,
                                          overlap_end=False,
                                          last_points_only=False,
                                          verbose=False)

    df_backtest = build_backtest_df(backtest, transformer_flow)

    # Compare each forecast only with the observed window that starts on the
    # same date: the two tables do not necessarily cover the same dates.
    common_dates = df_backtest.index.intersection(df_samples.index)
    if common_dates.empty:
        raise ValueError(
            f"{config}: forecasts and observed windows share no start date; "
            "cannot compute metrics"
        )
    observed = df_samples.loc[common_dates]
    forecast = df_backtest.loc[common_dates]

    # One set of metrics per forecast step. The observed values go first
    # (y_true) and the forecasts second (y_pred); the order matters for MAPE,
    # which divides by y_true.
    metricas = []
    for i in range(FORECAST_HORIZON):
        obs_step = observed.iloc[:, i].to_numpy()
        pred_step = forecast.iloc[:, i].to_numpy()
        mape = mean_absolute_percentage_error(obs_step, pred_step)
        smape = symmetric_mean_absolute_percentage_error(obs_step, pred_step)
        mae = mean_absolute_error(obs_step, pred_step)
        rmse = root_mean_squared_error(obs_step, pred_step)
        metricas.append({"mape": mape, "smape": smape, "mae": mae, "rmse": rmse})

    # Save metrics and training time for the current configuration.
    metrics_dict[config] = {"metrics": metricas, "tempo": tempo}

In [None]:
# Create a DataFrame from the metrics dictionary
df_metrics = pd.DataFrame(metrics_dict).T

# Calculate the average of each metric
df_metrics["avg_mape"] = df_metrics["metrics"].apply(lambda x: sum(metric["mape"] for metric in x) / len(x))
df_metrics["avg_smape"] = df_metrics["metrics"].apply(lambda x: sum(metric["smape"] for metric in x) / len(x))
df_metrics["avg_mae"] = df_metrics["metrics"].apply(lambda x: sum(metric["mae"] for metric in x) / len(x))
df_metrics["avg_rmse"] = df_metrics["metrics"].apply(lambda x: sum(metric["rmse"] for metric in x) / len(x))

# Drop the "metrics" column as it's no longer needed
df_metrics.drop(columns=["metrics"], inplace=True)
df_metrics["model"] = [configs[config]["model"].__name__ for config in df_metrics.index]
df_metrics["optimizer"] = [configs[config]["optimizer"].__name__ for config in df_metrics.index]

# Display the resulting DataFrame
df_metrics