In [1]:
# =========================================================
# SEÇÃO 1: IMPORTAÇÕES E SETUP GERAL
# =========================================================

In [2]:
import os
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import itertools

# Libs de Modelagem e Estatística
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.datasets import get_rdataset
from dieboldmariano import dm_test
import pmdarima as pm
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS, MLP, LSTM, Autoformer, NHITS

# Libs de Avaliação
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tqdm import tqdm
from IPython.display import display, Markdown

warnings.filterwarnings("ignore")

In [3]:
# =========================================================
# SEÇÃO 2: FUNÇÕES AUXILIARES (SETUP E PROCESSAMENTO)
# =========================================================

In [4]:
def definir_seed(seed_value=42):
    """Seed the global random number generators used by this notebook.

    Seeds the stdlib ``random`` module and NumPy's legacy global generator
    with ``seed_value`` and exports the same value as ``PYTHONHASHSEED``.

    Args:
        seed_value: Seed applied to every generator (default 42).

    Note:
        ``PYTHONHASHSEED`` is read by the interpreter at start-up, so setting
        it here only influences processes launched afterwards.
    """
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    for semear in (random.seed, np.random.seed):
        semear(seed_value)

In [5]:
def salvar_dataset(serie, dataset_name):
    """Write a CSV copy of ``serie`` under ``./data/bronze``.

    The file is named after the lower-cased ``dataset_name`` and holds two
    columns: ``date`` (the series index) and ``value`` (the series values).
    The target directory is created when missing and the destination path is
    printed once the file has been written.

    Args:
        serie: pandas Series to persist.
        dataset_name: Name used for the file (lower-cased) and for the message.
    """
    pasta_destino = "./data/bronze"
    os.makedirs(pasta_destino, exist_ok=True)

    file_path = os.path.join(pasta_destino, dataset_name.lower() + ".csv")
    pd.DataFrame({"date": serie.index, "value": serie.values}).to_csv(file_path, index=False)

    print(f"-> Cópia do dataset '{dataset_name}' salva em: {file_path}")

def carregar_serie(nome):
    """Fetch one of the supported R datasets and return it as a dated Series.

    The dataset is downloaded with ``get_rdataset`` (package ``datasets``), its
    ``value`` column is wrapped in a pandas Series with a synthesized
    ``DatetimeIndex``, and a CSV copy is written through ``salvar_dataset``.

    Args:
        nome: Dataset name, matched case-insensitively against
            ``airpassengers``, ``lynx``, ``co2``, ``sunspots``, ``austres``
            and ``nottem``.

    Returns:
        pandas.Series holding the ``value`` column, indexed by the generated
        dates and named after the dataset.

    Raises:
        ValueError: If ``nome`` is not one of the supported datasets.
    """
    print(f"Buscando dados de '{nome}' via statsmodels...")

    # key -> (Rdatasets name, first timestamp, pandas frequency alias, Series name)
    # NOTE(review): the index is generated from a fixed start and the row count,
    # not read from the downloaded table -- confirm the dates match each source.
    catalogo = {
        "airpassengers": ("AirPassengers", "1949-01-01", "MS", "AirPassengers"),
        "lynx": ("lynx", "1821", "A", "Lynx"),
        "co2": ("CO2", "1958-03-29", "MS", "CO2"),
        "sunspots": ("sunspots", "1749-01-01", "MS", "Sunspots"),
        "austres": ("austres", "1971-03-01", "QS-MAR", "AustralianResidents"),
        "nottem": ("nottem", "1920-01-01", "MS", "Nottingham"),
    }

    chave = nome.lower()
    if chave not in catalogo:
        raise ValueError(f"Lógica de download para a série '{nome}' não implementada.")

    nome_r, inicio, frequencia, rotulo = catalogo[chave]
    tabela = get_rdataset(nome_r, package="datasets").data
    if chave == "co2":
        # Only this dataset gets its gaps forward-filled before use.
        tabela = tabela.ffill()

    eixo_tempo = pd.date_range(start=inicio, periods=len(tabela), freq=frequencia)
    serie = pd.Series(tabela['value'].values, index=eixo_tempo, name=rotulo)

    salvar_dataset(serie, nome)
    return serie

In [6]:
def dividir_serie_temporal(serie, percentual_treino=0.85):
    """Split ``serie`` chronologically into a leading and a trailing part.

    Args:
        serie: pandas object supporting ``len`` and ``.iloc`` slicing.
        percentual_treino: Fraction of the observations assigned to the first
            part; the cut position is ``int(len(serie) * percentual_treino)``,
            i.e. the product is truncated, not rounded.

    Returns:
        Tuple ``(treino, teste)``: the rows before the cut and the rows from
        the cut onwards.
    """
    corte = int(len(serie) * percentual_treino)
    return serie.iloc[:corte], serie.iloc[corte:]

def preparar_dados_para_neuralforecast(serie, nome_serie):
    """Reshape a Series into the long table passed to ``NeuralForecast.fit``.

    Args:
        serie: pandas Series; its index becomes column ``ds`` and its values
            become column ``y``.
        nome_serie: Identifier stored on every row in column ``unique_id``.

    Returns:
        pandas.DataFrame with columns ``ds``, ``y`` and ``unique_id``.
    """
    return (
        serie.reset_index()
        .set_axis(['ds', 'y'], axis=1)
        .assign(unique_id=nome_serie)
    )

In [7]:
# =========================================================
# SEÇÃO 3: FUNÇÕES PARA CÁLCULO DE MÉTRICAS E MODELAGEM
# =========================================================

In [8]:
def calcular_metricas(y_true, y_pred, y_train):
    """Compute RMSE, MAPE (%) and MASE for a forecast.

    All inputs are converted to plain float arrays first. With pandas Series,
    element-wise arithmetic is aligned on index labels, so the lag-1 difference
    ``y_train[1:] - y_train[:-1]`` used to evaluate to zeros/NaN (forcing MASE
    to ``inf``), and ``y_true - y_pred`` could silently misalign as well.

    Args:
        y_true: Observed values of the evaluation window (array-like).
        y_pred: Forecasts for the same window, in the same order (array-like).
        y_train: In-sample observations used to scale the MASE (array-like).

    Returns:
        dict with keys ``'RMSE'``, ``'MAPE(%)'`` and ``'MASE'``. MAPE is
        ``inf`` when any observed value is zero. MASE is ``inf`` when
        ``y_train`` has fewer than two points or its mean absolute lag-1
        difference is zero or NaN.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    erro = y_true - y_pred
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mape = np.mean(np.abs(erro / y_true)) * 100 if np.all(y_true != 0) else np.inf

    # MASE scale: in-sample mean absolute error of the naive (previous value) forecast.
    d = np.mean(np.abs(np.diff(y_train))) if y_train.size > 1 else np.nan
    # np.isnan replaces the former identity test against np.nan, which only
    # recognised the literal np.nan object and not a NaN produced by arithmetic.
    mase = np.mean(np.abs(erro)) / d if (not np.isnan(d)) and d > 0 else np.inf
    return {'RMSE': rmse, 'MAPE(%)': mape, 'MASE': mase}

In [9]:
# =========================================================
# SEÇÃO 4: PIPELINE AVANÇADO PARA O ARIMA
# =========================================================


In [10]:
def encontrar_melhor_arima_auto(treino_log, freq):
    """Select ARIMA orders for ``treino_log`` with ``pm.auto_arima``.

    The seasonal period handed to the search is derived from the frequency
    alias: 12 when it starts with ``'M'``, 4 when it starts with ``'Q'`` and
    1 otherwise. The search is stepwise with ``seasonal=True``.

    Args:
        treino_log: Training series passed to ``pm.auto_arima`` as-is.
        freq: pandas frequency alias string of the series (e.g. ``"MS"``).

    Returns:
        Tuple ``(order, seasonal_order)`` read from the selected model.
    """
    print("Buscando melhor ordem ARIMA com auto_arima...")

    if freq.startswith('M'):
        m = 12
    elif freq.startswith('Q'):
        m = 4
    else:
        m = 1

    opcoes_busca = dict(
        m=m,
        seasonal=True,
        trace=False,
        error_action='ignore',
        suppress_warnings=True,
        stepwise=True,
    )
    melhor_modelo = pm.auto_arima(treino_log, **opcoes_busca)

    ordem, ordem_sazonal = melhor_modelo.order, melhor_modelo.seasonal_order
    print(f"Melhor ordem encontrada: {ordem} Sazonal: {ordem_sazonal}")
    return ordem, ordem_sazonal

In [11]:
# =========================================================
# SEÇÃO 5: PIPELINE DE EXPERIMENTO COMPLETO E AVANÇADO
# =========================================================

In [12]:
# =========================================================
# SEÇÃO 5 (cont.): PIPELINE DE EXPERIMENTO (COM CORREÇÃO NO ARIMA)
# =========================================================

def executar_experimento(nome_da_serie, horizonte):
    """
    Run the full forecasting pipeline for one dataset and one horizon.

    The last ``horizonte`` observations are held out as the test window. Every
    model is fitted on the natural log of the remaining observations and its
    forecast is mapped back to the original scale with ``np.exp``:

    1. ARIMA, with orders chosen by ``encontrar_melhor_arima_auto``.
    2. N-BEATS, MLP, LSTM, Autoformer and NHITS through ``NeuralForecast``.
    3. A hybrid: an N-BEATS model is fitted on the ARIMA in-sample residuals
       (log scale), and its forecast is added to the ARIMA log forecast before
       exponentiating.

    A model that raises is reported with a warning and simply left out of the
    result; the other models still run.

    Args:
        nome_da_serie: Dataset name understood by ``carregar_serie``.
        horizonte: Number of trailing observations to hold out and forecast.

    Returns:
        pandas.DataFrame with one row per held-out timestamp and the columns
        ``ds``, ``y_true``, one column per model that succeeded, ``dataset``
        and ``horizonte``. ``None`` when the series has fewer than
        ``horizonte + 50`` points, when its frequency cannot be determined,
        or when an unexpected error interrupts the pipeline.
    """
    try:
        SEED = 42
        definir_seed(SEED)
        MAX_INPUT_SIZE = 50
        MAX_STEPS_NEURAL = 150

        serie_original = carregar_serie(nome_da_serie)

        if len(serie_original) < horizonte + 50:
            print(f"AVISO: Série '{nome_da_serie}' muito curta para o horizonte {horizonte}. Pulando.")
            return None

        freq = serie_original.index.freqstr or pd.infer_freq(serie_original.index)
        if freq is None:
            print(f"AVISO: Não foi possível determinar a frequência da série '{nome_da_serie}'. Pulando.")
            return None

        # Hold out exactly the last `horizonte` points. Slicing by position avoids
        # the round trip through a float percentage, whose int() truncation can
        # leave horizonte + 1 test rows and break the final DataFrame assembly.
        ponto_corte = len(serie_original) - horizonte
        treino_orig = serie_original.iloc[:ponto_corte]
        teste_orig = serie_original.iloc[ponto_corte:]

        # NOTE: np.log yields -inf/NaN for non-positive observations; the series
        # is assumed to be strictly positive.
        treino_log = np.log(treino_orig)

        previsoes_teste = {'y_true': teste_orig.values}
        input_size = min(2 * horizonte, MAX_INPUT_SIZE)

        # --- 1. ARIMA ---
        modelo_arima = None
        preds_log_arima = None  # kept in log scale so the hybrid can build on it
        try:
            print(f"Processando: ARIMA para horizonte {horizonte}")
            ordem, ordem_sazonal = encontrar_melhor_arima_auto(treino_log, freq)
            modelo_arima = ARIMA(treino_log.asfreq(freq), order=ordem, seasonal_order=ordem_sazonal).fit()

            # .forecast() predicts `horizonte` steps past the end of the training sample.
            preds_log_arima = np.asarray(modelo_arima.forecast(steps=horizonte))
            previsoes_teste['ARIMA'] = np.exp(preds_log_arima)
        except Exception as e:
            print(f"AVISO: ARIMA falhou: {e}")

        # --- 2. Pure neural models ---
        df_treino_log_nf = preparar_dados_para_neuralforecast(treino_log, nome_da_serie)
        modelos_neurais = {'N-BEATS': NBEATS, 'MLP': MLP, 'LSTM': LSTM, 'Autoformer': Autoformer, 'NHITS': NHITS}
        for nome_modelo, classe_modelo in modelos_neurais.items():
            try:
                print(f"Processando: {nome_modelo}")
                modelo_neural = [classe_modelo(input_size=input_size, h=horizonte, max_steps=MAX_STEPS_NEURAL, scaler_type='standard', random_seed=SEED)]
                nf = NeuralForecast(models=modelo_neural, freq=freq)
                nf.fit(df=df_treino_log_nf, verbose=False)
                # The prediction column is read by the model class name.
                previsoes_teste[nome_modelo] = np.exp(nf.predict()[classe_modelo.__name__].values)
            except Exception as e:
                print(f"AVISO: {nome_modelo} falhou: {e}")

        # --- 3. Hybrid model (ARIMA + N-BEATS on the ARIMA residuals) ---
        if preds_log_arima is not None and modelo_arima is not None:
            try:
                print("Processando: Híbrido (MIMO)")
                residuos_treino_log = modelo_arima.resid
                df_residuos_nf = preparar_dados_para_neuralforecast(residuos_treino_log, "residuos")
                modelo_residuos = [NBEATS(input_size=input_size, h=horizonte, max_steps=MAX_STEPS_NEURAL, scaler_type='standard', random_seed=SEED)]
                nf_residuos = NeuralForecast(models=modelo_residuos, freq=freq)
                nf_residuos.fit(df=df_residuos_nf, verbose=False)
                preds_residuos_log = nf_residuos.predict()['NBEATS'].values
                # Both terms live in log space, so they are summed there and only
                # then mapped back. Adding the log-scale residual forecast to the
                # already exponentiated ARIMA forecast would mix two scales.
                previsoes_teste['Híbrido (MIMO)'] = np.exp(preds_log_arima + preds_residuos_log)
            except Exception as e:
                print(f"AVISO: Híbrido (MIMO) falhou: {e}")

        df_final = pd.DataFrame(previsoes_teste, index=teste_orig.index)
        df_final['dataset'] = nome_da_serie
        df_final['horizonte'] = horizonte
        return df_final.reset_index().rename(columns={'index': 'ds'})

    except Exception as e:
        print(f"ERRO GERAL no processamento de '{nome_da_serie}' para o horizonte {horizonte}: {e}")
        return None

In [13]:
# =========================================================
# SEÇÃO 6: ORQUESTRADOR
# =========================================================

In [None]:
# Experiment grid: every dataset is evaluated at every forecast horizon.
LISTA_DE_DATASETS = ['AirPassengers', 'co2']
VETOR_DE_HORIZONTES = [12, 24]

# Destination of the consolidated results; the file itself is written by a later cell.
output_dir = "./data/silver"
output_file = os.path.join(output_dir, "resultados_completos.csv")
os.makedirs(output_dir, exist_ok=True)

# One detailed forecast table per (dataset, horizon) pair that completed.
resultados_gerais = []

for dataset in tqdm(LISTA_DE_DATASETS, desc="Processando Datasets"):
    barra_horizontes = tqdm(VETOR_DE_HORIZONTES, desc=f"Testando Horizontes para {dataset}", leave=False)
    for horizonte in barra_horizontes:
        df_resultado_detalhado = executar_experimento(dataset, horizonte)
        # executar_experimento returns None for runs it skipped or that failed.
        if df_resultado_detalhado is None:
            continue
        resultados_gerais.append(df_resultado_detalhado)

Processando Datasets:   0%|          | 0/2 [00:00<?, ?it/s]

Buscando dados de 'AirPassengers' via statsmodels...
-> Cópia do dataset 'AirPassengers' salva em: ./data/bronze\airpassengers.csv
Processando: ARIMA para horizonte 12
Buscando melhor ordem ARIMA com auto_arima...
Melhor ordem encontrada: (2, 0, 0) Sazonal: (0, 1, 1, 12)


Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Processando: N-BEATS



  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.4 M  | train
-------------------------------------------------------
2.4 M     Trainable params
900       Non-trainable params
2.4 M     Total params
9.789     Total estimated model params size (MB)
31        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  3.52it/s, v_num=1869, train_loss_step=0.0372, train_loss_epoch=0.0372]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  3.44it/s, v_num=1869, train_loss_step=0.0372, train_loss_epoch=0.0372]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 82.50it/s] 

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Processando: MLP



  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | mlp          | ModuleList    | 1.1 M  | train
4 | out          | Linear        | 12.3 K | train
-------------------------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.350     Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  7.57it/s, v_num=1871, train_loss_step=0.0705, train_loss_epoch=0.0705]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  7.19it/s, v_num=1871, train_loss_step=0.0705, train_loss_epoch=0.0705]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 54.77it/s]

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Processando: LSTM



  | Name            | Type          | Params | Mode 
----------------------------------------------------------
0 | loss            | MAE           | 0      | train
1 | padder          | ConstantPad1d | 0      | train
2 | scaler          | TemporalNorm  | 0      | train
3 | hist_encoder    | LSTM          | 484 K  | train
4 | context_adapter | Linear        | 24.1 K | train
5 | mlp_decoder     | MLP           | 2.4 K  | train
----------------------------------------------------------
510 K     Trainable params
0         Non-trainable params
510 K     Total params
2.042     Total estimated model params size (MB)
11        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00, 26.08it/s, v_num=1873, train_loss_step=0.331, train_loss_epoch=0.331]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00, 21.82it/s, v_num=1873, train_loss_step=0.331, train_loss_epoch=0.331]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 39.12it/s]

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Processando: Autoformer



  | Name          | Type          | Params | Mode 
--------------------------------------------------------
0 | loss          | MAE           | 0      | train
1 | padder_train  | ConstantPad1d | 0      | train
2 | scaler        | TemporalNorm  | 0      | train
3 | decomp        | SeriesDecomp  | 0      | train
4 | enc_embedding | DataEmbedding | 384    | train
5 | dec_embedding | DataEmbedding | 384    | train
6 | encoder       | Encoder       | 148 K  | train
7 | decoder       | Decoder       | 141 K  | train
--------------------------------------------------------
290 K     Trainable params
0         Non-trainable params
290 K     Total params
1.162     Total estimated model params size (MB)
85        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:03<00:00,  0.26it/s, v_num=1875, train_loss_step=0.390, train_loss_epoch=0.390]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:03<00:00,  0.26it/s, v_num=1875, train_loss_step=0.390, train_loss_epoch=0.390]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 20.02it/s]

Seed set to 42



Processando: NHITS


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.4 M  | train
-------------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.751     Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  4.35it/s, v_num=1877, train_loss_step=0.0618, train_loss_epoch=0.0618]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  4.25it/s, v_num=1877, train_loss_step=0.0618, train_loss_epoch=0.0618]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 48.69it/s]

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Processando: Híbrido (MIMO)



  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.4 M  | train
-------------------------------------------------------
2.4 M     Trainable params
900       Non-trainable params
2.4 M     Total params
9.789     Total estimated model params size (MB)
31        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  3.71it/s, v_num=1879, train_loss_step=0.0121, train_loss_epoch=0.0121]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  3.60it/s, v_num=1879, train_loss_step=0.0121, train_loss_epoch=0.0121]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 97.32it/s] 




Buscando dados de 'AirPassengers' via statsmodels...
-> Cópia do dataset 'AirPassengers' salva em: ./data/bronze\airpassengers.csv
Processando: ARIMA para horizonte 24
Buscando melhor ordem ARIMA com auto_arima...
Melhor ordem encontrada: (2, 0, 0) Sazonal: (0, 1, 1, 12)


Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.5 M  | train
-------------------------------------------------------
2.5 M     Trainable params
3.5 K     Non-trainable params
2.5 M     Total params
10.120    Total estimated model params size (MB)
31        Modules in train mode
0         Modules in eval mode


Processando: N-BEATS
Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  3.62it/s, v_num=1881, train_loss_step=0.0168, train_loss_epoch=0.0168]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  3.52it/s, v_num=1881, train_loss_step=0.0168, train_loss_epoch=0.0168]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 78.59it/s] 

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Processando: MLP



  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | mlp          | ModuleList    | 1.1 M  | train
4 | out          | Linear        | 24.6 K | train
-------------------------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.498     Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  8.13it/s, v_num=1883, train_loss_step=0.0511, train_loss_epoch=0.0511]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  7.75it/s, v_num=1883, train_loss_step=0.0511, train_loss_epoch=0.0511]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 96.49it/s] 

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Processando: LSTM



  | Name            | Type          | Params | Mode 
----------------------------------------------------------
0 | loss            | MAE           | 0      | train
1 | padder          | ConstantPad1d | 0      | train
2 | scaler          | TemporalNorm  | 0      | train
3 | hist_encoder    | LSTM          | 484 K  | train
4 | context_adapter | Linear        | 48.2 K | train
5 | mlp_decoder     | MLP           | 2.4 K  | train
----------------------------------------------------------
534 K     Trainable params
0         Non-trainable params
534 K     Total params
2.139     Total estimated model params size (MB)
11        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00, 21.04it/s, v_num=1885, train_loss_step=0.287, train_loss_epoch=0.287]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00, 17.81it/s, v_num=1885, train_loss_step=0.287, train_loss_epoch=0.287]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 64.26it/s]

Seed set to 42



Processando: Autoformer


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name          | Type          | Params | Mode 
--------------------------------------------------------
0 | loss          | MAE           | 0      | train
1 | padder_train  | ConstantPad1d | 0      | train
2 | scaler        | TemporalNorm  | 0      | train
3 | decomp        | SeriesDecomp  | 0      | train
4 | enc_embedding | DataEmbedding | 384    | train
5 | dec_embedding | DataEmbedding | 384    | train
6 | encoder       | Encoder       | 148 K  | train
7 | decoder       | Decoder       | 141 K  | train
--------------------------------------------------------
290 K     Trainable params
0         Non-trainable params
290 K     Total params
1.162     Total estimated model params size (MB)
85        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:08<00:00,  0.11it/s, v_num=1887, train_loss_step=0.327, train_loss_epoch=0.327]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:08<00:00,  0.11it/s, v_num=1887, train_loss_step=0.327, train_loss_epoch=0.327]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.29it/s]

Seed set to 42



Processando: NHITS


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.5 M  | train
-------------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.040    Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  3.92it/s, v_num=1889, train_loss_step=0.0312, train_loss_epoch=0.0312]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  3.81it/s, v_num=1889, train_loss_step=0.0312, train_loss_epoch=0.0312]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 87.05it/s] 

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Processando: Híbrido (MIMO)



  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.5 M  | train
-------------------------------------------------------
2.5 M     Trainable params
3.5 K     Non-trainable params
2.5 M     Total params
10.120    Total estimated model params size (MB)
31        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  2.64it/s, v_num=1891, train_loss_step=0.00859, train_loss_epoch=0.00859]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  2.56it/s, v_num=1891, train_loss_step=0.00859, train_loss_epoch=0.00859]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 62.90it/s]

Processando Datasets:  50%|█████     | 1/2 [41:03<41:03, 2463.43s/it]






Buscando dados de 'co2' via statsmodels...
-> Cópia do dataset 'co2' salva em: ./data/bronze\co2.csv
Processando: ARIMA para horizonte 12
Buscando melhor ordem ARIMA com auto_arima...
Melhor ordem encontrada: (3, 1, 2) Sazonal: (2, 0, 2, 12)


Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Processando: N-BEATS



  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.4 M  | train
-------------------------------------------------------
2.4 M     Trainable params
900       Non-trainable params
2.4 M     Total params
9.789     Total estimated model params size (MB)
31        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  4.14it/s, v_num=1893, train_loss_step=0.137, train_loss_epoch=0.137]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  4.00it/s, v_num=1893, train_loss_step=0.137, train_loss_epoch=0.137]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 64.75it/s]

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Processando: MLP



  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | mlp          | ModuleList    | 1.1 M  | train
4 | out          | Linear        | 12.3 K | train
-------------------------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.350     Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  8.46it/s, v_num=1895, train_loss_step=0.138, train_loss_epoch=0.138]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  8.12it/s, v_num=1895, train_loss_step=0.138, train_loss_epoch=0.138]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 46.10it/s] 

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Processando: LSTM



  | Name            | Type          | Params | Mode 
----------------------------------------------------------
0 | loss            | MAE           | 0      | train
1 | padder          | ConstantPad1d | 0      | train
2 | scaler          | TemporalNorm  | 0      | train
3 | hist_encoder    | LSTM          | 484 K  | train
4 | context_adapter | Linear        | 24.1 K | train
5 | mlp_decoder     | MLP           | 2.4 K  | train
----------------------------------------------------------
510 K     Trainable params
0         Non-trainable params
510 K     Total params
2.042     Total estimated model params size (MB)
11        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00, 22.95it/s, v_num=1897, train_loss_step=0.198, train_loss_epoch=0.198]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00, 20.39it/s, v_num=1897, train_loss_step=0.198, train_loss_epoch=0.198]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 13.58it/s]

Seed set to 42



Processando: Autoformer


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name          | Type          | Params | Mode 
--------------------------------------------------------
0 | loss          | MAE           | 0      | train
1 | padder_train  | ConstantPad1d | 0      | train
2 | scaler        | TemporalNorm  | 0      | train
3 | decomp        | SeriesDecomp  | 0      | train
4 | enc_embedding | DataEmbedding | 384    | train
5 | dec_embedding | DataEmbedding | 384    | train
6 | encoder       | Encoder       | 148 K  | train
7 | decoder       | Decoder       | 141 K  | train
--------------------------------------------------------
290 K     Trainable params
0         Non-trainable params
290 K     Total params
1.162     Total estimated model params size (MB)
85        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:04<00:00,  0.24it/s, v_num=1899, train_loss_step=0.273, train_loss_epoch=0.273]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:04<00:00,  0.23it/s, v_num=1899, train_loss_step=0.273, train_loss_epoch=0.273]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 31.08it/s]

Seed set to 42



Processando: NHITS


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.4 M  | train
-------------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.751     Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  4.65it/s, v_num=1901, train_loss_step=0.149, train_loss_epoch=0.149]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  4.45it/s, v_num=1901, train_loss_step=0.149, train_loss_epoch=0.149]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 120.79it/s]

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Processando: Híbrido (MIMO)



  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.4 M  | train
-------------------------------------------------------
2.4 M     Trainable params
900       Non-trainable params
2.4 M     Total params
9.789     Total estimated model params size (MB)
31        Modules in train mode
0         Modules in eval mode


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  3.71it/s, v_num=1903, train_loss_step=0.0439, train_loss_epoch=0.0439]

`Trainer.fit` stopped: `max_steps=150` reached.


Epoch 149: 100%|██████████| 1/1 [00:00<00:00,  3.61it/s, v_num=1903, train_loss_step=0.0439, train_loss_epoch=0.0439]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 73.06it/s] 




Buscando dados de 'co2' via statsmodels...
-> Cópia do dataset 'co2' salva em: ./data/bronze\co2.csv
Processando: ARIMA para horizonte 24
Buscando melhor ordem ARIMA com auto_arima...
Melhor ordem encontrada: (3, 1, 1) Sazonal: (2, 0, 2, 12)


In [None]:
# Concatenate the accumulated result frames and write them to `output_file`,
# which the report cell reads back with pd.read_csv.
if resultados_gerais:
    df_final = pd.concat(resultados_gerais)
    df_final.to_csv(output_file, index=False)
    print(f"\nArquivo '{output_file}' salvo com sucesso!")
else:
    # Make the no-op visible: otherwise the report cell could silently read a
    # stale file left over from a previous run.
    print(f"\nAVISO: nenhum resultado foi gerado; '{output_file}' não foi atualizado.")

In [None]:
# =========================================================
# SEÇÃO 7: GERAÇÃO DE RELATÓRIOS A PARTIR DOS ARQUIVOS SALVOS
# =========================================================

In [None]:
# Reads the predictions CSV at `output_file` and renders four reports:
#   1. RMSE vs. forecast horizon (line plot)
#   2. detailed metrics table for the longest horizon
#   3. per-dataset model ranking by RMSE for the longest horizon
#   4. Diebold-Mariano p-values of the reference model against every other model
# Any failure is reported with a message instead of a traceback (best-effort cell).
print("\n\n" + "=" * 60)
print("     GERANDO SUÍTE COMPLETA DE RELATÓRIOS")
print("=" * 60)
try:
    df_results = pd.read_csv(output_file)
    # Every column that is not an identifier is treated as one model's predictions.
    modelos = [col for col in df_results.columns if col not in ['ds', 'y_true', 'dataset', 'horizonte']]

    # 1. Metrics computed from the raw predictions.
    # The first element returned by dividir_serie_temporal is handed to
    # calcular_metricas as its third argument (presumably the training split
    # used for MASE scaling -- confirm against the helper definitions).
    y_train_dict = {
        dataset: dividir_serie_temporal(carregar_serie(dataset))[0].values
        for dataset in df_results['dataset'].unique()
    }
    df_melted = df_results.melt(
        id_vars=['ds', 'y_true', 'dataset', 'horizonte'],
        value_vars=modelos,
        var_name='Modelo',
        value_name='y_pred',
    )

    metricas_gerais = []
    for (dataset, horizonte, modelo), group in df_melted.groupby(['dataset', 'horizonte', 'Modelo']):
        if group['y_pred'].isnull().all():
            continue  # no predictions at all for this dataset/horizon/model
        metricas = calcular_metricas(group['y_true'], group['y_pred'], y_train_dict[dataset])
        metricas['dataset'], metricas['horizonte'], metricas['Modelo'] = dataset, horizonte, modelo
        metricas_gerais.append(metricas)

    if not metricas_gerais:
        # Without this guard the first column lookup below fails with an opaque KeyError.
        raise ValueError("nenhuma previsão válida encontrada no arquivo de resultados")

    rename_dict = {'RMSE': 'Mean RMSE', 'MAPE(%)': 'Mean MAPE(%)', 'MASE': 'Mean MASE'}
    df_metricas_final = pd.DataFrame(metricas_gerais).rename(columns=rename_dict)

    # --- Report 1: error evolution per horizon ---
    print("\n--- RELATÓRIO 1: EVOLUÇÃO DO ERRO (RMSE) POR HORIZONTE ---")
    fig, ax = plt.subplots(figsize=(14, 8))
    sns.lineplot(
        data=df_metricas_final, x='horizonte', y='Mean RMSE',
        hue='Modelo', style='Modelo', markers=True, dashes=False, ax=ax,
    )
    ax.set_title("Evolução do Erro (RMSE) com o Aumento do Horizonte", fontsize=16)
    ax.set_xlabel("Horizonte de Previsão")
    ax.set_ylabel("RMSE Médio")
    ax.grid(True)
    ax.set_xticks(df_metricas_final['horizonte'].unique())
    ax.legend(title='Modelo')
    plt.show()

    # --- Report 2: detailed performance (longest horizon only) ---
    print("\n--- RELATÓRIO 2: DESEMPENHO DETALHADO (HORIZONTE MAIS LONGO) ---")
    maior_horizonte = df_metricas_final['horizonte'].max()
    display(Markdown(f"A tabela a seguir mostra o desempenho detalhado para o horizonte mais longo testado: **{maior_horizonte} passos**."))
    df_detalhado = (
        df_metricas_final[df_metricas_final['horizonte'] == maior_horizonte]
        .set_index(['dataset', 'Modelo'])
        .drop(columns=['horizonte'])
    )
    display(df_detalhado.style.format('{:.3f}'))

    # --- Report 3: model ranking (longest horizon only) ---
    print("\n--- RELATÓRIO 3: RANKING DOS MODELOS (BASEADO EM RMSE, HORIZONTE MAIS LONGO) ---")
    # .copy() so the new column is added to an independent frame rather than a
    # slice of df_metricas_final; rows without an RMSE cannot be ranked.
    df_rank = (
        df_metricas_final[df_metricas_final['horizonte'] == maior_horizonte]
        .dropna(subset=['Mean RMSE'])
        .copy()
    )
    # method='min' yields whole-number ranks even for ties, so the int cast
    # never truncates a fractional average rank.
    df_rank['Rank'] = df_rank.groupby('dataset')['Mean RMSE'].rank(method='min').astype(int)
    df_pivot_rank = df_rank.pivot_table(index='dataset', columns='Modelo', values='Rank')
    if len(df_pivot_rank) > 1:
        df_pivot_rank.loc['Média do Rank'] = df_pivot_rank.mean(axis=0)
    display(df_pivot_rank.style.format('{:.1f}').highlight_min(axis=1, props='background-color: #4285F4; color: white;'))

    # --- Report 4: Diebold-Mariano hypothesis test (longest horizon only) ---
    print("\n--- RELATÓRIO 4: TESTE DE HIPÓTESE DIEBOLD-MARIANO (p-valor, HORIZONTE MAIS LONGO) ---")
    modelo_referencia = 'Híbrido (MIMO)'
    df_teste_maior_h = df_results[df_results['horizonte'] == maior_horizonte]
    dm_results = []
    for dataset_nome, group in df_teste_maior_h.groupby('dataset'):
        if modelo_referencia not in group.columns or group[modelo_referencia].isnull().all():
            continue
        row = {'dataset': dataset_nome}
        for modelo_comp in modelos:
            if modelo_comp == modelo_referencia or group[modelo_comp].isnull().all():
                continue
            # Keep only the time steps where actuals and both forecasts are present.
            pares = group[['y_true', modelo_referencia, modelo_comp]].dropna()
            try:
                # dieboldmariano.dm_test takes (actuals, forecast_1, forecast_2) and
                # computes the losses itself; it has no `alternative` keyword.
                # NOTE(review): with one_sided=True a small p-value should favour the
                # first forecast (the reference model) -- confirm against the
                # installed package version.
                _, p_value = dm_test(
                    pares['y_true'].tolist(),
                    pares[modelo_referencia].tolist(),
                    pares[modelo_comp].tolist(),
                    one_sided=True,
                )
                row[modelo_comp] = p_value
            except Exception as exc:
                # Stay best-effort, but surface the reason instead of hiding it.
                print(f"AVISO: teste Diebold-Mariano falhou para '{dataset_nome}' / '{modelo_comp}': {exc}")
                row[modelo_comp] = np.nan
        dm_results.append(row)
    if dm_results:
        df_dm = pd.DataFrame(dm_results).set_index('dataset')
        styler_dm = df_dm.style.format('{:.3f}')
        # Styler.applymap was renamed to Styler.map in pandas 2.1; use whichever exists.
        aplicar_estilo = getattr(styler_dm, 'map', None) or styler_dm.applymap
        display(aplicar_estilo(lambda x: 'background-color: lightgreen' if pd.notna(x) and x < 0.05 else ''))

except FileNotFoundError:
    print(f"\nERRO: Arquivo '{output_file}' não encontrado.")
except Exception as e:
    print(f"Ocorreu um erro ao gerar os relatórios: {e}")