In [1]:
# ===================================================================
# PASSO 1: IMPORTAÇÕES E CONFIGURAÇÃO
# ===================================================================
import pandas as pd
import statsmodels.api as sm
import os
import pickle
import numpy as np
from abc import ABC, abstractmethod
import warnings
from tqdm.notebook import tqdm
import shutil
import matplotlib.pyplot as plt

# Silence FutureWarning and UserWarning so the notebook output stays readable.
# These filters are process-wide and apply to every cell executed after this one.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

# Importações do Darts
from darts import TimeSeries
from darts.models import NBEATSModel, RNNModel
from darts.dataprocessing.transformers import MissingValuesFiller, Scaler
from darts.metrics import mape, mase

# Importações de modelos e utilidades
from pmdarima import auto_arima
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor

print("Todas as bibliotecas foram importadas.")

Todas as bibliotecas foram importadas.


In [2]:
# ===================================================================
# PASSO 2: CLASSE DATALOADER
# ===================================================================
class DataLoader:
    """Loads classic time-series datasets and caches each one as a CSV file.

    The first request for a dataset fetches it through statsmodels, builds a
    ``pd.Series`` with a datetime index and writes it to
    ``<base_path>/<dataset_name>.csv``. Later requests read that CSV instead.
    """

    # Datasets fetched with ``sm.datasets.get_rdataset``:
    # dataset_name -> (R dataset name, first timestamp, pandas frequency, series name)
    _R_DATASETS = {
        'AirPassengers': ("AirPassengers", '1949-01-01', 'MS', "AirPassengers"),
        'nottem': ("nottem", '1920-01-01', 'MS', "NottinghamTemp"),
        'JohnsonJohnson': ("JohnsonJohnson", '1960-01-01', 'QE', "JohnsonJohnson"),
        'UKgas': ("UKgas", '1960-01-01', 'QE', "UKGas"),
        'ukdriverdeaths': ("UKDriverDeaths", '1969-01-01', 'MS', "UKDriverDeaths"),
    }

    def __init__(self, base_path='datasets/'):
        """Remember the cache directory and create it if it does not exist."""
        self.base_path = base_path
        os.makedirs(self.base_path, exist_ok=True)

    @staticmethod
    def _year_index(years):
        """Convert a column of years (possibly stored as floats) to timestamps.

        The values are cast to int and then to str before parsing, so that a
        float such as 1700.0 is parsed as "1700" rather than "1700.0", which
        does not match the ``%Y`` format.
        """
        return pd.to_datetime(years.astype(int).astype(str), format='%Y')

    def _build_series(self, dataset_name: str) -> pd.Series:
        """Fetch ``dataset_name`` through statsmodels and return it as a Series.

        Raises:
            ValueError: if ``dataset_name`` is not one of the supported names.
        """
        if dataset_name in self._R_DATASETS:
            r_name, start, freq, series_name = self._R_DATASETS[dataset_name]
            df = sm.datasets.get_rdataset(r_name).data
            index = pd.date_range(start=start, periods=len(df), freq=freq)
            return pd.Series(df['value'].values, index=index, name=series_name)
        if dataset_name == 'co2':
            data = sm.datasets.co2.load_pandas().data
            # Weekly mean, forward-filling the weeks that have no observation.
            return data['co2'].resample('W').mean().ffill().rename("CO2")
        if dataset_name == 'Sunspots':
            df = sm.datasets.sunspots.load_pandas().data
            return pd.Series(df['SUNACTIVITY'].values, index=self._year_index(df['YEAR']), name="Sunspots")
        if dataset_name == 'Nile':
            df = sm.datasets.nile.load_pandas().data.reset_index()
            return pd.Series(df['volume'].values, index=self._year_index(df['year']), name="Nile")
        raise ValueError(f"Dataset '{dataset_name}' não reconhecido.")

    def load_classic_ts_dataset(self, dataset_name: str) -> pd.Series:
        """Return the requested dataset, reading the local CSV cache when present.

        Args:
            dataset_name: one of 'AirPassengers', 'co2', 'nottem',
                'JohnsonJohnson', 'UKgas', 'Sunspots', 'Nile', 'ukdriverdeaths'.

        Returns:
            The dataset as a ``pd.Series``, or ``None`` when loading fails
            (the error is printed rather than raised, so callers must check).
        """
        local_path = os.path.join(self.base_path, f"{dataset_name}.csv")
        if os.path.exists(local_path):
            # squeeze("columns") only collapses the column axis, so a cached
            # file holding a single row still comes back as a Series instead
            # of a bare scalar.
            return pd.read_csv(local_path, index_col=0, parse_dates=True).squeeze("columns")
        print(f"Carregando o dataset '{dataset_name}' da biblioteca e salvando localmente...")
        try:
            series = self._build_series(dataset_name)
            series.to_csv(local_path)
            return series
        except Exception as e:
            # Deliberately best-effort: report the failure and let the caller skip it.
            print(f"Erro ao carregar o dataset '{dataset_name}': {e}")
            return None


In [3]:
# ===================================================================
# PASSO 3: DEFINIÇÃO DO FRAMEWORK HÍBRIDO (COM CORREÇÃO FINAL)
# ===================================================================
class BaseModel(ABC):
    """Abstract interface shared by every forecasting model in this notebook.

    Subclasses implement ``fit`` and ``predict``; ``str(model)`` yields the
    model's name.
    """

    def __init__(self, name: str):
        """Store the display name of the model."""
        self.name = name

    def __str__(self):
        """Return the model's name."""
        return self.name

    @abstractmethod
    def fit(self, train_series: TimeSeries, forecast_horizon: int):
        """Train the model on ``train_series`` for the given forecast horizon."""

    @abstractmethod
    def predict(self, n: int) -> TimeSeries:
        """Forecast ``n`` steps and return them as a TimeSeries."""

def _get_safe_pandas_series(darts_series: TimeSeries) -> pd.Series:
    """Build a pandas Series from a Darts series' flattened values and its time index."""
    flat_values = darts_series.values().flatten()
    return pd.Series(data=flat_values, index=darts_series.time_index)

class HybridForecastingFramework(BaseModel):
    """Hybrid forecaster: ARIMA for the series plus a Darts model on its residuals.

    ``fit`` runs a non-seasonal ``auto_arima`` on the training series, scales
    the in-sample residuals to [-1, 1] and trains ``non_linear_model_class`` on
    them. ``predict`` returns the ARIMA forecast plus the inverse-scaled
    residual forecast.

    Strategies:
        'direct'    -- one model per step h, built with ``output_chunk_length=h``;
                       the h-th forecast value is the last step of that model's
                       h-step prediction.
        'mimo'      -- a single model with ``output_chunk_length=forecast_horizon``.
        'recursive' -- a single model with ``output_chunk_length=1``.

    Args:
        non_linear_model_class: class of the residual model, instantiated with
            ``**kwargs`` plus ``output_chunk_length``.
        strategy: 'direct', 'mimo' or 'recursive'.
        model_name: display name; defaults to
            ``ARIMA-<class name>-<STRATEGY>``.
        **kwargs: constructor arguments forwarded to ``non_linear_model_class``.

    Raises:
        ValueError: if ``strategy`` is not one of the supported values.
    """

    _STRATEGIES = ('direct', 'mimo', 'recursive')

    def __init__(self, non_linear_model_class, strategy='mimo', model_name=None, **kwargs):
        # Fail at construction time: an unknown strategy would otherwise make
        # fit() train nothing and predict() crash on a None model.
        if strategy not in self._STRATEGIES:
            raise ValueError(f"Estratégia '{strategy}' inválida. Use uma de: {', '.join(self._STRATEGIES)}.")
        self.non_linear_model_class = non_linear_model_class
        self.strategy = strategy
        self.non_linear_params = kwargs
        name = model_name or f"ARIMA-{non_linear_model_class.__name__}-{strategy.upper()}"
        super().__init__(name)
        self.arima_model, self.non_linear_model, self.residuals_train = None, None, None
        self.scaler = Scaler(MinMaxScaler(feature_range=(-1, 1)))

    def _build_non_linear_model(self, output_chunk_length: int):
        """Instantiate the residual model without mutating the stored kwargs."""
        params = dict(self.non_linear_params, output_chunk_length=output_chunk_length)
        return self.non_linear_model_class(**params)

    def fit(self, train_series: TimeSeries, forecast_horizon: int):
        """Fit the ARIMA component, then the residual model(s).

        Raises:
            ValueError: if ``forecast_horizon`` is smaller than 1.
        """
        if forecast_horizon < 1:
            raise ValueError("forecast_horizon deve ser um inteiro positivo.")

        # 1. Linear component.
        self.arima_model = auto_arima(_get_safe_pandas_series(train_series), seasonal=False, stepwise=True, suppress_warnings=True)

        # 2. In-sample residuals of the linear component.
        # NOTE(review): resid() is assumed to return a pandas Series carrying the
        # training index (TimeSeries.from_series needs one) -- confirm for the
        # installed pmdarima version.
        residuals_pd = self.arima_model.resid()
        self.residuals_train = TimeSeries.from_series(residuals_pd)
        residuals_scaled = self.scaler.fit_transform(self.residuals_train)

        # 3. Non-linear component trained on the scaled residuals.
        if self.strategy == 'direct':
            experts = {}
            for h in range(1, forecast_horizon + 1):
                expert = self._build_non_linear_model(h)
                expert.fit(residuals_scaled)
                experts[h] = expert
            self.non_linear_model = experts
        else:
            chunk = forecast_horizon if self.strategy == 'mimo' else 1
            self.non_linear_model = self._build_non_linear_model(chunk)
            self.non_linear_model.fit(residuals_scaled)

    def predict(self, n: int) -> TimeSeries:
        """Return the ``n``-step forecast (ARIMA forecast + residual forecast).

        Raises:
            RuntimeError: if called before ``fit``.
            ValueError: if the 'direct' strategy is asked for more steps than
                it was trained for.
        """
        if self.arima_model is None or self.non_linear_model is None:
            raise RuntimeError("O modelo precisa ser treinado com fit() antes de predict().")
        if self.strategy == 'direct' and n > len(self.non_linear_model):
            raise ValueError(
                f"A estratégia 'direct' foi treinada para {len(self.non_linear_model)} passos; "
                f"não é possível prever {n}."
            )

        arima_forecast = TimeSeries.from_series(self.arima_model.predict(n_periods=n))
        residuals_scaled = self.scaler.transform(self.residuals_train)

        if self.strategy == 'direct':
            forecasts_np = np.zeros(n)
            for h in range(1, n + 1):
                pred_h = self.non_linear_model[h].predict(n=h, series=residuals_scaled)
                # Only the last step of the h-step prediction is used for step h.
                forecasts_np[h - 1] = pred_h.values().flatten()[-1]
            pred_ts_scaled = TimeSeries.from_times_and_values(arima_forecast.time_index, forecasts_np)
        else:
            pred_ts_scaled = self.non_linear_model.predict(n=n, series=residuals_scaled)

        residual_forecast = self.scaler.inverse_transform(pred_ts_scaled)
        return arima_forecast + residual_forecast

In [4]:
# Modelos Puros (Baselines)
class PureARIMA(BaseModel):
    """Baseline: ``auto_arima`` fitted directly on the series.

    Args:
        name: display name of the model.
        m: seasonal period passed to ``auto_arima`` (12 for monthly data,
            4 for quarterly, 1 for no seasonality). Defaults to 12, the value
            that used to be hard-coded.
    """

    def __init__(self, name="PureARIMA", m=12):
        super().__init__(name)
        self.m = m
        self.model = None

    def fit(self, train_series: TimeSeries, forecast_horizon: int):
        """Fit the ARIMA model; ``forecast_horizon`` is accepted but unused."""
        self.model = auto_arima(
            _get_safe_pandas_series(train_series),
            # A period of 1 has no seasonal component, so only search seasonal
            # models when m > 1.
            seasonal=self.m > 1,
            m=self.m,
            stepwise=True,
            suppress_warnings=True,
        )

    def predict(self, n: int) -> TimeSeries:
        """Return the ``n``-step ARIMA forecast as a TimeSeries."""
        return TimeSeries.from_series(self.model.predict(n_periods=n))

class PureLSTM(BaseModel):
    """Baseline: a Darts LSTM ``RNNModel`` trained on the series scaled to [0, 1]."""

    def __init__(self, name="PureLSTM", n_lags=24, n_epochs=100):
        super().__init__(name)
        self.n_lags = n_lags
        self.n_epochs = n_epochs
        self.scaler = Scaler(MinMaxScaler(feature_range=(0, 1)))
        self.model = None

    def fit(self, train_series: TimeSeries, forecast_horizon: int):
        """Fit the scaler and the LSTM; ``forecast_horizon`` is accepted but unused."""
        scaled_train = self.scaler.fit_transform(train_series)
        lstm_config = {
            'model': 'LSTM',
            'input_chunk_length': self.n_lags,
            'output_chunk_length': 1,
            'n_epochs': self.n_epochs,
            'random_state': 42,
        }
        self.model = RNNModel(**lstm_config)
        self.model.fit(scaled_train)

    def predict(self, n: int) -> TimeSeries:
        """Forecast ``n`` steps and map them back to the original scale."""
        scaled_forecast = self.model.predict(n=n)
        return self.scaler.inverse_transform(scaled_forecast)

class PureNBEATS(BaseModel):
    """Baseline: a Darts ``NBEATSModel`` trained on the series scaled to [0, 1]."""

    def __init__(self, name="PureNBEATS", n_lags=24, n_epochs=100):
        super().__init__(name)
        self.n_lags, self.n_epochs = n_lags, n_epochs
        self.scaler = Scaler(MinMaxScaler(feature_range=(0, 1)))
        self.model = None

    def fit(self, train_series: TimeSeries, forecast_horizon: int):
        """Fit the scaler and an N-BEATS model that outputs ``forecast_horizon`` steps."""
        train_scaled = self.scaler.fit_transform(train_series)
        self.model = NBEATSModel(input_chunk_length=self.n_lags, output_chunk_length=forecast_horizon, n_epochs=self.n_epochs, random_state=42)
        self.model.fit(train_scaled)

    def predict(self, n: int) -> TimeSeries:
        """Forecast ``n`` steps and map them back to the original scale.

        The model is trained on scaled data, so its raw output lives in the
        scaled space and must be inverse-transformed before it is compared
        with the unscaled test series.
        """
        prediction_scaled = self.model.predict(n=n)
        return self.scaler.inverse_transform(prediction_scaled)

In [5]:
# ===================================================================
# PASSO 4: CLASSE EXPERIMENT RUNNER
# ===================================================================
class ExperimentRunner:
    """Fits every model on every dataset and stores predictions and metrics.

    For each dataset the last ``forecast_horizon`` points are held out as the
    test set. Each model's forecast is written to
    ``<results_dir>/predictions/<dataset>_<model>.csv`` and its MAPE/MASE to
    ``<results_dir>/metrics/<dataset>_<model>.pkl``.

    Args:
        datasets: mapping of dataset name to series.
        models: model objects exposing ``fit(train, horizon)`` and ``predict(n)``.
        forecast_horizon: number of held-out steps to forecast (must be >= 1).
        results_dir: output directory. WARNING: it is deleted and recreated at
            the start of every ``run``.

    Raises:
        ValueError: if ``forecast_horizon`` is smaller than 1.
    """

    def __init__(self, datasets: dict, models: list, forecast_horizon: int, results_dir: str = 'results/'):
        # A horizon of 0 would make series[:-0] an empty training set.
        if forecast_horizon < 1:
            raise ValueError("forecast_horizon deve ser um inteiro positivo.")
        self.datasets, self.models, self.forecast_horizon = datasets, models, forecast_horizon
        self.results_dir = results_dir

    def run(self):
        """Run all experiments; a failure in one model is printed and skipped."""
        predictions_dir = os.path.join(self.results_dir, 'predictions')
        metrics_dir = os.path.join(self.results_dir, 'metrics')
        # Start from a clean output directory so stale results never mix in.
        if os.path.exists(self.results_dir):
            shutil.rmtree(self.results_dir)
        os.makedirs(predictions_dir, exist_ok=True)
        os.makedirs(metrics_dir, exist_ok=True)

        for ds_name, series in tqdm(self.datasets.items(), desc="Processando Datasets"):
            # The split needs at least one training point besides the test window.
            if len(series) <= self.forecast_horizon:
                print(f"ERRO: o dataset {ds_name} tem apenas {len(series)} pontos; "
                      f"são necessários mais de {self.forecast_horizon}.")
                continue
            train, test = series[:-self.forecast_horizon], series[-self.forecast_horizon:]
            for model in tqdm(self.models, desc=f"Modelos para {ds_name}", leave=False):
                try:
                    model.fit(train, self.forecast_horizon)
                    prediction = model.predict(self.forecast_horizon)
                    pred_df = prediction.to_series().to_frame(name='prediction')
                    pred_df.to_csv(os.path.join(predictions_dir, f'{ds_name}_{model}.csv'))
                    mape_score = mape(test, prediction)
                    mase_score = mase(test, prediction, train)
                    metrics = {'MAPE': mape_score, 'MASE': mase_score}
                    with open(os.path.join(metrics_dir, f'{ds_name}_{model}.pkl'), 'wb') as f:
                        pickle.dump(metrics, f)
                    print(f"Resultados para {model}: MAPE={mape_score:.2f}%, MASE={mase_score:.3f}")
                except Exception as e:
                    # Best-effort: one failing model must not abort the whole experiment.
                    print(f"ERRO ao processar o modelo {model} no dataset {ds_name}: {e}")

In [None]:
# ===================================================================
# PASSO 5: EXECUÇÃO DO EXPERIMENTO
# ===================================================================
# --- Load the data ---
# Full benchmark list: ['AirPassengers', 'Nile', 'UKgas', 'Sunspots', 'ukdriverdeaths']
LISTA_DE_DATASETS = ['AirPassengers']
data_loader = DataLoader()
filler = MissingValuesFiller()
datasets_darts = {}
for name in LISTA_DE_DATASETS:
    pd_series = data_loader.load_classic_ts_dataset(name)
    if pd_series is None:
        # The loader already printed the error; skip this dataset.
        continue
    raw_ts = TimeSeries.from_series(pd_series, fill_missing_dates=True, freq=None)
    datasets_darts[name] = filler.transform(raw_ts)

# --- Full list of models for the experiment ---
# Settings shared by every neural residual model; each call below unpacks them
# into its own kwargs dict.
nn_kwargs = dict(input_chunk_length=24, n_epochs=100, random_state=42)
models_to_run = [
    HybridForecastingFramework(non_linear_model_class=NBEATSModel, strategy='direct', model_name="HyS-MF", **nn_kwargs),
    HybridForecastingFramework(non_linear_model_class=NBEATSModel, strategy='mimo', model_name="HyS-MF_MIMO", **nn_kwargs),
    HybridForecastingFramework(non_linear_model_class=RNNModel, strategy='recursive', model_name="ARIMA-LSTM", model='LSTM', **nn_kwargs),
    PureARIMA(),
    PureLSTM(n_lags=24, n_epochs=100),
    PureNBEATS(n_lags=24, n_epochs=100),
]

# --- Run the experiment engine ---
FORECAST_HORIZON = 12
runner = ExperimentRunner(
    datasets_darts,
    models_to_run,
    FORECAST_HORIZON,
)

print("\n\n--- INICIANDO A EXECUÇÃO DOS EXPERIMENTOS COM O NOVO FRAMEWORK ---")
runner.run()
print("\n\n--- TODOS OS EXPERIMENTOS FORAM CONCLUÍDOS ---")



--- INICIANDO A EXECUÇÃO DOS EXPERIMENTOS COM O NOVO FRAMEWORK ---


Processando Datasets:   0%|          | 0/1 [00:00<?, ?it/s]

Modelos para AirPassengers:   0%|          | 0/6 [00:00<?, ?it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 6.2 M  | train
-------------------------------------------------------------
6.2 M     Trainable params
1.4 K     Non-trainable params
6.2 M     Total params
24.780    Total estimated model params size (MB)
396       Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 6.2 M  | train
-------------------------------------------------------------
6.2 M     Trainable params
1.4 K     Non-trainable params
6.2 M     Total params
24.780    Total estimated model params size (MB)
396       Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 6.2 M  | train
-------------------------------------------------------------
6.2 M     Trainable params
1.4 K     Non-trainable params
6.2 M     Total params
24.781    Total estimated model params size (MB)
396       Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 6.2 M  | train
-------------------------------------------------------------
6.2 M     Trainable params
1.4 K     Non-trainable params
6.2 M     Total params
24.782    Total estimated model params size (MB)
396       Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 6.2 M  | train
-------------------------------------------------------------
6.2 M     Trainable params
1.4 K     Non-trainable params
6.2 M     Total params
24.782    Total estimated model params size (MB)
396       Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 6.2 M  | train
-------------------------------------------------------------
6.2 M     Trainable params
1.4 K     Non-trainable params
6.2 M     Total params
24.783    Total estimated model params size (MB)
396       Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 6.2 M  | train
-------------------------------------------------------------
6.2 M     Trainable params
1.4 K     Non-trainable params
6.2 M     Total params
24.784    Total estimated model params size (MB)
396       Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]