In [None]:
# ===================================================================
# PASSO 1: IMPORTAÇÕES E FUNÇÕES AUXILIARES
# ===================================================================
import os
import pickle
import numpy as np
import pandas as pd
import statsmodels.api as sm
from abc import ABC, abstractmethod
from tqdm.notebook import tqdm

# Importações do Darts
from darts import TimeSeries
from darts.models import NBEATSModel, RNNModel, LinearRegressionModel
from darts.dataprocessing.transformers import MissingValuesFiller, Scaler
from darts.metrics import mape, mase

# Importações de modelos e utilidades
from pmdarima import auto_arima
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor

# Ignorar avisos para uma saída mais limpa
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
print("Todas as bibliotecas foram importadas.")

In [None]:
# ===================================================================
# PASSO 2: CLASSE DATALOADER
# ===================================================================
class DataLoader:
    """Load classic time-series datasets and cache each one as a local CSV file.

    The first request for a dataset builds it from ``statsmodels`` and writes
    ``<base_path>/<dataset_name>.csv``; later requests read that file instead.
    """

    # R datasets that only need a regular date index attached to their
    # 'value' column: key -> (R dataset name, first timestamp, pandas
    # frequency alias, name given to the resulting Series).
    _R_DATASETS = {
        'AirPassengers': ("AirPassengers", '1949-01-01', 'MS', "AirPassengers"),
        'nottem': ("nottem", '1920-01-01', 'MS', "NottinghamTemp"),
        'JohnsonJohnson': ("JohnsonJohnson", '1960-01-01', 'QE', "JohnsonJohnson"),
        'UKgas': ("UKgas", '1960-01-01', 'QE', "UKGas"),
        'ukdriverdeaths': ("UKDriverDeaths", '1969-01-01', 'MS', "UKDriverDeaths"),
    }

    def __init__(self, base_path='datasets/'):
        """Remember the cache directory and create it if it does not exist."""
        self.base_path = base_path
        os.makedirs(self.base_path, exist_ok=True)

    @staticmethod
    def _year_index(years):
        """Turn a numeric year column into datetimes (January 1st of each year).

        The values are cast to ``int`` and then ``str`` first, so that a float
        year such as ``1700.0`` is parsed as ``"1700"`` rather than being
        rejected by the ``'%Y'`` format because of the trailing ``".0"``.
        """
        return pd.to_datetime(years.astype(int).astype(str), format='%Y')

    def _build_series(self, dataset_name: str) -> pd.Series:
        """Build the requested dataset from statsmodels.

        Raises:
            ValueError: if ``dataset_name`` is not one of the known datasets.
        """
        if dataset_name in self._R_DATASETS:
            r_name, start, freq, label = self._R_DATASETS[dataset_name]
            df = sm.datasets.get_rdataset(r_name).data
            index = pd.date_range(start=start, periods=len(df), freq=freq)
            return pd.Series(df['value'].values, index=index, name=label)
        if dataset_name == 'co2':
            data = sm.datasets.co2.load_pandas().data
            # Weekly means, forward-filling weeks that have no observation.
            return data['co2'].resample('W').mean().ffill().rename("CO2")
        if dataset_name == 'Sunspots':
            df = sm.datasets.sunspots.load_pandas().data
            return pd.Series(df['SUNACTIVITY'].values,
                             index=self._year_index(df['YEAR']), name="Sunspots")
        if dataset_name == 'Nile':
            df = sm.datasets.nile.load_pandas().data.reset_index()
            return pd.Series(df['volume'].values,
                             index=self._year_index(df['year']), name="Nile")
        raise ValueError(f"Dataset '{dataset_name}' não reconhecido.")

    def load_classic_ts_dataset(self, dataset_name: str) -> pd.Series:
        """Return the dataset as a pandas Series, using the CSV cache when present.

        Args:
            dataset_name: key of the dataset; also used as the cache file name.

        Returns:
            The series, or ``None`` when the dataset is unknown or building /
            saving it fails (the error is printed, not raised).
        """
        local_path = os.path.join(self.base_path, f"{dataset_name}.csv")
        if os.path.exists(local_path):
            cached = pd.read_csv(local_path, index_col=0, parse_dates=True)
            # Squeeze only the column axis: a bare squeeze() would collapse a
            # one-row file into a scalar instead of a Series.
            return cached.squeeze("columns")
        print(
            f"Carregando o dataset '{dataset_name}' da biblioteca e salvando localmente...")
        try:
            series = self._build_series(dataset_name)
            series.to_csv(local_path)
            return series
        except Exception as e:
            # Best effort: report the problem and let the caller skip the dataset.
            print(f"Erro ao carregar o dataset '{dataset_name}': {e}")
            return None

In [None]:
# ===================================================================
# PASSO 3: DEFINIÇÃO DAS CLASSES DE MODELO E FUNÇÕES AUXILIARES
# ===================================================================

In [None]:
def create_sliding_window_dataset(data, n_in=1, n_out=1):
    """Cut ``data`` into consecutive (input window, target window) pairs.

    Each pair starts one position after the previous one; a pair is kept only
    if its ``n_in`` inputs followed by ``n_out`` targets fit inside ``data``.

    Returns:
        Two numpy arrays: the stacked input windows and the stacked targets.
    """
    total = len(data)
    span = n_in + n_out
    # Start positions whose full input+target span still fits in the data.
    starts = [start for start in range(total) if start + span <= total]
    inputs = [data[start:start + n_in] for start in starts]
    targets = [data[start + n_in:start + span] for start in starts]
    return np.array(inputs), np.array(targets)


def get_safe_pandas_series(darts_series: TimeSeries) -> pd.Series:
    """Convert a Darts TimeSeries into a pandas Series of its flattened values.

    The result is indexed by the TimeSeries' own ``time_index``.
    """
    flat_values = darts_series.values().flatten()
    time_index = darts_series.time_index
    return pd.Series(flat_values, index=time_index)

In [None]:
# ===================================================================
# PASSO 3.1: CLASSE BASE DOS MODELOS
# ===================================================================

In [None]:
class BaseModel(ABC):
    """Abstract interface shared by every forecasting model in this notebook.

    Subclasses must implement ``fit`` and ``predict``; ``name`` identifies the
    model and is what ``str(model)`` returns.
    """

    def __init__(self, name: str):
        self.name = name

    @abstractmethod
    def fit(self, train_series: TimeSeries, forecast_horizon: int):
        """Train the model on ``train_series`` for the given forecast horizon."""

    @abstractmethod
    def predict(self, n: int) -> TimeSeries:
        """Return a forecast of ``n`` steps as a TimeSeries."""

    def __str__(self):
        return self.name

In [None]:
# ===================================================================
# MODELO ORIGINAL DO ARTIGO (RECURSIVO-DIRETO)
# ===================================================================
class HySMF(BaseModel):
    """Hybrid forecaster: auto-ARIMA plus one N-BEATS expert per horizon step.

    ``fit`` trains a non-seasonal ``auto_arima`` on the series, computes the
    in-sample residuals, scales them to (-1, 1) and trains one ``NBEATSModel``
    for every step ``h`` in ``1..forecast_horizon`` (``output_chunk_length=h``).
    ``predict`` adds, for each step ``h``, the last value predicted by expert
    ``h`` (after inverse scaling) to the ARIMA forecast.
    """

    def __init__(self, name="HyS-MF", input_chunk_length=24, n_epochs=50):
        """
        Args:
            name: label used for reporting and result file names.
            input_chunk_length: number of past residuals fed to each expert.
            n_epochs: training epochs for each N-BEATS expert.
        """
        super().__init__(name)
        self.input_chunk_length = input_chunk_length
        self.n_epochs = n_epochs
        self.arima_model = None
        self.nbeats_experts = {}
        self.residuals_train = None
        self.scaler = Scaler(MinMaxScaler(feature_range=(-1, 1)))

    def fit(self, train_series: TimeSeries, forecast_horizon: int):
        """Fit the ARIMA model and one residual expert per horizon step."""
        self.arima_model = auto_arima(
            get_safe_pandas_series(train_series),
            seasonal=False, stepwise=True, suppress_warnings=True)

        # Force a plain array so the predictions are matched to the training
        # timestamps by position, whatever index the ARIMA output carries.
        in_sample_preds = np.asarray(self.arima_model.predict_in_sample())
        fitted = TimeSeries.from_series(
            pd.Series(in_sample_preds, index=train_series.time_index))
        self.residuals_train = train_series - fitted
        residuals_scaled = self.scaler.fit_transform(self.residuals_train)

        # Build a fresh dict so experts from a previous fit (possibly on other
        # data or with a larger horizon) can never be used by predict().
        experts = {}
        for h in range(1, forecast_horizon + 1):
            expert = NBEATSModel(input_chunk_length=self.input_chunk_length,
                                 output_chunk_length=h, n_epochs=self.n_epochs,
                                 random_state=42)
            expert.fit(residuals_scaled)
            experts[h] = expert
        self.nbeats_experts = experts

    def predict(self, n: int) -> TimeSeries:
        """Forecast ``n`` steps ahead.

        Raises:
            ValueError: if ``n`` exceeds the horizon the experts were trained
                for (including the case where ``fit`` was never called).
        """
        missing = [h for h in range(1, n + 1) if h not in self.nbeats_experts]
        if missing:
            raise ValueError(
                f"{self.name} foi treinado para um horizonte de "
                f"{len(self.nbeats_experts)} passos; não é possível prever {n} passos.")

        arima_forecast = TimeSeries.from_series(
            self.arima_model.predict(n_periods=n))
        residuals_scaled = self.scaler.transform(self.residuals_train)

        residual_forecasts_np = np.zeros(n)
        for h in range(1, n + 1):
            pred_h = self.nbeats_experts[h].predict(n=h, series=residuals_scaled)
            # Expert h contributes only its final (h-th step) value.
            residual_forecasts_np[h - 1] = pred_h.values().flatten()[-1]

        residual_forecasts_ts = TimeSeries.from_times_and_values(
            times=arima_forecast.time_index,
            values=residual_forecasts_np,
            columns=arima_forecast.columns)
        residual_forecasts_descaled = self.scaler.inverse_transform(
            residual_forecasts_ts)
        return arima_forecast + residual_forecasts_descaled

In [None]:
# ===================================================================
# VARIAÇÃO 1 (RECURSIVO-MIMO)
# ===================================================================
class HyS_MF_MIMO(BaseModel):
    """Hybrid forecaster: auto-ARIMA plus a single multi-output N-BEATS model.

    A non-seasonal ``auto_arima`` is fit on the series; its residuals are
    scaled to (0, 1) and used to train one ``NBEATSModel`` whose
    ``output_chunk_length`` equals the forecast horizon.
    """

    def __init__(self, name="HyS-MF_MIMO", n_lags=24, n_epochs=50):
        super().__init__(name)
        self.n_lags = n_lags
        self.n_epochs = n_epochs
        self.arima_model = None
        self.nbeats_model = None
        self.residuals_train = None
        self.scaler = Scaler(MinMaxScaler(feature_range=(0, 1)))

    def fit(self, train_series: TimeSeries, forecast_horizon: int):
        """Fit the ARIMA model, then the N-BEATS model on its scaled residuals."""
        history = get_safe_pandas_series(train_series)
        self.arima_model = auto_arima(
            history, seasonal=False, stepwise=True, suppress_warnings=True)

        arima_residuals = self.arima_model.resid()
        self.residuals_train = TimeSeries.from_series(arima_residuals)
        scaled_residuals = self.scaler.fit_transform(self.residuals_train)

        self.nbeats_model = NBEATSModel(
            input_chunk_length=self.n_lags,
            output_chunk_length=forecast_horizon,
            n_epochs=self.n_epochs,
            random_state=42,
        )
        self.nbeats_model.fit(scaled_residuals)

    def predict(self, n: int) -> TimeSeries:
        """Return the ARIMA forecast plus the inverse-scaled residual forecast."""
        linear_part = TimeSeries.from_series(
            self.arima_model.predict(n_periods=n))
        scaled_history = self.scaler.transform(self.residuals_train)
        scaled_correction = self.nbeats_model.predict(n=n, series=scaled_history)
        correction = self.scaler.inverse_transform(scaled_correction)
        return linear_part + correction

In [None]:
# ===================================================================
# VARIAÇÃO 2 (DIRETO-DIRETO) - SUA CONTRIBUIÇÃO ORIGINAL
# ===================================================================
# class HyS_MF_Direto(BaseModel):
#     """
#     Versão do HyS-MF com estratégia Direto-Direto.
#     """

#     def __init__(self, name="HyS-MF-Direto", lags=24, n_epochs=50):
#         super().__init__(name)
#         self.lags, self.n_epochs = lags, n_epochs
#         self.linear_models, self.nbeats_experts, self.scalers = {}, {}, {}
#         self.residuals_train_dict = {}

#     def fit(self, train_series: TimeSeries, forecast_horizon: int):
#         for h in range(1, forecast_horizon + 1):
#             linear_model = LinearRegressionModel(
#                 lags=self.lags, output_chunk_length=h)
#             linear_model.fit(train_series)
#             self.linear_models[h] = linear_model

#             in_sample_preds = linear_model.predict(n=len(train_series))
#             residuals_h = train_series - in_sample_preds
#             self.residuals_train_dict[h] = residuals_h

#             scaler = Scaler(MinMaxScaler(feature_range=(-1, 1)))
#             residuals_scaled = scaler.fit_transform(residuals_h)
#             self.scalers[h] = scaler

#             nbeats_expert = NBEATSModel(
#                 input_chunk_length=self.lags, output_chunk_length=h, n_epochs=self.n_epochs, random_state=42)
#             nbeats_expert.fit(residuals_scaled)
#             self.nbeats_experts[h] = nbeats_expert

#     def predict(self, n: int) -> TimeSeries:
#         final_forecast_values = np.zeros(n)

#         for h in range(1, n + 1):
#             linear_pred = self.linear_models[h].predict(
#                 n=h).values().flatten()[-1]

#             # O modelo N-BEATS usa a série de resíduos na qual foi treinado para prever
#             residual_train_h = self.residuals_train_dict[h]
#             residuals_pred_scaled = self.nbeats_experts[h].predict(
#                 n=h, series=residual_train_h)

#             residuals_pred_descaled = self.scalers[h].inverse_transform(
#                 residuals_pred_scaled).values().flatten()[-1]

#             final_forecast_values[h-1] = linear_pred + residuals_pred_descaled

#         start_date = self.residuals_train_dict[1].end_time(
#         ) + self.residuals_train_dict[1].freq
#         final_index = pd.date_range(
#             start=start_date, periods=n, freq=self.residuals_train_dict[1].freq)

#         return TimeSeries.from_times_and_values(times=final_index, values=final_forecast_values)

In [None]:
# ===================================================================
# MODELOS DE COMPARAÇÃO E BASELINES
# ===================================================================
class PureARIMA(BaseModel):
    """Baseline that forecasts with ``auto_arima`` alone.

    Args:
        name: label used for reporting and result file names.
        seasonal: whether ``auto_arima`` should search seasonal models.
        m: seasonal period passed to ``auto_arima`` (12 suits monthly data;
            use e.g. 4 for quarterly series). Ignored when ``seasonal`` is
            False, in which case a period of 1 is passed.
    """

    def __init__(self, name="PureARIMA", seasonal=True, m=12):
        super().__init__(name)
        self.seasonal = seasonal
        self.m = m
        self.model = None

    def fit(self, train_series: TimeSeries, forecast_horizon: int):
        """Fit ``auto_arima`` on the training series (the horizon is unused)."""
        self.model = auto_arima(
            get_safe_pandas_series(train_series),
            seasonal=self.seasonal,
            m=self.m if self.seasonal else 1,
            stepwise=True,
            suppress_warnings=True,
        )

    def predict(self, n: int) -> TimeSeries:
        """Return the ``n``-step ARIMA forecast as a TimeSeries."""
        return TimeSeries.from_series(self.model.predict(n_periods=n))


class ARIMA_MLP(BaseModel):
    """Hybrid forecaster: auto-ARIMA plus an MLP trained on its residuals.

    The MLP maps the last ``n_lags`` residuals to the next
    ``forecast_horizon`` residuals in a single prediction.
    """

    def __init__(self, name="ARIMA-MLP", n_lags=12, max_iter=500):
        super().__init__(name)
        self.n_lags = n_lags
        self.max_iter = max_iter
        self.arima_model = None
        self.mlp_model = None
        self.last_residuals = None

    def fit(self, train_series: TimeSeries, forecast_horizon: int):
        """Fit the ARIMA model, then the MLP on sliding windows of residuals."""
        history = get_safe_pandas_series(train_series)
        self.arima_model = auto_arima(
            history, seasonal=False, stepwise=True, suppress_warnings=True)

        residuals = self.arima_model.resid()
        # Kept as the MLP input used at prediction time.
        self.last_residuals = residuals[-self.n_lags:]

        window_inputs, window_targets = create_sliding_window_dataset(
            residuals, self.n_lags, forecast_horizon)
        self.mlp_model = MLPRegressor(
            hidden_layer_sizes=(20,),
            max_iter=self.max_iter,
            random_state=42,
        )
        self.mlp_model.fit(window_inputs, window_targets)

    def predict(self, n: int) -> TimeSeries:
        """Return the ARIMA forecast plus the MLP's residual forecast."""
        linear_part = TimeSeries.from_series(
            self.arima_model.predict(n_periods=n))

        mlp_input = self.last_residuals.values.reshape(1, -1)
        correction_values = self.mlp_model.predict(mlp_input).flatten()
        correction = TimeSeries.from_times_and_values(
            times=linear_part.time_index,
            values=correction_values,
            columns=linear_part.columns,
        )
        return linear_part + correction


class ARIMA_LSTM(BaseModel):
    """Hybrid forecaster: auto-ARIMA plus an LSTM trained on its residuals.

    The residuals are scaled to (0, 1) before being passed to the Darts
    ``RNNModel`` (``model='LSTM'``).
    """

    def __init__(self, name="ARIMA-LSTM", n_lags=24, n_epochs=100):
        super().__init__(name)
        self.n_lags = n_lags
        self.n_epochs = n_epochs
        self.arima_model = None
        self.lstm_model = None
        self.residuals_train = None
        self.scaler = Scaler(MinMaxScaler(feature_range=(0, 1)))

    def fit(self, train_series: TimeSeries, forecast_horizon: int):
        """Fit the ARIMA model, then the LSTM on its scaled residuals."""
        history = get_safe_pandas_series(train_series)
        self.arima_model = auto_arima(
            history, seasonal=False, stepwise=True, suppress_warnings=True)

        arima_residuals = self.arima_model.resid()
        self.residuals_train = TimeSeries.from_series(arima_residuals)
        scaled_residuals = self.scaler.fit_transform(self.residuals_train)

        self.lstm_model = RNNModel(
            model='LSTM',
            input_chunk_length=self.n_lags,
            output_chunk_length=1,
            n_epochs=self.n_epochs,
            random_state=42,
        )
        self.lstm_model.fit(scaled_residuals)

    def predict(self, n: int) -> TimeSeries:
        """Return the ARIMA forecast plus the inverse-scaled LSTM residual forecast."""
        linear_part = TimeSeries.from_series(
            self.arima_model.predict(n_periods=n))
        scaled_history = self.scaler.transform(self.residuals_train)
        scaled_correction = self.lstm_model.predict(n=n, series=scaled_history)
        correction = self.scaler.inverse_transform(scaled_correction)
        return linear_part + correction


class PureLSTM(BaseModel):
    """Baseline that forecasts with a Darts LSTM (``RNNModel``) alone.

    The series is scaled to (0, 1) for training and forecasts are mapped back
    to the original scale.
    """

    def __init__(self, name="PureLSTM", n_lags=24, n_epochs=100):
        super().__init__(name)
        self.n_lags = n_lags
        self.n_epochs = n_epochs
        self.scaler = Scaler(MinMaxScaler(feature_range=(0, 1)))
        self.model = None

    def fit(self, train_series: TimeSeries, forecast_horizon: int):
        """Scale the training series and fit the LSTM on it."""
        scaled_train = self.scaler.fit_transform(train_series)
        lstm_settings = dict(
            model='LSTM',
            input_chunk_length=self.n_lags,
            output_chunk_length=1,  # corrected to 1
            n_epochs=self.n_epochs,
            random_state=42,
        )
        self.model = RNNModel(**lstm_settings)
        self.model.fit(scaled_train)

    def predict(self, n: int) -> TimeSeries:
        """Forecast ``n`` steps and return them on the original scale."""
        # The model works on the normalized scale; undo the scaling at the end.
        scaled_forecast = self.model.predict(n=n)
        return self.scaler.inverse_transform(scaled_forecast)


class PureNBEATS(BaseModel):
    """Baseline that forecasts with a single N-BEATS model alone.

    The series is scaled to (0, 1) for training; the model's
    ``output_chunk_length`` equals the forecast horizon passed to ``fit``.
    """

    def __init__(self, name="PureNBEATS", n_lags=24, n_epochs=50):
        super().__init__(name)
        self.n_lags = n_lags
        self.n_epochs = n_epochs
        self.scaler = Scaler(MinMaxScaler(feature_range=(0, 1)))
        self.model = None

    def fit(self, train_series: TimeSeries, forecast_horizon: int):
        """Scale the training series and fit N-BEATS on it."""
        scaled_train = self.scaler.fit_transform(train_series)
        self.model = NBEATSModel(
            input_chunk_length=self.n_lags,
            output_chunk_length=forecast_horizon,
            n_epochs=self.n_epochs,
            random_state=42,
        )
        self.model.fit(scaled_train)

    def predict(self, n: int) -> TimeSeries:
        """Forecast ``n`` steps and return them on the original scale."""
        # The model works on the normalized scale; undo the scaling at the end.
        scaled_forecast = self.model.predict(n=n)
        return self.scaler.inverse_transform(scaled_forecast)

In [None]:
# ===================================================================
# PASSO 4: CLASSE EXPERIMENT RUNNER
# ===================================================================
class ExperimentRunner:
    """Fit and evaluate every model on every dataset, saving the results.

    Predictions are written as CSV files under ``results/predictions`` and the
    MAPE/MASE scores as pickled dicts under ``results/metrics``.
    """

    def __init__(self, datasets: dict, models: list, forecast_horizon: int):
        self.datasets = datasets
        self.models = models
        self.forecast_horizon = forecast_horizon

    def _evaluate(self, model, ds_name, train, test):
        """Fit one model, forecast the test window, then save and print its scores."""
        model.fit(train, self.forecast_horizon)
        prediction = model.predict(self.forecast_horizon)

        pred_df = prediction.to_series().to_frame(name='prediction')
        pred_df.to_csv(f'results/predictions/{ds_name}_{model.name}.csv')

        mape_score = mape(test, prediction)
        mase_score = mase(test, prediction, train)
        metrics = {'MAPE': mape_score, 'MASE': mase_score}
        with open(f'results/metrics/{ds_name}_{model.name}.pkl', 'wb') as f:
            pickle.dump(metrics, f)

        print(
            f"Resultados para {model.name} em {ds_name}: MAPE={mape_score:.2f}%, MASE={mase_score:.3f}")

    def run(self):
        """Run the full experiment; a failing model is reported and skipped."""
        for out_dir in ('results/predictions', 'results/metrics'):
            os.makedirs(out_dir, exist_ok=True)

        dataset_bar = tqdm(self.datasets.items(), desc="Processando Datasets")
        for ds_name, series in dataset_bar:
            # The last `forecast_horizon` points are held out as the test window.
            train = series[:-self.forecast_horizon]
            test = series[-self.forecast_horizon:]

            model_bar = tqdm(
                self.models, desc=f"Modelos para {ds_name}", leave=False)
            for model in model_bar:
                try:
                    self._evaluate(model, ds_name, train, test)
                except Exception as e:
                    print(
                        f"ERRO ao processar o modelo {model.name} no dataset {ds_name}: {e}")

In [None]:
# ===================================================================
# PASSO 5: EXECUÇÃO DO EXPERIMENTO
# ===================================================================
# Names of the datasets to run; each one is requested from DataLoader below.
LISTA_DE_DATASETS = ['AirPassengers', 'co2', 'UKgas', 'Sunspots','ukdriverdeaths']
data_loader = DataLoader()
# Maps dataset name -> Darts TimeSeries, ready to be passed to ExperimentRunner.
datasets_darts = {}
filler = MissingValuesFiller()
for name in LISTA_DE_DATASETS:
    pd_series = data_loader.load_classic_ts_dataset(name)
    # The loader returns None when a dataset cannot be loaded; such datasets
    # are simply left out of the experiment.
    if pd_series is not None:
        # Convert to a Darts TimeSeries, asking Darts to insert any missing
        # dates, then fill the resulting missing values.
        series_darts = TimeSeries.from_series(
            pd_series, fill_missing_dates=True, freq=None)
        series_darts = filler.transform(series_darts)
        datasets_darts[name] = series_darts

# Model instances evaluated on every dataset, all built with default settings.
models_to_run = [
    # --- MAIN MODEL AND VARIATIONS ---
    HySMF(),               # Original model from the paper (ARIMA: recursive / N-BEATS: direct)
    HyS_MF_MIMO(name="HyS-MF_MIMO"), # Variation 1 (ARIMA: recursive / N-BEATS: MIMO)

    # --- COMPARISON HYBRIDS FROM THE LITERATURE ---
    ARIMA_MLP(),           # Hybrid (ARIMA: recursive / MLP: MIMO)
    ARIMA_LSTM(),          # Hybrid (ARIMA: recursive / LSTM: recursive, because of the library)

    # --- PURE BASELINES ---
    PureARIMA(),           # Linear baseline (recursive strategy)
    PureLSTM(),            # Non-linear baseline (recursive strategy, because of the library)
    PureNBEATS()           # Non-linear baseline (MIMO strategy)
]

# Number of final points of each series held out and forecast by every model.
FORECAST_HORIZON = 10
runner = ExperimentRunner(datasets_darts, models_to_run, FORECAST_HORIZON)

print("\n\n--- INICIANDO A EXECUÇÃO DOS EXPERIMENTOS ---")
runner.run()
print("\n\n--- TODOS OS EXPERIMENTOS FORAM CONCLUÍDOS ---")