# In [None]:
# 03_model_selection.ipynb

# Seleção de Modelos e Comparação com Dados Reais

import pandas as pd
import numpy as np
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA, AutoMFLES, WindowAverage, HoltWinters
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
import joblib

# --- 1. Load the Jena climate readings and put them on an hourly grid ---
df = pd.read_csv('jena_climate_2009_2016.csv')

# Timestamps are stored as day.month.year strings; parse them with an explicit format.
df['Date Time'] = pd.to_datetime(
    df['Date Time'],
    format="%d.%m.%Y %H:%M:%S",
    dayfirst=True,
)

# Average every column within each hour, then fill any hour left as NaN
# by interpolation, and finally restore 'Date Time' as a regular column.
hourly_mean = df.set_index('Date Time').resample('h').mean()
df = hourly_mean.interpolate().reset_index()

# --- 2. Pick the variables and reshape into the long format (unique_id / ds / y) ---
target_variable = 'T (degC)'
selected_features = ['p (mbar)', 'Tpot (K)', 'rh (%)', 'rho (g/m**3)', 'wd (deg)']

# Timestamp first, then the target, then the extra feature columns.
keep_cols = ['Date Time', target_variable, *selected_features]

# Rename timestamp -> 'ds' and target -> 'y', and tag every row with a single
# series identifier, since only one time series is modelled here.
df_model = (
    df.loc[:, keep_cols]
    .rename(columns={'Date Time': 'ds', target_variable: 'y'})
    .assign(unique_id='jena_temp')
)

# --- 3. Hold out the final 24 hours as the test set; everything earlier is training ---
cutoff = df_model['ds'].max() - pd.Timedelta(hours=24)

# Rows at or before the cutoff train the models; rows strictly after it are
# kept aside for evaluation.
train_mask = df_model['ds'] <= cutoff
holdout_mask = df_model['ds'] > cutoff

df_train = df_model.loc[train_mask].copy()
df_test = df_model.loc[holdout_mask].copy()

# --- 4. Load the previously saved AutoARIMA model ---
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts that come from a trusted source.
autoarima_model = joblib.load('data/autoarima_model.pkl')

# --- 5. Refit the candidate models, including the loaded AutoARIMA ---
holt_winters = HoltWinters(season_length=24)   # 24-step season on hourly data
window_average = WindowAverage(window_size=24)  # average over the last 24 points
models = [autoarima_model, holt_winters, window_average]

# Only the identifier, timestamp and target columns are passed to the models.
train_frame = df_train[['unique_id', 'ds', 'y']]

# NOTE(review): a single series is fit here, so n_jobs=-1 presumably brings
# no speed-up — confirm before relying on it.
sf = StatsForecast(models=models, freq='h', n_jobs=-1)
sf.fit(train_frame)

# Forecast the 24 steps that follow the end of the training data.
forecast = sf.predict(h=24)

# --- 6. Compute the MAE of each model on the held-out rows ---
# The comparison is positional: it assumes the forecast rows are in the same
# timestamp order (and number) as the rows of df_test.
y_true = df_test['y'].values

# Every forecast column other than the identifier/timestamp is a model output.
non_model_columns = ('unique_id', 'ds')
y_preds = {}
for column in forecast.columns:
    if column in non_model_columns:
        continue
    y_preds[column] = forecast[column].values

mae_scores = {}
for model_name, predicted in y_preds.items():
    mae_scores[model_name] = mean_absolute_error(y_true, predicted)

print("\nMAE dos modelos nas últimas 24h:")
for model_name, score in mae_scores.items():
    print(f"{model_name}: {score:.2f} °C")

# --- 7. Plot the forecasts against the observed series ---
fig, ax = plt.subplots(figsize=(15, 5))

# Last 100 rows of the full series; this window also covers the held-out
# period, so the forecasts are drawn on top of the actual values.
recent = df_model.iloc[-100:]
ax.plot(recent['ds'], recent['y'], label='Histórico')

# One line per model column of the forecast frame.
model_columns = [c for c in forecast.columns if c not in ('unique_id', 'ds')]
for name in model_columns:
    ax.plot(forecast['ds'], forecast[name], label=name)

ax.set_title('Comparação de Modelos com Dados Reais')
ax.set_xlabel('Data')
ax.set_ylabel('Temperatura (°C)')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()
