In [None]:
# 02_model_baselines.ipynb

# Modelos Base: Previsão de Temperatura com AutoARIMA, HoltWinters e WindowAverage

import pandas as pd
import numpy as np
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA, HoltWinters, WindowAverage
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
import joblib
import os

# --- 1. Load the raw readings and put them on an hourly grid ---
df = pd.read_csv('jena_climate_2009_2016.csv')
df['Date Time'] = pd.to_datetime(
    df['Date Time'],
    format="%d.%m.%Y %H:%M:%S",
    dayfirst=True,
)
# Average every reading inside each hour, fill hours that end up empty by
# interpolation, then turn the timestamp index back into a regular column.
df = (
    df.set_index('Date Time')
    .resample('h')
    .mean()
    .interpolate()
    .reset_index()
)

# --- 2. Keep only the most recent three years ---
# The window is anchored on the newest timestamp present in the data.
latest_timestamp = df['Date Time'].max()
cut_date = latest_timestamp - pd.DateOffset(years=3)
is_recent = df['Date Time'] >= cut_date
df = df[is_recent]

# --- 3. Select variables (chosen from the DTW clustering) ---
selected_features = ['p (mbar)', 'Tpot (K)', 'rh (%)', 'rho (g/m**3)', 'wd (deg)']
target_variable = 'T (degC)'

# --- 4. Build the frame handed to StatsForecast ---
# The timestamp becomes `ds`, the target becomes `y`, and a constant
# `unique_id` labels the single series.
df_model = (
    df[['Date Time', target_variable, *selected_features]]
    .rename(columns={'Date Time': 'ds', target_variable: 'y'})
    .assign(unique_id='jena_temp')
)

# --- 5. Split: everything up to the cutoff trains, the final 24 hours test ---
last_timestamp = df_model['ds'].max()
cutoff = last_timestamp - pd.Timedelta(hours=24)
in_train = df_model['ds'] <= cutoff
in_test = df_model['ds'] > cutoff
df_train = df_model.loc[in_train].copy()
df_test = df_model.loc[in_test].copy()

# --- 6. Instantiate and fit the models ---
# Every baseline works on a 24-step (one day of hourly data) season or window.
auto_arima = AutoARIMA(
    max_p=2,
    max_q=2,
    max_d=2,
    seasonal=True,
    season_length=24,
    alias='AutoARIMA',
)
holt_winters = HoltWinters(season_length=24, alias='HoltWinters')
window_average = WindowAverage(window_size=24, alias='WindowAverage')
models = [auto_arima, holt_winters, window_average]

sf = StatsForecast(models=models, freq='h', n_jobs=-1)
# Only the id, timestamp and target columns are passed in; the selected
# feature columns are not used by these models.
training_frame = df_train[['unique_id', 'ds', 'y']]
sf.fit(training_frame)
forecast = sf.predict(h=24)

# --- 6a. Naive model: per-hour mean of the 30 and 31 December rows in the training data ---
df_train['day_month'] = df_train['ds'].dt.strftime('%m-%d')
df_train['hour'] = df_train['ds'].dt.hour
is_year_end = df_train['day_month'].isin(['12-30', '12-31'])
naive_hist = df_train[is_year_end]
naive_forecast = (
    naive_hist.groupby('hour')['y']
    .mean()
    .rename('naive')
    .reset_index()
)
# Look up the mean for each test timestamp by its hour of day. The left merge
# keeps the test rows in their original order, so the values can be attached
# to `forecast` positionally.
forecast_naive = df_test[['ds']].assign(hour=df_test['ds'].dt.hour)
forecast_naive = forecast_naive.merge(naive_forecast, on='hour', how='left')
forecast['Naive_2day'] = forecast_naive['naive'].to_numpy()

# --- 6b. Naive model: per-hour mean over every December day in the training period ---
# Only rows at or before `cutoff` are used. `df` still contains the held-out
# final 24 hours, so averaging over all of its December rows would let any
# test observations that fall in December leak into the very baseline that is
# scored against them.
is_december = df['Date Time'].dt.month == 12
is_training_period = df['Date Time'] <= cutoff
df_dez = df[is_december & is_training_period].copy()
df_dez['hour'] = df_dez['Date Time'].dt.hour
mean_by_hour_december = df_dez.groupby('hour')['T (degC)'].mean().reset_index()
mean_by_hour_december.columns = ['hour', 'naive_dez']
# Map each test timestamp to the mean of its hour of day; the left merge keeps
# the test rows in order, so the result lines up with `forecast` row by row.
forecast_naive_dez = df_test[['ds']].copy()
forecast_naive_dez['hour'] = forecast_naive_dez['ds'].dt.hour
forecast_naive_dez = forecast_naive_dez.merge(mean_by_hour_december, on='hour', how='left')
forecast['Naive_dez'] = forecast_naive_dez['naive_dez'].values

# --- 7. Plot the forecasts next to the most recent history ---
plt.figure(figsize=(15, 5))
# Show only the last 100 rows of the series so the forecast horizon stays readable.
recent_history = df_model.iloc[-100:]
plt.plot(recent_history['ds'], recent_history['y'], label='Histórico')
# Every column other than the id and timestamp holds one model's predictions.
forecast_columns = [col for col in forecast.columns if col not in ['unique_id', 'ds']]
for model in forecast_columns:
    plt.plot(forecast['ds'], forecast[model], label=model)
plt.title('Previsão de Temperatura com Modelos Base (últimos 3 anos)')
plt.xlabel('Data')
plt.ylabel('Temperatura (°C)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# --- 8. Compute each model's MAE against the observed final 24 hours ---
y_true = df_test['y'].values
# Drop the id/timestamp columns (whichever of them are present) so that only
# the per-model prediction columns remain.
prediction_frame = forecast.drop(columns=['unique_id', 'ds'], errors='ignore')
y_preds = {name: series.values for name, series in prediction_frame.items()}
mae_scores = {}
for name, predicted in y_preds.items():
    mae_scores[name] = mean_absolute_error(y_true, predicted)

print("\nMAE dos modelos nas últimas 24h (últimos 3 anos de treino):")
for model, mae in mae_scores.items():
    print(f"{model}: {mae:.2f} °C")

# --- 9. Save the fitted AutoARIMA model ---
# StatsForecast.fit() trains copies of the objects in `sf.models` and keeps the
# trained copies in `sf.fitted_` (one row per series, one column per model, in
# the same order as `sf.models`). The entries of `sf.models` stay unfitted, so
# the trained estimator has to be read from `sf.fitted_`.
autoarima_position = next(
    (i for i, m in enumerate(sf.models) if m.alias == 'AutoARIMA'),
    None,
)
fitted_models = getattr(sf, 'fitted_', None)
if autoarima_position is not None and fitted_models is not None:
    # Row 0: this script fits a single series ('jena_temp').
    autoarima_model = fitted_models[0][autoarima_position]
else:
    autoarima_model = None

if autoarima_model is not None:
    print("\nParâmetros do modelo AutoARIMA selecionado:")
    params_str = str(autoarima_model.__dict__)
    print(params_str)
    os.makedirs('data', exist_ok=True)
    # Explicit encoding keeps the text file identical across platforms.
    with open('data/autoarima_params.txt', 'w', encoding='utf-8') as f:
        f.write(params_str)
    joblib.dump(autoarima_model, 'data/autoarima_model.pkl')
