In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# --- 1. Load and prepare the data ---
timestamp_format = "%d.%m.%Y %H:%M:%S"
df = pd.read_csv('jena_climate_2009_2016.csv')
df['Date Time'] = pd.to_datetime(df['Date Time'], format=timestamp_format, dayfirst=True)

# Average the readings into hourly bins, then fill empty bins by interpolation.
hourly_means = df.set_index('Date Time').resample('h').mean()
df = hourly_means.interpolate().reset_index()

# --- 2. Keep only December rows (from every year) ---
december_mask = df['Date Time'].dt.month == 12
df = df.loc[december_mask]

# --- 3. LSTM with lagged covariates ---
# Select the variables used by the model
features = ['T (degC)', 'p (mbar)', 'rh (%)', 'wd (deg)']
df_lstm = df[features].copy()

# Scale every variable to the [0, 1] range
# NOTE(review): the scaler is fitted on all rows, including the final 24 rows
# that are held out as the test set further down.
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df_lstm), columns=features)

# `df` is not one continuous series: wherever a timestamp is not exactly one
# hour after the previous row, a new contiguous run begins. Label each run so
# that lags are computed inside a run only; a plain shift over the whole frame
# would fill the first hours of a run with values from the end of the previous
# run, i.e. from a completely different period.
starts_new_run = df['Date Time'].diff() != pd.Timedelta(hours=1)
run_id = starts_new_run.cumsum().to_numpy()
scaled_by_run = df_scaled.groupby(run_id)

# Add lagged copies of every variable (only past values are used as inputs).
# Building all columns first and concatenating once avoids repeated inserts.
look_back = 24
lagged_columns = {
    f'{col}_lag{lag}': scaled_by_run[col].shift(lag)
    for col in features
    for lag in range(1, look_back + 1)
}
df_scaled = pd.concat([df_scaled, pd.DataFrame(lagged_columns)], axis=1)

# Drop the rows at the start of each run that lack a full lag history
df_scaled = df_scaled.dropna().reset_index(drop=True)

# Split target and inputs
y = df_scaled['T (degC)']
X = df_scaled.drop(columns=features)  # keep only the lagged columns

# Train/test split: the final 24 rows (last 24 h) are the test set
X_train, X_test = X[:-24], X[-24:]
y_train, y_test = y[:-24], y[-24:]

# Reshape for the LSTM: [samples, time steps, features], with a single time step
X_train = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

# LSTM model: one recurrent layer followed by a single-output dense layer
n_steps, n_inputs = X_train.shape[1:]
model = Sequential([
    LSTM(50, input_shape=(n_steps, n_inputs)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train with a validation split so over-fitting can be inspected afterwards
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=1,
    validation_split=0.2,
    verbose=0,
)

# Predict the held-out window
y_pred = model.predict(X_test)

# Map the scaled target and predictions back to the original temperature scale
temp_scaler = MinMaxScaler().fit(df_lstm[['T (degC)']])
y_test_column = y_test.to_numpy().reshape(-1, 1)
y_test_rescaled = temp_scaler.inverse_transform(y_test_column)
y_pred_rescaled = temp_scaler.inverse_transform(y_pred)

# --- 4. Evaluate performance ---
mae_lstm = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
print(f"MAE do modelo LSTM nas últimas 24h: {mae_lstm:.2f} °C")

# --- 5. Plot the results ---
recent_rows = df.iloc[-100:]
forecast_times = df['Date Time'].iloc[-24:]

fig, ax = plt.subplots(figsize=(15, 5))
# Observed temperature over the last 100 rows
ax.plot(recent_rows['Date Time'], recent_rows['T (degC)'], label='Histórico')
# Model predictions for the final 24 rows
ax.plot(forecast_times, y_pred_rescaled, label='LSTM', linestyle='--')
ax.set_title('Previsão de Temperatura com LSTM (sem vazamento)')
ax.set_xlabel('Data')
ax.set_ylabel('Temperatura (°C)')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

# --- 6. Check for over-fitting ---
loss_fig, loss_ax = plt.subplots(figsize=(10, 4))
# Training and validation loss per epoch, as recorded by model.fit
loss_curves = (
    ('loss', 'Loss - Treino'),
    ('val_loss', 'Loss - Validação'),
)
for history_key, legend_text in loss_curves:
    loss_ax.plot(history.history[history_key], label=legend_text)
loss_ax.set_title('Curva de Perda do Modelo LSTM')
loss_ax.set_xlabel('Épocas')
loss_ax.set_ylabel('Erro quadrático médio')
loss_ax.legend()
loss_ax.grid(True)
loss_fig.tight_layout()
plt.show()
