# Modelado – Predicción de Consumo Eléctrico
Modelos baseline, modelos avanzados, tuning y evaluación.


In [3]:
print("Entrenamiento baseline listo para implementar")


Entrenamiento baseline listo para implementar


In [None]:
# ============================================
#   PROYECTO A — MODELOS DE PRONÓSTICO
# ============================================

# -----------------------
# 1. Importar librerías
# -----------------------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

import lightgbm as lgb
import warnings
warnings.filterwarnings("ignore")

plt.style.use("default")

# -----------------------
# 2. Cargar dataset
# -----------------------
df = pd.read_csv("../data/consumo_sintetico.csv", parse_dates=["timestamp"])
df = df.set_index("timestamp")

df.head()


FileNotFoundError: [Errno 2] No such file or directory: 'data/consumo_sintetico.csv'

In [None]:
# -----------------------
# 3. Features basadas en tiempo
# -----------------------
df["hora"] = df.index.hour
df["dia_semana"] = df.index.dayofweek
df["mes"] = df.index.month
df["dia"] = df.index.day
df["año"] = df.index.year

# Lag features
df["lag_1"] = df["consumo"].shift(1)
df["lag_24"] = df["consumo"].shift(24)
df["lag_168"] = df["consumo"].shift(168)   # 7 days lag

df = df.dropna()


In [None]:
# -----------------------
# 4. Train-test split temporal
# -----------------------
train = df.iloc[:-24*30]   # últimos 30 días como test
test = df.iloc[-24*30:]

X_train = train.drop("consumo", axis=1)
y_train = train["consumo"]
X_test = test.drop("consumo", axis=1)
y_test = test["consumo"]

(len(train), len(test))


In [None]:
# -----------------------
# 5. Modelo Naive
# -----------------------
y_pred_naive = test["lag_1"]

mae = mean_absolute_error(y_test, y_pred_naive)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_naive))

print("NAIVE — MAE:", mae)
print("NAIVE — RMSE:", rmse)

plt.figure(figsize=(14,5))
plt.plot(y_test.reset_index(drop=True), label="Real")
plt.plot(y_pred_naive.reset_index(drop=True), label="Naive")
plt.legend()
plt.title("Modelo Naive")
plt.show()


In [None]:
# -----------------------
# 6. Random Forest
# -----------------------
rf = RandomForestRegressor(n_estimators=300, random_state=42)
rf.fit(X_train, y_train)

y_pred_rf = rf.predict(X_test)

print("RF — MAE:", mean_absolute_error(y_test, y_pred_rf))
print("RF — RMSE:", np.sqrt(mean_squared_error(y_test, y_pred_rf)))

plt.figure(figsize=(14,5))
plt.plot(y_test.reset_index(drop=True), label="Real")
plt.plot(y_pred_rf, label="RF")
plt.legend()
plt.title("Random Forest — Pred vs Real")
plt.show()


In [None]:
# -----------------------
# 7. LightGBM
# -----------------------
lgbm = lgb.LGBMRegressor(n_estimators=500, learning_rate=0.02)
lgbm.fit(X_train, y_train)

y_pred_lgb = lgbm.predict(X_test)

print("LGBM — MAE:", mean_absolute_error(y_test, y_pred_lgb))
print("LGBM — RMSE:", np.sqrt(mean_squared_error(y_test, y_pred_lgb)))

plt.figure(figsize=(14,5))
plt.plot(y_test.reset_index(drop=True), label="Real")
plt.plot(y_pred_lgb, label="LightGBM")
plt.legend()
plt.title("LightGBM — Pred vs Real")
plt.show()


In [None]:
# -----------------------
# 8. Comparación de todos los modelos
# -----------------------
results = pd.DataFrame({
    "MAE": [
        mean_absolute_error(y_test, y_pred_naive),
        mean_absolute_error(y_test, y_pred_rf),
        mean_absolute_error(y_test, y_pred_lgb)
    ],
    "RMSE": [
        np.sqrt(mean_squared_error(y_test, y_pred_naive)),
        np.sqrt(mean_squared_error(y_test, y_pred_rf)),
        np.sqrt(mean_squared_error(y_test, y_pred_lgb))
    ]
}, index=["Naive", "RandomForest", "LightGBM"])

results
