# In [None]:
import optuna
import lightgbm as lgb
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error



# Build additional model features
def create_features(df_aux, target='PJME_MW', lags=(1, 2, 3)):
    """
    Return a copy of ``df_aux`` with calendar and lag features added.

    The calendar columns (``dayofweek``, ``quarter``, ``month``, ``year``,
    ``dayofyear``) are read from the frame's index, so the index must expose
    those datetime attributes (e.g. a ``DatetimeIndex``). One ``lag<k>``
    column is added per entry of ``lags``, holding ``target`` shifted down by
    ``k`` rows; the first ``k`` rows of each lag column are therefore NaN.

    Args:
        df_aux: Input frame; it is not modified.
        target: Name of the column the lag features are derived from.
        lags: Row offsets for which a ``lag<k>`` column is created.

    Returns:
        A new DataFrame with the original columns followed by the calendar
        columns and then the lag columns.
    """
    df_1 = df_aux.copy()
    index = df_1.index

    # Calendar features, added in a fixed order so the column layout is stable.
    for attr in ('dayofweek', 'quarter', 'month', 'year', 'dayofyear'):
        df_1[attr] = getattr(index, attr)

    # Lagged copies of the target; shifting is by row position, not by time.
    for lag in lags:
        df_1[f'lag{lag}'] = df_1[target].shift(lag)

    return df_1

# Optuna objective function
def objective(trial):
    """
    Score one sampled LightGBM configuration with time-series cross-validation.

    Reads the module-level ``dataframes[0]``, builds its features with
    ``create_features`` and evaluates the sampled hyperparameters on five
    ``TimeSeriesSplit`` folds (200 validation rows each, 24 rows skipped
    between the end of training and the start of validation).

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean validation RMSE across the folds (lower is better).
    """
    # Search space. `suggest_float` replaces the deprecated
    # `suggest_uniform` / `suggest_loguniform` helpers.
    param = {
        "n_estimators": trial.suggest_int("n_estimators", 500, 2000),
        "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.1, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "num_leaves": trial.suggest_int("num_leaves", 20, 150),
        "min_child_samples": trial.suggest_int("min_child_samples", 10, 100),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        # LightGBM only applies `subsample` when bagging is switched on with a
        # non-zero frequency; without this the sampled value has no effect.
        "subsample_freq": 1,
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.01, 1.0, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.01, 1.0, log=True),
        "random_state": 42,
        # Keep LightGBM's own logging quiet during the many fits of a study.
        "verbose": -1,
    }

    FEATURES = ['dayofyear', 'dayofweek', 'quarter', 'month', 'year',
                'lag1', 'lag2', 'lag3']
    TARGET = 'PJME_MW'

    # Build the features once on the whole series instead of once per fold.
    # Lags are then taken from the real preceding rows, so validation folds no
    # longer lose their first rows to NaN lags. `create_features` works on a
    # copy, so the shared frame is left untouched.
    featured = create_features(dataframes[0])
    X_all = featured[FEATURES]
    y_all = featured[TARGET]

    # Rows whose features are all present. Selection below is positional so
    # repeated timestamps in the index cannot duplicate rows.
    usable = X_all.notna().all(axis=1).to_numpy()

    # Time-series cross-validation
    test_size = 200
    tss = TimeSeriesSplit(n_splits=5, test_size=test_size, gap=24)
    scores = []
    for train_idx, val_idx in tss.split(X_all):
        train_idx = train_idx[usable[train_idx]]
        val_idx = val_idx[usable[val_idx]]

        X_train, y_train = X_all.iloc[train_idx], y_all.iloc[train_idx]
        X_val, y_val = X_all.iloc[val_idx], y_all.iloc[val_idx]

        # Early stopping goes through a callback: the `verbose` argument of
        # `fit` no longer exists in recent LightGBM releases.
        # NOTE(review): early stopping monitors the same fold that is scored
        # below, so the reported RMSE is slightly optimistic.
        model = lgb.LGBMRegressor(**param)
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            eval_metric="rmse",
            callbacks=[lgb.early_stopping(stopping_rounds=50, verbose=False)],
        )

        # RMSE computed explicitly; the `squared=False` flag of
        # `mean_squared_error` is not available in recent scikit-learn.
        y_pred = model.predict(X_val)
        scores.append(np.sqrt(mean_squared_error(y_val, y_pred)))

    # Average RMSE over the folds
    return float(np.mean(scores))

# Run the Optuna search: 50 trials, minimising the objective's return value
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Report the best hyperparameters found and their objective value
print(f"Mejores parámetros: {study.best_params}")
print(f"Mejor RMSE promedio: {study.best_value}")
