In [6]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.6.0


In [7]:
import pandas as pd
import numpy as np
import holidays
import time
import optuna

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import TimeSeriesSplit
from lightgbm import LGBMRegressor

In [8]:
df = pd.read_csv('/content/drive/MyDrive/ProbEstatistica/1_ano_dados_data_center_limpa.csv')
df.set_index(df["timestamp"], inplace=True)
df.drop(columns=["timestamp"],inplace=True)
df.index = pd.to_datetime(df.index)

In [14]:
import pandas as pd
import numpy as np
import holidays
import optuna
import time

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_percentage_error
from lightgbm import LGBMRegressor


def tunar_lightgbm_temporal_split(df, n_lags=4, test_size=0.2, n_trials=30, seed=42):
    """
    Faz tuning dos hiperparâmetros do LightGBM usando Optuna com split temporal fixo (80/20).
    """

    df = df.copy()
    df.index = pd.to_datetime(df.index)

    # === Features temporais ===
    df['hora'] = df.index.hour
    df['dia_semana'] = df.index.dayofweek
    df['mes'] = df.index.month
    feriados_br = holidays.Brazil()
    df['feriado'] = df.index.map(lambda x: 1 if x in feriados_br else 0)

    # === Lags ===
    for i in range(1, n_lags + 1):
        df[f'lag_{i}'] = df['requisicoes'].shift(i)

    df = df.dropna()

    X = df.drop(columns='requisicoes')
    y = df['requisicoes']

    # === Split temporal 80/20 ===
    split_idx = int(len(df) * (1 - test_size))
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

    # === Padronização ===
    scaler = StandardScaler()
    X_train_scaled = pd.DataFrame(
        scaler.fit_transform(X_train),
        index=X_train.index,
        columns=X_train.columns
    )
    X_test_scaled = pd.DataFrame(
        scaler.transform(X_test),
        index=X_test.index,
        columns=X_test.columns
    )

    # === Função objetivo para o Optuna ===
    def objective(trial):
        params = {
            "n_estimators": 500,
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "num_leaves": trial.suggest_int("num_leaves", 20, 200),
            "max_depth": trial.suggest_int("max_depth", 3, 15),
            "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "reg_alpha": trial.suggest_float("reg_alpha", 1e-3, 10.0, log=True),
            "reg_lambda": trial.suggest_float("reg_lambda", 1e-3, 10.0, log=True),
            "random_state": seed,
            "n_jobs": -1,
            "verbosity": -1
        }

        model = LGBMRegressor(**params)
        model.fit(X_train_scaled, y_train)

        y_pred = model.predict(X_test_scaled)
        mape = mean_absolute_percentage_error(y_test, y_pred)

        return mape

    # === Otimização com Optuna ===
    study = optuna.create_study(direction="minimize", study_name="LightGBM_TemporalSplit_Tuning")
    study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

    print("\nMelhores hiperparâmetros encontrados:")
    for k, v in study.best_params.items():
        print(f"  {k}: {v}")
    print(f"\nMAPE (validação 80/20 temporal): {study.best_value:.4f}")

    return study



In [16]:
study = tunar_lightgbm_temporal_split(df)

[I 2025-11-12 23:02:06,544] A new study created in memory with name: LightGBM_TemporalSplit_Tuning


  0%|          | 0/30 [00:00<?, ?it/s]

[I 2025-11-12 23:02:14,659] Trial 0 finished with value: 0.12423704545347355 and parameters: {'learning_rate': 0.09133160171900243, 'num_leaves': 61, 'max_depth': 8, 'min_child_samples': 37, 'subsample': 0.8365339200914359, 'colsample_bytree': 0.6929037016192361, 'reg_alpha': 9.490200098566147, 'reg_lambda': 0.05622493277355228}. Best is trial 0 with value: 0.12423704545347355.
[I 2025-11-12 23:02:16,141] Trial 1 finished with value: 0.11958174177877369 and parameters: {'learning_rate': 0.02949938785302991, 'num_leaves': 63, 'max_depth': 9, 'min_child_samples': 34, 'subsample': 0.6891687772265828, 'colsample_bytree': 0.8633860196577965, 'reg_alpha': 0.46066135867717156, 'reg_lambda': 0.2706184918985331}. Best is trial 1 with value: 0.11958174177877369.
[I 2025-11-12 23:02:16,690] Trial 2 finished with value: 0.11892891517823548 and parameters: {'learning_rate': 0.054750255414775316, 'num_leaves': 74, 'max_depth': 4, 'min_child_samples': 25, 'subsample': 0.7238938091112218, 'colsample_b

In [20]:
import pandas as pd
import numpy as np
import holidays
import optuna
import time

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_percentage_error
from lightgbm import LGBMRegressor


def tunar_lightgbm_temporal_split(df, n_lags=4, test_size=0.2, n_trials=30, seed=42):
    """
    Faz tuning dos hiperparâmetros do LightGBM usando Optuna com split temporal fixo (80/20).
    Usa apenas as features temporais e os lags como preditoras.
    """

    df = df.copy()
    df.index = pd.to_datetime(df.index)

    # === Features temporais ===
    df['hora'] = df.index.hour
    df['dia_semana'] = df.index.dayofweek
    df['mes'] = df.index.month
    feriados_br = holidays.Brazil()
    df['feriado'] = df.index.map(lambda x: 1 if x in feriados_br else 0)

    # === Lags ===
    for i in range(1, n_lags + 1):
        df[f'lag_{i}'] = df['requisicoes'].shift(i)

    df = df.dropna()

    # === Selecionar apenas as colunas desejadas ===
    features = ['hora', 'dia_semana', 'mes', 'feriado'] + [f'lag_{i}' for i in range(1, n_lags + 1)]
    X = df[features]
    y = df['requisicoes']

    # === Split temporal 80/20 ===
    split_idx = int(len(df) * (1 - test_size))
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

    # === Padronização ===
    scaler = StandardScaler()
    X_train_scaled = pd.DataFrame(
        scaler.fit_transform(X_train),
        index=X_train.index,
        columns=X_train.columns
    )
    X_test_scaled = pd.DataFrame(
        scaler.transform(X_test),
        index=X_test.index,
        columns=X_test.columns
    )

    # === Função objetivo para o Optuna ===
    def objective(trial):
        params = {
            "n_estimators": 500,
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "num_leaves": trial.suggest_int("num_leaves", 20, 200),
            "max_depth": trial.suggest_int("max_depth", 3, 15),
            "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "reg_alpha": trial.suggest_float("reg_alpha", 1e-3, 10.0, log=True),
            "reg_lambda": trial.suggest_float("reg_lambda", 1e-3, 10.0, log=True),
            "random_state": seed,
            "n_jobs": -1,
            "verbosity": -1
        }

        model = LGBMRegressor(**params)
        model.fit(X_train_scaled, y_train)

        y_pred = model.predict(X_test_scaled)
        mape = mean_absolute_percentage_error(y_test, y_pred)

        return mape

    # === Otimização com Optuna ===
    study = optuna.create_study(direction="minimize", study_name="LightGBM_TemporalSplit_Tuning")
    study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

    print("\nMelhores hiperparâmetros encontrados:")
    for k, v in study.best_params.items():
        print(f"  {k}: {v}")
    print(f"\nMAPE (validação 80/20 temporal): {study.best_value:.4f}")

    return study


In [21]:
study = tunar_lightgbm_temporal_split(df)

[I 2025-11-12 23:22:22,281] A new study created in memory with name: LightGBM_TemporalSplit_Tuning


  0%|          | 0/30 [00:00<?, ?it/s]

[I 2025-11-12 23:22:23,147] Trial 0 finished with value: 0.15800273195702905 and parameters: {'learning_rate': 0.1548361200646292, 'num_leaves': 199, 'max_depth': 8, 'min_child_samples': 66, 'subsample': 0.9606277046339088, 'colsample_bytree': 0.6751335517464074, 'reg_alpha': 0.018235982530235888, 'reg_lambda': 4.837500373161069}. Best is trial 0 with value: 0.15800273195702905.
[I 2025-11-12 23:22:24,283] Trial 1 finished with value: 0.16609846320101085 and parameters: {'learning_rate': 0.28065483956959175, 'num_leaves': 61, 'max_depth': 10, 'min_child_samples': 86, 'subsample': 0.8436552963378798, 'colsample_bytree': 0.6838230759225465, 'reg_alpha': 0.1355646072919714, 'reg_lambda': 9.268641849110812}. Best is trial 0 with value: 0.15800273195702905.
[I 2025-11-12 23:22:29,176] Trial 2 finished with value: 0.15421099674344793 and parameters: {'learning_rate': 0.04014861906571153, 'num_leaves': 53, 'max_depth': 10, 'min_child_samples': 53, 'subsample': 0.7065941340244806, 'colsample_b