In [45]:
import sklearn

from sklearn.ensemble import VotingRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import *

import xgboost
from xgboost import XGBRegressor

import catboost
from catboost import CatBoostRegressor

from lightgbm import LGBMRegressor

In [None]:
from sklearn.model_selection import cross_val_score

In [40]:
import numpy as np

In [41]:
# Objective function that Optuna optimizes.
def objective(trial):
    """Score one Optuna trial for a LightGBM + XGBoost + CatBoost voting ensemble.

    Hyperparameters for the three regressors are sampled from ``trial``; each
    one is registered under a model-specific prefix (``lgbm_``, ``xgb_``,
    ``cat_``) so the names do not collide inside a single study.

    The ensemble is evaluated with 3-fold cross-validation on ``X`` and ``y``,
    which are not parameters: they are read from the enclosing (notebook)
    scope and must exist before the study is run.

    Args:
        trial: The Optuna trial used to draw hyperparameter values.

    Returns:
        The mean of the per-fold RMSE values (lower is better).
    """
    # LightGBM search space.
    lgbm_kwargs = dict(
        n_estimators=trial.suggest_int("lgbm_n_estimators", 100, 1000),
        learning_rate=trial.suggest_float("lgbm_learning_rate", 0.01, 0.1),
        max_depth=trial.suggest_int("lgbm_max_depth", 3, 20),
        num_leaves=trial.suggest_int("lgbm_num_leaves", 20, 100),
    )

    # XGBoost search space.
    xgb_kwargs = dict(
        n_estimators=trial.suggest_int("xgb_n_estimators", 100, 1000),
        learning_rate=trial.suggest_float("xgb_learning_rate", 0.01, 0.1),
        max_depth=trial.suggest_int("xgb_max_depth", 3, 20),
    )

    # CatBoost search space (its depth range is capped at 10 here).
    cat_kwargs = dict(
        iterations=trial.suggest_int("cat_iterations", 100, 1000),
        learning_rate=trial.suggest_float("cat_learning_rate", 0.01, 0.1),
        depth=trial.suggest_int("cat_depth", 3, 10),
    )

    # No weights are passed, so VotingRegressor is left at its default weighting.
    ensemble = VotingRegressor(
        estimators=[
            ("lgbm", LGBMRegressor(**lgbm_kwargs)),
            ("xgb", XGBRegressor(**xgb_kwargs)),
            ("catboost", CatBoostRegressor(**cat_kwargs, verbose=False)),
        ]
    )

    # The scorer reports *negative* MSE per fold; error_score="raise" makes a
    # failed fit raise an exception rather than contribute a placeholder score.
    neg_mse_per_fold = cross_val_score(
        ensemble,
        X,
        y,
        cv=3,
        scoring="neg_mean_squared_error",
        error_score="raise",
    )

    # Flip the sign back and take the square root fold by fold, then average.
    rmse_per_fold = np.sqrt(-neg_mse_per_fold)
    return np.mean(rmse_per_fold)

    

In [43]:
!pip3 install optuna

Collecting optuna
  Obtaining dependency information for optuna from https://files.pythonhosted.org/packages/15/da/68883911855d8b4d521f9a370e4e6aab8232b91c1d8d5a8348c4680c6642/optuna-3.6.1-py3-none-any.whl.metadata
  Downloading optuna-3.6.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Obtaining dependency information for alembic>=1.5.0 from https://files.pythonhosted.org/packages/df/ed/c884465c33c25451e4a5cd4acad154c29e5341e3214e220e7f3478aa4b0d/alembic-1.13.2-py3-none-any.whl.metadata
  Downloading alembic-1.13.2-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Obtaining dependency information for colorlog from https://files.pythonhosted.org/packages/f3/18/3e867ab37a24fdf073c1617b9c7830e06ec270b1ea4694a624038fc40a03/colorlog-6.8.2-py3-none-any.whl.metadata
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Obtaining dependency information for Mako from https://files.python

In [44]:
import optuna

ModuleNotFoundError: No module named 'optuna'

In [None]:
# Run the hyperparameter search: 10 trials, minimizing the value returned by
# `objective` (mean cross-validated RMSE).
#
# The sampler is seeded explicitly so that re-running the notebook proposes the
# same hyperparameter candidates; an unseeded sampler draws different values on
# every run, which makes the reported "best" trial impossible to reproduce.
# TPESampler is Optuna's default single-objective sampler, so only the seed changes.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=10)

# Best trial found by the study (lowest objective value).
trial = study.best_trial

print("Value:", trial.value)
print("Params:")
for key, value in trial.params.items():
    print(f"{key}: {value}")

In [46]:
from sklearn.ensemble import VotingRegressor

In [48]:
# Read the winning hyperparameters from the finished Optuna study once.
# Requires `study` to exist, i.e. the optimization cell must have been run.
tuned_params = study.best_params

# The study stores each value under a model-specific prefix ("lgbm_", "xgb_",
# "cat_"); the dicts below map them back to each estimator's own argument names.
# n_jobs=-1 asks LightGBM / XGBoost to use all available cores.
best_lgbm_params = dict(
    n_estimators=tuned_params["lgbm_n_estimators"],
    learning_rate=tuned_params["lgbm_learning_rate"],
    max_depth=tuned_params["lgbm_max_depth"],
    num_leaves=tuned_params["lgbm_num_leaves"],
    n_jobs=-1,
)

best_xgb_params = dict(
    n_estimators=tuned_params["xgb_n_estimators"],
    learning_rate=tuned_params["xgb_learning_rate"],
    max_depth=tuned_params["xgb_max_depth"],
    n_jobs=-1,
)

# verbose=0 silences CatBoost's per-iteration training log.
best_cat_params = dict(
    iterations=tuned_params["cat_iterations"],
    learning_rate=tuned_params["cat_learning_rate"],
    depth=tuned_params["cat_depth"],
    verbose=0,
)

# Instantiate the three base regressors with their tuned settings.
lgbm_model = LGBMRegressor(**best_lgbm_params)
xgb_model = XGBRegressor(**best_xgb_params)
catboost_model = CatBoostRegressor(**best_cat_params)

# Weighted average of the three models: LGBM and CatBoost get a higher weight.
# NOTE(review): `objective` scores the ensemble without `weights`, so these
# weights were not part of what the study optimized -- confirm this is intended.
voting_regressor = VotingRegressor(
    estimators=[
        ("lgbm", lgbm_model),
        ("xgb", xgb_model),
        ("catboost", catboost_model),
    ],
    weights=[2, 1, 2],
)

# Train on the full training data, then predict on the held-out test set.
voting_regressor.fit(X, y)
predictions = voting_regressor.predict(test_data)

NameError: name 'study' is not defined