<a href="https://colab.research.google.com/github/conextm/python/blob/main/xgboost_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 380.1/380.1 kB 6.2 MB/s eta 0:00:00
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 233.4/233.4 kB 9.3 MB/s eta 0:00:00
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.3-py3-none-any.whl (78 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 78.8/78.8 kB 8.3 MB/s eta 0:00:00
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.3 alembic-1.13.1 colorlog-6.8.2 optuna-3.6.1


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Red-wine quality dataset from the UCI repository; the file is
# semicolon-separated rather than comma-separated.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, sep=";")

# The target is the "quality" column; every remaining column is a feature.
y = data["quality"]
X = data.drop(columns="quality")

# Hold out 20% of the rows for validation, with a fixed seed for a stable split.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [3]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import optuna

def objective(trial):
    """Optuna objective: train an XGBoost regressor and return its validation RMSE.

    One hyperparameter configuration is sampled from ``trial``, an
    ``XGBRegressor`` is fitted on the notebook-level ``X_train`` / ``y_train``
    and then scored on ``X_val`` / ``y_val``.

    Args:
        trial: The Optuna trial used to sample hyperparameters through
            ``suggest_float`` and ``suggest_int``.

    Returns:
        float: Root mean squared error of the predictions on the validation
        split (lower is better).
    """
    params = {
        # Settings held constant across all trials.
        "objective": "reg:squarederror",
        "n_estimators": 1000,
        "verbosity": 0,
        # Settings explored by the study; the learning rate is sampled on a
        # log scale between 1e-3 and 0.1.
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        "max_depth": trial.suggest_int("max_depth", 1, 10),
        "subsample": trial.suggest_float("subsample", 0.05, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1.0),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 20),
    }

    model = xgb.XGBRegressor(**params)
    model.fit(X_train, y_train, verbose=False)
    predictions = model.predict(X_val)

    # Take the square root of the MSE explicitly instead of passing
    # `squared=False`: that keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6, where it raises a TypeError. This form works on every
    # scikit-learn version.
    rmse = float(mean_squared_error(y_val, predictions) ** 0.5)
    return rmse

In [4]:
# Seed the sampler so that a fresh "Restart & Run All" proposes the same
# sequence of hyperparameter trials. TPESampler is the sampler Optuna uses by
# default, so only the seeding changes, not the search algorithm.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=30)

[I 2024-04-26 23:50:42,912] A new study created in memory with name: no-name-6e737fc1-9b31-4b2d-8ac5-64c02f4c7d51
[I 2024-04-26 23:50:51,377] Trial 0 finished with value: 0.5727919647451677 and parameters: {'learning_rate': 0.002387534462548318, 'max_depth': 7, 'subsample': 0.7650579559521575, 'colsample_bytree': 0.7460968824295521, 'min_child_weight': 4}. Best is trial 0 with value: 0.5727919647451677.
[I 2024-04-26 23:50:56,040] Trial 1 finished with value: 0.5526415868668064 and parameters: {'learning_rate': 0.007252837899345206, 'max_depth': 8, 'subsample': 0.7064108026841609, 'colsample_bytree': 0.9974557615508874, 'min_child_weight': 15}. Best is trial 1 with value: 0.5526415868668064.
[I 2024-04-26 23:50:57,179] Trial 2 finished with value: 0.5450127064055098 and parameters: {'learning_rate': 0.08215271837615362, 'max_depth': 8, 'subsample': 0.8466360244798069, 'colsample_bytree': 0.797669087406835, 'min_child_weight': 8}. Best is trial 2 with value: 0.5450127064055098.
[I 2024-

In [5]:
# Report the winning configuration and the validation RMSE it achieved.
for label, value in (
    ('Best hyperparameters:', study.best_params),
    ('Best RMSE:', study.best_value),
):
    print(label, value)



Best hyperparameters: {'learning_rate': 0.01739706163919491, 'max_depth': 10, 'subsample': 0.649408569321605, 'colsample_bytree': 0.8096964085572826, 'min_child_weight': 12}
Best RMSE: 0.5394016621908936
