<a href="https://colab.research.google.com/github/chi-hun/chi-hun/blob/main/optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# module

In [None]:
!pip install optuna

In [None]:
!pip install catboost

In [68]:
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
import optuna

# data

In [5]:
df = pd.read_csv('/content/sample_data/california_housing_train.csv')

In [6]:
df.head(2)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-114.31,34.19,15.0,5612.0,1283.0,1015.0,472.0,1.4936,66900.0
1,-114.47,34.4,19.0,7650.0,1901.0,1129.0,463.0,1.82,80100.0


In [7]:
x_train = df
y_train = df.pop('median_house_value')

In [8]:
x_train.shape, y_train.shape

((17000, 8), (17000,))

# how

## xgb

파라미터 및 모델 정의 (점수 반환)

In [44]:
def objective_xgb(trial : optuna.Trial, x_train, y_train):
    param = {"n_estimators" : trial.suggest_int('n_estimators', 50,4000),
        'max_depth':trial.suggest_int('max_depth', 2, 16),
        'min_child_weight':trial.suggest_int('min_child_weight', 1, 300),
        'gamma':trial.suggest_int('gamma', 1, 3),
        'learning_rate': trial.suggest_loguniform('learning_rate',1e-6,1e-2),
        'colsample_bytree':trial.suggest_discrete_uniform('colsample_bytree',0.5, 1, 0.1),
        'lambda': trial.suggest_loguniform('lambda', 1e-3, 10.0),
        'alpha': trial.suggest_loguniform('alpha', 1e-3, 10.0),
        'subsample': trial.suggest_categorical('subsample', [0.6,0.7,0.8,1.0] ),
        # 'tree_method':'gpu_hist', # gpu 사용
        'random_state': 42}
    
    xgb = XGBRegressor(**param)
    x_ttrain, x_ttest, y_ttrain, y_ttest = train_test_split(x_train, y_train, test_size=0.2)
    model = xgb.fit(x_ttrain, y_ttrain, eval_set=[(x_ttest, y_ttest)], early_stopping_rounds=100, verbose=False)
    pre = model.predict(x_ttest)
    mse = mean_squared_error(y_ttest, pre)

    return mse

학습 정의(방향(direction), sampler)

In [61]:
study = optuna.create_study(study_name='xgb_param', direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))

[32m[I 2022-09-05 06:09:53,213][0m A new study created in memory with name: xgb_param[0m


파라미터 검색

In [62]:
study.optimize(lambda trial : objective_xgb(trial, x_train, y_train), n_trials=10)
print(f'score : {study.best_value} \n params : {study.best_trial.params}')


suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.


suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.


suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.


suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.





[33m[W 2022-09-05 06:10:03,702][0m Trial 0 failed because of the following error: KeyboardInterrupt()[0m
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-62-cc745a5e9b4d>", line 1, in <lambda>
    study.optimize(lambda trial : objective_xgb(trial, x_train, y_train), n_trials=10)
  File "<ipython-input-44-2022014cda90>", line 16, in objective_xgb
    model = xgb.fit(x_ttrain, y_ttrain, eval_set=[(x_ttest, y_ttest)], early_stopping_rounds=100, verbose=False)
  File "/usr/local/lib/python3.7/dist-packages/xgboost/sklearn.py", line 396, in fit
    callbacks=callbacks)
  File "/usr/local/lib/python3.7/dist-packages/xgboost/training.py", line 216, in train
    xgb_model=xgb_model, callbacks=callbacks)
  File "/usr/local/lib/python3.7/dist-packages/xgboost/training.py", line 74, in _train_internal
    bst.update(dtrain, i, obj)
  File "/usr/local/lib

KeyboardInterrupt: ignored

파라미터 중요도

In [47]:
optuna.visualization.plot_param_importances(study)

최적화

In [48]:
optuna.visualization.plot_optimization_history(study)

## lightgbm

In [66]:
def objective_lgbm(trial : optuna.Trial, x_train, y_train):
    param = {
        "learning_rate": trial.suggest_loguniform('learning_rate',1e-6,1e-2),
        "n_estimators": trial.suggest_int('n_estimators', 50,4000),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 3e-5),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 9e-2),
        "max_depth": trial.suggest_int("max_depth", 1, 20),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.4, 1.0),
        "subsample": trial.suggest_float("subsample", 0.3, 1.0),
        "subsample_freq": trial.suggest_int("subsample_freq", 1, 10),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "max_bin": trial.suggest_int("max_bin", 200, 500),
        "random_state": 42,
        "verbosity": -1}

    x_ttrain, x_ttest, y_ttrain, y_ttest = train_test_split(x_train, y_train , test_size=0.2)
    lgbm = LGBMRegressor(**param)
    model = lgbm.fit(x_ttrain, y_ttrain, eval_set=[(x_ttest, y_ttest)], early_stopping_rounds=100, verbose=False)
    pre = model.predict(x_ttest)
    mse = mean_squared_error(y_ttest, pre)

    return mse

In [64]:
study = optuna.create_study(study_name='lgbm_param', direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))

[32m[I 2022-09-05 06:10:16,682][0m A new study created in memory with name: lgbm_param[0m


In [69]:
study.optimize(lambda trial : objective_lgbm(trial, x_train, y_train), n_trials=10)
print(f'best score : {study.best_value}\nbest params :{study.best_trial.params}')


suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.

[32m[I 2022-09-05 06:10:58,202][0m Trial 2 finished with value: 13250239734.376513 and parameters: {'learning_rate': 1.4742753159914662e-05, 'n_estimators': 1497, 'reg_alpha': 1.3687538826668908e-05, 'reg_lambda': 0.0706658386736116, 'max_depth': 4, 'num_leaves': 133, 'colsample_bytree': 0.7554487413172255, 'subsample': 0.3325152889039984, 'subsample_freq': 7, 'min_child_samples': 21, 'max_bin': 219}. Best is trial 2 with value: 13250239734.376513.[0m

suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.

[32m[I 2022-09-05 06:11:01,773][0m Trial 3 finished with value: 2983980474.326837 and parameters: {'learning_rate': 0.00624513957

best score : 2184388155.3112993
best params :{'learning_rate': 0.0035387588647792408, 'n_estimators': 1915, 'reg_alpha': 3.596631435689668e-06, 'reg_lambda': 0.06419203371762168, 'max_depth': 16, 'num_leaves': 145, 'colsample_bytree': 0.8625803079727365, 'subsample': 0.6456569174550735, 'subsample_freq': 6, 'min_child_samples': 46, 'max_bin': 207}


In [70]:
optuna.visualization.plot_param_importances(study)

In [71]:
optuna.visualization.plot_optimization_history(study)

## catboost

In [75]:
def objective_cat(trial : optuna.Trial, x_train, y_train):
    param = {
      'learning_rate' : trial.suggest_loguniform('learning_rate', 0.0001, 0.3),
      'bagging_temperature' :trial.suggest_loguniform('bagging_temperature', 0.01, 100.00),
      "n_estimators":trial.suggest_int("n_estimators", 50, 10000),
      "max_depth":trial.suggest_int("max_depth", 2, 16),
      'random_strength' :trial.suggest_int('random_strength', 0, 100),
      "colsample_bylevel":trial.suggest_float("colsample_bylevel", 0.4, 1.0),
      "l2_leaf_reg":trial.suggest_float("l2_leaf_reg",1e-8,3e-5),
      "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
      "max_bin": trial.suggest_int("max_bin", 200, 500),
      'od_type': trial.suggest_categorical('od_type', ['IncToDec', 'Iter']),
      "random_state":42}

    x_ttrain, x_ttest, y_ttrain, y_ttest = train_test_split(x_train, y_train, test_size=0.2)
    cat = CatBoostRegressor(**param)
    model = cat.fit(x_ttrain, y_ttrain, eval_set=[(x_ttest, y_ttest)], early_stopping_rounds=100, verbose=0)
    pre = model.predict(x_ttest)
    mse = mean_squared_error(y_ttest, pre)

    return mse

In [73]:
study = optuna.create_study(study_name='catboost_paeam', direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))

[32m[I 2022-09-05 06:33:44,903][0m A new study created in memory with name: catboost_paeam[0m


In [76]:
study.optimize(lambda trial : objective_cat(trial, x_train, y_train), n_trials=10)
print(f'best score : {study.best_value}, params : {study.best_trial.params}')


suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.


suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.

[32m[I 2022-09-05 06:36:38,116][0m Trial 1 finished with value: 2211063126.7283087 and parameters: {'learning_rate': 0.23577305025968318, 'bagging_temperature': 21.368329072358772, 'n_estimators': 2162, 'max_depth': 4, 'random_strength': 18, 'colsample_bylevel': 0.5825453457757226, 'l2_leaf_reg': 1.5747445384650815e-05, 'min_child_samples': 46, 'max_bin': 287, 'od_type': 'IncToDec'}. Best is trial 1 with value: 2211063126.7283087.[0m

suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. 

KeyboardInterrupt: ignored