<a href="https://colab.research.google.com/github/chi-hun/chi-hun/blob/main/optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# module

In [None]:
!pip install optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna
  Downloading optuna-3.0.2-py3-none-any.whl (348 kB)
[K     |████████████████████████████████| 348 kB 2.1 MB/s 
Collecting alembic>=1.5.0
  Downloading alembic-1.8.1-py3-none-any.whl (209 kB)
[K     |████████████████████████████████| 209 kB 52.7 MB/s 
Collecting colorlog
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting cliff
  Downloading cliff-3.10.1-py3-none-any.whl (81 kB)
[K     |████████████████████████████████| 81 kB 9.7 MB/s 
[?25hCollecting cmaes>=0.8.2
  Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Collecting Mako
  Downloading Mako-1.2.3-py3-none-any.whl (78 kB)
[K     |████████████████████████████████| 78 kB 7.1 MB/s 
Collecting pbr!=2.1.0,>=2.0.0
  Downloading pbr-5.10.0-py2.py3-none-any.whl (112 kB)
[K     |████████████████████████████████| 112 kB 54.6 MB/s 
[?25hCollecting cmd2>=1.0.0
  Downloading cmd2-2.4.2-py3-none-any.whl 

In [None]:
!pip install catboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting catboost
  Downloading catboost-1.1-cp37-none-manylinux1_x86_64.whl (76.8 MB)
[K     |████████████████████████████████| 76.8 MB 1.2 MB/s 
Installing collected packages: catboost
Successfully installed catboost-1.1


In [None]:
import numpy as np
import optuna
import pandas as pd
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
from xgboost import XGBRegressor

# data

In [None]:
# Load the California housing training CSV; the absolute path below is hard-coded
# (NOTE(review): looks like the Colab sample-data location — adjust when running elsewhere).
df = pd.read_csv('/content/sample_data/california_housing_train.csv')

In [None]:
df.head(2)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-114.31,34.19,15.0,5612.0,1283.0,1015.0,472.0,1.4936,66900.0
1,-114.47,34.4,19.0,7650.0,1901.0,1129.0,463.0,1.82,80100.0


In [None]:
# Split target and features without mutating `df`, so this cell can be re-run
# safely (the previous `df.pop(...)` removed the column in place and raised
# KeyError on a second run).
y_train = df['median_house_value']
x_train = df.drop(columns='median_house_value')

In [None]:
x_train.shape, y_train.shape

((17000, 8), (17000,))

# how

## xgb

파라미터 및 모델 정의 (점수 반환)

In [None]:
def objective_xgb(trial : optuna.Trial, x_train, y_train):
    """Optuna objective: cross-validated MSE of an ``XGBRegressor``.

    Args:
        trial: Optuna trial used to sample the model hyperparameters and the
            number of CV folds.
        x_train: Feature DataFrame (rows are selected positionally via ``.iloc``).
        y_train: Target Series aligned row-for-row with ``x_train``.

    Returns:
        The mean validation MSE over all folds (lower is better).
    """
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 4000),
        'max_depth': trial.suggest_int('max_depth', 1, 16),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
        'gamma': trial.suggest_float('gamma', 0.01, 1),
        'learning_rate': trial.suggest_float('learning_rate', 1e-6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        # Use the sklearn-style name `reg_lambda`; passing `lambda` raises an error.
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 1.0),
        # `alpha` is an alias of `reg_alpha` in XGBoost, so only one of them is
        # sampled; sampling both made the L1 penalty ambiguous.
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0001, 1.0),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        # Regression objective. (For a classifier use e.g. 'multi:softmax'
        # together with 'num_class'.)
        'objective': 'reg:squarederror',
        # 'tree_method': 'gpu_hist',  # enable to train on GPU
        # Early stopping is configured once, on the estimator. Passing it to
        # fit() as well is deprecated/removed in recent XGBoost releases.
        # NOTE(review): constructor-level early stopping needs xgboost >= 1.6.
        'early_stopping_rounds': 100,
        'random_state': 42,
    }

    # NOTE(review): the fold count is tuned like a hyperparameter, so scores of
    # different trials are not measured on identical splits — confirm intended.
    n_splits = trial.suggest_int('n_split', 3, 10)

    # Plain KFold: the target is continuous, so stratification does not apply.
    # The shuffle is seeded so that repeated runs produce the same folds.
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    fold_mse = []
    for train_idx, val_idx in kfold.split(x_train, y_train):
        x_tr, y_tr = x_train.iloc[train_idx, :], y_train.iloc[train_idx]
        x_val, y_val = x_train.iloc[val_idx, :], y_train.iloc[val_idx]

        # Fresh estimator per fold so no fitted state carries over.
        model = XGBRegressor(**param)
        # The validation fold drives early stopping and is also the fold scored below.
        model.fit(x_tr, y_tr, eval_set=[(x_val, y_val)], verbose=False)

        fold_mse.append(mean_squared_error(y_val, model.predict(x_val)))

    return np.mean(fold_mse)

학습 정의(방향(direction), sampler)

In [None]:
# Create the study for the XGBoost search: the objective value is minimized and
# the TPE sampler is seeded (42) so the sequence of suggestions is repeatable.
study = optuna.create_study(study_name='xgb_param', direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))

[32m[I 2022-10-07 03:24:50,841][0m A new study created in memory with name: xgb_param[0m


파라미터 검색

In [None]:
# Run 10 trials; the lambda binds the training data so Optuna only supplies `trial`.
study.optimize(lambda trial : objective_xgb(trial, x_train, y_train), n_trials=10)
# Report the best objective value found and the parameters that produced it.
print(f'score : {study.best_value} \n params : {study.best_trial.params}')



[32m[I 2022-10-07 03:25:47,418][0m Trial 1 finished with value: 7359310595.8009405 and parameters: {'n_estimators': 666, 'max_depth': 2, 'min_child_weight': 260, 'gamma': 2, 'learning_rate': 0.00708101770538266, 'colsample_bytree': 0.1185260448662222, 'reg_lambda': 9.699128611767781, 'alpha': 8.324593965363416, 'subsample': 1.0}. Best is trial 1 with value: 7359310595.8009405.[0m




[32m[I 2022-10-07 03:26:13,076][0m Trial 2 finished with value: 4809878046.84637 and parameters: {'n_estimators': 2123, 'max_depth': 8, 'min_child_weight': 88, 'gamma': 2, 'learning_rate': 0.0013957991126597664, 'colsample_bytree': 0.3629301836816964, 'reg_lambda': 3.6642520710936233, 'alpha': 4.561243772186143, 'subsample': 0.6}. Best is trial 2 with value: 4809878046.84637.[0m




[32m[I 2022-10-07 03:26:19,251][0m Trial 3 finished with value: 3939104237.7285295 and parameters: {'n_estimators': 233, 'max_depth': 11, 'min_child_weight': 52, 'gamma': 1, 'learning_rate': 0.009488906486996079, 'colsample_bytree': 0.9690688297671034, 'reg_lambda': 8.084165083816496, 'alpha': 3.0468330779645334, 'subsample': 0.7}. Best is trial 3 with value: 3939104237.7285295.[0m




[32m[I 2022-10-07 03:26:30,082][0m Trial 4 finished with value: 3781079944.9876018 and parameters: {'n_estimators': 2006, 'max_depth': 2, 'min_child_weight': 273, 'gamma': 1, 'learning_rate': 0.006625560321255467, 'colsample_bytree': 0.3805399684804699, 'reg_lambda': 5.201160143756931, 'alpha': 5.467556083153454, 'subsample': 0.7}. Best is trial 4 with value: 3781079944.9876018.[0m




[32m[I 2022-10-07 03:27:01,270][0m Trial 5 finished with value: 4960225536.209042 and parameters: {'n_estimators': 3585, 'max_depth': 10, 'min_child_weight': 277, 'gamma': 1, 'learning_rate': 0.001960632641329033, 'colsample_bytree': 0.14070456001948428, 'reg_lambda': 3.25397797730188, 'alpha': 3.887384219605131, 'subsample': 0.7}. Best is trial 4 with value: 3781079944.9876018.[0m




[32m[I 2022-10-07 03:27:23,639][0m Trial 6 finished with value: 2572555869.786902 and parameters: {'n_estimators': 2194, 'max_depth': 4, 'min_child_weight': 241, 'gamma': 1, 'learning_rate': 0.009868882479068572, 'colsample_bytree': 0.7950202923669917, 'reg_lambda': 1.98795809966019, 'alpha': 0.056215649118900396, 'subsample': 0.6}. Best is trial 6 with value: 2572555869.786902.[0m




[32m[I 2022-10-07 03:27:27,683][0m Trial 7 finished with value: 5073633385.248916 and parameters: {'n_estimators': 342, 'max_depth': 7, 'min_child_weight': 35, 'gamma': 3, 'learning_rate': 0.006233357970148752, 'colsample_bytree': 0.39780822236738433, 'reg_lambda': 0.6365199445099503, 'alpha': 3.1105122348349066, 'subsample': 1.0}. Best is trial 6 with value: 2572555869.786902.[0m




[32m[I 2022-10-07 03:27:43,744][0m Trial 8 finished with value: 3110997210.312248 and parameters: {'n_estimators': 1915, 'max_depth': 3, 'min_child_weight': 214, 'gamma': 3, 'learning_rate': 0.005613210698497393, 'colsample_bytree': 0.7938704619591049, 'reg_lambda': 4.938462168047543, 'alpha': 5.227805560990559, 'subsample': 0.6}. Best is trial 6 with value: 2572555869.786902.[0m




[32m[I 2022-10-07 03:28:14,437][0m Trial 9 finished with value: 3010147350.216889 and parameters: {'n_estimators': 2564, 'max_depth': 6, 'min_child_weight': 153, 'gamma': 3, 'learning_rate': 0.002493672999259601, 'colsample_bytree': 0.4693446307320668, 'reg_lambda': 7.555755834291944, 'alpha': 2.288752856750733, 'subsample': 1.0}. Best is trial 6 with value: 2572555869.786902.[0m




[32m[I 2022-10-07 03:29:18,739][0m Trial 10 finished with value: 2705587970.7747507 and parameters: {'n_estimators': 3242, 'max_depth': 11, 'min_child_weight': 262, 'gamma': 3, 'learning_rate': 0.0018665140188014724, 'colsample_bytree': 0.90330309864098, 'reg_lambda': 5.393883076914592, 'alpha': 8.074594111485462, 'subsample': 0.6}. Best is trial 6 with value: 2572555869.786902.[0m


score : 2572555869.786902 
 params : {'n_estimators': 2194, 'max_depth': 4, 'min_child_weight': 241, 'gamma': 1, 'learning_rate': 0.009868882479068572, 'colsample_bytree': 0.7950202923669917, 'reg_lambda': 1.98795809966019, 'alpha': 0.056215649118900396, 'subsample': 0.6}


파라미터 중요도

In [None]:
optuna.visualization.plot_param_importances(study)

최적화

In [None]:
optuna.visualization.plot_optimization_history(study)

## lightgbm

In [None]:
import lightgbm
def objective_lgbm(trial : optuna.Trial, x_train, y_train):
    """Optuna objective: cross-validated MSE of an ``LGBMRegressor``.

    Args:
        trial: Optuna trial used to sample the model hyperparameters and the
            number of CV folds.
        x_train: Feature DataFrame (rows are selected positionally via ``.iloc``).
        y_train: Target Series aligned row-for-row with ``x_train``.

    Returns:
        The mean validation MSE over all folds (lower is better).
    """
    param = {
        "learning_rate": trial.suggest_float('learning_rate', 1e-6, 1.0),
        "n_estimators": trial.suggest_int('n_estimators', 50, 4000),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 3e-5),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 9e-2),
        "max_depth": trial.suggest_int("max_depth", 1, 20),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.4, 1.0),
        "subsample": trial.suggest_float("subsample", 0.3, 1.0),
        "subsample_freq": trial.suggest_int("subsample_freq", 1, 10),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "max_bin": trial.suggest_int("max_bin", 200, 500),
        "random_state": 42,
        # 'objective': 'multiclass',  # for multiclass classification (with LGBMClassifier)
        # "device": "gpu",  # enable to train on GPU
    }

    # NOTE(review): the fold count is tuned like a hyperparameter, so scores of
    # different trials are not measured on identical splits — confirm intended.
    n_splits = trial.suggest_int('n_split', 3, 10)

    # Plain KFold: the target is continuous, so stratification does not apply.
    # The shuffle is seeded so that repeated runs produce the same folds.
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    fold_mse = []
    for train_idx, val_idx in kfold.split(x_train, y_train):
        x_tr, y_tr = x_train.iloc[train_idx, :], y_train.iloc[train_idx]
        x_val, y_val = x_train.iloc[val_idx, :], y_train.iloc[val_idx]

        # Fresh estimator per fold so no fitted state carries over.
        model = LGBMRegressor(**param)
        # log_evaluation(0) silences the per-iteration evaluation log.
        model.fit(
            x_tr,
            y_tr,
            eval_set=[(x_val, y_val)],
            callbacks=[lightgbm.callback.log_evaluation(0)],
        )

        fold_mse.append(mean_squared_error(y_val, model.predict(x_val)))

    return np.mean(fold_mse)

In [None]:
# Create the study for the LightGBM search (rebinds `study`, replacing the previous one):
# the objective value is minimized and the TPE sampler is seeded (42).
study = optuna.create_study(study_name='lgbm_param', direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))

[32m[I 2022-10-07 04:09:38,423][0m A new study created in memory with name: lgbm_param[0m


In [None]:
# Run 10 trials; the lambda binds the training data so Optuna only supplies `trial`.
study.optimize(lambda trial : objective_lgbm(trial, x_train, y_train), n_trials=10)
# Report the best objective value found and the parameters that produced it.
print(f'best score : {study.best_value}\nbest params :{study.best_trial.params}')

[32m[I 2022-10-07 04:09:45,551][0m Trial 0 finished with value: 2557924295.4915805 and parameters: {'learning_rate': 0.0037460266483547782, 'n_estimators': 3806, 'reg_alpha': 2.196249831492404e-05, 'reg_lambda': 0.05387926759114846, 'max_depth': 4, 'num_leaves': 41, 'colsample_bytree': 0.4348501673009197, 'subsample': 0.9063233020424546, 'subsample_freq': 7, 'min_child_samples': 72, 'max_bin': 206}. Best is trial 0 with value: 2557924295.4915805.[0m
[32m[I 2022-10-07 04:09:51,311][0m Trial 1 finished with value: 2249761577.889497 and parameters: {'learning_rate': 0.009699128611767782, 'n_estimators': 3338, 'reg_alpha': 6.378049929241502e-06, 'reg_lambda': 0.016364255230389386, 'max_depth': 4, 'num_leaves': 79, 'colsample_bytree': 0.7148538589793427, 'subsample': 0.602361513049481, 'subsample_freq': 3, 'min_child_samples': 63, 'max_bin': 241}. Best is trial 1 with value: 2249761577.889497.[0m
[32m[I 2022-10-07 04:09:54,145][0m Trial 2 finished with value: 3385591515.9349837 and 

best score : 1941781433.6643193
best params :{'learning_rate': 0.00887224021302069, 'n_estimators': 1915, 'reg_alpha': 3.596631435689668e-06, 'reg_lambda': 0.06419203371762168, 'max_depth': 16, 'num_leaves': 145, 'colsample_bytree': 0.8625803079727365, 'subsample': 0.6456569174550735, 'subsample_freq': 6, 'min_child_samples': 46, 'max_bin': 207}


In [None]:
optuna.visualization.plot_param_importances(study)

In [None]:
optuna.visualization.plot_optimization_history(study)

## catboost

In [None]:
def objective_cat(trial : optuna.Trial, x_train, y_train):
    """Optuna objective: cross-validated MSE of a ``CatBoostRegressor``.

    Args:
        trial: Optuna trial used to sample the model hyperparameters and the
            number of CV folds.
        x_train: Feature DataFrame (rows are selected positionally via ``.iloc``).
        y_train: Target Series aligned row-for-row with ``x_train``.

    Returns:
        The mean validation MSE over all folds (lower is better).
    """
    param = {
        'learning_rate': trial.suggest_float('learning_rate', 1e-6, 1.0),
        # bagging_temperature only applies to the Bayesian bootstrap, so select it explicitly.
        'bootstrap_type': 'Bayesian',
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.01, 100.00),
        "n_estimators": trial.suggest_int("n_estimators", 50, 4000),
        # CatBoost limits tree depth to 16; larger values are rejected.
        "max_depth": trial.suggest_int("max_depth", 1, 16),
        'random_strength': trial.suggest_int('random_strength', 0, 100),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.4, 1.0),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-8, 3e-5),
        # NOTE(review): CatBoost documents min_child_samples (min_data_in_leaf) as
        # effective only for the Depthwise/Lossguide grow policies — confirm it
        # has any effect with the default policy.
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "max_bin": trial.suggest_int("max_bin", 200, 500),
        'od_type': trial.suggest_categorical('od_type', ['IncToDec', 'Iter']),
        "random_state": 42,
        # 'task_type': 'GPU',  # enable to train on GPU
        "verbose": 0,  # silence the per-iteration training log
    }

    # NOTE(review): the fold count is tuned like a hyperparameter, so scores of
    # different trials are not measured on identical splits — confirm intended.
    n_splits = trial.suggest_int('n_split', 3, 10)

    # Plain KFold: the target is continuous, so stratification does not apply.
    # The shuffle is seeded so that repeated runs produce the same folds.
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    fold_mse = []
    for train_idx, val_idx in kfold.split(x_train, y_train):
        x_tr, y_tr = x_train.iloc[train_idx, :], y_train.iloc[train_idx]
        x_val, y_val = x_train.iloc[val_idx, :], y_train.iloc[val_idx]

        # Fresh CatBoost estimator per fold (the previous code built an
        # LGBMRegressor here, so the CatBoost parameters were never used as intended).
        model = CatBoostRegressor(**param)
        # The validation fold feeds the overfitting detector (od_type) and is
        # also the fold scored below.
        model.fit(x_tr, y_tr, eval_set=[(x_val, y_val)])

        fold_mse.append(mean_squared_error(y_val, model.predict(x_val)))

    # Average over all folds (previously only the last fold's MSE was returned).
    return np.mean(fold_mse)

In [None]:
# Create the study for the CatBoost search (rebinds `study`, replacing the previous one):
# the objective value is minimized and the TPE sampler is seeded (42).
study = optuna.create_study(study_name='catboost_param', direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))

[32m[I 2022-10-07 03:29:20,323][0m A new study created in memory with name: catboost_paeam[0m


In [None]:
# Run 10 trials; the lambda binds the training data so Optuna only supplies `trial`.
study.optimize(lambda trial : objective_cat(trial, x_train, y_train), n_trials=10)
# Report the best objective value found and the parameters that produced it.
print(f'best score : {study.best_value}, params : {study.best_trial.params}')

[32m[I 2022-10-07 03:29:43,228][0m Trial 0 finished with value: 2188741045.3900137 and parameters: {'learning_rate': 0.11242458164232401, 'bagging_temperature': 95.07192349792751, 'n_estimators': 7334, 'max_depth': 10, 'random_strength': 15, 'colsample_bylevel': 0.49359671220172163, 'l2_leaf_reg': 1.7519275289243016e-06, 'min_child_samples': 88, 'max_bin': 380, 'od_type': 'IncToDec'}. Best is trial 0 with value: 2188741045.3900137.[0m
[32m[I 2022-10-07 03:29:44,844][0m Trial 1 finished with value: 2258849140.0643497 and parameters: {'learning_rate': 0.2909759646633821, 'bagging_temperature': 83.24593965363417, 'n_estimators': 2162, 'max_depth': 4, 'random_strength': 18, 'colsample_bylevel': 0.5825453457757226, 'l2_leaf_reg': 1.5747445384650815e-05, 'min_child_samples': 46, 'max_bin': 287, 'od_type': 'IncToDec'}. Best is trial 0 with value: 2188741045.3900137.[0m
[32m[I 2022-10-07 03:34:35,156][0m Trial 2 finished with value: 2328289051.530705 and parameters: {'learning_rate': 0

best score : 1840624171.5272157, params : {'learning_rate': 0.03245663895529201, 'bagging_temperature': 3.1526042768165574, 'n_estimators': 6382, 'max_depth': 6, 'random_strength': 51, 'colsample_bylevel': 0.9445398843556558, 'l2_leaf_reg': 7.486273952174759e-06, 'min_child_samples': 44, 'max_bin': 427, 'od_type': 'IncToDec'}
