## AutoML
- Optuna
    - 모델, 하이퍼파라미터 최적화 
    - Gradient boosting을 이용한 model 중 3가지 SOTA 모델을 Optuna에 입력으로 준다.
        - XGBoost
        - LightGBM
        - CatBoost

In [5]:
# Optuna
import optuna
import hiplot as hip

# Regressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

# cross validation
from sklearn.model_selection import cross_val_predict

# visualization
import matplotlib.pyplot as plt 
from sklearn.metrics import PredictionErrorDisplay

import pandas as pd 
import numpy as np

# 평가 지표
import evaluation

In [4]:
# Load the pre-selected feature table from disk.
df = pd.read_csv('dataframes/selected_df_q2.csv')

# The regression target is the 'interest rate' column, as a NumPy array.
target = df['interest rate'].values

# Every column other than the target and the 'date' column is a predictor.
is_predictor = ~df.columns.isin(['interest rate', 'date'])
features = df.loc[:, is_predictor]

In [7]:
def objective(trial):
    """Optuna objective: pick a boosting regressor, sample its settings, score it.

    The trial first chooses one of 'XGBoost', 'LightGBM' or 'CatBoost', then
    samples that model's hyperparameters from the ranges below. The model is
    scored on the module-level ``features`` / ``target`` using cross-validated
    predictions with ``cv=500``.

    Args:
        trial: The Optuna trial used to draw the model choice and
            hyperparameters.

    Returns:
        The value of ``evaluation.rmse(predictions, target)``.
    """
    choice = trial.suggest_categorical('model',['XGBoost', 'LightGBM', 'CatBoost'])

    # NOTE: several Optuna parameter names ('n_estimators', 'learning_rate',
    # 'max_depth', 'min_child_weight', 'colsample_bytree') are shared between
    # branches but use different ranges per model.
    if choice == 'XGBoost':
        regressor = XGBRegressor(
            subsample=trial.suggest_float('subsample', 0.7, 0.8),
            n_estimators=trial.suggest_int('n_estimators', 100, 150),
            min_child_weight=trial.suggest_int('min_child_weight', 6, 8),
            max_depth=trial.suggest_int('max_depth', 8, 10),
            learning_rate=trial.suggest_float('learning_rate', 0.1, 0.15),
            gamma=trial.suggest_float('gamma', 0.1, 0.2),
            colsample_bytree=trial.suggest_float('colsample_bytree', 0.3, 0.5),
            random_state=42,
        )
    elif choice == 'LightGBM':
        regressor = LGBMRegressor(
            num_leaves=trial.suggest_int('num_leaves', 20, 40),
            n_estimators=trial.suggest_int('n_estimators', 100, 200),
            min_child_weight=trial.suggest_int('min_child_weight', 5, 20),
            max_depth=trial.suggest_int('max_depth', 10, 30),
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.2),
            colsample_bytree=trial.suggest_float('colsample_bytree', 0.4, 0.8),
            random_state=42,
            force_row_wise=True,
            verbose=-1,  # silence LightGBM's per-fit logging
        )
    elif choice == 'CatBoost':
        regressor = CatBoostRegressor(
            iterations=trial.suggest_int('iterations', 150, 200),
            learning_rate=trial.suggest_float('learning_rate', 0.05, 0.15),
            depth=trial.suggest_int('depth', 6, 8),
            l2_leaf_reg=trial.suggest_int('l2_leaf_reg', 8, 9),
            border_count=trial.suggest_int('border_count', 32, 255),
            random_state=42,
            verbose=False,
        )

    # One prediction per row, each made by a model fitted on the other folds.
    fold_predictions = cross_val_predict(regressor, features, target, cv=500)
    return evaluation.rmse(fold_predictions, target)
    
# Search for the model/hyperparameter combination that minimises the value
# returned by `objective`, using 50 trials.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)
# study.best_value is the lowest objective value (the RMSE returned by
# `objective`), not a model, so it is labelled as such.
print(f'Best RMSE : {study.best_value}, Best Parameters : {study.best_params}')

# Report the best trial in detail, one sampled parameter per line.
trial = study.best_trial

print("Best Trial")
print(f"Value : {trial.value}")
print("Params : ")
for key, value in trial.params.items() :
    print(f'{key} : {value}')

[I 2024-10-10 17:10:39,927] A new study created in memory with name: no-name-d3f0bd1b-5c6a-4a19-a4ba-5f804e99c93f
[I 2024-10-10 17:12:06,150] Trial 0 finished with value: 0.5647563079479586 and parameters: {'model': 'CatBoost', 'iterations': 159, 'learning_rate': 0.0899121807073138, 'depth': 7, 'l2_leaf_reg': 8, 'border_count': 156}. Best is trial 0 with value: 0.5647563079479586.
[I 2024-10-10 17:12:34,703] Trial 1 finished with value: 0.5739359392444103 and parameters: {'model': 'LightGBM', 'num_leaves': 37, 'n_estimators': 153, 'min_child_weight': 19, 'max_depth': 27, 'learning_rate': 0.16306801598221493, 'colsample_bytree': 0.7839832946842481}. Best is trial 0 with value: 0.5647563079479586.
[I 2024-10-10 17:13:57,169] Trial 2 finished with value: 0.5598915692790322 and parameters: {'model': 'CatBoost', 'iterations': 192, 'learning_rate': 0.14358617093836878, 'depth': 6, 'l2_leaf_reg': 9, 'border_count': 212}. Best is trial 2 with value: 0.5598915692790322.
[I 2024-10-10 17:15:43,1

KeyboardInterrupt: 