# **이변수 함수 최적화**

In [1]:
from hyperopt import hp
# Name each input variable and define the range it is sampled from:
# x in [-10, 10] and y in [-15, 15], both quantized with a step of 1.
search_space = {
    'x': hp.quniform('x', -10, 10, 1),
    'y': hp.quniform('y', -15, 15, 1),
}

In [2]:
from hyperopt import STATUS_OK
#목적함수 생성
def objective_func(search_space):
    """Evaluate ``x**2 - 20*y`` at one sampled point.

    Args:
        search_space: Mapping holding the sampled values under the keys
            'x' and 'y'.

    Returns:
        The value of ``x**2 - 20*y`` for the given point.
    """
    x, y = search_space['x'], search_space['y']
    return x ** 2 - 20 * y

In [3]:
from hyperopt import fmin, tpe, Trials
import numpy as np
# Trials object that stores the result of every evaluation.
trial_val = Trials()
# Look for the inputs that minimize the objective within 5 evaluations.
best_01 = fmin(
    fn=objective_func,
    space=search_space,
    algo=tpe.suggest,
    max_evals=5,
    trials=trial_val,
    rstate=np.random.default_rng(seed=0),
)
print('best:', best_01)

100%|██████████| 5/5 [00:00<00:00, 546.55trial/s, best loss: -224.0]
best: {'x': np.float64(-4.0), 'y': np.float64(12.0)}


In [4]:
# Start from a fresh Trials object so only this run's evaluations are kept.
trial_val = Trials()
# Look for the inputs that minimize the objective within 20 evaluations.
best_02 = fmin(
    fn=objective_func,
    space=search_space,
    algo=tpe.suggest,
    max_evals=20,
    trials=trial_val,
    rstate=np.random.default_rng(seed=0),
)
print('best:', best_02)

100%|██████████| 20/20 [00:00<00:00, 743.62trial/s, best loss: -296.0]
best: {'x': np.float64(2.0), 'y': np.float64(15.0)}


In [5]:
# Per-trial records: one dict per evaluation holding its 'loss' and 'status'.
print(trial_val.results)

[{'loss': -64.0, 'status': 'ok'}, {'loss': -184.0, 'status': 'ok'}, {'loss': 56.0, 'status': 'ok'}, {'loss': -224.0, 'status': 'ok'}, {'loss': 61.0, 'status': 'ok'}, {'loss': -296.0, 'status': 'ok'}, {'loss': -40.0, 'status': 'ok'}, {'loss': 281.0, 'status': 'ok'}, {'loss': 64.0, 'status': 'ok'}, {'loss': 100.0, 'status': 'ok'}, {'loss': 60.0, 'status': 'ok'}, {'loss': -39.0, 'status': 'ok'}, {'loss': 1.0, 'status': 'ok'}, {'loss': -164.0, 'status': 'ok'}, {'loss': 21.0, 'status': 'ok'}, {'loss': -56.0, 'status': 'ok'}, {'loss': 284.0, 'status': 'ok'}, {'loss': 176.0, 'status': 'ok'}, {'loss': -171.0, 'status': 'ok'}, {'loss': 0.0, 'status': 'ok'}]


In [6]:
# Number of recorded evaluations.
len(trial_val.results)

20

In [7]:
# Sampled input values, grouped by variable name as one list per variable.
print(trial_val.vals)

{'x': [np.float64(-6.0), np.float64(-4.0), np.float64(4.0), np.float64(-4.0), np.float64(9.0), np.float64(2.0), np.float64(10.0), np.float64(-9.0), np.float64(-8.0), np.float64(-0.0), np.float64(-0.0), np.float64(1.0), np.float64(9.0), np.float64(6.0), np.float64(9.0), np.float64(2.0), np.float64(-2.0), np.float64(-4.0), np.float64(7.0), np.float64(-0.0)], 'y': [np.float64(5.0), np.float64(10.0), np.float64(-2.0), np.float64(12.0), np.float64(1.0), np.float64(15.0), np.float64(7.0), np.float64(-10.0), np.float64(0.0), np.float64(-5.0), np.float64(-3.0), np.float64(2.0), np.float64(4.0), np.float64(10.0), np.float64(3.0), np.float64(3.0), np.float64(-14.0), np.float64(-8.0), np.float64(11.0), np.float64(-0.0)]}


In [8]:
import pandas as pd
# Pull the loss out of every trial record and tabulate it next to the
# sampled input values.
losses = [trial['loss'] for trial in trial_val.results]
result_dict = {**trial_val.vals, 'losses': losses}
result_df = pd.DataFrame(result_dict)
result_df

Unnamed: 0,x,y,losses
0,-6.0,5.0,-64.0
1,-4.0,10.0,-184.0
2,4.0,-2.0,56.0
3,-4.0,12.0,-224.0
4,9.0,1.0,61.0
5,2.0,15.0,-296.0
6,10.0,7.0,-40.0
7,-9.0,-10.0,281.0
8,-8.0,0.0,64.0
9,-0.0,-5.0,100.0


# **XGBoost 하이퍼 파라미터 최적화**

In [9]:
#필요한 라이브러리 임포트 및 데이터 로딩
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Put the features in a DataFrame and append the label as the last column.
dataset = load_breast_cancer()
cancer_df = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
cancer_df['target'] = dataset.target
X_features = cancer_df.drop(columns='target')
y_label = cancer_df['target']

# Keep 80% of the data for training and hold out 20% for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X_features,
    y_label,
    test_size=0.2,
    random_state=156,
)
# Split the training portion again: 90% for fitting, 10% for validation.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=156,
)

In [10]:
#하이퍼 파라미터 검색 공간 설정
from hyperopt import hp
# max_depth and min_child_weight are sampled with a step of 1;
# learning_rate and colsample_bytree are sampled from continuous ranges.
xgb_search_space = {
    'max_depth': hp.quniform('max_depth', 5, 20, 1),
    'min_child_weight': hp.quniform('min_child_weight', 1, 2, 1),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
}

In [11]:
#목적 함수 설정
from hyperopt import STATUS_OK
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier

def objective_func(search_space):
    """Score one hyperparameter candidate with 3-fold cross-validation.

    Args:
        search_space: Mapping with the sampled 'max_depth',
            'min_child_weight', 'learning_rate' and 'colsample_bytree'
            values.

    Returns:
        Dict whose 'loss' is the negated mean cross-validation accuracy
        (negated because the optimizer minimizes the loss) and whose
        'status' is STATUS_OK.
    """
    params = {
        'n_estimators': 100,
        # The integer-valued hyperparameters are cast explicitly with int().
        'max_depth': int(search_space['max_depth']),
        'min_child_weight': int(search_space['min_child_weight']),
        'learning_rate': search_space['learning_rate'],
        'colsample_bytree': search_space['colsample_bytree'],
        'eval_metric': 'logloss',
    }
    scores = cross_val_score(
        XGBClassifier(**params),
        X_train,
        y_train,
        scoring='accuracy',
        cv=3,
    )

    return {'loss': -1 * np.mean(scores), 'status': STATUS_OK}

In [12]:
#최적 하이퍼 파라미터 도출
from hyperopt import fmin, tpe, Trials

# Fresh Trials object to record the evaluations of this search.
trial_val = Trials()
# Run 50 evaluations of the cross-validation objective over the XGBoost
# search space; the seeded generator is passed as rstate.
best = fmin(fn=objective_func,
            space=xgb_search_space,
            algo=tpe.suggest,
            max_evals=50,
            trials=trial_val,
            rstate=np.random.default_rng(seed=9))
print('best:', best)

100%|██████████| 50/50 [00:26<00:00,  1.89trial/s, best loss: -0.9670616939700244]
best: {'colsample_bytree': np.float64(0.858999866296754), 'learning_rate': np.float64(0.17459146963933464), 'max_depth': np.float64(20.0), 'min_child_weight': np.float64(2.0)}


In [13]:
# Report the best hyperparameters: floats to 5 decimals, integer-valued ones
# cast to int. The f-strings are double-quoted so the single-quoted dict keys
# inside the replacement fields also parse on Python versions before 3.12.
print(
    f"colsample_bytree: {best['colsample_bytree']:.5f}, "
    f"learning_rate: {best['learning_rate']:.5f}, "
    f"max_depth: {int(best['max_depth'])}, "
    f"min_child_weight: {int(best['min_child_weight'])}"
)

colsample_bytree: 0.85900, learning_rate: 0.17459, max_depth: 20, min_child_weight: 2


In [14]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, f1_score, confusion_matrix

def get_clf_eval(y_test, pred, pred_proba):
    """Print the confusion matrix and summary metrics for a set of predictions.

    Args:
        y_test: True labels.
        pred: Predicted labels, used for every metric except the AUC.
        pred_proba: Scores handed to roc_auc_score for the AUC.
    """
    # All metrics are computed first; printing happens only afterwards.
    cm = confusion_matrix(y_test, pred)
    acc = accuracy_score(y_test, pred)
    prec = precision_score(y_test, pred)
    rec = recall_score(y_test, pred)
    f1_val = f1_score(y_test, pred)
    auc = roc_auc_score(y_test, pred_proba)

    print('오차 행렬')
    print(cm)
    print(f'정확도: {acc:.4f}, 정밀도: {prec:.4f}, 재현율: {rec:.4f}, F1: {f1_val:.4f}, AUC: {auc:.4f}')

In [15]:
# Feed the best hyperparameters found above into XGBClassifier and evaluate it.
# Float-valued parameters are rounded to 5 decimals; integer-valued ones are
# cast to int.
xgb_wrapper = XGBClassifier(n_estimators=400,
                            learning_rate=round(best['learning_rate'], 5),
                            max_depth=int(best['max_depth']),
                            min_child_weight=int(best['min_child_weight']),
                            colsample_bytree=round(best['colsample_bytree'], 5),
                            early_stopping_rounds=50,
                            eval_metric='logloss')
# Log loss is reported for both the fitting split and the validation split.
# NOTE(review): XGBoost is documented to use the last eval_set entry for early
# stopping (here the validation split) -- confirm for the installed version.
evals = [(X_tr, y_tr), (X_val, y_val)]
xgb_wrapper.fit(X_tr, y_tr, eval_set=evals, verbose=True)

# Predict labels and the column-1 probabilities on the held-out test set.
preds = xgb_wrapper.predict(X_test)
pred_proba = xgb_wrapper.predict_proba(X_test)[:,1]

get_clf_eval(y_test, preds, pred_proba)

[0]	validation_0-logloss:0.53178	validation_1-logloss:0.57774
[1]	validation_0-logloss:0.43693	validation_1-logloss:0.53104
[2]	validation_0-logloss:0.36375	validation_1-logloss:0.46939
[3]	validation_0-logloss:0.30808	validation_1-logloss:0.42846
[4]	validation_0-logloss:0.26254	validation_1-logloss:0.39423
[5]	validation_0-logloss:0.22780	validation_1-logloss:0.36739
[6]	validation_0-logloss:0.19887	validation_1-logloss:0.35126
[7]	validation_0-logloss:0.17442	validation_1-logloss:0.33290
[8]	validation_0-logloss:0.15590	validation_1-logloss:0.31895
[9]	validation_0-logloss:0.13757	validation_1-logloss:0.30455
[10]	validation_0-logloss:0.12339	validation_1-logloss:0.29727
[11]	validation_0-logloss:0.11020	validation_1-logloss:0.28950
[12]	validation_0-logloss:0.09900	validation_1-logloss:0.28643
[13]	validation_0-logloss:0.09022	validation_1-logloss:0.28405
[14]	validation_0-logloss:0.08217	validation_1-logloss:0.27688
[15]	validation_0-logloss:0.07616	validation_1-logloss:0.27643
[1