In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import roc_auc_score
import xgboost as xgb
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK
import mlflow
import mlflow.sklearn
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# 데이터 로딩
data = pd.read_csv('../data/dataset.csv')

In [3]:
# 예측 타겟과 특성 분리
X = data.drop('Default', axis=1)
y = data['Default']

In [4]:
# 범주형과 수치형 컬럼 구분
categorical_cols = ['State', 'BankState', 'NewExist', 'UrbanRural', 'RealEstate']
numerical_cols = ['DisbursementGross', 'GrAppv', 'daysterm']


In [5]:
# 데이터 전처리 파이프라인 구성
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numerical_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
])

In [6]:
# 데이터 전처리 실행
X_processed = preprocessor.fit_transform(X)

In [7]:
# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

# 하이퍼파라미터 탐색 공간 설정
space = {
    'max_depth': hp.choice('max_depth', range(3, 10)),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
    'n_estimators': hp.choice('n_estimators', range(50, 200)),
    'gamma': hp.uniform('gamma', 0, 5)
}

In [8]:
# 최적화를 위한 목적 함수 정의
def objective(params):
    # MLflow에 실험 이름 설정
    mlflow.set_experiment("assignment1")

    # 각 하이퍼파라미터 조합별 실험 시작
    with mlflow.start_run(nested=True):
        # XGBoost 모델 초기화 및 훈련
        model = xgb.XGBClassifier(eval_metric='logloss', use_label_encoder=False, **params)
        model.fit(X_train, y_train)

        # 예측 확률 및 ROC-AUC 계산
        probs = model.predict_proba(X_test)[:, 1]
        auc = roc_auc_score(y_test, probs)

        # MLflow에 파라미터 및 메트릭 로깅
        mlflow.log_params(params)
        mlflow.log_metric("roc_auc", auc)

        # 목적 함수 결과 반환 (손실 최소화)
        return {'loss': -auc, 'status': STATUS_OK}


In [9]:
# Hyperopt를 사용한 최적화 실행
trials = Trials()
best_params = fmin(
    fn=objective, # 목적 함수
    space=space,    # 탐색 공간
    algo=tpe.suggest,   # 어떻게 탐색할거냐. (최적화 알고리즘)
    max_evals=30,   # 시도 횟수
    trials=trials   # 결과 저장소
)

  0%|          | 0/30 [00:00<?, ?trial/s, best loss=?]

2025/10/10 17:17:41 INFO mlflow.tracking.fluent: Experiment with name 'assignment1' does not exist. Creating a new experiment.

Parameters: { "use_label_encoder" } are not used.




🏃 View run ambitious-doe-833 at: http://127.0.0.1:5000/#/experiments/1/runs/033af9502d5a406496f7107675748b8e

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1

  3%|▎         | 1/30 [00:02<01:26,  3.00s/trial, best loss: -0.9727997227997228]

Parameters: { "use_label_encoder" } are not used.




🏃 View run thoughtful-fowl-281 at: http://127.0.0.1:5000/#/experiments/1/runs/2b0a3cb0899b4f20a84ed0ac641f7800

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

  7%|▋         | 2/30 [00:03<00:40,  1.44s/trial, best loss: -0.9754479754479755]

Parameters: { "use_label_encoder" } are not used.




🏃 View run treasured-pug-219 at: http://127.0.0.1:5000/#/experiments/1/runs/e406c891acb841cfa88c20d5a6a74b1e

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 10%|█         | 3/30 [00:03<00:24,  1.10trial/s, best loss: -0.9754479754479755]

Parameters: { "use_label_encoder" } are not used.




🏃 View run smiling-goose-394 at: http://127.0.0.1:5000/#/experiments/1/runs/f201e1b1d8704a9d9a6e23442d9ea926

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 13%|█▎        | 4/30 [00:03<00:17,  1.48trial/s, best loss: -0.9754479754479755]

Parameters: { "use_label_encoder" } are not used.




🏃 View run delicate-lynx-687 at: http://127.0.0.1:5000/#/experiments/1/runs/cd542d0714e44fd392083b79ca652f1a

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 17%|█▋        | 5/30 [00:04<00:13,  1.90trial/s, best loss: -0.9757573507573507]

Parameters: { "use_label_encoder" } are not used.




🏃 View run incongruous-newt-281 at: http://127.0.0.1:5000/#/experiments/1/runs/aed60971e88e4d43aecb6bf9a161e769

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 20%|██        | 6/30 [00:04<00:10,  2.26trial/s, best loss: -0.9757573507573507]

Parameters: { "use_label_encoder" } are not used.




🏃 View run defiant-pug-626 at: http://127.0.0.1:5000/#/experiments/1/runs/4beaeacf00044b0580bbe9f1d8abcb37

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 23%|██▎       | 7/30 [00:04<00:08,  2.59trial/s, best loss: -0.9757573507573507]

Parameters: { "use_label_encoder" } are not used.




🏃 View run delightful-frog-542 at: http://127.0.0.1:5000/#/experiments/1/runs/b69f35fdf56342cd9b2de531e1a3a58a

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 27%|██▋       | 8/30 [00:05<00:07,  2.80trial/s, best loss: -0.9757573507573507]

Parameters: { "use_label_encoder" } are not used.




🏃 View run fortunate-conch-350 at: http://127.0.0.1:5000/#/experiments/1/runs/0bdbc3d46f604f9e9c226661774a8ae5

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 30%|███       | 9/30 [00:05<00:07,  2.85trial/s, best loss: -0.9791604791604792]

Parameters: { "use_label_encoder" } are not used.




🏃 View run polite-calf-121 at: http://127.0.0.1:5000/#/experiments/1/runs/63323b7a5a7b4928b6d49892a09833f8

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

🏃 View run placid-frog-910 at: http://127.0.0.1:5000/#/experiments/1/runs/16dc08ef18fe4a93b04ebf69a6bb11c1

 33%|███▎      | 10/30 [00:05<00:06,  2.94trial/s, best loss: -0.9791604791604792]

Parameters: { "use_label_encoder" } are not used.




🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 37%|███▋      | 11/30 [00:05<00:05,  3.19trial/s, best loss: -0.9791604791604792]

Parameters: { "use_label_encoder" } are not used.




🏃 View run indecisive-shoat-885 at: http://127.0.0.1:5000/#/experiments/1/runs/277729522bbe4d129d5c4851b359081d

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 40%|████      | 12/30 [00:06<00:05,  3.22trial/s, best loss: -0.9791604791604792]

Parameters: { "use_label_encoder" } are not used.




🏃 View run overjoyed-shoat-582 at: http://127.0.0.1:5000/#/experiments/1/runs/43d5c38cfa84450cb7c315491f3c5dcc

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 43%|████▎     | 13/30 [00:06<00:05,  3.36trial/s, best loss: -0.9791604791604792]

Parameters: { "use_label_encoder" } are not used.




🏃 View run marvelous-vole-81 at: http://127.0.0.1:5000/#/experiments/1/runs/c7e7fb02e0454ef6bbea09ae350ac691

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

🏃 View run puzzled-colt-602 at: http://127.0.0.1:5000/#/experiments/1/runs/e89c9f331e0842ddb493a2d4ac67df08

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 50%|█████     | 15/30 [00:07<00:04,  3.44trial/s, best loss: -0.9791604791604792]

Parameters: { "use_label_encoder" } are not used.


Parameters: { "use_label_encoder" } are not used.




🏃 View run caring-wolf-276 at: http://127.0.0.1:5000/#/experiments/1/runs/17556b401a97432a9317e9788ef38d63

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

🏃 View run unequaled-shrimp-538 at: http://127.0.0.1:5000/#/experiments/1/runs/5495f29711f7439ba7be15b1400217f2

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 57%|█████▋    | 17/30 [00:07<00:03,  3.69trial/s, best loss: -0.9796802296802296]

Parameters: { "use_label_encoder" } are not used.


Parameters: { "use_label_encoder" } are not used.




🏃 View run dazzling-snake-985 at: http://127.0.0.1:5000/#/experiments/1/runs/fa1909e4b8fa41b592cf16d015cd83f9

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 60%|██████    | 18/30 [00:07<00:03,  3.69trial/s, best loss: -0.9796802296802296]

Parameters: { "use_label_encoder" } are not used.




🏃 View run peaceful-shrike-2 at: http://127.0.0.1:5000/#/experiments/1/runs/451286f9890547f883c6017be5f51568

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 63%|██████▎   | 19/30 [00:08<00:02,  3.69trial/s, best loss: -0.9796802296802296]

Parameters: { "use_label_encoder" } are not used.




🏃 View run overjoyed-fowl-978 at: http://127.0.0.1:5000/#/experiments/1/runs/2eea78682ff8461aa3a82c46b5110a0d

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 67%|██████▋   | 20/30 [00:08<00:02,  3.72trial/s, best loss: -0.9796802296802296]

Parameters: { "use_label_encoder" } are not used.




🏃 View run rogue-mink-865 at: http://127.0.0.1:5000/#/experiments/1/runs/93ab22a3dc074125aae2908f9ec5f743

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 70%|███████   | 21/30 [00:08<00:02,  3.34trial/s, best loss: -0.9796802296802296]

Parameters: { "use_label_encoder" } are not used.




🏃 View run chill-shrike-953 at: http://127.0.0.1:5000/#/experiments/1/runs/bee15fd53ac74a51a231c42d7b3946b3

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 73%|███████▎  | 22/30 [00:09<00:02,  3.45trial/s, best loss: -0.9797049797049796]

Parameters: { "use_label_encoder" } are not used.




🏃 View run marvelous-panda-620 at: http://127.0.0.1:5000/#/experiments/1/runs/2406cad4092f4523a1f19bf5831c6727

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

🏃 View run secretive-wasp-890 at: http://127.0.0.1:5000/#/experiments/1/runs/2e2ee2c9afcb4bb1ae3eafc986a6a3f9

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 80%|████████  | 24/30 [00:09<00:01,  3.50trial/s, best loss: -0.9797049797049796]

Parameters: { "use_label_encoder" } are not used.


Parameters: { "use_label_encoder" } are not used.




🏃 View run receptive-bass-584 at: http://127.0.0.1:5000/#/experiments/1/runs/00db1d2783ec478e930ca7e30dbf09b0

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 83%|████████▎ | 25/30 [00:09<00:01,  3.47trial/s, best loss: -0.9797049797049796]

Parameters: { "use_label_encoder" } are not used.




🏃 View run traveling-loon-474 at: http://127.0.0.1:5000/#/experiments/1/runs/a3045c69dcad4cfe9ce439f4cbbe5ab3

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

🏃 View run beautiful-squid-705 at: http://127.0.0.1:5000/#/experiments/1/runs/3eddc7a8b19b40cd90187f0fec036d0b

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 90%|█████████ | 27/30 [00:10<00:00,  3.71trial/s, best loss: -0.9797049797049796]

Parameters: { "use_label_encoder" } are not used.




🏃 View run resilient-worm-491 at: http://127.0.0.1:5000/#/experiments/1/runs/074fe10d73ca4b4fbed47fd5921f6830

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 93%|█████████▎| 28/30 [00:10<00:00,  3.72trial/s, best loss: -0.9797049797049796]

Parameters: { "use_label_encoder" } are not used.


Parameters: { "use_label_encoder" } are not used.




🏃 View run efficient-cod-912 at: http://127.0.0.1:5000/#/experiments/1/runs/b9900762b3e945c6aeace268e6dd259a

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 97%|█████████▋| 29/30 [00:11<00:00,  3.47trial/s, best loss: -0.9797049797049796]

Parameters: { "use_label_encoder" } are not used.




🏃 View run clean-toad-645 at: http://127.0.0.1:5000/#/experiments/1/runs/d0d766c2d9fa4ff8b3a67340125cd862

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

100%|██████████| 30/30 [00:11<00:00,  2.65trial/s, best loss: -0.9797049797049796]
