In [3]:
import mlflow

# MLflow 서버 연결 설정 (로컬 실행시)
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("test_prediction")

2025/10/27 13:39:31 INFO mlflow.tracking.fluent: Experiment with name 'test_prediction' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/2', creation_time=1761539971160, experiment_id='2', last_update_time=1761539971160, lifecycle_stage='active', name='test_prediction', tags={}>

In [4]:
# ======================
# 7) LightGBM 학습 (불균형 보정 + 조기종료) - MLflow 연동
# ======================
# 테스트용 데이터 임의 생성 (입력)
import numpy as np
import pandas as pd
import os
import pickle
from lightgbm import LGBMClassifier
import lightgbm as lgb
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score

# 테스트 데이터 입력
np.random.seed(123)
X_test = pd.DataFrame(np.random.randn(100, 5), columns=[f"feat{i}" for i in range(5)])
y_test = np.random.randint(0, 2, 100)
X_train = pd.DataFrame(np.random.randn(400, 5), columns=[f"feat{i}" for i in range(5)])
y_train = np.random.randint(0, 2, 400)
cat_cols = []  # 범주형 특성 없음
RANDOM_STATE = 123
ARTIFACT_DIR = "artifacts"
os.makedirs(ARTIFACT_DIR, exist_ok=True)

with mlflow.start_run():
    # 파라미터 로깅
    mlflow.log_params({
        "n_estimators": 1500,
        "learning_rate": 0.04,
        "num_leaves": 63,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "reg_lambda": 1.0,
        "class_weight": "balanced",
        "test_size": 0.2,
        "random_state": 123
    })
    
    categorical_features = [c for c in cat_cols if c in X_train.columns]
    lgbm = LGBMClassifier(
        n_estimators=1500,
        learning_rate=0.04,
        num_leaves=63,
        subsample=0.8,
        colsample_bytree=0.8,
        reg_lambda=1.0,
        class_weight="balanced",
        random_state=RANDOM_STATE,
        n_jobs=-1
    )

    lgbm.fit(
        X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric="auc",
        callbacks=[lgb.early_stopping(stopping_rounds=80, verbose=True)],
        categorical_feature=categorical_features
    )
    
    # 모델 저장 (기존 방식)
    with open(os.path.join(ARTIFACT_DIR, "model_lgbm.pkl"), "wb") as f:
        pickle.dump(lgbm, f)
    
    # 8번 섹션의 평가 메트릭을 MLflow로 로깅
    proba = lgbm.predict_proba(X_test)[:, 1]
    pred_default = (proba >= 0.5).astype(int)
    
    roc = roc_auc_score(y_test, proba)
    precision = average_precision_score(y_test, proba)
    
    mlflow.log_metrics({
        "roc": roc,
        "precision": precision,
        "f1_score": f1_score(y_test, pred_default)
    })
    
    # 모델 로깅 (MLflow에 모델도 저장)
    mlflow.lightgbm.log_model(lgbm, "model")
    
    print(f"\n[MLflow] 실험 저장 완료!")



[LightGBM] [Info] Number of positive: 212, number of negative: 188
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000093 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 670
[LightGBM] [Info] Number of data points in the train set: 400, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
Training until validation scores don't improve for 80 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.474638	valid_0's binary_logloss: 0.693555





[MLflow] 실험 저장 완료!
🏃 View run rogue-toad-247 at: http://localhost:5000/#/experiments/2/runs/f6489ad316b442b0b1a3eb00335801c4
🧪 View experiment at: http://localhost:5000/#/experiments/2
