In [1]:
import pandas as pd
import xgboost as xgb
import mlflow
import mlflow.xgboost
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from mlflow.models import infer_signature

# Address of the MLflow tracking server; later MLflow calls in this notebook
# send their HTTP requests here.
TRACKING_URI = "http://127.0.0.1:5000"
mlflow.set_tracking_uri(TRACKING_URI)

# Load the dataset (path is relative to the notebook's working directory).
DATASET_PATH = '../data/dataset.csv'
data = pd.read_csv(DATASET_PATH)

In [2]:
# Separate the target column from the feature columns.
TARGET_COL = 'Default'
y = data[TARGET_COL]
X = data.drop(columns=[TARGET_COL])

In [3]:
# Column groups used by the preprocessing step: numerical columns are scaled,
# categorical columns are one-hot encoded.
numerical_cols = [
    'DisbursementGross',
    'GrAppv',
    'daysterm',
]
categorical_cols = [
    'State',
    'BankState',
    'NewExist',
    'UrbanRural',
    'RealEstate',
]

In [4]:
# Build the preprocessing transformer: 'num' handles the numerical columns,
# 'cat' handles the categorical columns.
numeric_step = ('num', StandardScaler(), numerical_cols)
# handle_unknown='ignore' keeps transform() from failing on categories that
# were not present when the encoder was fitted.
categorical_step = ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
preprocessor = ColumnTransformer([numeric_step, categorical_step])

In [5]:
# Run the preprocessing: fit the ColumnTransformer on X and transform X with it.
# NOTE(review): this fit sees every row of X, including any rows that are later
# held out for testing, so X_processed is not a leakage-free evaluation input.
X_processed = preprocessor.fit_transform(X)

In [6]:
# Train/test split.
# Split the RAW features first and fit the preprocessor on the training rows
# only, so the scaler statistics and one-hot categories are never learned from
# the held-out rows (avoids train/test leakage). The row partition is the same
# as splitting the transformed matrix, because it depends only on the number
# of rows and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train = preprocessor.fit_transform(X_train_raw)
# Test rows are only transformed; categories unseen during fitting are ignored
# by the encoder (handle_unknown='ignore').
X_test = preprocessor.transform(X_test_raw)

In [7]:
# Train the XGBoost classifier on the training split.
xgb_params = {'eval_metric': 'logloss'}
model = xgb.XGBClassifier(**xgb_params)
# Left as the cell's last expression so the fitted estimator is displayed.
model.fit(X_train, y_train)

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [8]:
# 2. Log the trained model with MLflow and register it in the Model Registry.
# Every MLflow call below sends requests to the tracking server configured with
# mlflow.set_tracking_uri, so that server has to be reachable before this runs.
mlflow.set_experiment("assignment2")

with mlflow.start_run(run_name="XGBoost"):

    # Record the model hyperparameters. get_params() returns a dict, so the
    # batch API log_params is required; log_param expects a (key, value) pair.
    mlflow.log_params(model.get_params())

    # Predictions on the test split: hard labels and positive-class probability.
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # Compute the evaluation metrics.
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred_proba)

    # Log every computed metric, not just ROC-AUC, so runs can be compared on
    # all of them in the MLflow UI.
    mlflow.log_metrics({
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "roc_auc": roc_auc,
    })

    # Log the model and register it in the Model Registry. The signature is
    # inferred from the same rows that produced y_pred so that the input and
    # output examples correspond to each other.
    signature = infer_signature(X_test, y_pred)
    mlflow.xgboost.log_model(
        model,
        "model",
        registered_model_name="LoanDefaultModel",
        signature=signature,
        input_example=X_test[:1],
    )

    print(f"모델 등록 완료. ROC-AUC: {roc_auc:.4f}")

MlflowException: API request to http://127.0.0.1:5000/api/2.0/mlflow/experiments/get-by-name failed with exception HTTPConnectionPool(host='127.0.0.1', port=5000): Max retries exceeded with url: /api/2.0/mlflow/experiments/get-by-name?experiment_name=assignment2 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000230466C1610>: Failed to establish a new connection: [WinError 10061] 대상 컴퓨터에서 연결을 거부했으므로 연결하지 못했습니다'))