In [2]:
# 📦 라이브러리
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# 📂 Load data
# The training file name suggests it was already SMOTE-resampled upstream;
# the validation file is read as-is.
TRAIN_CSV = '../../3_Post-Feature Engineering/YANG/train_data_with_smote.csv'
VAL_CSV = '../../3_Post-Feature Engineering/YANG/val_data.csv'
train_df, val_df = pd.read_csv(TRAIN_CSV), pd.read_csv(VAL_CSV)


# 🎯 Split into X (features) and y (target)
def _split_xy(frame):
    """Return ``(features, target)`` where target is the 'is_defaulted' column.

    ``frame`` itself is left untouched: ``drop`` returns a new DataFrame.
    """
    return frame.drop(columns='is_defaulted'), frame['is_defaulted']


X_train, y_train = _split_xy(train_df)
X_val, y_val = _split_xy(val_df)

def evaluate_classifier(model, X, y, header):
    """Print validation metrics for an already-fitted binary classifier.

    Prints, in order: ``header``, the confusion matrix and the classification
    report (both from ``model.predict``), then the ROC-AUC computed from
    column 1 of ``model.predict_proba``.

    Args:
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.
        X: Feature table to score.
        y: True labels aligned row-for-row with ``X``.
        header: Line printed before the metrics (identifies the model).

    Returns:
        Tuple ``(preds, probs)``: the hard class predictions and the
        column-1 probabilities, so callers can reuse them later.
    """
    preds = model.predict(X)
    # Column 1 is taken as the score of the positive class; this relies on
    # the labels being 0/1 so that ``classes_[1]`` is the positive label.
    probs = model.predict_proba(X)[:, 1]
    print(header)
    print(confusion_matrix(y, preds))
    print(classification_report(y, preds))
    print("ROC-AUC:", roc_auc_score(y, probs))
    return preds, probs


# ✅ 1. Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# 📈 Predict and evaluate (Random Forest)
rf_preds, rf_probs = evaluate_classifier(rf_model, X_val, y_val, "📊 [Random Forest]")

# ✅ 2. LightGBM
lgb_model = LGBMClassifier(random_state=42)
lgb_model.fit(X_train, y_train)

# 📈 Predict and evaluate (LightGBM)
# The leading newline in the header separates this report from the output
# printed above it.
lgb_preds, lgb_probs = evaluate_classifier(lgb_model, X_val, y_val, "\n📊 [LightGBM]")


📊 [Random Forest]
[[3411  162]
 [  45   33]]
              precision    recall  f1-score   support

           0       0.99      0.95      0.97      3573
           1       0.17      0.42      0.24        78

    accuracy                           0.94      3651
   macro avg       0.58      0.69      0.61      3651
weighted avg       0.97      0.94      0.95      3651

ROC-AUC: 0.8758674388397311
[LightGBM] [Info] Number of positive: 16901, number of negative: 16901
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000953 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4080
[LightGBM] [Info] Number of data points in the train set: 33802, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000

📊 [LightGBM]
[[3106  467]
 [  34   44]]
              precision    recall  f1-score   support

           0       0.99      0.87      0.93      3573
           