In [1]:
# Environment setup
import os
import warnings

# TF_CPP_MIN_LOG_LEVEL must be in the environment before any library that
# reads it gets imported, so it is set here in the very first cell.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

# Blanket-suppress Python warnings for the rest of the session.
warnings.simplefilter("ignore")

print("✅ Environment configured")

✅ Environment configured


In [2]:
import joblib
import numpy as np
import onnxmltools
import onnx
import onnxruntime as ort
from xgboost import XGBClassifier
from skl2onnx.common.data_types import FloatTensorType

In [3]:
# Show the kernel's current working directory.
print("📋 Current PATH:", os.getcwd())

📋 Current PATH: /workspace


In [4]:
# === User settings ===
# Fallback feature count, used when a loaded model does not expose its own
# (must equal the number of features used at training time).
N_FEATURES: int = 118
# Per-fold path templates; "{}" is filled with the fold index via str.format.
MODEL_TEMPLATE: str = "/workspace/xgb_model_fold{}.pkl"
ONNX_TEMPLATE: str = "/workspace/xgb_model_fold{}.onnx"
# Optional float feature matrix used for the numeric ONNX-vs-XGBoost check.
SAMPLE_X_PATH: str = "/workspace/sample_X.npy"
# =====================

In [5]:
def convert_single_model(fold_idx: int):
    """Convert the pickled XGBoost model of one fold to an ONNX file.

    The pickle may hold either a scikit-learn style wrapper (detected by the
    presence of ``n_features_in_``) or a bare Booster. A bare Booster is
    wrapped in an ``XGBClassifier`` shell before being handed to
    ``onnxmltools.convert_xgboost``; the shell is configured as a binary
    classifier.

    Args:
        fold_idx: Fold number substituted into MODEL_TEMPLATE / ONNX_TEMPLATE.

    Returns:
        The path of the written ONNX file, or ``None`` when the pickle for
        this fold does not exist.
    """
    model_path = MODEL_TEMPLATE.format(fold_idx)
    onnx_path = ONNX_TEMPLATE.format(fold_idx)

    if not os.path.exists(model_path):
        print(f"❌ {model_path} not found, skipping.")
        return None

    print(f"\n🔧 [Fold {fold_idx}] Loading {model_path}")
    # SECURITY: joblib.load unpickles arbitrary objects; only load trusted files.
    model = joblib.load(model_path)

    if hasattr(model, "n_features_in_"):
        n_features = model.n_features_in_
    else:
        # Booster-only pickle: rebuild a classifier wrapper around it.
        print("⚠️ model is Booster only → wrapping with XGBClassifier.")
        booster = model
        model = XGBClassifier()
        model._Booster = booster
        model.n_classes_ = 2       # binary classification

        # Use the real number of boosting rounds instead of a placeholder.
        # NOTE(review): the onnxmltools converter appears to read
        # n_estimators from the wrapper when working out the class count, so
        # a dummy value can make a multi-tree binary model look multiclass —
        # confirm against the installed onnxmltools version.
        round_getter = getattr(booster, "num_boosted_rounds", None)
        if callable(round_getter):
            model.n_estimators = int(round_getter())
        else:
            # Older Booster objects: fall back to counting dumped trees.
            model.n_estimators = len(booster.get_dump())

        # Prefer the feature count stored in the booster; the configured
        # N_FEATURES is only a fallback when the booster cannot report one.
        feature_getter = getattr(booster, "num_features", None)
        detected = int(feature_getter()) if callable(feature_getter) else 0
        n_features = detected or N_FEATURES
        if n_features != N_FEATURES:
            print(
                f"⚠️ [Fold {fold_idx}] Booster reports {n_features} features "
                f"(N_FEATURES={N_FEATURES}); using the booster's value."
            )

    # Dynamic batch dimension (None) with a fixed feature width.
    initial_type = [('input', FloatTensorType([None, n_features]))]
    print(f"💾 [Fold {fold_idx}] Converting to ONNX...")
    onnx_model = onnxmltools.convert_xgboost(model, initial_types=initial_type)
    onnxmltools.utils.save_model(onnx_model, onnx_path)
    print(f"✅ [Fold {fold_idx}] Saved to {onnx_path}")
    return onnx_path

In [6]:
def validate_onnx(fold_idx: int, sample_X: np.ndarray | None = None):
    """단일 fold 모델 검증"""
    onnx_path = ONNX_TEMPLATE.format(fold_idx)
    model_path = MODEL_TEMPLATE.format(fold_idx)
    if not os.path.exists(onnx_path):
        print(f"⚠️ [Fold {fold_idx}] ONNX file not found, skipping validation.")
        return

    print(f"🔍 [Fold {fold_idx}] Validating structure...")
    onnx_model = onnx.load(onnx_path)
    onnx.checker.check_model(onnx_model)
    print("✅ Structure check passed.")

    if sample_X is None:
        print("⚠️ No sample_X provided, skipping numeric comparison.")
        return

    sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
    model = joblib.load(model_path)

    # Booster-only 모델도 처리
    if not hasattr(model, "predict"):
        print("⚠️ Restoring Booster model for comparison.")
        booster = model
        model = XGBClassifier()
        model._Booster = booster

    pred_onnx = sess.run(None, {"input": sample_X})[0]
    pred_xgb = model.predict(sample_X)

    close = np.allclose(pred_xgb, pred_onnx, atol=1e-5)
    print(f"✅ [Fold {fold_idx}] Output equivalence: {close}")
    if not close:
        print("⚠️ Slight numeric difference detected.")

In [7]:
# Optional reference inputs for the numeric check; None when the file is absent.
sample_X = (
    np.load(SAMPLE_X_PATH).astype(np.float32)
    if os.path.exists(SAMPLE_X_PATH)
    else None
)
if sample_X is not None:
    print(f"📦 Loaded sample_X.npy: {sample_X.shape}")

# Convert, then validate, folds 1 through 5 in order.
for i in (1, 2, 3, 4, 5):
    convert_single_model(i)
    validate_onnx(i, sample_X)


🔧 [Fold 1] Loading /workspace/xgb_model_fold1.pkl
⚠️ model is Booster only → wrapping with XGBClassifier.
💾 [Fold 1] Converting to ONNX...
✅ [Fold 1] Saved to /workspace/xgb_model_fold1.onnx
🔍 [Fold 1] Validating structure...
✅ Structure check passed.
⚠️ No sample_X provided, skipping numeric comparison.

🔧 [Fold 2] Loading /workspace/xgb_model_fold2.pkl
⚠️ model is Booster only → wrapping with XGBClassifier.
💾 [Fold 2] Converting to ONNX...
✅ [Fold 2] Saved to /workspace/xgb_model_fold2.onnx
🔍 [Fold 2] Validating structure...
✅ Structure check passed.
⚠️ No sample_X provided, skipping numeric comparison.

🔧 [Fold 3] Loading /workspace/xgb_model_fold3.pkl
⚠️ model is Booster only → wrapping with XGBClassifier.
💾 [Fold 3] Converting to ONNX...
✅ [Fold 3] Saved to /workspace/xgb_model_fold3.onnx
🔍 [Fold 3] Validating structure...
✅ Structure check passed.
⚠️ No sample_X provided, skipping numeric comparison.

🔧 [Fold 4] Loading /workspace/xgb_model_fold4.pkl
⚠️ model is Booster only → w