## 1. 환경 설정 및 라이브러리 Import

In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 400.9/400.9 kB 8.0 MB/s eta 0:00:00
Downloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.9.0 optuna-4.5.0


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 데이터 불러오기
- 원본 데이터가 아닌 사전에 전처리해둔 데이터셋 로드

In [3]:
# -*- coding: utf-8 -*-
# =========================================================
# CTR 예측 — 통합 피처엔지니어링 + Optuna CatBoost K-Fold
# (심야=자정~05시, 누수방지 KFold Target Encoding 포함)
# =========================================================

# (선택) Colab Drive mount


# 기본
import os, gc, random, warnings, pickle, itertools
warnings.filterwarnings('ignore')
from datetime import datetime

# 데이터/수학
import numpy as np
import pandas as pd

# 모델/검증/메트릭
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import average_precision_score

import optuna

# ----------------------------------
# Configuration
# ----------------------------------
# Global run configuration. The CATBOOST_* keys are also read by the
# XGBoost training function further down (as n_estimators / early stopping).
CFG = {
    'SEED': 42,
    'N_FOLDS': 5,
    'CATBOOST_ITERATIONS': 3000,
    'CATBOOST_EARLY_STOPPING': 100,
    'N_TRIALS': 40,                 # number of Optuna trials (increase if needed)
    'DOWNSAMPLE_RATIO': 2,          # negatives kept per positive (2 means 1:2); only used by the commented-out downsampling step
}

# Seed the Python and NumPy global RNGs.
random.seed(CFG['SEED'])
np.random.seed(CFG['SEED'])
# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
# here should only affect subprocesses launched afterwards — confirm intent.
os.environ['PYTHONHASHSEED'] = str(CFG['SEED'])

# Paths
TRAIN_PATH = "/content/drive/MyDrive/open/train.parquet"
TEST_PATH  = "/content/drive/MyDrive/open/test.parquet"
SAMPLE_SUB = "/content/drive/MyDrive/open/sample_submission.csv"
FEATURE_SAVE_DIR = "/content/drive/Othercomputers/내 Mac/Python/데이콘/현재 진행중/토스/outputs_test/features_postTE"
# Side effect: creates the feature directory if it does not exist yet.
os.makedirs(FEATURE_SAVE_DIR, exist_ok=True)

# Pickled feature frames that the loading step below reads.
train_features_pkl = os.path.join(FEATURE_SAVE_DIR, "train_features_2.pkl")
test_features_pkl  = os.path.join(FEATURE_SAVE_DIR, "test_features_2.pkl")

# Column names: binary label and the sequence column.
target_col = "clicked"
seq_col = "seq"

# # =========================================================
# # 1) 데이터 로드 & 다운샘플 (불균형 완화)
# # =========================================================
# all_train = pd.read_parquet(TRAIN_PATH, engine="pyarrow")
# test_base = pd.read_parquet(TEST_PATH, engine="pyarrow").drop(columns=['ID'])

# print("Base Train:", all_train.shape, " Test:", test_base.shape)

# clicked_1 = all_train[all_train[target_col] == 1]
# clicked_0 = all_train[all_train[target_col] == 0].sample(
#     n=len(clicked_1) * CFG['DOWNSAMPLE_RATIO'], random_state=CFG['SEED']
# )
# train_base = pd.concat([clicked_1, clicked_0], axis=0).sample(frac=1, random_state=CFG['SEED']).reset_index(drop=True)
# del clicked_1, clicked_0; gc.collect()

# print("Downsampled Train:", train_base.shape,
#       "| clicked=0:", (train_base[target_col]==0).sum(),
#       "| clicked=1:", (train_base[target_col]==1).sum())


# =========================================================
# 8) 평가지표: 0.5*AP + 0.5*(1 - WLL)
# =========================================================
def calculate_competition_score(y_true, y_pred_proba):
    """Compute the competition metric ``0.5 * AP + 0.5 * (1 - WLL)``.

    ``AP`` is sklearn's average precision. ``WLL`` is a class-balanced log
    loss: the positive and negative parts are each weighted by
    ``n / (2 * n_class)``, so each class contributes half of the total.

    Args:
        y_true: Binary labels (0/1). Any array-like, including plain lists
            and pandas Series; flattened to 1-D.
        y_pred_proba: Predicted probability of the positive class, same
            length as ``y_true``; flattened to 1-D.

    Returns:
        dict with keys ``'ap'``, ``'wll'`` and ``'final_score'``.

    Note:
        If ``y_true`` contains only one class, the corresponding class weight
        divides by zero and the resulting score is not meaningful.
    """
    # Coerce to flat arrays: `1 - y_true` fails on a Python list, and an
    # (n, 1) prediction array would silently broadcast against (n,) labels.
    y_true = np.asarray(y_true).ravel()
    y_pred_proba = np.asarray(y_pred_proba, dtype=float).ravel()

    ap = average_precision_score(y_true, y_pred_proba)

    n_total = len(y_true)
    n_pos = np.sum(y_true)
    pos_weight = n_total / (2 * n_pos)
    neg_weight = n_total / (2 * (n_total - n_pos))

    # Clip so that log(0) can never occur.
    ypp = np.clip(y_pred_proba, 1e-7, 1-1e-7)
    log_loss_pos = -np.mean(y_true * np.log(ypp)) * pos_weight
    log_loss_neg = -np.mean((1 - y_true) * np.log(1 - ypp)) * neg_weight
    wll = log_loss_pos + log_loss_neg

    final_score = 0.5 * ap + 0.5 * (1 - wll)
    return {'ap': ap, 'wll': wll, 'final_score': final_score}

# =========================================================
# 9) (Revised) 저장한 PKL 피처 로드만 해서 진행
# =========================================================
print("\n=== Load prebuilt features (PKL) ===")

# Fail early, with both expected paths in the message, if either pickle is missing.
if not all(os.path.exists(p) for p in (train_features_pkl, test_features_pkl)):
    raise FileNotFoundError(
        f"PKL이 없습니다.\n- {train_features_pkl}\n- {test_features_pkl}\n"
        "먼저 피처를 생성/저장한 뒤 다시 실행하세요."
    )

# NOTE: pickle executes arbitrary code on load; only load files you created yourself.
with open(train_features_pkl, "rb") as fh:
    train_features = pickle.load(fh)
with open(test_features_pkl, "rb") as fh:
    test_features = pickle.load(fh)

print(f"Loaded train_features: {train_features.shape} | test_features: {test_features.shape}")

# Positional indices (within feature_cols) of the pandas `category` columns,
# in the form CatBoost expects for its categorical-feature argument.
target_col = "clicked"  # restated in case it was rebound above
feature_cols = train_features.columns[train_features.columns != target_col].tolist()
cat_feature_indices = [
    pos for pos, name in enumerate(feature_cols)
    if isinstance(train_features[name].dtype, pd.CategoricalDtype)
]
print("Cat features (idx):", cat_feature_indices)

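# Example output from an earlier feature set (the current PKL has 180 columns; see the cell output below):
# Loaded train_features: (612537, 175) | test_features: (1527298, 175)
# Cat features (idx): [154, 155, 156, 157, 158, 159, 160]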


=== Load prebuilt features (PKL) ===
Loaded train_features: (612537, 180) | test_features: (1527298, 180)
Cat features (idx): [154, 155, 158, 159, 160, 161, 162, 163]


## Optuna 기반 K-Fold 학습 함수

In [4]:
import numpy as np
import pandas as pd

# Columns dropped from both train and test.
drop_cols = [
    "hour_bucket",
    "hour_bucket_simple",
    "dow_hour",
    "day_of_week__inventory_id",
    "gender__age_group",
    "is_weekend__hour_bucket",
    "hour_bucket__inventory_id",
]

# Columns dropped from test only.
drop_test_only = ["ID"]

# Log-transformed history columns that get inf/NaN sanitised.
log_hist_cols = [
    "history_a_2_log1p",
    "history_a_4_log1p",
    "history_a_5_log1p",
    "history_a_6_log1p",
    "history_a_7_log1p",
]


def clean_features(df: pd.DataFrame, is_test=False):
    """Return a cleaned copy of ``df``; the input frame is left untouched.

    Removes every column listed in ``drop_cols`` (plus ``drop_test_only``
    when ``is_test`` is true) that is actually present, then replaces
    +/-inf and NaN with 0 in the ``log_hist_cols`` columns that exist.
    """
    unwanted = (drop_cols + drop_test_only) if is_test else drop_cols
    present = [name for name in unwanted if name in df.columns]
    out = df.drop(columns=present)  # drop() returns a new frame

    # inf -> NaN -> 0
    for name in log_hist_cols:
        if name not in out.columns:
            continue
        sanitized = out[name].replace([np.inf, -np.inf], np.nan)
        out[name] = sanitized.fillna(0)

    return out

# Run the cleaning step on both splits (ID is only dropped from test).
train_features_clean = clean_features(train_features)
test_features_clean = clean_features(test_features, is_test=True)

for _label, _frame in (("Train", train_features_clean), ("Test", test_features_clean)):
    print(f"{_label} shape:", _frame.shape)

Train shape: (612537, 173)
Test shape: (1527298, 172)


In [5]:
# =========================================================
# XGBoost 버전: Optuna 기반 K-Fold XGB 학습/재학습
# - CatBoost 코드 인터페이스를 최대한 유지
# - 범주형(object/category) → category codes로 안전 변환
# - GPU 사용 시 tree_method='gpu_hist', predictor='gpu_predictor'
# - 탐색/최종학습 평가지표 로직(calculate_competition_score) 그대로 사용
# =========================================================

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
import optuna
from xgboost import XGBClassifier

def _prep_xgb_features(df: pd.DataFrame, cols: list) -> pd.DataFrame:
    """object/category → codes, 수치형은 numeric으로 강제 캐스팅"""
    X = df[cols].copy()
    for c in cols:
        dt = X[c].dtype
        if str(dt) == 'object' or pd.api.types.is_categorical_dtype(dt):
            X[c] = X[c].astype('category').cat.codes.astype('int32')  # NaN→-1
        else:
            X[c] = pd.to_numeric(X[c], errors='coerce')
    return X

def _scale_pos_weight(y: np.ndarray) -> float:
    pos = float(np.sum(y))
    neg = float(len(y) - pos)
    return (neg / pos) if pos > 0 else 1.0

def catboost_kfold_optuna(  # name kept so existing call sites work unchanged; trains XGBoost
    train_df: pd.DataFrame,
    feature_cols: list,
    target_col: str,
    cat_feature_indices: list = None,   # unused by XGBoost; accepted for signature compatibility
    n_folds: int = 5,
    n_trials: int = 40,
    use_gpu: bool = True
):
    """Tune XGBoost with Optuna under stratified K-fold CV, then refit per fold.

    Each Optuna trial trains one ``XGBClassifier`` per fold and is scored by
    the mean ``final_score`` from ``calculate_competition_score``. The best
    trial's parameters are then used to train one model per fold again,
    collecting out-of-fold predictions.

    Args:
        train_df: Frame containing ``feature_cols`` and ``target_col``.
        feature_cols: Feature column names, encoded via ``_prep_xgb_features``.
        target_col: Name of the binary label column.
        cat_feature_indices: Ignored.
        n_folds: Number of stratified folds.
        n_trials: Number of Optuna trials.
        use_gpu: Selects the GPU tree method / predictor when true.

    Returns:
        ``(fold_models, summary)`` where ``fold_models`` is the list of fitted
        per-fold models and ``summary`` has keys ``'mean_final_score'``,
        ``'oof_score'`` and ``'best_params'`` (the full parameter dict used
        for the final fit).

    Note:
        The validation fold is used both for early stopping and for scoring,
        so the reported scores are measured on data that influenced the
        stopping point.
    """
    # ===== Data preparation =====
    y = train_df[target_col].astype(int).values
    X = _prep_xgb_features(train_df, feature_cols)

    # Shared settings (the CATBOOST_* config keys are reused for XGBoost).
    random_state = CFG.get('SEED', 42)
    n_estimators_default = CFG.get('CATBOOST_ITERATIONS', 2000)
    early_stopping_rounds = CFG.get('CATBOOST_EARLY_STOPPING', 100)
    spw_global = _scale_pos_weight(y)

    # With an integer random_state every split() call yields the same folds,
    # so one splitter serves all trials and the final refit.
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)

    def _make_params(tuned: dict, verbosity: int) -> dict:
        """Merge tuned hyper-parameters with the fixed training settings."""
        return {
            **tuned,
            "n_estimators": n_estimators_default,
            "objective": "binary:logistic",
            "eval_metric": "logloss",
            "random_state": random_state,
            "n_jobs": -1,
            # NOTE(review): newer XGBoost releases deprecate `gpu_hist` and
            # `predictor` in favour of device="cuda" — confirm against the
            # installed version before changing.
            "tree_method": "gpu_hist" if use_gpu else "hist",
            "predictor": "gpu_predictor" if use_gpu else "auto",
            "verbosity": verbosity,
            "early_stopping_rounds": early_stopping_rounds,
            # Class-imbalance handling: global negative/positive ratio.
            "scale_pos_weight": spw_global,
        }

    def _fit_fold(params: dict, tr_idx, va_idx, verbose: bool):
        """Train on one fold; return (model, validation preds, metrics)."""
        X_tr, X_va = X.iloc[tr_idx], X.iloc[va_idx]
        y_tr, y_va = y[tr_idx], y[va_idx]

        model = XGBClassifier(**params)
        model.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], verbose=verbose)

        pred = model.predict_proba(X_va)[:, 1]
        return model, pred, calculate_competition_score(y_va, pred)

    # ===== Optuna objective: maximise final_score = 0.5*AP + 0.5*(1-WLL) =====
    def objective(trial):
        tuned = {
            "max_depth": trial.suggest_int("max_depth", 4, 10),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "min_child_weight": trial.suggest_float("min_child_weight", 1e-2, 10.0, log=True),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "gamma": trial.suggest_float("gamma", 0.0, 10.0),
            "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
            "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
        }
        params = _make_params(tuned, verbosity=0)

        scores = []
        for tr_idx, va_idx in skf.split(X, y):
            _, _, m = _fit_fold(params, tr_idx, va_idx, verbose=False)
            scores.append(m['final_score'])

        return float(np.mean(scores))

    # ===== Run Optuna =====
    # Seed the (default) TPE sampler so the search is reproducible under CFG['SEED'].
    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(seed=random_state),
    )
    study.optimize(objective, n_trials=n_trials)

    print("\n=== Optuna Best Params (XGB) ===")
    print(study.best_trial.params)
    print(f"Best CV Final Score: {study.best_value:.6f}")

    # ===== Refit per fold with the best parameters =====
    final_params = _make_params(study.best_trial.params.copy(), verbosity=1)

    fold_models, fold_scores = [], []
    oof_pred = np.zeros(len(X), dtype=np.float32)

    for fold, (tr_idx, va_idx) in enumerate(skf.split(X, y), 1):
        print(f"\n{'='*18} FOLD {fold}/{n_folds} {'='*18}")

        model, pred, m = _fit_fold(final_params, tr_idx, va_idx, verbose=True)
        oof_pred[va_idx] = pred

        print(f"Fold {fold} Final: {m['final_score']:.6f} | AP: {m['ap']:.6f} | WLL: {m['wll']:.6f}")

        fold_scores.append(m['final_score'])
        fold_models.append(model)

    oof_metrics = calculate_competition_score(y, oof_pred)
    print("\n" + "="*70)
    print("K-Fold Final Summary (XGB)")
    print("="*70)
    print(f"Mean Final: {np.mean(fold_scores):.6f} ± {np.std(fold_scores):.6f}")
    print(f"OOF  Final: {oof_metrics['final_score']:.6f} | AP: {oof_metrics['ap']:.6f} | WLL: {oof_metrics['wll']:.6f}")

    return fold_models, {
        'mean_final_score': float(np.mean(fold_scores)),
        'oof_score': float(oof_metrics['final_score']),
        'best_params': final_params
    }

## Optuna 학습 후 최적 하이퍼파라미터로 최종 예측

In [6]:
# # =========================================================
# # 실행: Optuna 기반 K-Fold XGBoost 학습
# #  - 위에서 정의한 catboost_kfold_optuna(XGB 버전) 호출
# #  - train_features / test_features / feature_cols / CFG / target_col / SAMPLE_SUB 가 준비되어 있다고 가정
# # =========================================================
# print("\n=== Optuna 기반 K-Fold XGBoost 학습 시작 ===")
# opt_models, opt_results = catboost_kfold_optuna(
#     train_df=train_features,
#     feature_cols=feature_cols,
#     target_col=target_col,
#     cat_feature_indices=None,          # XGB에서는 미사용(호환용)
#     n_folds=CFG['N_FOLDS'],
#     n_trials=CFG['N_TRIALS'],
#     use_gpu=True                       # GPU 없으면 False
# )

# # =========================================================
# # 추론 & 제출 저장 (평균 앙상블)
# #  - 테스트에도 학습과 동일한 전처리(_prep_xgb_features) 적용
# # =========================================================
# print("\n=== Test Inference (K-Fold Avg, XGB) ===")
# X_test = _prep_xgb_features(test_features, feature_cols)
# preds = []
# for i, m in enumerate(opt_models, 1):
#     p = m.predict_proba(X_test)[:, 1]
#     preds.append(p)
#     print(f"  fold {i} done.")
# pred_test = np.mean(preds, axis=0)

# # 제출 저장
# submit = pd.read_csv(SAMPLE_SUB)
# if 'clicked' not in submit.columns:
#     submit['clicked'] = 0.5
# submit['clicked'] = np.clip(pred_test, 1e-7, 1-1e-7)
# ts = datetime.now().strftime("%Y%m%d_%H%M%S")
# save_path = f"/content/drive/MyDrive/open/submit_xgb_optuna_fromPKL_{ts}.csv"
# os.makedirs(os.path.dirname(save_path), exist_ok=True)
# submit.to_csv(save_path, index=False)
# print(f"\n[Saved] Submission -> {save_path}")

# # 모델 저장 (XGB는 json/ubj 등으로 저장 가능)
# MODEL_DIR = "/content/drive/MyDrive/open/models/xgb_optuna_fromPKL55"
# os.makedirs(MODEL_DIR, exist_ok=True)
# for i, m in enumerate(opt_models, 1):
#     m.save_model(os.path.join(MODEL_DIR, f"xgb_opt_fromPKL_fold{i}.json"))
# print(f"[Saved] {len(opt_models)} fold models -> {MODEL_DIR}")
# print("\nDone.")

## optuna로 찾은 값으로 고정 후 시드앙상블 기반 Fold학습 및 추론

[I 2025-09-11 17:46:17,820] Trial 33 finished with value: 0.508017437920223 and parameters: {'max_depth': 7, 'learning_rate': 0.012721949315056357, 'min_child_weight': 3.168389617826618, 'subsample': 0.7630521320307035, 'colsample_bytree': 0.6270674196394964, 'gamma': 3.2835272786651024, 'reg_alpha': 7.217060829745139, 'reg_lambda': 0.09191632812265578}. Best is trial 33 with value: 0.508017437920223.

In [None]:
# =========================================================
# K-Fold HistGradientBoosting (Fixed params + Seed Ensemble)
# =========================================================
import numpy as np, pandas as pd, os
from datetime import datetime
from tqdm.auto import tqdm
from sklearn.experimental import enable_hist_gradient_boosting  # noqa: F401
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import StratifiedKFold
import joblib

# ---------- 유틸 ----------
def _prep_hgb_features(df: pd.DataFrame, cols: list) -> pd.DataFrame:
    """범주형→codes, 수치형은 numeric으로 강제 + inf/이상치 방어"""
    X = df[cols].copy()
    for c in cols:
        dt = X[c].dtype
        if str(dt) == "object" or pd.api.types.is_categorical_dtype(dt):
            X[c] = X[c].astype("category").cat.codes.astype("int32")  # NaN→-1
        else:
            X[c] = pd.to_numeric(X[c], errors="coerce")
            X[c].replace([np.inf, -np.inf], np.nan, inplace=True)
            X[c] = X[c].clip(-1e12, 1e12)
    return X

def _check_sentry(name, X: pd.DataFrame):
    has_inf = np.isinf(X.to_numpy()).any()
    all_nan = X.isna().all().sum()
    print(f"[SENTRY] {name}: has_inf={has_inf} | all-NaN cols={all_nan}")

def _class_weights(y: np.ndarray):
    """0/1 비율로 샘플가중치 산출 (양/음 합 0.5씩)"""
    pos_ratio = y.mean()
    w_pos = 0.5 / max(pos_ratio, 1e-9)
    w_neg = 0.5 / max(1 - pos_ratio, 1e-9)
    return w_pos, w_neg

# ---------- Feature columns: intersection of train and test ----------
# Only columns present in both cleaned frames are used; the label and ID
# are never features. Sorting fixes the column order for train and test.
ban_cols = {target_col, 'ID'}
cols_train = set(train_features_clean.columns) - ban_cols
cols_test  = set(test_features_clean.columns)  - ban_cols
feature_cols = sorted(cols_train & cols_test)
print(f"[OK] final feature_cols = {len(feature_cols)}")

# ---------- Input preparation ----------
print("\n=== K-Fold HGB (Fixed params + Seed Ensemble) 시작 ===")
X = _prep_hgb_features(train_features_clean, feature_cols)
y = train_features_clean[target_col].astype(int).values
X_test = _prep_hgb_features(test_features_clean, feature_cols)

# NaN values are left in place for the model; only report inf / all-NaN columns.
_check_sentry("X_train", X)
_check_sentry("X_test", X_test)

# ---------- Fixed hyper-parameters ----------
# random_state here is a placeholder; it is overwritten per seed below.
HGB_PARAMS = dict(
    loss="log_loss",
    learning_rate=0.05,
    max_iter=720,
    max_leaf_nodes=175,
    max_depth=10,
    min_samples_leaf=233,
    l2_regularization=0.005,
    max_bins=201,
    early_stopping=True,
    validation_fraction=0.05,
    n_iter_no_change=37,
    random_state=42
)

SEEDS = [0, 1, 2, 3, 4]

all_models, all_preds = [], []
# Seed-averaged out-of-fold predictions, accumulated across the seed loop.
oof_pred_total = np.zeros(len(X), dtype=np.float32)

for seed in SEEDS:
    print(f"\n##### Seed {seed} #####")
    params = HGB_PARAMS.copy()
    params["random_state"] = seed

    # The seed drives both the model and the fold assignment.
    skf = StratifiedKFold(n_splits=CFG['N_FOLDS'], shuffle=True, random_state=seed)
    fold_models, fold_scores = [], []
    oof_pred = np.zeros(len(X), dtype=np.float32)

    for fold, (tr_idx, va_idx) in enumerate(skf.split(X, y), 1):
        print(f"\n{'='*18} SEED {seed} | FOLD {fold}/{CFG['N_FOLDS']} {'='*18}")

        X_tr, X_va = X.iloc[tr_idx], X.iloc[va_idx]
        y_tr, y_va = y[tr_idx], y[va_idx]

        # Sample weights that balance the two classes within this fold.
        w_pos, w_neg = _class_weights(y_tr)
        sw_tr = np.where(y_tr == 1, w_pos, w_neg).astype("float32")

        model = HistGradientBoostingClassifier(**params)
        model.fit(X_tr, y_tr, sample_weight=sw_tr)

        pred = model.predict_proba(X_va)[:, 1]
        oof_pred[va_idx] = pred

        m = calculate_competition_score(y_va, pred)
        print(f"Fold {fold} Final: {m['final_score']:.6f} | AP: {m['ap']:.6f} | WLL: {m['wll']:.6f}")

        fold_scores.append(m['final_score'])
        fold_models.append(model)

    # Per-seed OOF summary
    oof_metrics = calculate_competition_score(y, oof_pred)
    print("\n" + "="*70)
    print(f"Seed {seed} Summary (HGB Fixed params)")
    print("="*70)
    print(f"Mean Final: {np.mean(fold_scores):.6f} ± {np.std(fold_scores):.6f}")
    print(f"OOF  Final: {oof_metrics['final_score']:.6f} | AP: {oof_metrics['ap']:.6f} | WLL: {oof_metrics['wll']:.6f}")

    all_models.append(fold_models)
    oof_pred_total += oof_pred / len(SEEDS)  # running seed average

# Score the seed-averaged OOF predictions. This is the closest offline
# estimate of the averaged ensemble that is submitted below.
ensemble_oof_metrics = calculate_competition_score(y, oof_pred_total)
print("\n" + "="*70)
print("Seed-Ensemble OOF Summary (HGB Fixed params)")
print("="*70)
print(f"OOF  Final: {ensemble_oof_metrics['final_score']:.6f} | AP: {ensemble_oof_metrics['ap']:.6f} | WLL: {ensemble_oof_metrics['wll']:.6f}")

# =========================================================
# Test inference (mean over every seed x fold model) + save
# =========================================================
print("\n=== Test Inference (K-Fold Avg × Seed Avg, HGB Fixed) ===")
preds = []
for s_idx, fold_models in enumerate(all_models):
    for f_idx, m in enumerate(fold_models, 1):
        p = m.predict_proba(X_test)[:, 1]
        preds.append(p)
        print(f"  seed {SEEDS[s_idx]} fold {f_idx} done.")

pred_test = np.mean(preds, axis=0)

# Submission: predictions are written positionally, so the test frame is
# assumed to be in the same row order as the sample submission.
submit = pd.read_csv(SAMPLE_SUB)
if 'clicked' not in submit.columns:
    submit['clicked'] = 0.5
submit['clicked'] = np.clip(pred_test, 1e-7, 1 - 1e-7)

ts = datetime.now().strftime("%Y%m%d_%H%M%S")
save_path = f"/content/drive/MyDrive/open/submit_HGB_FIXED_seedEnsemble_{ts}.csv"
os.makedirs(os.path.dirname(save_path), exist_ok=True)
submit.to_csv(save_path, index=False)
print(f"\n[Saved] Submission -> {save_path}")

# Persist every fitted model with joblib, one file per seed/fold.
MODEL_DIR = "/content/drive/MyDrive/open/models/hgb_fixed_seedEnsemble"
os.makedirs(MODEL_DIR, exist_ok=True)
idx = 0
for s_idx, fold_models in enumerate(all_models):
    for f_idx, m in enumerate(fold_models, 1):
        joblib.dump(m, os.path.join(MODEL_DIR, f"hgb_fixed_seed{SEEDS[s_idx]}_fold{f_idx}.joblib"))
        idx += 1
print(f"[Saved] {idx} models -> {MODEL_DIR}")
print("\nDone.")

[OK] final feature_cols = 172

=== K-Fold HGB (Fixed params + Seed Ensemble) 시작 ===
[SENTRY] X_train: has_inf=False | all-NaN cols=0
[SENTRY] X_test: has_inf=False | all-NaN cols=0

##### Seed 0 #####

Fold 1 Final: 0.507461 | AP: 0.610546 | WLL: 0.595624

Fold 2 Final: 0.502147 | AP: 0.603602 | WLL: 0.599308

Fold 3 Final: 0.506190 | AP: 0.609025 | WLL: 0.596645

Fold 4 Final: 0.505496 | AP: 0.608535 | WLL: 0.597544

Fold 5 Final: 0.507190 | AP: 0.610503 | WLL: 0.596122

Seed 0 Summary (HGB Fixed params)
Mean Final: 0.505697 ± 0.001909
OOF  Final: 0.505684 | AP: 0.608416 | WLL: 0.597049

##### Seed 1 #####

Fold 1 Final: 0.502930 | AP: 0.604585 | WLL: 0.598726

Fold 2 Final: 0.506475 | AP: 0.609656 | WLL: 0.596706

Fold 3 Final: 0.504235 | AP: 0.606635 | WLL: 0.598166

Fold 4 Final: 0.506010 | AP: 0.608707 | WLL: 0.596688

Fold 5 Final: 0.509193 | AP: 0.612601 | WLL: 0.594215

Seed 1 Summary (HGB Fixed params)
Mean Final: 0.505768 ± 0.002131
OOF  Final: 0.505745 | AP: 0.608391 | WLL: 