In [None]:
# ▶ 1. 필수 라이브러리
import pandas as pd
import numpy as np
from catboost import CatBoostClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
import warnings
warnings.filterwarnings('ignore')

# ▶ 2. Load the dataset ('utf-8-sig' drops a leading BOM if the file has one)
df = pd.read_csv(
    '피처_43/피처 선택/43_Segment 추가.csv',
    encoding='utf-8-sig',
)

# ▶ 3. Build y_vif: encode the 'Segment' column as integer class labels
le = LabelEncoder()
y_vif = le.fit_transform(df['Segment'])

# ▶ 4. Build X_selected: every column except the 'Segment' target and 'ID'
X_selected = df.drop(['Segment', 'ID'], axis=1)
# Work on an independent copy so X_selected itself stays untouched
X_vif = X_selected.copy()

# ▶ 5. 교차검증 수동 구현 (CatBoost 안정성 확보)
def evaluate_cv_score_manual(X_input, y_input, cv=3):
    """Run stratified K-fold cross-validation with CatBoost and report micro-F1.

    For every fold a fresh ``CatBoostClassifier`` is fitted on the training
    split, with the validation split passed as ``eval_set`` (used for early
    stopping and best-iteration selection). The model is then scored on that
    same validation split with micro-averaged F1. Each fold score and the
    overall mean are printed.

    Args:
        X_input: Feature table; must support positional ``.iloc`` indexing
            (e.g. a pandas DataFrame).
        y_input: Class labels aligned row-by-row with ``X_input``. Any
            array-like is accepted (NumPy array, list, pandas Series).
        cv: Number of stratified folds.

    Returns:
        Mean micro-F1 score over all folds.

    Note:
        The validation split drives early stopping *and* is used for scoring,
        so the reported score may be optimistic compared with a truly
        held-out set.
    """
    # Convert once to a NumPy array so the fold indices are applied
    # positionally. ``series[int_array]`` on a pandas Series is label-based
    # and fails (or misaligns rows) when the index is not 0..n-1, whereas
    # X_input is sliced positionally via ``.iloc`` below.
    y_array = np.asarray(y_input)

    skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=42)
    scores = []

    for fold, (train_idx, valid_idx) in enumerate(skf.split(X_input, y_array), 1):
        X_train, X_val = X_input.iloc[train_idx], X_input.iloc[valid_idx]
        y_train, y_val = y_array[train_idx], y_array[valid_idx]

        # A new model per fold so no state leaks between folds.
        model = CatBoostClassifier(
            loss_function="MultiClass",
            eval_metric="MultiClass",
            task_type="GPU",  # NOTE(review): presumably needs a GPU-enabled CatBoost setup
            learning_rate=0.01,
            iterations=70000,
            early_stopping_rounds=3000,
            l2_leaf_reg=50,
            random_seed=42,
            od_type="Iter",
            depth=5,
            border_count=64,
            verbose=1000
        )

        # use_best_model=True keeps the iteration with the best eval_set metric.
        model.fit(X_train, y_train, eval_set=(X_val, y_val), use_best_model=True)
        preds = model.predict(X_val)
        score = f1_score(y_val, preds, average='micro')
        scores.append(score)

        print(f"• Fold {fold}: {score:.4f}")

    # Compute the mean once and reuse it for both the report and the return.
    mean_score = np.mean(scores)
    print(f"\n✅ 평균 F1_micro 스코어: {mean_score:.4f}")
    return mean_score

# ▶ 6. Run the cross-validation on the prepared features and encoded labels
evaluate_cv_score_manual(X_input=X_vif, y_input=y_vif)

0:	learn: 1.5800645	test: 1.5800877	best: 1.5800877 (0)	total: 123ms	remaining: 2h 24m
