# TeamBoost v3 (1등 노림) — DL Stacking (MLP+FTT → Stacker MLP)

출력 파일
- submission_stack.csv : 스태커 최종(추천)
- submission_prob.csv  : 최적 가중치 확률 평균
- submission_rank.csv  : rank 평균
- submission_best.csv  : OOF 기준 최선 자동 선택


In [1]:
import os, random, warnings
import numpy as np
import pandas as pd
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import roc_auc_score


In [2]:
# --- Base-model training configuration ---------------------------------
# One full cross-validated run is executed per seed; results are averaged across seeds.
SEEDS = [42, 202, 777]
# Number of StratifiedKFold splits used for out-of-fold (OOF) predictions.
N_FOLDS = 5
# Mini-batch size for the base-model DataLoaders.
BATCH_SIZE = 512
# Maximum number of epochs per base model (default for train_base).
EPOCHS = 50
# Early-stopping patience, in epochs without validation-AUC improvement (default for train_base).
PATIENCE = 8

# --- Stacker training configuration (defaults for train_stacker) -------
STACK_EPOCHS = 200
STACK_PATIENCE = 20
STACK_LR = 3e-3

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)

def set_seed(seed):
    """Seed every random number generator this notebook draws from.

    Seeds Python's ``random`` module, NumPy's legacy global RNG and PyTorch's
    CPU generator; when CUDA is available, all CUDA generators are seeded too.

    Args:
        seed: Integer seed applied to each generator.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)


Device: cpu


In [3]:
# Load the raw train/test tables; the paths are relative to the notebook's working directory.
train_raw = pd.read_csv("../../data/raw/train.csv")
test_raw  = pd.read_csv("../../data/raw/test_x.csv")
# Binary training target: 1 where the raw `voted` column equals 2, otherwise 0.
# NOTE(review): presumably `voted == 2` encodes "did not vote" — confirm against the data dictionary.
train_raw["voted_bin"] = (train_raw["voted"] == 2).astype(int)
# Sanity check: table shapes and the share of positive labels.
print("Train:", train_raw.shape, "Test:", test_raw.shape, "pos_ratio:", float(train_raw["voted_bin"].mean()))


Train: (45532, 79) Test: (11383, 77) pos_ratio: 0.5468242115435298


In [4]:
def clean_data(df):
    """Return a cleaned copy of ``df``; the input frame is left untouched.

    Cleaning rules (each applied only to columns that are present):
      * ``education``, ``engnat``, ``hand``, ``married``, ``urban`` and
        ``tp01``..``tp10``: the value 0 is replaced by NaN.
      * ``familysize``: 0 and values above 15 are replaced by NaN.
      * ``QaE``..``QtE``: values are clipped to the range [0, 60000].

    Missing values are introduced with ``Series.mask`` rather than
    ``df.loc[mask, col] = np.nan``. Writing NaN into an integer column through
    ``.loc`` relies on silent int->float upcasting on setitem, which pandas
    deprecates (PDEP-6); ``mask`` returns a new, properly typed Series instead.

    Args:
        df: Raw train or test DataFrame.

    Returns:
        A new DataFrame with the same columns in the same order.
    """
    df = df.copy()

    # Columns in which 0 is treated as "no answer".
    zero_coded = ['education', 'engnat', 'hand', 'married', 'urban']
    zero_coded += [f"tp{i:02d}" for i in range(1, 11)]
    for col in zero_coded:
        if col in df.columns:
            df[col] = df[col].mask(df[col] == 0)

    if 'familysize' in df.columns:
        family = df['familysize']
        # 0 and implausibly large sizes are both treated as missing.
        df['familysize'] = family.mask((family == 0) | (family > 15))

    for col in [f"Q{c}E" for c in "abcdefghijklmnopqrst"]:
        if col in df.columns:
            df[col] = df[col].clip(lower=0, upper=60000)
    return df

def build_features(df):
    """Return a copy of ``df`` extended with engineered feature columns.

    The input frame is not modified. New columns are appended in this order:
    demographic flags, answer (``Q?A``) statistics, log response times
    (``Q?E_log``) and their aggregates, ``tp`` item features, word-list
    (``wr_``/``wf_``) features, and pairwise interaction terms.

    Args:
        df: DataFrame holding ``age_group``, ``education``, ``married``,
            ``urban``, ``engnat``, ``gender``, ``QaA``..``QtA``,
            ``QaE``..``QtE``, ``tp01``..``tp10``, ``wr_01``..``wr_13`` and
            ``wf_01``..``wf_03``.

    Returns:
        A new DataFrame with the original columns followed by the features.
    """
    out = df.copy()
    letters = "abcdefghijklmnopqrst"

    # ---- demographics ----------------------------------------------------
    out["age_ord"] = out["age_group"].map(
        {"10s": 1, "20s": 2, "30s": 3, "40s": 4, "50s": 5, "60s": 6, "+70s": 7}
    )
    age = out["age_ord"]
    out["is_teenager"] = age.eq(1).astype(int)
    out["is_young"] = age.le(2).astype(int)
    out["is_old"] = age.ge(6).astype(int)
    # Comparisons against NaN are False, so missing values yield 0 in every flag.
    out["edu_low"] = out["education"].le(2).astype(float)
    out["edu_high"] = out["education"].ge(3).astype(float)
    out["is_single"] = out["married"].eq(1).astype(float)
    out["is_married"] = out["married"].eq(2).astype(float)
    out["is_urban"] = out["urban"].eq(3).astype(float)
    out["is_english_native"] = out["engnat"].eq(1).astype(float)
    out["is_male"] = out["gender"].eq("Male").astype(int)

    # ---- answer statistics (Q?A) -----------------------------------------
    answers = out[[f"Q{c}A" for c in letters]]
    out["qa_mean"] = answers.mean(axis=1)
    out["qa_std"] = answers.std(axis=1)
    out["qa_range"] = answers.max(axis=1) - answers.min(axis=1)
    out["qa_extreme_ratio"] = (answers.eq(1) | answers.eq(5)).sum(axis=1) / 20
    out["qa_neutral_ratio"] = answers.eq(3).sum(axis=1) / 20
    # Zero spread means the same answer was given to every item.
    out["qa_all_same"] = out["qa_std"].eq(0).astype(int)

    # ---- response times (Q?E) --------------------------------------------
    time_cols = [f"Q{c}E" for c in letters]
    log_time_cols = [f"{name}_log" for name in time_cols]
    for source, target in zip(time_cols, log_time_cols):
        out[target] = np.log1p(out[source])
    raw_times = out[time_cols]
    log_times = out[log_time_cols]
    out["qe_log_mean"] = log_times.mean(axis=1)
    out["qe_log_std"] = log_times.std(axis=1)
    out["qe_fast_ratio"] = raw_times.lt(500).sum(axis=1) / 20
    out["qe_total_log"] = log_times.sum(axis=1)
    out["is_careless"] = (raw_times.mean(axis=1).lt(500) | out["qa_all_same"].eq(1)).astype(int)

    # ---- tp items --------------------------------------------------------
    items = out[[f"tp{i:02d}" for i in range(1, 11)]]
    out["tp_missing_ratio"] = items.isna().sum(axis=1) / 10
    trait_pairs = {
        "extraversion": ("tp01", "tp06"),
        "agreeableness": ("tp07", "tp02"),
        "conscientiousness": ("tp03", "tp08"),
        "neuroticism": ("tp04", "tp09"),
        "openness": ("tp05", "tp10"),
    }
    for trait, (minuend, subtrahend) in trait_pairs.items():
        out[trait] = out[minuend] - out[subtrahend]
    out["tp_mean"] = items.mean(axis=1)

    # ---- word lists ------------------------------------------------------
    out["wr_sum"] = out[[f"wr_{i:02d}" for i in range(1, 14)]].sum(axis=1)
    out["wf_sum"] = out[[f"wf_{i:02d}" for i in range(1, 4)]].sum(axis=1)
    out["word_credibility"] = out["wr_sum"] - out["wf_sum"]
    out["vocab_high"] = out["wr_sum"].ge(11).astype(int)

    # ---- interaction terms -----------------------------------------------
    interactions = (
        ("age_edu", "age_ord", "education"),
        ("young_low_edu", "is_young", "edu_low"),
        ("young_single", "is_young", "is_single"),
        ("old_married", "is_old", "is_married"),
        ("teenager_low_edu", "is_teenager", "edu_low"),
    )
    for name, left, right in interactions:
        out[name] = out[left] * out[right]
    return out

def target_encode(train_df, val_df, test_df, col, target_col, smoothing=10):
    """Smoothed mean target encoding of one categorical column.

    Category statistics are computed on ``train_df`` only. Each category's
    target mean is shrunk toward the global training mean:

        te = (count * mean + smoothing * global_mean) / (count + smoothing)

    Categories that do not appear in ``train_df`` (including missing values)
    are encoded with the global training mean.

    Args:
        train_df: Frame used to fit the encoding; must contain ``target_col``.
        val_df: Validation frame to encode.
        test_df: Test frame to encode.
        col: Name of the categorical column.
        target_col: Name of the target column in ``train_df``.
        smoothing: Weight given to the global mean.

    Returns:
        Tuple ``(train_values, val_values, test_values)`` of NumPy arrays.
    """
    prior = train_df[target_col].mean()
    stats = train_df.groupby(col)[target_col].agg(['mean', 'count'])
    shrunk = (stats['count'] * stats['mean'] + smoothing * prior) / (stats['count'] + smoothing)
    lookup = shrunk.to_dict()

    def _encode(frame):
        # Unseen categories map to NaN and then fall back to the prior.
        return frame[col].map(lookup).fillna(prior).values

    return _encode(train_df), _encode(val_df), _encode(test_df)

def create_target_encodings(train_df, val_df, test_df, target_col="voted_bin"):
    """Build every target-encoded feature for one train/val/test split.

    Encodes ``age_group``, ``race`` and ``religion`` (smoothing 10), then four
    combined categories derived from ``age_group`` with ``education``,
    ``married`` and ``race`` (smoothing 5, or 3 for the three-way combination).

    Side effect: the combined-category columns ``age_edu_cat``,
    ``age_married_cat``, ``age_race_cat`` and ``age_edu_married_cat`` are
    added to all three input frames in place.

    Args:
        train_df: Training-fold frame; encodings are fit on it.
        val_df: Validation-fold frame.
        test_df: Test frame.
        target_col: Name of the target column in ``train_df``.

    Returns:
        Dict with keys ``'train'``, ``'val'`` and ``'test'``; each maps
        ``'<column>_te'`` to the encoded array for that split.
    """
    splits = ('train', 'val', 'test')
    encodings = {split: {} for split in splits}

    def _store(column, smoothing):
        # Encode one column and file the three arrays under '<column>_te'.
        encoded = target_encode(train_df, val_df, test_df, column, target_col, smoothing)
        for split, values in zip(splits, encoded):
            encodings[split][f'{column}_te'] = values

    for column in ('age_group', 'race', 'religion'):
        _store(column, 10)

    for frame in (train_df, val_df, test_df):
        age = frame['age_group'].astype(str)
        edu = frame['education'].astype(str)
        marital = frame['married'].astype(str)
        frame['age_edu_cat'] = age + '_' + edu
        frame['age_married_cat'] = age + '_' + marital
        frame['age_race_cat'] = age + '_' + frame['race'].astype(str)
        frame['age_edu_married_cat'] = age + '_' + edu + '_' + marital

    combined = (
        ('age_edu_cat', 5),
        ('age_married_cat', 5),
        ('age_race_cat', 5),
        ('age_edu_married_cat', 3),
    )
    for column, smoothing in combined:
        _store(column, smoothing)
    return encodings


In [5]:
# Column groups that come straight from (or are derived per-column from) the raw data.
_ITEM_LETTERS = "abcdefghijklmnopqrst"
qa_cols = [f"Q{letter}A" for letter in _ITEM_LETTERS]
qe_log_cols = [f"Q{letter}E_log" for letter in _ITEM_LETTERS]
wr_cols = [f"wr_{number:02d}" for number in range(1, 14)]
wf_cols = [f"wf_{number:02d}" for number in range(1, 4)]
tp_cols = [f"tp{number:02d}" for number in range(1, 11)]

# Numerical model inputs: the per-item groups first, then the scalar features.
num_features = []
for _group in (qa_cols, qe_log_cols, wr_cols, wf_cols, tp_cols):
    num_features.extend(_group)
num_features.extend([
    # raw demographics
    "age_ord", "education", "married", "urban", "engnat", "familysize", "hand",
    # demographic flags
    "is_teenager", "is_young", "is_old", "edu_low", "edu_high",
    "is_single", "is_married", "is_urban", "is_english_native", "is_male",
    # answer statistics
    "qa_mean", "qa_std", "qa_range", "qa_extreme_ratio", "qa_neutral_ratio", "qa_all_same",
    # response-time statistics
    "qe_log_mean", "qe_log_std", "qe_fast_ratio", "qe_total_log", "is_careless",
    # tp item features
    "tp_missing_ratio", "tp_mean",
    "extraversion", "agreeableness", "conscientiousness", "neuroticism", "openness",
    # word-list features
    "wr_sum", "wf_sum", "word_credibility", "vocab_high",
    # interaction terms
    "age_edu", "young_low_edu", "young_single", "old_married", "teenager_low_edu",
])

# Target-encoded columns; their values are computed separately for each fold.
te_features = [
    f"{base}_te"
    for base in (
        'age_group', 'race', 'religion',
        'age_edu_cat', 'age_married_cat', 'age_race_cat', 'age_edu_married_cat',
    )
]

# Categorical columns fed to the models through embedding tables.
cat_features = ['gender', 'race', 'religion']

class TabDataset(Dataset):
    """In-memory tabular dataset of numerical features, categorical codes and optional labels.

    Args:
        X_num: Array-like of numerical features, stored as a float32 tensor.
        X_cat: Array-like of integer category codes, stored as a long tensor.
        y: Optional array-like of labels, stored as a float32 column
            (one extra trailing dimension). ``None`` for unlabeled data.
    """

    def __init__(self, X_num, X_cat, y=None):
        self.X_num = torch.tensor(X_num, dtype=torch.float32)
        self.X_cat = torch.tensor(X_cat, dtype=torch.long)
        if y is None:
            self.y = None
        else:
            self.y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)

    def __len__(self):
        return len(self.X_num)

    def __getitem__(self, idx):
        """Return ``(x_num, x_cat)`` or, when labels exist, ``(x_num, x_cat, y)``."""
        sample = (self.X_num[idx], self.X_cat[idx])
        if self.y is not None:
            sample = sample + (self.y[idx],)
        return sample


In [6]:
class MLP(nn.Module):
    """Feed-forward network over numerical features and embedded categorical codes.

    Each categorical column gets its own embedding table with ``dim + 1`` rows.
    The embeddings are concatenated with the numerical features and passed
    through ``Linear -> BatchNorm1d -> ReLU -> Dropout`` blocks, followed by a
    single-logit output layer. All ``Linear`` layers use Xavier-uniform weights
    and zero biases.

    Args:
        num_features: Number of numerical input features.
        cat_dims: Cardinality of each categorical column (may be empty).
        embed_dim: Embedding width per categorical column.
        hidden_dims: Widths of the hidden layers (may be empty, in which case
            the output layer reads the concatenated inputs directly).
        dropout: Dropout probability after each hidden layer.
    """

    # The default is a tuple so that no mutable object is shared between calls.
    def __init__(self, num_features, cat_dims, embed_dim=8, hidden_dims=(256, 128, 64), dropout=0.3):
        super().__init__()
        self.embeddings = nn.ModuleList([nn.Embedding(dim + 1, embed_dim) for dim in cat_dims])
        input_dim = num_features + len(cat_dims) * embed_dim
        layers_ = []
        prev = input_dim
        for h in hidden_dims:
            layers_ += [nn.Linear(prev, h), nn.BatchNorm1d(h), nn.ReLU(), nn.Dropout(dropout)]
            prev = h
        self.mlp = nn.Sequential(*layers_)
        # `prev` is the width of the last hidden layer, or `input_dim` when
        # there are no hidden layers (indexing hidden_dims[-1] would fail then).
        self.output = nn.Linear(prev, 1)
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x_num, x_cat):
        """Return one logit per row.

        Args:
            x_num: Float tensor of numerical features, one row per sample.
            x_cat: Long tensor of category codes, one column per embedding table.

        Returns:
            Tensor with a single logit column.
        """
        if len(self.embeddings) == 0:
            # torch.cat rejects an empty list, so skip the embedding branch.
            x = x_num
        else:
            cat_embeds = torch.cat([emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)], dim=1)
            x = torch.cat([x_num, cat_embeds], dim=1)
        x = self.mlp(x)
        return self.output(x)

class NumericalEmbedding(nn.Module):
    """Turn each scalar feature into a ``d_token``-wide token.

    Every feature owns a learned weight vector and bias vector; the token for
    a feature is ``value * weight + bias``. Weights start as normal noise
    scaled by 0.02 and biases start at zero.

    Args:
        num_features: Number of scalar input features.
        d_token: Width of each produced token.
    """

    def __init__(self, num_features, d_token):
        super().__init__()
        table_shape = (num_features, d_token)
        self.weight = nn.Parameter(0.02 * torch.randn(*table_shape))
        self.bias = nn.Parameter(torch.zeros(*table_shape))

    def forward(self, x):
        """Map ``x`` to tokens by adding a trailing token dimension."""
        scaled = x[..., None] * self.weight
        return scaled + self.bias

class FTTransformer(nn.Module):
    """Transformer over per-feature tokens with a learned CLS token.

    Numerical features are tokenized by ``NumericalEmbedding`` and each
    categorical column by its own embedding table (``dim + 1`` rows). A CLS
    token is prepended, the sequence goes through a pre-norm GELU transformer
    encoder, and the encoded CLS position is mapped to a single logit.

    Args:
        num_features: Number of numerical input features.
        cat_dims: Cardinality of each categorical column.
        d_token: Token width (the transformer's model dimension).
        n_layers: Number of encoder layers.
        n_heads: Number of attention heads.
        dropout: Dropout probability in the encoder and in the head.
    """

    def __init__(self, num_features, cat_dims, d_token=48, n_layers=2, n_heads=4, dropout=0.2):
        super().__init__()
        # Tokenizers and the CLS token.
        self.num_embed = NumericalEmbedding(num_features, d_token)
        self.cat_embeds = nn.ModuleList(
            [nn.Embedding(cardinality + 1, d_token) for cardinality in cat_dims]
        )
        self.cls_token = nn.Parameter(torch.randn(1, 1, d_token) * 0.02)

        # Encoder stack.
        block = nn.TransformerEncoderLayer(
            d_model=d_token,
            nhead=n_heads,
            dim_feedforward=d_token * 2,
            dropout=dropout,
            activation='gelu',
            batch_first=True,
            norm_first=True,
        )
        self.transformer = nn.TransformerEncoder(block, num_layers=n_layers)

        # Prediction head applied to the encoded CLS token.
        half_width = d_token // 2
        self.head = nn.Sequential(
            nn.LayerNorm(d_token),
            nn.Linear(d_token, half_width),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(half_width, 1),
        )

    def forward(self, x_num, x_cat):
        """Return one logit per row.

        Args:
            x_num: Float tensor of numerical features, one row per sample.
            x_cat: Long tensor of category codes, one column per embedding table.
        """
        per_column = [table(x_cat[:, j]) for j, table in enumerate(self.cat_embeds)]
        feature_tokens = torch.cat([self.num_embed(x_num), torch.stack(per_column, dim=1)], dim=1)
        cls = self.cls_token.expand(feature_tokens.size(0), -1, -1)
        encoded = self.transformer(torch.cat([cls, feature_tokens], dim=1))
        # Position 0 holds the CLS token.
        return self.head(encoded[:, 0])


In [7]:
class Stacker(nn.Module):
    """Small one-hidden-layer network that combines base-model predictions.

    Architecture: ``Linear -> ReLU -> Dropout -> Linear`` producing one logit.
    Both linear layers use Xavier-uniform weights and zero biases.

    Args:
        in_dim: Number of input features (one per base model).
        hidden: Width of the hidden layer.
        dropout: Dropout probability after the hidden activation.
    """

    def __init__(self, in_dim=2, hidden=16, dropout=0.1):
        super().__init__()
        hidden_layer = nn.Linear(in_dim, hidden)
        logit_layer = nn.Linear(hidden, 1)
        self.net = nn.Sequential(hidden_layer, nn.ReLU(), nn.Dropout(dropout), logit_layer)
        for layer in (hidden_layer, logit_layer):
            nn.init.xavier_uniform_(layer.weight)
            nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return one logit per input row."""
        return self.net(x)


In [8]:
def train_base(model, train_loader, val_loader, train_y, val_y, device, epochs=EPOCHS, patience=PATIENCE, lr=1e-3):
    """Train a base model with early stopping on validation ROC-AUC.

    Uses ``BCEWithLogitsLoss`` with ``pos_weight = (1 - p) / (p + 1e-6)``
    where ``p`` is the mean of ``train_y``, AdamW (weight decay 1e-4),
    gradient-norm clipping at 1.0 and a ``ReduceLROnPlateau`` scheduler that
    halves the learning rate after 3 epochs without AUC improvement. The
    weights of the best-AUC epoch are restored before returning.

    Args:
        model: Module called as ``model(x_num, x_cat)`` and returning logits.
        train_loader: Loader yielding ``(x_num, x_cat, y)`` batches.
        val_loader: Loader yielding ``(x_num, x_cat, y)`` batches, in the same
            row order as ``val_y``.
        train_y: Training labels, used only to compute the positive weight.
        val_y: Validation labels used to score each epoch.
        device: Device on which the model and batches are placed.
        epochs: Maximum number of epochs.
        patience: Number of consecutive non-improving epochs before stopping.
        lr: Initial learning rate.

    Returns:
        The same ``model`` instance, loaded with its best weights.
    """
    model.to(device)

    positive_share = float(np.mean(train_y))
    criterion = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([(1 - positive_share) / (positive_share + 1e-6)], device=device)
    )
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)

    def _validation_auc():
        # Score the current weights on the whole validation loader.
        model.eval()
        chunks = []
        with torch.no_grad():
            for X_num, X_cat, _ in val_loader:
                X_num, X_cat = X_num.to(device), X_cat.to(device)
                chunks.append(torch.sigmoid(model(X_num, X_cat)).cpu().numpy())
        return roc_auc_score(val_y, np.concatenate(chunks).ravel())

    top_auc = -1
    top_weights = None
    stale_epochs = 0

    for _ in range(epochs):
        model.train()
        for X_num, X_cat, yy in train_loader:
            X_num, X_cat, yy = X_num.to(device), X_cat.to(device), yy.to(device)
            optimizer.zero_grad()
            batch_loss = criterion(model(X_num, X_cat), yy)
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

        auc = _validation_auc()
        scheduler.step(auc)

        if auc > top_auc + 1e-5:
            # New best epoch: snapshot the weights on the CPU.
            top_auc = auc
            top_weights = {name: tensor.detach().cpu().clone() for name, tensor in model.state_dict().items()}
            stale_epochs = 0
            continue

        stale_epochs += 1
        if stale_epochs >= patience:
            break

    if top_weights is not None:
        model.load_state_dict(top_weights)
    return model

def predict_base(model, loader, device):
    """Return sigmoid probabilities for every row produced by ``loader``.

    The model is switched to eval mode and run without gradient tracking.
    Only the first two elements of each batch (numerical and categorical
    inputs) are used, so labeled and unlabeled loaders both work.

    Args:
        model: Module called as ``model(x_num, x_cat)`` and returning logits.
        loader: Iterable of batches whose first two items are the inputs.
        device: Device the batches are moved to before the forward pass.

    Returns:
        1-D NumPy array of probabilities in loader order.
    """
    model.eval()
    with torch.no_grad():
        chunks = [
            torch.sigmoid(model(batch[0].to(device), batch[1].to(device))).cpu().numpy()
            for batch in loader
        ]
    return np.concatenate(chunks).ravel()

def best_blend_weight(y_true, p_mlp, p_ft):
    """Grid-search the MLP weight of a two-model probability blend.

    Evaluates ``w * p_mlp + (1 - w) * p_ft`` for 101 evenly spaced weights in
    [0, 1] and keeps the first weight that reaches the highest ROC-AUC.

    Args:
        y_true: True binary labels.
        p_mlp: Predictions of the MLP model.
        p_ft: Predictions of the FT-Transformer model.

    Returns:
        Tuple ``(best_weight, best_auc)``.
    """
    top_auc, top_weight = -1, 0.5
    for weight in np.linspace(0.0, 1.0, 101):
        blended = weight * p_mlp + (1 - weight) * p_ft
        score = roc_auc_score(y_true, blended)
        if score > top_auc:
            top_auc = score
            top_weight = weight
    return top_weight, top_auc

def rank_avg(a, b):
    """Average the ranks of two 1-D score arrays and rescale to [0, 1].

    Tied scores receive the mean of the ranks they span, so equal inputs give
    equal outputs regardless of their position in the array. (A double
    ``argsort`` assigns distinct, order-dependent ranks to ties.)

    Args:
        a: 1-D array-like of scores.
        b: 1-D array-like of scores with the same length as ``a``.

    Returns:
        float32 NumPy array of min-max normalized average ranks; an empty
        array when the inputs are empty.
    """
    def _average_ranks(values):
        # Zero-based ranks; every member of a tie group gets the group's mean rank.
        _, inverse, counts = np.unique(np.asarray(values), return_inverse=True, return_counts=True)
        first_rank = np.cumsum(counts) - counts
        return (first_rank + (counts - 1) / 2.0)[inverse]

    r = (_average_ranks(a) + _average_ranks(b)) / 2.0
    if r.size == 0:
        # min()/max() are undefined on empty arrays.
        return r.astype(np.float32)
    # The epsilon keeps the division defined when all averaged ranks are equal.
    r = (r - r.min()) / (r.max() - r.min() + 1e-12)
    return r.astype(np.float32)

def train_stacker(oof_feats, y_true, epochs=STACK_EPOCHS, patience=STACK_PATIENCE, lr=STACK_LR):
    """Fit the ``Stacker`` network on out-of-fold base-model predictions.

    The rows are shuffled with NumPy's global RNG and split 80/20 into a
    fitting part and a hold-out part. Training uses Adam with unweighted
    ``BCEWithLogitsLoss`` and stops after ``patience`` epochs without hold-out
    ROC-AUC improvement; the best-AUC weights are restored before returning.

    Args:
        oof_feats: 2-D NumPy array with one column per base model.
        y_true: 1-D NumPy array of binary labels aligned with ``oof_feats``.
        epochs: Maximum number of epochs.
        patience: Number of consecutive non-improving epochs before stopping.
        lr: Adam learning rate.

    Returns:
        The trained ``Stacker`` module (on ``DEVICE``).
    """
    X = torch.tensor(oof_feats, dtype=torch.float32)
    y_t = torch.tensor(y_true.reshape(-1, 1), dtype=torch.float32)

    # Random 80/20 split; reproducibility depends on the global NumPy seed.
    n = len(oof_feats)
    idx = np.arange(n)
    np.random.shuffle(idx)
    split = int(n * 0.8)
    tr, va = idx[:split], idx[split:]

    train_loader = DataLoader(torch.utils.data.TensorDataset(X[tr], y_t[tr]), batch_size=2048, shuffle=True)
    # The hold-out part is scored in one pass, so it needs no DataLoader;
    # move/convert it once instead of on every epoch.
    X_va = X[va].to(DEVICE)
    y_va = y_t[va].numpy().ravel()

    model = Stacker(in_dim=oof_feats.shape[1], hidden=16, dropout=0.1).to(DEVICE)
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    bce = nn.BCEWithLogitsLoss()

    best_auc = -1
    best_state = None
    no_imp = 0

    for _ in range(epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            opt.zero_grad()
            loss = bce(model(xb), yb)
            loss.backward()
            opt.step()

        model.eval()
        with torch.no_grad():
            logits = model(X_va).cpu().numpy().ravel()
        preds = 1/(1+np.exp(-logits))
        auc = roc_auc_score(y_va, preds)

        if auc > best_auc + 1e-6:
            best_auc = auc
            # Snapshot the best weights on the CPU.
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            no_imp = 0
        else:
            no_imp += 1
            if no_imp >= patience:
                break

    if best_state is not None:
        model.load_state_dict(best_state)
    return model

def predict_stacker(model, feats):
    """Return the stacker's probabilities for ``feats``.

    Args:
        model: Trained stacker module returning one logit per row.
        feats: 2-D array-like with one column per base model.

    Returns:
        1-D NumPy array of sigmoid probabilities.
    """
    inputs = torch.tensor(feats, dtype=torch.float32).to(DEVICE)
    model.eval()
    with torch.no_grad():
        raw_scores = model(inputs)
    logits = raw_scores.cpu().numpy().ravel()
    # Logistic sigmoid computed in NumPy.
    return 1 / (1 + np.exp(-logits))


## 7) 메인 실행: 멀티시드 → 베이스 OOF/test → 스태커 학습 → 제출 4종


In [None]:
def run_one_seed(seed):
    """Run one full cross-validated training pass of both base models.

    For the given seed, the raw train/test tables are cleaned and split with
    ``StratifiedKFold`` (``N_FOLDS`` splits, shuffled, stratified on
    ``voted_bin``). In every fold the features are rebuilt, target encodings,
    median imputation values and the ``StandardScaler`` are fit on the
    training part only, and an ``MLP`` and an ``FTTransformer`` are trained
    with ``train_base``.

    Args:
        seed: Seed passed to ``set_seed`` and used as the fold ``random_state``.

    Returns:
        Tuple ``(oof_mlp, oof_ft, test_mlp, test_ft)`` of float32 arrays:
        out-of-fold predictions on the training rows and test predictions
        averaged over the folds, for the MLP and the FT-Transformer.
    """
    set_seed(seed)
    train_clean = clean_data(train_raw)
    test_clean  = clean_data(test_raw)

    # Out-of-fold predictions (filled fold by fold) and fold-averaged test predictions.
    oof_mlp = np.zeros(len(train_clean), dtype=np.float32)
    oof_ft  = np.zeros(len(train_clean), dtype=np.float32)
    test_mlp = np.zeros(len(test_clean), dtype=np.float32)
    test_ft  = np.zeros(len(test_clean), dtype=np.float32)

    skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=seed)

    for fold, (tr_idx, va_idx) in enumerate(skf.split(train_clean, train_clean["voted_bin"])):
        # Fresh copies per fold: create_target_encodings adds columns to the frames it receives.
        train_fold = train_clean.iloc[tr_idx].copy().reset_index(drop=True)
        val_fold   = train_clean.iloc[va_idx].copy().reset_index(drop=True)
        test_fold  = test_clean.copy()

        train_fe = build_features(train_fold)
        val_fe   = build_features(val_fold)
        test_fe  = build_features(test_fold)

        # Target encodings are fit on the training part of the fold only.
        te_dict = create_target_encodings(train_fe, val_fe, test_fe, "voted_bin")
        all_num = num_features + te_features

        X_train = train_fe[num_features].copy()
        X_val   = val_fe[num_features].copy()
        X_test  = test_fe[num_features].copy()

        for te_name in te_features:
            X_train[te_name] = te_dict['train'][te_name]
            X_val[te_name]   = te_dict['val'][te_name]
            X_test[te_name]  = te_dict['test'][te_name]

        # Median imputation with training-fold medians (0 when a column is entirely missing).
        for col in all_num:
            med = X_train[col].median()
            if pd.isna(med): med = 0
            X_train[col] = X_train[col].fillna(med)
            X_val[col]   = X_val[col].fillna(med)
            X_test[col]  = X_test[col].fillna(med)

        # Standardize using statistics of the training part only.
        scaler = StandardScaler()
        X_train_s = scaler.fit_transform(X_train.values)
        X_val_s   = scaler.transform(X_val.values)
        X_test_s  = scaler.transform(X_test.values)

        # Integer-encode the categorical columns. The encoder is fit on the union of
        # train/val/test values (missing values become the string "NaN") plus an
        # extra "UNK" class, so transform never meets an unseen label.
        cat_dims=[]
        X_cat_train=[]; X_cat_val=[]; X_cat_test=[]
        for col in cat_features:
            le = LabelEncoder()
            all_vals = list(set(train_fe[col].fillna("NaN").astype(str)) |
                            set(val_fe[col].fillna("NaN").astype(str)) |
                            set(test_fe[col].fillna("NaN").astype(str)))
            le.fit(all_vals + ["UNK"])
            cat_dims.append(len(le.classes_))
            X_cat_train.append(le.transform(train_fe[col].fillna("NaN").astype(str)))
            X_cat_val.append(le.transform(val_fe[col].fillna("NaN").astype(str)))
            X_cat_test.append(le.transform(test_fe[col].fillna("NaN").astype(str)))

        # One column of codes per categorical feature.
        X_cat_train = np.stack(X_cat_train, axis=1)
        X_cat_val   = np.stack(X_cat_val, axis=1)
        X_cat_test  = np.stack(X_cat_test, axis=1)

        y_train = train_fe["voted_bin"].values.astype(np.float32)
        y_val   = val_fe["voted_bin"].values.astype(np.float32)

        train_ds = TabDataset(X_train_s, X_cat_train, y_train)
        val_ds   = TabDataset(X_val_s, X_cat_val, y_val)
        test_ds  = TabDataset(X_test_s, X_cat_test, None)

        # Only the training loader shuffles (and drops the last partial batch);
        # val/test keep row order so predictions align with va_idx and the test rows.
        train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
        val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)
        test_loader  = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)

        # Base model 1: MLP.
        mlp = MLP(num_features=len(all_num), cat_dims=cat_dims, embed_dim=8, hidden_dims=[256,128,64], dropout=0.3)
        mlp = train_base(mlp, train_loader, val_loader, y_train, y_val, DEVICE, lr=1e-3)
        oof_mlp[va_idx] = predict_base(mlp, val_loader, DEVICE)
        test_mlp += predict_base(mlp, test_loader, DEVICE) / N_FOLDS

        # Base model 2: FT-Transformer, trained on the same loaders with a smaller learning rate.
        ft = FTTransformer(num_features=len(all_num), cat_dims=cat_dims, d_token=48, n_layers=2, n_heads=4, dropout=0.2)
        ft = train_base(ft, train_loader, val_loader, y_train, y_val, DEVICE, lr=5e-4)
        oof_ft[va_idx] = predict_base(ft, val_loader, DEVICE)
        test_ft += predict_base(ft, test_loader, DEVICE) / N_FOLDS

        print(f"  fold {fold+1}/{N_FOLDS} done")

    return oof_mlp, oof_ft, test_mlp, test_ft

# run seeds
# Labels in the original row order; run_one_seed returns OOF arrays in this order.
train_clean = clean_data(train_raw)
y_all = train_clean["voted_bin"].values.astype(np.float32)

oof_mlp_list=[]; oof_ft_list=[]; test_mlp_list=[]; test_ft_list=[]

for s in SEEDS:
    print("\n==============================")
    print("SEED:", s)
    print("==============================")
    o_mlp, o_ft, t_mlp, t_ft = run_one_seed(s)
    print(" seed OOF mlp:", roc_auc_score(y_all, o_mlp))
    print(" seed OOF ft :", roc_auc_score(y_all, o_ft))
    oof_mlp_list.append(o_mlp); oof_ft_list.append(o_ft)
    test_mlp_list.append(t_mlp); test_ft_list.append(t_ft)

# average across seeds
oof_mlp = np.mean(np.vstack(oof_mlp_list), axis=0)
oof_ft  = np.mean(np.vstack(oof_ft_list), axis=0)
test_mlp = np.mean(np.vstack(test_mlp_list), axis=0)
test_ft  = np.mean(np.vstack(test_ft_list), axis=0)

# prob blend: weight chosen on the OOF predictions, then applied to the test predictions
best_w, best_auc = best_blend_weight(y_all, oof_mlp, oof_ft)
oof_prob = best_w*oof_mlp + (1-best_w)*oof_ft
test_prob = best_w*test_mlp + (1-best_w)*test_ft

# rank blend
oof_rank = rank_avg(oof_mlp, oof_ft)
test_rank = rank_avg(test_mlp, test_ft)

# stacker: one input column per base model
oof_feats = np.vstack([oof_mlp, oof_ft]).T
test_feats = np.vstack([test_mlp, test_ft]).T

stacker = train_stacker(oof_feats, y_all)
# NOTE(review): the stacker is scored here on the same OOF rows it was fit and
# early-stopped on, so the "stack" score below is optimistic relative to
# "prob"/"rank" — consider a cross-fitted stacker before trusting the auto-selection.
oof_stack = predict_stacker(stacker, oof_feats)
test_stack = predict_stacker(stacker, test_feats)

scores = {
    "prob": roc_auc_score(y_all, oof_prob),
    "rank": roc_auc_score(y_all, oof_rank),
    "stack": roc_auc_score(y_all, oof_stack),
}
print("\nOOF scores:", scores, "best_w(mlp):", best_w)

idx = test_raw["index"].values if "index" in test_raw.columns else np.arange(len(test_raw))

# Write every candidate submission so the "Saved: ..." message below is true
# (previously only submission_best.csv was written while four files were reported).
test_preds = {"prob": test_prob, "rank": test_rank, "stack": test_stack}
for name, pred in test_preds.items():
    pd.DataFrame({"index": idx, "voted": pred}).to_csv(f"submission_{name}.csv", index=False)

# The variant with the highest OOF AUC is additionally saved as the "best" submission.
best_name = max(scores, key=scores.get)
best_pred = test_preds[best_name]
pd.DataFrame({"index": idx, "voted": best_pred}).to_csv("submission_best.csv", index=False)

print("Saved: submission_prob.csv / submission_rank.csv / submission_stack.csv / submission_best.csv")
print("Chosen best:", best_name)



SEED: 42
  fold 1/5 done
  fold 2/5 done
  fold 3/5 done
  fold 4/5 done
  fold 5/5 done
 seed OOF mlp: 0.769406009902198
 seed OOF ft : 0.7662679716572101

SEED: 202
  fold 1/5 done
  fold 2/5 done
  fold 3/5 done
  fold 4/5 done
