In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
import torch, torch.nn as nn, torch.nn.functional as F, torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import lightgbm as lgb
from catboost import CatBoostClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from itertools import product
from torch.amp.grad_scaler import GradScaler
from torch.amp.autocast_mode import autocast
# Default torch device string for the notebook: GPU when CUDA is usable, CPU otherwise.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

In [15]:
# Read both competition files, then separate identifiers/target from the features.
df = pd.read_csv("data/train.csv")
test_df = pd.read_csv("data/test.csv")

y = df["y"]                          # target column
X = df.drop(columns=["id", "y"])     # training features (no id, no target)
XTEST = test_df.drop(columns=["id"]) # test features (no id)

In [16]:
# Column dtypes, non-null counts and memory footprint of the training frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750000 entries, 0 to 749999
Data columns (total 18 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   id         750000 non-null  int64 
 1   age        750000 non-null  int64 
 2   job        750000 non-null  object
 3   marital    750000 non-null  object
 4   education  750000 non-null  object
 5   default    750000 non-null  object
 6   balance    750000 non-null  int64 
 7   housing    750000 non-null  object
 8   loan       750000 non-null  object
 9   contact    750000 non-null  object
 10  day        750000 non-null  int64 
 11  month      750000 non-null  object
 12  duration   750000 non-null  int64 
 13  campaign   750000 non-null  int64 
 14  pdays      750000 non-null  int64 
 15  previous   750000 non-null  int64 
 16  poutcome   750000 non-null  object
 17  y          750000 non-null  int64 
dtypes: int64(9), object(9)
memory usage: 103.0+ MB


In [17]:
# Summary statistics of the numeric columns (including `id` and the target `y`).
df.describe()

Unnamed: 0,id,age,balance,day,duration,campaign,pdays,previous,y
count,750000.0,750000.0,750000.0,750000.0,750000.0,750000.0,750000.0,750000.0,750000.0
mean,374999.5,40.926395,1204.067397,16.117209,256.229144,2.577008,22.412733,0.298545,0.120651
std,216506.495284,10.098829,2836.096759,8.250832,272.555662,2.718514,77.319998,1.335926,0.325721
min,0.0,18.0,-8019.0,1.0,1.0,1.0,-1.0,0.0,0.0
25%,187499.75,33.0,0.0,9.0,91.0,1.0,-1.0,0.0,0.0
50%,374999.5,39.0,634.0,17.0,133.0,2.0,-1.0,0.0,0.0
75%,562499.25,48.0,1390.0,21.0,361.0,3.0,-1.0,0.0,0.0
max,749999.0,95.0,99717.0,31.0,4918.0,63.0,871.0,200.0,1.0


In [18]:
# Cardinality of every object-typed (categorical) column.
df.select_dtypes("object").nunique()

job          12
marital       3
education     4
default       2
housing       2
loan          2
contact       3
month        12
poutcome      4
dtype: int64

In [19]:
# Feature names split by dtype; these lists drive the ColumnTransformers below.
num_cols = list(X.select_dtypes(include="number").columns)
cat_cols = list(X.select_dtypes(include="object").columns)

In [20]:
class FocalLoss(nn.Module):
    """Binary focal loss computed from raw logits.

    Every element's BCE-with-logits term is multiplied by
    ``alpha * (1 - p_t) ** gamma``, where ``p_t`` is the predicted probability
    of that element's target class, and the result is averaged over all
    elements. ``alpha`` is applied as one constant factor to every element
    (it is not switched between the positive and negative class).
    """

    def __init__(self, alpha=0.25, gamma=2.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, logits, targets):
        """Return the mean focal loss for ``logits`` against ``targets``."""
        probs = torch.sigmoid(logits)
        # Probability given to the true class: p where target == 1, else 1 - p.
        p_true = torch.where(targets == 1, probs, 1 - probs)
        modulator = (1 - p_true).pow(self.gamma)
        per_elem = F.binary_cross_entropy_with_logits(logits, targets, reduction="none")
        return (self.alpha * modulator * per_elem).mean()

In [21]:
class FFNN(nn.Module):
    """Three-stage MLP for binary classification with a concatenation skip.

    Stages are Linear -> BatchNorm1d -> ReLU -> Dropout(0.25) with widths
    384, 192 and 96. The third stage receives the outputs of stage two and
    stage one concatenated along the feature axis, and a final linear layer
    maps the 96 features to a single logit.
    """

    def __init__(self, in_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_dim, 384)
        self.bn1 = nn.BatchNorm1d(384)
        self.fc2 = nn.Linear(384, 192)
        self.bn2 = nn.BatchNorm1d(192)
        self.fc3 = nn.Linear(384+192, 96)  # input is the concat of stage 1 and stage 2
        self.bn3 = nn.BatchNorm1d(96)
        self.out = nn.Linear(96, 1)
        self.drop = nn.Dropout(0.25)

    def _stage(self, linear, norm, inputs):
        # One hidden stage: affine map, batch norm, ReLU, then the shared dropout.
        return self.drop(F.relu(norm(linear(inputs))))

    def forward(self, x, return_logits=True, sigmoid=False):
        """Run the network on a 2-D batch ``x``.

        Returns the logits when ``return_logits`` is true, otherwise their
        sigmoid. The ``sigmoid`` argument is accepted but never read.
        """
        h1 = self._stage(self.fc1, self.bn1, x)
        h2 = self._stage(self.fc2, self.bn2, h1)
        h3 = self._stage(self.fc3, self.bn3, torch.cat((h2, h1), dim=1))
        logits = self.out(h3)
        return logits if return_logits else torch.sigmoid(logits)

In [22]:
def train_ffnn_oof(X_tr, y_tr, X_va, y_va,
                        epochs=80, batch_size=1024, lr=1e-3,
                        weight_decay=1e-4, patience=12, seed=42, device=None):
    """Train an ``FFNN`` on one fold with early stopping on validation AUC.

    Parameters
    ----------
    X_tr, X_va : 2-D arrays accepted by ``torch.tensor`` (rows are samples).
    y_tr, y_va : 1-D label arrays; presumably 0/1 targets - confirm with caller.
    epochs : maximum number of epochs (also fixes the OneCycleLR schedule length).
    batch_size, lr, weight_decay : DataLoader / AdamW / OneCycleLR settings.
    patience : epochs without an AUC gain of more than 1e-5 before stopping.
    seed : seeds the DataLoader shuffling generator only; weight init and
        dropout use the global torch RNG.
    device : ``"cpu"``, ``"cuda"``, ``"cuda:N"`` or a ``torch.device``;
        defaults to CUDA when available.

    Returns
    -------
    (model, va_logits, best_auc) : the model restored to its best-AUC weights
    (left in eval mode), its 1-D validation logits, and that best AUC.
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    # Normalise so "cuda:0" / torch.device values also enable mixed precision.
    device_type = torch.device(device).type
    use_amp = device_type == "cuda"

    g = torch.Generator(); g.manual_seed(seed)
    model = FFNN(X_tr.shape[1]).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    crit = FocalLoss(alpha=0.25, gamma=2.0)
    scaler = GradScaler(enabled=use_amp)

    ds_tr = TensorDataset(torch.tensor(X_tr, dtype=torch.float32),
                          torch.tensor(np.asarray(y_tr), dtype=torch.float32).unsqueeze(1))
    dl_tr = DataLoader(ds_tr, batch_size=batch_size, shuffle=True, generator=g)

    total_steps = epochs * len(dl_tr)
    sch = torch.optim.lr_scheduler.OneCycleLR(opt, max_lr=lr,
                                              total_steps=total_steps, pct_start=0.15)

    def _validation_logits():
        # Chunked forward pass over X_va in eval mode; returns a flat 1-D array.
        model.eval()
        chunks = []
        with torch.no_grad():
            for i in range(0, X_va.shape[0], 4096):
                xb = torch.tensor(X_va[i:i+4096], dtype=torch.float32, device=device)
                # reshape(-1) keeps a 1-D result even for a single-row chunk,
                # where squeeze() would yield a 0-d array np.concatenate rejects.
                chunks.append(model(xb, return_logits=True).reshape(-1).cpu().numpy())
        return np.concatenate(chunks)

    best_auc, best_state, no_improve = -1, None, 0
    for ep in range(epochs):
        model.train()
        for xb, yb in dl_tr:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad(set_to_none=True)
            # Autocast only when actually running on CUDA.
            with autocast(device_type, enabled=use_amp):
                logits = model(xb, return_logits=True)
                loss = crit(logits, yb)
            scaler.scale(loss).backward()
            # Gradients must be unscaled before clipping, otherwise the 2.0
            # threshold is compared against loss-scaled norms.
            scaler.unscale_(opt)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 2.0)
            scaler.step(opt); scaler.update(); sch.step()

        va_logits = _validation_logits()
        va_auc = roc_auc_score(y_va, va_logits)

        if va_auc > best_auc + 1e-5:
            # state_dict() returns references to the live parameters; clone
            # them so later epochs cannot overwrite the saved checkpoint.
            best_state = {name: t.detach().clone()
                          for name, t in model.state_dict().items()}
            best_auc, no_improve = va_auc, 0
        else:
            no_improve += 1
            if no_improve >= patience:
                break

    if best_state is not None:
        model.load_state_dict(best_state)

    # Validation logits of the restored best checkpoint.
    va_logits = _validation_logits()
    return model, va_logits, best_auc

def ffnn_predict_logits(model, X, device=None, chunk=4096):
    """Return the model's logits for ``X`` as a flat 1-D NumPy array.

    Parameters
    ----------
    model : module whose ``forward`` accepts ``return_logits=True``.
    X : 2-D array accepted by ``torch.tensor`` (rows are samples).
    device : where to place the input chunks; when ``None`` the device of the
        model's first parameter is used (CPU for a parameter-free model).
    chunk : number of rows per forward pass.

    The model is switched to eval mode and evaluated without gradients.
    An input with zero rows yields an empty float32 array.
    """
    if device is None:
        # Follow the model so inputs never land on a different device than the weights.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else "cpu"
    model.eval()
    outs = []
    with torch.no_grad():
        for i in range(0, X.shape[0], chunk):
            xb = torch.tensor(X[i:i+chunk], dtype=torch.float32, device=device)
            # reshape(-1) keeps single-row chunks 1-D (squeeze() would make them 0-d).
            outs.append(model(xb, return_logits=True).reshape(-1).cpu().numpy())
    if not outs:
        return np.empty(0, dtype=np.float32)
    return np.concatenate(outs)

In [23]:
# Preprocessing for the one-hot tree models (LightGBM / XGBoost):
# median-impute the numeric columns and one-hot encode the categorical ones.
prep_tree = ColumnTransformer(
    [
        ("num", SimpleImputer(strategy="median"), num_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), cat_cols),
    ],
    remainder="drop",
)

# Preprocessing for the neural net: same as above, plus standardisation of the
# imputed numeric columns.
prep_nn = ColumnTransformer(
    [
        (
            "num",
            Pipeline(steps=[
                ("imp", SimpleImputer(strategy="median")),
                ("sc", StandardScaler()),
            ]),
            num_cols,
        ),
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), cat_cols),
    ],
    remainder="drop",
)

In [24]:
# 5-fold stratified CV shared by every base model.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# One float32 out-of-fold buffer per base model, one slot per training row.
oof_ffnn, oof_lgb, oof_cat, oof_xgb = (
    np.zeros(len(X), dtype=np.float32) for _ in range(4)
)


In [None]:
# Per-fold test-set predictions (raw scores / logits) for each base model.
lgb_test_preds = []
cat_test_preds = []
ffnn_test_preds = []
xgb_test_preds = []

for fold,(tr_idx,va_idx) in enumerate(kf.split(X, y)):
    print("Fold", fold+1)
    X_tr, X_va = X.iloc[tr_idx], X.iloc[va_idx]
    y_tr, y_va = y.iloc[tr_idx], y.iloc[va_idx]

    # -------- FFNN --------
    # The preprocessor is refit on the training part of every fold.
    X_tr_nn = prep_nn.fit_transform(X_tr)
    X_va_nn = prep_nn.transform(X_va)
    XTEST_NN = prep_nn.transform(XTEST)

    model_ff, va_logits, va_auc = train_ffnn_oof(
        X_tr_nn, y_tr.values, X_va_nn, y_va.values,
        epochs=80, batch_size=1024, lr=1e-3,
        weight_decay=1e-4, patience=12, seed=42, device=device)
    oof_ffnn[va_idx] = va_logits
    ffnn_test_preds.append(ffnn_predict_logits(model_ff, XTEST_NN, device=device))

    # -------- LightGBM --------
    # These one-hot matrices are reused by the XGBoost section further down.
    Xtr_tree = prep_tree.fit_transform(X_tr)
    Xva_tree = prep_tree.transform(X_va)
    XTEST_TREE = prep_tree.transform(XTEST)

    m1 = lgb.LGBMClassifier(
        objective="binary", metric="auc",
        boosting_type="gbdt", device="gpu",
        n_estimators=20000, learning_rate=0.03,
        num_leaves=128, min_data_in_leaf=250,
        feature_fraction=0.8, bagging_fraction=0.8,
        bagging_freq=1, lambda_l2=3.0, random_state=42,
    )
    m1.fit(Xtr_tree, y_tr,
           eval_set=[(Xva_tree, y_va)],
           eval_metric="auc",
           callbacks=[lgb.early_stopping(200), lgb.log_evaluation(0)])
    # raw_score=True stores untransformed scores rather than probabilities.
    oof_lgb[va_idx] = np.asarray(m1.predict(Xva_tree, raw_score=True))
    lgb_test_preds.append(np.asarray(m1.predict(XTEST_TREE, raw_score=True)))

    # -------- CatBoost --------
    # CatBoost receives the raw categorical columns; only numerics are imputed.
    prep_cat = SimpleImputer(strategy="median").fit(X_tr[num_cols])
    Xtr_cb = X_tr.copy();   Xtr_cb[num_cols] = prep_cat.transform(X_tr[num_cols])
    Xva_cb = X_va.copy();   Xva_cb[num_cols] = prep_cat.transform(X_va[num_cols])
    XTEST_CAT = XTEST.copy(); XTEST_CAT[num_cols] = prep_cat.transform(XTEST[num_cols])

    for c in cat_cols:
        Xtr_cb[c]    = Xtr_cb[c].astype("category")
        Xva_cb[c]    = Xva_cb[c].astype("category")
        XTEST_CAT[c] = XTEST_CAT[c].astype("category")

    cat_idx = [Xtr_cb.columns.get_loc(c) for c in cat_cols]

    m2 = CatBoostClassifier(
        task_type="GPU", devices="0",
        loss_function="Logloss", eval_metric="AUC",
        iterations=10000, learning_rate=0.03, depth=8,
        l2_leaf_reg=3.0, bagging_temperature=1.0,
        random_strength=0.2, border_count=128,
        early_stopping_rounds=300, verbose=False, random_state=42,
    )
    m2.fit(Xtr_cb, y_tr, eval_set=(Xva_cb, y_va),
           use_best_model=True, cat_features=cat_idx)
    oof_cat[va_idx] = np.asarray(m2.predict(Xva_cb, prediction_type="RawFormulaVal"))
    cat_test_preds.append(np.asarray(m2.predict(XTEST_CAT, prediction_type="RawFormulaVal")))
    
    # -------- XGBoost --------
    dtr = xgb.DMatrix(Xtr_tree, label=y_tr)
    dva = xgb.DMatrix(Xva_tree, label=y_va)
    dte = xgb.DMatrix(XTEST_TREE)
    
    # NOTE(review): `tree_method="gpu_hist"` and `predictor` are, as far as I
    # know, deprecated from XGBoost 2.0 on in favour of tree_method="hist" +
    # device="cuda" - confirm against the installed version before changing.
    params = dict(
        objective="binary:logistic",
        eval_metric="auc",
        tree_method="gpu_hist",
        predictor="gpu_predictor",
        max_depth=6,
        eta=0.03,
        subsample=0.8,
        colsample_bytree=0.8,
        reg_lambda=2.0,
        reg_alpha=0.0,
        random_state=42
    )
    
    m3 = xgb.train(params, dtr, num_boost_round=20000,
                    evals=[(dva, "valid")], early_stopping_rounds=300, verbose_eval=False)
    
    # Ask for raw margins (log-odds) directly instead of inverting float32
    # probabilities with an epsilon, which loses precision and saturates for
    # predictions close to 0 or 1.
    best_range = (0, m3.best_iteration+1)
    oof_xgb[va_idx] = m3.predict(dva, output_margin=True, iteration_range=best_range)
    xgb_test_preds.append(m3.predict(dte, output_margin=True, iteration_range=best_range))
    
# -------- average over folds --------
ffnn_test_pred = np.mean(ffnn_test_preds, axis=0)
lgb_test_pred  = np.mean(lgb_test_preds,  axis=0)
cat_test_pred  = np.mean(cat_test_preds,  axis=0)
xgb_test_pred  = np.mean(xgb_test_preds, axis=0)

Fold 1
[LightGBM] [Info] Number of positive: 72391, number of negative: 527609
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 1046
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 51
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 11 dense feature groups (6,87 MB) transferred to GPU in 0,005490 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0,120652 -> initscore=-1,986273
[LightGBM] [Info] Start training from score -1,986273
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1467]	valid_0's auc: 0.970089






Default metric period is 5 because AUC is/are not implemented for GPU


Fold 2
[LightGBM] [Info] Number of positive: 72391, number of negative: 527609
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 1044
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 51
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 11 dense feature groups (6,87 MB) transferred to GPU in 0,005155 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0,120652 -> initscore=-1,986273
[LightGBM] [Info] Start training from score -1,986273
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1371]	valid_0's auc: 0.968887






Default metric period is 5 because AUC is/are not implemented for GPU


Fold 3
[LightGBM] [Info] Number of positive: 72390, number of negative: 527610
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 1042
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 51
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 11 dense feature groups (6,87 MB) transferred to GPU in 0,004760 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0,120650 -> initscore=-1,986289
[LightGBM] [Info] Start training from score -1,986289
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1111]	valid_0's auc: 0.968833






Default metric period is 5 because AUC is/are not implemented for GPU


Fold 4
[LightGBM] [Info] Number of positive: 72390, number of negative: 527610
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 1039
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 51
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 11 dense feature groups (6,87 MB) transferred to GPU in 0,005187 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0,120650 -> initscore=-1,986289
[LightGBM] [Info] Start training from score -1,986289
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1358]	valid_0's auc: 0.969892






Default metric period is 5 because AUC is/are not implemented for GPU


Fold 5
[LightGBM] [Info] Number of positive: 72390, number of negative: 527610
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 1043
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 51
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Laptop GPU, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 11 dense feature groups (6,87 MB) transferred to GPU in 0,005231 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0,120650 -> initscore=-1,986289
[LightGBM] [Info] Start training from score -1,986289
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1394]	valid_0's auc: 0.969388










Default metric period is 5 because AUC is/are not implemented for GPU


In [26]:
# Level-2 design matrix: one column of out-of-fold scores per base model.
X_meta = pd.DataFrame(dict(ffnn=oof_ffnn, lgb=oof_lgb, cat=oof_cat, xgb=oof_xgb))
# Target re-indexed 0..n-1 so it lines up positionally with X_meta.
y_meta = y.reset_index(drop=True)

In [27]:
# Sign per base model: -1 when its OOF scores correlate negatively with the
# target (or the correlation is NaN), +1 otherwise.
flips = {}
for c in X_meta.columns:
    r = np.corrcoef(X_meta[c], y_meta)[0, 1]
    flips[c] = -1 if (np.isnan(r) or r < 0) else 1
flip_s = pd.Series(flips)

# Stratified 85/15 split of the meta-features into blend-fit and holdout parts.
X_meta_tr, X_meta_te, y_meta_tr, y_meta_te = train_test_split(
    X_meta, y_meta, test_size=0.15, stratify=y_meta, random_state=42
)
# Apply the per-column signs to both parts.
X_meta_tr, X_meta_te = (part.mul(flip_s, axis=1) for part in (X_meta_tr, X_meta_te))

In [28]:
# Plain NumPy views of the full (un-flipped) meta-features and target.
# NOTE(review): this rebinds `y` from the target Series to an ndarray, so
# re-running the CV cell afterwards would fail on `y.iloc` - confirm whether
# anything still needs these two names.
P, y = X_meta.to_numpy(), y_meta.to_numpy()

def grid_w(k, step=0.02):
    """Yield every length-``k`` weight vector on a ``step``-spaced simplex grid.

    The first ``k - 1`` weights are non-negative multiples of ``step`` whose
    sum does not exceed 1; the last weight is the remainder ``1 - sum``.
    Vectors are produced in lexicographic order of the first ``k - 1`` weights.

    The grid is enumerated with integer multiples rather than accumulated
    floats, so boundary points (sum exactly 1) are never lost to rounding and
    the remainder is clamped so it cannot come out as a tiny negative number.

    Raises ``ValueError`` if ``k < 1`` or ``step <= 0``.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if step <= 0:
        raise ValueError("step must be positive")
    # Largest multiple of `step` that still fits in 1 (guarded against 1/step
    # landing a hair below an integer).
    n_levels = int(np.floor(1.0 / step + 1e-9))
    for idx in product(range(n_levels + 1), repeat=k-1):
        used = sum(idx)
        if used <= n_levels:
            head = [i * step for i in idx]
            yield np.array(head + [max(0.0, 1.0 - used * step)])
# NumPy copies of the sign-corrected blend-fit / holdout splits.
P_tr, P_te = X_meta_tr.values, X_meta_te.values
y_tr, y_te = y_meta_tr.values, y_meta_te.values

# Exhaustive search for the convex weight vector with the highest AUC on the
# blend-fit split; the first best vector wins ties.
best_auc, best_w = -1, None
for w in grid_w(P_tr.shape[1], step=0.02):
    auc = roc_auc_score(y_tr, P_tr.dot(w))
    if auc <= best_auc:
        continue
    best_auc, best_w = auc, w

# Score the selected weights on the untouched holdout split.
hold_auc = roc_auc_score(y_te, P_te.dot(best_w))
print("Blend (logit) train AUC:", best_auc, "holdout AUC:", hold_auc, "weights:", best_w)


Blend (logit) train AUC: 0.9695454634231728 holdout AUC: 0.9692595322212408 weights: [0.   0.78 0.   0.22]


In [29]:
def rank_cols(A):
    """Replace each column of the 2-D array ``A`` by its pandas ranks.

    Ranking uses ``pandas.Series.rank`` defaults, applied to every column
    independently; the ranked columns are stacked back into a 2-D array.
    """
    ranked = [pd.Series(col).rank().values for col in A.T]
    return np.column_stack(ranked)
# Rank-transform each split separately, then repeat the weight search on ranks.
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt, so
# `best_wr` may come from a partial search and `hold_auc_r` may be unset.
Pr_tr, Pr_te = rank_cols(P_tr), rank_cols(P_te)

best_auc_r, best_wr = -1, None
for w in grid_w(Pr_tr.shape[1], step=0.02):
    auc = roc_auc_score(y_tr, Pr_tr.dot(w))
    if auc <= best_auc_r:
        continue
    best_auc_r, best_wr = auc, w

hold_auc_r = roc_auc_score(y_te, Pr_te.dot(best_wr))
print("Blend (rank) train AUC:", best_auc_r, "holdout AUC:", hold_auc_r, "weights:", best_wr)

KeyboardInterrupt: 

In [30]:
# Fold-averaged test scores, one column per base model in X_meta's column
# order, with the per-model sign flips applied.
test_stack = np.column_stack([ffnn_test_pred, lgb_test_pred, cat_test_pred, xgb_test_pred])
test_stack = (pd.DataFrame(test_stack, columns=X_meta.columns) * flip_s).values

# Weighted blends of the test scores: on the raw scale and on per-column ranks.
final_test_logit = test_stack.dot(best_w)
test_stack_rank = rank_cols(test_stack)
final_test_rank = test_stack_rank.dot(best_wr)


In [31]:
# Submit the logit blend: squash the blended scores with a sigmoid and write them out.
final_scores = final_test_logit
submission = test_df[["id"]].assign(y=1/(1+np.exp(-final_scores)))
submission.to_csv("submission_blend.csv", index=False, float_format="%.9f")
print("submission_blend.csv yazıldı.")

submission_blend.csv yazıldı.


In [32]:
# Test-time meta-features: fold-averaged scores per base model, sign-corrected
# with the same flips as the training meta-features.
X_final_test = pd.DataFrame(
    dict(ffnn=ffnn_test_pred, lgb=lgb_test_pred, cat=cat_test_pred, xgb=xgb_test_pred)
) * flip_s

In [33]:
# Stacking meta-learner: standardise the base-model scores, then fit an
# L2-regularised, class-balanced logistic regression on all OOF rows.
meta_model = make_pipeline(
    StandardScaler(),
    LogisticRegression(
        C=0.5,
        penalty="l2",
        solver="lbfgs",
        class_weight="balanced",
        max_iter=2000,
    ),
)
meta_model.fit(X_meta * flip_s, y_meta)

0,1,2
,steps,"[('standardscaler', ...), ('logisticregression', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,0.5
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,
,solver,'lbfgs'
,max_iter,2000


In [34]:
# Positive-class probabilities from the meta-learner, written as a submission file.
preds = meta_model.predict_proba(X_final_test)[:, 1]
submission = test_df[["id"]].assign(y=preds)
submission.to_csv("submission_metaLR.csv", index=False, float_format="%.9f")
print("submission_metaLR.csv yazıldı.")

submission_metaLR.csv yazıldı.
