confirm CatBoost can use GPU

In [1]:
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier

X_small = pd.DataFrame({"a":[0,1,0,1], "b":[1,1,0,0]})
y_small = np.array([0,1,0,1])

m = CatBoostClassifier(
    iterations=50,
    task_type="GPU",
    devices="0",
    verbose=False
)
m.fit(X_small, y_small)
print("✅ CatBoost GPU fit OK")


✅ CatBoost GPU fit OK


load data

In [9]:
from __future__ import annotations
from pathlib import Path
import numpy as np
import pandas as pd
import time

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from catboost import CatBoostClassifier, Pool

ROOT = Path.cwd().resolve().parents[0]          # notebooks/ -> repo root
DATA_DIR = ROOT / "data" / "raw"
REPORTS_DIR = ROOT / "reports"
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

train = pd.read_csv(DATA_DIR / "train.csv")
test  = pd.read_csv(DATA_DIR / "test.csv")
sub   = pd.read_csv(DATA_DIR / "sample_submission.csv")

id_col = sub.columns[0]
target_col = sub.columns[1]

y = (train[target_col] == "Presence").astype(int)

cat_cols = ['Sex', 'FBS over 120', 'Exercise angina', 'EKG results',
            'Slope of ST', 'Thallium', 'Number of vessels fluro', 'Chest pain type']
num_cols = ['Age', 'BP', 'Cholesterol', 'Max HR', 'ST depression']
feature_cols = cat_cols + num_cols

X = train[feature_cols].copy()
X_test = test[feature_cols].copy()

cat_idx = [X.columns.get_loc(c) for c in cat_cols]

print("X:", X.shape, "| X_test:", X_test.shape, "| pos rate:", float(y.mean()))
print("cat_idx:", cat_idx)
print("id_col:", id_col, "| target_col:", target_col)


X: (630000, 13) | X_test: (270000, 13) | pos rate: 0.44833968253968254
cat_idx: [0, 1, 2, 3, 4, 5, 6, 7]
id_col: id | target_col: Heart Disease


single-fold GPU timing (fast confirm)

In [6]:
params_gpu = dict(
    loss_function="Logloss",
    eval_metric="AUC",
    iterations=10000,          # big; early stopping will pick best
    learning_rate=0.03,
    depth=6,
    l2_leaf_reg=10,
    random_strength=1.0,
    subsample=0.8,
    bootstrap_type="Bernoulli",
    od_type="Iter",
    od_wait=200,               # early stop patience
    verbose=False,
    allow_writing_files=False,
    task_type="GPU",
    devices="0",

    # optional speed knob; usually safe
    border_count=128,
)


skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
tr_idx, va_idx = next(skf.split(X, y))

X_tr, X_va = X.iloc[tr_idx], X.iloc[va_idx]
y_tr, y_va = y.iloc[tr_idx], y.iloc[va_idx]

tr_pool = Pool(X_tr, y_tr, cat_features=cat_idx)
va_pool = Pool(X_va, y_va, cat_features=cat_idx)

# for single-fold, show progress occasionally
params_single = dict(params_gpu)
params_single["iterations"] = 3000
params_single["verbose"] = 200

t0 = time.time()
model = CatBoostClassifier(**params_single, random_seed=42)
model.fit(tr_pool, eval_set=va_pool, use_best_model=True)
t1 = time.time()

p_va = model.predict_proba(va_pool)[:, 1]
auc = roc_auc_score(y_va, p_va)

print("Single-fold GPU AUC:", round(float(auc), 5))
print("Single-fold GPU runtime (sec):", round(t1 - t0, 2))
print("Best iter:", model.get_best_iteration())



Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.9361442	best: 0.9361442 (0)	total: 22.3ms	remaining: 1m 6s
200:	test: 0.9546572	best: 0.9546572 (200)	total: 4.26s	remaining: 59.4s
400:	test: 0.9550797	best: 0.9550797 (400)	total: 9.1s	remaining: 59s
600:	test: 0.9554610	best: 0.9554610 (600)	total: 16.9s	remaining: 1m 7s
800:	test: 0.9556342	best: 0.9556344 (799)	total: 24.5s	remaining: 1m 7s
1000:	test: 0.9557190	best: 0.9557190 (1000)	total: 29.5s	remaining: 59s
1200:	test: 0.9557736	best: 0.9557736 (1200)	total: 33.8s	remaining: 50.6s
1400:	test: 0.9558057	best: 0.9558057 (1396)	total: 38.1s	remaining: 43.5s
1600:	test: 0.9558202	best: 0.9558219 (1596)	total: 42.4s	remaining: 37.1s
1800:	test: 0.9558333	best: 0.9558336 (1799)	total: 46.7s	remaining: 31.1s
2000:	test: 0.9558418	best: 0.9558420 (1998)	total: 51.1s	remaining: 25.5s
2200:	test: 0.9558423	best: 0.9558455 (2132)	total: 55.4s	remaining: 20.1s
bestTest = 0.9558454752
bestIteration = 2132
Shrink model to first 2133 iterations.
Single-fold GPU AUC: 0.95585
Singl

full 5-fold GPU runtime

In [7]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

oof = np.zeros(len(X), dtype=float)
scores = []
best_iters = []

t0 = time.time()

for fold, (tr_idx, va_idx) in enumerate(skf.split(X, y), start=1):
    X_tr, X_va = X.iloc[tr_idx], X.iloc[va_idx]
    y_tr, y_va = y.iloc[tr_idx], y.iloc[va_idx]

    tr_pool = Pool(X_tr, y_tr, cat_features=cat_idx)
    va_pool = Pool(X_va, y_va, cat_features=cat_idx)

    model = CatBoostClassifier(**params_gpu, random_seed=42 + fold)
    model.fit(tr_pool, eval_set=va_pool, use_best_model=True)

    p_va = model.predict_proba(va_pool)[:, 1]
    oof[va_idx] = p_va

    auc = roc_auc_score(y_va, p_va)
    scores.append(auc)

    bi = model.get_best_iteration()
    best_iters.append(bi)

    print(f"Fold {fold} AUC: {auc:.5f} | best_iter: {bi}")

t1 = time.time()

print("Mean CV AUC:", round(float(np.mean(scores)), 5))
print("OOF  AUC:", round(float(roc_auc_score(y, oof)), 5))
print("Avg best_iter:", int(np.mean(best_iters)))
print("GPU 5-fold runtime (min):", round((t1 - t0)/60, 2))


Default metric period is 5 because AUC is/are not implemented for GPU


Fold 1 AUC: 0.95584 | best_iter: 2033


Default metric period is 5 because AUC is/are not implemented for GPU


Fold 2 AUC: 0.95479 | best_iter: 1859


Default metric period is 5 because AUC is/are not implemented for GPU


Fold 3 AUC: 0.95561 | best_iter: 2261


Default metric period is 5 because AUC is/are not implemented for GPU


Fold 4 AUC: 0.95516 | best_iter: 2374


Default metric period is 5 because AUC is/are not implemented for GPU


Fold 5 AUC: 0.95602 | best_iter: 2277
Mean CV AUC: 0.95549
OOF  AUC: 0.95548
Avg best_iter: 2160
GPU 5-fold runtime (min): 4.69


Save OOF from GPU CV

In [10]:
import numpy as np

np.save(REPORTS_DIR / "oof_catboost.npy", oof)
print("Saved:", REPORTS_DIR / "oof_catboost.npy")


Saved: C:\Dev\kaggle-ps-s6e2-heart\reports\oof_catboost.npy


full train + save test preds

In [11]:
best_n = int(np.mean(best_iters))
print("Training full model with iterations =", best_n)

final_params = dict(params_gpu)
final_params["iterations"] = best_n
final_params["verbose"] = False

full_pool = Pool(X, y, cat_features=cat_idx)
test_pool = Pool(X_test, cat_features=cat_idx)

final_model = CatBoostClassifier(**final_params, random_seed=2026)
final_model.fit(full_pool)

test_pred = final_model.predict_proba(test_pool)[:, 1]

np.save(REPORTS_DIR / "test_catboost.npy", test_pred)
print("Saved:", REPORTS_DIR / "test_catboost.npy")


Training full model with iterations = 2160


Default metric period is 5 because AUC is/are not implemented for GPU


Saved: C:\Dev\kaggle-ps-s6e2-heart\reports\test_catboost.npy


write submission CSV

In [12]:
submission = pd.DataFrame({
    id_col: test[id_col],
    target_col: test_pred
})

out_path = REPORTS_DIR / "sub_catboost_gpu.csv"
submission.to_csv(out_path, index=False)

print("Saved:", out_path)
submission.head()


Saved: C:\Dev\kaggle-ps-s6e2-heart\reports\sub_catboost_gpu.csv


Unnamed: 0,id,Heart Disease
0,630000,0.950183
1,630001,0.006991
2,630002,0.988824
3,630003,0.003558
4,630004,0.200136
