# CatBoost Hyperparameter Tuning for Fraud Detection

This notebook performs grid search style tuning for CatBoost with the same data, feature preset, and business cost evaluation as the VAE tuner.


In [1]:
import sys
import os
sys.path.insert(0, os.path.abspath('../..'))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json

from itertools import product
from catboost import CatBoostClassifier

from src.evaluation import FraudEvaluationMetrics
from src.vae_models.vae_base import FraudDataHandler

import warnings
warnings.filterwarnings('ignore')

print("Environment ready for CatBoost tuning.")


Environment ready for CatBoost tuning.


## 1. Define Base Configuration and Parameter Grid


In [2]:
base_config = {
    'data_path': '../../data/processed/creditcard_fe.csv',
    'drop_features': 'logreg_baseline',
    'random_seed': 42,
    'C_FP': 550,
    'C_FN': 110,
}

param_grid = {
    'iterations': [200, 400],
    'depth': [4, 6, 8],
    'learning_rate': [0.1, 0.05, 0.02]
}

total_configs = np.prod([len(v) for v in param_grid.values()])
print("Hyperparameter Tuning Configuration")
print("="*60)
print(f"Total Configurations: {total_configs}")
for k, v in param_grid.items():
    print(f"  {k:15s}: {v}")


Hyperparameter Tuning Configuration
Total Configurations: 18
  iterations     : [200, 400]
  depth          : [4, 6, 8]
  learning_rate  : [0.1, 0.05, 0.02]


## 2. Load Data Once


In [3]:
handler = FraudDataHandler(
    data_path=base_config['data_path'],
    random_seed=base_config['random_seed'],
    drop_features=base_config['drop_features']
)

splits = handler.load_and_split()
X_train_scaled, X_val_scaled, X_test_scaled = handler.preprocess(
    splits['X_train'], splits['X_val'], splits['X_test']
)

y_train, y_val, y_test = splits['y_train'], splits['y_val'], splits['y_test']

print("Data ready.")


Loading data from ../../data/processed/creditcard_fe.csv...
Dataset loaded: 284807 transactions
  Normal: 284315 (99.83%)
  Fraud: 492 (0.17%)
  Features: 22
  Dropped features: 14

Data split:
  Training: 170589 normal transactions
  Validation: 57109 transactions (246 fraud)
  Test: 57109 transactions (246 fraud)
Data ready.


## 3. Run Grid Search (cost-based threshold on validation)


In [4]:
def evaluate_config(iterations, depth, learning_rate):
    model = CatBoostClassifier(
        iterations=iterations,
        depth=depth,
        learning_rate=learning_rate,
        loss_function='Logloss',
        random_seed=base_config['random_seed'],
        verbose=False,
        allow_writing_files=False,
        thread_count=-1
    )
    X_train_supervised = np.vstack([X_train_scaled, X_val_scaled])
    y_train_supervised = np.hstack([y_train, y_val])
    model.fit(X_train_supervised, y_train_supervised)

    val_proba = model.predict_proba(X_val_scaled)[:, 1]
    percentiles = np.arange(1, 100, 0.5)
    thresholds = np.percentile(val_proba, percentiles)

    C_FP, C_FN = base_config['C_FP'], base_config['C_FN']
    def val_cost(th):
        y_pred = (val_proba >= th).astype(int)
        fp = ((y_val == 0) & (y_pred == 1)).sum()
        fn = ((y_val == 1) & (y_pred == 0)).sum()
        return fp * C_FP + fn * C_FN

    costs = np.array([val_cost(t) for t in thresholds])
    best_idx = int(np.argmin(costs))
    best_threshold = float(thresholds[best_idx])
    best_val_cost = int(costs[best_idx])

    # Evaluate on test with best threshold
    test_proba = model.predict_proba(X_test_scaled)[:, 1]
    y_pred_test = (test_proba >= best_threshold).astype(int)
    evaluator = FraudEvaluationMetrics(cost_fp=C_FP, cost_fn=C_FN)
    metrics = evaluator.calculate_metrics(y_test, y_pred_test, y_scores=test_proba)

    return {
        'iterations': iterations,
        'depth': depth,
        'learning_rate': learning_rate,
        'val_cost': best_val_cost,
        'threshold': best_threshold,
        'test_cost': int(metrics['total_cost']),
        'test_precision': float(metrics['precision']),
        'test_recall': float(metrics['recall']),
        'test_pr_auc': float(metrics['pr_auc']) if metrics['pr_auc'] is not None else None
    }

results = []
for it, d, lr in product(param_grid['iterations'], param_grid['depth'], param_grid['learning_rate']):
    res = evaluate_config(it, d, lr)
    results.append(res)

results_df = pd.DataFrame(results)
print(f"Completed {len(results_df)} configurations")
print("Best by validation cost:")
best_row = results_df.loc[results_df['val_cost'].idxmin()]
print(best_row)


Completed 18 configurations
Best by validation cost:
iterations          400.000000
depth                 8.000000
learning_rate         0.100000
val_cost          22000.000000
threshold             0.008046
test_cost         25740.000000
test_precision        0.841463
test_recall           0.841463
test_pr_auc           0.861649
Name: 15, dtype: float64


## 4. Analyze and Save Results


In [5]:
# Save results CSV and JSON summary
os.makedirs('../../results/tuning/', exist_ok=True)
results_df.to_csv('../../results/tuning/catboost_grid_search_results.csv', index=False)

best_row = results_df.loc[results_df['val_cost'].idxmin()].to_dict()
with open('../../results/tuning/catboost_grid_search_detailed.json', 'w') as f:
    json.dump({
        'results': results_df.to_dict(orient='records'),
        'best_by_val_cost': best_row
    }, f, indent=2)

print('Saved: results/tuning/catboost_grid_search_results.csv')
print('Saved: results/tuning/catboost_grid_search_detailed.json')


Saved: results/tuning/catboost_grid_search_results.csv
Saved: results/tuning/catboost_grid_search_detailed.json
