# Credit Card Fraud Detection Demo

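This notebook walks through:
- Loading and splitting the dataset
- Training multiple models
- Evaluating performance with imbalance-aware metrics
- Selecting the best model by validation F1, retraining it on train + validation data, and reporting test metrics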



In [1]:
import os

import numpy as np

from data_utils import load_creditcard_data
from models import build_models
from evaluation import evaluate_predictions, print_detailed_report

# Unpack the seven objects returned by the project loader: feature and label
# arrays for the train / validation / test splits, followed by `scaler`
# (presumably the fitted feature scaler -- confirm in data_utils).
X_train, X_val, X_test, y_train, y_val, y_test, scaler = load_creditcard_data()

# Candidate estimators; later cells iterate this with `.items()` and index it
# by model name, so it is expected to be a name -> estimator mapping.
models = build_models()



ModuleNotFoundError: No module named 'data_utils'

In [None]:
# Validation metrics per model, keyed by model name.
results = {}

for name, model in models.items():
    print(f"\n=== Model: {name} ===")

    # Fit on the training split only; the validation split is held out for scoring.
    model.fit(X_train, y_train)
    y_val_pred = model.predict(X_val)

    # Continuous scores for the evaluation helper: column 1 of predict_proba
    # when the estimator offers it, otherwise raw decision_function output,
    # otherwise nothing.
    if hasattr(model, "predict_proba"):
        y_val_proba = model.predict_proba(X_val)[:, 1]
    elif hasattr(model, "decision_function"):
        y_val_proba = model.decision_function(X_val)
    else:
        y_val_proba = None

    metrics = results[name] = evaluate_predictions(y_val, y_val_pred, y_val_proba)
    print(metrics)



In [None]:
# Pick the best model by validation F1; entries without an "f1" key count as 0.0.
if not results:
    # max() over an empty dict raises an opaque "max() arg is an empty sequence";
    # raise the same exception type with an actionable message instead.
    raise ValueError(
        "No validation results available; run the model training cell first."
    )
best_name = max(results, key=lambda k: results[k].get("f1", 0.0))
print("Best model:", best_name, results[best_name])

# NOTE: this is the same estimator object that is stored in `models`, so the
# refit below is applied to that object in place.
best_model = models[best_name]

# Retrain on train + validation before the final test evaluation.
# numpy is already imported as `np` in the notebook's first cell, so it is not
# re-imported here.
X_train_val = np.vstack([X_train, X_val])
y_train_val = np.concatenate([y_train, y_val])

best_model.fit(X_train_val, y_train_val)

y_test_pred = best_model.predict(X_test)

# Continuous scores for the evaluation helper: column 1 of predict_proba
# (presumably the positive/fraud class -- confirm class ordering), else raw
# decision_function output, else None.
y_test_proba = None
if hasattr(best_model, "predict_proba"):
    y_test_proba = best_model.predict_proba(X_test)[:, 1]
elif hasattr(best_model, "decision_function"):
    y_test_proba = best_model.decision_function(X_test)

print("\nTest metrics:")
print(evaluate_predictions(y_test, y_test_pred, y_test_proba))
print_detailed_report(y_test, y_test_pred)

