In [None]:
pip install tqdm

In [None]:
import pandas as pd
import numpy as np
import os
import time

from sklearn.model_selection import train_test_split
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, SimpleImputer
from sklearn.tree import DecisionTreeRegressor
from tqdm import tqdm


In [None]:
# === File paths ===
# Directory (relative to the notebook) that holds the four CSV files below.
DATA_PATH = "../../ieee-fraud-detection/"

# Training split: transaction table and its identity table.
TRAIN_TRANS_FILE, TRAIN_IDENTITY_FILE = "train_transaction.csv", "train_identity.csv"

# Test split: transaction table and its identity table.
TEST_TRANS_FILE, TEST_IDENTITY_FILE = "test_transaction.csv", "test_identity.csv"

# === Load Data ===
def load_data():
    """Read the train and test CSV pairs and join identity onto transactions.

    Each split is its transaction file left-joined with its identity file on
    ``TransactionID``, so every transaction row is kept whether or not a
    matching identity row exists.

    Returns:
        tuple: ``(train, test)`` merged DataFrames.
    """
    print("Loading train and test data...")

    def _read_split(transaction_file, identity_file):
        # One split = transactions LEFT JOIN identities on TransactionID.
        transactions = pd.read_csv(os.path.join(DATA_PATH, transaction_file))
        identities = pd.read_csv(os.path.join(DATA_PATH, identity_file))
        return transactions.merge(identities, how="left", on="TransactionID")

    train = _read_split(TRAIN_TRANS_FILE, TRAIN_IDENTITY_FILE)
    test = _read_split(TEST_TRANS_FILE, TEST_IDENTITY_FILE)
    return train, test

# Load data
# Reads and merges both splits once; train_raw is what clean_data() consumes below.
train_raw, test_raw = load_data()
# Last expression of the cell: displays the shape tuple of each merged split.
train_raw.shape, test_raw.shape


In [None]:
def get_column_types(df):
    """Split the columns of ``df`` into numeric and categorical name lists.

    Args:
        df: DataFrame to inspect.

    Returns:
        tuple: ``(numerical_cols, categorical_cols)`` where the first list holds
        columns with a NumPy numeric dtype and the second holds columns with
        ``object`` or ``category`` dtype, both in their original column order.
    """
    numeric_part = df.select_dtypes(include=[np.number])
    categorical_part = df.select_dtypes(include=["object", "category"])
    return list(numeric_part.columns), list(categorical_part.columns)


In [None]:
def iterative_impute_fast(df, columns, max_iter=5, sample_frac=0.1, verbose=True):
    """Impute missing values in ``columns`` with a tree-based IterativeImputer.

    The imputer is fitted on a random sample of rows (to keep fitting cheap)
    and then applied to every row of ``df[columns]``.

    Args:
        df: Input DataFrame; it is copied, never modified in place.
        columns: Names of the columns to impute.
        max_iter: Maximum number of imputation rounds for IterativeImputer.
        sample_frac: Fraction of rows sampled (seed 42) to fit the imputer.
        verbose: When True, print the start banner and the completion message.

    Returns:
        A copy of ``df`` whose ``columns`` hold the imputed values. When
        ``columns`` is empty the copy is returned unchanged.
    """
    df = df.copy()
    # Accept any iterable of names (list, tuple, pandas Index) uniformly.
    columns = list(columns)

    # Nothing to impute: an imputer cannot be fitted on a zero-column frame.
    if not columns:
        return df

    if verbose:
        print(f"\n⚡ Fast Iterative Imputation: {len(columns)} columns | Sample: {sample_frac*100:.0f}% | Estimator: DecisionTree\n")

    # Fit on a reproducible subsample only; transform is applied to all rows.
    sample_df = df[columns].sample(frac=sample_frac, random_state=42)

    imputer = IterativeImputer(
        estimator=DecisionTreeRegressor(max_depth=5),
        max_iter=max_iter,
        random_state=42,
        verbose=0
    )

    imputer.fit(sample_df)
    df[columns] = imputer.transform(df[columns])

    # The completion message now honours the verbose flag like the banner does.
    if verbose:
        print(" Fast iterative imputation complete.\n")
    return df


In [None]:
def clean_data(df, target_column='isFraud'):
    """Drop or impute numeric columns according to their share of missing values.

    Numeric columns (excluding ``target_column``) are bucketed by missing ratio:

    * below 10%            -> iterative imputation via ``iterative_impute_fast``
    * 10% up to below 35%  -> mean imputation
    * 35% up to 70%        -> median imputation
    * above 70%            -> column is dropped

    Non-numeric columns are left untouched and the input frame is not modified.

    Args:
        df: DataFrame to clean.
        target_column: Column excluded from the numeric feature list so it is
            never imputed or dropped.

    Returns:
        A cleaned copy of ``df``.
    """
    print(" Cleaning data...")
    cleaned = df.copy()

    feature_cols, _ = get_column_types(cleaned)
    if target_column in feature_cols:
        feature_cols.remove(target_column)

    # Fraction of missing entries per numeric feature.
    null_share = cleaned[feature_cols].isnull().mean()

    # Assign every feature to exactly one treatment bucket.
    iterative_cols = [name for name, share in null_share.items() if share < 0.10]
    mean_cols = [name for name, share in null_share.items() if 0.10 <= share < 0.35]
    median_cols = [name for name, share in null_share.items() if 0.35 <= share <= 0.70]
    drop_cols = [name for name, share in null_share.items() if share > 0.70]

    print(f"Dropping {len(drop_cols)} columns with >70% missing values...")
    cleaned.drop(columns=drop_cols, inplace=True)

    # Model-based imputation for the nearly complete features.
    if iterative_cols:
        print(f"Iteratively imputing {len(iterative_cols)} columns (<10% missing)...")
        started = time.time()
        cleaned = iterative_impute_fast(cleaned, iterative_cols, max_iter=5, sample_frac=0.1, verbose=True)
        print(f" Iterative imputation complete in {time.time() - started:.2f} seconds.\n")

    # Mean, then median: same procedure, different strategy and column bucket.
    simple_plans = (
        ("Mean", "mean", mean_cols, "10–35%"),
        ("Median", "median", median_cols, "35–70%"),
    )
    for label, strategy, cols, band in simple_plans:
        if not cols:
            continue
        print(f" Applying {label} Imputer to {len(cols)} columns ({band} missing)...")
        started = time.time()
        cleaned[cols] = SimpleImputer(strategy=strategy).fit_transform(cleaned[cols])
        print(f" {label} imputation complete in {time.time() - started:.2f} seconds.\n")

    print(" Data cleaning complete.\n")
    return cleaned


In [None]:
# Clean the full merged training frame; train_raw itself is left untouched
# because clean_data works on a copy.
train_cleaned = clean_data(train_raw)
# Last expression of the cell: displays the shape after dropping/imputing columns.
train_cleaned.shape


In [None]:
# Separate target and features
X = train_cleaned.drop(columns=['isFraud'])
y = train_cleaned['isFraud']

# Stratified split
# 80/20 split that keeps the isFraud class ratio equal in both parts (stratify=y);
# random_state=42 makes the partition reproducible.
# NOTE(review): clean_data() fitted its imputers on all rows before this split,
# so validation rows contributed to the imputed values.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

print("Train shape:", X_train.shape)
print("Validation shape:", X_val.shape)


In [None]:
# Convert object columns to category so LightGBM can handle them
def prepare_lightgbm_data(X):
    """Return a copy of ``X`` with every ``object`` column cast to ``category``.

    Args:
        X: Feature DataFrame; it is not modified.

    Returns:
        A new DataFrame in which object-dtype columns have ``category`` dtype
        and all other columns are unchanged.
    """
    object_columns = X.select_dtypes(include='object').columns
    # astype with a per-column mapping returns a new frame and leaves X intact.
    return X.astype({name: 'category' for name in object_columns})

# Rebind both splits to their category-typed copies; every later cell that reads
# X_train / X_val sees these converted frames.
X_train = prepare_lightgbm_data(X_train)
X_val = prepare_lightgbm_data(X_val)


In [None]:
import lightgbm as lgb
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt


In [None]:
def train_lightgbm(X_train, y_train, X_val, y_val):
    """Train a binary LightGBM booster with AUC monitoring and early stopping.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_val: Validation features.
        y_val: Validation labels.

    Returns:
        The trained booster returned by ``lgb.train``.
    """
    print(" Training LightGBM model...")

    params = dict(
        objective='binary',
        metric='auc',
        boosting_type='gbdt',
        verbosity=-1,
        learning_rate=0.05,
        num_leaves=64,
        max_depth=-1,
        random_state=42,
    )

    # The validation Dataset references the training one.
    dtrain = lgb.Dataset(X_train, label=y_train, categorical_feature='auto')
    dval = lgb.Dataset(X_val, label=y_val, reference=dtrain, categorical_feature='auto')

    # Up to 1000 rounds, with a 50-round early-stopping callback and an
    # evaluation log every 100 rounds.
    stop_early = lgb.early_stopping(stopping_rounds=50)
    log_progress = lgb.log_evaluation(period=100)

    booster = lgb.train(
        params,
        dtrain,
        num_boost_round=1000,
        valid_sets=[dtrain, dval],
        valid_names=['train', 'val'],
        callbacks=[stop_early, log_progress],
    )

    print(" Model training complete.\n")
    return booster


In [None]:
def plot_roc_auc(model, X_val, y_val):
    """Plot the validation ROC curve of ``model`` and print its AUC.

    Args:
        model: Fitted model whose ``predict(X_val)`` output is used directly as
            the score (assumes probability-like scores - TODO confirm for
            models other than the booster returned by ``train_lightgbm``).
        X_val: Validation features.
        y_val: True validation labels.
    """
    print(" Generating ROC curve...")
    scores = model.predict(X_val)
    auc_score = roc_auc_score(y_val, scores)
    fpr, tpr, _ = roc_curve(y_val, scores)

    _, ax = plt.subplots(figsize=(8, 6))
    ax.plot(fpr, tpr, label=f"AUC = {auc_score:.4f}", linewidth=2)
    # Diagonal reference line for comparison.
    ax.plot([0, 1], [0, 1], 'k--', alpha=0.5)
    ax.set_title("ROC Curve")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.legend(loc="lower right")
    ax.grid(True)
    plt.show()

    print(f" AUC Score: {auc_score:.4f}\n")


In [None]:
# Train the model
# Uses the category-typed X_train / X_val produced by prepare_lightgbm_data above.
lgb_model = train_lightgbm(X_train, y_train, X_val, y_val)

# Plot ROC & AUC
plot_roc_auc(lgb_model, X_val, y_val)


In [None]:
from sklearn.metrics import roc_auc_score

# === 1. Check isFraud balance in train/val ===
def check_fraud_distribution(y_train, y_val):
    """Print the fraud rate and fraud count of the train and validation labels.

    Args:
        y_train: Training labels (0/1).
        y_val: Validation labels (0/1).
    """
    report = [" Fraud distribution:"]
    # The tags are padded so both rates line up in the printed output.
    for tag, labels in (("Train:", y_train), ("Val:  ", y_val)):
        report.append(f"{tag} {labels.mean():.4f} ({labels.sum()} frauds)")
    # Trailing "\n" reproduces the blank line after the validation row.
    print("\n".join(report) + "\n")

# === 2. Correlation with isFraud ===
def check_feature_correlation(df, target_column='isFraud', top_n=10):
    """Print the numeric features most correlated (by absolute value) with the target.

    Only the feature-vs-target correlations are computed, not the full
    feature-by-feature matrix. The target itself is excluded from the ranking,
    since its correlation with itself is trivially 1.0.

    Args:
        df: DataFrame containing the features and ``target_column``.
        target_column: Name of the label column to correlate against.
        top_n: Number of top-ranked features to print.
    """
    print(f" Top features highly correlated with {target_column}:")
    # Numeric features only, without the target column itself.
    features = df.select_dtypes(include=[np.number]).drop(columns=[target_column], errors='ignore')
    # rename() keeps the Series name equal to the target, as in the printed footer.
    corr = features.corrwith(df[target_column]).rename(target_column)
    corr = corr.sort_values(key=abs, ascending=False)
    print(corr.head(top_n), "\n")

# === 3. Random prediction baseline ===
def check_random_auc(y_val, random_state=42):
    """Print the AUC obtained by scoring the validation labels with random noise.

    Args:
        y_val: True validation labels.
        random_state: Seed for the random scores, so the printed baseline is
            the same on every run (matching the seed used elsewhere in this
            notebook).
    """
    # A local generator keeps the baseline reproducible without touching
    # NumPy's global random state.
    rng = np.random.default_rng(random_state)
    random_preds = rng.random(len(y_val))
    auc_random = roc_auc_score(y_val, random_preds)
    print(f" Random Model AUC (baseline): {auc_random:.4f}\n")

# === 4. Minimal feature model ===
def check_baseline_model(X_train, y_train, X_val, y_val):
    """Retrain LightGBM on a handful of basic features and plot its ROC curve.

    Only those of ``TransactionAmt``, ``card1``, ``card2`` and ``addr1`` that
    exist in ``X_train`` are used. If none exist, a message is printed and
    nothing is trained.
    """
    candidates = ('TransactionAmt', 'card1', 'card2', 'addr1')
    present = []
    for name in candidates:
        if name in X_train.columns:
            present.append(name)

    if present:
        print(f" Re-training with weak features only: {present}\n")
        train_subset, val_subset = X_train[present], X_val[present]
        weak_model = train_lightgbm(train_subset, y_train, val_subset, y_val)
        plot_roc_auc(weak_model, val_subset, y_val)
    else:
        print(" Baseline features not in data.")

# === Run All Checks ===
def run_sanity_checks(train_df, X_train, y_train, X_val, y_val):
    """Run the four sanity checks in a fixed order.

    Order: label balance, feature/target correlation, random-score AUC
    baseline, then the minimal-feature LightGBM model.

    Args:
        train_df: Frame that still contains the ``isFraud`` column (used for
            the correlation check).
        X_train, y_train: Training features and labels.
        X_val, y_val: Validation features and labels.
    """
    check_fraud_distribution(y_train=y_train, y_val=y_val)
    check_feature_correlation(df=train_df)
    check_random_auc(y_val=y_val)
    check_baseline_model(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val)

# === Execute ===
# train_cleaned still holds isFraud (needed for the correlation check); the
# X_* / y_* arguments are the stratified split created earlier.
run_sanity_checks(train_cleaned, X_train, y_train, X_val, y_val)


In [None]:
pip install xgboost

In [None]:
# If not installed
# !pip install xgboost

import xgboost as xgb
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, roc_curve
import numpy as np
import matplotlib.pyplot as plt


In [None]:
# Keep numeric columns only (non-numeric columns are not passed to XGBoost here)
def prepare_xgb_data(X):
    """Return the numeric columns of ``X`` with missing values set to -999.

    Args:
        X: Feature DataFrame; it is not modified.

    Returns:
        A new DataFrame holding only numeric-dtype columns, NaNs replaced by
        the sentinel ``-999``.
    """
    # select_dtypes and fillna each return a new frame, so X stays untouched.
    return X.select_dtypes(include=[np.number]).fillna(-999)

# Cross-validation runs on the full feature frame X (pre-split), not on X_train:
# the folds created later provide their own hold-out parts.
X_train_xgb = prepare_xgb_data(X)
y_train_xgb = y.copy()


In [None]:
def xgb_cross_validation(X, y, n_splits=5):
    """Evaluate a fixed-size XGBoost classifier with stratified K-fold CV.

    For every fold a fresh model is fitted, its hold-out AUC is recorded, and
    its ROC curve is interpolated onto a shared 100-point FPR grid so the
    curves can be averaged afterwards.

    Args:
        X: Numeric feature DataFrame (indexed positionally via ``iloc``).
        y: Label Series aligned with ``X``.
        n_splits: Number of stratified folds.

    Returns:
        tuple: ``(aucs, mean_fpr, tprs)`` - per-fold AUC list, the shared FPR
        grid, and the list of per-fold interpolated TPR arrays.
    """
    splitter = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    mean_fpr = np.linspace(0, 1, 100)
    aucs, tprs = [], []

    # Identical hyper-parameters for every fold.
    model_params = dict(
        objective='binary:logistic',
        learning_rate=0.05,
        n_estimators=200,         # fixed tree count; no early stopping is used
        max_depth=6,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        use_label_encoder=False,
        tree_method='hist',
    )

    print(f" Starting {n_splits}-fold cross-validation...\n")

    for fold_no, (fit_idx, holdout_idx) in enumerate(splitter.split(X, y), start=1):
        X_fit, y_fit = X.iloc[fit_idx], y.iloc[fit_idx]
        X_holdout, y_holdout = X.iloc[holdout_idx], y.iloc[holdout_idx]

        clf = xgb.XGBClassifier(**model_params)
        # eval_metric is set after construction rather than in the constructor.
        clf.set_params(eval_metric='auc')
        clf.fit(X_fit, y_fit)

        holdout_proba = clf.predict_proba(X_holdout)[:, 1]
        fold_auc = roc_auc_score(y_holdout, holdout_proba)
        aucs.append(fold_auc)

        fpr, tpr, _ = roc_curve(y_holdout, holdout_proba)
        interpolated = np.interp(mean_fpr, fpr, tpr)
        interpolated[0] = 0.0  # force the interpolated curve to start at TPR 0
        tprs.append(interpolated)

        print(f"Fold {fold_no} AUC: {fold_auc:.4f}")

    print(f"\n Mean AUC: {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")
    return aucs, mean_fpr, tprs


In [None]:
def plot_avg_roc_cv(mean_fpr, tprs):
    """Plot the point-wise mean of the per-fold ROC curves.

    Args:
        mean_fpr: Shared false-positive-rate grid the folds were interpolated on.
        tprs: Sequence of per-fold TPR arrays evaluated on ``mean_fpr``.
    """
    averaged_tpr = np.mean(tprs, axis=0)
    averaged_tpr[-1] = 1.0  # force the averaged curve to end at TPR 1

    _, ax = plt.subplots(figsize=(8, 6))
    ax.plot(mean_fpr, averaged_tpr, label="Mean ROC (CV)", color='b')
    ax.plot([0, 1], [0, 1], 'k--', label="Random Guess")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("Mean ROC Curve from XGBoost Cross-Validation")
    ax.legend()
    ax.grid(True)
    plt.show()


In [None]:
# 5-fold CV on the numeric-only frame, then plot the averaged ROC curve.
aucs, mean_fpr, tprs = xgb_cross_validation(X_train_xgb, y_train_xgb, n_splits=5)
plot_avg_roc_cv(mean_fpr, tprs)


In [None]:
# Use only numeric columns (same as XGBoost) and fill missing values
def prepare_lgbm_numeric(X):
    """Return the numeric columns of ``X`` with NaNs replaced by -999.

    Mirrors the numeric-only preparation used for XGBoost, so both models can
    be compared on the same restricted feature set.

    Args:
        X: Feature DataFrame; it is not modified.

    Returns:
        A new numeric-only DataFrame with missing values set to ``-999``.
    """
    numeric_only = X.select_dtypes(include=[np.number])
    # Optional for LightGBM, which can also consume NaNs directly.
    filled = numeric_only.fillna(-999)
    return filled

# Numeric-only views of the existing train/validation split; the category
# columns added by prepare_lightgbm_data are dropped here.
X_train_lgbm_restricted = prepare_lgbm_numeric(X_train)
X_val_lgbm_restricted = prepare_lgbm_numeric(X_val)


In [None]:
# Use the same LightGBM training function you already have
# Same hyper-parameters as lgb_model; only the feature set differs (numeric only).
lgb_model_restricted = train_lightgbm(
    X_train_lgbm_restricted,
    y_train,
    X_val_lgbm_restricted,
    y_val
)

# Plot ROC and print AUC
plot_roc_auc(lgb_model_restricted, X_val_lgbm_restricted, y_val)


In [None]:
from sklearn.preprocessing import OrdinalEncoder

# === Fit encoder on training data ===
def encode_full_dataset_fit(X):
    """Ordinal-encode the categorical columns of ``X`` and return the fitted encoder.

    Object/category columns are cast to ``str`` and encoded with an
    ``OrdinalEncoder`` that maps categories unseen at fit time to ``-1``.
    Every column is then coerced to numeric and remaining NaNs become ``-999``.
    Missing categorical entries also go through ``astype(str)``; what text
    they become depends on the pandas version in use - TODO confirm.

    Args:
        X: Training feature DataFrame; it is not modified.

    Returns:
        tuple: ``(encoded_frame, encoder)``.
    """
    encoded = X.copy()
    categorical = encoded.select_dtypes(include=['object', 'category']).columns

    encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
    # Cast to str first so the encoder sees text in every categorical column.
    encoded[categorical] = encoder.fit_transform(encoded[categorical].astype(str))

    # Coerce every column to a numeric dtype; unparsable values become NaN.
    encoded = encoded.apply(pd.to_numeric, errors='coerce')
    return encoded.fillna(-999), encoder

# === Transform val/test data with fitted encoder ===
def encode_full_dataset_transform(X, encoder):
    """Apply an already fitted encoder to the categorical columns of ``X``.

    Follows the same steps as the fit function: object/category columns are
    cast to ``str`` and passed to ``encoder.transform``, every column is
    coerced to numeric, and remaining NaNs become ``-999``.

    Args:
        X: Validation/test feature DataFrame; it is not modified.
        encoder: Fitted encoder exposing ``transform`` (presumably the one
            returned by ``encode_full_dataset_fit`` - verify against caller).

    Returns:
        The encoded, fully numeric DataFrame.
    """
    encoded = X.copy()
    categorical = encoded.select_dtypes(include=['object', 'category']).columns
    encoded[categorical] = encoder.transform(encoded[categorical].astype(str))

    # Coerce every column to a numeric dtype; unparsable values become NaN.
    encoded = encoded.apply(pd.to_numeric, errors='coerce')
    return encoded.fillna(-999)


In [None]:
# Fit the encoder on the training split only, then reuse it for validation so
# both frames share one category-to-integer mapping.
X_train_encoded, encoder = encode_full_dataset_fit(X_train)
X_val_encoded = encode_full_dataset_transform(X_val, encoder)


In [None]:
def train_xgboost_full(X_train, y_train, X_val, y_val):
    """Fit an XGBoost classifier with a fixed 300 trees on the given data.

    Args:
        X_train: Training features (already numeric).
        y_train: Training labels.
        X_val: Accepted but not used by this function.
        y_val: Accepted but not used by this function.

    Returns:
        The fitted ``XGBClassifier``.
    """
    hyperparams = dict(
        objective='binary:logistic',
        learning_rate=0.05,
        n_estimators=300,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        use_label_encoder=False,
        tree_method='hist',
    )
    clf = xgb.XGBClassifier(**hyperparams)

    clf.set_params(eval_metric='auc')  # for older XGBoost versions
    # No eval_set is passed, so training always runs all 300 rounds.
    clf.fit(X_train, y_train)
    return clf


In [None]:
# Train XGBoost on the ordinal-encoded training frame.
xgb_model_encoded = train_xgboost_full(X_train_encoded, y_train, X_val_encoded, y_val)

# Evaluate
# Column 1 of predict_proba is the positive-class (isFraud == 1) probability.
y_proba = xgb_model_encoded.predict_proba(X_val_encoded)[:, 1]
fpr, tpr, _ = roc_curve(y_val, y_proba)
auc_score = roc_auc_score(y_val, y_proba)

# Plot ROC
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f"XGBoost (Encoded) AUC = {auc_score:.4f}", color='darkorange')
# Diagonal reference line for comparison.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve – XGBoost with Encoded Categorical Features")
plt.legend()
plt.grid(True)
plt.show()

print(f" AUC Score with Encoded XGBoost: {auc_score:.4f}")


In [None]:
from lightgbm import LGBMClassifier, early_stopping, log_evaluation

# Minimal retraining setup
# scikit-learn wrapper around LightGBM so that predict_proba is available for the
# precision/recall and threshold analysis in the following cells.
lgbm_sklearn_model = LGBMClassifier(
    objective='binary',
    learning_rate=0.05,
    num_leaves=64,
    n_estimators=1000,
    random_state=42
)

# Trained on the numeric-only features, monitoring AUC on the validation split.
lgbm_sklearn_model.fit(
    X_train_lgbm_restricted, y_train,
    eval_set=[(X_val_lgbm_restricted, y_val)],
    eval_metric='auc',
    callbacks=[
        early_stopping(50),   # early-stopping callback with a 50-round window
        log_evaluation(100)   # log the evaluation metric every 100 rounds
    ]
)


In [None]:
from sklearn.metrics import precision_recall_curve, average_precision_score
import matplotlib.pyplot as plt

# === Predict probabilities ===
# Positive-class probabilities on the validation split; note that each model is
# scored on its own feature representation (numeric-only vs. ordinal-encoded).
y_proba_lgb = lgbm_sklearn_model.predict_proba(X_val_lgbm_restricted)[:, 1]
y_proba_xgb = xgb_model_encoded.predict_proba(X_val_encoded)[:, 1]

# === Compute PR curves ===
precision_lgb, recall_lgb, _ = precision_recall_curve(y_val, y_proba_lgb)
precision_xgb, recall_xgb, _ = precision_recall_curve(y_val, y_proba_xgb)
# === Compute average precision (summary of each PR curve) ===
ap_lgb = average_precision_score(y_val, y_proba_lgb)
ap_xgb = average_precision_score(y_val, y_proba_xgb)

# === Plot PR curves ===
plt.figure(figsize=(8, 6))
plt.plot(recall_lgb, precision_lgb, label=f"LightGBM (AP = {ap_lgb:.4f})")
plt.plot(recall_xgb, precision_xgb, label=f"XGBoost (AP = {ap_xgb:.4f})")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve – Fraud Detection")
plt.grid(True)
plt.legend()
plt.show()

In [None]:
from sklearn.metrics import (
    f1_score, fbeta_score, classification_report,
    balanced_accuracy_score, confusion_matrix
)

# === Threshold for classification (you can tune this later)
threshold = 0.5

# === Convert probabilities to binary predictions
# Uses y_proba_lgb / y_proba_xgb computed in the precision-recall cell.
y_pred_lgb = (y_proba_lgb >= threshold).astype(int)
y_pred_xgb = (y_proba_xgb >= threshold).astype(int)

# === F1 and Fβ scores
f1_lgb = f1_score(y_val, y_pred_lgb)
f1_xgb = f1_score(y_val, y_pred_xgb)

# beta=2 weights recall more heavily than precision.
fbeta_lgb = fbeta_score(y_val, y_pred_lgb, beta=2)
fbeta_xgb = fbeta_score(y_val, y_pred_xgb, beta=2)

# === Balanced Accuracy
bal_acc_lgb = balanced_accuracy_score(y_val, y_pred_lgb)
bal_acc_xgb = balanced_accuracy_score(y_val, y_pred_xgb)

# === Scikit-learn Reports
print(" LightGBM Classification Report:\n")
print(classification_report(y_val, y_pred_lgb, digits=4))

print(" XGBoost Classification Report:\n")
print(classification_report(y_val, y_pred_xgb, digits=4))

# === Summary Output
print("\n Summary Metrics:")

print(f"LightGBM")
print(f"  F1 Score          : {f1_lgb:.4f}")
print(f"  F2 Score (β=2)    : {fbeta_lgb:.4f}")
print(f"  Balanced Accuracy : {bal_acc_lgb:.4f}")

print(f"\n XGBoost")
print(f"  F1 Score          : {f1_xgb:.4f}")
print(f"  F2 Score (β=2)    : {fbeta_xgb:.4f}")
print(f"  Balanced Accuracy : {bal_acc_xgb:.4f}")


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, fbeta_score

# Candidate decision thresholds from 0.05 up to (but excluding) 0.95, step 0.01.
thresholds = np.arange(0.05, 0.95, 0.01)
precisions = []
recalls = []
f2_scores = []

# Running best; both stay 0 if no threshold yields an F2 score above 0.
best_thresh = 0
best_f2 = 0

# Sweep the LightGBM validation probabilities over every candidate threshold.
# NOTE(review): the threshold is selected on the same validation data that is
# later used to report its metrics.
for t in thresholds:
    preds = (y_proba_lgb >= t).astype(int)
    p = precision_score(y_val, preds, zero_division=0)
    r = recall_score(y_val, preds)
    f2 = fbeta_score(y_val, preds, beta=2)

    precisions.append(p)
    recalls.append(r)
    f2_scores.append(f2)

    # Strict ">" keeps the lowest threshold when several tie on F2.
    if f2 > best_f2:
        best_f2 = f2
        best_thresh = t

# Plotting
plt.figure(figsize=(10, 6))
plt.plot(thresholds, recalls, label="Recall", color="green")
plt.plot(thresholds, precisions, label="Precision", color="blue")
plt.plot(thresholds, f2_scores, label="F2 Score", color="purple")
# Vertical marker at the F2-maximising threshold.
plt.axvline(x=best_thresh, linestyle="--", color="red", label=f"Best Threshold = {best_thresh:.2f}")
plt.xlabel("Threshold")
plt.ylabel("Metric Value")
plt.title("🔍 Threshold Sweep – LightGBM")
plt.legend()
plt.grid(True)
plt.show()


In [None]:
from sklearn.metrics import classification_report

# Default threshold (0.5)
# recall_score comes from the import in the threshold-sweep cell.
preds_default = (y_proba_lgb >= 0.5).astype(int)
recall_default = recall_score(y_val, preds_default)

# Best threshold
# best_thresh / best_f2 are the values found by the threshold sweep.
preds_best = (y_proba_lgb >= best_thresh).astype(int)
recall_best = recall_score(y_val, preds_best)

print(f" Default Threshold (0.5) Recall      : {recall_default:.4f}")
print(f" Best Threshold ({best_thresh:.2f}) Recall : {recall_best:.4f}")
print(f" F2 Score at Best Threshold          : {best_f2:.4f}")

print("\n Classification Report at Best Threshold:\n")
print(classification_report(y_val, preds_best, digits=4))


In [None]:
from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix

# AUC and AUPRC (threshold-independent)
auc_lgb = roc_auc_score(y_val, y_proba_lgb)
auprc_lgb = average_precision_score(y_val, y_proba_lgb)

# Threshold-based predictions (you already chose 0.12)
# NOTE(review): 0.12 is hard-coded rather than read from best_thresh; confirm it
# still matches the sweep result whenever the model or data changes.
# This rebinds the module-level `threshold` that an earlier cell set to 0.5.
threshold = 0.12
y_pred_thresh = (y_proba_lgb >= threshold).astype(int)

# Confusion matrix at threshold
cm = confusion_matrix(y_val, y_pred_thresh)

# recall_score, precision_score, f1_score, fbeta_score and balanced_accuracy_score
# are imported in earlier cells, so those cells must have been run first.
print(f" LightGBM Performance Summary (Threshold = {threshold:.2f})")
print(f"AUC       : {auc_lgb:.4f}")
print(f"AUPRC     : {auprc_lgb:.4f}")
print(f"Recall    : {recall_score(y_val, y_pred_thresh):.4f}")
print(f"Precision : {precision_score(y_val, y_pred_thresh):.4f}")
print(f"F1 Score  : {f1_score(y_val, y_pred_thresh):.4f}")
print(f"F2 Score  : {fbeta_score(y_val, y_pred_thresh, beta=2):.4f}")
print(f"Balanced Accuracy : {balanced_accuracy_score(y_val, y_pred_thresh):.4f}")

print("\n Confusion Matrix:")
print(cm)


In [None]:
pip install catboost


In [None]:
def prepare_for_catboost(X):
    """Return a CatBoost-ready copy of ``X`` and the names of its categorical columns.

    Object/category columns are converted to strings, with missing entries
    replaced by the literal ``"missing"``. Other columns are left unchanged.

    Args:
        X: Feature DataFrame; it is not modified.

    Returns:
        tuple: ``(prepared_frame, cat_feature_names)`` where the second item is
        the list of object/category column names.
    """
    X = X.copy()
    cat_cols = X.select_dtypes(include=['object', 'category']).columns

    for col in cat_cols:
        # Fill BEFORE casting to str: astype(str) can turn NaN into the text
        # "nan", after which fillna() has nothing left to replace. Going
        # through object dtype also allows writing "missing" into category
        # columns, which reject values outside their defined categories.
        values = X[col].astype(object)
        X[col] = values.where(values.notna(), "missing").astype(str)

    return X, cat_cols.tolist()


In [None]:
# The categorical column list is taken from the training split and reused for the
# validation Pool; the list returned for X_val is discarded.
X_train_cb, cat_features_cb = prepare_for_catboost(X_train)
X_val_cb, _ = prepare_for_catboost(X_val)


In [None]:
from catboost import CatBoostClassifier, Pool

# Pools bundle features, labels and the categorical column names for CatBoost.
train_pool = Pool(X_train_cb, label=y_train, cat_features=cat_features_cb)
val_pool = Pool(X_val_cb, label=y_val, cat_features=cat_features_cb)

# Up to 1000 iterations with AUC as eval metric, an early-stopping window of
# 50 rounds and verbose=100; the seed is fixed for reproducibility.
cat_model = CatBoostClassifier(
    iterations=1000,
    learning_rate=0.05,
    depth=6,
    eval_metric='AUC',
    random_seed=42,
    early_stopping_rounds=50,
    verbose=100
)

# The validation Pool is passed as eval_set.
cat_model.fit(train_pool, eval_set=val_pool)


In [None]:
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt

# Positive-class probabilities from CatBoost on the prepared validation frame.
y_proba_cb = cat_model.predict_proba(X_val_cb)[:, 1]
fpr_cb, tpr_cb, _ = roc_curve(y_val, y_proba_cb)
auc_cb = roc_auc_score(y_val, y_proba_cb)

plt.figure(figsize=(8, 6))
plt.plot(fpr_cb, tpr_cb, label=f"CatBoost AUC = {auc_cb:.4f}", color='purple')
# Diagonal reference line for comparison.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve – CatBoost")
plt.legend()
plt.grid(True)
plt.show()

print(f" Final CatBoost AUC: {auc_cb:.4f}")


In [None]:
from sklearn.metrics import precision_recall_curve, average_precision_score
import matplotlib.pyplot as plt

# === Predict probabilities ===
# Recomputes the LightGBM / XGBoost probabilities (rebinding the names set by the
# earlier precision-recall cell) and adds CatBoost. Each model is scored on its
# own validation feature representation.
y_proba_lgb = lgbm_sklearn_model.predict_proba(X_val_lgbm_restricted)[:, 1]
y_proba_xgb = xgb_model_encoded.predict_proba(X_val_encoded)[:, 1]
y_proba_cat = cat_model.predict_proba(X_val_cb)[:, 1]

# === Compute PR curves ===
precision_lgb, recall_lgb, _ = precision_recall_curve(y_val, y_proba_lgb)
precision_xgb, recall_xgb, _ = precision_recall_curve(y_val, y_proba_xgb)
precision_cat, recall_cat, _ = precision_recall_curve(y_val, y_proba_cat)

# === Compute average precision (summary of each PR curve) ===
ap_lgb = average_precision_score(y_val, y_proba_lgb)
ap_xgb = average_precision_score(y_val, y_proba_xgb)
ap_cat = average_precision_score(y_val, y_proba_cat)

# === Plot PR curves ===
plt.figure(figsize=(8, 6))
plt.plot(recall_lgb, precision_lgb, label=f"LightGBM (AP = {ap_lgb:.4f})")
plt.plot(recall_xgb, precision_xgb, label=f"XGBoost (AP = {ap_xgb:.4f})")
plt.plot(recall_cat, precision_cat, label=f"CatBoost (AP = {ap_cat:.4f})")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve – Fraud Detection")
plt.grid(True)
plt.legend()
plt.show()
