# Fraud Detection with MLflow Tracking

This notebook demonstrates how to use MLflow to track machine learning experiments for fraud detection using XGBoost and LightGBM models.

In [2]:
# Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, average_precision_score, classification_report
import xgboost as xgb
import lightgbm as lgb
import sklearn
import mlflow
import mlflow.xgboost
import mlflow.lightgbm
from datetime import datetime

In [3]:
# Set MLflow tracking URI (local tracking)
mlflow.set_tracking_uri("file:./mlruns")

# Create a new experiment with timestamp
experiment_name = f"fraud_detection_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
mlflow.set_experiment(experiment_name)

print(f"Created new experiment: {experiment_name}")

2025/05/28 17:43:31 INFO mlflow.tracking.fluent: Experiment with name 'fraud_detection_20250528_174331' does not exist. Creating a new experiment.


Created new experiment: fraud_detection_20250528_174331


In [5]:
# Load the dataset
data = pd.read_csv("data/creditcard.csv")
print("Dataset shape:", data.shape)
data.head()

Dataset shape: (284807, 31)


Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [6]:
# Prepare features and target
X = data.drop(['Class', 'Time'], axis=1)
y = data['Class']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)
print("\nClass distribution in training set:")
print(y_train.value_counts(normalize=True))

Training set shape: (227845, 29)
Test set shape: (56962, 29)

Class distribution in training set:
Class
0    0.998271
1    0.001729
Name: proportion, dtype: float64


In [7]:
with mlflow.start_run(run_name="xgboost_baseline"):
    # Log parameters
    xgb_params = {
        "n_estimators": 200,
        "max_depth": 6,
        "learning_rate": 0.1,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "scale_pos_weight": y_train.value_counts()[0] / y_train.value_counts()[1],
        "eval_metric": 'logloss',
        "early_stopping_rounds": 10,
        "random_state": 42
    }
    mlflow.log_params(xgb_params)
    
    # Initialize and train model
    xgb_model = xgb.XGBClassifier(**xgb_params)
    
    trained_xgb_model = xgb_model.fit(
        X=X_train,
        y=y_train,
        eval_set=[(X_test, y_test)],
        verbose=20
    )
    
    # Log model
    mlflow.xgboost.log_model(trained_xgb_model, "xgboost_model")
    
    # Evaluate and log metrics
    y_pred_proba_xgb = xgb_model.predict_proba(X_test)[:, 1]
    roc_auc = roc_auc_score(y_test, y_pred_proba_xgb)
    pr_auc = average_precision_score(y_test, y_pred_proba_xgb)
    
    mlflow.log_metric("roc_auc", roc_auc)
    mlflow.log_metric("pr_auc", pr_auc)
    
    print("====== XGBoost Performance ======")
    print("ROC AUC:", roc_auc)
    print("PR AUC:", pr_auc)
    print("\nClassification Report:")
    print(classification_report(y_test, xgb_model.predict(X_test), digits=4))

[0]	validation_0-logloss:0.60923
[20]	validation_0-logloss:0.08681
[40]	validation_0-logloss:0.02376
[60]	validation_0-logloss:0.01097
[80]	validation_0-logloss:0.00678
[100]	validation_0-logloss:0.00424
[120]	validation_0-logloss:0.00336
[140]	validation_0-logloss:0.00310
[160]	validation_0-logloss:0.00299
[179]	validation_0-logloss:0.00296


  self.get_booster().save_model(fname)


ROC AUC: 0.974786960366589
PR AUC: 0.8728517133261517

Classification Report:
              precision    recall  f1-score   support

           0     0.9997    0.9997    0.9997     56864
           1     0.8454    0.8367    0.8410        98

    accuracy                         0.9995     56962
   macro avg     0.9225    0.9182    0.9204     56962
weighted avg     0.9995    0.9995    0.9995     56962



In [8]:
with mlflow.start_run(run_name="lightgbm_baseline"):
    # Prepare LightGBM datasets
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
    
    # Define parameters
    lgb_params = {
        'objective': 'binary',
        'metric': ['auc', 'binary_logloss'],
        'is_unbalance': True,
        'boosting_type': 'gbdt',
        'learning_rate': 0.01,
        'num_leaves': 60,
        'max_depth': -1,
        'feature_fraction': 0.8,
        'bagging_fraction': 0.8,
        'bagging_freq': 5,
        'verbose': -1,
        'seed': 42
    }
    
    # Log parameters
    mlflow.log_params(lgb_params)
    
    # Train model
    lgb_model = lgb.train(
        lgb_params,
        lgb_train,
        num_boost_round=5000,
        valid_sets=[lgb_train, lgb_eval],
        callbacks=[lgb.early_stopping(stopping_rounds=50, verbose=True)]
    )
    
    # Log model
    mlflow.lightgbm.log_model(lgb_model, "lightgbm_model")
    
    # Evaluate and log metrics
    y_pred_proba_lgb = lgb_model.predict(X_test, num_iteration=lgb_model.best_iteration)
    roc_auc = roc_auc_score(y_test, y_pred_proba_lgb)
    pr_auc = average_precision_score(y_test, y_pred_proba_lgb)
    
    mlflow.log_metric("roc_auc", roc_auc)
    mlflow.log_metric("pr_auc", pr_auc)
    mlflow.log_metric("best_iteration", lgb_model.best_iteration)
    
    print("=== LightGBM Performance ===")
    print("ROC AUC:", roc_auc)
    print("PR AUC:", pr_auc)
    print(f"Best iteration: {lgb_model.best_iteration}")
    print("\nClassification Report:")
    print(classification_report(y_test, (y_pred_proba_lgb > 0.5).astype(int), digits=4))

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[115]	training's auc: 0.999904	training's binary_logloss: 0.0031644	valid_1's auc: 0.986522	valid_1's binary_logloss: 0.00520469




=== LightGBM Performance ===
ROC AUC: 0.9865215465758616
PR AUC: 0.6550093424782732
Best iteration: 115

Classification Report:
              precision    recall  f1-score   support

           0     0.9997    0.9993    0.9995     56864
           1     0.6891    0.8367    0.7558        98

    accuracy                         0.9991     56962
   macro avg     0.8444    0.9180    0.8776     56962
weighted avg     0.9992    0.9991    0.9991     56962

