In [1]:
import os
import shap
import json
import mlflow
import mlflow.xgboost

import numpy as np
import pandas as pd
import xgboost as xgb
import seaborn as sns
import matplotlib.pyplot as plt

from scipy.stats import randint, uniform

from mlflow import MlflowClient
from mlflow.models import infer_signature, make_metric, MetricThreshold

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
from sklearn.datasets import load_iris, load_diabetes, make_classification, load_wine, load_digits, load_breast_cancer, fetch_openml
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, ParameterGrid, cross_val_score
from sklearn.metrics import (mean_absolute_error, 
                             mean_squared_error, 
                             r2_score, 
                             accuracy_score, 
                             precision_score, 
                             recall_score, 
                             f1_score, 
                             roc_auc_score, 
                             roc_curve, 
                             precision_recall_curve, 
                             confusion_matrix,
                             average_precision_score, 
                             classification_report)

# Quickstart with MLflow + XGBoost

## Load and prepare the dataset
We will train a simple multi-class classification model for Iris flowers using the iris dataset. Let's load the dataset using load_iris() into a pandas Dataframe and take a look at the data.

In [2]:
iris_df = load_iris(as_frame=True).frame
iris_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [3]:
# Split into 80% training and 20% testing
train_df, test_df = train_test_split(iris_df, test_size=0.2, random_state=42)
train_df.shape, test_df.shape

((120, 5), (30, 5))

In [4]:
# Separate the target column for the training set
train_dataset = mlflow.data.from_pandas(train_df, name="train")
X_train = train_dataset.df.drop(["target"], axis=1)
y_train = train_dataset.df[["target"]]

dtrain = xgb.DMatrix(X_train, label=y_train)

In [5]:
# Separate the target column for the testing set
test_dataset = mlflow.data.from_pandas(test_df, name="test")
X_test = test_dataset.df.drop(["target"], axis=1)
y_test = test_dataset.df[["target"]]

dtest = xgb.DMatrix(X_test, label=y_test)

In [87]:
mlflow.login()

2026/01/05 19:46:47 INFO mlflow.utils.credentials: Successfully connected to MLflow hosted tracking server! Host: https://dbc-e4fb7400-b637.cloud.databricks.com.


In [7]:
mlflow.set_tracking_uri("databricks")
mlflow.set_experiment("/Users/debajyoti.das.bookworm@gmail.com/mlflow-xgboost-quickstart")

<Experiment: artifact_location='dbfs:/databricks/mlflow-tracking/1996672001009152', creation_time=1767120565824, experiment_id='1996672001009152', last_update_time=1767383883186, lifecycle_stage='active', name='/Users/debajyoti.das.bookworm@gmail.com/mlflow-xgboost-quickstart', tags={'mlflow.experiment.sourceName': '/Users/debajyoti.das.bookworm@gmail.com/mlflow-xgboost-quickstart',
 'mlflow.experimentKind': 'custom_model_development',
 'mlflow.experimentType': 'MLFLOW_EXPERIMENT',
 'mlflow.ownerEmail': 'debajyoti.das.bookworm@gmail.com',
 'mlflow.ownerId': '8680600426295472'}>

## Logging with MLflow
MLflow has powerful tracking APIs that let's us log runs and models along with their associated metadata such as parameters and metrics. Let's train and evaluate our model.

In [8]:
# Define and log the parameters for our model
params = {"objective": "multi:softprob",
          "num_class": len(set(train_df["target"])),
          "max_depth": 8,
          "learning_rate": 0.05,
          "subsample": 0.9,
          "colsample_bytree": 0.9,
          "min_child_weight": 1,
          "gamma": 0,
          "reg_alpha": 0,
          "reg_lambda": 1,
          "random_state": 42}

In [9]:
# Start a training run
with mlflow.start_run() as run:
   training_config = {"num_boost_round": 200,"early_stopping_rounds": 20}
   mlflow.log_params(params)
   mlflow.log_params(training_config)

   # Custom evaluation tracking
   eval_results = {}
   
   # Train model, on iris data, with custom callback
   model = xgb.train(params = params,
                     dtrain = dtrain,
                     num_boost_round = training_config["num_boost_round"],
                     evals = [(dtrain, "train"), (dtest, "test")],
                     early_stopping_rounds = training_config["early_stopping_rounds"],
                     evals_result = eval_results,
                     verbose_eval = False,)

   # Log training history to the run
   for epoch, (train_metrics, test_metrics) in enumerate(zip(eval_results["train"]["mlogloss"], eval_results["test"]["mlogloss"])):
    mlflow.log_metrics({"train_logloss": train_metrics, "test_logloss": test_metrics}, step=epoch)
    
   # Final evaluation
   y_pred_proba = model.predict(dtest)
   y_pred = np.argmax(y_pred_proba, axis=1)
   final_metrics = {"accuracy": accuracy_score(y_test, y_pred),
                    "roc_auc": roc_auc_score(y_test, y_pred_proba, multi_class="ovr"),}
   mlflow.log_metrics(final_metrics, step=model.best_iteration)
        
   # Log the model at the best iteration, linked with all params and metrics
   model_info = mlflow.xgboost.log_model(xgb_model=model,
                                         name="xgboost_model",
                                         signature=infer_signature(X_train, y_pred_proba),
                                         input_example=X_train[:5],
                                         step=model.best_iteration)

üèÉ View run victorious-jay-92 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/a1a74aec37984bda9bcc797ab2fbd6dd
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


In [10]:
logged_model = mlflow.get_logged_model(model_info.model_id)
logged_model, logged_model.metrics, logged_model.params

(LoggedModel(artifact_location='dbfs:/databricks/mlflow-tracking/1996672001009152/logged_models/m-a93c56ecb093462db808d128e768e4f4/artifacts', creation_timestamp=1767385102982, experiment_id='1996672001009152', last_updated_timestamp=1767385114761, model_id='m-a93c56ecb093462db808d128e768e4f4', model_type='', model_uri='models:/m-a93c56ecb093462db808d128e768e4f4', name='xgboost_model', source_run_id='a1a74aec37984bda9bcc797ab2fbd6dd', status=<LoggedModelStatus.READY: 'READY'>, status_message=''),
 [<Metric: dataset_digest=None, dataset_name=None, key='accuracy', model_id='m-a93c56ecb093462db808d128e768e4f4', run_id='a1a74aec37984bda9bcc797ab2fbd6dd', step=199, timestamp=1767385102541, value=1.0>,
  <Metric: dataset_digest=None, dataset_name=None, key='roc_auc', model_id='m-a93c56ecb093462db808d128e768e4f4', run_id='a1a74aec37984bda9bcc797ab2fbd6dd', step=199, timestamp=1767385102541, value=1.0>,
  <Metric: dataset_digest=None, dataset_name=None, key='test_logloss', model_id='m-a93c56ec

# Learning XGBoost + MLflow Comprehensively
In this comprehensive guide, we'll explore how to use XGBoost with MLflow for experiment tracking, model management, and production deployment. We'll cover both the native XGBoost API and scikit-learn compatible interface, from basic autologging to advanced distributed training patterns.

## Quick Start with Autologging
The fastest way to get started is with MLflow's XGBoost autologging. Enable comprehensive experiment tracking with a single line. The simple example below automatically logs all XGBoost parameters and training configuration, training and validation metrics for each boosting round, feature importance plots and JSON artifacts, the trained model with proper serialization, and early stopping metrics and best iteration information.

In [11]:
# Enable autologging for XGBoost
mlflow.xgboost.autolog()

In [12]:
# Load sample data
diabetes_data = load_diabetes(as_frame=True).frame
train_df, test_df = train_test_split(diabetes_data, test_size=0.2, random_state=42)

# Prepare training data
X_train = train_df.drop(columns=["target"], axis=1)
y_train = train_df["target"]

# Prepare testing data
X_test = test_df.drop(columns=["target"], axis=1)
y_test = test_df["target"]

# Prepare DMatrix format
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [13]:
# Define training parameters
params = {"objective": "reg:squarederror",
          "max_depth": 6,
          "learning_rate": 0.1,
          "subsample": 0.8,
          "colsample_bytree": 0.8,
          "random_state": 42}

In [14]:
# Train model on diabetes data - MLflow automatically logs everything
with mlflow.start_run():
    model = xgb.train(params=params,
                      dtrain=dtrain,
                      num_boost_round=100,
                      evals=[(dtrain, "train"), (dtest, "test")],
                      early_stopping_rounds=10,
                      verbose_eval=False)

    print(f"Best iteration: {model.best_iteration}")
    print(f"Best score: {model.best_score}")



Best iteration: 26
Best score: 54.86431383570459
üèÉ View run gentle-loon-650 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/08028890e5554aa78bd8aa8941238a18
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


MLflow's XGBoost autologging behavior can also be customized to fit our specific workflow needs

In [15]:
# Fine-tune autologging behavior
mlflow.xgboost.autolog(importance_types=["weight", "gain", "cover"],  # Types of importance to log,
                       log_input_examples=True,  # Include input examples in logged models,
                       log_model_signatures=True,  # Include model signatures,
                       log_models=True,  # Log trained models,
                       log_datasets=True,  # Log dataset information,
                       model_format="json",  # Use JSON format for better compatibility,
                       registered_model_name="XGBoostModel-Predict_Diabetes",  # Auto-register models,
                       extra_tags={"team": "data-science", "project": "predict-diabetes"},)

In [16]:
# Train a new model on diabetes data - MLflow automatically logs all the above metrics that we mentioned
with mlflow.start_run():
    model = xgb.train(params=params,
                      dtrain=dtrain,
                      num_boost_round=100,
                      evals=[(dtrain, "train"), (dtest, "test")],
                      early_stopping_rounds=10,
                      verbose_eval=False)

    print(f"Best iteration: {model.best_iteration}")
    print(f"Best score: {model.best_score}")



Best iteration: 26
Best score: 54.86431383570459
üèÉ View run intrigued-grub-144 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/e74afa8bb0f242c4ab0d2f36919a4cef
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


These configuration options give us fine-grained control over the autologging behavior. 
- **Importance types** controls which feature importance metrics are captured. 
- **Dataset logging** tracks the data used for training and evaluation. 
- **Input examples** and **signatures** are crucial for production deployment. 
- **Extra tags** help organize experiments across teams and projects.

## Understanding XGBoost Autologging
### What Gets Logged
MLflow's XGBoost autologging captures comprehensive information about your gradient boosting process automatically:

**Category**	            **Information Captured**
- *Parameters*          |   All booster parameters, training configuration, callback settings
- *Metrics*             |   Training/validation metrics per iteration, early stopping metrics
- *Feature Importance*  |   Weight, gain, cover, and total_gain importance with visualizations
- *Artifacts*           |   Trained model, feature importance plots, JSON importance data
The autologging system is designed to be comprehensive yet non-intrusive. It captures everything you need for reproducibility without requiring changes to your existing XGBoost code.

### Native vs Scikit-learn API
XGBoost offers two main interfaces, and MLflow supports both seamlessly:

In [17]:
# Scikit-learn API - Familiar interface with sklearn integration
mlflow.sklearn.autolog()  # Note: Use sklearn autolog for XGBoost sklearn API

model = xgb.XGBRegressor(n_estimators=100, max_depth=6) # Using sklearn API for XGBoost
model.fit(X_train, y_train)

2026/01/02 20:18:57 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '62188ec2dddc49d5828b22efd0b854de', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


üèÉ View run sneaky-flea-468 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/62188ec2dddc49d5828b22efd0b854de
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


#### Choosing the Right API:

- **Native XGBoost API** - Use when you need maximum performance with direct access to all XGBoost optimizations, advanced features like custom objectives and evaluation metrics, memory efficiency with fine-grained control over data loading, or competition settings where every bit of performance matters.

- **Scikit-learn API** - Use when you need pipeline integration with sklearn preprocessing and feature engineering, hyperparameter tuning using GridSearchCV or RandomizedSearchCV, team familiarity with sklearn patterns, or rapid prototyping with familiar interfaces.

## Logging Approaches
### Manual Logging
For complete control over experiment tracking, you can manually instrument your XGBoost training:

In [18]:
# Generate sample data
X, y = make_classification(n_samples=10000, n_features=20, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

- #### colsample_bytree:
    - is a hyperparameter used for feature subsampling. It defines the **fraction of features (columns) to be randomly selected when constructing each new tree**. 
    - It is analogous to the **max_features** parameter in *Random Forest*, which also limits the number of features considered for splits to increase model diversity. 

- #### subsample:
    - is a hyperparameter used for dataset subsampling. It defines the **fraction of the data (rows) to be randomly selected when constructing each new tree**. 
    - Subsampling occurs **once per boosting iteration** (for every new tree). In each round, a fresh random subset is drawn from the original training pool. 
    - Practitioners commonly use values between **0.5 and 0.9**. Setting it too low (e.g., **< 0.5**) can lead to **underfitting** because the trees may not have enough data to learn meaningful patterns.

- #### min_child_weight
    - is a regularization parameter used to control tree depth and prevent overfitting by requiring a **minimum "amount of evidence" before a split can be made**. It is formally defined as **the minimum sum of instance weights (Hessian) required in a child node**. If a potential split would result in a leaf node where the sum of its instances' Hessians is *less* than the min_child_weight value, the tree stops growing that branch (it will not partition further). **Range** is between **0 to infinity**. The default is **1**.
    - To **reduce overfitting**, practitioners **increase** the value. This makes the algorithm more conservative, preventing it from creating highly specific branches for a small number of samples. Commonly tuned between **1 and 20**, though very large datasets may require much higher values (e.g., hundreds) to be effective.

In [19]:
params = {"objective": "binary:logistic",
            "max_depth": 8,
            "learning_rate": 0.05,
            "subsample": 0.9,           # Only 90% of data considered per tree
            "colsample_bytree": 0.9,    # Only 90% of features considered per tree
            "min_child_weight": 1,
            "gamma": 0,
            "reg_alpha": 0,
            "reg_lambda": 1,
            "random_state": 42,}

training_config = {"num_boost_round": 500,"early_stopping_rounds": 50,}

# Custom evaluation tracking
eval_results = {}

In [20]:
# Prepare data
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [21]:
# Manual logging approach
with mlflow.start_run():

    # Log all parameters
    mlflow.log_params(params)
    mlflow.log_params(training_config)

    # Train model, on make_classification data, with custom callback
    model = xgb.train(params=params,
                      dtrain=dtrain,
                      num_boost_round=training_config["num_boost_round"],
                      evals=[(dtrain, "train"), (dtest, "test")],
                      early_stopping_rounds=training_config["early_stopping_rounds"],
                      evals_result=eval_results,
                      verbose_eval=False,)
    
    # Log training history
    for epoch, (train_metrics, test_metrics) in enumerate(zip(eval_results["train"]["logloss"], eval_results["test"]["logloss"])):
        mlflow.log_metrics({"train_logloss": train_metrics, "test_logloss": test_metrics}, step=epoch)

    # Final evaluation
    y_pred_proba = model.predict(dtest)
    y_pred = (y_pred_proba > 0.5).astype(int)

    final_metrics = {"accuracy": accuracy_score(y_test, y_pred),
                     "roc_auc": roc_auc_score(y_test, y_pred_proba),
                     "best_iteration": model.best_iteration,
                     "best_score": model.best_score}

    mlflow.log_metrics(final_metrics)

    # Log the model, at the best iteration, linked with all params and metrics, with signature
    signature = infer_signature(X_train, y_pred_proba)
    model_info = mlflow.xgboost.log_model(xgb_model=model,
                                          name="xgboost_manl_logg_model",
                                          signature=signature,
                                          input_example=X_train[:5],
                                          step=model.best_iteration)
    print(f"Model logged in run {mlflow.active_run().info.run_id}")  



Model logged in run b0c1c917cb1449f1ac47d6cab9e11c46
üèÉ View run rebellious-pug-79 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/b0c1c917cb1449f1ac47d6cab9e11c46
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


### Scikit-learn Integration
XGBoost's scikit-learn compatible estimators work seamlessly with MLflow's sklearn autologging:

In [22]:
# Load data
wine = load_wine(as_frame=True).frame
train_df, test_df = train_test_split(wine, test_size=0.2, random_state=42)

# Separate the target column for the training set
X_train = train_df.drop(["target"], axis=1)
y_train = train_df["target"]

# Separate the target column for the testing set
X_test = test_df.drop(["target"], axis=1)
y_test = test_df["target"]

In [23]:
# Enable sklearn autologging for XGBoost sklearn estimators
mlflow.sklearn.autolog()

with mlflow.start_run(run_name="XGBoost Sklearn API"):
    # XGBoost with scikit-learn interface
    model = xgb.XGBClassifier(n_estimators=100,
                              max_depth=6,
                              learning_rate=0.1,
                              subsample=0.8,
                              colsample_bytree=0.8,
                              random_state=42,
                              eval_metric="mlogloss",
                              early_stopping_rounds=10,)
    
    # Fit with evaluation set for early stopping
    # Cross-validation scores are automatically logged
    cv_scores = cross_val_score(model, 
                                X_train, 
                                y_train, 
                                cv=5, 
                                params = {'eval_set':[(X_test, y_test)]},           # Need to pass eval_set here for early stopping as this param is expected in model.fit() when model is being trained independently
                                n_jobs=-1,) 
    print(f"CV Score: {cv_scores.mean():.3f} (+/- {cv_scores.std() * 2:.3f})")

[0]	validation_0-mlogloss:0.98988
[0]	validation_0-mlogloss:0.99115
[0]	validation_0-mlogloss:0.97918
[0]	validation_0-mlogloss:0.98400
[0]	validation_0-mlogloss:0.99994
[1]	validation_0-mlogloss:0.89051
[1]	validation_0-mlogloss:0.89749
[1]	validation_0-mlogloss:0.88274
[1]	validation_0-mlogloss:0.90260
[2]	validation_0-mlogloss:0.79931
[2]	validation_0-mlogloss:0.81472
[2]	validation_0-mlogloss:0.80332
[1]	validation_0-mlogloss:0.88133
[2]	validation_0-mlogloss:0.82798
[3]	validation_0-mlogloss:0.74285
[3]	validation_0-mlogloss:0.72956
[3]	validation_0-mlogloss:0.73033
[3]	validation_0-mlogloss:0.75510
[2]	validation_0-mlogloss:0.79516
[4]	validation_0-mlogloss:0.69776
[4]	validation_0-mlogloss:0.68215
[4]	validation_0-mlogloss:0.67358
[3]	validation_0-mlogloss:0.72483
[4]	validation_0-mlogloss:0.69330
[5]	validation_0-mlogloss:0.63860
[5]	validation_0-mlogloss:0.62426
[5]	validation_0-mlogloss:0.61684
[4]	validation_0-mlogloss:0.66305
[5]	validation_0-mlogloss:0.63711
[6]	validation

#### Pipeline integration with Sckit-learn API

In [24]:
# # Create preprocessing pipeline
# preprocessor = ColumnTransformer(transformers=[("num", StandardScaler(), [0, 1, 2, 3]),
#                                                ("cat", OneHotEncoder(drop="first"), [4, 5]),
#                                                ])

# # Complete ML pipeline
# pipeline = Pipeline([("preprocessor", preprocessor),
#                      ("classifier", xgb.XGBClassifier(n_estimators=100, random_state=42))])

# with mlflow.start_run():
#     # Entire pipeline is logged including preprocessing steps
#     pipeline.fit(X_train, y_train)

#     # Pipeline scoring is automatically captured
#     train_score = pipeline.score(X_train, y_train)
#     test_score = pipeline.score(X_test, y_test)

## Hyperparameter Optimization
### GridSearchCV
MLflow provides exceptional support for XGBoost hyperparameter optimization, automatically creating organized child runs for parameter search experiments:

In [25]:
# Load data
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.2, random_state=42)

In [26]:
# Enable autologging with hyperparameter tuning support
mlflow.sklearn.autolog(max_tuning_runs=10)  # Track top 10 parameter combinations

In [27]:
# Define parameter grid
param_grid = {"n_estimators": [50, 100, 200],
              "max_depth": [3, 6, 9],
              "learning_rate": [0.01, 0.1, 0.2],
              "subsample": [0.8, 0.9, 1.0],
              "colsample_bytree": [0.8, 0.9, 1.0],}

In [28]:
with mlflow.start_run(run_name="XGBoost Grid Search"):
    # Create base model
    xgb_model = xgb.XGBClassifier(random_state=42)

    # Grid search, with cross-validation, on 'digits' data
    grid_search = GridSearchCV(xgb_model, param_grid, cv=5, scoring="roc_auc_ovr", n_jobs=-1, verbose=1)

    grid_search.fit(X_train, y_train)

    # Best parameters and scores are automatically logged
    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Best CV score: {grid_search.best_score_:.3f}")

    # Evaluate on test set
    test_score = grid_search.score(X_test, y_test)
    print(f"Test score: {test_score:.3f}")

Fitting 5 folds for each of 243 candidates, totalling 1215 fits


2026/01/02 20:20:50 INFO mlflow.sklearn.utils: Logging the 10 best runs, 233 runs will be omitted.


üèÉ View run unleashed-panda-849 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/6258955d92e34f79bea93176f87e1f3c
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152
üèÉ View run delightful-mink-555 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/5bb98eeb1bc94f7cabeee0aa840fb528
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152
üèÉ View run orderly-snipe-167 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/947759d039d44b67bc113eaabd3f7274
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152
üèÉ View run luxuriant-ox-776 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/90247b29522a46739f1b459acb61fcf4
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/m

MLflow automatically creates a parent run containing the overall search results and child runs for each parameter combination, making it easy to analyze which parameters work best.

### RandomizedSearchCV
For more efficient hyperparameter exploration, especially with large parameter spaces, RandomizedSearchCV provides a great alternative:

In [29]:
# Define parameter distributions for more efficient exploration
param_distributions = {"n_estimators": randint(50, 300),
                       "max_depth": randint(5, 20),
                       "min_child_weight": randint(1, 10),
                       "learning_rate": uniform(0.01, 0.3),
                       "subsample": uniform(0.6, 0.4),
                       "colsample_bytree": uniform(0.6, 0.4),
                       "gamma": uniform(0, 0.5),
                       "reg_alpha": uniform(0, 1),
                       "reg_lambda": uniform(0, 1),}

In [30]:
with mlflow.start_run(run_name="XGBoost Randomized Search"):
    xgb_model = xgb.XGBClassifier(random_state=42)
    random_search = RandomizedSearchCV(xgb_model,
                                       param_distributions,
                                       n_iter=50,  # Try 50 random combinations
                                       cv=5,
                                       scoring="roc_auc_ovr",
                                       random_state=42,
                                       n_jobs=-1,)

    random_search.fit(X_train, y_train)

2026/01/02 20:21:16 INFO mlflow.sklearn.utils: Logging the 10 best runs, 40 runs will be omitted.


üèÉ View run overjoyed-toad-939 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/77a7135337bd4d4c9b7005b667df3004
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152
üèÉ View run overjoyed-moose-918 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/b86aa50338a44754a32cc1e8ce5ff4a7
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152
üèÉ View run popular-skunk-825 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/cb944aa6e30b4833ac18b0c390b08bf0
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152
üèÉ View run thundering-fox-467 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/e48bcb4c7981459b95cb63f33b0a0f91
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/

The max_tuning_runs parameter in autolog controls how many of the best parameter combinations get their own child runs, helping you focus on the most promising results.

## Feature Importance Analysis
### Multiple Importance Types
XGBoost provides multiple types of feature importance, and MLflow captures them all automatically:

In [31]:
# Load data
breast_cancer = load_breast_cancer(as_frame=True).frame
train_df, test_df = train_test_split(breast_cancer, test_size=0.2, random_state=42)

# Separate the target column for the training set
X_train = train_df.drop(["target"], axis=1)
y_train = train_df["target"]

# Prepare data in XGBoost DMatrix format
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [32]:
def comprehensive_feature_importance_analysis(model, feature_names=None):
    """Analyze and log comprehensive feature importance."""

    importance_types = ["weight", "gain", "cover", "total_gain"]

    with mlflow.start_run(run_name="Feature Importance Analysis"):
        for imp_type in importance_types:
            # Get importance scores
            importance = model.get_score(importance_type=imp_type)

            if not importance:
                continue

            # Sort features by importance
            sorted_features = sorted(importance.items(), key=lambda x: x[1], reverse=True)

            # Log individual feature scores
            for feature, score in sorted_features[:20]:  # Top 20 features
                mlflow.log_metric(f"{imp_type}_{feature}", score)

            # Create visualization
            features, scores = zip(*sorted_features[:20])

            plt.figure(figsize=(10, 8))
            sns.barplot(x=list(scores), y=list(features))
            plt.title(f"Top 20 Feature Importance ({imp_type.title()})")
            plt.xlabel("Importance Score")
            plt.tight_layout()

            # Save and log plot
            plot_filename = f"feature_importance_{imp_type}.png"
            plt.savefig(plot_filename, dpi=300, bbox_inches="tight")
            mlflow.log_artifact(plot_filename)
            plt.close()

            # Log importance as JSON artifact
            json_filename = f"feature_importance_{imp_type}.json"
            with open(json_filename, "w") as f:
                json.dump(importance, f, indent=2)
            mlflow.log_artifact(json_filename)


# Usage
model = xgb.train(params, dtrain, num_boost_round=100)
comprehensive_feature_importance_analysis(model, feature_names=X_train.columns)

2026/01/02 20:21:19 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '406d3e4fadb8433db5fa568388e4521d', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


üèÉ View run gregarious-hawk-382 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/406d3e4fadb8433db5fa568388e4521d
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152
üèÉ View run Feature Importance Analysis at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/bfc04c0451574929bc7fdc3d9467f51e
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


### Feature Selection
Use XGBoost feature importance for automated feature selection:

In [33]:
def feature_selection_pipeline(train_features, train_target, test_features, test_target):
    """Pipeline with XGBoost-based feature selection."""

    with mlflow.start_run(run_name="Feature Selection Pipeline"):
        # Step 1: Train initial model for feature selection
        selector_model = xgb.XGBClassifier(n_estimators=50, max_depth=6, random_state=42)
        selector_model.fit(train_features, train_target)

        # Evaluate performance
        original_train_score = selector_model.score(train_features, train_target)
        original_test_score = selector_model.score(test_features, test_target)

        # Step 2: Feature selection based on importance
        selector = SelectFromModel(selector_model,
                                   threshold="median",  # Select features above median importance
                                   prefit=True,)
        
        selected_train_features = selector.transform(train_features)
        selected_test_features = selector.transform(test_features)

        # Log feature selection results
        selected_features = selector.get_support()
        n_selected = sum(selected_features)

        mlflow.log_metrics(
            {
                                "original_features": train_features.shape[1],
                                "original_train_accuracy": original_train_score,
                                "original_test_accuracy": original_test_score,
                                "selected_features": n_selected,
                                "feature_reduction_ratio": n_selected / train_features.shape[1],
                            }
                        )

        # Step 3: Train final model on selected features
        final_model = xgb.XGBClassifier(n_estimators=50, max_depth=6, random_state=42)
        final_model.fit(selected_train_features, train_target)

        # Evaluate performance
        train_score = final_model.score(selected_train_features, train_target)
        test_score = final_model.score(selected_test_features, test_target)

        mlflow.log_metrics(
                                {
                                    "train_accuracy_selected": train_score,
                                    "test_accuracy_selected": test_score,
                                }
                            )

        # Log the final model and selector
        model_signature = infer_signature(selected_train_features, final_model.predict(selected_test_features))
        selector_signature = infer_signature(train_features, selector_model.predict(train_features))

        mlflow.sklearn.log_model(final_model, name="final_model", signature=model_signature)
        mlflow.sklearn.log_model(selector, name="feature_selector", signature=selector_signature)

        return final_model, selector

In [34]:
# Load data
breast_cancer = load_breast_cancer(as_frame=True).frame
train_df, test_df = train_test_split(breast_cancer, test_size=0.2, random_state=42)

# Separate the target column for the training set
X_train = train_df.drop(["target"], axis=1)
y_train = train_df["target"]

# Separate the target column for the testing set
X_test = test_df.drop(["target"], axis=1)
y_test = test_df["target"]

In [35]:
final_model, selector = feature_selection_pipeline(X_train, y_train, X_test, y_test)



üèÉ View run Feature Selection Pipeline at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/ae7f8c797cf74568911a9148e3a67073
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


## Model Management
### Serialization & Formats
XGBoost supports various serialization formats, each optimized for different deployment scenarios:

In [36]:
# Train model
model = xgb.train(params, dtrain, num_boost_round=100)

# Create model signature for production deployment
X_sample = X_train[:100]

# For native XGBoost
predictions = model.predict(xgb.DMatrix(X_sample))
signature = infer_signature(X_sample, predictions)

# For sklearn XGBoost
# predictions = model.predict(X_sample)
# signature = infer_signature(X_sample, predictions)

with mlflow.start_run():
    # JSON format (recommended) - Human readable and version stable
    mlflow.xgboost.log_model(xgb_model=model,
                             signature=signature,
                             input_example=X_sample[:5],  # Sample input for documentation 
                             name="model_json", 
                             model_format="json")

    # UBJ format - More compact binary format
    mlflow.xgboost.log_model(xgb_model=model, 
                             signature=signature,
                             input_example=X_sample[:5],  # Sample input for documentation
                             name="model_ubj", 
                             model_format="ubj")

    # Legacy XGBoost format (deprecated but sometimes needed)
    mlflow.xgboost.log_model(xgb_model=model, 
                             signature=signature,
                             input_example=X_sample[:5],  # Sample input for documentation
                             name="model_xgb", 
                             model_format="xgb")

2026/01/02 20:22:17 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '9bc3b9269c824bcdb5cd31e0276ee3c3', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


üèÉ View run useful-steed-236 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/9bc3b9269c824bcdb5cd31e0276ee3c3
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


  xgb_model.save_model(model_data_path)


üèÉ View run loud-midge-184 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/edc2fe65f6f64609ba7aff85782e0448
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


- **JSON format** is recommended for production as it's human-readable and version-stable. 
- **UBJ format** provides more compact binary serialization. 
- The legacy **XGBoost format** is deprecated but sometimes needed for compatibility.

Model signatures are automatically inferred when autologging is enabled, but one can also create them manually for more control over the schema validation process.

## Advanced XGBoost Features
### Multi-Class Classification
XGBoost naturally handles multi-class classification with MLflow tracking:

In [37]:
# Multi-class classification
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.2, random_state=42)

In [38]:
with mlflow.start_run(run_name="Multi-class XGBoost"):
    # XGBoost naturally handles multi-class
    model = xgb.XGBClassifier(objective="multi:softprob",
                              num_class=10,  # 10 digit classes
                              n_estimators=100,
                              max_depth=6,
                              random_state=42,)

    model.fit(X_train, y_train)

    # Multi-class predictions
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)

    # Multi-class metrics
    report = classification_report(y_test, y_pred, output_dict=True)

    # Log per-class metrics
    for class_label, metrics in report.items():
        if isinstance(metrics, dict):
            mlflow.log_metrics(
                                    {
                                        f"class_{class_label}_precision": metrics["precision"],
                                        f"class_{class_label}_recall": metrics["recall"],
                                        f"class_{class_label}_f1": metrics["f1-score"],
                                    }
                                )



üèÉ View run Multi-class XGBoost at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/cb5b77cede124de987cb0600a5fbdbfd
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


### Custom Callbacks
Implement custom callbacks for advanced monitoring and control:

In [39]:
class MLflowCallback(xgb.callback.TrainingCallback):
    def __init__(self):
        self.metrics_history = []

    def after_iteration(self, model, epoch, evals_log):
        # Log metrics in real-time
        metrics = {}
        for dataset, metric_dict in evals_log.items():
            for metric_name, values in metric_dict.items():
                key = f"{dataset}_{metric_name}"
                metrics[key] = values[-1]  # Latest value

        mlflow.log_metrics(metrics, step=epoch)
        self.metrics_history.append(metrics)

        # Custom logic for model checkpointing
        if epoch % 50 == 0:
            temp_model_path = f"checkpoint_epoch_{epoch}.json"
            model.save_model(temp_model_path)
            mlflow.log_artifact(temp_model_path)

        return False  # Continue training

In [40]:
# Usage
with mlflow.start_run():
    callback = MLflowCallback()
    model = xgb.train(params, dtrain, callbacks=[callback], num_boost_round=1000)



üèÉ View run nebulous-cub-701 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/db4c5149769145b38b004279872525df
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


### Custom Objectives & Metrics
XGBoost allows custom objective functions and evaluation metrics, which MLflow can track:

In [41]:
def custom_objective_function(y_pred, y_true):
    """Custom objective function for XGBoost."""
    # Example: Focal loss for imbalanced classification
    alpha = 0.25
    gamma = 2.0

    # Convert DMatrix to numpy array
    y_true = y_true.get_label()

    # Calculate focal loss gradients and hessians
    p = 1 / (1 + np.exp(-y_pred))  # sigmoid

    # Focal loss gradient
    grad = alpha * (1 - p) ** gamma * (gamma * p * np.log(p + 1e-8) + p - y_true)

    # Focal loss hessian
    hess = (alpha * (1 - p) ** gamma * (gamma * (gamma + 1) * p * np.log(p + 1e-8) + 2 * gamma * p + p))

    return grad, hess

In [42]:
def custom_eval_metric(y_pred, y_true):
    """Custom evaluation metric."""
    y_true = y_true.get_label()
    y_pred = 1 / (1 + np.exp(-y_pred))  # sigmoid

    # Custom F-beta score
    beta = 2.0
    precision = np.sum((y_pred > 0.5) & (y_true == 1)) / np.sum(y_pred > 0.5)
    recall = np.sum((y_pred > 0.5) & (y_true == 1)) / np.sum(y_true == 1)

    f_beta = (1 + beta**2) * precision * recall / (beta**2 * precision + recall)

    return "f_beta", f_beta

In [43]:
# Load data
breast_cancer = load_breast_cancer(as_frame=True).frame
train_df, test_df = train_test_split(breast_cancer, test_size=0.2, random_state=42)

# Separate the target column for the training set
X_train = train_df.drop(["target"], axis=1)
y_train = train_df["target"]

# Separate the target column for the testing set
X_test = test_df.drop(["target"], axis=1)
y_test = test_df["target"]

# Prepare data in XGBoost DMatrix format
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [44]:
# Train with custom objective and metric
with mlflow.start_run(run_name="XGBoost with custom objective & metric"):
    model = xgb.train(params=params,
                      dtrain=dtrain,
                      obj=custom_objective_function,
                      custom_metric=custom_eval_metric,
                      num_boost_round=100,
                      evals=[(dtrain, "train"), (dtest, "test")],
                      verbose_eval=10,)

[0]	train-logloss:0.66599	train-f_beta:0.89431	test-logloss:0.66678	test-f_beta:0.89196
[10]	train-logloss:0.58564	train-f_beta:0.89431	test-logloss:0.58706	test-f_beta:0.89196
[20]	train-logloss:0.55420	train-f_beta:0.89431	test-logloss:0.55685	test-f_beta:0.89196
[30]	train-logloss:0.53579	train-f_beta:0.89431	test-logloss:0.53911	test-f_beta:0.89196
[40]	train-logloss:0.52321	train-f_beta:0.89431	test-logloss:0.52781	test-f_beta:0.89196
[50]	train-logloss:0.51383	train-f_beta:0.89431	test-logloss:0.51928	test-f_beta:0.89196
[60]	train-logloss:0.50585	train-f_beta:0.89431	test-logloss:0.51204	test-f_beta:0.89196
[70]	train-logloss:0.49986	train-f_beta:0.89431	test-logloss:0.50691	test-f_beta:0.89196
[80]	train-logloss:0.49464	train-f_beta:0.89431	test-logloss:0.50212	test-f_beta:0.89196
[90]	train-logloss:0.48995	train-f_beta:0.89431	test-logloss:0.49797	test-f_beta:0.89196
[99]	train-logloss:0.48628	train-f_beta:0.89431	test-logloss:0.49475	test-f_beta:0.89196




üèÉ View run XGBoost with custom objective & metric at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/9778870e5d3c4ce3a51c5d1319344d87
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


### Performance Optimization
XGBoost offers several performance optimization options that MLflow can track:

In [45]:
# GPU-accelerated training
def gpu_accelerated_training(X_train, y_train, X_test, y_test):
    """GPU-accelerated XGBoost training."""

    with mlflow.start_run(run_name="GPU XGBoost"):
        # GPU-optimized parameters
        params = {"tree_method": "gpu_hist",  # Use GPU for training
                  "device": "gpu:0",  # GPU device ID
                  "predictor": "gpu_predictor",  # Use GPU for prediction
                  "objective": "binary:logistic",
                  "eval_metric": "logloss",
                  "max_depth": 8,
                  "learning_rate": 0.1,
                  }

        dtrain = xgb.DMatrix(X_train, label=y_train)
        dtest = xgb.DMatrix(X_test, label=y_test)

        model = xgb.train(params=params,
                          dtrain=dtrain,
                          num_boost_round=500,
                          evals=[(dtrain, "train"), (dtest, "test")],
                          early_stopping_rounds=50,)

        return model

In [46]:
# Memory-efficient training for large datasets
def memory_efficient_training(X_train, y_train, X_test, y_test):
    """Memory efficient training for large datasets."""

    with mlflow.start_run():
        # Enable histogram-based algorithm for faster training
        params = {"tree_method": "hist",  # Use histogram-based algorithm
                  "max_bin": 256,  # Number of bins for histogram
                  "objective": "reg:squarederror",
                  "eval_metric": "rmse",
                  }

        # For very large datasets, consider loading from file
        # dtrain = xgb.DMatrix('train.libsvm')
        # dtest = xgb.DMatrix('test.libsvm')
        dtrain = xgb.DMatrix(X_train, label=y_train)
        dtest = xgb.DMatrix(X_test, label=y_test)        

        model = xgb.train(params=params,
                          dtrain=dtrain,
                          num_boost_round=1000,
                          evals=[(dtest, "test")],
                          early_stopping_rounds=50,
                          verbose_eval=100,)

        return model

In [47]:
# Load data
diabetes_data = load_diabetes(as_frame=True).frame
train_df, test_df = train_test_split(diabetes_data, test_size=0.2, random_state=42)

# Separate the target column for the training set
X_train = train_df.drop(["target"], axis=1)
y_train = train_df["target"]

# Separate the target column for the testing set
X_test = test_df.drop(["target"], axis=1)
y_test = test_df["target"]

In [48]:
# # Execute GPU-accelerated training model
# gpu_model = gpu_accelerated_training(X_train, y_train, X_test, y_test)
# # Create and log model signature
# signature = infer_signature(X_train, gpu_model.predict(xgb.DMatrix(X_train)))
# # Log the GPU model
# model_info = mlflow.xgboost.log_model(gpu_model, name="gpu_acc_model", signature=signature)

In [49]:
# Execute memory efficient training model
mem_eff_model = memory_efficient_training(X_train, y_train, X_test, y_test)
# Create and log model signature
signature = infer_signature(X_train, mem_eff_model.predict(xgb.DMatrix(X_train)))
# Log the model
model_info = mlflow.xgboost.log_model(mem_eff_model, name="mem_acc_model", signature=signature)

[0]	test-rmse:63.73800
[53]	test-rmse:57.83504




üèÉ View run agreeable-dog-422 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/858880b5088d478fb39a668966bbb3ee
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


## Model Evaluation with MLflow
### MLflow Evaluate API
MLflow provides a comprehensive evaluation API that automatically generates metrics, visualizations, and diagnostic tools.

#### Automatic Generation Includes:
- **Performance Metrics** such as <ins>accuracy, precision, recall, F1-score, ROC-AUC</ins> for classification. 
- **Visualizations** including <ins>confusion matrix, ROC curve, precision-recall curve</ins>. 
- **Feature Importance** with <ins>SHAP</ins> values and feature contribution analysis. 
- **Model Artifacts** where all plots and diagnostic information are saved to MLflow.

In [50]:
# Load data
breast_cancer = load_breast_cancer(as_frame=True).frame
train_df, test_df = train_test_split(breast_cancer, test_size=0.2, random_state=42)

# Separate the target column for the training set
X_train = train_df.drop(["target"], axis=1)
y_train = train_df["target"]

# Separate the target column for the testing set
X_test = test_df.drop(["target"], axis=1)
y_test = test_df["target"]

In [51]:
# Prepare data and train model
model = xgb.XGBClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

2026/01/02 20:23:27 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'bf070e2a2fc648748e1b119e7f267601', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


üèÉ View run delicate-chimp-189 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/bf070e2a2fc648748e1b119e7f267601
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [52]:
# Create evaluation dataset
eval_data = X_test.copy()
eval_data["label"] = y_test

with mlflow.start_run():
    # Log model with signature
    signature = infer_signature(X_test, model.predict(X_test))
    model_info = mlflow.sklearn.log_model(model, name="model", signature=signature)

    # Comprehensive evaluation with MLflow
    result = mlflow.models.evaluate(model_info.model_uri,
                                    eval_data,
                                    targets="label",
                                    model_type="classifier",  # or "regressor" for regressionevaluators=["default"],
                                    )

    # Access automatic metrics
    print(f"Accuracy: {result.metrics['accuracy_score']:.3f}")
    print(f"F1 Score: {result.metrics['f1_score']:.3f}")
    print(f"ROC AUC: {result.metrics['roc_auc']:.3f}")

    # Access generated artifacts
    print("Generated artifacts:")
    for artifact_name, path in result.artifacts.items():
        print(f"  {artifact_name}: {path}")

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:23:44 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-d0cc467e4a464a169aa1c9b726e06849
2026/01/02 20:23:44 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:24:02 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:24:02 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:24:22 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


Accuracy: 0.956
F1 Score: 0.965
ROC AUC: 0.991
Generated artifacts:
  roc_curve_plot: ImageEvaluationArtifact(uri='dbfs:/databricks/mlflow-tracking/1996672001009152/e51f8ca8e894412f8d8e5b165fd20cea/artifacts/roc_curve_plot.png')
  precision_recall_curve_plot: ImageEvaluationArtifact(uri='dbfs:/databricks/mlflow-tracking/1996672001009152/e51f8ca8e894412f8d8e5b165fd20cea/artifacts/precision_recall_curve_plot.png')
  lift_curve_plot: ImageEvaluationArtifact(uri='dbfs:/databricks/mlflow-tracking/1996672001009152/e51f8ca8e894412f8d8e5b165fd20cea/artifacts/lift_curve_plot.png')
  calibration_curve_plot: ImageEvaluationArtifact(uri='dbfs:/databricks/mlflow-tracking/1996672001009152/e51f8ca8e894412f8d8e5b165fd20cea/artifacts/calibration_curve_plot.png')
  confusion_matrix: ImageEvaluationArtifact(uri='dbfs:/databricks/mlflow-tracking/1996672001009152/e51f8ca8e894412f8d8e5b165fd20cea/artifacts/confusion_matrix.png')
  shap_beeswarm_plot: ImageEvaluationArtifact(uri='dbfs:/databricks/mlflow-trac

### Regression Evaluation
For XGBoost regression models, MLflow automatically provides regression-specific metrics, including:

- **Mean Absolute Error (MAE), Mean Squared Error (MSE), and Root MSE** provide error magnitude assessment. 
- **R¬≤ Score and Adjusted R¬≤** measure model fit quality. 
- **Mean Absolute Percentage Error (MAPE)** shows relative error rates. 
- **Residual plots and distribution analysis** help identify model assumptions violations.

In [53]:
# Load regression dataset
diabetes_data = load_diabetes(as_frame=True).frame
train_df, test_df = train_test_split(diabetes_data, test_size=0.2, random_state=42)

# Separate the target column for the training set
X_train = train_df.drop(["target"], axis=1)
y_train = train_df["target"]

# Separate the target column for the testing set
X_test = test_df.drop(["target"], axis=1)
y_test = test_df["target"]

In [54]:
# Train XGBoost regressor
reg_model = xgb.XGBRegressor(n_estimators=100, max_depth=6, random_state=42)
reg_model.fit(X_train, y_train)

2026/01/02 20:24:27 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '471cd80dddeb40628fe7b898247e59d2', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


üèÉ View run righteous-quail-467 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/471cd80dddeb40628fe7b898247e59d2
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [55]:
# Create evaluation dataset
eval_data = X_test.copy()
eval_data["target"] = y_test

In [56]:
with mlflow.start_run():
    # Log and evaluate regression model
    signature = infer_signature(X_train, reg_model.predict(X_train))
    model_info = mlflow.sklearn.log_model(reg_model, name="model", signature=signature)

    result = mlflow.models.evaluate(model_info.model_uri,
                                    eval_data,
                                    targets="target",
                                    model_type="regressor",
                                    evaluators=["default"])

    print(f"MAE: {result.metrics['mean_absolute_error']:.3f}")
    print(f"RMSE: {result.metrics['root_mean_squared_error']:.3f}")
    print(f"R¬≤ Score: {result.metrics['r2_score']:.3f}")

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:24:45 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-7a7a695cde6b4da1b48068a7e102d2a7
2026/01/02 20:24:45 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:24:46 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:24:46 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer ExactExplainer is used.


MAE: 46.389
RMSE: 57.888
R¬≤ Score: 0.368
üèÉ View run capricious-bird-360 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/a655c290cff14b14b38fc3fea58aa675
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


### Custom Metrics & Artifacts
We can further extend MLflow evaluation with custom metrics and visualizations:

In [57]:
def profit_metric(predictions, targets, sample_weights=None):
    """Custom business metric: profit from correct predictions."""
    # Assume profit of $100 per correct prediction, $50 loss per error
    correct_predictions = (predictions == targets).sum()
    incorrect_predictions = len(predictions) - correct_predictions

    profit = (correct_predictions * 100) - (incorrect_predictions * 50)
    return profit

In [58]:
def create_feature_importance_comparison(eval_df, builtin_metrics, artifacts_dir):
    """Compare XGBoost native importance with SHAP values."""

    # This would use model feature importance from eval_df
    # Create comparison visualization
    plt.figure(figsize=(12, 8))

    # Placeholder for actual feature importance comparison
    features = [f"feature_{i}" for i in range(10)]
    xgb_importance = np.random.random(10)
    shap_importance = np.random.random(10)

    x = np.arange(len(features))
    width = 0.35

    plt.bar(x - width / 2, xgb_importance, width, label="XGBoost Native", alpha=0.8)
    plt.bar(x + width / 2, shap_importance, width, label="SHAP Values", alpha=0.8)

    plt.xlabel("Features")
    plt.ylabel("Importance")
    plt.title("Feature Importance Comparison")
    plt.xticks(x, features, rotation=45)
    plt.legend()
    plt.tight_layout()

    plot_path = os.path.join(artifacts_dir, "importance_comparison.png")
    plt.savefig(plot_path)
    plt.close()

    return {"importance_comparison": plot_path}

In [59]:
# Load regression dataset
housing = fetch_openml(name="california_housing", version=1, as_frame=True)
#housing = fetch_california_housing(as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, test_size=0.2, random_state=42)

X_train.drop(columns=["ocean_proximity"], inplace=True)
X_test.drop(columns=["ocean_proximity"], inplace=True)
X_test.dropna(inplace=True)

In [60]:
# Train XGBoost regressor
housing_model = xgb.XGBRegressor(n_estimators=100, max_depth=6, random_state=42)
housing_model.fit(X_train, y_train)

2026/01/02 20:24:54 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'ab0cbfac5bd04f0ea7ccab96488035d3', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


üèÉ View run rogue-goose-557 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/ab0cbfac5bd04f0ea7ccab96488035d3
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [61]:
# Create evaluation dataset
eval_data = X_test.copy()
eval_data["target"] = y_test

In [62]:
with mlflow.start_run():
    # Log and evaluate regression model
    signature = infer_signature(X_train, housing_model.predict(X_train))
    model_info = mlflow.sklearn.log_model(housing_model, name="housing_model", signature=signature)

    # Create custom metric
    custom_profit = make_metric(eval_fn=profit_metric, greater_is_better=True, name="profit_score")

    # Use custom metrics and artifacts
    result = mlflow.models.evaluate(model_info.model_uri,
                                    eval_data,
                                    targets="target",
                                    model_type="regressor",
                                    extra_metrics=[custom_profit],
                                    custom_artifacts=[create_feature_importance_comparison],)

    print(f"Custom Profit Score: ${result.metrics['profit_score']:.2f}")



Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:25:13 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-9697b249c5814de6952160cab0b68513
2026/01/02 20:25:13 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:25:14 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:25:15 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer ExactExplainer is used.
ExactExplainer explainer: 2001it [00:18, 50.68it/s]                           


Custom Profit Score: $-196050.00
üèÉ View run vaunted-goose-683 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/9dc4f70a81f34225a0c521f17011b5f0
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


### Manual Evaluation
For cases where we need more control or custom evaluation logic, we can still implement manual evaluation:

In [63]:
# Load data
breast_cancer = load_breast_cancer(as_frame=True).frame
train_df, test_df = train_test_split(breast_cancer, test_size=0.2, random_state=42)

# Separate the target column for the training set
X_train = train_df.drop(["target"], axis=1)
y_train = train_df["target"]

# Separate the target column for the testing set
X_test = test_df.drop(["target"], axis=1)
y_test = test_df["target"]

In [64]:
def comprehensive_xgboost_evaluation(model, X_test, y_test, X_train=None, y_train=None):
    """Comprehensive XGBoost model evaluation with MLflow logging."""

    with mlflow.start_run(run_name="Comprehensive Model Evaluation"):
        
        # Training metrics if provided
        if X_train is not None and y_train is not None:
            model.fit(X_train, y_train)  # Ensure model is fitted
            if hasattr(model, "predict_proba"):
                y_train_pred = model.predict_proba(X_train)[:, 1]
            else:
                dtrain = (xgb.DMatrix(X_train) if not isinstance(X_train, xgb.DMatrix) else X_train)
                y_train_pred = model.predict(dtrain)

            train_metrics = {"train_accuracy": accuracy_score(y_train, (y_train_pred > 0.5).astype(int)),
                            "train_roc_auc": roc_auc_score(y_train, y_train_pred),}
            mlflow.log_metrics(train_metrics)
        
        # Predictions
        if hasattr(model, "predict_proba"):
            y_pred_proba = model.predict_proba(X_test)[:, 1]
            y_pred = (y_pred_proba > 0.5).astype(int)
        else:
            # Native XGBoost model
            if isinstance(X_test, xgb.DMatrix):
                dtest = X_test
            else:
                dtest = xgb.DMatrix(X_test)
            y_pred_proba = model.predict(dtest)
            y_pred = (y_pred_proba > 0.5).astype(int)

        # Basic metrics
        metrics = {"accuracy": accuracy_score(y_test, y_pred),
                   "precision": precision_score(y_test, y_pred, average="weighted"),
                   "recall": recall_score(y_test, y_pred, average="weighted"),
                   "f1_score": f1_score(y_test, y_pred, average="weighted"),
                   "roc_auc": roc_auc_score(y_test, y_pred_proba),}

        mlflow.log_metrics(metrics)

        # ROC Curve
        fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
        plt.figure(figsize=(8, 6))
        plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {metrics["roc_auc"]:.3f})')
        plt.plot([0, 1], [0, 1], "k--", label="Random Classifier")
        plt.xlabel("False Positive Rate")
        plt.ylabel("True Positive Rate")
        plt.title("ROC Curve")
        plt.legend()
        plt.grid(True)
        plt.savefig("roc_curve.png", dpi=300, bbox_inches="tight")
        mlflow.log_artifact("roc_curve.png")
        plt.close()

        # Precision-Recall Curve
        precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)
        avg_precision = average_precision_score(y_test, y_pred_proba)

        plt.figure(figsize=(8, 6))
        plt.plot(recall, precision, label=f"PR Curve (AP = {avg_precision:.3f})")
        plt.xlabel("Recall")
        plt.ylabel("Precision")
        plt.title("Precision-Recall Curve")
        plt.legend()
        plt.grid(True)
        plt.savefig("precision_recall_curve.png", dpi=300, bbox_inches="tight")
        mlflow.log_artifact("precision_recall_curve.png")
        plt.close()

        # Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
        plt.title("Confusion Matrix")
        plt.ylabel("True Label")
        plt.xlabel("Predicted Label")
        plt.savefig("confusion_matrix.png", dpi=300, bbox_inches="tight")
        mlflow.log_artifact("confusion_matrix.png")
        plt.close()

        mlflow.log_metric("average_precision", avg_precision)

In [65]:
comprehensive_xgboost_evaluation(xgb.XGBClassifier(max_depth=6, n_estimators=200, random_state=42), X_test, y_test, X_train, y_train)



üèÉ View run Comprehensive Model Evaluation at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/8121d5436db24058b7e88f1f50eb9198
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


## Model Comparison and Selection
### MLflow Model Comparison
Use MLflow evaluate to systematically compare multiple XGBoost configurations:

In [66]:
# Define XGBoost variants to compare
xgb_models = {"xgb_shallow": xgb.XGBClassifier(max_depth=3, n_estimators=100, random_state=42),
              "xgb_deep": xgb.XGBClassifier(max_depth=8, n_estimators=100, random_state=42),
              "xgb_boosted": xgb.XGBClassifier(max_depth=6, n_estimators=200, random_state=42),}

# Compare with other algorithms
all_models = {**xgb_models,
              "random_forest": RandomForestClassifier(n_estimators=100, random_state=42),}

In [67]:
# Evaluate each model systematically
comparison_results = {}

for model_name, model in all_models.items():
    with mlflow.start_run(run_name=f"eval_{model_name}"):
        # Train model
        model.fit(X_train, y_train)

        # Create evaluation dataset
        eval_data = X_test.copy()
        eval_data["target"] = y_test

        # Log model
        signature = infer_signature(X_train, model.predict(X_train))
        model_info = mlflow.sklearn.log_model(model, name=f"{model_name}", signature=signature)

        # Comprehensive evaluation with MLflow
        result = mlflow.models.evaluate(model_info.model_uri,
                                        eval_data,
                                        targets="target",
                                        model_type="classifier",
                                        evaluators=["default"],)

        comparison_results[model_name] = result.metrics

        # Log key metrics for comparison
        mlflow.log_metrics(
                                {
                                    "accuracy": result.metrics["accuracy_score"],
                                    "f1": result.metrics["f1_score"],
                                    "roc_auc": result.metrics["roc_auc"],
                                    "precision": result.metrics["precision_score"],
                                    "recall": result.metrics["recall_score"],
                                }
                            )



Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:26:06 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-f1bb036fcd764903b9d4eccb330c8b63
2026/01/02 20:26:06 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:26:06 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:26:06 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:26:11 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run eval_xgb_shallow at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/b6e3c94f8d8e4f4ba252be5fae6528f0
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:26:31 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-03c02dc9b3414e63b29ddc7fda1e439b
2026/01/02 20:26:31 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:26:31 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:26:31 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:26:36 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run eval_xgb_deep at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/9e51ce99811b437a8bc7db6aaea980f2
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:26:56 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-c9aa29b5bd0141e485e18abc4b2ada99
2026/01/02 20:26:56 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:26:57 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:26:57 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:27:01 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run eval_xgb_boosted at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/aa37d85d322b49bc9723f9b2706584f4
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:27:22 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-a1fd891106ad422f9b4a7d68969a659a
2026/01/02 20:27:22 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:27:22 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:27:22 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:27:27 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run eval_random_forest at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/442ee4be16be4e4c9d69ba391cae5d0c
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


In [68]:
comparison_df = pd.DataFrame(comparison_results).T
print("Model Comparison Summary:")
print(comparison_df[["accuracy_score", "f1_score", "roc_auc"]].round(3))

# Identify best model
best_model = comparison_df["f1_score"].idxmax()
print(f"\nBest model by F1 score: {best_model}")

Model Comparison Summary:
               accuracy_score  f1_score  roc_auc
xgb_shallow             0.956     0.965    0.994
xgb_deep                0.956     0.965    0.991
xgb_boosted             0.956     0.965    0.992
random_forest           0.965     0.972    0.995

Best model by F1 score: random_forest


### Hyperparameter Evaluation
Combine hyperparameter tuning with MLflow evaluation:

In [69]:
# Define parameter grid for XGBoost
param_grid = {"max_depth": [3, 6, 9],
              "learning_rate": [0.01, 0.1, 0.2],
              "n_estimators": [100, 200],
              "subsample": [0.8, 1.0],}

# Evaluate each parameter combination
grid_results = []

In [70]:
for params in ParameterGrid(param_grid):
    with mlflow.start_run(run_name=f"xgb_grid_search"):
        # Log parameters
        mlflow.log_params(params)

        # Train model with current parameters
        model = xgb.XGBClassifier(**params, random_state=42)
        model.fit(X_train, y_train)

        # Log and evaluate
        signature = infer_signature(X_train, model.predict(X_train))
        model_info = mlflow.sklearn.log_model(model, name="xgb_grid_search_model", signature=signature)

        # MLflow evaluation
        result = mlflow.models.evaluate(model_info.model_uri,
                                        eval_data,
                                        targets="target",
                                        model_type="classifier",
                                        evaluators=["default"],)

        # Track results
        grid_results.append(
                                {
                                    **params,
                                    "f1_score": result.metrics["f1_score"],
                                    "roc_auc": result.metrics["roc_auc"],
                                    "accuracy": result.metrics["accuracy_score"],
                                }
                            )

        # Log selection metric
        mlflow.log_metric("grid_search_score", result.metrics["f1_score"])

# Find best parameters
best_result = max(grid_results, key=lambda x: x["f1_score"])
print(f"Best parameters: {best_result}")



Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:27:47 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-0ac71fb4345f422da8f83ecb9c4f2a55
2026/01/02 20:27:47 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:27:48 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:27:48 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:27:52 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/2c04f32965e84a6aacfa12959872f5c4
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:28:12 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-ed0118405d0b4aeea1e5e6bc55825f30
2026/01/02 20:28:12 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:28:13 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:28:13 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:28:17 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/a6c705479e51438e9d7a8598a4fd2f47
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:28:39 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-b35a61e90fb64609861d82fb0c7d291f
2026/01/02 20:28:39 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:28:39 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:28:39 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:28:43 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/28bac032a838415b92a6039a2ffe7379
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:29:06 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-465f492bfbff42c38b29c490752da90b
2026/01/02 20:29:06 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:29:06 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:29:06 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:29:10 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/5fd11e796498431a9ac848c422ee42ce
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:29:32 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-48c6cc3bd8224093b3c5e0aae42fbc2e
2026/01/02 20:29:32 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:29:32 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:29:32 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:29:36 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/068f6b34fe8d435a9dfbba9bfae5afe8
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:29:57 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-2b5b6f47be7a47dab503ea264ebf8124
2026/01/02 20:29:57 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:29:58 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:29:58 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:30:02 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/bf0fdaca83ed4593a408efddeb01d8ac
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:30:23 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-3079b162ed074c159982a094ddf0fd14
2026/01/02 20:30:23 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:30:24 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:30:24 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:30:28 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/7c08165b2ed84a8185e9e06c4a539725
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:30:50 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-a4607bd179c948f6958647f5b6d2e976
2026/01/02 20:30:50 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:30:51 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:30:51 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:30:55 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/166a9e984c4a4841b9796b1c93127fdd
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:31:16 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-c55537aee5b9470ba0afb393bb8d9fa8
2026/01/02 20:31:16 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:31:17 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:31:17 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:31:21 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/21318d14471d4572b21a7b1a44e5b3ac
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:31:42 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-d0b0abc3e38147c0994d14b8824eace2
2026/01/02 20:31:42 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:31:43 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:31:43 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:31:47 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/cb6cb308c82d4325957170480821d588
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:32:09 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-4a727aa8af5946a6b90e896b7d3af8a4
2026/01/02 20:32:09 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:32:10 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:32:10 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:32:14 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/5bdff3ea82ab42c590410ce2575b6551
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:32:36 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-0e3806a30c554de1871dc760e9ff9f12
2026/01/02 20:32:36 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:32:37 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:32:37 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:32:41 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/40a793b789ad4d7fa606db120e42b4b3
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:33:02 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-d4f83146d2684d3ab30e0121d6e7ed5c
2026/01/02 20:33:02 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:33:03 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:33:03 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:33:07 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/3268efc62faf4c09b5908c77be8b6354
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:33:27 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-60cc29ae17a7426f90be7ca5dc27c798
2026/01/02 20:33:27 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:33:28 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:33:28 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:33:32 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/f895e0daea1248f5a7c34b70155eaba6
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:33:53 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-64111801ddb54b1984f1d178cf880a63
2026/01/02 20:33:53 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:33:54 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:33:54 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:33:57 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/a6252a7e714045179bbb3ae2f29a700e
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:34:19 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-4c4224f95d644cd395475bc733f4d34f
2026/01/02 20:34:19 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:34:19 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:34:19 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:34:23 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/ec63257bb02b4fd187b4b8d0d5d62a7b
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:34:44 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-84bd9265861d4e7a95ab49f0f9eb2bd4
2026/01/02 20:34:44 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:34:45 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:34:45 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:34:49 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/bbedc61b71fa4efa9ab8ad8fa6b142f4
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:35:10 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-14477ca7b08d4b27a03b99ac67b3f64a
2026/01/02 20:35:10 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:35:11 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:35:11 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:35:15 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/2d0832db5a43472badec5de16d66d0e2
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:35:37 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-8dd5c9d8f6664c26857f0ce3b3ba44e8
2026/01/02 20:35:37 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:35:37 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:35:37 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:35:41 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/0d6064a6376e471b80753ad44ba29960
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:36:03 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-84f2770a17f5494286ee38e3c4663ae1
2026/01/02 20:36:03 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:36:03 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:36:03 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:36:07 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/6c7cef5a6b4648919311b994292dc2d2
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:36:29 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-7ef6a0e27c614e1593aefdadbb64a961
2026/01/02 20:36:29 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:36:30 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:36:30 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:36:34 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/99bf0c69fd5f4e409a6d1db33f4bc753
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:36:55 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-b8ae1275c903459c9b50be67d55b39b8
2026/01/02 20:36:55 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:36:56 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:36:56 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:37:00 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/74cdd90b3b7a44449bc3c40fdc84f9b2
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:37:21 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-ddc2e08d6d3841bba9cd367961a17934
2026/01/02 20:37:21 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:37:22 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:37:22 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:37:26 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/bfdeaa7b65704ad48d5a0497e3e05ccb
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:37:48 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-e3ca1dc250b349f4b058dfd6aa1ab342
2026/01/02 20:37:48 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:37:48 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:37:48 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:37:52 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/e0f221bfc7cf452285fda0fc81a6ba88
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:38:14 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-1bd5efc309de4a8093d54692c365d037
2026/01/02 20:38:14 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:38:14 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:38:14 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:38:19 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/36041c59dfcf4ef0aa8824ec7c858b72
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:38:40 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-d4513f5dfc2946888056260b3cccfd93
2026/01/02 20:38:40 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:38:41 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:38:41 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:38:45 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/e95524a35005475b899906eb6fd895c5
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:39:09 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-66457948921d410fa7e7c2ba2ffd42e0
2026/01/02 20:39:09 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:39:10 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:39:10 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:39:15 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/d7bed4df17c14fcc837b4c9496b84b5a
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:39:39 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-92e61a5389724356855617c21d39035f
2026/01/02 20:39:39 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:39:39 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:39:39 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:39:44 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/f6d047062ca34aeaa55b94860571bc90
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:40:07 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-5d3c4641cc1644699aa77515a10f94ce
2026/01/02 20:40:07 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:40:08 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:40:08 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:40:12 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/bf286fbd99cd42c09cfb876e51c2b724
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:40:34 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-8517b1e9975445438e99578cc0dd8953
2026/01/02 20:40:34 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:40:34 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:40:34 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:40:39 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/5ac3b9261a1a45b7a576011172a5c0e7
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:41:02 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-393c199c82124e59b6480c501be8cb6c
2026/01/02 20:41:02 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:41:02 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:41:02 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:41:07 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/1a0d6cf7bd4b4d4e988b2dbad918d90a
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:41:32 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-db0363708b3f4cd3ac8c5c51229ca484
2026/01/02 20:41:32 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:41:32 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:41:32 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:41:37 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/62fa55688c7549d38f2741b0894fe138
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:42:02 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-6725c65307074ecca3a420306c4a8064
2026/01/02 20:42:02 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:42:03 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:42:03 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:42:08 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/d2dcb1e91f9a4c989a9d6036ecf23b95
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:42:31 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-a5d2587e7cf840a7876113b70875caee
2026/01/02 20:42:31 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:42:32 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:42:32 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:42:36 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/c8e5e85d29994f05800e0cc457064384
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:43:00 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-1952c4c145d14e4886f40f05e24177b9
2026/01/02 20:43:00 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:43:01 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:43:01 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:43:05 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/84aaabb5013b41acaadd3f9f7509d6b2
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:43:28 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-d8d819a05f9f4c5796781076a08234c1
2026/01/02 20:43:28 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:43:28 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:43:28 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:43:32 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run xgb_grid_search at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/796f5117173f4be7bc3ee0a54772c76d
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152
Best parameters: {'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 200, 'subsample': 0.8, 'f1_score': 0.9790209790209791, 'roc_auc': 0.989190959711759, 'accuracy': 0.9736842105263158}


## Model Validation and Quality Gates
Use MLflow's validation API to ensure model quality:

In [71]:
# First, define quality thresholds for XGBoost models
quality_thresholds = {"accuracy_score": MetricThreshold(threshold=0.85, greater_is_better=True),
                      "f1_score": MetricThreshold(threshold=0.80, greater_is_better=True),
                      "roc_auc": MetricThreshold(threshold=0.75, greater_is_better=True),}

In [72]:
# Second, define your baseline model
baseline_model = xgb.XGBClassifier(random_state=42)
baseline_model.fit(X_train, y_train)
# Log and evaluate classifier model
signature = infer_signature(X_train, baseline_model.predict(X_train))
model_info = mlflow.sklearn.log_model(baseline_model, 
                                      name="baseline_xgbclassifier_model", 
                                      signature=signature)
baseline_result = mlflow.models.evaluate(model_info.model_uri, 
                                         eval_data, 
                                         targets="target", 
                                         model_type="classifier")

2026/01/02 20:43:39 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '2305ce0b581c49c3b9b9581697761cab', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


üèÉ View run agreeable-finch-583 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/2305ce0b581c49c3b9b9581697761cab
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:43:57 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-c840c9417f254575bd3e269eb3d9d1f7
2026/01/02 20:43:57 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:43:58 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:43:58 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:44:03 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run righteous-cub-861 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/664225ed59c54b18afdac0544c55434a
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


In [73]:
# Third, validate model meets quality standards
try:
    mlflow.validate_evaluation_results(candidate_result=result,validation_thresholds=quality_thresholds,)
    print("‚úÖ XGBoost model meets all quality thresholds")
except mlflow.exceptions.ModelValidationFailedException as e:
    print(f"‚ùå Model failed validation: {e}")

2026/01/02 20:44:08 INFO mlflow.models.evaluation.validation: Validating candidate model metrics against baseline
2026/01/02 20:44:08 INFO mlflow.models.evaluation.validation: Model validation passed!


‚úÖ XGBoost model meets all quality thresholds


In [74]:
# Fourth, define the candidate XGBoost model with best parameters
candidate_model = xgb.XGBClassifier(learning_rate = 0.1,
                                    max_depth = 6,
                                    n_estimators = 200,
                                    subsample = 0.8,
                                    random_state=42,)
candidate_model.fit(X_train, y_train)
# Log and evaluate classifier model
signature = infer_signature(X_train, candidate_model.predict(X_train))
model_info = mlflow.sklearn.log_model(candidate_model, 
                                      name="upgraded_xgbclassifier_model", 
                                      signature=signature)
candidate_result = mlflow.models.evaluate(model_info.model_uri, 
                                          eval_data, 
                                          targets="target", 
                                          model_type="classifier")

2026/01/02 20:44:09 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'e865f28e24dc4213883e99c3bd3bc88a', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


üèÉ View run enthused-rat-691 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/e865f28e24dc4213883e99c3bd3bc88a
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2026/01/02 20:44:28 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-6ecc021f875643908728b63b11a0bf07
2026/01/02 20:44:28 INFO mlflow.tracking.fluent: Use `mlflow.set_active_model` to set the active model to a different one if needed.
2026/01/02 20:44:28 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2026/01/02 20:44:28 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2026/01/02 20:44:34 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer TreeExplainer is used.


üèÉ View run righteous-skunk-983 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/278384e9639144d492858204cc236cc2
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


In [75]:
# Fifth, validate improvement over baseline
improvement_thresholds = {"f1_score": MetricThreshold(threshold=0.02, 
                                                      greater_is_better=True  # Must be 2% better
                                                      ),}

try:
    mlflow.validate_evaluation_results(candidate_result=result,
                                       baseline_result=baseline_result,
                                       validation_thresholds=improvement_thresholds,)
    print("‚úÖ New XGBoost model improves over baseline")
except mlflow.exceptions.ModelValidationFailedException as e:
    print(f"‚ùå Model doesn't improve sufficiently: {e}")

2026/01/02 20:44:39 INFO mlflow.models.evaluation.validation: Validating candidate model metrics against baseline
2026/01/02 20:44:39 INFO mlflow.models.evaluation.validation: Model validation passed!


‚úÖ New XGBoost model improves over baseline


### Loading & Usage
MLflow provides flexible ways to load and use your saved XGBoost models:

In [76]:
# Load data
diabetes_data = load_diabetes(as_frame=True).frame
train_df, test_df = train_test_split(diabetes_data, test_size=0.2, random_state=42)

# Separate the target column for the training set
X_train = train_df.drop(["target"], axis=1)
y_train = train_df["target"]

# Separate the target column for the testing set
X_test = test_df.drop(["target"], axis=1)
y_test = test_df["target"]

In [77]:
# Load model in different ways
run_id = "e235500c87b84992806e077487b17097"

# Load as native XGBoost model (preserves all XGBoost functionality)
xgb_model = mlflow.xgboost.load_model(f"runs:/{run_id}/model")
predictions = xgb_model.predict(xgb.DMatrix(X_test))

# Load as PyFunc model (generic Python function interface)
pyfunc_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
predictions = pyfunc_model.predict(pd.DataFrame(X_test))

# Load from model registry (production deployment)
# registered_model = mlflow.pyfunc.load_model("models:/XGBoostModel@champion")

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

The PyFunc format is particularly useful for deployment scenarios where you need a consistent interface across different model types and frameworks.

## Production Deployment
### Model Registry
The Model Registry provides centralized model management with version control and alias-based deployment. This is essential for managing XGBoost models from development through production deployment:

In [91]:
# Load data
breast_cancer = load_breast_cancer(as_frame=True).frame
train_df, test_df = train_test_split(breast_cancer, test_size=0.2, random_state=42)

# Separate the target column for the training set
X_train = train_df.drop(["target"], axis=1)
y_train = train_df["target"]

# Separate the target column for the testing set
X_test = test_df.drop(["target"], axis=1)
y_test = test_df["target"]

In [78]:
mlflow.set_registry_uri("databricks-uc")

In [82]:
client = MlflowClient()

# Register model to MLflow Model Registry
with mlflow.start_run(): 
    mlflow.xgboost.log_model(xgb_model=candidate_model,
                             name="model",
                             registered_model_name="workspace.default.XGBoostBreastCancerModel",
                             signature=signature,
                             input_example=X_train[:5],
                             model_format="json",)

Successfully registered model 'workspace.default.xgboostbreastcancermodel'.


Uploading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Created version '1' of model 'workspace.default.xgboostbreastcancermodel'.


üèÉ View run polite-ant-218 at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152/runs/6c918940d95544198ec028e895e26333
üß™ View experiment at: https://dbc-e4fb7400-b637.cloud.databricks.com/ml/experiments/1996672001009152


In [None]:
# Or register an existing model
# mlflow.set_registry_uri("databricks-uc")

# run_id = "e235500c87b84992806e077487b17097"
# model_uri = f"runs:/{run_id}/model"

# # Register the model
# registered_model = mlflow.register_model(model_uri=model_uri, 
#                                          name="workspace.default.XGBoostDiabetesModel")

In [86]:
# Use aliases instead of deprecated stages for deployment management
# Set aliases for different deployment environments
client.set_registered_model_alias(name="workspace.default.XGBoostBreastCancerModel",
                                  alias="champion",  # Production model
                                  version=1,)
                                  #version=model_version.version,)

# client.set_registered_model_alias(name="XGBoostChurnModel",
#                                   alias="challenger",  # A/B testing model
#                                   version=model_version.version,)

In [None]:
# Use tags to track model status and metadata
# client.set_model_version_tag(name="XGBoostChurnModel",
#                              version=model_version.version,
#                              key="validation_status",
#                              value="approved",)

# client.set_model_version_tag(name="XGBoostChurnModel",
#                              version=model_version.version,
#                              key="model_type",
#                              value="xgboost_classifier",)

# client.set_model_version_tag(name="XGBoostChurnModel",
#                              version=model_version.version,
#                              key="feature_importance_type",
#                              value="gain",)

In [None]:
# # Promote model from staging to production environment
# client.copy_model_version(src_model_uri="models:/staging.XGBoostChurnModel@candidate",
#                           dst_name="prod.XGBoostChurnModel",)

#### Modern Model Registry Features:

- **Model Aliases** replace deprecated stages with flexible, named references. You can assign multiple aliases to any model version (e.g., *champion, challenger, shadow*), update aliases independently of model training for seamless deployments, and use them for A/B testing and gradual rollouts.
- **Model Tags** provide rich metadata and status tracking. Track validation status with validation_status: approved, mark model characteristics with *model_type: xgboost_classifier*, and add performance metrics like *best_auc_score: 0.95*.
- **Environment-based Models** support mature MLOps workflows. Create separate registered models per environment: *dev.XGBoostChurnModel, staging.XGBoostChurnModel, prod.XGBoostChurnModel*, and use <ins>copy_model_version()</ins> to promote models across environments.

### Model Serving
MLflow provides built-in model serving capabilities that make it easy to deploy your XGBoost models as REST APIs:

In [None]:
# Serve model using alias for production deployment
# To serve the model, run the following commands in a terminal (not in Jupyter):

#export MLFLOW_TRACKING_URI="databricks"
#export MLFLOW_REGISTRY_URI="databricks-uc"
#export DATABRICKS_HOST="https://<your-databricks-workspace-url>"
#export DATABRICKS_TOKEN="<your-PAT-token>"
#mlflow models serve -m "models:/workspace.default.xgboostbreastcancermodel@champion" -p 5000 --no-conda;

# # Or serve a specific version
# mlflow models serve -m "models:/XGBoostChurnModel/3" -p 5000 --no-conda

# # Or using environment variables
# os.environ["MLFLOW_TRACKING_URI"] = "databricks"
# os.environ["MLFLOW_REGISTRY_URI"] = "databricks-uc"
# os.environ["DATABRICKS_HOST"] = "https://<your-workspace>.cloud.databricks.com"
# os.environ["DATABRICKS_TOKEN"] = "<your-PAT-token>"
# !mlflow models serve -m "models:/workspace.default.xgboostbreastcancermodel@champion" -p 5000 --no-conda

# In stead of using environment variables, you can also set the mlflow uri via mlflow commands ( as we have done in previous cells):
# mlflow.set_tracking_uri("databricks")
# mlflow.set_registry_uri("databricks-uc")
# We will still need to set the DATABRICKS_HOST and DATABRICKS_TOKEN environment variables for authentication when serving the model.

#### Deployment Best Practices:
- **Use aliases for production serving** by pointing to **@champion** or **@production aliases** instead of hard-coding version numbers. 
- Implement **blue-green deployments** by updating aliases to switch traffic between model versions instantly. 
- Ensure **model signatures** provide automatic input validation at serving time. 
- Use **JSON format** for better compatibility and debugging

Once your model is served, you can make predictions by sending POST requests:

In [94]:
import requests
import json

# Example prediction request
# For MLflow sklearn models, use dataframe_split format
# Select a single row and convert to the expected JSON format
single_sample = X_train.iloc[[1]]  # Use iloc for positional indexing
data = {"dataframe_split": single_sample.to_dict(orient="split")}

response = requests.post("http://127.0.0.1:5000/invocations",
                         headers={"Content-Type": "application/json"},
                         json=data)  # Use json parameter for automatic serialization

# Check if the request was successful
if response.status_code == 200:
    try:
        predictions = response.json()
        print("Predictions:", predictions)
    except json.JSONDecodeError as e:
        print(f"Failed to decode JSON: {e}")
        print("Response text:", response.text)
else:
    print(f"Request failed with status code {response.status_code}")
    print("Response text:", response.text)

Predictions: {'predictions': [0]}


For larger production deployments, you can also deploy MLflow models to cloud platforms like AWS SageMaker, Azure ML, or deploy them as Docker containers for Kubernetes orchestration.

# Conclusion
MLflow's XGBoost integration provides a comprehensive solution for gradient boosting experiment management and deployment. Whether we're using the native XGBoost API for maximum performance or the scikit-learn interface for pipeline integration, MLflow captures all the essential information needed for reproducible machine learning.

## Key benefits of using MLflow with XGBoost:

- **Comprehensive Autologging** provides one-line setup that captures parameters, metrics, and feature importance. 
- **Dual API Support** offers seamless integration with both native and scikit-learn XGBoost interfaces. 
- **Advanced Feature Analysis** includes multiple importance types with automatic visualization. 
- **Production-Ready Deployment** provides model registry integration with multiple serialization formats. 
- **Performance Optimization** supports GPU acceleration and memory-efficient training. 
- **Competition-Grade Tracking** offers detailed experiment management for winning ML solutions.

The patterns and examples in this guide provide a solid foundation for building scalable, reproducible gradient boosting systems with XGBoost and MLflow. Start with autologging for immediate benefits, then gradually adopt more advanced features like custom objectives, callbacks, and sophisticated deployment patterns as your projects grow in complexity and scale.