In [1]:
import os
os.environ["MLFLOW_TRACKING_URI"] = "http://localhost:5000"
os.environ["MLFLOW_S3_ENDPOINT_URL"] = "http://localhost:9000"
os.environ["AWS_ACCESS_KEY_ID"] = "mlflow_minio_user"
os.environ["AWS_SECRET_ACCESS_KEY"] = "mlflow_minio_password"

pip_reqs = [
    "mlflow[mlserver]==2.22.0",
    "numpy==2.2.5",
    "pandas==2.2.3",
    "psutil==7.0.0",
    "scikit-learn==1.6.1",
    "scipy==1.15.3",
    "xgboost==3.0.1",
]


In [2]:
import mlflow.xgboost
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
import pandas as pd
import os


print(f"MLflow Tracking URI: {mlflow.get_tracking_uri()}")
print(f"MLflow S3 Endpoint URL: {os.getenv('MLFLOW_S3_ENDPOINT_URL')}")

X, y = make_classification(n_samples=200, n_features=5, n_informative=3, n_redundant=0, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

feature_names = [f"feature_{i}" for i in range(X_train.shape[1])]
X_train_df = pd.DataFrame(X_train, columns=feature_names)
X_test_df = pd.DataFrame(X_test, columns=feature_names)

dtrain = xgb.DMatrix(X_train_df, label=y_train)
dtest = xgb.DMatrix(X_test_df, label=y_test)

experiment_name = "XGBoost_Toy_Benchmark"
registered_model_name = "xgboost-toy-classifier"

try:
    experiment = mlflow.set_experiment(experiment_name)
    experiment_id = experiment.experiment_id
except Exception as e:
    print(f"Error {e}")
    experiment_id = mlflow.get_experiment_by_name(experiment_name).experiment_id

print(f"Using experiment: {experiment_name} (ID: {experiment_id})")

MLflow Tracking URI: http://localhost:5000
MLflow S3 Endpoint URL: http://localhost:9000
Using experiment: XGBoost_Toy_Benchmark (ID: 1)


In [3]:
# Training and tracking

with mlflow.start_run(experiment_id=experiment_id, run_name="Initial Toy Run") as run:
    run_id = run.info.run_id
    print(f"MLflow Run ID: {run_id}")

    params = {
        'objective': 'binary:logistic',
        'max_depth': 2,
        'eta': 0.1,
        'eval_metric': 'logloss',
    }
    num_boost_round = 20

    print("Training model...")
    model = xgb.train(
        params,
        dtrain,
        num_boost_round=num_boost_round,
        evals=[(dtest, "validation")],
        early_stopping_rounds=5,
        verbose_eval=False
    )
    print("Model trained...")

    y_pred_proba = model.predict(dtest)
    y_pred = (y_pred_proba > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.4f}")

    mlflow.log_params(params)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("num_boost_round", num_boost_round)

    print("Loogging the MLflow model...")
    from mlflow.models.signature import infer_signature

    signature = infer_signature(X_test_df, pd.Series(y_pred_proba, name="prediction_score"))


    mlflow.xgboost.log_model(
    xgb_model=model,
    artifact_path="toy_xgboost_model",
    signature=signature,

    registered_model_name=registered_model_name,
    input_example=X_test_df.head(5),
    pip_requirements=pip_reqs
    )

    print(f"Model '{registered_model_name}' has been logged and registered.")

    model_artifact_uri_in_run = f"runs:/{run_id}/toy_xgboost_model"
    print(f"Model URI in the run: {model_artifact_uri_in_run}")

    client = mlflow.tracking.MlflowClient()
    latest_version = client.get_latest_versions(registered_model_name, stages=["None"])[0]
    print(f"Registered model: {registered_model_name}, Version: {latest_version.version}, Stage: {latest_version.current_stage}")
    print(f"'models:/' URI: models:/{registered_model_name}/{latest_version.version}")
    print(f"S3/MinIO source URI (approximate—verify in UI): {latest_version.source}/toy_xgboost_model")




MLflow Run ID: 76c20b9893544b7bb2a38e26cf3e20f7
Training model...
Model trained...
Accuracy: 0.9000
Loogging the MLflow model...


  xgb_model.save_model(model_data_path)
INFO:botocore.credentials:Found credentials in environment variables.
Registered model 'xgboost-toy-classifier' already exists. Creating a new version of this model...
2025/05/15 14:54:42 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: xgboost-toy-classifier, version 4


Model 'xgboost-toy-classifier' has been logged and registered.
Model URI in the run: runs:/76c20b9893544b7bb2a38e26cf3e20f7/toy_xgboost_model
Registered model: xgboost-toy-classifier, Version: 4, Stage: None
'models:/' URI: models:/xgboost-toy-classifier/4
S3/MinIO source URI (approximate—verify in UI): s3://mlflow-artifacts/1/76c20b9893544b7bb2a38e26cf3e20f7/artifacts/toy_xgboost_model/toy_xgboost_model
🏃 View run Initial Toy Run at: http://localhost:5000/#/experiments/1/runs/76c20b9893544b7bb2a38e26cf3e20f7
🧪 View experiment at: http://localhost:5000/#/experiments/1


Created version '4' of model 'xgboost-toy-classifier'.
  latest_version = client.get_latest_versions(registered_model_name, stages=["None"])[0]


In [4]:

### Promote the model to Production

client = mlflow.tracking.MlflowClient()
model_version_to_promote = latest_version.version

print(f"\nPromoting '{registered_model_name}' version {model_version_to_promote} to 'Production'...")
try:
    client.transition_model_version_stage(
        name=registered_model_name,
        version=model_version_to_promote,
        stage="Production",
        archive_existing_versions=True
    )
    print("Model successfully promoted to 'Production'.")
except Exception as e:
    print(f"Error promoting model: {e}")
    print("You may need to do this manually via the MLflow UI if there are conflicts or it’s already in Production.")

promoted_version_details = client.get_model_version(registered_model_name, model_version_to_promote)
print(f"Current stage of model '{registered_model_name}' v{promoted_version_details.version}: {promoted_version_details.current_stage}")



Promoting 'xgboost-toy-classifier' version 4 to 'Production'...
Model successfully promoted to 'Production'.
Current stage of model 'xgboost-toy-classifier' v4: Production


  client.transition_model_version_stage(


In [5]:
# Check the model URI in the MLflow UI

print(f"\n--- ACTION REQUIRED! ---")
print(f"Go to the MLflow UI (http://localhost:5000) and locate Run ID: {run_id}")
print("Navigate to the artifacts and copy the full S3/MinIO URI for the 'toy_xgboost_model' folder.")
print(f"Example: s3://mlflow-artifacts/{experiment_id}/{run_id}/artifacts/toy_xgboost_model")
print("You will need this URI for the `modelUri` field in your SeldonDeployment YAML.")


--- ACTION REQUIRED! ---
Go to the MLflow UI (http://localhost:5000) and locate Run ID: 76c20b9893544b7bb2a38e26cf3e20f7
Navigate to the artifacts and copy the full S3/MinIO URI for the 'toy_xgboost_model' folder.
Example: s3://mlflow-artifacts/1/76c20b9893544b7bb2a38e26cf3e20f7/artifacts/toy_xgboost_model
You will need this URI for the `modelUri` field in your SeldonDeployment YAML.
