In [1]:
import sys
sys.path.append("/mnt/code")

In [2]:
from __future__ import annotations
import os
from utils import mlflow_utils
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import mlflow
from mlflow import MlflowClient
from mlflow.entities import Run
from mlflow.entities.model_registry import ModelVersion
from mlflow.models.model import ModelInfo
from mlflow.exceptions import RestException
from mlflow.store.artifact.runs_artifact_repo import RunsArtifactRepository



from typing import Optional, Iterable, Union
import numpy as np
import pandas as pd
import mlflow
import mlflow.pyfunc
import mlflow.xgboost
from mlflow.artifacts import download_artifacts
import xgboost as xgb

In [3]:
my_name = os.environ['DOMINO_STARTING_USERNAME']
exp_model_name = f"xgb-demo-{my_name}-demo-v1"
model_name = exp_model_name
mlflow_utils.ensure_mlflow_experiment(exp_model_name)
mlflow_utils.ensure_registered_model(exp_model_name)

<RegisteredModel: aliases={}, creation_timestamp=1758501006243, description='', last_updated_timestamp=1758655217099, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1758655217099, current_stage='None', description='XGB basic demo — acc=0.9580', last_updated_timestamp=1758655217099, name='xgb-demo-wadkars-demo-v1', run_id='8becdbd12a7546638b4be51be8593f7c', run_link='', source='mlflow-artifacts:/mlflow/8becdbd12a7546638b4be51be8593f7c/artifacts/model', status='READY', status_message='', tags={'mlflow.domino.dataset_info': '68b08bc1dc77613e66e63652-68b08bc1dc77613e66e63651,68b8a07b2ad64d0492eb2c97-68b8a07b2ad64d0492eb2c96',
 'mlflow.domino.environment_id': '68b0bb70dc77613e66e6368d',
 'mlflow.domino.environment_revision_id': '68ceaf1ac4ae886f22d8150d',
 'mlflow.domino.hardware_tier': 'medium-k8s',
 'mlflow.domino.project_id': '68b08bbfdc77613e66e6364e',
 'mlflow.domino.project_name': 'ddl-end-to-end-demo',
 'mlflow.domino.run_id': '68d2d61cc2deda4aff8bddd6',
 'mlflow.domi

In [5]:
    # Optional: point to your MLflow server
    # mlflow.set_tracking_uri("http://localhost:5000")
    # mlflow.set_registry_uri("http://localhost:5000")


    # Data
    X, y = load_breast_cancer(return_X_y=True, as_frame=False)
    Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)

    # Simple XGBoost classifier
    model = xgb.XGBClassifier(
        n_estimators=80,
        max_depth=3,
        learning_rate=0.1,
        subsample=0.9,
        colsample_bytree=0.9,
        reg_lambda=1.0,
        random_state=42,
        eval_metric="logloss",
        n_jobs=4,
    )

    with mlflow.start_run(run_name="xgb-basic") as run:
        model.fit(Xtr, ytr)
        preds = model.predict(Xte)
        acc = accuracy_score(yte, preds)

        # Log params/metrics
        mlflow.log_params({
            "n_estimators": model.n_estimators,
            "max_depth": model.max_depth,
            "learning_rate": model.learning_rate,
            "subsample": model.subsample,
            "colsample_bytree": model.colsample_bytree,
        })
        mlflow.log_metric("accuracy", acc)

        # Log model (returns ModelInfo with .model_uri)
        model_info: ModelInfo = mlflow.xgboost.log_model(
            xgb_model=model,
            artifact_path="model",
            registered_model_name=None,  # we’ll register a version explicitly below
        )

        # Ensure a registered model exists, then create a version from this run      
        

        mv: ModelVersion = mlflow_utils.register_model_version(
            model_name=exp_model_name,
            model_desc=f"XGB basic demo — acc={acc:.4f}",
            model_info=model_info,
            run=run,
        )

        print(f"Run ID:        {run.info.run_id}")
        print(f"Model URI:     {model_info.model_uri}")
        print(f"Registered:    {model_name} v{mv.version} (status={mv.status})")



2025/09/25 17:49:26 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-demo-wadkars-demo-v1, version 4


Run ID:        0a1e50c468cd454abec3c67a524be5aa
Model URI:     runs:/0a1e50c468cd454abec3c67a524be5aa/model
Registered:    xgb-demo-wadkars-demo-v1 v4 (status=READY)
🏃 View run xgb-basic at: http://127.0.0.1:8765/#/experiments/269/runs/0a1e50c468cd454abec3c67a524be5aa
🧪 View experiment at: http://127.0.0.1:8765/#/experiments/269


In [6]:
# ---------- Option A: load as PyFunc (simplest .predict on DataFrame/ndarray) ----------

def load_xgb_pyfunc_version(model_name: str, version: Union[int, str]) -> mlflow.pyfunc.PyFuncModel:
    """
    Load a registered model *version* as a PyFunc model.
    Works regardless of how the model was trained/logged (XGBoost flavor included).

    Example URI: models:/my_model/3
    """
    uri = f"models:/{model_name}/{version}"
    return mlflow.pyfunc.load_model(uri)


def predict_with_pyfunc(model: mlflow.pyfunc.PyFuncModel,
                        X: Union[pd.DataFrame, np.ndarray, Iterable[Iterable[float]]]) -> np.ndarray:
    """
    Run predictions using a PyFunc model.
    Accepts pandas DataFrame or numpy-like 2D structure.
    """
    if not isinstance(X, pd.DataFrame):
        X = pd.DataFrame(X)
    #print(X)
    y_pred = model.predict(X)
    # Ensure numpy array
    return np.asarray(y_pred)


# ---------- Option B: load as raw XGBoost Booster (gives you full XGB control) ----------

def load_xgb_booster_version(model_name: str, version: Union[int, str]) -> xgb.Booster:
    """
    Load a registered model *version* as an XGBoost Booster.
    (Use when you want raw XGB APIs, e.g., DMatrix and custom prediction params.)
    """
    uri = f"models:/{model_name}/{version}"
    return mlflow.xgboost.load_model(uri)


def predict_with_booster(booster: xgb.Booster,
                         X: Union[pd.DataFrame, np.ndarray, Iterable[Iterable[float]]]) -> np.ndarray:
    """
    Run predictions using a raw XGBoost Booster.
    """
    if isinstance(X, pd.DataFrame):
        dm = xgb.DMatrix(X.values)
    else:
        dm = xgb.DMatrix(np.asarray(X))
    return booster.predict(dm)


# ---------- (Optional) Just download the model files to disk ----------

def download_model_version_artifacts(model_name: str,
                                     version: Union[int, str],
                                     dst_dir: Optional[str] = None) -> str:
    """
    Download all artifacts for a given registered model version to a local directory.
    Returns the local path where files were written.
    """
    uri = f"models:/{model_name}/{version}"
    return download_artifacts(artifact_uri=uri, dst_path=dst_dir)

In [7]:
pyfunc_model = load_xgb_pyfunc_version(exp_model_name, "latest")
sample = np.random.rand(1, 30)  # shape must match your training columns
preds = predict_with_pyfunc(pyfunc_model, sample)
print("PyFunc preds:", preds)

  latest = client.get_latest_versions(name, None if stage is None else [stage])


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

PyFunc preds: [1]



## Deploy registered model as an endpoint


Invoke model endpoint with the following payload
```
{
  "data": [

 [14.2,19.7,92.3,645.0,0.096,0.085,0.05,0.025,0.18,0.062,0.4,1.2,2.8,30.0,0.006,0.02,0.02,0.007,0.02,0.003,15.8,27.0,104.0,780.0,0.125,0.18,0.19,0.07,0.27,0.085]


]
}
```