system.data_classification.results + updated UI

mlflow system tables

In [0]:
# Databricks notebook cell — minimal experiment + UC model

import pandas as pd
import mlflow
import mlflow.pyfunc
from mlflow.models import infer_signature
from mlflow.tracking import MlflowClient
from pyspark.sql.types import DoubleType

# --- 0) Configure MLflow endpoints
# Point tracking at this workspace and the model registry at Unity Catalog (UC).
mlflow.set_tracking_uri("databricks")      # tracking in this workspace
mlflow.set_registry_uri("databricks-uc")   # register models in Unity Catalog (UC) — the key setting
# (UC model registry is recommended; the URI above is the standard way to target it.)  # noqa
# Ref: Databricks "Manage model lifecycle in Unity Catalog".

# --- 1) Create (or set) an MLflow experiment under your /Users/<you>/ folder
# current_user() returns a single row with a single column: the caller's user name.
user = spark.sql("SELECT current_user()").collect()[0][0]
EXP_NAME = f"/Users/{user}/uc_minimal_exp"

# OPTIONAL: store experiment artifacts in a UC Volume (requires MLflow ≥ 2.15).
# artifact_location is an argument of create_experiment(), so run this block while the
# experiment does not exist yet (i.e. before the set_experiment() call below creates it).
# CATALOG, SCHEMA, VOLUME = "hub", "default", "mlops_vol"  # adjust if you have a volume
# ARTIFACT_PATH = f"dbfs:/Volumes/{CATALOG}/{SCHEMA}/{VOLUME}/experiments/{user}/uc_minimal_exp"
# if mlflow.get_experiment_by_name(EXP_NAME) is None:
#     mlflow.create_experiment(name=EXP_NAME, artifact_location=ARTIFACT_PATH)  # UC Volumes path
# If you don't use a UC Volume, the default DBFS artifact store is fine for a minimal demo.  # noqa
# See the Databricks documentation on using UC Volumes for experiment artifacts.

# set_experiment() activates the named experiment and creates it (with the default
# artifact store) when no experiment with that name exists, so a separate
# get_experiment_by_name()/create_experiment() check is redundant and race-prone.
mlflow.set_experiment(EXP_NAME)

# --- 2) Simplest possible Spark DataFrame
# One column "x" with the rows 0.0, 1.0, 2.0, 3.0 (each row is a 1-tuple).
df = spark.createDataFrame([(float(i),) for i in range(4)], ["x"])

# --- 3) Simplest possible MLflow model: doubles 'x'
class Doubler(mlflow.pyfunc.PythonModel):
    """Minimal pyfunc model that doubles the ``x`` column of its input."""

    def predict(self, context, model_input: pd.DataFrame):
        """Return the values of ``model_input["x"]`` multiplied by 2.

        ``context`` is accepted as part of the pyfunc ``predict`` signature
        but is not used by this model.
        """
        x_values = model_input["x"]
        return x_values * 2

# --- 4) Track a run + log & register the model in UC (hub.default.doubler)
model_name = "hub.default.doubler"  # <catalog>.<schema>.<registered_model_name>

# The example input and the signature do not depend on the run, so build them first.
input_example = pd.DataFrame({"x": [1.0]})
signature = infer_signature(input_example, pd.Series([2.0]))

with mlflow.start_run(run_name="doubler_minimal") as run:
    # Lightweight tracking: one parameter, then both sanity metrics in a single call.
    mlflow.log_param("model_type", "pyfunc_doubler")
    mlflow.log_metrics({"sanity_in": 1.0, "sanity_out": 2.0})

    # Log the pyfunc model; passing registered_model_name also registers it in UC.
    mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=Doubler(),
        registered_model_name=model_name,
        input_example=input_example,
        signature=signature,
    )

    # Report where the run landed.
    print(f"Experiment path: {mlflow.get_experiment(run.info.experiment_id).name}")
    print(f"Run ID: {run.info.run_id}")

# --- 5) Resolve the latest UC model version, then score the Spark DataFrame
client = MlflowClient()
# Version numbers come back as strings; convert to int so max() compares numerically.
versions = [int(v.version) for v in client.search_model_versions(f"name = '{model_name}'")]
if not versions:
    # Fail with an actionable message instead of max()'s "empty sequence" error,
    # e.g. when this cell runs before the model has been registered.
    raise ValueError(f"No registered versions found for model '{model_name}'")
latest_version = max(versions)
model_uri = f"models:/{model_name}/{latest_version}"
print(f"Registered in UC as: {model_uri}")
