In [0]:
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import (
    EndpointCoreConfigInput, 
    ServedEntityInput,
    ServedModelInputWorkloadSize,
    ServedModelInputWorkloadType,
    AutoCaptureConfigInput
)
from databricks.sdk import errors

# Unity Catalog coordinates of the registered model.
CATALOG = 'protein_folding'
SCHEMA = 'boltz'
MODEL_NAME = 'boltz'

# Name of the serving endpoint that is created or updated below.
endpoint_name = 'boltz'

# Three-level model name: <catalog>.<schema>.<model>.
model_name = f"{CATALOG}.{SCHEMA}.{MODEL_NAME}"

# Workspace client built with no explicit arguments.
w = WorkspaceClient()
# Pick the highest registered version of the model to serve.
versions = w.model_versions.list(model_name)
latest_model_version = max(versions, key=lambda v: v.version, default=None)
if latest_model_version is None:
    # max() on an empty listing would raise an opaque ValueError; keep the
    # exception type but say which model has nothing to serve.
    raise ValueError(f"no registered versions found for model {model_name!r}")
latest_version = latest_model_version.version

print("version being served = ", latest_version)


# Single served entity: the selected model version on a small GPU workload,
# with scale-to-zero enabled.
served_entities = [
    ServedEntityInput(
        entity_name=model_name,
        # The SDK declares entity_version as a string, while the version taken
        # from the model-version listing may be numeric; convert explicitly.
        entity_version=str(latest_version),
        name=MODEL_NAME,
        workload_type="GPU_SMALL",
        workload_size="Small",
        scale_to_zero_enabled=True,
    )
]
# Auto-capture settings for the endpoint: enabled, targeting CATALOG.SCHEMA,
# with table names prefixed by "<MODEL_NAME>_serving".
auto_capture_config = AutoCaptureConfigInput(
    enabled=True,
    catalog_name=CATALOG,
    schema_name=SCHEMA,
    table_name_prefix=f'{MODEL_NAME}_serving',
)

# Create the endpoint if it does not exist yet; otherwise push the new config
# to the existing endpoint.
try:
    # Existence probe only. The try body is limited to this lookup so that a
    # ResourceDoesNotExist raised by the update itself is not mistaken for a
    # missing endpoint.
    existing_endpoint = w.serving_endpoints.get(endpoint_name)
except errors.platform.ResourceDoesNotExist:
    # No endpoint yet: create it with the model already attached.
    # NOTE(review): this does not block until the endpoint is ready; the SDK
    # appears to return a waiter object here - call .result() on `status`
    # (or use create_and_wait) if the caller must wait. Confirm against the
    # pinned SDK version.
    status = w.serving_endpoints.create(
        name=endpoint_name,
        config=EndpointCoreConfigInput(
            name=endpoint_name,
            served_entities=served_entities,
            auto_capture_config=auto_capture_config,
        ),
    )
else:
    # Endpoint already exists: replace its served entities and capture
    # settings. The rollout may take some time to complete after this call
    # returns.
    status = w.serving_endpoints.update_config(
        name=endpoint_name,
        served_entities=served_entities,
        auto_capture_config=auto_capture_config,
    )

print(status)