In [0]:
%run ../00.set_variables

In [0]:
# Deploy the first two endpoints manually rather than waiting for approval via the deployment job.
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import ServedEntityInput, EndpointCoreConfigInput, AutoCaptureConfigInput
from databricks.sdk.errors import NotFound
import time

# --- Configuration ---
# Unity Catalog name, taken from the notebook-level `catalog_name` variable.
CATALOG_NAME = catalog_name

# Schema (database) in which the models are registered, taken from `schema_name`.
SCHEMA_NAME = schema_name

# Registered model names to deploy.
MODELS_TO_DEPLOY = ["fe_model", "si_model"]

# Module-level Databricks workspace client.
w = WorkspaceClient()

def serve_model(model_name, model_version, *, force_update=True):
    """Create, or optionally update, the serving endpoint for one registered model.

    The endpoint is named ``{schema_name}-{model_name}-serving-endpoint`` and
    serves ``{catalog_name}.{schema_name}.{model_name}`` with a "Small" workload
    and scale-to-zero enabled. ``catalog_name`` and ``schema_name`` are read from
    notebook-level globals.

    Args:
        model_name: Name of the registered model inside the schema.
        model_version: Version of the model to serve; converted to ``str``
            before being sent to the serving API.
        force_update: When the endpoint already exists, push the requested
            version to it and wait for the update to finish. Defaults to
            ``True``, matching the previously hard-coded behaviour; pass
            ``False`` to leave an existing endpoint untouched.

    Returns:
        None. The call blocks until the create/update operation completes.
    """
    model_FQDN = f"{catalog_name}.{schema_name}.{model_name}"
    serving_endpoint_name = f"{schema_name}-{model_name}-serving-endpoint"
    # A client is created per call, as in the original; this function is run
    # from worker threads by the caller.
    w = WorkspaceClient()
    endpoint_config = EndpointCoreConfigInput(
        name=serving_endpoint_name,
        served_entities=[
            ServedEntityInput(
                entity_name=model_FQDN,
                # The SDK declares entity_version as a string.
                entity_version=str(model_version),
                scale_to_zero_enabled=True,
                workload_size="Small",
            )
        ],
        auto_capture_config=AutoCaptureConfigInput(
            catalog_name=catalog_name,
            schema_name=schema_name,
            enabled=True,
            table_name_prefix=f"{model_name}_payload_inference_table",
        ),
    )

    # Ask for this endpoint directly instead of listing every endpoint in the
    # workspace; a missing endpoint surfaces as NotFound.
    try:
        w.serving_endpoints.get(serving_endpoint_name)
    except NotFound:
        endpoint_exists = False
    else:
        endpoint_exists = True

    if not endpoint_exists:
        print(f"Creating the endpoint {serving_endpoint_name}, this will take a few minutes to package and deploy the endpoint...")
        w.serving_endpoints.create_and_wait(name=serving_endpoint_name, config=endpoint_config)
        return

    print(f"endpoint {serving_endpoint_name} already exist...")
    if force_update:
        # Only the served entities are sent on update; the auto-capture
        # configuration is supplied at creation time only.
        w.serving_endpoints.update_config_and_wait(
            served_entities=endpoint_config.served_entities,
            name=serving_endpoint_name,
        )

# Deploy version 1 of every model in MODELS_TO_DEPLOY concurrently on a thread pool.
from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor() as pool:
    pending = []
    for model_name in MODELS_TO_DEPLOY:
        pending.append(pool.submit(serve_model, model_name, 1))
    # Wait for each deployment in submission order; .result() re-raises any
    # exception raised inside serve_model.
    for task in pending:
        task.result()
