In [0]:
%run ../00.set_variables

In [0]:
# deploy the first two endpoints manually rather than awaiting approval via deployment job
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import EndpointCoreConfigInput, ServedModelInput, ServedEntityInput, AutoCaptureConfigInput
from databricks.sdk.errors import NotFound, ResourceDoesNotExist
from concurrent.futures import ThreadPoolExecutor
import time

# Upper-case aliases for the catalog/schema notebook variables
# (presumably defined by the `%run ../00.set_variables` cell above — confirm).
CATALOG_NAME = catalog_name
SCHEMA_NAME = schema_name

# Model names (without the catalog.schema prefix) to deploy.
MODELS_TO_DEPLOY = ["fe_model", "si_model"]

# Workspace client shared by the cells below.
w = WorkspaceClient()

def create_or_update_uc_endpoint(w: WorkspaceClient,
                                 uc_catalog: str,
                                 uc_schema: str,
                                 uc_model_name: str,
                                 model_version: str
                                 ):
    """Serve one version of a Unity Catalog model from a model serving endpoint.

    The endpoint is named ``{uc_schema}-{uc_model_name}-serving-endpoint``. The
    function first tries to update the config of an endpoint with that name;
    if the SDK reports that it does not exist, the endpoint is created instead.
    Both paths use the SDK's ``*_and_wait`` helpers.

    Args:
        w: Databricks workspace client used for the serving-endpoint API calls.
        uc_catalog: Catalog that holds the model.
        uc_schema: Schema that holds the model; also the endpoint-name prefix.
        uc_model_name: Model name without the catalog/schema prefix.
        model_version: Model version to serve, as a string (e.g. ``'1'``).

    Returns:
        None.

    Raises:
        Any SDK error other than ``ResourceDoesNotExist`` raised by the update
        call, and any error raised by the create call, propagates to the caller.
    """
    full_model_name = f"{uc_catalog}.{uc_schema}.{uc_model_name}"
    # Build the endpoint name from the uc_schema argument rather than the
    # notebook-level `schema_name` global, so the function honors the schema it
    # was called with and does not depend on hidden notebook state.
    serving_endpoint_name = f"{uc_schema}-{uc_model_name}-serving-endpoint"

    # A single served entity: the requested model version on a "Small"
    # workload with scale-to-zero enabled.
    served_entities = [
        ServedEntityInput(
            entity_name=full_model_name,
            entity_version=model_version,
            workload_size="Small",
            scale_to_zero_enabled=True,
        )
    ]

    # Update serving endpoint if it already exists, otherwise create the serving endpoint
    try:
        w.serving_endpoints.update_config_and_wait(name=serving_endpoint_name, served_entities=served_entities)
    except ResourceDoesNotExist:
        w.serving_endpoints.create_and_wait(name=serving_endpoint_name, config=EndpointCoreConfigInput(served_entities=served_entities))

# Deploy version '1' of every model in MODELS_TO_DEPLOY concurrently.
with ThreadPoolExecutor() as executor:
    # One task per model; each endpoint is named
    # "<schema>-<model>-serving-endpoint" by create_or_update_uc_endpoint.
    futures = [
        executor.submit(create_or_update_uc_endpoint, w, CATALOG_NAME, SCHEMA_NAME, model, '1')
        for model in MODELS_TO_DEPLOY
    ]
    # result() waits for each deployment and re-raises any exception it hit,
    # so a failed deployment fails this cell instead of passing silently.
    for future in futures:
        future.result()


In [0]:
from databricks.sdk.service.iam import PermissionLevel, AccessControlRequest

# Names of the endpoints to configure; this must match the naming pattern
# used when the endpoints were created.
endpoints = [f"{schema_name}-{model}-serving-endpoint" for model in MODELS_TO_DEPLOY]

for endpoint in endpoints:
    # update_permissions is keyed by endpoint id, so look it up from the name.
    endpoint_id = w.serving_endpoints.get(name=endpoint).id

    # Grant CAN_QUERY on the endpoint to the "users" group
    # (i.e. all workspace users, per the original author's note).
    w.serving_endpoints.update_permissions(
        serving_endpoint_id=endpoint_id,
        access_control_list=[
            AccessControlRequest(group_name="users", permission_level=PermissionLevel.CAN_QUERY),
        ],
    )

    # Enable AI Gateway on the endpoint through a raw REST call:
    # usage tracking on, inference tables written to catalog_name.schema_name.
    base_url = "/api/2.0/serving-endpoints"
    api_url = f"{base_url}/{endpoint}/ai-gateway"

    gateway_request_data = {
        "usage_tracking_config": {"enabled": True},
        "inference_table_config": {
            "enabled": True,
            "catalog_name": catalog_name,
            "schema_name": schema_name,
        },
    }

    w.api_client.do("PUT", api_url, body=gateway_request_data)
  

