Create a Vector Search index on the product catalog table, using its product descriptions as the embedded text

In [0]:
%pip install databricks-vectorsearch
dbutils.library.restartPython()


Collecting databricks-vectorsearch
  Downloading databricks_vectorsearch-0.63-py3-none-any.whl.metadata (2.8 kB)
Collecting deprecation>=2 (from databricks-vectorsearch)
  Downloading deprecation-2.1.0-py2.py3-none-any.whl.metadata (4.6 kB)
Downloading databricks_vectorsearch-0.63-py3-none-any.whl (19 kB)
Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)
Installing collected packages: deprecation, databricks-vectorsearch
Successfully installed databricks-vectorsearch-0.63 deprecation-2.1.0
Note: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.


In [0]:
from databricks.vector_search.client import VectorSearchClient

# Client instance that get_or_create_endpoint() below uses for its endpoint calls.
vs_client = VectorSearchClient()

TARGET_ENDPOINT_NAME = "demo_vs_endpoint"      # endpoint to use if it exists, otherwise the one to create
FALLBACK_ENDPOINT_NAME = "dbdemos_vs_endpoint"  # endpoint assumed to exist already; used if the target cannot be created

def get_or_create_endpoint(target_name=None, fallback_name=None, client=None):
    """Return the name of a usable Vector Search endpoint, creating one if needed.

    Resolution order:
      1. Use ``target_name`` if looking it up succeeds.
      2. Otherwise try to create ``target_name`` as a ``STANDARD`` endpoint.
      3. If creation fails for any reason, fall back to ``fallback_name`` after
         confirming that looking it up succeeds.

    Args:
        target_name: Endpoint to look up or create. Defaults to the
            notebook-level ``TARGET_ENDPOINT_NAME``.
        fallback_name: Endpoint to use when the target cannot be created.
            Defaults to the notebook-level ``FALLBACK_ENDPOINT_NAME``.
        client: Object exposing ``get_endpoint(name=...)`` and
            ``create_endpoint(name=..., endpoint_type=...)``. Defaults to the
            notebook-level ``vs_client``.

    Returns:
        The name of the endpoint to use: ``target_name`` or ``fallback_name``.

    Raises:
        Exception: Re-raised from the target lookup when the error text
            contains neither "404" nor "RESOURCE_DOES_NOT_EXIST"; also
            propagated from the fallback lookup when that lookup fails.
    """
    # Resolve defaults at call time so a plain get_or_create_endpoint() keeps
    # using the notebook-level configuration, while callers (and tests) can
    # supply their own names and client.
    client = vs_client if client is None else client
    target_name = TARGET_ENDPOINT_NAME if target_name is None else target_name
    fallback_name = FALLBACK_ENDPOINT_NAME if fallback_name is None else fallback_name

    # 1. If the target exists, just use it.
    try:
        client.get_endpoint(name=target_name)
    except Exception as lookup_error:
        # Only a "not found" style failure means we should go on to create the
        # endpoint; anything else is surfaced to the caller unchanged.
        message = str(lookup_error)
        if "404" not in message and "RESOURCE_DOES_NOT_EXIST" not in message:
            raise
    else:
        print(f"Using existing endpoint: {target_name}")
        return target_name

    # 2. Try to create the target endpoint.
    # NOTE(review): nothing here waits for the new endpoint to finish
    # provisioning -- confirm whether later cells need it to be online first.
    try:
        client.create_endpoint(
            name=target_name,
            endpoint_type="STANDARD",  # or "STORAGE_OPTIMIZED"
        )
    except Exception as create_error:
        # 3. If creation fails (e.g., capacity / quota), fall back.
        print(f"Could not create endpoint {target_name}, "
              f"falling back to {fallback_name}: {create_error}")

        # Validate that the fallback actually exists; a failed lookup
        # propagates to the caller (chained to the creation error).
        client.get_endpoint(name=fallback_name)
        print(f"Using fallback endpoint: {fallback_name}")
        return fallback_name
    else:
        print(f"Created endpoint: {target_name}")
        return target_name

# Resolve the endpoint once; the index-creation cell further down reads ENDPOINT_NAME.
ENDPOINT_NAME = get_or_create_endpoint()


[NOTICE] Using a notebook authentication token. Recommended for development only. For improved performance, please use Service Principal based authentication. To disable this message, pass disable_notice=True.
Created endpoint: demo_vs_endpoint


In [0]:
# Show which endpoint was selected (the target or the fallback).
print(ENDPOINT_NAME)

demo_vs_endpoint


## Creating a Vector Search Index

In [0]:
from databricks.vector_search.client import VectorSearchClient

# Client used by the index helper defined below.
vs_client = VectorSearchClient()

# Catalog and schema that hold both the source table and the index.
CATALOG = "fnma_product_catalog_jcg"
SCHEMA = "default"
INDEX_SHORT_NAME = "product_catalog_vector_index"

# Fully qualified names in <catalog>.<schema>.<object> form.
SOURCE_TABLE = ".".join([CATALOG, SCHEMA, "product_catalog"])
INDEX_FULL_NAME = ".".join([CATALOG, SCHEMA, INDEX_SHORT_NAME])

def get_or_create_delta_sync_index(
    endpoint_name: str,
    *,
    index_name: "str | None" = None,
    source_table_name: "str | None" = None,
    primary_key: str = "unique_id",
    embedding_source_column: str = "Description",
    embedding_model_endpoint_name: str = "databricks-bge-large-en",
    pipeline_type: str = "CONTINUOUS",
    client=None,
) -> None:
    """Ensure a Delta Sync vector index exists on ``endpoint_name``.

    If looking up the index succeeds it is reused as-is. If the lookup fails
    with a "not found" style error, a new Delta Sync index is created and the
    function blocks on ``wait_until_ready()`` of the returned index object.

    Args:
        endpoint_name: Vector Search endpoint that hosts the index.
        index_name: Fully qualified index name. Defaults to the notebook-level
            ``INDEX_FULL_NAME``.
        source_table_name: Fully qualified source table. Defaults to the
            notebook-level ``SOURCE_TABLE``.
        primary_key: Primary-key column in the source table.
        embedding_source_column: Text column in the source table to embed.
        embedding_model_endpoint_name: Model serving endpoint used to compute
            the embeddings.
        pipeline_type: Sync mode passed to ``create_delta_sync_index``
            ("CONTINUOUS" by default; "TRIGGERED" is the noted alternative).
        client: Object exposing ``get_index(endpoint_name=..., index_name=...)``
            and ``create_delta_sync_index(...)``. Defaults to the
            notebook-level ``vs_client``.

    Returns:
        None.

    Raises:
        Exception: Re-raised from the index lookup when the error text
            contains neither "RESOURCE_DOES_NOT_EXIST" nor "404"; errors from
            index creation or from waiting for readiness also propagate.
    """
    # Resolve defaults at call time so the existing one-argument call keeps
    # using the notebook-level configuration.
    client = vs_client if client is None else client
    index_name = INDEX_FULL_NAME if index_name is None else index_name
    source_table_name = SOURCE_TABLE if source_table_name is None else source_table_name

    # 1. If the index already exists, just use it.
    try:
        client.get_index(
            endpoint_name=endpoint_name,
            index_name=index_name,
        )
    except Exception as lookup_error:
        # Only a "not found" failure should lead to creation; re-raise the rest.
        message = str(lookup_error)
        if "RESOURCE_DOES_NOT_EXIST" not in message and "404" not in message:
            raise
    else:
        print(f"Using existing index: {index_name}")
        return

    # 2. Create a new Delta Sync index with Databricks-managed embeddings.
    print(f"Creating index: {index_name}")
    index = client.create_delta_sync_index(
        endpoint_name=endpoint_name,
        source_table_name=source_table_name,
        index_name=index_name,
        pipeline_type=pipeline_type,
        primary_key=primary_key,
        embedding_source_column=embedding_source_column,
        embedding_model_endpoint_name=embedding_model_endpoint_name,
        # Optional: only sync specific columns
        # columns_to_sync=["id", "text"],
    )

    # Block until the new index reports ready.
    index.wait_until_ready()
    # Alternatively, trigger a sync once the pipeline is created:
    # index.sync()

# Reuse the endpoint chosen by get_or_create_endpoint() in the earlier cell;
# ENDPOINT_NAME must already be defined by running that cell first.
get_or_create_delta_sync_index(ENDPOINT_NAME)


[NOTICE] Using a notebook authentication token. Recommended for development only. For improved performance, please use Service Principal based authentication. To disable this message, pass disable_notice=True.
Creating index: fnma_product_catalog_jcg.default.product_catalog_vector_index
