In [0]:
%pip install databricks-vectorsearch
# Restart the Python process so the package installed above can be imported by later cells.
dbutils.library.restartPython()

In [0]:
# --- Unity Catalog location ---------------------------------------------
UC_CATALOG = "users"
UC_SCHEMA = "alex_miller"

# --- Vector Search endpoint ---------------------------------------------
VECTOR_SEARCH_ENDPOINT = "abs_test_temp"
ENDPOINT_TYPE = "STORAGE_OPTIMIZED"  # or STANDARD

# --- Source table and index, as three-level names (catalog.schema.object) ---
VS_INDEX_NAME = "vs_batch_example"
SOURCE_TABLE = ".".join([UC_CATALOG, UC_SCHEMA, "imdb_embeddings"])
VECTOR_SEARCH_INDEX = ".".join([UC_CATALOG, UC_SCHEMA, VS_INDEX_NAME])

# --- Embedding vector size ----------------------------------------------
EMBEDDING_DIMENSION = 1024

# --- Set to True to trigger a sync when the index already exists ---------
update_index = False

In [0]:
from databricks.vector_search.client import VectorSearchClient

# With no arguments the client automatically generates a PAT token for authentication.
client = VectorSearchClient()

# To authenticate with a service principal token instead, use:
# client = VectorSearchClient(service_principal_client_id=<CLIENT_ID>,service_principal_client_secret=<CLIENT_SECRET>)

# list_endpoints() returns a dict; the endpoints themselves are listed under
# the "endpoints" key (which may be missing when none exist). Looking up
# "name" on the top-level dict with a truthy default always evaluated to True,
# so the endpoint was never created. Match on each endpoint's name instead.
existing_endpoints = client.list_endpoints().get("endpoints") or []
endpoint_exists = any(
    endpoint.get("name") == VECTOR_SEARCH_ENDPOINT
    for endpoint in existing_endpoints
)

if endpoint_exists:
  print("Endpoint already created.....")
else:
  print(f"Creating endpoint {VECTOR_SEARCH_ENDPOINT}")
  client.create_endpoint_and_wait(
      name=VECTOR_SEARCH_ENDPOINT,
      endpoint_type=ENDPOINT_TYPE
  )

In [0]:
# Look up the indexes that already exist on the endpoint. The "vector_indexes"
# key can be missing/None when the endpoint has no indexes, so fall back to an
# empty list instead of relying on a TypeError from iterating None.
indexes_list = client.list_indexes(VECTOR_SEARCH_ENDPOINT).get("vector_indexes") or []

# Always assign index_created (previously it was left undefined when the list
# was empty or held no match), and only count entries whose name really equals
# ours -- an entry without a "name" must not be treated as a match.
index_created = any(
    index_info.get("name") == VECTOR_SEARCH_INDEX
    for index_info in indexes_list
)

if index_created:
  print(f"{VECTOR_SEARCH_INDEX} already exists.....")
  index = client.get_index(index_name=VECTOR_SEARCH_INDEX)
  if update_index:
    # Sync is only triggered when update_index is enabled in the config cell.
    print(f"Updating index {VECTOR_SEARCH_INDEX}")
    index.sync()
else:
  print(f"Creating index {VECTOR_SEARCH_INDEX}")
  index = client.create_delta_sync_index_and_wait(
    endpoint_name=VECTOR_SEARCH_ENDPOINT,
    source_table_name=SOURCE_TABLE,
    index_name=VECTOR_SEARCH_INDEX,
    pipeline_type="TRIGGERED",
    primary_key="id",
    # Use the configured dimension rather than a duplicated literal.
    embedding_dimension=EMBEDDING_DIMENSION,
    embedding_vector_column="embeddings"
  )