In [0]:
%pip install databricks-vectorsearch
# Restart the Python process so the package installed above is importable in later cells.
dbutils.library.restartPython()

In [0]:
from databricks.vector_search.client import VectorSearchClient

# No explicit credentials are passed; the client is constructed with its defaults.
vs_client = VectorSearchClient()

endpoint_name = "docai_endpoint"  # 🔁 Change if needed

# Only create the endpoint when it does not exist yet, so this cell can be
# re-run (Restart & Run All) without failing on an "already exists" error.
existing_endpoint_names = {
    endpoint.get("name")
    for endpoint in vs_client.list_endpoints().get("endpoints", [])
}

if endpoint_name in existing_endpoint_names:
    print(f"Endpoint {endpoint_name} already exists - skipping creation")
else:
    vs_client.create_endpoint(
        name=endpoint_name,
        # "STANDARD" is the endpoint type accepted by the SDK; "INDEX" is not a valid value.
        endpoint_type="STANDARD",
    )

In [0]:
# Sanity check: print a single row from the embeddings table before indexing it.
sample_row_query = "SELECT * FROM `docai-dbx`.gold.doc_embeddings LIMIT 1"
spark.sql(sample_row_query).show()


In [0]:
# COMMAND ----------

# ✅ Step 1: Install vector search SDK if needed (done in the first cell via %pip)


# COMMAND ----------

# ✅ Step 4: Create index on your Delta table
# Make sure your table exists before running this.
# NOTE(review): Delta Sync indexes need Change Data Feed enabled on the source
# table (delta.enableChangeDataFeed = true) - confirm this is set.

# The index lives in Unity Catalog, so its name must be fully qualified
# as <catalog>.<schema>.<index>.
index_name = "docai-dbx.gold.doc_embeddings_index"  # 🔁 Choose a name for the index
source_table = "docai-dbx.gold.doc_embeddings"  # 🧠 Replace with your actual table name
primary_key_col = "chunk_id"
# NOTE(review): assumes the precomputed vectors are stored in a column named
# "embedding" - TODO confirm against the table schema.
embedding_col = "embedding"
embedding_dim = 384  # Because we used all-MiniLM-L6-v2

vs_client.create_delta_sync_index(
    endpoint_name=endpoint_name,
    index_name=index_name,
    source_table_name=source_table,
    # Required by the SDK: "TRIGGERED" syncs on demand, "CONTINUOUS" keeps the index in sync.
    pipeline_type="TRIGGERED",
    primary_key=primary_key_col,
    # Self-managed embeddings: the dimension must be paired with the vector column.
    embedding_dimension=embedding_dim,
    embedding_vector_column=embedding_col,
)

print(f"✅ Index {index_name} created on table {source_table}")

# COMMAND ----------

# ✅ Step 5: Check if the index is ready

# Fetch the index handle, then read the "status" entry from its description.
index = vs_client.get_index(endpoint_name=endpoint_name, index_name=index_name)
index_description = index.describe()
status = index_description["status"]

print(f"📡 Index Status: {status}")
