In [0]:
%pip install -U -qq databricks-vectorsearch databricks-sdk flashrank PyPDF2
dbutils.library.restartPython()

In [0]:
%run ./9-Common-Code

In [0]:
import time

def wait_for_vs_endpoint_to_be_ready(vsc, vs_endpoint_name, retries: int = 180, pause_sec: int = 10):
    """
    Poll a Vector Search endpoint until it reports the ONLINE state.

    Args:
        vsc: Client object exposing ``get_endpoint(name)``, which returns a
            dict-like endpoint description.
        vs_endpoint_name: Name of the endpoint to poll.
        retries: Maximum number of polling attempts.
        pause_sec: Seconds to sleep after each unsuccessful attempt.

    Returns:
        The endpoint description returned by the first poll that reported ONLINE.

    Raises:
        RuntimeError: If, after the first 6 polls, the endpoint reports a state
            other than ONLINE, PROVISIONING, or empty.
        TimeoutError: If the endpoint is not ONLINE after ``retries`` polls.
            This includes ``retries <= 0``, where no poll is made at all.
    """
    # Bound before the loop so the timeout message below is well-defined even
    # when ``retries`` is 0 and the loop body never runs.
    st = ""
    for i in range(retries):
        endpoint = vsc.get_endpoint(vs_endpoint_name)
        # The status may be surfaced under either "endpoint_status" or "status".
        # A missing or None status/state is treated as "no state reported yet".
        status = endpoint.get("endpoint_status", endpoint.get("status", {})) or {}
        st = (status.get("state") or "").upper()
        if st == "ONLINE":
            return endpoint
        # Any state is tolerated during the first 6 polls; afterwards only
        # PROVISIONING or an empty state keeps the loop waiting.
        if st in ("PROVISIONING", "") or i < 6:
            if i % 10 == 0:
                # Report progress on every 10th poll only, to limit output.
                print(f"[{i:>3}] Endpoint {vs_endpoint_name} is {st!r}, waiting...")
            time.sleep(pause_sec)
        else:
            raise RuntimeError(f"Endpoint entered unexpected state {st!r}: {endpoint}")
    # All attempts used up without ever seeing ONLINE.
    raise TimeoutError(f"Timed out waiting for endpoint {vs_endpoint_name}: last status {st!r}")


In [0]:
from databricks.vector_search.client import VectorSearchClient

# Authenticate as the current Databricks user or service principal
client = VectorSearchClient()

# Create the endpoint only when it does not exist yet, so that re-running this
# cell (e.g. Restart & Run All) does not fail on an already-created endpoint.
# endpoint_type can be "STANDARD" or "STORAGE_OPTIMIZED".
existing_endpoint_names = [
    ep.get("name") for ep in (client.list_endpoints().get("endpoints") or [])
]
if VECTOR_SEARCH_ENDPOINT_NAME not in existing_endpoint_names:
    client.create_endpoint(
        name=VECTOR_SEARCH_ENDPOINT_NAME,
        endpoint_type="STANDARD"
    )

# Block until the endpoint reports ONLINE
wait_for_vs_endpoint_to_be_ready(client, VECTOR_SEARCH_ENDPOINT_NAME)
print(f"Endpoint named {VECTOR_SEARCH_ENDPOINT_NAME} is ready.")

In [0]:
# Source table to index
source_table_fullname = f"{CATALOG_NAME}.{SCHEMA_NAME}.lab_wikipedia_text_embeddings"

# Fully qualified name under which the index is stored
vs_index_fullname = f"{CATALOG_NAME}.{SCHEMA_NAME}.lab_wikipedia_self_managed_vs_index"

# NOTE(review): vsc, vs_endpoint_name, index_exists and wait_for_index_to_be_ready
# are not defined in the cells shown here — presumably provided by the %run of
# 9-Common-Code; confirm.
if index_exists(vsc, vs_endpoint_name, vs_index_fullname):
    # The index is already there: trigger a sync so it picks up the data
    # currently saved in the source table.
    vsc.get_index(vs_endpoint_name, vs_index_fullname).sync()
else:
    print(f"Creating index {vs_index_fullname} on endpoint {vs_endpoint_name}...")
    vsc.create_delta_sync_index(
        endpoint_name=vs_endpoint_name,
        index_name=vs_index_fullname,
        source_table_name=source_table_fullname,
        primary_key="id",
        pipeline_type="TRIGGERED",  # syncs have to be triggered manually
        embedding_vector_column="embedding",
        embedding_dimension=1024,  # must match the embedding model's size (gte)
    )

# Wait until the index is ready and all embeddings have been indexed
wait_for_index_to_be_ready(vsc, vs_endpoint_name, vs_index_fullname)

In [0]:
import mlflow.deployments

# Client for the Databricks model serving endpoints
deploy_client = mlflow.deployments.get_deploy_client("databricks")

question = "How Generative AI impacts humans?"

# Send the question to the embedding endpoint
response = deploy_client.predict(
    endpoint="databricks-gte-large-en",
    inputs={"input": [question]},
)

# Collect the "embedding" field of every entry in the response data
embeddings = [item["embedding"] for item in response.data]
print(embeddings)

In [0]:
# Imported in this cell so it does not depend on `pprint` leaking in from
# another notebook or an earlier, unseen cell.
from pprint import pprint

# Retrieve the 5 documents most similar to the question embedding.
results = vsc.get_index(vs_endpoint_name, vs_index_fullname).similarity_search(
    query_vector=embeddings[0],
    columns=["pdf_name", "content"],
    num_results=5,
)

# Reshape each returned row into the {"file", "text"} dict format used by the
# reranker library. Assumes each row lists values in the order of `columns`
# requested above (pdf_name, then content) — TODO confirm.
# A response without "result"/"data_array" yields an empty passage list.
passages = [
    {"file": row[0], "text": row[1]}
    for row in results.get("result", {}).get("data_array", [])
]

pprint(passages)

In [0]:
from flashrank import Ranker, RerankRequest

# Cache directory handed to the ranker; make sure the model file exists at this
# path, or change the path accordingly
cache_dir = f"{DA.paths.working_dir}/opt"

ranker = Ranker(cache_dir=cache_dir, model_name="rank-T5-flan")

# Rerank the retrieved passages against the original question
rerankrequest = RerankRequest(passages=passages, query=question)
results = ranker.rerank(rerankrequest)

# Show the first three reranked entries, separated by a blank line
print("\n\n".join(str(entry) for entry in results[:3]))