In [0]:
%pip install databricks-vectorsearch openai --quiet

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
# Restart the Python process so the packages installed by the %pip cell above are picked up.
# Variables defined before this call do not carry over, so run it before any other setup.
dbutils.library.restartPython()

In [0]:
# Expose a notebook text widget named OPENAI_API_KEY; the user replaces the
# placeholder default with a real key before running the cells below.
dbutils.widgets.text(name="OPENAI_API_KEY", defaultValue="you_key_here")

In [None]:
# Get the API key from the OPENAI_API_KEY widget (surrounding whitespace from
# copy/paste is removed).
# NOTE(review): widget values are visible to anyone who can open the notebook;
# for shared workspaces consider reading the key with dbutils.secrets.get(...).
OPENAI_API_KEY = dbutils.widgets.get('OPENAI_API_KEY').strip()

# Fail fast with a clear message instead of an opaque authentication error
# from the OpenAI call at the end of the notebook.
if not OPENAI_API_KEY or OPENAI_API_KEY == "you_key_here":
    raise ValueError(
        "Set the OPENAI_API_KEY widget to a real OpenAI API key before running the rest of the notebook."
    )

In [0]:
from databricks.vector_search.client import VectorSearchClient
from databricks.sdk import WorkspaceClient
from openai import OpenAI

In [0]:
# 1. Connect to Databricks
# Vector Search client, constructed with its startup notice disabled.
vsc = VectorSearchClient(disable_notice=True)
# Workspace SDK client built with default settings.
# NOTE(review): `ws` is not referenced by any of the visible cells.
ws = WorkspaceClient()

In [0]:
%sql
-- Create the `vector_search` catalog used by the rest of the notebook.
-- IF NOT EXISTS keeps this cell safe to re-run.
CREATE CATALOG IF NOT EXISTS vector_search
COMMENT 'This is the catalog for the vector search database';

In [0]:
%sql
-- Create the `vectors` schema inside the `vector_search` catalog.
-- IF NOT EXISTS keeps this cell safe to re-run.
CREATE SCHEMA IF NOT EXISTS vector_search.vectors
COMMENT 'This is the schema for the vector search';

In [0]:
# 2. Sample knowledge base
# Each entry is an (id, text) pair holding one short help snippet.
kb_rows = [
    (1, "Refunds are available within 30 days."),
    (2, "Subscriptions renew automatically each month."),
    (3, "Accounts can be canceled from settings."),
    (4, "Go to the profile page and update the profile information when your address or phone change."),
    (5, "Billing information can be added in the Profile page."),
    (6, "Click Profile Picture > Go to Settings > Click Change Password."),
]
kb_columns = ["id", "text"]
docs = spark.createDataFrame(kb_rows, kb_columns)

# Write the DataFrame to the table vector_search.vectors.docs, replacing any
# rows left over from a previous run.
docs.write.mode("overwrite").saveAsTable("vector_search.vectors.docs")

In [0]:
%sql
-- Enable Delta Change Data Feed on the docs table; this is done ahead of the
-- vector search index step that follows in this notebook.
ALTER TABLE vector_search.vectors.docs
  SET TBLPROPERTIES (delta.enableChangeDataFeed = true)

In [0]:
# 3. Create / connect to vector index
# On a fresh workspace neither the endpoint nor the index exists yet, so a bare
# get_index() call fails. Create whatever is missing, otherwise just connect.
ENDPOINT_NAME = "my_endpoint"
INDEX_NAME = "vector_search.vectors.search_index"
SOURCE_TABLE = "vector_search.vectors.docs"
# Model serving endpoint used to embed the "text" column.
# NOTE(review): confirm this embedding endpoint is available in your workspace.
EMBEDDING_ENDPOINT = "databricks-gte-large-en"

# Create the Vector Search endpoint only if it is not already there.
existing_endpoints = {
    ep["name"] for ep in vsc.list_endpoints().get("endpoints", [])
}
if ENDPOINT_NAME not in existing_endpoints:
    vsc.create_endpoint_and_wait(name=ENDPOINT_NAME, endpoint_type="STANDARD")

# Reuse the index when it already exists; otherwise build a Delta Sync index
# over the docs table and wait until it is ready to be queried.
existing_indexes = {
    ix["name"] for ix in vsc.list_indexes(ENDPOINT_NAME).get("vector_indexes", [])
}
if INDEX_NAME in existing_indexes:
    index = vsc.get_index(
        endpoint_name=ENDPOINT_NAME,
        index_name=INDEX_NAME
    )
else:
    index = vsc.create_delta_sync_index_and_wait(
        endpoint_name=ENDPOINT_NAME,
        index_name=INDEX_NAME,
        source_table_name=SOURCE_TABLE,
        primary_key="id",
        pipeline_type="TRIGGERED",
        embedding_source_column="text",
        embedding_model_endpoint_name=EMBEDDING_ENDPOINT,
    )

In [0]:
# 4. Semantic search
# Ask the index for the two entries closest to the question, returning only
# the "text" column.
query = "How do I change my password?"
results = index.similarity_search(
    query_text=query,
    columns=["text"],
    num_results=2,
    disable_notice=True
)

# context
# Only "text" was requested, so it is the first value of every returned row.
# The "data_array" key may be missing when nothing matches, so fall back to an
# empty list instead of raising KeyError; context is then an empty string.
matched_rows = (results.get("result") or {}).get("data_array") or []
context = " ".join(row[0] for row in matched_rows)

In [0]:
# Display the context string assembled from the search results.
context

'Click Profile Picture > Go to Settings > Click Change Password. Go to the profile page and update the profile information when your address or phone change.'

In [0]:
# 5. Call LLM with retrieved context
client = OpenAI(api_key=OPENAI_API_KEY)

# Single user message: the retrieved context first, then the question.
prompt = f"Answer using only this context:\n{context}\n\nQuestion: {query}"
messages = [{"role": "user", "content": prompt}]

response = client.chat.completions.create(model="gpt-4o-mini", messages=messages)

# Print the text of the first returned choice.
answer = response.choices[0].message.content
print(answer)


Click Profile Picture > Go to Settings > Click Change Password.
