# Campaigns Vector Search

In [0]:
%pip install -U -qqqq databricks-langchain
# Restart the Python process so the package version installed above is the one later cells import.
dbutils.library.restartPython()

In [0]:
from databricks.vector_search.client import VectorSearchClient

In [0]:
%run ./_resources/00_setup

## Step 1: Create Vector Search Endpoint

In [0]:
# Shared Vector Search client used by the endpoint and index cells below.
# NOTE(review): `disable_notice=True` presumably silences the client's informational notice — confirm.
client = VectorSearchClient(
    disable_notice=True,
)

In [0]:
# Create the endpoint if it doesn't already exist (uncomment the call below to run it).

# client.create_endpoint(
#     name=config['endpoint_name'], 
#     endpoint_type="STANDARD"
# )

## Step 2: Create Index

In [0]:
# Turn on the Delta change data feed for the source table before the Delta Sync index is created.
# The table is addressed by its fully qualified name, like the other cells in this notebook,
# so the statement does not depend on the session's current catalog/schema.
spark.sql(
    f"ALTER TABLE {config['catalog']}.{config['schema']}.campaigns_performance "
    "SET TBLPROPERTIES (delta.enableChangeDataFeed = true)"
)

In [0]:
# Load the source table and print its schema to confirm the columns used by the index.
df = spark.table(
    "{}.{}.campaigns_performance".format(config['catalog'], config['schema'])
)
df.printSchema()

In [0]:
# Create a Delta Sync index over the campaigns table.
# The text in `optimized_ad_copy` is embedded by the named model serving endpoint;
# only the columns listed in `columns_to_sync` are copied into the index.
index = client.create_delta_sync_index(
    # Where the index lives and what it is called.
    endpoint_name=config['endpoint_name'],
    index_name="{}.{}.{}".format(config['catalog'], config['schema'], config['index_name']),
    # What is indexed.
    source_table_name="{}.{}.campaigns_performance".format(config['catalog'], config['schema']),
    primary_key="campaign_id",
    columns_to_sync=["campaign_id", "segment", "optimized_ad_copy"],
    # How embeddings are computed.
    embedding_source_column="optimized_ad_copy",
    embedding_model_endpoint_name="databricks-gte-large-en",
    # Sync mode.
    pipeline_type="TRIGGERED",
)

In [0]:
# Wait for the index to come online. Expect this command to take several minutes.
# The wait is bounded: it stops with an error if the index reports a failed state
# or does not become ONLINE before the deadline, instead of polling forever.
import time

MAX_WAIT_SECONDS = 30 * 60    # upper bound on the total wait
POLL_INTERVAL_SECONDS = 10    # pause between status checks

deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    # `status` / `detailed_state` may be missing from the response; treat that as "not ready yet".
    detailed_state = (index.describe().get('status') or {}).get('detailed_state') or ''
    if detailed_state.startswith('ONLINE'):
        break
    # NOTE(review): non-ONLINE states containing "FAILED" are assumed to be terminal — confirm against the SDK.
    if 'FAILED' in detailed_state:
        raise RuntimeError(f"Index reported state {detailed_state!r}; it will not come ONLINE.")
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Index not ONLINE after {MAX_WAIT_SECONDS} seconds (last state: {detailed_state!r})."
        )
    print("Waiting for index to be ONLINE...")
    time.sleep(POLL_INTERVAL_SECONDS)
print("Index is ONLINE")
index.describe()

## Step 3: Query Index

In [0]:
# Retrieve the two campaigns whose ad copy is closest to the query text.
results = index.similarity_search(
    num_results=2,
    columns=["campaign_id", "optimized_ad_copy"],
    query_text="Fitness and exercise",
)

results

### Use segment filter

In [0]:
# Same query as above, restricted to rows whose `segment` is "College Student".
results = index.similarity_search(
    num_results=2,
    filters={"segment": ["College Student"]},
    columns=["campaign_id", "optimized_ad_copy"],
    query_text="Fitness and exercise",
)

results