# MOBI Docs Vector Search
This notebook demonstrates how to manually create a Vector Search endpoint and a Delta Sync index over the `bronze_site` table, and how to query that index from SQL.

In [0]:
%pip install databricks-vectorsearch mlflow requests
%restart_python

In [0]:
# Setup: expose the local "src" directory to Python's import machinery.
import sys
from pathlib import Path

src_path = Path.cwd().joinpath("src")
# Register the directory once only, at the front of the search path.
if not any(entry == str(src_path) for entry in sys.path):
    sys.path[:0] = [str(src_path)]


In [0]:
import mlflow

# Load notebook settings from the development config file.
config = mlflow.models.ModelConfig(development_config='config.yaml')


def _config_value(key, default):
    """Return the config value for ``key``, or ``default`` if it is missing or empty.

    ``ModelConfig.get`` raises ``KeyError`` for an absent key, so a bare
    ``config.get(key) or default`` only covers keys that are present but empty.
    """
    try:
        return config.get(key) or default
    except KeyError:
        return default


# The widgets are seeded from the config (or the fallback) and are what the
# %sql cells read through ${catalog} / ${schema}.
dbutils.widgets.text('catalog', _config_value('catalog', 'hive_metastore'))
dbutils.widgets.text('schema', _config_value('schema', 'default'))

catalog = dbutils.widgets.get('catalog')
schema = dbutils.widgets.get('schema')
print(f"Using catalog.schema: {catalog}.{schema}")



In [0]:
# Preview the first ten rows of the bronze_site table produced earlier.
bronze_site_table = f"`{catalog}`.`{schema}`.bronze_site"
display(spark.table(bronze_site_table).limit(10))

In [0]:
%sql
-- Turn on the Delta change data feed for the bronze_site source table.
ALTER TABLE `${catalog}`.`${schema}`.bronze_site
SET TBLPROPERTIES (
  delta.enableChangeDataFeed = true
);

## Create Vector Search Endpoint

In [0]:
from databricks.vector_search.client import VectorSearchClient

client = VectorSearchClient(disable_notice=True)

# Creating an endpoint that already exists raises an error, so only create it
# when it is missing. This keeps the cell safe under Restart & Run All.
endpoint_name = "mobi_vs_endpoint"
existing_endpoints = {
    endpoint["name"]
    for endpoint in client.list_endpoints().get("endpoints", [])
}
if endpoint_name in existing_endpoints:
    print(f"Vector Search endpoint '{endpoint_name}' already exists; skipping creation.")
else:
    client.create_endpoint(
        name=endpoint_name,
        endpoint_type="STANDARD"
    )
    print(f"Requested creation of Vector Search endpoint '{endpoint_name}'.")

## Create Vector Search Index

In [0]:
# The Vector Search API takes plain three-level Unity Catalog names
# (catalog.schema.object); backtick quoting is SQL syntax and is not used here.
source_table_name = f"{catalog}.{schema}.bronze_site"
index_name = f"{catalog}.{schema}.site_index"

# Re-running the cell must not fail once the index exists: reuse it if present.
existing_indexes = {
    entry["name"]
    for entry in client.list_indexes(name="mobi_vs_endpoint").get("vector_indexes", [])
}
if index_name in existing_indexes:
    index = client.get_index(
        endpoint_name="mobi_vs_endpoint",
        index_name=index_name,
    )
else:
    index = client.create_delta_sync_index(
        endpoint_name="mobi_vs_endpoint",
        source_table_name=source_table_name,
        index_name=index_name,
        pipeline_type="TRIGGERED",
        primary_key="url",                # Must be present in your table
        embedding_source_column="content_md",  # Text column for embedding
        embedding_model_endpoint_name="databricks-gte-large-en" # or any available model
    )

In [0]:
%sql
-- Hybrid search for "Trip Fares" against the site_index index created in this
-- notebook (the previous name, documents_index, is not created here).
-- NOTE(review): assumes the index exposes a numeric unique_id column; confirm
-- against the bronze_site schema. The alias says "bin_10" but the divisor is 5.
SELECT 
  *, 
  floor(unique_id / 5) AS unique_id_bin_10 
FROM vector_search(
  index=>'`${catalog}`.`${schema}`.site_index',
  query_text=>"Trip Fares",
  num_results=>50,
  query_type=>'hybrid'
)
ORDER BY unique_id_bin_10 DESC

In [0]:
%sql
-- Defines the table function site_search(description): runs a hybrid
-- vector_search for the given text against the site_index index and returns
-- at most 3 rows.
-- NOTE(review): the body is SELECT * while RETURNS TABLE declares
-- (unique_id, file_name, value, search_score). Confirm these names, order and
-- types match the columns the index actually returns; search_score is
-- declared as STRING here, so verify that is intended.
CREATE OR REPLACE FUNCTION `${catalog}`.`${schema}`.site_search(
  description STRING COMMENT 'A search of mobi documents'
)
RETURNS TABLE (
  unique_id INTEGER,
  file_name STRING,
  value STRING,
  search_score STRING
)
COMMENT 'Returns the top three documents matching semantic search.
'
RETURN
SELECT *
FROM vector_search(
  index=>'`${catalog}`.`${schema}`.site_index',
  query_text=>description,
  num_results=>3,
  query_type=>'hybrid'
)

In [0]:
%sql
-- Call the site_search function defined above (this notebook defines no doc_search).
SELECT * FROM `${catalog}`.`${schema}`.site_search("Trip Fares")