In [0]:
# This should be a process where the embedding model hosted on an endpoint is called, and the resulting embeddings are then provided to the index similarity search as the query_vector input.


# Below is sample code showing how to do the search with embeddings. The query vector is the vector returned from the endpoint; you need to do it with my own endpoint, though.

# Endpoint name for this workflow.
# NOTE(review): presumably the serving endpoint hosting the CLIP embedding model — confirm.
ENDPOINT_NAME = "clip-embedding-endpoint-135" 
# Fully qualified (three-part, dot-separated) name of the vector search index to query.
INDEX_NAME = "autobricks.agriculture.crop_images_directory_embeddings_index"



"""# Delta Sync Index with pre-calculated embeddings
results = index.similarity_search(
    query_vector=[...],
    columns=["embeddings", "file_path"],
    num_results=5
    )"""

In [0]:
## Here is code that can be used to query a vector database using the Databricks SDK. It is meant to be part of a Streamlit application, but I want you to use the patterns of the functions to get the embeddings. The difference is that my model accepts a particular payload. Ignore the Streamlit parts: use the patterns to get embeddings with my model and then run the vector search. IGNORE THE ST STREAMLIT STUFF!


"""Use these payload formats when calling the model serving endpoint:
Text embedding: {'inputs': {'input_data': ['Hello world']}, 'params': {'input_type': 'text'}}
Image embedding: {'inputs': {'input_data': ['data:image/jpeg;base64,/9j/4AAQ...']}, 'params': {'input_type': 'image'}}"""

import streamlit as st
from databricks.sdk import WorkspaceClient

# Databricks workspace client, constructed with no explicit arguments.
# NOTE(review): presumably resolves host/credentials from the environment — confirm.
w = WorkspaceClient()

# OpenAI-style client obtained from the workspace's serving endpoints API;
# get_embeddings uses it to request embeddings.
openai_client = w.serving_endpoints.get_open_ai_client()

# Endpoint name passed as `model` when get_embeddings requests an embedding.
EMBEDDING_MODEL_ENDPOINT_NAME = "databricks-gte-large-en"


def get_embeddings(text):
    """Return the embedding of ``text`` from the embedding endpoint.

    Sends ``text`` to ``EMBEDDING_MODEL_ENDPOINT_NAME`` through
    ``openai_client`` and returns the ``embedding`` attribute of the first
    item in the response. If anything raises, the error is shown on the
    Streamlit page via ``st.text`` and ``None`` is returned instead.
    """
    try:
        reply = openai_client.embeddings.create(
            input=text,
            model=EMBEDDING_MODEL_ENDPOINT_NAME,
        )
        first_item = reply.data[0]
        return first_item.embedding
    except Exception as e:
        # Best-effort: report the failure in the UI and signal it with None.
        st.text(f"Error generating embeddings: {e}")
        return None


def run_vector_search(prompt: str) -> "list | str":
    """Embed ``prompt`` and query the vector search index with the result.

    Reads the module-level ``index_name`` and ``columns`` values and uses the
    workspace client ``w`` to run the query.

    Args:
        prompt: Text to embed with ``get_embeddings`` and search for.

    Returns:
        ``query_result.result.data_array`` for up to 3 results on success
        (presumably a list of rows — confirm against the SDK), or a
        human-readable error string when embedding or the query fails.
        No exception from the query itself propagates to the caller.
    """
    prompt_vector = get_embeddings(prompt)
    # get_embeddings signals failure with None; a str is treated as failure too.
    if prompt_vector is None or isinstance(prompt_vector, str):
        return f"Failed to generate embeddings: {prompt_vector}"

    # Split the comma-separated column list, dropping blanks: "a, b,,c" -> ["a", "b", "c"].
    stripped_names = (col.strip() for col in columns.split(","))
    columns_to_fetch = [name for name in stripped_names if name]

    try:
        query_result = w.vector_search_indexes.query_index(
            index_name=index_name,
            columns=columns_to_fetch,
            query_vector=prompt_vector,
            num_results=3,
        )
        return query_result.result.data_array
    except Exception as e:
        # Return the failure as text so the caller can display it directly.
        return f"Error during vector search: {e}"


# --- Streamlit page: input widgets followed by the search trigger. ---
# Widgets are created in this order; keep the order unchanged.

# Name of the vector search index to query; read by run_vector_search.
index_name = st.text_input(
    label="Unity Catalog Vector search index:",
    placeholder="catalog.schema.index-name",
)

# Comma-separated column names to return; run_vector_search splits and strips them.
columns = st.text_input(
    label="Columns to retrieve (comma-separated):",
    placeholder="url, name",
    help="Enter one or more column names present in the vector search index, separated by commas. E.g. id, text, url.",
)

# Free-text query that gets embedded and searched for.
text_input = st.text_input(
    label="Enter your search query:",
    placeholder="What is Databricks?",
    key="search_query_key",
)

# Run the search only when the button is pressed; the result is either the
# returned data array or an error string, and is written to the page as-is.
if st.button("Run vector search"):
    result = run_vector_search(text_input)
    st.write("Search results:")
    st.write(result)