# Resolve Entities with `ai_query`

In [None]:
# If running on a local IDE
# NOTE(review): inside a Databricks notebook a `spark` session is presumably
# already provided, in which case this cell can be skipped -- confirm for your environment.
from databricks.connect import DatabricksSession

# Get (or reuse) a Databricks Connect session that targets serverless compute.
spark = DatabricksSession.builder.remote(serverless=True).getOrCreate()

In [None]:
# Notebook-wide settings: every table referenced below lives under
# {CATALOG}.{SCHEMA}, and LLM_ENDPOINT is the endpoint name passed to ai_query.

# TODO: your catalog name
CATALOG = "users"

# TODO: your schema name
SCHEMA = "david_huang"

# Endpoint used for the entity-resolution step
LLM_ENDPOINT = "databricks-llama-4-maverick"

In [None]:
# Peek at the first 5 rows of the input transactions table.
spark.sql(f"select * from {CATALOG}.{SCHEMA}.ner_demo_generated_transactions limit 5;")

## Retrieve 10 possible merchant entities for each transaction

In [None]:
# Number of candidate merchants requested from the vector index per transaction.
N_RESULTS = 10

# (Re)create {CATALOG}.{SCHEMA}.ner_demo_top_results.
# For every row of ner_demo_generated_transactions, the LATERAL subquery runs
# VECTOR_SEARCH against ner_demo_merchant_index using that row's merchant_name
# as the query text and asks for N_RESULTS hits. The hits are then collapsed
# back to one row per (transaction_id, merchant_name, transaction_date), with
# the candidate names in `possible_entities` and their scores in `search_score`.
# NOTE(review): neither array_agg has an explicit ordering, so the two arrays
# are presumably index-aligned but not guaranteed to be sorted by score -- confirm
# if downstream logic depends on position.
spark.sql(
    f"""
    CREATE OR REPLACE TABLE {CATALOG}.{SCHEMA}.ner_demo_top_results as
    SELECT
        trans.transaction_id,
        trans.merchant_name,
        trans.transaction_date,
        array_agg(search.merchant_name) as possible_entities,
        array_agg(search.search_score) as search_score
    FROM (
        select *
        from {CATALOG}.{SCHEMA}.ner_demo_generated_transactions
    ) as trans,
    LATERAL (
        SELECT *
        FROM VECTOR_SEARCH(
            index => "{CATALOG}.{SCHEMA}.ner_demo_merchant_index",
            query_text => merchant_name,
            num_results => {N_RESULTS}
        )
    ) as search
    GROUP BY
        trans.transaction_id,
        trans.merchant_name,
        trans.transaction_date
    ;
"""
)

In [None]:
# Peek at the first 5 rows of the candidate table created in the previous cell.
spark.sql(f"select * from {CATALOG}.{SCHEMA}.ner_demo_top_results limit 5;")

## Use an LLM to pick the best match from the top candidates

In [None]:
# Fixed instruction preamble for the entity-resolution LLM call. The sentences
# are joined with single spaces and the text ends with a newline so that
# per-row content can be appended directly after it.
ner_prompt = " ".join(
    [
        "You are an expert reviewer.",
        "Your job is to view the following input which may contain spelling mistakes or extra characters,",
        "and match it to one of the possible entities in the following list.",
        "RETURN ONLY THE MATCHED ENTITY AND NOTHING ELSE.",
        "DO NOT REPEAT THE PROMPT OR ADD ANY ADDITIONAL INFORMATION.\n",
    ]
)

# Show the assembled preamble for a quick visual check.
print(ner_prompt)

In [None]:
# (Re)create {CATALOG}.{SCHEMA}.ner_demo_resolution_output.
# The `base` CTE builds one prompt string per row of ner_demo_top_results:
# the fixed preamble, then the raw merchant_name, then the candidate list
# (the array cast to a string), ending with a "MATCHED ENTITY ->" cue.
# The outer SELECT sends that prompt to LLM_ENDPOINT via ai_query and stores
# the answer in `resolved_merchant`.
#
# The preamble is embedded in a single-quoted SQL string literal, so escape
# backslashes and single quotes first; otherwise editing the prompt to contain
# an apostrophe would break (or silently alter) the statement. For a prompt
# without those characters the generated SQL is unchanged.
#
# NOTE(review): per the ai_query documentation, `failOnError => false` appears
# to make the function return a struct (result + errorMessage) rather than a
# plain string -- confirm whether `resolved_merchant` should instead select the
# `result` field.
_sql_safe_prompt = ner_prompt.replace("\\", "\\\\").replace("'", "\\'")

spark.sql(
    f"""
    CREATE OR REPLACE TABLE {CATALOG}.{SCHEMA}.ner_demo_resolution_output as
    WITH base AS (
        SELECT *,
            '{_sql_safe_prompt}' ||
            '\nINPUT -> ' ||
            merchant_name ||
            '\nPOSSIBLE ENTITIES -> ' ||
            cast(possible_entities as string) ||
            '\nMATCHED ENTITY ->' as ner_prompt
        FROM {CATALOG}.{SCHEMA}.ner_demo_top_results
    )
    SELECT *,
        ai_query(
            '{LLM_ENDPOINT}',
            ner_prompt,
            failOnError => false
        ) as resolved_merchant
    FROM base
    ;
    """
)

In [None]:
# Compare the raw merchant_name with the LLM's resolved_merchant for the first 5 rows.
spark.sql(
    f"select merchant_name, resolved_merchant from {CATALOG}.{SCHEMA}.ner_demo_resolution_output limit 5;"
)