# Evaluate `ai_query` Results

In [None]:
# Local-IDE setup: open a Spark session through Databricks Connect.
# NOTE(review): presumably only needed outside a Databricks notebook, where a
# `spark` session is normally predefined — confirm before skipping this cell.
from databricks.connect import DatabricksSession

spark = (
    DatabricksSession.builder
    .remote(serverless=True)
    .getOrCreate()
)

In [None]:
# Namespace (catalog.schema) that every query below reads from and writes to.
# TODO: point both values at your own catalog and schema before running.
CATALOG = "users"
SCHEMA = "david_huang"

## Compare to ground truth

In [None]:
# Create the evaluation table.
#
# Each row of `ner_demo_resolution_output` is left-joined to the exploded
# name variations in `ner_demo_generated_entities`, so the raw merchant string
# on a transaction is mapped back to the merchant it was generated from
# (`ground_truth`). `score` is 1 when the resolved prediction equals that
# ground truth and 0 otherwise; rows with no matching variation get a NULL
# ground truth and therefore also score 0.
evaluation_table_sql = f"""
    CREATE OR REPLACE TABLE {CATALOG}.{SCHEMA}.ner_demo_evaluation as
    WITH base AS (
        SELECT a.transaction_id, a.merchant_name, a.resolved_merchant.result as prediction, b.merchant_name as ground_truth
        FROM {CATALOG}.{SCHEMA}.ner_demo_resolution_output a
        LEFT JOIN (
            SELECT merchant_name, explode(name_variations.name_variations) as generated_merchant_names
            FROM {CATALOG}.{SCHEMA}.ner_demo_generated_entities 
        ) b
        ON a.merchant_name = b.generated_merchant_names
    )
    SELECT *, case when prediction == ground_truth then 1 else 0 end as score
    FROM base
    ;
    """
spark.sql(evaluation_table_sql)

In [None]:
# Overall accuracy: number of correct predictions and their share of all
# evaluated rows. (92% is the figure recorded by the original author's run.)
accuracy_sql = f"""
    SELECT sum(score) as n_correct, sum(score) / count(*) as accuracy 
    FROM {CATALOG}.{SCHEMA}.ner_demo_evaluation
    ;
    """
spark.sql(accuracy_sql)

## Review incorrect predictions

In [None]:
# List every evaluated row whose prediction did not match the ground truth
# (score = 0) so the misses can be inspected by hand.
incorrect_rows_sql = f"""
    SELECT *
    FROM {CATALOG}.{SCHEMA}.ner_demo_evaluation
    WHERE score = 0
    ;
    """
spark.sql(incorrect_rows_sql)