# Synonym expansion playground

In [1]:
from cpr_sdk.search_adaptors import VespaSearchAdapter
from cpr_sdk.utils import dig

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Base URL of the Vespa instance used by every query in this notebook
# (a locally running instance on port 8080).
VESPA_URL: str = "http://localhost:8080"
# Adapter whose `.client` is used below to send raw queries to Vespa.
search_adapter = VespaSearchAdapter(VESPA_URL)

## 1. searching "NDC" using a simple YQL query

This should match "nationally determined contribution" on titles, according to the rulesets.

Both queries (using `userInput` and `contains`) return zero results for "ndc".

In [11]:
# YQL that passes the search text to Vespa through userInput(). The text is not
# interpolated in Python: it is supplied separately as the `query_string`
# request parameter and referenced in the YQL as @query_string, so this is a
# plain string rather than an f-string.
userinput_yql = """select * from sources family_document, document_passage where (userInput(@query_string)) """

# Send the query with the "synonyms_acronyms" rulebase active and tracing
# enabled, and show the raw JSON response (trace included) as the cell output.
search_adapter.client.query(
    {
        "yql": userinput_yql,
        "query_string": "ndc",
        "rules.off": False,  # leave query rewriting rules switched on
        "rules.rulebase": "synonyms_acronyms",
        "trace.level": "3",  # ask Vespa to include a trace in the response
    }
).json

{'trace': {'children': [{'message': "Using query profile 'default'"},
   {'message': "Invoking chain 'vespa' [com.yahoo.prelude.statistics.StatisticsSearcher@native -> com.yahoo.prelude.querytransform.PhrasingSearcher@vespa -> ... -> federation@native]"},
   {'children': [{'message': 'YQL query parsed: [select * from sources family_document, document_passage where weakAnd(default contains "ndc") limit 5000 timeout 20000]'},
     {'message': 'Transforming \'WEAKAND(100) default:ndc\' to \'WEAKAND(100) "national determined contribution" default:ndc\' since \'ndc +> "national determined contribution"\' matched'},
     {'message': 'SemanticSearcher: Rewrote query: [select * from sources family_document, document_passage where weakAnd(default contains phrase("national", "determined", "contribution"), default contains "ndc") limit 5000 timeout 20000]'},
     {'message': 'Federating to [family-document-passage]'},
     {'children': [{'message': 'Stemming with language ENGLISH'},
       {'mess

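Note that the `contains` logic doesn't seem to work here: it fails for 'ndc', but also for 'climate', which is a word in `family_name`. The trace output below points to the likely cause: `family_name` is an attribute field, so `contains` will only match exactly (unless fuzzy matching is used).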

In [10]:
# YQL that matches the search text against the `family_name` field with
# `contains`. The text is bound by Vespa from the `query_string` request
# parameter (@query_string), not interpolated in Python, so this is a plain
# string rather than an f-string.
# NOTE: the trace returned for this query reports that `family_name` is an
# attribute, so `contains` will only match exactly (unless fuzzy is used).
contains_yql = """select * from sources family_document, document_passage where family_name contains @query_string """

# Send the query with the "synonyms_acronyms" rulebase active and tracing
# enabled, and show the raw JSON response (trace included) as the cell output.
search_adapter.client.query(
    {
        "yql": contains_yql,
        "query_string": "climate",
        "rules.off": False,  # leave query rewriting rules switched on
        "rules.rulebase": "synonyms_acronyms",
        "trace.level": "3",  # ask Vespa to include a trace in the response
    }
).json

{'trace': {'children': [{'message': "Using query profile 'default'"},
   {'message': "Invoking chain 'vespa' [com.yahoo.prelude.statistics.StatisticsSearcher@native -> com.yahoo.prelude.querytransform.PhrasingSearcher@vespa -> ... -> federation@native]"},
   {'children': [{'message': "Field 'family_name' is an attribute, 'contains' will only match exactly (unless fuzzy is used)"},
     {'message': 'YQL query parsed: [select * from sources family_document, document_passage where family_name contains "climate" limit 5000 timeout 20000]'},
     {'message': 'Federating to [family-document-passage]'},
     {'children': [{'message': 'Stemming with language ENGLISH'},
       {'message': 'Stemming: [select * from sources family_document, document_passage where family_name contains "climate" limit 5000 timeout 19981]'},
       {'message': 'Lowercasing: [select * from sources family_document, document_passage where family_name contains ({normalizeCase: false}"climate") limit 5000 timeout 19981]'