From ff39f7e3e527d0e66316468c144c09580f090a4e Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Tue, 7 Jun 2022 22:27:15 +0200 Subject: [PATCH] fix(#1533): restrict highlighted fields (#1544) --- src/rubrix/server/daos/records.py | 6 ++--- .../server/elasticseach/query_helpers.py | 9 ++++++- .../test_log_for_text_classification.py | 2 +- .../server/text_classification/test_model.py | 24 +++++++++++++++---- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/rubrix/server/daos/records.py b/src/rubrix/server/daos/records.py index f00752f9aa..f32940fc61 100644 --- a/src/rubrix/server/daos/records.py +++ b/src/rubrix/server/daos/records.py @@ -436,11 +436,11 @@ def __configure_query_highlight__(cls, task: TaskType): return { "pre_tags": [cls.__HIGHLIGHT_PRE_TAG__], "post_tags": [cls.__HIGHLIGHT_POST_TAG__], - "require_field_match": False, + "require_field_match": True, "fields": { "text": {}, - # TODO: `words` will be removed once the migration will be completed. - # This configuration is included just for old datasets records + "text.*": {}, + # TODO(@frascuchon): `words` will be removed in version 0.16.0 "words": {}, **({"inputs.*": {}} if task == TaskType.text_classification else {}), }, diff --git a/src/rubrix/server/elasticseach/query_helpers.py b/src/rubrix/server/elasticseach/query_helpers.py index e692616970..f33889f9e8 100644 --- a/src/rubrix/server/elasticseach/query_helpers.py +++ b/src/rubrix/server/elasticseach/query_helpers.py @@ -260,8 +260,15 @@ def text_query(text_query: Optional[str]) -> Dict[str, Any]: "query": text_query, } }, + { + "query_string": { + "default_field": "text", + "default_operator": "AND", + "query": text_query, + } + }, ], - minimum_should_match="50%", + minimum_should_match="30%", ) @staticmethod diff --git a/tests/functional_tests/test_log_for_text_classification.py b/tests/functional_tests/test_log_for_text_classification.py index 28d3ae3ed0..1bb5201412 100644 --- a/tests/functional_tests/test_log_for_text_classification.py +++ b/tests/functional_tests/test_log_for_text_classification.py @@ -73,7 +73,7 @@ def test_search_keywords(mocked_client): for keyword in keywords ] ) - assert {"limit", "limits", "limit?"} == top_keywords, top_keywords + assert top_keywords == {"limits", "limited", "limit"}, top_keywords def test_log_records_with_empty_metadata_list(mocked_client): diff --git a/tests/server/text_classification/test_model.py b/tests/server/text_classification/test_model.py index 2d727dac98..5871f162e4 100644 --- a/tests/server/text_classification/test_model.py +++ b/tests/server/text_classification/test_model.py @@ -286,16 +286,18 @@ def test_query_with_uncovered_by_rules(): "must": {"match_all": {}}, "must_not": { "bool": { + "minimum_should_match": 1, "should": [ { "bool": { + "minimum_should_match": "30%", "should": [ { "query_string": { + "boost": "2.0", "default_field": "words", "default_operator": "AND", "query": "query", - "boost": "2.0", } }, { @@ -305,19 +307,26 @@ def test_query_with_uncovered_by_rules(): "query": "query", } }, + { + "query_string": { + "default_field": "text", + "default_operator": "AND", + "query": "query", + } + }, ], - "minimum_should_match": "50%", } }, { "bool": { + "minimum_should_match": "30%", "should": [ { "query_string": { + "boost": "2.0", "default_field": "words", "default_operator": "AND", "query": "other*", - "boost": "2.0", } }, { @@ -327,12 +336,17 @@ def test_query_with_uncovered_by_rules(): "query": "other*", } }, + { + "query_string": { + "default_field": "text", + "default_operator": "AND", + "query": "other*", + } + }, ], - "minimum_should_match": "50%", } }, ], - "minimum_should_match": 1, } }, }