From ff39f7e3e527d0e66316468c144c09580f090a4e Mon Sep 17 00:00:00 2001
From: Francisco Aranda <francisco@recogn.ai>
Date: Tue, 7 Jun 2022 22:27:15 +0200
Subject: [PATCH] fix(#1533): restrict highlighted fields (#1544)

---
 src/rubrix/server/daos/records.py             |  6 ++---
 .../server/elasticseach/query_helpers.py      |  9 ++++++-
 .../test_log_for_text_classification.py       |  2 +-
 .../server/text_classification/test_model.py  | 24 +++++++++++++++----
 4 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/src/rubrix/server/daos/records.py b/src/rubrix/server/daos/records.py
index f00752f9aa..f32940fc61 100644
--- a/src/rubrix/server/daos/records.py
+++ b/src/rubrix/server/daos/records.py
@@ -436,11 +436,11 @@ def __configure_query_highlight__(cls, task: TaskType):
         return {
             "pre_tags": [cls.__HIGHLIGHT_PRE_TAG__],
             "post_tags": [cls.__HIGHLIGHT_POST_TAG__],
-            "require_field_match": False,
+            "require_field_match": True,
             "fields": {
                 "text": {},
-                # TODO: `words` will be removed once the migration will be completed.
-                #  This configuration is included just for old datasets records
+                "text.*": {},
+                # TODO(@frascuchon): drop `words` (kept for old-dataset records) in 0.16.0
                 "words": {},
                 **({"inputs.*": {}} if task == TaskType.text_classification else {}),
             },
diff --git a/src/rubrix/server/elasticseach/query_helpers.py b/src/rubrix/server/elasticseach/query_helpers.py
index e692616970..f33889f9e8 100644
--- a/src/rubrix/server/elasticseach/query_helpers.py
+++ b/src/rubrix/server/elasticseach/query_helpers.py
@@ -260,8 +260,15 @@ def text_query(text_query: Optional[str]) -> Dict[str, Any]:
                         "query": text_query,
                     }
                 },
+                {
+                    "query_string": {
+                        "default_field": "text",
+                        "default_operator": "AND",
+                        "query": text_query,
+                    }
+                },
             ],
-            minimum_should_match="50%",
+            minimum_should_match="30%",
         )
 
     @staticmethod
diff --git a/tests/functional_tests/test_log_for_text_classification.py b/tests/functional_tests/test_log_for_text_classification.py
index 28d3ae3ed0..1bb5201412 100644
--- a/tests/functional_tests/test_log_for_text_classification.py
+++ b/tests/functional_tests/test_log_for_text_classification.py
@@ -73,7 +73,7 @@ def test_search_keywords(mocked_client):
             for keyword in keywords
         ]
     )
-    assert {"limit", "limits", "limit?"} == top_keywords, top_keywords
+    assert top_keywords == {"limits", "limited", "limit"}, top_keywords
 
 
 def test_log_records_with_empty_metadata_list(mocked_client):
diff --git a/tests/server/text_classification/test_model.py b/tests/server/text_classification/test_model.py
index 2d727dac98..5871f162e4 100644
--- a/tests/server/text_classification/test_model.py
+++ b/tests/server/text_classification/test_model.py
@@ -286,16 +286,18 @@ def test_query_with_uncovered_by_rules():
             "must": {"match_all": {}},
             "must_not": {
                 "bool": {
+                    "minimum_should_match": 1,
                     "should": [
                         {
                             "bool": {
+                                "minimum_should_match": "30%",
                                 "should": [
                                     {
                                         "query_string": {
+                                            "boost": "2.0",
                                             "default_field": "words",
                                             "default_operator": "AND",
                                             "query": "query",
-                                            "boost": "2.0",
                                         }
                                     },
                                     {
@@ -305,19 +307,26 @@ def test_query_with_uncovered_by_rules():
                                             "query": "query",
                                         }
                                     },
+                                    {
+                                        "query_string": {
+                                            "default_field": "text",
+                                            "default_operator": "AND",
+                                            "query": "query",
+                                        }
+                                    },
                                 ],
-                                "minimum_should_match": "50%",
                             }
                         },
                         {
                             "bool": {
+                                "minimum_should_match": "30%",
                                 "should": [
                                     {
                                         "query_string": {
+                                            "boost": "2.0",
                                             "default_field": "words",
                                             "default_operator": "AND",
                                             "query": "other*",
-                                            "boost": "2.0",
                                         }
                                     },
                                     {
@@ -327,12 +336,17 @@ def test_query_with_uncovered_by_rules():
                                             "query": "other*",
                                         }
                                     },
+                                    {
+                                        "query_string": {
+                                            "default_field": "text",
+                                            "default_operator": "AND",
+                                            "query": "other*",
+                                        }
+                                    },
                                 ],
-                                "minimum_should_match": "50%",
                             }
                         },
                     ],
-                    "minimum_should_match": 1,
                 }
             },
         }