Skip to content

Commit

Permalink
fix(#1533): restrict highlighted fields (#1544)
Browse files (browse the repository at this point in the history)
  • Loading branch information
frascuchon committed Jun 7, 2022
1 parent 56ae851 commit ff39f7e
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 10 deletions.
6 changes: 3 additions & 3 deletions src/rubrix/server/daos/records.py
Expand Up @@ -436,11 +436,11 @@ def __configure_query_highlight__(cls, task: TaskType):
return {
"pre_tags": [cls.__HIGHLIGHT_PRE_TAG__],
"post_tags": [cls.__HIGHLIGHT_POST_TAG__],
"require_field_match": False,
"require_field_match": True,
"fields": {
"text": {},
# TODO: `words` will be removed once the migration will be completed.
# This configuration is included just for old datasets records
"text.*": {},
# TODO(@frascuchon): `words` will be removed in version 0.16.0
"words": {},
**({"inputs.*": {}} if task == TaskType.text_classification else {}),
},
Expand Down
9 changes: 8 additions & 1 deletion src/rubrix/server/elasticseach/query_helpers.py
Expand Up @@ -260,8 +260,15 @@ def text_query(text_query: Optional[str]) -> Dict[str, Any]:
"query": text_query,
}
},
{
"query_string": {
"default_field": "text",
"default_operator": "AND",
"query": text_query,
}
},
],
minimum_should_match="50%",
minimum_should_match="30%",
)

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion tests/functional_tests/test_log_for_text_classification.py
Expand Up @@ -73,7 +73,7 @@ def test_search_keywords(mocked_client):
for keyword in keywords
]
)
assert {"limit", "limits", "limit?"} == top_keywords, top_keywords
assert top_keywords == {"limits", "limited", "limit"}, top_keywords


def test_log_records_with_empty_metadata_list(mocked_client):
Expand Down
24 changes: 19 additions & 5 deletions tests/server/text_classification/test_model.py
Expand Up @@ -286,16 +286,18 @@ def test_query_with_uncovered_by_rules():
"must": {"match_all": {}},
"must_not": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"bool": {
"minimum_should_match": "30%",
"should": [
{
"query_string": {
"boost": "2.0",
"default_field": "words",
"default_operator": "AND",
"query": "query",
"boost": "2.0",
}
},
{
Expand All @@ -305,19 +307,26 @@ def test_query_with_uncovered_by_rules():
"query": "query",
}
},
{
"query_string": {
"default_field": "text",
"default_operator": "AND",
"query": "query",
}
},
],
"minimum_should_match": "50%",
}
},
{
"bool": {
"minimum_should_match": "30%",
"should": [
{
"query_string": {
"boost": "2.0",
"default_field": "words",
"default_operator": "AND",
"query": "other*",
"boost": "2.0",
}
},
{
Expand All @@ -327,12 +336,17 @@ def test_query_with_uncovered_by_rules():
"query": "other*",
}
},
{
"query_string": {
"default_field": "text",
"default_operator": "AND",
"query": "other*",
}
},
],
"minimum_should_match": "50%",
}
},
],
"minimum_should_match": 1,
}
},
}
Expand Down

0 comments on commit ff39f7e

Please sign in to comment.