From 5f421b2645738ad7a531f8774000367bdee729dc Mon Sep 17 00:00:00 2001 From: MJedr Date: Thu, 29 Oct 2020 14:30:37 +0100 Subject: [PATCH] visitor: use proper queries for complex queries without keyword Ref: inspirehep/inspirehep#1472 --- .../visitors/elastic_search_visitor.py | 6 +- tests/test_elastic_search_visitor.py | 64 +++++++++++++++++++ 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/inspire_query_parser/visitors/elastic_search_visitor.py b/inspire_query_parser/visitors/elastic_search_visitor.py index 30be451..4720c56 100644 --- a/inspire_query_parser/visitors/elastic_search_visitor.py +++ b/inspire_query_parser/visitors/elastic_search_visitor.py @@ -653,7 +653,7 @@ def visit_empty_query(self, node): return {'match_all': {}} def visit_value_op(self, node): - return generate_match_query('_all', node.op.value, with_operator_and=True) + return node.op.accept(self) def visit_malformed_query(self, node): return self._generate_malformed_query(node) @@ -790,7 +790,7 @@ def handle_author_query(self, node, fieldnames=None): def visit_value(self, node, fieldnames=None): if not fieldnames: - fieldnames = '_all' + return generate_match_query('_all', node.value, with_operator_and=True) if node.contains_wildcard: return self.handle_value_wildcard(node, fieldnames=fieldnames) @@ -941,7 +941,7 @@ def visit_partial_match_value(self, node, fieldnames=None): return wrap_query_in_nested_if_field_is_nested(query, fieldnames, self.NESTED_FIELDS) - def visit_regex_value(self, node, fieldname): + def visit_regex_value(self, node, fieldname="_all"): query = { 'regexp': { fieldname: node.value diff --git a/tests/test_elastic_search_visitor.py b/tests/test_elastic_search_visitor.py index 98f46c1..8bfeccb 100644 --- a/tests/test_elastic_search_visitor.py +++ b/tests/test_elastic_search_visitor.py @@ -3552,3 +3552,67 @@ def test_wildcard_queries_are_nested_for_nested_fields(): generated_es_query = _parse_query(query_str) assert generated_es_query == expected_es_query + + 
+def test_regex_search_works_without_keyword():
+    """A bare ``/regex/`` query maps to a ``regexp`` clause on ``_all``."""
+    # The query carries no keyword, so the regex is expected to be applied
+    # to the ``_all`` field.
+    regexp_clause = {"_all": "inve"}
+    expected_es_query = {"regexp": regexp_clause}
+
+    generated_es_query = _parse_query('/inve/')
+
+    assert generated_es_query == expected_es_query
+
+
+def test_exact_match_works_without_keyword():
+    """A bare double-quoted term maps to ``match_phrase`` on ``_all``."""
+    # The double-quoted term carries no keyword, so the phrase is expected
+    # to be matched against the ``_all`` field.
+    phrase_clause = {"_all": "invenio"}
+    expected_es_query = {"match_phrase": phrase_clause}
+
+    generated_es_query = _parse_query('"invenio"')
+
+    assert generated_es_query == expected_es_query
+
+
+def test_partial_match_works_without_keyword():
+    """A bare single-quoted term maps to a wildcard ``query_string``."""
+    # The single-quoted term carries no keyword: it is expected wrapped in
+    # ``*`` wildcards with ``_all`` as the default field.
+    query_string_options = {
+        "query": "*invenio*",
+        "default_field": "_all",
+        "analyze_wildcard": True,
+    }
+    expected_es_query = {"query_string": query_string_options}
+
+    assert _parse_query("'invenio'") == expected_es_query
+
+
+def test_exact_match_works_without_keyword_in_complex_query():
+    """Keyword-less exact and plain terms are combined in a ``bool`` query."""
+    # First term: the double-quoted phrase becomes a ``match_phrase`` clause.
+    exact_clause = {
+        "match_phrase": {
+            "_all": "invenio"
+        }
+    }
+    # Second term: the plain word becomes a ``match`` clause with ``and``.
+    plain_clause = {
+        "match": {
+            "_all": {
+                "query": "something",
+                "operator": "and"
+            }
+        }
+    }
+    # Both clauses are required, in the order they appear in the query.
+    expected_es_query = {
+        "bool": {"must": [exact_clause, plain_clause]}
+    }
+
+    generated_es_query = _parse_query('"invenio" something')
+    assert generated_es_query == expected_es_query