Skip to content

Commit

Permalink
Merge pull request #138 from inspirehep/remove-implicit-and
Browse files Browse the repository at this point in the history
parser: remove implicit `and` for SPIRES-style
  • Loading branch information
michamos committed Sep 9, 2020
2 parents 6e6f3d0 + af11c03 commit e1a8a10
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 19 deletions.
7 changes: 0 additions & 7 deletions inspire_query_parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,13 +277,6 @@ def parse_terminal_token(cls, parser, text):
return text, \
SyntaxError("parsing a keyword (token followed by \":\"): \"" + repr(matched_token) + "\"")

# Attempt to recognize whether the current terminal is a non-shortened INSPIRE keyword. This is
# done to support implicit-and in SPIRES-style keyword queries. Matching only the non-shortened
# form of the keywords keeps this recognition from being eager.
if not parser._parsing_parenthesized_simple_values_expression \
and matched_token in INSPIRE_KEYWORDS_SET:
return text, SyntaxError("parsing a keyword (non shortened INSPIRE keyword)")

result = remaining_text, matched_token
else:
result = text, SyntaxError("expecting match on " + repr(cls.token_regex.pattern))
Expand Down
2 changes: 1 addition & 1 deletion tests/test_elastic_search_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,7 +942,7 @@ def test_elastic_search_visitor_with_query_with_malformed_part_and_default_malfo
'inspire_query_parser.visitors.elastic_search_visitor.DEFAULT_ES_OPERATOR_FOR_MALFORMED_QUERIES', ES_SHOULD_QUERY
)
def test_elastic_search_visitor_with_query_with_malformed_part_and_default_malformed_query_op_as_should():
query_str = 'subject astrophysics and author'
query_str = 'subject astrophysics and author:'
expected_es_query = \
{
"bool": {
Expand Down
6 changes: 3 additions & 3 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@ def test_that_parse_terminal_token_does_not_accept_token_followed_by_colon():
assert returned_unrecognised_text == query_str


def test_that_parse_terminal_token_does_not_accept_non_shortened_inspire_keywords():
def test_that_parse_terminal_token_accepts_non_shortened_inspire_keywords():
query_str = "exact-author"

parser = StatefulParser()

returned_unrecognised_text, returned_result = SimpleValueUnit.parse_terminal_token(parser, query_str)
assert isinstance(returned_result, SyntaxError)
assert returned_unrecognised_text == query_str
assert returned_result == query_str
assert returned_unrecognised_text == ""


# Testing SimpleValueUnit (terminals recognition) cases (no parenthesized SimpleValue).
Expand Down
35 changes: 31 additions & 4 deletions tests/test_parser_functionality.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@
InvenioKeywordQuery(InspireKeyword('cite'),
Value(SimpleValue('M.N.1'))))))))))])
),
("author ellis title boson not title higgs",
("author ellis and title boson not title higgs",
Query([Statement(BooleanQuery(
Expression(SimpleQuery(SpiresKeywordQuery(InspireKeyword(u'author'), Value(SimpleValue(u'ellis'))))),
And(), Statement(BooleanQuery(
Expand Down Expand Up @@ -465,7 +465,7 @@
And(), Statement(
Expression(SimpleQuery(SpiresKeywordQuery(InspireKeyword('title'), Value(SimpleValue('foo'))))))))])
),
('date this month author ellis',
('date this month and author ellis',
Query([Statement(BooleanQuery(
Expression(SimpleQuery(SpiresKeywordQuery(InspireKeyword('date'), Value(SimpleValue('this month'))))),
And(), Statement(Expression(
Expand Down Expand Up @@ -513,10 +513,37 @@
SimpleQuery(SpiresKeywordQuery(InspireKeyword('author'), Value(SimpleValue('o*aigh'))))))))))])
),
# Unrecognized queries
# Queries for implicit "and" removal
('title and foo',
Query([MalformedQueryWords(['title', 'and', 'foo'])])
Query([Statement(Expression(
SimpleQuery(Value(SimpleValueBooleanQuery(SimpleValue('title'), And(), SimpleValue('foo'))))
))])
),
('author takumi doi',
Query([Statement(Expression(
SimpleQuery(SpiresKeywordQuery(InspireKeyword('author'), Value(SimpleValue('takumi doi'))))
))])
),
('title cms and title experiment and date 2008',
Query([Statement(BooleanQuery(
Expression(SimpleQuery(SpiresKeywordQuery(InspireKeyword('title'), Value(SimpleValue('cms'))))),
And(),
Statement(BooleanQuery(
Expression(SimpleQuery(SpiresKeywordQuery(InspireKeyword('title'), Value(SimpleValue('experiment'))))),
And(),
Statement(Expression(SimpleQuery(SpiresKeywordQuery(InspireKeyword('date'), Value(SimpleValue('2008'))))))
))
))])
),
('author:witten title:foo',
Query([Statement(BooleanQuery(
Expression(SimpleQuery(InvenioKeywordQuery(InspireKeyword('author'), Value(SimpleValue('witten'))))),
And(),
Statement(Expression(SimpleQuery(InvenioKeywordQuery(InspireKeyword('title'), Value(SimpleValue('foo'))))))
))])
),
# Unrecognized queries
('title γ-radiation and and',
Query([Statement(Expression(
SimpleQuery(SpiresKeywordQuery(InspireKeyword('title'), Value(SimpleValue('\u03b3-radiation')))))),
Expand Down
28 changes: 24 additions & 4 deletions tests/test_restructuring_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@
# ##### Boolean operators at terminals level ####
(
'author ellis title boson not higgs',
'author ellis title:boson not higgs',
AndOp(
KeywordOp(Keyword('author'), Value('ellis')),
AndOp(
Expand Down Expand Up @@ -410,14 +410,14 @@
)
),
(
'date this month author ellis',
'date this month and author ellis',
AndOp(
KeywordOp(Keyword('date'), Value(str(date.today()))),
KeywordOp(Keyword('author'), Value('ellis'))
)
),
(
'date this month - 3 author ellis',
'date this month - 3 and author ellis',
AndOp(
KeywordOp(Keyword('date'), Value(str(date.today() - relativedelta(months=3)))),
KeywordOp(Keyword('author'), Value('ellis'))
Expand Down Expand Up @@ -477,8 +477,28 @@
KeywordOp(Keyword('texkeys'), Value('Hirata:1992*', contains_wildcard=True))
),
# Queries for implicit "and" removal
('title and foo', AndOp(ValueOp(Value('title')), ValueOp(Value('foo')))),
('author takumi doi', KeywordOp(Keyword('author'), Value('takumi doi'))),
(
'title cms and title experiment and date 2008',
AndOp(
KeywordOp(Keyword('title'), Value('cms')),
AndOp(
KeywordOp(Keyword('title'), Value('experiment')),
KeywordOp(Keyword('date'), Value('2008'))
)
)
),
(
'author:witten title:foo',
AndOp(
KeywordOp(Keyword('author'), Value('witten')),
KeywordOp(Keyword('title'), Value('foo'))
)
),
# Unrecognized queries
('title and foo', MalformedQuery(['title', 'and', 'foo'])),
(
'title γ-radiation and and',
QueryWithMalformedPart(
Expand Down

0 comments on commit e1a8a10

Please sign in to comment.