Skip to content

Commit

Permalink
WebSearch: SPIRES syntax date robustness fix
Browse files Browse the repository at this point in the history
* Less buggy regular expression for matching dates in SPIRES syntax converter,
  and additional unit tests.
  (fixes #323)
  • Loading branch information
Joe Blaylock authored and tiborsimko committed Nov 12, 2010
1 parent d4b69e5 commit dc864c5
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 6 deletions.
11 changes: 5 additions & 6 deletions modules/websearch/lib/search_engine_query_parser.py
Expand Up @@ -99,7 +99,7 @@ def parse_query(self, query):
parse_query() is a wrapper for self.tokenize() and self.parse().
"""
toklist = self.tokenize(query)
depth, balanced, d0_p = self.nesting_depth_and_balance(toklist)
depth, balanced, dummy_d0_p = self.nesting_depth_and_balance(toklist)
if not balanced:
raise SyntaxError("Mismatched parentheses in "+str(toklist))
toklist, var_subs = self.substitute_variables(toklist)
Expand Down Expand Up @@ -196,7 +196,7 @@ def logically_reduce(self, token_list):
the not in -(p | q) will be fully distributed (as -p + -q).
"""

maxdepth, balanced, d0_p = self.nesting_depth_and_balance(token_list)
maxdepth, dummy_balanced, d0_p = self.nesting_depth_and_balance(token_list)
s = ' '.join(token_list)
s = self._invenio_to_python_logical(s)
last_maxdepth = 0
Expand All @@ -206,7 +206,7 @@ def logically_reduce(self, token_list):
except SyntaxError:
raise SyntaxError(str(s)+" couldn't be converted to a logic expression.")
last_maxdepth = maxdepth
maxdepth, balanced, d0_p = self.nesting_depth_and_balance(self.tokenize(s))
maxdepth, dummy_balanced, d0_p = self.nesting_depth_and_balance(self.tokenize(s))
if d0_p == 1 and s[0] == '(' and s[-1] == ')': # s can come back with extra parens
s = s[1:-1]
s = self._python_logical_to_invenio(s)
Expand Down Expand Up @@ -596,7 +596,8 @@ def _compile_regular_expressions(self):

# regular expression that matches date searches which have been
# keyword-substituted
self._re_keysubbed_date_expr = re.compile(r'\b(?P<term>(' + self._DATE_ADDED_FIELD + ')|(' + self._DATE_UPDATED_FIELD + ')|(' + self._DATE_FIELD + '))\s*(?P<content>.+)(?= and not | and | or | not |$)', re.IGNORECASE)
#self._re_keysubbed_date_expr = re.compile(r'\b(?P<term>(' + self._DATE_ADDED_FIELD + ')|(' + self._DATE_UPDATED_FIELD + ')|(' + self._DATE_FIELD + '))\s*(?P<content>.+)(?= and not | and | or | not |$)', re.IGNORECASE)
self._re_keysubbed_date_expr = re.compile(r'\b(?P<term>(' + self._DATE_ADDED_FIELD + ')|(' + self._DATE_UPDATED_FIELD + ')|(' + self._DATE_FIELD + '))(?P<content>.+?)(?= and not | and | or | not |$)', re.IGNORECASE)

# for finding (and changing) a variety of different SPIRES search keywords
self._re_spires_find_keyword = re.compile('^(?P<find>f|fin|find)\s+(?P<query>.*)$', re.IGNORECASE)
Expand Down Expand Up @@ -727,8 +728,6 @@ def mangle_with_dateutils(query):
isodates.append(datestamp)

daterange = '->'.join(isodates)
#if re.search('[^\s]+-[^>][^\s]*', daterange):
# daterange = '"' + daterange + '"'
result += match.group('term') + daterange
position = match.end()
result += query[position : ]
Expand Down
12 changes: 12 additions & 0 deletions modules/websearch/lib/search_engine_query_parser_tests.py
Expand Up @@ -528,6 +528,18 @@ def test_date_by_yr_mo(self):
inv_search = 'year:1976-04'
self._compare_searches(inv_search, spi_search)

def test_date_by_yr_mo_day_wholemonth_and_suffix(self):
"""SPIRES search syntax - searching by date 1976-04-01 and t dog"""
# SPIRES-style input: a full yyyy-mm-dd date followed by a second clause
# ("and t dog").
spi_search = "find date 1976-04-01 and t dog"
# Expected Invenio-style form: the day component is absent (year:1976-04)
# and the trailing clause appears as title:dog. The test name suggests a
# first-of-month date is meant to be treated as the whole month.
inv_search = 'year:1976-04 and title:dog'
# NOTE(review): _compare_searches is defined outside this hunk; presumably
# it checks that the SPIRES query converts to the same search as
# inv_search -- confirm against the helper.
self._compare_searches(inv_search, spi_search)

def test_date_by_yr_mo_day_and_suffix(self):
"""SPIRES search syntax - searching by date 1976-04-05 and t dog"""
# SPIRES-style input: a mid-month yyyy-mm-dd date followed by a second
# clause ("and t dog").
spi_search = "find date 1976-04-05 and t dog"
# Expected Invenio-style form: the full date is kept (year:1976-04-05) and
# the trailing clause appears separately as title:dog, i.e. the text after
# the date is not folded into the date term.
inv_search = 'year:1976-04-05 and title:dog'
# NOTE(review): _compare_searches is defined outside this hunk; presumably
# it checks that the SPIRES query converts to the same search as
# inv_search -- confirm against the helper.
self._compare_searches(inv_search, spi_search)

def test_date_by_eq_yr_mo(self):
"""SPIRES search syntax - searching by date 1976-04"""
spi_search = "find date 1976-04"
Expand Down

0 comments on commit dc864c5

Please sign in to comment.