Skip to content

Commit

Permalink
WebSearch: SearchQueryParenthesisedParser rewrite
Browse files Browse the repository at this point in the history
* New SQPP supports parenthetic subexpressions nested to arbitrary depth.

* Unit tests corrected, both to reflect the new support for parentheses and
  because some of them should not have been considered correct with the old
  parser either.
  (fixes #131)
  (fixes #67)
  (fixes #181)
  (fixes #189)
  (fixes #190)
  (fixes #191)

* Introduction of logicutils unit tests.
  • Loading branch information
Joe Blaylock authored and tiborsimko committed Sep 14, 2010
1 parent 4722b8a commit 1fc28f3
Show file tree
Hide file tree
Showing 6 changed files with 393 additions and 360 deletions.
1 change: 1 addition & 0 deletions modules/miscutil/lib/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ pylib_DATA = __init__.py \
dbquery.py \
dbquery_tests.py \
logicutils.py \
logicutils_tests.py \
mailutils.py \
miscutil_config.py \
messages.py \
Expand Down
64 changes: 64 additions & 0 deletions modules/miscutil/lib/logicutils_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
"""Unit tests for logic library."""

import unittest

from invenio.logic import expr, Expr, to_cnf, pl_true
from invenio.testutils import make_test_suite, run_test_suite

class exprExprOpsTest(unittest.TestCase):
    """Check that parsing with expr() agrees with hand-built Expr trees."""

    def test_trivial_expr(self):
        """logicutils - create trivial Expr with expr()"""
        parsed = expr('a | b')
        built = Expr('|', 'a', 'b')
        self.assertEqual(parsed, built)

    def test_deep_expr(self):
        """logicutils - create deep Expr with expr()"""
        parsed = expr('a | b | c | d | e')
        # The expected tree nests to the left: each further operand wraps
        # the tree built so far as the left child of a new '|' node.
        built = Expr('|', 'a', 'b')
        for operand in ('c', 'd', 'e'):
            built = Expr('|', built, operand)
        self.assertEqual(parsed, built)


class toCNFTest(unittest.TestCase):
    """Exercise to_cnf() on inputs whose conjunctive normal form is known."""

    def test_singleton(self):
        """logicutils - singletons are already in CNF"""
        converted = to_cnf(expr('a'))
        self.assertEqual(converted, Expr('a'))

    def test_complex_example_Norvig(self):
        """logicutils - (P&Q) | (~P & ~Q) in CNF"""
        # This case hands to_cnf() the raw string (not an Expr) and compares
        # the string rendering of the result rather than the Expr itself.
        rendered = str(to_cnf('(P&Q) | (~P & ~Q)'))
        expected = '((~P | P) & (~Q | P) & (~P | Q) & (~Q | Q))'
        self.assertEqual(rendered, expected)

    def test_ORed_pair(self):
        """logicutils - ORed pair should be in CNF"""
        converted = to_cnf(expr('a | b'))
        self.assertEqual(converted, Expr('|', 'a', 'b'))

    def test_ANDed_pair(self):
        """logicutils - ANDed pair should be in CNF"""
        converted = to_cnf(expr('a & b'))
        self.assertEqual(converted, Expr('&', 'a', 'b'))


class prop_logicTest(unittest.TestCase):
    """Check pl_true() on a single proposition under explicit models."""

    # Proposition symbol shared by every test in this class.
    P = Expr('P')

    def test_pl_true_P_true(self):
        """logicutils - True thing is evaluated as such"""
        model = {self.P: True}
        verdict = pl_true(self.P, model)
        self.assertEqual(verdict, True)

    def test_pl_true_P_false(self):
        """logicutils - False thing is evaluated as such"""
        model = {self.P: False}
        verdict = pl_true(self.P, model)
        self.assertEqual(verdict, False)


# Suite aggregating every test case class defined in this module.
TEST_SUITE = make_test_suite(
    exprExprOpsTest,
    toCNFTest,
    prop_logicTest,
)

# Run the suite when the module is executed directly as a script.
if __name__ == "__main__":
    run_test_suite(TEST_SUITE)
8 changes: 4 additions & 4 deletions modules/websearch/lib/search_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
from invenio.webpage import pageheaderonly, pagefooteronly, create_error_box
from invenio.messages import gettext_set_language
from invenio.search_engine_query_parser import SearchQueryParenthesisedParser, \
InvenioWebSearchQueryParserException, SpiresToInvenioSyntaxConverter
InvenioWebSearchMismatchedParensError, SpiresToInvenioSyntaxConverter

from invenio import webinterface_handler_config as apache

Expand Down Expand Up @@ -1931,12 +1931,12 @@ def search_pattern_parenthesised(req=None, p=None, f=None, m=None, ap=0, of="id"
return result_hitset

# If searching with parentheses fails, perform search ignoring parentheses
except InvenioWebSearchQueryParserException:
except SyntaxError:

print_warning(req, _("Nested or mismatched parentheses detected. Ignoring all parentheses in the query..."))
print_warning(req, _("Search syntax misunderstood. Ignoring all parentheses in the query. If this doesn't help, please check your search and try again."))

# remove the parentheses in the query. Current implementation removes all the parentheses,
# but it could be improved to romove only these that are not insede quotes
# but it could be improved to remove only those that are not inside quotes
p = p.replace('(', ' ')
p = p.replace(')', ' ')

Expand Down
Loading

0 comments on commit 1fc28f3

Please sign in to comment.