Skip to content

Commit

Permalink
views: query factory support
Browse files Browse the repository at this point in the history
* Add support for customizing query generation and thereby support
  multiple query parsers.

* Adds query factory for Elasticsearch Lucene syntax.

Signed-off-by: Lars Holm Nielsen <lars.holm.nielsen@cern.ch>
  • Loading branch information
lnielsen committed Feb 16, 2016
1 parent 76d031f commit f1577f8
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 10 deletions.
86 changes: 86 additions & 0 deletions invenio_records_rest/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2016 CERN.
#
# Invenio is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

"""Query factories for REST API."""

from __future__ import absolute_import, print_function

from flask import current_app, request
from invenio_search import Query

from .errors import InvalidQueryRESTError


def default_query_factory(index, page, size):
    """Parse and slice query using Invenio-Query-Parser.

    The search expression is read from the ``q`` request value (empty string
    when absent) and the resulting query is sliced to the window
    ``[(page - 1) * size, page * size)``.

    :param index: Index to search in (not used by this factory).
    :param page: Requested page.
    :param size: Request results size.
    :returns: Tuple of (query, URL arguments).
    :raises InvalidQueryRESTError: If building the query raises
        ``SyntaxError``.
    """
    raw_query = request.values.get('q', '')
    start = (page - 1) * size
    stop = page * size

    try:
        query = Query(raw_query)[start:stop]
    except SyntaxError:
        # Keep the traceback in the debug log; the client only gets the
        # generic REST error.
        message = "Failed parsing query: {0}".format(raw_query)
        current_app.logger.debug(message, exc_info=True)
        raise InvalidQueryRESTError()

    url_arguments = {'q': raw_query}
    return (query, url_arguments)


def es_query_factory(index, page, size):
    """Send query directly as query string query to Elasticsearch.

    .. warning::

       All fields in a record that a user can access are searchable! This
       means that if a user can access a record, you cannot include
       confidential information into the record (or you must remove it when
       indexing). Otherwise a user is able to search for the information.

       The reason is that the query string is passed directly to
       Elasticsearch, which takes care of parsing the string.

    A blank (empty or whitespace-only) ``q`` value leaves the body of the
    freshly created ``Query`` untouched; only the slice is applied.

    :param index: Index to search in (not used by this factory).
    :param page: Requested page.
    :param size: Request results size.
    :returns: Tuple of (query, URL arguments).
    """
    raw_query = request.values.get('q', '')
    start = (page - 1) * size
    stop = page * size

    query = Query()
    if raw_query.strip():
        # Hand the user's string to Elasticsearch unparsed, but refuse
        # leading wildcards.
        query.body['query'] = {
            'query_string': {
                'query': raw_query,
                'allow_leading_wildcard': False,
            },
        }

    query = query[start:stop]
    return (query, {'q': raw_query})
23 changes: 13 additions & 10 deletions invenio_records_rest/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,16 @@
from invenio_records.api import Record
from invenio_rest import ContentNegotiatedMethodView
from invenio_rest.decorators import require_content_types
from invenio_search import Query, current_search_client
from invenio_search import current_search_client
from jsonpatch import JsonPatchException, JsonPointerException
from sqlalchemy.exc import SQLAlchemyError
from werkzeug.local import LocalProxy
from werkzeug.routing import BuildError
from werkzeug.utils import import_string

from .errors import InvalidQueryRESTError, MaxResultWindowRESTError
from .errors import MaxResultWindowRESTError
from .facets import default_facets_factory
from .query import default_query_factory
from .sorter import default_sorter_factory

current_records_rest = LocalProxy(
Expand Down Expand Up @@ -81,7 +82,8 @@ def create_url_rules(endpoint, list_route=None, item_route=None,
search_index=None, search_type=None,
default_media_type=None,
max_result_window=None, use_options_view=True,
facets_factory_imp=None, sorter_factory_imp=None):
facets_factory_imp=None, sorter_factory_imp=None,
query_factory_imp=None):
"""Create Werkzeug URL rules.
:param endpoint: Name of endpoint.
Expand Down Expand Up @@ -159,6 +161,9 @@ def create_url_rules(endpoint, list_route=None, item_route=None,
sorter_factory=(
import_string(sorter_factory_imp) if sorter_factory_imp
else default_sorter_factory),
query_factory=(
import_string(query_factory_imp) if query_factory_imp
else default_query_factory),
)
item_view = RecordResource.as_view(
RecordResource.view_name.format(endpoint),
Expand Down Expand Up @@ -316,7 +321,7 @@ def __init__(self, resolver=None, minter_name=None, pid_type=None,
search_type=None, record_serializers=None,
search_serializers=None, default_media_type=None,
max_result_window=None, facets_factory=None,
sorter_factory=None, **kwargs):
sorter_factory=None, query_factory=None, **kwargs):
"""Constructor."""
super(RecordsListResource, self).__init__(
method_serializers={
Expand All @@ -340,6 +345,7 @@ def __init__(self, resolver=None, minter_name=None, pid_type=None,
self.max_result_window = max_result_window or 10000
self.facets_factory = facets_factory
self.sorter_factory = sorter_factory
self.query_factory = query_factory

def get(self, **kwargs):
"""Search records.
Expand All @@ -352,15 +358,12 @@ def get(self, **kwargs):
if page*size >= self.max_result_window:
raise MaxResultWindowRESTError()

# Parse and slice query
try:
query = Query(request.values.get('q', ''))[(page-1)*size:page*size]
except SyntaxError:
raise InvalidQueryRESTError()

# Arguments that must be added in prev/next links
urlkwargs = dict()

query, qs_kwargs = self.query_factory(self.search_index, page, size)
urlkwargs.update(qs_kwargs)

# Facets
query, qs_kwargs = self.facets_factory(query, self.search_index)
urlkwargs.update(qs_kwargs)
Expand Down
75 changes: 75 additions & 0 deletions tests/test_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2016 CERN.
#
# Invenio is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

"""Test query factories."""

from __future__ import absolute_import, print_function

import pytest

from invenio_records_rest.errors import InvalidQueryRESTError
from invenio_records_rest.query import default_query_factory, es_query_factory


def test_default_query_factory(app, user_factory):
    """Test default query factory.

    Covers a simple query (body, pagination and URL arguments) and a query
    string that must be rejected with ``InvalidQueryRESTError``.
    """
    app.config.update({'SEARCH_QUERY_ENHANCERS': []})

    with app.test_request_context("?q=test"):
        query, urlargs = default_query_factory('myindex', 1, 10)
        expected_query = {
            'multi_match': {
                'fields': ['_all'],
                'query': 'test',
            },
        }
        assert query.body['query'] == expected_query
        # First page of ten results.
        assert query.body['from'] == 0
        assert query.body['size'] == 10
        assert urlargs['q'] == 'test'

    with app.test_request_context("?q=:"):
        with pytest.raises(InvalidQueryRESTError):
            default_query_factory('myindex', 1, 10)


def test_es_query_factory(app, user_factory):
    """Test es query factory.

    Covers a non-empty query string (sent as a ``query_string`` query) and
    an empty one (expected to yield ``match_all``), both on the second page
    of twenty results.
    """
    app.config.update({'SEARCH_QUERY_ENHANCERS': []})

    with app.test_request_context("?q=test"):
        query, urlargs = es_query_factory('myindex', 2, 20)
        expected_query = {
            'query_string': {
                'query': "test",
                'allow_leading_wildcard': False,
            },
        }
        assert query.body['query'] == expected_query
        assert query.body['from'] == 20
        assert query.body['size'] == 20
        assert urlargs['q'] == 'test'

    with app.test_request_context("?q="):
        query, urlargs = es_query_factory('myindex', 2, 20)
        assert query.body['query'] == {'match_all': {}}
        assert query.body['from'] == 20
        assert query.body['size'] == 20
        assert urlargs['q'] == ''

0 comments on commit f1577f8

Please sign in to comment.