Skip to content

Commit

Permalink
Merge pull request #8053 from ckan/solr-local-params
Browse files Browse the repository at this point in the history
Allow alternate Solr query parsers via allow list
  • Loading branch information
amercader committed Mar 11, 2024
2 parents f42547b + 1e3b6d0 commit 6992ec2
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 12 deletions.
1 change: 1 addition & 0 deletions changes/8053.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Define allowed alternative Solr query parsers via the :ref:`ckan.search.solr_allowed_query_parsers` config option
9 changes: 9 additions & 0 deletions ckan/config/config_declaration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -892,6 +892,15 @@ groups:
enabled to improve dataset update/create speed (however there may be
a slight delay before dataset gets seen in results).
- key: ckan.search.solr_allowed_query_parsers
type: list
default: []
example: ["bool", "knn"]
description: |
Local parameters are not allowed when passing queries to Solr. An exception to this is when passing local parameters for special query parsers, that need to be enabled explicitly using this config option. For instance, the example provided would allow sending queries like the following::
search_params["q"] = "{!bool must=test}..."
search_params["q"] = "{!knn field=vector topK=10}..."
- key: ckan.search.show_all_types
default: dataset
example: dataset
Expand Down
73 changes: 62 additions & 11 deletions ckan/lib/search/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
import re
import logging
from typing import Any, NoReturn, Optional, Union, cast, Dict
from pyparsing import (
Word, QuotedString, Suppress, OneOrMore, Group, alphanums
)
from pyparsing.exceptions import ParseException
import pysolr

from ckan.common import asbool
Expand Down Expand Up @@ -89,6 +93,51 @@ def convert_legacy_parameters_to_solr(
return solr_params


def _parse_local_params(local_params: str) -> list[Union[str, list[str]]]:
"""
Parse a local parameters section as return it as a list, eg:
{!dismax qf=myfield v='some value'} -> ['dismax', ['qf', 'myfield'], ['v', 'some value']]
{!type=dismax qf=myfield v='some value'} -> [['type', 'dismax'], ['qf', 'myfield'], ['v', 'some value']]
"""
key = Word(alphanums + "_")
value = QuotedString('"') | QuotedString("'") | Word(alphanums + "_$")
pair = Group(key + Suppress("=") + value)
expression = Suppress("{!") + OneOrMore(pair | key) + Suppress("}")

return expression.parse_string(local_params).as_list()


def _get_local_query_parser(q: str) -> str:
"""
Given a Solr parameter, extract any custom query parsers used in the
local parameters, .e.g. q={!child ...}...
"""
qp_type = ""
q = q.strip()
if not q.startswith("{!"):
return qp_type

try:
local_params = q[:q.rindex("}") + 1]
parts = _parse_local_params(local_params)
except (ParseException, ValueError) as e:
raise SearchQueryError(f"Could not parse incoming local parameters: {e}")

if isinstance(parts[0], str):
# Most common form of defining the query parser type e.g. {!knn ...}
qp_type = parts[0]
else:
# Alternative syntax e.g. {!type=knn ...}
type_part = [p for p in parts if p[0] == "type"]
if type_part:
qp_type = type_part[0][1]
return qp_type


class QueryOptions(Dict[str, Any]):
"""
Options specify aspects of the search query which are only tangentially related
Expand Down Expand Up @@ -295,12 +344,6 @@ def get_index(self, reference: str) -> dict[str, Any]:
'wt': 'json',
'fq': 'site_id:"%s" ' % config.get('ckan.site_id') + '+entity_type:package'}

try:
if query['q'].startswith('{!'):
raise SearchError('Local parameters are not supported.')
except KeyError:
pass

conn = make_connection(decode_dates=False)
log.debug('Package query: %r' % query)
try:
Expand Down Expand Up @@ -397,11 +440,19 @@ def run(self,

query.setdefault("df", "text")
query.setdefault("q.op", "AND")
try:
if query['q'].startswith('{!'):
raise SearchError('Local parameters are not supported.')
except KeyError:
pass

def _check_query_parser(param: str, value: Any):
if isinstance(value, str) and value.strip().startswith("{!"):
if not _get_local_query_parser(value) in config["ckan.search.solr_allowed_query_parsers"]:
raise SearchError(f"Local parameters are not supported in param '{param}'.")

for param in query.keys():
if isinstance(query[param], str):
_check_query_parser(param, query[param])
elif isinstance(query[param], list):
for item in query[param]:
_check_query_parser(param, item)


conn = make_connection(decode_dates=False)
log.debug('Package query: %r' % query)
Expand Down
114 changes: 114 additions & 0 deletions ckan/tests/lib/search/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
import pytest

import ckan.config as config
from ckan.lib.search.common import SearchQueryError
import ckan.tests.factories as factories
import ckan.model as model
import ckan.lib.search as search
from ckan.lib.search import check_solr_schema_version, SearchError
from ckan.lib.search.query import _get_local_query_parser

root_dir = os.path.join(os.path.dirname(config.__file__), "solr")
data_dir = os.path.join(os.path.dirname(__file__), "data")
Expand Down Expand Up @@ -98,3 +100,115 @@ def test_04_delete_package_from_dict():
model.repo.commit_and_remove()

assert query.run({"q": ""})["count"] == 1


@pytest.mark.parametrize(
"query,parser",
[
("*:*", ""),
("title:test AND organization:test-org", ""),
("{!bool must=test}", "bool"),
(" {!bool must=test}", "bool"),
("{!bool must='test string'}", "bool"),
("{!bool must='test string'}solr rocks", "bool"),
(" {!bool must='test string'}solr rocks", "bool"),
(" {!bool must='test string'}", "bool"),
("{!bool must='test string with \"quotes\"'}", "bool"),
("{!type=bool must=test}", "bool"),
("{!type=bool must='test string'}", "bool"),
("{!must=test type=bool}", "bool"),
("{!must=test type=bool}solr rocks", "bool"),
("{!must='test text' type=bool}solr rocks", "bool"),
("{!dismax qf=myfield}solr rocks", "dismax"),
("{!type=dismax qf=myfield v='solr rocks'}", "dismax"),
("{!type=lucene df=summary}solr rocks", "lucene"),
("{!v='lies type= here' type=dismax}", "dismax"),
("{!some_parser}", "some_parser"),
("{!dismax v=some_value}", "dismax"),
("{!some_parser a='0.9' traversalFilter='foo:[*+TO+15]'}", "some_parser"),
("{!some_parser must=$ref}", "some_parser"),
]
)
def test_get_local_query_parser(query, parser):

assert _get_local_query_parser(query) == parser


@pytest.mark.parametrize(
"query",
[
"{!v='lies type= here' some params",
"{!v='lies type= here' v2='\\{some test \\} type=dismax}",
]
)
def test_get_local_query_parser_exception(query):

with pytest.raises(SearchQueryError):
_get_local_query_parser(query)


def test_local_params_not_allowed_by_default():

query = search.query_for(model.Package)
with pytest.raises(search.common.SearchError) as e:
query.run({"q": "{!bool must=test}"})

assert str(e.value) == "Local parameters are not supported in param 'q'."


def test_local_params_not_allowed_by_default_different_field():

query = search.query_for(model.Package)
with pytest.raises(search.common.SearchError) as e:
query.run({"fq": "{!bool must=test} +site_id:test.ckan.net"})

assert str(e.value) == "Local parameters are not supported in param 'fq'."


def test_local_params_not_allowed_by_default_different_field_list():

query = search.query_for(model.Package)
with pytest.raises(search.common.SearchError) as e:
query.run({"fq_list": ["+site_id:default", "{!bool must=test}"]})

assert str(e.value) == "Local parameters are not supported in param 'fq_list'."


def test_local_params_with_whitespace_not_allowed_by_default():

query = search.query_for(model.Package)
with pytest.raises(search.common.SearchError) as e:
query.run({"q": " {!bool must=test}"})

assert str(e.value) == "Local parameters are not supported in param 'q'."


@pytest.mark.ckan_config("ckan.search.solr_allowed_query_parsers", "bool")
@pytest.mark.usefixtures("clean_index")
def test_allowed_local_params_via_config_not_defined():

query = search.query_for(model.Package)
with pytest.raises(search.common.SearchError) as e:
query.run({"q": "{!something_else a=test}"})

assert str(e.value) == "Local parameters are not supported in param 'q'."


@pytest.mark.ckan_config("ckan.search.solr_allowed_query_parsers", "bool knn")
@pytest.mark.usefixtures("clean_index")
def test_allowed_local_params_via_config():

factories.Dataset(title="A dataset about bees")
factories.Dataset(title="A dataset about butterflies")
query = search.query_for(model.Package)

assert query.run({"q": "{!bool must=bees}", "defType": "lucene"})["count"] == 1

assert query.run({"q": " {!bool must=bees}", "defType": "lucene"})["count"] == 1

# Alternative syntax
assert query.run({"q": "{!type=bool must=beetles}", "defType": "lucene"})["count"] == 0

assert query.run({"q": "{!must=bees type=bool}", "defType": "lucene"})["count"] == 1
1 change: 1 addition & 0 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ passlib==1.7.4
polib==1.2.0
psycopg2==2.9.7
PyJWT==2.8.0
pyparsing==3.0.7
python-magic==0.4.27
pysolr==3.9.0
python-dateutil==2.8.2
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,9 @@ psycopg2==2.9.7
pyjwt==2.8.0
# via -r requirements.in
pyparsing==3.0.7
# via packaging
# via
# -r requirements.in
# packaging
pysolr==3.9.0
# via -r requirements.in
python-dateutil==2.8.2
Expand Down

0 comments on commit 6992ec2

Please sign in to comment.