Skip to content

Commit

Permalink
Merge pull request #5286 from hypothesis/add-uri-wildcard-to-api-search
Browse files Browse the repository at this point in the history
Add wildcard_uri parameter to /api/search
  • Loading branch information
Hannah Stepanek committed Sep 18, 2018
2 parents 9094e81 + aacff57 commit b673eeb
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 5 deletions.
21 changes: 21 additions & 0 deletions docs/_extra/api-reference/hypothesis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,27 @@ paths:
Alias of `uri`.
required: false
type: string
- name: wildcard_uri
in: query
description: |
Limit the results to annotations matching the wildcard URI.
URI can be a URL (a web page address) or a URN representing another
kind of resource such as DOI (Digital Object Identifier) or a
PDF fingerprint.
`*` will match any character sequence (including an empty one),
and a `?` will match any single character. Wildcards are only permitted
within the path and query parts of the URI.
Escaping wildcards is not supported.
Examples of valid uris: `http://foo.com/*` `urn:x-pdf:*` `file://localhost/?bc.pdf`
Examples of invalid uris: `*foo.com` `u?n:*` `file://*` `http://foo.com*`
<mark>This feature is experimental and the API may change.</mark>
required: false
type: string
- name: user
in: query
description: Limit the results to annotations made by the specified user.
Expand Down
33 changes: 32 additions & 1 deletion h/schemas/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,22 @@
from pyramid import i18n

from h.schemas.base import JSONSchema, ValidationError
from h.search.query import LIMIT_DEFAULT, LIMIT_MAX, OFFSET_MAX
from h.search.query import LIMIT_DEFAULT, LIMIT_MAX, OFFSET_MAX, wildcard_uri_is_valid
from h.util import document_claims

_ = i18n.TranslationStringFactory(__package__)


def _validate_wildcard_uri(node, value):
    """Reject the value if any URI in it fails ``wildcard_uri_is_valid``.

    Intended for use as a colander ``validator`` callable.

    :param node: the schema node being validated; attached to the error.
    :param value: iterable of wildcard URI strings to check.
    :raises colander.Invalid: if at least one URI in ``value`` is rejected
        by ``wildcard_uri_is_valid``.
    """
    # all() stops at the first rejected URI, like an explicit loop would.
    if not all(wildcard_uri_is_valid(val) for val in value):
        # Build the message from adjacent literals rather than a triple-quoted
        # string, so no source line break or indentation leaks into the error
        # text returned to API clients.
        raise colander.Invalid(
            node,
            "Wildcards (? and *) are not permitted within the "
            "domain of wildcard_uri")


class AnnotationSchema(JSONSchema):

"""Validate an annotation object."""
Expand Down Expand Up @@ -417,6 +427,27 @@ class SearchParamsSchema(colander.Schema):
missing=colander.drop,
description="Alias of uri.",
)
# Search parameter: a sequence of wildcard URI strings. The node is dropped
# when the parameter is absent and checked by _validate_wildcard_uri otherwise.
wildcard_uri = colander.SchemaNode(
    colander.Sequence(),
    colander.SchemaNode(colander.String()),
    validator=_validate_wildcard_uri,
    missing=colander.drop,
    description="""
Limit the results to annotations matching the wildcard URI.
URI can be a URL (a web page address) or a URN representing another
kind of resource such as DOI (Digital Object Identifier) or a
PDF fingerprint.
`*` will match any character sequence (including an empty one),
and a `?` will match any single character. Wildcards are only permitted
within the path and query parts of the URI.
Escaping wildcards is not supported.
Examples of valid uris: `http://foo.com/*` `urn:x-pdf:*` `file://localhost/?bc.pdf`
Examples of invalid uris: `*foo.com` `u?n:*` `file://*` `http://foo.com*`
""",
)
any = colander.SchemaNode(
colander.Sequence(),
colander.SchemaNode(colander.String()),
Expand Down
2 changes: 1 addition & 1 deletion h/search/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def wildcard_uri_is_valid(wildcard_uri):
if "*" not in wildcard_uri and "?" not in wildcard_uri:
return False
try:
normalized_uri = urlparse.urlparse(wildcard_uri)
normalized_uri = urlparse.urlparse(wildcard_uri.replace("*", "").replace("?", ""))

# Remove all parts of the url except the scheme, netloc, and provide a substitute
# path value "p" so that uri's that only have a scheme and path are still valid.
Expand Down
4 changes: 2 additions & 2 deletions h/views/api/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import newrelic.agent

from h import search as search_lib
from h.search import UriFilter
from h.search import UriCombinedWildcardFilter
from h import storage
from h.exceptions import PayloadError
from h.events import AnnotationEvent
Expand Down Expand Up @@ -113,7 +113,7 @@ def search(request):
search = search_lib.Search(request,
separate_replies=separate_replies,
stats=stats)
search.append_modifier(UriFilter(request))
search.append_modifier(UriCombinedWildcardFilter(request, separate_keys=True))
result = search.run(params)

svc = request.find_service(name='annotation_json_presentation')
Expand Down
12 changes: 12 additions & 0 deletions tests/h/schemas/annotation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,7 @@ def test_it_returns_only_known_params(self, schema):
'text': "text me",
'uri': "foobar.com",
'uri.parts': "bbc",
'wildcard_uri': "http://foo.com/*",
'url': "https://foobar.com",
'any': "foo",
'user': "pooky",
Expand All @@ -647,6 +648,7 @@ def test_it_returns_only_known_params(self, schema):
'text': "text me",
'uri': "foobar.com",
'uri.parts': "bbc",
'wildcard_uri': "http://foo.com/*",
'url': "https://foobar.com",
'any': "foo",
'user': "pooky",
Expand Down Expand Up @@ -758,6 +760,16 @@ def test_sets_offset_to_0_if_search_after(self, schema):
assert params["offset"] == 0
assert params["search_after"] == "2009-02-16"

@pytest.mark.parametrize(
    'wildcard_uri',
    (
        "https://localhost:3000*",
        "file://localhost*/foo.pdf",
    ),
)
def test_raises_if_wildcards_are_in_domain(self, schema, wildcard_uri):
    """Each parametrized wildcard_uri must be rejected with ValidationError."""
    raw_query = MultiDict({"wildcard_uri": wildcard_uri})
    input_params = NestedMultiDict(raw_query)

    with pytest.raises(ValidationError):
        validate_query_params(schema, input_params)

@pytest.fixture
def schema(self):
    """Build the ``SearchParamsSchema`` instance used by the tests."""
    search_schema = SearchParamsSchema()
    return search_schema
Expand Down
4 changes: 3 additions & 1 deletion tests/h/search/query_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,9 @@ def _get_search(self, search, pyramid_request, separate_keys):
("urn:*", True),
("urn:x-pdf:*", True),
("http://foo.com/*", True),
("doi:10.101?", True)
("doi:10.101?", True),
("http://*.org/*", False),
("http://example.*", False),
])
def test_identifies_wildcard_uri_is_valid(wildcard_uri, expected):
assert query.wildcard_uri_is_valid(wildcard_uri) == expected
Expand Down

0 comments on commit b673eeb

Please sign in to comment.