Skip to content

Commit

Permalink
Allow ? in domain of a wildcard url (#5310)
Browse files Browse the repository at this point in the history
* Allow ? inside netloc of url

Allow `?` characters inside the netloc of a URL, for example:
https://example.com?foo=bar
https://example.com?.

Add additional tests and re-organize tests so that there is a "flow"
to the test cases and it's easier to see which cases are being covered.

* Update docs to reflect that ? is permitted in the domain

* fixup: remove unreachable check for * in scheme
  • Loading branch information
Hannah Stepanek committed Oct 1, 2018
1 parent 334e5f8 commit 95ac584
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 28 deletions.
5 changes: 3 additions & 2 deletions docs/_extra/api-reference/hypothesis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,9 @@ paths:
PDF fingerprint.
`*` will match any character sequence (including an empty one),
and a `?` will match any single character. Wildcards are only permitted
within the path and query parts of the URI.
and a `?` will match any single character. `*`s are only permitted
within the path and query parts of the URI and `?`s are only permitted
within the domain, path, and query parts of the URI.
Escaping wildcards is not supported.
Expand Down
5 changes: 3 additions & 2 deletions h/schemas/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,8 +439,9 @@ class SearchParamsSchema(colander.Schema):
PDF fingerprint.
`*` will match any character sequence (including an empty one),
and a `?` will match any single character. Wildcards are only permitted
within the path and query parts of the URI.
and a `?` will match any single character. `*`s are only permitted
within the path and query parts of the URI and `?`s are only permitted
within the domain, path, and query parts of the URI.
Escaping wildcards is not supported.
Expand Down
28 changes: 11 additions & 17 deletions h/search/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,26 +22,20 @@ def wildcard_uri_is_valid(wildcard_uri):
"""
Return True if uri contains wildcards in appropriate places, return False otherwise.
Wildcards are not permitted in the scheme or netloc of the uri.
*'s are not permitted in the scheme or netloc. ?'s are not permitted in the scheme.
"""
if "*" not in wildcard_uri and "?" not in wildcard_uri:
return False
try:
normalized_uri = urlparse.urlparse(wildcard_uri.replace("*", "").replace("?", ""))

# Remove all parts of the url except the scheme and netloc, and provide a substitute
# path value "p" so that URIs that only have a scheme and path are still valid.
uri_parts = (normalized_uri.scheme, normalized_uri.netloc, "p", "", "", "")

# Remove the "p" standing for path from the end of the uri.
begining_of_uri = urlparse.urlunparse(uri_parts)[:-1]

# If a wildcard was in the scheme the uri may come back as "" (a falsey value).
if begining_of_uri and wildcard_uri.startswith(begining_of_uri):
return True
except ValueError:
pass
return False

normalized_uri = urlparse.urlparse(wildcard_uri.replace("?", ""))
if not normalized_uri.scheme or "*" in normalized_uri.netloc:
return False

normalized_uri = urlparse.urlparse(wildcard_uri.replace("*", ""))
if not normalized_uri.scheme:
return False

return True


def popall(multidict, key):
Expand Down
14 changes: 7 additions & 7 deletions tests/h/search/query_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,21 +582,21 @@ def _get_search(self, search, pyramid_request, separate_keys):


@pytest.mark.parametrize('wildcard_uri,expected', [
("http?://bar.com", False),
("htt*://bar.com", False),
("*http://bar.com", False),
("http?://bar.com", False),
("http://bar?com*", False),
("?http://bar.com", False),
("http://localhost:3000*", False),
("http://bar*.com", False),
("http://bar?com", False),
("*?http://bar.com", False),
("file://*", False),
("https://foo.com", False),
("http://foo.com*", False),
("urn:*", True),
("urn:x-pdf:*", True),
("http://foo.com/*", True),
("urn:*", True),
("http://bar.com?foo=baz", True),
("doi:10.101?", True),
("http://*.org/*", False),
("http://example.*", False),
("http://example.com?", True),
])
def test_identifies_wildcard_uri_is_valid(wildcard_uri, expected):
assert query.wildcard_uri_is_valid(wildcard_uri) == expected
Expand Down

0 comments on commit 95ac584

Please sign in to comment.