Skip to content

Commit

Permalink
Merge pull request #5429 from hypothesis/normalize-uris-in-websocket-filtering
Browse files: browse the repository at this point in the history

Normalize URIs for comparison when filtering Web Socket messages
  • Loading branch information
robertknight committed Nov 22, 2018
2 parents c692303 + e1ec22f commit c5f8bc3
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 11 deletions.
19 changes: 15 additions & 4 deletions h/streamer/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import unicodedata

from jsonpointer import resolve_pointer
from h.util.uri import normalize as normalize_uri
from h._compat import text_type

SCHEMA = {
Expand Down Expand Up @@ -50,15 +51,25 @@ def evaluate_clause(self, clause, target):

filter_term = clause['value']

def normalize(term):
    """Return ``term`` in the form used when comparing clause values.

    Terms belonging to a clause on the ``/uri`` field are passed through
    ``normalize_uri``; terms for any other field are passed through
    ``uni_fold``. ``clause`` is read from the enclosing scope.
    """
    # The generic fold is always evaluated first, even though its result
    # is only used for non-URI fields.
    folded = uni_fold(term)

    if clause['field'] != '/uri':
        return folded

    # Field-specific normalization is applied to the raw term, not to the
    # folded one.
    return normalize_uri(term)

if isinstance(filter_term, list):
filter_term = [t for t in uni_fold(filter_term)]
filter_term = [normalize(t) for t in filter_term]
else:
filter_term = uni_fold(filter_term)
filter_term = normalize(filter_term)

if isinstance(field_value, list):
field_value = [v for v in uni_fold(field_value)]
field_value = [normalize(v) for v in field_value]
else:
field_value = uni_fold(field_value)
field_value = normalize(field_value)

if clause['operator'] == 'one_of':
# The `one_of` operator behaves differently depending on whether
Expand Down
24 changes: 17 additions & 7 deletions tests/h/streamer/filter_test.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,38 @@
from __future__ import unicode_literals

import pytest

from h.streamer.filter import FilterHandler


class TestFilterHandler(object):
def test_it_matches_uri(self):
@pytest.mark.parametrize('query_uris,ann_uri,should_match', [
# Test cases that require only exact comparisons.
(["https://example.com", "https://example.org"], 'https://example.com', True),
(["https://example.com", "https://example.org"], 'https://example.net', False),
# Test cases that require comparison of normalized URIs.
(["https://example.com"], "http://example.com", True),
(["http://example.com"], "https://example.com", True),
(["http://example.com/?"], "https://example.com", True),
(["http://example.com"], "https://example.com/?", True),
])
def test_it_matches_uri(self, query_uris, ann_uri, should_match):
query = {
"match_policy": "include_any",
"actions": {},
"clauses": [
{
"field": "/uri",
"operator": "one_of",
"value": ["https://example.com", "https://example.org"],
"value": query_uris,
}
],
}
handler = FilterHandler(query)

ann = {"id": "123", "uri": "https://example.com"}
assert handler.match(ann) is True

ann = {"id": "123", "uri": "https://example.net"}
assert handler.match(ann) is False
ann = {"id": "123", "uri": ann_uri}
assert handler.match(ann) is should_match

def test_it_matches_id(self):
query = {
Expand Down

0 comments on commit c5f8bc3

Please sign in to comment.