Skip to content

Commit

Permalink
Cleanup Uri Filters (#5377)
Browse files Browse the repository at this point in the history
* Append UriCombinedWildcardFilter in Search class

Previously, because the original UriWildcardFilter was not combined,
the WildcardFilter and the UriFilter were moved outside of the Search
class. Since the UriCombinedWildcardFilter can act as both the original
UriFilter and the new WildcardFilter, move it back into the Search __init__
and add a new param separate_wildcard_uri_keys to Search __init__ that
will be passed to the UriCombinedWildcardFilter. This param will configure
the UriCombinedWildcardFilter to behave in one of two ways:
 - Expect wildcards and exact matches in url/uri keys.
 - Expect wildcards in wildcard_uri and exact matches in url/uri keys.

The default behavior is to separate wildcards and exact uri matches into
separate keys.

Since the UriCombinedWildcardFilter is now appended in the Search class,
places that were previously appending UriFilter or UriCombinedWildcardFilter,
no longer need to do so.

The only view that expects wildcards and exact uri's to be in the same key
is activity pages and a comment was added to explain why separate_wildcard_uri_keys
is set to False.

Add tests for the new param sepatate_wildcard_uri_keys in the Search class.

* Delete UriFilter

The UriCombinedWildcardFilter can act as the UriFilter, thus the individual
UriFilter is no longer needed and can be removed.

The UriCombinedWildcardFilter and UriFilter's can also be removed from the
h/search/__init__.py since they are no longer used outside of the Search class.

* fixup: indicate what boolean represents

* fixup: use get_search fixture & object in cls def
  • Loading branch information
Hannah Stepanek committed Oct 18, 2018
1 parent dc8ef4c commit b3b814f
Show file tree
Hide file tree
Showing 10 changed files with 69 additions and 88 deletions.
8 changes: 4 additions & 4 deletions h/activity/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
from h.search import (TopLevelAnnotationsFilter,
AuthorityFilter,
TagsAggregation,
UsersAggregation,
UriCombinedWildcardFilter)
UsersAggregation)


class ActivityResults(namedtuple('ActivityResults', [
Expand Down Expand Up @@ -165,10 +164,11 @@ def load_documents(query):

@newrelic.agent.function_trace()
def _execute_search(request, query, page_size):
search = Search(request, stats=request.stats)
# Wildcards and exact url matches are specified in the url facet so set
# separate_wildcard_uri_keys to False.
search = Search(request, stats=request.stats, separate_wildcard_uri_keys=False)
search.append_modifier(AuthorityFilter(authority=request.default_authority))
search.append_modifier(TopLevelAnnotationsFilter())
search.append_modifier(UriCombinedWildcardFilter(request=request))
for agg in aggregations_for(query):
search.append_aggregation(agg)

Expand Down
6 changes: 1 addition & 5 deletions h/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
UserFilter,
AuthorityFilter,
TagsAggregation,
UsersAggregation,
UriCombinedWildcardFilter,
UriFilter)
UsersAggregation)

__all__ = (
'Search',
Expand All @@ -23,8 +21,6 @@
'AuthorityFilter',
'TagsAggregation',
'UsersAggregation',
'UriCombinedWildcardFilter',
'UriFilter',
'get_client',
'init',
)
Expand Down
15 changes: 14 additions & 1 deletion h/search/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,23 @@ class Search(object):
resulting annotations will only include top-level annotations, not replies.
:type separate_replies: bool
:param separate_wildcard_uri_keys: If True, wildcard searches are only performed
on wildcard_uri's, and exact match searches are performed on uri/url parameters.
If False, uri/url parameters are expected to contain both wildcard and exact
matches.
:type separate_wildcard_uri_keys: bool
:param stats: An optional statsd client to which some metrics will be
published.
:type stats: statsd.client.StatsClient
"""
def __init__(self, request, separate_replies=False, stats=None, _replies_limit=200):
def __init__(
self, request,
separate_replies=False,
separate_wildcard_uri_keys=True,
stats=None,
_replies_limit=200,
):
self.es = request.es
self.separate_replies = separate_replies
self.stats = stats
Expand All @@ -51,6 +63,7 @@ def __init__(self, request, separate_replies=False, stats=None, _replies_limit=2
query.HiddenFilter(request),
query.AnyMatcher(),
query.TagsMatcher(),
query.UriCombinedWildcardFilter(request, separate_keys=separate_wildcard_uri_keys),
query.KeyValueMatcher()]
self._aggregations = []

Expand Down
29 changes: 0 additions & 29 deletions h/search/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,35 +246,6 @@ def __call__(self, search, _):
return search.filter("terms", group=groups)


class UriFilter(object):

"""
A filter that selects only annotations where the 'uri' parameter matches.
"""

def __init__(self, request):
"""Initialize a new UriFilter.
:param request: the pyramid.request object
"""
self.request = request

def __call__(self, search, params):
if 'uri' not in params and 'url' not in params:
return search
query_uris = popall(params, 'uri') + popall(params, 'url')

uris = set()
for query_uri in query_uris:
expanded = storage.expand_uri(self.request.db, query_uri)

us = [uri.normalize(u) for u in expanded]
uris.update(us)
return search.filter(
'terms', **{'target.scope': list(uris)})


class UriCombinedWildcardFilter(object):

"""
Expand Down
2 changes: 0 additions & 2 deletions h/views/api/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import newrelic.agent

from h import search as search_lib
from h.search import UriCombinedWildcardFilter
from h import storage
from h.exceptions import PayloadError
from h.events import AnnotationEvent
Expand Down Expand Up @@ -55,7 +54,6 @@ def search(request):
search = search_lib.Search(request,
separate_replies=separate_replies,
stats=stats)
search.append_modifier(UriCombinedWildcardFilter(request, separate_keys=True))
result = search.run(params)

svc = request.find_service(name='annotation_json_presentation')
Expand Down
1 change: 0 additions & 1 deletion h/views/badge.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ def badge(request):
else:
query = MultiDict({'uri': uri, 'limit': 0})
s = search.Search(request, stats=request.stats)
s.append_modifier(search.UriFilter(request))
result = s.run(query)
count = result.total

Expand Down
1 change: 0 additions & 1 deletion h/views/feeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
def _annotations(request):
"""Return the annotations from the search API."""
s = search.Search(request, stats=request.stats)
s.append_modifier(search.UriFilter(request))
result = s.run(MultiDict(request.params))
return fetch_ordered_annotations(request.db, result.annotation_ids)

Expand Down
18 changes: 2 additions & 16 deletions tests/h/activity/query_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,6 @@ def unparse(self):
'bucketing',
'presenters',
'AuthorityFilter',
'UriCombinedWildcardFilter',
'Search',
'TagsAggregation',
'TopLevelAnnotationsFilter',
Expand All @@ -232,7 +231,8 @@ def test_it_creates_a_search_query(self, pyramid_request, Search):
execute(pyramid_request, MultiDict(), self.PAGE_SIZE)

Search.assert_called_once_with(pyramid_request,
stats=pyramid_request.stats)
stats=pyramid_request.stats,
separate_wildcard_uri_keys=False)

def test_it_only_returns_top_level_annotations(self,
pyramid_request,
Expand All @@ -252,16 +252,6 @@ def test_it_only_shows_annotations_from_default_authority(self,
AuthorityFilter.assert_called_once_with(pyramid_request.default_authority)
search.append_modifier.assert_any_call(AuthorityFilter.return_value)

def test_it_recognizes_wildcards_in_uri_url(self,
pyramid_request,
search,
UriCombinedWildcardFilter):

execute(pyramid_request, MultiDict(), self.PAGE_SIZE)

UriCombinedWildcardFilter.assert_called_once_with(pyramid_request)
search.append_modifier.assert_any_call(UriCombinedWildcardFilter.return_value)

def test_it_adds_a_tags_aggregation_to_the_search_query(self,
pyramid_request,
search,
Expand Down Expand Up @@ -631,10 +621,6 @@ def TagsAggregation(self, patch):
def AuthorityFilter(self, patch):
return patch('h.activity.query.AuthorityFilter')

@pytest.fixture
def UriCombinedWildcardFilter(self, patch):
return patch('h.activity.query.UriCombinedWildcardFilter')

@pytest.fixture
def TopLevelAnnotationsFilter(self, patch):
return patch('h.activity.query.TopLevelAnnotationsFilter')
Expand Down
18 changes: 18 additions & 0 deletions tests/h/search/core_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,27 @@
from __future__ import unicode_literals

import datetime
import pytest
from webob.multidict import MultiDict

from h import search


class TestSearch(object):
"""Unit tests for search.Search when no separate_replies argument is given."""
def test_it_defaults_separate_wildcard_uri_keys_to_true(self, pyramid_request, UriCombinedWildcardFilter):
separate_keys = True

search.Search(pyramid_request)

UriCombinedWildcardFilter.assert_called_once_with(pyramid_request, separate_keys)

def test_it_passes_separate_wildcard_uri_keys_to_filter(self, pyramid_request, UriCombinedWildcardFilter):
separate_keys = False

search.Search(pyramid_request, separate_wildcard_uri_keys=separate_keys)

UriCombinedWildcardFilter.assert_called_once_with(pyramid_request, separate_keys)

def test_it_returns_replies_in_annotations_ids(self, matchers, pyramid_request, Annotation):
"""Without separate_replies it returns replies in annotation_ids.
Expand Down Expand Up @@ -123,6 +137,10 @@ def test_it_returns_an_empty_replies_list(self, pyramid_request, Annotation):

assert result.reply_ids == []

@pytest.fixture
def UriCombinedWildcardFilter(self, patch):
return patch('h.search.core.query.UriCombinedWildcardFilter')


class TestSearchWithSeparateReplies(object):
"""Unit tests for search.Search when separate_replies=True is given."""
Expand Down
59 changes: 30 additions & 29 deletions tests/h/search/query_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,17 +391,22 @@ def search(self, search):
return search


class TestUriFilter(object):
class TestUriCombinedWildcardFilter(object):
# TODO - Explicit test of URL normalization (ie. that search normalizes input
# URL using `h.util.uri.normalize` and queries with that).

@pytest.mark.parametrize("field", ("uri", "url"))
def test_filters_by_field(self, search, Annotation, field):
def test_filters_by_field(self, Annotation, get_search, field):
search = get_search()
Annotation(target_uri="https://foo.com")
expected_ids = [Annotation(target_uri="https://bar.com").id]

result = search.run(webob.multidict.MultiDict({field: "https://bar.com"}))

assert sorted(result.annotation_ids) == sorted(expected_ids)

def test_filters_on_whole_url(self, search, Annotation):
def test_filters_on_whole_url(self, Annotation, get_search):
search = get_search()
Annotation(target_uri="http://bar.com/foo")
expected_ids = [Annotation(target_uri="http://bar.com").id,
Annotation(target_uri="http://bar.com/").id]
Expand All @@ -410,23 +415,26 @@ def test_filters_on_whole_url(self, search, Annotation):

assert sorted(result.annotation_ids) == sorted(expected_ids)

def test_filter_matches_invalid_uri(self, search, Annotation):
def test_filter_matches_invalid_uri(self, Annotation, get_search):
search = get_search()
Annotation(target_uri="https://bar.com")
expected_ids = [Annotation(target_uri="invalid-uri").id]

result = search.run(webob.multidict.MultiDict({"uri": "invalid-uri"}))

assert sorted(result.annotation_ids) == sorted(expected_ids)

def test_filters_aliases_http_and_https(self, search, Annotation):
def test_filters_aliases_http_and_https(self, Annotation, get_search):
search = get_search()
expected_ids = [Annotation(target_uri="http://bar.com").id,
Annotation(target_uri="https://bar.com").id]

result = search.run(webob.multidict.MultiDict({"url": "http://bar.com"}))

assert sorted(result.annotation_ids) == sorted(expected_ids)

def test_returns_all_annotations_with_equivalent_uris(self, search, Annotation, storage):
def test_returns_all_annotations_with_equivalent_uris(self, Annotation, get_search, storage):
search = get_search()
# Mark all these uri's as equivalent uri's.
storage.expand_uri.side_effect = lambda _, x: [
"urn:x-pdf:1234",
Expand All @@ -447,7 +455,8 @@ def test_returns_all_annotations_with_equivalent_uris(self, search, Annotation,

assert sorted(result.annotation_ids) == sorted(expected_ids)

def test_ors_multiple_url_uris(self, search, Annotation):
def test_ors_multiple_url_uris(self, Annotation, get_search):
search = get_search()
Annotation(target_uri="http://baz.com")
Annotation(target_uri="https://www.foo.com")
expected_ids = [Annotation(target_uri="https://bar.com").id,
Expand All @@ -464,21 +473,6 @@ def test_ors_multiple_url_uris(self, search, Annotation):

assert sorted(result.annotation_ids) == sorted(expected_ids)

# TODO - Explicit test of URL normalization (ie. that search normalizes input
# URL using `h.util.uri.normalize` and queries with that).

@pytest.fixture
def search(self, search, pyramid_request):
search.append_modifier(query.UriFilter(pyramid_request))
return search

@pytest.fixture
def storage(self, patch):
return patch('h.search.query.storage')


class TestUriCombinedWildcardFilter():

@pytest.mark.parametrize('params,expected_ann_indexes,separate_keys', [
# Test with separate_keys = True (aka uri/url are exact match & wildcard_uri is wildcard match.)
Expand Down Expand Up @@ -508,8 +502,7 @@ class TestUriCombinedWildcardFilter():
])
def test_matches(
self,
search,
pyramid_request,
get_search,
Annotation,
params,
expected_ann_indexes,
Expand All @@ -518,7 +511,7 @@ def test_matches(
"""
All uri matches (wildcard and exact) are OR'd.
"""
search = self._get_search(search, pyramid_request, separate_keys)
search = get_search(separate_keys)

ann_ids = [Annotation(target_uri="http://bar.com?foo").id,
Annotation(target_uri="http://bar.com/baz-457").id,
Expand Down Expand Up @@ -558,10 +551,18 @@ def test_pops_params(self, es_dsl_search, pyramid_request, params, separate_keys
assert "url" not in params
assert "wildcard_uri" not in params

def _get_search(self, search, pyramid_request, separate_keys):
search.append_modifier(query.UriCombinedWildcardFilter(
pyramid_request, separate_keys))
return search
@pytest.fixture
def get_search(self, search, pyramid_request):
def _get_search(separate_keys=True):
search.append_modifier(query.UriCombinedWildcardFilter(
pyramid_request, separate_keys))
return search

return _get_search

@pytest.fixture
def storage(self, patch):
return patch('h.search.query.storage')


@pytest.mark.parametrize('wildcard_uri,expected', [
Expand Down

0 comments on commit b3b814f

Please sign in to comment.