From ee04fe4807e29ed57f93bbe5bfd17e218106aed7 Mon Sep 17 00:00:00 2001 From: Matt DeBoard Date: Fri, 15 Jun 2012 14:31:02 -0400 Subject: [PATCH] Refactored ``SearchBackend.search`` so that kwarg-generation operations are in a discrete method. This makes it much simpler to subclass ``SearchBackend`` (& the engine-specific variants) to add support for new parameters. --- AUTHORS | 1 + haystack/backends/__init__.py | 20 +++++---- haystack/backends/elasticsearch_backend.py | 49 ++++++++++++---------- haystack/backends/simple_backend.py | 11 ++--- haystack/backends/solr_backend.py | 43 ++++++++++--------- tests/core/tests/mocks.py | 49 ++++++++++------------ 6 files changed, 90 insertions(+), 83 deletions(-) diff --git a/AUTHORS b/AUTHORS index a88875ea5..d7e2915d7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -63,3 +63,4 @@ Thanks to * Alex Vidal (avidal) for a patch allowing developers to override the queryset used for update operations. * Igor Támara (ikks) for a patch related to Unicode ``verbose_name_plural``. * Dan Helfman (witten) for a patch related to highlighting. + * Matt DeBoard for refactor of ``SolrSearchBackend.search`` method to allow simpler extension of the class. diff --git a/haystack/backends/__init__.py b/haystack/backends/__init__.py index 20d77a8e5..1ca3cb522 100644 --- a/haystack/backends/__init__.py +++ b/haystack/backends/__init__.py @@ -6,9 +6,8 @@ from django.db.models.base import ModelBase from django.utils import tree from django.utils.encoding import force_unicode -from haystack.constants import DJANGO_CT, VALID_FILTERS, FILTER_SEPARATOR, DEFAULT_ALIAS, DEFAULT_OPERATOR +from haystack.constants import VALID_FILTERS, FILTER_SEPARATOR, DEFAULT_ALIAS from haystack.exceptions import MoreLikeThisError, FacetingError -from haystack.inputs import Clean from haystack.models import SearchResult from haystack.utils.loading import UnifiedIndex @@ -103,11 +102,7 @@ def clear(self, models=[], commit=True): raise NotImplementedError @log_query - def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, - fields='', highlight=False, facets=None, date_facets=None, query_facets=None, - narrow_queries=None, spelling_query=None, within=None, - dwithin=None, distance_point=None, models=None, - limit_to_registered_models=None, result_class=None, **kwargs): + def search(self, query_string, **kwargs): """ Takes a query to search on and returns dictionary. @@ -123,6 +118,17 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, """ raise NotImplementedError + def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None, + fields='', highlight=False, facets=None, + date_facets=None, query_facets=None, + narrow_queries=None, spelling_query=None, + within=None, dwithin=None, distance_point=None, + models=None, limit_to_registered_models=None, + result_class=None): + # A convenience method most backends should include in order to make + # extension easier. + raise NotImplementedError + def prep_value(self, value): """ Hook to give the backend a chance to prep an attribute value before diff --git a/haystack/backends/elasticsearch_backend.py b/haystack/backends/elasticsearch_backend.py index b41cb0262..47fb75edb 100644 --- a/haystack/backends/elasticsearch_backend.py +++ b/haystack/backends/elasticsearch_backend.py @@ -231,21 +231,13 @@ def clear(self, models=[], commit=True): else: self.log.error("Failed to clear Elasticsearch index: %s", e) - @log_query - def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, - fields='', highlight=False, facets=None, date_facets=None, query_facets=None, - narrow_queries=None, spelling_query=None, within=None, - dwithin=None, distance_point=None, models=None, - limit_to_registered_models=None, result_class=None, **kwargs): - if len(query_string) == 0: - return { - 'results': [], - 'hits': 0, - } - - if not self.setup_complete: - self.setup() - + def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None, + fields='', highlight=False, facets=None, + date_facets=None, query_facets=None, + narrow_queries=None, spelling_query=None, + within=None, dwithin=None, distance_point=None, + models=None, limit_to_registered_models=None, + result_class=None): index = haystack.connections[self.connection_alias].get_unified_index() content_field = index.document_field @@ -278,8 +270,6 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, }, } - geo_sort = False - if fields: if isinstance(fields, (list, set)): fields = " ".join(fields) @@ -451,16 +441,31 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, if not kwargs['query']['filtered'].get('filter'): kwargs['query'] = kwargs['query']['filtered']['query'] + return kwargs + + @log_query + def search(self, query_string, **kwargs): + if len(query_string) == 0: + return { + 'results': [], + 'hits': 0, + } + + if not self.setup_complete: + self.setup() + + search_kwargs = self.build_search_kwargs(query_string, **kwargs) + # Because Elasticsearch. query_params = { - 'from': start_offset, + 'from': kwargs.get('start_offset', 0), } - if end_offset is not None and end_offset > start_offset: - query_params['size'] = end_offset - start_offset + if kwargs.get('end_offset') is not None and kwargs.get('end_offset') > kwargs.get('start_offset', 0): + query_params['size'] = kwargs.get('end_offset') - kwargs.get('start_offset', 0) try: - raw_results = self.conn.search(None, kwargs, indexes=[self.index_name], doc_types=['modelresult'], **query_params) + raw_results = self.conn.search(None, search_kwargs, indexes=[self.index_name], doc_types=['modelresult'], **query_params) except (requests.RequestException, pyelasticsearch.ElasticSearchError), e: if not self.silently_fail: raise @@ -468,7 +473,7 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e) raw_results = {} - return self._process_results(raw_results, highlight=highlight, result_class=result_class) + return self._process_results(raw_results, highlight=kwargs.get('highlight'), result_class=kwargs.get('result_class', SearchResult)) def more_like_this(self, model_instance, additional_query_string=None, start_offset=0, end_offset=None, models=None, diff --git a/haystack/backends/simple_backend.py b/haystack/backends/simple_backend.py index 671060eba..8f5b7d661 100644 --- a/haystack/backends/simple_backend.py +++ b/haystack/backends/simple_backend.py @@ -41,16 +41,13 @@ def clear(self, models=[], commit=True): logger.warning('clear is not implemented in this backend') @log_query - def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, - fields='', highlight=False, facets=None, date_facets=None, query_facets=None, - narrow_queries=None, spelling_query=None, within=None, - dwithin=None, distance_point=None, models=None, - limit_to_registered_models=None, result_class=None, **kwargs): + def search(self, query_string, **kwargs): hits = 0 results = [] + result_class = SearchResult - if result_class is None: - result_class = SearchResult + if kwargs.get('result_class'): + result_class = kwargs['result_class'] if query_string: for model in connections[self.connection_alias].get_unified_index().get_indexed_models(): diff --git a/haystack/backends/solr_backend.py b/haystack/backends/solr_backend.py index f6b5b6038..258458dd9 100644 --- a/haystack/backends/solr_backend.py +++ b/haystack/backends/solr_backend.py @@ -114,21 +114,34 @@ def clear(self, models=[], commit=True): self.log.error("Failed to clear Solr index: %s", e) @log_query - def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, - fields='', highlight=False, facets=None, date_facets=None, query_facets=None, - narrow_queries=None, spelling_query=None, within=None, - dwithin=None, distance_point=None, models=None, - limit_to_registered_models=None, result_class=None, **kwargs): + def search(self, query_string, **kwargs): if len(query_string) == 0: return { 'results': [], 'hits': 0, } - kwargs = { - 'fl': '* score', - } - geo_sort = False + search_kwargs = self.build_search_kwargs(query_string, **kwargs) + + try: + raw_results = self.conn.search(query_string, **search_kwargs) + except (IOError, SolrError), e: + if not self.silently_fail: + raise + + self.log.error("Failed to query Solr using '%s': %s", query_string, e) + raw_results = EmptyResults() + + return self._process_results(raw_results, highlight=kwargs.get('highlight'), result_class=kwargs.get('result_class', SearchResult), distance_point=kwargs.get('distance_point')) + + def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None, + fields='', highlight=False, facets=None, + date_facets=None, query_facets=None, + narrow_queries=None, spelling_query=None, + within=None, dwithin=None, distance_point=None, + models=None, limit_to_registered_models=None, + result_class=None): + kwargs = {'fl': '* score'} if fields: if isinstance(fields, (list, set)): @@ -142,7 +155,6 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, lng, lat = distance_point['point'].get_coords() kwargs['sfield'] = distance_point['field'] kwargs['pt'] = '%s,%s' % (lat, lng) - geo_sort = True if sort_by == 'distance asc': kwargs['sort'] = 'geodist() asc' @@ -245,16 +257,7 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, # kwargs['fl'] += ' _dist_:geodist()' pass - try: - raw_results = self.conn.search(query_string, **kwargs) - except (IOError, SolrError), e: - if not self.silently_fail: - raise - - self.log.error("Failed to query Solr using '%s': %s", query_string, e) - raw_results = EmptyResults() - - return self._process_results(raw_results, highlight=highlight, result_class=result_class, distance_point=distance_point) + return kwargs def more_like_this(self, model_instance, additional_query_string=None, start_offset=0, end_offset=None, models=None, diff --git a/tests/core/tests/mocks.py b/tests/core/tests/mocks.py index 9fd825734..ad63346c0 100644 --- a/tests/core/tests/mocks.py +++ b/tests/core/tests/mocks.py @@ -1,16 +1,14 @@ from django.db.models.loading import get_model -from django.utils.encoding import force_unicode from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query from haystack.models import SearchResult from haystack.routers import BaseRouter from haystack.utils import get_identifier -from core.models import MockModel class MockMasterSlaveRouter(BaseRouter): def for_read(self, **hints): return 'slave' - + def for_write(self, **hints): return 'master' @@ -19,13 +17,13 @@ class MockPassthroughRouter(BaseRouter): def for_read(self, **hints): if hints.get('pass_through') is False: return 'pass' - + return None - + def for_write(self, **hints): if hints.get('pass_through') is False: return 'pass' - + return None @@ -39,7 +37,7 @@ def __init__(self, app_label, model_name, pk, score, **kwargs): class MockSearchBackend(BaseSearchBackend): model_name = 'mockmodel' - + def update(self, index, iterable, commit=True): global MOCK_INDEX_DATA for obj in iterable: @@ -53,32 +51,29 @@ def remove(self, obj, commit=True): def clear(self, models=[], commit=True): global MOCK_INDEX_DATA MOCK_INDEX_DATA = {} - + @log_query - def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, - fields='', highlight=False, facets=None, date_facets=None, query_facets=None, - narrow_queries=None, spelling_query=None, - limit_to_registered_models=None, result_class=None, **kwargs): + def search(self, query_string, **kwargs): from haystack import connections global MOCK_INDEX_DATA results = [] hits = len(MOCK_INDEX_DATA) indexed_models = connections['default'].get_unified_index().get_indexed_models() - + def junk_sort(key): app, model, pk = key.split('.') - + if pk.isdigit(): return int(pk) else: return ord(pk[0]) - + sliced = sorted(MOCK_INDEX_DATA, key=junk_sort) - + for result in sliced: app_label, model_name, pk = result.split('.') model = get_model(app_label, model_name) - + if model: if model in indexed_models: results.append(MockSearchResult(app_label, model_name, pk, 1 - (i / 100.0))) @@ -86,12 +81,12 @@ def junk_sort(key): hits -= 1 else: hits -= 1 - + return { - 'results': results[start_offset:end_offset], + 'results': results[kwargs.get('start_offset'):kwargs.get('end_offset')], 'hits': hits, } - + def more_like_this(self, model_instance, additional_query_string=None, result_class=None): return self.search(query_string='*') @@ -111,32 +106,32 @@ class MixedMockSearchBackend(MockSearchBackend): def search(self, query_string, **kwargs): if kwargs.get('end_offset') and kwargs['end_offset'] > 30: kwargs['end_offset'] = 30 - + result_info = super(MixedMockSearchBackend, self).search(query_string, **kwargs) result_info['hits'] = 30 - + # Remove search results from other models. temp_results = [] - + for result in result_info['results']: if not int(result.pk) in (9, 13, 14): # MockSearchResult('core', 'AnotherMockModel', 9, .1) # MockSearchResult('core', 'AnotherMockModel', 13, .1) # MockSearchResult('core', 'NonexistentMockModel', 14, .1) temp_results.append(result) - + result_info['results'] = temp_results - + return result_info class MockSearchQuery(BaseSearchQuery): def build_query(self): return '' - + def clean(self, query_fragment): return query_fragment - + # def run_mlt(self): # # To simulate the chunking behavior of a regular search, return a slice # # of our results using start/end offset.