From 419550a105889a0135eed92b408129b931dcef90 Mon Sep 17 00:00:00 2001 From: Daniel Lindsley Date: Thu, 15 Dec 2011 01:15:51 -0600 Subject: [PATCH] Added .values() / .values_list() methods, for fetching less data. Thanks to acdha for the original implementation! --- docs/searchquery_api.rst | 14 +-- docs/searchqueryset_api.rst | 44 ++++++++ haystack/backends/__init__.py | 45 +++++--- haystack/backends/solr_backend.py | 37 ++++--- haystack/query.py | 115 +++++++++++++++++++-- tests/core/tests/query.py | 30 +++++- tests/simple_tests/tests/simple_backend.py | 56 ++++++---- 7 files changed, 273 insertions(+), 68 deletions(-) diff --git a/docs/searchquery_api.rst b/docs/searchquery_api.rst index 392b1426f..5c72ffa08 100644 --- a/docs/searchquery_api.rst +++ b/docs/searchquery_api.rst @@ -43,10 +43,10 @@ different combinations, you should use ``SQ`` objects. Like Example:: from haystack.query import SQ - + # We want "title: Foo AND (tags:bar OR tags:moof)" sqs = SearchQuerySet().filter(title='Foo').filter(SQ(tags='bar') | SQ(tags='moof')) - + # To clean user-provided data: sqs = SearchQuerySet() clean_query = sqs.query.clean(user_query) @@ -109,16 +109,18 @@ A basic (override-able) implementation is provided. ``run`` ~~~~~~~ -.. method:: SearchQuery.run(self, spelling_query=None) +.. method:: SearchQuery.run(self, spelling_query=None, **kwargs) Builds and executes the query. Returns a list of search results. Optionally passes along an alternate query for spelling suggestions. +Optionally passes along more kwargs for controlling the search query. + ``run_mlt`` ~~~~~~~~~~~ -.. method:: SearchQuery.run_mlt(self) +.. method:: SearchQuery.run_mlt(self, **kwargs) Executes the More Like This. Returns a list of search results similar to the provided document (and optionally query). @@ -126,7 +128,7 @@ to the provided document (and optionally query). ``run_raw`` ~~~~~~~~~~~ -.. method:: SearchQuery.run_raw(self) +.. 
method:: SearchQuery.run_raw(self, **kwargs) Executes a raw query. Returns a list of search results. @@ -143,7 +145,7 @@ the results. ``get_results`` ~~~~~~~~~~~~~~~ -.. method:: SearchQuery.get_results(self) +.. method:: SearchQuery.get_results(self, **kwargs) Returns the results received from the backend. diff --git a/docs/searchqueryset_api.rst b/docs/searchqueryset_api.rst index 35b4bb52f..151b25484 100644 --- a/docs/searchqueryset_api.rst +++ b/docs/searchqueryset_api.rst @@ -611,6 +611,50 @@ Example:: suggestion = SearchQuerySet().spelling_suggestion('moar exmples') suggestion # u'more examples' +``values`` +~~~~~~~~~~ + +.. method:: SearchQuerySet.values(self, *fields) + +Returns a list of dictionaries, each containing the key/value pairs for the +result, exactly like Django's ``ValuesQuerySet``. + +This method causes the query to evaluate and run the search if it hasn't already +run. + +You must provide a list of one or more fields as arguments. These fields will +be the ones included in the individual results. + +Example:: + + sqs = SearchQuerySet().auto_query('banana').values('title', 'description') + + +``values_list`` +~~~~~~~~~~~~~~~ + +.. method:: SearchQuerySet.values_list(self, *fields, **kwargs) + +Returns a list of field values as tuples, exactly like Django's +``ValuesListQuerySet``. + +This method causes the query to evaluate and run the search if it hasn't already +run. + +You must provide a list of one or more fields as arguments. These fields will +be the ones included in the individual results. + +You may optionally also provide a ``flat=True`` kwarg, which in the case of a +single field being provided, will return a flat list of that field rather than +a list of tuples. + +Example:: + + sqs = SearchQuerySet().auto_query('banana').values_list('title', 'description') + + # ...or just the titles as a flat list... + sqs = SearchQuerySet().auto_query('banana').values_list('title', flat=True) + .. 
_field-lookups: diff --git a/haystack/backends/__init__.py b/haystack/backends/__init__.py index 41f710361..11d8b34de 100644 --- a/haystack/backends/__init__.py +++ b/haystack/backends/__init__.py @@ -292,6 +292,11 @@ def __init__(self, using=DEFAULT_ALIAS): self.date_facets = {} self.query_facets = [] self.narrow_queries = set() + #: If defined, fields should be a list of field names - no other values + #: will be retrieved so the caller must be careful to include django_ct + #: and django_id when using code which expects those to be included in + #: the results + self.fields = [] self._raw_query = None self._raw_query_params = {} self._more_like_this = False @@ -361,20 +366,26 @@ def build_params(self, spelling_query=None): if self.result_class: kwargs['result_class'] = self.result_class + if self.fields: + kwargs['fields'] = self.fields + return kwargs - def run(self, spelling_query=None): + def run(self, spelling_query=None, **kwargs): """Builds and executes the query. Returns a list of search results.""" final_query = self.build_query() - kwargs = self.build_params(spelling_query=spelling_query) + search_kwargs = self.build_params(spelling_query=spelling_query) + + if kwargs: + search_kwargs.update(kwargs) - results = self.backend.search(final_query, **kwargs) + results = self.backend.search(final_query, **search_kwargs) self._results = results.get('results', []) self._hit_count = results.get('hits', 0) self._facet_counts = self.post_process_facets(results) self._spelling_suggestion = results.get('spelling_suggestion', None) - def run_mlt(self): + def run_mlt(self, **kwargs): """ Executes the More Like This. Returns a list of search results similar to the provided document (and optionally query). 
@@ -382,21 +393,27 @@ def run_mlt(self): if self._more_like_this is False or self._mlt_instance is None: raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.") - kwargs = { + search_kwargs = { 'result_class': self.result_class, } + if kwargs: + search_kwargs.update(kwargs) + additional_query_string = self.build_query() - results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **kwargs) + results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs) self._results = results.get('results', []) self._hit_count = results.get('hits', 0) - def run_raw(self): + def run_raw(self, **kwargs): """Executes a raw query. Returns a list of search results.""" - kwargs = self.build_params() - kwargs.update(self._raw_query_params) + search_kwargs = self.build_params() + search_kwargs.update(self._raw_query_params) - results = self.backend.search(self._raw_query, **kwargs) + if kwargs: + search_kwargs.update(kwargs) + + results = self.backend.search(self._raw_query, **search_kwargs) self._results = results.get('results', []) self._hit_count = results.get('hits', 0) self._facet_counts = results.get('facets', {}) @@ -426,7 +443,7 @@ def get_count(self): return self._hit_count - def get_results(self): + def get_results(self, **kwargs): """ Returns the results received from the backend. @@ -436,12 +453,12 @@ def get_results(self): if self._results is None: if self._more_like_this: # Special case for MLT. - self.run_mlt() + self.run_mlt(**kwargs) elif self._raw_query: # Special case for raw queries. 
- self.run_raw() + self.run_raw(**kwargs) else: - self.run() + self.run(**kwargs) return self._results diff --git a/haystack/backends/solr_backend.py b/haystack/backends/solr_backend.py index 945d54253..9bc43e5a1 100644 --- a/haystack/backends/solr_backend.py +++ b/haystack/backends/solr_backend.py @@ -1,5 +1,4 @@ import logging -import sys from django.conf import settings from django.core.exceptions import ImproperlyConfigured from django.db.models.loading import get_model @@ -120,6 +119,9 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, } if fields: + if isinstance(fields, (list, set)): + fields = " ".join(fields) + kwargs['fl'] = fields if sort_by is not None: @@ -459,10 +461,10 @@ def build_query_fragment(self, field, filter_type, value): return result - def run(self, spelling_query=None): + def run(self, spelling_query=None, **kwargs): """Builds and executes the query. Returns a list of search results.""" final_query = self.build_query() - kwargs = { + search_kwargs = { 'start_offset': self.start_offset, 'result_class': self.result_class, } @@ -476,50 +478,53 @@ def run(self, spelling_query=None): else: order_by_list.append('%s asc' % order_by) - kwargs['sort_by'] = ", ".join(order_by_list) + search_kwargs['sort_by'] = ", ".join(order_by_list) if self.end_offset is not None: - kwargs['end_offset'] = self.end_offset + search_kwargs['end_offset'] = self.end_offset if self.highlight: - kwargs['highlight'] = self.highlight + search_kwargs['highlight'] = self.highlight if self.facets: - kwargs['facets'] = list(self.facets) + search_kwargs['facets'] = list(self.facets) if self.date_facets: - kwargs['date_facets'] = self.date_facets + search_kwargs['date_facets'] = self.date_facets if self.query_facets: - kwargs['query_facets'] = self.query_facets + search_kwargs['query_facets'] = self.query_facets if self.narrow_queries: - kwargs['narrow_queries'] = self.narrow_queries + search_kwargs['narrow_queries'] = self.narrow_queries + + if 
self.fields: + search_kwargs['fields'] = self.fields if spelling_query: - kwargs['spelling_query'] = spelling_query + search_kwargs['spelling_query'] = spelling_query - results = self.backend.search(final_query, **kwargs) + results = self.backend.search(final_query, **search_kwargs) self._results = results.get('results', []) self._hit_count = results.get('hits', 0) self._facet_counts = self.post_process_facets(results) self._spelling_suggestion = results.get('spelling_suggestion', None) - def run_mlt(self): + def run_mlt(self, **kwargs): """Builds and executes the query. Returns a list of search results.""" if self._more_like_this is False or self._mlt_instance is None: raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.") additional_query_string = self.build_query() - kwargs = { + search_kwargs = { 'start_offset': self.start_offset, 'result_class': self.result_class, } if self.end_offset is not None: - kwargs['end_offset'] = self.end_offset - self.start_offset + search_kwargs['end_offset'] = self.end_offset - self.start_offset - results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **kwargs) + results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs) self._results = results.get('results', []) self._hit_count = results.get('hits', 0) diff --git a/haystack/query.py b/haystack/query.py index 9e26c6c10..891015d22 100644 --- a/haystack/query.py +++ b/haystack/query.py @@ -3,7 +3,7 @@ import warnings from haystack import connections, connection_router from haystack.backends import SQ -from haystack.constants import REPR_OUTPUT_SIZE, ITERATOR_LOAD_PER_QUERY, DEFAULT_OPERATOR, DEFAULT_ALIAS +from haystack.constants import REPR_OUTPUT_SIZE, ITERATOR_LOAD_PER_QUERY, DEFAULT_OPERATOR from haystack.exceptions import NotHandled @@ -151,11 +151,11 @@ def _manual_iter(self): if not self._fill_cache(current_position, current_position + ITERATOR_LOAD_PER_QUERY): raise 
StopIteration - def _fill_cache(self, start, end): + def _fill_cache(self, start, end, **kwargs): # Tell the query where to start from and how many we'd like. self.query._reset() self.query.set_limits(start, end) - results = self.query.get_results() + results = self.query.get_results(**kwargs) if results == None or len(results) == 0: return False @@ -175,6 +175,15 @@ def _fill_cache(self, start, end): if end is None: end = self.query.get_count() + to_cache = self.post_process_results(results) + + # Assign by slice. + self._result_cache[start:start + len(to_cache)] = to_cache + return True + + def post_process_results(self, results): + to_cache = [] + # Check if we wish to load all objects. if self._load_all: original_results = [] @@ -198,8 +207,6 @@ def _fill_cache(self, start, end): # Revert to old behaviour loaded_objects[model] = model._default_manager.in_bulk(models_pks[model]) - to_cache = [] - for result in results: if self._load_all: # We have to deal with integer keys being cast from strings @@ -219,10 +226,7 @@ def _fill_cache(self, start, end): to_cache.append(result) - # Assign by slice. - self._result_cache[start:start + len(to_cache)] = to_cache - return True - + return to_cache def __getitem__(self, k): """ @@ -517,6 +521,33 @@ def spelling_suggestion(self, preferred_query=None): clone = self._clone() return clone.query.get_spelling_suggestion(preferred_query) + def values(self, *fields): + """ + Returns a list of dictionaries, each containing the key/value pairs for + the result, exactly like Django's ``ValuesQuerySet``. + """ + qs = self._clone(klass=ValuesSearchQuerySet) + qs._fields.extend(fields) + return qs + + def values_list(self, *fields, **kwargs): + """ + Returns a list of field values as tuples, exactly like Django's + ``ValuesListQuerySet``. + + Optionally accepts a ``flat=True`` kwarg, which in the case of a + single field being provided, will return a flat list of that field + rather than a list of tuples.
+ """ + flat = kwargs.pop("flat", False) + + if flat and len(fields) > 1: + raise TypeError("'flat' is not valid when values_list is called with more than one field.") + + qs = self._clone(klass=ValuesListSearchQuerySet) + qs._fields.extend(fields) + qs._flat = flat + return qs # Utility methods. @@ -554,6 +585,72 @@ def facet_counts(self): return {} +class ValuesListSearchQuerySet(SearchQuerySet): + """ + A ``SearchQuerySet`` which returns a list of field values as tuples, exactly + like Django's ``ValuesListQuerySet``. + """ + def __init__(self, *args, **kwargs): + super(ValuesListSearchQuerySet, self).__init__(*args, **kwargs) + self._flat = False + self._fields = [] + + # Removing this dependency would require refactoring much of the backend + # code (_process_results, etc.) and these aren't large enough to make it + # an immediate priority: + self._internal_fields = ['id', 'django_ct', 'django_id', 'score'] + + def _clone(self, klass=None): + clone = super(ValuesListSearchQuerySet, self)._clone(klass=klass) + clone._fields = self._fields + clone._flat = self._flat + return clone + + def _fill_cache(self, start, end): + query_fields = set(self._internal_fields) + query_fields.update(self._fields) + kwargs = { + 'fields': query_fields + } + return super(ValuesListSearchQuerySet, self)._fill_cache(start, end, **kwargs) + + def post_process_results(self, results): + to_cache = [] + + if self._flat: + accum = to_cache.extend + else: + accum = to_cache.append + + for result in results: + accum([getattr(result, i, None) for i in self._fields]) + + return to_cache + + +class ValuesSearchQuerySet(ValuesListSearchQuerySet): + """ + A ``SearchQuerySet`` which returns a list of dictionaries, each containing + the key/value pairs for the result, exactly like Django's + ``ValuesQuerySet``. 
+ """ + def _fill_cache(self, start, end): + query_fields = set(self._internal_fields) + query_fields.update(self._fields) + kwargs = { + 'fields': query_fields + } + return super(ValuesListSearchQuerySet, self)._fill_cache(start, end, **kwargs) + + def post_process_results(self, results): + to_cache = [] + + for result in results: + to_cache.append(dict((i, getattr(result, i, None)) for i in self._fields)) + + return to_cache + + class RelatedSearchQuerySet(SearchQuerySet): """ A variant of the SearchQuerySet that can handle `load_all_queryset`s. diff --git a/tests/core/tests/query.py b/tests/core/tests/query.py index 4798852da..bba96ac3e 100644 --- a/tests/core/tests/query.py +++ b/tests/core/tests/query.py @@ -7,7 +7,8 @@ from haystack.exceptions import FacetingError from haystack import indexes from haystack.models import SearchResult -from haystack.query import SearchQuerySet, EmptySearchQuerySet +from haystack.query import (SearchQuerySet, EmptySearchQuerySet, + ValuesSearchQuerySet, ValuesListSearchQuerySet) from haystack.utils.loading import UnifiedIndex from core.models import MockModel, AnotherMockModel, CharPKMockModel, AFifthMockModel from core.tests.indexes import ReadQuerySetTestSearchIndex, GhettoAFifthMockModelSearchIndex, TextReadQuerySetTestSearchIndex @@ -726,6 +727,33 @@ def test___or__(self): self.assertEqual(len(sqs.query.query_filter), 2) +class ValuesQuerySetTestCase(SearchQuerySetTestCase): + def test_values_sqs(self): + sqs = self.msqs.auto_query("test").values("id") + self.assert_(isinstance(sqs, ValuesSearchQuerySet)) + + # We'll do a basic test to confirm that slicing works as expected: + self.assert_(isinstance(sqs[0], dict)) + self.assert_(isinstance(sqs[0:5][0], dict)) + + def test_valueslist_sqs(self): + sqs = self.msqs.auto_query("test").values_list("id") + + self.assert_(isinstance(sqs, ValuesListSearchQuerySet)) + self.assert_(isinstance(sqs[0], (list, tuple))) + self.assert_(isinstance(sqs[0:1][0], (list, tuple))) + + 
self.assertRaises(TypeError, self.msqs.auto_query("test").values_list, "id", "score", flat=True) + + flat_sqs = self.msqs.auto_query("test").values_list("id", flat=True) + self.assert_(isinstance(sqs, ValuesListSearchQuerySet)) + + # Note that this will actually be None because a mocked sqs lacks + # anything else: + self.assert_(flat_sqs[0] is None) + self.assert_(flat_sqs[0:1][0] is None) + + class EmptySearchQuerySetTestCase(TestCase): def setUp(self): super(EmptySearchQuerySetTestCase, self).setUp() diff --git a/tests/simple_tests/tests/simple_backend.py b/tests/simple_tests/tests/simple_backend.py index d9b5120d9..0f98c1eaa 100644 --- a/tests/simple_tests/tests/simple_backend.py +++ b/tests/simple_tests/tests/simple_backend.py @@ -13,56 +13,56 @@ class SimpleMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) name = indexes.CharField(model_attr='author', faceted=True) pub_date = indexes.DateField(model_attr='pub_date') - + def get_model(self): return MockModel class SimpleSearchBackendTestCase(TestCase): fixtures = ['bulk_data.json'] - + def setUp(self): super(SimpleSearchBackendTestCase, self).setUp() - + self.backend = connections['default'].get_backend() self.index = connections['default'].get_unified_index().get_index(MockModel) self.sample_objs = MockModel.objects.all() - + def test_update(self): self.backend.update(self.index, self.sample_objs) - + def test_remove(self): self.backend.remove(self.sample_objs[0]) - + def test_clear(self): self.backend.clear() - + def test_search(self): # No query string should always yield zero results. 
self.assertEqual(self.backend.search(u''), {'hits': 0, 'results': []}) - + self.assertEqual(self.backend.search(u'*')['hits'], 23) self.assertEqual([result.pk for result in self.backend.search(u'*')['results']], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]) - + self.assertEqual(self.backend.search(u'daniel')['hits'], 23) self.assertEqual([result.pk for result in self.backend.search(u'daniel')['results']], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]) - + self.assertEqual(self.backend.search(u'should be a string')['hits'], 1) self.assertEqual([result.pk for result in self.backend.search(u'should be a string')['results']], [8]) # Ensure the results are ``SearchResult`` instances... self.assertEqual(self.backend.search(u'should be a string')['results'][0].score, 0) - + self.assertEqual(self.backend.search(u'index document')['hits'], 6) self.assertEqual([result.pk for result in self.backend.search(u'index document')['results']], [2, 3, 15, 16, 17, 18]) - + # Regression-ville self.assertEqual([result.object.id for result in self.backend.search(u'index document')['results']], [2, 3, 15, 16, 17, 18]) self.assertEqual(self.backend.search(u'index document')['results'][0].model, MockModel) - + # No support for spelling suggestions self.assertEqual(self.backend.search(u'Indx')['hits'], 0) self.assertFalse(self.backend.search(u'Indx').get('spelling_suggestion')) - + # No support for facets self.assertEqual(self.backend.search(u'', facets=['name']), {'hits': 0, 'results': []}) self.assertEqual(self.backend.search(u'daniel', facets=['name'])['hits'], 23) @@ -72,27 +72,27 @@ def test_search(self): self.assertEqual(self.backend.search(u'daniel', query_facets={'name': '[* TO e]'})['hits'], 23) self.assertFalse(self.backend.search(u'').get('facets')) self.assertFalse(self.backend.search(u'daniel').get('facets')) - + # Note that only textual-fields are supported. 
self.assertEqual(self.backend.search(u'2009-06-18')['hits'], 0) - + # Ensure that swapping the ``result_class`` works. self.assertTrue(isinstance(self.backend.search(u'index document', result_class=MockSearchResult)['results'][0], MockSearchResult)) - + def test_more_like_this(self): self.backend.update(self.index, self.sample_objs) self.assertEqual(self.backend.search(u'*')['hits'], 23) - + # Unsupported by 'simple'. Should see empty results. self.assertEqual(self.backend.more_like_this(self.sample_objs[0])['hits'], 0) class LiveSimpleSearchQuerySetTestCase(TestCase): fixtures = ['bulk_data.json'] - + def setUp(self): super(LiveSimpleSearchQuerySetTestCase, self).setUp() - + # Stow. self.old_debug = settings.DEBUG settings.DEBUG = True @@ -101,7 +101,7 @@ def setUp(self): self.smmi = SimpleMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['default']._index = self.ui - + self.sample_objs = MockModel.objects.all() self.sqs = SearchQuerySet() @@ -110,7 +110,7 @@ def tearDown(self): connections['default']._index = self.old_ui settings.DEBUG = self.old_debug super(LiveSimpleSearchQuerySetTestCase, self).tearDown() - + def test_general_queries(self): # For now, just make sure these don't throw an exception. # They won't work until the simple backend is improved. @@ -119,3 +119,15 @@ def test_general_queries(self): self.assertTrue(len(self.sqs.exclude(name='daniel')) > 0) self.assertTrue(len(self.sqs.order_by('-pub_date')) > 0) + def test_values_queries(self): + sqs = self.sqs.auto_query('daniel') + self.assertTrue(len(sqs) > 0) + + flat_scores = sqs.values_list("score", flat=True) + self.assertEqual(flat_scores[0], 0) + + scores = sqs.values_list("id", "score") + self.assertEqual(scores[0], [1, 0]) + + scores_dict = sqs.values("id", "score") + self.assertEqual(scores_dict[0], {"id": 1, "score": 0})