From 03279e331a452549ff18f0a99e225eb95c7de074 Mon Sep 17 00:00:00 2001 From: Daniel Lindsley Date: Wed, 27 Oct 2010 19:42:15 -0500 Subject: [PATCH] Ensure that when fetching the length of a result set that the whole index isn't consumed (especially on Whoosh & Xapian). --- haystack/backends/__init__.py | 5 +++++ haystack/backends/whoosh_backend.py | 3 --- tests/core/tests/mocks.py | 4 ++-- tests/core/tests/query.py | 1 + tests/whoosh_tests/tests/whoosh_backend.py | 19 +++++++++++++++++++ 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/haystack/backends/__init__.py b/haystack/backends/__init__.py index 3c0c0c5d6..f4e60a253 100644 --- a/haystack/backends/__init__.py +++ b/haystack/backends/__init__.py @@ -396,6 +396,11 @@ def get_count(self): the results. """ if self._hit_count is None: + # Limit the slice to 10 so we get a count without consuming + # everything. + if not self.end_offset: + self.end_offset = 10 + if self._more_like_this: # Special case for MLT. self.run_mlt() diff --git a/haystack/backends/whoosh_backend.py b/haystack/backends/whoosh_backend.py index 7570fd6ec..696d95e68 100644 --- a/haystack/backends/whoosh_backend.py +++ b/haystack/backends/whoosh_backend.py @@ -344,9 +344,6 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, if narrowed_results: raw_results.filter(narrowed_results) - # Make sure we don't process bits we already have. - # import pdb; pdb.set_trace() - # Determine the page. page_num = 0 diff --git a/tests/core/tests/mocks.py b/tests/core/tests/mocks.py index fadbac1f3..20eb653a7 100644 --- a/tests/core/tests/mocks.py +++ b/tests/core/tests/mocks.py @@ -43,7 +43,7 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, hits = len(self.mock_search_results) indexed_models = site.get_indexed_models() - sliced = self.mock_search_results[start_offset:end_offset] + sliced = self.mock_search_results for result in sliced: model = get_model('core', self.model_name) @@ -57,7 +57,7 @@ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, hits -= 1 return { - 'results': results, + 'results': results[start_offset:end_offset], 'hits': hits, } diff --git a/tests/core/tests/query.py b/tests/core/tests/query.py index 561e9bae6..def3ac90f 100644 --- a/tests/core/tests/query.py +++ b/tests/core/tests/query.py @@ -478,6 +478,7 @@ def test_load_all(self): # If nothing is registered, you get nothing. haystack.site.unregister(MockModel) + haystack.site.unregister(CharPKMockModel) sqs = self.msqs.load_all() self.assert_(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs), 0) diff --git a/tests/whoosh_tests/tests/whoosh_backend.py b/tests/whoosh_tests/tests/whoosh_backend.py index cb799f860..845d454f9 100644 --- a/tests/whoosh_tests/tests/whoosh_backend.py +++ b/tests/whoosh_tests/tests/whoosh_backend.py @@ -580,6 +580,25 @@ def test_cache_is_full(self): fire_the_iterator_and_fill_cache = [result for result in results] self.assertEqual(results._cache_is_full(), True) self.assertEqual(len(backends.queries), 1) + + def test_count(self): + more_samples = [] + + for i in xrange(1, 50): + mock = MockModel() + mock.id = i + mock.author = 'daniel%s' % i + mock.pub_date = date(2009, 2, 25) - timedelta(days=i) + more_samples.append(mock) + + self.sb.update(self.smmi, more_samples) + + backends.reset_search_queries() + self.assertEqual(len(backends.queries), 0) + results = self.sqs.all() + self.assertEqual(len(results), 49) + self.assertEqual(results._cache_is_full(), False) + self.assertEqual(len(backends.queries), 1) class WhooshRoundTripSearchIndex(SearchIndex):