Permalink
Browse files

Moved the ``build_queryset`` method to ``SearchIndex``.

This method is used to build the queryset for indexing operations. It is copied
from the build_queryset function that lived in the update_index management
command.

Making this change allows developers to modify the queryset used for indexing
even when a date filter is necessary. See `tests/core/tests/indexes.py` for tests.
  • Loading branch information...
1 parent 259274e commit ec1f917588b3be1ce89f077a6c3c72be1551f84d @avidal avidal committed with toastdriven Feb 7, 2012
Showing with 98 additions and 37 deletions.
  1. +1 −0 AUTHORS
  2. +13 −0 docs/searchindex_api.rst
  3. +45 −0 haystack/indexes.py
  4. +2 −36 haystack/management/commands/update_index.py
  5. +36 −0 tests/core/tests/indexes.py
  6. +1 −1 tests/settings.py
View
@@ -60,3 +60,4 @@ Thanks to
* A significant portion of the input types feature.
* Aram Dulyan (Aramgutang) for fixing the included admin class to be Django 1.4 compatible.
* Honza Kral (HonzaKral) for various Elasticsearch tweaks & testing.
+ * Alex Vidal (avidal) for a patch allowing developers to override the queryset used for update operations.
View
@@ -402,6 +402,19 @@ Get the default QuerySet for read actions.
Subclasses can override this method to work with other managers.
Useful when working with default managers that filter some objects.
+``build_queryset``
+-------------------
+
+.. method:: SearchIndex.build_queryset(self, start_date=None, end_date=None)
+
+Get the default QuerySet to index when doing an index update.
+
+Subclasses can override this method to take into account related
+model modification times.
+
+The default is to use ``SearchIndex.index_queryset`` and filter
+based on ``SearchIndex.get_updated_field``.
+
``prepare``
-----------
View
@@ -1,6 +1,7 @@
import copy
import threading
import sys
+import warnings
from django.db.models import signals
from django.utils.encoding import force_unicode
from haystack import connections, connection_router
@@ -135,6 +136,50 @@ def read_queryset(self):
"""
return self.index_queryset()
+ def build_queryset(self, start_date=None, end_date=None):
+ """
+ Get the default QuerySet to index when doing an index update.
+
+ Subclasses can override this method to take into account related
+ model modification times.
+
+ The default is to use ``SearchIndex.index_queryset`` and filter
+ based on ``SearchIndex.get_updated_field``.
+ """
+ extra_lookup_kwargs = {}
+ model = self.get_model()
+ updated_field = self.get_updated_field()
+
+ update_field_msg = ("No updated date field found for '%s' "
+ "- not restricting by age.") % model.__name__
+
+ if start_date:
+ if updated_field:
+ extra_lookup_kwargs['%s__gte' % updated_field] = start_date
+ else:
+ warnings.warn(update_field_msg)
+
+ if end_date:
+ if updated_field:
+ extra_lookup_kwargs['%s__lte' % updated_field] = end_date
+ else:
+ warnings.warn(update_field_msg)
+
+ index_qs = None
+
+ if hasattr(self, 'get_queryset'):
+ warnings.warn("'SearchIndex.get_queryset' was deprecated in Haystack v2. Please rename the method 'index_queryset'.")
+ index_qs = self.get_queryset()
+ else:
+ index_qs = self.index_queryset()
+
+ if not hasattr(index_qs, 'filter'):
+ raise ImproperlyConfigured("The '%r' class must return a 'QuerySet' in the 'index_queryset' method." % self)
+
+ # `.select_related()` seems like a good idea here but can fail on
+ # nullable `ForeignKey` as well as what seems like other cases.
+ return index_qs.filter(**extra_lookup_kwargs).order_by(model._meta.pk.name)
+
def prepare(self, obj):
"""
Fetches and adds/alters data before indexing.
@@ -44,46 +44,12 @@ def worker(bits):
backend = haystack_connections[using].get_backend()
if func == 'do_update':
- qs = build_queryset(index, model, start_date=start_date, end_date=end_date, verbosity=verbosity)
+ qs = index.build_queryset(start_date=start_date, end_date=end_date)
do_update(backend, index, qs, start, end, total, verbosity=verbosity)
elif bits[0] == 'do_remove':
do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=verbosity)
-def build_queryset(index, model, start_date=None, end_date=None, verbosity=1):
- extra_lookup_kwargs = {}
- updated_field = index.get_updated_field()
-
- if start_date:
- if updated_field:
- extra_lookup_kwargs['%s__gte' % updated_field] = start_date
- else:
- if verbosity >= 2:
- print "No updated date field found for '%s' - not restricting by age." % model.__name__
-
- if end_date:
- if updated_field:
- extra_lookup_kwargs['%s__lte' % updated_field] = end_date
- else:
- if verbosity >= 2:
- print "No updated date field found for '%s' - not restricting by age." % model.__name__
-
- index_qs = None
-
- if hasattr(index, 'get_queryset'):
- warnings.warn("'SearchIndex.get_queryset' was deprecated in Haystack v2. Please rename the method 'index_queryset'.")
- index_qs = index.get_queryset()
- else:
- index_qs = index.index_queryset()
-
- if not hasattr(index_qs, 'filter'):
- raise ImproperlyConfigured("The '%r' class must return a 'QuerySet' in the 'index_queryset' method." % index)
-
- # `.select_related()` seems like a good idea here but can fail on
- # nullable `ForeignKey` as well as what seems like other cases.
- return index_qs.filter(**extra_lookup_kwargs).order_by(model._meta.pk.name)
-
-
def do_update(backend, index, qs, start, end, total, verbosity=1):
# Get a clone of the QuerySet so that the cache doesn't bloat up
# in memory. Useful when reindexing large amounts of data.
@@ -238,7 +204,7 @@ def handle_label(self, label, **options):
print "Skipping '%s' - no index." % model
continue
- qs = build_queryset(index, model, start_date=self.start_date, end_date=self.end_date, verbosity=self.verbosity)
+ qs = index.build_queryset(start_date=self.start_date, end_date=self.end_date)
total = qs.count()
if self.verbosity >= 1:
@@ -73,6 +73,9 @@ def index_queryset(self):
def read_queryset(self):
return MockModel.objects.filter(author__in=['daniel1', 'daniel3'])
+ def build_queryset(self, start_date=None, end_date=None):
+ return MockModel.objects.filter(author__in=['daniel1', 'daniel3'])
+
class GoodNullableMockSearchIndex(indexes.SearchIndex, indexes.Indexable):
text = indexes.CharField(document=True, use_template=True)
@@ -220,6 +223,39 @@ def test_index_queryset(self):
def test_read_queryset(self):
self.assertEqual(len(self.cmi.read_queryset()), 2)
+ def test_build_queryset(self):
+ # The custom SearchIndex.build_queryset returns the same records as
+ # the read_queryset
+ self.assertEqual(len(self.cmi.build_queryset()), 2)
+
+ # Store a reference to the original method
+ old_guf = self.mi.__class__.get_updated_field
+
+ self.mi.__class__.get_updated_field = lambda self: 'pub_date'
+
+ # With an updated field, we should get filtered results
+ sd = datetime.datetime(2009, 3, 17, 7, 0)
+ self.assertEqual(len(self.mi.build_queryset(start_date=sd)), 2)
+
+ ed = datetime.datetime(2009, 3, 17, 7, 59)
+ self.assertEqual(len(self.mi.build_queryset(end_date=ed)), 2)
+
+ sd = datetime.datetime(2009, 3, 17, 6, 0)
+ ed = datetime.datetime(2009, 3, 17, 6, 59)
+ self.assertEqual(len(self.mi.build_queryset(start_date=sd,
+ end_date=ed)), 1)
+
+ # Remove the updated field for the next test
+ del self.mi.__class__.get_updated_field
+
+ # The default should return all 3 even if we specify a start date
+ # because there is no updated field specified
+ self.assertEqual(len(self.mi.build_queryset(start_date=sd)), 3)
+
+ # Restore the original attribute
+ self.mi.__class__.get_updated_field = old_guf
+
+
def test_prepare(self):
mock = MockModel()
mock.pk = 20
View
@@ -1,7 +1,7 @@
# Haystack settings for running tests.
DATABASES = {
'default': {
- 'ENGINE': 'sqlite3',
+ 'ENGINE': 'django.db.backends.sqlite3',
'NAME': 'haystack_tests.db',
}
}

0 comments on commit ec1f917

Please sign in to comment.