Skip to content

Commit

Permalink
Moved the build_queryset method to SearchIndex.
Browse files Browse the repository at this point in the history
This method is used to build the queryset for indexing operations. It is copied
from the build_queryset function that lived in the update_index management
command.

Making this change allows developers to modify the queryset used for indexing
even when a date filter is necessary. See `tests/core/indexes.py` for tests.
  • Loading branch information
avidal authored and toastdriven committed Mar 16, 2012
1 parent 259274e commit ec1f917
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 37 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Expand Up @@ -60,3 +60,4 @@ Thanks to
* A significant portion of the input types feature. * A significant portion of the input types feature.
* Aram Dulyan (Aramgutang) for fixing the included admin class to be Django 1.4 compatible. * Aram Dulyan (Aramgutang) for fixing the included admin class to be Django 1.4 compatible.
* Honza Kral (HonzaKral) for various Elasticsearch tweaks & testing. * Honza Kral (HonzaKral) for various Elasticsearch tweaks & testing.
* Alex Vidal (avidal) for a patch allowing developers to override the queryset used for update operations.
13 changes: 13 additions & 0 deletions docs/searchindex_api.rst
Expand Up @@ -402,6 +402,19 @@ Get the default QuerySet for read actions.
Subclasses can override this method to work with other managers. Subclasses can override this method to work with other managers.
Useful when working with default managers that filter some objects. Useful when working with default managers that filter some objects.


``build_queryset``
-------------------

.. method:: SearchIndex.build_queryset(self, start_date=None, end_date=None)

Get the default QuerySet to index when doing an index update.

Subclasses can override this method to take into account related
model modification times.

The default is to use ``SearchIndex.index_queryset`` and filter
based on ``SearchIndex.get_updated_field``.

``prepare`` ``prepare``
----------- -----------


Expand Down
45 changes: 45 additions & 0 deletions haystack/indexes.py
@@ -1,6 +1,7 @@
import copy import copy
import threading import threading
import sys import sys
import warnings
from django.db.models import signals from django.db.models import signals
from django.utils.encoding import force_unicode from django.utils.encoding import force_unicode
from haystack import connections, connection_router from haystack import connections, connection_router
Expand Down Expand Up @@ -135,6 +136,50 @@ def read_queryset(self):
""" """
return self.index_queryset() return self.index_queryset()


def build_queryset(self, start_date=None, end_date=None):
    """
    Get the default QuerySet to index when doing an index update.

    Subclasses can override this method to take into account related
    model modification times.

    The default is to use ``SearchIndex.index_queryset`` and filter
    based on ``SearchIndex.get_updated_field``.

    ``start_date`` and ``end_date`` are optional bounds. When the index
    reports an updated field they are applied to it as ``__gte`` and
    ``__lte`` lookups respectively. When it does not, the bounds are
    ignored and a single warning is issued.

    Returns the filtered queryset ordered by the model's primary key name.

    Raises ``ImproperlyConfigured`` if the object returned by the index
    has no ``filter`` attribute.
    """
    model = self.get_model()
    updated_field = self.get_updated_field()
    extra_lookup_kwargs = {}

    if updated_field:
        if start_date:
            extra_lookup_kwargs['%s__gte' % updated_field] = start_date

        if end_date:
            extra_lookup_kwargs['%s__lte' % updated_field] = end_date
    elif start_date or end_date:
        # Warn once, no matter how many bounds were requested; the message
        # is identical for both and repeating it adds no information.
        warnings.warn(("No updated date field found for '%s' "
                       "- not restricting by age.") % model.__name__)

    # Honor the pre-v2 hook name if a subclass still defines it.
    if hasattr(self, 'get_queryset'):
        warnings.warn("'SearchIndex.get_queryset' was deprecated in Haystack v2. Please rename the method 'index_queryset'.")
        index_qs = self.get_queryset()
    else:
        index_qs = self.index_queryset()

    if not hasattr(index_qs, 'filter'):
        # NOTE(review): relies on ``ImproperlyConfigured`` being imported at
        # module level; that import is not visible here - confirm.
        raise ImproperlyConfigured("The '%r' class must return a 'QuerySet' in the 'index_queryset' method." % self)

    # `.select_related()` seems like a good idea here but can fail on
    # nullable `ForeignKey` as well as what seems like other cases.
    return index_qs.filter(**extra_lookup_kwargs).order_by(model._meta.pk.name)

def prepare(self, obj): def prepare(self, obj):
""" """
Fetches and adds/alters data before indexing. Fetches and adds/alters data before indexing.
Expand Down
38 changes: 2 additions & 36 deletions haystack/management/commands/update_index.py
Expand Up @@ -44,46 +44,12 @@ def worker(bits):
backend = haystack_connections[using].get_backend() backend = haystack_connections[using].get_backend()


if func == 'do_update': if func == 'do_update':
qs = build_queryset(index, model, start_date=start_date, end_date=end_date, verbosity=verbosity) qs = index.build_queryset(start_date=start_date, end_date=end_date)
do_update(backend, index, qs, start, end, total, verbosity=verbosity) do_update(backend, index, qs, start, end, total, verbosity=verbosity)
elif bits[0] == 'do_remove': elif bits[0] == 'do_remove':
do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=verbosity) do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=verbosity)




def build_queryset(index, model, start_date=None, end_date=None, verbosity=1):
extra_lookup_kwargs = {}
updated_field = index.get_updated_field()

if start_date:
if updated_field:
extra_lookup_kwargs['%s__gte' % updated_field] = start_date
else:
if verbosity >= 2:
print "No updated date field found for '%s' - not restricting by age." % model.__name__

if end_date:
if updated_field:
extra_lookup_kwargs['%s__lte' % updated_field] = end_date
else:
if verbosity >= 2:
print "No updated date field found for '%s' - not restricting by age." % model.__name__

index_qs = None

if hasattr(index, 'get_queryset'):
warnings.warn("'SearchIndex.get_queryset' was deprecated in Haystack v2. Please rename the method 'index_queryset'.")
index_qs = index.get_queryset()
else:
index_qs = index.index_queryset()

if not hasattr(index_qs, 'filter'):
raise ImproperlyConfigured("The '%r' class must return a 'QuerySet' in the 'index_queryset' method." % index)

# `.select_related()` seems like a good idea here but can fail on
# nullable `ForeignKey` as well as what seems like other cases.
return index_qs.filter(**extra_lookup_kwargs).order_by(model._meta.pk.name)


def do_update(backend, index, qs, start, end, total, verbosity=1): def do_update(backend, index, qs, start, end, total, verbosity=1):
# Get a clone of the QuerySet so that the cache doesn't bloat up # Get a clone of the QuerySet so that the cache doesn't bloat up
# in memory. Useful when reindexing large amounts of data. # in memory. Useful when reindexing large amounts of data.
Expand Down Expand Up @@ -238,7 +204,7 @@ def handle_label(self, label, **options):
print "Skipping '%s' - no index." % model print "Skipping '%s' - no index." % model
continue continue


qs = build_queryset(index, model, start_date=self.start_date, end_date=self.end_date, verbosity=self.verbosity) qs = index.build_queryset(start_date=self.start_date, end_date=self.end_date)
total = qs.count() total = qs.count()


if self.verbosity >= 1: if self.verbosity >= 1:
Expand Down
36 changes: 36 additions & 0 deletions tests/core/tests/indexes.py
Expand Up @@ -73,6 +73,9 @@ def index_queryset(self):
def read_queryset(self): def read_queryset(self):
return MockModel.objects.filter(author__in=['daniel1', 'daniel3']) return MockModel.objects.filter(author__in=['daniel1', 'daniel3'])


def build_queryset(self, start_date=None, end_date=None):
    # Custom override for the tests: both date bounds are ignored and the
    # records are restricted to a fixed pair of authors.
    wanted_authors = ['daniel1', 'daniel3']
    return MockModel.objects.filter(author__in=wanted_authors)



class GoodNullableMockSearchIndex(indexes.SearchIndex, indexes.Indexable): class GoodNullableMockSearchIndex(indexes.SearchIndex, indexes.Indexable):
text = indexes.CharField(document=True, use_template=True) text = indexes.CharField(document=True, use_template=True)
Expand Down Expand Up @@ -220,6 +223,39 @@ def test_index_queryset(self):
def test_read_queryset(self): def test_read_queryset(self):
self.assertEqual(len(self.cmi.read_queryset()), 2) self.assertEqual(len(self.cmi.read_queryset()), 2)


def test_build_queryset(self):
    """
    Check ``build_queryset`` on the custom index and on the default index,
    with and without an updated field.
    """
    # The custom SearchIndex.build_queryset returns the same records as
    # the read_queryset
    self.assertEqual(len(self.cmi.build_queryset()), 2)

    index_cls = self.mi.__class__
    missing = object()

    # Remember only what the class itself defines, so that cleanup can put
    # the class back exactly as it was (including "inherited, not defined").
    own_guf = index_cls.__dict__.get('get_updated_field', missing)

    try:
        index_cls.get_updated_field = lambda self: 'pub_date'

        # With an updated field, we should get filtered results
        sd = datetime.datetime(2009, 3, 17, 7, 0)
        self.assertEqual(len(self.mi.build_queryset(start_date=sd)), 2)

        ed = datetime.datetime(2009, 3, 17, 7, 59)
        self.assertEqual(len(self.mi.build_queryset(end_date=ed)), 2)

        sd = datetime.datetime(2009, 3, 17, 6, 0)
        ed = datetime.datetime(2009, 3, 17, 6, 59)
        self.assertEqual(len(self.mi.build_queryset(start_date=sd,
                                                    end_date=ed)), 1)

        # Remove the updated field for the next test
        del index_cls.get_updated_field

        # The default should return all 3 even if we specify a start date
        # because there is no updated field specified
        self.assertEqual(len(self.mi.build_queryset(start_date=sd)), 3)
    finally:
        # Always undo the monkeypatch, even when an assertion above fails,
        # so the patched class attribute cannot leak into other tests.
        if own_guf is not missing:
            index_cls.get_updated_field = own_guf
        elif 'get_updated_field' in index_cls.__dict__:
            del index_cls.get_updated_field


def test_prepare(self): def test_prepare(self):
mock = MockModel() mock = MockModel()
mock.pk = 20 mock.pk = 20
Expand Down
2 changes: 1 addition & 1 deletion tests/settings.py
@@ -1,7 +1,7 @@
# Haystack settings for running tests. # Haystack settings for running tests.
DATABASES = { DATABASES = {
'default': { 'default': {
'ENGINE': 'sqlite3', 'ENGINE': 'django.db.backends.sqlite3',
'NAME': 'haystack_tests.db', 'NAME': 'haystack_tests.db',
} }
} }
Expand Down

0 comments on commit ec1f917

Please sign in to comment.