Skip to content

Commit

Permalink
Adds a --nocommit arg to the update_index, clear_index and rebuild_index
Browse files Browse the repository at this point in the history
management commands.
  • Loading branch information
andrewschoen committed Oct 28, 2014
1 parent 9462199 commit c5abef6
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 18 deletions.
20 changes: 20 additions & 0 deletions docs/management_commands.rst
Expand Up @@ -23,10 +23,18 @@ following arguments::
``--using``:
If provided, determines which connection should be used. Default is
``default``.
``--nocommit``:
If provided, it will pass ``commit=False`` to the backend. This means that the
removal will not become immediately visible and will depend on another explicit commit
or the backend's commit strategy to complete the removal.

By default, this is an **INTERACTIVE** command and assumes that you do **NOT**
wish to delete the entire index.

.. note::

The ``--nocommit`` argument is only supported by the Solr backend.

.. warning::

Depending on the backend you're using, this may simply delete the entire
Expand Down Expand Up @@ -80,6 +88,14 @@ arguments::
``--using``:
If provided, determines which connection should be used. Default is
``default``.
``--nocommit``:
If provided, it will pass ``commit=False`` to the backend. This means that the
updates will not become immediately visible and will depend on another explicit commit
or the backend's commit strategy to complete the update.

.. note::

The ``--nocommit`` argument is only supported by the Solr and Elasticsearch backends.

Examples::

Expand Down Expand Up @@ -147,6 +163,10 @@ of the arguments of the following arguments::
``--using``:
If provided, determines which connection should be used. Default is
``default``.
``--nocommit``:
If provided, it will pass ``commit=False`` to the backend. This means that the
rebuilt index will not become immediately visible and will depend on another explicit commit
or the backend's commit strategy to complete the rebuild.

For when you really, really want a completely rebuilt index.

Expand Down
5 changes: 3 additions & 2 deletions haystack/backends/solr_backend.py
Expand Up @@ -103,8 +103,9 @@ def clear(self, models=[], commit=True):

self.conn.delete(q=" OR ".join(models_to_delete), commit=commit)

# Run an optimize post-clear. http://wiki.apache.org/solr/FAQ#head-9aafb5d8dff5308e8ea4fcf4b71f19f029c4bb99
self.conn.optimize()
if commit:
# Run an optimize post-clear. http://wiki.apache.org/solr/FAQ#head-9aafb5d8dff5308e8ea4fcf4b71f19f029c4bb99
self.conn.optimize()
except (IOError, SolrError) as e:
if not self.silently_fail:
raise
Expand Down
6 changes: 5 additions & 1 deletion haystack/management/commands/clear_index.py
Expand Up @@ -18,13 +18,17 @@ class Command(BaseCommand):
help='Update only the named backend (can be used multiple times). '
'By default all backends will be updated.'
),
make_option('--nocommit', action='store_false', dest='commit',
default=True, help='Will pass commit=False to the backend.'
),
)
option_list = BaseCommand.option_list + base_options

def handle(self, **options):
"""Clears out the search index completely."""
from haystack import connections
self.verbosity = int(options.get('verbosity', 1))
self.commit = options.get('commit', True)

using = options.get('using')
if not using:
Expand All @@ -47,7 +51,7 @@ def handle(self, **options):

for backend_name in using:
backend = connections[backend_name].get_backend()
backend.clear()
backend.clear(commit=self.commit)

if self.verbosity >= 1:
print("All documents removed.")
28 changes: 16 additions & 12 deletions haystack/management/commands/update_index.py
Expand Up @@ -56,9 +56,9 @@ def worker(bits):
pass

if bits[0] == 'do_update':
func, model, start, end, total, using, start_date, end_date, verbosity = bits
func, model, start, end, total, using, start_date, end_date, verbosity, commit = bits
elif bits[0] == 'do_remove':
func, model, pks_seen, start, upper_bound, using, verbosity = bits
func, model, pks_seen, start, upper_bound, using, verbosity, commit = bits
else:
return

Expand All @@ -68,12 +68,12 @@ def worker(bits):

if func == 'do_update':
qs = index.build_queryset(start_date=start_date, end_date=end_date)
do_update(backend, index, qs, start, end, total, verbosity=verbosity)
do_update(backend, index, qs, start, end, total, verbosity=verbosity, commit=commit)
elif bits[0] == 'do_remove':
do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=verbosity)
do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=verbosity, commit=commit)


def do_update(backend, index, qs, start, end, total, verbosity=1):
def do_update(backend, index, qs, start, end, total, verbosity=1, commit=True):
# Get a clone of the QuerySet so that the cache doesn't bloat up
# in memory. Useful when reindexing large amounts of data.
small_cache_qs = qs.all()
Expand All @@ -86,13 +86,13 @@ def do_update(backend, index, qs, start, end, total, verbosity=1):
print(" indexed %s - %d of %d (by %s)." % (start + 1, end, total, os.getpid()))

# FIXME: Get the right backend.
backend.update(index, current_qs)
backend.update(index, current_qs, commit=commit)

# Clear out the DB connections queries because it bloats up RAM.
reset_queries()


def do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=1):
def do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=1, commit=True):
# Fetch a list of results.
# Can't do pk range, because id's are strings (thanks comments
# & UUIDs!).
Expand All @@ -106,7 +106,7 @@ def do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=1):
if verbosity >= 2:
print(" removing %s." % result.pk)

backend.remove(".".join([result.app_label, result.model_name, str(result.pk)]))
backend.remove(".".join([result.app_label, result.model_name, str(result.pk)]), commit=commit)


class Command(LabelCommand):
Expand Down Expand Up @@ -140,6 +140,9 @@ class Command(LabelCommand):
default=0, type='int',
help='Allows for the use multiple workers to parallelize indexing. Requires multiprocessing.'
),
make_option('--nocommit', action='store_false', dest='commit',
default=True, help='Will pass commit=False to the backend.'
),
)
option_list = LabelCommand.option_list + base_options

Expand All @@ -150,6 +153,7 @@ def handle(self, *items, **options):
self.end_date = None
self.remove = options.get('remove', False)
self.workers = int(options.get('workers', 0))
self.commit = options.get('commit', True)

self.backends = options.get('using')
if not self.backends:
Expand Down Expand Up @@ -231,9 +235,9 @@ def update_backend(self, label, using):
end = min(start + batch_size, total)

if self.workers == 0:
do_update(backend, index, qs, start, end, total, self.verbosity)
do_update(backend, index, qs, start, end, total, verbosity=self.verbosity, commit=self.commit)
else:
ghetto_queue.append(('do_update', model, start, end, total, using, self.start_date, self.end_date, self.verbosity))
ghetto_queue.append(('do_update', model, start, end, total, using, self.start_date, self.end_date, self.verbosity, self.commit))

if self.workers > 0:
pool = multiprocessing.Pool(self.workers)
Expand All @@ -258,9 +262,9 @@ def update_backend(self, label, using):
upper_bound = start + batch_size

if self.workers == 0:
do_remove(backend, index, model, pks_seen, start, upper_bound)
do_remove(backend, index, model, pks_seen, start, upper_bound, self.verbosity, self.commit)
else:
ghetto_queue.append(('do_remove', model, pks_seen, start, upper_bound, using, self.verbosity))
ghetto_queue.append(('do_remove', model, pks_seen, start, upper_bound, using, self.verbosity, self.commit))

if self.workers > 0:
pool = multiprocessing.Pool(self.workers)
Expand Down
28 changes: 25 additions & 3 deletions test_haystack/solr_tests/test_management_commands.py
Expand Up @@ -59,13 +59,22 @@ def test_basic_commands(self):
call_command('clear_index', interactive=False, verbosity=0)
self.assertEqual(self.solr.search('*:*').hits, 0)

call_command('update_index', verbosity=0, commit=False)
self.assertEqual(self.solr.search('*:*').hits, 0)

call_command('update_index', verbosity=0)
self.assertEqual(self.solr.search('*:*').hits, 23)

call_command('clear_index', interactive=False, verbosity=0)
self.assertEqual(self.solr.search('*:*').hits, 0)

call_command('rebuild_index', interactive=False, verbosity=0)
call_command('rebuild_index', interactive=False, verbosity=0, commit=False)
self.assertEqual(self.solr.search('*:*').hits, 0)

call_command('rebuild_index', interactive=False, verbosity=0, commit=True)
self.assertEqual(self.solr.search('*:*').hits, 23)

call_command('clear_index', interactive=False, verbosity=0, commit=False)
self.assertEqual(self.solr.search('*:*').hits, 23)

def test_remove(self):
Expand All @@ -83,6 +92,12 @@ def test_remove(self):
call_command('update_index', verbosity=0)
self.assertEqual(self.solr.search('*:*').hits, 23)

call_command('update_index', remove=True, verbosity=0, commit=False)
self.assertEqual(self.solr.search('*:*').hits, 23)

call_command('update_index', remove=True, verbosity=0, workers=2, commit=False)
self.assertEqual(self.solr.search('*:*').hits, 23)

# With the remove flag, it's gone.
call_command('update_index', remove=True, verbosity=0)
self.assertEqual(self.solr.search('*:*').hits, 22)
Expand Down Expand Up @@ -145,13 +160,20 @@ def test_multiprocessing(self):
call_command('update_index', verbosity=2, workers=2, batchsize=5)
self.assertEqual(self.solr.search('*:*').hits, 23)

call_command('clear_index', interactive=False, verbosity=0)
self.assertEqual(self.solr.search('*:*').hits, 0)

call_command('update_index', verbosity=2, workers=2, batchsize=5, commit=False)
self.assertEqual(self.solr.search('*:*').hits, 0)

def test_build_schema_wrong_backend(self):

settings.HAYSTACK_CONNECTIONS['whoosh'] = {'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine',
'PATH': mkdtemp(prefix='dummy-path-'),}
'PATH': mkdtemp(prefix='dummy-path-'), }

connections['whoosh']._index = self.ui
self.assertRaises(ImproperlyConfigured, call_command, 'build_solr_schema',using='whoosh', interactive=False)
self.assertRaises(ImproperlyConfigured, call_command, 'build_solr_schema', using='whoosh', interactive=False)


class AppModelManagementCommandTestCase(TestCase):
fixtures = ['bulk_data.json']
Expand Down

0 comments on commit c5abef6

Please sign in to comment.