Skip to content

Commit

Permalink
Merge pull request #1090 from andrewschoen/feature/no-commit-flag
Browse files Browse the repository at this point in the history
Adds a --nocommit arg to the update_index, clear_index and rebuild_index management commands.
  • Loading branch information
acdha committed Jan 14, 2015
2 parents 1ae83f2 + 6e863c2 commit 3249597
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 16 deletions.
20 changes: 20 additions & 0 deletions docs/management_commands.rst
Expand Up @@ -23,10 +23,18 @@ following arguments::
``--using``:
If provided, determines which connection should be used. Default is
``default``.
``--nocommit``:
If provided, it will pass commit=False to the backend. This means that the
update will not become immediately visible and will depend on another explicit commit
or the backend's commit strategy to complete the update.

By default, this is an **INTERACTIVE** command and assumes that you do **NOT**
wish to delete the entire index.

.. note::

The ``--nocommit`` argument is only supported by the Solr backend.

.. warning::

Depending on the backend you're using, this may simply delete the entire
Expand Down Expand Up @@ -80,6 +88,14 @@ arguments::
``--using``:
If provided, determines which connection should be used. Default is
``default``.
``--nocommit``:
If provided, it will pass commit=False to the backend. This means that the
updates will not become immediately visible and will depend on another explicit commit
or the backend's commit strategy to complete the update.

.. note::

The ``--nocommit`` argument is only supported by the Solr and Elasticsearch backends.

Examples::

Expand Down Expand Up @@ -147,6 +163,10 @@ of the arguments of the following arguments::
``--using``:
If provided, determines which connection should be used. Default is
``default``.
``--nocommit``:
If provided, it will pass commit=False to the backend. This means that the
update will not become immediately visible and will depend on another explicit commit
or the backend's commit strategy to complete the update.

For when you really, really want a completely rebuilt index.

Expand Down
6 changes: 5 additions & 1 deletion haystack/management/commands/clear_index.py
Expand Up @@ -18,13 +18,17 @@ class Command(BaseCommand):
help='Update only the named backend (can be used multiple times). '
'By default all backends will be updated.'
),
make_option('--nocommit', action='store_false', dest='commit',
default=True, help='Will pass commit=False to the backend.'
),
)
option_list = BaseCommand.option_list + base_options

def handle(self, **options):
"""Clears out the search index completely."""
from haystack import connections
self.verbosity = int(options.get('verbosity', 1))
self.commit = options.get('commit', True)

using = options.get('using')
if not using:
Expand All @@ -47,7 +51,7 @@ def handle(self, **options):

for backend_name in using:
backend = connections[backend_name].get_backend()
backend.clear()
backend.clear(commit=self.commit)

if self.verbosity >= 1:
print("All documents removed.")
28 changes: 16 additions & 12 deletions haystack/management/commands/update_index.py
Expand Up @@ -56,9 +56,9 @@ def worker(bits):
pass

if bits[0] == 'do_update':
func, model, start, end, total, using, start_date, end_date, verbosity = bits
func, model, start, end, total, using, start_date, end_date, verbosity, commit = bits
elif bits[0] == 'do_remove':
func, model, pks_seen, start, upper_bound, using, verbosity = bits
func, model, pks_seen, start, upper_bound, using, verbosity, commit = bits
else:
return

Expand All @@ -68,12 +68,12 @@ def worker(bits):

if func == 'do_update':
qs = index.build_queryset(start_date=start_date, end_date=end_date)
do_update(backend, index, qs, start, end, total, verbosity=verbosity)
do_update(backend, index, qs, start, end, total, verbosity=verbosity, commit=commit)
elif bits[0] == 'do_remove':
do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=verbosity)
do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=verbosity, commit=commit)


def do_update(backend, index, qs, start, end, total, verbosity=1):
def do_update(backend, index, qs, start, end, total, verbosity=1, commit=True):
# Get a clone of the QuerySet so that the cache doesn't bloat up
# in memory. Useful when reindexing large amounts of data.
small_cache_qs = qs.all()
Expand All @@ -86,13 +86,13 @@ def do_update(backend, index, qs, start, end, total, verbosity=1):
print(" indexed %s - %d of %d (by %s)." % (start + 1, end, total, os.getpid()))

# FIXME: Get the right backend.
backend.update(index, current_qs)
backend.update(index, current_qs, commit=commit)

# Clear out the DB connections queries because it bloats up RAM.
reset_queries()


def do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=1):
def do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=1, commit=True):
# Fetch a list of results.
# Can't do pk range, because id's are strings (thanks comments
# & UUIDs!).
Expand All @@ -106,7 +106,7 @@ def do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=1):
if verbosity >= 2:
print(" removing %s." % result.pk)

backend.remove(".".join([result.app_label, result.model_name, str(result.pk)]))
backend.remove(".".join([result.app_label, result.model_name, str(result.pk)]), commit=commit)


class Command(LabelCommand):
Expand Down Expand Up @@ -140,6 +140,9 @@ class Command(LabelCommand):
default=0, type='int',
help='Allows for the use multiple workers to parallelize indexing. Requires multiprocessing.'
),
make_option('--nocommit', action='store_false', dest='commit',
default=True, help='Will pass commit=False to the backend.'
),
)
option_list = LabelCommand.option_list + base_options

Expand All @@ -150,6 +153,7 @@ def handle(self, *items, **options):
self.end_date = None
self.remove = options.get('remove', False)
self.workers = int(options.get('workers', 0))
self.commit = options.get('commit', True)

self.backends = options.get('using')
if not self.backends:
Expand Down Expand Up @@ -231,9 +235,9 @@ def update_backend(self, label, using):
end = min(start + batch_size, total)

if self.workers == 0:
do_update(backend, index, qs, start, end, total, self.verbosity)
do_update(backend, index, qs, start, end, total, verbosity=self.verbosity, commit=self.commit)
else:
ghetto_queue.append(('do_update', model, start, end, total, using, self.start_date, self.end_date, self.verbosity))
ghetto_queue.append(('do_update', model, start, end, total, using, self.start_date, self.end_date, self.verbosity, self.commit))

if self.workers > 0:
pool = multiprocessing.Pool(self.workers)
Expand All @@ -258,9 +262,9 @@ def update_backend(self, label, using):
upper_bound = start + batch_size

if self.workers == 0:
do_remove(backend, index, model, pks_seen, start, upper_bound)
do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=self.verbosity, commit=self.commit)
else:
ghetto_queue.append(('do_remove', model, pks_seen, start, upper_bound, using, self.verbosity))
ghetto_queue.append(('do_remove', model, pks_seen, start, upper_bound, using, self.verbosity, self.commit))

if self.workers > 0:
pool = multiprocessing.Pool(self.workers)
Expand Down
28 changes: 25 additions & 3 deletions test_haystack/solr_tests/test_management_commands.py
Expand Up @@ -59,13 +59,22 @@ def test_basic_commands(self):
call_command('clear_index', interactive=False, verbosity=0)
self.assertEqual(self.solr.search('*:*').hits, 0)

call_command('update_index', verbosity=0, commit=False)
self.assertEqual(self.solr.search('*:*').hits, 0)

call_command('update_index', verbosity=0)
self.assertEqual(self.solr.search('*:*').hits, 23)

call_command('clear_index', interactive=False, verbosity=0)
self.assertEqual(self.solr.search('*:*').hits, 0)

call_command('rebuild_index', interactive=False, verbosity=0)
call_command('rebuild_index', interactive=False, verbosity=0, commit=False)
self.assertEqual(self.solr.search('*:*').hits, 0)

call_command('rebuild_index', interactive=False, verbosity=0, commit=True)
self.assertEqual(self.solr.search('*:*').hits, 23)

call_command('clear_index', interactive=False, verbosity=0, commit=False)
self.assertEqual(self.solr.search('*:*').hits, 23)

def test_remove(self):
Expand All @@ -83,6 +92,12 @@ def test_remove(self):
call_command('update_index', verbosity=0)
self.assertEqual(self.solr.search('*:*').hits, 23)

call_command('update_index', remove=True, verbosity=0, commit=False)
self.assertEqual(self.solr.search('*:*').hits, 23)

call_command('update_index', remove=True, verbosity=0, workers=2, commit=False)
self.assertEqual(self.solr.search('*:*').hits, 23)

# With the remove flag, it's gone.
call_command('update_index', remove=True, verbosity=0)
self.assertEqual(self.solr.search('*:*').hits, 22)
Expand Down Expand Up @@ -145,13 +160,20 @@ def test_multiprocessing(self):
call_command('update_index', verbosity=2, workers=2, batchsize=5)
self.assertEqual(self.solr.search('*:*').hits, 23)

call_command('clear_index', interactive=False, verbosity=0)
self.assertEqual(self.solr.search('*:*').hits, 0)

call_command('update_index', verbosity=2, workers=2, batchsize=5, commit=False)
self.assertEqual(self.solr.search('*:*').hits, 0)

def test_build_schema_wrong_backend(self):

settings.HAYSTACK_CONNECTIONS['whoosh'] = {'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine',
'PATH': mkdtemp(prefix='dummy-path-'),}
'PATH': mkdtemp(prefix='dummy-path-'), }

connections['whoosh']._index = self.ui
self.assertRaises(ImproperlyConfigured, call_command, 'build_solr_schema',using='whoosh', interactive=False)
self.assertRaises(ImproperlyConfigured, call_command, 'build_solr_schema', using='whoosh', interactive=False)


class AppModelManagementCommandTestCase(TestCase):
fixtures = ['bulk_data.json']
Expand Down

0 comments on commit 3249597

Please sign in to comment.