From 27f4bbd1aeba42d2a4b6179f56d8d0230dfa6bff Mon Sep 17 00:00:00 2001 From: Andrew Schoen Date: Tue, 21 Oct 2014 11:07:25 -0500 Subject: [PATCH] Adds a --nocommit arg to the update_index, clear_index and rebuild_index management commands. --- docs/management_commands.rst | 12 ++++++++ haystack/backends/solr_backend.py | 5 ++-- haystack/management/commands/clear_index.py | 6 +++- haystack/management/commands/update_index.py | 28 +++++++++++-------- .../solr_tests/test_management_commands.py | 19 +++++++++++-- 5 files changed, 52 insertions(+), 18 deletions(-) diff --git a/docs/management_commands.rst b/docs/management_commands.rst index bf9c9ffb9..767612a67 100644 --- a/docs/management_commands.rst +++ b/docs/management_commands.rst @@ -23,6 +23,10 @@ following arguments:: ``--using``: If provided, determines which connection should be used. Default is ``default``. + ``--nocommit``: + If provided, it will pass commit=False to the backend. This means that the + update will not become immediately visible and will depend on another explicit commit + or the backend's commit strategy to complete the update. Only supported by the Solr backend. By default, this is an **INTERACTIVE** command and assumes that you do **NOT** wish to delete the entire index. @@ -80,6 +84,10 @@ arguments:: ``--using``: If provided, determines which connection should be used. Default is ``default``. + ``--nocommit``: + If provided, it will pass commit=False to the backend. This means that the + updates will not become immediately visible and will depend on another explicit commit + or the backend's commit strategy to complete the update. Examples:: @@ -147,6 +155,10 @@ of the arguments of the following arguments:: ``--using``: If provided, determines which connection should be used. Default is ``default``. + ``--nocommit``: + If provided, it will pass commit=False to the backend. This means that the + update will not become immediately visible and will depend on another explicit commit + or the backend's commit strategy to complete the update. For when you really, really want a completely rebuilt index. diff --git a/haystack/backends/solr_backend.py b/haystack/backends/solr_backend.py index 777e35fd2..d1a05b063 100644 --- a/haystack/backends/solr_backend.py +++ b/haystack/backends/solr_backend.py @@ -103,8 +103,9 @@ def clear(self, models=[], commit=True): self.conn.delete(q=" OR ".join(models_to_delete), commit=commit) - # Run an optimize post-clear. http://wiki.apache.org/solr/FAQ#head-9aafb5d8dff5308e8ea4fcf4b71f19f029c4bb99 - self.conn.optimize() + if commit: + # Run an optimize post-clear. http://wiki.apache.org/solr/FAQ#head-9aafb5d8dff5308e8ea4fcf4b71f19f029c4bb99 + self.conn.optimize() except (IOError, SolrError) as e: if not self.silently_fail: raise diff --git a/haystack/management/commands/clear_index.py b/haystack/management/commands/clear_index.py index 93b5f91b5..3c09f78cf 100644 --- a/haystack/management/commands/clear_index.py +++ b/haystack/management/commands/clear_index.py @@ -18,6 +18,9 @@ class Command(BaseCommand): help='Update only the named backend (can be used multiple times). ' 'By default all backends will be updated.' ), + make_option('--nocommit', action='store_false', dest='commit', + default=True, help='Will pass commit=False to the backend.' + ), ) option_list = BaseCommand.option_list + base_options @@ -25,6 +28,7 @@ def handle(self, **options): """Clears out the search index completely.""" from haystack import connections self.verbosity = int(options.get('verbosity', 1)) + self.commit = options.get('commit', True) using = options.get('using') if not using: @@ -47,7 +51,7 @@ def handle(self, **options): for backend_name in using: backend = connections[backend_name].get_backend() - backend.clear() + backend.clear(commit=self.commit) if self.verbosity >= 1: print("All documents removed.") diff --git a/haystack/management/commands/update_index.py b/haystack/management/commands/update_index.py index 4281e185b..91d4f3cbe 100755 --- a/haystack/management/commands/update_index.py +++ b/haystack/management/commands/update_index.py @@ -56,9 +56,9 @@ def worker(bits): pass if bits[0] == 'do_update': - func, model, start, end, total, using, start_date, end_date, verbosity = bits + func, model, start, end, total, using, start_date, end_date, verbosity, commit = bits elif bits[0] == 'do_remove': - func, model, pks_seen, start, upper_bound, using, verbosity = bits + func, model, pks_seen, start, upper_bound, using, verbosity, commit = bits else: return @@ -68,12 +68,12 @@ def worker(bits): if func == 'do_update': qs = index.build_queryset(start_date=start_date, end_date=end_date) - do_update(backend, index, qs, start, end, total, verbosity=verbosity) + do_update(backend, index, qs, start, end, total, verbosity=verbosity, commit=commit) elif bits[0] == 'do_remove': - do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=verbosity) + do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=verbosity, commit=commit) -def do_update(backend, index, qs, start, end, total, verbosity=1): +def do_update(backend, index, qs, start, end, total, verbosity=1, commit=True): # Get a clone of the QuerySet so that the cache doesn't bloat up # in memory. Useful when reindexing large amounts of data. small_cache_qs = qs.all() @@ -86,13 +86,13 @@ def do_update(backend, index, qs, start, end, total, verbosity=1): print(" indexed %s - %d of %d (by %s)." % (start + 1, end, total, os.getpid())) # FIXME: Get the right backend. - backend.update(index, current_qs) + backend.update(index, current_qs, commit=commit) # Clear out the DB connections queries because it bloats up RAM. reset_queries() -def do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=1): +def do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=1, commit=True): # Fetch a list of results. # Can't do pk range, because id's are strings (thanks comments # & UUIDs!). @@ -106,7 +106,7 @@ def do_remove(backend, index, model, pks_seen, start, upper_bound, verbosity=1): if verbosity >= 2: print(" removing %s." % result.pk) - backend.remove(".".join([result.app_label, result.model_name, str(result.pk)])) + backend.remove(".".join([result.app_label, result.model_name, str(result.pk)]), commit=commit) class Command(LabelCommand): @@ -140,6 +140,9 @@ class Command(LabelCommand): default=0, type='int', help='Allows for the use multiple workers to parallelize indexing. Requires multiprocessing.' ), + make_option('--nocommit', action='store_false', dest='commit', + default=True, help='Will pass commit=False to the backend.' + ), ) option_list = LabelCommand.option_list + base_options @@ -150,6 +153,7 @@ def handle(self, *items, **options): self.end_date = None self.remove = options.get('remove', False) self.workers = int(options.get('workers', 0)) + self.commit = options.get('commit', True) self.backends = options.get('using') if not self.backends: @@ -231,9 +235,9 @@ def update_backend(self, label, using): end = min(start + batch_size, total) if self.workers == 0: - do_update(backend, index, qs, start, end, total, self.verbosity) + do_update(backend, index, qs, start, end, total, self.verbosity, self.commit) else: - ghetto_queue.append(('do_update', model, start, end, total, using, self.start_date, self.end_date, self.verbosity)) + ghetto_queue.append(('do_update', model, start, end, total, using, self.start_date, self.end_date, self.verbosity, self.commit)) if self.workers > 0: pool = multiprocessing.Pool(self.workers) @@ -258,9 +262,9 @@ def update_backend(self, label, using): upper_bound = start + batch_size if self.workers == 0: - do_remove(backend, index, model, pks_seen, start, upper_bound) + do_remove(backend, index, model, pks_seen, start, upper_bound, self.verbosity, self.commit) else: - ghetto_queue.append(('do_remove', model, pks_seen, start, upper_bound, using, self.verbosity)) + ghetto_queue.append(('do_remove', model, pks_seen, start, upper_bound, using, self.verbosity, self.commit)) if self.workers > 0: pool = multiprocessing.Pool(self.workers) diff --git a/test_haystack/solr_tests/test_management_commands.py b/test_haystack/solr_tests/test_management_commands.py index dce03e6bb..5dc4d0c6f 100644 --- a/test_haystack/solr_tests/test_management_commands.py +++ b/test_haystack/solr_tests/test_management_commands.py @@ -59,13 +59,22 @@ def test_basic_commands(self): call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) + call_command('update_index', verbosity=0, commit=False) + self.assertEqual(self.solr.search('*:*').hits, 0) + call_command('update_index', verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 23) call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) - call_command('rebuild_index', interactive=False, verbosity=0) + call_command('rebuild_index', interactive=False, verbosity=0, commit=False) + self.assertEqual(self.solr.search('*:*').hits, 0) + + call_command('rebuild_index', interactive=False, verbosity=0, commit=True) + self.assertEqual(self.solr.search('*:*').hits, 23) + + call_command('clear_index', interactive=False, verbosity=0, commit=False) self.assertEqual(self.solr.search('*:*').hits, 23) def test_remove(self): @@ -83,6 +92,9 @@ def test_remove(self): call_command('update_index', verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 23) + call_command('update_index', remove=True, verbosity=0, commit=False) + self.assertEqual(self.solr.search('*:*').hits, 23) + # With the remove flag, it's gone. call_command('update_index', remove=True, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 22) @@ -148,10 +160,11 @@ def test_multiprocessing(self): def test_build_schema_wrong_backend(self): settings.HAYSTACK_CONNECTIONS['whoosh'] = {'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine', - 'PATH': mkdtemp(prefix='dummy-path-'),} + 'PATH': mkdtemp(prefix='dummy-path-'), } connections['whoosh']._index = self.ui - self.assertRaises(ImproperlyConfigured, call_command, 'build_solr_schema',using='whoosh', interactive=False) + self.assertRaises(ImproperlyConfigured, call_command, 'build_solr_schema', using='whoosh', interactive=False) + class AppModelManagementCommandTestCase(TestCase): fixtures = ['bulk_data.json']