From ca4e00907e531d6949c1a400f0b445b09a7dd2dc Mon Sep 17 00:00:00 2001 From: Nuno Maduro Date: Thu, 8 Nov 2018 15:46:35 +0100 Subject: [PATCH 1/4] Adds replace_all_objects --- algoliasearch/index.py | 42 ++++++++++++++++++++++++++++++++++-- tests/test_index.py | 49 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/algoliasearch/index.py b/algoliasearch/index.py index b391b58e6..d92e83209 100644 --- a/algoliasearch/index.py +++ b/algoliasearch/index.py @@ -23,6 +23,8 @@ """ import time +import random +import string try: from urllib import urlencode @@ -205,6 +207,42 @@ def partial_update_objects(self, objects, no_create=False, request_options=None) }) return self.batch(requests, no_create=no_create, request_options=request_options) + def replace_all_objects(self, objects, request_options=None): + """ + Push a new set of objects and remove all previous objects. + Settings, synonyms and query rules are untouched. + Replace all records in an index without any downtime. + + @param objects contains an array of objects to push (each object + must contains a objectID attribute) + """ + + safe = False + if request_options is not None and 'safe' in request_options.parameters: + safe = request_options.parameters['safe'] + request_options.parameters.pop('safe', None) + + letters = string.ascii_letters + random_string = ''.join(random.choice(letters) for i in range(10)) + tmp_index_name = self.index_name + '_tmp_' + random_string + + tmp_index = self.client.init_index(tmp_index_name) + + copy_response = self.client.copy_index(self.index_name, tmp_index_name, scope=['settings', 'synonyms', 'rules']) + + if safe: + self.wait_task(copy_response['taskID']) + + batch_response = tmp_index.save_objects(objects, request_options) + if safe: + tmp_index.wait_task(batch_response['taskID']) + + move_response = self.client.move_index(tmp_index_name, self.index_name); + if safe: + self.wait_task(move_response['taskID']) + + return [copy_response, batch_response, move_response] + @deprecated def saveObject(self, obj): return self.save_object(obj) @@ -699,7 +737,7 @@ def iter_synonyms(self, hits_per_page=1000, request_options=None): def iter_rules(self, hits_per_page=1000, request_options=None): page = 0 response = self.search_rules( - '', page=page, + '', page=page, hitsPerPage=hits_per_page, request_options=request_options ) @@ -712,7 +750,7 @@ def iter_rules(self, hits_per_page=1000, request_options=None): page += 1 response = self.search_rules( - '', page=page, + '', page=page, hitsPerPage=hits_per_page, request_options=request_options ) diff --git a/tests/test_index.py b/tests/test_index.py index 91e87e046..5bfba4816 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -4,7 +4,7 @@ from datetime import datetime from decimal import Decimal -from algoliasearch.client import MAX_API_KEY_LENGTH +from algoliasearch.client import RequestOptions, MAX_API_KEY_LENGTH from algoliasearch.helpers import AlgoliaException from .helpers import Factory, rule_stub @@ -527,6 +527,53 @@ def test_delete_by(index): assert obj2 in res['hits'] +def test_replace_all_objects(index): + obj1 = {'objectID': 'A', 'color': 'red'} + obj2 = {'objectID': 'B', 'color': 'blue'} + response = index.save_objects([obj1, obj2]) + index.wait_task(response['taskID']) + + obj3 = {'objectID': 'C', 'color': 'green'} + obj4 = {'objectID': 'D', 'color': 'yellow'} + responses = index.replace_all_objects([obj3, obj4]) + for response in responses: + index.wait_task(response['taskID']) + + res = index.search('') + + del res['hits'][0]['_highlightResult'] + del res['hits'][1]['_highlightResult'] + + assert len(res['hits']) == 2 + assert obj1 not in res['hits'] + assert obj2 not in res['hits'] + assert obj3 in res['hits'] + assert obj4 in res['hits'] + + +def test_replace_all_objects_with_safe(index): + obj1 = {'objectID': 'A', 'color': 'red'} + obj2 = {'objectID': 'B', 'color': 'blue'} + response = index.save_objects([obj1, obj2]) + index.wait_task(response['taskID']) + + obj3 = {'objectID': 'C', 'color': 'green'} + obj4 = {'objectID': 'D', 'color': 'yellow'} + request_options = RequestOptions({'safe': True}) + index.replace_all_objects([obj3, obj4], request_options) + + res = index.search('') + + del res['hits'][0]['_highlightResult'] + del res['hits'][1]['_highlightResult'] + + assert len(res['hits']) == 2 + assert obj1 not in res['hits'] + assert obj2 not in res['hits'] + assert obj3 in res['hits'] + assert obj4 in res['hits'] + + def test_batch(rw_index): factory = Factory() requests = [ From 7ecb07ac701ed4eb5b05960f1a50de1e422fe7f3 Mon Sep 17 00:00:00 2001 From: Nuno Maduro Date: Thu, 8 Nov 2018 15:52:21 +0100 Subject: [PATCH 2/4] Updates changelog --- CHANGELOG.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc1347764..df39bc580 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ +* Adds `replace_all_objects` method on index - PR [#388](https://github.com/algolia/algoliasearch-client-python/pull/388) + Push a new set of objects and remove all previous objects. Usage: + ```python + index = client.init_index('name'); + index.replace_all_objects(objects) + ``` + ### 1.17.0 - 2018-06-19 @@ -13,11 +20,11 @@ Introduce new Analytics object, wrapper around the [Analytics API](https://www.algolia.com/doc/rest-api/analytics/) (more methods to come). -* 2 methods about taskID initially available in the `Index` moved to the `Client`. - You could get some taskID from the engine without necessarily have an instance of Index, +* 2 methods about taskID initially available in the `Index` moved to the `Client`. + You could get some taskID from the engine without necessarily have an instance of Index, instead of instantiating an index that you won't need, you can now call wait_task and get_task_status on the client. The original methods on the index still work and are **not** deprecated. - + ```python client.wait_ask(index_name, taskID) client.get_task_status(index_name, taskID) @@ -74,7 +81,7 @@ https://blog.algolia.com/travis-encrypted-variables-external-contributions/ Cursor can become so long that the generated URL fails (error HTTP 414). * Chore: Add Python version to the UserAgent - + ### 1.15.3 - 2018-03-15 * Remove the `[security]` flair of `requests` From 110c5258474184bc6efb7fc83e2ed43aae1c66a8 Mon Sep 17 00:00:00 2001 From: Nuno Maduro Date: Thu, 15 Nov 2018 14:41:50 +0100 Subject: [PATCH 3/4] Passes request options over api methods in replace_all_objects --- algoliasearch/index.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/algoliasearch/index.py b/algoliasearch/index.py index d92e83209..e369eb439 100644 --- a/algoliasearch/index.py +++ b/algoliasearch/index.py @@ -216,7 +216,6 @@ def replace_all_objects(self, objects, request_options=None): @param objects contains an array of objects to push (each object must contains a objectID attribute) """ - safe = False if request_options is not None and 'safe' in request_options.parameters: safe = request_options.parameters['safe'] @@ -228,20 +227,27 @@ def replace_all_objects(self, objects, request_options=None): tmp_index = self.client.init_index(tmp_index_name) - copy_response = self.client.copy_index(self.index_name, tmp_index_name, scope=['settings', 'synonyms', 'rules']) + responses = [] + + response = self.client.copy_index(self.index_name, tmp_index_name, request_options, scope=['settings', 'synonyms', 'rules']) + responses.append(response) if safe: - self.wait_task(copy_response['taskID']) + self.wait_task(response['taskID']) + + response = tmp_index.save_objects(objects, request_options) + responses.append(response) - batch_response = tmp_index.save_objects(objects, request_options) if safe: - tmp_index.wait_task(batch_response['taskID']) + tmp_index.wait_task(response['taskID']) + + response = self.client.move_index(tmp_index_name, self.index_name, request_options); + responses.append(response) - move_response = self.client.move_index(tmp_index_name, self.index_name); if safe: - self.wait_task(move_response['taskID']) + self.wait_task(response['taskID']) - return [copy_response, batch_response, move_response] + return responses @deprecated def saveObject(self, obj): From ab2810535f9afbb754d1f46c46be953aa147282d Mon Sep 17 00:00:00 2001 From: Nuno Maduro Date: Thu, 15 Nov 2018 14:49:41 +0100 Subject: [PATCH 4/4] Batch save objects in replace_all_objects --- algoliasearch/index.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/algoliasearch/index.py b/algoliasearch/index.py index e369eb439..6dc7b4e07 100644 --- a/algoliasearch/index.py +++ b/algoliasearch/index.py @@ -235,11 +235,26 @@ def replace_all_objects(self, objects, request_options=None): if safe: self.wait_task(response['taskID']) - response = tmp_index.save_objects(objects, request_options) - responses.append(response) + batch = [] + batch_size = 1000 + count = 0 + for obj in objects: + batch.append(obj) + count += 1 + + if count == batch_size: + response = tmp_index.save_objects(batch, request_options) + responses.append(response) + batch = [] + count = 0 + + if batch: + response = tmp_index.save_objects(batch, request_options) + responses.append(response) if safe: - tmp_index.wait_task(response['taskID']) + for response in responses: + tmp_index.wait_task(response['taskID']) response = self.client.move_index(tmp_index_name, self.index_name, request_options); responses.append(response)