From fcaa4d82a6c72741938fd2e90aeac808245b87f0 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 12 Jan 2021 21:04:39 +0200 Subject: [PATCH] 6 backoff attempts, 3 normal attempts --- corehq/motech/repeaters/const.py | 5 +++- corehq/motech/repeaters/models.py | 17 ++++++++----- corehq/motech/repeaters/tests/test_models.py | 26 +++++++++++++++++--- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/corehq/motech/repeaters/const.py b/corehq/motech/repeaters/const.py index a8748ac3729a..415c516a91a3 100644 --- a/corehq/motech/repeaters/const.py +++ b/corehq/motech/repeaters/const.py @@ -5,7 +5,10 @@ MIN_RETRY_WAIT = timedelta(minutes=60) CHECK_REPEATERS_INTERVAL = timedelta(minutes=5) CHECK_REPEATERS_KEY = 'check-repeaters-key' -MAX_ATTEMPTS = 6 +# Number of attempts to an online endpoint before cancelling payload +MAX_ATTEMPTS = 3 +# Number of exponential backoff attempts to an offline endpoint +MAX_BACKOFF_ATTEMPTS = 6 # Limit the number of records to forward at a time so that one repeater # can't hold up the rest. RECORDS_AT_A_TIME = 1000 diff --git a/corehq/motech/repeaters/models.py b/corehq/motech/repeaters/models.py index 7d397942eeb8..f12026003205 100644 --- a/corehq/motech/repeaters/models.py +++ b/corehq/motech/repeaters/models.py @@ -70,10 +70,10 @@ from typing import Any, overload from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse -from django.utils.functional import cached_property from django.conf import settings from django.db import models from django.utils import timezone +from django.utils.functional import cached_property from django.utils.translation import ugettext_lazy as _ from couchdbkit.exceptions import ResourceConflict, ResourceNotFound @@ -123,6 +123,7 @@ from .const import ( MAX_ATTEMPTS, + MAX_BACKOFF_ATTEMPTS, MAX_RETRY_WAIT, MIN_RETRY_WAIT, RECORD_CANCELLED_STATE, @@ -1068,7 +1069,11 @@ def add_client_failure_attempt(self, message): that this repeat record does not hold up the rest. """ self.repeater_stub.reset_next_attempt() - self._add_failure_attempt(message) + if self.num_attempts < MAX_ATTEMPTS: + state = RECORD_FAILURE_STATE + else: + state = RECORD_CANCELLED_STATE + self._add_failure_attempt(message, state) def add_server_failure_attempt(self, message): """ @@ -1082,13 +1087,13 @@ def add_server_failure_attempt(self, message): """ self.repeater_stub.set_next_attempt() - self._add_failure_attempt(message) - - def _add_failure_attempt(self, message): - if self.num_attempts < MAX_ATTEMPTS: + if self.num_attempts < MAX_BACKOFF_ATTEMPTS: state = RECORD_FAILURE_STATE else: state = RECORD_CANCELLED_STATE + self._add_failure_attempt(message, state) + + def _add_failure_attempt(self, message, state): self.sqlrepeatrecordattempt_set.create( state=state, message=message, diff --git a/corehq/motech/repeaters/tests/test_models.py b/corehq/motech/repeaters/tests/test_models.py index 8ef578e7bd3d..528ff7c22488 100644 --- a/corehq/motech/repeaters/tests/test_models.py +++ b/corehq/motech/repeaters/tests/test_models.py @@ -14,6 +14,7 @@ from ..const import ( MAX_ATTEMPTS, + MAX_BACKOFF_ATTEMPTS, MIN_RETRY_WAIT, RECORD_CANCELLED_STATE, RECORD_FAILURE_STATE, @@ -329,15 +330,17 @@ def test_add_server_failure_attempt_fail(self): def test_add_server_failure_attempt_cancel(self): message = '504: Gateway Timeout' - for __ in range(MAX_ATTEMPTS + 1): + while self.repeat_record.state != RECORD_CANCELLED_STATE: self.repeat_record.add_server_failure_attempt(message=message) - self.assertEqual(self.repeat_record.state, RECORD_CANCELLED_STATE) + self.assertGreater(self.repeater_stub.last_attempt_at, self.just_now) + # Interval is MIN_RETRY_WAIT because attempts were very close together self.assertEqual(self.repeater_stub.next_attempt_at, self.repeater_stub.last_attempt_at + MIN_RETRY_WAIT) - self.assertEqual(self.repeat_record.num_attempts, MAX_ATTEMPTS + 1) + self.assertEqual(self.repeat_record.num_attempts, + MAX_BACKOFF_ATTEMPTS + 1) attempts = list(self.repeat_record.attempts) - expected_states = ([RECORD_FAILURE_STATE] * MAX_ATTEMPTS + expected_states = ([RECORD_FAILURE_STATE] * MAX_BACKOFF_ATTEMPTS + [RECORD_CANCELLED_STATE]) self.assertEqual([a.state for a in attempts], expected_states) self.assertEqual(attempts[-1].message, message) @@ -355,6 +358,21 @@ def test_add_client_failure_attempt_fail(self): self.assertEqual(self.repeat_record.attempts[0].message, message) self.assertIsNone(self.repeat_record.attempts[0].traceback) + def test_add_client_failure_attempt_cancel(self): + message = '409: Conflict' + while self.repeat_record.state != RECORD_CANCELLED_STATE: + self.repeat_record.add_client_failure_attempt(message=message) + self.assertIsNone(self.repeater_stub.last_attempt_at) + self.assertIsNone(self.repeater_stub.next_attempt_at) + self.assertEqual(self.repeat_record.num_attempts, + MAX_ATTEMPTS + 1) + attempts = list(self.repeat_record.attempts) + expected_states = ([RECORD_FAILURE_STATE] * MAX_ATTEMPTS + + [RECORD_CANCELLED_STATE]) + self.assertEqual([a.state for a in attempts], expected_states) + self.assertEqual(attempts[-1].message, message) + self.assertIsNone(attempts[-1].traceback) + def test_add_payload_exception_attempt(self): message = 'ValueError: Schema validation failed' tb_str = 'Traceback ...'