Skip to content

Commit

Permalink
6 backoff attempts, 3 normal attempts
Browse files Browse the repository at this point in the history
  • Loading branch information
kaapstorm committed Jan 12, 2021
1 parent da916b2 commit fcaa4d8
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 11 deletions.
5 changes: 4 additions & 1 deletion corehq/motech/repeaters/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
MIN_RETRY_WAIT = timedelta(minutes=60)
CHECK_REPEATERS_INTERVAL = timedelta(minutes=5)
CHECK_REPEATERS_KEY = 'check-repeaters-key'
MAX_ATTEMPTS = 6
# Number of attempts to an online endpoint before cancelling payload
MAX_ATTEMPTS = 3
# Number of exponential backoff attempts to an offline endpoint
MAX_BACKOFF_ATTEMPTS = 6
# Limit the number of records to forward at a time so that one repeater
# can't hold up the rest.
RECORDS_AT_A_TIME = 1000
Expand Down
17 changes: 11 additions & 6 deletions corehq/motech/repeaters/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@
from typing import Any, overload
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse

from django.utils.functional import cached_property
from django.conf import settings
from django.db import models
from django.utils import timezone
from django.utils.functional import cached_property
from django.utils.translation import ugettext_lazy as _

from couchdbkit.exceptions import ResourceConflict, ResourceNotFound
Expand Down Expand Up @@ -123,6 +123,7 @@

from .const import (
MAX_ATTEMPTS,
MAX_BACKOFF_ATTEMPTS,
MAX_RETRY_WAIT,
MIN_RETRY_WAIT,
RECORD_CANCELLED_STATE,
Expand Down Expand Up @@ -1068,7 +1069,11 @@ def add_client_failure_attempt(self, message):
that this repeat record does not hold up the rest.
"""
self.repeater_stub.reset_next_attempt()
self._add_failure_attempt(message)
if self.num_attempts < MAX_ATTEMPTS:
state = RECORD_FAILURE_STATE
else:
state = RECORD_CANCELLED_STATE
self._add_failure_attempt(message, state)

def add_server_failure_attempt(self, message):
"""
Expand All @@ -1082,13 +1087,13 @@ def add_server_failure_attempt(self, message):
"""
self.repeater_stub.set_next_attempt()
self._add_failure_attempt(message)

def _add_failure_attempt(self, message):
if self.num_attempts < MAX_ATTEMPTS:
if self.num_attempts < MAX_BACKOFF_ATTEMPTS:
state = RECORD_FAILURE_STATE
else:
state = RECORD_CANCELLED_STATE
self._add_failure_attempt(message, state)

def _add_failure_attempt(self, message, state):
self.sqlrepeatrecordattempt_set.create(
state=state,
message=message,
Expand Down
26 changes: 22 additions & 4 deletions corehq/motech/repeaters/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from ..const import (
MAX_ATTEMPTS,
MAX_BACKOFF_ATTEMPTS,
MIN_RETRY_WAIT,
RECORD_CANCELLED_STATE,
RECORD_FAILURE_STATE,
Expand Down Expand Up @@ -329,15 +330,17 @@ def test_add_server_failure_attempt_fail(self):

def test_add_server_failure_attempt_cancel(self):
message = '504: Gateway Timeout'
for __ in range(MAX_ATTEMPTS + 1):
while self.repeat_record.state != RECORD_CANCELLED_STATE:
self.repeat_record.add_server_failure_attempt(message=message)
self.assertEqual(self.repeat_record.state, RECORD_CANCELLED_STATE)

self.assertGreater(self.repeater_stub.last_attempt_at, self.just_now)
# Interval is MIN_RETRY_WAIT because attempts were very close together
self.assertEqual(self.repeater_stub.next_attempt_at,
self.repeater_stub.last_attempt_at + MIN_RETRY_WAIT)
self.assertEqual(self.repeat_record.num_attempts, MAX_ATTEMPTS + 1)
self.assertEqual(self.repeat_record.num_attempts,
MAX_BACKOFF_ATTEMPTS + 1)
attempts = list(self.repeat_record.attempts)
expected_states = ([RECORD_FAILURE_STATE] * MAX_ATTEMPTS
expected_states = ([RECORD_FAILURE_STATE] * MAX_BACKOFF_ATTEMPTS
+ [RECORD_CANCELLED_STATE])
self.assertEqual([a.state for a in attempts], expected_states)
self.assertEqual(attempts[-1].message, message)
Expand All @@ -355,6 +358,21 @@ def test_add_client_failure_attempt_fail(self):
self.assertEqual(self.repeat_record.attempts[0].message, message)
self.assertIsNone(self.repeat_record.attempts[0].traceback)

def test_add_client_failure_attempt_cancel(self):
message = '409: Conflict'
while self.repeat_record.state != RECORD_CANCELLED_STATE:
self.repeat_record.add_client_failure_attempt(message=message)
self.assertIsNone(self.repeater_stub.last_attempt_at)
self.assertIsNone(self.repeater_stub.next_attempt_at)
self.assertEqual(self.repeat_record.num_attempts,
MAX_ATTEMPTS + 1)
attempts = list(self.repeat_record.attempts)
expected_states = ([RECORD_FAILURE_STATE] * MAX_ATTEMPTS
+ [RECORD_CANCELLED_STATE])
self.assertEqual([a.state for a in attempts], expected_states)
self.assertEqual(attempts[-1].message, message)
self.assertIsNone(attempts[-1].traceback)

def test_add_payload_exception_attempt(self):
message = 'ValueError: Schema validation failed'
tb_str = 'Traceback ...'
Expand Down

0 comments on commit fcaa4d8

Please sign in to comment.