diff --git a/setup.py b/setup.py index 4e4c69b69..a87a19bb6 100644 --- a/setup.py +++ b/setup.py @@ -7,6 +7,7 @@ install_requires = [ "troposphere>=1.9.0", + "botocore>=1.6.0", "boto3>=1.3.1,<1.5.0", "PyYAML~=3.12", "awacs>=0.6.0", diff --git a/stacker/providers/aws/default.py b/stacker/providers/aws/default.py index 8c5e17156..61eb8e48b 100644 --- a/stacker/providers/aws/default.py +++ b/stacker/providers/aws/default.py @@ -7,10 +7,10 @@ import sys import botocore.exceptions +from botocore.config import Config from ..base import BaseProvider from ... import exceptions -from ...util import retry_with_backoff from ...ui import ui from stacker.session_cache import get_session @@ -22,6 +22,23 @@ logger = logging.getLogger(__name__) +# This value controls the maximum number of times a CloudFormation API call +# will be attempted, after being throttled. This value is used in an +# exponential backoff algorithm to determine how long the client should wait +# until attempting a retry: +# +# base * growth_factor ^ (attempts - 1) +# +# A value of 10 here would cause the worst case wait time for the last retry to +# be ~8 mins: +# +# 1 * 2 ^ (10 - 1) = 512 seconds +# +# References: +# https://github.com/boto/botocore/blob/1.6.1/botocore/retryhandler.py#L39-L58 +# https://github.com/boto/botocore/blob/1.6.1/botocore/data/_retry.json#L97-L121 +MAX_ATTEMPTS = 10 + MAX_TAIL_RETRIES = 5 DEFAULT_CAPABILITIES = ["CAPABILITY_NAMED_IAM", ] @@ -48,43 +65,6 @@ def get_output_dict(stack): return outputs -def retry_on_throttling(fn, attempts=3, args=None, kwargs=None): - """Wrap retry_with_backoff to handle AWS Cloudformation Throttling. - - Args: - fn (function): The function to call. - attempts (int): Maximum # of attempts to retry the function. - args (list): List of positional arguments to pass to the function. - kwargs (dict): Dict of keyword arguments to pass to the function. - - Returns: - passthrough: This returns the result of the function call itself. - - Raises: - passthrough: This raises any exceptions the function call raises, - except for boto.exception.BotoServerError, provided it doesn't - retry more than attempts. - """ - def _throttling_checker(exc): - """ - - Args: - exc (botocore.exceptions.ClientError): Expected exception type - - Returns: - boolean: indicating whether this error is a throttling error - """ - if exc.response['ResponseMetadata']['HTTPStatusCode'] == 400 and \ - exc.response['Error']['Code'] == "Throttling": - logger.debug("AWS throttling calls.") - return True - return False - - return retry_with_backoff(fn, args=args, kwargs=kwargs, attempts=attempts, - exc_list=(botocore.exceptions.ClientError, ), - retry_checker=_throttling_checker) - - def s3_fallback(fqn, template, parameters, tags, method, change_set_name=None, service_role=None): logger.warn("DEPRECATION WARNING: Falling back to legacy " @@ -108,7 +88,7 @@ def s3_fallback(fqn, template, parameters, tags, method, change_set_name=get_change_set_name() ) - response = retry_on_throttling(method, kwargs=args) + response = method(**args) return response @@ -274,11 +254,8 @@ def wait_till_change_set_complete(cfn_client, change_set_id, try_count=25, complete = False response = None for i in range(try_count): - response = retry_on_throttling( - cfn_client.describe_change_set, - kwargs={ - 'ChangeSetName': change_set_id, - }, + response = cfn_client.describe_change_set( + ChangeSetName=change_set_id, ) complete = response["Status"] in ("FAILED", "CREATE_COMPLETE") if complete: @@ -310,10 +287,7 @@ def create_change_set(cfn_client, fqn, template, parameters, tags, change_set_name=get_change_set_name() ) try: - response = retry_on_throttling( - cfn_client.create_change_set, - kwargs=args - ) + response = cfn_client.create_change_set(**args) except botocore.exceptions.ClientError as e: if e.response['Error']['Message'] == ('TemplateURL must reference ' 'a valid S3 object to which ' @@ -499,16 +473,21 @@ def cloudformation(self): # see https://github.com/remind101/stacker/issues/196 pid = os.getpid() if pid != self._pid or not self._cloudformation: + config = Config( + retries=dict( + max_attempts=MAX_ATTEMPTS + ) + ) session = get_session(self.region) - self._cloudformation = session.client('cloudformation') + self._cloudformation = session.client('cloudformation', + config=config) return self._cloudformation def get_stack(self, stack_name, **kwargs): try: - return retry_on_throttling( - self.cloudformation.describe_stacks, - kwargs=dict(StackName=stack_name))['Stacks'][0] + return self.cloudformation.describe_stacks( + StackName=stack_name)['Stacks'][0] except botocore.exceptions.ClientError as e: if "does not exist" not in e.message: raise @@ -613,7 +592,7 @@ def destroy_stack(self, stack, **kwargs): if self.service_role: args["RoleARN"] = self.service_role - retry_on_throttling(self.cloudformation.delete_stack, kwargs=args) + self.cloudformation.delete_stack(**args) return True def create_stack(self, fqn, template, parameters, tags, @@ -647,11 +626,8 @@ def create_stack(self, fqn, template, parameters, tags, 'CREATE', service_role=self.service_role, **kwargs ) - retry_on_throttling( - self.cloudformation.execute_change_set, - kwargs={ - 'ChangeSetName': change_set_id, - }, + self.cloudformation.execute_change_set( + ChangeSetName=change_set_id, ) else: args = generate_cloudformation_args( @@ -660,10 +636,7 @@ def create_stack(self, fqn, template, parameters, tags, ) try: - retry_on_throttling( - self.cloudformation.create_stack, - kwargs=args - ) + self.cloudformation.create_stack(**args) except botocore.exceptions.ClientError as e: if e.response['Error']['Message'] == ('TemplateURL must ' 'reference a valid S3 ' @@ -837,11 +810,8 @@ def interactive_update_stack(self, fqn, template, old_parameters, finally: ui.unlock() - retry_on_throttling( - self.cloudformation.execute_change_set, - kwargs={ - 'ChangeSetName': change_set_id, - }, + self.cloudformation.execute_change_set( + ChangeSetName=change_set_id, ) def noninteractive_changeset_update(self, fqn, template, old_parameters, @@ -869,11 +839,8 @@ def noninteractive_changeset_update(self, fqn, template, old_parameters, 'UPDATE', service_role=self.service_role, **kwargs ) - retry_on_throttling( - self.cloudformation.execute_change_set, - kwargs={ - 'ChangeSetName': change_set_id, - }, + self.cloudformation.execute_change_set( + ChangeSetName=change_set_id, ) def default_update_stack(self, fqn, template, old_parameters, parameters, @@ -899,10 +866,7 @@ def default_update_stack(self, fqn, template, old_parameters, parameters, ) try: - retry_on_throttling( - self.cloudformation.update_stack, - kwargs=args - ) + self.cloudformation.update_stack(**args) except botocore.exceptions.ClientError as e: if "No updates are to be performed." in e.message: logger.debug( @@ -944,9 +908,8 @@ def get_stack_info(self, stack_name): stack = self.get_stack(stack_name) try: - template = retry_on_throttling( - self.cloudformation.get_template, - kwargs=dict(StackName=stack_name))['TemplateBody'] + template = self.cloudformation.get_template( + StackName=stack_name)['TemplateBody'] except botocore.exceptions.ClientError as e: if "does not exist" not in e.message: raise diff --git a/stacker/tests/test_util.py b/stacker/tests/test_util.py index 5dbe2b7e4..01b078707 100644 --- a/stacker/tests/test_util.py +++ b/stacker/tests/test_util.py @@ -16,7 +16,6 @@ handle_hooks, merge_map, yaml_to_ordered_dict, - retry_with_backoff, get_client_region, get_s3_endpoint, s3_bucket_location_constraint, @@ -391,88 +390,3 @@ def _second_raises_exception2(self, a, b, x=None, y=None): def _throws_exception2(self, a, b, x=None, y=None): self.counter += 1 raise TestException2("Broke.") - - def test_function_works_no_retry(self): - - r = retry_with_backoff(self._works_immediately, - attempts=2, min_delay=0, max_delay=.1, - args=["a", "b"], - kwargs={"x": "X", "y": "Y"}) - self.assertEqual(r, ["a", "b", "X", "Y"]) - self.assertEqual(self.counter, 1) - - def test_retry_exception(self): - - r = retry_with_backoff(self._works_second_attempt, - attempts=5, min_delay=0, max_delay=.1, - args=["a", "b"], - kwargs={"x": "X", "y": "Y"}) - self.assertEqual(r, ["a", "b", "X", "Y"]) - self.assertEqual(self.counter, 2) - - def test_multiple_exceptions(self): - - r = retry_with_backoff(self._second_raises_exception2, - exc_list=(TestException1, TestException2), - attempts=5, min_delay=0, max_delay=.1, - args=["a", "b"], - kwargs={"x": "X", "y": "Y"}) - self.assertEqual(r, ["a", "b", "X", "Y"]) - self.assertEqual(self.counter, 2) - - def test_unhandled_exception(self): - - with self.assertRaises(TestException2): - retry_with_backoff(self._throws_exception2, - exc_list=(TestException1), - attempts=5, min_delay=0, max_delay=.1, - args=["a", "b"], - kwargs={"x": "X", "y": "Y"}) - self.assertEqual(self.counter, 1) - - def test_never_recovers(self): - - with self.assertRaises(TestException2): - retry_with_backoff(self._throws_exception2, - exc_list=(TestException1, TestException2), - attempts=5, min_delay=0, max_delay=.1, - args=["a", "b"], - kwargs={"x": "X", "y": "Y"}) - self.assertEqual(self.counter, 5) - - def test_retry_checker(self): - def _throws_handled_exception(a, b, x=None, y=None): - self.counter += 1 - if self.counter == 2: - return [a, b, x, y] - raise TestException2("Broke.") - - def _throws_unhandled_exception(a, b, x=None, y=None): - self.counter += 1 - if self.counter == 2: - return [a, b, x, y] - raise TestException2("Invalid") - - def _check_for_broke_message(e): - if "Broke." in e.message: - return True - return False - - r = retry_with_backoff(_throws_handled_exception, - exc_list=(TestException2), - retry_checker=_check_for_broke_message, - attempts=5, min_delay=0, max_delay=.1, - args=["a", "b"], - kwargs={"x": "X", "y": "Y"}) - self.assertEqual(self.counter, 2) - self.assertEqual(r, ["a", "b", "X", "Y"]) - - self.counter = 0 - with self.assertRaises(TestException2): - retry_with_backoff(_throws_unhandled_exception, - exc_list=(TestException2), - retry_checker=_check_for_broke_message, - attempts=5, min_delay=0, max_delay=.1, - args=["a", "b"], - kwargs={"x": "X", "y": "Y"}) - self.assertEqual(self.counter, 1) diff --git a/stacker/util.py b/stacker/util.py index 8f8a269f0..2ff4b36aa 100644 --- a/stacker/util.py +++ b/stacker/util.py @@ -9,7 +9,6 @@ import sys import tarfile import tempfile -import time import zipfile import collections @@ -28,68 +27,6 @@ logger = logging.getLogger(__name__) -def retry_with_backoff(function, args=None, kwargs=None, attempts=5, - min_delay=1, max_delay=3, exc_list=None, - retry_checker=None): - """Retries function, catching expected Exceptions. - - Each retry has a delay between `min_delay` and `max_delay` seconds, - increasing with each attempt. - - Args: - function (function): The function to call. - args (list, optional): A list of positional arguments to pass to the - given function. - kwargs (dict, optional): Keyword arguments to pass to the given - function. - attempts (int, optional): The # of times to retry the function. - Default: 5 - min_delay (int, optional): The minimum time to delay retries, in - seconds. Default: 1 - max_delay (int, optional): The maximum time to delay retries, in - seconds. Default: 5 - exc_list (list, optional): A list of :class:`Exception` classes that - should be retried. Default: [:class:`Exception`,] - retry_checker (func, optional): An optional function that is used to - do a deeper analysis on the received :class:`Exception` to - determine if it qualifies for retry. Receives a single argument, - the :class:`Exception` object that was caught. Should return - True if it should be retried. - - Returns: - variable: Returns whatever the given function returns. - - Raises: - :class:`Exception`: Raises whatever exception the given function - raises, if unable to succeed within the given number of attempts. - """ - args = args or [] - kwargs = kwargs or {} - attempt = 0 - if not exc_list: - exc_list = (Exception, ) - while True: - attempt += 1 - logger.debug("Calling %s, attempt %d.", function, attempt) - sleep_time = min(max_delay, min_delay * attempt) - try: - return function(*args, **kwargs) - except exc_list as e: - # If there is no retry checker function, or if there is and it - # returns True, then go ahead and retry - if not retry_checker or retry_checker(e): - if attempt == attempts: - logger.error("Function %s failed after %s retries. Giving " - "up.", function.func_name, attempts) - raise - logger.debug("Caught expected exception: %r", e) - # If there is a retry checker function, and it returned False, - # do not retry - else: - raise - time.sleep(sleep_time) - - def camel_to_snake(name): """Converts CamelCase to snake_case.