From 1b60f0614684d547cd1d3051a2825992c9eae098 Mon Sep 17 00:00:00 2001 From: Zhang Date: Fri, 1 Jul 2016 13:59:22 -0500 Subject: [PATCH] INFRASYS-7453: re-implement wait times --- License2Deploy/rolling_deploy.py | 86 +++++++++++++++++++------------- README.md | 13 +++++ setup.py | 6 ++- tests/rolling_deploy_test.py | 6 +-- 4 files changed, 70 insertions(+), 41 deletions(-) diff --git a/License2Deploy/rolling_deploy.py b/License2Deploy/rolling_deploy.py index 496066e..475db01 100644 --- a/License2Deploy/rolling_deploy.py +++ b/License2Deploy/rolling_deploy.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import logging import argparse @@ -6,6 +6,7 @@ from time import sleep, time from AWSConn import AWSConn from set_logging import SetLogging +from retry.api import retry_call class RollingDeploy(object): @@ -19,7 +20,10 @@ def __init__(self, profile_name=None, regions_conf=None, stack_name=None, - session=None): + session=None, + creation_wait=[10, 60], + ready_wait=[10, 30], + health_wait=[10, 30]): self.env = env self.session = session self.project = project.replace('-','') @@ -39,6 +43,9 @@ def __init__(self, self.cloudformation_client = AWSConn.get_boto3_client('cloudformation', self.region, self.profile_name, session) self.exit_error_code = 2 self.load_balancer = False + self.creation_wait = creation_wait + self.ready_wait = ready_wait + self.health_wait = health_wait def get_ami_id_state(self, ami_id): try: @@ -160,14 +167,11 @@ def get_instance_ids_by_requested_build_tag(self, id_list, build): new_instances += [instance_id for new_id in instances_build_tags if new_id.tags['BUILD'] == str(build)] if not new_instances: - logging.error("There are no instances in the group with build number {0}. Please ensure AMI was promoted.\nInstance ID List: {1}".format(build, id_list)) - group_name = self.get_autoscale_group_name() - self.set_autoscale_instance_desired_count(self.calculate_autoscale_desired_instance_count(group_name, 'decrease'), group_name) - exit(self.exit_error_code) - - id_ip_dict = self.get_instance_ip_addrs(new_instances) - logging.info("New Instance List with IP Addresses: {0}".format(id_ip_dict)) - return new_instances + raise Exception('There are no instances in the group with build number {0}'.format(self.build_number)) + else: + ip_dict = self.get_instance_ip_addrs(new_instances) + logging.info("New Instance List with IP Addresses: {0}".format(ip_dict)) + return new_instances def wait_for_new_instances(self, instance_ids, retry=10, wait_time=30): ''' Monitor new instances that come up and wait until they are ready ''' @@ -188,24 +192,15 @@ def wait_for_new_instances(self, instance_ids, retry=10, wait_time=30): else: logging.info("{0} is in a healthy state. Moving on...".format(instance)) - def lb_healthcheck(self, new_ids, attempt=0, wait_time=0): + def lb_healthcheck(self, new_ids): ''' Confirm that the healthchecks report back OK in the LB. ''' - try: - attempt += 1 - if attempt > self.MAX_RETRIES: - logging.error('Load balancer healthcheck has exceeded the timeout threshold. Rolling back.') - self.revert_deployment() - sleep(wait_time) - instance_ids = self.conn_elb.describe_instance_health(self.load_balancer, new_ids) - status = filter(lambda instance: instance.state != "InService", instance_ids) - if status: - logging.info('Must check load balancer again. Following instance(s) are not "InService": {0}'.format(status)) - return self.lb_healthcheck(new_ids, attempt=attempt, wait_time=30) - except Exception as e: - logging.error('Failed to health check load balancer instance states. Error: {0}'.format(e)) - self.revert_deployment() - logging.info('ELB healthcheck OK') - return True + instance_ids = self.conn_elb.describe_instance_health(self.load_balancer, new_ids) + status = filter(lambda instance: instance.state != "InService", instance_ids) + if status: + raise Exception('Must check load balancer again. Following instance(s) are not "InService": {0}'.format(status)) + else: + logging.info('ELB healthcheck OK') + return True def confirm_lb_has_only_new_instances(self, wait_time=60): ''' Confirm that only new instances with the current build tag are in the load balancer ''' @@ -238,14 +233,32 @@ def tag_ami(self, ami_id, env): def gather_instance_info(self, group): #pragma: no cover instance_ids = self.get_all_instance_ids(group) + logging.info("Instance ID List: {0}".format(instance_ids)) new_instance_ids = self.get_instance_ids_by_requested_build_tag(instance_ids, self.build_number) return new_instance_ids - def healthcheck_new_instances(self, group_name): # pragma: no cover - ''' Healthchecking new instances to ensure deployment was successful ''' - new_instance_ids = self.gather_instance_info(group_name) - self.wait_for_new_instances(new_instance_ids) #Wait for new instances to be up and ready - self.lb_healthcheck(new_instance_ids) #Once instances are ready, healthcheck. If successful, decrease desired count. + def launch_new_instances(self, group_name): # pragma: no cover + # step 1: wait for ec2 creating instances + try: + logging.info("Trying for maximum 10 minutes to allow for instances to be created.") + new_instance_ids = retry_call(self.gather_instance_info, fargs=[group_name], tries=self.creation_wait[0], delay=self.creation_wait[1], logger=logging) + except Exception as e: + logging.error("There are no instances in the group with build number {0}. Please ensure AMI was promoted.".format(self.build_number)) + group_name = self.get_autoscale_group_name() + self.set_autoscale_instance_desired_count(self.calculate_autoscale_desired_instance_count(group_name, 'decrease'), group_name) + exit(self.exit_error_code) + + # step 2: waiting for instances coming up and ready + logging.info("Waiting maximum 5 minutes for instances to be ready.") + self.wait_for_new_instances(new_instance_ids, self.ready_wait[0], self.ready_wait[1]) #Wait for new instances to be up and ready + + # step 3: waiting for instance health check to be completed + try: + logging.info("Trying for maximum 5 minutes to health-check all instances.") + retry_call(self.lb_healthcheck, fargs=[new_instance_ids], tries=self.health_wait[0], delay=self.health_wait[1], logger=logging) + except Exception as e: + logging.error('Load balancer healthcheck has exceeded the timeout threshold. Rolling back.') + self.revert_deployment() def retrieve_project_cloudwatch_alarms(self): """ Retrieve all the Cloud-Watch alarms for the given project and environment """ @@ -290,9 +303,7 @@ def deploy(self): # pragma: no cover logging.info("Build #: {0} ::: Autoscale Group: {1}".format(self.build_number, group_name)) self.disable_project_cloudwatch_alarms() self.set_autoscale_instance_desired_count(self.calculate_autoscale_desired_instance_count(group_name, 'increase'), group_name) - logging.info("Sleeping for 240 seconds to allow for instances to spin up") - sleep(240) #Need to wait until the instances come up in the load balancer - self.healthcheck_new_instances(group_name) + self.launch_new_instances(group_name) self.set_autoscale_instance_desired_count(self.calculate_autoscale_desired_instance_count(group_name, 'decrease'), group_name) self.confirm_lb_has_only_new_instances() self.tag_ami(self.ami_id, self.env) @@ -322,12 +333,15 @@ def get_args(): # pragma: no cover parser.add_argument('-P', '--profile', default='default', action='store', dest='profile', help='Profile name as designated in aws credentials/config files', type=str) parser.add_argument('-c', '--config', default='/opt/License2Deploy/regions.yml', action='store', dest='config', help='Config file Location, eg. /opt/License2Deploy/regions.yml', type=str) parser.add_argument('-s', '--stack', action='store', dest='stack_name', help='Stack name if AutoScaling Group created via CloudFormation', type=str) + parser.add_argument('-C', '--creation-wait', action='store', dest='creation_wait', help='Wait time for ec2 instance creation', type=int, nargs=2, default=[10, 60]) + parser.add_argument('-r', '--ready-wait', action='store', dest='ready_wait', help='Wait time for ec2 instance to be ready', type=int, nargs=2, default=[10, 30]) + parser.add_argument('-H', '--health-wait', action='store', dest='health_wait', help='Wait time for ec2 instance health check', type=int, nargs=2, default=[10, 30]) return parser.parse_args() def main(): # pragma: no cover args = get_args() SetLogging.setup_logging() - deployObj = RollingDeploy(args.env, args.project, args.build_number, args.ami_id, args.profile, args.config, args.stack_name) + deployObj = RollingDeploy(args.env, args.project, args.build_number, args.ami_id, args.profile, args.config, args.stack_name, None, args.creation_wait, args.ready_wait, args.health_wait) deployObj.deploy() if __name__ == "__main__": # pragma: no cover diff --git a/README.md b/README.md index c708fcd..a18ab5e 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Usage ``` usage: rolling_deploy.py [-h] -e ENV -p PROJECT -b BUILD_NUM -a AMI_ID [-P PROFILE] [-c CONFIG] [-s STACK_NAME] + [-C CREATION_WAIT] [-r READY_WAIT] [-H HEALTH_WAIT] optional arguments: -h, --help show this help message and exit @@ -41,6 +42,18 @@ optional arguments: /opt/License2Deploy/config.yml -s STACK_NAME, --stack STACK_NAME Stack name if AutoScaling Group created via CloudFormation + -C CREATION_WAIT, --creation-wait CREATION_WAIT + Time to wait for EC2 instances to be created + (# of tries, interval of each try in seconds), default (10, 60) + e.g. -C 10 60 + -r READY_WAIT, --ready-wait READY_WAIT + Time to wait for EC2 instances to come up and be ready + (# of tries, interval of each try in seconds), default (10, 30) + e.g. -r 10 30 + -H HEALTH_WAIT, --health-wait HEALTH_WAIT + Time to wait for EC2 instances to be health checked + (# of tries, interval of each try in seconds), default (10, 30) + e.g. -H 10 30 ``` Requirements ================== diff --git a/setup.py b/setup.py index f3cbc57..2167f43 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,8 @@ "boto", "PyYaml", "argparse", - 'boto3' + 'boto3', + 'retry' ] tests_require = [ @@ -18,7 +19,8 @@ "moto", "PyYaml", 'placebo', - 'boto3' + 'boto3', + 'retry' ] def read(fname): diff --git a/tests/rolling_deploy_test.py b/tests/rolling_deploy_test.py index 9ef830e..4571161 100644 --- a/tests/rolling_deploy_test.py +++ b/tests/rolling_deploy_test.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import unittest import boto @@ -310,12 +310,12 @@ def test_get_instance_ids_by_requested_build_tag(self): if [y for y in name.tags if y == 'BUILD' and name.tags['BUILD'] == '0']: new_inst.append(name.id) self.assertEqual(len(self.rolling_deploy.get_instance_ids_by_requested_build_tag(new_inst, 0)), 2) - self.assertRaises(SystemExit, lambda: self.rolling_deploy.get_instance_ids_by_requested_build_tag(new_inst, 1)) + self.assertRaises(Exception, lambda: self.rolling_deploy.get_instance_ids_by_requested_build_tag(new_inst, 1)) @mock_ec2 def test_get_instance_ids_by_requested_build_tag_failure(self): self.setUpEC2() - self.assertRaises(SystemExit, lambda: self.rolling_deploy.get_instance_ids_by_requested_build_tag([], 0)) + self.assertRaises(Exception, lambda: self.rolling_deploy.get_instance_ids_by_requested_build_tag([], 0)) @mock_autoscaling def test_set_autoscale_instance_desired_count(self):