diff --git a/License2Deploy/rolling_deploy.py b/License2Deploy/rolling_deploy.py
index b8607b9..47a0061 100644
--- a/License2Deploy/rolling_deploy.py
+++ b/License2Deploy/rolling_deploy.py
@@ -9,6 +9,8 @@ class RollingDeploy(object):

+  MAX_RETRIES = 10
+
   def __init__(self, env=None, project=None, buildNum=None, ami_id=None, profile_name=None, regions_conf=None):
     self.env = env
     self.project = project.replace('-','')
@@ -22,6 +24,7 @@ def __init__(self, env=None, project=None, buildNum=None, ami_id=None, profile_n
     self.conn_elb = AWSConn.aws_conn_elb(self.region, self.profile_name)
     self.conn_auto = AWSConn.aws_conn_auto(self.region, self.profile_name)
     self.exit_error_code = 2
+    self.load_balancer = self.get_lb()

   def get_ami_id_state(self, ami_id):
     try:
@@ -66,7 +69,7 @@ def get_autoscale_group_name(self):

   def get_lb(self):
     try:
-      return next(n.name for n in self.conn_elb.get_all_load_balancers() if self.project in str(n.name))
+      return next(n.name for n in self.conn_elb.get_all_load_balancers() if self.project in str(n.name) and self.env in str(n.name))
     except Exception as e:
       logging.error("Unable to pull down ELB info: {0}".format(e))
       exit(self.exit_error_code)
@@ -160,35 +163,35 @@ def wait_for_new_instances(self, instance_ids, retry=10, wait_time=30):
       else:
         logging.info("{0} is in a healthy state. Moving on...".format(instance))

-  def lb_healthcheck(self, new_ids, retry=10, wait_time=30):
+  def lb_healthcheck(self, new_ids, attempt=0, wait_time=0):
     ''' Confirm that the healthchecks report back OK in the LB. '''
-    lb = self.get_lb()
-    inst_length = len(new_ids)
-    for inst_id in range(inst_length):
-      count = 0
-      instance_id = self.conn_elb.describe_instance_health(lb)[inst_id]
-      while instance_id.state != 'InService':
-        logging.warning("Load balancer healthcheck is returning {0} for {1}. Retrying after 10 seconds. Count == {2}".format(instance_id.state, instance_id.instance_id, count))
-        instance_id = self.conn_elb.describe_instance_health(lb)[inst_id]
-        count = (count + 1)
-        if instance_id.state != 'InService' and (count >= retry):
-          logging.error("Load balancer healthcheck returning {0} for {1} and has exceeded the timeout threshold set. Please roll back.".format(instance_id.state, instance_id.instance_id))
-          self.revert_deployment()
-        sleep(wait_time)
-      logging.info("ELB healthcheck OK == {0}: {1}".format(instance_id.instance_id, instance_id.state))
+    try:
+      attempt += 1
+      if attempt > self.MAX_RETRIES:
+        logging.error('Load balancer healthcheck has exceeded the timeout threshold. Rolling back.')
+        self.revert_deployment()
+      sleep(wait_time)
+      instance_ids = self.conn_elb.describe_instance_health(self.load_balancer, new_ids)
+      status = filter(lambda instance: instance.state != "InService", instance_ids)
+      if status:
+        logging.info('Must check load balancer again. Following instance(s) are not "InService": {0}'.format(status))
+        return self.lb_healthcheck(new_ids, attempt=attempt, wait_time=30)
+    except Exception as e:
+      logging.error('Failed to health check load balancer instance states. Error: {0}'.format(e))
+      self.revert_deployment()
+    logging.info('ELB healthcheck OK')
     return True

   def confirm_lb_has_only_new_instances(self, wait_time=60):
     ''' Confirm that only new instances with the current build tag are in the load balancer '''
     sleep(wait_time) # Allotting time for the instances to shut down
-    lb = self.get_lb()
-    instance_ids = self.conn_elb.describe_instance_health(lb)
+    instance_ids = self.conn_elb.describe_instance_health(self.load_balancer)
     for instance in instance_ids:
       build = self.conn_ec2.get_all_reservations(instance.instance_id)[0].instances[0].tags['BUILD']
       if build != self.buildNum:
         logging.error("There is still an old instance in the ELB: {0}. Please investigate".format(instance))
         exit(self.exit_error_code)
-    logging.info("Deployed instances {0} to ELB: {1}".format(instance_ids, lb))
+    logging.info("Deployed instances {0} to ELB: {1}".format(instance_ids, self.load_balancer))
     return instance_ids

   def tag_ami(self, ami_id, env):
@@ -239,11 +242,13 @@ def revert_deployment(self): #pragma: no cover
     group_name = self.get_autoscale_group_name()
     new_instance_ids = self.gather_instance_info(group_name)
     for instance_id in new_instance_ids:
-      self.conn_auto.terminate_instance(instance_id, decrement_capacity=True)
-      logging.info("Removed {0} from autoscale group".format(instance_id))
+      try:
+        self.conn_auto.terminate_instance(instance_id, decrement_capacity=True)
+        logging.info("Removed {0} from autoscale group".format(instance_id))
+      except Exception:
+        logging.warning('Failed to remove instance: {0}.'.format(instance_id))
     logging.error("REVERT COMPLETE!")
     exit(self.exit_error_code)

-
 def get_args(): # pragma: no cover
   parser = argparse.ArgumentParser()
diff --git a/tests/rolling_deploy_test.py b/tests/rolling_deploy_test.py
index bc3192a..4cc6ba6 100644
--- a/tests/rolling_deploy_test.py
+++ b/tests/rolling_deploy_test.py
@@ -10,6 +10,7 @@ from moto import mock_elb
 from License2Deploy.rolling_deploy import RollingDeploy
 from License2Deploy.AWSConn import AWSConn
+import sys

 class RollingDeployTest(unittest.TestCase):

@@ -24,6 +25,7 @@ class RollingDeployTest(unittest.TestCase):
   @mock_elb
   @mock_ec2
   def setUp(self):
+    self.setUpELB()
     self.rolling_deploy = RollingDeploy('stg', 'server-gms-extender', '0', 'ami-abcd1234', None, './regions.yml')

   def get_autoscaling_configurations(self, launch_configuration_name, autoscaling_group_name):
@@ -59,13 +61,14 @@ def setUpAutoScaleGroup(self, configurations):
     conn.create_auto_scaling_group(group)

   @mock_elb
-  def setUpELB(self):
+  def setUpELB(self, env='stg'):
     conn_elb = boto.connect_elb()
     zones = ['us-east-1a']
     ports = [(80, 8080, 'http')]
-    conn_elb.create_load_balancer('servergmsextenderELBstg', zones, ports)
-    balancers = conn_elb.get_all_load_balancers(load_balancer_names=['servergmsextenderELBstg'])
-    self.assertEqual(balancers[0].name, 'servergmsextenderELBstg')
+    load_balancer_name = 'servergmsextenderELB{0}'.format(env)
+    conn_elb.create_load_balancer(load_balancer_name, zones, ports)
+    balancers = conn_elb.get_all_load_balancers(load_balancer_names=[load_balancer_name])
+    self.assertEqual(balancers[0].name, load_balancer_name)

   @mock_ec2
   @mock_elb
@@ -135,11 +138,15 @@ def test_get_lb(self):
     self.setUpELB()
     self.assertEqual(u'servergmsextenderELBstg', self.rolling_deploy.get_lb()) #Return All LB's with the proper build number

+  # assertRaises can be used as a context manager since Python 2.7, so this test only runs on Python 2.7+.
+  # https://docs.python.org/2.7/library/unittest.html
   @mock_elb
   def test_get_lb_failure(self):
-    self.setUpELB()
-    self.rolling_deploy = RollingDeploy('stg', 'fake-server-gms-extender', '0', 'bad', 'server-deploy', './regions.yml') #Need for exception
-    self.assertRaises(SystemExit, lambda: self.rolling_deploy.get_lb()) #Will raise exception because name can't be found
+    if sys.version_info >= (2, 7):
+      self.setUpELB()
+      with self.assertRaises(SystemExit) as rolling_deploy:
+        RollingDeploy('stg', 'fake-server-gms-extender', '0', 'bad', 'server-deploy', './regions.yml')
+      self.assertEqual(2, rolling_deploy.exception.code)

   @mock_ec2
   @mock_elb
@@ -173,7 +180,9 @@ def test_get_autoscale_group_name_stg(self):
     self.assertNotEqual(group, self.GMS_AUTOSCALING_GROUP_PRD)

   @mock_autoscaling
+  @mock_elb
   def test_get_autoscale_group_name_prd(self):
+    self.setUpELB(env='prd')
     self.rolling_deploy = RollingDeploy('prd', 'server-gms-extender', '0', 'ami-test212', None, './regions.yml')
     autoscaling_configurations = list()
     autoscaling_configurations.append(self.get_autoscaling_configurations(self.GMS_LAUNCH_CONFIGURATION_STG, self.GMS_AUTOSCALING_GROUP_STG))
@@ -208,7 +217,6 @@ def test_get_instance_ip_addrs(self):
   def test_get_all_instance_ids(self):
     self.setUpAutoScaleGroup([self.get_autoscaling_configurations(self.GMS_LAUNCH_CONFIGURATION_STG, self.GMS_AUTOSCALING_GROUP_STG)])
     conn = boto.connect_ec2()
-    instance_id_list = []
     reservation = conn.run_instances('ami-1234abcd', min_count=2, private_ip_address="10.10.10.10")
     instance_ids = reservation.instances
     rslt = self.rolling_deploy.get_all_instance_ids(self.GMS_AUTOSCALING_GROUP_STG)
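
For review context, the heart of this patch is the rewritten lb_healthcheck: instead of the old per-instance polling loops, it polls recursively with a bounded attempt counter, re-checking until every instance reports InService and reverting the deployment once MAX_RETRIES is exceeded. Below is a minimal runnable sketch of that pattern; FakeELBConn is a hypothetical stand-in for the boto ELB connection, and the wait between polls is shortened so the example finishes quickly.

    from time import sleep

    MAX_RETRIES = 10

    class FakeELBConn(object):
      '''Hypothetical stand-in for the boto ELB connection: every instance
      reports OutOfService until the third poll, then InService.'''
      def __init__(self):
        self.polls = 0

      def describe_instance_health(self, load_balancer, instance_ids):
        self.polls += 1
        state = 'InService' if self.polls >= 3 else 'OutOfService'
        return [(instance_id, state) for instance_id in instance_ids]

    conn_elb = FakeELBConn()

    def lb_healthcheck(new_ids, attempt=0, wait_time=0):
      attempt += 1
      if attempt > MAX_RETRIES:
        raise SystemExit(2)  # the patched method calls revert_deployment(), which exits
      sleep(wait_time)
      pending = [i for i, state in conn_elb.describe_instance_health('elb', new_ids)
                 if state != 'InService']
      if pending:
        # Not healthy yet: recurse with a pause (the patched method waits 30 seconds here).
        return lb_healthcheck(new_ids, attempt=attempt, wait_time=0.1)
      return True

    print(lb_healthcheck(['i-abc123', 'i-def456']))  # -> True on the third poll

Because the recursion depth is capped at MAX_RETRIES, the worst case is bounded at eleven calls, well under Python's default recursion limit, and every exit path either returns True or terminates via the rollback.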