Merge pull request #19 from sbraverman/INFRASYS-6354

(10.5.0) INFRASYS-6354: Pull load balancer instances by specific instance IDs and run the healthcheck in a recursive fashion. Improve error handling for the load balancer instance healthcheck and perform a rollback when necessary.
dandb · Dec 3, 2015 · f413706 · f413706
2 parents 5bf346d + 89ca82b
commit f413706
Show file tree

Hide file tree

Showing 2 changed files with 43 additions and 30 deletions.
diff --git a/License2Deploy/rolling_deploy.py b/License2Deploy/rolling_deploy.py
@@ -9,6 +9,8 @@
 
 class RollingDeploy(object):
 
+  MAX_RETRIES = 10
+
   def __init__(self, env=None, project=None, buildNum=None, ami_id=None, profile_name=None, regions_conf=None):
     self.env = env
     self.project = project.replace('-','')
@@ -22,6 +24,7 @@ def __init__(self, env=None, project=None, buildNum=None, ami_id=None, profile_n
     self.conn_elb = AWSConn.aws_conn_elb(self.region, self.profile_name)
     self.conn_auto = AWSConn.aws_conn_auto(self.region, self.profile_name)
     self.exit_error_code = 2
+    self.load_balancer = self.get_lb()
 
   def get_ami_id_state(self, ami_id):
     try:
@@ -66,7 +69,7 @@ def get_autoscale_group_name(self):
 
   def get_lb(self):
     try:
-      return next(n.name for n in self.conn_elb.get_all_load_balancers() if self.project in str(n.name))
+      return next(n.name for n in self.conn_elb.get_all_load_balancers() if self.project in str(n.name) and self.env in str(n.name))
     except Exception as e:
       logging.error("Unable to pull down ELB info: {0}".format(e))
       exit(self.exit_error_code)
@@ -160,35 +163,35 @@ def wait_for_new_instances(self, instance_ids, retry=10, wait_time=30):
           else:
             logging.info("{0} is in a healthy state. Moving on...".format(instance))
 
-  def lb_healthcheck(self, new_ids, retry=10, wait_time=30):
+  def lb_healthcheck(self, new_ids, attempt=0, wait_time=0):
     ''' Confirm that the healthchecks report back OK in the LB. '''
-    lb = self.get_lb()
-    inst_length = len(new_ids)
-    for inst_id in range(inst_length):
-      count = 0
-      instance_id = self.conn_elb.describe_instance_health(lb)[inst_id]
-      while instance_id.state != 'InService':
-        logging.warning("Load balancer healthcheck is returning {0} for {1}. Retrying after 10 seconds. Count == {2}".format(instance_id.state, instance_id.instance_id, count))
-        instance_id = self.conn_elb.describe_instance_health(lb)[inst_id]
-        count = (count + 1)
-        if instance_id.state != 'InService' and (count >= retry):
-          logging.error("Load balancer healthcheck returning {0} for {1} and has exceeded the timeout threshold set. Please roll back.".format(instance_id.state, instance_id.instance_id)) 
-          self.revert_deployment()
-        sleep(wait_time)
-      logging.info("ELB healthcheck OK == {0}: {1}".format(instance_id.instance_id, instance_id.state))
+    try:
+      attempt += 1
+      if attempt > self.MAX_RETRIES:
+        logging.error('Load balancer healthcheck has exceeded the timeout threshold. Rolling back.')
+        self.revert_deployment()
+      sleep(wait_time)
+      instance_ids = self.conn_elb.describe_instance_health(self.load_balancer, new_ids)
+      status = filter(lambda instance: instance.state != "InService", instance_ids)
+      if status:
+        logging.info('Must check load balancer again. Following instance(s) are not "InService": {0}'.format(status))
+        return self.lb_healthcheck(new_ids, attempt=attempt, wait_time=30)
+    except Exception as e:
+      logging.error('Failed to health check load balancer instance states. Error: {0}'.format(e))
+      self.revert_deployment()
+    logging.info('ELB healthcheck OK')
     return True
 
   def confirm_lb_has_only_new_instances(self, wait_time=60):
     ''' Confirm that only new instances with the current build tag are in the load balancer '''
     sleep(wait_time) # Allotting time for the instances to shut down
-    lb = self.get_lb()
-    instance_ids = self.conn_elb.describe_instance_health(lb)
+    instance_ids = self.conn_elb.describe_instance_health(self.load_balancer)
     for instance in instance_ids:
       build = self.conn_ec2.get_all_reservations(instance.instance_id)[0].instances[0].tags['BUILD']
       if build != self.buildNum:
         logging.error("There is still an old instance in the ELB: {0}. Please investigate".format(instance))
         exit(self.exit_error_code)
-    logging.info("Deployed instances {0} to ELB: {1}".format(instance_ids, lb))
+    logging.info("Deployed instances {0} to ELB: {1}".format(instance_ids, self.load_balancer))
     return instance_ids
 
   def tag_ami(self, ami_id, env):
@@ -239,11 +242,13 @@ def revert_deployment(self): #pragma: no cover
     group_name = self.get_autoscale_group_name()
     new_instance_ids = self.gather_instance_info(group_name)
     for instance_id in new_instance_ids:
-      self.conn_auto.terminate_instance(instance_id, decrement_capacity=True)
-      logging.info("Removed {0} from autoscale group".format(instance_id))
+      try:
+        self.conn_auto.terminate_instance(instance_id, decrement_capacity=True)
+        logging.info("Removed {0} from autoscale group".format(instance_id))
+      except:
+        logging.warning('Failed to remove instance: {0}.'.format(instance_id))
     logging.error("REVERT COMPLETE!")
     exit(self.exit_error_code)
-
 
 def get_args(): # pragma: no cover
   parser = argparse.ArgumentParser()

diff --git a/tests/rolling_deploy_test.py b/tests/rolling_deploy_test.py
@@ -10,6 +10,7 @@
 from moto import mock_elb
 from License2Deploy.rolling_deploy import RollingDeploy
 from License2Deploy.AWSConn import AWSConn
+import sys
 
 class RollingDeployTest(unittest.TestCase):
 
@@ -24,6 +25,7 @@ class RollingDeployTest(unittest.TestCase):
   @mock_elb
   @mock_ec2
   def setUp(self):
+    self.setUpELB()
     self.rolling_deploy = RollingDeploy('stg', 'server-gms-extender', '0', 'ami-abcd1234', None, './regions.yml')
 
   def get_autoscaling_configurations(self, launch_configuration_name, autoscaling_group_name):
@@ -59,13 +61,14 @@ def setUpAutoScaleGroup(self, configurations):
       conn.create_auto_scaling_group(group)
 
   @mock_elb
-  def setUpELB(self):
+  def setUpELB(self, env='stg'):
     conn_elb = boto.connect_elb()
     zones = ['us-east-1a']
     ports = [(80, 8080, 'http')]
-    conn_elb.create_load_balancer('servergmsextenderELBstg', zones, ports)
-    balancers = conn_elb.get_all_load_balancers(load_balancer_names=['servergmsextenderELBstg'])
-    self.assertEqual(balancers[0].name, 'servergmsextenderELBstg')
+    load_balancer_name = 'servergmsextenderELB{0}'.format(env)
+    conn_elb.create_load_balancer(load_balancer_name, zones, ports)
+    balancers = conn_elb.get_all_load_balancers(load_balancer_names=[load_balancer_name])
+    self.assertEqual(balancers[0].name, load_balancer_name)
 
   @mock_ec2
   @mock_elb
@@ -135,11 +138,15 @@ def test_get_lb(self):
     self.setUpELB()
     self.assertEqual(u'servergmsextenderELBstg', self.rolling_deploy.get_lb()) #Return All LB's with the proper build number
 
+  # assertRaises is a context manager since Python 2.7. Only testing in Python 2.7
+  # https://docs.python.org/2.7/library/unittest.html
   @mock_elb
   def test_get_lb_failure(self):
-    self.setUpELB()
-    self.rolling_deploy = RollingDeploy('stg', 'fake-server-gms-extender', '0', 'bad', 'server-deploy', './regions.yml') #Need for exception
-    self.assertRaises(SystemExit, lambda: self.rolling_deploy.get_lb()) #Will raise exception because name can't be found
+    if sys.version_info >= (2, 7):
+      self.setUpELB()
+      with self.assertRaises(SystemExit) as rolling_deploy:
+        RollingDeploy('stg', 'fake-server-gms-extender', '0', 'bad', 'server-deploy', './regions.yml')
+      self.assertEqual(2, rolling_deploy.exception.code)
 
   @mock_ec2
   @mock_elb
@@ -173,7 +180,9 @@ def test_get_autoscale_group_name_stg(self):
     self.assertNotEqual(group, self.GMS_AUTOSCALING_GROUP_PRD)
 
   @mock_autoscaling
+  @mock_elb
   def test_get_autoscale_group_name_prd(self):
+    self.setUpELB(env='prd')
     self.rolling_deploy = RollingDeploy('prd', 'server-gms-extender', '0', 'ami-test212', None, './regions.yml')
     autoscaling_configurations = list()
     autoscaling_configurations.append(self.get_autoscaling_configurations(self.GMS_LAUNCH_CONFIGURATION_STG, self.GMS_AUTOSCALING_GROUP_STG))
@@ -208,7 +217,6 @@ def test_get_instance_ip_addrs(self):
   def test_get_all_instance_ids(self):
     self.setUpAutoScaleGroup([self.get_autoscaling_configurations(self.GMS_LAUNCH_CONFIGURATION_STG, self.GMS_AUTOSCALING_GROUP_STG)])
     conn = boto.connect_ec2()
-    instance_id_list = []
     reservation = conn.run_instances('ami-1234abcd', min_count=2, private_ip_address="10.10.10.10")
     instance_ids = reservation.instances
     rslt = self.rolling_deploy.get_all_instance_ids(self.GMS_AUTOSCALING_GROUP_STG)