From 1b60f0614684d547cd1d3051a2825992c9eae098 Mon Sep 17 00:00:00 2001
From: Zhang <zhangni@dnb.com>
Date: Fri, 1 Jul 2016 13:59:22 -0500
Subject: [PATCH] INFRASYS-7453: re-implement wait times

---
 License2Deploy/rolling_deploy.py | 86 +++++++++++++++++++-------------
 README.md                        | 13 +++++
 setup.py                         |  6 ++-
 tests/rolling_deploy_test.py     |  6 +--
 4 files changed, 70 insertions(+), 41 deletions(-)

diff --git a/License2Deploy/rolling_deploy.py b/License2Deploy/rolling_deploy.py
index 496066e..475db01 100644
--- a/License2Deploy/rolling_deploy.py
+++ b/License2Deploy/rolling_deploy.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 import logging
 import argparse
@@ -6,6 +6,7 @@
 from time import sleep, time
 from AWSConn import AWSConn
 from set_logging import SetLogging
+from retry.api import retry_call
 
 class RollingDeploy(object):
 
@@ -19,7 +20,10 @@ def __init__(self,
                profile_name=None,
                regions_conf=None,
                stack_name=None,
-               session=None):
+               session=None,
+               creation_wait=(10, 60),  # each *_wait is (tries, delay in seconds); tuples avoid shared mutable defaults
+               ready_wait=(10, 30),
+               health_wait=(10, 30)):
     self.env = env
     self.session = session
     self.project = project.replace('-','')
@@ -39,6 +43,9 @@ def __init__(self,
     self.cloudformation_client = AWSConn.get_boto3_client('cloudformation', self.region, self.profile_name, session)
     self.exit_error_code = 2
     self.load_balancer = False
+    self.creation_wait = creation_wait
+    self.ready_wait = ready_wait
+    self.health_wait = health_wait
 
   def get_ami_id_state(self, ami_id):
     try:
@@ -160,14 +167,11 @@ def get_instance_ids_by_requested_build_tag(self, id_list, build):
       new_instances += [instance_id for new_id in instances_build_tags if new_id.tags['BUILD'] == str(build)]
 
     if not new_instances:
-      logging.error("There are no instances in the group with build number {0}. Please ensure AMI was promoted.\nInstance ID List: {1}".format(build, id_list))
-      group_name = self.get_autoscale_group_name()
-      self.set_autoscale_instance_desired_count(self.calculate_autoscale_desired_instance_count(group_name, 'decrease'), group_name)
-      exit(self.exit_error_code)
-
-    id_ip_dict = self.get_instance_ip_addrs(new_instances)
-    logging.info("New Instance List with IP Addresses: {0}".format(id_ip_dict))
-    return new_instances
+      # Report the build that was actually queried (the `build` argument), which need not equal self.build_number
+      raise Exception('There are no instances in the group with build number {0}'.format(build))
+    ip_dict = self.get_instance_ip_addrs(new_instances)
+    logging.info("New Instance List with IP Addresses: {0}".format(ip_dict))
+    return new_instances
 
   def wait_for_new_instances(self, instance_ids, retry=10, wait_time=30):
     ''' Monitor new instances that come up and wait until they are ready '''
@@ -188,24 +192,15 @@ def wait_for_new_instances(self, instance_ids, retry=10, wait_time=30):
           else:
             logging.info("{0} is in a healthy state. Moving on...".format(instance))
 
-  def lb_healthcheck(self, new_ids, attempt=0, wait_time=0):
+  def lb_healthcheck(self, new_ids):
     ''' Confirm that the healthchecks report back OK in the LB. '''
-    try:
-      attempt += 1
-      if attempt > self.MAX_RETRIES:
-        logging.error('Load balancer healthcheck has exceeded the timeout threshold. Rolling back.')
-        self.revert_deployment()
-      sleep(wait_time)
-      instance_ids = self.conn_elb.describe_instance_health(self.load_balancer, new_ids)
-      status = filter(lambda instance: instance.state != "InService", instance_ids)
-      if status:
-        logging.info('Must check load balancer again. Following instance(s) are not "InService": {0}'.format(status))
-        return self.lb_healthcheck(new_ids, attempt=attempt, wait_time=30)
-    except Exception as e:
-      logging.error('Failed to health check load balancer instance states. Error: {0}'.format(e))
-      self.revert_deployment()
-    logging.info('ELB healthcheck OK')
-    return True
+    instance_ids = self.conn_elb.describe_instance_health(self.load_balancer, new_ids)
+    # Build a real list: on Python 3 filter() returns an iterator that is always truthy, so the check below would always raise
+    status = [instance for instance in instance_ids if instance.state != "InService"]
+    if status:
+      raise Exception('Must check load balancer again. Following instance(s) are not "InService": {0}'.format(status))
+    logging.info('ELB healthcheck OK')
+    return True
 
   def confirm_lb_has_only_new_instances(self, wait_time=60):
     ''' Confirm that only new instances with the current build tag are in the load balancer '''
@@ -238,14 +233,32 @@ def tag_ami(self, ami_id, env):
 
   def gather_instance_info(self, group): #pragma: no cover
     instance_ids = self.get_all_instance_ids(group)
+    logging.info("Instance ID List: {0}".format(instance_ids))
     new_instance_ids = self.get_instance_ids_by_requested_build_tag(instance_ids, self.build_number)
     return new_instance_ids
 
-  def healthcheck_new_instances(self, group_name): # pragma: no cover
-    ''' Healthchecking new instances to ensure deployment was successful '''
-    new_instance_ids = self.gather_instance_info(group_name)
-    self.wait_for_new_instances(new_instance_ids) #Wait for new instances to be up and ready
-    self.lb_healthcheck(new_instance_ids) #Once instances are ready, healthcheck. If successful, decrease desired count.
+  def launch_new_instances(self, group_name): # pragma: no cover
+    ''' Wait for new instances to be created, become ready and pass the LB healthcheck. Each *_wait is [tries, delay in seconds]. '''
+    try: # step 1: wait for ec2 creating instances; the logged limit is derived from the configured wait, not hard-coded
+      logging.info("Trying for maximum {0} seconds to allow for instances to be created.".format(self.creation_wait[0] * self.creation_wait[1]))
+      new_instance_ids = retry_call(self.gather_instance_info, fargs=[group_name], tries=self.creation_wait[0], delay=self.creation_wait[1], logger=logging)
+    except Exception:
+      logging.error("There are no instances in the group with build number {0}. Please ensure AMI was promoted.".format(self.build_number))
+      group_name = self.get_autoscale_group_name()
+      self.set_autoscale_instance_desired_count(self.calculate_autoscale_desired_instance_count(group_name, 'decrease'), group_name)
+      exit(self.exit_error_code)
+
+    # step 2: waiting for instances coming up and ready
+    logging.info("Waiting maximum {0} seconds for instances to be ready.".format(self.ready_wait[0] * self.ready_wait[1]))
+    self.wait_for_new_instances(new_instance_ids, self.ready_wait[0], self.ready_wait[1]) #Wait for new instances to be up and ready
+
+    # step 3: waiting for instance health check to be completed
+    try:
+      logging.info("Trying for maximum {0} seconds to health-check all instances.".format(self.health_wait[0] * self.health_wait[1]))
+      retry_call(self.lb_healthcheck, fargs=[new_instance_ids], tries=self.health_wait[0], delay=self.health_wait[1], logger=logging)
+    except Exception:
+      logging.error('Load balancer healthcheck has exceeded the timeout threshold. Rolling back.')
+      self.revert_deployment()
 
   def retrieve_project_cloudwatch_alarms(self):
     """ Retrieve all the Cloud-Watch alarms for the given project and environment """
@@ -290,9 +303,7 @@ def deploy(self): # pragma: no cover
     logging.info("Build #: {0} ::: Autoscale Group: {1}".format(self.build_number, group_name))
     self.disable_project_cloudwatch_alarms()
     self.set_autoscale_instance_desired_count(self.calculate_autoscale_desired_instance_count(group_name, 'increase'), group_name)
-    logging.info("Sleeping for 240 seconds to allow for instances to spin up")
-    sleep(240) #Need to wait until the instances come up in the load balancer
-    self.healthcheck_new_instances(group_name)
+    self.launch_new_instances(group_name)
     self.set_autoscale_instance_desired_count(self.calculate_autoscale_desired_instance_count(group_name, 'decrease'), group_name)
     self.confirm_lb_has_only_new_instances()
     self.tag_ami(self.ami_id, self.env)
@@ -322,12 +333,15 @@ def get_args(): # pragma: no cover
   parser.add_argument('-P', '--profile', default='default', action='store', dest='profile', help='Profile name as designated in aws credentials/config files', type=str)
   parser.add_argument('-c', '--config', default='/opt/License2Deploy/regions.yml', action='store', dest='config', help='Config file Location, eg. /opt/License2Deploy/regions.yml', type=str)
   parser.add_argument('-s', '--stack', action='store', dest='stack_name', help='Stack name if AutoScaling Group created via CloudFormation', type=str)
+  parser.add_argument('-C', '--creation-wait', action='store', dest='creation_wait', help='Wait time for ec2 instance creation', type=int, nargs=2, default=[10, 60])
+  parser.add_argument('-r', '--ready-wait', action='store', dest='ready_wait', help='Wait time for ec2 instance to be ready', type=int, nargs=2, default=[10, 30])
+  parser.add_argument('-H', '--health-wait', action='store', dest='health_wait', help='Wait time for ec2 instance health check', type=int, nargs=2, default=[10, 30])
   return parser.parse_args()
 
 def main(): # pragma: no cover
   args = get_args()
   SetLogging.setup_logging()
-  deployObj = RollingDeploy(args.env, args.project, args.build_number, args.ami_id, args.profile, args.config, args.stack_name)
+  deployObj = RollingDeploy(args.env, args.project, args.build_number, args.ami_id, args.profile, args.config, args.stack_name, None, args.creation_wait, args.ready_wait, args.health_wait)
   deployObj.deploy()
   
 if __name__ == "__main__": # pragma: no cover
diff --git a/README.md b/README.md
index c708fcd..a18ab5e 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,7 @@ Usage
 ```
 usage: rolling_deploy.py [-h] -e ENV -p PROJECT -b BUILD_NUM -a AMI_ID
                          [-P PROFILE] [-c CONFIG] [-s STACK_NAME]
+                         [-C CREATION_WAIT] [-r READY_WAIT] [-H HEALTH_WAIT]
 
 optional arguments:
   -h, --help            show this help message and exit
@@ -41,6 +42,18 @@ optional arguments:
                         /opt/License2Deploy/config.yml
   -s STACK_NAME, --stack STACK_NAME
                         Stack name if AutoScaling Group created via CloudFormation
+  -C CREATION_WAIT, --creation-wait CREATION_WAIT
+                        How long to wait for the EC2 instances to be created:
+                        (number of tries, seconds between tries), default (10, 60)
+                        e.g. -C 10 60
+  -r READY_WAIT, --ready-wait READY_WAIT
+                        How long to wait for the EC2 instances to come up and be ready:
+                        (number of tries, seconds between tries), default (10, 30)
+                        e.g. -r 10 30
+  -H HEALTH_WAIT, --health-wait HEALTH_WAIT
+                        How long to wait for the EC2 instances to pass the load balancer health check:
+                        (number of tries, seconds between tries), default (10, 30)
+                        e.g. -H 10 30
 ```
 Requirements
 ==================
diff --git a/setup.py b/setup.py
index f3cbc57..2167f43 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,8 @@
     "boto",
     "PyYaml",
     "argparse",
-    'boto3'
+    'boto3',
+    'retry'
   ]
 
 tests_require = [
@@ -18,7 +19,8 @@
     "moto",
     "PyYaml",
     'placebo',
-    'boto3'
+    'boto3',
+    'retry'
   ]
 
 def read(fname):
diff --git a/tests/rolling_deploy_test.py b/tests/rolling_deploy_test.py
index 9ef830e..4571161 100644
--- a/tests/rolling_deploy_test.py
+++ b/tests/rolling_deploy_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 import unittest
 import boto
@@ -310,12 +310,12 @@ def test_get_instance_ids_by_requested_build_tag(self):
          if [y for y in name.tags if y == 'BUILD' and name.tags['BUILD'] == '0']:
            new_inst.append(name.id)
     self.assertEqual(len(self.rolling_deploy.get_instance_ids_by_requested_build_tag(new_inst, 0)), 2)
-    self.assertRaises(SystemExit, lambda: self.rolling_deploy.get_instance_ids_by_requested_build_tag(new_inst, 1))
+    self.assertRaises(Exception, lambda: self.rolling_deploy.get_instance_ids_by_requested_build_tag(new_inst, 1))
 
   @mock_ec2
   def test_get_instance_ids_by_requested_build_tag_failure(self):
     self.setUpEC2()
-    self.assertRaises(SystemExit, lambda: self.rolling_deploy.get_instance_ids_by_requested_build_tag([], 0))
+    self.assertRaises(Exception, lambda: self.rolling_deploy.get_instance_ids_by_requested_build_tag([], 0))
 
   @mock_autoscaling
   def test_set_autoscale_instance_desired_count(self):