From 0973b538aea27e4ef902afa25b5b804a99b1ef45 Mon Sep 17 00:00:00 2001 From: micafer Date: Fri, 10 Mar 2017 12:03:37 +0100 Subject: [PATCH 1/2] Revert fix to #233 --- IM/InfrastructureManager.py | 250 +++++++++++++++--------------------- doc/source/client.rst | 4 +- test/unit/test_im_logic.py | 51 ++++---- 3 files changed, 128 insertions(+), 177 deletions(-) diff --git a/IM/InfrastructureManager.py b/IM/InfrastructureManager.py index 727988149..6f8400fbc 100644 --- a/IM/InfrastructureManager.py +++ b/IM/InfrastructureManager.py @@ -149,152 +149,86 @@ def root(n): return deploy_groups @staticmethod - def _launch_vm(sel_inf, task, deploy_group, auth, deployed_vm, exceptions, cloud_with_errors): - """ - Launch a VM in a cloud provider. - In case of failure it will try with the next provider defined (if any) - """ + def _launch_group(sel_inf, deploy_group, deploys_group_cloud_list, cloud_list, concrete_systems, + radl, auth, deployed_vm, cancel_deployment): + """Launch a group of deploys together.""" + + if not deploy_group: + InfrastructureManager.logger.warning("No VMs to deploy!") + return + if not deploys_group_cloud_list: + cancel_deployment.append(Exception("No cloud provider available")) + return all_ok = False - # Each task_cloud represents a task to launch the VM in a cloud provider - # if some fails we will try to use the next one - for task_cloud in task: - cloud, deploy, launch_radl, requested_radl, remain_vm, vm_type, t_num = task_cloud - - if id(deploy_group) in cloud_with_errors and cloud.cloud.id in cloud_with_errors[id(deploy_group)]: - InfrastructureManager.logger.debug("Cloud %s has failed for this deployment group. " - "Do not use it to launch other VMs of the same group") - continue + exceptions = [] + for cloud_id in deploys_group_cloud_list: + cloud = cloud_list[cloud_id] + all_ok = True + for deploy in deploy_group: + remain_vm, fail_cont = deploy.vm_number, 0 + while (remain_vm > 0 and fail_cont < Config.MAX_VM_FAILS and not cancel_deployment): + concrete_system = concrete_systems[cloud_id][deploy.id][0] + if not concrete_system: + InfrastructureManager.logger.error( + "Error, no concrete system to deploy: " + deploy.id + " in cloud: " + + cloud_id + ". Check if a correct image is being used") + exceptions.append("Error, no concrete system to deploy: " + deploy.id + + " in cloud: " + cloud_id + ". Check if a correct image is being used") + break - fail_cont = 0 - while (remain_vm > 0 and fail_cont < Config.MAX_VM_FAILS): - InfrastructureManager.logger.debug("Launching %d VMs of type %s" % (remain_vm, vm_type)) - try: - launched_vms = cloud.cloud.getCloudConnector(sel_inf).launch( - sel_inf, launch_radl, requested_radl, remain_vm, auth) - all_ok = True - except Exception as e: - all_ok = False - InfrastructureManager.logger.exception("Error launching some of the VMs: %s" % e) - exceptions[deploy][t_num] = ("Error launching the VMs of type %s to cloud ID %s" - " of type %s. Cloud Provider Error: %s" % (vm_type, - cloud.cloud.id, - cloud.cloud.type, - e)) - launched_vms = [] - - for success, launched_vm in launched_vms: - if success: - InfrastructureManager.logger.debug("VM successfully launched: %s" % str(launched_vm.id)) - deployed_vm.setdefault(deploy, []).append(launched_vm) - deploy.cloud_id = cloud.cloud.id - remain_vm -= 1 - else: - all_ok = False - InfrastructureManager.logger.warn("Error launching some of the VMs: %s" % str(launched_vm)) - exceptions[deploy][t_num] = ("Error launching the VMs of type %s to cloud ID %s of type %s. " - "%s" % (vm_type, cloud.cloud.id, - cloud.cloud.type, str(launched_vm))) - if not isinstance(launched_vm, (str, unicode)): - cloud.finalize(launched_vm, auth) - - fail_cont += 1 - if remain_vm > 0 and fail_cont >= Config.MAX_VM_FAILS: - if id(deploy_group) not in cloud_with_errors: - cloud_with_errors[id(deploy_group)] = [] - cloud_with_errors[id(deploy_group)].append(cloud.cloud.id) + (username, _, _, _) = concrete_system.getCredentialValues() + if not username: + raise IncorrectVMCrecentialsException( + "No username for deploy: " + deploy.id) + launch_radl = radl.clone() + launch_radl.systems = [concrete_system.clone()] + requested_radl = radl.clone() + requested_radl.systems = [radl.get_system_by_name(concrete_system.name)] + try: + InfrastructureManager.logger.debug( + "Launching %d VMs of type %s" % (remain_vm, concrete_system.name)) + launched_vms = cloud.cloud.getCloudConnector(sel_inf).launch( + sel_inf, launch_radl, requested_radl, remain_vm, auth) + except Exception, e: + InfrastructureManager.logger.exception("Error launching some of the VMs: %s" % e) + exceptions.append("Error launching the VMs of type %s to cloud ID %s" + " of type %s. Cloud Provider Error: %s" % (concrete_system.name, + cloud.cloud.id, + cloud.cloud.type, e)) + launched_vms = [] + for success, launched_vm in launched_vms: + if success: + InfrastructureManager.logger.debug( + "VM successfully launched: " + str(launched_vm.id)) + deployed_vm.setdefault( + deploy, []).append(launched_vm) + deploy.cloud_id = cloud_id + remain_vm -= 1 + else: + InfrastructureManager.logger.warn( + "Error launching some of the VMs: " + str(launched_vm)) + exceptions.append("Error launching the VMs of type %s to cloud ID %s of type %s. %s" % ( + concrete_system.name, cloud.cloud.id, cloud.cloud.type, str(launched_vm))) + if not isinstance(launched_vm, (str, unicode)): + cloud.finalize(launched_vm, auth) + fail_cont += 1 + if remain_vm > 0 or cancel_deployment: + all_ok = False + break if not all_ok: - # Something has failed, finalize the VMs created and try with other cloud provider (if avail) for deploy in deploy_group: for vm in deployed_vm.get(deploy, []): vm.finalize(auth) deployed_vm[deploy] = [] - else: - # All was OK so do not try with other cloud provider + if cancel_deployment or all_ok: break - - @staticmethod - def _launch_groups(sel_inf, deploy_groups, deploys_group_cloud_list_all, cloud_list, concrete_systems, radl, auth): - """Launch all groups of deploys together.""" - deployed_vm = {} - cancel_deployment = [] - try: - tasks = [] - cloud_with_errors = {} - exceptions = {} - for deploy_group in deploy_groups: - deploys_group_cloud_list = deploys_group_cloud_list_all[id(deploy_group)] - if not deploy_group: - InfrastructureManager.logger.warning("No VMs to deploy!") - return deployed_vm, cancel_deployment - if not deploys_group_cloud_list: - cancel_deployment.append(Exception("No cloud provider available")) - return deployed_vm, cancel_deployment - for deploy in deploy_group: - exceptions[deploy] = [] - task_cloud = [] - if deploy.vm_number > 0: - for cloud_id in deploys_group_cloud_list: - t_num = len(exceptions[deploy]) - exceptions[deploy].append(None) - cloud = cloud_list[cloud_id] - concrete_system = concrete_systems[cloud_id][deploy.id][0] - if not concrete_system: - InfrastructureManager.logger.error( - "Error, no concrete system to deploy: " + deploy.id + " in cloud: " + - cloud_id + ". Check if a correct image is being used") - exceptions[deploy][t_num] = ("Error, no concrete system to deploy: " + - deploy.id + " in cloud: " + cloud_id + - ". Check if a correct image is being used") - break - - (username, _, _, _) = concrete_system.getCredentialValues() - if not username: - raise IncorrectVMCrecentialsException("No username for deploy: " + deploy.id) - - launch_radl = radl.clone() - launch_radl.systems = [concrete_system.clone()] - requested_radl = radl.clone() - requested_radl.systems = [radl.get_system_by_name(concrete_system.name)] - task_cloud.append((cloud, deploy, launch_radl, requested_radl, - deploy.vm_number, concrete_system.name, t_num)) - else: - InfrastructureManager.logger.debug("deploy %s with 0 num. Ignoring." % deploy.id) - - if task_cloud: - tasks.append(task_cloud) - - if Config.MAX_SIMULTANEOUS_LAUNCHES > 1: - pool = ThreadPool(processes=Config.MAX_SIMULTANEOUS_LAUNCHES) - pool.map( - lambda task: InfrastructureManager._launch_vm(sel_inf, task, deploy_group, auth, - deployed_vm, exceptions, - cloud_with_errors), tasks) - pool.close() - else: - for task in tasks: - InfrastructureManager._launch_vm(sel_inf, task, deploy_group, auth, - deployed_vm, exceptions, - cloud_with_errors) - - # check the errors per deploy - for deploy_group in deploy_groups: - for deploy in deploy_group: - if deploy not in deployed_vm or not deployed_vm[deploy]: - # Some error launching VMs - if exceptions[deploy]: - msg = "" - for i, e in enumerate(exceptions[deploy]): - msg += "Attempt %d: %s\n" % (i + 1, str(e)) - cancel_deployment.append(Exception("All machines could not be launched: \n%s" % msg)) - - except Exception as e: - # Please, avoid exception to arrive to this level, because some virtual - # machine may lost - InfrastructureManager.logger.exception("Error launching deploy group.") - cancel_deployment.append(e) - - return deployed_vm, cancel_deployment + if not all_ok and not cancel_deployment: + msg = "" + for i, e in enumerate(exceptions): + msg += "Attempt " + str(i + 1) + ": " + str(e) + "\n" + cancel_deployment.append( + Exception("All machines could not be launched: \n%s" % msg)) @staticmethod def get_infrastructure(inf_id, auth): @@ -547,9 +481,13 @@ def AddResource(inf_id, radl_data, auth, context=True, failed_clouds=[]): # Sort by score the cloud providers # NOTE: consider fake deploys (vm_number == 0) deploys_group_cloud_list = {} + # reverse the list to use the reverse order in the sort function + # list of ordered clouds + + ordered_cloud_list = [c.id for c in CloudInfo.get_cloud_list(auth)] + ordered_cloud_list.reverse() for deploy_group in deploy_groups: - suggested_cloud_ids = list( - set([d.cloud_id for d in deploy_group if d.cloud_id])) + suggested_cloud_ids = list(set([d.cloud_id for d in deploy_group if d.cloud_id])) if len(suggested_cloud_ids) > 1: raise Exception("Two deployments that have to be launched in the same cloud provider " "are asked to be deployed in different cloud providers: %s" % deploy_group) @@ -574,21 +512,33 @@ def AddResource(inf_id, radl_data, auth, context=True, failed_clouds=[]): total += 1 scored_clouds.append((cloud_id, total)) - ordered_cloud_list = [c.id for c in CloudInfo.get_cloud_list(auth)] - # reverse the list to use the reverse order in the sort function - ordered_cloud_list.reverse() # Order the clouds first by the score and then using the cloud # order in the auth data - sorted_scored_clouds = sorted(scored_clouds, key=lambda x: ( - x[1], ordered_cloud_list.index(x[0])), reverse=True) - deploys_group_cloud_list[id(deploy_group)] = [ - c[0] for c in sorted_scored_clouds] + sorted_scored_clouds = sorted(scored_clouds, + key=lambda x: (x[1], ordered_cloud_list.index(x[0])), + reverse=True) + deploys_group_cloud_list[id(deploy_group)] = [c[0] for c in sorted_scored_clouds] # Launch every group in the same cloud provider - deployed_vm, cancel_deployment = InfrastructureManager._launch_groups(sel_inf, deploy_groups, - deploys_group_cloud_list, - cloud_list, concrete_systems, - radl, auth) + deployed_vm = {} + cancel_deployment = [] + try: + if Config.MAX_SIMULTANEOUS_LAUNCHES > 1: + pool = ThreadPool(processes=Config.MAX_SIMULTANEOUS_LAUNCHES) + pool.map( + lambda ds: InfrastructureManager._launch_group(sel_inf, ds, deploys_group_cloud_list[id(ds)], + cloud_list, concrete_systems, radl, auth, + deployed_vm, cancel_deployment), deploy_groups) + pool.close() + else: + for ds in deploy_groups: + InfrastructureManager._launch_group(sel_inf, ds, deploys_group_cloud_list[id(ds)], + cloud_list, concrete_systems, radl, + auth, deployed_vm, cancel_deployment) + except Exception, e: + # Please, avoid exception to arrive to this level, because some virtual + # machine may lost. + cancel_deployment.append(e) # We make this to maintain the order of the VMs in the sel_inf.vm_list # according to the deploys shown in the RADL diff --git a/doc/source/client.rst b/doc/source/client.rst index 5d82fff35..537384175 100644 --- a/doc/source/client.rst +++ b/doc/source/client.rst @@ -165,8 +165,8 @@ keys are: used as the label in the *deploy* section in the RADL. * ``subscription_id`` indicates the subscription_id name associated to the credential. - This field is only used in the Azure and Azure Classic plugins. To create a user to use the Azure - plugi check the documentation of the Azure python SDK: + This field is only used in the Azure and Azure Classic plugins. To create a user to use the Azure (ARM) + plugin check the documentation of the Azure python SDK: `here `_ OpenStack additional fields diff --git a/test/unit/test_im_logic.py b/test/unit/test_im_logic.py index 82339c944..72a9a3ba8 100755 --- a/test/unit/test_im_logic.py +++ b/test/unit/test_im_logic.py @@ -274,17 +274,18 @@ def test_inf_creation_errors(self): network publica (outbound = 'yes') network privada () system front ( - cpu.arch='x86_64' and - cpu.count>=1 and - memory.size>=512m and net_interface.0.connection = 'publica' and net_interface.1.connection = 'privada' and disk.0.image.url = ['one://localhost/image', 'http://localhost:443/image'] and - disk.0.os.credentials.username = 'ubuntu' and - disk.0.os.credentials.password = 'yoyoyo' and - disk.0.os.name = 'linux' + disk.0.os.credentials.username = 'ubuntu' + ) + system wn ( + net_interface.0.connection = 'privada' and + disk.0.image.url = ['one://localhost/image', 'http://localhost:443/image'] and + disk.0.os.credentials.username = 'ubuntu' ) deploy front 1 + deploy wn 2 """ # this case must fail only with one error @@ -308,10 +309,10 @@ def test_inf_creation_errors(self): 'password': 'tests'}]) with self.assertRaises(Exception) as ex: IM.CreateInfrastructure(radl, auth0) - self.assertEqual(str(ex.exception), 'Some deploys did not proceed successfully: ' - 'All machines could not be launched: \n' - 'Attempt 1: Error launching the VMs of type front to cloud ID one of type OpenNebula.' - ' Cloud Provider Error: [Errno 111] Connection refused\n' + self.assertEqual(str(ex.exception), + 'Some deploys did not proceed successfully: All machines could not be launched: \n' + 'Attempt 1: Error launching the VMs of type front to cloud ID one of type OpenNebula. ' + 'Cloud Provider Error: [Errno 111] Connection refused\n' 'Attempt 2: Error, no concrete system to deploy: front in cloud: ost. ' 'Check if a correct image is being used\n\n') @@ -324,13 +325,13 @@ def test_inf_creation_errors(self): 'password': 'tests'}]) with self.assertRaises(Exception) as ex: IM.CreateInfrastructure(radl, auth0) - self.assertEqual(str(ex.exception), 'Some deploys did not proceed successfully: ' - 'All machines could not be launched: \n' - 'Attempt 1: Error launching the VMs of type front to cloud ID occi of type OCCI.' - ' Cloud Provider Error: Error getting os_tpl scheme. ' - 'Check that the image specified is supported in the OCCI server.\n' - 'Attempt 2: Error launching the VMs of type front to cloud ID one of type OpenNebula.' - ' Cloud Provider Error: [Errno 111] Connection refused\n\n') + self.assertIn(str(ex.exception), + 'Some deploys did not proceed successfully: All machines could not be launched: \n' + 'Attempt 1: Error launching the VMs of type front to cloud ID occi of type OCCI. ' + 'Cloud Provider Error: Error getting os_tpl scheme. ' + 'Check that the image specified is supported in the OCCI server.\n' + 'Attempt 2: Error launching the VMs of type front to cloud ID one of type OpenNebula. ' + 'Cloud Provider Error: [Errno 111] Connection refused\n\n') # this case must work OK auth0 = Authentication([{'id': 'ost', 'type': 'OpenStack', 'username': 'user', @@ -479,13 +480,11 @@ def test_inf_addresources3(self, suds_cli): n0, n1 = 2, 5 # Machines to deploy radl = RADL() radl.add(system("s0", [Feature("disk.0.image.url", "=", "mock0://linux.for.ev.er"), - SoftFeatures( - 10, [Feature("memory.size", "<=", 500)]), + SoftFeatures(10, [Feature("memory.size", "<=", 500)]), Feature("disk.0.os.credentials.username", "=", "user"), Feature("disk.0.os.credentials.password", "=", "pass")])) radl.add(system("s1", [Feature("disk.0.image.url", "=", "mock0://linux.for.ev.er"), - SoftFeatures( - 10, [Feature("memory.size", ">=", 800)]), + SoftFeatures(10, [Feature("memory.size", ">=", 800)]), Feature("disk.0.os.credentials.username", "=", "user"), Feature("disk.0.os.credentials.password", "=", "pass")])) radl.add(deploy("s0", n0)) @@ -545,7 +544,7 @@ def test_inf_addresources4(self): IM.DestroyInfrastructure(infId, auth0) - def test_inf_addresources5(self): + def test_inf_addresources_parallel(self): """Deploy n independent virtual machines.""" radl = """" @@ -585,7 +584,7 @@ def test_inf_addresources5(self): auth0 = self.getAuth([0], [], [("Mock", 0)]) infId = IM.CreateInfrastructure("", auth0) - # in this case it will take aprox 20 secs + # in this case it will take aprox 15 secs before = int(time.time()) Config.MAX_SIMULTANEOUS_LAUNCHES = 1 vms = IM.AddResource(infId, str(radl), auth0) @@ -608,8 +607,10 @@ def test_inf_addresources5(self): Config.MAX_SIMULTANEOUS_LAUNCHES = 3 # Test the pool vms = IM.AddResource(infId, str(radl), auth0) delay = int(time.time()) - before - self.assertLess(delay, 7) - self.assertGreater(delay, 4) + self.assertLess(delay, 17) + self.assertGreater(delay, 14) + # self.assertLess(delay, 7) + # self.assertGreater(delay, 4) Config.MAX_SIMULTANEOUS_LAUNCHES = 1 self.assertEqual(vms, [6, 7, 8, 9, 10, 11]) From c9b3a3f8354806b91fe6ea70457585a545f66d85 Mon Sep 17 00:00:00 2001 From: micafer Date: Fri, 10 Mar 2017 12:24:51 +0100 Subject: [PATCH 2/2] Minor change --- IM/InfrastructureManager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/IM/InfrastructureManager.py b/IM/InfrastructureManager.py index 6f8400fbc..efff2a93b 100644 --- a/IM/InfrastructureManager.py +++ b/IM/InfrastructureManager.py @@ -190,7 +190,7 @@ def _launch_group(sel_inf, deploy_group, deploys_group_cloud_list, cloud_list, c "Launching %d VMs of type %s" % (remain_vm, concrete_system.name)) launched_vms = cloud.cloud.getCloudConnector(sel_inf).launch( sel_inf, launch_radl, requested_radl, remain_vm, auth) - except Exception, e: + except Exception as e: InfrastructureManager.logger.exception("Error launching some of the VMs: %s" % e) exceptions.append("Error launching the VMs of type %s to cloud ID %s" " of type %s. Cloud Provider Error: %s" % (concrete_system.name, @@ -535,7 +535,7 @@ def AddResource(inf_id, radl_data, auth, context=True, failed_clouds=[]): InfrastructureManager._launch_group(sel_inf, ds, deploys_group_cloud_list[id(ds)], cloud_list, concrete_systems, radl, auth, deployed_vm, cancel_deployment) - except Exception, e: + except Exception as e: # Please, avoid exception to arrive to this level, because some virtual # machine may lost. cancel_deployment.append(e)