From 5e9e3c9afff2f5fe3942d881f6290dc30274d79e Mon Sep 17 00:00:00 2001 From: Frank Berghaus Date: Fri, 2 Aug 2019 11:05:15 +0200 Subject: [PATCH 1/5] Add support for OpenStack instances that require boot volumes The OpenStackNative cloud type now allows the specification of two parameters boot_volume and boot_volume_gb_per_core that instruct cloudscheduler to create a boot volume on instance creation. The size of the boot volume is controlled by the second option. --- cloud_resources.conf | 13 ++++++ cloudscheduler/cloud_management.py | 2 + cloudscheduler/openstackcluster.py | 69 +++++++++++++++++++++++++++--- 3 files changed, 78 insertions(+), 6 deletions(-) diff --git a/cloud_resources.conf b/cloud_resources.conf index b2ca61e5..6c15a633 100644 --- a/cloud_resources.conf +++ b/cloud_resources.conf @@ -23,6 +23,19 @@ # #cloud_type: OpenStack +# Boot Volume: +# Option for OpenStackNative cloud type +# Should be set to True if the cloud interface requires the creation of a boot +# volume that the root filesystem of the instance resides on +# +#boot_volume = True + +# Boot volume size +# To control the boot volume size set a value in GByte per core. 
If +# not set the volumes will default to 20GByte +# +#boot_volume_gb_per_core = 20 + # Virtual Machine Slots: # The Maximum Number of virtual machines that can be # run on a cluster at a time diff --git a/cloudscheduler/cloud_management.py b/cloudscheduler/cloud_management.py index f390902a..fdc3fe6c 100644 --- a/cloudscheduler/cloud_management.py +++ b/cloudscheduler/cloud_management.py @@ -382,6 +382,8 @@ def _cluster_from_config(cconfig, cluster): keep_alive=keep_alive, user_domain_name=get_or_none(cconfig, cluster, "user_domain_name"), project_domain_name=get_or_none(cconfig, cluster, "project_domain_name") + boot_volume = get_or_none(cconfig, cluster, "boot_volume"), + boot_volume_gb_per_core = get_or_none(cconfig, cluster, "boot_volume_gb_per_core"), ) elif cloud_type.lower() == "azure" and cloudconfig.verify_cloud_conf_azure(cconfig, cluster): return azurecluster.AzureCluster(name = cluster.lower(), diff --git a/cloudscheduler/openstackcluster.py b/cloudscheduler/openstackcluster.py index 55a0e56b..f266d430 100644 --- a/cloudscheduler/openstackcluster.py +++ b/cloudscheduler/openstackcluster.py @@ -37,7 +37,8 @@ def __init__(self, name="Dummy Cluster", cloud_type="Dummy", key_name=None, boot_timeout=None, secure_connection="", regions="", reverse_dns_lookup=False,placement_zone=None, enabled=True, priority=0, cacert=None,keep_alive=0, user_domain_name=None, - project_domain_name=None): + project_domain_name=None, boot_volume=False, + boot_volume_gb_per_core=20): # Call super class's init cluster_tools.ICluster.__init__(self,name=name, host=auth_url, cloud_type=cloud_type, @@ -115,6 +116,8 @@ def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, import novaclient.exceptions use_cloud_init = use_cloud_init or config.use_cloud_init nova = self._get_creds_nova_updated() + if boot_volume: + cinder = self._get_creds_cinder() if len(securitygroup) != 0: sec_group = [] for group in securitygroup: @@ -238,14 +241,52 @@ def vm_create(self, vm_name, vm_type, 
vm_user, vm_networkassoc, netid = [] else: netid = [] - # Need to get the rotating hostname from the google code to use for here. + # Need to get the rotating hostname from the google code to use for here. name = self._generate_next_name() instance = None if name: + log.info("Trying to create VM on %s: " % self.name) try: - instance = nova.servers.create(name=name, image=imageobj, flavor=flavor, key_name=key_name, - availability_zone=self.placement_zone, nics =netid, userdata=user_data, security_groups=sec_group) + if not boot_volume: + instance = nova.servers.create(name=name, + image=imageobj, + flavor=flavor, + key_name=key_name, + availability_zone=self.placement_zone, + nics =netid, + userdata=user_data, + security_groups=sec_group) + else: + bdm = None + log.debug("creating boot volume") + try: + bv_name = "vol-{}".format(name) + if boot_volume_gb_per_core: + bv_size = boot_volume_gb_per_core * cpu_cores + else: + bv_size = 20 + cv = cinder.volumes.create(name=bv_name, + size=bv_size, + imageRef=imageobj.id) + while (cv.status != 'available'): + time.sleep(1) + cv = cinder.volumes.get(cv.id) + cinder.volumes.set_bootable(cv, True) + bdm = {'vda': str(cv.id) + ':::1'} + except Exception as e: + log.error("failed to create boot volume: {}".format(e)) + raise e + log.debug("boot volume creation successful") + instance = nova.servers.create(name=name, + image=imageobj, + flavor=flavor, + key_name=key_name, + block_device_mapping=bdm, + availability_zone=self.placement_zone, + nics=netid, + userdata=user_data, + security_groups=sec_group) #print instance.__dict__ except novaclient.exceptions.OverLimit as e: log.info("Unable to create VM without exceeded quota on %s: %s" % (self.name, e.message)) @@ -293,8 +334,8 @@ def vm_destroy(self, vm, return_resources=True, reason=""): except novaclient.exceptions.NotFound as e: log.error("VM %s not found on %s: removing from CS" % (vm.id, self.name)) except Exception as e: - try: - log.error("Unhandled exception while 
destroying VM on %s : %s" % (self.name,e)) + try: + log.error("Unhandled exception while destroying VM on %s : %s" % (self.name,e)) return 1 except: log.error("Failed to log exception properly?") @@ -400,6 +441,22 @@ def _get_keystone_session_v3(self): log.debug("Session object for %s created" % self.name) return sess + def _get_creds_cinder(self): + try: + from cinderclient import client as cclient + except Exception as e: + print("Unable to import cinderclient - cannot create boot volumes") + print(e) + sys.exit(1) + try: + cinder = cclient.Client("3", + session=self.session, + region_name=self.regions[0], + timeout=10) + except Exception as e: + log.error("Cannot use cinder on {}: {}".format(self.name, e)) + raise e + return cinder def _find_network(self, name): nova = self._get_creds_nova_updated() From 12a14711a3f44db3f7fabbb7bc50311749947074 Mon Sep 17 00:00:00 2001 From: Frank Berghaus Date: Sat, 3 Aug 2019 17:02:36 +0200 Subject: [PATCH 2/5] Resolve Rolf's review points Fix indentation to prevent infinite loop, remove nested exception, delete volume if creation failed, and remove some unwanted spaces. The volume deletion after instance deletion at the moment is not checked. Remove nested exception Nesting exceptions does not work. Properly catch the exception thrown by the cinder client. Not sure if this works with the import of the cinder client exceptions in the conditional. 
Delete volume if instance creation failed Add a comma on line 384 Remove accidental spaces --- cloudscheduler/cloud_management.py | 2 +- cloudscheduler/openstackcluster.py | 38 ++++++++++++++++-------------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/cloudscheduler/cloud_management.py b/cloudscheduler/cloud_management.py index fdc3fe6c..0538a706 100644 --- a/cloudscheduler/cloud_management.py +++ b/cloudscheduler/cloud_management.py @@ -381,7 +381,7 @@ def _cluster_from_config(cconfig, cluster): cacert = get_or_none(cconfig, cluster, "cacert"), keep_alive=keep_alive, user_domain_name=get_or_none(cconfig, cluster, "user_domain_name"), - project_domain_name=get_or_none(cconfig, cluster, "project_domain_name") + project_domain_name=get_or_none(cconfig, cluster, "project_domain_name"), boot_volume = get_or_none(cconfig, cluster, "boot_volume"), boot_volume_gb_per_core = get_or_none(cconfig, cluster, "boot_volume_gb_per_core"), ) diff --git a/cloudscheduler/openstackcluster.py b/cloudscheduler/openstackcluster.py index f266d430..01bb65c7 100644 --- a/cloudscheduler/openstackcluster.py +++ b/cloudscheduler/openstackcluster.py @@ -118,6 +118,7 @@ def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, nova = self._get_creds_nova_updated() if boot_volume: cinder = self._get_creds_cinder() + from cinderclient import exceptions as ccexceptions if len(securitygroup) != 0: sec_group = [] for group in securitygroup: @@ -260,23 +261,19 @@ def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, else: bdm = None log.debug("creating boot volume") - try: - bv_name = "vol-{}".format(name) - if boot_volume_gb_per_core: - bv_size = boot_volume_gb_per_core * cpu_cores - else: - bv_size = 20 - cv = cinder.volumes.create(name=bv_name, - size=bv_size, - imageRef=imageobj.id) - while (cv.status != 'available'): - time.sleep(1) + bv_name = "vol-{}".format(name) + if boot_volume_gb_per_core: + bv_size = boot_volume_gb_per_core * cpu_cores + else: + bv_size 
= 20 + cv = cinder.volumes.create(name=bv_name, + size=bv_size, + imageRef=imageobj.id) + while (cv.status != 'available'): + time.sleep(1) cv = cinder.volumes.get(cv.id) - cinder.volumes.set_bootable(cv, True) - bdm = {'vda': str(cv.id) + ':::1'} - except Exception as e: - log.error("failed to create boot volume: {}".format(e)) - raise e + cinder.volumes.set_bootable(cv, True) + bdm = {'vda': str(cv.id) + ':::1'} log.debug("boot volume creation successful") instance = nova.servers.create(name=name, image=imageobj, @@ -290,9 +287,14 @@ def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, #print instance.__dict__ except novaclient.exceptions.OverLimit as e: log.info("Unable to create VM without exceeded quota on %s: %s" % (self.name, e.message)) + if cv: cv.delete + except ccexceptions.ClientException as e: + log.error("failed to create boot volume: {}".format(e)) + if cv: cv.delete except Exception as e: #print e log.error("Unhandled exception while creating vm on %s: %s" %(self.name, e)) + if cv: cv.delete if instance: instance_id = instance.id if not vm_keepalive and self.keep_alive: #if job didn't set a keep_alive use the clouds default @@ -334,8 +336,8 @@ def vm_destroy(self, vm, return_resources=True, reason=""): except novaclient.exceptions.NotFound as e: log.error("VM %s not found on %s: removing from CS" % (vm.id, self.name)) except Exception as e: - try: - log.error("Unhandled exception while destroying VM on %s : %s" % (self.name,e)) + try: + log.error("Unhandled exception while destroying VM on %s : %s" % (self.name,e)) return 1 except: log.error("Failed to log exception properly?") From 894614caa467f20349dc4988077f9195ee052d8c Mon Sep 17 00:00:00 2001 From: Frank Berghaus Date: Tue, 13 Aug 2019 19:16:35 +0200 Subject: [PATCH 3/5] Allow boot volume config options as valid Add boot volume options to list of valid options. Set boot volume options as members of the OpenStack cluster class. 
--- cloudscheduler/cloudconfig.py | 51 ++++++++++++++++++++---- cloudscheduler/openstackcluster.py | 63 ++++++++++++++++-------------- 2 files changed, 78 insertions(+), 36 deletions(-) diff --git a/cloudscheduler/cloudconfig.py b/cloudscheduler/cloudconfig.py index 389489bc..3769330c 100644 --- a/cloudscheduler/cloudconfig.py +++ b/cloudscheduler/cloudconfig.py @@ -89,13 +89,50 @@ def verify_sections_base(conf, name): :param name: The name of cloud to operate on :return: True if conf good, False if problem detected """ - valid_option_names = {'access_key_id', 'auth_dat_file', 'auth_url', 'blob_url', 'boot_timeout', 'cacert', - 'cloud_type', 'contextualization', 'cpu_archs', 'cpu_cores', 'host', - 'image_attach_device', 'key_name', 'keycert', 'max_vm_mem', 'max_vm_storage', 'memory', - 'networks', 'password', 'placement_zone', 'port', 'priority', 'project_id', 'project_domain_name', - 'regions', 'reverse_dns_lookup', 'scratch_attach_device', 'secret_access_key', 'secret_file', - 'secure_connection', 'security_group', 'service_name', 'storage', 'temp_lease_storage', - 'tenant_name', 'total_cpu_cores', 'user_domain_name', 'username', 'vm_keep_alive', 'vm_lifetime', 'vm_slots'} + valid_option_names = { + 'access_key_id', + 'auth_dat_file', + 'auth_url', + 'blob_url', + 'boot_timeout', + 'boot_volume', + 'boot_volume_gb_per_core', + 'cacert', + 'cloud_type', + 'contextualization', + 'cpu_archs', + 'cpu_cores', + 'host', + 'image_attach_device', + 'key_name', + 'keycert', + 'max_vm_mem', + 'max_vm_storage', + 'memory', + 'networks', + 'password', + 'placement_zone', + 'port', + 'priority', + 'project_id', + 'project_domain_name', + 'regions', + 'reverse_dns_lookup', + 'scratch_attach_device', + 'secret_access_key', + 'secret_file', + 'secure_connection', + 'security_group', + 'service_name', + 'storage', + 'temp_lease_storage', + 'tenant_name', + 'total_cpu_cores', + 'user_domain_name', + 'username', + 'vm_keep_alive', + 'vm_lifetime', + 'vm_slots'} options = 
set(conf.options(name)) diff = options - valid_option_names if len(diff) > 0: diff --git a/cloudscheduler/openstackcluster.py b/cloudscheduler/openstackcluster.py index 01bb65c7..a63a951c 100644 --- a/cloudscheduler/openstackcluster.py +++ b/cloudscheduler/openstackcluster.py @@ -41,11 +41,12 @@ def __init__(self, name="Dummy Cluster", cloud_type="Dummy", boot_volume_gb_per_core=20): # Call super class's init - cluster_tools.ICluster.__init__(self,name=name, host=auth_url, cloud_type=cloud_type, - memory=memory, max_vm_mem=max_vm_mem, networks=networks, - vm_slots=vm_slots, cpu_cores=cpu_cores, - storage=storage, boot_timeout=boot_timeout, enabled=enabled, - priority=priority, keep_alive=keep_alive,) + cluster_tools.ICluster.__init__( + self, name=name, host=auth_url, cloud_type=cloud_type, + memory=memory, max_vm_mem=max_vm_mem, networks=networks, + vm_slots=vm_slots, cpu_cores=cpu_cores, + storage=storage, boot_timeout=boot_timeout, enabled=enabled, + priority=priority, keep_alive=keep_alive,) try: import novaclient.v2.client as nvclient import novaclient.exceptions @@ -68,6 +69,8 @@ def __init__(self, name="Dummy Cluster", cloud_type="Dummy", self.placement_zone = placement_zone self.flavor_set = set() self.cacert = cacert + self.boot_volume = boot_volume + self.boot_volume_gb_per_core = boot_volume_gb_per_core self.user_domain_name = user_domain_name if user_domain_name is not None else "Default" self.project_domain_name = project_domain_name if project_domain_name is not None else "Default" self.session = None @@ -116,9 +119,9 @@ def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, import novaclient.exceptions use_cloud_init = use_cloud_init or config.use_cloud_init nova = self._get_creds_nova_updated() - if boot_volume: + if self.boot_volume: cinder = self._get_creds_cinder() - from cinderclient import exceptions as ccexceptions + from cinderclient import exceptions as ccexceptions if len(securitygroup) != 0: sec_group = [] for group in 
securitygroup: @@ -249,21 +252,22 @@ def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, if name: log.info("Trying to create VM on %s: " % self.name) try: - if not boot_volume: - instance = nova.servers.create(name=name, - image=imageobj, - flavor=flavor, - key_name=key_name, - availability_zone=self.placement_zone, - nics =netid, - userdata=user_data, - security_groups=sec_group) + if not self.boot_volume: + instance = nova.servers.create( + name=name, + image=imageobj, + flavor=flavor, + key_name=key_name, + availability_zone=self.placement_zone, + nics=netid, + userdata=user_data, + security_groups=sec_group) else: bdm = None log.debug("creating boot volume") bv_name = "vol-{}".format(name) - if boot_volume_gb_per_core: - bv_size = boot_volume_gb_per_core * cpu_cores + if self.boot_volume_gb_per_core: + bv_size = self.boot_volume_gb_per_core * cpu_cores else: bv_size = 20 cv = cinder.volumes.create(name=bv_name, @@ -275,18 +279,19 @@ def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, cinder.volumes.set_bootable(cv, True) bdm = {'vda': str(cv.id) + ':::1'} log.debug("boot volume creation successful") - instance = nova.servers.create(name=name, - image=imageobj, - flavor=flavor, - key_name=key_name, - block_device_mapping=bdm, - availability_zone=self.placement_zone, - nics=netid, - userdata=user_data, - security_groups=sec_group) - #print instance.__dict__ + instance = nova.servers.create( + name=name, + image=imageobj, + flavor=flavor, + key_name=key_name, + block_device_mapping=bdm, + availability_zone=self.placement_zone, + nics=netid, + userdata=user_data, + security_groups=sec_group) + # print instance.__dict__ except novaclient.exceptions.OverLimit as e: - log.info("Unable to create VM without exceeded quota on %s: %s" % (self.name, e.message)) + log.info("Unable to create VM without exceeded quota on %s: %s" % (self.name, e.message) if cv: cv.delete except ccexceptions.ClientException as e: log.error("failed to create boot volume: 
{}".format(e)) From 21b3d7759dc99a1ce68f9d3d5c2b9ea2f0b0635f Mon Sep 17 00:00:00 2001 From: Frank Berghaus Date: Tue, 20 Aug 2019 18:26:31 +0200 Subject: [PATCH 4/5] Try to delete the boot volume Do the python thing and try deletion and handle expected failures --- cloudscheduler/openstackcluster.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/cloudscheduler/openstackcluster.py b/cloudscheduler/openstackcluster.py index a63a951c..e4cbbf2a 100644 --- a/cloudscheduler/openstackcluster.py +++ b/cloudscheduler/openstackcluster.py @@ -291,15 +291,29 @@ def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, security_groups=sec_group) # print instance.__dict__ except novaclient.exceptions.OverLimit as e: - log.info("Unable to create VM without exceeded quota on %s: %s" % (self.name, e.message) - if cv: cv.delete + log.info("Unable to create VM without exceeded quota on %s: %s" % (self.name, e.message)) + try: + cv.delete + except NameError: + pass + else: + log.info("deleted created boot volume") except ccexceptions.ClientException as e: log.error("failed to create boot volume: {}".format(e)) - if cv: cv.delete + try: + cv.delete + except NameError: + pass + else: + log.info("deleted created boot volume") except Exception as e: - #print e log.error("Unhandled exception while creating vm on %s: %s" %(self.name, e)) - if cv: cv.delete + try: + cv.delete + except NameError: + pass + else: + log.info("deleted created boot volume") if instance: instance_id = instance.id if not vm_keepalive and self.keep_alive: #if job didn't set a keep_alive use the clouds default From 56b4b53c487f3786a090663cf0dcac76edc2b70d Mon Sep 17 00:00:00 2001 From: Frank Berghaus Date: Wed, 21 Aug 2019 15:26:30 +0200 Subject: [PATCH 5/5] Get cpu cores from class members The CPU cores need to come from the class. The boot volume configuration is delivered from the config as strings. Convert the strings to a bool or int as the case may be. 
--- cloudscheduler/openstackcluster.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cloudscheduler/openstackcluster.py b/cloudscheduler/openstackcluster.py index e4cbbf2a..1b7731f1 100644 --- a/cloudscheduler/openstackcluster.py +++ b/cloudscheduler/openstackcluster.py @@ -69,8 +69,14 @@ def __init__(self, name="Dummy Cluster", cloud_type="Dummy", self.placement_zone = placement_zone self.flavor_set = set() self.cacert = cacert - self.boot_volume = boot_volume - self.boot_volume_gb_per_core = boot_volume_gb_per_core + try: + self.boot_volume = boot_volume.lower() in ["y", "yes", "true", "on"] + except AttributeError: + self.boot_volume = False + try: + self.boot_volume_gb_per_core = int(boot_volume_gb_per_core) + except (TypeError, ValueError): + self.boot_volume_gb_per_core = 20 self.user_domain_name = user_domain_name if user_domain_name is not None else "Default" self.project_domain_name = project_domain_name if project_domain_name is not None else "Default" self.session = None @@ -267,7 +273,7 @@ def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, log.debug("creating boot volume") bv_name = "vol-{}".format(name) if self.boot_volume_gb_per_core: - bv_size = self.boot_volume_gb_per_core * cpu_cores + bv_size = self.boot_volume_gb_per_core * self.cpu_cores else: bv_size = 20 cv = cinder.volumes.create(name=bv_name,