diff --git a/nova/compute/manager.py b/nova/compute/manager.py index b55e9b6a23..b45ef7b61c 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -6678,28 +6678,6 @@ def _poll_rescued_instances(self, context): for instance in to_unrescue: self.compute_api.unrescue(context, instance) - @periodic_task.periodic_task(spacing=CONF.pci_affine_interval) - def _affine_pci_dev_instances(self, context): - """Periodically reaffine pci device irqs. This will correct the - affinity setting of dynamically created irqs. - """ - filters = {'vm_state': vm_states.ACTIVE, - 'task_state': None, - 'deleted': False, - 'host': self.host} - instances = objects.InstanceList.get_by_filters( - context, filters, expected_attrs=[], use_slave=True) - for instance in instances: - if len(instance.pci_devices.objects) == 0: - continue - try: - self.driver.affine_pci_dev_irqs(instance, wait_for_irqs=False) - except NotImplementedError: - return - except Exception as e: - LOG.info("Error affining pci device irqs: %s.", - e, instance=instance) - @periodic_task.periodic_task def _poll_unconfirmed_resizes(self, context): if CONF.resize_confirm_window == 0: diff --git a/nova/conf/compute.py b/nova/conf/compute.py index 3c6481b2b3..de0bcbb5fd 100644 --- a/nova/conf/compute.py +++ b/nova/conf/compute.py @@ -950,23 +950,6 @@ def host_extend_options(config_opts_file='/etc/nova/compute_extend.conf'): Possible values: -* 0: Will run at the default periodic interval. -* Any value < 0: Disables the option. -* Any positive integer in seconds. -"""), - cfg.IntOpt('pci_affine_interval', - default=60, -help=""" -Number of seconds between pci affinity updates - -This option specifies how often the pci_affine_interval -periodic task should run. A number less than 0 means to disable the -task completely. Leaving this at the default of 0 will cause this to -run at the default periodic interval. Setting it to any positive -value will cause it to run at approximately that number of seconds. - -Possible values: - * 0: Will run at the default periodic interval. * Any value < 0: Disables the option. * Any positive integer in seconds. diff --git a/nova/conf/libvirt.py b/nova/conf/libvirt.py index 5de984adeb..6dd91d57ec 100644 --- a/nova/conf/libvirt.py +++ b/nova/conf/libvirt.py @@ -1044,19 +1044,6 @@ ), ] -# WRS: add options for PCI IRQ affinity, msi irq detection parameters -libvirt_pci_irq_opts = [ - cfg.IntOpt('msi_irq_timeout', - default=45, - help='Number of seconds to wait for msi irq configuration'), - cfg.IntOpt('msi_irq_since', - default=6, - help='Number of seconds to wait for msi irqs to stabilize.'), - cfg.IntOpt('msi_irq_check_interval', - default=2, - help='Check interval in seconds for msi irqs to stabilize.'), -] - ALL_OPTS = list(itertools.chain( libvirt_general_opts, libvirt_imagebackend_opts, @@ -1074,7 +1061,6 @@ libvirt_volume_smbfs_opts, libvirt_remotefs_opts, libvirt_volume_vzstorage_opts, - libvirt_pci_irq_opts, )) diff --git a/nova/pci/utils.py b/nova/pci/utils.py index 8ae8098a66..7f7f73462c 100644 --- a/nova/pci/utils.py +++ b/nova/pci/utils.py @@ -18,7 +18,6 @@ # -import errno import glob import os import re @@ -28,8 +27,6 @@ from nova import exception from nova.network import model as network_model -from nova import utils -from nova.virt import hardware LOG = logging.getLogger(__name__) @@ -133,173 +130,6 @@ def is_physical_function(domain, bus, slot, function): return False -def get_irqs_by_pci_address(pci_addr): - """Get list of PCI IRQs based on a VF's pci address - - Raises PciDeviceNotFoundById in case the pci device is not found, - or when there is an underlying problem getting associated irqs. - :param pci_addr: PCI address - :return: irqs, msi_irqs - """ - irqs = set() - msi_irqs = set() - - dev_path = "/sys/bus/pci/devices/%s" % (pci_addr) - if not os.path.isdir(dev_path): - raise exception.PciDeviceNotFoundById(id=pci_addr) - - _irqs = set() - irq_path = "/sys/bus/pci/devices/%s/irq" % (pci_addr) - try: - with open(irq_path) as f: - _irqs.update([int(x) for x in f.readline().split() if int(x) > 0]) - except Exception as e: - LOG.error('get_irqs_by_pci_address: ' - 'pci_addr=%(A)s: irq_path=%(P)s; error=%(E)s', - {'A': pci_addr, 'P': irq_path, 'E': e}) - raise exception.PciDeviceNotFoundById(id=pci_addr) - - _msi_irqs = set() - msi_path = "/sys/bus/pci/devices/%s/msi_irqs" % (pci_addr) - try: - _msi_irqs.update([int(x) for x in os.listdir(msi_path) if int(x) > 0]) - except OSError as e: - # msi_path disappears during configuration; do not treat - # non-existance as fatal - if e.errno == errno.ENOENT: - return (irqs, msi_irqs) - else: - LOG.error('get_irqs_by_pci_address: ' - 'pci_addr=%(A)s: msi_path=%(P)s; error=%(E)s', - {'A': pci_addr, 'P': msi_path, 'E': e}) - raise exception.PciDeviceNotFoundById(id=pci_addr) - except Exception as e: - LOG.error('get_irqs_by_pci_address: ' - 'pci_addr=%(A)s: msi_path=%(P)s; error=%(E)s', - {'A': pci_addr, 'P': msi_path, 'E': e}) - raise exception.PciDeviceNotFoundById(id=pci_addr) - - # Return only configured irqs, ignore any that are missing. - for irq in _irqs: - irq_path = "/proc/irq/%s" % (irq) - if os.path.isdir(irq_path): - irqs.update([irq]) - for irq in _msi_irqs: - irq_path = "/proc/irq/%s" % (irq) - if os.path.isdir(irq_path): - msi_irqs.update([irq]) - return (irqs, msi_irqs) - - -def get_pci_irqs_pinned_cpuset(flavor=None, numa_topology=None, - pci_numa_node=None): - """Get pinned cpuset where pci irq are affined. - - :param flavor: flavor - :param pci_numa_node: numa node of a specific PCI device - :param numa_topology: instance numa topology - :return: cpuset, cpulist - """ - cpuset = set() - cpulist = '' - - if numa_topology is None or pci_numa_node is None or pci_numa_node < 0: - return (cpuset, cpulist) - - # Determine full affinity cpuset, but restrict to pci's numa node - for cell in numa_topology.cells: - if cell.id == pci_numa_node: - if cell.cpu_pinning is not None: - cpuset.update(set(cell.cpu_pinning.values())) - - # Use extra-spec hw:pci_irq_affinity_mask only when the instance is pinned. - if cpuset: - pci_cpuset = hardware._get_pci_affinity_mask(flavor) - if pci_cpuset: - cpuset = set() - for cell in numa_topology.cells: - if cell.cpu_pinning is not None: - for vcpu in cell.cpuset: - if vcpu in pci_cpuset: - vcpu_cell, pcpu = numa_topology.vcpu_to_pcpu(vcpu) - cpuset.update(set([pcpu])) - - cpulist = utils.list_to_range(input_list=list(cpuset)) - return (cpuset, cpulist) - - -def set_irqs_affinity_by_pci_address(pci_addr, flavor=None, - numa_topology=None): - """Set cpu affinity for list of PCI IRQs with a VF's pci address, - but restrict cpuset to the numa node of the PCI. - - Return list - Raises PciDeviceNotFoundById in case the pci device is not found, - or when there is an underlying problem getting associated irqs. - :param pci_addr: PCI address - :param flavor: flavor - :param numa_topology: instance numa topology - :return: irqs, msi_irqs, numa_node, cpulist - """ - irqs = set() - msi_irqs = set() - numa_node = None - cpulist = '' - - if numa_topology is None: - return (irqs, msi_irqs, numa_node, cpulist) - - # Get the irqs associated with pci addr - _irqs, _msi_irqs = get_irqs_by_pci_address(pci_addr) - - # Obtain physical numa_node for this pci addr - numa_path = "/sys/bus/pci/devices/%s/numa_node" % (pci_addr) - try: - with open(numa_path) as f: - numa_node = [int(x) for x in f.readline().split()][0] - except Exception as e: - LOG.error('set_irqs_affinity_by_pci_address: ' - 'pci_addr=%(A)s: numa_path=%(P)s; error=%(E)s', - {'A': pci_addr, 'P': numa_path, 'E': e}) - raise exception.PciDeviceNotFoundById(id=pci_addr) - - # Skip irq configuration if there is no associated numa node - if numa_node is None or numa_node < 0: - return (irqs, msi_irqs, numa_node, cpulist) - - # Determine the pinned cpuset where irqs are to be affined - cpuset, cpulist = get_pci_irqs_pinned_cpuset(flavor=flavor, - numa_topology=numa_topology, - pci_numa_node=numa_node) - - # Skip irq configuration if there are no pinned cpus - if not cpuset: - return (irqs, msi_irqs, numa_node, cpulist) - - # Set IRQ affinity, but do not treat errors as fatal. - for irq in _irqs: - irq_aff_path = "/proc/irq/%s/smp_affinity_list" % (irq) - try: - with open(irq_aff_path, 'w') as f: - f.write(cpulist) - irqs.update([irq]) - except Exception as e: - LOG.warning("Could not affine pci_addr:%(A)s, irq:%(I)s, " - "error=%(E)s", - {"A": pci_addr, "I": irq, "E": e}) - for irq in _msi_irqs: - irq_aff_path = "/proc/irq/%s/smp_affinity_list" % (irq) - try: - with open(irq_aff_path, 'w') as f: - f.write(cpulist) - msi_irqs.update([irq]) - except Exception as e: - LOG.warning("Could not affine pci_addr:%(A)s, irq:%(I)s, " - "error=%(E)s", - {"A": pci_addr, "I": irq, "E": e}) - return (irqs, msi_irqs, numa_node, cpulist) - - def format_instance_pci_devices(pci_devices=None, delim='\n'): """Returns formated pci devices list. diff --git a/nova/virt/driver.py b/nova/virt/driver.py index f3e5fe278d..4b965c9b5b 100644 --- a/nova/virt/driver.py +++ b/nova/virt/driver.py @@ -1667,12 +1667,6 @@ def get_l3_closids(self): def get_l3_closids_used(self): return 0 - # WRS: extension - def affine_pci_dev_irqs(self, instance, wait_for_irqs=True): - """Affine PCI device irqs to VM's pcpus.""" - raise NotImplementedError() - - def load_compute_driver(virtapi, compute_driver=None): """Load a compute driver module. diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 0531490dcd..8e5aa8e0eb 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -487,10 +487,6 @@ def __init__(self, virtapi, read_only=False): # beginning to ensure any syntax error will be reported and # avoid any re-calculation when computing resources. self._reserved_hugepages = hardware.numa_get_reserved_huge_pages() - - self._msi_irq_count = {} - self._msi_irq_since = {} - self._msi_irq_elapsed = {} self._cachetune_support = None self._cachetune_cdp_support = None @@ -2989,108 +2985,6 @@ def _wait_for_boot(): timer = loopingcall.FixedIntervalLoopingCall(_wait_for_boot) timer.start(interval=0.5).wait() - def affine_pci_dev_irqs(self, instance, wait_for_irqs=True): - """Affine PCI device irqs to VM's pcpus.""" - - def _wait_for_msi_irqs(instance): - """Check each pci device has the expected number of msi irqs.""" - _prev = self._msi_irq_count.copy() - addrs = set() - for pci_dev in instance.pci_devices.objects: - addr = pci_dev.address - addrs.update([addr]) - try: - irqs, msi_irqs = pci_utils.get_irqs_by_pci_address(addr) - except Exception as e: - msi_irqs = set() - LOG.error('_wait_for_msi_irqs: pci_addr=%(A)s, ' - 'error=%(E)s', - {'A': addr, 'E': e}) - self._msi_irq_count[addr] = len(msi_irqs) - self._msi_irq_elapsed[addr] += \ - CONF.libvirt.msi_irq_check_interval - if _prev[addr] == self._msi_irq_count[addr]: - self._msi_irq_since[addr] += \ - CONF.libvirt.msi_irq_check_interval - else: - self._msi_irq_since[pci_dev.address] = 0 - - # Done when msi irq counts have not changed for some time - if all((self._msi_irq_count[k] > 0) and - (self._msi_irq_since[k] >= CONF.libvirt.msi_irq_since) - for k in addrs): - raise loopingcall.LoopingCallDone() - - # Abort due to timeout - if all(self._msi_irq_elapsed[k] >= CONF.libvirt.msi_irq_timeout - for k in addrs): - msg = (_("reached %(timeout)s seconds timeout, waiting for " - "msi irqs of pci_addrs: %(addrs)s") - % {'addrs': list(addrs), - 'timeout': CONF.libvirt.msi_irq_timeout}) - LOG.warning(msg) - raise loopingcall.LoopingCallDone() - - # Determine how many msi irqs we expect to be configured. - if len(instance.pci_devices.objects) == 0: - return - - # Initialize msi irq tracking. - for pci_dev in instance.pci_devices.objects: - if wait_for_irqs or (pci_dev.address not in self._msi_irq_count): - self._msi_irq_count[pci_dev.address] = 0 - self._msi_irq_since[pci_dev.address] = 0 - self._msi_irq_elapsed[pci_dev.address] = 0 - - # Wait for msi irqs to be configured. - if wait_for_irqs: - timer = loopingcall.FixedIntervalLoopingCall( - _wait_for_msi_irqs, instance) - timer.start(interval=CONF.libvirt.msi_irq_check_interval).wait() - - @utils.synchronized(instance.uuid) - def do_affine_pci_dev_instance(): - """Set pci device irq affinity for this instance.""" - instance.refresh() - numa_topology = instance.get('numa_topology') - flavor = instance.get_flavor() - for pci_dev in instance.pci_devices.objects: - try: - irqs, msi_irqs, pci_numa_node, pci_cpulist = \ - pci_utils.set_irqs_affinity_by_pci_address( - pci_dev.address, flavor=flavor, - numa_topology=numa_topology) - except Exception as e: - irqs = set() - msi_irqs = set() - pci_numa_node = None - pci_cpulist = '' - LOG.error("Could not affine irqs for pci_addr:%(A)s, " - "error: %(E)s", - {"A": pci_dev.address, "E": e}, - instance=instance) - - # Log irqs affined when there is a change in the counts. - msi_irq_count = len(msi_irqs) - if ((msi_irq_count != self._msi_irq_count[pci_dev.address]) or - wait_for_irqs): - self._msi_irq_count[pci_dev.address] = msi_irq_count - LOG.info("IRQs affined for pci_addr=%(A)s, " - "dev_id=%(D)s, dev_type=%(T)s, " - "vendor_id=%(V)s, product_id=%(P)s, " - "irqs=%(I)s, msi_irqs:%(M)s, " - "numa_node=%(N)s, cpulist=%(C)s", - {'A': pci_dev.address, - 'D': pci_dev.dev_id, - 'T': pci_dev.dev_type, - 'V': pci_dev.vendor_id, - 'P': pci_dev.product_id, - 'I': ', '.join(map(str, irqs)), - 'M': ', '.join(map(str, msi_irqs)), - 'N': pci_numa_node, 'C': pci_cpulist}, - instance=instance) - do_affine_pci_dev_instance() - def _flush_libvirt_console(self, pty): out, err = utils.execute('dd', 'if=%s' % pty, @@ -5835,13 +5729,6 @@ def _create_domain_and_network(self, context, xml, instance, network_info, block_device_info, guest, destroy_disks_on_failure) - # Affine irqs associated with PCI/PT and SRIOV network devices. - # This chooses the subset of cpus from instance numa_topology that - # reside on the same numa node as the PCI device. - # This is done asynchronously since it takes a while for the msi irqs - # to be configured. - utils.spawn_n(self.affine_pci_dev_irqs, instance) - # Resume only if domain has been paused if pause: guest.resume()