diff --git a/Cargo.lock b/Cargo.lock index 5244f4d2b73..0ea518f3a49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1045,15 +1045,12 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" name = "pci" version = "0.1.0" dependencies = [ - "byteorder", "displaydoc", "libc", "log", "serde", "serde_test", "thiserror 2.0.17", - "vm-device", - "vm-memory", "vmm-sys-util", ] @@ -1650,14 +1647,6 @@ dependencies = [ "thiserror 2.0.17", ] -[[package]] -name = "vm-device" -version = "0.1.0" -dependencies = [ - "serde", - "vmm-sys-util", -] - [[package]] name = "vm-fdt" version = "0.3.0" @@ -1724,7 +1713,6 @@ dependencies = [ "uuid", "vhost", "vm-allocator", - "vm-device", "vm-fdt", "vm-memory", "vm-superio", diff --git a/src/pci/Cargo.toml b/src/pci/Cargo.toml index 96c84a8b913..47cfe5d3e6b 100644 --- a/src/pci/Cargo.toml +++ b/src/pci/Cargo.toml @@ -12,17 +12,12 @@ bench = false default = [] [dependencies] -byteorder = "1.5.0" + displaydoc = "0.2.5" libc = "0.2.176" log = "0.4.28" serde = { version = "1.0.228", features = ["derive"] } thiserror = "2.0.17" -vm-device = { path = "../vm-device" } -vm-memory = { version = "0.16.1", features = [ - "backend-mmap", - "backend-bitmap", -] } [dev-dependencies] serde_test = "1.0.177" diff --git a/src/pci/src/lib.rs b/src/pci/src/lib.rs index 83f5a7a5dcf..c06e4e6dc59 100644 --- a/src/pci/src/lib.rs +++ b/src/pci/src/lib.rs @@ -6,31 +6,15 @@ // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause //! Implements pci devices and busses. -#[macro_use] -extern crate log; -mod bus; -mod configuration; -mod device; -mod msix; +extern crate log; use std::fmt::{self, Debug, Display}; use std::num::ParseIntError; use std::str::FromStr; use serde::de::Visitor; - -pub use self::bus::{PciBus, PciConfigIo, PciConfigMmio, PciRoot, PciRootError}; -pub use self::configuration::{ - PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciCapability, PciCapabilityId, - PciClassCode, PciConfiguration, PciConfigurationState, PciExpressCapabilityId, PciHeaderType, - PciMassStorageSubclass, PciNetworkControllerSubclass, PciProgrammingInterface, - PciSerialBusSubClass, PciSubclass, PCI_CONFIGURATION_ID, -}; -pub use self::device::{ - BarReprogrammingParams, DeviceRelocation, Error as PciDeviceError, PciDevice, -}; -pub use self::msix::{Error as MsixError, MsixCap, MsixConfig, MsixConfigState, MsixTableEntry}; +use serde::{Deserialize, Serialize}; /// PCI has four interrupt pins A->D. #[derive(Copy, Clone)] @@ -47,11 +31,6 @@ impl PciInterruptPin { } } -#[cfg(target_arch = "x86_64")] -pub const PCI_CONFIG_IO_PORT: u64 = 0xcf8; -#[cfg(target_arch = "x86_64")] -pub const PCI_CONFIG_IO_PORT_SIZE: u64 = 0x8; - #[derive(Clone, Copy, PartialEq, Eq, PartialOrd)] pub struct PciBdf(u32); @@ -202,6 +181,344 @@ impl FromStr for PciBdf { } } +/// Represents the types of PCI headers allowed in the configuration registers. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum PciHeaderType { + Device, + Bridge, +} + +/// Classes of PCI nodes. +#[allow(dead_code)] +#[derive(Copy, Clone)] +pub enum PciClassCode { + TooOld, + MassStorage, + NetworkController, + DisplayController, + MultimediaController, + MemoryController, + BridgeDevice, + SimpleCommunicationController, + BaseSystemPeripheral, + InputDevice, + DockingStation, + Processor, + SerialBusController, + WirelessController, + IntelligentIoController, + EncryptionController, + DataAcquisitionSignalProcessing, + Other = 0xff, +} + +impl PciClassCode { + pub fn get_register_value(self) -> u8 { + self as u8 + } +} + +/// A PCI subclass. Each class in `PciClassCode` can specify a unique set of subclasses. This trait +/// is implemented by each subclass. It allows use of a trait object to generate configurations. +pub trait PciSubclass { + /// Convert this subclass to the value used in the PCI specification. + fn get_register_value(&self) -> u8; +} + +/// Subclasses of the MultimediaController class. +#[allow(dead_code)] +#[derive(Copy, Clone)] +pub enum PciMultimediaSubclass { + VideoController = 0x00, + AudioController = 0x01, + TelephonyDevice = 0x02, + AudioDevice = 0x03, + Other = 0x80, +} + +impl PciSubclass for PciMultimediaSubclass { + fn get_register_value(&self) -> u8 { + *self as u8 + } +} + +/// Subclasses of the BridgeDevice +#[allow(dead_code)] +#[derive(Copy, Clone)] +pub enum PciBridgeSubclass { + HostBridge = 0x00, + IsaBridge = 0x01, + EisaBridge = 0x02, + McaBridge = 0x03, + PciToPciBridge = 0x04, + PcmciaBridge = 0x05, + NuBusBridge = 0x06, + CardBusBridge = 0x07, + RacEwayBridge = 0x08, + PciToPciSemiTransparentBridge = 0x09, + InfiniBrandToPciHostBridge = 0x0a, + OtherBridgeDevice = 0x80, +} + +impl PciSubclass for PciBridgeSubclass { + fn get_register_value(&self) -> u8 { + *self as u8 + } +} + +/// Subclass of the SerialBus +#[allow(dead_code)] +#[derive(Copy, Clone)] +pub enum PciSerialBusSubClass { + Firewire = 0x00, + Accessbus = 0x01, + Ssa = 0x02, + Usb = 0x03, +} + +impl PciSubclass for PciSerialBusSubClass { + fn get_register_value(&self) -> u8 { + *self as u8 + } +} + +/// Mass Storage Sub Classes +#[allow(dead_code)] +#[derive(Copy, Clone)] +pub enum PciMassStorageSubclass { + ScsiStorage = 0x00, + IdeInterface = 0x01, + FloppyController = 0x02, + IpiController = 0x03, + RaidController = 0x04, + AtaController = 0x05, + SataController = 0x06, + SerialScsiController = 0x07, + NvmController = 0x08, + MassStorage = 0x80, +} + +impl PciSubclass for PciMassStorageSubclass { + fn get_register_value(&self) -> u8 { + *self as u8 + } +} + +/// Network Controller Sub Classes +#[allow(dead_code)] +#[derive(Copy, Clone)] +pub enum PciNetworkControllerSubclass { + EthernetController = 0x00, + TokenRingController = 0x01, + FddiController = 0x02, + AtmController = 0x03, + IsdnController = 0x04, + WorldFipController = 0x05, + PicmgController = 0x06, + InfinibandController = 0x07, + FabricController = 0x08, + NetworkController = 0x80, +} + +impl PciSubclass for PciNetworkControllerSubclass { + fn get_register_value(&self) -> u8 { + *self as u8 + } +} + +/// Types of PCI capabilities. +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +#[allow(dead_code)] +#[allow(non_camel_case_types)] +#[repr(u8)] +pub enum PciCapabilityId { + ListId = 0, + PowerManagement = 0x01, + AcceleratedGraphicsPort = 0x02, + VitalProductData = 0x03, + SlotIdentification = 0x04, + MessageSignalledInterrupts = 0x05, + CompactPciHotSwap = 0x06, + PciX = 0x07, + HyperTransport = 0x08, + VendorSpecific = 0x09, + Debugport = 0x0A, + CompactPciCentralResourceControl = 0x0B, + PciStandardHotPlugController = 0x0C, + BridgeSubsystemVendorDeviceId = 0x0D, + AgpTargetPciPcibridge = 0x0E, + SecureDevice = 0x0F, + PciExpress = 0x10, + MsiX = 0x11, + SataDataIndexConf = 0x12, + PciAdvancedFeatures = 0x13, + PciEnhancedAllocation = 0x14, +} + +impl From for PciCapabilityId { + fn from(c: u8) -> Self { + match c { + 0 => PciCapabilityId::ListId, + 0x01 => PciCapabilityId::PowerManagement, + 0x02 => PciCapabilityId::AcceleratedGraphicsPort, + 0x03 => PciCapabilityId::VitalProductData, + 0x04 => PciCapabilityId::SlotIdentification, + 0x05 => PciCapabilityId::MessageSignalledInterrupts, + 0x06 => PciCapabilityId::CompactPciHotSwap, + 0x07 => PciCapabilityId::PciX, + 0x08 => PciCapabilityId::HyperTransport, + 0x09 => PciCapabilityId::VendorSpecific, + 0x0A => PciCapabilityId::Debugport, + 0x0B => PciCapabilityId::CompactPciCentralResourceControl, + 0x0C => PciCapabilityId::PciStandardHotPlugController, + 0x0D => PciCapabilityId::BridgeSubsystemVendorDeviceId, + 0x0E => PciCapabilityId::AgpTargetPciPcibridge, + 0x0F => PciCapabilityId::SecureDevice, + 0x10 => PciCapabilityId::PciExpress, + 0x11 => PciCapabilityId::MsiX, + 0x12 => PciCapabilityId::SataDataIndexConf, + 0x13 => PciCapabilityId::PciAdvancedFeatures, + 0x14 => PciCapabilityId::PciEnhancedAllocation, + _ => PciCapabilityId::ListId, + } + } +} + +/// Types of PCI Express capabilities. +#[derive(PartialEq, Eq, Copy, Clone, Debug)] +#[allow(dead_code)] +#[repr(u16)] +pub enum PciExpressCapabilityId { + NullCapability = 0x0000, + AdvancedErrorReporting = 0x0001, + VirtualChannelMultiFunctionVirtualChannelNotPresent = 0x0002, + DeviceSerialNumber = 0x0003, + PowerBudgeting = 0x0004, + RootComplexLinkDeclaration = 0x0005, + RootComplexInternalLinkControl = 0x0006, + RootComplexEventCollectorEndpointAssociation = 0x0007, + MultiFunctionVirtualChannel = 0x0008, + VirtualChannelMultiFunctionVirtualChannelPresent = 0x0009, + RootComplexRegisterBlock = 0x000a, + VendorSpecificExtendedCapability = 0x000b, + ConfigurationAccessCorrelation = 0x000c, + AccessControlServices = 0x000d, + AlternativeRoutingIdentificationInterpretation = 0x000e, + AddressTranslationServices = 0x000f, + SingleRootIoVirtualization = 0x0010, + DeprecatedMultiRootIoVirtualization = 0x0011, + Multicast = 0x0012, + PageRequestInterface = 0x0013, + ReservedForAmd = 0x0014, + ResizeableBar = 0x0015, + DynamicPowerAllocation = 0x0016, + ThpRequester = 0x0017, + LatencyToleranceReporting = 0x0018, + SecondaryPciExpress = 0x0019, + ProtocolMultiplexing = 0x001a, + ProcessAddressSpaceId = 0x001b, + LnRequester = 0x001c, + DownstreamPortContainment = 0x001d, + L1PmSubstates = 0x001e, + PrecisionTimeMeasurement = 0x001f, + PciExpressOverMphy = 0x0020, + FRSQueueing = 0x0021, + ReadinessTimeReporting = 0x0022, + DesignatedVendorSpecificExtendedCapability = 0x0023, + VfResizeableBar = 0x0024, + DataLinkFeature = 0x0025, + PhysicalLayerSixteenGts = 0x0026, + LaneMarginingAtTheReceiver = 0x0027, + HierarchyId = 0x0028, + NativePcieEnclosureManagement = 0x0029, + PhysicalLayerThirtyTwoGts = 0x002a, + AlternateProtocol = 0x002b, + SystemFirmwareIntermediary = 0x002c, + ShadowFunctions = 0x002d, + DataObjectExchange = 0x002e, + Reserved = 0x002f, + ExtendedCapabilitiesAbsence = 0xffff, +} + +impl From for PciExpressCapabilityId { + fn from(c: u16) -> Self { + match c { + 0x0000 => PciExpressCapabilityId::NullCapability, + 0x0001 => PciExpressCapabilityId::AdvancedErrorReporting, + 0x0002 => PciExpressCapabilityId::VirtualChannelMultiFunctionVirtualChannelNotPresent, + 0x0003 => PciExpressCapabilityId::DeviceSerialNumber, + 0x0004 => PciExpressCapabilityId::PowerBudgeting, + 0x0005 => PciExpressCapabilityId::RootComplexLinkDeclaration, + 0x0006 => PciExpressCapabilityId::RootComplexInternalLinkControl, + 0x0007 => PciExpressCapabilityId::RootComplexEventCollectorEndpointAssociation, + 0x0008 => PciExpressCapabilityId::MultiFunctionVirtualChannel, + 0x0009 => PciExpressCapabilityId::VirtualChannelMultiFunctionVirtualChannelPresent, + 0x000a => PciExpressCapabilityId::RootComplexRegisterBlock, + 0x000b => PciExpressCapabilityId::VendorSpecificExtendedCapability, + 0x000c => PciExpressCapabilityId::ConfigurationAccessCorrelation, + 0x000d => PciExpressCapabilityId::AccessControlServices, + 0x000e => PciExpressCapabilityId::AlternativeRoutingIdentificationInterpretation, + 0x000f => PciExpressCapabilityId::AddressTranslationServices, + 0x0010 => PciExpressCapabilityId::SingleRootIoVirtualization, + 0x0011 => PciExpressCapabilityId::DeprecatedMultiRootIoVirtualization, + 0x0012 => PciExpressCapabilityId::Multicast, + 0x0013 => PciExpressCapabilityId::PageRequestInterface, + 0x0014 => PciExpressCapabilityId::ReservedForAmd, + 0x0015 => PciExpressCapabilityId::ResizeableBar, + 0x0016 => PciExpressCapabilityId::DynamicPowerAllocation, + 0x0017 => PciExpressCapabilityId::ThpRequester, + 0x0018 => PciExpressCapabilityId::LatencyToleranceReporting, + 0x0019 => PciExpressCapabilityId::SecondaryPciExpress, + 0x001a => PciExpressCapabilityId::ProtocolMultiplexing, + 0x001b => PciExpressCapabilityId::ProcessAddressSpaceId, + 0x001c => PciExpressCapabilityId::LnRequester, + 0x001d => PciExpressCapabilityId::DownstreamPortContainment, + 0x001e => PciExpressCapabilityId::L1PmSubstates, + 0x001f => PciExpressCapabilityId::PrecisionTimeMeasurement, + 0x0020 => PciExpressCapabilityId::PciExpressOverMphy, + 0x0021 => PciExpressCapabilityId::FRSQueueing, + 0x0022 => PciExpressCapabilityId::ReadinessTimeReporting, + 0x0023 => PciExpressCapabilityId::DesignatedVendorSpecificExtendedCapability, + 0x0024 => PciExpressCapabilityId::VfResizeableBar, + 0x0025 => PciExpressCapabilityId::DataLinkFeature, + 0x0026 => PciExpressCapabilityId::PhysicalLayerSixteenGts, + 0x0027 => PciExpressCapabilityId::LaneMarginingAtTheReceiver, + 0x0028 => PciExpressCapabilityId::HierarchyId, + 0x0029 => PciExpressCapabilityId::NativePcieEnclosureManagement, + 0x002a => PciExpressCapabilityId::PhysicalLayerThirtyTwoGts, + 0x002b => PciExpressCapabilityId::AlternateProtocol, + 0x002c => PciExpressCapabilityId::SystemFirmwareIntermediary, + 0x002d => PciExpressCapabilityId::ShadowFunctions, + 0x002e => PciExpressCapabilityId::DataObjectExchange, + 0xffff => PciExpressCapabilityId::ExtendedCapabilitiesAbsence, + _ => PciExpressCapabilityId::Reserved, + } + } +} + +/// See pci_regs.h in kernel +#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)] +pub enum PciBarRegionType { + Memory32BitRegion = 0, + IoRegion = 0x01, + Memory64BitRegion = 0x04, +} + +#[derive(Debug, Copy, Clone, Serialize, Deserialize)] +pub enum PciBarPrefetchable { + NotPrefetchable = 0, + Prefetchable = 0x08, +} + +impl From for bool { + fn from(val: PciBarPrefetchable) -> Self { + match val { + PciBarPrefetchable::NotPrefetchable => false, + PciBarPrefetchable::Prefetchable => true, + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/vm-device/Cargo.toml b/src/vm-device/Cargo.toml deleted file mode 100644 index 53842418b5d..00000000000 --- a/src/vm-device/Cargo.toml +++ /dev/null @@ -1,16 +0,0 @@ -[package] -authors = ["The Cloud Hypervisor Authors"] -edition = "2021" -name = "vm-device" -version = "0.1.0" -license = "Apache-2.0 AND BSD-3-Clause" - -[lib] -bench = false - -[features] -default = [] - -[dependencies] -serde = { version = "1.0.228", features = ["derive", "rc"] } -vmm-sys-util = { version = "0.14.0", features = ["with-serde"] } diff --git a/src/vm-device/src/interrupt/mod.rs b/src/vm-device/src/interrupt/mod.rs deleted file mode 100644 index da5d87a4e1a..00000000000 --- a/src/vm-device/src/interrupt/mod.rs +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// Copyright (C) 2019 Alibaba Cloud. All rights reserved. -// Copyright © 2019 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause - -//! Traits and Structs to manage interrupt sources for devices. -//! -//! In system programming, an interrupt is a signal to the processor emitted by hardware or -//! software indicating an event that needs immediate attention. An interrupt alerts the processor -//! to a high-priority condition requiring the interruption of the current code the processor is -//! executing. The processor responds by suspending its current activities, saving its state, and -//! executing a function called an interrupt handler (or an interrupt service routine, ISR) to deal -//! with the event. This interruption is temporary, and, after the interrupt handler finishes, -//! unless handling the interrupt has emitted a fatal error, the processor resumes normal -//! activities. -//! -//! Hardware interrupts are used by devices to communicate that they require attention from the -//! operating system, or a bare-metal program running on the CPU if there are no OSes. The act of -//! initiating a hardware interrupt is referred to as an interrupt request (IRQ). Different devices -//! are usually associated with different interrupts using a unique value associated with each -//! interrupt. This makes it possible to know which hardware device caused which interrupts. -//! These interrupt values are often called IRQ lines, or just interrupt lines. -//! -//! Nowadays, IRQ lines is not the only mechanism to deliver device interrupts to processors. -//! MSI [(Message Signaled Interrupt)](https://en.wikipedia.org/wiki/Message_Signaled_Interrupts) -//! is another commonly used alternative in-band method of signaling an interrupt, using special -//! in-band messages to replace traditional out-of-band assertion of dedicated interrupt lines. -//! While more complex to implement in a device, message signaled interrupts have some significant -//! advantages over pin-based out-of-band interrupt signaling. Message signaled interrupts are -//! supported in PCI bus since its version 2.2, and in later available PCI Express bus. Some -//! non-PCI architectures also use message signaled interrupts. -//! -//! While IRQ is a term commonly used by Operating Systems when dealing with hardware -//! interrupts, the IRQ numbers managed by OSes are independent of the ones managed by VMM. -//! For simplicity sake, the term `Interrupt Source` is used instead of IRQ to represent both -//! pin-based interrupts and MSI interrupts. -//! -//! A device may support multiple types of interrupts, and each type of interrupt may support one -//! or multiple interrupt sources. For example, a PCI device may support: -//! * Legacy Irq: exactly one interrupt source. -//! * PCI MSI Irq: 1,2,4,8,16,32 interrupt sources. -//! * PCI MSIx Irq: 2^n(n=0-11) interrupt sources. -//! -//! A distinct Interrupt Source Identifier (ISID) will be assigned to each interrupt source. -//! An ID allocator will be used to allocate and free Interrupt Source Identifiers for devices. -//! To decouple the vm-device crate from the ID allocator, the vm-device crate doesn't take the -//! responsibility to allocate/free Interrupt Source IDs but only makes use of assigned IDs. -//! -//! The overall flow to deal with interrupts is: -//! * The VMM creates an interrupt manager -//! * The VMM creates a device manager, passing on an reference to the interrupt manager -//! * The device manager passes on an reference to the interrupt manager to all registered devices -//! * The guest kernel loads drivers for virtual devices -//! * The guest device driver determines the type and number of interrupts needed, and update the -//! device configuration -//! * The virtual device backend requests the interrupt manager to create an interrupt group -//! according to guest configuration information - -use std::sync::Arc; - -use vmm_sys_util::eventfd::EventFd; - -/// Reuse std::io::Result to simplify interoperability among crates. -pub type Result = std::io::Result; - -/// Data type to store an interrupt source identifier. -pub type InterruptIndex = u32; - -/// Configuration data for legacy interrupts. -/// -/// On x86 platforms, legacy interrupts means those interrupts routed through PICs or IOAPICs. -#[derive(Copy, Clone, Debug)] -pub struct LegacyIrqSourceConfig { - pub irqchip: u32, - pub pin: u32, -} - -/// Configuration data for MSI/MSI-X interrupts. -/// -/// On x86 platforms, these interrupts are vectors delivered directly to the LAPIC. -#[derive(Copy, Clone, Debug, Default)] -pub struct MsiIrqSourceConfig { - /// High address to delivery message signaled interrupt. - pub high_addr: u32, - /// Low address to delivery message signaled interrupt. - pub low_addr: u32, - /// Data to write to delivery message signaled interrupt. - pub data: u32, - /// Unique ID of the device to delivery message signaled interrupt. - pub devid: u32, -} - -/// Configuration data for an interrupt source. -#[derive(Copy, Clone, Debug)] -pub enum InterruptSourceConfig { - /// Configuration data for Legacy interrupts. - LegacyIrq(LegacyIrqSourceConfig), - /// Configuration data for PciMsi, PciMsix and generic MSI interrupts. - MsiIrq(MsiIrqSourceConfig), -} - -/// Configuration data for legacy, pin based interrupt groups. -/// -/// A legacy interrupt group only takes one irq number as its configuration. -#[derive(Copy, Clone, Debug)] -pub struct LegacyIrqGroupConfig { - /// Legacy irq number. - pub irq: InterruptIndex, -} - -/// Configuration data for MSI/MSI-X interrupt groups -/// -/// MSI/MSI-X interrupt groups are basically a set of vectors. -#[derive(Copy, Clone, Debug)] -pub struct MsiIrqGroupConfig { - /// First index of the MSI/MSI-X interrupt vectors - pub base: InterruptIndex, - /// Number of vectors in the MSI/MSI-X group. - pub count: InterruptIndex, -} - -/// Trait to manage interrupt sources for virtual device backends. -/// -/// The InterruptManager implementations should protect itself from concurrent accesses internally, -/// so it could be invoked from multi-threaded context. -pub trait InterruptManager: Send + Sync { - type GroupConfig; - - /// Create an [InterruptSourceGroup](trait.InterruptSourceGroup.html) object to manage - /// interrupt sources for a virtual device - /// - /// An [InterruptSourceGroup](trait.InterruptSourceGroup.html) object manages all interrupt - /// sources of the same type for a virtual device. - /// - /// # Arguments - /// * interrupt_type: type of interrupt source. - /// * base: base Interrupt Source ID to be managed by the group object. - /// * count: number of Interrupt Sources to be managed by the group object. - fn create_group(&self, config: Self::GroupConfig) -> Result>; - - /// Destroy an [InterruptSourceGroup](trait.InterruptSourceGroup.html) object created by - /// [create_group()](trait.InterruptManager.html#tymethod.create_group). - /// - /// Assume the caller takes the responsibility to disable all interrupt sources of the group - /// before calling destroy_group(). This assumption helps to simplify InterruptSourceGroup - /// implementations. - fn destroy_group(&self, group: Arc) -> Result<()>; -} - -pub trait InterruptSourceGroup: Send + Sync { - /// Enable the interrupt sources in the group to generate interrupts. - fn enable(&self) -> Result<()> { - // Not all interrupt sources can be enabled. - // To accommodate this, we can have a no-op here. - Ok(()) - } - - /// Disable the interrupt sources in the group to generate interrupts. - fn disable(&self) -> Result<()> { - // Not all interrupt sources can be disabled. - // To accommodate this, we can have a no-op here. - Ok(()) - } - - /// Inject an interrupt from this interrupt source into the guest. - fn trigger(&self, index: InterruptIndex) -> Result<()>; - - /// Returns an interrupt notifier from this interrupt. - /// - /// An interrupt notifier allows for external components and processes - /// to inject interrupts into a guest, by writing to the file returned - /// by this method. - #[allow(unused_variables)] - fn notifier(&self, index: InterruptIndex) -> Option<&EventFd>; - - /// Update the interrupt source group configuration. - /// - /// # Arguments - /// * index: sub-index into the group. - /// * config: configuration data for the interrupt source. - /// * masked: if the interrupt is masked - /// * set_gsi: whether update the GSI routing table. - fn update( - &self, - index: InterruptIndex, - config: InterruptSourceConfig, - masked: bool, - set_gsi: bool, - ) -> Result<()>; - - /// Set the interrupt group GSI routing table. - fn set_gsi(&self) -> Result<()>; -} diff --git a/src/vm-device/src/lib.rs b/src/vm-device/src/lib.rs deleted file mode 100644 index b980b09c4b9..00000000000 --- a/src/vm-device/src/lib.rs +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 -// -// Copyright © 2020 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -// - -use serde::{Deserialize, Serialize}; - -mod bus; -pub mod interrupt; - -pub use self::bus::{Bus, BusDevice, BusDeviceSync, Error as BusError}; - -/// Type of Message Signalled Interrupt -#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub enum MsiIrqType { - /// PCI MSI IRQ numbers. - PciMsi, - /// PCI MSIx IRQ numbers. - PciMsix, - /// Generic MSI IRQ numbers. - GenericMsi, -} - -#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)] -pub enum PciBarType { - Io, - Mmio32, - Mmio64, -} - -/// Enumeration for device resources. -#[allow(missing_docs)] -#[derive(Clone, Debug, Serialize, Deserialize)] -pub enum Resource { - /// IO Port address range. - PioAddressRange { base: u16, size: u16 }, - /// Memory Mapped IO address range. - MmioAddressRange { base: u64, size: u64 }, - /// PCI BAR - PciBar { - index: usize, - base: u64, - size: u64, - type_: PciBarType, - prefetchable: bool, - }, - /// Legacy IRQ number. - LegacyIrq(u32), - /// Message Signaled Interrupt - MsiIrq { - ty: MsiIrqType, - base: u32, - size: u32, - }, - /// Network Interface Card MAC address. - MacAddress(String), - /// KVM memslot index. - KvmMemSlot(u32), -} diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 32fbe9c02de..d2601f4a305 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -53,7 +53,6 @@ utils = { path = "../utils" } uuid = "1.18.1" vhost = { version = "0.14.0", features = ["vhost-user-frontend"] } vm-allocator = { version = "0.1.3", features = ["serde"] } -vm-device = { path = "../vm-device" } vm-memory = { version = "0.16.2", features = [ "backend-mmap", "backend-bitmap", diff --git a/src/vmm/src/arch/aarch64/vcpu.rs b/src/vmm/src/arch/aarch64/vcpu.rs index 7a591bdee91..39020b6fbb6 100644 --- a/src/vmm/src/arch/aarch64/vcpu.rs +++ b/src/vmm/src/arch/aarch64/vcpu.rs @@ -23,6 +23,7 @@ use crate::cpu_config::aarch64::custom_cpu_template::VcpuFeatures; use crate::cpu_config::templates::CpuConfiguration; use crate::logger::{IncMetric, METRICS, error}; use crate::vcpu::{VcpuConfig, VcpuError}; +use crate::vstate::bus::Bus; use crate::vstate::memory::{Address, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuEmulation; use crate::vstate::vm::Vm; @@ -120,7 +121,7 @@ pub struct KvmVcpu { #[derive(Default, Debug)] pub struct Peripherals { /// mmio bus. - pub mmio_bus: Option>, + pub mmio_bus: Option>, } impl KvmVcpu { diff --git a/src/vmm/src/arch/x86_64/vcpu.rs b/src/vmm/src/arch/x86_64/vcpu.rs index eea1f24ae69..5dd6a2e55e5 100644 --- a/src/vmm/src/arch/x86_64/vcpu.rs +++ b/src/vmm/src/arch/x86_64/vcpu.rs @@ -25,6 +25,7 @@ use crate::arch::x86_64::msr::{MsrError, create_boot_msr_entries}; use crate::arch::x86_64::regs::{SetupFpuError, SetupRegistersError, SetupSpecialRegistersError}; use crate::cpu_config::x86_64::{CpuConfiguration, cpuid}; use crate::logger::{IncMetric, METRICS}; +use crate::vstate::bus::Bus; use crate::vstate::memory::GuestMemoryMmap; use crate::vstate::vcpu::{VcpuConfig, VcpuEmulation, VcpuError}; use crate::vstate::vm::Vm; @@ -160,9 +161,9 @@ pub struct KvmVcpu { #[derive(Default, Debug)] pub struct Peripherals { /// Pio bus. - pub pio_bus: Option>, + pub pio_bus: Option>, /// Mmio bus. - pub mmio_bus: Option>, + pub mmio_bus: Option>, } impl KvmVcpu { @@ -267,7 +268,7 @@ impl KvmVcpu { } /// Sets a Port Mapped IO bus for this vcpu. - pub fn set_pio_bus(&mut self, pio_bus: Arc) { + pub fn set_pio_bus(&mut self, pio_bus: Arc) { self.peripherals.pio_bus = Some(pio_bus); } diff --git a/src/vmm/src/arch/x86_64/vm.rs b/src/vmm/src/arch/x86_64/vm.rs index b71d18ae37b..37d97d8c212 100644 --- a/src/vmm/src/arch/x86_64/vm.rs +++ b/src/vmm/src/arch/x86_64/vm.rs @@ -14,6 +14,7 @@ use serde::{Deserialize, Serialize}; use crate::arch::x86_64::msr::MsrError; use crate::snapshot::Persist; use crate::utils::u64_to_usize; +use crate::vstate::bus::Bus; use crate::vstate::memory::{GuestMemoryExtension, GuestMemoryState}; use crate::vstate::resources::ResourceAllocator; use crate::vstate::vm::{VmCommon, VmError}; @@ -60,7 +61,7 @@ pub struct ArchVm { /// `None` if `KVM_CAP_XSAVE2` not supported. xsave2_size: Option, /// Port IO bus - pub pio_bus: Arc, + pub pio_bus: Arc, } impl ArchVm { @@ -95,7 +96,7 @@ impl ArchVm { .set_tss_address(u64_to_usize(crate::arch::x86_64::layout::KVM_TSS_ADDRESS)) .map_err(ArchVmError::SetTssAddress)?; - let pio_bus = Arc::new(vm_device::Bus::new()); + let pio_bus = Arc::new(Bus::new()); Ok(ArchVm { common, diff --git a/src/vmm/src/device_manager/legacy.rs b/src/vmm/src/device_manager/legacy.rs index 77173433d38..d8ff2aaf41b 100644 --- a/src/vmm/src/device_manager/legacy.rs +++ b/src/vmm/src/device_manager/legacy.rs @@ -18,12 +18,13 @@ use vmm_sys_util::eventfd::EventFd; use crate::Vm; use crate::devices::legacy::serial::SerialOut; use crate::devices::legacy::{EventFdTrigger, I8042Device, SerialDevice, SerialEventsWrapper}; +use crate::vstate::bus::BusError; /// Errors corresponding to the `PortIODeviceManager`. #[derive(Debug, derive_more::From, thiserror::Error, displaydoc::Display)] pub enum LegacyDeviceError { /// Failed to add legacy device to Bus: {0} - BusError(vm_device::BusError), + BusError(BusError), /// Failed to create EventFd: {0} EventFd(std::io::Error), } diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index 46accb637b0..f7514ed1e0c 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -27,6 +27,7 @@ use crate::devices::legacy::{RTCDevice, SerialDevice}; use crate::devices::pseudo::BootTimer; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::transport::mmio::MmioTransport; +use crate::vstate::bus::{Bus, BusError}; #[cfg(target_arch = "x86_64")] use crate::vstate::memory::GuestAddress; use crate::vstate::resources::ResourceAllocator; @@ -37,7 +38,7 @@ pub enum MmioError { /// Failed to allocate requested resource: {0} Allocator(#[from] vm_allocator::Error), /// Failed to insert device on the bus: {0} - BusInsert(#[from] vm_device::BusError), + BusInsert(#[from] BusError), /// Failed to allocate requested resourc: {0} Cmdline(#[from] linux_loader::cmdline::Error), /// Failed to find the device on the bus. @@ -360,7 +361,7 @@ impl MMIODeviceManager { /// Register a boot timer device. pub fn register_mmio_boot_timer( &mut self, - mmio_bus: &vm_device::Bus, + mmio_bus: &Bus, boot_timer: Arc>, ) -> Result<(), MmioError> { // Attach a new boot timer device. diff --git a/src/vmm/src/device_manager/mod.rs b/src/vmm/src/device_manager/mod.rs index d7052422a3a..d11996ad06d 100644 --- a/src/vmm/src/device_manager/mod.rs +++ b/src/vmm/src/device_manager/mod.rs @@ -36,6 +36,7 @@ use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::transport::mmio::{IrqTrigger, MmioTransport}; use crate::resources::VmResources; use crate::snapshot::Persist; +use crate::vstate::bus::BusError; use crate::vstate::memory::GuestMemoryMmap; use crate::{EmulateSerialInitError, EventManager, Vm}; @@ -68,7 +69,7 @@ pub enum AttachDeviceError { /// MMIO transport error: {0} MmioTransport(#[from] MmioError), /// Error inserting device in bus: {0} - Bus(#[from] vm_device::BusError), + Bus(#[from] BusError), /// Error creating VMGenID device: {0} CreateVmGenID(#[from] VmGenIdError), /// Error while registering VMGenID with KVM: {0} @@ -427,7 +428,7 @@ pub enum DevicePersistError { /// Error resetting serial console: {0} SerialRestore(#[from] EmulateSerialInitError), /// Error inserting device in bus: {0} - Bus(#[from] vm_device::BusError), + Bus(#[from] BusError), /// Error creating DeviceManager: {0} DeviceManager(#[from] DeviceManagerCreateError), } diff --git a/src/vmm/src/device_manager/pci_mngr.rs b/src/vmm/src/device_manager/pci_mngr.rs index b8371d82a83..b9fab1afa3a 100644 --- a/src/vmm/src/device_manager/pci_mngr.rs +++ b/src/vmm/src/device_manager/pci_mngr.rs @@ -8,9 +8,7 @@ use std::sync::{Arc, Mutex}; use event_manager::{MutEventSubscriber, SubscriberOps}; use log::{debug, error, warn}; -use pci::{PciDeviceError, PciRootError}; use serde::{Deserialize, Serialize}; -use vm_device::BusError; use super::persist::{MmdsState, SharedDeviceType}; use crate::devices::pci::PciSegment; @@ -31,11 +29,13 @@ use crate::devices::virtio::vsock::persist::{ VsockConstructorArgs, VsockState, VsockUdsConstructorArgs, }; use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend}; +use crate::pci::bus::PciRootError; use crate::resources::VmResources; use crate::snapshot::Persist; use crate::vmm_config::mmds::MmdsConfigError; +use crate::vstate::bus::BusError; +use crate::vstate::interrupts::InterruptError; use crate::vstate::memory::GuestMemoryMmap; -use crate::vstate::vm::{InterruptError, MsiVectorGroup}; use crate::{EventManager, Vm}; #[derive(Debug, Default)] @@ -58,8 +58,6 @@ pub enum PciManagerError { Msi(#[from] InterruptError), /// VirtIO PCI device error: {0} VirtioPciDevice(#[from] VirtioPciDeviceError), - /// PCI device error: {0} - PciDeviceError(#[from] PciDeviceError), /// KVM error: {0} Kvm(#[from] vmm_sys_util::errno::Error), /// MMDS error: {0} @@ -123,24 +121,29 @@ impl PciDevices { let msix_num = u16::try_from(device.lock().expect("Poisoned lock").queues().len() + 1).unwrap(); - let msix_vectors = Arc::new(Vm::create_msix_group(vm.clone(), msix_num)?); + let msix_vectors = Vm::create_msix_group(vm.clone(), msix_num)?; // Create the transport - let mut virtio_device = - VirtioPciDevice::new(id.clone(), mem, device, msix_vectors, pci_device_bdf.into())?; + let mut virtio_device = VirtioPciDevice::new( + id.clone(), + mem, + device, + Arc::new(msix_vectors), + pci_device_bdf.into(), + )?; // Allocate bars let mut resource_allocator_lock = vm.resource_allocator(); let resource_allocator = resource_allocator_lock.deref_mut(); - virtio_device.allocate_bars(&mut resource_allocator.mmio64_memory)?; + virtio_device.allocate_bars(&mut resource_allocator.mmio64_memory); let virtio_device = Arc::new(Mutex::new(virtio_device)); pci_segment .pci_bus .lock() .expect("Poisoned lock") - .add_device(pci_device_bdf.device() as u32, virtio_device.clone())?; + .add_device(pci_device_bdf.device() as u32, virtio_device.clone()); self.virtio_devices .insert((device_type, id.clone()), virtio_device.clone()); @@ -164,17 +167,12 @@ impl PciDevices { ) -> Result<(), PciManagerError> { // We should only be reaching this point if PCI is enabled let pci_segment = self.pci_segment.as_ref().unwrap(); - let msi_vector_group = Arc::new(MsiVectorGroup::restore( - vm.clone(), - &transport_state.msi_vector_group, - )?); let device_type: u32 = device.lock().expect("Poisoned lock").device_type(); let virtio_device = Arc::new(Mutex::new(VirtioPciDevice::new_from_state( device_id.to_string(), - vm.guest_memory().clone(), + vm, device.clone(), - msi_vector_group, transport_state.clone(), )?)); @@ -185,7 +183,7 @@ impl PciDevices { .add_device( transport_state.pci_device_bdf.device() as u32, virtio_device.clone(), - )?; + ); self.virtio_devices .insert((device_type, device_id.to_string()), virtio_device.clone()); diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index d6d46fff0f5..b6b2ff7eb70 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -43,6 +43,7 @@ use crate::mmds::data_store::MmdsVersion; use crate::resources::{ResourcesError, VmResources}; use crate::snapshot::Persist; use crate::vmm_config::mmds::MmdsConfigError; +use crate::vstate::bus::BusError; use crate::vstate::memory::GuestMemoryMmap; use crate::{EventManager, Vm}; @@ -58,7 +59,7 @@ pub enum DevicePersistError { /// Mmio transport MmioTransport, /// Bus error: {0} - Bus(#[from] vm_device::BusError), + Bus(#[from] BusError), #[cfg(target_arch = "aarch64")] /// Legacy: {0} Legacy(#[from] std::io::Error), diff --git a/src/vmm/src/devices/legacy/i8042.rs b/src/vmm/src/devices/legacy/i8042.rs index eee9957f208..664002637e6 100644 --- a/src/vmm/src/devices/legacy/i8042.rs +++ b/src/vmm/src/devices/legacy/i8042.rs @@ -14,6 +14,7 @@ use serde::Serialize; use vmm_sys_util::eventfd::EventFd; use crate::logger::{IncMetric, SharedIncMetric, error}; +use crate::vstate::bus::BusDevice; /// Errors thrown by the i8042 device. #[derive(Debug, thiserror::Error, displaydoc::Display)] @@ -210,7 +211,7 @@ impl I8042Device { } } -impl vm_device::BusDevice for I8042Device { +impl BusDevice for I8042Device { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { // All our ports are byte-wide. We don't know how to handle any wider data. if data.len() != 1 { @@ -342,7 +343,6 @@ impl vm_device::BusDevice for I8042Device { #[cfg(test)] mod tests { - use vm_device::BusDevice; use super::*; diff --git a/src/vmm/src/devices/legacy/rtc_pl031.rs b/src/vmm/src/devices/legacy/rtc_pl031.rs index b025c1d1512..96b1134eb1c 100644 --- a/src/vmm/src/devices/legacy/rtc_pl031.rs +++ b/src/vmm/src/devices/legacy/rtc_pl031.rs @@ -122,7 +122,7 @@ impl RTCDevice { } #[cfg(target_arch = "aarch64")] -impl vm_device::BusDevice for RTCDevice { +impl crate::vstate::bus::BusDevice for RTCDevice { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { self.bus_read(offset, data) } diff --git a/src/vmm/src/devices/legacy/serial.rs b/src/vmm/src/devices/legacy/serial.rs index 63834af3cb1..6b5c29aabf4 100644 --- a/src/vmm/src/devices/legacy/serial.rs +++ b/src/vmm/src/devices/legacy/serial.rs @@ -23,6 +23,7 @@ use vmm_sys_util::eventfd::EventFd; use crate::devices::legacy::EventFdTrigger; use crate::logger::{IncMetric, SharedIncMetric}; +use crate::vstate::bus::BusDevice; /// Received Data Available interrupt - for letting the driver know that /// there is some pending data to be processed. @@ -363,7 +364,7 @@ fn is_fifo(fd: RawFd) -> bool { (stat.st_mode & libc::S_IFIFO) != 0 } -impl vm_device::BusDevice for SerialWrapper +impl BusDevice for SerialWrapper where I: Read + AsRawFd + Send, { @@ -393,7 +394,6 @@ where mod tests { #![allow(clippy::undocumented_unsafe_blocks)] - use vm_device::BusDevice; use vmm_sys_util::eventfd::EventFd; use super::*; diff --git a/src/vmm/src/devices/mod.rs b/src/vmm/src/devices/mod.rs index 371cc2cfa9e..232eabba989 100644 --- a/src/vmm/src/devices/mod.rs +++ b/src/vmm/src/devices/mod.rs @@ -23,6 +23,7 @@ use crate::devices::virtio::net::metrics::NetDeviceMetrics; use crate::devices::virtio::queue::{InvalidAvailIdx, QueueError}; use crate::devices::virtio::vsock::VsockError; use crate::logger::IncMetric; +use crate::vstate::interrupts::InterruptError; // Function used for reporting error in terms of logging // but also in terms of metrics of net event fails. @@ -41,7 +42,7 @@ pub enum DeviceError { /// Failed to read from the TAP device. FailedReadTap, /// Failed to signal irq: {0} - FailedSignalingIrq(io::Error), + FailedSignalingIrq(#[from] InterruptError), /// IO error: {0} IoError(io::Error), /// Device received malformed payload. diff --git a/src/vmm/src/devices/pci/pci_segment.rs b/src/vmm/src/devices/pci/pci_segment.rs index 7deaa027f7b..04d902ba252 100644 --- a/src/vmm/src/devices/pci/pci_segment.rs +++ b/src/vmm/src/devices/pci/pci_segment.rs @@ -14,14 +14,16 @@ use std::sync::{Arc, Mutex}; #[cfg(target_arch = "x86_64")] use acpi_tables::{Aml, aml}; use log::info; +use pci::PciBdf; #[cfg(target_arch = "x86_64")] -use pci::{PCI_CONFIG_IO_PORT, PCI_CONFIG_IO_PORT_SIZE, PciConfigIo}; -use pci::{PciBdf, PciBus, PciConfigMmio, PciRoot, PciRootError}; use uuid::Uuid; use vm_allocator::AddressAllocator; -use vm_device::{BusDeviceSync, BusError}; use crate::arch::{ArchVm as Vm, PCI_MMCONFIG_START, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT}; +#[cfg(target_arch = "x86_64")] +use crate::pci::bus::{PCI_CONFIG_IO_PORT, PCI_CONFIG_IO_PORT_SIZE}; +use crate::pci::bus::{PciBus, PciConfigIo, PciConfigMmio, PciRoot, PciRootError}; +use crate::vstate::bus::{BusDeviceSync, BusError}; use crate::vstate::resources::ResourceAllocator; pub struct PciSegment { diff --git a/src/vmm/src/devices/pseudo/boot_timer.rs b/src/vmm/src/devices/pseudo/boot_timer.rs index f0cf38977b5..81f9abb34f3 100644 --- a/src/vmm/src/devices/pseudo/boot_timer.rs +++ b/src/vmm/src/devices/pseudo/boot_timer.rs @@ -6,6 +6,7 @@ use std::sync::{Arc, Barrier}; use utils::time::TimestampUs; use crate::logger::info; +use crate::vstate::bus::BusDevice; const MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE: u8 = 123; @@ -15,7 +16,7 @@ pub struct BootTimer { start_ts: TimestampUs, } -impl vm_device::BusDevice for BootTimer { +impl BusDevice for BootTimer { fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option> { // Only handle byte length instructions at a zero offset. if data.len() != 1 || offset != 0 { diff --git a/src/vmm/src/devices/virtio/balloon/mod.rs b/src/vmm/src/devices/virtio/balloon/mod.rs index 91de01a8393..33020e0ddec 100644 --- a/src/vmm/src/devices/virtio/balloon/mod.rs +++ b/src/vmm/src/devices/virtio/balloon/mod.rs @@ -17,6 +17,7 @@ use super::queue::{InvalidAvailIdx, QueueError}; use crate::devices::virtio::balloon::metrics::METRICS; use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE; use crate::logger::IncMetric; +use crate::vstate::interrupts::InterruptError; /// Device ID used in MMIO device identification. /// Because Balloon is unique per-vm, this ID can be hardcoded. @@ -72,7 +73,7 @@ pub enum BalloonError { /// EventFd error: {0} EventFd(std::io::Error), /// Received error while sending an interrupt: {0} - InterruptError(std::io::Error), + InterruptError(InterruptError), /// Guest gave us a malformed descriptor. MalformedDescriptor, /// Guest gave us a malformed payload. diff --git a/src/vmm/src/devices/virtio/block/vhost_user/mod.rs b/src/vmm/src/devices/virtio/block/vhost_user/mod.rs index 0afaaed3400..0cd6d46f0ae 100644 --- a/src/vmm/src/devices/virtio/block/vhost_user/mod.rs +++ b/src/vmm/src/devices/virtio/block/vhost_user/mod.rs @@ -7,6 +7,7 @@ pub mod persist; use self::device::VhostUserBlock; use crate::devices::virtio::vhost_user::VhostUserError; +use crate::vstate::interrupts::InterruptError; /// Number of queues for the vhost-user block device. pub const NUM_QUEUES: u64 = 1; @@ -28,5 +29,5 @@ pub enum VhostUserBlockError { /// Error opening eventfd: {0} EventFd(std::io::Error), /// Error creating irqfd: {0} - Interrupt(std::io::Error), + Interrupt(InterruptError), } diff --git a/src/vmm/src/devices/virtio/transport/mmio.rs b/src/vmm/src/devices/virtio/transport/mmio.rs index c20928e3c29..f5039281f16 100644 --- a/src/vmm/src/devices/virtio/transport/mmio.rs +++ b/src/vmm/src/devices/virtio/transport/mmio.rs @@ -15,8 +15,10 @@ use super::{VirtioInterrupt, VirtioInterruptType}; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::device_status; use crate::devices::virtio::queue::Queue; -use crate::logger::{error, warn}; +use crate::logger::{IncMetric, METRICS, error, warn}; use crate::utils::byte_order; +use crate::vstate::bus::BusDevice; +use crate::vstate::interrupts::InterruptError; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; // TODO crosvm uses 0 here, but IIRC virtio specified some other vendor id that should be used @@ -232,7 +234,7 @@ impl MmioTransport { } } -impl vm_device::BusDevice for MmioTransport { +impl BusDevice for MmioTransport { fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { match offset { 0x00..=0xff if data.len() == 4 => { @@ -399,17 +401,19 @@ impl Default for IrqTrigger { } impl VirtioInterrupt for IrqTrigger { - fn trigger(&self, interrupt_type: VirtioInterruptType) -> Result<(), std::io::Error> { + fn trigger(&self, interrupt_type: VirtioInterruptType) -> Result<(), InterruptError> { + METRICS.interrupts.triggers.inc(); match interrupt_type { VirtioInterruptType::Config => self.trigger_irq(IrqType::Config), VirtioInterruptType::Queue(_) => self.trigger_irq(IrqType::Vring), } } - fn trigger_queues(&self, queues: &[u16]) -> Result<(), std::io::Error> { + fn trigger_queues(&self, queues: &[u16]) -> Result<(), InterruptError> { if queues.is_empty() { Ok(()) } else { + METRICS.interrupts.triggers.inc(); self.trigger_irq(IrqType::Vring) } } @@ -449,7 +453,7 @@ impl IrqTrigger { } } - fn trigger_irq(&self, irq_type: IrqType) -> Result<(), std::io::Error> { + fn trigger_irq(&self, irq_type: IrqType) -> Result<(), InterruptError> { let irq = match irq_type { IrqType::Config => VIRTIO_MMIO_INT_CONFIG, IrqType::Vring => VIRTIO_MMIO_INT_VRING, @@ -470,7 +474,6 @@ pub(crate) mod tests { use std::ops::Deref; - use vm_device::BusDevice; use vmm_sys_util::eventfd::EventFd; use super::*; diff --git a/src/vmm/src/devices/virtio/transport/mod.rs b/src/vmm/src/devices/virtio/transport/mod.rs index 39dfe05a4fd..41d0730dfe0 100644 --- a/src/vmm/src/devices/virtio/transport/mod.rs +++ b/src/vmm/src/devices/virtio/transport/mod.rs @@ -6,6 +6,8 @@ use std::sync::atomic::AtomicU32; use vmm_sys_util::eventfd::EventFd; +use crate::vstate::interrupts::InterruptError; + /// MMIO transport for VirtIO devices pub mod mmio; /// PCI transport for VirtIO devices @@ -23,7 +25,7 @@ pub enum VirtioInterruptType { /// API of interrupt types used by VirtIO devices pub trait VirtioInterrupt: std::fmt::Debug + Send + Sync { /// Trigger a VirtIO interrupt. - fn trigger(&self, interrupt_type: VirtioInterruptType) -> Result<(), std::io::Error>; + fn trigger(&self, interrupt_type: VirtioInterruptType) -> Result<(), InterruptError>; /// Trigger multiple Virtio interrupts for selected queues. /// The caller needs to ensure that [`queues`] does not include duplicate entries to @@ -31,7 +33,7 @@ pub trait VirtioInterrupt: std::fmt::Debug + Send + Sync { /// This is to allow sending a single interrupt for implementations that don't /// distinguish different queues, like IrqTrigger, instead of sending multiple same /// interrupts. - fn trigger_queues(&self, queues: &[u16]) -> Result<(), std::io::Error> { + fn trigger_queues(&self, queues: &[u16]) -> Result<(), InterruptError> { queues .iter() .try_for_each(|&qidx| self.trigger(VirtioInterruptType::Queue(qidx))) diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index f60ca43ac14..9bc604fc7c1 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -18,16 +18,12 @@ use anyhow::anyhow; use kvm_ioctls::{IoEventAddress, NoDatamatch}; use log::warn; use pci::{ - BarReprogrammingParams, MsixCap, MsixConfig, MsixConfigState, PciBarConfiguration, - PciBarRegionType, PciBdf, PciCapability, PciCapabilityId, PciClassCode, PciConfiguration, - PciConfigurationState, PciDevice, PciDeviceError, PciHeaderType, PciMassStorageSubclass, - PciNetworkControllerSubclass, PciSubclass, + PciBdf, PciCapabilityId, PciClassCode, PciMassStorageSubclass, PciNetworkControllerSubclass, + PciSubclass, }; use serde::{Deserialize, Serialize}; use thiserror::Error; use vm_allocator::{AddressAllocator, AllocPolicy, RangeInclusive}; -use vm_device::interrupt::{InterruptIndex, InterruptSourceGroup, MsiIrqGroupConfig}; -use vm_device::{BusDevice, PciBarType}; use vm_memory::{Address, ByteValued, GuestAddress, Le32}; use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; @@ -41,11 +37,15 @@ use crate::devices::virtio::transport::pci::common_config::{ }; use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::logger::{debug, error}; +use crate::pci::configuration::{PciCapability, PciConfiguration, PciConfigurationState}; +use crate::pci::msix::{MsixCap, MsixConfig, MsixConfigState}; +use crate::pci::{BarReprogrammingParams, PciDevice}; use crate::snapshot::Persist; use crate::utils::u64_to_usize; +use crate::vstate::bus::BusDevice; +use crate::vstate::interrupts::{InterruptError, MsixVectorGroup}; use crate::vstate::memory::GuestMemoryMmap; use crate::vstate::resources::ResourceAllocator; -use crate::vstate::vm::{InterruptError, MsiVectorGroup}; const DEVICE_INIT: u8 = 0x00; const DEVICE_ACKNOWLEDGE: u8 = 0x01; @@ -245,7 +245,6 @@ pub struct VirtioPciDeviceState { pub pci_configuration_state: PciConfigurationState, pub pci_dev_state: VirtioPciCommonConfigState, pub msix_state: MsixConfigState, - pub msi_vector_group: Vec, pub bar_address: u64, } @@ -254,7 +253,7 @@ pub enum VirtioPciDeviceError { /// Failed creating VirtioPciDevice: {0} CreateVirtioPciDevice(#[from] anyhow::Error), /// Error creating MSI configuration: {0} - Msi(#[from] pci::MsixError), + Msi(#[from] InterruptError), } pub type Result = std::result::Result; @@ -270,19 +269,12 @@ pub struct VirtioPciDevice { // virtio PCI common configuration common_config: VirtioPciCommonConfig, - // MSI-X config - msix_config: Option>>, - - // Number of MSI-X vectors - msix_num: u16, - // Virtio device reference and status device: Arc>, device_activated: Arc, // PCI interrupts. - virtio_interrupt: Option>, - interrupt_source_group: Arc, + virtio_interrupt: Option>, // Guest memory memory: GuestMemoryMmap, @@ -311,7 +303,6 @@ impl VirtioPciDevice { fn pci_configuration( virtio_device_type: u32, msix_config: &Arc>, - pci_config_state: Option, ) -> PciConfiguration { let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + u16::try_from(virtio_device_type).unwrap(); let (class, subclass) = match virtio_device_type { @@ -329,45 +320,24 @@ impl VirtioPciDevice { ), }; - PciConfiguration::new( + PciConfiguration::new_type0( VIRTIO_PCI_VENDOR_ID, pci_device_id, 0x1, // For modern virtio-PCI devices class, subclass, - None, - PciHeaderType::Device, VIRTIO_PCI_VENDOR_ID, pci_device_id, Some(msix_config.clone()), - pci_config_state, ) } - fn msix_config( - pci_device_bdf: u32, - msix_vectors: Arc, - msix_config_state: Option, - ) -> Result>> { - let msix_config = Arc::new(Mutex::new(MsixConfig::new( - msix_vectors.num_vectors(), - msix_vectors, - pci_device_bdf, - msix_config_state, - )?)); - - Ok(msix_config) - } - /// Allocate the PCI BAR for the VirtIO device and its associated capabilities. /// /// This must happen only during the creation of a brand new VM. When a VM is restored from a /// known state, the BARs are already created with the right content, therefore we don't need /// to go through this codepath. - pub fn allocate_bars( - &mut self, - mmio64_allocator: &mut AddressAllocator, - ) -> std::result::Result<(), PciDeviceError> { + pub fn allocate_bars(&mut self, mmio64_allocator: &mut AddressAllocator) { let device_clone = self.device.clone(); let device = device_clone.lock().unwrap(); @@ -389,10 +359,8 @@ impl VirtioPciDevice { ); // Once the BARs are allocated, the capabilities can be added to the PCI configuration. - self.add_pci_capabilities()?; + self.add_pci_capabilities(); self.bar_address = virtio_pci_bar_addr; - - Ok(()) } /// Constructs a new PCI transport for the given virtio device. @@ -400,16 +368,18 @@ impl VirtioPciDevice { id: String, memory: GuestMemoryMmap, device: Arc>, - msi_vectors: Arc, + msix_vectors: Arc, pci_device_bdf: u32, ) -> Result { let num_queues = device.lock().expect("Poisoned lock").queues().len(); - let msix_config = Self::msix_config(pci_device_bdf, msi_vectors.clone(), None)?; + let msix_config = Arc::new(Mutex::new(MsixConfig::new( + msix_vectors.clone(), + pci_device_bdf, + ))); let pci_config = Self::pci_configuration( device.lock().expect("Poisoned lock").device_type(), &msix_config, - None, ); let virtio_common_config = VirtioPciCommonConfig::new(VirtioPciCommonConfigState { @@ -425,7 +395,7 @@ impl VirtioPciDevice { msix_config.clone(), virtio_common_config.msix_config.clone(), virtio_common_config.msix_queues.clone(), - msi_vectors.clone(), + msix_vectors, )); let virtio_pci_device = VirtioPciDevice { @@ -433,13 +403,10 @@ impl VirtioPciDevice { pci_device_bdf: pci_device_bdf.into(), configuration: pci_config, common_config: virtio_common_config, - msix_config: Some(msix_config), - msix_num: msi_vectors.num_vectors(), device, device_activated: Arc::new(AtomicBool::new(false)), virtio_interrupt: Some(interrupt), memory, - interrupt_source_group: msi_vectors, cap_pci_cfg_info: VirtioPciCfgCapInfo::default(), bar_address: 0, }; @@ -449,21 +416,18 @@ impl VirtioPciDevice { pub fn new_from_state( id: String, - memory: GuestMemoryMmap, + vm: &Arc, device: Arc>, - msi_vectors: Arc, state: VirtioPciDeviceState, ) -> Result { - let msix_config = Self::msix_config( - state.pci_device_bdf.into(), - msi_vectors.clone(), - Some(state.msix_state), - )?; + let msix_config = + MsixConfig::from_state(state.msix_state, vm.clone(), state.pci_device_bdf.into())?; + let vectors = msix_config.vectors.clone(); + let msix_config = Arc::new(Mutex::new(msix_config)); - let pci_config = Self::pci_configuration( - device.lock().expect("Poisoned lock").device_type(), - &msix_config, - Some(state.pci_configuration_state), + let pci_config = PciConfiguration::type0_from_state( + state.pci_configuration_state, + Some(msix_config.clone()), ); let virtio_common_config = VirtioPciCommonConfig::new(state.pci_dev_state); let cap_pci_cfg_info = VirtioPciCfgCapInfo { @@ -475,7 +439,7 @@ impl VirtioPciDevice { msix_config.clone(), virtio_common_config.msix_config.clone(), virtio_common_config.msix_queues.clone(), - msi_vectors.clone(), + vectors, )); let virtio_pci_device = VirtioPciDevice { @@ -483,13 +447,10 @@ impl VirtioPciDevice { pci_device_bdf: state.pci_device_bdf, configuration: pci_config, common_config: virtio_common_config, - msix_config: Some(msix_config), - msix_num: msi_vectors.num_vectors(), device, device_activated: Arc::new(AtomicBool::new(state.device_activated)), virtio_interrupt: Some(interrupt), - memory: memory.clone(), - interrupt_source_group: msi_vectors, + memory: vm.guest_memory().clone(), cap_pci_cfg_info, bar_address: state.bar_address, }; @@ -500,7 +461,7 @@ impl VirtioPciDevice { .lock() .expect("Poisoned lock") .activate( - memory, + virtio_pci_device.memory.clone(), virtio_pci_device.virtio_interrupt.as_ref().unwrap().clone(), ); } @@ -524,7 +485,7 @@ impl VirtioPciDevice { self.configuration.get_bar_addr(VIRTIO_BAR_INDEX as usize) } - fn add_pci_capabilities(&mut self) -> std::result::Result<(), PciDeviceError> { + fn add_pci_capabilities(&mut self) { // Add pointers to the different configuration structures from the PCI capabilities. let common_cap = VirtioPciCap::new( PciCapabilityType::Common, @@ -561,18 +522,21 @@ impl VirtioPciDevice { self.configuration.add_capability(&configuration_cap) + VIRTIO_PCI_CAP_OFFSET; self.cap_pci_cfg_info.cap = configuration_cap; - if self.msix_config.is_some() { + if let Some(interrupt) = &self.virtio_interrupt { let msix_cap = MsixCap::new( VIRTIO_BAR_INDEX, - self.msix_num, + interrupt + .msix_config + .lock() + .expect("Poisoned lock") + .vectors + .num_vectors(), MSIX_TABLE_BAR_OFFSET.try_into().unwrap(), VIRTIO_BAR_INDEX, MSIX_PBA_BAR_OFFSET.try_into().unwrap(), ); self.configuration.add_capability(&msix_cap); } - - Ok(()) } fn read_cap_pci_cfg(&mut self, offset: usize, mut data: &mut [u8]) { @@ -666,13 +630,13 @@ impl VirtioPciDevice { pci_configuration_state: self.configuration.state(), pci_dev_state: self.common_config.state(), msix_state: self - .msix_config + .virtio_interrupt .as_ref() .unwrap() + .msix_config .lock() .expect("Poisoned lock") .state(), - msi_vector_group: self.interrupt_source_group.save(), bar_address: self.bar_address, } } @@ -682,7 +646,7 @@ pub struct VirtioInterruptMsix { msix_config: Arc>, config_vector: Arc, queues_vectors: Arc>>, - interrupt_source_group: Arc, + vectors: Arc, } impl std::fmt::Debug for VirtioInterruptMsix { @@ -700,19 +664,19 @@ impl VirtioInterruptMsix { msix_config: Arc>, config_vector: Arc, queues_vectors: Arc>>, - interrupt_source_group: Arc, + vectors: Arc, ) -> Self { VirtioInterruptMsix { msix_config, config_vector, queues_vectors, - interrupt_source_group, + vectors, } } } impl VirtioInterrupt for VirtioInterruptMsix { - fn trigger(&self, int_type: VirtioInterruptType) -> std::result::Result<(), std::io::Error> { + fn trigger(&self, int_type: VirtioInterruptType) -> std::result::Result<(), InterruptError> { let vector = match int_type { VirtioInterruptType::Config => self.config_vector.load(Ordering::Acquire), VirtioInterruptType::Queue(queue_index) => *self @@ -720,7 +684,7 @@ impl VirtioInterrupt for VirtioInterruptMsix { .lock() .unwrap() .get(queue_index as usize) - .ok_or(ErrorKind::InvalidInput)?, + .ok_or(InterruptError::InvalidVectorIndex(queue_index as usize))?, }; if vector == VIRTQ_MSI_NO_VECTOR { @@ -739,8 +703,7 @@ impl VirtioInterrupt for VirtioInterruptMsix { return Ok(()); } - self.interrupt_source_group - .trigger(vector as InterruptIndex) + self.vectors.trigger(vector as usize) } fn notifier(&self, int_type: VirtioInterruptType) -> Option<&EventFd> { @@ -753,8 +716,7 @@ impl VirtioInterrupt for VirtioInterruptMsix { .get(queue_index as usize)?, }; - self.interrupt_source_group - .notifier(vector as InterruptIndex) + self.vectors.notifier(vector as usize) } fn status(&self) -> Arc { @@ -821,11 +783,7 @@ impl PciDevice for VirtioPciDevice { self.configuration.detect_bar_reprogramming(reg_idx, data) } - fn move_bar( - &mut self, - old_base: u64, - new_base: u64, - ) -> std::result::Result<(), std::io::Error> { + fn move_bar(&mut self, old_base: u64, new_base: u64) -> std::result::Result<(), anyhow::Error> { // We only update our idea of the bar in order to support free_bars() above. // The majority of the reallocation is done inside DeviceManager. if self.bar_address == old_base { @@ -859,16 +817,18 @@ impl PciDevice for VirtioPciDevice { warn!("pci: unexpected read to notification BAR. Offset {o:#x}"); } o if (MSIX_TABLE_BAR_OFFSET..MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE).contains(&o) => { - if let Some(msix_config) = &self.msix_config { - msix_config + if let Some(interrupt) = &self.virtio_interrupt { + interrupt + .msix_config .lock() .unwrap() .read_table(o - MSIX_TABLE_BAR_OFFSET, data); } } o if (MSIX_PBA_BAR_OFFSET..MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE).contains(&o) => { - if let Some(msix_config) = &self.msix_config { - msix_config + if let Some(interrupt) = &self.virtio_interrupt { + interrupt + .msix_config .lock() .unwrap() .read_pba(o - MSIX_PBA_BAR_OFFSET, data); @@ -901,16 +861,18 @@ impl PciDevice for VirtioPciDevice { warn!("pci: unexpected write to notification BAR. Offset {o:#x}"); } o if (MSIX_TABLE_BAR_OFFSET..MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE).contains(&o) => { - if let Some(msix_config) = &self.msix_config { - msix_config + if let Some(interrupt) = &self.virtio_interrupt { + interrupt + .msix_config .lock() .unwrap() .write_table(o - MSIX_TABLE_BAR_OFFSET, data); } } o if (MSIX_PBA_BAR_OFFSET..MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE).contains(&o) => { - if let Some(msix_config) = &self.msix_config { - msix_config + if let Some(interrupt) = &self.virtio_interrupt { + interrupt + .msix_config .lock() .unwrap() .write_pba(o - MSIX_PBA_BAR_OFFSET, data); @@ -945,9 +907,9 @@ impl PciDevice for VirtioPciDevice { let mut device = self.device.lock().unwrap(); let reset_result = device.reset(); match reset_result { - Some((virtio_interrupt, mut _queue_evts)) => { + Some(_) => { // Upon reset the device returns its interrupt EventFD - self.virtio_interrupt = Some(virtio_interrupt); + self.virtio_interrupt = None; self.device_activated.store(false, Ordering::SeqCst); // Reset queue readiness (changes queue_enable), queue sizes @@ -988,9 +950,7 @@ mod tests { use event_manager::MutEventSubscriber; use linux_loader::loader::Cmdline; - use pci::{ - MsixCap, PciBdf, PciCapability, PciCapabilityId, PciClassCode, PciDevice, PciSubclass, - }; + use pci::{PciCapabilityId, PciClassCode, PciSubclass}; use vm_memory::{ByteValued, Le32}; use super::{PciCapabilityType, VirtioPciDevice}; @@ -1007,6 +967,8 @@ mod tests { NOTIFY_OFF_MULTIPLIER, PciVirtioSubclass, VirtioPciCap, VirtioPciCfgCap, VirtioPciNotifyCap, }; + use crate::pci::PciDevice; + use crate::pci::msix::MsixCap; use crate::rate_limiter::RateLimiter; use crate::utils::u64_to_usize; use crate::{Vm, Vmm}; diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 7a168db0146..ace273bb94c 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -90,6 +90,8 @@ pub mod gdb; pub mod logger; /// microVM Metadata Service MMDS pub mod mmds; +/// PCI specific emulation code. +pub mod pci; /// Save/restore utilities. pub mod persist; /// Resource store for configured microVM resources. diff --git a/src/vmm/src/logger/metrics.rs b/src/vmm/src/logger/metrics.rs index 294af948591..4b80cf2f2f5 100644 --- a/src/vmm/src/logger/metrics.rs +++ b/src/vmm/src/logger/metrics.rs @@ -791,6 +791,25 @@ impl VcpuMetrics { } } +/// MicroVM interrupt-related metrics +#[derive(Debug, Default, Serialize)] +pub struct InterruptMetrics { + /// Number of interrupt triggers + pub triggers: SharedIncMetric, + /// Configuration updates + pub config_updates: SharedIncMetric, +} + +impl InterruptMetrics { + /// Const default construction. + pub const fn new() -> Self { + Self { + triggers: SharedIncMetric::new(), + config_updates: SharedIncMetric::new(), + } + } +} + /// Metrics specific to the machine manager as a whole. #[derive(Debug, Default, Serialize)] pub struct VmmMetrics { @@ -899,6 +918,8 @@ pub struct FirecrackerMetrics { #[serde(flatten)] /// Vhost-user device related metrics. pub vhost_user_ser: VhostUserMetricsSerializeProxy, + /// Interrupt related metrics + pub interrupts: InterruptMetrics, } impl FirecrackerMetrics { /// Const default construction. @@ -924,6 +945,7 @@ impl FirecrackerMetrics { vsock_ser: VsockMetricsSerializeProxy {}, entropy_ser: EntropyMetricsSerializeProxy {}, vhost_user_ser: VhostUserMetricsSerializeProxy {}, + interrupts: InterruptMetrics::new(), } } } diff --git a/src/pci/src/bus.rs b/src/vmm/src/pci/bus.rs similarity index 89% rename from src/pci/src/bus.rs rename to src/vmm/src/pci/bus.rs index 7739e8d9ef8..a88263226de 100644 --- a/src/pci/src/bus.rs +++ b/src/vmm/src/pci/bus.rs @@ -6,39 +6,31 @@ // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause use std::collections::HashMap; +use std::fmt::Debug; use std::ops::DerefMut; use std::sync::{Arc, Barrier, Mutex}; use byteorder::{ByteOrder, LittleEndian}; -use vm_device::BusDevice; +use pci::{PciBridgeSubclass, PciClassCode}; -use crate::configuration::{PciBridgeSubclass, PciClassCode, PciConfiguration, PciHeaderType}; -use crate::device::{DeviceRelocation, Error as PciDeviceError, PciDevice}; - -const VENDOR_ID_INTEL: u16 = 0x8086; -const DEVICE_ID_INTEL_VIRT_PCIE_HOST: u16 = 0x0d57; -const NUM_DEVICE_IDS: usize = 32; +use crate::logger::error; +use crate::pci::configuration::PciConfiguration; +use crate::pci::{DeviceRelocation, PciDevice}; +use crate::utils::u64_to_usize; +use crate::vstate::bus::BusDevice; /// Errors for device manager. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum PciRootError { - /// Could not allocate device address space for the device. - AllocateDeviceAddrs(PciDeviceError), - /// Could not allocate an IRQ number. - AllocateIrq, - /// Could not add a device to the port io bus. - PioInsert(vm_device::BusError), - /// Could not add a device to the mmio bus. - MmioInsert(vm_device::BusError), /// Could not find an available device slot on the PCI bus. NoPciDeviceSlotAvailable, - /// Invalid PCI device identifier provided. - InvalidPciDeviceSlot(usize), - /// Valid PCI device identifier but already used. - AlreadyInUsePciDeviceSlot(usize), } -pub type Result = std::result::Result; +const VENDOR_ID_INTEL: u16 = 0x8086; +const DEVICE_ID_INTEL_VIRT_PCIE_HOST: u16 = 0x0d57; +const NUM_DEVICE_IDS: usize = 32; + +#[derive(Debug)] /// Emulates the PCI Root bridge device. pub struct PciRoot { /// Configuration space. @@ -52,18 +44,15 @@ impl PciRoot { PciRoot { config } } else { PciRoot { - config: PciConfiguration::new( + config: PciConfiguration::new_type0( VENDOR_ID_INTEL, DEVICE_ID_INTEL_VIRT_PCIE_HOST, 0, PciClassCode::BridgeDevice, &PciBridgeSubclass::HostBridge, - None, - PciHeaderType::Device, 0, 0, None, - None, ), } } @@ -88,16 +77,26 @@ impl PciDevice for PciRoot { } } +/// A PCI bus definition pub struct PciBus { /// Devices attached to this bus. /// Device 0 is host bridge. pub devices: HashMap>>, - device_reloc: Arc, + vm: Arc, device_ids: Vec, } +impl Debug for PciBus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Root Firecracker PCI Bus") + .field("device_ids", &self.device_ids) + .finish() + } +} + impl PciBus { - pub fn new(pci_root: PciRoot, device_reloc: Arc) -> Self { + /// Create a new PCI bus + pub fn new(pci_root: PciRoot, vm: Arc) -> Self { let mut devices: HashMap>> = HashMap::new(); let mut device_ids: Vec = vec![false; NUM_DEVICE_IDS]; @@ -106,21 +105,22 @@ impl PciBus { PciBus { devices, - device_reloc, + vm, device_ids, } } - pub fn add_device(&mut self, device_id: u32, device: Arc>) -> Result<()> { + /// Insert a device in the bus + pub fn add_device(&mut self, device_id: u32, device: Arc>) { self.devices.insert(device_id, device); - Ok(()) } - pub fn next_device_id(&mut self) -> Result { + /// Get a new device ID + pub fn next_device_id(&mut self) -> Result { for (idx, device_id) in self.device_ids.iter_mut().enumerate() { if !(*device_id) { *device_id = true; - return Ok(idx as u32); + return Ok(idx.try_into().unwrap()); } } @@ -128,6 +128,16 @@ impl PciBus { } } +#[cfg(target_arch = "x86_64")] +/// IO port used for configuring PCI over the legacy bus +pub const PCI_CONFIG_IO_PORT: u64 = 0xcf8; +#[cfg(target_arch = "x86_64")] +/// Size of IO ports we are using to configure PCI over the legacy bus. We have two ports, 0xcf8 +/// and 0xcfc 32bits long. +pub const PCI_CONFIG_IO_PORT_SIZE: u64 = 0x8; + +/// Wrapper that allows handling PCI configuration over the legacy Bus +#[derive(Debug)] pub struct PciConfigIo { /// Config space register. config_address: u32, @@ -135,6 +145,7 @@ pub struct PciConfigIo { } impl PciConfigIo { + /// New Port IO configuration handler pub fn new(pci_bus: Arc>) -> Self { PciConfigIo { config_address: 0, @@ -142,6 +153,7 @@ impl PciConfigIo { } } + /// Handle a configuration space read over Port IO pub fn config_space_read(&self) -> u32 { let enabled = (self.config_address & 0x8000_0000) != 0; if !enabled { @@ -169,14 +181,15 @@ impl PciConfigIo { .lock() .unwrap() .devices - .get(&(device as u32)) + .get(&(device.try_into().unwrap())) .map_or(0xffff_ffff, |d| { d.lock().unwrap().read_config_register(register) }) } + /// Handle a configuration space write over Port IO pub fn config_space_write(&mut self, offset: u64, data: &[u8]) -> Option> { - if offset as usize + data.len() > 4 { + if u64_to_usize(offset) + data.len() > 4 { return None; } @@ -202,23 +215,23 @@ impl PciConfigIo { // be a problem currently, since we mainly access this when we are setting up devices. // We might want to do some profiling to ensure this does not become a bottleneck. let pci_bus = self.pci_bus.as_ref().lock().unwrap(); - if let Some(d) = pci_bus.devices.get(&(device as u32)) { + if let Some(d) = pci_bus.devices.get(&(device.try_into().unwrap())) { let mut device = d.lock().unwrap(); // Find out if one of the device's BAR is being reprogrammed, and // reprogram it if needed. - if let Some(params) = device.detect_bar_reprogramming(register, data) { - if let Err(e) = pci_bus.device_reloc.move_bar( + if let Some(params) = device.detect_bar_reprogramming(register, data) + && let Err(e) = pci_bus.vm.move_bar( params.old_base, params.new_base, params.len, device.deref_mut(), - ) { - error!( - "Failed moving device BAR: {}: 0x{:x}->0x{:x}(0x{:x})", - e, params.old_base, params.new_base, params.len - ); - } + ) + { + error!( + "Failed moving device BAR: {}: 0x{:x}->0x{:x}(0x{:x})", + e, params.old_base, params.new_base, params.len + ); } // Update the register value @@ -229,7 +242,7 @@ impl PciConfigIo { } fn set_config_address(&mut self, offset: u64, data: &[u8]) { - if offset as usize + data.len() > 4 { + if u64_to_usize(offset) + data.len() > 4 { return; } let (mask, value): (u32, u32) = match data.len() { @@ -251,7 +264,7 @@ impl PciConfigIo { impl BusDevice for PciConfigIo { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { // Only allow reads to the register boundary. - let start = offset as usize % 4; + let start = u64_to_usize(offset) % 4; let end = start + data.len(); if end > 4 { for d in data.iter_mut() { @@ -268,7 +281,7 @@ impl BusDevice for PciConfigIo { }; for i in start..end { - data[i - start] = (value >> (i * 8)) as u8; + data[i - start] = ((value >> (i * 8)) & 0xff) as u8; } } @@ -285,12 +298,14 @@ impl BusDevice for PciConfigIo { } } +#[derive(Debug)] /// Emulates PCI memory-mapped configuration access mechanism. pub struct PciConfigMmio { pci_bus: Arc>, } impl PciConfigMmio { + /// New MMIO configuration handler object pub fn new(pci_bus: Arc>) -> Self { PciConfigMmio { pci_bus } } @@ -312,14 +327,14 @@ impl PciConfigMmio { .lock() .unwrap() .devices - .get(&(device as u32)) + .get(&(device.try_into().unwrap())) .map_or(0xffff_ffff, |d| { d.lock().unwrap().read_config_register(register) }) } fn config_space_write(&mut self, config_address: u32, offset: u64, data: &[u8]) { - if offset as usize + data.len() > 4 { + if u64_to_usize(offset) + data.len() > 4 { return; } @@ -336,23 +351,23 @@ impl PciConfigMmio { } let pci_bus = self.pci_bus.lock().unwrap(); - if let Some(d) = pci_bus.devices.get(&(device as u32)) { + if let Some(d) = pci_bus.devices.get(&(device.try_into().unwrap())) { let mut device = d.lock().unwrap(); // Find out if one of the device's BAR is being reprogrammed, and // reprogram it if needed. - if let Some(params) = device.detect_bar_reprogramming(register, data) { - if let Err(e) = pci_bus.device_reloc.move_bar( + if let Some(params) = device.detect_bar_reprogramming(register, data) + && let Err(e) = pci_bus.vm.move_bar( params.old_base, params.new_base, params.len, device.deref_mut(), - ) { - error!( - "Failed moving device BAR: {}: 0x{:x}->0x{:x}(0x{:x})", - e, params.old_base, params.new_base, params.len - ); - } + ) + { + error!( + "Failed moving device BAR: {}: 0x{:x}->0x{:x}(0x{:x})", + e, params.old_base, params.new_base, params.len + ); } // Update the register value @@ -364,7 +379,7 @@ impl PciConfigMmio { impl BusDevice for PciConfigMmio { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { // Only allow reads to the register boundary. - let start = offset as usize % 4; + let start = u64_to_usize(offset) % 4; let end = start + data.len(); if end > 4 || offset > u64::from(u32::MAX) { for d in data { @@ -373,9 +388,9 @@ impl BusDevice for PciConfigMmio { return; } - let value = self.config_space_read(offset as u32); + let value = self.config_space_read(offset.try_into().unwrap()); for i in start..end { - data[i - start] = (value >> (i * 8)) as u8; + data[i - start] = ((value >> (i * 8)) & 0xff) as u8; } } @@ -383,7 +398,7 @@ impl BusDevice for PciConfigMmio { if offset > u64::from(u32::MAX) { return None; } - self.config_space_write(offset as u32, offset % 4, data); + self.config_space_write(offset.try_into().unwrap(), offset % 4, data); None } @@ -438,14 +453,13 @@ mod tests { use std::sync::atomic::AtomicUsize; use std::sync::{Arc, Mutex}; - use vm_device::BusDevice; + use pci::{PciClassCode, PciMassStorageSubclass}; use super::{PciBus, PciConfigIo, PciConfigMmio, PciRoot}; - use crate::bus::{DEVICE_ID_INTEL_VIRT_PCIE_HOST, VENDOR_ID_INTEL}; - use crate::{ - DeviceRelocation, PciClassCode, PciConfiguration, PciDevice, PciHeaderType, - PciMassStorageSubclass, - }; + use crate::pci::bus::{DEVICE_ID_INTEL_VIRT_PCIE_HOST, VENDOR_ID_INTEL}; + use crate::pci::configuration::PciConfiguration; + use crate::pci::{BarReprogrammingParams, DeviceRelocation, PciDevice}; + use crate::vstate::bus::BusDevice; #[derive(Debug, Default)] struct RelocationMock { @@ -464,8 +478,8 @@ mod tests { _old_base: u64, _new_base: u64, _len: u64, - _pci_dev: &mut dyn crate::PciDevice, - ) -> std::result::Result<(), std::io::Error> { + _pci_dev: &mut dyn PciDevice, + ) -> Result<(), anyhow::Error> { self.reloc_cnt .fetch_add(1, std::sync::atomic::Ordering::SeqCst); Ok(()) @@ -476,18 +490,15 @@ mod tests { impl PciDevMock { fn new() -> Self { - let mut config = PciConfiguration::new( + let mut config = PciConfiguration::new_type0( 0x42, 0x0, 0x0, PciClassCode::MassStorage, &PciMassStorageSubclass::SerialScsiController, - None, - PciHeaderType::Device, 0x13, 0x12, None, - None, ); config.add_pci_bar(0, 0x1000, 0x1000); @@ -515,7 +526,7 @@ mod tests { &mut self, reg_idx: usize, data: &[u8], - ) -> Option { + ) -> Option { self.0.detect_bar_reprogramming(reg_idx, data) } } @@ -615,8 +626,8 @@ mod tests { let mock = Arc::new(RelocationMock::default()); let root = PciRoot::new(None); let mut bus = PciBus::new(root, mock.clone()); - bus.add_device(1, Arc::new(Mutex::new(PciDevMock::new()))) - .unwrap(); + bus.add_device(1, Arc::new(Mutex::new(PciDevMock::new()))); + let bus = Arc::new(Mutex::new(bus)); (PciConfigMmio::new(bus.clone()), PciConfigIo::new(bus), mock) } diff --git a/src/pci/src/configuration.rs b/src/vmm/src/pci/configuration.rs similarity index 58% rename from src/pci/src/configuration.rs rename to src/vmm/src/pci/configuration.rs index b9607d76611..83d74e62b94 100644 --- a/src/pci/src/configuration.rs +++ b/src/vmm/src/pci/configuration.rs @@ -8,10 +8,13 @@ use std::sync::{Arc, Mutex}; use byteorder::{ByteOrder, LittleEndian}; +use pci::{PciCapabilityId, PciClassCode, PciSubclass}; use serde::{Deserialize, Serialize}; -use crate::device::BarReprogrammingParams; -use crate::MsixConfig; +use super::BarReprogrammingParams; +use super::msix::MsixConfig; +use crate::logger::{info, warn}; +use crate::utils::u64_to_usize; // The number of 32bit registers in the config space, 4096 bytes. const NUM_CONFIGURATION_REGISTERS: usize = 1024; @@ -29,337 +32,11 @@ const CAPABILITY_LIST_HEAD_OFFSET: usize = 0x34; const FIRST_CAPABILITY_OFFSET: usize = 0x40; const CAPABILITY_MAX_OFFSET: usize = 192; -pub const PCI_CONFIGURATION_ID: &str = "pci_configuration"; - -/// Represents the types of PCI headers allowed in the configuration registers. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum PciHeaderType { - Device, - Bridge, -} - -/// Classes of PCI nodes. -#[allow(dead_code)] -#[derive(Copy, Clone)] -pub enum PciClassCode { - TooOld, - MassStorage, - NetworkController, - DisplayController, - MultimediaController, - MemoryController, - BridgeDevice, - SimpleCommunicationController, - BaseSystemPeripheral, - InputDevice, - DockingStation, - Processor, - SerialBusController, - WirelessController, - IntelligentIoController, - EncryptionController, - DataAcquisitionSignalProcessing, - Other = 0xff, -} - -impl PciClassCode { - pub fn get_register_value(self) -> u8 { - self as u8 - } -} - -/// A PCI subclass. Each class in `PciClassCode` can specify a unique set of subclasses. This trait -/// is implemented by each subclass. It allows use of a trait object to generate configurations. -pub trait PciSubclass { - /// Convert this subclass to the value used in the PCI specification. - fn get_register_value(&self) -> u8; -} - -/// Subclasses of the MultimediaController class. -#[allow(dead_code)] -#[derive(Copy, Clone)] -pub enum PciMultimediaSubclass { - VideoController = 0x00, - AudioController = 0x01, - TelephonyDevice = 0x02, - AudioDevice = 0x03, - Other = 0x80, -} - -impl PciSubclass for PciMultimediaSubclass { - fn get_register_value(&self) -> u8 { - *self as u8 - } -} - -/// Subclasses of the BridgeDevice -#[allow(dead_code)] -#[derive(Copy, Clone)] -pub enum PciBridgeSubclass { - HostBridge = 0x00, - IsaBridge = 0x01, - EisaBridge = 0x02, - McaBridge = 0x03, - PciToPciBridge = 0x04, - PcmciaBridge = 0x05, - NuBusBridge = 0x06, - CardBusBridge = 0x07, - RacEwayBridge = 0x08, - PciToPciSemiTransparentBridge = 0x09, - InfiniBrandToPciHostBridge = 0x0a, - OtherBridgeDevice = 0x80, -} - -impl PciSubclass for PciBridgeSubclass { - fn get_register_value(&self) -> u8 { - *self as u8 - } -} - -/// Subclass of the SerialBus -#[allow(dead_code)] -#[derive(Copy, Clone)] -pub enum PciSerialBusSubClass { - Firewire = 0x00, - Accessbus = 0x01, - Ssa = 0x02, - Usb = 0x03, -} - -impl PciSubclass for PciSerialBusSubClass { - fn get_register_value(&self) -> u8 { - *self as u8 - } -} - -/// Mass Storage Sub Classes -#[allow(dead_code)] -#[derive(Copy, Clone)] -pub enum PciMassStorageSubclass { - ScsiStorage = 0x00, - IdeInterface = 0x01, - FloppyController = 0x02, - IpiController = 0x03, - RaidController = 0x04, - AtaController = 0x05, - SataController = 0x06, - SerialScsiController = 0x07, - NvmController = 0x08, - MassStorage = 0x80, -} - -impl PciSubclass for PciMassStorageSubclass { - fn get_register_value(&self) -> u8 { - *self as u8 - } -} - -/// Network Controller Sub Classes -#[allow(dead_code)] -#[derive(Copy, Clone)] -pub enum PciNetworkControllerSubclass { - EthernetController = 0x00, - TokenRingController = 0x01, - FddiController = 0x02, - AtmController = 0x03, - IsdnController = 0x04, - WorldFipController = 0x05, - PicmgController = 0x06, - InfinibandController = 0x07, - FabricController = 0x08, - NetworkController = 0x80, -} - -impl PciSubclass for PciNetworkControllerSubclass { - fn get_register_value(&self) -> u8 { - *self as u8 - } -} - -/// Trait to define a PCI class programming interface -/// -/// Each combination of `PciClassCode` and `PciSubclass` can specify a -/// set of register-level programming interfaces. -/// This trait is implemented by each programming interface. -/// It allows use of a trait object to generate configurations. -pub trait PciProgrammingInterface { - /// Convert this programming interface to the value used in the PCI specification. - fn get_register_value(&self) -> u8; -} - -/// Types of PCI capabilities. -#[derive(Debug, PartialEq, Eq, Copy, Clone)] -#[allow(dead_code)] -#[allow(non_camel_case_types)] -#[repr(u8)] -pub enum PciCapabilityId { - ListId = 0, - PowerManagement = 0x01, - AcceleratedGraphicsPort = 0x02, - VitalProductData = 0x03, - SlotIdentification = 0x04, - MessageSignalledInterrupts = 0x05, - CompactPciHotSwap = 0x06, - PciX = 0x07, - HyperTransport = 0x08, - VendorSpecific = 0x09, - Debugport = 0x0A, - CompactPciCentralResourceControl = 0x0B, - PciStandardHotPlugController = 0x0C, - BridgeSubsystemVendorDeviceId = 0x0D, - AgpTargetPciPcibridge = 0x0E, - SecureDevice = 0x0F, - PciExpress = 0x10, - MsiX = 0x11, - SataDataIndexConf = 0x12, - PciAdvancedFeatures = 0x13, - PciEnhancedAllocation = 0x14, -} - -impl From for PciCapabilityId { - fn from(c: u8) -> Self { - match c { - 0 => PciCapabilityId::ListId, - 0x01 => PciCapabilityId::PowerManagement, - 0x02 => PciCapabilityId::AcceleratedGraphicsPort, - 0x03 => PciCapabilityId::VitalProductData, - 0x04 => PciCapabilityId::SlotIdentification, - 0x05 => PciCapabilityId::MessageSignalledInterrupts, - 0x06 => PciCapabilityId::CompactPciHotSwap, - 0x07 => PciCapabilityId::PciX, - 0x08 => PciCapabilityId::HyperTransport, - 0x09 => PciCapabilityId::VendorSpecific, - 0x0A => PciCapabilityId::Debugport, - 0x0B => PciCapabilityId::CompactPciCentralResourceControl, - 0x0C => PciCapabilityId::PciStandardHotPlugController, - 0x0D => PciCapabilityId::BridgeSubsystemVendorDeviceId, - 0x0E => PciCapabilityId::AgpTargetPciPcibridge, - 0x0F => PciCapabilityId::SecureDevice, - 0x10 => PciCapabilityId::PciExpress, - 0x11 => PciCapabilityId::MsiX, - 0x12 => PciCapabilityId::SataDataIndexConf, - 0x13 => PciCapabilityId::PciAdvancedFeatures, - 0x14 => PciCapabilityId::PciEnhancedAllocation, - _ => PciCapabilityId::ListId, - } - } -} - -/// Types of PCI Express capabilities. -#[derive(PartialEq, Eq, Copy, Clone, Debug)] -#[allow(dead_code)] -#[repr(u16)] -pub enum PciExpressCapabilityId { - NullCapability = 0x0000, - AdvancedErrorReporting = 0x0001, - VirtualChannelMultiFunctionVirtualChannelNotPresent = 0x0002, - DeviceSerialNumber = 0x0003, - PowerBudgeting = 0x0004, - RootComplexLinkDeclaration = 0x0005, - RootComplexInternalLinkControl = 0x0006, - RootComplexEventCollectorEndpointAssociation = 0x0007, - MultiFunctionVirtualChannel = 0x0008, - VirtualChannelMultiFunctionVirtualChannelPresent = 0x0009, - RootComplexRegisterBlock = 0x000a, - VendorSpecificExtendedCapability = 0x000b, - ConfigurationAccessCorrelation = 0x000c, - AccessControlServices = 0x000d, - AlternativeRoutingIdentificationInterpretation = 0x000e, - AddressTranslationServices = 0x000f, - SingleRootIoVirtualization = 0x0010, - DeprecatedMultiRootIoVirtualization = 0x0011, - Multicast = 0x0012, - PageRequestInterface = 0x0013, - ReservedForAmd = 0x0014, - ResizeableBar = 0x0015, - DynamicPowerAllocation = 0x0016, - ThpRequester = 0x0017, - LatencyToleranceReporting = 0x0018, - SecondaryPciExpress = 0x0019, - ProtocolMultiplexing = 0x001a, - ProcessAddressSpaceId = 0x001b, - LnRequester = 0x001c, - DownstreamPortContainment = 0x001d, - L1PmSubstates = 0x001e, - PrecisionTimeMeasurement = 0x001f, - PciExpressOverMphy = 0x0020, - FRSQueueing = 0x0021, - ReadinessTimeReporting = 0x0022, - DesignatedVendorSpecificExtendedCapability = 0x0023, - VfResizeableBar = 0x0024, - DataLinkFeature = 0x0025, - PhysicalLayerSixteenGts = 0x0026, - LaneMarginingAtTheReceiver = 0x0027, - HierarchyId = 0x0028, - NativePcieEnclosureManagement = 0x0029, - PhysicalLayerThirtyTwoGts = 0x002a, - AlternateProtocol = 0x002b, - SystemFirmwareIntermediary = 0x002c, - ShadowFunctions = 0x002d, - DataObjectExchange = 0x002e, - Reserved = 0x002f, - ExtendedCapabilitiesAbsence = 0xffff, -} - -impl From for PciExpressCapabilityId { - fn from(c: u16) -> Self { - match c { - 0x0000 => PciExpressCapabilityId::NullCapability, - 0x0001 => PciExpressCapabilityId::AdvancedErrorReporting, - 0x0002 => PciExpressCapabilityId::VirtualChannelMultiFunctionVirtualChannelNotPresent, - 0x0003 => PciExpressCapabilityId::DeviceSerialNumber, - 0x0004 => PciExpressCapabilityId::PowerBudgeting, - 0x0005 => PciExpressCapabilityId::RootComplexLinkDeclaration, - 0x0006 => PciExpressCapabilityId::RootComplexInternalLinkControl, - 0x0007 => PciExpressCapabilityId::RootComplexEventCollectorEndpointAssociation, - 0x0008 => PciExpressCapabilityId::MultiFunctionVirtualChannel, - 0x0009 => PciExpressCapabilityId::VirtualChannelMultiFunctionVirtualChannelPresent, - 0x000a => PciExpressCapabilityId::RootComplexRegisterBlock, - 0x000b => PciExpressCapabilityId::VendorSpecificExtendedCapability, - 0x000c => PciExpressCapabilityId::ConfigurationAccessCorrelation, - 0x000d => PciExpressCapabilityId::AccessControlServices, - 0x000e => PciExpressCapabilityId::AlternativeRoutingIdentificationInterpretation, - 0x000f => PciExpressCapabilityId::AddressTranslationServices, - 0x0010 => PciExpressCapabilityId::SingleRootIoVirtualization, - 0x0011 => PciExpressCapabilityId::DeprecatedMultiRootIoVirtualization, - 0x0012 => PciExpressCapabilityId::Multicast, - 0x0013 => PciExpressCapabilityId::PageRequestInterface, - 0x0014 => PciExpressCapabilityId::ReservedForAmd, - 0x0015 => PciExpressCapabilityId::ResizeableBar, - 0x0016 => PciExpressCapabilityId::DynamicPowerAllocation, - 0x0017 => PciExpressCapabilityId::ThpRequester, - 0x0018 => PciExpressCapabilityId::LatencyToleranceReporting, - 0x0019 => PciExpressCapabilityId::SecondaryPciExpress, - 0x001a => PciExpressCapabilityId::ProtocolMultiplexing, - 0x001b => PciExpressCapabilityId::ProcessAddressSpaceId, - 0x001c => PciExpressCapabilityId::LnRequester, - 0x001d => PciExpressCapabilityId::DownstreamPortContainment, - 0x001e => PciExpressCapabilityId::L1PmSubstates, - 0x001f => PciExpressCapabilityId::PrecisionTimeMeasurement, - 0x0020 => PciExpressCapabilityId::PciExpressOverMphy, - 0x0021 => PciExpressCapabilityId::FRSQueueing, - 0x0022 => PciExpressCapabilityId::ReadinessTimeReporting, - 0x0023 => PciExpressCapabilityId::DesignatedVendorSpecificExtendedCapability, - 0x0024 => PciExpressCapabilityId::VfResizeableBar, - 0x0025 => PciExpressCapabilityId::DataLinkFeature, - 0x0026 => PciExpressCapabilityId::PhysicalLayerSixteenGts, - 0x0027 => PciExpressCapabilityId::LaneMarginingAtTheReceiver, - 0x0028 => PciExpressCapabilityId::HierarchyId, - 0x0029 => PciExpressCapabilityId::NativePcieEnclosureManagement, - 0x002a => PciExpressCapabilityId::PhysicalLayerThirtyTwoGts, - 0x002b => PciExpressCapabilityId::AlternateProtocol, - 0x002c => PciExpressCapabilityId::SystemFirmwareIntermediary, - 0x002d => PciExpressCapabilityId::ShadowFunctions, - 0x002e => PciExpressCapabilityId::DataObjectExchange, - 0xffff => PciExpressCapabilityId::ExtendedCapabilitiesAbsence, - _ => PciExpressCapabilityId::Reserved, - } - } -} - /// A PCI capability list. Devices can optionally specify capabilities in their configuration space. pub trait PciCapability { + /// Bytes of the PCI capability fn bytes(&self) -> &[u8]; + /// Id of the PCI capability fn id(&self) -> PciCapabilityId; } @@ -388,6 +65,7 @@ struct PciBar { used: bool, } +/// PCI configuration space state for (de)serialization #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PciConfigurationState { registers: Vec, @@ -397,6 +75,7 @@ pub struct PciConfigurationState { msix_cap_reg_idx: Option, } +#[derive(Debug)] /// Contains the configuration space of a PCI node. /// /// See the [specification](https://en.wikipedia.org/wiki/PCI_configuration_space). @@ -411,104 +90,58 @@ pub struct PciConfiguration { msix_config: Option>>, } -/// See pci_regs.h in kernel -#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)] -pub enum PciBarRegionType { - Memory32BitRegion = 0, - IoRegion = 0x01, - Memory64BitRegion = 0x04, -} - -#[derive(Debug, Copy, Clone, Serialize, Deserialize)] -pub enum PciBarPrefetchable { - NotPrefetchable = 0, - Prefetchable = 0x08, -} - -impl From for bool { - fn from(val: PciBarPrefetchable) -> Self { - match val { - PciBarPrefetchable::NotPrefetchable => false, - PciBarPrefetchable::Prefetchable => true, - } - } -} - -#[derive(Debug, Copy, Clone, Serialize, Deserialize)] -pub struct PciBarConfiguration { - pub addr: u64, - pub size: u64, - pub idx: usize, - pub region_type: PciBarRegionType, - pub prefetchable: PciBarPrefetchable, -} - impl PciConfiguration { #[allow(clippy::too_many_arguments)] - pub fn new( + /// Create a new type 0 PCI configuration + pub fn new_type0( vendor_id: u16, device_id: u16, revision_id: u8, class_code: PciClassCode, subclass: &dyn PciSubclass, - programming_interface: Option<&dyn PciProgrammingInterface>, - header_type: PciHeaderType, subsystem_vendor_id: u16, subsystem_id: u16, msix_config: Option>>, - state: Option, ) -> Self { - let (registers, writable_bits, bars, last_capability, msix_cap_reg_idx) = - if let Some(state) = state { - ( - state.registers.try_into().unwrap(), - state.writable_bits.try_into().unwrap(), - state.bars.try_into().unwrap(), - state.last_capability, - state.msix_cap_reg_idx, - ) - } else { - let mut registers = [0u32; NUM_CONFIGURATION_REGISTERS]; - let mut writable_bits = [0u32; NUM_CONFIGURATION_REGISTERS]; - registers[0] = (u32::from(device_id) << 16) | u32::from(vendor_id); - // TODO(dverkamp): Status should be write-1-to-clear - writable_bits[1] = 0x0000_ffff; // Status (r/o), command (r/w) - let pi = if let Some(pi) = programming_interface { - pi.get_register_value() - } else { - 0 - }; - registers[2] = (u32::from(class_code.get_register_value()) << 24) - | (u32::from(subclass.get_register_value()) << 16) - | (u32::from(pi) << 8) - | u32::from(revision_id); - writable_bits[3] = 0x0000_00ff; // Cacheline size (r/w) - - // We only every create device types. No bridges used at the moment - assert_eq!(header_type, PciHeaderType::Device); - registers[3] = 0x0000_0000; // Header type 0 (device) - writable_bits[15] = 0x0000_00ff; // IRQ line (r/w) - registers[11] = (u32::from(subsystem_id) << 16) | u32::from(subsystem_vendor_id); - - ( - registers, - writable_bits, - [PciBar::default(); NUM_BAR_REGS], - None, - None, - ) - }; + let mut registers = [0u32; NUM_CONFIGURATION_REGISTERS]; + let mut writable_bits = [0u32; NUM_CONFIGURATION_REGISTERS]; + registers[0] = (u32::from(device_id) << 16) | u32::from(vendor_id); + // TODO(dverkamp): Status should be write-1-to-clear + writable_bits[1] = 0x0000_ffff; // Status (r/o), command (r/w) + registers[2] = (u32::from(class_code.get_register_value()) << 24) + | (u32::from(subclass.get_register_value()) << 16) + | u32::from(revision_id); + writable_bits[3] = 0x0000_00ff; // Cacheline size (r/w) + registers[3] = 0x0000_0000; // Header type 0 (device) + writable_bits[15] = 0x0000_00ff; // IRQ line (r/w) + registers[11] = (u32::from(subsystem_id) << 16) | u32::from(subsystem_vendor_id); PciConfiguration { registers, writable_bits, - bars, - last_capability, - msix_cap_reg_idx, + bars: [PciBar::default(); NUM_BAR_REGS], + last_capability: None, + msix_cap_reg_idx: None, + msix_config, + } + } + + /// Create a type 0 PCI configuration from snapshot state + pub fn type0_from_state( + state: PciConfigurationState, + msix_config: Option>>, + ) -> Self { + PciConfiguration { + registers: state.registers.try_into().unwrap(), + writable_bits: state.writable_bits.try_into().unwrap(), + bars: state.bars.try_into().unwrap(), + last_capability: state.last_capability, + msix_cap_reg_idx: state.msix_cap_reg_idx, msix_config, } } + /// Create PCI configuration space state pub fn state(&self) -> PciConfigurationState { PciConfigurationState { registers: self.registers.to_vec(), @@ -638,9 +271,9 @@ impl PciConfiguration { // // | Bit 3 | Bits 2-1 | Bit 0 | // | Prefetchable | type | Always 0 | - self.registers[reg_idx] = (((addr & 0xffff_ffff) as u32) & BAR_MEM_ADDR_MASK) - | (PciBarPrefetchable::NotPrefetchable as u32) - | (PciBarRegionType::Memory64BitRegion as u32); + // + // Non-prefetchable, 64 bits BAR region + self.registers[reg_idx] = (((addr & 0xffff_ffff) as u32) & BAR_MEM_ADDR_MASK) | 4u32; self.writable_bits[reg_idx] = BAR_MEM_ADDR_MASK; self.bars[bar_idx].addr = self.registers[reg_idx]; self.bars[bar_idx].used = true; @@ -676,7 +309,7 @@ impl PciConfiguration { let end_offset = cap_offset.checked_add(total_len).unwrap(); assert!(end_offset <= CAPABILITY_MAX_OFFSET); self.registers[STATUS_REG] |= STATUS_REG_CAPABILITIES_USED_MASK; - self.write_byte_internal(tail_offset, cap_offset as u8, false); + self.write_byte_internal(tail_offset, cap_offset.try_into().unwrap(), false); self.write_byte_internal(cap_offset, cap_data.id() as u8, false); self.write_byte_internal(cap_offset + 1, 0, false); // Next pointer. for (i, byte) in cap_data.bytes().iter().enumerate() { @@ -704,39 +337,40 @@ impl PciConfiguration { (next + 3) & !3 } + /// Write a PCI configuration register pub fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) { if reg_idx >= NUM_CONFIGURATION_REGISTERS { return; } - if offset as usize + data.len() > 4 { + if u64_to_usize(offset) + data.len() > 4 { return; } // Handle potential write to MSI-X message control register - if let Some(msix_cap_reg_idx) = self.msix_cap_reg_idx { - if let Some(msix_config) = &self.msix_config { - if msix_cap_reg_idx == reg_idx && offset == 2 && data.len() == 2 { - // 2-bytes write in the Message Control field - msix_config - .lock() - .unwrap() - .set_msg_ctl(LittleEndian::read_u16(data)); - } else if msix_cap_reg_idx == reg_idx && offset == 0 && data.len() == 4 { - // 4 bytes write at the beginning. Ignore the first 2 bytes which are the - // capability id and next capability pointer - msix_config - .lock() - .unwrap() - .set_msg_ctl((LittleEndian::read_u32(data) >> 16) as u16); - } + if let Some(msix_cap_reg_idx) = self.msix_cap_reg_idx + && let Some(msix_config) = &self.msix_config + { + if msix_cap_reg_idx == reg_idx && offset == 2 && data.len() == 2 { + // 2-bytes write in the Message Control field + msix_config + .lock() + .unwrap() + .set_msg_ctl(LittleEndian::read_u16(data)); + } else if msix_cap_reg_idx == reg_idx && offset == 0 && data.len() == 4 { + // 4 bytes write at the beginning. Ignore the first 2 bytes which are the + // capability id and next capability pointer + msix_config + .lock() + .unwrap() + .set_msg_ctl((LittleEndian::read_u32(data) >> 16) as u16); } } match data.len() { - 1 => self.write_byte(reg_idx * 4 + offset as usize, data[0]), + 1 => self.write_byte(reg_idx * 4 + u64_to_usize(offset), data[0]), 2 => self.write_word( - reg_idx * 4 + offset as usize, + reg_idx * 4 + u64_to_usize(offset), u16::from(data[0]) | (u16::from(data[1]) << 8), ), 4 => self.write_reg(reg_idx, LittleEndian::read_u32(data)), @@ -744,6 +378,7 @@ impl PciConfiguration { } } + /// Detect whether the guest wants to reprogram the address of a BAR pub fn detect_bar_reprogramming( &mut self, reg_idx: usize, @@ -811,25 +446,13 @@ impl PciConfiguration { } } -impl Default for PciBarConfiguration { - fn default() -> Self { - PciBarConfiguration { - idx: 0, - addr: 0, - size: 0, - region_type: PciBarRegionType::Memory64BitRegion, - prefetchable: PciBarPrefetchable::NotPrefetchable, - } - } -} - #[cfg(test)] mod tests { - + use pci::PciMultimediaSubclass; use vm_memory::ByteValued; use super::*; - use crate::MsixCap; + use crate::pci::msix::MsixCap; #[repr(C, packed)] #[derive(Clone, Copy, Default)] @@ -877,14 +500,14 @@ mod tests { #[test] #[should_panic] fn test_too_big_capability() { - let mut cfg = default_config(); + let mut cfg = default_pci_config(); cfg.add_capability(&BadCap::new(127)); } #[test] #[should_panic] fn test_capability_space_overflow() { - let mut cfg = default_config(); + let mut cfg = default_pci_config(); cfg.add_capability(&BadCap::new(62)); cfg.add_capability(&BadCap::new(62)); cfg.add_capability(&BadCap::new(0)); @@ -892,7 +515,7 @@ mod tests { #[test] fn test_add_capability() { - let mut cfg = default_config(); + let mut cfg = default_pci_config(); // Reset capabilities cfg.last_capability = None; @@ -916,7 +539,7 @@ mod tests { // Verify the contents of the capabilities. let cap1_data = cfg.read_reg(cap1_offset / 4); assert_eq!(cap1_data & 0xFF, 0x09); // capability ID - assert_eq!((cap1_data >> 8) & 0xFF, cap2_offset as u32); // next capability pointer + assert_eq!((cap1_data >> 8) & 0xFF, u32::try_from(cap2_offset).unwrap()); // next capability pointer assert_eq!((cap1_data >> 16) & 0xFF, 0x04); // cap1.len assert_eq!((cap1_data >> 24) & 0xFF, 0xAA); // cap1.foo @@ -929,7 +552,7 @@ mod tests { #[test] fn test_msix_capability() { - let mut cfg = default_config(); + let mut cfg = default_pci_config(); // Information about the MSI-X capability layout: https://wiki.osdev.org/PCI#Enabling_MSI-X let msix_cap = MsixCap::new( @@ -1016,40 +639,29 @@ mod tests { assert_eq!(cfg.read_reg(cap_reg + 2), pba_offset); } - #[derive(Copy, Clone)] - enum TestPi { - Test = 0x5a, - } - - impl PciProgrammingInterface for TestPi { - fn get_register_value(&self) -> u8 { - *self as u8 - } - } - - #[test] - fn class_code() { - let cfg = PciConfiguration::new( + fn default_pci_config() -> PciConfiguration { + PciConfiguration::new_type0( 0x1234, 0x5678, 0x1, PciClassCode::MultimediaController, &PciMultimediaSubclass::AudioController, - Some(&TestPi::Test), - PciHeaderType::Device, 0xABCD, 0x2468, None, - None, - ); + ) + } + #[test] + fn class_code() { + let cfg = default_pci_config(); let class_reg = cfg.read_reg(2); let class_code = (class_reg >> 24) & 0xFF; let subclass = (class_reg >> 16) & 0xFF; let prog_if = (class_reg >> 8) & 0xFF; assert_eq!(class_code, 0x04); assert_eq!(subclass, 0x01); - assert_eq!(prog_if, 0x5a); + assert_eq!(prog_if, 0x0); } #[test] @@ -1085,54 +697,38 @@ mod tests { assert_eq!(decode_64_bits_bar_size(hi, lo), 0xffff_ffff_ffff_fff0); } - fn default_config() -> PciConfiguration { - PciConfiguration::new( - 0x42, - 0x0, - 0x0, - PciClassCode::MassStorage, - &PciMassStorageSubclass::SerialScsiController, - None, - PciHeaderType::Device, - 0x13, - 0x12, - None, - None, - ) - } - #[test] #[should_panic] fn test_bar_size_no_power_of_two() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, 0x1000, 0x1001); } #[test] #[should_panic] fn test_bad_bar_index() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(NUM_BAR_REGS, 0x1000, 0x1000); } #[test] #[should_panic] fn test_bad_64bit_bar_index() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(NUM_BAR_REGS - 1, 0x1000, 0x1000); } #[test] #[should_panic] fn test_bar_size_overflows() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, u64::MAX, 0x2); } #[test] #[should_panic] fn test_lower_bar_free_upper_used() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(1, 0x1000, 0x1000); pci_config.add_pci_bar(0, 0x1000, 0x1000); } @@ -1140,7 +736,7 @@ mod tests { #[test] #[should_panic] fn test_lower_bar_used() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, 0x1000, 0x1000); pci_config.add_pci_bar(0, 0x1000, 0x1000); } @@ -1148,14 +744,14 @@ mod tests { #[test] #[should_panic] fn test_upper_bar_used() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, 0x1000, 0x1000); pci_config.add_pci_bar(1, 0x1000, 0x1000); } #[test] fn test_add_pci_bar() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, 0x1_0000_0000, 0x1000); @@ -1168,19 +764,7 @@ mod tests { #[test] fn test_access_invalid_reg() { - let mut pci_config = PciConfiguration::new( - 0x42, - 0x0, - 0x0, - PciClassCode::MassStorage, - &PciMassStorageSubclass::SerialScsiController, - None, - PciHeaderType::Device, - 0x13, - 0x12, - None, - None, - ); + let mut pci_config = default_pci_config(); // Can't read past the end of the configuration space assert_eq!( @@ -1214,53 +798,55 @@ mod tests { #[test] fn test_detect_bar_reprogramming() { - let mut pci_config = PciConfiguration::new( - 0x42, - 0x0, - 0x0, - PciClassCode::MassStorage, - &PciMassStorageSubclass::SerialScsiController, - None, - PciHeaderType::Device, - 0x13, - 0x12, - None, - None, - ); + let mut pci_config = default_pci_config(); // Trying to reprogram with something less than 4 bytes (length of the address) should fail - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &[0x13]) - .is_none()); - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) - .is_none()); - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) - .is_none()); - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12, 0x16]) - .is_none()); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &[0x13]) + .is_none() + ); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) + .is_none() + ); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) + .is_none() + ); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12, 0x16]) + .is_none() + ); // Writing all 1s is a special case where we're actually asking for the size of the BAR - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0xffff_ffff)) - .is_none()); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0xffff_ffff)) + .is_none() + ); // Trying to reprogram a BAR that hasn't be initialized does nothing for reg_idx in BAR0_REG..BAR0_REG + NUM_BAR_REGS { - assert!(pci_config - .detect_bar_reprogramming(reg_idx, &u32::to_le_bytes(0x1312_4243)) - .is_none()); + assert!( + pci_config + .detect_bar_reprogramming(reg_idx, &u32::to_le_bytes(0x1312_4243)) + .is_none() + ); } // Reprogramming of a 64bit BAR pci_config.add_pci_bar(0, 0x13_1200_0000, 0x8000); // First we write the lower 32 bits and this shouldn't cause any reprogramming - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) - .is_none()); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) + .is_none() + ); pci_config.write_config_register(BAR0_REG, 0, &u32::to_le_bytes(0x4200_0000)); // Writing the upper 32 bits should trigger the reprogramming @@ -1287,17 +873,21 @@ mod tests { pci_config.write_config_register(BAR0_REG + 1, 0, &u32::to_le_bytes(0x1312)); // Attempting to reprogram the BAR with the same address should not have any effect - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) - .is_none()); - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x1312)) - .is_none()); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) + .is_none() + ); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x1312)) + .is_none() + ); } #[test] fn test_rom_bar() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); // ROM BAR address should always be 0 and writes to it shouldn't do anything assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); diff --git a/src/pci/src/device.rs b/src/vmm/src/pci/mod.rs similarity index 83% rename from src/pci/src/device.rs rename to src/vmm/src/pci/mod.rs index fe9e676eb03..2d4826349f8 100644 --- a/src/pci/src/device.rs +++ b/src/vmm/src/pci/mod.rs @@ -5,24 +5,28 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause -use std::sync::{Arc, Barrier}; -use std::{io, result}; +/// PCI bus logic +pub mod bus; +/// PCI configuration space handling +pub mod configuration; +/// MSI-X logic +pub mod msix; -#[derive(Debug, thiserror::Error, displaydoc::Display)] -pub enum Error { - /// Allocating space for an IO BAR failed, size={0}. - IoAllocationFailed(u64), - /// Expected resource not found. - MissingResource, -} +use std::fmt::Debug; +use std::sync::{Arc, Barrier}; #[derive(Clone, Copy, Debug, PartialEq, Eq)] +/// Parameters for performing a BAR reprogramming operation pub struct BarReprogrammingParams { + /// Previous address of the BAR pub old_base: u64, + /// New address of the BAR pub new_base: u64, + /// Size of the BAR pub len: u64, } +/// Common logic of all PCI devices pub trait PciDevice: Send { /// Sets a register in the configuration space. /// * `reg_idx` - The index of the config register to modify. @@ -55,7 +59,7 @@ pub trait PciDevice: Send { None } /// Relocates the BAR to a different address in guest address space. - fn move_bar(&mut self, _old_base: u64, _new_base: u64) -> result::Result<(), io::Error> { + fn move_bar(&mut self, _old_base: u64, _new_base: u64) -> Result<(), anyhow::Error> { Ok(()) } } @@ -71,5 +75,5 @@ pub trait DeviceRelocation: Send + Sync { new_base: u64, len: u64, pci_dev: &mut dyn PciDevice, - ) -> result::Result<(), io::Error>; + ) -> Result<(), anyhow::Error>; } diff --git a/src/pci/src/msix.rs b/src/vmm/src/pci/msix.rs similarity index 66% rename from src/pci/src/msix.rs rename to src/vmm/src/pci/msix.rs index 50abeaf9737..16405fe6217 100644 --- a/src/pci/src/msix.rs +++ b/src/vmm/src/pci/msix.rs @@ -1,19 +1,21 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // Copyright © 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause // use std::sync::Arc; -use std::{io, result}; use byteorder::{ByteOrder, LittleEndian}; +use pci::PciCapabilityId; use serde::{Deserialize, Serialize}; -use vm_device::interrupt::{ - InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig, -}; use vm_memory::ByteValued; -use crate::{PciCapability, PciCapabilityId}; +use crate::Vm; +use crate::logger::{debug, error, warn}; +use crate::pci::configuration::PciCapability; +use crate::snapshot::Persist; +use crate::vstate::interrupts::{InterruptError, MsixVectorConfig, MsixVectorGroup}; const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048; const MSIX_TABLE_ENTRIES_MODULO: u64 = 16; @@ -22,23 +24,21 @@ const BITS_PER_PBA_ENTRY: usize = 64; const FUNCTION_MASK_BIT: u8 = 14; const MSIX_ENABLE_BIT: u8 = 15; -#[derive(Debug, thiserror::Error, displaydoc::Display)] -pub enum Error { - /// Failed enabling the interrupt route. - EnableInterruptRoute(io::Error), - /// Failed updating the interrupt route. - UpdateInterruptRoute(io::Error), -} - #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] +/// MSI-X table entries pub struct MsixTableEntry { + /// Lower 32 bits of the vector address pub msg_addr_lo: u32, + /// Upper 32 bits of the vector address pub msg_addr_hi: u32, + /// Vector data pub msg_data: u32, + /// Enable/Disable and (un)masking control pub vector_ctl: u32, } impl MsixTableEntry { + /// Returns `true` if the vector is masked pub fn masked(&self) -> bool { self.vector_ctl & 0x1 == 0x1 } @@ -56,19 +56,28 @@ impl Default for MsixTableEntry { } #[derive(Debug, Clone, Serialize, Deserialize)] +/// State for (de)serializing MSI-X configuration pub struct MsixConfigState { table_entries: Vec, pba_entries: Vec, masked: bool, enabled: bool, + vectors: Vec, } +/// MSI-X configuration pub struct MsixConfig { + /// Vector table entries pub table_entries: Vec, + /// Pending bit array pub pba_entries: Vec, + /// Id of the device using this set of vectors pub devid: u32, - pub interrupt_source_group: Arc, + /// Interrupts vectors used + pub vectors: Arc, + /// Whether vectors are masked pub masked: bool, + /// Whether vectors are enabled pub enabled: bool, } @@ -85,78 +94,73 @@ impl std::fmt::Debug for MsixConfig { } impl MsixConfig { - pub fn new( - msix_vectors: u16, - interrupt_source_group: Arc, - devid: u32, - state: Option, - ) -> result::Result { - assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE); - - let (table_entries, pba_entries, masked, enabled) = if let Some(state) = state { - if state.enabled && !state.masked { - for (idx, table_entry) in state.table_entries.iter().enumerate() { - if table_entry.masked() { - continue; - } + /// Create a new MSI-X configuration + pub fn new(vectors: Arc, devid: u32) -> Self { + assert!(vectors.num_vectors() <= MAX_MSIX_VECTORS_PER_DEVICE); - let config = MsiIrqSourceConfig { - high_addr: table_entry.msg_addr_hi, - low_addr: table_entry.msg_addr_lo, - data: table_entry.msg_data, - devid, - }; + let mut table_entries: Vec = Vec::new(); + table_entries.resize_with(vectors.num_vectors() as usize, Default::default); + let mut pba_entries: Vec = Vec::new(); + let num_pba_entries: usize = (vectors.num_vectors() as usize).div_ceil(BITS_PER_PBA_ENTRY); + pba_entries.resize_with(num_pba_entries, Default::default); + + MsixConfig { + table_entries, + pba_entries, + devid, + vectors, + masked: true, + enabled: false, + } + } - interrupt_source_group - .update( - idx as InterruptIndex, - InterruptSourceConfig::MsiIrq(config), - state.masked, - true, - ) - .map_err(Error::UpdateInterruptRoute)?; - - interrupt_source_group - .enable() - .map_err(Error::EnableInterruptRoute)?; + /// Create an MSI-X configuration from snapshot state + pub fn from_state( + state: MsixConfigState, + vm: Arc, + devid: u32, + ) -> Result { + let vectors = Arc::new(MsixVectorGroup::restore(vm, &state.vectors)?); + if state.enabled && !state.masked { + for (idx, table_entry) in state.table_entries.iter().enumerate() { + if table_entry.masked() { + continue; } - } - ( - state.table_entries, - state.pba_entries, - state.masked, - state.enabled, - ) - } else { - let mut table_entries: Vec = Vec::new(); - table_entries.resize_with(msix_vectors as usize, Default::default); - let mut pba_entries: Vec = Vec::new(); - let num_pba_entries: usize = (msix_vectors as usize).div_ceil(BITS_PER_PBA_ENTRY); - pba_entries.resize_with(num_pba_entries, Default::default); + let config = MsixVectorConfig { + high_addr: table_entry.msg_addr_hi, + low_addr: table_entry.msg_addr_lo, + data: table_entry.msg_data, + devid, + }; - (table_entries, pba_entries, true, false) - }; + vectors.update(idx, config, state.masked, true)?; + vectors.enable()?; + } + } Ok(MsixConfig { - table_entries, - pba_entries, + table_entries: state.table_entries, + pba_entries: state.pba_entries, devid, - interrupt_source_group, - masked, - enabled, + vectors, + masked: state.masked, + enabled: state.enabled, }) } + /// Create the state object for serializing MSI-X vectors pub fn state(&self) -> MsixConfigState { MsixConfigState { table_entries: self.table_entries.clone(), pba_entries: self.pba_entries.clone(), masked: self.masked, enabled: self.enabled, + vectors: self.vectors.save(), } } + /// Set the MSI-X control message (enable/disable, (un)mask) pub fn set_msg_ctl(&mut self, reg: u16) { let old_masked = self.masked; let old_enabled = self.enabled; @@ -169,25 +173,20 @@ impl MsixConfig { if self.enabled && !self.masked { debug!("MSI-X enabled for device 0x{:x}", self.devid); for (idx, table_entry) in self.table_entries.iter().enumerate() { - let config = MsiIrqSourceConfig { + let config = MsixVectorConfig { high_addr: table_entry.msg_addr_hi, low_addr: table_entry.msg_addr_lo, data: table_entry.msg_data, devid: self.devid, }; - if let Err(e) = self.interrupt_source_group.update( - idx as InterruptIndex, - InterruptSourceConfig::MsiIrq(config), - table_entry.masked(), - true, - ) { + if let Err(e) = self.vectors.update(idx, config, table_entry.masked(), true) { error!("Failed updating vector: {:?}", e); } } } else if old_enabled || !old_masked { debug!("MSI-X disabled for device 0x{:x}", self.devid); - if let Err(e) = self.interrupt_source_group.disable() { + if let Err(e) = self.vectors.disable() { error!("Failed disabling irq_fd: {:?}", e); } } @@ -199,13 +198,14 @@ impl MsixConfig { // masked. if old_masked && !self.masked { for (index, entry) in self.table_entries.clone().iter().enumerate() { - if !entry.masked() && self.get_pba_bit(index as u16) == 1 { + if !entry.masked() && self.get_pba_bit(index.try_into().unwrap()) == 1 { self.inject_msix_and_clear_pba(index); } } } } + /// Read an MSI-X table entry pub fn read_table(&self, offset: u64, data: &mut [u8]) { assert!(data.len() <= 8); @@ -258,6 +258,7 @@ impl MsixConfig { } } + /// Write an MSI-X table entry pub fn write_table(&mut self, offset: u64, data: &[u8]) { assert!(data.len() <= 8); @@ -314,19 +315,17 @@ impl MsixConfig { // this is safe because if the entry is masked (starts masked as per spec) // in the table then it won't be triggered. if self.enabled && !self.masked && !table_entry.masked() { - let config = MsiIrqSourceConfig { + let config = MsixVectorConfig { high_addr: table_entry.msg_addr_hi, low_addr: table_entry.msg_addr_lo, data: table_entry.msg_data, devid: self.devid, }; - if let Err(e) = self.interrupt_source_group.update( - index as InterruptIndex, - InterruptSourceConfig::MsiIrq(config), - table_entry.masked(), - true, - ) { + if let Err(e) = self + .vectors + .update(index, config, table_entry.masked(), true) + { error!("Failed updating vector: {:?}", e); } } @@ -344,12 +343,13 @@ impl MsixConfig { && self.enabled && old_entry.masked() && !table_entry.masked() - && self.get_pba_bit(index as u16) == 1 + && self.get_pba_bit(index.try_into().unwrap()) == 1 { self.inject_msix_and_clear_pba(index); } } + /// Read a pending bit array entry pub fn read_pba(&self, offset: u64, data: &mut [u8]) { let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize; let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO; @@ -391,10 +391,12 @@ impl MsixConfig { } } + /// Write a pending bit array entry pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) { error!("Pending Bit Array is read only"); } + /// Set PBA bit for a vector pub fn set_pba_bit(&mut self, vector: u16, reset: bool) { assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); @@ -414,6 +416,7 @@ impl MsixConfig { } } + /// Get the PBA bit for a vector fn get_pba_bit(&self, vector: u16) -> u8 { assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); @@ -427,38 +430,36 @@ impl MsixConfig { ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8 } + /// Inject an MSI-X interrupt and clear the PBA bit for a vector fn inject_msix_and_clear_pba(&mut self, vector: usize) { // Inject the MSI message - match self - .interrupt_source_group - .trigger(vector as InterruptIndex) - { + match self.vectors.trigger(vector) { Ok(_) => debug!("MSI-X injected on vector control flip"), Err(e) => error!("failed to inject MSI-X: {}", e), } // Clear the bit from PBA - self.set_pba_bit(vector as u16, true); + self.set_pba_bit(vector.try_into().unwrap(), true); } } -#[allow(dead_code)] #[repr(C, packed)] -#[derive(Clone, Copy, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)] +/// MSI-X PCI capability pub struct MsixCap { - // Message Control Register - // 10-0: MSI-X Table size - // 13-11: Reserved - // 14: Mask. Mask all MSI-X when set. - // 15: Enable. Enable all MSI-X when set. + /// Message Control Register + /// 10-0: MSI-X Table size + /// 13-11: Reserved + /// 14: Mask. Mask all MSI-X when set. + /// 15: Enable. Enable all MSI-X when set. pub msg_ctl: u16, - // Table. Contains the offset and the BAR indicator (BIR) - // 2-0: Table BAR indicator (BIR). Can be 0 to 5. - // 31-3: Table offset in the BAR pointed by the BIR. + /// Table. Contains the offset and the BAR indicator (BIR) + /// 2-0: Table BAR indicator (BIR). Can be 0 to 5. + /// 31-3: Table offset in the BAR pointed by the BIR. pub table: u32, - // Pending Bit Array. Contains the offset and the BAR indicator (BIR) - // 2-0: PBA BAR indicator (BIR). Can be 0 to 5. - // 31-3: PBA offset in the BAR pointed by the BIR. + /// Pending Bit Array. Contains the offset and the BAR indicator (BIR) + /// 2-0: PBA BAR indicator (BIR). Can be 0 to 5. + /// 31-3: PBA offset in the BAR pointed by the BIR. pub pba: u32, } @@ -476,6 +477,7 @@ impl PciCapability for MsixCap { } impl MsixCap { + /// Create a new MSI-X capability object pub fn new( table_pci_bar: u8, table_size: u16, @@ -498,76 +500,25 @@ impl MsixCap { #[cfg(test)] mod tests { - use std::sync::atomic::{AtomicUsize, Ordering}; - - use vmm_sys_util::eventfd::EventFd; - use super::*; + use crate::builder::tests::default_vmm; + use crate::logger::{IncMetric, METRICS}; + use crate::{Vm, check_metric_after_block}; - #[derive(Debug)] - struct MockInterrupt { - trigger_cnt: [AtomicUsize; 2], - update_cnt: [AtomicUsize; 2], - event_fd: [EventFd; 2], - } - - impl MockInterrupt { - fn new() -> Self { - MockInterrupt { - trigger_cnt: [AtomicUsize::new(0), AtomicUsize::new(0)], - update_cnt: [AtomicUsize::new(0), AtomicUsize::new(0)], - event_fd: [ - EventFd::new(libc::EFD_NONBLOCK).unwrap(), - EventFd::new(libc::EFD_NONBLOCK).unwrap(), - ], - } - } - - fn interrupt_cnt(&self, index: InterruptIndex) -> usize { - self.trigger_cnt[index as usize].load(Ordering::SeqCst) - } - - fn update_cnt(&self, index: InterruptIndex) -> usize { - self.update_cnt[index as usize].load(Ordering::SeqCst) - } - } - - impl InterruptSourceGroup for MockInterrupt { - fn trigger(&self, index: InterruptIndex) -> vm_device::interrupt::Result<()> { - self.trigger_cnt[index as usize].fetch_add(1, Ordering::SeqCst); - Ok(()) - } - - fn notifier(&self, index: InterruptIndex) -> Option<&EventFd> { - self.event_fd.get(index as usize) - } - - fn update( - &self, - index: InterruptIndex, - _config: InterruptSourceConfig, - _masked: bool, - _set_gsi: bool, - ) -> vm_device::interrupt::Result<()> { - self.update_cnt[index as usize].fetch_add(1, Ordering::SeqCst); - Ok(()) - } - - fn set_gsi(&self) -> vm_device::interrupt::Result<()> { - Ok(()) - } + fn msix_vector_group(nr_vectors: u16) -> Arc { + let vmm = default_vmm(); + Arc::new(Vm::create_msix_group(vmm.vm.clone(), nr_vectors).unwrap()) } #[test] #[should_panic] fn test_too_many_vectors() { - MsixConfig::new(2049, Arc::new(MockInterrupt::new()), 0x42, None).unwrap(); + MsixConfig::new(msix_vector_group(2049), 0x42); } #[test] fn test_new_msix_config() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); assert_eq!(config.devid, 0x42); assert!(config.masked); assert!(!config.enabled); @@ -577,8 +528,7 @@ mod tests { #[test] fn test_enable_msix_vectors() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(2), 0x42); assert!(!config.enabled); assert!(config.masked); @@ -605,8 +555,7 @@ mod tests { #[test] #[should_panic] fn test_table_access_read_too_big() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); let mut buffer = [0u8; 16]; config.read_table(0, &mut buffer); @@ -614,8 +563,7 @@ mod tests { #[test] fn test_read_table_past_end() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); let mut buffer = [0u8; 8]; // We have 2 vectors (16 bytes each), so we should be able to read up to 32 bytes. @@ -626,8 +574,7 @@ mod tests { #[test] fn test_read_table_bad_length() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); let mut buffer = [0u8; 8]; // We can either read 4 or 8 bytes @@ -653,45 +600,56 @@ mod tests { #[test] fn test_access_table() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(2), 0x42); // enabled and not masked - config.set_msg_ctl(0x8000); - assert_eq!(vectors.update_cnt(0), 1); - assert_eq!(vectors.update_cnt(1), 1); + check_metric_after_block!( + METRICS.interrupts.config_updates, + 2, + config.set_msg_ctl(0x8000) + ); let mut buffer = [0u8; 8]; // Write first vector's address with a single 8-byte write - config.write_table(0, &u64::to_le_bytes(0x0000_1312_0000_1110)); // It's still masked so shouldn't be updated - assert_eq!(vectors.update_cnt(0), 1); - assert_eq!(vectors.update_cnt(1), 1); + check_metric_after_block!( + METRICS.interrupts.config_updates, + 0, + config.write_table(0, &u64::to_le_bytes(0x0000_1312_0000_1110)) + ); + // Same for control and message data - config.write_table(8, &u64::to_le_bytes(0x0_0000_0020)); // Now, we enabled it, so we should see an update - assert_eq!(vectors.update_cnt(0), 2); - assert_eq!(vectors.update_cnt(1), 1); + check_metric_after_block!( + METRICS.interrupts.config_updates, + 1, + config.write_table(8, &u64::to_le_bytes(0x0_0000_0020)) + ); // Write second vector's fields with 4-byte writes - // low 32 bits of the address - config.write_table(16, &u32::to_le_bytes(0x4241)); - assert_eq!(vectors.update_cnt(0), 2); - // Still masked - assert_eq!(vectors.update_cnt(1), 1); - // high 32 bits of the address - config.write_table(20, &u32::to_le_bytes(0x4443)); - assert_eq!(vectors.update_cnt(0), 2); - // Still masked - assert_eq!(vectors.update_cnt(1), 1); - // message data - config.write_table(24, &u32::to_le_bytes(0x21)); - assert_eq!(vectors.update_cnt(0), 2); - // Still masked - assert_eq!(vectors.update_cnt(1), 1); - // vector control - config.write_table(28, &u32::to_le_bytes(0x0)); - assert_eq!(vectors.update_cnt(0), 2); - assert_eq!(vectors.update_cnt(1), 2); + // low 32 bits of the address (still masked) + check_metric_after_block!( + METRICS.interrupts.config_updates, + 0, + config.write_table(16, &u32::to_le_bytes(0x4241)) + ); + // high 32 bits of the address (still masked) + check_metric_after_block!( + METRICS.interrupts.config_updates, + 0, + config.write_table(20, &u32::to_le_bytes(0x4443)) + ); + // message data (still masked) + check_metric_after_block!( + METRICS.interrupts.config_updates, + 0, + config.write_table(24, &u32::to_le_bytes(0x21)) + ); + // vector control (now unmasked) + check_metric_after_block!( + METRICS.interrupts.config_updates, + 1, + config.write_table(28, &u32::to_le_bytes(0x0)) + ); assert_eq!(config.table_entries[0].msg_addr_hi, 0x1312); assert_eq!(config.table_entries[0].msg_addr_lo, 0x1110); @@ -738,29 +696,28 @@ mod tests { assert_eq!(0x0_0000_0020, u64::from_le_bytes(buffer)); // If we mask the interrupts we shouldn't see any update - config.write_table(12, &u32::to_le_bytes(0x1)); - config.write_table(28, &u32::to_le_bytes(0x1)); - assert_eq!(vectors.update_cnt(0), 2); - assert_eq!(vectors.update_cnt(1), 2); + check_metric_after_block!(METRICS.interrupts.config_updates, 0, { + config.write_table(12, &u32::to_le_bytes(0x1)); + config.write_table(28, &u32::to_le_bytes(0x1)); + }); // Un-masking them should update them - config.write_table(12, &u32::to_le_bytes(0x0)); - config.write_table(28, &u32::to_le_bytes(0x0)); - assert_eq!(vectors.update_cnt(0), 3); - assert_eq!(vectors.update_cnt(1), 3); + check_metric_after_block!(METRICS.interrupts.config_updates, 2, { + config.write_table(12, &u32::to_le_bytes(0x0)); + config.write_table(28, &u32::to_le_bytes(0x0)); + }); // Setting up the same config should have no effect - config.write_table(12, &u32::to_le_bytes(0x0)); - config.write_table(28, &u32::to_le_bytes(0x0)); - assert_eq!(vectors.update_cnt(0), 3); - assert_eq!(vectors.update_cnt(1), 3); + check_metric_after_block!(METRICS.interrupts.config_updates, 0, { + config.write_table(12, &u32::to_le_bytes(0x0)); + config.write_table(28, &u32::to_le_bytes(0x0)); + }); } #[test] #[should_panic] fn test_table_access_write_too_big() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(2), 0x42); let buffer = [0u8; 16]; config.write_table(0, &buffer); @@ -768,8 +725,7 @@ mod tests { #[test] fn test_pba_read_too_big() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); let mut buffer = [0u8; 16]; config.read_pba(0, &mut buffer); @@ -778,8 +734,7 @@ mod tests { #[test] fn test_pba_invalid_offset() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); let mut buffer = [0u8; 8]; // Past the end of the PBA array @@ -797,8 +752,7 @@ mod tests { #[test] #[should_panic] fn test_set_pba_bit_vector_too_big() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(2), 0x42); config.set_pba_bit(2048, false); } @@ -806,16 +760,14 @@ mod tests { #[test] #[should_panic] fn test_get_pba_bit_vector_too_big() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); config.get_pba_bit(2048); } #[test] fn test_pba_bit_invalid_vector() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(2), 0x42); // We have two vectors, so setting the pending bit for the third one // should be ignored @@ -828,8 +780,7 @@ mod tests { #[test] fn test_pba_read() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(128, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(128), 0x42); let mut buffer = [0u8; 8]; config.set_pba_bit(1, false); @@ -850,40 +801,33 @@ mod tests { #[test] fn test_pending_interrupt() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(2), 0x42); config.set_pba_bit(1, false); assert_eq!(config.get_pba_bit(1), 1); // Enable MSI-X vector and unmask interrupts - config.set_msg_ctl(0x8000); - // Individual vectors are still masked, so no change - assert_eq!(vectors.interrupt_cnt(0), 0); - assert_eq!(vectors.interrupt_cnt(1), 0); + check_metric_after_block!(METRICS.interrupts.triggers, 0, config.set_msg_ctl(0x8000)); // Enable all vectors - config.write_table(8, &u64::to_le_bytes(0x0_0000_0020)); - config.write_table(24, &u64::to_le_bytes(0x0_0000_0020)); - // Vector one had a pending bit, so we must have triggered an interrupt for it // and cleared the pending bit - assert_eq!(vectors.interrupt_cnt(0), 0); - assert_eq!(vectors.interrupt_cnt(1), 1); + check_metric_after_block!(METRICS.interrupts.triggers, 1, { + config.write_table(8, &u64::to_le_bytes(0x0_0000_0020)); + config.write_table(24, &u64::to_le_bytes(0x0_0000_0020)); + }); assert_eq!(config.get_pba_bit(1), 0); // Check that interrupt is sent as well for enabled vectors once we unmask from // Message Control // Mask vectors and set pending bit for vector 0 - config.set_msg_ctl(0xc000); - config.set_pba_bit(0, false); - assert_eq!(vectors.interrupt_cnt(0), 0); - assert_eq!(vectors.interrupt_cnt(1), 1); + check_metric_after_block!(METRICS.interrupts.triggers, 0, { + config.set_msg_ctl(0xc000); + config.set_pba_bit(0, false); + }); // Unmask them - config.set_msg_ctl(0x8000); - assert_eq!(vectors.interrupt_cnt(0), 1); - assert_eq!(vectors.interrupt_cnt(1), 1); + check_metric_after_block!(METRICS.interrupts.triggers, 1, config.set_msg_ctl(0x8000)); assert_eq!(config.get_pba_bit(0), 0); } } diff --git a/src/vm-device/src/bus.rs b/src/vmm/src/vstate/bus.rs similarity index 91% rename from src/vm-device/src/bus.rs rename to src/vmm/src/vstate/bus.rs index 31880d354bb..9c050196323 100644 --- a/src/vm-device/src/bus.rs +++ b/src/vmm/src/vstate/bus.rs @@ -10,7 +10,7 @@ use std::cmp::Ordering; use std::collections::btree_map::BTreeMap; use std::sync::{Arc, Barrier, Mutex, RwLock, Weak}; -use std::{convert, error, fmt, io, result}; +use std::{error, fmt, result}; /// Trait for devices that respond to reads or writes in an arbitrary address space. /// @@ -26,6 +26,7 @@ pub trait BusDevice: Send { } } +/// Trait similar to [`BusDevice`] with the extra requirement that a device is `Send` and `Sync`. #[allow(unused_variables)] pub trait BusDeviceSync: Send + Sync { /// Reads at `offset` from this device @@ -51,8 +52,9 @@ impl BusDeviceSync for Mutex { } } +/// Error type for [`Bus`]-related operations. #[derive(Debug)] -pub enum Error { +pub enum BusError { /// The insertion failed because the new device overlapped with an old device. Overlap, /// Failed to operate on zero sized range. @@ -61,21 +63,16 @@ pub enum Error { MissingAddressRange, } -pub type Result = result::Result; +/// Result type for [`Bus`]-related operations. +pub type Result = result::Result; -impl fmt::Display for Error { +impl fmt::Display for BusError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "bus_error: {self:?}") } } -impl error::Error for Error {} - -impl convert::From for io::Error { - fn from(e: Error) -> Self { - io::Error::other(e) - } -} +impl error::Error for BusError {} /// Holds a base and length representing the address space occupied by a `BusDevice`. /// @@ -83,7 +80,9 @@ impl convert::From for io::Error { /// * len - The length of the range in bytes. #[derive(Debug, Copy, Clone)] pub struct BusRange { + /// base address of a range within a [`Bus`] pub base: u64, + /// length of a range within a [`Bus`] pub len: u64, } @@ -140,6 +139,7 @@ impl Bus { } #[allow(clippy::type_complexity)] + /// Get a reference to a device residing inside the bus at address [`addr`]. pub fn resolve(&self, addr: u64) -> Option<(u64, u64, Arc)> { if let Some((range, dev)) = self.first_before(addr) { let offset = addr - range.base; @@ -150,9 +150,10 @@ impl Bus { None } + /// Insert a device into the [`Bus`] in the range [`addr`, `addr` + `len`]. pub fn insert(&self, device: Arc, base: u64, len: u64) -> Result<()> { if len == 0 { - return Err(Error::ZeroSizedRange); + return Err(BusError::ZeroSizedRange); } // Reject all cases where the new device's range overlaps with an existing device. @@ -163,7 +164,7 @@ impl Bus { .iter() .any(|(range, _dev)| range.overlaps(base, len)) { - return Err(Error::Overlap); + return Err(BusError::Overlap); } if self @@ -173,7 +174,7 @@ impl Bus { .insert(BusRange { base, len }, Arc::downgrade(&device)) .is_some() { - return Err(Error::Overlap); + return Err(BusError::Overlap); } Ok(()) @@ -182,13 +183,13 @@ impl Bus { /// Removes the device at the given address space range. pub fn remove(&self, base: u64, len: u64) -> Result<()> { if len == 0 { - return Err(Error::ZeroSizedRange); + return Err(BusError::ZeroSizedRange); } let bus_range = BusRange { base, len }; if self.devices.write().unwrap().remove(&bus_range).is_none() { - return Err(Error::MissingAddressRange); + return Err(BusError::MissingAddressRange); } Ok(()) @@ -224,7 +225,7 @@ impl Bus { let device = if let Some((_, _, dev)) = self.resolve(old_base) { dev.clone() } else { - return Err(Error::MissingAddressRange); + return Err(BusError::MissingAddressRange); }; // Remove the old address range @@ -243,7 +244,7 @@ impl Bus { dev.read(base, offset, data); Ok(()) } else { - Err(Error::MissingAddressRange) + Err(BusError::MissingAddressRange) } } @@ -255,7 +256,7 @@ impl Bus { // OK to unwrap as lock() failing is a serious error condition and should panic. Ok(dev.write(base, offset, data)) } else { - Err(Error::MissingAddressRange) + Err(BusError::MissingAddressRange) } } } @@ -269,12 +270,14 @@ mod tests { struct ConstantDevice; impl BusDeviceSync for ConstantDevice { + #[allow(clippy::cast_possible_truncation)] fn read(&self, _base: u64, offset: u64, data: &mut [u8]) { for (i, v) in data.iter_mut().enumerate() { *v = (offset as u8) + (i as u8); } } + #[allow(clippy::cast_possible_truncation)] fn write(&self, _base: u64, offset: u64, data: &[u8]) -> Option> { for (i, v) in data.iter().enumerate() { assert_eq!(*v, (offset as u8) + (i as u8)) diff --git a/src/vmm/src/vstate/interrupts.rs b/src/vmm/src/vstate/interrupts.rs new file mode 100644 index 00000000000..5246144d8f6 --- /dev/null +++ b/src/vmm/src/vstate/interrupts.rs @@ -0,0 +1,202 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; + +use kvm_ioctls::VmFd; +use vmm_sys_util::eventfd::EventFd; + +use crate::Vm; +use crate::logger::{IncMetric, METRICS}; +use crate::snapshot::Persist; + +#[derive(Debug, thiserror::Error, displaydoc::Display)] +/// Errors related with Firecracker interrupts +pub enum InterruptError { + /// Error allocating resources: {0} + Allocator(#[from] vm_allocator::Error), + /// IO error: {0} + Io(#[from] std::io::Error), + /// FamStruct error: {0} + FamStruct(#[from] vmm_sys_util::fam::Error), + /// KVM error: {0} + Kvm(#[from] kvm_ioctls::Error), + /// Invalid vector index: {0} + InvalidVectorIndex(usize), +} + +/// Configuration data for an MSI-X interrupt. +#[derive(Copy, Clone, Debug, Default)] +pub struct MsixVectorConfig { + /// High address to delivery message signaled interrupt. + pub high_addr: u32, + /// Low address to delivery message signaled interrupt. + pub low_addr: u32, + /// Data to write to delivery message signaled interrupt. + pub data: u32, + /// Unique ID of the device to delivery message signaled interrupt. + pub devid: u32, +} + +/// Type that describes an allocated interrupt +#[derive(Debug)] +pub struct MsixVector { + /// GSI used for this vector + pub gsi: u32, + /// EventFd used for this vector + pub event_fd: EventFd, + /// Flag determining whether the vector is enabled + pub enabled: AtomicBool, +} + +impl MsixVector { + /// Create a new [`MsixVector`] of a particular type + pub fn new(gsi: u32, enabled: bool) -> Result { + Ok(MsixVector { + gsi, + event_fd: EventFd::new(libc::EFD_NONBLOCK)?, + enabled: AtomicBool::new(enabled), + }) + } +} + +impl MsixVector { + /// Enable vector + pub fn enable(&self, vmfd: &VmFd) -> Result<(), InterruptError> { + if !self.enabled.load(Ordering::Acquire) { + vmfd.register_irqfd(&self.event_fd, self.gsi)?; + self.enabled.store(true, Ordering::Release); + } + + Ok(()) + } + + /// Disable vector + pub fn disable(&self, vmfd: &VmFd) -> Result<(), InterruptError> { + if self.enabled.load(Ordering::Acquire) { + vmfd.unregister_irqfd(&self.event_fd, self.gsi)?; + self.enabled.store(false, Ordering::Release); + } + + Ok(()) + } +} + +#[derive(Debug)] +/// MSI interrupts created for a VirtIO device +pub struct MsixVectorGroup { + /// Reference to the Vm object, which we'll need for interacting with the underlying KVM Vm + /// file descriptor + pub vm: Arc, + /// A list of all the MSI-X vectors + pub vectors: Vec, +} + +impl MsixVectorGroup { + /// Returns the number of vectors in this group + pub fn num_vectors(&self) -> u16 { + // It is safe to unwrap here. We are creating `MsixVectorGroup` objects through the + // `Vm::create_msix_group` where the argument for the number of `vectors` is a `u16`. + u16::try_from(self.vectors.len()).unwrap() + } + + /// Enable the MSI-X vector group + pub fn enable(&self) -> Result<(), InterruptError> { + for route in &self.vectors { + route.enable(&self.vm.common.fd)?; + } + + Ok(()) + } + + /// Disable the MSI-X vector group + pub fn disable(&self) -> Result<(), InterruptError> { + for route in &self.vectors { + route.disable(&self.vm.common.fd)?; + } + + Ok(()) + } + + /// Trigger an interrupt for a vector in the group + pub fn trigger(&self, index: usize) -> Result<(), InterruptError> { + self.notifier(index) + .ok_or(InterruptError::InvalidVectorIndex(index))? + .write(1)?; + METRICS.interrupts.triggers.inc(); + Ok(()) + } + + /// Get a referece to the underlying `EventFd` used to trigger interrupts for a vector in the + /// group + pub fn notifier(&self, index: usize) -> Option<&EventFd> { + self.vectors.get(index).map(|route| &route.event_fd) + } + + /// Update the MSI-X configuration for a vector in the group + pub fn update( + &self, + index: usize, + msi_config: MsixVectorConfig, + masked: bool, + set_gsi: bool, + ) -> Result<(), InterruptError> { + if let Some(vector) = self.vectors.get(index) { + METRICS.interrupts.config_updates.inc(); + // When an interrupt is masked the GSI will not be passed to KVM through + // KVM_SET_GSI_ROUTING. So, call [`disable()`] to unregister the interrupt file + // descriptor before passing the interrupt routes to KVM + if masked { + vector.disable(&self.vm.common.fd)?; + } + + self.vm.register_msi(vector, masked, msi_config)?; + if set_gsi { + self.vm + .set_gsi_routes() + .map_err(|err| std::io::Error::other(format!("MSI-X update: {err}")))? + } + + // Assign KVM_IRQFD after KVM_SET_GSI_ROUTING to avoid + // panic on kernel which does not have commit a80ced6ea514 + // (KVM: SVM: fix panic on out-of-bounds guest IRQ). + if !masked { + vector.enable(&self.vm.common.fd)?; + } + + return Ok(()); + } + + Err(InterruptError::InvalidVectorIndex(index)) + } +} + +impl<'a> Persist<'a> for MsixVectorGroup { + type State = Vec; + type ConstructorArgs = Arc; + type Error = InterruptError; + + fn save(&self) -> Self::State { + // We don't save the "enabled" state of the MSI interrupt. PCI devices store the MSI-X + // configuration and make sure that the vector is enabled during the restore path if it was + // initially enabled + self.vectors.iter().map(|route| route.gsi).collect() + } + + fn restore( + constructor_args: Self::ConstructorArgs, + state: &Self::State, + ) -> std::result::Result { + let mut vectors = Vec::with_capacity(state.len()); + + for gsi in state { + vectors.push(MsixVector::new(*gsi, false)?); + } + + Ok(MsixVectorGroup { + vm: constructor_args, + vectors, + }) + } +} diff --git a/src/vmm/src/vstate/mod.rs b/src/vmm/src/vstate/mod.rs index f4fa25914d0..5e58f09e05a 100644 --- a/src/vmm/src/vstate/mod.rs +++ b/src/vmm/src/vstate/mod.rs @@ -1,6 +1,10 @@ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 +/// Module with the implementation of a Bus that can hold devices. +pub mod bus; +/// VM interrupts implementation. +pub mod interrupts; /// Module with Kvm implementation. pub mod kvm; /// Module with GuestMemory implementation. diff --git a/src/vmm/src/vstate/vcpu.rs b/src/vmm/src/vstate/vcpu.rs index b0f12990710..554ff3c9d62 100644 --- a/src/vmm/src/vstate/vcpu.rs +++ b/src/vmm/src/vstate/vcpu.rs @@ -27,6 +27,7 @@ use crate::logger::{IncMetric, METRICS}; use crate::seccomp::{BpfProgram, BpfProgramRef}; use crate::utils::signal::{Killable, register_signal_handler, sigrtmin}; use crate::utils::sm::StateMachine; +use crate::vstate::bus::Bus; use crate::vstate::vm::Vm; /// Signal number (SIGRTMIN) used to kick Vcpus. @@ -142,7 +143,7 @@ impl Vcpu { } /// Sets a MMIO bus for this vcpu. - pub fn set_mmio_bus(&mut self, mmio_bus: Arc) { + pub fn set_mmio_bus(&mut self, mmio_bus: Arc) { self.kvm_vcpu.peripherals.mmio_bus = Some(mmio_bus); } @@ -699,7 +700,6 @@ pub(crate) mod tests { use std::sync::{Arc, Barrier, Mutex}; use linux_loader::loader::KernelLoader; - use vm_device::BusDevice; use vmm_sys_util::errno; use super::*; @@ -708,6 +708,7 @@ pub(crate) mod tests { use crate::seccomp::get_empty_filters; use crate::utils::mib_to_bytes; use crate::utils::signal::validate_signal_num; + use crate::vstate::bus::BusDevice; use crate::vstate::kvm::Kvm; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuError as EmulationError; @@ -818,7 +819,7 @@ pub(crate) mod tests { ) ); - let bus = Arc::new(vm_device::Bus::new()); + let bus = Arc::new(Bus::new()); let dummy = Arc::new(Mutex::new(DummyDevice)); bus.insert(dummy, 0x10, 0x10).unwrap(); vcpu.set_mmio_bus(bus); @@ -965,7 +966,7 @@ pub(crate) mod tests { fn test_set_mmio_bus() { let (_, _, mut vcpu) = setup_vcpu(0x1000); assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_none()); - vcpu.set_mmio_bus(Arc::new(vm_device::Bus::new())); + vcpu.set_mmio_bus(Arc::new(Bus::new())); assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_some()); } diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 27d43176367..8a479348747 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -9,9 +9,9 @@ use std::collections::HashMap; use std::fs::OpenOptions; use std::io::Write; use std::path::Path; -use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex, MutexGuard}; +use anyhow::anyhow; #[cfg(target_arch = "x86_64")] use kvm_bindings::KVM_IRQCHIP_IOAPIC; use kvm_bindings::{ @@ -19,22 +19,20 @@ use kvm_bindings::{ KvmIrqRouting, kvm_irq_routing_entry, kvm_userspace_memory_region, }; use kvm_ioctls::VmFd; -use log::{debug, error}; -use pci::DeviceRelocation; +use log::debug; use serde::{Deserialize, Serialize}; -use vm_device::interrupt::{ - InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig, -}; use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState}; use crate::arch::{GSI_MSI_END, host_page_size}; use crate::logger::info; +use crate::pci::{DeviceRelocation, PciDevice}; use crate::persist::CreateSnapshotError; -use crate::snapshot::Persist; use crate::utils::u64_to_usize; use crate::vmm_config::snapshot::SnapshotType; +use crate::vstate::bus::Bus; +use crate::vstate::interrupts::{InterruptError, MsixVector, MsixVectorConfig, MsixVectorGroup}; use crate::vstate::memory::{ Address, GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion, GuestRegionMmap, }; @@ -42,19 +40,6 @@ use crate::vstate::resources::ResourceAllocator; use crate::vstate::vcpu::VcpuError; use crate::{DirtyBitmap, Vcpu, mem_size_mib}; -#[derive(Debug, thiserror::Error, displaydoc::Display)] -/// Errors related with Firecracker interrupts -pub enum InterruptError { - /// Error allocating resources: {0} - Allocator(#[from] vm_allocator::Error), - /// EventFd error: {0} - EventFd(std::io::Error), - /// FamStruct error: {0} - FamStruct(#[from] vmm_sys_util::fam::Error), - /// KVM error: {0} - Kvm(#[from] kvm_ioctls::Error), -} - #[derive(Debug, Serialize, Deserialize)] /// A struct representing an interrupt line used by some device of the microVM pub struct RoutingEntry { @@ -62,179 +47,6 @@ pub struct RoutingEntry { masked: bool, } -/// Type that describes an allocated interrupt -#[derive(Debug)] -pub struct MsiVector { - /// GSI used for this vector - pub gsi: u32, - /// EventFd used for this vector - pub event_fd: EventFd, - /// Flag determining whether the vector is enabled - pub enabled: AtomicBool, -} - -impl MsiVector { - /// Create a new [`MsiVector`] of a particular type - pub fn new(gsi: u32, enabled: bool) -> Result { - Ok(MsiVector { - gsi, - event_fd: EventFd::new(libc::EFD_NONBLOCK).map_err(InterruptError::EventFd)?, - enabled: AtomicBool::new(enabled), - }) - } -} - -impl MsiVector { - /// Enable vector - fn enable(&self, vmfd: &VmFd) -> Result<(), errno::Error> { - if !self.enabled.load(Ordering::Acquire) { - vmfd.register_irqfd(&self.event_fd, self.gsi)?; - self.enabled.store(true, Ordering::Release); - } - - Ok(()) - } - - /// Disable vector - fn disable(&self, vmfd: &VmFd) -> Result<(), errno::Error> { - if self.enabled.load(Ordering::Acquire) { - vmfd.unregister_irqfd(&self.event_fd, self.gsi)?; - self.enabled.store(false, Ordering::Release); - } - - Ok(()) - } -} - -#[derive(Debug)] -/// MSI interrupts created for a VirtIO device -pub struct MsiVectorGroup { - vm: Arc, - irq_routes: Vec, -} - -impl MsiVectorGroup { - /// Returns the number of vectors in this group - pub fn num_vectors(&self) -> u16 { - // It is safe to unwrap here. We are creating `MsiVectorGroup` objects through the - // `Vm::create_msix_group` where the argument for the number of `irq_routes` is a `u16`. - u16::try_from(self.irq_routes.len()).unwrap() - } -} - -impl<'a> Persist<'a> for MsiVectorGroup { - type State = Vec; - type ConstructorArgs = Arc; - type Error = InterruptError; - - fn save(&self) -> Self::State { - // We don't save the "enabled" state of the MSI interrupt. PCI devices store the MSI-X - // configuration and make sure that the vector is enabled during the restore path if it was - // initially enabled - self.irq_routes.iter().map(|route| route.gsi).collect() - } - - fn restore( - constructor_args: Self::ConstructorArgs, - state: &Self::State, - ) -> std::result::Result { - let mut irq_routes = Vec::with_capacity(state.len()); - - for gsi in state { - irq_routes.push(MsiVector::new(*gsi, false)?); - } - - Ok(MsiVectorGroup { - vm: constructor_args, - irq_routes, - }) - } -} - -impl InterruptSourceGroup for MsiVectorGroup { - fn enable(&self) -> vm_device::interrupt::Result<()> { - for route in &self.irq_routes { - route.enable(&self.vm.common.fd)?; - } - - Ok(()) - } - - fn disable(&self) -> vm_device::interrupt::Result<()> { - for route in &self.irq_routes { - route.disable(&self.vm.common.fd)?; - } - - Ok(()) - } - - fn trigger(&self, index: InterruptIndex) -> vm_device::interrupt::Result<()> { - self.notifier(index) - .ok_or_else(|| { - std::io::Error::other(format!("trigger: invalid interrupt index {index}")) - })? - .write(1) - } - - fn notifier(&self, index: InterruptIndex) -> Option<&EventFd> { - self.irq_routes - .get(index as usize) - .map(|route| &route.event_fd) - } - - fn update( - &self, - index: InterruptIndex, - config: InterruptSourceConfig, - masked: bool, - set_gsi: bool, - ) -> vm_device::interrupt::Result<()> { - let msi_config = match config { - InterruptSourceConfig::LegacyIrq(_) => { - return Err(std::io::Error::other( - "MSI-x update: invalid configuration type", - )); - } - InterruptSourceConfig::MsiIrq(config) => config, - }; - - if let Some(route) = self.irq_routes.get(index as usize) { - // When an interrupt is masked the GSI will not be passed to KVM through - // KVM_SET_GSI_ROUTING. So, call [`disable()`] to unregister the interrupt file - // descriptor before passing the interrupt routes to KVM - if masked { - route.disable(&self.vm.common.fd)?; - } - - self.vm.register_msi(route, masked, msi_config)?; - if set_gsi { - self.vm - .set_gsi_routes() - .map_err(|err| std::io::Error::other(format!("MSI-X update: {err}")))? - } - - // Assign KVM_IRQFD after KVM_SET_GSI_ROUTING to avoid - // panic on kernel which does not have commit a80ced6ea514 - // (KVM: SVM: fix panic on out-of-bounds guest IRQ). - if !masked { - route.enable(&self.vm.common.fd)?; - } - - return Ok(()); - } - - Err(std::io::Error::other(format!( - "MSI-X update: invalid vector index {index}" - ))) - } - - fn set_gsi(&self) -> vm_device::interrupt::Result<()> { - self.vm - .set_gsi_routes() - .map_err(|err| std::io::Error::other(format!("MSI-X update: {err}"))) - } -} - /// Architecture independent parts of a VM. #[derive(Debug)] pub struct VmCommon { @@ -248,7 +60,7 @@ pub struct VmCommon { /// Allocator for VM resources pub resource_allocator: Mutex, /// MMIO bus - pub mmio_bus: Arc, + pub mmio_bus: Arc, } /// Errors associated with the wrappers over KVM ioctls. @@ -324,7 +136,7 @@ impl Vm { guest_memory: GuestMemoryMmap::default(), interrupts: Mutex::new(HashMap::with_capacity(GSI_MSI_END as usize + 1)), resource_allocator: Mutex::new(ResourceAllocator::new()), - mmio_bus: Arc::new(vm_device::Bus::new()), + mmio_bus: Arc::new(Bus::new()), }) } @@ -549,9 +361,9 @@ impl Vm { /// Register an MSI device interrupt pub fn register_msi( &self, - route: &MsiVector, + route: &MsixVector, masked: bool, - config: MsiIrqSourceConfig, + config: MsixVectorConfig, ) -> Result<(), errno::Error> { let mut entry = kvm_irq_routing_entry { gsi: route.gsi, @@ -590,18 +402,18 @@ impl Vm { } /// Create a group of MSI-X interrupts - pub fn create_msix_group(vm: Arc, count: u16) -> Result { + pub fn create_msix_group(vm: Arc, count: u16) -> Result { debug!("Creating new MSI group with {count} vectors"); - let mut irq_routes = Vec::with_capacity(count as usize); + let mut vectors = Vec::with_capacity(count as usize); for gsi in vm .resource_allocator() .allocate_gsi_msi(count as u32)? .iter() { - irq_routes.push(MsiVector::new(*gsi, false)?); + vectors.push(MsixVector::new(*gsi, false)?); } - Ok(MsiVectorGroup { vm, irq_routes }) + Ok(MsixVectorGroup { vm, vectors }) } /// Set GSI routes to KVM @@ -668,20 +480,21 @@ impl DeviceRelocation for Vm { _old_base: u64, _new_base: u64, _len: u64, - _pci_dev: &mut dyn pci::PciDevice, - ) -> Result<(), std::io::Error> { - error!("pci: device relocation not supported"); - Err(std::io::Error::from(std::io::ErrorKind::Unsupported)) + _pci_dev: &mut dyn PciDevice, + ) -> Result<(), anyhow::Error> { + Err(anyhow!("pci: device relocation not supported")) } } #[cfg(test)] pub(crate) mod tests { - use vm_device::interrupt::{InterruptSourceConfig, LegacyIrqSourceConfig}; + use std::sync::atomic::Ordering; + use vm_memory::GuestAddress; use vm_memory::mmap::MmapRegionBuilder; use super::*; + use crate::snapshot::Persist; #[cfg(target_arch = "x86_64")] use crate::snapshot::Snapshot; use crate::test_utils::single_region_mem_raw; @@ -798,7 +611,7 @@ pub(crate) mod tests { vm.setup_irqchip(1).unwrap(); } - fn create_msix_group(vm: &Arc) -> MsiVectorGroup { + fn create_msix_group(vm: &Arc) -> MsixVectorGroup { Vm::create_msix_group(vm.clone(), 4).unwrap() } @@ -818,13 +631,13 @@ pub(crate) mod tests { let msix_group = create_msix_group(&vm); // Initially all vectors are disabled - for route in &msix_group.irq_routes { + for route in &msix_group.vectors { assert!(!route.enabled.load(Ordering::Acquire)) } // Enable works msix_group.enable().unwrap(); - for route in &msix_group.irq_routes { + for route in &msix_group.vectors { assert!(route.enabled.load(Ordering::Acquire)); } // Enabling an enabled group doesn't error out @@ -832,7 +645,7 @@ pub(crate) mod tests { // Disable works msix_group.disable().unwrap(); - for route in &msix_group.irq_routes { + for route in &msix_group.vectors { assert!(!route.enabled.load(Ordering::Acquire)) } // Disabling a disabled group doesn't error out @@ -868,29 +681,18 @@ pub(crate) mod tests { assert!(msix_group.notifier(4).is_none()); } - #[test] - fn test_msi_vector_group_update_wrong_config() { - let (_, vm) = setup_vm_with_memory(mib_to_bytes(128)); - let vm = Arc::new(vm); - let msix_group = create_msix_group(&vm); - let irq_config = LegacyIrqSourceConfig { irqchip: 0, pin: 0 }; - msix_group - .update(0, InterruptSourceConfig::LegacyIrq(irq_config), true, true) - .unwrap_err(); - } - #[test] fn test_msi_vector_group_update_invalid_vector() { let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); enable_irqchip(&mut vm); let vm = Arc::new(vm); let msix_group = create_msix_group(&vm); - let config = InterruptSourceConfig::MsiIrq(MsiIrqSourceConfig { + let config = MsixVectorConfig { high_addr: 0x42, low_addr: 0x12, data: 0x12, devid: 0xafa, - }); + }; msix_group.update(0, config, true, true).unwrap(); msix_group.update(4, config, true, true).unwrap_err(); } @@ -904,7 +706,7 @@ pub(crate) mod tests { let msix_group = create_msix_group(&vm); // Set some configuration for the vectors. Initially all are masked - let mut config = MsiIrqSourceConfig { + let mut config = MsixVectorConfig { high_addr: 0x42, low_addr: 0x13, data: 0x12, @@ -912,13 +714,11 @@ pub(crate) mod tests { }; for i in 0..4 { config.data = 0x12 * i; - msix_group - .update(i, InterruptSourceConfig::MsiIrq(config), true, false) - .unwrap(); + msix_group.update(i as usize, config, true, false).unwrap(); } // All vectors should be disabled - for vector in &msix_group.irq_routes { + for vector in &msix_group.vectors { assert!(!vector.enabled.load(Ordering::Acquire)); } @@ -956,9 +756,7 @@ pub(crate) mod tests { // Updating the config of a vector should enable its route (and only its route) config.data = 0; - msix_group - .update(0, InterruptSourceConfig::MsiIrq(config), false, true) - .unwrap(); + msix_group.update(0, config, false, true).unwrap(); for i in 0..4 { let gsi = crate::arch::GSI_MSI_START + i; let interrupts = vm.common.interrupts.lock().unwrap(); @@ -975,31 +773,6 @@ pub(crate) mod tests { } } - #[cfg(target_arch = "x86_64")] - #[test] - fn test_msi_vector_group_set_gsi_without_ioapic() { - // Setting GSI routes without IOAPIC setup should fail on x86. Apparently, it doesn't fail - // on Aarch64 - let (_, vm) = setup_vm_with_memory(mib_to_bytes(128)); - let vm = Arc::new(vm); - let msix_group = create_msix_group(&vm); - let err = msix_group.set_gsi().unwrap_err(); - assert_eq!( - format!("{err}"), - "MSI-X update: KVM error: Invalid argument (os error 22)" - ); - } - - #[test] - fn test_msi_vector_group_set_gsi() { - let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); - enable_irqchip(&mut vm); - let vm = Arc::new(vm); - let msix_group = create_msix_group(&vm); - - msix_group.set_gsi().unwrap(); - } - #[test] fn test_msi_vector_group_persistence() { let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); @@ -1009,14 +782,14 @@ pub(crate) mod tests { msix_group.enable().unwrap(); let state = msix_group.save(); - let restored_group = MsiVectorGroup::restore(vm, &state).unwrap(); + let restored_group = MsixVectorGroup::restore(vm, &state).unwrap(); assert_eq!(msix_group.num_vectors(), restored_group.num_vectors()); // Even if an MSI group is enabled, we don't save it as such. During restoration, the PCI // transport will make sure the correct config is set for the vectors and enable them // accordingly. - for (id, vector) in msix_group.irq_routes.iter().enumerate() { - let new_vector = &restored_group.irq_routes[id]; + for (id, vector) in msix_group.vectors.iter().enumerate() { + let new_vector = &restored_group.vectors[id]; assert_eq!(vector.gsi, new_vector.gsi); assert!(!new_vector.enabled.load(Ordering::Acquire)); } diff --git a/src/vmm/tests/devices.rs b/src/vmm/tests/devices.rs index a1ddf124cf7..5a228b96f96 100644 --- a/src/vmm/tests/devices.rs +++ b/src/vmm/tests/devices.rs @@ -12,10 +12,10 @@ use std::sync::{Arc, Mutex}; use event_manager::{EventManager, SubscriberOps}; use libc::EFD_NONBLOCK; -use vm_device::BusDevice; use vm_superio::Serial; use vmm::devices::legacy::serial::SerialOut; use vmm::devices::legacy::{EventFdTrigger, SerialEventsWrapper, SerialWrapper}; +use vmm::vstate::bus::BusDevice; use vmm_sys_util::eventfd::EventFd; fn create_serial( diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index 3a93ced14d3..864481b68e2 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -292,6 +292,7 @@ def validate_fc_metrics(metrics): "entropy_rate_limiter_throttled", "rate_limiter_event_count", ], + "interrupts": ["triggers", "config_updates"], } # validate timestamp before jsonschema validation which some more time