From 9618d2ec4df6d4f3dadaba75fe2ecd19dacd2fc5 Mon Sep 17 00:00:00 2001 From: Pascal Scholz Date: Fri, 7 Nov 2025 09:34:50 +0100 Subject: [PATCH 1/6] pci: Allow for device ID allocation on a bus Allocating a device ID is crucial for assigning a specific ID to a device. We need this to implement configurable PCI BDF. Signed-off-by: Pascal Scholz On-behalf-of: SAP pascal.scholz@sap.com --- pci/src/bus.rs | 136 ++++++++++++++++++++++++++++++++++++++--- vmm/src/pci_segment.rs | 2 +- 2 files changed, 130 insertions(+), 8 deletions(-) diff --git a/pci/src/bus.rs b/pci/src/bus.rs index fd19321de5..8dd544fc3f 100644 --- a/pci/src/bus.rs +++ b/pci/src/bus.rs @@ -166,15 +166,42 @@ impl PciBus { Ok(()) } - pub fn next_device_id(&mut self) -> Result { - for (idx, device_id) in self.device_ids.iter_mut().enumerate() { - if !(*device_id) { - *device_id = true; - return Ok(idx as u32); + /// Allocates a PCI device ID on the bus. + /// + /// - `id`: ID to allocate on the bus. If [`None`], the next free + /// device ID on the bus is allocated, else the ID given is + /// allocated + /// + /// ## Errors + /// * Returns [`PciRootError::AlreadyInUsePciDeviceSlot`] in case + /// the ID requested is already allocated. + /// * Returns [`PciRootError::InvalidPciDeviceSlot`] in case the + /// requested ID exceeds the maximum number of devices allowed per + /// bus (see [`NUM_DEVICE_IDS`]). + /// * If `id` is [`None`]: Returns + /// [`PciRootError::NoPciDeviceSlotAvailable`] if no free device + /// slot is available on the bus. + pub fn allocate_device_id(&mut self, id: Option) -> Result { + if let Some(id) = id { + if (id as usize) < NUM_DEVICE_IDS { + if !self.device_ids[id as usize] { + self.device_ids[id as usize] = true; + Ok(id as u32) + } else { + Err(PciRootError::AlreadyInUsePciDeviceSlot(id as usize)) + } + } else { + return Err(PciRootError::InvalidPciDeviceSlot(id as usize)); + } + } else { + for (idx, device_id) in self.device_ids.iter_mut().enumerate() { + if !(*device_id) { + *device_id = true; + return Ok(idx as u32); + } } + Err(PciRootError::NoPciDeviceSlotAvailable) } - - Err(PciRootError::NoPciDeviceSlotAvailable) } pub fn get_device_id(&mut self, id: usize) -> Result<()> { @@ -484,3 +511,98 @@ fn parse_io_config_address(config_address: u32) -> (usize, usize, usize, usize) shift_and_mask(config_address, REGISTER_NUMBER_OFFSET, REGISTER_NUMBER_MASK), ) } + +#[cfg(test)] +mod tests { + // Note this useful idiom: importing names from outer (for mod tests) scope. + use super::*; + + mod pci_bus_tests { + use super::*; + + #[derive(Debug)] + struct MocRelocDevice; + + impl DeviceRelocation for MocRelocDevice { + fn move_bar( + &self, + _old_base: u64, + _new_base: u64, + _len: u64, + _pci_dev: &mut dyn PciDevice, + _region_type: PciBarRegionType, + ) -> std::result::Result<(), std::io::Error> { + Ok(()) + } + } + + fn setup_bus() -> PciBus { + let pci_root = PciRoot::new(None); + let moc_device_reloc = Arc::new(MocRelocDevice {}); + PciBus::new(pci_root, moc_device_reloc) + } + + #[test] + // Test to acquire all IDs that can be acquired + fn allocate_device_id_next_free() { + // The first address is occupied by the root + let mut bus = setup_bus(); + for expected_id in 1..NUM_DEVICE_IDS { + assert_eq!(expected_id as u32, bus.allocate_device_id(None).unwrap()); + } + } + + #[test] + // Test that requesting specific ID work + fn allocate_device_id_request_id() -> std::result::Result<(), Box> { + // The first address is occupied by the root + let mut bus = setup_bus(); + let max_id = (NUM_DEVICE_IDS - 1).try_into()?; + assert_eq!(0x01_u32, bus.allocate_device_id(Some(0x01))?); + assert_eq!(0x10_u32, bus.allocate_device_id(Some(0x10))?); + assert_eq!(max_id as u32, bus.allocate_device_id(Some(max_id))?); + Ok(()) + } + + #[test] + // Test that requesting the same ID twice fails + fn allocate_device_id_request_id_twice_fails() + -> std::result::Result<(), Box> { + let mut bus = setup_bus(); + let max_id = (NUM_DEVICE_IDS - 1).try_into()?; + bus.allocate_device_id(Some(max_id))?; + let _result = bus.allocate_device_id(Some(max_id)); + assert!(matches!( + PciRootError::AlreadyInUsePciDeviceSlot(max_id.into()), + _result + )); + Ok(()) + } + + #[test] + // Test to request an invalid ID + fn allocate_device_id_request_invalid_id_fails() + -> std::result::Result<(), Box> { + let mut bus = setup_bus(); + let max_id = (NUM_DEVICE_IDS + 1).try_into()?; + let _result = bus.allocate_device_id(Some(max_id)); + assert!(matches!( + PciRootError::InvalidPciDeviceSlot(max_id.into()), + _result + )); + Ok(()) + } + + #[test] + // Test to acquire an ID when all IDs were already acquired + fn allocate_device_id_none_left() { + // The first address is occupied by the root + let mut bus = setup_bus(); + for expected_id in 1..NUM_DEVICE_IDS { + assert_eq!(expected_id as u32, bus.allocate_device_id(None).unwrap()); + } + let _result = bus.allocate_device_id(None); + assert!(matches!(PciRootError::NoPciDeviceSlotAvailable, _result)); + } + } +} diff --git a/vmm/src/pci_segment.rs b/vmm/src/pci_segment.rs index 345869c1da..d6995aa1c2 100644 --- a/vmm/src/pci_segment.rs +++ b/vmm/src/pci_segment.rs @@ -170,7 +170,7 @@ impl PciSegment { self.pci_bus .lock() .unwrap() - .next_device_id() + .allocate_device_id(None) .map_err(DeviceManagerError::NextPciDeviceId)? as u8, 0, )) From 9efc3ad56075ff52c3b56b337e3b8fae4328e8e9 Mon Sep 17 00:00:00 2001 From: Pascal Scholz Date: Fri, 7 Nov 2025 09:41:38 +0100 Subject: [PATCH 2/6] vmm: Allow for allocating specific PCI BDFs We replace `next_device_bdf` with `allocate_device_bdf`. The new function optionally accepts a device ID to use when allocating a BDF. If not provided, we fall back to allocating the next available BDF. This allows for creating specific BDFs on the respective segment. Signed-off-by: Pascal Scholz On-behalf-of: SAP pascal.scholz@sap.com --- pci/src/bus.rs | 6 +++--- vmm/src/device_manager.rs | 9 +++++---- vmm/src/pci_segment.rs | 13 ++++++++++--- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/pci/src/bus.rs b/pci/src/bus.rs index 8dd544fc3f..d5b4d189e0 100644 --- a/pci/src/bus.rs +++ b/pci/src/bus.rs @@ -169,8 +169,8 @@ impl PciBus { /// Allocates a PCI device ID on the bus. /// /// - `id`: ID to allocate on the bus. If [`None`], the next free - /// device ID on the bus is allocated, else the ID given is - /// allocated + /// device ID on the bus is allocated, else the ID given is + /// allocated /// /// ## Errors /// * Returns [`PciRootError::AlreadyInUsePciDeviceSlot`] in case @@ -191,7 +191,7 @@ impl PciBus { Err(PciRootError::AlreadyInUsePciDeviceSlot(id as usize)) } } else { - return Err(PciRootError::InvalidPciDeviceSlot(id as usize)); + Err(PciRootError::InvalidPciDeviceSlot(id as usize)) } } else { for (idx, device_id) in self.device_ids.iter_mut().enumerate() { diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 3f09c6f828..8cd0c6a3ed 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -467,7 +467,7 @@ pub enum DeviceManagerError { /// Failed to find an available PCI device ID. #[error("Failed to find an available PCI device ID")] - NextPciDeviceId(#[source] pci::PciRootError), + AllocatePciDeviceId(#[source] pci::PciRootError), /// Could not reserve the PCI device ID. #[error("Could not reserve the PCI device ID")] @@ -3464,7 +3464,7 @@ impl DeviceManager { let pci_segment_id = 0x0_u16; let (pci_segment_id, pci_device_bdf, resources) = - self.pci_resources(&id, pci_segment_id)?; + self.pci_resources(&id, pci_segment_id, None)?; info!("Creating pvmemcontrol device: id = {}", id); let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) = @@ -4244,7 +4244,7 @@ impl DeviceManager { info!("Creating ivshmem device {}", id); let (pci_segment_id, pci_device_bdf, resources) = - self.pci_resources(&id, pci_segment_id)?; + self.pci_resources(&id, pci_segment_id, None)?; let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); let ivshmem_ops = Arc::new(Mutex::new(IvshmemHandler { @@ -4315,7 +4315,8 @@ impl DeviceManager { (pci_segment_id, pci_device_bdf, resources) } else { - let pci_device_bdf = self.pci_segments[pci_segment_id as usize].next_device_bdf()?; + let pci_device_bdf = + self.pci_segments[pci_segment_id as usize].allocate_device_bdf(None)?; (pci_segment_id, pci_device_bdf, None) }) diff --git a/vmm/src/pci_segment.rs b/vmm/src/pci_segment.rs index d6995aa1c2..7477020e52 100644 --- a/vmm/src/pci_segment.rs +++ b/vmm/src/pci_segment.rs @@ -163,15 +163,22 @@ impl PciSegment { ) } - pub(crate) fn next_device_bdf(&self) -> DeviceManagerResult { + /// Allocates a device BDF on this PCI segment + /// + /// - `device_id`: Device ID to request for BDF allocation + /// + /// ## Errors + /// * [`DeviceManagerError::AllocatePciDeviceId`] if device ID + /// allocation on the bus fails. + pub(crate) fn allocate_device_bdf(&self, device_id: Option) -> DeviceManagerResult { Ok(PciBdf::new( self.id, 0, self.pci_bus .lock() .unwrap() - .allocate_device_id(None) - .map_err(DeviceManagerError::NextPciDeviceId)? as u8, + .allocate_device_id(device_id) + .map_err(DeviceManagerError::AllocatePciDeviceId)? as u8, 0, )) } From ee66fc1f2425e650908af2e853b304ba6ef0881d Mon Sep 17 00:00:00 2001 From: Pascal Scholz Date: Fri, 7 Nov 2025 09:45:22 +0100 Subject: [PATCH 3/6] vmm: Add tests for `allocate_device_bdf` in `PciSegment` Next to tests for `allocate_device_bdf`, we introduce a new constructor `new_without_address_manager`, only available in the test build. As there is no way to instantiate an `AddressManager` in the tests, we use this constructor to work around this. Signed-off-by: Pascal Scholz On-behalf-of: SAP pascal.scholz@sap.com --- vmm/src/pci_segment.rs | 155 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/vmm/src/pci_segment.rs b/vmm/src/pci_segment.rs index 7477020e52..104139fd20 100644 --- a/vmm/src/pci_segment.rs +++ b/vmm/src/pci_segment.rs @@ -208,6 +208,65 @@ impl PciSegment { Ok(()) } + + #[cfg(test)] + /// Creates a PciSegment without the need for an [`AddressManager`] + /// for testing purpose. + /// + /// An [`AddressManager`] would otherwise be required to create + /// [`PciBus`] instances. Instead, we use any struct that implements + /// [`DeviceRelocation`] to instantiate a [`PciBus`]. + pub(crate) fn new_without_address_manager( + id: u16, + numa_node: u32, + mem32_allocator: Arc>, + mem64_allocator: Arc>, + pci_irq_slots: &[u8; 32], + device_reloc: Arc, + ) -> DeviceManagerResult { + let pci_root = PciRoot::new(None); + let pci_bus = Arc::new(Mutex::new(PciBus::new(pci_root, device_reloc.clone()))); + + let pci_config_mmio = Arc::new(Mutex::new(PciConfigMmio::new(Arc::clone(&pci_bus)))); + let mmio_config_address = + layout::PCI_MMCONFIG_START.0 + layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT * id as u64; + + let start_of_mem32_area = mem32_allocator.lock().unwrap().base().0; + let end_of_mem32_area = mem32_allocator.lock().unwrap().end().0; + + let start_of_mem64_area = mem64_allocator.lock().unwrap().base().0; + let end_of_mem64_area = mem64_allocator.lock().unwrap().end().0; + + let segment = PciSegment { + id, + pci_bus, + pci_config_mmio, + mmio_config_address, + proximity_domain: numa_node, + pci_devices_up: 0, + pci_devices_down: 0, + #[cfg(target_arch = "x86_64")] + pci_config_io: None, + mem32_allocator, + mem64_allocator, + start_of_mem32_area, + end_of_mem32_area, + start_of_mem64_area, + end_of_mem64_area, + pci_irq_slots: *pci_irq_slots, + }; + + info!( + "Adding PCI segment: id={}, PCI MMIO config address: 0x{:x}, mem32 area [0x{:x}-0x{:x}, mem64 area [0x{:x}-0x{:x}", + segment.id, + segment.mmio_config_address, + segment.start_of_mem32_area, + segment.end_of_mem32_area, + segment.start_of_mem64_area, + segment.end_of_mem64_area + ); + Ok(segment) + } } struct PciDevSlot { @@ -480,3 +539,99 @@ impl Aml for PciSegment { .to_aml_bytes(sink) } } + +#[cfg(test)] +mod tests { + // Note this useful idiom: importing names from outer (for mod tests) scope. + use super::*; + + mod pci_segment_tests { + use vm_memory::GuestAddress; + + use super::*; + + #[derive(Debug)] + struct MocRelocDevice; + impl DeviceRelocation for MocRelocDevice { + fn move_bar( + &self, + _old_base: u64, + _new_base: u64, + _len: u64, + _pci_dev: &mut dyn pci::PciDevice, + _region_type: pci::PciBarRegionType, + ) -> std::result::Result<(), std::io::Error> { + Ok(()) + } + } + + fn setup() -> PciSegment { + let guest_addr = 0_u64; + let guest_size = 0x1000_usize; + let allocator_1 = Arc::new(Mutex::new( + AddressAllocator::new(GuestAddress(guest_addr), guest_size as u64).unwrap(), + )); + let allocator_2 = Arc::new(Mutex::new( + AddressAllocator::new(GuestAddress(guest_addr), guest_size as u64).unwrap(), + )); + let moc_device_reloc = Arc::new(MocRelocDevice {}); + let arr = [0_u8; 32]; + + PciSegment::new_without_address_manager( + 0, + 0, + allocator_1, + allocator_2, + &arr, + moc_device_reloc, + ) + .unwrap() + } + + #[test] + // Test the default bdf for an an segment with an empty bus (except for the root device) + fn allocate_device_bdf_default() { + // The first address is occupied by the root + let segment = setup(); + let bdf = segment.allocate_device_bdf(None).unwrap(); + assert_eq!(bdf.segment(), segment.id); + assert_eq!(bdf.bus(), 0); + assert_eq!(bdf.device(), 1); + assert_eq!(bdf.function(), 0); + } + + #[test] + // Test to acquire a bdf with s specific device ID + fn allocate_device_bdf_fixed_device_id() { + // The first address is occupied by the root + let expect_device_id = 0x10_u8; + let segment = setup(); + let bdf = segment.allocate_device_bdf(Some(expect_device_id)).unwrap(); + assert_eq!(bdf.segment(), segment.id); + assert_eq!(bdf.bus(), 0); + assert_eq!(bdf.device(), expect_device_id); + assert_eq!(bdf.function(), 0); + } + + #[test] + // Test to acquire a bdf with invalid device id, one already + // taken and the other being greater then the number of allowed + // devices per bus. + fn allocate_device_bdf_invalid_device_id() { + // The first address is occupied by the root + let already_taken_device_id = 0x0_u8; + let overflow_device_id = 0xff_u8; + let segment = setup(); + let bdf_res = segment.allocate_device_bdf(Some(already_taken_device_id)); + assert!(matches!( + bdf_res, + Err(DeviceManagerError::AllocatePciDeviceId(_)) + )); + let bdf_res = segment.allocate_device_bdf(Some(overflow_device_id)); + assert!(matches!( + bdf_res, + Err(DeviceManagerError::AllocatePciDeviceId(_)) + )); + } + } +} From fbd3b93299fd434a4de9bf556a1f64cffe3df458 Mon Sep 17 00:00:00 2001 From: Pascal Scholz Date: Fri, 7 Nov 2025 09:54:03 +0100 Subject: [PATCH 4/6] vmm: Introduce `addr` argument to PCI device configs and update parser Updates all config structs in order to make the new config option available to all PCI device. Additionally update the parser so the new option becomes available on the CLI. Signed-off-by: Pascal Scholz On-behalf-of: SAP pascal.scholz@sap.com --- src/main.rs | 1 + vmm/src/config.rs | 134 +++++++++++++++++++++++++++++++++----- vmm/src/device_manager.rs | 29 +++++++-- vmm/src/lib.rs | 1 + vmm/src/vm_config.rs | 12 ++++ 5 files changed, 154 insertions(+), 23 deletions(-) diff --git a/src/main.rs b/src/main.rs index d096f28cb1..0be6b36af6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -993,6 +993,7 @@ mod unit_tests { rng: RngConfig { src: PathBuf::from("/dev/urandom"), iommu: false, + bdf_device_id: None, }, balloon: None, fs: None, diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 00d26ec881..814c0c9f7a 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -1068,6 +1068,19 @@ impl RateLimiterGroupConfig { } } +fn parse_addr(s: &str) -> std::result::Result<(u8, u8), OptionParserError> { + let (a, b) = s + .split_once('.') + .ok_or(OptionParserError::InvalidSyntax(s.to_owned()))?; + let a = a + .parse::() + .map_err(|_| OptionParserError::InvalidSyntax(s.to_owned()))?; + let b = b + .parse::() + .map_err(|_| OptionParserError::InvalidSyntax(s.to_owned()))?; + Ok((a, b)) +} + impl DiskConfig { pub const SYNTAX: &'static str = "Disk parameters \ \"path=,readonly=on|off,direct=on|off,iommu=on|off,\ @@ -1077,7 +1090,7 @@ impl DiskConfig { ops_size=,ops_one_time_burst=,ops_refill_time=,\ id=,pci_segment=,rate_limit_group=,\ queue_affinity=,\ - serial="; + serial=, addr="; pub fn parse(disk: &str) -> Result { let mut parser = OptionParser::new(); @@ -1102,7 +1115,8 @@ impl DiskConfig { .add("pci_segment") .add("serial") .add("rate_limit_group") - .add("queue_affinity"); + .add("queue_affinity") + .add("addr"); parser.parse(disk).map_err(Error::ParseDisk)?; let path = parser.get("path").map(PathBuf::from); @@ -1214,6 +1228,13 @@ impl DiskConfig { None }; + let (bdf_device_id, _bdf_function) = if let Some(s) = parser.get("addr") { + let (a, b) = parse_addr(s.as_str()).map_err(Error::ParseDisk)?; + (Some(a), Some(b)) + } else { + (None, None) + }; + Ok(DiskConfig { path, readonly, @@ -1231,6 +1252,7 @@ impl DiskConfig { pci_segment, serial, queue_affinity, + bdf_device_id, }) } @@ -1302,7 +1324,7 @@ impl NetConfig { vhost_user=,socket=,vhost_mode=client|server,\ bw_size=,bw_one_time_burst=,bw_refill_time=,\ ops_size=,ops_one_time_burst=,ops_refill_time=,pci_segment=\ - offload_tso=on|off,offload_ufo=on|off,offload_csum=on|off\""; + offload_tso=on|off,offload_ufo=on|off,offload_csum=on|off, addr=DD.F\""; pub fn parse(net: &str) -> Result { let mut parser = OptionParser::new(); @@ -1331,7 +1353,8 @@ impl NetConfig { .add("ops_size") .add("ops_one_time_burst") .add("ops_refill_time") - .add("pci_segment"); + .add("pci_segment") + .add("addr"); parser.parse(net).map_err(Error::ParseNetwork)?; let tap = parser.get("tap"); @@ -1447,6 +1470,13 @@ impl NetConfig { None }; + let (bdf_device_id, _bdf_function) = if let Some(s) = parser.get("addr") { + let (a, b) = parse_addr(s.as_str()).map_err(Error::ParseDisk)?; + (Some(a), Some(b)) + } else { + (None, None) + }; + let config = NetConfig { tap, ip, @@ -1467,6 +1497,7 @@ impl NetConfig { offload_tso, offload_ufo, offload_csum, + bdf_device_id, }; Ok(config) } @@ -1531,7 +1562,7 @@ impl NetConfig { impl RngConfig { pub fn parse(rng: &str) -> Result { let mut parser = OptionParser::new(); - parser.add("src").add("iommu"); + parser.add("src").add("iommu").add("addr"); parser.parse(rng).map_err(Error::ParseRng)?; let src = PathBuf::from( @@ -1545,19 +1576,30 @@ impl RngConfig { .unwrap_or(Toggle(false)) .0; - Ok(RngConfig { src, iommu }) + let (bdf_device_id, _bdf_function) = if let Some(s) = parser.get("addr") { + let (a, b) = parse_addr(s.as_str()).map_err(Error::ParseDisk)?; + (Some(a), Some(b)) + } else { + (None, None) + }; + + Ok(RngConfig { + src, + iommu, + bdf_device_id, + }) } } impl BalloonConfig { pub const SYNTAX: &'static str = "Balloon parameters \"size=,deflate_on_oom=on|off,\ - free_page_reporting=on|off\""; + free_page_reporting=on|off,addr=\""; pub fn parse(balloon: &str) -> Result { let mut parser = OptionParser::new(); parser.add("size"); parser.add("deflate_on_oom"); - parser.add("free_page_reporting"); + parser.add("free_page_reporting").add("addr"); parser.parse(balloon).map_err(Error::ParseBalloon)?; let size = parser @@ -1578,10 +1620,18 @@ impl BalloonConfig { .unwrap_or(Toggle(false)) .0; + let (bdf_device_id, _bdf_function) = if let Some(s) = parser.get("addr") { + let (a, b) = parse_addr(s.as_str()).map_err(Error::ParseDisk)?; + (Some(a), Some(b)) + } else { + (None, None) + }; + Ok(BalloonConfig { size, deflate_on_oom, free_page_reporting, + bdf_device_id, }) } } @@ -1589,7 +1639,8 @@ impl BalloonConfig { impl FsConfig { pub const SYNTAX: &'static str = "virtio-fs parameters \ \"tag=,socket=,num_queues=,\ - queue_size=,id=,pci_segment=\""; + queue_size=,id=,pci_segment=,\ + addr=\""; pub fn parse(fs: &str) -> Result { let mut parser = OptionParser::new(); @@ -1599,7 +1650,8 @@ impl FsConfig { .add("num_queues") .add("socket") .add("id") - .add("pci_segment"); + .add("pci_segment") + .add("addr"); parser.parse(fs).map_err(Error::ParseFileSystem)?; let tag = parser.get("tag").ok_or(Error::ParseFsTagMissing)?; @@ -1624,6 +1676,13 @@ impl FsConfig { .map_err(Error::ParseFileSystem)? .unwrap_or_default(); + let (bdf_device_id, _bdf_function) = if let Some(s) = parser.get("addr") { + let (a, b) = parse_addr(s.as_str()).map_err(Error::ParseDisk)?; + (Some(a), Some(b)) + } else { + (None, None) + }; + Ok(FsConfig { tag, socket, @@ -1631,6 +1690,7 @@ impl FsConfig { queue_size, id, pci_segment, + bdf_device_id, }) } @@ -1756,7 +1816,7 @@ impl FwCfgItem { impl PmemConfig { pub const SYNTAX: &'static str = "Persistent memory parameters \ \"file=,size=,iommu=on|off,\ - discard_writes=on|off,id=,pci_segment=\""; + discard_writes=on|off,id=,pci_segment=,addr=\""; pub fn parse(pmem: &str) -> Result { let mut parser = OptionParser::new(); @@ -1766,7 +1826,8 @@ impl PmemConfig { .add("iommu") .add("discard_writes") .add("id") - .add("pci_segment"); + .add("pci_segment") + .add("addr"); parser.parse(pmem).map_err(Error::ParsePersistentMemory)?; let file = PathBuf::from(parser.get("file").ok_or(Error::ParsePmemFileMissing)?); @@ -1790,6 +1851,13 @@ impl PmemConfig { .map_err(Error::ParsePersistentMemory)? .unwrap_or_default(); + let (bdf_device_id, _bdf_function) = if let Some(s) = parser.get("addr") { + let (a, b) = parse_addr(s.as_str()).map_err(Error::ParseDisk)?; + (Some(a), Some(b)) + } else { + (None, None) + }; + Ok(PmemConfig { file, size, @@ -1797,6 +1865,7 @@ impl PmemConfig { discard_writes, id, pci_segment, + bdf_device_id, }) } @@ -1942,7 +2011,8 @@ impl DebugConsoleConfig { } impl DeviceConfig { - pub const SYNTAX: &'static str = "Direct device assignment parameters \"path=,iommu=on|off,id=,pci_segment=\""; + pub const SYNTAX: &'static str = "Direct device assignment parameters \"\ + path=,iommu=on|off,id=,pci_segment=\""; pub fn parse(device: &str) -> Result { let mut parser = OptionParser::new(); @@ -2046,7 +2116,7 @@ impl UserDeviceConfig { impl VdpaConfig { pub const SYNTAX: &'static str = "vDPA device \ \"path=,num_queues=,iommu=on|off,\ - id=,pci_segment=\""; + id=,pci_segment=,addr=\""; pub fn parse(vdpa: &str) -> Result { let mut parser = OptionParser::new(); @@ -2055,7 +2125,8 @@ impl VdpaConfig { .add("num_queues") .add("iommu") .add("id") - .add("pci_segment"); + .add("pci_segment") + .add("addr"); parser.parse(vdpa).map_err(Error::ParseVdpa)?; let path = parser @@ -2077,12 +2148,20 @@ impl VdpaConfig { .map_err(Error::ParseVdpa)? .unwrap_or_default(); + let (bdf_device_id, _bdf_function) = if let Some(s) = parser.get("addr") { + let (a, b) = parse_addr(s.as_str()).map_err(Error::ParseDisk)?; + (Some(a), Some(b)) + } else { + (None, None) + }; + Ok(VdpaConfig { path, num_queues, iommu, id, pci_segment, + bdf_device_id, }) } @@ -2106,7 +2185,8 @@ impl VdpaConfig { impl VsockConfig { pub const SYNTAX: &'static str = "Virtio VSOCK parameters \ - \"cid=,socket=,iommu=on|off,id=,pci_segment=\""; + \"cid=,socket=,iommu=on|off,id=,\ + pci_segment=,addr=\""; pub fn parse(vsock: &str) -> Result { let mut parser = OptionParser::new(); @@ -2115,7 +2195,8 @@ impl VsockConfig { .add("cid") .add("iommu") .add("id") - .add("pci_segment"); + .add("pci_segment") + .add("addr"); parser.parse(vsock).map_err(Error::ParseVsock)?; let socket = parser @@ -2137,12 +2218,20 @@ impl VsockConfig { .map_err(Error::ParseVsock)? .unwrap_or_default(); + let (bdf_device_id, _bdf_function) = if let Some(s) = parser.get("addr") { + let (a, b) = parse_addr(s.as_str()).map_err(Error::ParseDisk)?; + (Some(a), Some(b)) + } else { + (None, None) + }; + Ok(VsockConfig { cid, socket, iommu, id, pci_segment, + bdf_device_id, }) } @@ -3453,6 +3542,7 @@ mod tests { pci_segment: 0, serial: None, queue_affinity: None, + bdf_device_id: None, } } @@ -3571,6 +3661,7 @@ mod tests { offload_tso: true, offload_ufo: true, offload_csum: true, + bdf_device_id: None, } } @@ -3653,6 +3744,7 @@ mod tests { RngConfig { src: PathBuf::from("/dev/random"), iommu: true, + bdf_device_id: None, } ); assert_eq!( @@ -3673,6 +3765,7 @@ mod tests { queue_size: 1024, id: None, pci_segment: 0, + bdf_device_id: None, } } @@ -3703,6 +3796,7 @@ mod tests { discard_writes: false, id: None, pci_segment: 0, + bdf_device_id: None, } } @@ -3867,6 +3961,7 @@ mod tests { iommu: false, id: None, pci_segment: 0, + bdf_device_id: None, } } @@ -3911,6 +4006,7 @@ mod tests { iommu: false, id: None, pci_segment: 0, + bdf_device_id: None, } ); assert_eq!( @@ -3921,6 +4017,7 @@ mod tests { iommu: true, id: None, pci_segment: 0, + bdf_device_id: None, } ); Ok(()) @@ -4173,6 +4270,7 @@ mod tests { rng: RngConfig { src: PathBuf::from("/dev/urandom"), iommu: false, + bdf_device_id: None, }, balloon: None, fs: None, @@ -4485,6 +4583,7 @@ mod tests { id: None, iommu: true, pci_segment: 1, + bdf_device_id: None, }); still_valid_config.validate().unwrap(); @@ -4561,6 +4660,7 @@ mod tests { id: None, iommu: false, pci_segment: 1, + bdf_device_id: None, }); assert_eq!( invalid_config.validate(), diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 8cd0c6a3ed..3c505e0a78 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -892,6 +892,7 @@ struct MetaVirtioDevice { iommu: bool, id: String, pci_segment: u16, + bdf_device: Option, dma_handler: Option>, } @@ -1640,6 +1641,7 @@ impl DeviceManager { handle.id, handle.pci_segment, handle.dma_handler, + handle.bdf_device, )?; if handle.iommu { @@ -1668,7 +1670,8 @@ impl DeviceManager { } if let Some(iommu_device) = iommu_device { - let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?; + let dev_id = + self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None, None)?; self.iommu_attached_devices = Some((dev_id, iommu_attached_devices)); } } @@ -2381,6 +2384,7 @@ impl DeviceManager { id: id.clone(), pci_segment: 0, dma_handler: None, + bdf_device: None, }); // Fill the device tree with a new node. In case of restore, we @@ -2834,6 +2838,7 @@ impl DeviceManager { id, pci_segment: disk_cfg.pci_segment, dma_handler: None, + bdf_device: disk_cfg.bdf_device_id, }) } @@ -3011,6 +3016,7 @@ impl DeviceManager { id, pci_segment: net_cfg.pci_segment, dma_handler: None, + bdf_device: net_cfg.bdf_device_id, }) } @@ -3058,6 +3064,7 @@ impl DeviceManager { id: id.clone(), pci_segment: 0, dma_handler: None, + bdf_device: None, }); // Fill the device tree with a new node. In case of restore, we @@ -3119,6 +3126,7 @@ impl DeviceManager { id, pci_segment: fs_cfg.pci_segment, dma_handler: None, + bdf_device: fs_cfg.bdf_device_id, }) } else { Err(DeviceManagerError::NoVirtioFsSock) @@ -3308,6 +3316,7 @@ impl DeviceManager { id, pci_segment: pmem_cfg.pci_segment, dma_handler: None, + bdf_device: pmem_cfg.bdf_device_id, }) } @@ -3379,6 +3388,7 @@ impl DeviceManager { id, pci_segment: vsock_cfg.pci_segment, dma_handler: None, + bdf_device: vsock_cfg.bdf_device_id, }) } @@ -3438,6 +3448,7 @@ impl DeviceManager { id: memory_zone_id.clone(), pci_segment: 0, dma_handler: None, + bdf_device: None, }); // Fill the device tree with a new node. In case of restore, we @@ -3527,6 +3538,7 @@ impl DeviceManager { id: id.clone(), pci_segment: 0, dma_handler: None, + bdf_device: balloon_config.bdf_device_id, }); self.device_tree @@ -3568,6 +3580,7 @@ impl DeviceManager { id: id.clone(), pci_segment: 0, dma_handler: None, + bdf_device: None, }); self.device_tree @@ -3626,6 +3639,7 @@ impl DeviceManager { id, pci_segment: vdpa_cfg.pci_segment, dma_handler: Some(vdpa_mapping), + bdf_device: vdpa_cfg.bdf_device_id, }) } @@ -3712,7 +3726,7 @@ impl DeviceManager { }; let (pci_segment_id, pci_device_bdf, resources) = - self.pci_resources(&vfio_name, device_cfg.pci_segment)?; + self.pci_resources(&vfio_name, device_cfg.pci_segment, None)?; let mut needs_dma_mapping = false; @@ -3949,7 +3963,7 @@ impl DeviceManager { }; let (pci_segment_id, pci_device_bdf, resources) = - self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?; + self.pci_resources(&vfio_user_name, device_cfg.pci_segment, None)?; let legacy_interrupt_group = if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { @@ -4061,6 +4075,7 @@ impl DeviceManager { virtio_device_id: String, pci_segment_id: u16, dma_handler: Option>, + bdf_device: Option, ) -> DeviceManagerResult { let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}"); @@ -4069,7 +4084,7 @@ impl DeviceManager { node.children = vec![virtio_device_id.clone()]; let (pci_segment_id, pci_device_bdf, resources) = - self.pci_resources(&id, pci_segment_id)?; + self.pci_resources(&id, pci_segment_id, bdf_device)?; // Update the existing virtio node by setting the parent. if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) { @@ -4206,7 +4221,7 @@ impl DeviceManager { info!("Creating pvpanic device {}", id); let (pci_segment_id, pci_device_bdf, resources) = - self.pci_resources(&id, pci_segment_id)?; + self.pci_resources(&id, pci_segment_id, None)?; let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); @@ -4289,6 +4304,7 @@ impl DeviceManager { &self, id: &str, pci_segment_id: u16, + pci_device_id: Option, ) -> DeviceManagerResult<(u16, PciBdf, Option>)> { // Look for the id in the device tree. If it can be found, that means // the device is being restored, otherwise it's created from scratch. @@ -4316,7 +4332,7 @@ impl DeviceManager { (pci_segment_id, pci_device_bdf, resources) } else { let pci_device_bdf = - self.pci_segments[pci_segment_id as usize].allocate_device_bdf(None)?; + self.pci_segments[pci_segment_id as usize].allocate_device_bdf(pci_device_id)?; (pci_segment_id, pci_device_bdf, None) }) @@ -4796,6 +4812,7 @@ impl DeviceManager { handle.id.clone(), handle.pci_segment, handle.dma_handler, + None, )?; // Update the PCIU bitmap diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index ed4c8b36f3..b64b00c1f6 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -2695,6 +2695,7 @@ mod unit_tests { rng: RngConfig { src: PathBuf::from("/dev/urandom"), iommu: false, + bdf_device_id: None, }, balloon: None, fs: None, diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index d7062e29ea..c5b30c9d66 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -276,6 +276,10 @@ pub struct DiskConfig { pub serial: Option, #[serde(default)] pub queue_affinity: Option>, + // #[serde(default)] + // pub bus_hint: Option, + #[serde(default)] + pub bdf_device_id: Option, } impl ApplyLandlock for DiskConfig { @@ -341,6 +345,7 @@ pub struct NetConfig { pub offload_ufo: bool, #[serde(default = "default_netconfig_true")] pub offload_csum: bool, + pub bdf_device_id: Option, } pub fn default_netconfig_true() -> bool { @@ -401,6 +406,7 @@ pub struct RngConfig { pub src: PathBuf, #[serde(default)] pub iommu: bool, + pub bdf_device_id: Option, } pub const DEFAULT_RNG_SOURCE: &str = "/dev/urandom"; @@ -410,6 +416,7 @@ impl Default for RngConfig { RngConfig { src: PathBuf::from(DEFAULT_RNG_SOURCE), iommu: false, + bdf_device_id: None, } } } @@ -431,6 +438,7 @@ pub struct BalloonConfig { /// Option to enable free page reporting from the guest. #[serde(default)] pub free_page_reporting: bool, + pub bdf_device_id: Option, } #[cfg(feature = "pvmemcontrol")] @@ -449,6 +457,7 @@ pub struct FsConfig { pub id: Option, #[serde(default)] pub pci_segment: u16, + pub bdf_device_id: Option, } pub fn default_fsconfig_num_queues() -> usize { @@ -479,6 +488,7 @@ pub struct PmemConfig { pub id: Option, #[serde(default)] pub pci_segment: u16, + pub bdf_device_id: Option, } impl ApplyLandlock for PmemConfig { @@ -614,6 +624,7 @@ pub struct VdpaConfig { pub id: Option, #[serde(default)] pub pci_segment: u16, + pub bdf_device_id: Option, } pub fn default_vdpaconfig_num_queues() -> usize { @@ -637,6 +648,7 @@ pub struct VsockConfig { pub id: Option, #[serde(default)] pub pci_segment: u16, + pub bdf_device_id: Option, } impl ApplyLandlock for VsockConfig { From 1ca7c38f1734991e5b04885374ddcd6f80191c63 Mon Sep 17 00:00:00 2001 From: Pascal Scholz Date: Fri, 7 Nov 2025 14:37:28 +0100 Subject: [PATCH 5/6] vmm: Add tests for BDF device address parsing in configs Signed-off-by: Pascal Scholz On-behalf-of: SAP pascal.scholz@sap.com --- src/main.rs | 52 +++++++++++++++++++++++++++++++++--------- vmm/src/config.rs | 57 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 10 deletions(-) diff --git a/src/main.rs b/src/main.rs index 0be6b36af6..6a04af1dca 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1212,6 +1212,24 @@ mod unit_tests { }"#, true, ), + ( + vec![ + "cloud-hypervisor", + "--kernel", + "/path/to/kernel", + "--disk", + "path=/path/to/disk/1,addr=15.0", + "path=/path/to/disk/2", + ], + r#"{ + "payload": {"kernel": "/path/to/kernel"}, + "disks": [ + {"path": "/path/to/disk/1", "bdf_device_id": 15}, + {"path": "/path/to/disk/2"} + ] + }"#, + true, + ), ( vec![ "cloud-hypervisor", @@ -1423,6 +1441,20 @@ mod unit_tests { }"#, true, ), + ( + vec![ + "cloud-hypervisor", "--kernel", "/path/to/kernel", + "--net", + "mac=12:34:56:78:90:ab,host_mac=34:56:78:90:ab:cd,tap=tap0,ip=1.2.3.4,mask=5.6.7.8,addr=15.0", + ], + r#"{ + "payload": {"kernel": "/path/to/kernel"}, + "net": [ + {"mac": "12:34:56:78:90:ab", "host_mac": "34:56:78:90:ab:cd", "tap": "tap0", "ip": "1.2.3.4", "mask": "5.6.7.8", "num_queues": 2, "queue_size": 256, "bdf_device_id": 15} + ] + }"#, + true, + ), #[cfg(target_arch = "x86_64")] ( vec![ @@ -1494,11 +1526,11 @@ mod unit_tests { "--kernel", "/path/to/kernel", "--rng", - "src=/path/to/entropy/source", + "src=/path/to/entropy/source,addr=15.0", ], r#"{ "payload": {"kernel": "/path/to/kernel"}, - "rng": {"src": "/path/to/entropy/source"} + "rng": {"src": "/path/to/entropy/source", "bdf_device_id": 15} }"#, true, )] @@ -1515,14 +1547,14 @@ mod unit_tests { "cloud-hypervisor", "--kernel", "/path/to/kernel", "--memory", "shared=true", "--fs", - "tag=virtiofs1,socket=/path/to/sock1", + "tag=virtiofs1,socket=/path/to/sock1,addr=15.0", "tag=virtiofs2,socket=/path/to/sock2", ], r#"{ "payload": {"kernel": "/path/to/kernel"}, "memory" : { "shared": true, "size": 536870912 }, "fs": [ - {"tag": "virtiofs1", "socket": "/path/to/sock1"}, + {"tag": "virtiofs1", "socket": "/path/to/sock1", "bdf_device_id": 15}, {"tag": "virtiofs2", "socket": "/path/to/sock2"} ] }"#, @@ -1594,13 +1626,13 @@ mod unit_tests { "--kernel", "/path/to/kernel", "--pmem", - "file=/path/to/img/1,size=1G", + "file=/path/to/img/1,size=1G,addr=15.0", "file=/path/to/img/2,size=2G", ], r#"{ "payload": {"kernel": "/path/to/kernel"}, "pmem": [ - {"file": "/path/to/img/1", "size": 1073741824}, + {"file": "/path/to/img/1", "size": 1073741824,"bdf_device_id": 15}, {"file": "/path/to/img/2", "size": 2147483648} ] }"#, @@ -1878,13 +1910,13 @@ mod unit_tests { "--kernel", "/path/to/kernel", "--vdpa", - "path=/path/to/device/1", + "path=/path/to/device/1,addr=15.0", "path=/path/to/device/2,num_queues=2", ], r#"{ "payload": {"kernel": "/path/to/kernel"}, "vdpa": [ - {"path": "/path/to/device/1", "num_queues": 1}, + {"path": "/path/to/device/1", "num_queues": 1, "bdf_device_id": 15}, {"path": "/path/to/device/2", "num_queues": 2} ] }"#, @@ -1923,11 +1955,11 @@ mod unit_tests { "--kernel", "/path/to/kernel", "--vsock", - "cid=123,socket=/path/to/sock/1", + "cid=123,socket=/path/to/sock/1,addr=15.0", ], r#"{ "payload": {"kernel": "/path/to/kernel"}, - "vsock": {"cid": 123, "socket": "/path/to/sock/1"} + "vsock": {"cid": 123, "socket": "/path/to/sock/1", "bdf_device_id": 15} }"#, true, ), diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 814c0c9f7a..0d97d7ba06 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -3637,6 +3637,13 @@ mod tests { ..disk_fixture() } ); + assert_eq!( + DiskConfig::parse("path=/path/to_file,addr=15.0")?, + DiskConfig { + bdf_device_id: Some(15), + ..disk_fixture() + } + ); Ok(()) } @@ -3726,6 +3733,14 @@ mod tests { } ); + assert_eq!( + NetConfig::parse("mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,addr=15.0")?, + NetConfig { + bdf_device_id: Some(15), + ..net_fixture() + } + ); + Ok(()) } @@ -3754,6 +3769,13 @@ mod tests { ..Default::default() } ); + assert_eq!( + RngConfig::parse("addr=15.0")?, + RngConfig { + bdf_device_id: Some(15), + ..Default::default() + } + ); Ok(()) } @@ -3785,6 +3807,14 @@ mod tests { } ); + assert_eq!( + FsConfig::parse("tag=mytag,socket=/tmp/sock,addr=15.0")?, + FsConfig { + bdf_device_id: Some(15), + ..fs_fixture() + } + ); + Ok(()) } @@ -3824,6 +3854,13 @@ mod tests { ..pmem_fixture() } ); + assert_eq!( + PmemConfig::parse("file=/tmp/pmem,size=128M,addr=15.0")?, + PmemConfig { + bdf_device_id: Some(15), + ..pmem_fixture() + } + ); Ok(()) } @@ -3978,6 +4015,13 @@ mod tests { ..vdpa_fixture() } ); + assert_eq!( + VdpaConfig::parse("path=/dev/vhost-vdpa,addr=15.0")?, + VdpaConfig { + bdf_device_id: Some(15), + ..vdpa_fixture() + } + ); Ok(()) } @@ -4020,6 +4064,18 @@ mod tests { bdf_device_id: None, } ); + + assert_eq!( + VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on,addr=15.0")?, + VsockConfig { + cid: 3, + socket: PathBuf::from("/tmp/sock"), + iommu: true, + id: None, + pci_segment: 0, + bdf_device_id: Some(15), + } + ); Ok(()) } @@ -4097,6 +4153,7 @@ mod tests { id: Some("net0".to_owned()), num_queues: 2, fds: Some(vec![-1, -1, -1, -1]), + bdf_device_id: Some(15), ..net_fixture() }, NetConfig { From 30358681ce2c8fdefd1d745a6ef1abf278c11b8e Mon Sep 17 00:00:00 2001 From: Pascal Scholz Date: Mon, 10 Nov 2025 14:09:33 +0100 Subject: [PATCH 6/6] vmm: Panic if a PCI function ID other than 0 is used Currently we do not support multi function PCI devices, so we only support assigning function IDs equal to 0. --- vmm/src/config.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 0d97d7ba06..3e66ca6b92 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -1078,7 +1078,15 @@ fn parse_addr(s: &str) -> std::result::Result<(u8, u8), OptionParserError> { let b = b .parse::() .map_err(|_| OptionParserError::InvalidSyntax(s.to_owned()))?; - Ok((a, b)) + let f = if b != 0 { + Err(OptionParserError::InvalidValue(format!( + "invalid device function ID: {b}" + ))) + } else { + Ok(b) + }?; + + Ok((a, f)) } impl DiskConfig {