From d7366ac9562a2c281dbb37ee6a075a3315a4dd7e Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Wed, 20 May 2026 16:45:13 +0200 Subject: [PATCH 1/3] fix(tests): fix compilation in integration test Previous commit (cd3fe9a7) changed the signature of ArchVm::get_dirty_bitmap() to get a page_size argument, but corresponding integration test was not updated to match this change. Signed-off-by: Babis Chalios --- src/vmm/tests/integration_tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vmm/tests/integration_tests.rs b/src/vmm/tests/integration_tests.rs index 903b152f50b..c114ebbf411 100644 --- a/src/vmm/tests/integration_tests.rs +++ b/src/vmm/tests/integration_tests.rs @@ -138,7 +138,7 @@ fn test_dirty_bitmap_success() { for (vmm, _) in vmms { // Let it churn for a while and dirty some pages... thread::sleep(Duration::from_millis(100)); - let bitmap = vmm.lock().unwrap().vm.get_dirty_bitmap().unwrap(); + let bitmap = vmm.lock().unwrap().vm.get_dirty_bitmap(4096).unwrap(); let num_dirty_pages: u32 = bitmap .values() .map(|bitmap_per_region| { From 1b5225907477d27d34a0359a2fcbd05d33567dcc Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Thu, 21 May 2026 14:21:01 +0200 Subject: [PATCH 2/3] fix(discard_range): require page size alignment for region GuestRegionMmapExt::discard_range() is used to deallocate guest memory that we don't use any more, for example when we use balloon inflation or free page reporting/hinting. There is the implicit requirement that the range we are discarding is aligned (both starting address and lenght) to the page size used to back the guest memory. If this alignment is not respected by the caller, we can end up with undefined behaviour. For example, if we use huge pages to back memory but we receive from the guest regions to discard that are 4K pages aligned, we might end up removing memory that we are not meant to. This currently doesn't happen but the requirement is not explicitly encoded in the type system. Add a check for these requirements and return an error when they are not met. This way, we can't shoot ourselves in the foot in the future. Signed-off-by: Babis Chalios --- src/vmm/src/vstate/memory.rs | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 86545209e88..0bef82815c9 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -6,7 +6,7 @@ // found in the THIRD-PARTY file. use std::fs::File; -use std::io::SeekFrom; +use std::io::{self, SeekFrom}; use std::ops::Deref; use std::sync::{Arc, Mutex}; @@ -24,7 +24,7 @@ pub use vm_memory::{ use vm_memory::{GuestMemoryError, GuestMemoryRegionBytes, VolatileSlice, WriteVolatile}; use vmm_sys_util::errno; -use crate::utils::{get_page_size, u64_to_usize}; +use crate::utils::{get_page_size, u64_to_usize, usize_to_u64}; use crate::vmm_config::machine_config::HugePageConfig; use crate::vstate::vm::VmError; use crate::{DirtyBitmap, Vm}; @@ -388,8 +388,28 @@ impl GuestRegionMmapExt { caddr: MemoryRegionAddress, len: usize, ) -> Result<(), GuestMemoryError> { - let phys_address = self.get_host_address(caddr)?; + let region_page_size = if self.flags() & HugePageConfig::Hugetlbfs2M.mmap_flags() != 0 { + HugePageConfig::Hugetlbfs2M.page_size() + } else { + HugePageConfig::None.page_size() + }; + let end = caddr + .0 + .checked_add(usize_to_u64(len)) + .ok_or(GuestMemoryError::GuestAddressOverflow)?; + + // Both the starting address and the length of the region we want to discard needs to be + // page alinged, otherwise we won't be able to actually discard the memory. + if !caddr.0.is_multiple_of(usize_to_u64(region_page_size)) + || !end.is_multiple_of(usize_to_u64(region_page_size)) + { + return Err(GuestMemoryError::IOError( + io::ErrorKind::InvalidInput.into(), + )); + } + + let phys_address = self.get_host_address(caddr)?; match (self.inner.file_offset(), self.inner.flags()) { // If and only if we are resuming from a snapshot file, we have a file and it's mapped // private From ecc1b51b90d3c820a62dfe90c950c27364bae549 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Tue, 19 May 2026 21:01:36 -0700 Subject: [PATCH 3/3] fix(memory): punch holes for shared discard ranges Use fallocate(PUNCH_HOLE|KEEP_SIZE) for MAP_SHARED file-backed guest memory so memfd-backed balloon hinting/reporting clears the shared backing instead of only dropping PTEs. Signed-off-by: Babis Chalios --- src/vmm/src/test_utils/mod.rs | 17 ++++++ src/vmm/src/vstate/memory.rs | 108 ++++++++++++++++++++++++++++++---- 2 files changed, 115 insertions(+), 10 deletions(-) diff --git a/src/vmm/src/test_utils/mod.rs b/src/vmm/src/test_utils/mod.rs index 41809b71b34..6cc5050d91a 100644 --- a/src/vmm/src/test_utils/mod.rs +++ b/src/vmm/src/test_utils/mod.rs @@ -3,6 +3,7 @@ #![allow(missing_docs)] +use std::fs::File; use std::sync::{Arc, Mutex}; use vm_memory::{GuestAddress, GuestRegionCollection}; @@ -53,6 +54,22 @@ pub fn multi_region_mem(regions: &[(GuestAddress, usize)]) -> GuestMemoryMmap { .unwrap() } +/// Creates a [`GuestMemoryMmap`] with multiple regions and without dirty page tracking. +pub fn multi_region_mem_memfd( + regions: &[(GuestAddress, usize)], + huge_page_cfg: HugePageConfig, +) -> (GuestMemoryMmap, Arc) { + let (reg, file) = memory::memfd_backed(regions, false, huge_page_cfg).unwrap(); + let mem = GuestRegionCollection::from_regions( + reg.into_iter() + .map(|region| GuestRegionMmapExt::dram_from_mmap_region(region, 0)) + .collect(), + ) + .unwrap(); + + (mem, file) +} + pub fn multi_region_mem_raw(regions: &[(GuestAddress, usize)]) -> Vec { memory::anonymous(regions.iter().copied(), false, HugePageConfig::None) .expect("Cannot initialize memory") diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 0bef82815c9..a7f1b447f59 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -22,7 +22,8 @@ pub use vm_memory::{ GuestUsize, MemoryRegionAddress, MmapRegion, address, }; use vm_memory::{GuestMemoryError, GuestMemoryRegionBytes, VolatileSlice, WriteVolatile}; -use vmm_sys_util::errno; +use vmm_sys_util::fallocate::FallocateMode; +use vmm_sys_util::{errno, fallocate}; use crate::utils::{get_page_size, u64_to_usize, usize_to_u64}; use crate::vmm_config::machine_config::HugePageConfig; @@ -411,8 +412,7 @@ impl GuestRegionMmapExt { let phys_address = self.get_host_address(caddr)?; match (self.inner.file_offset(), self.inner.flags()) { - // If and only if we are resuming from a snapshot file, we have a file and it's mapped - // private + // If we are resuming from a snapshot file, we have a file and it's mapped private (Some(_), flags) if flags & libc::MAP_PRIVATE != 0 => { // Mmap a new anonymous region over the present one in order to create a hole // with zero pages. @@ -440,12 +440,29 @@ impl GuestRegionMmapExt { Ok(()) } } - // Match either the case of an anonymous mapping, or the case - // of a shared file mapping. - // TODO: madvise(MADV_DONTNEED) doesn't actually work with memfd - // (or in general MAP_SHARED of a fd). In those cases we should use - // fallocate64(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE). - // We keep falling to the madvise branch to keep the previous behaviour. + // If we back memory over memfd we have a file mapped shared. + (Some(file_offset), flags) if flags & libc::MAP_SHARED != 0 => { + let Some(offset) = file_offset.start().checked_add(caddr.raw_value()) else { + return Err(GuestMemoryError::InvalidGuestAddress(GuestAddress( + caddr.raw_value(), + ))); + }; + + fallocate::fallocate( + file_offset.file(), + FallocateMode::PunchHole, + true, + offset, + usize_to_u64(len), + ) + .map_err(|err| { + error!("discard_range: punching hole failed: {err:?}"); + GuestMemoryError::IOError(err.into()) + })?; + + Ok(()) + } + // Anonymous mapping. _ => { // Madvise the region in order to mark it as not used. // SAFETY: The address and length are known to be valid. @@ -893,7 +910,7 @@ mod tests { use super::*; use crate::snapshot::Snapshot; - use crate::test_utils::single_region_mem; + use crate::test_utils::{multi_region_mem_memfd, single_region_mem}; use crate::utils::{get_page_size, mib_to_bytes}; use crate::vstate::memory::test_utils::into_region_ext; @@ -1482,6 +1499,77 @@ mod tests { ); } + fn check_mem_contents(mem: &GuestMemoryMmap, offset: usize, expected: &[u8]) { + let addr = GuestAddress(usize_to_u64(offset)); + let mut actual_page = vec![0u8; expected.len()]; + mem.read(actual_page.as_mut_slice(), addr).unwrap(); + assert_eq!(actual_page, expected); + } + + fn test_discard_range_on_memfd(huge_pages: HugePageConfig) { + // 8MiB of memory in total (multiples of both possible page sizes) + const REGION_SIZE: usize = 4 * 1024 * 1024; + + let (mem, _file) = multi_region_mem_memfd( + &[ + (GuestAddress(0), REGION_SIZE), + (GuestAddress(usize_to_u64(REGION_SIZE)), REGION_SIZE), + ], + huge_pages, + ); + + let page_size = huge_pages.page_size(); + + // Fill up memory with 1s + let ones = vec![1u8; 2 * REGION_SIZE]; + mem.write(&ones, GuestAddress(0)).unwrap(); + + check_mem_contents(&mem, 0, &vec![1u8; 2 * REGION_SIZE]); + + // Discard the entire first region + mem.discard_range(GuestAddress(0), REGION_SIZE).unwrap(); + check_mem_contents(&mem, 0, &vec![0u8; REGION_SIZE]); + check_mem_contents(&mem, REGION_SIZE, &vec![1u8; REGION_SIZE]); + + // discard_range() works on page granularity. Discard the first page of the second region. + mem.discard_range(GuestAddress(usize_to_u64(REGION_SIZE)), page_size) + .unwrap(); + check_mem_contents(&mem, REGION_SIZE, &vec![0u8; page_size]); + check_mem_contents( + &mem, + REGION_SIZE + page_size, + &vec![1u8; REGION_SIZE - page_size], + ); + + // discard_range() won't actually work with unaligned regions + + // Try to discard less than a page + mem.discard_range(GuestAddress(usize_to_u64(REGION_SIZE + page_size)), 1024) + .unwrap_err(); + mem.discard_range( + GuestAddress(usize_to_u64(REGION_SIZE + page_size)), + page_size + 1024, + ) + .unwrap_err(); + + // Try to discard unaligned address + mem.discard_range( + GuestAddress(usize_to_u64(REGION_SIZE + page_size + 1024)), + page_size, + ) + .unwrap_err(); + } + + #[test] + fn test_discard_range_on_memfd_4k() { + test_discard_range_on_memfd(HugePageConfig::None) + } + + #[test] + fn test_discard_range_on_memfd_2m() { + test_discard_range_on_memfd(HugePageConfig::Hugetlbfs2M) + } + /// Verifies that `slots_intersecting_range` returns the correct slots for /// ranges at slot boundaries, interior to a slot, and spanning two slots. #[test]