diff --git a/src/vmm/src/test_utils/mod.rs b/src/vmm/src/test_utils/mod.rs index 41809b71b34..6cc5050d91a 100644 --- a/src/vmm/src/test_utils/mod.rs +++ b/src/vmm/src/test_utils/mod.rs @@ -3,6 +3,7 @@ #![allow(missing_docs)] +use std::fs::File; use std::sync::{Arc, Mutex}; use vm_memory::{GuestAddress, GuestRegionCollection}; @@ -53,6 +54,22 @@ pub fn multi_region_mem(regions: &[(GuestAddress, usize)]) -> GuestMemoryMmap { .unwrap() } +/// Creates a [`GuestMemoryMmap`] with multiple regions and without dirty page tracking. +pub fn multi_region_mem_memfd( + regions: &[(GuestAddress, usize)], + huge_page_cfg: HugePageConfig, +) -> (GuestMemoryMmap, Arc) { + let (reg, file) = memory::memfd_backed(regions, false, huge_page_cfg).unwrap(); + let mem = GuestRegionCollection::from_regions( + reg.into_iter() + .map(|region| GuestRegionMmapExt::dram_from_mmap_region(region, 0)) + .collect(), + ) + .unwrap(); + + (mem, file) +} + pub fn multi_region_mem_raw(regions: &[(GuestAddress, usize)]) -> Vec { memory::anonymous(regions.iter().copied(), false, HugePageConfig::None) .expect("Cannot initialize memory") diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 86545209e88..a7f1b447f59 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -6,7 +6,7 @@ // found in the THIRD-PARTY file. use std::fs::File; -use std::io::SeekFrom; +use std::io::{self, SeekFrom}; use std::ops::Deref; use std::sync::{Arc, Mutex}; @@ -22,9 +22,10 @@ pub use vm_memory::{ GuestUsize, MemoryRegionAddress, MmapRegion, address, }; use vm_memory::{GuestMemoryError, GuestMemoryRegionBytes, VolatileSlice, WriteVolatile}; -use vmm_sys_util::errno; +use vmm_sys_util::fallocate::FallocateMode; +use vmm_sys_util::{errno, fallocate}; -use crate::utils::{get_page_size, u64_to_usize}; +use crate::utils::{get_page_size, u64_to_usize, usize_to_u64}; use crate::vmm_config::machine_config::HugePageConfig; use crate::vstate::vm::VmError; use crate::{DirtyBitmap, Vm}; @@ -388,11 +389,30 @@ impl GuestRegionMmapExt { caddr: MemoryRegionAddress, len: usize, ) -> Result<(), GuestMemoryError> { - let phys_address = self.get_host_address(caddr)?; + let region_page_size = if self.flags() & HugePageConfig::Hugetlbfs2M.mmap_flags() != 0 { + HugePageConfig::Hugetlbfs2M.page_size() + } else { + HugePageConfig::None.page_size() + }; + + let end = caddr + .0 + .checked_add(usize_to_u64(len)) + .ok_or(GuestMemoryError::GuestAddressOverflow)?; + + // Both the starting address and the length of the region we want to discard needs to be + // page alinged, otherwise we won't be able to actually discard the memory. + if !caddr.0.is_multiple_of(usize_to_u64(region_page_size)) + || !end.is_multiple_of(usize_to_u64(region_page_size)) + { + return Err(GuestMemoryError::IOError( + io::ErrorKind::InvalidInput.into(), + )); + } + let phys_address = self.get_host_address(caddr)?; match (self.inner.file_offset(), self.inner.flags()) { - // If and only if we are resuming from a snapshot file, we have a file and it's mapped - // private + // If we are resuming from a snapshot file, we have a file and it's mapped private (Some(_), flags) if flags & libc::MAP_PRIVATE != 0 => { // Mmap a new anonymous region over the present one in order to create a hole // with zero pages. @@ -420,12 +440,29 @@ impl GuestRegionMmapExt { Ok(()) } } - // Match either the case of an anonymous mapping, or the case - // of a shared file mapping. - // TODO: madvise(MADV_DONTNEED) doesn't actually work with memfd - // (or in general MAP_SHARED of a fd). In those cases we should use - // fallocate64(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE). - // We keep falling to the madvise branch to keep the previous behaviour. + // If we back memory over memfd we have a file mapped shared. + (Some(file_offset), flags) if flags & libc::MAP_SHARED != 0 => { + let Some(offset) = file_offset.start().checked_add(caddr.raw_value()) else { + return Err(GuestMemoryError::InvalidGuestAddress(GuestAddress( + caddr.raw_value(), + ))); + }; + + fallocate::fallocate( + file_offset.file(), + FallocateMode::PunchHole, + true, + offset, + usize_to_u64(len), + ) + .map_err(|err| { + error!("discard_range: punching hole failed: {err:?}"); + GuestMemoryError::IOError(err.into()) + })?; + + Ok(()) + } + // Anonymous mapping. _ => { // Madvise the region in order to mark it as not used. // SAFETY: The address and length are known to be valid. @@ -873,7 +910,7 @@ mod tests { use super::*; use crate::snapshot::Snapshot; - use crate::test_utils::single_region_mem; + use crate::test_utils::{multi_region_mem_memfd, single_region_mem}; use crate::utils::{get_page_size, mib_to_bytes}; use crate::vstate::memory::test_utils::into_region_ext; @@ -1462,6 +1499,77 @@ mod tests { ); } + fn check_mem_contents(mem: &GuestMemoryMmap, offset: usize, expected: &[u8]) { + let addr = GuestAddress(usize_to_u64(offset)); + let mut actual_page = vec![0u8; expected.len()]; + mem.read(actual_page.as_mut_slice(), addr).unwrap(); + assert_eq!(actual_page, expected); + } + + fn test_discard_range_on_memfd(huge_pages: HugePageConfig) { + // 8MiB of memory in total (multiples of both possible page sizes) + const REGION_SIZE: usize = 4 * 1024 * 1024; + + let (mem, _file) = multi_region_mem_memfd( + &[ + (GuestAddress(0), REGION_SIZE), + (GuestAddress(usize_to_u64(REGION_SIZE)), REGION_SIZE), + ], + huge_pages, + ); + + let page_size = huge_pages.page_size(); + + // Fill up memory with 1s + let ones = vec![1u8; 2 * REGION_SIZE]; + mem.write(&ones, GuestAddress(0)).unwrap(); + + check_mem_contents(&mem, 0, &vec![1u8; 2 * REGION_SIZE]); + + // Discard the entire first region + mem.discard_range(GuestAddress(0), REGION_SIZE).unwrap(); + check_mem_contents(&mem, 0, &vec![0u8; REGION_SIZE]); + check_mem_contents(&mem, REGION_SIZE, &vec![1u8; REGION_SIZE]); + + // discard_range() works on page granularity. Discard the first page of the second region. + mem.discard_range(GuestAddress(usize_to_u64(REGION_SIZE)), page_size) + .unwrap(); + check_mem_contents(&mem, REGION_SIZE, &vec![0u8; page_size]); + check_mem_contents( + &mem, + REGION_SIZE + page_size, + &vec![1u8; REGION_SIZE - page_size], + ); + + // discard_range() won't actually work with unaligned regions + + // Try to discard less than a page + mem.discard_range(GuestAddress(usize_to_u64(REGION_SIZE + page_size)), 1024) + .unwrap_err(); + mem.discard_range( + GuestAddress(usize_to_u64(REGION_SIZE + page_size)), + page_size + 1024, + ) + .unwrap_err(); + + // Try to discard unaligned address + mem.discard_range( + GuestAddress(usize_to_u64(REGION_SIZE + page_size + 1024)), + page_size, + ) + .unwrap_err(); + } + + #[test] + fn test_discard_range_on_memfd_4k() { + test_discard_range_on_memfd(HugePageConfig::None) + } + + #[test] + fn test_discard_range_on_memfd_2m() { + test_discard_range_on_memfd(HugePageConfig::Hugetlbfs2M) + } + /// Verifies that `slots_intersecting_range` returns the correct slots for /// ranges at slot boundaries, interior to a slot, and spanning two slots. #[test] diff --git a/src/vmm/tests/integration_tests.rs b/src/vmm/tests/integration_tests.rs index 903b152f50b..c114ebbf411 100644 --- a/src/vmm/tests/integration_tests.rs +++ b/src/vmm/tests/integration_tests.rs @@ -138,7 +138,7 @@ fn test_dirty_bitmap_success() { for (vmm, _) in vmms { // Let it churn for a while and dirty some pages... thread::sleep(Duration::from_millis(100)); - let bitmap = vmm.lock().unwrap().vm.get_dirty_bitmap().unwrap(); + let bitmap = vmm.lock().unwrap().vm.get_dirty_bitmap(4096).unwrap(); let num_dirty_pages: u32 = bitmap .values() .map(|bitmap_per_region| {