From c6efbb364900b9a710521518bae234c290c1f2c2 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 27 Jan 2025 15:53:35 +0000 Subject: [PATCH 1/6] refactor: remove unused `set_dirty_page_tracking()` function In addition to being unused, it was also wrong, because it only updated the flag on KVM's side, but kept firecracker's tracking disabled. Signed-off-by: Patrick Roy --- src/vmm/src/lib.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 6833a3a12d2..41e26177efd 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -631,18 +631,6 @@ impl Vmm { Ok(bitmap) } - /// Enables or disables KVM dirty page tracking. - pub fn set_dirty_page_tracking(&mut self, enable: bool) -> Result<(), VmmError> { - // This function _always_ results in an ioctl update. The VMM is stateless in the sense - // that it's unaware of the current dirty page tracking setting. - // The VMM's consumer will need to cache the dirty tracking setting internally. For - // example, if this function were to be exposed through the VMM controller, the VMM - // resources should cache the flag. - self.vm - .set_kvm_memory_regions(&self.guest_memory, enable) - .map_err(VmmError::Vm) - } - /// Updates the path of the host file backing the emulated block device with id `drive_id`. /// We update the disk image on the device and its virtio configuration. pub fn update_block_device_path( From f3facd287e03c73483d7ab29f97fb690eb511ed2 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 30 Jan 2025 16:53:11 +0000 Subject: [PATCH 2/6] refactor: drop `track_dirty_pages` parameter from memory_init We already know about dirty page tracking inside that function, based on whether the memory regions have a bitmap associated with them or not. So drop passing this information in again via a parameter, which saves us quite a bit of plumbing. 
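To illustrate the idea (a minimal, self-contained sketch; `Region` is a hypothetical stand-in for vm-memory's `GuestRegionMmap`, whose `bitmap()` returns `Some(_)` exactly when dirty page tracking was requested at region creation time):

    // Flag value as defined by the KVM UAPI.
    const KVM_MEM_LOG_DIRTY_PAGES: u32 = 0x1;

    // Hypothetical stand-in for GuestRegionMmap, for illustration only.
    struct Region {
        bitmap: Option<Vec<u8>>,
    }

    // The per-region flag can be derived from the region itself, so no
    // separate `track_dirty_pages` boolean needs to be plumbed through.
    fn region_flags(region: &Region) -> u32 {
        if region.bitmap.is_some() {
            KVM_MEM_LOG_DIRTY_PAGES
        } else {
            0
        }
    }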
Suggested-by: Nikita Kalyazin Signed-off-by: Patrick Roy --- src/vmm/src/builder.rs | 9 +++------ src/vmm/src/device_manager/mmio.rs | 6 +++--- src/vmm/src/vstate/vm.rs | 27 ++++++++++++--------------- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 4b43e67541f..8da22ba2fdd 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -155,7 +155,6 @@ fn create_vmm_and_vcpus( event_manager: &mut EventManager, guest_memory: GuestMemoryMmap, uffd: Option, - track_dirty_pages: bool, vcpu_count: u8, kvm_capabilities: Vec, ) -> Result<(Vmm, Vec), StartMicrovmError> { @@ -172,7 +171,7 @@ fn create_vmm_and_vcpus( kvm.check_memory(&guest_memory) .map_err(VmmError::Kvm) .map_err(StartMicrovmError::Internal)?; - vm.memory_init(&guest_memory, track_dirty_pages) + vm.memory_init(&guest_memory) .map_err(VmmError::Vm) .map_err(StartMicrovmError::Internal)?; @@ -292,7 +291,6 @@ pub fn build_microvm_for_boot( event_manager, guest_memory, None, - vm_resources.machine_config.track_dirty_pages, vm_resources.machine_config.vcpu_count, cpu_template.kvm_capabilities.clone(), )?; @@ -482,7 +480,6 @@ pub fn build_microvm_from_snapshot( event_manager, guest_memory, uffd, - vm_resources.machine_config.track_dirty_pages, vm_resources.machine_config.vcpu_count, microvm_state.kvm_state.kvm_cap_modifiers.clone(), )?; @@ -1139,7 +1136,7 @@ pub(crate) mod tests { let kvm = Kvm::new(vec![]).unwrap(); let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_memory, false).unwrap(); + vm.memory_init(&guest_memory).unwrap(); let mmio_device_manager = MMIODeviceManager::new(); let acpi_device_manager = ACPIDeviceManager::new(); #[cfg(target_arch = "x86_64")] @@ -1393,7 +1390,7 @@ pub(crate) mod tests { let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); #[allow(unused_mut)] let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_memory, false).unwrap(); + vm.memory_init(&guest_memory).unwrap(); let evfd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); #[cfg(target_arch = "x86_64")] diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index 635bc1bc6e0..31d5f611f22 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -664,7 +664,7 @@ mod tests { let guest_mem = multi_region_mem(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]); let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_mem, false).unwrap(); + vm.memory_init(&guest_mem).unwrap(); let mut device_manager = MMIODeviceManager::new(); let mut resource_allocator = ResourceAllocator::new().unwrap(); @@ -694,7 +694,7 @@ mod tests { let guest_mem = multi_region_mem(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]); let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_mem, false).unwrap(); + vm.memory_init(&guest_mem).unwrap(); let mut device_manager = MMIODeviceManager::new(); let mut resource_allocator = ResourceAllocator::new().unwrap(); @@ -749,7 +749,7 @@ mod tests { let guest_mem = multi_region_mem(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]); let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_mem, false).unwrap(); + vm.memory_init(&guest_mem).unwrap(); let mem_clone = guest_mem.clone(); diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 1bcf191b8b9..19ca0b2a76c 100644 --- a/src/vmm/src/vstate/vm.rs +++ 
b/src/vmm/src/vstate/vm.rs @@ -129,12 +129,8 @@ impl Vm { } /// Initializes the guest memory. - pub fn memory_init( - &self, - guest_mem: &GuestMemoryMmap, - track_dirty_pages: bool, - ) -> Result<(), VmError> { - self.set_kvm_memory_regions(guest_mem, track_dirty_pages)?; + pub fn memory_init(&self, guest_mem: &GuestMemoryMmap) -> Result<(), VmError> { + self.set_kvm_memory_regions(guest_mem)?; #[cfg(target_arch = "x86_64")] self.fd .set_tss_address(u64_to_usize(crate::arch::x86_64::layout::KVM_TSS_ADDRESS)) @@ -146,16 +142,17 @@ pub(crate) fn set_kvm_memory_regions( &self, guest_mem: &GuestMemoryMmap, - track_dirty_pages: bool, ) -> Result<(), VmError> { - let mut flags = 0u32; - if track_dirty_pages { - flags |= KVM_MEM_LOG_DIRTY_PAGES; - } guest_mem .iter() .zip(0u32..) .try_for_each(|(region, slot)| { + let flags = if region.bitmap().is_some() { + KVM_MEM_LOG_DIRTY_PAGES + } else { + 0 + }; + let memory_region = kvm_userspace_memory_region { slot, guest_phys_addr: region.start_addr().raw_value(), @@ -359,7 +356,7 @@ pub(crate) mod tests { pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm, GuestMemoryMmap) { let (kvm, vm) = setup_vm(); let gm = single_region_mem(mem_size); - vm.memory_init(&gm, false).unwrap(); + vm.memory_init(&gm).unwrap(); (kvm, vm, gm) } @@ -375,7 +372,7 @@ pub(crate) mod tests { let (_, vm) = setup_vm(); // Create valid memory region and test that the initialization is successful. let gm = single_region_mem(0x1000); - vm.memory_init(&gm, true).unwrap(); + vm.memory_init(&gm).unwrap(); } #[cfg(target_arch = "x86_64")] @@ -452,13 +449,13 @@ pub(crate) mod tests { let (_, vm) = setup_vm(); let gm = single_region_mem(0x1000); - let res = vm.set_kvm_memory_regions(&gm, false); + let res = vm.set_kvm_memory_regions(&gm); res.unwrap(); // Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE // will result in error. let gm = single_region_mem(0x10); - let res = vm.set_kvm_memory_regions(&gm, false); + let res = vm.set_kvm_memory_regions(&gm); assert_eq!( res.unwrap_err().to_string(), "Cannot set the memory regions: Invalid argument (os error 22)" From 17c5ca3ba710b8fd5433c01a1a687c4576fee0b3 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 27 Jan 2025 15:48:29 +0000 Subject: [PATCH 3/6] refactor: drop `offset` field from `GuestMemoryRegionState` In practice, the way we wrote our snapshot files has always been to just write all regions in order. This means that the offset of a region is simply the sum of the sizes of the preceding regions. The new `GuestMemoryMmap::create` code already computes the offsets for mapping the memory file this way, so drop the explicit calculation at snapshot creation time (as the calculated value isn't used by the restoration anymore). Do not bump the snapshot version number, because we already did so since the last release.
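For illustration, the restore side can recover the offsets with a running sum over the region sizes. A standalone sketch (`region_offsets` is a hypothetical helper, not a function introduced by this patch), assuming regions are serialized back-to-back in iteration order:

    /// Returns the file offset of each region, i.e. the sum of the sizes
    /// of all regions preceding it in the snapshot file.
    fn region_offsets(region_sizes: &[u64]) -> Vec<u64> {
        let mut offset = 0u64;
        region_sizes
            .iter()
            .map(|size| {
                let current = offset;
                offset += size;
                current
            })
            .collect()
    }

    #[test]
    fn offsets_are_cumulative_region_sizes() {
        // Two 0x20000-byte regions land at offsets 0 and 0x20000.
        assert_eq!(region_offsets(&[0x20000, 0x20000]), vec![0, 0x20000]);
    }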
Signed-off-by: Patrick Roy --- src/vmm/src/persist.rs | 9 +++++---- src/vmm/src/vstate/memory.rs | 20 ++++++-------------- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index c9aadad10a9..5c352f3b260 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -584,13 +584,15 @@ fn create_guest_memory( ) -> Result<(GuestMemoryMmap, Vec), GuestMemoryFromUffdError> { let guest_memory = GuestMemoryMmap::from_state(None, mem_state, track_dirty_pages, huge_pages)?; let mut backend_mappings = Vec::with_capacity(guest_memory.num_regions()); - for (mem_region, state_region) in guest_memory.iter().zip(mem_state.regions.iter()) { + let mut offset = 0; + for mem_region in guest_memory.iter() { backend_mappings.push(GuestRegionUffdMapping { base_host_virt_addr: mem_region.as_ptr() as u64, size: mem_region.size(), - offset: state_region.offset, + offset, page_size_kib: huge_pages.page_size_kib(), }); + offset += mem_region.size() as u64; } Ok((guest_memory, backend_mappings)) @@ -770,7 +772,6 @@ mod tests { regions: vec![GuestMemoryRegionState { base_address: 0, size: 0x20000, - offset: 0x10000, }], }; @@ -779,7 +780,7 @@ mod tests { assert_eq!(uffd_regions.len(), 1); assert_eq!(uffd_regions[0].size, 0x20000); - assert_eq!(uffd_regions[0].offset, 0x10000); + assert_eq!(uffd_regions[0].offset, 0); assert_eq!( uffd_regions[0].page_size_kib, HugePageConfig::None.page_size_kib() diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index a84fd6c4be4..6e6674ebef7 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -122,8 +122,6 @@ pub struct GuestMemoryRegionState { pub base_address: u64, /// Region size. pub size: usize, - /// Offset in file/buffer where the region is saved. - pub offset: u64, } /// Describes guest memory regions and their snapshot file mappings. @@ -232,6 +230,7 @@ impl GuestMemoryExtension for GuestMemoryMmap { track_dirty_pages: bool, huge_pages: HugePageConfig, ) -> Result { + let mut offset = 0; match file { Some(f) => { if huge_pages.is_hugetlbfs() { @@ -242,10 +241,12 @@ impl GuestMemoryExtension for GuestMemoryMmap { .regions .iter() .map(|r| { - f.try_clone().map(|file_clone| { - let offset = FileOffset::new(file_clone, r.offset); + let fo = f.try_clone().map(|file_clone| { + let offset = FileOffset::new(file_clone, offset); (offset, GuestAddress(r.base_address), r.size) - }) + }); + offset += r.size as u64; + fo }) .collect::, std::io::Error>>() .map_err(MemoryError::FileError)?; @@ -266,15 +267,11 @@ impl GuestMemoryExtension for GuestMemoryMmap { /// Describes GuestMemoryMmap through a GuestMemoryState struct. 
fn describe(&self) -> GuestMemoryState { let mut guest_memory_state = GuestMemoryState::default(); - let mut offset = 0; self.iter().for_each(|region| { guest_memory_state.regions.push(GuestMemoryRegionState { base_address: region.start_addr().0, size: u64_to_usize(region.len()), - offset, }); - - offset += region.len(); }); guest_memory_state } @@ -536,7 +533,6 @@ mod tests { regions: vec![GuestMemoryRegionState { base_address: 0, size: 4096, - offset: 0, }], }; let file = TempFile::new().unwrap().into_file(); @@ -652,12 +648,10 @@ mod tests { GuestMemoryRegionState { base_address: 0, size: page_size, - offset: 0, }, GuestMemoryRegionState { base_address: page_size as u64 * 2, size: page_size, - offset: page_size as u64, }, ], }; @@ -679,12 +673,10 @@ mod tests { GuestMemoryRegionState { base_address: 0, size: page_size * 3, - offset: 0, }, GuestMemoryRegionState { base_address: page_size as u64 * 4, size: page_size * 3, - offset: page_size as u64 * 3, }, ], }; From b23157ff51109905682434b19f5e4bcfafc0e5b2 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 28 Jan 2025 16:00:43 +0000 Subject: [PATCH 4/6] doc: Include historic 3.0.0 snapshot bump We forgot to include this in the 1.9.0 changelog. Let's retroactively do it. Signed-off-by: Patrick Roy --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6a8e1b2ded..7ad104e1a04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -118,7 +118,8 @@ and this project adheres to VMGenID support for microVMs running on ARM hosts with 6.1 guest kernels. Support for VMGenID via DeviceTree bindings exists only on mainline 6.10 Linux onwards. Users of Firecracker will need to backport the relevant patches on - top of their 6.1 kernels to make use of the feature. + top of their 6.1 kernels to make use of the feature. As a result, Firecracker's + snapshot version is now 3.0.0. - [#4732](https://github.com/firecracker-microvm/firecracker/pull/4732), [#4733](https://github.com/firecracker-microvm/firecracker/pull/4733), [#4741](https://github.com/firecracker-microvm/firecracker/pull/4741), From a3c66939ee17d29a0491a3c95f0a416546af0e83 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 30 Jan 2025 17:33:10 +0000 Subject: [PATCH 5/6] refactor(test): Replace duplicated code with loops Some tests in memory.rs were running effectively the same test scenario twice, the only difference being the state of dirty page tracking. Just use a loop over the two boolean values here to avoid the copy-paste. Also remove a leftover test that was referring to "guard pages", but actually only repeated one of the dirty page tracking blocks. Guard pages were removed in 71cf036e56ce19dcf3144fd26bef6ae728f5464b. Signed-off-by: Patrick Roy --- src/vmm/src/vstate/memory.rs | 53 +++++++----------------------------- 1 file changed, 10 insertions(+), 43 deletions(-) diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 6e6674ebef7..63e4fdeadc7 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -431,8 +431,7 @@ mod tests { #[test] fn test_from_raw_regions() { - // Check dirty page tracking is off.
- { + for dirty_page_tracking in [true, false] { let region_size = 0x10000; let regions = vec![ (GuestAddress(0x0), region_size), (GuestAddress(0x10000), region_size), (GuestAddress(0x20000), region_size), (GuestAddress(0x30000), region_size), ]; - let guest_memory = - GuestMemoryMmap::from_raw_regions(&regions, false, HugePageConfig::None).unwrap(); - guest_memory.iter().for_each(|region| { - assert!(region.bitmap().is_none()); - }); - } - - // Check dirty page tracking is on. - { - let region_size = 0x10000; - let regions = vec![ - (GuestAddress(0x0), region_size), - (GuestAddress(0x10000), region_size), - (GuestAddress(0x20000), region_size), - (GuestAddress(0x30000), region_size), - ]; - - let guest_memory = - GuestMemoryMmap::from_raw_regions(&regions, true, HugePageConfig::None).unwrap(); + let guest_memory = GuestMemoryMmap::from_raw_regions( + &regions, + dirty_page_tracking, + HugePageConfig::None, + ) + .unwrap(); guest_memory.iter().for_each(|region| { - assert!(region.bitmap().is_some()); + assert_eq!(region.bitmap().is_some(), dirty_page_tracking); }); } } @@ -497,32 +483,13 @@ ), ]; - // Test that all regions are guarded. - { + for dirty_page_tracking in [true, false] { let guest_memory = GuestMemoryMmap::from_raw_regions_file(regions.clone(), dirty_page_tracking, false).unwrap(); guest_memory.iter().for_each(|region| { assert_eq!(region.size(), region_size); assert!(region.file_offset().is_some()); - assert!(region.bitmap().is_none()); - }); - } - - // Check dirty page tracking is off. - { - let guest_memory = - GuestMemoryMmap::from_raw_regions_file(regions.clone(), false, false).unwrap(); - guest_memory.iter().for_each(|region| { - assert!(region.bitmap().is_none()); - }); - } - - // Check dirty page tracking is on. - { - let guest_memory = - GuestMemoryMmap::from_raw_regions_file(regions, true, false).unwrap(); - guest_memory.iter().for_each(|region| { - assert!(region.bitmap().is_some()); + assert_eq!(region.bitmap().is_some(), dirty_page_tracking); }); } } From 6a14208faccbe88b4283cce1b907484e318f6a78 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 30 Jan 2025 17:47:40 +0000 Subject: [PATCH 6/6] refactor: centralize GuestMemoryMmap creation In this day and age, Firecracker theoretically supports 4 different ways of backing guest memory: 1. Normal MAP_ANONYMOUS | MAP_PRIVATE memory 2. memfd-backed memory, mapped as shared 3. direct mapping of a snapshot file 4. MAP_ANONYMOUS again, but this time regions are described by a snapshot file. We have 3 different functions for creating these different backing stores, which then call each other and vm_memory's APIs. Clean this up by consolidating these into just one function that can be called with generic memory backing options, plus 3 wrappers for the three actually used ways of backing memory. For this, hoist up the hugepages/file-based restore incompatibility check: the dedicated function for the "snapshot restored by mapping the file" case simply does not take a huge pages argument, so we have to do this check somewhere else.
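The resulting shape, roughly (a sketch abbreviated from the diff below; the `memfd_backed` wrapper and the mmap plumbing inside `create` are omitted, and the MAP_* constants are assumed to come from the libc crate, as elsewhere in the codebase):

    use std::fs::File;

    pub trait GuestMemoryExtension: Sized {
        // One generic constructor: the backing variants differ only in the
        // mmap flags and in whether a backing file is supplied.
        fn create(
            regions: impl Iterator<Item = (GuestAddress, usize)>,
            mmap_flags: libc::c_int,
            file: Option<File>,
            track_dirty_pages: bool,
        ) -> Result<Self, MemoryError>;

        // Thin wrapper: anonymous private mappings (normal boot path).
        fn anonymous(
            regions: impl Iterator<Item = (GuestAddress, usize)>,
            track_dirty_pages: bool,
            huge_pages: HugePageConfig,
        ) -> Result<Self, MemoryError> {
            Self::create(
                regions,
                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | huge_pages.mmap_flags(),
                None,
                track_dirty_pages,
            )
        }

        // Thin wrapper: private mapping of a snapshot file. Deliberately takes
        // no huge pages argument, which is why the hugetlbfs incompatibility
        // check moves up to the caller.
        fn snapshot_file(
            file: File,
            state: &GuestMemoryState,
            track_dirty_pages: bool,
        ) -> Result<Self, MemoryError> {
            Self::create(state.regions(), libc::MAP_PRIVATE, Some(file), track_dirty_pages)
        }
    }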
Signed-off-by: Patrick Roy --- .../devices/virtio/block/vhost_user/device.rs | 12 +- .../src/devices/virtio/block/virtio/io/mod.rs | 8 +- src/vmm/src/devices/virtio/vhost_user.rs | 28 +- src/vmm/src/persist.rs | 30 +- src/vmm/src/resources.rs | 4 +- src/vmm/src/test_utils/mod.rs | 2 +- src/vmm/src/vstate/memory.rs | 311 ++++++------------ 7 files changed, 134 insertions(+), 261 deletions(-) diff --git a/src/vmm/src/devices/virtio/block/vhost_user/device.rs b/src/vmm/src/devices/virtio/block/vhost_user/device.rs index 62218157c8b..cdd17e2ea98 100644 --- a/src/vmm/src/devices/virtio/block/vhost_user/device.rs +++ b/src/vmm/src/devices/virtio/block/vhost_user/device.rs @@ -378,7 +378,7 @@ mod tests { use crate::devices::virtio::block::virtio::device::FileEngineType; use crate::devices::virtio::mmio::VIRTIO_MMIO_INT_CONFIG; use crate::test_utils::create_tmp_socket; - use crate::vstate::memory::{FileOffset, GuestAddress, GuestMemoryExtension}; + use crate::vstate::memory::{GuestAddress, GuestMemoryExtension}; #[test] fn test_from_config() { @@ -778,12 +778,10 @@ mod tests { let region_size = 0x10000; let file = TempFile::new().unwrap().into_file(); file.set_len(region_size as u64).unwrap(); - let regions = vec![( - FileOffset::new(file.try_clone().unwrap(), 0x0), - GuestAddress(0x0), - region_size, - )]; - let guest_memory = GuestMemoryMmap::from_raw_regions_file(regions, false, false).unwrap(); + let regions = vec![(GuestAddress(0x0), region_size)]; + let guest_memory = + GuestMemoryMmap::create(regions.into_iter(), libc::MAP_PRIVATE, Some(file), false) + .unwrap(); // During actiavion of the device features, memory and queues should be set and activated. vhost_block.activate(guest_memory).unwrap(); diff --git a/src/vmm/src/devices/virtio/block/virtio/io/mod.rs b/src/vmm/src/devices/virtio/block/virtio/io/mod.rs index 09e86b6968d..cc49dae3eb7 100644 --- a/src/vmm/src/devices/virtio/block/virtio/io/mod.rs +++ b/src/vmm/src/devices/virtio/block/virtio/io/mod.rs @@ -230,8 +230,12 @@ pub mod tests { } fn create_mem() -> GuestMemoryMmap { - GuestMemoryMmap::from_raw_regions(&[(GuestAddress(0), MEM_LEN)], true, HugePageConfig::None) - .unwrap() + GuestMemoryMmap::anonymous( + [(GuestAddress(0), MEM_LEN)].into_iter(), + true, + HugePageConfig::None, + ) + .unwrap() } fn check_dirty_mem(mem: &GuestMemoryMmap, addr: GuestAddress, len: u32) { diff --git a/src/vmm/src/devices/virtio/vhost_user.rs b/src/vmm/src/devices/virtio/vhost_user.rs index ad86c9942af..cca506a57c2 100644 --- a/src/vmm/src/devices/virtio/vhost_user.rs +++ b/src/vmm/src/devices/virtio/vhost_user.rs @@ -466,7 +466,7 @@ mod tests { use super::*; use crate::test_utils::create_tmp_socket; - use crate::vstate::memory::{FileOffset, GuestAddress, GuestMemoryExtension}; + use crate::vstate::memory::{GuestAddress, GuestMemoryExtension}; #[test] fn test_new() { @@ -759,19 +759,13 @@ mod tests { let file_size = 2 * region_size; file.set_len(file_size as u64).unwrap(); let regions = vec![ - ( - FileOffset::new(file.try_clone().unwrap(), 0x0), - GuestAddress(0x0), - region_size, - ), - ( - FileOffset::new(file.try_clone().unwrap(), 0x10000), - GuestAddress(0x10000), - region_size, - ), + (GuestAddress(0x0), region_size), + (GuestAddress(0x10000), region_size), ]; - let guest_memory = GuestMemoryMmap::from_raw_regions_file(regions, false, false).unwrap(); + let guest_memory = + GuestMemoryMmap::create(regions.into_iter(), libc::MAP_PRIVATE, Some(file), false) + .unwrap(); vuh.update_mem_table(&guest_memory).unwrap(); @@ -883,13 +877,11 @@ mod tests 
{ let region_size = 0x10000; let file = TempFile::new().unwrap().into_file(); file.set_len(region_size as u64).unwrap(); - let regions = vec![( - FileOffset::new(file.try_clone().unwrap(), 0x0), - GuestAddress(0x0), - region_size, - )]; + let regions = vec![(GuestAddress(0x0), region_size)]; - let guest_memory = GuestMemoryMmap::from_raw_regions_file(regions, false, false).unwrap(); + let guest_memory = + GuestMemoryMmap::create(regions.into_iter(), libc::MAP_PRIVATE, Some(file), false) + .unwrap(); let mut queue = Queue::new(69); queue.initialize(&guest_memory).unwrap(); diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 5c352f3b260..3c5f3e7e754 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -448,16 +448,19 @@ pub fn restore_from_snapshot( let mem_state = µvm_state.memory_state; let (guest_memory, uffd) = match params.mem_backend.backend_type { - MemBackendType::File => ( - guest_memory_from_file( - mem_backend_path, - mem_state, - track_dirty_pages, - vm_resources.machine_config.huge_pages, + MemBackendType::File => { + if vm_resources.machine_config.huge_pages.is_hugetlbfs() { + return Err(RestoreFromSnapshotGuestMemoryError::File( + GuestMemoryFromFileError::HugetlbfsSnapshot, + ) + .into()); + } + ( + guest_memory_from_file(mem_backend_path, mem_state, track_dirty_pages) + .map_err(RestoreFromSnapshotGuestMemoryError::File)?, + None, ) - .map_err(RestoreFromSnapshotGuestMemoryError::File)?, - None, - ), + } MemBackendType::Uffd => guest_memory_from_uffd( mem_backend_path, mem_state, @@ -513,17 +516,17 @@ pub enum GuestMemoryFromFileError { File(#[from] std::io::Error), /// Failed to restore guest memory: {0} Restore(#[from] MemoryError), + /// Cannot restore hugetlbfs backed snapshot by mapping the memory file. Please use uffd. 
+ HugetlbfsSnapshot, } fn guest_memory_from_file( mem_file_path: &Path, mem_state: &GuestMemoryState, track_dirty_pages: bool, - huge_pages: HugePageConfig, ) -> Result { let mem_file = File::open(mem_file_path)?; - let guest_mem = - GuestMemoryMmap::from_state(Some(&mem_file), mem_state, track_dirty_pages, huge_pages)?; + let guest_mem = GuestMemoryMmap::snapshot_file(mem_file, mem_state, track_dirty_pages)?; Ok(guest_mem) } @@ -582,7 +585,8 @@ fn create_guest_memory( track_dirty_pages: bool, huge_pages: HugePageConfig, ) -> Result<(GuestMemoryMmap, Vec), GuestMemoryFromUffdError> { - let guest_memory = GuestMemoryMmap::from_state(None, mem_state, track_dirty_pages, huge_pages)?; + let guest_memory = + GuestMemoryMmap::anonymous(mem_state.regions(), track_dirty_pages, huge_pages)?; let mut backend_mappings = Vec::with_capacity(guest_memory.num_regions()); let mut offset = 0; for mem_region in guest_memory.iter() { diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 2928a22c6ca..d6c5fb31a5a 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -472,8 +472,8 @@ impl VmResources { ) } else { let regions = crate::arch::arch_memory_regions(self.machine_config.mem_size_mib << 20); - GuestMemoryMmap::from_raw_regions( - ®ions, + GuestMemoryMmap::anonymous( + regions.into_iter(), self.machine_config.track_dirty_pages, self.machine_config.huge_pages, ) diff --git a/src/vmm/src/test_utils/mod.rs b/src/vmm/src/test_utils/mod.rs index 1ba79a55231..2ca7f5ce773 100644 --- a/src/vmm/src/test_utils/mod.rs +++ b/src/vmm/src/test_utils/mod.rs @@ -34,7 +34,7 @@ pub fn single_region_mem_at(at: u64, size: usize) -> GuestMemoryMmap { /// Creates a [`GuestMemoryMmap`] with multiple regions and without dirty page tracking. pub fn multi_region_mem(regions: &[(GuestAddress, usize)]) -> GuestMemoryMmap { - GuestMemoryMmap::from_raw_regions(regions, false, HugePageConfig::None) + GuestMemoryMmap::anonymous(regions.iter().copied(), false, HugePageConfig::None) .expect("Cannot initialize memory") } diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 63e4fdeadc7..a9e16a8c2e6 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -8,6 +8,7 @@ use std::fs::File; use std::io::SeekFrom; +use libc::c_int; use serde::{Deserialize, Serialize}; pub use vm_memory::bitmap::{AtomicBitmap, Bitmap, BitmapSlice, BS}; pub use vm_memory::mmap::MmapRegionBuilder; @@ -19,6 +20,7 @@ pub use vm_memory::{ use vm_memory::{Error as VmMemoryError, GuestMemoryError, WriteVolatile}; use vmm_sys_util::errno; +use crate::arch::arch_memory_regions; use crate::utils::{get_page_size, u64_to_usize}; use crate::vmm_config::machine_config::HugePageConfig; use crate::DirtyBitmap; @@ -51,8 +53,6 @@ pub enum MemoryError { Memfd(memfd::Error), /// Cannot resize memfd file: {0} MemfdSetLen(std::io::Error), - /// Cannot restore hugetlbfs backed snapshot by mapping the memory file. Please use uffd. - HugetlbfsSnapshot, } /// Defines the interface for snapshotting memory. @@ -60,35 +60,59 @@ pub trait GuestMemoryExtension where Self: Sized, { + /// Creates a [`GuestMemoryMmap`] with the given configuration + fn create( + regions: impl Iterator, + mmap_flags: libc::c_int, + file: Option, + track_dirty_pages: bool, + ) -> Result; + /// Creates a GuestMemoryMmap with `size` in MiB backed by a memfd. 
fn memfd_backed( mem_size_mib: usize, track_dirty_pages: bool, huge_pages: HugePageConfig, - ) -> Result; + ) -> Result { + let memfd_file = create_memfd(mem_size_mib, huge_pages.into())?.into_file(); + let regions = arch_memory_regions(mem_size_mib << 20).into_iter(); - /// Creates a GuestMemoryMmap from raw regions. - fn from_raw_regions( - regions: &[(GuestAddress, usize)], - track_dirty_pages: bool, - huge_pages: HugePageConfig, - ) -> Result; + Self::create( + regions, + libc::MAP_SHARED | huge_pages.mmap_flags(), + Some(memfd_file), + track_dirty_pages, + ) + } /// Creates a GuestMemoryMmap from raw regions. - fn from_raw_regions_file( - regions: Vec<(FileOffset, GuestAddress, usize)>, + fn anonymous( + regions: impl Iterator, track_dirty_pages: bool, - shared: bool, - ) -> Result; + huge_pages: HugePageConfig, + ) -> Result { + Self::create( + regions, + libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | huge_pages.mmap_flags(), + None, + track_dirty_pages, + ) + } /// Creates a GuestMemoryMmap given a `file` containing the data /// and a `state` containing mapping information. - fn from_state( - file: Option<&File>, + fn snapshot_file( + file: File, state: &GuestMemoryState, track_dirty_pages: bool, - huge_pages: HugePageConfig, - ) -> Result; + ) -> Result { + Self::create( + state.regions(), + libc::MAP_PRIVATE, + Some(file), + track_dirty_pages, + ) + } /// Describes GuestMemoryMmap through a GuestMemoryState struct. fn describe(&self) -> GuestMemoryState; @@ -131,137 +155,51 @@ pub struct GuestMemoryState { pub regions: Vec, } -impl GuestMemoryExtension for GuestMemoryMmap { - /// Creates a GuestMemoryMmap with `size` in MiB backed by a memfd. - fn memfd_backed( - mem_size_mib: usize, - track_dirty_pages: bool, - huge_pages: HugePageConfig, - ) -> Result { - let memfd_file = create_memfd(mem_size_mib, huge_pages.into())?.into_file(); - - let mut offset: u64 = 0; - let regions = crate::arch::arch_memory_regions(mem_size_mib << 20) +impl GuestMemoryState { + /// Turns this [`GuestMemoryState`] into a description of guest memory regions as understood + /// by the creation functions of [`GuestMemoryExtensions`] + pub fn regions(&self) -> impl Iterator + '_ { + self.regions .iter() - .map(|(guest_address, region_size)| { - let file_clone = memfd_file.try_clone().map_err(MemoryError::FileError)?; - let file_offset = FileOffset::new(file_clone, offset); - offset += *region_size as u64; - Ok((file_offset, *guest_address, *region_size)) - }) - .collect::, MemoryError>>()?; - - Self::from_raw_regions_file(regions, track_dirty_pages, true) + .map(|region| (GuestAddress(region.base_address), region.size)) } +} - /// Creates a GuestMemoryMmap from raw regions backed by anonymous memory. - fn from_raw_regions( - regions: &[(GuestAddress, usize)], +impl GuestMemoryExtension for GuestMemoryMmap { + fn create( + regions: impl Iterator, + mmap_flags: c_int, + file: Option, track_dirty_pages: bool, - huge_pages: HugePageConfig, ) -> Result { - let prot = libc::PROT_READ | libc::PROT_WRITE; - // MAP_NORESERVE for 4K-backed page regions means that no swap space will be reserved for - // the region. For hugetlbfs regions, it means that pages in the hugetlbfs pool will - // not be reserved at mmap-time. This means that instead of failing at mmap-time if - // the hugetlbfs page pool is too small to accommodate the entire VM, Firecracker might - // receive a SIGBUS if a pagefault ever cannot be served due to the pool being depleted. 
- let flags = - libc::MAP_NORESERVE | libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | huge_pages.mmap_flags(); - + let mut offset = 0; let regions = regions - .iter() - .map(|(guest_address, region_size)| { - let bitmap = match track_dirty_pages { - true => Some(AtomicBitmap::with_len(*region_size)), - false => None, - }; - let region = MmapRegionBuilder::new_with_bitmap(*region_size, bitmap) - .with_mmap_prot(prot) - .with_mmap_flags(flags) - .build() - .map_err(MemoryError::MmapRegionError)?; - - GuestRegionMmap::new(region, *guest_address).map_err(MemoryError::VmMemoryError) - }) - .collect::, MemoryError>>()?; + .map(|(start, size)| { + let mut builder = MmapRegionBuilder::new_with_bitmap( + size, + track_dirty_pages.then(|| AtomicBitmap::with_len(size)), + ) + .with_mmap_prot(libc::PROT_READ | libc::PROT_WRITE) + .with_mmap_flags(libc::MAP_NORESERVE | mmap_flags); - GuestMemoryMmap::from_regions(regions).map_err(MemoryError::VmMemoryError) - } + if let Some(ref file) = file { + let file_offset = + FileOffset::new(file.try_clone().map_err(MemoryError::FileError)?, offset); - /// Creates a GuestMemoryMmap from raw regions backed by file. - fn from_raw_regions_file( - regions: Vec<(FileOffset, GuestAddress, usize)>, - track_dirty_pages: bool, - shared: bool, - ) -> Result { - let prot = libc::PROT_READ | libc::PROT_WRITE; - let flags = if shared { - libc::MAP_NORESERVE | libc::MAP_SHARED - } else { - libc::MAP_NORESERVE | libc::MAP_PRIVATE - }; - let regions = regions - .into_iter() - .map(|(file_offset, guest_address, region_size)| { - let bitmap = match track_dirty_pages { - true => Some(AtomicBitmap::with_len(region_size)), - false => None, - }; - let region = MmapRegionBuilder::new_with_bitmap(region_size, bitmap) - .with_mmap_prot(prot) - .with_mmap_flags(flags) - .with_file_offset(file_offset) - .build() - .map_err(MemoryError::MmapRegionError)?; - - GuestRegionMmap::new(region, guest_address).map_err(MemoryError::VmMemoryError) - }) - .collect::, MemoryError>>()?; + builder = builder.with_file_offset(file_offset); + } - GuestMemoryMmap::from_regions(regions).map_err(MemoryError::VmMemoryError) - } + offset += size as u64; - /// Creates a GuestMemoryMmap backed by a `file` if present, otherwise backed - /// by anonymous memory. Memory layout and ranges are described in `state` param. - fn from_state( - file: Option<&File>, - state: &GuestMemoryState, - track_dirty_pages: bool, - huge_pages: HugePageConfig, - ) -> Result { - let mut offset = 0; - match file { - Some(f) => { - if huge_pages.is_hugetlbfs() { - return Err(MemoryError::HugetlbfsSnapshot); - } + GuestRegionMmap::new( + builder.build().map_err(MemoryError::MmapRegionError)?, + start, + ) + .map_err(MemoryError::VmMemoryError) + }) + .collect::, _>>()?; - let regions = state - .regions - .iter() - .map(|r| { - let fo = f.try_clone().map(|file_clone| { - let offset = FileOffset::new(file_clone, offset); - (offset, GuestAddress(r.base_address), r.size) - }); - offset += r.size as u64; - fo - }) - .collect::, std::io::Error>>() - .map_err(MemoryError::FileError)?; - - Self::from_raw_regions_file(regions, track_dirty_pages, false) - } - None => { - let regions = state - .regions - .iter() - .map(|r| (GuestAddress(r.base_address), r.size)) - .collect::>(); - Self::from_raw_regions(®ions, track_dirty_pages, huge_pages) - } - } + GuestMemoryMmap::from_regions(regions).map_err(MemoryError::VmMemoryError) } /// Describes GuestMemoryMmap through a GuestMemoryState struct. 
@@ -430,7 +368,7 @@ mod tests { use crate::utils::get_page_size; #[test] - fn test_from_raw_regions() { + fn test_anonymous() { for dirty_page_tracking in [true, false] { let region_size = 0x10000; let regions = vec![ @@ -440,8 +378,8 @@ mod tests { (GuestAddress(0x30000), region_size), ]; - let guest_memory = GuestMemoryMmap::from_raw_regions( - &regions, + let guest_memory = GuestMemoryMmap::anonymous( + regions.into_iter(), dirty_page_tracking, HugePageConfig::None, ) .unwrap(); @@ -452,66 +390,6 @@ mod tests { } } - #[test] - fn test_from_raw_regions_file() { - let region_size = 0x10000; - - let file = TempFile::new().unwrap().into_file(); - let file_size = 4 * region_size; - file.set_len(file_size as u64).unwrap(); - - let regions = vec![ - ( - FileOffset::new(file.try_clone().unwrap(), 0x0), - GuestAddress(0x0), - region_size, - ), - ( - FileOffset::new(file.try_clone().unwrap(), 0x10000), - GuestAddress(0x10000), - region_size, - ), - ( - FileOffset::new(file.try_clone().unwrap(), 0x20000), - GuestAddress(0x20000), - region_size, - ), - ( - FileOffset::new(file.try_clone().unwrap(), 0x30000), - GuestAddress(0x30000), - region_size, - ), - ]; - - for dirty_page_tracking in [true, false] { - let guest_memory = - GuestMemoryMmap::from_raw_regions_file(regions.clone(), dirty_page_tracking, false).unwrap(); - guest_memory.iter().for_each(|region| { - assert_eq!(region.size(), region_size); - assert!(region.file_offset().is_some()); - assert_eq!(region.bitmap().is_some(), dirty_page_tracking); - }); - } - } - - #[test] - fn test_from_state() { - let state = GuestMemoryState { - regions: vec![GuestMemoryRegionState { - base_address: 0, - size: 4096, - }], - }; - let file = TempFile::new().unwrap().into_file(); - - // No mapping of snapshots that were taken with hugetlbfs enabled - let err = - GuestMemoryMmap::from_state(Some(&file), &state, false, HugePageConfig::Hugetlbfs2M) - .unwrap_err(); - - assert!(matches!(err, MemoryError::HugetlbfsSnapshot), "{:?}", err); - } - #[test] fn test_mark_dirty() { let page_size = get_page_size().unwrap(); @@ -523,7 +401,7 @@ mod tests { (GuestAddress(region_size as u64 * 2), region_size), // pages 6-8 ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&regions, true, HugePageConfig::None).unwrap(); + GuestMemoryMmap::anonymous(regions.into_iter(), true, HugePageConfig::None).unwrap(); let dirty_map = [ // page 0: not dirty @@ -578,8 +456,8 @@ mod tests { let region_size = page_size * 3; // Test with a single region - let guest_memory = GuestMemoryMmap::from_raw_regions( - &[(GuestAddress(0), region_size)], + let guest_memory = GuestMemoryMmap::anonymous( + [(GuestAddress(0), region_size)].into_iter(), false, HugePageConfig::None, ) @@ -593,7 +471,7 @@ mod tests { (GuestAddress(region_size as u64 * 2), region_size), // pages 6-8 ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&regions, true, HugePageConfig::None).unwrap(); + GuestMemoryMmap::anonymous(regions.into_iter(), true, HugePageConfig::None).unwrap(); check_serde(&guest_memory); } @@ -607,7 +485,7 @@ mod tests { let page_size = get_page_size().unwrap(); let region_size = page_size * 3; let mem_regions = [ (GuestAddress(0), region_size), (GuestAddress(page_size as u64 * 2), page_size), ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&mem_regions[..], true, HugePageConfig::None) + GuestMemoryMmap::anonymous(mem_regions.into_iter(), true, HugePageConfig::None) .unwrap(); let expected_memory_state = GuestMemoryState { @@ -632,7 +510,7 @@ mod tests { (GuestAddress(page_size as u64 * 4), page_size * 3), ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&mem_regions[..], true, HugePageConfig::None) +
GuestMemoryMmap::anonymous(mem_regions.into_iter(), true, HugePageConfig::None) .unwrap(); let expected_memory_state = GuestMemoryState { @@ -665,7 +543,8 @@ mod tests { (region_2_address, region_size), ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&mem_regions, true, HugePageConfig::None).unwrap(); + GuestMemoryMmap::anonymous(mem_regions.into_iter(), true, HugePageConfig::None) + .unwrap(); // Check that Firecracker bitmap is clean. guest_memory.iter().for_each(|r| { assert!(!r.bitmap().dirty_at(0)); @@ -687,13 +566,8 @@ mod tests { let mut memory_file = TempFile::new().unwrap().into_file(); guest_memory.dump(&mut memory_file).unwrap(); - let restored_guest_memory = GuestMemoryMmap::from_state( - Some(&memory_file), - &memory_state, - false, - HugePageConfig::None, - ) - .unwrap(); + let restored_guest_memory = + GuestMemoryMmap::snapshot_file(memory_file, &memory_state, false).unwrap(); // Check that the region contents are the same. let mut restored_region = vec![0u8; page_size * 2]; @@ -721,7 +595,8 @@ mod tests { (region_2_address, region_size), ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&mem_regions, true, HugePageConfig::None).unwrap(); + GuestMemoryMmap::anonymous(mem_regions.into_iter(), true, HugePageConfig::None) + .unwrap(); // Check that Firecracker bitmap is clean. guest_memory.iter().for_each(|r| { assert!(!r.bitmap().dirty_at(0)); @@ -751,8 +626,7 @@ mod tests { // We can restore from this because this is the first dirty dump. let restored_guest_memory = - GuestMemoryMmap::from_state(Some(&file), &memory_state, false, HugePageConfig::None) - .unwrap(); + GuestMemoryMmap::snapshot_file(file, &memory_state, false).unwrap(); // Check that the region contents are the same. let mut restored_region = vec![0u8; region_size]; @@ -809,7 +683,8 @@ mod tests { (region_2_address, region_size), ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&mem_regions, true, HugePageConfig::None).unwrap(); + GuestMemoryMmap::anonymous(mem_regions.into_iter(), true, HugePageConfig::None) + .unwrap(); // Check that Firecracker bitmap is clean. guest_memory.iter().for_each(|r| {