diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index b23876b2262..5f0390ac72d 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -107,6 +107,9 @@ where /// Resets all the memory region bitmaps fn reset_dirty(&self); + + /// Store the dirty bitmap in internal store + fn store_dirty_bitmap(&self, dirty_bitmap: &DirtyBitmap, page_size: usize); } /// State of a guest memory region saved to file/buffer. @@ -301,56 +304,57 @@ impl GuestMemoryExtension for GuestMemoryMmap { let mut writer_offset = 0; let page_size = get_page_size().map_err(MemoryError::PageSize)?; - self.iter() - .enumerate() - .try_for_each(|(slot, region)| { - let kvm_bitmap = dirty_bitmap.get(&slot).unwrap(); - let firecracker_bitmap = region.bitmap(); - let mut write_size = 0; - let mut dirty_batch_start: u64 = 0; - - for (i, v) in kvm_bitmap.iter().enumerate() { - for j in 0..64 { - let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64; - let page_offset = ((i * 64) + j) * page_size; - let is_firecracker_page_dirty = firecracker_bitmap.dirty_at(page_offset); - if is_kvm_page_dirty || is_firecracker_page_dirty { - // We are at the start of a new batch of dirty pages. - if write_size == 0 { - // Seek forward over the unmodified pages. - writer - .seek(SeekFrom::Start(writer_offset + page_offset as u64)) - .unwrap(); - dirty_batch_start = page_offset as u64; - } - write_size += page_size; - } else if write_size > 0 { - // We are at the end of a batch of dirty pages. 
- writer.write_all_volatile( - &region.get_slice( - MemoryRegionAddress(dirty_batch_start), - write_size, - )?, - )?; - - write_size = 0; + let write_result = self.iter().enumerate().try_for_each(|(slot, region)| { + let kvm_bitmap = dirty_bitmap.get(&slot).unwrap(); + let firecracker_bitmap = region.bitmap(); + let mut write_size = 0; + let mut dirty_batch_start: u64 = 0; + + for (i, v) in kvm_bitmap.iter().enumerate() { + for j in 0..64 { + let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64; + let page_offset = ((i * 64) + j) * page_size; + let is_firecracker_page_dirty = firecracker_bitmap.dirty_at(page_offset); + + if is_kvm_page_dirty || is_firecracker_page_dirty { + // We are at the start of a new batch of dirty pages. + if write_size == 0 { + // Seek forward over the unmodified pages. + writer + .seek(SeekFrom::Start(writer_offset + page_offset as u64)) + .unwrap(); + dirty_batch_start = page_offset as u64; } + write_size += page_size; + } else if write_size > 0 { + // We are at the end of a batch of dirty pages. 
+ writer.write_all_volatile( + &region + .get_slice(MemoryRegionAddress(dirty_batch_start), write_size)?, + )?; + + write_size = 0; } } + } - if write_size > 0 { - writer.write_all_volatile( - &region.get_slice(MemoryRegionAddress(dirty_batch_start), write_size)?, - )?; - } - writer_offset += region.len(); - if let Some(bitmap) = firecracker_bitmap { - bitmap.reset(); - } + if write_size > 0 { + writer.write_all_volatile( + &region.get_slice(MemoryRegionAddress(dirty_batch_start), write_size)?, + )?; + } + writer_offset += region.len(); - Ok(()) - }) - .map_err(MemoryError::WriteMemory) + Ok(()) + }); + + if write_result.is_err() { + self.store_dirty_bitmap(dirty_bitmap, page_size); + } else { + self.reset_dirty(); + } + + write_result.map_err(MemoryError::WriteMemory) } /// Resets all the memory region bitmaps @@ -361,6 +365,26 @@ impl GuestMemoryExtension for GuestMemoryMmap { } }) } + + /// Stores the dirty bitmap into the internal bitmap + fn store_dirty_bitmap(&self, dirty_bitmap: &DirtyBitmap, page_size: usize) { + self.iter().enumerate().for_each(|(slot, region)| { + let kvm_bitmap = dirty_bitmap.get(&slot).unwrap(); + let firecracker_bitmap = region.bitmap(); + + for (i, v) in kvm_bitmap.iter().enumerate() { + for j in 0..64 { + let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64; + + if is_kvm_page_dirty { + let page_offset = ((i * 64) + j) * page_size; + + firecracker_bitmap.mark_dirty(page_offset, 1) + } + } + } + }); + } } fn create_memfd( @@ -812,6 +836,42 @@ mod tests { assert_eq!(expected_first_region, diff_file_content); } + #[test] + fn test_store_dirty_bitmap() { + let page_size = get_page_size().unwrap(); + + // Two regions of three pages each, with a one page gap between them. 
+ let region_1_address = GuestAddress(0); + let region_2_address = GuestAddress(page_size as u64 * 4); + let region_size = page_size * 3; + let mem_regions = [ + (region_1_address, region_size), + (region_2_address, region_size), + ]; + let guest_memory = + GuestMemoryMmap::from_raw_regions(&mem_regions, true, HugePageConfig::None).unwrap(); + + // Check that Firecracker bitmap is clean. + guest_memory.iter().for_each(|r| { + assert!(!r.bitmap().dirty_at(0)); + assert!(!r.bitmap().dirty_at(page_size)); + assert!(!r.bitmap().dirty_at(page_size * 2)); + }); + + let mut dirty_bitmap: DirtyBitmap = HashMap::new(); + dirty_bitmap.insert(0, vec![0b101]); + dirty_bitmap.insert(1, vec![0b101]); + + guest_memory.store_dirty_bitmap(&dirty_bitmap, page_size); + + // Assert that the bitmap now reports as being dirty maching the dirty bitmap + guest_memory.iter().for_each(|r| { + assert!(r.bitmap().dirty_at(0)); + assert!(!r.bitmap().dirty_at(page_size)); + assert!(r.bitmap().dirty_at(page_size * 2)); + }); + } + #[test] fn test_create_memfd() { let size = 1; diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 5e10779cc5a..8ed11a9f77e 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -214,6 +214,7 @@ def __init__( self.log_file = None self.metrics_file = None self._spawned = False + self._killed = False # device dictionaries self.iface = {} @@ -238,6 +239,9 @@ def __repr__(self): def kill(self): """All clean up associated with this microVM should go here.""" # pylint: disable=subprocess-run-check + # if it was already killed, return + if self._killed: + return # Stop any registered monitors for monitor in self.monitors: @@ -287,6 +291,7 @@ def kill(self): # Mark the microVM as not spawned, so we avoid trying to kill twice. 
self._spawned = False + self._killed = True if self.time_api_requests: self._validate_api_response_times() @@ -1014,8 +1019,9 @@ def kill(self): for vm in self.vms: vm.kill() vm.jailer.cleanup() - if len(vm.jailer.jailer_id) > 0: - shutil.rmtree(vm.jailer.chroot_base_with_id()) + chroot_base_with_id = vm.jailer.chroot_base_with_id() + if len(vm.jailer.jailer_id) > 0 and chroot_base_with_id.exists(): + shutil.rmtree(chroot_base_with_id) vm.netns.cleanup() self.vms.clear() diff --git a/tests/integration_tests/functional/test_snapshot_not_losing_dirty_pages.py b/tests/integration_tests/functional/test_snapshot_not_losing_dirty_pages.py new file mode 100644 index 00000000000..9f0ed465215 --- /dev/null +++ b/tests/integration_tests/functional/test_snapshot_not_losing_dirty_pages.py @@ -0,0 +1,73 @@ +# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +"""Test that the no dirty pages lost in case of error during snapshot creation.""" + +import subprocess +from pathlib import Path + +import psutil +import pytest + + +@pytest.fixture +def mount_tmpfs_small(worker_id): + """Mount a small tmpfs and return its path""" + mnt_path = Path(f"/mnt/{worker_id}") + mnt_path.mkdir(parents=True) + subprocess.check_call( + ["mount", "-o", "size=512M", "-t", "tmpfs", "none", str(mnt_path)] + ) + try: + yield mnt_path + finally: + subprocess.check_call(["umount", mnt_path]) + mnt_path.rmdir() + + +def test_diff_snapshot_works_after_error( + microvm_factory, guest_kernel_linux_5_10, rootfs_ubuntu_22, mount_tmpfs_small +): + """ + Test that if a partial snapshot errors it will work after and not lose data + """ + uvm = microvm_factory.build( + guest_kernel_linux_5_10, + rootfs_ubuntu_22, + jailer_kwargs={"chroot_base": mount_tmpfs_small}, + ) + + vm_mem_size = 128 + uvm.spawn() + uvm.basic_config(mem_size_mib=vm_mem_size, track_dirty_pages=True) + uvm.add_net_iface() + uvm.start() + uvm.wait_for_up() + + chroot = 
Path(uvm.chroot()) + + # Create a large file dynamically based on available space + fill = chroot / "fill" + disk_usage = psutil.disk_usage(chroot) + target_size = round(disk_usage.free * 0.9) # Attempt to fill 90% of free space + + subprocess.check_call(f"fallocate -l {target_size} {fill}", shell=True) + + try: + uvm.snapshot_diff() + except RuntimeError: + msg = "No space left on device" + uvm.check_log_message(msg) + else: + assert False, "This should fail" + + fill.unlink() + + # Now there is enough space for it to work + snap2 = uvm.snapshot_diff() + + vm2 = microvm_factory.build() + vm2.spawn() + vm2.restore_from_snapshot(snap2, resume=True) + vm2.wait_for_up() + + uvm.kill()