diff --git a/docs/snapshotting/handling-page-faults-on-snapshot-resume.md b/docs/snapshotting/handling-page-faults-on-snapshot-resume.md
index ea790624a18..a5da980124e 100644
--- a/docs/snapshotting/handling-page-faults-on-snapshot-resume.md
+++ b/docs/snapshotting/handling-page-faults-on-snapshot-resume.md
@@ -162,7 +162,7 @@ connect/send data.
 ### Example
 
 An example of a handler process can be found
-[here](../../src/firecracker/examples/uffd/valid_handler.rs). The process is
+[here](../../src/firecracker/examples/uffd/on_demand_handler.rs). The process is
 designed to tackle faults on a certain address by loading into memory the
 entire region that the address belongs to, but users can choose any other
 behavior that suits their use case best.
diff --git a/resources/overlay/usr/local/bin/fast_page_fault_helper.c b/resources/overlay/usr/local/bin/fast_page_fault_helper.c
index 591ac3b9612..7558f7b09fc 100644
--- a/resources/overlay/usr/local/bin/fast_page_fault_helper.c
+++ b/resources/overlay/usr/local/bin/fast_page_fault_helper.c
@@ -10,17 +10,31 @@
 // This way, the `memset` will trigger a fast page fault for every page in
 // the memory region.
 
-#include <stdio.h>    // perror
+#include <stdio.h>    // perror, fopen, fprintf
 #include <signal.h>   // sigwait and friends
 #include <string.h>   // memset
 #include <sys/mman.h> // mmap
+#include <time.h>     // clock_gettime
+#include <fcntl.h>    // open
 
 #define MEM_SIZE_MIB (128 * 1024 * 1024)
+#define NANOS_PER_SEC 1000000000
+#define PAGE_SIZE 4096
 
-int main(int argc, char *const argv[]) {
+void touch_memory(void *mem, size_t size, char val) {
+    void *end = mem + size;
+    for (; mem < end; mem += PAGE_SIZE) {
+        *((char *)mem) = val;
+    }
+}
+
+int main() {
     sigset_t set;
     int signal;
     void *ptr;
+    struct timespec start, end;
+    long duration_nanos;
+    FILE *out_file;
 
     sigemptyset(&set);
     if (sigaddset(&set, SIGUSR1) == -1) {
@@ -39,11 +53,27 @@ int main(int argc, char *const argv[]) {
         return 1;
     }
 
-    memset(ptr, 1, MEM_SIZE_MIB);
+    touch_memory(ptr, MEM_SIZE_MIB, 1);
 
     sigwait(&set, &signal);
 
-    memset(ptr, 2, MEM_SIZE_MIB);
+    clock_gettime(CLOCK_BOOTTIME, &start);
+    touch_memory(ptr, MEM_SIZE_MIB, 2);
+    clock_gettime(CLOCK_BOOTTIME, &end);
+
+    duration_nanos = (end.tv_sec - start.tv_sec) * NANOS_PER_SEC + end.tv_nsec - start.tv_nsec;
+
+    out_file = fopen("/tmp/fast_page_fault_helper.out", "w");
+    if (out_file == NULL) {
+        perror("fopen");
+        return 1;
+    }
+
+    fprintf(out_file, "%ld", duration_nanos);
+    if (fclose(out_file)) {
+        perror("fclose");
+        return 1;
+    }
 
     return 0;
 }
\ No newline at end of file
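The helper now reports how long the post-`sigwait` pass takes instead of only generating faults: a test sends it `SIGUSR1` and then collects the nanosecond figure from `/tmp/fast_page_fault_helper.out`. A minimal test-side sketch of that protocol, with the caveat that `measure_fast_page_faults_ms` is a hypothetical name and that it assumes the framework's `vm.ssh.check_output` (used elsewhere in this diff), whose result exposes a `stdout` field:

```python
import signal

def measure_fast_page_faults_ms(vm, helper_pid):
    """Wake fast_page_fault_helper and return its measurement in milliseconds."""
    # The helper blocks in sigwait() until SIGUSR1 arrives, then times a
    # second pass over its 128 MiB mapping and writes the duration to a file.
    vm.ssh.check_output(f"kill -s {signal.SIGUSR1} {helper_pid}")
    # A real test must wait for the helper to finish the pass before reading.
    duration_nanos = int(
        vm.ssh.check_output("cat /tmp/fast_page_fault_helper.out").stdout
    )
    return duration_nanos / 1_000_000
```

Note that the helper now touches one byte per 4 KiB page instead of `memset`-ing the whole region: a single write per page is enough to trigger the fault, and it keeps the measured loop from being dominated by memory bandwidth.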
diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml
index 29778af061d..40b2795050d 100644
--- a/src/firecracker/Cargo.toml
+++ b/src/firecracker/Cargo.toml
@@ -57,8 +57,8 @@ name = "uffd_malicious_handler"
 path = "examples/uffd/malicious_handler.rs"
 
 [[example]]
-name = "uffd_valid_handler"
-path = "examples/uffd/valid_handler.rs"
+name = "uffd_on_demand_handler"
+path = "examples/uffd/on_demand_handler.rs"
 
 [[example]]
 name = "uffd_fault_all_handler"
diff --git a/src/firecracker/examples/uffd/fault_all_handler.rs b/src/firecracker/examples/uffd/fault_all_handler.rs
index cfeaa099236..6711350497a 100644
--- a/src/firecracker/examples/uffd/fault_all_handler.rs
+++ b/src/firecracker/examples/uffd/fault_all_handler.rs
@@ -11,6 +11,7 @@ use std::fs::File;
 use std::os::unix::net::UnixListener;
 
 use uffd_utils::{Runtime, UffdHandler};
+use utils::time::{get_time_us, ClockType};
 
 fn main() {
     let mut args = std::env::args();
@@ -34,10 +35,13 @@ fn main() {
         match event {
             userfaultfd::Event::Pagefault { .. } => {
+                let start = get_time_us(ClockType::Monotonic);
                 for region in uffd_handler.mem_regions.clone() {
-                    uffd_handler
-                        .serve_pf(region.mapping.base_host_virt_addr as _, region.mapping.size);
+                    uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
                 }
+                let end = get_time_us(ClockType::Monotonic);
+
+                println!("Finished Faulting All: {}us", end - start);
             }
             _ => panic!("Unexpected event on userfaultfd"),
         }
 
diff --git a/src/firecracker/examples/uffd/valid_handler.rs b/src/firecracker/examples/uffd/on_demand_handler.rs
similarity index 96%
rename from src/firecracker/examples/uffd/valid_handler.rs
rename to src/firecracker/examples/uffd/on_demand_handler.rs
index 936b9f517a3..3be958b3578 100644
--- a/src/firecracker/examples/uffd/valid_handler.rs
+++ b/src/firecracker/examples/uffd/on_demand_handler.rs
@@ -10,7 +10,7 @@ mod uffd_utils;
 use std::fs::File;
 use std::os::unix::net::UnixListener;
 
-use uffd_utils::{MemPageState, Runtime, UffdHandler};
+use uffd_utils::{Runtime, UffdHandler};
 
 fn main() {
     let mut args = std::env::args();
@@ -86,8 +86,9 @@ fn main() {
                         deferred_events.push(event);
                     }
                 }
-                userfaultfd::Event::Remove { start, end } => uffd_handler
-                    .update_mem_state_mappings(start as u64, end as u64, MemPageState::Removed),
+                userfaultfd::Event::Remove { start, end } => {
+                    uffd_handler.mark_range_removed(start as u64, end as u64)
+                }
                 _ => panic!("Unexpected event on userfaultfd"),
             }
         }
diff --git a/src/firecracker/examples/uffd/uffd_utils.rs b/src/firecracker/examples/uffd/uffd_utils.rs
index a2f7879f591..dcc05151967 100644
--- a/src/firecracker/examples/uffd/uffd_utils.rs
+++ b/src/firecracker/examples/uffd/uffd_utils.rs
@@ -4,7 +4,7 @@
 // Not everything is used by both binaries
 #![allow(dead_code)]
 
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use std::fs::File;
 use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd};
 use std::os::unix::net::UnixStream;
@@ -34,26 +34,20 @@ pub struct GuestRegionUffdMapping {
     pub page_size_kib: usize,
 }
 
-#[derive(Debug, Clone, Copy)]
-pub enum MemPageState {
-    Uninitialized,
-    FromFile,
-    Removed,
-    Anonymous,
-}
-
-#[derive(Debug, Clone)]
-pub struct MemRegion {
-    pub mapping: GuestRegionUffdMapping,
-    page_states: HashMap<u64, MemPageState>,
+impl GuestRegionUffdMapping {
+    fn contains(&self, fault_page_addr: u64) -> bool {
+        fault_page_addr >= self.base_host_virt_addr
+            && fault_page_addr < self.base_host_virt_addr + self.size as u64
+    }
 }
 
 #[derive(Debug)]
 pub struct UffdHandler {
-    pub mem_regions: Vec<MemRegion>,
+    pub mem_regions: Vec<GuestRegionUffdMapping>,
     pub page_size: usize,
     backing_buffer: *const u8,
     uffd: Uffd,
+    removed_pages: HashSet<u64>,
 }
 
 impl UffdHandler {
@@ -92,13 +86,12 @@ impl UffdHandler {
 
         let uffd = unsafe { Uffd::from_raw_fd(file.into_raw_fd()) };
 
-        let mem_regions = create_mem_regions(&mappings, page_size);
-
         Self {
-            mem_regions,
+            mem_regions: mappings,
             page_size,
             backing_buffer,
             uffd,
+            removed_pages: HashSet::new(),
         }
     }
 
@@ -106,13 +99,12 @@ impl UffdHandler {
         self.uffd.read_event()
     }
 
-    pub fn update_mem_state_mappings(&mut self, start: u64, end: u64, state: MemPageState) {
-        for region in self.mem_regions.iter_mut() {
-            for (key, value) in region.page_states.iter_mut() {
-                if key >= &start && key < &end {
-                    *value = state;
-                }
-            }
+    pub fn mark_range_removed(&mut self, start: u64, end: u64) {
+        let pfn_start = start / self.page_size as u64;
+        let pfn_end = end / self.page_size as u64;
+
+        for pfn in pfn_start..pfn_end {
+            self.removed_pages.insert(pfn);
         }
     }
 
@@ -120,33 +112,16 @@ impl UffdHandler {
         // Find the start of the page that the current faulting address belongs to.
        let dst = (addr as usize & !(self.page_size - 1)) as *mut libc::c_void;
         let fault_page_addr = dst as u64;
-
-        // Get the state of the current faulting page.
-        for region in self.mem_regions.iter() {
-            match region.page_states.get(&fault_page_addr) {
-                // Our simple PF handler has a simple strategy:
-                // There exist 4 states in which a memory page can be in:
-                // 1. Uninitialized - page was never touched
-                // 2. FromFile - the page is populated with content from snapshotted memory file
-                // 3. Removed - MADV_DONTNEED was called due to balloon inflation
-                // 4. Anonymous - page was zeroed out -> this implies that more than one page fault
-                //    event was received. This can be a consequence of guest reclaiming back its
-                //    memory from the host (through balloon device)
-                Some(MemPageState::Uninitialized) | Some(MemPageState::FromFile) => {
-                    match self.populate_from_file(region, fault_page_addr, len) {
-                        Some((start, end)) => {
-                            self.update_mem_state_mappings(start, end, MemPageState::FromFile)
-                        }
-                        None => return false,
-                    }
-                    return true;
-                }
-                Some(MemPageState::Removed) | Some(MemPageState::Anonymous) => {
-                    let (start, end) = self.zero_out(fault_page_addr);
-                    self.update_mem_state_mappings(start, end, MemPageState::Anonymous);
-                    return true;
+        let fault_pfn = fault_page_addr / self.page_size as u64;
+
+        if self.removed_pages.contains(&fault_pfn) {
+            self.zero_out(fault_page_addr);
+            return true;
+        } else {
+            for region in self.mem_regions.iter() {
+                if region.contains(fault_page_addr) {
+                    return self.populate_from_file(region, fault_page_addr, len);
                 }
-                None => {}
             }
         }
 
         panic!(
             "Could not find addr: {:?} within guest region mappings.",
             addr
         );
     }
 
-    fn populate_from_file(&self, region: &MemRegion, dst: u64, len: usize) -> Option<(u64, u64)> {
-        let offset = dst - region.mapping.base_host_virt_addr;
-        let src = self.backing_buffer as u64 + region.mapping.offset + offset;
+    fn populate_from_file(&self, region: &GuestRegionUffdMapping, dst: u64, len: usize) -> bool {
+        let offset = dst - region.base_host_virt_addr;
+        let src = self.backing_buffer as u64 + region.offset + offset;
 
-        let ret = unsafe {
+        unsafe {
             match self.uffd.copy(src as *const _, dst as *mut _, len, true) {
-                Ok(value) => value,
+                // Make sure the UFFD copied some bytes.
+                Ok(value) => assert!(value > 0),
                 // Catch EAGAIN errors, which occur when a `remove` event lands in the UFFD
                 // queue while we're processing `pagefault` events.
                 // The weird cast is because the `bytes_copied` field is based on the
                 // signed `uffdio_copy.copy` field, which uffd-rs casts to an unsigned
                 // `usize`, so a -EAGAIN error code arrives as `(-libc::EAGAIN) as usize`.
                 Err(Error::PartiallyCopied(bytes_copied))
                     if bytes_copied == 0 || bytes_copied == (-libc::EAGAIN) as usize =>
                 {
-                    return None
+                    return false
                 }
                 Err(Error::CopyFailed(errno))
                     if std::io::Error::from(errno).raw_os_error().unwrap() == libc::EEXIST =>
                 {
-                    len
+                    ()
                 }
                 Err(e) => {
                     panic!("Uffd copy failed: {e:?}");
                 }
             }
         };
 
-        // Make sure the UFFD copied some bytes.
-        assert!(ret > 0);
-
-        Some((dst, dst + len as u64))
+        true
     }
 
-    fn zero_out(&mut self, addr: u64) -> (u64, u64) {
+    fn zero_out(&mut self, addr: u64) {
         let ret = unsafe {
             self.uffd
                 .zeropage(addr as *mut _, self.page_size, true)
                 .expect("Uffd zeropage failed")
         };
 
         // Make sure the UFFD zeroed out some bytes.
         assert!(ret > 0);
-
-        (addr, addr + self.page_size as u64)
     }
 }
 
@@ -345,28 +316,6 @@ impl Runtime {
     }
 }
 
-fn create_mem_regions(mappings: &Vec<GuestRegionUffdMapping>, page_size: usize) -> Vec<MemRegion> {
-    let mut mem_regions: Vec<MemRegion> = Vec::with_capacity(mappings.len());
-
-    for r in mappings.iter() {
-        let mapping = r.clone();
-        let mut addr = r.base_host_virt_addr;
-        let end_addr = r.base_host_virt_addr + r.size as u64;
-        let mut page_states = HashMap::new();
-
-        while addr < end_addr {
-            page_states.insert(addr, MemPageState::Uninitialized);
-            addr += page_size as u64;
-        }
-        mem_regions.push(MemRegion {
-            mapping,
-            page_states,
-        });
-    }
-
-    mem_regions
-}
-
 #[cfg(test)]
 mod tests {
     use std::mem::MaybeUninit;
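The rewritten handler drops the per-page `MemPageState` map in favor of a single `HashSet` of removed page frame numbers: a fault is zero-filled if its page was madvised away by the balloon, and served from the snapshot file otherwise. A rough Python model of that decision logic (the 4 KiB constant is an assumption for illustration; the real handler uses the `page_size` negotiated at runtime):

```python
PAGE_SIZE = 4096  # assumption; the handler reads the actual page size at runtime

removed_pfns = set()

def mark_range_removed(start, end):
    # Mirrors UffdHandler::mark_range_removed: record each removed page
    # by its page frame number instead of rewriting per-page state.
    removed_pfns.update(range(start // PAGE_SIZE, end // PAGE_SIZE))

def serve_pf(addr):
    # Mirrors the new serve_pf: round down to the page boundary, then pick
    # an action based on membership in the removed-pages set.
    page_addr = addr & ~(PAGE_SIZE - 1)
    if page_addr // PAGE_SIZE in removed_pfns:
        return "zero_out"
    return "populate_from_file"
```

Keyed by PFN, a `Remove` event costs one set insertion per page in the range, whereas the old `update_mem_state_mappings` walked every tracked page on each event.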
diff --git a/tests/conftest.py b/tests/conftest.py
index fa309427ef1..fb0b88b677a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -33,6 +33,7 @@
 import host_tools.cargo_build as build_tools
 from framework import defs, utils
 from framework.artifacts import disks, kernel_params
+from framework.defs import DEFAULT_BINARY_DIR
 from framework.microvm import MicroVMFactory
 from framework.properties import global_props
 from framework.utils_cpu_templates import (
@@ -293,14 +294,11 @@ def get(self, _netns_id):
 
 def microvm_factory(request, record_property, results_dir, netns_factory):
     """Fixture to create microvms simply."""
-    if binary_dir := request.config.getoption("--binary-dir"):
-        fc_binary_path = Path(binary_dir) / "firecracker"
-        jailer_binary_path = Path(binary_dir) / "jailer"
-        if not fc_binary_path.exists():
-            raise RuntimeError("Firecracker binary does not exist")
-    else:
-        fc_binary_path, jailer_binary_path = build_tools.get_firecracker_binaries()
-    record_property("firecracker_bin", str(fc_binary_path))
+    binary_dir = request.config.getoption("--binary-dir") or DEFAULT_BINARY_DIR
+    if isinstance(binary_dir, str):
+        binary_dir = Path(binary_dir)
+
+    record_property("firecracker_bin", str(binary_dir / "firecracker"))
 
     # If `--custom-cpu-template` option is provided, the given CPU template will
     # be applied afterwards unless overwritten.
@@ -316,8 +314,7 @@ def microvm_factory(request, record_property, results_dir, netns_factory):
     # We could override the chroot base like so
     # jailer_kwargs={"chroot_base": "/srv/jailo"}
     uvm_factory = MicroVMFactory(
-        fc_binary_path,
-        jailer_binary_path,
+        binary_dir,
         netns_factory=netns_factory,
         custom_cpu_template=custom_cpu_template,
     )
diff --git a/tests/framework/defs.py b/tests/framework/defs.py
index f017dc231ee..38fbe6802df 100644
--- a/tests/framework/defs.py
+++ b/tests/framework/defs.py
@@ -26,6 +26,13 @@
 # Absolute path to the test results folder
 TEST_RESULTS_DIR = FC_WORKSPACE_DIR / "test_results"
 
+DEFAULT_BINARY_DIR = (
+    LOCAL_BUILD_PATH
+    / "cargo_target"
+    / f"{platform.machine()}-unknown-linux-musl"
+    / "release"
+)
+
 # The minimum required host kernel version for which io_uring is supported in
 # Firecracker.
 MIN_KERNEL_VERSION_FOR_IO_URING = "5.10.51"
diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py
index f93a0dabf19..7d2e9002822 100644
--- a/tests/framework/microvm.py
+++ b/tests/framework/microvm.py
@@ -38,6 +38,7 @@
 from framework.microvm_helpers import MicrovmHelpers
 from framework.properties import global_props
 from framework.utils_drive import VhostUserBlkBackend, VhostUserBlkBackendType
+from framework.utils_uffd import spawn_pf_handler, uffd_handler
 from host_tools.fcmetrics import FCMetricsMonitor
 from host_tools.memory import MemoryMonitor
 
@@ -201,6 +202,7 @@ def __init__(
         self.ssh_key = None
         self.initrd_file = None
         self.boot_args = None
+        self.uffd_handler = None
 
         self.fc_binary_path = Path(fc_binary_path)
         assert fc_binary_path.exists()
@@ -1073,13 +1075,25 @@ def wait_for_ssh_up(self):
 class MicroVMFactory:
     """MicroVM factory"""
 
-    def __init__(self, fc_binary_path: Path, jailer_binary_path: Path, **kwargs):
+    def __init__(self, binary_path: Path, **kwargs):
         self.vms = []
-        self.fc_binary_path = Path(fc_binary_path)
-        self.jailer_binary_path = Path(jailer_binary_path)
+        self.binary_path = binary_path
         self.netns_factory = kwargs.pop("netns_factory", net_tools.NetNs)
         self.kwargs = kwargs
 
+        assert self.fc_binary_path.exists(), "missing firecracker binary"
+        assert self.jailer_binary_path.exists(), "missing jailer binary"
+
+    @property
+    def fc_binary_path(self):
+        """The path to the firecracker binary from which this factory will build VMs"""
+        return self.binary_path / "firecracker"
+
+    @property
+    def jailer_binary_path(self):
+        """The path to the jailer binary using which this factory will build VMs"""
+        return self.binary_path / "jailer"
+
     def build(self, kernel=None, rootfs=None, **kwargs):
         """Build a microvm"""
         kwargs = self.kwargs | kwargs
@@ -1115,6 +1129,52 @@ def build_from_snapshot(self, snapshot: Snapshot):
         vm.restore_from_snapshot(snapshot, resume=True)
         return vm
 
+    def build_n_from_snapshot(
+        self,
+        snapshot,
+        nr_vms,
+        *,
+        uffd_handler_name=None,
+        incremental=False,
+        use_snapshot_editor=True,
+    ):
+        """A generator of `n` microvms restored, either all restored from the same given snapshot
+        (incremental=False), or created by taking successive snapshots of restored VMs
+        """
+        for _ in range(nr_vms):
+            microvm = self.build()
+            microvm.spawn()
+
+            uffd_path = None
+            if uffd_handler_name is not None:
+                pf_handler = spawn_pf_handler(
+                    microvm,
+                    uffd_handler(uffd_handler_name, binary_dir=self.binary_path),
+                    snapshot.mem,
+                )
+                uffd_path = pf_handler.socket_path
+
+            snapshot_copy = microvm.restore_from_snapshot(
+                snapshot, resume=True, uffd_path=uffd_path
+            )
+
+            yield microvm
+
+            if incremental:
+                new_snapshot = microvm.make_snapshot(snapshot.snapshot_type)
+
+                if snapshot.is_diff:
+                    new_snapshot = new_snapshot.rebase_snapshot(
+                        snapshot, use_snapshot_editor
+                    )
+
+                snapshot = new_snapshot
+
+            microvm.kill()
+            snapshot_copy.delete()
+
+        snapshot.delete()
+
     def kill(self):
         """Clean up all built VMs"""
         for vm in self.vms:
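`build_n_from_snapshot` turns the restore-measure-snapshot-kill loop that several tests used to hand-roll into a generator that owns VM teardown, snapshot-copy deletion, and diff-snapshot rebasing. A hypothetical caller, modeled on how the updated tests below consume it:

```python
def restore_chain(microvm_factory, snapshot, length=3):
    # Each iteration yields a running VM restored from the previous snapshot;
    # after the loop body returns, the factory kills the VM, deletes the
    # snapshot copy, and (with incremental=True) rebases the next snapshot.
    for i, vm in enumerate(
        microvm_factory.build_n_from_snapshot(snapshot, length, incremental=True)
    ):
        vm.ssh.check_output("true")  # the guest is already resumed here
```

Because cleanup happens after the `yield`, a test body only ever sees a live, restored VM, and the rebase/delete bookkeeping lives in one place.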
- """ - return get_binary("firecracker", workspace_dir=workspace_dir), get_binary( - "jailer", workspace_dir=workspace_dir - ) - - def get_example(name, *args, package="firecracker", **kwargs): """Build an example binary""" return get_binary(package, *args, **kwargs, example=name) diff --git a/tests/integration_tests/functional/test_snapshot_basic.py b/tests/integration_tests/functional/test_snapshot_basic.py index 6fe41f4d5a6..09e14b92448 100644 --- a/tests/integration_tests/functional/test_snapshot_basic.py +++ b/tests/integration_tests/functional/test_snapshot_basic.py @@ -149,12 +149,9 @@ def test_5_snapshots( snapshot = vm.make_snapshot(snapshot_type) vm.kill() - for i in range(seq_len): - logger.info("Load snapshot #%s, mem %s", i, snapshot.mem) - microvm = microvm_factory.build() - microvm.spawn() - copied_snapshot = microvm.restore_from_snapshot(snapshot, resume=True) - + for microvm in microvm_factory.build_n_from_snapshot( + snapshot, seq_len, incremental=True, use_snapshot_editor=use_snapshot_editor + ): # FIXME: This and the sleep below reduce the rate of vsock/ssh connection # related spurious test failures, although we do not know why this is the case. time.sleep(2) @@ -171,21 +168,6 @@ def test_5_snapshots( check_filesystem(microvm.ssh, "squashfs", "/dev/vda") time.sleep(2) - logger.info("Create snapshot %s #%d.", snapshot_type, i + 1) - new_snapshot = microvm.make_snapshot(snapshot_type) - - # If we are testing incremental snapshots we must merge the base with - # current layer. - if snapshot.is_diff: - logger.info("Base: %s, Layer: %s", snapshot.mem, new_snapshot.mem) - new_snapshot = new_snapshot.rebase_snapshot( - snapshot, use_snapshot_editor=use_snapshot_editor - ) - - microvm.kill() - copied_snapshot.delete() - # Update the base for next iteration. - snapshot = new_snapshot def test_patch_drive_snapshot(uvm_nano, microvm_factory): @@ -524,27 +506,13 @@ def test_vmgenid(guest_kernel_linux_6_1, rootfs, microvm_factory, snapshot_type) base_snapshot = snapshot base_vm.kill() - for i in range(5): - vm = microvm_factory.build() - vm.spawn() - copied_snapshot = vm.restore_from_snapshot(snapshot, resume=True) - + for i, vm in enumerate( + microvm_factory.build_n_from_snapshot(base_snapshot, 5, incremental=True) + ): # We should have as DMESG_VMGENID_RESUME messages as # snapshots we have resumed check_vmgenid_update_count(vm, i + 1) - snapshot = vm.make_snapshot(snapshot_type) - vm.kill() - copied_snapshot.delete() - - # If we are testing incremental snapshots we ust merge the base with - # current layer. - if snapshot.is_diff: - snapshot = snapshot.rebase_snapshot(base_snapshot) - - # Update the base for next iteration - base_snapshot = snapshot - # TODO add `global_props.host_os == "amzn2"` condition # once amazon linux kernels have patches. diff --git a/tests/integration_tests/functional/test_uffd.py b/tests/integration_tests/functional/test_uffd.py index cb5ac0c44c9..95819844f05 100644 --- a/tests/integration_tests/functional/test_uffd.py +++ b/tests/integration_tests/functional/test_uffd.py @@ -9,7 +9,7 @@ import requests from framework.utils import Timeout, check_output -from framework.utils_uffd import SOCKET_PATH, spawn_pf_handler, uffd_handler +from framework.utils_uffd import spawn_pf_handler, uffd_handler @pytest.fixture(scope="function", name="snapshot") @@ -92,9 +92,9 @@ def test_valid_handler(uvm_plain, snapshot): vm.spawn() # Spawn page fault handler process. 
- _pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot.mem) + pf_handler = spawn_pf_handler(vm, uffd_handler("on_demand"), snapshot.mem) - vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH) + vm.restore_from_snapshot(snapshot, resume=True, uffd_path=pf_handler.socket_path) # Inflate balloon. vm.api.balloon.patch(amount_mib=200) @@ -125,13 +125,15 @@ def test_malicious_handler(uvm_plain, snapshot): vm.spawn() # Spawn page fault handler process. - _pf_handler = spawn_pf_handler(vm, uffd_handler("malicious"), snapshot.mem) + pf_handler = spawn_pf_handler(vm, uffd_handler("malicious"), snapshot.mem) # We expect Firecracker to freeze while resuming from a snapshot # due to the malicious handler's unavailability. try: with Timeout(seconds=30): - vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH) + vm.restore_from_snapshot( + snapshot, resume=True, uffd_path=pf_handler.socket_path + ) assert False, "Firecracker should freeze" except (TimeoutError, requests.exceptions.ReadTimeout): pass diff --git a/tests/integration_tests/performance/test_huge_pages.py b/tests/integration_tests/performance/test_huge_pages.py index 65ae2e6fbc2..5839245ebd9 100644 --- a/tests/integration_tests/performance/test_huge_pages.py +++ b/tests/integration_tests/performance/test_huge_pages.py @@ -10,7 +10,7 @@ from framework.microvm import HugePagesConfig from framework.properties import global_props from framework.utils_ftrace import ftrace_events -from framework.utils_uffd import SOCKET_PATH, spawn_pf_handler, uffd_handler +from framework.utils_uffd import spawn_pf_handler, uffd_handler def check_hugetlbfs_in_use(pid: int, allocation_name: str): @@ -93,9 +93,9 @@ def test_hugetlbfs_snapshot(microvm_factory, guest_kernel_linux_5_10, rootfs): vm.spawn() # Spawn page fault handler process. - _pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot.mem) + pf_handler = spawn_pf_handler(vm, uffd_handler("on_demand"), snapshot.mem) - vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH) + vm.restore_from_snapshot(snapshot, resume=True, uffd_path=pf_handler.socket_path) check_hugetlbfs_in_use(vm.firecracker_pid, "/anon_hugepage") @@ -135,9 +135,11 @@ def test_hugetlbfs_diff_snapshot(microvm_factory, uvm_plain): vm.spawn() # Spawn page fault handler process. - _pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot_merged.mem) + pf_handler = spawn_pf_handler(vm, uffd_handler("on_demand"), snapshot_merged.mem) - vm.restore_from_snapshot(snapshot_merged, resume=True, uffd_path=SOCKET_PATH) + vm.restore_from_snapshot( + snapshot_merged, resume=True, uffd_path=pf_handler.socket_path + ) # Verify if the restored microvm works. @@ -193,10 +195,12 @@ def test_ept_violation_count( vm.spawn() # Spawn page fault handler process. - _pf_handler = spawn_pf_handler(vm, uffd_handler("fault_all"), snapshot.mem) + pf_handler = spawn_pf_handler(vm, uffd_handler("fault_all"), snapshot.mem) with ftrace_events("kvm:*"): - vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH) + vm.restore_from_snapshot( + snapshot, resume=True, uffd_path=pf_handler.socket_path + ) # Verify if guest can run commands, and also wake up the fast page fault helper to trigger page faults. 
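All test binaries are now resolved relative to a single directory: `--binary-dir` when passed, otherwise `DEFAULT_BINARY_DIR` from `framework/defs.py`. Firecracker, the jailer, and the example handlers are just fixed names beneath that root. A sketch of the layout these changes assume (the concrete path is an illustration of what `DEFAULT_BINARY_DIR` expands to on an x86_64 host):

```python
from pathlib import Path

# e.g. <repo>/build/cargo_target/x86_64-unknown-linux-musl/release
binary_dir = Path("build/cargo_target/x86_64-unknown-linux-musl/release")

fc_binary = binary_dir / "firecracker"          # MicroVMFactory.fc_binary_path
jailer_binary = binary_dir / "jailer"           # MicroVMFactory.jailer_binary_path
uffd_handler_bin = (                            # get_example("uffd_on_demand_handler")
    binary_dir / "examples" / "uffd_on_demand_handler"
)
```

With `get_firecracker_binaries()` gone, everything that needs a path goes through `get_binary`/`get_example` or the factory properties, so the tests below can stop importing the shared `SOCKET_PATH` constant and derive paths from the objects they already hold.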
diff --git a/tests/integration_tests/functional/test_snapshot_basic.py b/tests/integration_tests/functional/test_snapshot_basic.py
index 6fe41f4d5a6..09e14b92448 100644
--- a/tests/integration_tests/functional/test_snapshot_basic.py
+++ b/tests/integration_tests/functional/test_snapshot_basic.py
@@ -149,12 +149,9 @@ def test_5_snapshots(
     snapshot = vm.make_snapshot(snapshot_type)
     vm.kill()
 
-    for i in range(seq_len):
-        logger.info("Load snapshot #%s, mem %s", i, snapshot.mem)
-        microvm = microvm_factory.build()
-        microvm.spawn()
-        copied_snapshot = microvm.restore_from_snapshot(snapshot, resume=True)
-
+    for microvm in microvm_factory.build_n_from_snapshot(
+        snapshot, seq_len, incremental=True, use_snapshot_editor=use_snapshot_editor
+    ):
         # FIXME: This and the sleep below reduce the rate of vsock/ssh connection
         # related spurious test failures, although we do not know why this is the case.
         time.sleep(2)
@@ -171,21 +168,6 @@ def test_5_snapshots(
         check_filesystem(microvm.ssh, "squashfs", "/dev/vda")
         time.sleep(2)
 
-        logger.info("Create snapshot %s #%d.", snapshot_type, i + 1)
-        new_snapshot = microvm.make_snapshot(snapshot_type)
-
-        # If we are testing incremental snapshots we must merge the base with
-        # current layer.
-        if snapshot.is_diff:
-            logger.info("Base: %s, Layer: %s", snapshot.mem, new_snapshot.mem)
-            new_snapshot = new_snapshot.rebase_snapshot(
-                snapshot, use_snapshot_editor=use_snapshot_editor
-            )
-
-        microvm.kill()
-        copied_snapshot.delete()
-
-        # Update the base for next iteration.
-        snapshot = new_snapshot
-
 
 def test_patch_drive_snapshot(uvm_nano, microvm_factory):
@@ -524,27 +506,13 @@ def test_vmgenid(guest_kernel_linux_6_1, rootfs, microvm_factory, snapshot_type)
     base_snapshot = snapshot
     base_vm.kill()
 
-    for i in range(5):
-        vm = microvm_factory.build()
-        vm.spawn()
-        copied_snapshot = vm.restore_from_snapshot(snapshot, resume=True)
-
+    for i, vm in enumerate(
+        microvm_factory.build_n_from_snapshot(base_snapshot, 5, incremental=True)
+    ):
         # We should have as DMESG_VMGENID_RESUME messages as
         # snapshots we have resumed
         check_vmgenid_update_count(vm, i + 1)
 
-        snapshot = vm.make_snapshot(snapshot_type)
-        vm.kill()
-        copied_snapshot.delete()
-
-        # If we are testing incremental snapshots we ust merge the base with
-        # current layer.
-        if snapshot.is_diff:
-            snapshot = snapshot.rebase_snapshot(base_snapshot)
-
-        # Update the base for next iteration
-        base_snapshot = snapshot
-
 
 # TODO add `global_props.host_os == "amzn2"` condition
 # once amazon linux kernels have patches.
diff --git a/tests/integration_tests/functional/test_uffd.py b/tests/integration_tests/functional/test_uffd.py
index cb5ac0c44c9..95819844f05 100644
--- a/tests/integration_tests/functional/test_uffd.py
+++ b/tests/integration_tests/functional/test_uffd.py
@@ -9,7 +9,7 @@
 import requests
 
 from framework.utils import Timeout, check_output
-from framework.utils_uffd import SOCKET_PATH, spawn_pf_handler, uffd_handler
+from framework.utils_uffd import spawn_pf_handler, uffd_handler
 
 
 @pytest.fixture(scope="function", name="snapshot")
@@ -92,9 +92,9 @@ def test_valid_handler(uvm_plain, snapshot):
     vm.spawn()
 
     # Spawn page fault handler process.
-    _pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot.mem)
+    pf_handler = spawn_pf_handler(vm, uffd_handler("on_demand"), snapshot.mem)
 
-    vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH)
+    vm.restore_from_snapshot(snapshot, resume=True, uffd_path=pf_handler.socket_path)
 
     # Inflate balloon.
     vm.api.balloon.patch(amount_mib=200)
@@ -125,13 +125,15 @@ def test_malicious_handler(uvm_plain, snapshot):
     vm.spawn()
 
     # Spawn page fault handler process.
-    _pf_handler = spawn_pf_handler(vm, uffd_handler("malicious"), snapshot.mem)
+    pf_handler = spawn_pf_handler(vm, uffd_handler("malicious"), snapshot.mem)
 
     # We expect Firecracker to freeze while resuming from a snapshot
     # due to the malicious handler's unavailability.
     try:
         with Timeout(seconds=30):
-            vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH)
+            vm.restore_from_snapshot(
+                snapshot, resume=True, uffd_path=pf_handler.socket_path
+            )
             assert False, "Firecracker should freeze"
     except (TimeoutError, requests.exceptions.ReadTimeout):
         pass
diff --git a/tests/integration_tests/performance/test_huge_pages.py b/tests/integration_tests/performance/test_huge_pages.py
index 65ae2e6fbc2..5839245ebd9 100644
--- a/tests/integration_tests/performance/test_huge_pages.py
+++ b/tests/integration_tests/performance/test_huge_pages.py
@@ -10,7 +10,7 @@
 from framework.microvm import HugePagesConfig
 from framework.properties import global_props
 from framework.utils_ftrace import ftrace_events
-from framework.utils_uffd import SOCKET_PATH, spawn_pf_handler, uffd_handler
+from framework.utils_uffd import spawn_pf_handler, uffd_handler
 
 
 def check_hugetlbfs_in_use(pid: int, allocation_name: str):
@@ -93,9 +93,9 @@ def test_hugetlbfs_snapshot(microvm_factory, guest_kernel_linux_5_10, rootfs):
     vm.spawn()
 
     # Spawn page fault handler process.
-    _pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot.mem)
+    pf_handler = spawn_pf_handler(vm, uffd_handler("on_demand"), snapshot.mem)
 
-    vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH)
+    vm.restore_from_snapshot(snapshot, resume=True, uffd_path=pf_handler.socket_path)
 
     check_hugetlbfs_in_use(vm.firecracker_pid, "/anon_hugepage")
 
@@ -135,9 +135,11 @@ def test_hugetlbfs_diff_snapshot(microvm_factory, uvm_plain):
     vm.spawn()
 
     # Spawn page fault handler process.
-    _pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot_merged.mem)
+    pf_handler = spawn_pf_handler(vm, uffd_handler("on_demand"), snapshot_merged.mem)
 
-    vm.restore_from_snapshot(snapshot_merged, resume=True, uffd_path=SOCKET_PATH)
+    vm.restore_from_snapshot(
+        snapshot_merged, resume=True, uffd_path=pf_handler.socket_path
+    )
 
     # Verify if the restored microvm works.
 
@@ -193,10 +195,12 @@ def test_ept_violation_count(
     vm.spawn()
 
     # Spawn page fault handler process.
-    _pf_handler = spawn_pf_handler(vm, uffd_handler("fault_all"), snapshot.mem)
+    pf_handler = spawn_pf_handler(vm, uffd_handler("fault_all"), snapshot.mem)
 
     with ftrace_events("kvm:*"):
-        vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH)
+        vm.restore_from_snapshot(
+            snapshot, resume=True, uffd_path=pf_handler.socket_path
+        )
 
         # Verify if guest can run commands, and also wake up the fast page fault helper to trigger page faults.
         vm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}")
diff --git a/tests/integration_tests/performance/test_snapshot_ab.py b/tests/integration_tests/performance/test_snapshot_ab.py
index 23224de6b31..655566dcb13 100644
--- a/tests/integration_tests/performance/test_snapshot_ab.py
+++ b/tests/integration_tests/performance/test_snapshot_ab.py
@@ -1,17 +1,20 @@
 # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
 """Performance benchmark for snapshot restore."""
+import re
+import signal
 import tempfile
+import time
 from dataclasses import dataclass
 from functools import lru_cache
-from typing import List
 
 import pytest
 
 import host_tools.drive as drive_tools
-from framework.microvm import Microvm
+from framework.microvm import HugePagesConfig, Microvm
 
 USEC_IN_MSEC = 1000
+NS_IN_MSEC = 1_000_000
 ITERATIONS = 30
@@ -34,21 +37,17 @@ class SnapshotRestoreTest:
     nets: int = 3
     blocks: int = 3
     all_devices: bool = False
+    huge_pages: HugePagesConfig = HugePagesConfig.NONE
 
     @property
     def id(self):
         """Computes a unique id for this test instance"""
         return "all_dev" if self.all_devices else f"{self.vcpus}vcpu_{self.mem}mb"
 
-    def configure_vm(
-        self,
-        microvm_factory,
-        guest_kernel_acpi,
-        rootfs,
-    ) -> Microvm:
+    def boot_vm(self, microvm_factory, guest_kernel, rootfs, metrics) -> Microvm:
         """Creates the initial snapshot that will be loaded repeatedly to sample latencies"""
         vm = microvm_factory.build(
-            guest_kernel_acpi,
+            guest_kernel,
             rootfs,
             monitor_memory=False,
         )
@@ -58,6 +57,7 @@ def configure_vm(
             vcpu_count=self.vcpus,
             mem_size_mib=self.mem,
             rootfs_io_engine="Sync",
+            huge_pages=self.huge_pages,
         )
 
         for _ in range(self.nets):
@@ -74,38 +74,19 @@ def configure_vm(
         )
         vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path="/v.sock")
 
-        return vm
-
-    def sample_latency(
-        self, microvm_factory, snapshot, guest_kernel_linux_5_10
-    ) -> List[float]:
-        """Collects latency samples for the microvm configuration specified by this instance"""
-        values = []
+        metrics.set_dimensions(
+            {
+                "net_devices": str(self.nets),
+                "block_devices": str(self.blocks),
+                "vsock_devices": str(int(self.all_devices)),
+                "balloon_devices": str(int(self.all_devices)),
+                "huge_pages_config": str(self.huge_pages),
+                **vm.dimensions,
+            }
+        )
+        vm.start()
 
-        for _ in range(ITERATIONS):
-            microvm = microvm_factory.build(
-                kernel=guest_kernel_linux_5_10,
-                monitor_memory=False,
-            )
-            microvm.spawn(emit_metrics=True)
-            snapshot_copy = microvm.restore_from_snapshot(snapshot, resume=True)
-
-            value = 0
-            # Parse all metric data points in search of load_snapshot time.
-            microvm.flush_metrics()
-            metrics = microvm.get_all_metrics()
-            for data_point in metrics:
-                cur_value = data_point["latencies_us"]["load_snapshot"]
-                if cur_value > 0:
-                    value = cur_value / USEC_IN_MSEC
-                    break
-            assert value > 0
-            values.append(value)
-            microvm.kill()
-            snapshot_copy.delete()
-
-        snapshot.delete()
-        return values
+        return vm
 
 
 @pytest.mark.nonci
@@ -134,28 +115,109 @@ def test_restore_latency(
     We only test a single guest kernel, as the guest kernel does not "participate"
     in snapshot restore.
     """
-    vm = test_setup.configure_vm(microvm_factory, guest_kernel_linux_5_10, rootfs)
-    vm.start()
+    vm = test_setup.boot_vm(microvm_factory, guest_kernel_linux_5_10, rootfs, metrics)
+
+    snapshot = vm.snapshot_full()
+    vm.kill()
+
+    metrics.put_dimensions(
+        {"performance_test": "test_restore_latency", "uffd_handler": "None"}
+    )
+
+    for microvm in microvm_factory.build_n_from_snapshot(snapshot, ITERATIONS):
+        value = 0
+        # Parse all metric data points in search of load_snapshot time.
+        microvm.flush_metrics()
+        for data_point in microvm.get_all_metrics():
+            cur_value = data_point["latencies_us"]["load_snapshot"]
+            if cur_value > 0:
+                value = cur_value / USEC_IN_MSEC
+                break
+        assert value > 0
+        metrics.put_metric("latency", value, "Milliseconds")
+
+
+# When using the fault-all handler, all guest memory will be faulted in way before the helper tool
+# wakes up, because it gets faulted in on the first page fault. In this scenario, we are not measuring UFFD
+# latencies, but KVM latencies of setting up missing EPT entries.
+@pytest.mark.nonci
+@pytest.mark.parametrize("uffd_handler", [None, "on_demand", "fault_all"])
+@pytest.mark.parametrize("huge_pages", HugePagesConfig)
+def test_post_restore_latency(
+    microvm_factory, rootfs, guest_kernel_linux_5_10, metrics, uffd_handler, huge_pages
+):
+    """Collects latency metric of post-restore memory accesses done inside the guest"""
+    if huge_pages != HugePagesConfig.NONE and uffd_handler is None:
+        pytest.skip("huge page snapshots can only be restored using uffd")
+
+    test_setup = SnapshotRestoreTest(mem=1024, vcpus=2, huge_pages=huge_pages)
+    vm = test_setup.boot_vm(microvm_factory, guest_kernel_linux_5_10, rootfs, metrics)
+
+    vm.ssh.check_output(
+        "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1