From 0b9ff9909e0b6738f656ef7c2ce922d252b02ac6 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 3 Mar 2025 12:05:36 +0000 Subject: [PATCH 01/23] refactor(test): stop hardcoding uffd socket path everywhere Instead use the property of the `UffdHandler` object that `spawn_uffd_handler` returns. Signed-off-by: Patrick Roy --- tests/framework/utils_uffd.py | 6 +++--- .../integration_tests/functional/test_uffd.py | 12 +++++++----- .../performance/test_huge_pages.py | 18 +++++++++++------- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/tests/framework/utils_uffd.py b/tests/framework/utils_uffd.py index 9de89809a20..690eb423573 100644 --- a/tests/framework/utils_uffd.py +++ b/tests/framework/utils_uffd.py @@ -21,7 +21,7 @@ def __init__(self, name, socket_path, mem_file, chroot_path, log_file_name): """Instantiate the handler process with arguments.""" self._proc = None self._handler_name = name - self._socket_path = socket_path + self.socket_path = socket_path self._mem_file = mem_file self._chroot = chroot_path self._log_file = log_file_name @@ -35,7 +35,7 @@ def spawn(self, uid, gid): chroot_log_file = Path("/") / self._log_file with open(chroot_log_file, "w", encoding="utf-8") as logfile: - args = [f"/{self._handler_name}", self._socket_path, self._mem_file] + args = [f"/{self._handler_name}", self.socket_path, self._mem_file] self._proc = subprocess.Popen( args, stdout=logfile, stderr=subprocess.STDOUT ) @@ -48,7 +48,7 @@ def spawn(self, uid, gid): # The page fault handler will create the socket path with root rights. # Change rights to the jailer's. 
- os.chown(self._socket_path, uid, gid) + os.chown(self.socket_path, uid, gid) @property def proc(self): diff --git a/tests/integration_tests/functional/test_uffd.py b/tests/integration_tests/functional/test_uffd.py index cb5ac0c44c9..35b03d7aa34 100644 --- a/tests/integration_tests/functional/test_uffd.py +++ b/tests/integration_tests/functional/test_uffd.py @@ -9,7 +9,7 @@ import requests from framework.utils import Timeout, check_output -from framework.utils_uffd import SOCKET_PATH, spawn_pf_handler, uffd_handler +from framework.utils_uffd import spawn_pf_handler, uffd_handler @pytest.fixture(scope="function", name="snapshot") @@ -92,9 +92,9 @@ def test_valid_handler(uvm_plain, snapshot): vm.spawn() # Spawn page fault handler process. - _pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot.mem) + pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot.mem) - vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH) + vm.restore_from_snapshot(snapshot, resume=True, uffd_path=pf_handler.socket_path) # Inflate balloon. vm.api.balloon.patch(amount_mib=200) @@ -125,13 +125,15 @@ def test_malicious_handler(uvm_plain, snapshot): vm.spawn() # Spawn page fault handler process. - _pf_handler = spawn_pf_handler(vm, uffd_handler("malicious"), snapshot.mem) + pf_handler = spawn_pf_handler(vm, uffd_handler("malicious"), snapshot.mem) # We expect Firecracker to freeze while resuming from a snapshot # due to the malicious handler's unavailability. 
try: with Timeout(seconds=30): - vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH) + vm.restore_from_snapshot( + snapshot, resume=True, uffd_path=pf_handler.socket_path + ) assert False, "Firecracker should freeze" except (TimeoutError, requests.exceptions.ReadTimeout): pass diff --git a/tests/integration_tests/performance/test_huge_pages.py b/tests/integration_tests/performance/test_huge_pages.py index 65ae2e6fbc2..acfee58139d 100644 --- a/tests/integration_tests/performance/test_huge_pages.py +++ b/tests/integration_tests/performance/test_huge_pages.py @@ -10,7 +10,7 @@ from framework.microvm import HugePagesConfig from framework.properties import global_props from framework.utils_ftrace import ftrace_events -from framework.utils_uffd import SOCKET_PATH, spawn_pf_handler, uffd_handler +from framework.utils_uffd import spawn_pf_handler, uffd_handler def check_hugetlbfs_in_use(pid: int, allocation_name: str): @@ -93,9 +93,9 @@ def test_hugetlbfs_snapshot(microvm_factory, guest_kernel_linux_5_10, rootfs): vm.spawn() # Spawn page fault handler process. - _pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot.mem) + pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot.mem) - vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH) + vm.restore_from_snapshot(snapshot, resume=True, uffd_path=pf_handler.socket_path) check_hugetlbfs_in_use(vm.firecracker_pid, "/anon_hugepage") @@ -135,9 +135,11 @@ def test_hugetlbfs_diff_snapshot(microvm_factory, uvm_plain): vm.spawn() # Spawn page fault handler process. - _pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot_merged.mem) + pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot_merged.mem) - vm.restore_from_snapshot(snapshot_merged, resume=True, uffd_path=SOCKET_PATH) + vm.restore_from_snapshot( + snapshot_merged, resume=True, uffd_path=pf_handler.socket_path + ) # Verify if the restored microvm works. 
@@ -193,10 +195,12 @@ def test_ept_violation_count( vm.spawn() # Spawn page fault handler process. - _pf_handler = spawn_pf_handler(vm, uffd_handler("fault_all"), snapshot.mem) + pf_handler = spawn_pf_handler(vm, uffd_handler("fault_all"), snapshot.mem) with ftrace_events("kvm:*"): - vm.restore_from_snapshot(snapshot, resume=True, uffd_path=SOCKET_PATH) + vm.restore_from_snapshot( + snapshot, resume=True, uffd_path=pf_handler.socket_path + ) # Verify if guest can run commands, and also wake up the fast page fault helper to trigger page faults. vm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}") From f0207f4ff3d1b50195bea40d441857ccc2ca54d8 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 27 Feb 2025 11:42:47 +0000 Subject: [PATCH 02/23] test: have fast_page_fault_helper print time memset took The memset causes page faults in the guest which will be handled by UFFD. By timing the memset from inside the guest, we can measure the latency of these page faults as observed by an application running inside the guest. Signed-off-by: Patrick Roy --- .../usr/local/bin/fast_page_fault_helper.c | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/resources/overlay/usr/local/bin/fast_page_fault_helper.c b/resources/overlay/usr/local/bin/fast_page_fault_helper.c index 591ac3b9612..96e565f0c3f 100644 --- a/resources/overlay/usr/local/bin/fast_page_fault_helper.c +++ b/resources/overlay/usr/local/bin/fast_page_fault_helper.c @@ -10,17 +10,23 @@ // This way, the `memset` will trigger a fast page fault for every page in // the memory region. 
-#include // perror +#include // perror, fopen, fprintf #include // sigwait and friends #include // memset #include // mmap +#include // clock_gettime +#include // open #define MEM_SIZE_MIB (128 * 1024 * 1024) +#define NANOS_PER_SEC 1000000000 -int main(int argc, char *const argv[]) { +int main() { sigset_t set; int signal; void *ptr; + struct timespec start, end; + long duration_nanos; + FILE *out_file; sigemptyset(&set); if (sigaddset(&set, SIGUSR1) == -1) { @@ -43,7 +49,23 @@ int main(int argc, char *const argv[]) { sigwait(&set, &signal); + clock_gettime(CLOCK_BOOTTIME, &start); memset(ptr, 2, MEM_SIZE_MIB); + clock_gettime(CLOCK_BOOTTIME, &end); + + duration_nanos = (end.tv_sec - start.tv_sec) * NANOS_PER_SEC + end.tv_nsec - start.tv_nsec; + + out_file = fopen("/tmp/fast_page_fault_helper.out", "w"); + if (out_file == NULL) { + perror("fopen"); + return 1; + } + + fprintf(out_file, "%ld", duration_nanos); + if (fclose(out_file)) { + perror("fclose"); + return 1; + } return 0; } \ No newline at end of file From b04661cc68cc9233ed146a473a3edb5bc2f2c3b2 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 3 Mar 2025 12:48:54 +0000 Subject: [PATCH 03/23] refactor(uffd): Make example UFFD fast in absense of balloon device Make the example UFFD handler use a HashSet to only store address ranges that were actually removed due to expanding memory balloon. This eliminates the HashMap that previously held entries for every single page in guest memory, which was causing the example uffd handler to slow to a crawl (taking about 40s to fault in 128MB of memory). This is possible because the UFFD handler doesn't actually care about the 4 states a page can supposedly be in, it only needs to know whether a page needs to be served from the memory snapshot file, or zeroed out. 
Since zero-ing out is the rarer of the two (only happens if a balloon device caused some previously faulted in memory to be madvise'd away), we only store information about pages that we got remove messages about, and simply assume every other page needs to be served from the snapshot file. This means that in the happy path of "no balloon device", we never need to store any information about the memory at all. This one simple optimization brings the fault time down to 500ms (from 40s) for the 128MB scenario, which makes the example handler usable for performance tests for page fault latencies. Signed-off-by: Patrick Roy --- .../examples/uffd/fault_all_handler.rs | 3 +- src/firecracker/examples/uffd/uffd_utils.rs | 121 +++++------------- .../examples/uffd/valid_handler.rs | 7 +- 3 files changed, 40 insertions(+), 91 deletions(-) diff --git a/src/firecracker/examples/uffd/fault_all_handler.rs b/src/firecracker/examples/uffd/fault_all_handler.rs index cfeaa099236..d0f127a8ce1 100644 --- a/src/firecracker/examples/uffd/fault_all_handler.rs +++ b/src/firecracker/examples/uffd/fault_all_handler.rs @@ -35,8 +35,7 @@ fn main() { match event { userfaultfd::Event::Pagefault { .. 
} => { for region in uffd_handler.mem_regions.clone() { - uffd_handler - .serve_pf(region.mapping.base_host_virt_addr as _, region.mapping.size); + uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size); } } _ => panic!("Unexpected event on userfaultfd"), diff --git a/src/firecracker/examples/uffd/uffd_utils.rs b/src/firecracker/examples/uffd/uffd_utils.rs index a2f7879f591..dcc05151967 100644 --- a/src/firecracker/examples/uffd/uffd_utils.rs +++ b/src/firecracker/examples/uffd/uffd_utils.rs @@ -4,7 +4,7 @@ // Not everything is used by both binaries #![allow(dead_code)] -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fs::File; use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd}; use std::os::unix::net::UnixStream; @@ -34,26 +34,20 @@ pub struct GuestRegionUffdMapping { pub page_size_kib: usize, } -#[derive(Debug, Clone, Copy)] -pub enum MemPageState { - Uninitialized, - FromFile, - Removed, - Anonymous, -} - -#[derive(Debug, Clone)] -pub struct MemRegion { - pub mapping: GuestRegionUffdMapping, - page_states: HashMap, +impl GuestRegionUffdMapping { + fn contains(&self, fault_page_addr: u64) -> bool { + fault_page_addr >= self.base_host_virt_addr + && fault_page_addr < self.base_host_virt_addr + self.size as u64 + } } #[derive(Debug)] pub struct UffdHandler { - pub mem_regions: Vec, + pub mem_regions: Vec, pub page_size: usize, backing_buffer: *const u8, uffd: Uffd, + removed_pages: HashSet, } impl UffdHandler { @@ -92,13 +86,12 @@ impl UffdHandler { let uffd = unsafe { Uffd::from_raw_fd(file.into_raw_fd()) }; - let mem_regions = create_mem_regions(&mappings, page_size); - Self { - mem_regions, + mem_regions: mappings, page_size, backing_buffer, uffd, + removed_pages: HashSet::new(), } } @@ -106,13 +99,12 @@ impl UffdHandler { self.uffd.read_event() } - pub fn update_mem_state_mappings(&mut self, start: u64, end: u64, state: MemPageState) { - for region in self.mem_regions.iter_mut() { - for (key, value) in 
region.page_states.iter_mut() { - if key >= &start && key < &end { - *value = state; - } - } + pub fn mark_range_removed(&mut self, start: u64, end: u64) { + let pfn_start = start / self.page_size as u64; + let pfn_end = end / self.page_size as u64; + + for pfn in pfn_start..pfn_end { + self.removed_pages.insert(pfn); } } @@ -120,33 +112,16 @@ impl UffdHandler { // Find the start of the page that the current faulting address belongs to. let dst = (addr as usize & !(self.page_size - 1)) as *mut libc::c_void; let fault_page_addr = dst as u64; - - // Get the state of the current faulting page. - for region in self.mem_regions.iter() { - match region.page_states.get(&fault_page_addr) { - // Our simple PF handler has a simple strategy: - // There exist 4 states in which a memory page can be in: - // 1. Uninitialized - page was never touched - // 2. FromFile - the page is populated with content from snapshotted memory file - // 3. Removed - MADV_DONTNEED was called due to balloon inflation - // 4. Anonymous - page was zeroed out -> this implies that more than one page fault - // event was received. 
This can be a consequence of guest reclaiming back its - // memory from the host (through balloon device) - Some(MemPageState::Uninitialized) | Some(MemPageState::FromFile) => { - match self.populate_from_file(region, fault_page_addr, len) { - Some((start, end)) => { - self.update_mem_state_mappings(start, end, MemPageState::FromFile) - } - None => return false, - } - return true; - } - Some(MemPageState::Removed) | Some(MemPageState::Anonymous) => { - let (start, end) = self.zero_out(fault_page_addr); - self.update_mem_state_mappings(start, end, MemPageState::Anonymous); - return true; + let fault_pfn = fault_page_addr / self.page_size as u64; + + if self.removed_pages.contains(&fault_pfn) { + self.zero_out(fault_page_addr); + return true; + } else { + for region in self.mem_regions.iter() { + if region.contains(fault_page_addr) { + return self.populate_from_file(region, fault_page_addr, len); } - None => {} } } @@ -156,13 +131,14 @@ impl UffdHandler { ); } - fn populate_from_file(&self, region: &MemRegion, dst: u64, len: usize) -> Option<(u64, u64)> { - let offset = dst - region.mapping.base_host_virt_addr; - let src = self.backing_buffer as u64 + region.mapping.offset + offset; + fn populate_from_file(&self, region: &GuestRegionUffdMapping, dst: u64, len: usize) -> bool { + let offset = dst - region.base_host_virt_addr; + let src = self.backing_buffer as u64 + region.offset + offset; - let ret = unsafe { + unsafe { match self.uffd.copy(src as *const _, dst as *mut _, len, true) { - Ok(value) => value, + // Make sure the UFFD copied some bytes. + Ok(value) => assert!(value > 0), // Catch EAGAIN errors, which occur when a `remove` event lands in the UFFD // queue while we're processing `pagefault` events. 
// The weird cast is because the `bytes_copied` field is based on the @@ -172,12 +148,12 @@ impl UffdHandler { Err(Error::PartiallyCopied(bytes_copied)) if bytes_copied == 0 || bytes_copied == (-libc::EAGAIN) as usize => { - return None + return false } Err(Error::CopyFailed(errno)) if std::io::Error::from(errno).raw_os_error().unwrap() == libc::EEXIST => { - len + () } Err(e) => { panic!("Uffd copy failed: {e:?}"); @@ -185,13 +161,10 @@ impl UffdHandler { } }; - // Make sure the UFFD copied some bytes. - assert!(ret > 0); - - Some((dst, dst + len as u64)) + true } - fn zero_out(&mut self, addr: u64) -> (u64, u64) { + fn zero_out(&mut self, addr: u64) { let ret = unsafe { self.uffd .zeropage(addr as *mut _, self.page_size, true) @@ -199,8 +172,6 @@ impl UffdHandler { }; // Make sure the UFFD zeroed out some bytes. assert!(ret > 0); - - (addr, addr + self.page_size as u64) } } @@ -345,28 +316,6 @@ impl Runtime { } } -fn create_mem_regions(mappings: &Vec, page_size: usize) -> Vec { - let mut mem_regions: Vec = Vec::with_capacity(mappings.len()); - - for r in mappings.iter() { - let mapping = r.clone(); - let mut addr = r.base_host_virt_addr; - let end_addr = r.base_host_virt_addr + r.size as u64; - let mut page_states = HashMap::new(); - - while addr < end_addr { - page_states.insert(addr, MemPageState::Uninitialized); - addr += page_size as u64; - } - mem_regions.push(MemRegion { - mapping, - page_states, - }); - } - - mem_regions -} - #[cfg(test)] mod tests { use std::mem::MaybeUninit; diff --git a/src/firecracker/examples/uffd/valid_handler.rs b/src/firecracker/examples/uffd/valid_handler.rs index 936b9f517a3..3be958b3578 100644 --- a/src/firecracker/examples/uffd/valid_handler.rs +++ b/src/firecracker/examples/uffd/valid_handler.rs @@ -10,7 +10,7 @@ mod uffd_utils; use std::fs::File; use std::os::unix::net::UnixListener; -use uffd_utils::{MemPageState, Runtime, UffdHandler}; +use uffd_utils::{Runtime, UffdHandler}; fn main() { let mut args = std::env::args(); @@ 
-86,8 +86,9 @@ fn main() { deferred_events.push(event); } } - userfaultfd::Event::Remove { start, end } => uffd_handler - .update_mem_state_mappings(start as u64, end as u64, MemPageState::Removed), + userfaultfd::Event::Remove { start, end } => { + uffd_handler.mark_range_removed(start as u64, end as u64) + } _ => panic!("Unexpected event on userfaultfd"), } } From 6c914809ea323c388dbe403e8790cff7977b4bde Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 4 Mar 2025 12:44:36 +0000 Subject: [PATCH 04/23] refactor(test): Store binary_dir inside microvm Just store the binary dir inside the microvm class instead of separate paths to firecracker and jailer binaries. This centralizes the existance assertions, but means we cant theoretically use jailer and firecracker binaries from different locations anymore (however, nothing in the test framework was doing this, as MicroVMFactory already did not support this and hardcoded the binary names relative to the --binary-dir option). Signed-off-by: Patrick Roy --- tests/conftest.py | 17 +++++++---------- tests/framework/defs.py | 7 +++++++ tests/framework/microvm.py | 18 +++++++++++++++--- tests/host_tools/cargo_build.py | 11 ----------- .../security/test_vulnerabilities.py | 6 ++---- tools/sandbox.py | 9 ++++----- .../test-docker-rootfs.py | 4 ++-- 7 files changed, 37 insertions(+), 35 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index fa309427ef1..fb0b88b677a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -33,6 +33,7 @@ import host_tools.cargo_build as build_tools from framework import defs, utils from framework.artifacts import disks, kernel_params +from framework.defs import DEFAULT_BINARY_DIR from framework.microvm import MicroVMFactory from framework.properties import global_props from framework.utils_cpu_templates import ( @@ -293,14 +294,11 @@ def get(self, _netns_id): def microvm_factory(request, record_property, results_dir, netns_factory): """Fixture to create microvms simply.""" - 
if binary_dir := request.config.getoption("--binary-dir"): - fc_binary_path = Path(binary_dir) / "firecracker" - jailer_binary_path = Path(binary_dir) / "jailer" - if not fc_binary_path.exists(): - raise RuntimeError("Firecracker binary does not exist") - else: - fc_binary_path, jailer_binary_path = build_tools.get_firecracker_binaries() - record_property("firecracker_bin", str(fc_binary_path)) + binary_dir = request.config.getoption("--binary-dir") or DEFAULT_BINARY_DIR + if isinstance(binary_dir, str): + binary_dir = Path(binary_dir) + + record_property("firecracker_bin", str(binary_dir / "firecracker")) # If `--custom-cpu-template` option is provided, the given CPU template will # be applied afterwards unless overwritten. @@ -316,8 +314,7 @@ def microvm_factory(request, record_property, results_dir, netns_factory): # We could override the chroot base like so # jailer_kwargs={"chroot_base": "/srv/jailo"} uvm_factory = MicroVMFactory( - fc_binary_path, - jailer_binary_path, + binary_dir, netns_factory=netns_factory, custom_cpu_template=custom_cpu_template, ) diff --git a/tests/framework/defs.py b/tests/framework/defs.py index f017dc231ee..38fbe6802df 100644 --- a/tests/framework/defs.py +++ b/tests/framework/defs.py @@ -26,6 +26,13 @@ # Absolute path to the test results folder TEST_RESULTS_DIR = FC_WORKSPACE_DIR / "test_results" +DEFAULT_BINARY_DIR = ( + LOCAL_BUILD_PATH + / "cargo_target" + / f"{platform.machine()}-unknown-linux-musl" + / "release" +) + # The minimum required host kernel version for which io_uring is supported in # Firecracker. 
MIN_KERNEL_VERSION_FOR_IO_URING = "5.10.51" diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index f93a0dabf19..85ed588b809 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -1073,13 +1073,25 @@ def wait_for_ssh_up(self): class MicroVMFactory: """MicroVM factory""" - def __init__(self, fc_binary_path: Path, jailer_binary_path: Path, **kwargs): + def __init__(self, binary_path: Path, **kwargs): self.vms = [] - self.fc_binary_path = Path(fc_binary_path) - self.jailer_binary_path = Path(jailer_binary_path) + self.binary_path = binary_path self.netns_factory = kwargs.pop("netns_factory", net_tools.NetNs) self.kwargs = kwargs + assert self.fc_binary_path.exists(), "missing firecracker binary" + assert self.jailer_binary_path.exists(), "missing jailer binary" + + @property + def fc_binary_path(self): + """The path to the firecracker binary from which this factory will build VMs""" + return self.binary_path / "firecracker" + + @property + def jailer_binary_path(self): + """The path to the jailer binary using which this factory will build VMs""" + return self.binary_path / "jailer" + def build(self, kernel=None, rootfs=None, **kwargs): """Build a microvm""" kwargs = self.kwargs | kwargs diff --git a/tests/host_tools/cargo_build.py b/tests/host_tools/cargo_build.py index 5a029f8c5f4..54302901211 100644 --- a/tests/host_tools/cargo_build.py +++ b/tests/host_tools/cargo_build.py @@ -65,17 +65,6 @@ def get_binary(name, *, workspace_dir=FC_WORKSPACE_DIR, example=None): return bin_path -def get_firecracker_binaries(*, workspace_dir=FC_WORKSPACE_DIR): - """Build the Firecracker and Jailer binaries if they don't exist. - - Returns the location of the firecracker related binaries eventually after - building them in case they do not exist at the specified root_path. 
- """ - return get_binary("firecracker", workspace_dir=workspace_dir), get_binary( - "jailer", workspace_dir=workspace_dir - ) - - def get_example(name, *args, package="firecracker", **kwargs): """Build an example binary""" return get_binary(package, *args, **kwargs, example=name) diff --git a/tests/integration_tests/security/test_vulnerabilities.py b/tests/integration_tests/security/test_vulnerabilities.py index a4461aded78..14307e56888 100644 --- a/tests/integration_tests/security/test_vulnerabilities.py +++ b/tests/integration_tests/security/test_vulnerabilities.py @@ -206,10 +206,8 @@ def microvm_factory_a(record_property): """MicroVMFactory using revision A binaries""" revision_a = global_props.buildkite_revision_a bin_dir = git_clone(Path("../build") / revision_a, revision_a).resolve() - fc_bin = bin_dir / "firecracker" - jailer_bin = bin_dir / "jailer" - record_property("firecracker_bin", str(fc_bin)) - uvm_factory = MicroVMFactory(fc_bin, jailer_bin) + record_property("firecracker_bin", str(bin_dir / "firecracker")) + uvm_factory = MicroVMFactory(bin_dir) yield uvm_factory uvm_factory.kill() diff --git a/tools/sandbox.py b/tools/sandbox.py index 3cdcec345a7..8cdf3277352 100755 --- a/tools/sandbox.py +++ b/tools/sandbox.py @@ -14,8 +14,8 @@ from pathlib import Path from framework.artifacts import disks, kernels +from framework.defs import DEFAULT_BINARY_DIR from framework.microvm import MicroVMFactory -from host_tools.cargo_build import get_firecracker_binaries kernels = list(kernels("vmlinux-*")) rootfs = list(disks("ubuntu*ext4")) @@ -61,17 +61,16 @@ def parse_byte_size(param): args = parser.parse_args() print(args) -bins = None +binary_dir = None if args.binary_dir: binary_dir = Path(args.binary_dir).resolve() - bins = binary_dir / "firecracker", binary_dir / "jailer" else: - bins = get_firecracker_binaries() + binary_dir = DEFAULT_BINARY_DIR cpu_template = None if args.cpu_template_path is not None: cpu_template = 
json.loads(args.cpu_template_path.read_text()) -vmfcty = MicroVMFactory(*bins) +vmfcty = MicroVMFactory(binary_dir) print(f"uvm with kernel {args.kernel} ...") uvm = vmfcty.build(args.kernel, args.rootfs) diff --git a/tools/test-popular-containers/test-docker-rootfs.py b/tools/test-popular-containers/test-docker-rootfs.py index aac24ac2ecc..1434e40173c 100755 --- a/tools/test-popular-containers/test-docker-rootfs.py +++ b/tools/test-popular-containers/test-docker-rootfs.py @@ -17,8 +17,8 @@ # pylint: disable=wrong-import-position from framework.artifacts import kernels +from framework.defs import DEFAULT_BINARY_DIR from framework.microvm import MicroVMFactory -from host_tools.cargo_build import get_firecracker_binaries # pylint: enable=wrong-import-position @@ -26,7 +26,7 @@ # Use the latest guest kernel kernel = kernels[-1] -vmfcty = MicroVMFactory(*get_firecracker_binaries()) +vmfcty = MicroVMFactory(DEFAULT_BINARY_DIR) # (may take a while to compile Firecracker...) for rootfs in Path(".").glob("*.ext4"): From 0a2b6d2397e80a84fe2ded2e60ffc3d13174d63d Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 4 Mar 2025 12:50:02 +0000 Subject: [PATCH 05/23] test: replace workspace_dir arg with binary_dir in get_binary Instead of get_binary assuming it always operates on a cargo build directory, have it directly operate on a "binary directory" in the sense of the --binary-dir parameter. Nothing was using the workspace_dir arg, so no functional changes. 
Signed-off-by: Patrick Roy --- tests/host_tools/cargo_build.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/host_tools/cargo_build.py b/tests/host_tools/cargo_build.py index 54302901211..56af399dea7 100644 --- a/tests/host_tools/cargo_build.py +++ b/tests/host_tools/cargo_build.py @@ -7,7 +7,7 @@ from pathlib import Path from framework import defs, utils -from framework.defs import FC_WORKSPACE_DIR +from framework.defs import DEFAULT_BINARY_DIR from framework.with_filelock import with_filelock DEFAULT_TARGET = f"{platform.machine()}-unknown-linux-musl" @@ -56,12 +56,11 @@ def cargo_test(path, extra_args=""): cargo("test", extra_args + " --all --no-fail-fast", env=env) -def get_binary(name, *, workspace_dir=FC_WORKSPACE_DIR, example=None): +def get_binary(name, *, binary_dir=DEFAULT_BINARY_DIR, example=None): """Get a binary. The binaries are built before starting a testrun.""" - target_dir = workspace_dir / "build" / "cargo_target" / DEFAULT_TARGET_DIR - bin_path = target_dir / name + bin_path = binary_dir / name if example: - bin_path = target_dir / "examples" / example + bin_path = binary_dir / "examples" / example return bin_path From 73683cb7d0b49cd7dc9950a8c2be8b5b0aec83d7 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 4 Mar 2025 12:54:51 +0000 Subject: [PATCH 06/23] test: pass through kwargs from uffd_handler() to get_example() To allow specifying a different directory in which to look for the uffd example binaries, so that they can be used in A/B-tests. 
Signed-off-by: Patrick Roy --- tests/framework/utils_uffd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/framework/utils_uffd.py b/tests/framework/utils_uffd.py index 690eb423573..7715a5c2157 100644 --- a/tests/framework/utils_uffd.py +++ b/tests/framework/utils_uffd.py @@ -93,6 +93,6 @@ def spawn_pf_handler(vm, handler_path, mem_path): return uffd_handler -def uffd_handler(handler_name): +def uffd_handler(handler_name, **kwargs): """Retrieves the uffd handler with the given name""" - return cargo_build.get_example(f"uffd_{handler_name}_handler") + return cargo_build.get_example(f"uffd_{handler_name}_handler", **kwargs) From 295bb78c9dca4935f62f17b146e138bdc44283f6 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 4 Mar 2025 12:52:28 +0000 Subject: [PATCH 07/23] devtool: also copy example binaries when doing build --rev Also copy the example binaries into the target directory when building some arbitrary revision (for example for A/B-tests). Signed-off-by: Patrick Roy --- tools/devtool | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/devtool b/tools/devtool index 7bf8d4ad6fb..5f495b838c1 100755 --- a/tools/devtool +++ b/tools/devtool @@ -517,8 +517,9 @@ cmd_build() { if [ ! -z "$revision" ]; then popd git branch -D $branch_name - mkdir -p build/"$revision" + mkdir -p build/"$revision"/examples cp $tmp_dir/build/cargo_target/$(uname -m)-unknown-linux-$libc/$profile/* build/"$revision" + cp $tmp_dir/build/cargo_target/$(uname -m)-unknown-linux-$libc/$profile/examples/* build/"$revision"/examples cmd_sh "rm -rf $tmp_dir" fi From a2ce40cffcec4f196cbe28c798b2ca73bf34d173 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 3 Mar 2025 15:55:44 +0000 Subject: [PATCH 08/23] fix(test): stop passing guest kernel when building vms from snapshot When building a microvm from a snapshot, there is no need to specify a guest kernel. Remove the parameter from SnapshotRestoreTest.sample_latency. 
Signed-off-by: Patrick Roy --- tests/integration_tests/performance/test_snapshot_ab.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/integration_tests/performance/test_snapshot_ab.py b/tests/integration_tests/performance/test_snapshot_ab.py index 23224de6b31..c718b43f920 100644 --- a/tests/integration_tests/performance/test_snapshot_ab.py +++ b/tests/integration_tests/performance/test_snapshot_ab.py @@ -76,15 +76,12 @@ def configure_vm( return vm - def sample_latency( - self, microvm_factory, snapshot, guest_kernel_linux_5_10 - ) -> List[float]: + def sample_latency(self, microvm_factory, snapshot) -> List[float]: """Collects latency samples for the microvm configuration specified by this instance""" values = [] for _ in range(ITERATIONS): microvm = microvm_factory.build( - kernel=guest_kernel_linux_5_10, monitor_memory=False, ) microvm.spawn(emit_metrics=True) @@ -154,7 +151,6 @@ def test_restore_latency( samples = test_setup.sample_latency( microvm_factory, snapshot, - guest_kernel_linux_5_10, ) for sample in samples: From e926f08ff974431413e70b154ddc94097245fce5 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 3 Mar 2025 16:39:29 +0000 Subject: [PATCH 09/23] test_snapshot_ab: rename guest_kernel_acpi to guest_kernel This was the result of a find and replace gone awry it seems. 
Signed-off-by: Patrick Roy --- tests/integration_tests/performance/test_snapshot_ab.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration_tests/performance/test_snapshot_ab.py b/tests/integration_tests/performance/test_snapshot_ab.py index c718b43f920..4b09db126e4 100644 --- a/tests/integration_tests/performance/test_snapshot_ab.py +++ b/tests/integration_tests/performance/test_snapshot_ab.py @@ -43,12 +43,12 @@ def id(self): def configure_vm( self, microvm_factory, - guest_kernel_acpi, + guest_kernel, rootfs, ) -> Microvm: """Creates the initial snapshot that will be loaded repeatedly to sample latencies""" vm = microvm_factory.build( - guest_kernel_acpi, + guest_kernel, rootfs, monitor_memory=False, ) From 5ef13faa21ca801e6e0b46d2b013720818b14a22 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 4 Mar 2025 09:02:03 +0000 Subject: [PATCH 10/23] test: store uffd handler in microvm attribute So that we can later refer to it in tests, for example to parse its logs. 
Signed-off-by: Patrick Roy --- tests/framework/microvm.py | 1 + tests/framework/utils_uffd.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 85ed588b809..8d87d137018 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -201,6 +201,7 @@ def __init__( self.ssh_key = None self.initrd_file = None self.boot_args = None + self.uffd_handler = None self.fc_binary_path = Path(fc_binary_path) assert fc_binary_path.exists() diff --git a/tests/framework/utils_uffd.py b/tests/framework/utils_uffd.py index 7715a5c2157..a502003ff5f 100644 --- a/tests/framework/utils_uffd.py +++ b/tests/framework/utils_uffd.py @@ -89,6 +89,7 @@ def spawn_pf_handler(vm, handler_path, mem_path): handler_name, SOCKET_PATH, jailed_mem, vm.chroot(), "uffd.log" ) uffd_handler.spawn(vm.jailer.uid, vm.jailer.gid) + vm.uffd_handler = uffd_handler return uffd_handler From 1ac8a3a1d817d498ea4ef3545493cafc92e93e64 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 3 Mar 2025 16:17:58 +0000 Subject: [PATCH 11/23] test: Add MicroVMFactory.build_n_from_snapshot Add a utility function that allows building multiple microvms from the same snapshot sequentially. 
Signed-off-by: Patrick Roy --- tests/framework/microvm.py | 47 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 8d87d137018..7d2e9002822 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -38,6 +38,7 @@ from framework.microvm_helpers import MicrovmHelpers from framework.properties import global_props from framework.utils_drive import VhostUserBlkBackend, VhostUserBlkBackendType +from framework.utils_uffd import spawn_pf_handler, uffd_handler from host_tools.fcmetrics import FCMetricsMonitor from host_tools.memory import MemoryMonitor @@ -1128,6 +1129,52 @@ def build_from_snapshot(self, snapshot: Snapshot): vm.restore_from_snapshot(snapshot, resume=True) return vm + def build_n_from_snapshot( + self, + snapshot, + nr_vms, + *, + uffd_handler_name=None, + incremental=False, + use_snapshot_editor=True, + ): + """A generator of `n` microvms restored, either all restored from the same given snapshot + (incremental=False), or created by taking successive snapshots of restored VMs + """ + for _ in range(nr_vms): + microvm = self.build() + microvm.spawn() + + uffd_path = None + if uffd_handler_name is not None: + pf_handler = spawn_pf_handler( + microvm, + uffd_handler(uffd_handler_name, binary_dir=self.binary_path), + snapshot.mem, + ) + uffd_path = pf_handler.socket_path + + snapshot_copy = microvm.restore_from_snapshot( + snapshot, resume=True, uffd_path=uffd_path + ) + + yield microvm + + if incremental: + new_snapshot = microvm.make_snapshot(snapshot.snapshot_type) + + if snapshot.is_diff: + new_snapshot = new_snapshot.rebase_snapshot( + snapshot, use_snapshot_editor + ) + + snapshot = new_snapshot + + microvm.kill() + snapshot_copy.delete() + + snapshot.delete() + def kill(self): """Clean up all built VMs""" for vm in self.vms: From 2e010088ee9d9101ecbb4f63e93007b8fefb895e Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 3 Mar 2025 16:27:32 
+0000 Subject: [PATCH 12/23] test: use build_n_from_snapshot to replace explicit loops Replace snapshot creation loops in test_5_snapshots, test_vmgenid and test_restore_latency with calls to build_n_from_snapshot Signed-off-by: Patrick Roy --- .../functional/test_snapshot_basic.py | 44 +++---------------- .../performance/test_snapshot_ab.py | 11 +---- 2 files changed, 7 insertions(+), 48 deletions(-) diff --git a/tests/integration_tests/functional/test_snapshot_basic.py b/tests/integration_tests/functional/test_snapshot_basic.py index 6fe41f4d5a6..09e14b92448 100644 --- a/tests/integration_tests/functional/test_snapshot_basic.py +++ b/tests/integration_tests/functional/test_snapshot_basic.py @@ -149,12 +149,9 @@ def test_5_snapshots( snapshot = vm.make_snapshot(snapshot_type) vm.kill() - for i in range(seq_len): - logger.info("Load snapshot #%s, mem %s", i, snapshot.mem) - microvm = microvm_factory.build() - microvm.spawn() - copied_snapshot = microvm.restore_from_snapshot(snapshot, resume=True) - + for microvm in microvm_factory.build_n_from_snapshot( + snapshot, seq_len, incremental=True, use_snapshot_editor=use_snapshot_editor + ): # FIXME: This and the sleep below reduce the rate of vsock/ssh connection # related spurious test failures, although we do not know why this is the case. time.sleep(2) @@ -171,21 +168,6 @@ def test_5_snapshots( check_filesystem(microvm.ssh, "squashfs", "/dev/vda") time.sleep(2) - logger.info("Create snapshot %s #%d.", snapshot_type, i + 1) - new_snapshot = microvm.make_snapshot(snapshot_type) - - # If we are testing incremental snapshots we must merge the base with - # current layer. - if snapshot.is_diff: - logger.info("Base: %s, Layer: %s", snapshot.mem, new_snapshot.mem) - new_snapshot = new_snapshot.rebase_snapshot( - snapshot, use_snapshot_editor=use_snapshot_editor - ) - - microvm.kill() - copied_snapshot.delete() - # Update the base for next iteration. 
- snapshot = new_snapshot def test_patch_drive_snapshot(uvm_nano, microvm_factory): @@ -524,27 +506,13 @@ def test_vmgenid(guest_kernel_linux_6_1, rootfs, microvm_factory, snapshot_type) base_snapshot = snapshot base_vm.kill() - for i in range(5): - vm = microvm_factory.build() - vm.spawn() - copied_snapshot = vm.restore_from_snapshot(snapshot, resume=True) - + for i, vm in enumerate( + microvm_factory.build_n_from_snapshot(base_snapshot, 5, incremental=True) + ): # We should have as DMESG_VMGENID_RESUME messages as # snapshots we have resumed check_vmgenid_update_count(vm, i + 1) - snapshot = vm.make_snapshot(snapshot_type) - vm.kill() - copied_snapshot.delete() - - # If we are testing incremental snapshots we ust merge the base with - # current layer. - if snapshot.is_diff: - snapshot = snapshot.rebase_snapshot(base_snapshot) - - # Update the base for next iteration - base_snapshot = snapshot - # TODO add `global_props.host_os == "amzn2"` condition # once amazon linux kernels have patches. diff --git a/tests/integration_tests/performance/test_snapshot_ab.py b/tests/integration_tests/performance/test_snapshot_ab.py index 4b09db126e4..b91feb03f80 100644 --- a/tests/integration_tests/performance/test_snapshot_ab.py +++ b/tests/integration_tests/performance/test_snapshot_ab.py @@ -80,13 +80,7 @@ def sample_latency(self, microvm_factory, snapshot) -> List[float]: """Collects latency samples for the microvm configuration specified by this instance""" values = [] - for _ in range(ITERATIONS): - microvm = microvm_factory.build( - monitor_memory=False, - ) - microvm.spawn(emit_metrics=True) - snapshot_copy = microvm.restore_from_snapshot(snapshot, resume=True) - + for microvm in microvm_factory.build_n_from_snapshot(snapshot, ITERATIONS): value = 0 # Parse all metric data points in search of load_snapshot time. 
microvm.flush_metrics() @@ -98,10 +92,7 @@ def sample_latency(self, microvm_factory, snapshot) -> List[float]: break assert value > 0 values.append(value) - microvm.kill() - snapshot_copy.delete() - snapshot.delete() return values From e2690afb408f77597bf5d7eefc42ce9cd0c51025 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 3 Mar 2025 16:29:43 +0000 Subject: [PATCH 13/23] test_snapshot_ab: move metrics dimension setting into configure_vm Move the initialization of the metrics logger dimensions into `configure_vm`, as later on there will be multiple tests having to do exactly the same sequence of configure + setup metrics. Signed-off-by: Patrick Roy --- .../performance/test_snapshot_ab.py | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/tests/integration_tests/performance/test_snapshot_ab.py b/tests/integration_tests/performance/test_snapshot_ab.py index b91feb03f80..5ad055a7495 100644 --- a/tests/integration_tests/performance/test_snapshot_ab.py +++ b/tests/integration_tests/performance/test_snapshot_ab.py @@ -40,12 +40,7 @@ def id(self): """Computes a unique id for this test instance""" return "all_dev" if self.all_devices else f"{self.vcpus}vcpu_{self.mem}mb" - def configure_vm( - self, - microvm_factory, - guest_kernel, - rootfs, - ) -> Microvm: + def configure_vm(self, microvm_factory, guest_kernel, rootfs, metrics) -> Microvm: """Creates the initial snapshot that will be loaded repeatedly to sample latencies""" vm = microvm_factory.build( guest_kernel, @@ -74,6 +69,16 @@ def configure_vm( ) vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path="/v.sock") + metrics.set_dimensions( + { + "net_devices": str(self.nets), + "block_devices": str(self.blocks), + "vsock_devices": str(int(self.all_devices)), + "balloon_devices": str(int(self.all_devices)), + **vm.dimensions, + } + ) + return vm def sample_latency(self, microvm_factory, snapshot) -> List[float]: @@ -122,23 +127,16 @@ def test_restore_latency( We only test a 
single guest kernel, as the guest kernel does not "participate" in snapshot restore. """ - vm = test_setup.configure_vm(microvm_factory, guest_kernel_linux_5_10, rootfs) - vm.start() - - metrics.set_dimensions( - { - "performance_test": "test_restore_latency", - "net_devices": str(test_setup.nets), - "block_devices": str(test_setup.blocks), - "vsock_devices": str(int(test_setup.all_devices)), - "balloon_devices": str(int(test_setup.all_devices)), - **vm.dimensions, - } + vm = test_setup.configure_vm( + microvm_factory, guest_kernel_linux_5_10, rootfs, metrics ) + vm.start() snapshot = vm.snapshot_full() vm.kill() + metrics.put_dimensions({"performance_test": "test_restore_latency"}) + samples = test_setup.sample_latency( microvm_factory, snapshot, From 5b19b195480c69adfc3cd883b04015389417d240 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 3 Mar 2025 16:31:02 +0000 Subject: [PATCH 14/23] test_snapshot_ab: start VM inside configure_vm No need to defer the starting. Also rename it to `boot_vm` to more accurately reflect what it does now. 
Signed-off-by: Patrick Roy --- tests/integration_tests/performance/test_snapshot_ab.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/integration_tests/performance/test_snapshot_ab.py b/tests/integration_tests/performance/test_snapshot_ab.py index 5ad055a7495..189e58b25b1 100644 --- a/tests/integration_tests/performance/test_snapshot_ab.py +++ b/tests/integration_tests/performance/test_snapshot_ab.py @@ -40,7 +40,7 @@ def id(self): """Computes a unique id for this test instance""" return "all_dev" if self.all_devices else f"{self.vcpus}vcpu_{self.mem}mb" - def configure_vm(self, microvm_factory, guest_kernel, rootfs, metrics) -> Microvm: + def boot_vm(self, microvm_factory, guest_kernel, rootfs, metrics) -> Microvm: """Creates the initial snapshot that will be loaded repeatedly to sample latencies""" vm = microvm_factory.build( guest_kernel, @@ -78,6 +78,7 @@ def configure_vm(self, microvm_factory, guest_kernel, rootfs, metrics) -> Microv **vm.dimensions, } ) + vm.start() return vm @@ -127,10 +128,7 @@ def test_restore_latency( We only test a single guest kernel, as the guest kernel does not "participate" in snapshot restore. """ - vm = test_setup.configure_vm( - microvm_factory, guest_kernel_linux_5_10, rootfs, metrics - ) - vm.start() + vm = test_setup.boot_vm(microvm_factory, guest_kernel_linux_5_10, rootfs, metrics) snapshot = vm.snapshot_full() vm.kill() From 873515a0d51f2dc45f0673db8e0db0dab118c21d Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 3 Mar 2025 16:32:11 +0000 Subject: [PATCH 15/23] test_snapshot_ab: open code sample_latency This function only has one possible call-site, and that's test_restore_latency. Inlining it into its call site makes things a bit simpler (no need for a temporary array). 
Signed-off-by: Patrick Roy --- .../performance/test_snapshot_ab.py | 38 ++++++------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/tests/integration_tests/performance/test_snapshot_ab.py b/tests/integration_tests/performance/test_snapshot_ab.py index 189e58b25b1..5caa87bb17f 100644 --- a/tests/integration_tests/performance/test_snapshot_ab.py +++ b/tests/integration_tests/performance/test_snapshot_ab.py @@ -4,7 +4,6 @@ import tempfile from dataclasses import dataclass from functools import lru_cache -from typing import List import pytest @@ -82,25 +81,6 @@ def boot_vm(self, microvm_factory, guest_kernel, rootfs, metrics) -> Microvm: return vm - def sample_latency(self, microvm_factory, snapshot) -> List[float]: - """Collects latency samples for the microvm configuration specified by this instance""" - values = [] - - for microvm in microvm_factory.build_n_from_snapshot(snapshot, ITERATIONS): - value = 0 - # Parse all metric data points in search of load_snapshot time. - microvm.flush_metrics() - metrics = microvm.get_all_metrics() - for data_point in metrics: - cur_value = data_point["latencies_us"]["load_snapshot"] - if cur_value > 0: - value = cur_value / USEC_IN_MSEC - break - assert value > 0 - values.append(value) - - return values - @pytest.mark.nonci @pytest.mark.parametrize( @@ -135,10 +115,14 @@ def test_restore_latency( metrics.put_dimensions({"performance_test": "test_restore_latency"}) - samples = test_setup.sample_latency( - microvm_factory, - snapshot, - ) - - for sample in samples: - metrics.put_metric("latency", sample, "Milliseconds") + for microvm in microvm_factory.build_n_from_snapshot(snapshot, ITERATIONS): + value = 0 + # Parse all metric data points in search of load_snapshot time. 
+ microvm.flush_metrics() + for data_point in microvm.get_all_metrics(): + cur_value = data_point["latencies_us"]["load_snapshot"] + if cur_value > 0: + value = cur_value / USEC_IN_MSEC + break + assert value > 0 + metrics.put_metric("latency", value, "Milliseconds") From 1488c7b48cfb115a62231e9884a68e54d20c935d Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 27 Feb 2025 16:59:10 +0000 Subject: [PATCH 16/23] test: add post-restore latency test Add a snapshot latency test that outputs metrics on how long it takes to memset a 128MB memory region post-snapshot restore. We only collect these latencies for a single configuration (1024MB, 2vCPU), since they will be the same no matter the size of the memory. Run with an on-demand uffd handler and compare to mmap-ing the snapshot file as a baseline. Also use the fault_all handler, which will allow us to measure latencies due to "fast" page faults, e.g. those that are just for setting up stage-2 page table entries but where otherwise memory is already faulted into userspace. Signed-off-by: Patrick Roy --- .../performance/test_snapshot_ab.py | 50 ++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/tests/integration_tests/performance/test_snapshot_ab.py b/tests/integration_tests/performance/test_snapshot_ab.py index 5caa87bb17f..9133eb9d4de 100644 --- a/tests/integration_tests/performance/test_snapshot_ab.py +++ b/tests/integration_tests/performance/test_snapshot_ab.py @@ -1,7 +1,9 @@ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0 """Performance benchmark for snapshot restore.""" +import signal import tempfile +import time from dataclasses import dataclass from functools import lru_cache @@ -11,6 +13,7 @@ from framework.microvm import Microvm USEC_IN_MSEC = 1000 +NS_IN_MSEC = 1_000_000 ITERATIONS = 30 @@ -113,7 +116,9 @@ def test_restore_latency( snapshot = vm.snapshot_full() vm.kill() - metrics.put_dimensions({"performance_test": "test_restore_latency"}) + metrics.put_dimensions( + {"performance_test": "test_restore_latency", "uffd_handler": "None"} + ) for microvm in microvm_factory.build_n_from_snapshot(snapshot, ITERATIONS): value = 0 @@ -126,3 +131,46 @@ def test_restore_latency( break assert value > 0 metrics.put_metric("latency", value, "Milliseconds") + + +# When using the fault-all handler, all guest memory will be faulted in way before the helper tool +# wakes up, because it gets faulted in on the first page fault. In this scenario, we are not measuring UFFD +# latencies, but KVM latencies of setting up missing EPT entries. 
+@pytest.mark.nonci +@pytest.mark.parametrize("uffd_handler", [None, "valid", "fault_all"]) +def test_post_restore_latency( + microvm_factory, rootfs, guest_kernel_linux_5_10, metrics, uffd_handler +): + """Collects latency metric of post-restore memory accesses done inside the guest""" + test_setup = SnapshotRestoreTest(mem=1024, vcpus=2) + vm = test_setup.boot_vm(microvm_factory, guest_kernel_linux_5_10, rootfs, metrics) + + vm.ssh.check_output( + "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 Date: Tue, 4 Mar 2025 08:32:36 +0000 Subject: [PATCH 17/23] test: Also emit post-restore latency metrics for huge pages Runs the new test_post_restore_latency test also for hugepages-backed VMs Signed-off-by: Patrick Roy --- .../performance/test_snapshot_ab.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/integration_tests/performance/test_snapshot_ab.py b/tests/integration_tests/performance/test_snapshot_ab.py index 9133eb9d4de..70d0ed6adc2 100644 --- a/tests/integration_tests/performance/test_snapshot_ab.py +++ b/tests/integration_tests/performance/test_snapshot_ab.py @@ -10,7 +10,7 @@ import pytest import host_tools.drive as drive_tools -from framework.microvm import Microvm +from framework.microvm import HugePagesConfig, Microvm USEC_IN_MSEC = 1000 NS_IN_MSEC = 1_000_000 @@ -36,6 +36,7 @@ class SnapshotRestoreTest: nets: int = 3 blocks: int = 3 all_devices: bool = False + huge_pages: HugePagesConfig = HugePagesConfig.NONE @property def id(self): @@ -55,6 +56,7 @@ def boot_vm(self, microvm_factory, guest_kernel, rootfs, metrics) -> Microvm: vcpu_count=self.vcpus, mem_size_mib=self.mem, rootfs_io_engine="Sync", + huge_pages=self.huge_pages, ) for _ in range(self.nets): @@ -77,6 +79,7 @@ def boot_vm(self, microvm_factory, guest_kernel, rootfs, metrics) -> Microvm: "block_devices": str(self.blocks), "vsock_devices": str(int(self.all_devices)), "balloon_devices": str(int(self.all_devices)), + "huge_pages_config": 
str(self.huge_pages), **vm.dimensions, } ) @@ -138,11 +141,15 @@ def test_restore_latency( # latencies, but KVM latencies of setting up missing EPT entries. @pytest.mark.nonci @pytest.mark.parametrize("uffd_handler", [None, "valid", "fault_all"]) +@pytest.mark.parametrize("huge_pages", HugePagesConfig) def test_post_restore_latency( - microvm_factory, rootfs, guest_kernel_linux_5_10, metrics, uffd_handler + microvm_factory, rootfs, guest_kernel_linux_5_10, metrics, uffd_handler, huge_pages ): """Collects latency metric of post-restore memory accesses done inside the guest""" - test_setup = SnapshotRestoreTest(mem=1024, vcpus=2) + if huge_pages != HugePagesConfig.NONE and uffd_handler is None: + pytest.skip("huge page snapshots can only be restored using uffd") + + test_setup = SnapshotRestoreTest(mem=1024, vcpus=2, huge_pages=huge_pages) vm = test_setup.boot_vm(microvm_factory, guest_kernel_linux_5_10, rootfs, metrics) vm.ssh.check_output( From 5af2dc208f2de31dc7fa58958c3bd716057aa7bc Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 4 Mar 2025 08:56:28 +0000 Subject: [PATCH 18/23] test: Add post-restore population latency test While the post_restore_latency test measures the time on-demand faulting takes from the guest's perspective, this test measures the actual time it takes to fault in guest memory. We cannot collect this data by simply using the fault_all handler in the post-restore test, because the handler will get triggered way before the fast_page_fault_helper script continues running (because the first page fault will be triggered by sshd or the kernel, so by the time our helper runs, the uffd handler will already be done and we won't notice any latency). Therefore, have the fault_all handler print the time it took to populate all of guest memory to its log file, and parse this number.
Signed-off-by: Patrick Roy --- .../examples/uffd/fault_all_handler.rs | 5 +++ .../performance/test_snapshot_ab.py | 40 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/src/firecracker/examples/uffd/fault_all_handler.rs b/src/firecracker/examples/uffd/fault_all_handler.rs index d0f127a8ce1..6711350497a 100644 --- a/src/firecracker/examples/uffd/fault_all_handler.rs +++ b/src/firecracker/examples/uffd/fault_all_handler.rs @@ -11,6 +11,7 @@ use std::fs::File; use std::os::unix::net::UnixListener; use uffd_utils::{Runtime, UffdHandler}; +use utils::time::{get_time_us, ClockType}; fn main() { let mut args = std::env::args(); @@ -34,9 +35,13 @@ fn main() { match event { userfaultfd::Event::Pagefault { .. } => { + let start = get_time_us(ClockType::Monotonic); for region in uffd_handler.mem_regions.clone() { uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size); } + let end = get_time_us(ClockType::Monotonic); + + println!("Finished Faulting All: {}us", end - start); } _ => panic!("Unexpected event on userfaultfd"), } diff --git a/tests/integration_tests/performance/test_snapshot_ab.py b/tests/integration_tests/performance/test_snapshot_ab.py index 70d0ed6adc2..7e20befcc27 100644 --- a/tests/integration_tests/performance/test_snapshot_ab.py +++ b/tests/integration_tests/performance/test_snapshot_ab.py @@ -1,6 +1,7 @@ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Performance benchmark for snapshot restore.""" +import re import signal import tempfile import time @@ -181,3 +182,42 @@ def test_post_restore_latency( ) metrics.put_metric("fault_latency", int(duration) / NS_IN_MSEC, "Milliseconds") + + +@pytest.mark.nonci +@pytest.mark.parametrize("huge_pages", HugePagesConfig) +def test_population_latency( + microvm_factory, rootfs, guest_kernel_linux_5_10, metrics, huge_pages +): + """Collects population latency metrics (e.g. 
how long it takes UFFD handler to fault in all memory)""" + test_setup = SnapshotRestoreTest(mem=128, vcpus=1, huge_pages=huge_pages) + vm = test_setup.boot_vm(microvm_factory, guest_kernel_linux_5_10, rootfs, metrics) + snapshot = vm.snapshot_full() + vm.kill() + + metrics.put_dimensions( + {"performance_test": "test_population_latency", "uffd_handler": "fault_all"} + ) + + for microvm in microvm_factory.build_n_from_snapshot( + snapshot, ITERATIONS, uffd_handler_name="fault_all" + ): + # do _something_ to trigger a pagefault, which will then cause the UFFD handler to fault in _everything_ + microvm.ssh.check_output("true") + + for _ in range(5): + time.sleep(1) + + match = re.match( + r"Finished Faulting All: (\d+)us", microvm.uffd_handler.log_data + ) + + if match: + latency_us = int(match.group(1)) + + metrics.put_metric( + "populate_latency", latency_us / 1000, "Milliseconds" + ) + break + else: + raise RuntimeError("UFFD handler did not print population latency after 5s") From b0ecfb4b742e6162959309166e44212f36339da8 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 4 Mar 2025 13:42:55 +0000 Subject: [PATCH 19/23] fix(ab): its dict.keys(), not dict.key() We were calling a non-existing function on an error path. This is why I prefer compiled languages smh. Signed-off-by: Patrick Roy --- tools/ab_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ab_test.py b/tools/ab_test.py index 7349adcb862..b296627ba31 100755 --- a/tools/ab_test.py +++ b/tools/ab_test.py @@ -136,7 +136,7 @@ def load_data_series(report_path: Path, tag=None, *, reemit: bool = False): # multiple EMF log messages. We need to reassemble :( assert ( processed_emf[dimension_set].keys() == result.keys() - ), f"Found incompatible metrics associated with dimension set {dimension_set}: {processed_emf[dimension_set].key()} in one EMF message, but {result.keys()} in another." 
+ ), f"Found incompatible metrics associated with dimension set {dimension_set}: {processed_emf[dimension_set].keys()} in one EMF message, but {result.keys()} in another." for metric, (values, unit) in processed_emf[dimension_set].items(): assert result[metric][1] == unit From c2100d90fff1a2a9511faa5bcd280857f1889c8f Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 4 Mar 2025 14:22:03 +0000 Subject: [PATCH 20/23] test(ab): allow dimensions to be split into multiple lists When using metrics.put_dimensions from python code, then the resulting EMF messages will have a list of lists in their "Dimensions" key. Currently, ab_test.py assumes that this list of lists always has length one, meaning it ignores additional dimensions added via put_dimensions. Have it instead flatten the list of lists so that it always considers all dimensions. Signed-off-by: Patrick Roy --- tools/ab_test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/ab_test.py b/tools/ab_test.py index b296627ba31..7c4cdcada7a 100755 --- a/tools/ab_test.py +++ b/tools/ab_test.py @@ -64,7 +64,11 @@ def extract_dimensions(emf): # Skipped tests emit a duration metric, but have no dimensions set return {} - dimension_list = emf["_aws"]["CloudWatchMetrics"][0]["Dimensions"][0] + dimension_list = [ + dim + for dimensions in emf["_aws"]["CloudWatchMetrics"][0]["Dimensions"] + for dim in dimensions + ] return {key: emf[key] for key in emf if key in dimension_list} From 03ff310cb647f5cf85575d64c22a54b4df4e5cd8 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 6 Mar 2025 12:33:21 +0000 Subject: [PATCH 21/23] refactor: rename uffd_valid_handler to uffd_on_demand_handler This better describes what the handler actually does (technically, fault_all_handler is also "valid"), and now that some tests emit the name of the uffd handler as a metric, the names should be descriptive from the get-go to avoid confusion and future renamings. 
Signed-off-by: Patrick Roy --- docs/snapshotting/handling-page-faults-on-snapshot-resume.md | 2 +- src/firecracker/Cargo.toml | 4 ++-- .../examples/uffd/{valid_handler.rs => on_demand_handler.rs} | 0 tests/integration_tests/functional/test_uffd.py | 2 +- tests/integration_tests/performance/test_huge_pages.py | 4 ++-- tests/integration_tests/performance/test_snapshot_ab.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) rename src/firecracker/examples/uffd/{valid_handler.rs => on_demand_handler.rs} (100%) diff --git a/docs/snapshotting/handling-page-faults-on-snapshot-resume.md b/docs/snapshotting/handling-page-faults-on-snapshot-resume.md index ea790624a18..a5da980124e 100644 --- a/docs/snapshotting/handling-page-faults-on-snapshot-resume.md +++ b/docs/snapshotting/handling-page-faults-on-snapshot-resume.md @@ -162,7 +162,7 @@ connect/send data. ### Example An example of a handler process can be found -[here](../../src/firecracker/examples/uffd/valid_handler.rs). The process is +[here](../../src/firecracker/examples/uffd/on_demand_handler.rs). The process is designed to tackle faults on a certain address by loading into memory the entire region that the address belongs to, but users can choose any other behavior that suits their use case best. 
diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml index 29778af061d..40b2795050d 100644 --- a/src/firecracker/Cargo.toml +++ b/src/firecracker/Cargo.toml @@ -57,8 +57,8 @@ name = "uffd_malicious_handler" path = "examples/uffd/malicious_handler.rs" [[example]] -name = "uffd_valid_handler" -path = "examples/uffd/valid_handler.rs" +name = "uffd_on_demand_handler" +path = "examples/uffd/on_demand_handler.rs" [[example]] name = "uffd_fault_all_handler" diff --git a/src/firecracker/examples/uffd/valid_handler.rs b/src/firecracker/examples/uffd/on_demand_handler.rs similarity index 100% rename from src/firecracker/examples/uffd/valid_handler.rs rename to src/firecracker/examples/uffd/on_demand_handler.rs diff --git a/tests/integration_tests/functional/test_uffd.py b/tests/integration_tests/functional/test_uffd.py index 35b03d7aa34..95819844f05 100644 --- a/tests/integration_tests/functional/test_uffd.py +++ b/tests/integration_tests/functional/test_uffd.py @@ -92,7 +92,7 @@ def test_valid_handler(uvm_plain, snapshot): vm.spawn() # Spawn page fault handler process. - pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot.mem) + pf_handler = spawn_pf_handler(vm, uffd_handler("on_demand"), snapshot.mem) vm.restore_from_snapshot(snapshot, resume=True, uffd_path=pf_handler.socket_path) diff --git a/tests/integration_tests/performance/test_huge_pages.py b/tests/integration_tests/performance/test_huge_pages.py index acfee58139d..5839245ebd9 100644 --- a/tests/integration_tests/performance/test_huge_pages.py +++ b/tests/integration_tests/performance/test_huge_pages.py @@ -93,7 +93,7 @@ def test_hugetlbfs_snapshot(microvm_factory, guest_kernel_linux_5_10, rootfs): vm.spawn() # Spawn page fault handler process. 
- pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot.mem) + pf_handler = spawn_pf_handler(vm, uffd_handler("on_demand"), snapshot.mem) vm.restore_from_snapshot(snapshot, resume=True, uffd_path=pf_handler.socket_path) @@ -135,7 +135,7 @@ def test_hugetlbfs_diff_snapshot(microvm_factory, uvm_plain): vm.spawn() # Spawn page fault handler process. - pf_handler = spawn_pf_handler(vm, uffd_handler("valid"), snapshot_merged.mem) + pf_handler = spawn_pf_handler(vm, uffd_handler("on_demand"), snapshot_merged.mem) vm.restore_from_snapshot( snapshot_merged, resume=True, uffd_path=pf_handler.socket_path diff --git a/tests/integration_tests/performance/test_snapshot_ab.py b/tests/integration_tests/performance/test_snapshot_ab.py index 7e20befcc27..655566dcb13 100644 --- a/tests/integration_tests/performance/test_snapshot_ab.py +++ b/tests/integration_tests/performance/test_snapshot_ab.py @@ -141,7 +141,7 @@ def test_restore_latency( # wakes up, because it gets faulted in on the first page fault. In this scenario, we are not measuring UFFD # latencies, but KVM latencies of setting up missing EPT entries. @pytest.mark.nonci -@pytest.mark.parametrize("uffd_handler", [None, "valid", "fault_all"]) +@pytest.mark.parametrize("uffd_handler", [None, "on_demand", "fault_all"]) @pytest.mark.parametrize("huge_pages", HugePagesConfig) def test_post_restore_latency( microvm_factory, rootfs, guest_kernel_linux_5_10, metrics, uffd_handler, huge_pages From 35861f092a2133a289cc77f908dae68678f919f3 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 6 Mar 2025 13:17:50 +0000 Subject: [PATCH 22/23] test: avoid memsetting in favor of just touching one byte per page To bring down the overhead of the testing script, only touch a single byte on each page (as that's enough to fault in the entire page), instead of memsetting everything. 
Signed-off-by: Patrick Roy --- .../overlay/usr/local/bin/fast_page_fault_helper.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/resources/overlay/usr/local/bin/fast_page_fault_helper.c b/resources/overlay/usr/local/bin/fast_page_fault_helper.c index 96e565f0c3f..7558f7b09fc 100644 --- a/resources/overlay/usr/local/bin/fast_page_fault_helper.c +++ b/resources/overlay/usr/local/bin/fast_page_fault_helper.c @@ -19,6 +19,14 @@ #define MEM_SIZE_MIB (128 * 1024 * 1024) #define NANOS_PER_SEC 1000000000 +#define PAGE_SIZE 4096 + +void touch_memory(void *mem, size_t size, char val) { + void *end = mem + size; + for (; mem < end; mem += PAGE_SIZE) { + *((char *)mem) = val; + } +} int main() { sigset_t set; @@ -45,12 +53,12 @@ int main() { return 1; } - memset(ptr, 1, MEM_SIZE_MIB); + touch_memory(ptr, MEM_SIZE_MIB, 1); sigwait(&set, &signal); clock_gettime(CLOCK_BOOTTIME, &start); - memset(ptr, 2, MEM_SIZE_MIB); + touch_memory(ptr, MEM_SIZE_MIB, 2); clock_gettime(CLOCK_BOOTTIME, &end); duration_nanos = (end.tv_sec - start.tv_sec) * NANOS_PER_SEC + end.tv_nsec - start.tv_nsec; From a2fb9442393df5c127e797766c70b5a19cd6041c Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 6 Mar 2025 16:35:12 +0000 Subject: [PATCH 23/23] fix: allocate more huge pages for performance tests On m6a/6.1, post_populate with fault_all handler and 2M pages fails because of the UFFD handler panicking during faulting, returning `PartiallyCopied(1022MB)`. This is because we are depleting the huge pages pool. Thus, allocate some more. Admittedly, I do not understand why this only happens on m6a/6.1.
Signed-off-by: Patrick Roy --- tools/devtool | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/devtool b/tools/devtool index 5f495b838c1..35794256138 100755 --- a/tools/devtool +++ b/tools/devtool @@ -743,9 +743,9 @@ cmd_test() { # It seems that even if the tests using huge pages run sequentially on ag=1 agents, right-sizing the huge pages # pool to the total number of huge pages used across all tests results in spurious failures with pool depletion # anyway (something else on the host seems to be stealing our huge pages, and we cannot "ear mark" them for - # Firecracker processes). Thus, just allocate 4GB of them and call it a day. + # Firecracker processes). Thus, just allocate 8GB of them and call it a day. say "Setting up huge pages pool" - num_hugetlbfs_pages=2048 + num_hugetlbfs_pages=4096 huge_pages_old=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages) huge_pages_new=$(echo $num_hugetlbfs_pages |sudo tee /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)