Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
0b9ff99
refactor(test): stop hardcoding uffd socket path everywhere
roypat Mar 3, 2025
f0207f4
test: have fast_page_fault_helper print time memset took
roypat Feb 27, 2025
b04661c
refactor(uffd): Make example UFFD fast in absence of balloon device
roypat Mar 3, 2025
6c91480
refactor(test): Store binary_dir inside microvm
roypat Mar 4, 2025
0a2b6d2
test: replace workspace_dir arg with binary_dir in get_binary
roypat Mar 4, 2025
73683cb
test: pass through kwargs from uffd_handler() to get_example()
roypat Mar 4, 2025
295bb78
devtool: also copy example binaries when doing build --rev
roypat Mar 4, 2025
a2ce40c
fix(test): stop passing guest kernel when building vms from snapshot
roypat Mar 3, 2025
e926f08
test_snapshot_ab: rename guest_kernel_acpi to guest_kernel
roypat Mar 3, 2025
5ef13fa
test: store uffd handler in microvm attribute
roypat Mar 4, 2025
1ac8a3a
test: Add MicroVMFactory.build_n_from_snapshot
roypat Mar 3, 2025
2e01008
test: use build_n_from_snapshot to replace explicit loops
roypat Mar 3, 2025
e2690af
test_snapshot_ab: move metrics dimension setting into configure_vm
roypat Mar 3, 2025
5b19b19
test_snapshot_ab: start VM inside configure_vm
roypat Mar 3, 2025
873515a
test_snapshot_ab: open code sample_latency
roypat Mar 3, 2025
1488c7b
test: add post-restore latency test
roypat Feb 27, 2025
187f807
test: Also emit post-restore latency metrics for huge pages
roypat Mar 4, 2025
5af2dc2
test: Add post-restore population latency test
roypat Mar 4, 2025
b0ecfb4
fix(ab): it's dict.keys(), not dict.key()
roypat Mar 4, 2025
c2100d9
test(ab): allow dimensions to be split into multiple lists
roypat Mar 4, 2025
03ff310
refactor: rename uffd_valid_handler to uffd_on_demand_handler
roypat Mar 6, 2025
35861f0
test: avoid memsetting in favor of just touching one byte per page
roypat Mar 6, 2025
a2fb944
fix: allocate more huge pages for performance tests
roypat Mar 6, 2025
c2d23d9
Merge branch 'main' into snapshot-latency-test
roypat Mar 7, 2025
3e37ae6
Merge branch 'main' into snapshot-latency-test
roypat Mar 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ connect/send data.
### Example

An example of a handler process can be found
[here](../../src/firecracker/examples/uffd/valid_handler.rs). The process is
[here](../../src/firecracker/examples/uffd/on_demand_handler.rs). The process is
designed to tackle faults on a certain address by loading into memory the entire
region that the address belongs to, but users can choose any other behavior that
suits their use case best.
38 changes: 34 additions & 4 deletions resources/overlay/usr/local/bin/fast_page_fault_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,31 @@
// This way, the `memset` will trigger a fast page fault for every page in
// the memory region.

#include <stdio.h> // perror
#include <stdio.h> // perror, fopen, fprintf
#include <signal.h> // sigwait and friends
#include <string.h> // memset
#include <sys/mman.h> // mmap
#include <time.h> // clock_gettime
#include <fcntl.h> // open

#define MEM_SIZE_MIB (128 * 1024 * 1024)
#define NANOS_PER_SEC 1000000000
#define PAGE_SIZE 4096

int main(int argc, char *const argv[]) {
// Writes `val` to one byte every PAGE_SIZE bytes across the `size` bytes
// starting at `mem` (offsets 0, PAGE_SIZE, 2 * PAGE_SIZE, ... below `size`).
// Only a single byte per stride is written, instead of filling the whole
// region as `memset` would.
void touch_memory(void *mem, size_t size, char val) {
    // Arithmetic on `void *` is a GNU extension, so index through a `char *`
    // instead. Iterating over an offset (rather than bumping the pointer)
    // also avoids ever forming a pointer beyond the end of the region when
    // `size` is not a multiple of PAGE_SIZE.
    char *bytes = mem;
    size_t offset;

    for (offset = 0; offset < size; offset += PAGE_SIZE) {
        bytes[offset] = val;
    }
}

int main() {
sigset_t set;
int signal;
void *ptr;
struct timespec start, end;
long duration_nanos;
FILE *out_file;

sigemptyset(&set);
if (sigaddset(&set, SIGUSR1) == -1) {
Expand All @@ -39,11 +53,27 @@ int main(int argc, char *const argv[]) {
return 1;
}

memset(ptr, 1, MEM_SIZE_MIB);
touch_memory(ptr, MEM_SIZE_MIB, 1);

sigwait(&set, &signal);

memset(ptr, 2, MEM_SIZE_MIB);
clock_gettime(CLOCK_BOOTTIME, &start);
touch_memory(ptr, MEM_SIZE_MIB, 2);
clock_gettime(CLOCK_BOOTTIME, &end);

duration_nanos = (end.tv_sec - start.tv_sec) * NANOS_PER_SEC + end.tv_nsec - start.tv_nsec;

out_file = fopen("/tmp/fast_page_fault_helper.out", "w");
if (out_file == NULL) {
perror("fopen");
return 1;
}

fprintf(out_file, "%ld", duration_nanos);
if (fclose(out_file)) {
perror("fclose");
return 1;
}

return 0;
}
4 changes: 2 additions & 2 deletions src/firecracker/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ name = "uffd_malicious_handler"
path = "examples/uffd/malicious_handler.rs"

[[example]]
name = "uffd_valid_handler"
path = "examples/uffd/valid_handler.rs"
name = "uffd_on_demand_handler"
path = "examples/uffd/on_demand_handler.rs"

[[example]]
name = "uffd_fault_all_handler"
Expand Down
8 changes: 6 additions & 2 deletions src/firecracker/examples/uffd/fault_all_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use std::fs::File;
use std::os::unix::net::UnixListener;

use uffd_utils::{Runtime, UffdHandler};
use utils::time::{get_time_us, ClockType};

fn main() {
let mut args = std::env::args();
Expand All @@ -34,10 +35,13 @@ fn main() {

match event {
userfaultfd::Event::Pagefault { .. } => {
let start = get_time_us(ClockType::Monotonic);
for region in uffd_handler.mem_regions.clone() {
uffd_handler
.serve_pf(region.mapping.base_host_virt_addr as _, region.mapping.size);
uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
}
let end = get_time_us(ClockType::Monotonic);

println!("Finished Faulting All: {}us", end - start);
}
_ => panic!("Unexpected event on userfaultfd"),
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ mod uffd_utils;
use std::fs::File;
use std::os::unix::net::UnixListener;

use uffd_utils::{MemPageState, Runtime, UffdHandler};
use uffd_utils::{Runtime, UffdHandler};

fn main() {
let mut args = std::env::args();
Expand Down Expand Up @@ -86,8 +86,9 @@ fn main() {
deferred_events.push(event);
}
}
userfaultfd::Event::Remove { start, end } => uffd_handler
.update_mem_state_mappings(start as u64, end as u64, MemPageState::Removed),
userfaultfd::Event::Remove { start, end } => {
uffd_handler.mark_range_removed(start as u64, end as u64)
}
_ => panic!("Unexpected event on userfaultfd"),
}
}
Expand Down
121 changes: 35 additions & 86 deletions src/firecracker/examples/uffd/uffd_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// Not everything is used by both binaries
#![allow(dead_code)]

use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::fs::File;
use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd};
use std::os::unix::net::UnixStream;
Expand Down Expand Up @@ -34,26 +34,20 @@ pub struct GuestRegionUffdMapping {
pub page_size_kib: usize,
}

#[derive(Debug, Clone, Copy)]
pub enum MemPageState {
Uninitialized,
FromFile,
Removed,
Anonymous,
}

#[derive(Debug, Clone)]
pub struct MemRegion {
pub mapping: GuestRegionUffdMapping,
page_states: HashMap<u64, MemPageState>,
impl GuestRegionUffdMapping {
/// Returns `true` if `fault_page_addr` lies inside this mapping's host virtual
/// address range, i.e. in `[base_host_virt_addr, base_host_virt_addr + size)`.
fn contains(&self, fault_page_addr: u64) -> bool {
    // Compare the offset from the base instead of computing the end address:
    // `base_host_virt_addr + size` can overflow `u64` (panic in debug builds,
    // silent wrap-around in release builds), whereas the checked subtraction
    // simply yields `None` for addresses below the base.
    fault_page_addr
        .checked_sub(self.base_host_virt_addr)
        .map_or(false, |offset| offset < self.size as u64)
}
}

#[derive(Debug)]
pub struct UffdHandler {
pub mem_regions: Vec<MemRegion>,
pub mem_regions: Vec<GuestRegionUffdMapping>,
pub page_size: usize,
backing_buffer: *const u8,
uffd: Uffd,
removed_pages: HashSet<u64>,
}

impl UffdHandler {
Expand Down Expand Up @@ -92,61 +86,42 @@ impl UffdHandler {

let uffd = unsafe { Uffd::from_raw_fd(file.into_raw_fd()) };

let mem_regions = create_mem_regions(&mappings, page_size);

Self {
mem_regions,
mem_regions: mappings,
page_size,
backing_buffer,
uffd,
removed_pages: HashSet::new(),
}
}

pub fn read_event(&mut self) -> Result<Option<Event>, Error> {
self.uffd.read_event()
}

pub fn update_mem_state_mappings(&mut self, start: u64, end: u64, state: MemPageState) {
for region in self.mem_regions.iter_mut() {
for (key, value) in region.page_states.iter_mut() {
if key >= &start && key < &end {
*value = state;
}
}
/// Adds every page frame number in `[start / page_size, end / page_size)` to
/// the handler's set of removed pages.
///
/// `start` and `end` are byte addresses; both are converted to page frame
/// numbers by integer division with `self.page_size`.
pub fn mark_range_removed(&mut self, start: u64, end: u64) {
    let page_size = self.page_size as u64;
    let first_pfn = start / page_size;
    let end_pfn = end / page_size;

    // The upper bound is exclusive, matching the half-open `start..end` range.
    self.removed_pages.extend(first_pfn..end_pfn);
}

pub fn serve_pf(&mut self, addr: *mut u8, len: usize) -> bool {
// Find the start of the page that the current faulting address belongs to.
let dst = (addr as usize & !(self.page_size - 1)) as *mut libc::c_void;
let fault_page_addr = dst as u64;

// Get the state of the current faulting page.
for region in self.mem_regions.iter() {
match region.page_states.get(&fault_page_addr) {
// Our simple PF handler has a simple strategy:
// There exist 4 states in which a memory page can be in:
// 1. Uninitialized - page was never touched
// 2. FromFile - the page is populated with content from snapshotted memory file
// 3. Removed - MADV_DONTNEED was called due to balloon inflation
// 4. Anonymous - page was zeroed out -> this implies that more than one page fault
// event was received. This can be a consequence of guest reclaiming back its
// memory from the host (through balloon device)
Some(MemPageState::Uninitialized) | Some(MemPageState::FromFile) => {
match self.populate_from_file(region, fault_page_addr, len) {
Some((start, end)) => {
self.update_mem_state_mappings(start, end, MemPageState::FromFile)
}
None => return false,
}
return true;
}
Some(MemPageState::Removed) | Some(MemPageState::Anonymous) => {
let (start, end) = self.zero_out(fault_page_addr);
self.update_mem_state_mappings(start, end, MemPageState::Anonymous);
return true;
let fault_pfn = fault_page_addr / self.page_size as u64;

if self.removed_pages.contains(&fault_pfn) {
self.zero_out(fault_page_addr);
return true;
} else {
for region in self.mem_regions.iter() {
if region.contains(fault_page_addr) {
return self.populate_from_file(region, fault_page_addr, len);
}
None => {}
}
}

Expand All @@ -156,13 +131,14 @@ impl UffdHandler {
);
}

fn populate_from_file(&self, region: &MemRegion, dst: u64, len: usize) -> Option<(u64, u64)> {
let offset = dst - region.mapping.base_host_virt_addr;
let src = self.backing_buffer as u64 + region.mapping.offset + offset;
fn populate_from_file(&self, region: &GuestRegionUffdMapping, dst: u64, len: usize) -> bool {
let offset = dst - region.base_host_virt_addr;
let src = self.backing_buffer as u64 + region.offset + offset;

let ret = unsafe {
unsafe {
match self.uffd.copy(src as *const _, dst as *mut _, len, true) {
Ok(value) => value,
// Make sure the UFFD copied some bytes.
Ok(value) => assert!(value > 0),
// Catch EAGAIN errors, which occur when a `remove` event lands in the UFFD
// queue while we're processing `pagefault` events.
// The weird cast is because the `bytes_copied` field is based on the
Expand All @@ -172,35 +148,30 @@ impl UffdHandler {
Err(Error::PartiallyCopied(bytes_copied))
if bytes_copied == 0 || bytes_copied == (-libc::EAGAIN) as usize =>
{
return None
return false
}
Err(Error::CopyFailed(errno))
if std::io::Error::from(errno).raw_os_error().unwrap() == libc::EEXIST =>
{
len
()
}
Err(e) => {
panic!("Uffd copy failed: {e:?}");
}
}
};

// Make sure the UFFD copied some bytes.
assert!(ret > 0);

Some((dst, dst + len as u64))
true
}

fn zero_out(&mut self, addr: u64) -> (u64, u64) {
fn zero_out(&mut self, addr: u64) {
let ret = unsafe {
self.uffd
.zeropage(addr as *mut _, self.page_size, true)
.expect("Uffd zeropage failed")
};
// Make sure the UFFD zeroed out some bytes.
assert!(ret > 0);

(addr, addr + self.page_size as u64)
}
}

Expand Down Expand Up @@ -345,28 +316,6 @@ impl Runtime {
}
}

fn create_mem_regions(mappings: &Vec<GuestRegionUffdMapping>, page_size: usize) -> Vec<MemRegion> {
let mut mem_regions: Vec<MemRegion> = Vec::with_capacity(mappings.len());

for r in mappings.iter() {
let mapping = r.clone();
let mut addr = r.base_host_virt_addr;
let end_addr = r.base_host_virt_addr + r.size as u64;
let mut page_states = HashMap::new();

while addr < end_addr {
page_states.insert(addr, MemPageState::Uninitialized);
addr += page_size as u64;
}
mem_regions.push(MemRegion {
mapping,
page_states,
});
}

mem_regions
}

#[cfg(test)]
mod tests {
use std::mem::MaybeUninit;
Expand Down
17 changes: 7 additions & 10 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import host_tools.cargo_build as build_tools
from framework import defs, utils
from framework.artifacts import disks, kernel_params
from framework.defs import DEFAULT_BINARY_DIR
from framework.microvm import MicroVMFactory
from framework.properties import global_props
from framework.utils_cpu_templates import (
Expand Down Expand Up @@ -293,14 +294,11 @@ def get(self, _netns_id):
def microvm_factory(request, record_property, results_dir, netns_factory):
"""Fixture to create microvms simply."""

if binary_dir := request.config.getoption("--binary-dir"):
fc_binary_path = Path(binary_dir) / "firecracker"
jailer_binary_path = Path(binary_dir) / "jailer"
if not fc_binary_path.exists():
raise RuntimeError("Firecracker binary does not exist")
else:
fc_binary_path, jailer_binary_path = build_tools.get_firecracker_binaries()
record_property("firecracker_bin", str(fc_binary_path))
binary_dir = request.config.getoption("--binary-dir") or DEFAULT_BINARY_DIR
if isinstance(binary_dir, str):
binary_dir = Path(binary_dir)

record_property("firecracker_bin", str(binary_dir / "firecracker"))

# If `--custom-cpu-template` option is provided, the given CPU template will
# be applied afterwards unless overwritten.
Expand All @@ -316,8 +314,7 @@ def microvm_factory(request, record_property, results_dir, netns_factory):
# We could override the chroot base like so
# jailer_kwargs={"chroot_base": "/srv/jailo"}
uvm_factory = MicroVMFactory(
fc_binary_path,
jailer_binary_path,
binary_dir,
netns_factory=netns_factory,
custom_cpu_template=custom_cpu_template,
)
Expand Down
7 changes: 7 additions & 0 deletions tests/framework/defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@
# Absolute path to the test results folder
TEST_RESULTS_DIR = FC_WORKSPACE_DIR / "test_results"

# Path to the `release` directory of the host machine's
# `<machine>-unknown-linux-musl` target, located under
# LOCAL_BUILD_PATH / "cargo_target". The machine name is taken from
# `platform.machine()` at import time.
DEFAULT_BINARY_DIR = (
LOCAL_BUILD_PATH
/ "cargo_target"
/ f"{platform.machine()}-unknown-linux-musl"
/ "release"
)

# The minimum required host kernel version for which io_uring is supported in
# Firecracker.
MIN_KERNEL_VERSION_FOR_IO_URING = "5.10.51"
Expand Down
Loading