Speed up registration of stack maps
This commit fixes a perf issue I was seeing in some local benchmarking
where registration of stack maps took a nontrivial amount of time for a
module that didn't even use stack maps! The fix here largely mirrors
bytecodealliance#2811: use the in-memory data structures of a
`CompiledModule` rather than rebuilding separate data structures when a
module is registered with a `Store`.

The `StackMapRegistry` type now takes a lookup object for a range of
PCs, simplifying registration. Registration is also skipped entirely if
a module is determined ahead of time not to have any stack maps. This
trait object encapsulates all the logic that previously lived in the
rebuilt data structures and leverages recently added conveniences like
`CompiledModule::func_by_pc`.

With all of this combined, modules without stack maps now skip the
registration step entirely, and modules with stack maps should do much
less work during registration as well.
alexcrichton committed Apr 8, 2021
1 parent 18dd82b commit 9f1aa64
Showing 3 changed files with 115 additions and 161 deletions.
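
To make the shape of the change easier to follow, here is a minimal sketch of the design described in the commit message (not code from the diff itself): the registry keeps one boxed `StackMapLookup` per module code range and defers per-pc resolution to it, so a module with no stack maps is simply never registered. The empty `StackMap` stand-in and the use of `&mut self` instead of the registry's actual `RefCell` interior mutability are simplifications for illustration.

```rust
use std::collections::BTreeMap;

/// Stand-in for `wasmtime_environ::ir::StackMap`.
pub struct StackMap;

/// Mirrors the `StackMapLookup` trait added in `crates/runtime/src/externref.rs`:
/// the module itself knows how to resolve a pc to a stack map.
pub trait StackMapLookup {
    fn lookup(&self, pc: usize) -> Option<&StackMap>;
}

/// Simplified registry: keyed by the *end* pc of each module's code range,
/// storing the start pc and the module's lookup object. A query is one
/// `BTreeMap` range lookup plus one virtual call, and modules without stack
/// maps are never inserted at all.
#[derive(Default)]
pub struct StackMapRegistry {
    ranges: BTreeMap<usize, (usize, Box<dyn StackMapLookup>)>,
}

impl StackMapRegistry {
    pub fn register_stack_maps(&mut self, start: usize, end: usize, lookup: Box<dyn StackMapLookup>) {
        // Code ranges from different modules must never overlap.
        let old = self.ranges.insert(end, (start, lookup));
        assert!(old.is_none());
    }

    pub fn lookup_stack_map(&self, pc: usize) -> Option<&StackMap> {
        // Find the first registered range whose end is >= pc, then confirm
        // the pc actually falls inside that module's code before delegating.
        let (end, (start, lookup)) = self.ranges.range(pc..).next()?;
        if pc < *start || *end < pc {
            return None;
        }
        lookup.lookup(pc)
    }
}
```

In the actual diff below, `CompiledModule::stack_maps` returns `Some(Box::new(StackMapLookupImpl(...)))` only when the module has stack maps, and `Store::register_module` passes that lookup object along with the module's code range to `register_stack_maps`.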
90 changes: 81 additions & 9 deletions crates/jit/src/instantiate.rs
@@ -16,6 +16,7 @@ use std::sync::Arc;
use thiserror::Error;
use wasmtime_debug::create_gdbjit_image;
use wasmtime_environ::entity::PrimaryMap;
use wasmtime_environ::ir::StackMap;
use wasmtime_environ::isa::TargetIsa;
use wasmtime_environ::wasm::{
DefinedFuncIndex, InstanceTypeIndex, ModuleTypeIndex, SignatureIndex, WasmFuncType,
@@ -25,6 +26,7 @@ use wasmtime_environ::{
ModuleSignature, ModuleTranslation, StackMapInformation, TrapInformation,
};
use wasmtime_profiling::ProfilingAgent;
use wasmtime_runtime::StackMapLookup;
use wasmtime_runtime::{GdbJitImageRegistration, InstantiationError, VMFunctionBody, VMTrampoline};

/// An error condition while setting up a wasm instance, be it validation,
@@ -214,6 +216,7 @@ pub struct CompiledModule {
code: Arc<ModuleCode>,
finished_functions: FinishedFunctions,
trampolines: Vec<(SignatureIndex, VMTrampoline)>,
has_stack_maps: bool,
}

impl CompiledModule {
@@ -270,6 +273,8 @@ impl CompiledModule {
let start = code_range.0 as usize;
let end = start + code_range.1;

let has_stack_maps = artifacts.funcs.values().any(|f| f.stack_maps.len() > 0);

Ok(Arc::new(Self {
artifacts,
code: Arc::new(ModuleCode {
@@ -279,6 +284,7 @@ }),
}),
finished_functions,
trampolines,
has_stack_maps,
}))
}

@@ -313,15 +319,12 @@ impl CompiledModule {
///
/// The iterator returned iterates over the span of the compiled function in
/// memory with the stack maps associated with those bytes.
pub fn stack_maps(
&self,
) -> impl Iterator<Item = (*mut [VMFunctionBody], &[StackMapInformation])> {
self.finished_functions().values().copied().zip(
self.artifacts
.funcs
.values()
.map(|f| f.stack_maps.as_slice()),
)
pub fn stack_maps(self: &Arc<Self>) -> Option<Box<dyn StackMapLookup>> {
if self.has_stack_maps {
Some(Box::new(StackMapLookupImpl(self.clone())))
} else {
None
}
}

/// Looks up a defined function by a program counter value.
@@ -588,3 +591,72 @@ mod arc_serde {
Ok(Arc::new(T::deserialize(de)?))
}
}

struct StackMapLookupImpl(Arc<CompiledModule>);

impl StackMapLookup for StackMapLookupImpl {
fn lookup(&self, pc: usize) -> Option<&StackMap> {
let (index, start, _end) = self.0.func_by_pc(pc)?;
let func = self.0.artifacts.funcs.get(index)?;
let offset = (pc - start) as u32;

// Do a binary search to find the stack map for the given PC.
//
// Because GC safepoints are technically only associated with a single
// PC, we should ideally only care about `Ok(index)` values returned
// from the binary search. However, safepoints are inserted right before
// calls, and there are two things that can disturb the PC/offset
// associated with the safepoint versus the PC we actually use to query
// for the stack map:
//
// 1. The `backtrace` crate gives us the PC in a frame that will be
// *returned to*, and where execution will continue from, rather than
// the PC of the call we are currently at. So we would need to
// disassemble one instruction backwards to query the actual PC for
// the stack map.
//
// TODO: One thing we *could* do to make this a little less error
// prone, would be to assert/check that the nearest GC safepoint
// found is within `max_encoded_size(any kind of call instruction)` of
// our queried PC for the target architecture.
//
// 2. Cranelift's stack maps only handle the stack, not
// registers. However, some references that are arguments to a call
// may need to be in registers. In these cases, what Cranelift will
// do is:
//
// a. spill all the live references,
// b. insert a GC safepoint for those references,
// c. reload the references into registers, and finally
// d. make the call.
//
// Step (c) adds drift between the GC safepoint and the location of
// the call, which is where we actually walk the stack frame and
// collect its live references.
//
// Luckily, the spill stack slots for the live references are still
// up to date, so we can still find all the on-stack roots.
// Furthermore, we do not have a moving GC, so we don't need to worry
// whether the following code will reuse the references in registers
// (which would not have been updated to point to the moved objects)
// or reload from the stack slots (which would have been updated to
// point to the moved objects).
let index = match func
.stack_maps
.binary_search_by_key(&offset, |map| map.code_offset)
{
// Exact hit.
Ok(pos) => pos,

// `Err(0)` means that the associated stack map would have been the
// first element in the array if this pc had an associated stack
// map, but this pc does not have an associated stack map. This can
// only happen inside a Wasm frame if there are no live refs at this
// pc.
Err(0) => return None,

Err(n) => n - 1,
};
Some(&func.stack_maps[index].stack_map)
}
}
168 changes: 29 additions & 139 deletions crates/runtime/src/externref.rs
@@ -109,8 +109,7 @@ use std::hash::{Hash, Hasher};
use std::mem;
use std::ops::Deref;
use std::ptr::{self, NonNull};
use std::rc::Rc;
use wasmtime_environ::{ir::StackMap, StackMapInformation};
use wasmtime_environ::ir::StackMap;

/// An external reference to some opaque data.
///
@@ -757,166 +756,55 @@ struct StackMapRegistryInner {
ranges: BTreeMap<usize, ModuleStackMaps>,
}

#[derive(Debug)]
struct ModuleStackMaps {
/// The range of PCs that this module covers. Different modules must always
/// have distinct ranges.
range: std::ops::Range<usize>,

/// A map from a PC in this module (that is a GC safepoint) to its
/// associated stack map. If `None` then it means that the PC is the start
/// of a range which has no stack map.
pc_to_stack_map: Vec<(usize, Option<Rc<StackMap>>)>,
start: usize,
lookup: Box<dyn StackMapLookup>,
}

/// A trait used to abstract how stack maps are located.
pub trait StackMapLookup {
/// Returns the stack map, if any, associated with the given `pc`
/// value.
fn lookup(&self, pc: usize) -> Option<&StackMap>;
}

impl StackMapRegistry {
/// Register the stack maps for a given module.
///
/// The stack maps should be given as an iterator over a function's PC range
/// in memory (that is, where the JIT actually allocated and emitted the
/// function's code at), and the stack maps and code offsets within that
/// range for each of its GC safepoints.
/// The `start` and `end` range encompasses where the code for this module
/// lives, and the `lookup` object provided will be used to look up stack
/// maps within this range of pc values.
pub fn register_stack_maps<'a>(
&self,
stack_maps: impl IntoIterator<Item = (std::ops::Range<usize>, &'a [StackMapInformation])>,
start: usize,
end: usize,
lookup: Box<dyn StackMapLookup>,
) {
let mut min = usize::max_value();
let mut max = 0;
let mut pc_to_stack_map = vec![];
let mut last_is_none_marker = true;

for (range, infos) in stack_maps {
let len = range.end - range.start;

min = std::cmp::min(min, range.start);
max = std::cmp::max(max, range.end);

// Add a marker between functions indicating that this function's pc
// starts with no stack map so when our binary search later on finds
// a pc between the start of the function and the function's first
// stack map it doesn't think the previous stack map is our stack
// map.
//
// We skip this if the previous entry pushed was also a `None`
// marker, in which case the starting pc already has no stack map.
// This is also skipped if the first `code_offset` is zero since
// what we'll push applies for the first pc anyway.
if !last_is_none_marker && (infos.is_empty() || infos[0].code_offset > 0) {
pc_to_stack_map.push((range.start, None));
last_is_none_marker = true;
}

for info in infos {
assert!((info.code_offset as usize) < len);
pc_to_stack_map.push((
range.start + (info.code_offset as usize),
Some(Rc::new(info.stack_map.clone())),
));
last_is_none_marker = false;
}
}

if pc_to_stack_map.is_empty() {
// Nothing to register.
return;
}

let module_stack_maps = ModuleStackMaps {
range: min..max,
pc_to_stack_map,
};
let module_stack_maps = ModuleStackMaps { start, lookup };

let mut inner = self.inner.borrow_mut();

// Assert that this chunk of ranges doesn't collide with any other known
// chunks.
if let Some((_, prev)) = inner.ranges.range(max..).next() {
assert!(prev.range.start > max);
if let Some((_, prev)) = inner.ranges.range(end..).next() {
assert!(prev.start > end);
}
if let Some((prev_end, _)) = inner.ranges.range(..=min).next_back() {
assert!(*prev_end < min);
if let Some((prev_end, _)) = inner.ranges.range(..=start).next_back() {
assert!(*prev_end < start);
}

let old = inner.ranges.insert(max, module_stack_maps);
let old = inner.ranges.insert(end, module_stack_maps);
assert!(old.is_none());
}

/// Lookup the stack map for the given PC, if any.
pub fn lookup_stack_map(&self, pc: usize) -> Option<Rc<StackMap>> {
let inner = self.inner.borrow();
let stack_maps = inner.module_stack_maps(pc)?;

// Do a binary search to find the stack map for the given PC.
//
// Because GC safepoints are technically only associated with a single
// PC, we should ideally only care about `Ok(index)` values returned
// from the binary search. However, safepoints are inserted right before
// calls, and there are two things that can disturb the PC/offset
// associated with the safepoint versus the PC we actually use to query
// for the stack map:
//
// 1. The `backtrace` crate gives us the PC in a frame that will be
// *returned to*, and where execution will continue from, rather than
// the PC of the call we are currently at. So we would need to
// disassemble one instruction backwards to query the actual PC for
// the stack map.
//
// TODO: One thing we *could* do to make this a little less error
// prone, would be to assert/check that the nearest GC safepoint
// found is within `max_encoded_size(any kind of call instruction)` of
// our queried PC for the target architecture.
//
// 2. Cranelift's stack maps only handle the stack, not
// registers. However, some references that are arguments to a call
// may need to be in registers. In these cases, what Cranelift will
// do is:
//
// a. spill all the live references,
// b. insert a GC safepoint for those references,
// c. reload the references into registers, and finally
// d. make the call.
//
// Step (c) adds drift between the GC safepoint and the location of
// the call, which is where we actually walk the stack frame and
// collect its live references.
//
// Luckily, the spill stack slots for the live references are still
// up to date, so we can still find all the on-stack roots.
// Furthermore, we do not have a moving GC, so we don't need to worry
// whether the following code will reuse the references in registers
// (which would not have been updated to point to the moved objects)
// or reload from the stack slots (which would have been updated to
// point to the moved objects).
let index = match stack_maps
.pc_to_stack_map
.binary_search_by_key(&pc, |(pc, _stack_map)| *pc)
{
// Exact hit.
Ok(i) => i,

// `Err(0)` means that the associated stack map would have been the
// first element in the array if this pc had an associated stack
// map, but this pc does not have an associated stack map. This can
// only happen inside a Wasm frame if there are no live refs at this
// pc.
Err(0) => return None,

Err(n) => n - 1,
};

let stack_map = stack_maps.pc_to_stack_map[index].1.as_ref()?.clone();
Some(stack_map)
}
}

impl StackMapRegistryInner {
fn module_stack_maps(&self, pc: usize) -> Option<&ModuleStackMaps> {
fn lookup_stack_map(&self, pc: usize) -> Option<&StackMap> {
let (end, stack_maps) = self.ranges.range(pc..).next()?;
if pc < stack_maps.range.start || *end < pc {
None
} else {
Some(stack_maps)
if pc < stack_maps.start || *end < pc {
return None;
}
stack_maps.lookup.lookup(pc)
}
}

@@ -1003,7 +891,8 @@ pub unsafe fn gc(
if cfg!(debug_assertions) {
// Assert that there aren't any Wasm frames on the stack.
backtrace::trace(|frame| {
let stack_map = stack_maps_registry.lookup_stack_map(frame.ip() as usize);
let inner = stack_maps_registry.inner.borrow();
let stack_map = inner.lookup_stack_map(frame.ip() as usize);
assert!(stack_map.is_none());
true
});
@@ -1044,6 +933,7 @@ });
});
}

let stack_maps_registry = stack_maps_registry.inner.borrow();
backtrace::trace(|frame| {
let pc = frame.ip() as usize;
let sp = frame.sp() as usize;
18 changes: 5 additions & 13 deletions crates/wasmtime/src/store.rs
@@ -283,7 +283,11 @@ impl Store {
// We need to know about all the stack maps of all instantiated modules
// so when performing a GC we know about all wasm frames that we find
// on the stack.
self.register_stack_maps(module.compiled_module());
if let Some(lookup) = module.compiled_module().stack_maps() {
let (start, end) = module.compiled_module().code().range();
self.stack_map_registry()
.register_stack_maps(start, end, lookup);
}

// Signatures are loaded into our `SignatureRegistry` here
// once-per-module (and once-per-signature). This allows us to create
@@ -292,18 +296,6 @@ self.signatures().borrow_mut().register_module(module);
self.signatures().borrow_mut().register_module(module);
}

fn register_stack_maps(&self, module: &CompiledModule) {
self.stack_map_registry()
.register_stack_maps(module.stack_maps().map(|(func, stack_maps)| unsafe {
let ptr = (*func).as_ptr();
let len = (*func).len();
let start = ptr as usize;
let end = ptr as usize + len;
let range = start..end;
(range, stack_maps)
}));
}

pub(crate) fn bump_resource_counts(&self, module: &Module) -> Result<()> {
let config = self.engine().config();

