diff --git a/Cargo.lock b/Cargo.lock index 884ba40db..58b0bb755 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -799,6 +799,7 @@ dependencies = [ "anyhow 1.0.28 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)", "lucet-module 0.7.0-dev", @@ -830,6 +831,7 @@ dependencies = [ "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", "getrandom 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/lucet-runtime/Cargo.toml b/lucet-runtime/Cargo.toml index 5732f683d..5fd0c7e08 100644 --- a/lucet-runtime/Cargo.toml +++ b/lucet-runtime/Cargo.toml @@ -15,6 +15,7 @@ lucet-runtime-internals = { path = "lucet-runtime-internals", version = "=0.7.0- lucet-module = { path = "../lucet-module", version = "=0.7.0-dev" } num-traits = "0.2" num-derive = "0.3.0" +cfg-if = "*" [dev-dependencies] byteorder = "1.2" diff --git a/lucet-runtime/include/lucet_types.h b/lucet-runtime/include/lucet_types.h index fd4619aaf..61762fe58 100644 --- a/lucet-runtime/include/lucet_types.h +++ b/lucet-runtime/include/lucet_types.h @@ -16,6 +16,8 @@ #include #endif +#include + enum lucet_error { lucet_error_ok, lucet_error_invalid_argument, diff --git a/lucet-runtime/lucet-runtime-internals/Cargo.toml 
b/lucet-runtime/lucet-runtime-internals/Cargo.toml index d9e229629..73963a1c4 100644 --- a/lucet-runtime/lucet-runtime-internals/Cargo.toml +++ b/lucet-runtime/lucet-runtime-internals/Cargo.toml @@ -13,6 +13,7 @@ edition = "2018" lucet-module = { path = "../../lucet-module", version = "=0.7.0-dev" } lucet-runtime-macros = { path = "../lucet-runtime-macros", version = "=0.7.0-dev" } +cfg-if = "*" anyhow = "1.0" bitflags = "1.0" bincode = "1.1.4" diff --git a/lucet-runtime/lucet-runtime-internals/build.rs b/lucet-runtime/lucet-runtime-internals/build.rs index b1dd1f009..ad919d664 100644 --- a/lucet-runtime/lucet-runtime-internals/build.rs +++ b/lucet-runtime/lucet-runtime-internals/build.rs @@ -3,8 +3,16 @@ use std::fs::File; use std::path::Path; fn main() { + let context_asm_arch = match env::var("CARGO_CFG_TARGET_ARCH").unwrap().as_str() { + "x86_64" => "x86_64", + "x86" => "i686", + arch => { + panic!("unsupported architecture {}", arch); + } + }; + cc::Build::new() - .file("src/context/context_asm.S") + .file(&format!("src/context/sysdep/{}/context_asm.S", context_asm_arch)) .compile("context_context_asm"); cc::Build::new() .file("src/instance/siginfo_ext.c") diff --git a/lucet-runtime/lucet-runtime-internals/src/alloc/mod.rs b/lucet-runtime/lucet-runtime-internals/src/alloc/mod.rs index f54a721bc..83ab1e97d 100644 --- a/lucet-runtime/lucet-runtime-internals/src/alloc/mod.rs +++ b/lucet-runtime/lucet-runtime-internals/src/alloc/mod.rs @@ -339,7 +339,8 @@ impl Alloc { /// Since the stack grows down, `alloc.stack_mut()[0]` is the top of the stack, and /// `alloc.stack_mut()[alloc.limits.stack_size - 1]` is the last word at the bottom of the /// stack. 
- pub unsafe fn stack_u64_mut(&mut self) -> &mut [u64] { + #[cfg(target_pointer_width = "64")] + pub unsafe fn stack_words_mut(&mut self) -> &mut [u64] { assert!( self.slot().stack as usize % 8 == 0, "stack is 8-byte aligned" @@ -353,6 +354,21 @@ impl Alloc { self.slot().limits.stack_size / 8, ) } + #[cfg(target_pointer_width = "32")] + pub unsafe fn stack_words_mut(&mut self) -> &mut [u32] { + assert!( + self.slot().stack as usize % 4 == 0, + "stack is 4-byte aligned" + ); + assert!( + self.slot().limits.stack_size % 4 == 0, + "stack size is multiple of 4-bytes" + ); + std::slice::from_raw_parts_mut( + self.slot().stack as *mut u32, + self.slot().limits.stack_size / 4, + ) + } /// Return the globals as a slice. pub unsafe fn globals(&self) -> &[GlobalValue] { @@ -454,12 +470,27 @@ pub const DEFAULT_SIGNAL_STACK_SIZE: usize = { impl Limits { pub const fn default() -> Limits { - Limits { - heap_memory_size: 16 * 64 * 1024, - heap_address_space_size: 0x0002_0000_0000, - stack_size: 128 * 1024, - globals_size: 4096, - signal_stack_size: DEFAULT_SIGNAL_STACK_SIZE, + use cfg_if::cfg_if; + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + Limits { + heap_memory_size: 16 * 64 * 1024, + heap_address_space_size: 0x0002_0000_0000, + stack_size: 128 * 1024, + globals_size: 4096, + signal_stack_size: DEFAULT_SIGNAL_STACK_SIZE, + } + } else if #[cfg(target_pointer_width = "32")] { + Limits { + heap_memory_size: 16 * 64 * 1024, + heap_address_space_size: 0xffff_ffff, + stack_size: 128 * 1024, + globals_size: 4096, + signal_stack_size: DEFAULT_SIGNAL_STACK_SIZE, + } + } else { + panic!("unsupported architecture!"); + } } } } diff --git a/lucet-runtime/lucet-runtime-internals/src/alloc/tests.rs b/lucet-runtime/lucet-runtime-internals/src/alloc/tests.rs index 5f7f8b5b2..306613de4 100644 --- a/lucet-runtime/lucet-runtime-internals/src/alloc/tests.rs +++ b/lucet-runtime/lucet-runtime-internals/src/alloc/tests.rs @@ -650,7 +650,7 @@ macro_rules! 
alloc_tests { unsafe { let heap_ptr = inst.alloc_mut().heap_mut().as_ptr() as *mut c_void; let mut child = ContextHandle::create_and_init( - inst.alloc_mut().stack_u64_mut(), + inst.alloc_mut().stack_words_mut(), heap_touching_child as usize, &[Val::CPtr(heap_ptr)], ) @@ -699,7 +699,7 @@ macro_rules! alloc_tests { unsafe { let heap_ptr = inst.alloc_mut().heap_mut().as_ptr() as *mut c_void; let mut child = ContextHandle::create_and_init( - inst.alloc_mut().stack_u64_mut(), + inst.alloc_mut().stack_words_mut(), stack_pattern_child as usize, &[Val::CPtr(heap_ptr)], ) diff --git a/lucet-runtime/lucet-runtime-internals/src/archdeps/i686/mod.rs b/lucet-runtime/lucet-runtime-internals/src/archdeps/i686/mod.rs new file mode 100644 index 000000000..83c285bd4 --- /dev/null +++ b/lucet-runtime/lucet-runtime-internals/src/archdeps/i686/mod.rs @@ -0,0 +1 @@ +pub mod val; diff --git a/lucet-runtime/lucet-runtime-internals/src/archdeps/i686/val.rs b/lucet-runtime/lucet-runtime-internals/src/archdeps/i686/val.rs new file mode 100644 index 000000000..66ced9eca --- /dev/null +++ b/lucet-runtime/lucet-runtime-internals/src/archdeps/i686/val.rs @@ -0,0 +1,318 @@ +//! Typed values for passing into and returning from sandboxed +//! programs. + +use libc::c_void; +use std::arch::x86::{ + __m128, _mm_castpd_ps, _mm_castps_pd, _mm_load_pd1, _mm_load_ps1, _mm_setzero_ps, + _mm_storeu_pd, _mm_storeu_ps, +}; + +use lucet_module::ValueType; + +impl Val { + pub fn value_type(&self) -> ValueType { + match self { + // USize, ISize, and CPtr are all as fits for definitions on the target architecture + // (wasm) which is all 32-bit. 
+ Val::USize(_) | Val::ISize(_) | Val::CPtr(_) => ValueType::I32, + Val::GuestPtr(_) => ValueType::I32, + Val::I8(_) | Val::U8(_) | Val::I16(_) | Val::U16(_) | Val::I32(_) | Val::U32(_) => { + ValueType::I32 + } + Val::I64(_) | Val::U64(_) => ValueType::I64, + Val::Bool(_) => ValueType::I32, + Val::F32(_) => ValueType::F32, + Val::F64(_) => ValueType::F64, + } + } +} + +/// Typed values used for passing arguments into guest functions. +#[derive(Clone, Copy, Debug)] +pub enum Val { + CPtr(*const c_void), + /// A WebAssembly linear memory address + GuestPtr(u32), + U8(u8), + U16(u16), + U32(u32), + U64(u64), + I8(i8), + I16(i16), + I32(i32), + I64(i64), + USize(usize), + ISize(isize), + Bool(bool), + F32(f32), + F64(f64), +} + +// the pointer variant is just a wrapper; the caller will know they're still responsible for their +// safety +unsafe impl Send for Val {} +unsafe impl Sync for Val {} + +impl From<*const T> for Val { + fn from(x: *const T) -> Val { + Val::CPtr(x as *const c_void) + } +} + +impl From<*mut T> for Val { + fn from(x: *mut T) -> Val { + Val::CPtr(x as *mut c_void) + } +} + +macro_rules! impl_from_scalars { + ( { $( $ctor:ident : $ty:ty ),* } ) => { + $( + impl From<$ty> for Val { + fn from(x: $ty) -> Val { + Val::$ctor(x) + } + } + )* + }; +} + +// Since there is overlap in these enum variants, we can't have instances for all of them, such as +// GuestPtr +impl_from_scalars!({ + U8: u8, + U16: u16, + U32: u32, + U64: u64, + I8: i8, + I16: i16, + I32: i32, + I64: i64, + USize: usize, + ISize: isize, + Bool: bool, + F32: f32, + F64: f64 +}); + +/// Register representation of `Val`. +/// +/// When mapping `Val`s to x86_64 registers, we map floating point +/// values into the SSE registers _xmmN_, and all other values into +/// general-purpose (integer) registers. +pub enum RegVal { + GpReg(u32), + FpReg(__m128), +} + +/// Convert a `Val` to its representation when stored in an +/// argument register. 
+pub fn val_to_reg(val: &Val) -> RegVal { + use self::RegVal::*; + use self::Val::*; + match *val { + CPtr(v) => GpReg(v as u32), + GuestPtr(v) => GpReg(v as u32), + U8(v) => GpReg(v as u32), + U16(v) => GpReg(v as u32), + U32(v) => GpReg(v as u32), + U64(v) => GpReg(v as u32), // TODO: aaaaAAAAA + I8(v) => GpReg(v as u32), + I16(v) => GpReg(v as u32), + I32(v) => GpReg(v as u32), + I64(v) => GpReg(v as u32), // TODO: aaaaAAAA + USize(v) => GpReg(v as u32), + ISize(v) => GpReg(v as u32), + Bool(false) => GpReg(0u32), + Bool(true) => GpReg(1u32), + Val::F32(v) => FpReg(unsafe { _mm_load_ps1(&v as *const f32) }), + Val::F64(v) => FpReg(unsafe { _mm_castpd_ps(_mm_load_pd1(&v as *const f64)) }), + } +} + +/// Convert a `Val` to its representation when spilled onto the +/// stack. +pub fn val_to_stack(val: &Val) -> u32 { + use self::Val::*; + match *val { + CPtr(v) => v as u32, + GuestPtr(v) => v as u32, + U8(v) => v as u32, + U16(v) => v as u32, + U32(v) => v as u32, + U64(v) => v as u32, // TODO: aaaAAA + I8(v) => v as u32, + I16(v) => v as u32, + I32(v) => v as u32, + I64(v) => v as u32, // TODO: aaaAAA + USize(v) => v as u32, + ISize(v) => v as u32, + Bool(false) => 0u32, + Bool(true) => 1u32, + F32(v) => v.to_bits(), + F64(v) => v.to_bits() as u32, // TODO: aaaAAA + } +} + +/// A value returned by a guest function. +/// +/// Since the Rust type system cannot know the type of the returned value, the user must use the +/// appropriate `From` implementation or `as_T` method. 
+#[derive(Clone, Copy, Debug)] +pub struct UntypedRetVal { + fp: __m128, + gp: u32, +} + +impl std::fmt::Display for UntypedRetVal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "") + } +} + +impl UntypedRetVal { + pub(crate) fn new(gp: u32, fp: __m128) -> UntypedRetVal { + UntypedRetVal { gp, fp } + } +} + +impl From for UntypedRetVal { + fn from(reg: RegVal) -> UntypedRetVal { + match reg { + RegVal::GpReg(r) => UntypedRetVal::new(r, unsafe { _mm_setzero_ps() }), + RegVal::FpReg(r) => UntypedRetVal::new(0, r), + } + } +} + +impl> From for UntypedRetVal { + fn from(v: T) -> UntypedRetVal { + val_to_reg(&v.into()).into() + } +} + +macro_rules! impl_from_fp { + ( $ty:ty, $f:ident, $as:ident ) => { + impl From for $ty { + fn from(retval: UntypedRetVal) -> $ty { + $f(retval.fp) + } + } + + impl From<&UntypedRetVal> for $ty { + fn from(retval: &UntypedRetVal) -> $ty { + $f(retval.fp) + } + } + + impl UntypedRetVal { + pub fn $as(&self) -> $ty { + $f(self.fp) + } + } + }; +} + +impl_from_fp!(f32, __m128_as_f32, as_f32); +impl_from_fp!(f64, __m128_as_f64, as_f64); + +macro_rules! 
impl_from_gp { + ( $ty:ty, $as:ident ) => { + impl From for $ty { + fn from(retval: UntypedRetVal) -> $ty { + retval.gp as $ty + } + } + + impl From<&UntypedRetVal> for $ty { + fn from(retval: &UntypedRetVal) -> $ty { + retval.gp as $ty + } + } + + impl UntypedRetVal { + pub fn $as(&self) -> $ty { + self.gp as $ty + } + } + }; +} + +impl_from_gp!(u8, as_u8); +impl_from_gp!(u16, as_u16); +impl_from_gp!(u32, as_u32); +impl_from_gp!(u64, as_u64); + +impl_from_gp!(i8, as_i8); +impl_from_gp!(i16, as_i16); +impl_from_gp!(i32, as_i32); +impl_from_gp!(i64, as_i64); + +impl From for bool { + fn from(retval: UntypedRetVal) -> bool { + retval.gp != 0 + } +} + +impl From<&UntypedRetVal> for bool { + fn from(retval: &UntypedRetVal) -> bool { + retval.gp != 0 + } +} + +impl UntypedRetVal { + pub fn as_bool(&self) -> bool { + self.gp != 0 + } + + pub fn as_ptr(&self) -> *const T { + self.gp as *const T + } + + pub fn as_mut(&self) -> *mut T { + self.gp as *mut T + } +} + +impl Default for UntypedRetVal { + fn default() -> UntypedRetVal { + let fp = unsafe { _mm_setzero_ps() }; + UntypedRetVal { fp, gp: 0 } + } +} + +pub trait UntypedRetValInternal { + fn fp(&self) -> __m128; + fn gp(&self) -> u32; +} + +impl UntypedRetValInternal for UntypedRetVal { + fn fp(&self) -> __m128 { + self.fp + } + + fn gp(&self) -> u32 { + self.gp + } +} + +// Helpers that we might want to put in a utils module someday + +/// Interpret the contents of a `__m128` register as an `f32`. +pub fn __m128_as_f32(v: __m128) -> f32 { + let mut out: [f32; 4] = [0.0; 4]; + unsafe { + _mm_storeu_ps(&mut out[0] as *mut f32, v); + } + out[0] +} + +/// Interpret the contents of a `__m128` register as an `f64`. 
+pub fn __m128_as_f64(v: __m128) -> f64 { + let mut out: [f64; 2] = [0.0; 2]; + unsafe { + let vd = _mm_castps_pd(v); + _mm_storeu_pd(&mut out[0] as *mut f64, vd); + } + out[0] +} diff --git a/lucet-runtime/lucet-runtime-internals/src/archdeps/mod.rs b/lucet-runtime/lucet-runtime-internals/src/archdeps/mod.rs new file mode 100644 index 000000000..1fc2ee8b9 --- /dev/null +++ b/lucet-runtime/lucet-runtime-internals/src/archdeps/mod.rs @@ -0,0 +1,21 @@ +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(target_arch = "x86")] { + pub mod i686; + pub use i686 as arch_impl; + } else if #[cfg(target_arch = "x86_64")] { + pub mod x86_64; + pub use x86_64 as arch_impl; + } +} + +pub mod val { + use crate::archdeps::arch_impl; + pub use arch_impl::val::Val as Val; + pub use arch_impl::val::RegVal as RegVal; + pub use arch_impl::val::UntypedRetVal as UntypedRetVal; + pub(crate) use arch_impl::val::UntypedRetValInternal as UntypedRetValInternal; + pub use arch_impl::val::val_to_reg as val_to_reg; + pub use arch_impl::val::val_to_stack as val_to_stack; +} diff --git a/lucet-runtime/lucet-runtime-internals/src/archdeps/x86_64/mod.rs b/lucet-runtime/lucet-runtime-internals/src/archdeps/x86_64/mod.rs new file mode 100644 index 000000000..de52d99fa --- /dev/null +++ b/lucet-runtime/lucet-runtime-internals/src/archdeps/x86_64/mod.rs @@ -0,0 +1 @@ +pub mod val; diff --git a/lucet-runtime/lucet-runtime-internals/src/val.rs b/lucet-runtime/lucet-runtime-internals/src/archdeps/x86_64/val.rs similarity index 100% rename from lucet-runtime/lucet-runtime-internals/src/val.rs rename to lucet-runtime/lucet-runtime-internals/src/archdeps/x86_64/val.rs diff --git a/lucet-runtime/lucet-runtime-internals/src/c_api.rs b/lucet-runtime/lucet-runtime-internals/src/c_api.rs index e192d6bb5..05596881b 100644 --- a/lucet-runtime/lucet-runtime-internals/src/c_api.rs +++ b/lucet-runtime/lucet-runtime-internals/src/c_api.rs @@ -664,12 +664,27 @@ pub mod lucet_val { fp: [0; 16], gp: [0; 8], }; - unsafe { -
core::arch::x86_64::_mm_storeu_ps( - v.fp.as_mut().as_mut_ptr() as *mut f32, - retval.fp(), - ); - *(v.gp.as_mut().as_mut_ptr() as *mut u64) = retval.gp(); + use cfg_if::cfg_if; + cfg_if! { + if #[cfg(target_arch = "x86")] { + unsafe { + core::arch::x86::_mm_storeu_ps( + v.fp.as_mut().as_mut_ptr() as *mut f32, + retval.fp(), + ); + *(v.gp.as_mut().as_mut_ptr() as *mut u32) = retval.gp(); + } + } else if #[cfg(target_arch = "x86_64")] { + unsafe { + core::arch::x86_64::_mm_storeu_ps( + v.fp.as_mut().as_mut_ptr() as *mut f32, + retval.fp(), + ); + *(v.gp.as_mut().as_mut_ptr() as *mut u64) = retval.gp(); + } + } else { + panic!("unsupported architecture!"); + } } v } diff --git a/lucet-runtime/lucet-runtime-internals/src/context/mod.rs b/lucet-runtime/lucet-runtime-internals/src/context/mod.rs index a6f3b13b3..d559426cd 100644 --- a/lucet-runtime/lucet-runtime-internals/src/context/mod.rs +++ b/lucet-runtime/lucet-runtime-internals/src/context/mod.rs @@ -1,732 +1,6 @@ -#![allow(improper_ctypes)] - -#[cfg(test)] -mod tests; - -use crate::instance::Instance; -use crate::val::{val_to_reg, val_to_stack, RegVal, UntypedRetVal, Val}; - -use std::arch::x86_64::{__m128, _mm_setzero_ps}; -use std::ptr::NonNull; -use std::{mem, ptr}; -use thiserror::Error; - -/// Callee-saved general-purpose registers in the AMD64 ABI. -/// -/// # Layout -/// -/// `repr(C)` is required to preserve the ordering of members, which are read by the assembly at -/// hard-coded offsets. -/// -/// # TODOs -/// -/// - Unlike the C code, this doesn't use the `packed` repr due to warnings in the Nomicon: -/// . Since the members are all -/// `u64`, this should be fine? 
-#[repr(C)] -pub(crate) struct GpRegs { - pub(crate) rbx: u64, - pub(crate) rsp: u64, - rbp: u64, - pub(crate) rdi: u64, - r12: u64, - r13: u64, - r14: u64, - r15: u64, - pub(crate) rsi: u64, -} - -impl GpRegs { - fn new() -> Self { - GpRegs { - rbx: 0, - rsp: 0, - rbp: 0, - rdi: 0, - r12: 0, - r13: 0, - r14: 0, - r15: 0, - rsi: 0, - } - } -} - -/// Floating-point argument registers in the AMD64 ABI. -/// -/// # Layout -/// -/// `repr(C)` is required to preserve the ordering of members, which are read by the assembly at -/// hard-coded offsets. -/// -/// # TODOs -/// -/// - Unlike the C code, this doesn't use the `packed` repr due to warnings in the Nomicon: -/// . Since the members are all -/// `__m128`, this should be fine? -#[repr(C)] -struct FpRegs { - xmm0: __m128, - xmm1: __m128, - xmm2: __m128, - xmm3: __m128, - xmm4: __m128, - xmm5: __m128, - xmm6: __m128, - xmm7: __m128, -} - -impl FpRegs { - fn new() -> Self { - let zero = unsafe { _mm_setzero_ps() }; - FpRegs { - xmm0: zero, - xmm1: zero, - xmm2: zero, - xmm3: zero, - xmm4: zero, - xmm5: zero, - xmm6: zero, - xmm7: zero, - } - } -} - -/// Everything we need to make a context switch: a signal mask, and the registers and return values -/// that are manipulated directly by assembly code. -/// -/// A context also tracks which other context to swap back to if a child context's entrypoint function -/// returns, and can optionally contain a callback function to be run just before that swap occurs. -/// -/// # Layout -/// -/// The `repr(C)` and order of fields in this struct are very important, as the assembly code reads -/// and writes hard-coded offsets from the base of the struct. Without `repr(C)`, Rust is free to -/// reorder the fields. -/// -/// Contexts are also `repr(align(64))` in order to align to cache lines and minimize contention -/// when running multiple threads. -/// -/// # Movement -/// -/// `Context` values must not be moved once they've been initialized. 
Contexts contain a pointer to -/// their stack, which in turn contains a pointer back to the context. If the context gets moved, -/// that pointer becomes invalid, and the behavior of returning from that context becomes undefined. -#[repr(C, align(64))] -pub struct Context { - pub(crate) gpr: GpRegs, - fpr: FpRegs, - retvals_gp: [u64; 2], - retval_fp: __m128, - parent_ctx: *mut Context, - // TODO ACF 2019-10-23: make Instance into a generic parameter? - backstop_callback: *const unsafe extern "C" fn(*mut Instance), - callback_data: *mut Instance, -} - -impl Context { - /// Create an all-zeroed `Context`. - pub fn new() -> Self { - Context { - gpr: GpRegs::new(), - fpr: FpRegs::new(), - retvals_gp: [0; 2], - retval_fp: unsafe { _mm_setzero_ps() }, - parent_ctx: ptr::null_mut(), - backstop_callback: Context::default_backstop_callback as *const _, - callback_data: ptr::null_mut(), - } - } - - /// Get a raw pointer to the instance's callback data. - pub(crate) fn callback_data_ptr(&self) -> *mut Instance { - self.callback_data - } -} - -/// A wrapper around a `Context`, primarily meant for use in test code. -/// -/// Users of this library interact with contexts implicitly via `Instance` values, but for testing -/// the context code independently, it is helpful to use contexts directly. -/// -/// # Movement of `ContextHandle` -/// -/// `ContextHandle` keeps a pointer to a `Context` rather than keeping all of the data directly as -/// fields in order to have better control over where that data lives in memory. We always want that -/// data to be heap-allocated, and to never move once it has been initialized. The `ContextHandle`, -/// by contrast, should be treated like a normal Rust value with no such restrictions. -/// -/// Until the `Unpin` marker trait arrives in stable Rust, it is difficult to enforce this with the -/// type system alone, so we use a bit of unsafety and (hopefully) clever API design to ensure that -/// the data cannot be moved. 
-/// -/// We create the `Context` within a box to allocate it on the heap, then convert it into a raw -/// pointer to relinquish ownership. When accessing the internal structure via the `DerefMut` trait, -/// data must not be moved out of the `Context` with functions like `mem::replace`. -/// -/// # Layout -/// -/// Foreign code accesses the `internal` pointer in tests, so it is important that it is the first -/// member, and that the struct is `repr(C)`. -#[repr(C)] -pub struct ContextHandle { - internal: NonNull, -} - -impl Drop for ContextHandle { - fn drop(&mut self) { - unsafe { - // create a box from the pointer so that it'll get dropped - // and we won't leak `Context`s - Box::from_raw(self.internal.as_ptr()); - } - } -} - -impl std::ops::Deref for ContextHandle { - type Target = Context; - fn deref(&self) -> &Self::Target { - unsafe { self.internal.as_ref() } - } -} - -impl std::ops::DerefMut for ContextHandle { - fn deref_mut(&mut self) -> &mut Self::Target { - unsafe { self.internal.as_mut() } - } -} - -impl ContextHandle { - /// Create an all-zeroed `ContextHandle`. - pub fn new() -> Self { - let internal = NonNull::new(Box::into_raw(Box::new(Context::new()))) - .expect("Box::into_raw should never return NULL"); - ContextHandle { internal } - } - - pub fn create_and_init( - stack: &mut [u64], - fptr: usize, - args: &[Val], - ) -> Result { - let mut child = ContextHandle::new(); - Context::init(stack, &mut child, fptr, args)?; - Ok(child) - } -} - -struct CallStackBuilder<'a> { - offset: usize, - stack: &'a mut [u64], -} - -impl<'a> CallStackBuilder<'a> { - pub fn new(stack: &'a mut [u64]) -> Self { - CallStackBuilder { offset: 0, stack } - } - - fn push(&mut self, val: u64) { - self.offset += 1; - self.stack[self.stack.len() - self.offset] = val; - } - - /// Stores `args` onto the stack such that when a return address is written after, the - /// complete unit will be 16-byte aligned, as the x86_64 ABI requires. 
- /// - /// That is to say, `args` will be padded such that the current top of stack is 8-byte - /// aligned. - fn store_args(&mut self, args: &[u64]) { - let items_end = args.len() + self.offset; - - if items_end % 2 == 1 { - // we need to add one entry just before the arguments so that the arguments start on an - // aligned address. - self.push(0); - } - - for arg in args.iter().rev() { - self.push(*arg); - } - } - - fn offset(&self) -> usize { - self.offset - } - - fn into_inner(self) -> (&'a mut [u64], usize) { - (self.stack, self.offset) - } -} - -impl Context { - /// Initialize a new child context. - /// - /// - `stack`: The stack for the child; *must be 16-byte aligned*. - /// - /// - `child`: The context for the child. The fields of this structure will be overwritten by - /// `init`. - /// - /// - `fptr`: A pointer to the entrypoint for the child. Note that while the type signature here - /// is for a void function of no arguments (equivalent to `void (*fptr)(void)` in C), the - /// entrypoint actually can be a function of any argument or return type that corresponds to a - /// `val::Val` variant. - /// - /// - `args`: A slice of arguments for the `fptr` entrypoint. These must match the number and - /// types of `fptr`'s actual arguments exactly, otherwise swapping to this context will cause - /// undefined behavior. - /// - /// # Errors - /// - /// - `Error::UnalignedStack` if the _end_ of `stack` is not 16-byte aligned. - /// - /// # Examples - /// - /// ## C entrypoint - /// - /// This example initializes a context that will start in a C function `entrypoint` when first - /// swapped to. 
- /// - /// ```c - /// void entrypoint(uint64_t x, float y); - /// ``` - /// - /// ```no_run - /// # use lucet_runtime_internals::context::Context; - /// # use lucet_runtime_internals::val::Val; - /// extern "C" { fn entrypoint(x: u64, y: f32); } - /// // allocating an even number of `u64`s seems to reliably yield - /// // properly aligned stacks, but TODO do better - /// let mut stack = vec![0u64; 1024].into_boxed_slice(); - /// let mut child = Context::new(); - /// let res = Context::init( - /// &mut *stack, - /// &mut child, - /// entrypoint as usize, - /// &[Val::U64(120), Val::F32(3.14)], - /// ); - /// assert!(res.is_ok()); - /// ``` - /// - /// ## Rust entrypoint - /// - /// This example initializes a context that will start in a Rust function `entrypoint` when - /// first swapped to. Note that we mark `entrypoint` as `extern "C"` to make sure it is compiled - /// with C calling conventions. - /// - /// ```no_run - /// # use lucet_runtime_internals::context::{Context, ContextHandle}; - /// # use lucet_runtime_internals::val::Val; - /// extern "C" fn entrypoint(x: u64, y: f32) { } - /// // allocating an even number of `u64`s seems to reliably yield - /// // properly aligned stacks, but TODO do better - /// let mut stack = vec![0u64; 1024].into_boxed_slice(); - /// let mut child = Context::new(); - /// let res = Context::init( - /// &mut *stack, - /// &mut child, - /// entrypoint as usize, - /// &[Val::U64(120), Val::F32(3.14)], - /// ); - /// assert!(res.is_ok()); - /// ``` - /// - /// # Implementation details - /// - /// This prepares a stack for the child context structured as follows, assuming an 0x1000 byte - /// stack: - /// ```text - /// 0x1000: +-------------------------+ - /// 0x0ff8: | NULL | // Null added if necessary for alignment. - /// 0x0ff0: | spilled_arg_1 | // Guest arguments follow. - /// 0x0fe8: | spilled_arg_2 | - /// 0x0fe0: ~ spilled_arg_3 ~ // The three arguments here are just for show. 
- /// 0x0fd8: | lucet_context_backstop | <-- This forms an ABI-matching call frame for fptr. - /// 0x0fd0: | fptr | <-- The actual guest code we want to run. - /// 0x0fc8: | lucet_context_bootstrap | <-- The guest stack pointer starts here. - /// 0x0fc0: | | - /// 0x0XXX: ~ ~ // Rest of the stack needs no preparation. - /// 0x0000: | | - /// +-------------------------+ - /// ``` - /// - /// This packing of data on the stack is interwoven with noteworthy constraints on what the - /// backstop may do: - /// * The backstop must not return on the guest stack. - /// - The next value will be a spilled argument or NULL. Neither are an intended address. - /// * The backstop cannot have ABI-conforming spilled arguments. - /// - No code runs between `fptr` and `lucet_context_backstop`, so nothing exists to - /// clean up `fptr`'s arguments. `lucet_context_backstop` would have to adjust the - /// stack pointer by a variable amount, and it does not, so `rsp` will continue to - /// point to guest arguments. - /// - This is why bootstrap recieves arguments via rbp, pointing elsewhere on the stack. - /// - /// The bootstrap function must be careful, but is less constrained since it can clean up - /// and prepare a context for `fptr`. - pub fn init( - stack: &mut [u64], - child: &mut Context, - fptr: usize, - args: &[Val], - ) -> Result<(), Error> { - Context::init_with_callback( - stack, - child, - Context::default_backstop_callback, - ptr::null_mut(), - fptr, - args, - ) - } - - /// The default backstop callback does nothing, and is just a marker. - extern "C" fn default_backstop_callback(_: *mut Instance) {} - - /// Similar to `Context::init()`, but allows setting a callback function to be run when the - /// guest entrypoint returns. - /// - /// After the entrypoint function returns, but before swapping back to the parent context, - /// `backstop_callback` will be run with the single argument `callback_data`. 
- pub fn init_with_callback( - stack: &mut [u64], - child: &mut Context, - backstop_callback: unsafe extern "C" fn(*mut Instance), - callback_data: *mut Instance, - fptr: usize, - args: &[Val], - ) -> Result<(), Error> { - if !stack_is_aligned(stack) { - return Err(Error::UnalignedStack); - } - - if backstop_callback != Context::default_backstop_callback { - child.backstop_callback = backstop_callback as *const _; - child.callback_data = callback_data; - } - - let mut gp_args_ix = 0; - let mut fp_args_ix = 0; - let mut gp_regs_values = [0u64; 6]; - - let mut spilled_args = vec![]; - - for arg in args { - match val_to_reg(arg) { - RegVal::GpReg(v) => { - if gp_args_ix >= 6 { - spilled_args.push(val_to_stack(arg)); - } else { - gp_regs_values[gp_args_ix] = v; - gp_args_ix += 1; - } - } - RegVal::FpReg(v) => { - if fp_args_ix >= 8 { - spilled_args.push(val_to_stack(arg)); - } else { - child.bootstrap_fp_ix_arg(fp_args_ix, v); - fp_args_ix += 1; - } - } - } - } - - // set up an initial call stack for guests to bootstrap into and execute - let mut stack_builder = CallStackBuilder::new(stack); - - // we actually don't want to put an explicit pointer to these arguments anywhere. we'll - // line up the rest of the stack such that these are in argument position when we jump to - // `fptr`. - stack_builder.store_args(spilled_args.as_slice()); - - // the stack must be aligned in the environment we'll execute `fptr` from - this is an ABI - // requirement and can cause segfaults if not upheld. - assert_eq!( - stack_builder.offset() % 2, - 0, - "incorrect alignment for guest call frame" - ); - - // we execute the guest code via returns, so we make a "call stack" of routines like: - // -> lucet_context_backstop() - // -> fptr() - // -> lucet_context_bootstrap() - // - // with each address the start of the named function, so when the inner function - // completes it returns to begin the next function up. 
- stack_builder.push(lucet_context_backstop as u64); - stack_builder.push(fptr as u64); - - // add all general purpose arguments for the guest to be bootstrapped - for arg in gp_regs_values.iter() { - stack_builder.push(*arg); - } - - stack_builder.push(lucet_context_bootstrap as u64); - - let (stack, stack_start) = stack_builder.into_inner(); - - // Stack pointer: this points to the return address that will be used by `swap`, in place - // of the original (eg, in the host) return address. The return address this points to is - // the address of the first function to run on `swap`: `lucet_context_bootstrap`. - child.gpr.rsp = &mut stack[stack.len() - stack_start] as *mut u64 as u64; - - // Base pointer: `rbp` will be saved through all guest code, and preserved for when we - // reach the backstop. This allows us to prepare an argument for `lucet_context_backstop` - // even at the entrypoint of the guest. - child.gpr.rbp = child as *const Context as u64; - - Ok(()) - } - - /// Save the current context, and swap to another context. - /// - /// - `from`: the current context is written here - /// - `to`: the context to read from and swap to - /// - /// The current registers, including the stack pointer, are saved to `from`. The current stack - /// pointer is then replaced by the value saved in `to.gpr.rsp`, so when `swap` returns, it will - /// return to the pointer saved in `to`'s stack. - /// - /// If `to` was freshly initialized by passing it as the `child` argument to `init`, `swap` will - /// return to the function that bootstraps arguments and then calls the entrypoint that was - /// passed to `init`. - /// - /// If `to` was previously passed as the `from` argument to another call to `swap`, the program - /// will return as if from that _first_ call to `swap`. - /// - /// The address of `from` will be saved as `to.parent_ctx`. 
If `to` was initialized by `init`, - /// it will swap back to the `from` context when the entrypoint function returns via - /// `lucet_context_backstop`. - /// - /// # Safety - /// - /// The value in `to.gpr.rsp` must be a valid pointer into the stack that was originally passed - /// to `init` when the `to` context was initialized, or to the original stack created implicitly - /// by Rust. - /// - /// The registers saved in the `to` context must match the arguments expected by the entrypoint - /// of the function passed to `init`, or be unaltered from when they were previously written by - /// `swap`. - /// - /// If `to` was initialized by `init`, the `from` context must not be moved, dropped, or - /// otherwise invalidated while in the `to` context unless `to`'s entrypoint function never - /// returns. - /// - /// If `from` is never returned to, `swap`ped to, or `set` to, resources could leak due to - /// implicit `drop`s never being called: - /// - /// ```no_run - /// # use lucet_runtime_internals::context::Context; - /// fn f(x: Box, child: &mut Context) { - /// let mut xs = vec![187; 410757864530]; - /// xs[0] += *x; - /// - /// // manually drop here to avoid leaks - /// drop(x); - /// drop(xs); - /// - /// let mut parent = Context::new(); - /// unsafe { Context::swap(&mut parent, child); } - /// // implicit `drop(x)` and `drop(xs)` here never get called unless we swap back - /// } - /// ``` - /// - /// # Examples - /// - /// The typical case is to initialize a new child context, and then swap to it from a zeroed - /// parent context. 
- /// - /// ```no_run - /// # use lucet_runtime_internals::context::Context; - /// # extern "C" fn entrypoint() {} - /// # let mut stack = vec![0u64; 1024].into_boxed_slice(); - /// let mut parent = Context::new(); - /// let mut child = Context::new(); - /// Context::init( - /// &mut stack, - /// &mut child, - /// entrypoint as usize, - /// &[], - /// ).unwrap(); - /// - /// unsafe { Context::swap(&mut parent, &mut child); } - /// ``` - #[inline] - pub unsafe fn swap(from: &mut Context, to: &mut Context) { - to.parent_ctx = from; - lucet_context_swap(from as *mut _, to as *mut _); - } - - /// Swap to another context without saving the current context. - /// - /// - `to`: the context to read from and swap to - /// - /// The current registers, including the stack pointer, are discarded. The current stack pointer - /// is then replaced by the value saved in `to.gpr.rsp`, so when `swap` returns, it will return - /// to the pointer saved in `to`'s stack. - /// - /// If `to` was freshly initialized by passing it as the child to `init`, `swap` will return to - /// the function that bootstraps arguments and then calls the entrypoint that was passed to - /// `init`. - /// - /// If `to` was previously passed as the `from` argument to another call to `swap`, the program - /// will return as if from the call to `swap`. - /// - /// # Safety - /// - /// ## Stack and registers - /// - /// The value in `to.gpr.rsp` must be a valid pointer into the stack that was originally passed - /// to `init` when the context was initialized, or to the original stack created implicitly by - /// Rust. - /// - /// The registers saved in `to` must match the arguments expected by the entrypoint of the - /// function passed to `init`, or be unaltered from when they were previously written by `swap`. 
- /// - /// ## Returning - /// - /// If `to` is a context freshly initialized by `init` (as opposed to a context populated only - /// by `swap`, such as a host context), at least one of the following must be true, otherwise - /// the program will return to a context with uninitialized registers: - /// - /// - The `fptr` argument to `init` is a function that never returns - /// - /// - A valid context must have been passed as the `from` argument to `swap` when entering the - /// current context before this call to `set` - /// - /// ## Resource leaks - /// - /// Since control flow will not return to the calling context, care must be taken to ensure that - /// any resources owned by the calling context are manually dropped. The implicit `drop`s - /// inserted by Rust at the end of the calling scope will not be reached: - /// - /// ```no_run - /// # use lucet_runtime_internals::context::Context; - /// fn f(x: Box, child: &Context) { - /// let mut xs = vec![187; 410757864530]; - /// xs[0] += *x; - /// - /// // manually drop here to avoid leaks - /// drop(x); - /// drop(xs); - /// - /// unsafe { Context::set(child); } - /// // implicit `drop(x)` and `drop(xs)` here never get called - /// } - /// ``` - #[inline] - pub unsafe fn set(to: &Context) -> ! { - lucet_context_set(to as *const Context); - } - - /// Clear (zero) return values. - pub fn clear_retvals(&mut self) { - self.retvals_gp = [0; 2]; - let zero = unsafe { _mm_setzero_ps() }; - self.retval_fp = zero; - } - - /// Get the general-purpose return value at index `idx`. - /// - /// If this method is called before the context has returned from its original entrypoint, the - /// result will be `0`. - pub fn get_retval_gp(&self, idx: usize) -> u64 { - self.retvals_gp[idx] - } - - /// Get the floating point return value. - /// - /// If this method is called before the context has returned from its original entrypoint, the - /// result will be `0.0`. 
- pub fn get_retval_fp(&self) -> __m128 { - self.retval_fp - } - - /// Get the return value as an `UntypedRetVal`. - /// - /// This combines the 0th general-purpose return value, and the single floating-point return value. - pub fn get_untyped_retval(&self) -> UntypedRetVal { - let gp = self.get_retval_gp(0); - let fp = self.get_retval_fp(); - UntypedRetVal::new(gp, fp) - } - - /// Put one of the first 8 floating-point arguments into a `Context` register. - /// - /// - `ix`: ABI floating-point argument number - /// - `arg`: argument value - fn bootstrap_fp_ix_arg(&mut self, ix: usize, arg: __m128) { - match ix { - 0 => self.fpr.xmm0 = arg, - 1 => self.fpr.xmm1 = arg, - 2 => self.fpr.xmm2 = arg, - 3 => self.fpr.xmm3 = arg, - 4 => self.fpr.xmm4 = arg, - 5 => self.fpr.xmm5 = arg, - 6 => self.fpr.xmm6 = arg, - 7 => self.fpr.xmm7 = arg, - _ => panic!("unexpected fp register index {}", ix), - } - } -} - -/// Errors that may arise when working with contexts. -#[derive(Debug, Error)] -pub enum Error { - /// Raised when the bottom of the stack provided to `Context::init` is not 16-byte aligned - #[error("context initialized with unaligned stack")] - UnalignedStack, -} - -/// Check whether the bottom (highest address) of the stack is 16-byte aligned, as required by the -/// ABI. -fn stack_is_aligned(stack: &[u64]) -> bool { - let size = stack.len(); - let last_elt_addr = &stack[size - 1] as *const u64 as usize; - let bottom_addr = last_elt_addr + mem::size_of::(); - bottom_addr % 16 == 0 -} - -extern "C" { - /// Bootstraps arguments and calls the entrypoint via returning; implemented in assembly. - /// - /// Loads general-purpose arguments from the callee-saved registers in a `Context` to the - /// appropriate argument registers for the AMD64 ABI, and then returns to the entrypoint. - fn lucet_context_bootstrap(); - - /// Stores return values into the parent context, and then swaps to it; implemented in assembly. 
- /// - /// This is where the entrypoint function returns to, so that we swap back to the parent on - /// return. - fn lucet_context_backstop(); - - /// Saves the current context and performs the context switch. Implemented in assembly. - fn lucet_context_swap(from: *mut Context, to: *mut Context); - - /// Performs the context switch; implemented in assembly. - /// - /// Never returns because the current context is discarded. - pub(crate) fn lucet_context_set(to: *const Context) -> !; - - /// Runs an entry callback after performing a context switch. Implemented in assembly. - /// - /// In practice, this is used with `enter_guest_region` so that the guest will appropriately - /// set itself to be terminable upon entry before continuing to any guest code. - /// - /// `lucet_context_activate` is essentially a function with three arguments: - /// * rdi: the data for the entry callback. - /// * rsi: the address of the entry callback. - /// * rbx: the address of the guest code to execute. - /// - /// We do not actually define `lucet_context_activate` as having these arguments because we - /// manually load these arguments, as well as a pointer to this function, into the context's - /// registers. See `Instance::with_activation_routine` for more information. - /// - /// Note that `rbx` is used to store the address of the guest code because it is a callee-saved - /// register in the System V calling convention. It is also a non-violatile register on - /// Windows, which is a nice additional benefit. - /// - /// For more information, see `Instance::swap_and_return`, `Instance::with_activation_routine`, - /// `enter_guest_region`, and `lucet_context_activate`'s assembly implementation. 
- pub(crate) fn lucet_context_activate(); -} +mod sysdep; +pub(crate) use sysdep::arch_impl::GpRegs as GpRegs; +pub(crate) use sysdep::arch_impl::Context as Context; +pub(crate) use sysdep::arch_impl::Error as Error; +pub(crate) use sysdep::arch_impl::lucet_context_set as lucet_context_set; +pub(crate) use sysdep::arch_impl::lucet_context_activate as lucet_context_activate; diff --git a/lucet-runtime/lucet-runtime-internals/src/context/sysdep/i686/context_asm.S b/lucet-runtime/lucet-runtime-internals/src/context/sysdep/i686/context_asm.S new file mode 100644 index 000000000..21460418b --- /dev/null +++ b/lucet-runtime/lucet-runtime-internals/src/context/sysdep/i686/context_asm.S @@ -0,0 +1,226 @@ +/* + The lucet_context_swap function is taken from Xudong Huang's + generator-rs project. Its MIT license is provided below. + + Copyright (c) 2017 Xudong Huang + + Permission is hereby granted, free of charge, to any + person obtaining a copy of this software and associated + documentation files (the "Software"), to deal in the + Software without restriction, including without + limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of + the Software, and to permit persons to whom the Software + is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice + shall be included in all copies or substantial portions + of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF + ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT + SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ +*/ + +.text +.globl lucet_context_bootstrap +#ifdef __ELF__ +.type lucet_context_bootstrap,@function +#else +.globl _lucet_context_bootstrap +#endif +.align 16 +lucet_context_bootstrap: +_lucet_context_bootstrap: + // Move each of the argument values into the corresponding call + // argument register. +// pop %r9 +// pop %r8 + pop %ecx + pop %edx + pop %esi + pop %edi + + /* the next thing on the stack is the guest function - return to it */ + ret +#ifdef __ELF__ +.size lucet_context_bootstrap,.-lucet_context_bootstrap +#endif + +.text +.globl lucet_context_backstop +#ifdef __ELF__ +.type lucet_context_backstop,@function +#else +.globl _lucet_context_backstop +#endif +.align 16 +lucet_context_backstop: +_lucet_context_backstop: + // Note that `rbp` here really has no relation to any stack! + // Instead, it's a pointer to the guest context. + mov (10*8 + 8*16 + 8*2 + 16)(%ebp), %edi /* load the parent context to forward values in return value registers */ + mov %eax, (10*8 + 8*16 + 8*0)(%ebp) /* store return values before swapping back -- offset is offsetof(struct lucet_context, retvals) */ + mov %edx, (10*8 + 8*16 + 8*1)(%ebp) + movdqu %xmm0, (10*8 + 8*16 + 8*2)(%ebp) /* floating-point return value */ + + // load `backstop_callback`, but skip calling it if it's null + mov (10*8 + 8*16 + 8*2 + 16 + 8)(%ebp), %esi + test %esi, %esi +#ifdef __ELF__ + jz no_backstop_callback@PLT +#else + jz no_backstop_callback +#endif + + // load `callback_data`, arg 1 + mov (10*8 + 8*16 + 8*2 + 16 + 8 + 8)(%ebp), %edi + // call `backstop_callback` + call *%esi + +no_backstop_callback: + mov %ebp, %edi /* load the guest context to the "from" argument */ + mov (10*8 + 8*16 + 8*2 + 16)(%ebp), %esi /* load the parent context to the "to" argument */ + +#ifdef __ELF__ + jmp lucet_context_swap@PLT +#else + jmp lucet_context_swap +#endif +#ifdef __ELF__ +.size lucet_context_backstop,.-lucet_context_backstop +#endif + +.text +.globl lucet_context_swap +#ifdef __ELF__ +.type 
lucet_context_swap,@function +#else +.globl _lucet_context_swap +#endif +.align 16 +lucet_context_swap: +_lucet_context_swap: + // store everything in offsets from rdi (1st arg) + mov %ebx, (0*8)(%edi) + mov %esp, (1*8)(%edi) + mov %ebp, (2*8)(%edi) + mov %edi, (3*8)(%edi) +// mov %r12, (4*8)(%rdi) +// mov %r13, (5*8)(%rdi) +// mov %r14, (6*8)(%rdi) +// mov %r15, (7*8)(%rdi) + mov %esi, (8*8)(%edi) + + movdqu %xmm0, (10*8 + 0*16)(%edi) + movdqu %xmm1, (10*8 + 1*16)(%edi) + movdqu %xmm2, (10*8 + 2*16)(%edi) + movdqu %xmm3, (10*8 + 3*16)(%edi) + movdqu %xmm4, (10*8 + 4*16)(%edi) + movdqu %xmm5, (10*8 + 5*16)(%edi) + movdqu %xmm6, (10*8 + 6*16)(%edi) + movdqu %xmm7, (10*8 + 7*16)(%edi) + + // load everything from offsets from rsi (2nd arg) + mov (0*8)(%esi), %ebx + mov (1*8)(%esi), %esp + mov (2*8)(%esi), %ebp + mov (3*8)(%esi), %edi +// mov (4*8)(%rsi), %r12 +// mov (5*8)(%rsi), %r13 +// mov (6*8)(%rsi), %r14 +// mov (7*8)(%rsi), %r15 + + movdqu (10*8 + 0*16)(%esi), %xmm0 + movdqu (10*8 + 1*16)(%esi), %xmm1 + movdqu (10*8 + 2*16)(%esi), %xmm2 + movdqu (10*8 + 3*16)(%esi), %xmm3 + movdqu (10*8 + 4*16)(%esi), %xmm4 + movdqu (10*8 + 5*16)(%esi), %xmm5 + movdqu (10*8 + 6*16)(%esi), %xmm6 + movdqu (10*8 + 7*16)(%esi), %xmm7 + + // restore rsi when we're done with the context pointer + mov (8*8)(%esi), %esi + + ret +#ifdef __ELF__ +.size lucet_context_swap,.-lucet_context_swap +#endif + +.text +.globl lucet_context_set +#ifdef __ELF__ +.type lucet_context_set,@function +#else +.globl _lucet_context_set +#endif +.align 16 +lucet_context_set: +_lucet_context_set: + // load everything from offsets from rdi (1st arg) + mov (0*8)(%edi), %ebx + mov (1*8)(%edi), %esp + mov (2*8)(%edi), %ebp +// mov (4*8)(%edi), %r12 +// mov (5*8)(%edi), %r13 +// mov (6*8)(%edi), %r14 +// mov (7*8)(%edi), %r15 + mov (8*8)(%edi), %esi + + movdqu (10*8 + 0*16)(%edi), %xmm0 + movdqu (10*8 + 1*16)(%edi), %xmm1 + movdqu (10*8 + 2*16)(%edi), %xmm2 + movdqu (10*8 + 3*16)(%edi), %xmm3 + movdqu (10*8 + 
4*16)(%edi), %xmm4
+    movdqu (10*8 + 5*16)(%edi), %xmm5
+    movdqu (10*8 + 6*16)(%edi), %xmm6
+    movdqu (10*8 + 7*16)(%edi), %xmm7
+
+    // load rdi from itself last
+    mov (3*8)(%edi), %edi
+    ret
+#ifdef __ELF__
+.size lucet_context_set,.-lucet_context_set
+#endif
+
+.text
+.globl lucet_context_activate
+#ifdef __ELF__
+.type lucet_context_activate,@function
+#else
+.globl _lucet_context_activate
+#endif
+.align 16
+// `lucet_context_activate` is essentially a function with three arguments:
+//   * rdi: the data for the entry callback.
+//   * rsi: the address of the entry callback.
+//   * rbx: the address of the guest code to execute.
+//
+// See `lucet_runtime_internals::context::lucet_context_activate` for more info.
+//
+// Note that `rbx` is used to store the address of the guest code because it is
+// a callee-saved register in the System V calling convention. It is also a
+// non-volatile register on Windows, which is a nice benefit.
lucet_context_activate:
_lucet_context_activate:
+    // First, we call the entry callback whose address is stored in `rsi`,
+    // passing along the value of `rdi` as the first argument.
+    call *%esi
+    // Now, jump to the guest code at the address in `rbx`.
+    jmp *%ebx
+#ifdef __ELF__
+.size lucet_context_activate,.-lucet_context_activate
+#endif
+
+/* Mark that we don't need executable stack. 
*/
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/lucet-runtime/lucet-runtime-internals/src/context/sysdep/i686/mod.rs b/lucet-runtime/lucet-runtime-internals/src/context/sysdep/i686/mod.rs
new file mode 100644
index 000000000..b780b20b3
--- /dev/null
+++ b/lucet-runtime/lucet-runtime-internals/src/context/sysdep/i686/mod.rs
@@ -0,0 +1,730 @@
+#![allow(improper_ctypes)]
+
+#[cfg(test)]
+mod tests;
+
+use crate::instance::Instance;
+use crate::val::{val_to_reg, val_to_stack, RegVal, UntypedRetVal, Val};
+
+use std::arch::x86::{__m128, _mm_setzero_ps};
+use std::ptr::NonNull;
+use std::{mem, ptr};
+use thiserror::Error;
+
+/// Callee-saved general-purpose registers in the x86 (i686) ABI.
+///
+/// # Layout
+///
+/// `repr(C)` is required to preserve the ordering of members, which are read by the assembly at
+/// hard-coded offsets.
+///
+/// # TODOs
+///
+/// - Unlike the C code, this doesn't use the `packed` repr due to warnings in the Nomicon:
+///   <https://doc.rust-lang.org/nomicon/other-reprs.html>. Since the members are all
+///   `u32`, this should be fine?
+#[repr(C)]
+pub(crate) struct GpRegs {
+    pub(crate) ebx: u32,
+    pub(crate) esp: u32,
+    ebp: u32,
+    pub(crate) edi: u32,
+    pub(crate) esi: u32,
+    pub(crate) fill: u32, // TODO: iirc this needs to be an even number of items?
+}
+
+impl GpRegs {
+    fn new() -> Self {
+        GpRegs {
+            ebx: 0,
+            esp: 0,
+            ebp: 0,
+            edi: 0,
+            esi: 0,
+            fill: 0,
+        }
+    }
+}
+
+/// Floating-point argument registers in the x86 (i686) ABI.
+///
+/// # Layout
+///
+/// `repr(C)` is required to preserve the ordering of members, which are read by the assembly at
+/// hard-coded offsets.
+///
+/// # TODOs
+///
+/// - Unlike the C code, this doesn't use the `packed` repr due to warnings in the Nomicon:
+///   <https://doc.rust-lang.org/nomicon/other-reprs.html>. Since the members are all
+///   `__m128`, this should be fine? 
+#[repr(C)] +struct FpRegs { + xmm0: __m128, + xmm1: __m128, + xmm2: __m128, + xmm3: __m128, + xmm4: __m128, + xmm5: __m128, + xmm6: __m128, + xmm7: __m128, +} + +impl FpRegs { + fn new() -> Self { + let zero = unsafe { _mm_setzero_ps() }; + FpRegs { + xmm0: zero, + xmm1: zero, + xmm2: zero, + xmm3: zero, + xmm4: zero, + xmm5: zero, + xmm6: zero, + xmm7: zero, + } + } +} + +/// Everything we need to make a context switch: a signal mask, and the registers and return values +/// that are manipulated directly by assembly code. +/// +/// A context also tracks which other context to swap back to if a child context's entrypoint function +/// returns, and can optionally contain a callback function to be run just before that swap occurs. +/// +/// # Layout +/// +/// The `repr(C)` and order of fields in this struct are very important, as the assembly code reads +/// and writes hard-coded offsets from the base of the struct. Without `repr(C)`, Rust is free to +/// reorder the fields. +/// +/// Contexts are also `repr(align(64))` in order to align to cache lines and minimize contention +/// when running multiple threads. +/// +/// # Movement +/// +/// `Context` values must not be moved once they've been initialized. Contexts contain a pointer to +/// their stack, which in turn contains a pointer back to the context. If the context gets moved, +/// that pointer becomes invalid, and the behavior of returning from that context becomes undefined. +#[repr(C, align(64))] +pub struct Context { + pub(crate) gpr: GpRegs, + fpr: FpRegs, + retvals_gp: [u32; 2], + retval_fp: __m128, + parent_ctx: *mut Context, + // TODO ACF 2019-10-23: make Instance into a generic parameter? + backstop_callback: *const unsafe extern "C" fn(*mut Instance), + callback_data: *mut Instance, +} + +impl Context { + /// Create an all-zeroed `Context`. 
+ pub fn new() -> Self { + Context { + gpr: GpRegs::new(), + fpr: FpRegs::new(), + retvals_gp: [0; 2], + retval_fp: unsafe { _mm_setzero_ps() }, + parent_ctx: ptr::null_mut(), + backstop_callback: Context::default_backstop_callback as *const _, + callback_data: ptr::null_mut(), + } + } + + /// Get a raw pointer to the instance's callback data. + pub(crate) fn callback_data_ptr(&self) -> *mut Instance { + self.callback_data + } +} + +/// A wrapper around a `Context`, primarily meant for use in test code. +/// +/// Users of this library interact with contexts implicitly via `Instance` values, but for testing +/// the context code independently, it is helpful to use contexts directly. +/// +/// # Movement of `ContextHandle` +/// +/// `ContextHandle` keeps a pointer to a `Context` rather than keeping all of the data directly as +/// fields in order to have better control over where that data lives in memory. We always want that +/// data to be heap-allocated, and to never move once it has been initialized. The `ContextHandle`, +/// by contrast, should be treated like a normal Rust value with no such restrictions. +/// +/// Until the `Unpin` marker trait arrives in stable Rust, it is difficult to enforce this with the +/// type system alone, so we use a bit of unsafety and (hopefully) clever API design to ensure that +/// the data cannot be moved. +/// +/// We create the `Context` within a box to allocate it on the heap, then convert it into a raw +/// pointer to relinquish ownership. When accessing the internal structure via the `DerefMut` trait, +/// data must not be moved out of the `Context` with functions like `mem::replace`. +/// +/// # Layout +/// +/// Foreign code accesses the `internal` pointer in tests, so it is important that it is the first +/// member, and that the struct is `repr(C)`. 
+#[repr(C)]
+pub struct ContextHandle {
+    internal: NonNull<Context>,
+}
+
+impl Drop for ContextHandle {
+    fn drop(&mut self) {
+        unsafe {
+            // create a box from the pointer so that it'll get dropped
+            // and we won't leak `Context`s
+            Box::from_raw(self.internal.as_ptr());
+        }
+    }
+}
+
+impl std::ops::Deref for ContextHandle {
+    type Target = Context;
+    fn deref(&self) -> &Self::Target {
+        unsafe { self.internal.as_ref() }
+    }
+}
+
+impl std::ops::DerefMut for ContextHandle {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe { self.internal.as_mut() }
+    }
+}
+
+impl ContextHandle {
+    /// Create an all-zeroed `ContextHandle`.
+    pub fn new() -> Self {
+        let internal = NonNull::new(Box::into_raw(Box::new(Context::new())))
+            .expect("Box::into_raw should never return NULL");
+        ContextHandle { internal }
+    }
+
+    pub fn create_and_init(
+        stack: &mut [u32],
+        fptr: usize,
+        args: &[Val],
+    ) -> Result<ContextHandle, Error> {
+        let mut child = ContextHandle::new();
+        Context::init(stack, &mut child, fptr, args)?;
+        Ok(child)
+    }
+}
+
+struct CallStackBuilder<'a> {
+    offset: usize,
+    stack: &'a mut [u32],
+}
+
+impl<'a> CallStackBuilder<'a> {
+    pub fn new(stack: &'a mut [u32]) -> Self {
+        CallStackBuilder { offset: 0, stack }
+    }
+
+    fn push(&mut self, val: u32) {
+        self.offset += 1;
+        self.stack[self.stack.len() - self.offset] = val;
+    }
+
+    /// Stores `args` onto the stack such that when a return address is written after, the
+    /// complete unit will be 8-byte aligned.
+    ///
+    /// TODO: dunno if this is necessary for any calling convention. just took 64-bit numbers and
+    /// cut em in half.
+    ///
+    /// That is to say, `args` will be padded such that the current top of stack is 4-byte
+    /// aligned.
+    fn store_args(&mut self, args: &[u32]) {
+        let items_end = args.len() + self.offset;
+
+        if items_end % 2 == 1 {
+            // we need to add one entry just before the arguments so that the arguments start on an
+            // aligned address. 
+ self.push(0); + } + + for arg in args.iter().rev() { + self.push(*arg); + } + } + + fn offset(&self) -> usize { + self.offset + } + + fn into_inner(self) -> (&'a mut [u32], usize) { + (self.stack, self.offset) + } +} + +impl Context { + /// Initialize a new child context. + /// + /// - `stack`: The stack for the child; *must be 16-byte aligned*. + /// + /// - `child`: The context for the child. The fields of this structure will be overwritten by + /// `init`. + /// + /// - `fptr`: A pointer to the entrypoint for the child. Note that while the type signature here + /// is for a void function of no arguments (equivalent to `void (*fptr)(void)` in C), the + /// entrypoint actually can be a function of any argument or return type that corresponds to a + /// `val::Val` variant. + /// + /// - `args`: A slice of arguments for the `fptr` entrypoint. These must match the number and + /// types of `fptr`'s actual arguments exactly, otherwise swapping to this context will cause + /// undefined behavior. + /// + /// # Errors + /// + /// - `Error::UnalignedStack` if the _end_ of `stack` is not 16-byte aligned. + /// + /// # Examples + /// + /// ## C entrypoint + /// + /// This example initializes a context that will start in a C function `entrypoint` when first + /// swapped to. 
+ /// + /// ```c + /// void entrypoint(uint64_t x, float y); + /// ``` + /// + /// ```no_run + /// # use lucet_runtime_internals::context::Context; + /// # use lucet_runtime_internals::val::Val; + /// extern "C" { fn entrypoint(x: u64, y: f32); } + /// // allocating an even number of `u64`s seems to reliably yield + /// // properly aligned stacks, but TODO do better + /// let mut stack = vec![0u32; 1024].into_boxed_slice(); + /// let mut child = Context::new(); + /// let res = Context::init( + /// &mut *stack, + /// &mut child, + /// entrypoint as usize, + /// &[Val::U64(120), Val::F32(3.14)], + /// ); + /// assert!(res.is_ok()); + /// ``` + /// + /// ## Rust entrypoint + /// + /// This example initializes a context that will start in a Rust function `entrypoint` when + /// first swapped to. Note that we mark `entrypoint` as `extern "C"` to make sure it is compiled + /// with C calling conventions. + /// + /// ```no_run + /// # use lucet_runtime_internals::context::{Context, ContextHandle}; + /// # use lucet_runtime_internals::val::Val; + /// extern "C" fn entrypoint(x: u64, y: f32) { } + /// // allocating an even number of `u64`s seems to reliably yield + /// // properly aligned stacks, but TODO do better + /// let mut stack = vec![0u32; 1024].into_boxed_slice(); + /// let mut child = Context::new(); + /// let res = Context::init( + /// &mut *stack, + /// &mut child, + /// entrypoint as usize, + /// &[Val::U64(120), Val::F32(3.14)], + /// ); + /// assert!(res.is_ok()); + /// ``` + /// + /// # Implementation details + /// + /// This prepares a stack for the child context structured as follows, assuming an 0x1000 byte + /// stack: + /// ```text + /// 0x1000: +-------------------------+ + /// 0x0ff8: | NULL | // Null added if necessary for alignment. + /// 0x0ff0: | spilled_arg_1 | // Guest arguments follow. + /// 0x0fe8: | spilled_arg_2 | + /// 0x0fe0: ~ spilled_arg_3 ~ // The three arguments here are just for show. 
+    /// 0x0fd8: | lucet_context_backstop  | <-- This forms an ABI-matching call frame for fptr.
+    /// 0x0fd0: | fptr                    | <-- The actual guest code we want to run.
+    /// 0x0fc8: | lucet_context_bootstrap | <-- The guest stack pointer starts here.
+    /// 0x0fc0: |                         |
+    /// 0x0XXX: ~                         ~ // Rest of the stack needs no preparation.
+    /// 0x0000: |                         |
+    ///         +-------------------------+
+    /// ```
+    ///
+    /// This packing of data on the stack is interwoven with noteworthy constraints on what the
+    /// backstop may do:
+    /// * The backstop must not return on the guest stack.
+    ///   - The next value will be a spilled argument or NULL. Neither are an intended address.
+    /// * The backstop cannot have ABI-conforming spilled arguments.
+    ///   - No code runs between `fptr` and `lucet_context_backstop`, so nothing exists to
+    ///     clean up `fptr`'s arguments. `lucet_context_backstop` would have to adjust the
+    ///     stack pointer by a variable amount, and it does not, so `esp` will continue to
+    ///     point to guest arguments.
+    ///   - This is why bootstrap receives arguments via ebp, pointing elsewhere on the stack.
+    ///
+    /// The bootstrap function must be careful, but is less constrained since it can clean up
+    /// and prepare a context for `fptr`.
+    pub fn init(
+        stack: &mut [u32],
+        child: &mut Context,
+        fptr: usize,
+        args: &[Val],
+    ) -> Result<(), Error> {
+        Context::init_with_callback(
+            stack,
+            child,
+            Context::default_backstop_callback,
+            ptr::null_mut(),
+            fptr,
+            args,
+        )
+    }
+
+    /// The default backstop callback does nothing, and is just a marker.
+    extern "C" fn default_backstop_callback(_: *mut Instance) {}
+
+    /// Similar to `Context::init()`, but allows setting a callback function to be run when the
+    /// guest entrypoint returns.
+    ///
+    /// After the entrypoint function returns, but before swapping back to the parent context,
+    /// `backstop_callback` will be run with the single argument `callback_data`. 
+ pub fn init_with_callback( + stack: &mut [u32], + child: &mut Context, + backstop_callback: unsafe extern "C" fn(*mut Instance), + callback_data: *mut Instance, + fptr: usize, + args: &[Val], + ) -> Result<(), Error> { + if !stack_is_aligned(stack) { + return Err(Error::UnalignedStack); + } + + if backstop_callback != Context::default_backstop_callback { + child.backstop_callback = backstop_callback as *const _; + child.callback_data = callback_data; + } + + let mut gp_args_ix = 0; + let mut fp_args_ix = 0; + let mut gp_regs_values = [0u32; 6]; + + let mut spilled_args = vec![]; + + for arg in args { + match val_to_reg(arg) { + RegVal::GpReg(v) => { + if gp_args_ix >= 6 { + spilled_args.push(val_to_stack(arg)); + } else { + gp_regs_values[gp_args_ix] = v; + gp_args_ix += 1; + } + } + RegVal::FpReg(v) => { + if fp_args_ix >= 8 { + spilled_args.push(val_to_stack(arg)); + } else { + child.bootstrap_fp_ix_arg(fp_args_ix, v); + fp_args_ix += 1; + } + } + } + } + + // set up an initial call stack for guests to bootstrap into and execute + let mut stack_builder = CallStackBuilder::new(stack); + + // we actually don't want to put an explicit pointer to these arguments anywhere. we'll + // line up the rest of the stack such that these are in argument position when we jump to + // `fptr`. + stack_builder.store_args(spilled_args.as_slice()); + + // the stack must be aligned in the environment we'll execute `fptr` from - this is an ABI + // requirement and can cause segfaults if not upheld. + assert_eq!( + stack_builder.offset() % 2, + 0, + "incorrect alignment for guest call frame" + ); + + // we execute the guest code via returns, so we make a "call stack" of routines like: + // -> lucet_context_backstop() + // -> fptr() + // -> lucet_context_bootstrap() + // + // with each address the start of the named function, so when the inner function + // completes it returns to begin the next function up. 
+ stack_builder.push(lucet_context_backstop as u32); + stack_builder.push(fptr as u32); + + // add all general purpose arguments for the guest to be bootstrapped + for arg in gp_regs_values.iter() { + stack_builder.push(*arg); + } + + stack_builder.push(lucet_context_bootstrap as u32); + + let (stack, stack_start) = stack_builder.into_inner(); + + // Stack pointer: this points to the return address that will be used by `swap`, in place + // of the original (eg, in the host) return address. The return address this points to is + // the address of the first function to run on `swap`: `lucet_context_bootstrap`. + child.gpr.esp = &mut stack[stack.len() - stack_start] as *mut u32 as u32; + + // Base pointer: `ebp` will be saved through all guest code, and preserved for when we + // reach the backstop. This allows us to prepare an argument for `lucet_context_backstop` + // even at the entrypoint of the guest. + child.gpr.ebp = child as *const Context as u32; + + Ok(()) + } + + /// Save the current context, and swap to another context. + /// + /// - `from`: the current context is written here + /// - `to`: the context to read from and swap to + /// + /// The current registers, including the stack pointer, are saved to `from`. The current stack + /// pointer is then replaced by the value saved in `to.gpr.esp`, so when `swap` returns, it will + /// return to the pointer saved in `to`'s stack. + /// + /// If `to` was freshly initialized by passing it as the `child` argument to `init`, `swap` will + /// return to the function that bootstraps arguments and then calls the entrypoint that was + /// passed to `init`. + /// + /// If `to` was previously passed as the `from` argument to another call to `swap`, the program + /// will return as if from that _first_ call to `swap`. + /// + /// The address of `from` will be saved as `to.parent_ctx`. 
If `to` was initialized by `init`, + /// it will swap back to the `from` context when the entrypoint function returns via + /// `lucet_context_backstop`. + /// + /// # Safety + /// + /// The value in `to.gpr.esp` must be a valid pointer into the stack that was originally passed + /// to `init` when the `to` context was initialized, or to the original stack created implicitly + /// by Rust. + /// + /// The registers saved in the `to` context must match the arguments expected by the entrypoint + /// of the function passed to `init`, or be unaltered from when they were previously written by + /// `swap`. + /// + /// If `to` was initialized by `init`, the `from` context must not be moved, dropped, or + /// otherwise invalidated while in the `to` context unless `to`'s entrypoint function never + /// returns. + /// + /// If `from` is never returned to, `swap`ped to, or `set` to, resources could leak due to + /// implicit `drop`s never being called: + /// + /// ```no_run + /// # use lucet_runtime_internals::context::Context; + /// fn f(x: Box, child: &mut Context) { + /// let mut xs = vec![187; 410757864530]; + /// xs[0] += *x; + /// + /// // manually drop here to avoid leaks + /// drop(x); + /// drop(xs); + /// + /// let mut parent = Context::new(); + /// unsafe { Context::swap(&mut parent, child); } + /// // implicit `drop(x)` and `drop(xs)` here never get called unless we swap back + /// } + /// ``` + /// + /// # Examples + /// + /// The typical case is to initialize a new child context, and then swap to it from a zeroed + /// parent context. 
+ /// + /// ```no_run + /// # use lucet_runtime_internals::context::Context; + /// # extern "C" fn entrypoint() {} + /// # let mut stack = vec![0u32; 1024].into_boxed_slice(); + /// let mut parent = Context::new(); + /// let mut child = Context::new(); + /// Context::init( + /// &mut stack, + /// &mut child, + /// entrypoint as usize, + /// &[], + /// ).unwrap(); + /// + /// unsafe { Context::swap(&mut parent, &mut child); } + /// ``` + #[inline] + pub unsafe fn swap(from: &mut Context, to: &mut Context) { + to.parent_ctx = from; + lucet_context_swap(from as *mut _, to as *mut _); + } + + /// Swap to another context without saving the current context. + /// + /// - `to`: the context to read from and swap to + /// + /// The current registers, including the stack pointer, are discarded. The current stack pointer + /// is then replaced by the value saved in `to.gpr.esp`, so when `swap` returns, it will return + /// to the pointer saved in `to`'s stack. + /// + /// If `to` was freshly initialized by passing it as the child to `init`, `swap` will return to + /// the function that bootstraps arguments and then calls the entrypoint that was passed to + /// `init`. + /// + /// If `to` was previously passed as the `from` argument to another call to `swap`, the program + /// will return as if from the call to `swap`. + /// + /// # Safety + /// + /// ## Stack and registers + /// + /// The value in `to.gpr.esp` must be a valid pointer into the stack that was originally passed + /// to `init` when the context was initialized, or to the original stack created implicitly by + /// Rust. + /// + /// The registers saved in `to` must match the arguments expected by the entrypoint of the + /// function passed to `init`, or be unaltered from when they were previously written by `swap`. 
+ /// + /// ## Returning + /// + /// If `to` is a context freshly initialized by `init` (as opposed to a context populated only + /// by `swap`, such as a host context), at least one of the following must be true, otherwise + /// the program will return to a context with uninitialized registers: + /// + /// - The `fptr` argument to `init` is a function that never returns + /// + /// - A valid context must have been passed as the `from` argument to `swap` when entering the + /// current context before this call to `set` + /// + /// ## Resource leaks + /// + /// Since control flow will not return to the calling context, care must be taken to ensure that + /// any resources owned by the calling context are manually dropped. The implicit `drop`s + /// inserted by Rust at the end of the calling scope will not be reached: + /// + /// ```no_run + /// # use lucet_runtime_internals::context::Context; + /// fn f(x: Box, child: &Context) { + /// let mut xs = vec![187; 410757864530]; + /// xs[0] += *x; + /// + /// // manually drop here to avoid leaks + /// drop(x); + /// drop(xs); + /// + /// unsafe { Context::set(child); } + /// // implicit `drop(x)` and `drop(xs)` here never get called + /// } + /// ``` + #[inline] + pub unsafe fn set(to: &Context) -> ! { + lucet_context_set(to as *const Context); + } + + /// Clear (zero) return values. + pub fn clear_retvals(&mut self) { + self.retvals_gp = [0; 2]; + let zero = unsafe { _mm_setzero_ps() }; + self.retval_fp = zero; + } + + /// Get the general-purpose return value at index `idx`. + /// + /// If this method is called before the context has returned from its original entrypoint, the + /// result will be `0`. + pub fn get_retval_gp(&self, idx: usize) -> u32 { + self.retvals_gp[idx] + } + + /// Get the floating point return value. + /// + /// If this method is called before the context has returned from its original entrypoint, the + /// result will be `0.0`. 
+    pub fn get_retval_fp(&self) -> __m128 {
+        self.retval_fp
+    }
+
+    /// Get the return value as an `UntypedRetVal`.
+    ///
+    /// This combines the 0th general-purpose return value, and the single floating-point return value.
+    pub fn get_untyped_retval(&self) -> UntypedRetVal {
+        let gp = self.get_retval_gp(0);
+        let fp = self.get_retval_fp();
+        UntypedRetVal::new(gp, fp)
+    }
+
+    /// Put one of the first 8 floating-point arguments into a `Context` register.
+    ///
+    /// - `ix`: ABI floating-point argument number
+    /// - `arg`: argument value
+    fn bootstrap_fp_ix_arg(&mut self, ix: usize, arg: __m128) {
+        match ix {
+            0 => self.fpr.xmm0 = arg,
+            1 => self.fpr.xmm1 = arg,
+            2 => self.fpr.xmm2 = arg,
+            3 => self.fpr.xmm3 = arg,
+            4 => self.fpr.xmm4 = arg,
+            5 => self.fpr.xmm5 = arg,
+            6 => self.fpr.xmm6 = arg,
+            7 => self.fpr.xmm7 = arg,
+            _ => panic!("unexpected fp register index {}", ix),
+        }
+    }
+}
+
+/// Errors that may arise when working with contexts.
+#[derive(Debug, Error)]
+pub enum Error {
+    /// Raised when the bottom of the stack provided to `Context::init` is not properly aligned
+    #[error("context initialized with unaligned stack")]
+    UnalignedStack,
+}
+
+/// Check whether the bottom (highest address) of the stack is aligned as required by the ABI.
+///
+/// TODO(review): this check enforces 8-byte alignment; confirm the stack-alignment requirement
+/// of the i386 ABI and make this consistent with the `Error::UnalignedStack` documentation.
+fn stack_is_aligned(stack: &[u32]) -> bool {
+    let size = stack.len();
+    let last_elt_addr = &stack[size - 1] as *const u32 as usize;
+    let bottom_addr = last_elt_addr + mem::size_of::<u32>();
+    bottom_addr % 8 == 0
+}
+
+extern "C" {
+    /// Bootstraps arguments and calls the entrypoint via returning; implemented in assembly.
+    ///
+    /// Loads general-purpose arguments from the callee-saved registers in a `Context` to the
+    /// appropriate argument registers for the AMD64 ABI, and then returns to the entrypoint.
+    fn lucet_context_bootstrap();
+
+    /// Stores return values into the parent context, and then swaps to it; implemented in assembly.
+    ///
+    /// This is where the entrypoint function returns to, so that we swap back to the parent on
+    /// return.
+    fn lucet_context_backstop();
+
+    /// Saves the current context and performs the context switch. Implemented in assembly.
+    fn lucet_context_swap(from: *mut Context, to: *mut Context);
+
+    /// Performs the context switch; implemented in assembly.
+    ///
+    /// Never returns because the current context is discarded.
+    pub(crate) fn lucet_context_set(to: *const Context) -> !;
+
+    /// Runs an entry callback after performing a context switch. Implemented in assembly.
+    ///
+    /// In practice, this is used with `enter_guest_region` so that the guest will appropriately
+    /// set itself to be terminable upon entry before continuing to any guest code.
+    ///
+    /// `lucet_context_activate` is essentially a function with three arguments:
+    ///   * rdi: the data for the entry callback.
+    ///   * rsi: the address of the entry callback.
+    ///   * rbx: the address of the guest code to execute.
+    ///
+    /// We do not actually define `lucet_context_activate` as having these arguments because we
+    /// manually load these arguments, as well as a pointer to this function, into the context's
+    /// registers. See `Instance::with_activation_routine` for more information.
+    ///
+    /// Note that `rbx` is used to store the address of the guest code because it is a callee-saved
+    /// register in the System V calling convention. It is also a non-volatile register on
+    /// Windows, which is a nice additional benefit.
+    ///
+    /// For more information, see `Instance::swap_and_return`, `Instance::with_activation_routine`,
+    /// `enter_guest_region`, and `lucet_context_activate`'s assembly implementation.
+ pub(crate) fn lucet_context_activate(); +} diff --git a/lucet-runtime/lucet-runtime-internals/src/context/sysdep/mod.rs b/lucet-runtime/lucet-runtime-internals/src/context/sysdep/mod.rs new file mode 100644 index 000000000..05a6ffa12 --- /dev/null +++ b/lucet-runtime/lucet-runtime-internals/src/context/sysdep/mod.rs @@ -0,0 +1,11 @@ +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(target_arch = "x86")] { + pub mod i686; + pub use i686 as arch_impl; + } else if #[cfg(target_arch = "x86_64")] { + pub mod x86_64; + pub use x86_64 as arch_impl; + } +} diff --git a/lucet-runtime/lucet-runtime-internals/src/context/context_asm.S b/lucet-runtime/lucet-runtime-internals/src/context/sysdep/x86_64/context_asm.S similarity index 100% rename from lucet-runtime/lucet-runtime-internals/src/context/context_asm.S rename to lucet-runtime/lucet-runtime-internals/src/context/sysdep/x86_64/context_asm.S diff --git a/lucet-runtime/lucet-runtime-internals/src/context/sysdep/x86_64/mod.rs b/lucet-runtime/lucet-runtime-internals/src/context/sysdep/x86_64/mod.rs new file mode 100644 index 000000000..a6f3b13b3 --- /dev/null +++ b/lucet-runtime/lucet-runtime-internals/src/context/sysdep/x86_64/mod.rs @@ -0,0 +1,732 @@ +#![allow(improper_ctypes)] + +#[cfg(test)] +mod tests; + +use crate::instance::Instance; +use crate::val::{val_to_reg, val_to_stack, RegVal, UntypedRetVal, Val}; + +use std::arch::x86_64::{__m128, _mm_setzero_ps}; +use std::ptr::NonNull; +use std::{mem, ptr}; +use thiserror::Error; + +/// Callee-saved general-purpose registers in the AMD64 ABI. +/// +/// # Layout +/// +/// `repr(C)` is required to preserve the ordering of members, which are read by the assembly at +/// hard-coded offsets. +/// +/// # TODOs +/// +/// - Unlike the C code, this doesn't use the `packed` repr due to warnings in the Nomicon: +/// . Since the members are all +/// `u64`, this should be fine? 
+#[repr(C)] +pub(crate) struct GpRegs { + pub(crate) rbx: u64, + pub(crate) rsp: u64, + rbp: u64, + pub(crate) rdi: u64, + r12: u64, + r13: u64, + r14: u64, + r15: u64, + pub(crate) rsi: u64, +} + +impl GpRegs { + fn new() -> Self { + GpRegs { + rbx: 0, + rsp: 0, + rbp: 0, + rdi: 0, + r12: 0, + r13: 0, + r14: 0, + r15: 0, + rsi: 0, + } + } +} + +/// Floating-point argument registers in the AMD64 ABI. +/// +/// # Layout +/// +/// `repr(C)` is required to preserve the ordering of members, which are read by the assembly at +/// hard-coded offsets. +/// +/// # TODOs +/// +/// - Unlike the C code, this doesn't use the `packed` repr due to warnings in the Nomicon: +/// . Since the members are all +/// `__m128`, this should be fine? +#[repr(C)] +struct FpRegs { + xmm0: __m128, + xmm1: __m128, + xmm2: __m128, + xmm3: __m128, + xmm4: __m128, + xmm5: __m128, + xmm6: __m128, + xmm7: __m128, +} + +impl FpRegs { + fn new() -> Self { + let zero = unsafe { _mm_setzero_ps() }; + FpRegs { + xmm0: zero, + xmm1: zero, + xmm2: zero, + xmm3: zero, + xmm4: zero, + xmm5: zero, + xmm6: zero, + xmm7: zero, + } + } +} + +/// Everything we need to make a context switch: a signal mask, and the registers and return values +/// that are manipulated directly by assembly code. +/// +/// A context also tracks which other context to swap back to if a child context's entrypoint function +/// returns, and can optionally contain a callback function to be run just before that swap occurs. +/// +/// # Layout +/// +/// The `repr(C)` and order of fields in this struct are very important, as the assembly code reads +/// and writes hard-coded offsets from the base of the struct. Without `repr(C)`, Rust is free to +/// reorder the fields. +/// +/// Contexts are also `repr(align(64))` in order to align to cache lines and minimize contention +/// when running multiple threads. +/// +/// # Movement +/// +/// `Context` values must not be moved once they've been initialized. 
Contexts contain a pointer to +/// their stack, which in turn contains a pointer back to the context. If the context gets moved, +/// that pointer becomes invalid, and the behavior of returning from that context becomes undefined. +#[repr(C, align(64))] +pub struct Context { + pub(crate) gpr: GpRegs, + fpr: FpRegs, + retvals_gp: [u64; 2], + retval_fp: __m128, + parent_ctx: *mut Context, + // TODO ACF 2019-10-23: make Instance into a generic parameter? + backstop_callback: *const unsafe extern "C" fn(*mut Instance), + callback_data: *mut Instance, +} + +impl Context { + /// Create an all-zeroed `Context`. + pub fn new() -> Self { + Context { + gpr: GpRegs::new(), + fpr: FpRegs::new(), + retvals_gp: [0; 2], + retval_fp: unsafe { _mm_setzero_ps() }, + parent_ctx: ptr::null_mut(), + backstop_callback: Context::default_backstop_callback as *const _, + callback_data: ptr::null_mut(), + } + } + + /// Get a raw pointer to the instance's callback data. + pub(crate) fn callback_data_ptr(&self) -> *mut Instance { + self.callback_data + } +} + +/// A wrapper around a `Context`, primarily meant for use in test code. +/// +/// Users of this library interact with contexts implicitly via `Instance` values, but for testing +/// the context code independently, it is helpful to use contexts directly. +/// +/// # Movement of `ContextHandle` +/// +/// `ContextHandle` keeps a pointer to a `Context` rather than keeping all of the data directly as +/// fields in order to have better control over where that data lives in memory. We always want that +/// data to be heap-allocated, and to never move once it has been initialized. The `ContextHandle`, +/// by contrast, should be treated like a normal Rust value with no such restrictions. +/// +/// Until the `Unpin` marker trait arrives in stable Rust, it is difficult to enforce this with the +/// type system alone, so we use a bit of unsafety and (hopefully) clever API design to ensure that +/// the data cannot be moved. 
+///
+/// We create the `Context` within a box to allocate it on the heap, then convert it into a raw
+/// pointer to relinquish ownership. When accessing the internal structure via the `DerefMut` trait,
+/// data must not be moved out of the `Context` with functions like `mem::replace`.
+///
+/// # Layout
+///
+/// Foreign code accesses the `internal` pointer in tests, so it is important that it is the first
+/// member, and that the struct is `repr(C)`.
+#[repr(C)]
+pub struct ContextHandle {
+    internal: NonNull<Context>,
+}
+
+impl Drop for ContextHandle {
+    fn drop(&mut self) {
+        unsafe {
+            // create a box from the pointer so that it'll get dropped
+            // and we won't leak `Context`s
+            Box::from_raw(self.internal.as_ptr());
+        }
+    }
+}
+
+impl std::ops::Deref for ContextHandle {
+    type Target = Context;
+    fn deref(&self) -> &Self::Target {
+        unsafe { self.internal.as_ref() }
+    }
+}
+
+impl std::ops::DerefMut for ContextHandle {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe { self.internal.as_mut() }
+    }
+}
+
+impl ContextHandle {
+    /// Create an all-zeroed `ContextHandle`.
+    pub fn new() -> Self {
+        let internal = NonNull::new(Box::into_raw(Box::new(Context::new())))
+            .expect("Box::into_raw should never return NULL");
+        ContextHandle { internal }
+    }
+
+    pub fn create_and_init(
+        stack: &mut [u64],
+        fptr: usize,
+        args: &[Val],
+    ) -> Result<ContextHandle, Error> {
+        let mut child = ContextHandle::new();
+        Context::init(stack, &mut child, fptr, args)?;
+        Ok(child)
+    }
+}
+
+struct CallStackBuilder<'a> {
+    offset: usize,
+    stack: &'a mut [u64],
+}
+
+impl<'a> CallStackBuilder<'a> {
+    pub fn new(stack: &'a mut [u64]) -> Self {
+        CallStackBuilder { offset: 0, stack }
+    }
+
+    fn push(&mut self, val: u64) {
+        self.offset += 1;
+        self.stack[self.stack.len() - self.offset] = val;
+    }
+
+    /// Stores `args` onto the stack such that when a return address is written after, the
+    /// complete unit will be 16-byte aligned, as the x86_64 ABI requires.
+ /// + /// That is to say, `args` will be padded such that the current top of stack is 8-byte + /// aligned. + fn store_args(&mut self, args: &[u64]) { + let items_end = args.len() + self.offset; + + if items_end % 2 == 1 { + // we need to add one entry just before the arguments so that the arguments start on an + // aligned address. + self.push(0); + } + + for arg in args.iter().rev() { + self.push(*arg); + } + } + + fn offset(&self) -> usize { + self.offset + } + + fn into_inner(self) -> (&'a mut [u64], usize) { + (self.stack, self.offset) + } +} + +impl Context { + /// Initialize a new child context. + /// + /// - `stack`: The stack for the child; *must be 16-byte aligned*. + /// + /// - `child`: The context for the child. The fields of this structure will be overwritten by + /// `init`. + /// + /// - `fptr`: A pointer to the entrypoint for the child. Note that while the type signature here + /// is for a void function of no arguments (equivalent to `void (*fptr)(void)` in C), the + /// entrypoint actually can be a function of any argument or return type that corresponds to a + /// `val::Val` variant. + /// + /// - `args`: A slice of arguments for the `fptr` entrypoint. These must match the number and + /// types of `fptr`'s actual arguments exactly, otherwise swapping to this context will cause + /// undefined behavior. + /// + /// # Errors + /// + /// - `Error::UnalignedStack` if the _end_ of `stack` is not 16-byte aligned. + /// + /// # Examples + /// + /// ## C entrypoint + /// + /// This example initializes a context that will start in a C function `entrypoint` when first + /// swapped to. 
+ /// + /// ```c + /// void entrypoint(uint64_t x, float y); + /// ``` + /// + /// ```no_run + /// # use lucet_runtime_internals::context::Context; + /// # use lucet_runtime_internals::val::Val; + /// extern "C" { fn entrypoint(x: u64, y: f32); } + /// // allocating an even number of `u64`s seems to reliably yield + /// // properly aligned stacks, but TODO do better + /// let mut stack = vec![0u64; 1024].into_boxed_slice(); + /// let mut child = Context::new(); + /// let res = Context::init( + /// &mut *stack, + /// &mut child, + /// entrypoint as usize, + /// &[Val::U64(120), Val::F32(3.14)], + /// ); + /// assert!(res.is_ok()); + /// ``` + /// + /// ## Rust entrypoint + /// + /// This example initializes a context that will start in a Rust function `entrypoint` when + /// first swapped to. Note that we mark `entrypoint` as `extern "C"` to make sure it is compiled + /// with C calling conventions. + /// + /// ```no_run + /// # use lucet_runtime_internals::context::{Context, ContextHandle}; + /// # use lucet_runtime_internals::val::Val; + /// extern "C" fn entrypoint(x: u64, y: f32) { } + /// // allocating an even number of `u64`s seems to reliably yield + /// // properly aligned stacks, but TODO do better + /// let mut stack = vec![0u64; 1024].into_boxed_slice(); + /// let mut child = Context::new(); + /// let res = Context::init( + /// &mut *stack, + /// &mut child, + /// entrypoint as usize, + /// &[Val::U64(120), Val::F32(3.14)], + /// ); + /// assert!(res.is_ok()); + /// ``` + /// + /// # Implementation details + /// + /// This prepares a stack for the child context structured as follows, assuming an 0x1000 byte + /// stack: + /// ```text + /// 0x1000: +-------------------------+ + /// 0x0ff8: | NULL | // Null added if necessary for alignment. + /// 0x0ff0: | spilled_arg_1 | // Guest arguments follow. + /// 0x0fe8: | spilled_arg_2 | + /// 0x0fe0: ~ spilled_arg_3 ~ // The three arguments here are just for show. 
+ /// 0x0fd8: | lucet_context_backstop | <-- This forms an ABI-matching call frame for fptr. + /// 0x0fd0: | fptr | <-- The actual guest code we want to run. + /// 0x0fc8: | lucet_context_bootstrap | <-- The guest stack pointer starts here. + /// 0x0fc0: | | + /// 0x0XXX: ~ ~ // Rest of the stack needs no preparation. + /// 0x0000: | | + /// +-------------------------+ + /// ``` + /// + /// This packing of data on the stack is interwoven with noteworthy constraints on what the + /// backstop may do: + /// * The backstop must not return on the guest stack. + /// - The next value will be a spilled argument or NULL. Neither are an intended address. + /// * The backstop cannot have ABI-conforming spilled arguments. + /// - No code runs between `fptr` and `lucet_context_backstop`, so nothing exists to + /// clean up `fptr`'s arguments. `lucet_context_backstop` would have to adjust the + /// stack pointer by a variable amount, and it does not, so `rsp` will continue to + /// point to guest arguments. + /// - This is why bootstrap recieves arguments via rbp, pointing elsewhere on the stack. + /// + /// The bootstrap function must be careful, but is less constrained since it can clean up + /// and prepare a context for `fptr`. + pub fn init( + stack: &mut [u64], + child: &mut Context, + fptr: usize, + args: &[Val], + ) -> Result<(), Error> { + Context::init_with_callback( + stack, + child, + Context::default_backstop_callback, + ptr::null_mut(), + fptr, + args, + ) + } + + /// The default backstop callback does nothing, and is just a marker. + extern "C" fn default_backstop_callback(_: *mut Instance) {} + + /// Similar to `Context::init()`, but allows setting a callback function to be run when the + /// guest entrypoint returns. + /// + /// After the entrypoint function returns, but before swapping back to the parent context, + /// `backstop_callback` will be run with the single argument `callback_data`. 
+ pub fn init_with_callback( + stack: &mut [u64], + child: &mut Context, + backstop_callback: unsafe extern "C" fn(*mut Instance), + callback_data: *mut Instance, + fptr: usize, + args: &[Val], + ) -> Result<(), Error> { + if !stack_is_aligned(stack) { + return Err(Error::UnalignedStack); + } + + if backstop_callback != Context::default_backstop_callback { + child.backstop_callback = backstop_callback as *const _; + child.callback_data = callback_data; + } + + let mut gp_args_ix = 0; + let mut fp_args_ix = 0; + let mut gp_regs_values = [0u64; 6]; + + let mut spilled_args = vec![]; + + for arg in args { + match val_to_reg(arg) { + RegVal::GpReg(v) => { + if gp_args_ix >= 6 { + spilled_args.push(val_to_stack(arg)); + } else { + gp_regs_values[gp_args_ix] = v; + gp_args_ix += 1; + } + } + RegVal::FpReg(v) => { + if fp_args_ix >= 8 { + spilled_args.push(val_to_stack(arg)); + } else { + child.bootstrap_fp_ix_arg(fp_args_ix, v); + fp_args_ix += 1; + } + } + } + } + + // set up an initial call stack for guests to bootstrap into and execute + let mut stack_builder = CallStackBuilder::new(stack); + + // we actually don't want to put an explicit pointer to these arguments anywhere. we'll + // line up the rest of the stack such that these are in argument position when we jump to + // `fptr`. + stack_builder.store_args(spilled_args.as_slice()); + + // the stack must be aligned in the environment we'll execute `fptr` from - this is an ABI + // requirement and can cause segfaults if not upheld. + assert_eq!( + stack_builder.offset() % 2, + 0, + "incorrect alignment for guest call frame" + ); + + // we execute the guest code via returns, so we make a "call stack" of routines like: + // -> lucet_context_backstop() + // -> fptr() + // -> lucet_context_bootstrap() + // + // with each address the start of the named function, so when the inner function + // completes it returns to begin the next function up. 
+ stack_builder.push(lucet_context_backstop as u64); + stack_builder.push(fptr as u64); + + // add all general purpose arguments for the guest to be bootstrapped + for arg in gp_regs_values.iter() { + stack_builder.push(*arg); + } + + stack_builder.push(lucet_context_bootstrap as u64); + + let (stack, stack_start) = stack_builder.into_inner(); + + // Stack pointer: this points to the return address that will be used by `swap`, in place + // of the original (eg, in the host) return address. The return address this points to is + // the address of the first function to run on `swap`: `lucet_context_bootstrap`. + child.gpr.rsp = &mut stack[stack.len() - stack_start] as *mut u64 as u64; + + // Base pointer: `rbp` will be saved through all guest code, and preserved for when we + // reach the backstop. This allows us to prepare an argument for `lucet_context_backstop` + // even at the entrypoint of the guest. + child.gpr.rbp = child as *const Context as u64; + + Ok(()) + } + + /// Save the current context, and swap to another context. + /// + /// - `from`: the current context is written here + /// - `to`: the context to read from and swap to + /// + /// The current registers, including the stack pointer, are saved to `from`. The current stack + /// pointer is then replaced by the value saved in `to.gpr.rsp`, so when `swap` returns, it will + /// return to the pointer saved in `to`'s stack. + /// + /// If `to` was freshly initialized by passing it as the `child` argument to `init`, `swap` will + /// return to the function that bootstraps arguments and then calls the entrypoint that was + /// passed to `init`. + /// + /// If `to` was previously passed as the `from` argument to another call to `swap`, the program + /// will return as if from that _first_ call to `swap`. + /// + /// The address of `from` will be saved as `to.parent_ctx`. 
If `to` was initialized by `init`,
+    /// it will swap back to the `from` context when the entrypoint function returns via
+    /// `lucet_context_backstop`.
+    ///
+    /// # Safety
+    ///
+    /// The value in `to.gpr.rsp` must be a valid pointer into the stack that was originally passed
+    /// to `init` when the `to` context was initialized, or to the original stack created implicitly
+    /// by Rust.
+    ///
+    /// The registers saved in the `to` context must match the arguments expected by the entrypoint
+    /// of the function passed to `init`, or be unaltered from when they were previously written by
+    /// `swap`.
+    ///
+    /// If `to` was initialized by `init`, the `from` context must not be moved, dropped, or
+    /// otherwise invalidated while in the `to` context unless `to`'s entrypoint function never
+    /// returns.
+    ///
+    /// If `from` is never returned to, `swap`ped to, or `set` to, resources could leak due to
+    /// implicit `drop`s never being called:
+    ///
+    /// ```no_run
+    /// # use lucet_runtime_internals::context::Context;
+    /// fn f(x: Box<u64>, child: &mut Context) {
+    ///     let mut xs = vec![187; 410757864530];
+    ///     xs[0] += *x;
+    ///
+    ///     // manually drop here to avoid leaks
+    ///     drop(x);
+    ///     drop(xs);
+    ///
+    ///     let mut parent = Context::new();
+    ///     unsafe { Context::swap(&mut parent, child); }
+    ///     // implicit `drop(x)` and `drop(xs)` here never get called unless we swap back
+    /// }
+    /// ```
+    ///
+    /// # Examples
+    ///
+    /// The typical case is to initialize a new child context, and then swap to it from a zeroed
+    /// parent context.
+ /// + /// ```no_run + /// # use lucet_runtime_internals::context::Context; + /// # extern "C" fn entrypoint() {} + /// # let mut stack = vec![0u64; 1024].into_boxed_slice(); + /// let mut parent = Context::new(); + /// let mut child = Context::new(); + /// Context::init( + /// &mut stack, + /// &mut child, + /// entrypoint as usize, + /// &[], + /// ).unwrap(); + /// + /// unsafe { Context::swap(&mut parent, &mut child); } + /// ``` + #[inline] + pub unsafe fn swap(from: &mut Context, to: &mut Context) { + to.parent_ctx = from; + lucet_context_swap(from as *mut _, to as *mut _); + } + + /// Swap to another context without saving the current context. + /// + /// - `to`: the context to read from and swap to + /// + /// The current registers, including the stack pointer, are discarded. The current stack pointer + /// is then replaced by the value saved in `to.gpr.rsp`, so when `swap` returns, it will return + /// to the pointer saved in `to`'s stack. + /// + /// If `to` was freshly initialized by passing it as the child to `init`, `swap` will return to + /// the function that bootstraps arguments and then calls the entrypoint that was passed to + /// `init`. + /// + /// If `to` was previously passed as the `from` argument to another call to `swap`, the program + /// will return as if from the call to `swap`. + /// + /// # Safety + /// + /// ## Stack and registers + /// + /// The value in `to.gpr.rsp` must be a valid pointer into the stack that was originally passed + /// to `init` when the context was initialized, or to the original stack created implicitly by + /// Rust. + /// + /// The registers saved in `to` must match the arguments expected by the entrypoint of the + /// function passed to `init`, or be unaltered from when they were previously written by `swap`. 
+    ///
+    /// ## Returning
+    ///
+    /// If `to` is a context freshly initialized by `init` (as opposed to a context populated only
+    /// by `swap`, such as a host context), at least one of the following must be true, otherwise
+    /// the program will return to a context with uninitialized registers:
+    ///
+    /// - The `fptr` argument to `init` is a function that never returns
+    ///
+    /// - A valid context must have been passed as the `from` argument to `swap` when entering the
+    ///   current context before this call to `set`
+    ///
+    /// ## Resource leaks
+    ///
+    /// Since control flow will not return to the calling context, care must be taken to ensure that
+    /// any resources owned by the calling context are manually dropped. The implicit `drop`s
+    /// inserted by Rust at the end of the calling scope will not be reached:
+    ///
+    /// ```no_run
+    /// # use lucet_runtime_internals::context::Context;
+    /// fn f(x: Box<u64>, child: &Context) {
+    ///     let mut xs = vec![187; 410757864530];
+    ///     xs[0] += *x;
+    ///
+    ///     // manually drop here to avoid leaks
+    ///     drop(x);
+    ///     drop(xs);
+    ///
+    ///     unsafe { Context::set(child); }
+    ///     // implicit `drop(x)` and `drop(xs)` here never get called
+    /// }
+    /// ```
+    #[inline]
+    pub unsafe fn set(to: &Context) -> ! {
+        lucet_context_set(to as *const Context);
+    }
+
+    /// Clear (zero) return values.
+    pub fn clear_retvals(&mut self) {
+        self.retvals_gp = [0; 2];
+        let zero = unsafe { _mm_setzero_ps() };
+        self.retval_fp = zero;
+    }
+
+    /// Get the general-purpose return value at index `idx`.
+    ///
+    /// If this method is called before the context has returned from its original entrypoint, the
+    /// result will be `0`.
+    pub fn get_retval_gp(&self, idx: usize) -> u64 {
+        self.retvals_gp[idx]
+    }
+
+    /// Get the floating point return value.
+    ///
+    /// If this method is called before the context has returned from its original entrypoint, the
+    /// result will be `0.0`.
+    pub fn get_retval_fp(&self) -> __m128 {
+        self.retval_fp
+    }
+
+    /// Get the return value as an `UntypedRetVal`.
+    ///
+    /// This combines the 0th general-purpose return value, and the single floating-point return value.
+    pub fn get_untyped_retval(&self) -> UntypedRetVal {
+        let gp = self.get_retval_gp(0);
+        let fp = self.get_retval_fp();
+        UntypedRetVal::new(gp, fp)
+    }
+
+    /// Put one of the first 8 floating-point arguments into a `Context` register.
+    ///
+    /// - `ix`: ABI floating-point argument number
+    /// - `arg`: argument value
+    fn bootstrap_fp_ix_arg(&mut self, ix: usize, arg: __m128) {
+        match ix {
+            0 => self.fpr.xmm0 = arg,
+            1 => self.fpr.xmm1 = arg,
+            2 => self.fpr.xmm2 = arg,
+            3 => self.fpr.xmm3 = arg,
+            4 => self.fpr.xmm4 = arg,
+            5 => self.fpr.xmm5 = arg,
+            6 => self.fpr.xmm6 = arg,
+            7 => self.fpr.xmm7 = arg,
+            _ => panic!("unexpected fp register index {}", ix),
+        }
+    }
+}
+
+/// Errors that may arise when working with contexts.
+#[derive(Debug, Error)]
+pub enum Error {
+    /// Raised when the bottom of the stack provided to `Context::init` is not 16-byte aligned
+    #[error("context initialized with unaligned stack")]
+    UnalignedStack,
+}
+
+/// Check whether the bottom (highest address) of the stack is 16-byte aligned, as required by the
+/// ABI.
+fn stack_is_aligned(stack: &[u64]) -> bool {
+    let size = stack.len();
+    let last_elt_addr = &stack[size - 1] as *const u64 as usize;
+    let bottom_addr = last_elt_addr + mem::size_of::<u64>();
+    bottom_addr % 16 == 0
+}
+
+extern "C" {
+    /// Bootstraps arguments and calls the entrypoint via returning; implemented in assembly.
+    ///
+    /// Loads general-purpose arguments from the callee-saved registers in a `Context` to the
+    /// appropriate argument registers for the AMD64 ABI, and then returns to the entrypoint.
+    fn lucet_context_bootstrap();
+
+    /// Stores return values into the parent context, and then swaps to it; implemented in assembly.
+ ///
+ /// This is where the entrypoint function returns to, so that we swap back to the parent on
+ /// return.
+ fn lucet_context_backstop();
+
+ /// Saves the current context and performs the context switch. Implemented in assembly.
+ fn lucet_context_swap(from: *mut Context, to: *mut Context);
+
+ /// Performs the context switch; implemented in assembly.
+ ///
+ /// Never returns because the current context is discarded.
+ pub(crate) fn lucet_context_set(to: *const Context) -> !;
+
+ /// Runs an entry callback after performing a context switch. Implemented in assembly.
+ ///
+ /// In practice, this is used with `enter_guest_region` so that the guest will appropriately
+ /// set itself to be terminable upon entry before continuing to any guest code.
+ ///
+ /// `lucet_context_activate` is essentially a function with three arguments:
+ /// * rdi: the data for the entry callback.
+ /// * rsi: the address of the entry callback.
+ /// * rbx: the address of the guest code to execute.
+ ///
+ /// We do not actually define `lucet_context_activate` as having these arguments because we
+ /// manually load these arguments, as well as a pointer to this function, into the context's
+ /// registers. See `Instance::with_activation_routine` for more information.
+ ///
+ /// Note that `rbx` is used to store the address of the guest code because it is a callee-saved
+ /// register in the System V calling convention. It is also a non-volatile register on
+ /// Windows, which is a nice additional benefit.
+ ///
+ /// For more information, see `Instance::swap_and_return`, `Instance::with_activation_routine`,
+ /// `enter_guest_region`, and `lucet_context_activate`'s assembly implementation.
+ pub(crate) fn lucet_context_activate(); +} diff --git a/lucet-runtime/lucet-runtime-internals/src/instance.rs b/lucet-runtime/lucet-runtime-internals/src/instance.rs index 274bd7d08..ed2dc3045 100644 --- a/lucet-runtime/lucet-runtime-internals/src/instance.rs +++ b/lucet-runtime/lucet-runtime-internals/src/instance.rs @@ -977,7 +977,7 @@ impl Instance { let self_ptr = self as *mut _; Context::init_with_callback( - unsafe { self.alloc.stack_u64_mut() }, + unsafe { self.alloc.stack_words_mut() }, &mut self.ctx, execution::exit_guest_region, self_ptr, @@ -1009,17 +1009,38 @@ impl Instance { /// `execution::enter_guest_region` for more info. // TODO KTM 2020-03-13: This should be a method on `Context`. fn install_activator(&mut self) { - unsafe { - // Get a raw pointer to the top of the guest stack. - let top_of_stack = self.ctx.gpr.rsp as *mut u64; - // Move the guest code address to rbx, and then put the address of the activation thunk - // at the top of the stack, so that we will start execution at `enter_guest_region`. - self.ctx.gpr.rbx = *top_of_stack; - *top_of_stack = crate::context::lucet_context_activate as u64; - // Pass a pointer to our guest-side entrypoint bootstrap code in `rsi`, and then put - // its first argument (a raw pointer to `self`) in `rdi`. - self.ctx.gpr.rsi = execution::enter_guest_region as u64; - self.ctx.gpr.rdi = self.ctx.callback_data_ptr() as u64; + use cfg_if::cfg_if; + cfg_if! { + if #[cfg(target_arch = "x86")] { + unsafe { + // Get a raw pointer to the top of the guest stack. + let top_of_stack = self.ctx.gpr.esp as *mut u32; + // Move the guest code address to rbx, and then put the address of the activation thunk + // at the top of the stack, so that we will start execution at `enter_guest_region`. 
+ self.ctx.gpr.ebx = *top_of_stack;
+ *top_of_stack = crate::context::lucet_context_activate as u32;
+ // Pass a pointer to our guest-side entrypoint bootstrap code in `esi`, and then put
+ // its first argument (a raw pointer to `self`) in `edi`.
+ self.ctx.gpr.esi = execution::enter_guest_region as u32;
+ self.ctx.gpr.edi = self.ctx.callback_data_ptr() as u32;
+ }
+
+ } else if #[cfg(target_arch = "x86_64")] {
+ unsafe {
+ // Get a raw pointer to the top of the guest stack.
+ let top_of_stack = self.ctx.gpr.rsp as *mut u64;
+ // Move the guest code address to rbx, and then put the address of the activation thunk
+ // at the top of the stack, so that we will start execution at `enter_guest_region`.
+ self.ctx.gpr.rbx = *top_of_stack;
+ *top_of_stack = crate::context::lucet_context_activate as u64;
+ // Pass a pointer to our guest-side entrypoint bootstrap code in `rsi`, and then put
+ // its first argument (a raw pointer to `self`) in `rdi`.
+ self.ctx.gpr.rsi = execution::enter_guest_region as u64;
+ self.ctx.gpr.rdi = self.ctx.callback_data_ptr() as u64;
+ }
+ } else {
+ panic!("unsupported architecture");
+ }
 }
 }
diff --git a/lucet-runtime/lucet-runtime-internals/src/lib.rs b/lucet-runtime/lucet-runtime-internals/src/lib.rs
index 6737f9b3a..cbb911673 100644
--- a/lucet-runtime/lucet-runtime-internals/src/lib.rs
+++ b/lucet-runtime/lucet-runtime-internals/src/lib.rs
@@ -24,7 +24,8 @@ pub mod lock_testpoints;
 pub mod module;
 pub mod region;
 pub mod sysdeps;
-pub mod val;
+pub mod archdeps;
+pub use archdeps::val as val;
 pub mod vmctx;

 /// The size of a page in WebAssembly heaps.
diff --git a/lucet-runtime/lucet-runtime-internals/src/sysdeps/linux.rs b/lucet-runtime/lucet-runtime-internals/src/sysdeps/linux.rs index da58e43cb..4d02d0755 100644 --- a/lucet-runtime/lucet-runtime-internals/src/sysdeps/linux.rs +++ b/lucet-runtime/lucet-runtime-internals/src/sysdeps/linux.rs @@ -1,4 +1,16 @@ -use libc::{c_void, ucontext_t, REG_RDI, REG_RIP}; +use libc::{c_void, ucontext_t}; +use cfg_if::cfg_if; +cfg_if! { + if #[cfg(target_arch = "x86")] { + use libc::{REG_EDI, REG_EIP}; + use REG_EDI as REG_DI; + use REG_EIP as REG_IP; + } else if #[cfg(target_arch = "x86_64")] { + use libc::{REG_RDI, REG_RIP}; + use REG_RDI as REG_DI; + use REG_RIP as REG_IP; + } +} #[derive(Clone, Copy, Debug)] pub struct UContextPtr(*mut ucontext_t); @@ -13,19 +25,19 @@ impl UContextPtr { #[inline] pub fn get_ip(self) -> *const c_void { let mcontext = &unsafe { self.0.as_ref().unwrap() }.uc_mcontext; - mcontext.gregs[REG_RIP as usize] as *const _ + mcontext.gregs[REG_IP as usize] as *const _ } #[inline] pub fn set_ip(self, new_ip: *const c_void) { let mut mcontext = &mut unsafe { self.0.as_mut().unwrap() }.uc_mcontext; - mcontext.gregs[REG_RIP as usize] = new_ip as i64; + mcontext.gregs[REG_IP as usize] = new_ip as _; } #[inline] pub fn set_rdi(self, new_rdi: u64) { let mut mcontext = &mut unsafe { self.0.as_mut().unwrap() }.uc_mcontext; - mcontext.gregs[REG_RDI as usize] = new_rdi as i64; + mcontext.gregs[REG_DI as usize] = new_rdi as _; } } diff --git a/lucet-runtime/lucet-runtime-tests/build.rs b/lucet-runtime/lucet-runtime-tests/build.rs index e1f7f62ea..9d2382bee 100644 --- a/lucet-runtime/lucet-runtime-tests/build.rs +++ b/lucet-runtime/lucet-runtime-tests/build.rs @@ -1,8 +1,18 @@ +use std::env; + fn main() { // TODO: this should only be built for tests, but Cargo doesn't // currently let you specify different build.rs options for tests: // + let traps_asm_file = match env::var("CARGO_CFG_TARGET_ARCH").unwrap().as_str() { + "x86_64" => "traps_x86_64.S", + "x86" => 
"traps_i686.S",
+ arch => {
+ panic!("unsupported architecture {}", arch);
+ }
+ };
+
 cc::Build::new()
- .file("src/guest_fault/traps.S")
- .compile("guest_fault_traps");
+ .file(&format!("src/guest_fault/{}", traps_asm_file))
+ .compile("guest_fault_traps");
 }
diff --git a/lucet-runtime/lucet-runtime-tests/src/guest_fault/traps_i686.S b/lucet-runtime/lucet-runtime-tests/src/guest_fault/traps_i686.S
new file mode 100644
index 000000000..a6077f36e
--- /dev/null
+++ b/lucet-runtime/lucet-runtime-tests/src/guest_fault/traps_i686.S
@@ -0,0 +1,70 @@
+ .text
+ .globl guest_func_illegal_instr # -- Begin function guest_func_illegal_instr
+#ifdef __ELF__
+ .type guest_func_illegal_instr,@function
+#else
+ .globl _guest_func_illegal_instr
+#endif
+ .p2align 4, 0x90
+guest_func_illegal_instr: # @guest_func_illegal_instr
+_guest_func_illegal_instr:
+ .cfi_startproc
+# %bb.0:
+ push %ebp
+ .cfi_def_cfa_offset 8
+ .cfi_offset %ebp, -8
+ mov %esp, %ebp
+ .cfi_def_cfa_register %ebp
+ mov %edi, -4(%ebp)
+ #APP
+ ud2
+ #NO_APP
+ pop %ebp
+ .cfi_def_cfa %esp, 4
+ ret
+.Lfunc_end0:
+#ifdef __ELF__
+ .size guest_func_illegal_instr, .Lfunc_end0-guest_func_illegal_instr
+#endif
+ .cfi_endproc
+ # -- End function
+ .globl guest_func_oob # -- Begin function guest_func_oob
+#ifdef __ELF__
+ .type guest_func_oob,@function
+#else
+ .globl _guest_func_oob
+#endif
+ .p2align 4, 0x90
+guest_func_oob: # @guest_func_oob
+_guest_func_oob:
+ .cfi_startproc
+# %bb.0:
+ push %ebp
+ .cfi_def_cfa_offset 8
+ .cfi_offset %ebp, -8
+ mov %esp, %ebp
+ .cfi_def_cfa_register %ebp
+ sub $8, %esp
+ mov %edi, -4(%ebp)
+ mov -4(%ebp), %edi
+#ifdef __ELF__
+ call lucet_vmctx_get_heap@PLT
+#else
+ call _lucet_vmctx_get_heap
+#endif
+ mov %eax, -8(%ebp)
+ mov -8(%ebp), %eax
+ movb $0, 65537(%eax)
+ add $8, %esp
+ pop %ebp
+ .cfi_def_cfa %esp, 4
+ ret
+.Lfunc_end1:
+#ifdef __ELF__
+ .size guest_func_oob, .Lfunc_end1-guest_func_oob
+#endif
+ .cfi_endproc
+
+#if defined(__linux__) && defined(__ELF__)
+ .section
".note.GNU-stack","",@progbits +#endif diff --git a/lucet-runtime/lucet-runtime-tests/src/guest_fault/traps.S b/lucet-runtime/lucet-runtime-tests/src/guest_fault/traps_x86_64.S similarity index 100% rename from lucet-runtime/lucet-runtime-tests/src/guest_fault/traps.S rename to lucet-runtime/lucet-runtime-tests/src/guest_fault/traps_x86_64.S diff --git a/lucet-runtime/src/c_api.rs b/lucet-runtime/src/c_api.rs index 2428636f9..bc4fa1ed6 100644 --- a/lucet-runtime/src/c_api.rs +++ b/lucet-runtime/src/c_api.rs @@ -351,12 +351,29 @@ pub unsafe extern "C" fn lucet_retval_gp(retval: *const lucet_untyped_retval) -> } } +use cfg_if::cfg_if; +cfg_if! { + if #[cfg(target_arch = "x86_64")] { + use core::arch::x86_64::_mm_storeu_ps; + use core::arch::x86_64::_mm_loadu_ps; + use core::arch::x86_64::_mm_storeu_pd; + use core::arch::x86_64::_mm_loadu_pd; + } else if #[cfg(target_arch = "x86")] { + use core::arch::x86::_mm_storeu_ps; + use core::arch::x86::_mm_loadu_ps; + use core::arch::x86::_mm_storeu_pd; + use core::arch::x86::_mm_loadu_pd; + } else { + panic!("unsupported architecture!"); + } +} + #[no_mangle] pub unsafe extern "C" fn lucet_retval_f32(retval: *const lucet_untyped_retval) -> f32 { let mut v = 0.0f32; - core::arch::x86_64::_mm_storeu_ps( + _mm_storeu_ps( &mut v as *mut f32, - core::arch::x86_64::_mm_loadu_ps((*retval).fp.as_ptr() as *const f32), + _mm_loadu_ps((*retval).fp.as_ptr() as *const f32), ); v } @@ -364,9 +381,9 @@ pub unsafe extern "C" fn lucet_retval_f32(retval: *const lucet_untyped_retval) - #[no_mangle] pub unsafe extern "C" fn lucet_retval_f64(retval: *const lucet_untyped_retval) -> f64 { let mut v = 0.0f64; - core::arch::x86_64::_mm_storeu_pd( + _mm_storeu_pd( &mut v as *mut f64, - core::arch::x86_64::_mm_loadu_pd((*retval).fp.as_ptr() as *const f64), + _mm_loadu_pd((*retval).fp.as_ptr() as *const f64), ); v } diff --git a/lucet-spectest/src/script.rs b/lucet-spectest/src/script.rs index 9c16a42c4..ccd94e036 100644 --- 
a/lucet-spectest/src/script.rs +++ b/lucet-spectest/src/script.rs @@ -100,7 +100,7 @@ impl ScriptEnv { let lucet_region = MmapRegion::create( 1, &lucet_runtime::Limits { - heap_memory_size: 4 * 1024 * 1024 * 1024, + heap_memory_size: 1023 * 1024 * 1024, ..lucet_runtime::Limits::default() }, ) diff --git a/lucetc/src/pointer.rs b/lucetc/src/pointer.rs index d90b26146..979f6fee4 100644 --- a/lucetc/src/pointer.rs +++ b/lucetc/src/pointer.rs @@ -1,4 +1,10 @@ use cranelift_codegen::ir; +#[cfg(target_pointer_width = "64")] pub const NATIVE_POINTER: ir::Type = ir::types::I64; +#[cfg(target_pointer_width = "32")] +pub const NATIVE_POINTER: ir::Type = ir::types::I32; +#[cfg(target_pointer_width = "64")] pub const NATIVE_POINTER_SIZE: usize = 8; +#[cfg(target_pointer_width = "32")] +pub const NATIVE_POINTER_SIZE: usize = 4; diff --git a/lucetc/src/types.rs b/lucetc/src/types.rs index ee4957b3c..96ffc94cd 100644 --- a/lucetc/src/types.rs +++ b/lucetc/src/types.rs @@ -67,7 +67,7 @@ pub fn to_lucet_signature(value: &ir::Signature) -> Result { - if value.is_int() && value.bits() == 64 { + if value.is_int() && value.bits() == (crate::pointer::NATIVE_POINTER_SIZE as u16 * 8) { // this is VMContext, so we can move on. } else { return Err(SignatureError::BadElement(