From 6670c2a8f06d088f0a30bdc60a9f1a35709e81ff Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 Apr 2026 14:25:01 -0700 Subject: [PATCH] More strictly check bounds in FACT trampolines This commit is a hardening of the various in-bounds checks and such of the FACT compiler, in particular as related to strings. The previous implementation would check bounds in a few places but this was a bit ad-hoc and not uniformly done. There's no known issue with the prior checks, but given the sensitive nature of these checks I feel it's best to make this a bit more rigorous. Specifically the `malloc` helpers, and a newly added `realloc` helper, will internally verify not only alignment but additionally the size of the allocation itself. All manual invocations of `realloc` are switched over to this helper. Additionally all conversions of a guest pointer to a more structured value now go through helpers which perform these same checks to ensure that everything is in-bounds. The net result is that this should have no behavior change from before. A suite of tests is added for behavior around large strings, specifically exercising the maximum allowable size of strings. This uncovered a few minor issues in transcoding where spec-wise Wasmtime previously transcoded too many bytes before performing a growing `realloc`. Finally a few refactorings were done in FACT to handle some helpers going away, notably around translating the `map` type, which cleans up the internals as well.
--- crates/cranelift/src/compiler/component.rs | 2 + crates/environ/src/component.rs | 4 +- crates/environ/src/fact/trampoline.rs | 764 +++++++++--------- crates/environ/src/fact/transcode.rs | 7 +- .../src/runtime/vm/component/libcalls.rs | 17 +- .../component-model/big-strings.wast | 391 ++++++++- 6 files changed, 798 insertions(+), 387 deletions(-) diff --git a/crates/cranelift/src/compiler/component.rs b/crates/cranelift/src/compiler/component.rs index c165868b00e4..82d3ef15e369 100644 --- a/crates/cranelift/src/compiler/component.rs +++ b/crates/cranelift/src/compiler/component.rs @@ -1860,6 +1860,8 @@ impl TrampolineCompiler<'_> { args.push(self.len_param(1, from64)); args.push(self.ptr_param(2, to64, to_base)); args.push(self.len_param(3, to64)); + let first_pass = self.builder.func.dfg.block_params(self.block0)[6]; + args.push(first_pass); } Transcode::Utf8ToCompactUtf16 | Transcode::Utf16ToCompactUtf16 => { diff --git a/crates/environ/src/component.rs b/crates/environ/src/component.rs index d60b7a25077e..3a9b232d3f51 100644 --- a/crates/environ/src/component.rs +++ b/crates/environ/src/component.rs @@ -211,8 +211,8 @@ macro_rules! 
foreach_builtin_component_function { latin1_to_latin1(vmctx: vmctx, src: ptr_u8, len: size, dst: ptr_u8) -> bool; latin1_to_utf16(vmctx: vmctx, src: ptr_u8, len: size, dst: ptr_u16) -> bool; utf8_to_utf16(vmctx: vmctx, src: ptr_u8, len: size, dst: ptr_u16) -> size; - utf16_to_utf8(vmctx: vmctx, src: ptr_u16, src_len: size, dst: ptr_u8, dst_len: size, ret2: ptr_size) -> size; - latin1_to_utf8(vmctx: vmctx, src: ptr_u8, src_len: size, dst: ptr_u8, dst_len: size, ret2: ptr_size) -> size; + utf16_to_utf8(vmctx: vmctx, src: ptr_u16, src_len: size, dst: ptr_u8, dst_len: size, first_pass: u32, ret2: ptr_size) -> size; + latin1_to_utf8(vmctx: vmctx, src: ptr_u8, src_len: size, dst: ptr_u8, dst_len: size, first_pass: u32, ret2: ptr_size) -> size; utf16_to_compact_probably_utf16(vmctx: vmctx, src: ptr_u16, len: size, dst: ptr_u16) -> size; utf8_to_latin1(vmctx: vmctx, src: ptr_u8, len: size, dst: ptr_u8, ret2: ptr_size) -> size; utf16_to_latin1(vmctx: vmctx, src: ptr_u16, len: size, dst: ptr_u8, ret2: ptr_size) -> size; diff --git a/crates/environ/src/fact/trampoline.rs b/crates/environ/src/fact/trampoline.rs index 0758f46f856f..90d69740c3a6 100644 --- a/crates/environ/src/fact/trampoline.rs +++ b/crates/environ/src/fact/trampoline.rs @@ -943,28 +943,22 @@ impl<'a, 'b> Compiler<'a, 'b> { let lower_mem_opts = lower_opts.data_model.unwrap_memory(); let (addr, ty) = param_locals[0]; assert_eq!(ty, lower_mem_opts.ptr()); - let align = src_tys - .iter() - .map(|t| self.types.align(lower_mem_opts, t)) - .max() - .unwrap_or(1); - Source::Memory(self.memory_operand(lower_opts, TempLocal::new(addr, ty), align)) + let abi = CanonicalAbiInfo::record(src_tys.iter().map(|t| self.types.canonical_abi(t))); + Source::Memory(self.memory_operand_abi( + lower_opts, + TempLocal::new(addr, ty), + &abi, + Trap::MemoryOutOfBounds, + )) }; let dst = if let Some(flat) = &dst_flat { Destination::Stack(flat, lift_opts) } else { + // If there are too many parameters then space is allocated in the + // 
destination module for the parameters via its `realloc` function. let abi = CanonicalAbiInfo::record(dst_tys.iter().map(|t| self.types.canonical_abi(t))); - match lift_opts.data_model { - DataModel::Gc {} => todo!("CM+GC"), - DataModel::LinearMemory(opts) => { - let (size, align) = opts.sizealign(&abi); - // If there are too many parameters then space is allocated in the - // destination module for the parameters via its `realloc` function. - let size = MallocSize::Const(size); - Destination::Memory(self.malloc(lift_opts, size, align)) - } - } + Destination::Memory(self.malloc_abi(lift_opts, &abi, Trap::MemoryOutOfBounds)) }; let srcs = src @@ -1024,12 +1018,7 @@ impl<'a, 'b> Compiler<'a, 'b> { // return value of the function itself. The imported function will // return a linear memory address at which the values can be read // from. - let lift_mem_opts = lift_opts.data_model.unwrap_memory(); - let align = src_tys - .iter() - .map(|t| self.types.align(lift_mem_opts, t)) - .max() - .unwrap_or(1); + let abi = CanonicalAbiInfo::record(src_tys.iter().map(|t| self.types.canonical_abi(t))); assert_eq!( result_locals.len(), if lower_opts.async_ || lift_opts.async_ { @@ -1040,7 +1029,12 @@ impl<'a, 'b> Compiler<'a, 'b> { ); let (addr, ty) = result_locals[0]; assert_eq!(ty, lift_opts.data_model.unwrap_memory().ptr()); - Source::Memory(self.memory_operand(lift_opts, TempLocal::new(addr, ty), align)) + Source::Memory(self.memory_operand_abi( + lift_opts, + TempLocal::new(addr, ty), + &abi, + Trap::MemoryOutOfBounds, + )) }; let dst = if let Some(flat) = &dst_flat { @@ -1049,15 +1043,15 @@ impl<'a, 'b> Compiler<'a, 'b> { // This is slightly different than `translate_params` where the // return pointer was provided by the caller of this function // meaning the last parameter local is a pointer into linear memory. 
- let lower_mem_opts = lower_opts.data_model.unwrap_memory(); - let align = dst_tys - .iter() - .map(|t| self.types.align(lower_mem_opts, t)) - .max() - .unwrap_or(1); + let abi = CanonicalAbiInfo::record(dst_tys.iter().map(|t| self.types.canonical_abi(t))); let (addr, ty) = *param_locals.last().expect("no retptr"); assert_eq!(ty, lower_opts.data_model.unwrap_memory().ptr()); - Destination::Memory(self.memory_operand(lower_opts, TempLocal::new(addr, ty), align)) + Destination::Memory(self.memory_operand_abi( + lower_opts, + TempLocal::new(addr, ty), + &abi, + Trap::MemoryOutOfBounds, + )) }; let srcs = src @@ -1633,16 +1627,33 @@ impl<'a, 'b> Compiler<'a, 'b> { }; let dst_str = match src_opts.string_encoding { - StringEncoding::Utf8 => match dst_opts.string_encoding { - StringEncoding::Utf8 => self.string_copy(&src_str, FE::Utf8, dst_opts, FE::Utf8), - StringEncoding::Utf16 => self.string_utf8_to_utf16(&src_str, dst_opts), - StringEncoding::CompactUtf16 => { - self.string_to_compact(&src_str, FE::Utf8, dst_opts) + StringEncoding::Utf8 => { + self.validate_guest_pointer( + src_opts, + &src_str.ptr, + &AllocSize::Local(src_str.len.idx), + 1, + Trap::StringOutOfBounds, + ); + match dst_opts.string_encoding { + StringEncoding::Utf8 => { + self.string_copy(&src_str, FE::Utf8, dst_opts, FE::Utf8) + } + StringEncoding::Utf16 => self.string_utf8_to_utf16(&src_str, dst_opts), + StringEncoding::CompactUtf16 => { + self.string_to_compact(&src_str, FE::Utf8, dst_opts) + } } - }, + } StringEncoding::Utf16 => { - self.verify_aligned(src_mem_opts, src_str.ptr.idx, 2); + self.validate_guest_pointer( + src_opts, + &src_str.ptr, + &AllocSize::DoubleLocal(src_str.len.idx), + 2, + Trap::StringOutOfBounds, + ); match dst_opts.string_encoding { StringEncoding::Utf8 => { self.string_deflate_to_utf8(&src_str, FE::Utf16, dst_opts) @@ -1657,8 +1668,6 @@ impl<'a, 'b> Compiler<'a, 'b> { } StringEncoding::CompactUtf16 => { - self.verify_aligned(src_mem_opts, src_str.ptr.idx, 2); - // Test the 
tag big to see if this is a utf16 or a latin1 string // at runtime... self.instruction(LocalGet(src_str.len.idx)); @@ -1673,6 +1682,18 @@ impl<'a, 'b> Compiler<'a, 'b> { self.ptr_uconst(src_mem_opts, UTF16_TAG); self.ptr_xor(src_mem_opts); self.instruction(LocalSet(src_str.len.idx)); + + // Now that we dynamically know this is utf16 perform a + // validation of the guest's pointer to ensure it's aligned and + // in-bounds. + self.validate_guest_pointer( + src_opts, + &src_str.ptr, + &AllocSize::DoubleLocal(src_str.len.idx), + 2, + Trap::StringOutOfBounds, + ); + let s1 = match dst_opts.string_encoding { StringEncoding::Utf8 => { self.string_deflate_to_utf8(&src_str, FE::Utf16, dst_opts) @@ -1687,6 +1708,16 @@ impl<'a, 'b> Compiler<'a, 'b> { self.instruction(Else); + // Now that we dynamically know this is latin1 perform the + // same validation above, but with a different byte length. + self.validate_guest_pointer( + src_opts, + &src_str.ptr, + &AllocSize::Local(src_str.len.idx), + 2, + Trap::StringOutOfBounds, + ); + // In the latin1 block the `src_len` local is already the number // of code units, so the string transcoding is all that needs to // happen. @@ -1760,6 +1791,12 @@ impl<'a, 'b> Compiler<'a, 'b> { ) -> WasmString<'c> { assert!(dst_enc.width() >= src_enc.width()); + // Validate the string's length is in-bounds. Note that `dst_enc` is + // specifically used here since it's the larger of the two encodings. + // The code-unit size of the src/dst is going to be the same so this is + // the encoding to validate. + self.validate_string_length(src, dst_enc); + let src_mem_opts = { match &src.opts.data_model { DataModel::Gc {} => todo!("CM+GC"), @@ -1773,31 +1810,25 @@ impl<'a, 'b> Compiler<'a, 'b> { } }; - let (src_byte_len_tmp, src_byte_len) = - self.source_string_byte_len(src, src_enc, src_mem_opts); - // Convert the source code units length to the destination byte // length type. 
- self.convert_src_len_to_dst( - src.len.idx, - src.opts.data_model.unwrap_memory().ptr(), - dst_opts.data_model.unwrap_memory().ptr(), - ); - let dst_len = self.local_tee_new_tmp(dst_opts.data_model.unwrap_memory().ptr()); + self.convert_src_len_to_dst(src.len.idx, src_mem_opts.ptr(), dst_mem_opts.ptr()); + let dst_len = self.local_tee_new_tmp(dst_mem_opts.ptr()); if dst_enc.width() > 1 { assert_eq!(dst_enc.width(), 2); self.ptr_uconst(dst_mem_opts, 1); self.ptr_shl(dst_mem_opts); } - let dst_byte_len = self.local_set_new_tmp(dst_opts.data_model.unwrap_memory().ptr()); + let dst_byte_len = self.local_set_new_tmp(dst_mem_opts.ptr()); // Allocate space in the destination using the calculated byte // length. let dst = { let dst_mem = self.malloc( dst_opts, - MallocSize::Local(dst_byte_len.idx), + AllocSize::Local(dst_byte_len.idx), dst_enc.align().into(), + Trap::StringOutOfBounds, ); WasmString { ptr: dst_mem.addr, @@ -1806,13 +1837,6 @@ impl<'a, 'b> Compiler<'a, 'b> { } }; - // Validate that `src_len + src_ptr` and - // `dst_mem.addr_local + dst_byte_len` are both in-bounds. This - // is done by loading the last byte of the string and if that - // doesn't trap then it's known valid. - self.validate_string_inbounds(src, src_byte_len); - self.validate_string_inbounds(&dst, dst_byte_len.idx); - // If the validations pass then the host `transcode` intrinsic // is invoked. This will either raise a trap or otherwise succeed // in which case we're done. @@ -1830,45 +1854,10 @@ impl<'a, 'b> Compiler<'a, 'b> { self.instruction(Call(transcode.as_u32())); self.free_temp_local(dst_byte_len); - if let Some(tmp) = src_byte_len_tmp { - self.free_temp_local(tmp); - } dst } - /// Calculate the source byte length given the size of each code - /// unit. - /// - /// Returns an optional temporary local if it was needed, which the caller - /// needs to deallocate with `free_temp_local`. 
Additionally returns the - /// index of the local which contains the byte length of the string, which - /// may point to the temporary local passed in. - fn source_string_byte_len( - &mut self, - src: &WasmString<'_>, - src_enc: FE, - src_mem_opts: &LinearMemoryOptions, - ) -> (Option<TempLocal>, u32) { - self.validate_string_length(src, src_enc); - - if src_enc.width() == 1 { - (None, src.len.idx) - } else { - assert_eq!(src_enc.width(), 2); - - // Note that this shouldn't overflow given `validate_string_length` - // above. - self.instruction(LocalGet(src.len.idx)); - self.ptr_uconst(src_mem_opts, 1); - self.ptr_shl(src_mem_opts); - let tmp = self.local_set_new_tmp(src.opts.data_model.unwrap_memory().ptr()); - - let idx = tmp.idx; - (Some(tmp), idx) - } - } - // Corresponding function for `store_string_to_utf8` in the spec. // // This translation works by possibly performing a number of @@ -1910,7 +1899,12 @@ impl<'a, 'b> Compiler<'a, 'b> { let dst_byte_len = self.local_set_new_tmp(dst_opts.data_model.unwrap_memory().ptr()); let dst = { - let dst_mem = self.malloc(dst_opts, MallocSize::Local(dst_byte_len.idx), 1); + let dst_mem = self.malloc( + dst_opts, + AllocSize::Local(dst_byte_len.idx), + 1, + Trap::StringOutOfBounds, + ); WasmString { ptr: dst_mem.addr, len: dst_len, @@ -1918,24 +1912,6 @@ impl<'a, 'b> Compiler<'a, 'b> { } }; - // Ensure buffers are all in-bounds - let mut src_byte_len_tmp = None; - let src_byte_len = match src_enc { - FE::Latin1 => src.len.idx, - FE::Utf16 => { - self.instruction(LocalGet(src.len.idx)); - self.ptr_uconst(src_mem_opts, 1); - self.ptr_shl(src_mem_opts); - let tmp = self.local_set_new_tmp(src.opts.data_model.unwrap_memory().ptr()); - let ret = tmp.idx; - src_byte_len_tmp = Some(tmp); - ret - } - FE::Utf8 => unreachable!(), - }; - self.validate_string_inbounds(src, src_byte_len); - self.validate_string_inbounds(&dst, dst_byte_len.idx); - // Perform the initial transcode let op = match src_enc { FE::Latin1 => Transcode::Latin1ToUtf8, @@
-1947,6 +1923,7 @@ impl<'a, 'b> Compiler<'a, 'b> { self.instruction(LocalGet(src.len.idx)); self.instruction(LocalGet(dst.ptr.idx)); self.instruction(LocalGet(dst_byte_len.idx)); + self.instruction(I32Const(1)); // first_pass = true self.instruction(Call(transcode.as_u32())); self.instruction(LocalSet(dst.len.idx)); let src_len_tmp = self.local_set_new_tmp(src.opts.data_model.unwrap_memory().ptr()); @@ -1959,12 +1936,8 @@ impl<'a, 'b> Compiler<'a, 'b> { self.ptr_ne(src_mem_opts); self.instruction(If(BlockType::Empty)); - // Here a worst-case reallocation is performed to grow `dst_mem`. - // In-line a check is also performed that the worst-case byte size - // fits within the maximum size of strings. - self.instruction(LocalGet(dst.ptr.idx)); // old_ptr - self.instruction(LocalGet(dst_byte_len.idx)); // old_size - self.ptr_uconst(dst_mem_opts, 1); // align + // Check that the worst-case byte size fits within the maximum size of + // strings. let factor = match src_enc { FE::Latin1 => 2, FE::Utf16 => 3, @@ -1978,12 +1951,22 @@ impl<'a, 'b> Compiler<'a, 'b> { ); self.ptr_uconst(dst_mem_opts, factor.into()); self.ptr_mul(dst_mem_opts); - self.instruction(LocalTee(dst_byte_len.idx)); - self.instruction(Call(dst_mem_opts.realloc.unwrap().as_u32())); - self.instruction(LocalSet(dst.ptr.idx)); + let new_byte_len = self.local_set_new_tmp(dst_mem_opts.ptr()); - // Verify that the destination is still in-bounds - self.validate_string_inbounds(&dst, dst_byte_len.idx); + // Perform a worst-case reallocation to grow `dst_mem`. + // Afterwards update our `dst_byte_len` local to reflect the new byte + // length. + self.realloc( + dst_opts, + &dst.ptr, + AllocSize::Local(dst_byte_len.idx), + AllocSize::Local(new_byte_len.idx), + 1, + Trap::StringOutOfBounds, + ); + self.instruction(LocalGet(new_byte_len.idx)); + self.instruction(LocalSet(dst_byte_len.idx)); + self.free_temp_local(new_byte_len); // Perform another round of transcoding that should be guaranteed // to succeed.
Note that all the parameters here are offset by the @@ -2005,6 +1988,7 @@ impl<'a, 'b> Compiler<'a, 'b> { self.instruction(LocalGet(dst_byte_len.idx)); self.instruction(LocalGet(dst.len.idx)); self.ptr_sub(dst_mem_opts); + self.instruction(I32Const(0)); // first_pass = false self.instruction(Call(transcode.as_u32())); // Add the second result, the amount of destination units encoded, @@ -2033,12 +2017,14 @@ impl<'a, 'b> Compiler<'a, 'b> { self.instruction(LocalGet(dst_byte_len.idx)); self.ptr_ne(dst_mem_opts); self.instruction(If(BlockType::Empty)); - self.instruction(LocalGet(dst.ptr.idx)); // old_ptr - self.instruction(LocalGet(dst_byte_len.idx)); // old_size - self.ptr_uconst(dst_mem_opts, 1); // align - self.instruction(LocalGet(dst.len.idx)); // new_size - self.instruction(Call(dst_mem_opts.realloc.unwrap().as_u32())); - self.instruction(LocalSet(dst.ptr.idx)); + self.realloc( + dst_opts, + &dst.ptr, + AllocSize::Local(dst_byte_len.idx), + AllocSize::Local(dst.len.idx), + 1, + Trap::StringOutOfBounds, + ); self.instruction(End); // If the first transcode was enough then assert that the returned @@ -2058,9 +2044,6 @@ impl<'a, 'b> Compiler<'a, 'b> { self.free_temp_local(src_len_tmp); self.free_temp_local(dst_byte_len); - if let Some(tmp) = src_byte_len_tmp { - self.free_temp_local(tmp); - } dst } @@ -2104,7 +2087,12 @@ impl<'a, 'b> Compiler<'a, 'b> { self.ptr_shl(dst_mem_opts); let dst_byte_len = self.local_set_new_tmp(dst_opts.data_model.unwrap_memory().ptr()); let dst = { - let dst_mem = self.malloc(dst_opts, MallocSize::Local(dst_byte_len.idx), 2); + let dst_mem = self.malloc( + dst_opts, + AllocSize::Local(dst_byte_len.idx), + 2, + Trap::StringOutOfBounds, + ); WasmString { ptr: dst_mem.addr, len: dst_len, @@ -2112,9 +2100,6 @@ impl<'a, 'b> Compiler<'a, 'b> { } }; - self.validate_string_inbounds(src, src.len.idx); - self.validate_string_inbounds(&dst, dst_byte_len.idx); - let transcode = self.transcoder(src, &dst, Transcode::Utf8ToUtf16); 
self.instruction(LocalGet(src.ptr.idx)); self.instruction(LocalGet(src.len.idx)); @@ -2133,20 +2118,14 @@ impl<'a, 'b> Compiler<'a, 'b> { self.instruction(LocalGet(dst.len.idx)); self.ptr_ne(dst_mem_opts); self.instruction(If(BlockType::Empty)); - self.instruction(LocalGet(dst.ptr.idx)); - self.instruction(LocalGet(dst_byte_len.idx)); - self.ptr_uconst(dst_mem_opts, 2); - self.instruction(LocalGet(dst.len.idx)); - self.ptr_uconst(dst_mem_opts, 1); - self.ptr_shl(dst_mem_opts); - self.instruction(Call(match dst.opts.data_model { - DataModel::Gc {} => todo!("CM+GC"), - DataModel::LinearMemory(LinearMemoryOptions { realloc, .. }) => { - realloc.unwrap().as_u32() - } - })); - self.instruction(LocalSet(dst.ptr.idx)); - self.verify_aligned(dst_opts.data_model.unwrap_memory(), dst.ptr.idx, 2); + self.realloc( + dst.opts, + &dst.ptr, + AllocSize::Local(dst_byte_len.idx), + AllocSize::DoubleLocal(dst.len.idx), + 2, + Trap::StringOutOfBounds, + ); self.instruction(End); // end of shrink-to-fit self.free_temp_local(dst_byte_len); @@ -2188,7 +2167,12 @@ impl<'a, 'b> Compiler<'a, 'b> { self.ptr_shl(dst_mem_opts); let dst_byte_len = self.local_set_new_tmp(dst_mem_opts.ptr()); let dst = { - let dst_mem = self.malloc(dst_opts, MallocSize::Local(dst_byte_len.idx), 2); + let dst_mem = self.malloc( + dst_opts, + AllocSize::Local(dst_byte_len.idx), + 2, + Trap::StringOutOfBounds, + ); WasmString { ptr: dst_mem.addr, len: dst_len, @@ -2203,9 +2187,6 @@ impl<'a, 'b> Compiler<'a, 'b> { ); let src_byte_len = self.local_set_new_tmp(src_mem_opts.ptr()); - self.validate_string_inbounds(src, src_byte_len.idx); - self.validate_string_inbounds(&dst, dst_byte_len.idx); - let transcode = self.transcoder(src, &dst, Transcode::Utf16ToCompactProbablyUtf16); self.instruction(LocalGet(src.ptr.idx)); self.instruction(LocalGet(src.len.idx)); @@ -2235,13 +2216,14 @@ impl<'a, 'b> Compiler<'a, 'b> { self.ptr_br_if(dst_mem_opts, 0); // Here `realloc` is used to downsize the string - 
self.instruction(LocalGet(dst.ptr.idx)); // old_ptr - self.instruction(LocalGet(dst_byte_len.idx)); // old_size - self.ptr_uconst(dst_mem_opts, 2); // align - self.instruction(LocalGet(dst.len.idx)); // new_size - self.instruction(Call(dst_mem_opts.realloc.unwrap().as_u32())); - self.instruction(LocalSet(dst.ptr.idx)); - self.verify_aligned(dst_opts.data_model.unwrap_memory(), dst.ptr.idx, 2); + self.realloc( + dst.opts, + &dst.ptr, + AllocSize::Local(dst_byte_len.idx), + AllocSize::Local(dst.len.idx), + 2, + Trap::StringOutOfBounds, + ); self.free_temp_local(dst_byte_len); self.free_temp_local(src_byte_len); @@ -2270,14 +2252,18 @@ impl<'a, 'b> Compiler<'a, 'b> { DataModel::LinearMemory(opts) => opts, }; - let (src_byte_len_tmp, src_byte_len) = - self.source_string_byte_len(src, src_enc, src_mem_opts); + self.validate_string_length(src, src_enc); self.convert_src_len_to_dst(src.len.idx, src_mem_opts.ptr(), dst_mem_opts.ptr()); let dst_len = self.local_tee_new_tmp(dst_mem_opts.ptr()); let dst_byte_len = self.local_set_new_tmp(dst_mem_opts.ptr()); let dst = { - let dst_mem = self.malloc(dst_opts, MallocSize::Local(dst_byte_len.idx), 2); + let dst_mem = self.malloc( + dst_opts, + AllocSize::Local(dst_byte_len.idx), + 2, + Trap::StringOutOfBounds, + ); WasmString { ptr: dst_mem.addr, len: dst_len, @@ -2285,9 +2271,6 @@ impl<'a, 'b> Compiler<'a, 'b> { } }; - self.validate_string_inbounds(src, src_byte_len); - self.validate_string_inbounds(&dst, dst_byte_len.idx); - // Perform the initial latin1 transcode. This returns the number of // source code units consumed and the number of destination code // units (bytes) written. 
@@ -2319,13 +2302,14 @@ impl<'a, 'b> Compiler<'a, 'b> { self.instruction(LocalGet(dst.len.idx)); self.ptr_ne(dst_mem_opts); self.instruction(If(BlockType::Empty)); - self.instruction(LocalGet(dst.ptr.idx)); // old_ptr - self.instruction(LocalGet(dst_byte_len.idx)); // old_size - self.ptr_uconst(dst_mem_opts, 2); // align - self.instruction(LocalGet(dst.len.idx)); // new_size - self.instruction(Call(dst_mem_opts.realloc.unwrap().as_u32())); - self.instruction(LocalSet(dst.ptr.idx)); - self.verify_aligned(dst_opts.data_model.unwrap_memory(), dst.ptr.idx, 2); + self.realloc( + dst.opts, + &dst.ptr, + AllocSize::Local(dst_byte_len.idx), + AllocSize::Local(dst.len.idx), + 2, + Trap::StringOutOfBounds, + ); self.instruction(End); // In this block the latin1 encoding failed. The host transcode @@ -2342,17 +2326,21 @@ impl<'a, 'b> Compiler<'a, 'b> { // Reallocate the buffer with twice the source code units in byte // size. - self.instruction(LocalGet(dst.ptr.idx)); // old_ptr - self.instruction(LocalGet(dst_byte_len.idx)); // old_size - self.ptr_uconst(dst_mem_opts, 2); // align self.convert_src_len_to_dst(src.len.idx, src_mem_opts.ptr(), dst_mem_opts.ptr()); self.ptr_uconst(dst_mem_opts, 1); self.ptr_shl(dst_mem_opts); - self.instruction(LocalTee(dst_byte_len.idx)); - self.instruction(Call(dst_mem_opts.realloc.unwrap().as_u32())); - self.instruction(LocalSet(dst.ptr.idx)); - self.verify_aligned(dst_opts.data_model.unwrap_memory(), dst.ptr.idx, 2); - self.validate_string_inbounds(&dst, dst_byte_len.idx); + let new_byte_len = self.local_set_new_tmp(dst_mem_opts.ptr()); + self.realloc( + dst.opts, + &dst.ptr, + AllocSize::Local(dst_byte_len.idx), + AllocSize::Local(new_byte_len.idx), + 2, + Trap::StringOutOfBounds, + ); + self.instruction(LocalGet(new_byte_len.idx)); + self.instruction(LocalSet(dst_byte_len.idx)); + self.free_temp_local(new_byte_len); // Call the host utf16 transcoding function. 
This will inflate the // prior latin1 bytes and then encode the rest of the source string @@ -2384,15 +2372,14 @@ impl<'a, 'b> Compiler<'a, 'b> { self.convert_src_len_to_dst(src.len.idx, src_mem_opts.ptr(), dst_mem_opts.ptr()); self.ptr_ne(dst_mem_opts); self.instruction(If(BlockType::Empty)); - self.instruction(LocalGet(dst.ptr.idx)); // old_ptr - self.instruction(LocalGet(dst_byte_len.idx)); // old_size - self.ptr_uconst(dst_mem_opts, 2); // align - self.instruction(LocalGet(dst.len.idx)); - self.ptr_uconst(dst_mem_opts, 1); - self.ptr_shl(dst_mem_opts); - self.instruction(Call(dst_mem_opts.realloc.unwrap().as_u32())); - self.instruction(LocalSet(dst.ptr.idx)); - self.verify_aligned(dst_opts.data_model.unwrap_memory(), dst.ptr.idx, 2); + self.realloc( + dst.opts, + &dst.ptr, + AllocSize::Local(dst_byte_len.idx), + AllocSize::DoubleLocal(dst.len.idx), + 2, + Trap::StringOutOfBounds, + ); self.instruction(End); // Tag the returned pointer as utf16 @@ -2405,9 +2392,6 @@ impl<'a, 'b> Compiler<'a, 'b> { self.free_temp_local(src_len_tmp); self.free_temp_local(dst_byte_len); - if let Some(tmp) = src_byte_len_tmp { - self.free_temp_local(tmp); - } dst } @@ -2466,70 +2450,6 @@ impl<'a, 'b> Compiler<'a, 'b> { } } - fn validate_string_inbounds(&mut self, s: &WasmString<'_>, byte_len: u32) { - match &s.opts.data_model { - DataModel::Gc {} => todo!("CM+GC"), - DataModel::LinearMemory(opts) => { - self.validate_memory_inbounds(opts, s.ptr.idx, byte_len, Trap::StringOutOfBounds) - } - } - } - - fn validate_memory_inbounds( - &mut self, - opts: &LinearMemoryOptions, - ptr_local: u32, - byte_len_local: u32, - trap: Trap, - ) { - let extend_to_64 = |me: &mut Self| { - if !opts.memory64() { - me.instruction(I64ExtendI32U); - } - }; - - self.instruction(Block(BlockType::Empty)); - self.instruction(Block(BlockType::Empty)); - let (memory, ty) = opts.memory.unwrap(); - - // Calculate the full byte size of memory with `memory.size`. 
Note that - // arithmetic here is done always in 64-bits to accommodate 4G memories. - // Additionally it's assumed that 64-bit memories never fill up - // entirely. - self.instruction(MemorySize(memory.as_u32())); - extend_to_64(self); - self.instruction(I64Const(ty.page_size_log2.into())); - self.instruction(I64Shl); - - // Calculate the end address of the string. This is done by adding the - // base pointer to the byte length. For 32-bit memories there's no need - // to check for overflow since everything is extended to 64-bit, but for - // 64-bit memories overflow is checked. - self.instruction(LocalGet(ptr_local)); - extend_to_64(self); - self.instruction(LocalGet(byte_len_local)); - extend_to_64(self); - self.instruction(I64Add); - if opts.memory64() { - let tmp = self.local_tee_new_tmp(ValType::I64); - self.instruction(LocalGet(ptr_local)); - self.ptr_lt_u(opts); - self.instruction(BrIf(0)); - self.instruction(LocalGet(tmp.idx)); - self.free_temp_local(tmp); - } - - // If the byte size of memory is greater than the final address of the - // string then the string is invalid. Note that if it's precisely equal - // then that's ok. - self.instruction(I64GeU); - self.instruction(BrIf(1)); - - self.instruction(End); - self.trap(trap); - self.instruction(End); - } - /// Shared preamble for translating list-like sequences (lists and maps). /// /// Emits: load ptr/len from source, compute byte lengths, malloc @@ -2578,10 +2498,6 @@ impl<'a, 'b> Compiler<'a, 'b> { let src_len = self.local_set_new_tmp(src_mem_opts.ptr()); let src_ptr = self.local_set_new_tmp(src_mem_opts.ptr()); - // Create a `Memory` operand which will internally assert that the - // `src_ptr` value is properly aligned. - let src_mem = self.memory_operand(src_opts, src_ptr, src_element_align); - // Calculate the source/destination byte lengths into unique locals. 
let src_byte_len = self.calculate_list_byte_len(src_mem_opts, src_len.idx, src_element_size); @@ -2598,28 +2514,24 @@ impl<'a, 'b> Compiler<'a, 'b> { ret }; + // Create a `Memory` operand which will internally assert that the + // `src_ptr` value is properly aligned. + let src_mem = self.memory_operand( + src_opts, + src_ptr, + AllocSize::Local(src_byte_len.idx), + src_element_align, + Trap::ListOutOfBounds, + ); + // Here `realloc` is invoked (in a `malloc`-like fashion) to allocate // space for the sequence in the destination memory. This will also // internally insert checks that the returned pointer is aligned // correctly for the destination. let dst_mem = self.malloc( dst_opts, - MallocSize::Local(dst_byte_len.idx), + AllocSize::Local(dst_byte_len.idx), dst_element_align, - ); - - // With all the pointers and byte lengths verify that both the source - // and the destination buffers are in-bounds. - self.validate_memory_inbounds( - src_mem_opts, - src_mem.addr.idx, - src_byte_len.idx, - Trap::ListOutOfBounds, - ); - self.validate_memory_inbounds( - dst_mem_opts, - dst_mem.addr.idx, - dst_byte_len.idx, Trap::ListOutOfBounds, ); @@ -2808,8 +2720,6 @@ impl<'a, 'b> Compiler<'a, 'b> { let src_key_abi = self.types.canonical_abi(&src_map_ty.key); let src_value_abi = self.types.canonical_abi(&src_map_ty.value); let src_entry_abi = CanonicalAbiInfo::record([src_key_abi, src_value_abi].into_iter()); - let (_, src_key_align) = self.types.size_align(src_mem_opts, &src_map_ty.key); - let (_, src_value_align) = self.types.size_align(src_mem_opts, &src_map_ty.value); let (src_tuple_size, src_entry_align) = src_mem_opts.sizealign(&src_entry_abi); let src_value_offset = { let mut offset = 0u32; @@ -2825,8 +2735,6 @@ impl<'a, 'b> Compiler<'a, 'b> { let dst_key_abi = self.types.canonical_abi(&dst_map_ty.key); let dst_value_abi = self.types.canonical_abi(&dst_map_ty.value); let dst_entry_abi = CanonicalAbiInfo::record([dst_key_abi, dst_value_abi].into_iter()); - let (_, 
dst_key_align) = self.types.size_align(dst_mem_opts, &dst_map_ty.key); - let (_, dst_value_align) = self.types.size_align(dst_mem_opts, &dst_map_ty.value); let (dst_tuple_size, dst_entry_align) = dst_mem_opts.sizealign(&dst_entry_abi); let dst_value_offset = { let mut offset = 0u32; @@ -2849,71 +2757,40 @@ impl<'a, 'b> Compiler<'a, 'b> { ); if let Some(ref loop_state) = seq.loop_state { - let key_src = Source::Memory(self.memory_operand( - seq.src_opts, - TempLocal { - idx: loop_state.cur_src_ptr.idx, - ty: src_mem_opts.ptr(), - needs_free: false, - }, - src_key_align, - )); - let key_dst = Destination::Memory(self.memory_operand( - seq.dst_opts, - TempLocal { - idx: loop_state.cur_dst_ptr.idx, - ty: dst_mem_opts.ptr(), - needs_free: false, - }, - dst_key_align, - )); + let key_src = Source::Memory(Memory { + opts: seq.src_opts, + offset: 0, + addr: TempLocal::new(loop_state.cur_src_ptr.idx, src_mem_opts.ptr()), + }); + let key_dst = Destination::Memory(Memory { + opts: seq.dst_opts, + offset: 0, + addr: TempLocal::new(loop_state.cur_dst_ptr.idx, dst_mem_opts.ptr()), + }); self.translate(&src_map_ty.key, &key_src, &dst_map_ty.key, &key_dst); - if src_value_offset > 0 { - self.instruction(LocalGet(loop_state.cur_src_ptr.idx)); - self.ptr_uconst(src_mem_opts, src_value_offset); - self.ptr_add(src_mem_opts); - self.instruction(LocalSet(loop_state.cur_src_ptr.idx)); - } - if dst_value_offset > 0 { - self.instruction(LocalGet(loop_state.cur_dst_ptr.idx)); - self.ptr_uconst(dst_mem_opts, dst_value_offset); - self.ptr_add(dst_mem_opts); - self.instruction(LocalSet(loop_state.cur_dst_ptr.idx)); - } - - let value_src = Source::Memory(self.memory_operand( - seq.src_opts, - TempLocal { - idx: loop_state.cur_src_ptr.idx, - ty: src_mem_opts.ptr(), - needs_free: false, - }, - src_value_align, - )); - let value_dst = Destination::Memory(self.memory_operand( - seq.dst_opts, - TempLocal { - idx: loop_state.cur_dst_ptr.idx, - ty: dst_mem_opts.ptr(), - needs_free: false, - }, - 
dst_value_align, - )); + let value_src = Source::Memory(Memory { + opts: seq.src_opts, + offset: src_value_offset, + addr: TempLocal::new(loop_state.cur_src_ptr.idx, src_mem_opts.ptr()), + }); + let value_dst = Destination::Memory(Memory { + opts: seq.dst_opts, + offset: dst_value_offset, + addr: TempLocal::new(loop_state.cur_dst_ptr.idx, dst_mem_opts.ptr()), + }); self.translate(&src_map_ty.value, &value_src, &dst_map_ty.value, &value_dst); // Advance past value + trailing padding to the next entry - let src_advance_to_next = src_tuple_size - src_value_offset; - if src_advance_to_next > 0 { + if src_tuple_size > 0 { self.instruction(LocalGet(loop_state.cur_src_ptr.idx)); - self.ptr_uconst(src_mem_opts, src_advance_to_next); + self.ptr_uconst(src_mem_opts, src_tuple_size); self.ptr_add(src_mem_opts); self.instruction(LocalSet(loop_state.cur_src_ptr.idx)); } - let dst_advance_to_next = dst_tuple_size - dst_value_offset; - if dst_advance_to_next > 0 { + if dst_tuple_size > 0 { self.instruction(LocalGet(loop_state.cur_dst_ptr.idx)); - self.ptr_uconst(dst_mem_opts, dst_advance_to_next); + self.ptr_uconst(dst_mem_opts, dst_tuple_size); self.ptr_add(dst_mem_opts); self.instruction(LocalSet(loop_state.cur_dst_ptr.idx)); } @@ -3700,21 +3577,6 @@ impl<'a, 'b> Compiler<'a, 'b> { self.instruction(GlobalSet(flags_global.as_u32())); } - fn verify_aligned(&mut self, opts: &LinearMemoryOptions, addr_local: u32, align: u32) { - // If the alignment is 1 then everything is trivially aligned and the - // check can be omitted. 
- if align == 1 { - return; - } - self.instruction(LocalGet(addr_local)); - assert!(align.is_power_of_two()); - self.ptr_uconst(opts, align - 1); - self.ptr_and(opts); - self.ptr_if(opts, BlockType::Empty); - self.trap(Trap::UnalignedPointer); - self.instruction(End); - } - fn assert_aligned(&mut self, ty: &InterfaceType, mem: &Memory) { let mem_opts = mem.mem_opts(); if !self.module.tunables.debug_adapter_modules { @@ -3735,7 +3597,39 @@ impl<'a, 'b> Compiler<'a, 'b> { self.instruction(End); } - fn malloc<'c>(&mut self, opts: &'c Options, size: MallocSize, align: u32) -> Memory<'c> { + /// Helper to invoke the guest's `realloc` function with a statically known + /// `abi`. + /// + /// This will internally validate the return value is properly aligned and + /// additionally within bounds of memory. + fn malloc_abi<'c>( + &mut self, + opts: &'c Options, + abi: &CanonicalAbiInfo, + oob_trap: Trap, + ) -> Memory<'c> { + match &opts.data_model { + DataModel::Gc {} => todo!("CM+GC"), + DataModel::LinearMemory(mem_opts) => { + let (size, align) = mem_opts.sizealign(abi); + let size = AllocSize::Const(size); + self.malloc(opts, size, align, oob_trap) + } + } + } + + /// Helper to invoke the guest's `realloc` function with the specified + /// `size` and `align`. + /// + /// This will internally validate the return value is properly aligned and + /// additionally within bounds of memory. 
+ fn malloc<'c>( + &mut self, + opts: &'c Options, + size: AllocSize, + align: u32, + oob_trap: Trap, + ) -> Memory<'c> { match &opts.data_model { DataModel::Gc {} => todo!("CM+GC"), DataModel::LinearMemory(mem_opts) => { @@ -3743,25 +3637,156 @@ impl<'a, 'b> Compiler<'a, 'b> { self.ptr_uconst(mem_opts, 0); self.ptr_uconst(mem_opts, 0); self.ptr_uconst(mem_opts, align); - match size { - MallocSize::Const(size) => self.ptr_uconst(mem_opts, size), - MallocSize::Local(idx) => self.instruction(LocalGet(idx)), - } + self.alloc_size(mem_opts, &size); self.instruction(Call(realloc.as_u32())); let addr = self.local_set_new_tmp(mem_opts.ptr()); - self.memory_operand(opts, addr, align) + self.memory_operand(opts, addr, size, align, oob_trap) } } } - fn memory_operand<'c>(&mut self, opts: &'c Options, addr: TempLocal, align: u32) -> Memory<'c> { - let ret = Memory { + /// Helper to invoke the guest's `realloc` function with the specified + /// arguments. + /// + /// This will internally validate the return value is properly aligned and + /// additionally within bounds of memory. + fn realloc( + &mut self, + opts: &Options, + ptr: &TempLocal, + prev_size: AllocSize, + size: AllocSize, + align: u32, + oob_trap: Trap, + ) { + match &opts.data_model { + DataModel::Gc {} => todo!("CM+GC"), + DataModel::LinearMemory(mem_opts) => { + let realloc = mem_opts.realloc.unwrap(); + self.instruction(LocalGet(ptr.idx)); + self.alloc_size(mem_opts, &prev_size); + self.ptr_uconst(mem_opts, align); + self.alloc_size(mem_opts, &size); + self.instruction(Call(realloc.as_u32())); + self.instruction(LocalSet(ptr.idx)); + self.validate_guest_pointer(opts, &ptr, &size, align, oob_trap) + } + } + } + + /// Convenience helper around `memory_operand` which takes a + /// statically known `abi` of the allocation.
+ fn memory_operand_abi<'c>( + &mut self, + opts: &'c Options, + addr: TempLocal, + abi: &CanonicalAbiInfo, + oob_trap: Trap, + ) -> Memory<'c> { + match &opts.data_model { + DataModel::Gc {} => todo!("CM+GC"), + DataModel::LinearMemory(mem_opts) => { + let (size, align) = mem_opts.sizealign(abi); + self.memory_operand(opts, addr, AllocSize::Const(size), align, oob_trap) + } + } + } + + /// Creates a `Memory` operand from the parts provided after validating + /// that everything is in-bounds according to `validate_guest_pointer`. + fn memory_operand<'c>( + &mut self, + opts: &'c Options, + addr: TempLocal, + size: AllocSize, + align: u32, + oob_trap: Trap, + ) -> Memory<'c> { + self.validate_guest_pointer(opts, &addr, &size, align, oob_trap); + Memory { addr, - offset: 0, opts, + offset: 0, + } + } + + /// Validates that the guest pointer `addr` is in-bounds for `size` amount + /// of bytes. + /// + /// Additionally validates that `addr` is aligned to `align`. + /// + /// Traps with `oob_trap` if the `addr` value is not in-bounds for the + /// linear memory specified by `opts`. + fn validate_guest_pointer( + &mut self, + opts: &Options, + addr: &TempLocal, + size: &AllocSize, + align: u32, + oob_trap: Trap, + ) { + let mem_opts = match &opts.data_model { + DataModel::Gc {} => todo!("CM+GC"), + DataModel::LinearMemory(mem_opts) => mem_opts, + }; + + // If the alignment is 1 then everything is trivially aligned and the + // check can be omitted. 
+ if align != 1 { + self.instruction(LocalGet(addr.idx)); + assert!(align.is_power_of_two()); + self.ptr_uconst(mem_opts, align - 1); + self.ptr_and(mem_opts); + self.ptr_if(mem_opts, BlockType::Empty); + self.trap(Trap::UnalignedPointer); + self.instruction(End); + } + + let extend_to_64 = |me: &mut Self| { + if !mem_opts.memory64() { + me.instruction(I64ExtendI32U); + } }; - self.verify_aligned(opts.data_model.unwrap_memory(), ret.addr.idx, align); - ret + + self.instruction(Block(BlockType::Empty)); + self.instruction(Block(BlockType::Empty)); + + // Calculate the full byte size of memory from `memory.size` and the + // memory's own page size. Arithmetic here is always done in 64-bits to + // accommodate 4G memories. Additionally it's assumed that 64-bit + // memories never fill up entirely. + self.instruction(MemorySize(mem_opts.memory.unwrap().0.as_u32())); + extend_to_64(self); + self.instruction(I64Const(mem_opts.memory.unwrap().1.page_size_log2.into())); + self.instruction(I64Shl); + + // Calculate the end address of the region. This is done by adding the + // base pointer to the byte length. For 32-bit memories there's no need + // to check for overflow since everything is extended to 64-bit, but for + // 64-bit memories overflow is checked. + self.instruction(LocalGet(addr.idx)); + extend_to_64(self); + self.alloc_size(mem_opts, size); + extend_to_64(self); + self.instruction(I64Add); + if mem_opts.memory64() { + let tmp = self.local_tee_new_tmp(ValType::I64); + self.instruction(LocalGet(addr.idx)); + self.ptr_lt_u(mem_opts); + self.instruction(BrIf(0)); + self.instruction(LocalGet(tmp.idx)); + self.free_temp_local(tmp); + } + + // If the byte size of memory is greater than or equal to the final + // address of the region then it is in-bounds and the trap is skipped; + // being precisely equal is ok.
+ self.instruction(I64GeU); + self.instruction(BrIf(1)); + + self.instruction(End); + self.trap(oob_trap); + self.instruction(End); } /// Generates a new local in this function of the `ty` specified, @@ -4174,6 +4199,20 @@ impl<'a, 'b> Compiler<'a, 'b> { fn f64_store(&mut self, mem: &Memory) { self.instruction(F64Store(mem.memarg(3))); } + + /// Push a pointer-typed value for `opts` on the wasm stack representing + /// the `size` passed in. + fn alloc_size(&mut self, opts: &LinearMemoryOptions, size: &AllocSize) { + match size { + AllocSize::Const(size) => self.ptr_uconst(opts, *size), + AllocSize::Local(idx) => self.instruction(LocalGet(*idx)), + AllocSize::DoubleLocal(idx) => { + self.instruction(LocalGet(*idx)); + self.ptr_uconst(opts, 1); + self.ptr_shl(opts); + } + } + } } impl<'a> Source<'a> { @@ -4391,9 +4430,10 @@ struct SequenceTranslation<'a> { loop_state: Option, } -enum MallocSize { +enum AllocSize { Const(u32), Local(u32), + DoubleLocal(u32), } struct WasmString<'a> { diff --git a/crates/environ/src/fact/transcode.rs b/crates/environ/src/fact/transcode.rs index 15e7b620ee1d..e1b159542e69 100644 --- a/crates/environ/src/fact/transcode.rs +++ b/crates/environ/src/fact/transcode.rs @@ -56,9 +56,10 @@ impl Transcoder { // Transcoding to utf8 as a smaller format takes all the parameters // and returns the amount of space consumed in the src/destination - Transcode::Utf16ToUtf8 | Transcode::Latin1ToUtf8 => { - types.function(&[from_ptr, from_ptr, to_ptr, to_ptr], &[from_ptr, to_ptr]) - } + Transcode::Utf16ToUtf8 | Transcode::Latin1ToUtf8 => types.function( + &[from_ptr, from_ptr, to_ptr, to_ptr, ValType::I32], + &[from_ptr, to_ptr], + ), // The return type is a tagged length which indicates which was // used diff --git a/crates/wasmtime/src/runtime/vm/component/libcalls.rs b/crates/wasmtime/src/runtime/vm/component/libcalls.rs index d9c7588468c0..b5cd7db9341f 100644 --- a/crates/wasmtime/src/runtime/vm/component/libcalls.rs +++ 
b/crates/wasmtime/src/runtime/vm/component/libcalls.rs @@ -341,6 +341,7 @@ unsafe fn utf16_to_utf8( src_len: usize, dst: *mut u8, dst_len: usize, + first_pass: u32, ) -> Result { let src = unsafe { slice::from_raw_parts(src, src_len) }; let mut dst = unsafe { slice::from_raw_parts_mut(dst, dst_len) }; @@ -361,6 +362,12 @@ unsafe fn utf16_to_utf8( for ch in core::char::decode_utf16(src_iter) { let ch = ch.map_err(|_| format_err!("invalid utf16 encoding"))?; + // The spec mandates that the first pass of transcoding bails out on the + // first multibyte character. + if first_pass != 0 && u32::from(ch) >= 0x80 { + break; + } + // If the destination doesn't have enough space for this character // then the loop is ended and this function will be called later with a // larger destination buffer. @@ -396,11 +403,19 @@ unsafe fn latin1_to_utf8( src_len: usize, dst: *mut u8, dst_len: usize, + first_pass: u32, ) -> Result { let src = unsafe { slice::from_raw_parts(src, src_len) }; let dst = unsafe { slice::from_raw_parts_mut(dst, dst_len) }; assert_no_overlap(src, dst); - let (read, written) = encoding_rs::mem::convert_latin1_to_utf8_partial(src, dst); + // The spec mandates that this transcoding in the first pass halts when a + // multi-byte utf8 code point is encountered, so handle that here. + let stop = if first_pass != 0 { + src.iter().position(|i| *i >= 0x80).unwrap_or(src.len()) + } else { + src.len() + }; + let (read, written) = encoding_rs::mem::convert_latin1_to_utf8_partial(&src[..stop], dst); log::trace!("latin1-to-utf8 {src_len}/{dst_len} => ({read}, {written})"); Ok(SizePair { src_read: read, diff --git a/tests/misc_testsuite/component-model/big-strings.wast b/tests/misc_testsuite/component-model/big-strings.wast index f04151019382..d0c36510c9c5 100644 --- a/tests/misc_testsuite/component-model/big-strings.wast +++ b/tests/misc_testsuite/component-model/big-strings.wast @@ -1,66 +1,419 @@ ;;! multi_memory = true ;;! 
hogs_memory = true -;; Sending a massive string +;; This is a test which exercises the various behaviors of sending massive +;; strings from one component to another. This ensures that all memory accesses +;; are bounds checked, for example. This additionally ensures that all maximal +;; widths of strings are respected. +;; +;; The test here is relatively carefully crafted to not actually need to create +;; massive strings at runtime. The goal here is to test what kind of trap +;; happens before any "real" transcoding happens. Transcoding of 1 or 2 bytes +;; should happen but eventually `realloc` will fail-fast before any real +;; transcoding. Memory growth is assumed to be VM-based and thus quite fast. (component definition $A (component $A (core module $m (memory (export "m") 1) + (global $allow (mut i32) (i32.const 0)) (func (export "f") (param i32 i32) unreachable) - (func (export "realloc") (param i32 i32 i32 i32) (result i32) unreachable) + (func (export "realloc") (param i32 i32 i32 i32) (result i32) + (local $target-pages i32) + ;; if realloc isn't allowed, then trap + global.get $allow + i32.eqz + if unreachable end + + i32.const 0 + global.set $allow + + (local.set $target-pages + (i32.shr_u + (i32.add + (local.get 3) + (i32.const 65535)) + (i32.const 16))) + + (if (i32.lt_u (memory.size) (local.get $target-pages)) + (then + (memory.grow (i32.sub (local.get $target-pages) (memory.size))) + i32.const -1 + i32.eq + if unreachable end + ) + ) + + i32.const 0 + ) + + (func (export "allow-one-realloc") + (global.set $allow (i32.const 1))) ) (core instance $i (instantiate $m)) - (func (export "f") (param "x" string) + (func (export "utf8") (param "x" string) (canon lift (core func $i "f") (memory $i "m") (realloc (func $i "realloc")) + string-encoding=utf8 ) ) + (func (export "utf16") (param "x" string) + (canon lift + (core func $i "f") + (memory $i "m") + (realloc (func $i "realloc")) + string-encoding=utf16 + ) + ) + (func (export "latin1-utf16") (param "x" 
string) + (canon lift + (core func $i "f") + (memory $i "m") + (realloc (func $i "realloc")) + string-encoding=latin1+utf16 + ) + ) + + (func (export "allow-one-realloc") (canon lift (core func $i "allow-one-realloc"))) ) (instance $a (instantiate $A)) (component $B - (import "f" (func $f (param "x" string))) + (import "a" (instance $a + (export "utf8" (func (param "x" string))) + (export "utf16" (func (param "x" string))) + (export "latin1-utf16" (func (param "x" string))) + )) + (core module $libc (memory (export "mem") 1)) (core instance $libc (instantiate $libc)) - (core func $f (canon lower (func $f) (memory $libc "mem"))) + (core func $utf8-utf8 (canon lower (func $a "utf8") string-encoding=utf8 (memory $libc "mem"))) + (core func $utf8-utf16 (canon lower (func $a "utf16") string-encoding=utf8 (memory $libc "mem"))) + (core func $utf8-latin1+utf16 (canon lower (func $a "latin1-utf16") string-encoding=utf8 (memory $libc "mem"))) + + (core func $utf16-utf8 (canon lower (func $a "utf8") string-encoding=utf16 (memory $libc "mem"))) + (core func $utf16-utf16 (canon lower (func $a "utf16") string-encoding=utf16 (memory $libc "mem"))) + (core func $utf16-latin1+utf16 (canon lower (func $a "latin1-utf16") string-encoding=utf16 (memory $libc "mem"))) + + (core func $latin1+utf16-utf8 (canon lower (func $a "utf8") string-encoding=latin1+utf16 (memory $libc "mem"))) + (core func $latin1+utf16-utf16 (canon lower (func $a "utf16") string-encoding=latin1+utf16 (memory $libc "mem"))) + (core func $latin1+utf16-latin1+utf16 (canon lower (func $a "latin1-utf16") string-encoding=latin1+utf16 (memory $libc "mem"))) + (core module $m - (import "" "f" (func $f (param i32 i32))) + (import "" "utf8-utf8" (func $utf8-utf8 (param i32 i32))) + (import "" "utf8-utf16" (func $utf8-utf16 (param i32 i32))) + (import "" "utf8-latin1+utf16" (func $utf8-latin1+utf16 (param i32 i32))) + + (import "" "utf16-utf8" (func $utf16-utf8 (param i32 i32))) + (import "" "utf16-utf16" (func $utf16-utf16 
(param i32 i32))) + (import "" "utf16-latin1+utf16" (func $utf16-latin1+utf16 (param i32 i32))) + + (import "" "latin1+utf16-utf8" (func $latin1+utf16-utf8 (param i32 i32))) + (import "" "latin1+utf16-utf16" (func $latin1+utf16-utf16 (param i32 i32))) + (import "" "latin1+utf16-latin1+utf16" (func $latin1+utf16-latin1+utf16 (param i32 i32))) + (import "" "mem" (memory 1)) - (func (export "run") (param i32) - (call $f (i32.const 0) (local.get 0))) + (func (export "utf8-utf8") (param i32) + (call $utf8-utf8 (i32.const 0) (local.get 0))) + (func (export "utf8-utf16") (param i32) + (call $utf8-utf16 (i32.const 0) (local.get 0))) + (func (export "utf8-latin1+utf16") (param i32) + (call $utf8-latin1+utf16 (i32.const 0) (local.get 0))) + + (func (export "utf16-utf8") (param i32) + (call $utf16-utf8 (i32.const 0) (local.get 0))) + (func (export "utf16-utf16") (param i32) + (call $utf16-utf16 (i32.const 0) (local.get 0))) + (func (export "utf16-latin1+utf16") (param i32) + (call $utf16-latin1+utf16 (i32.const 0) (local.get 0))) + + (func (export "latin1+utf16-utf8") (param i32) + (call $latin1+utf16-utf8 (i32.const 0) (local.get 0))) + (func (export "latin1+utf16-utf16") (param i32) + (call $latin1+utf16-utf16 (i32.const 0) (local.get 0))) + (func (export "latin1+utf16-latin1+utf16") (param i32) + (call $latin1+utf16-latin1+utf16 (i32.const 0) (local.get 0))) (func (export "grow") (param i32) (result i32) (memory.grow (local.get 0))) + (func (export "store8") (param i32 i32) + (i32.store8 (local.get 0) (local.get 1))) ) (core instance $i (instantiate $m (with "" (instance - (export "f" (func $f)) + (export "utf8-utf8" (func $utf8-utf8)) + (export "utf8-utf16" (func $utf8-utf16)) + (export "utf8-latin1+utf16" (func $utf8-latin1+utf16)) + (export "utf16-utf8" (func $utf16-utf8)) + (export "utf16-utf16" (func $utf16-utf16)) + (export "utf16-latin1+utf16" (func $utf16-latin1+utf16)) + (export "latin1+utf16-utf8" (func $latin1+utf16-utf8)) + (export "latin1+utf16-utf16" (func 
$latin1+utf16-utf16)) + (export "latin1+utf16-latin1+utf16" (func $latin1+utf16-latin1+utf16)) + (export "mem" (memory $libc "mem")) )) )) - (func (export "run") (param "x" u32) (canon lift (core func $i "run"))) + + (func (export "utf8-utf8") (param "x" u32) (canon lift (core func $i "utf8-utf8"))) + (func (export "utf8-utf16") (param "x" u32) (canon lift (core func $i "utf8-utf16"))) + (func (export "utf8-latin1-utf16") (param "x" u32) (canon lift (core func $i "utf8-latin1+utf16"))) + + (func (export "utf16-utf8") (param "x" u32) (canon lift (core func $i "utf16-utf8"))) + (func (export "utf16-utf16") (param "x" u32) (canon lift (core func $i "utf16-utf16"))) + (func (export "utf16-latin1-utf16") (param "x" u32) (canon lift (core func $i "utf16-latin1+utf16"))) + + (func (export "latin1-utf16-utf8") (param "x" u32) (canon lift (core func $i "latin1+utf16-utf8"))) + (func (export "latin1-utf16-utf16") (param "x" u32) (canon lift (core func $i "latin1+utf16-utf16"))) + (func (export "latin1-utf16-latin1-utf16") (param "x" u32) (canon lift (core func $i "latin1+utf16-latin1+utf16"))) + (func (export "grow") (param "x" u32) (result s32) (canon lift (core func $i "grow"))) + (func (export "store8") (param "addr" u32) (param "val" u8) + (canon lift (core func $i "store8"))) ) - (instance $b (instantiate $B (with "f" (func $a "f")))) - (export "run" (func $b "run")) + (instance $b (instantiate $B (with "a" (instance $a)))) + + (export "utf8-utf8" (func $b "utf8-utf8")) + (export "utf8-utf16" (func $b "utf8-utf16")) + (export "utf8-latin1-utf16" (func $b "utf8-latin1-utf16")) + + (export "utf16-utf8" (func $b "utf16-utf8")) + (export "utf16-utf16" (func $b "utf16-utf16")) + (export "utf16-latin1-utf16" (func $b "utf16-latin1-utf16")) + + (export "latin1-utf16-utf8" (func $b "latin1-utf16-utf8")) + (export "latin1-utf16-utf16" (func $b "latin1-utf16-utf16")) + (export "latin1-utf16-latin1-utf16" (func $b "latin1-utf16-latin1-utf16")) + (export "grow" (func $b "grow")) + 
(export "store8" (func $b "store8")) + (export "allow-one-realloc" (func $a "allow-one-realloc")) ) -;; Wildly out of bounds is just rejected + +;; Test the 9 various permutations below. For each permutation there's a test +;; for the string flat-out being out-of-bounds, a test for a just in-bounds +;; string which hits `unreachable` in the realloc, and then a test for a just +;; out-of-bounds string. + +;; utf8 -> utf8 -- can pass up to (1<<31)-1 +(component instance $A $A) +(assert_trap (invoke "utf8-utf8" (u32.const 0x7fff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "utf8-utf8" (u32.const 0x7fff_ffff)) "unreachable") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "utf8-utf8" (u32.const 0x8000_0000)) "string content out-of-bounds") + +;; utf8 -> utf16 -- worst case alloc up-front means that the maximum byte length +;; is half the prior case. 
+(component instance $A $A) +(assert_trap (invoke "utf8-utf16" (u32.const 0x3fff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "utf8-utf16" (u32.const 0x3fff_ffff)) "unreachable") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "utf8-utf16" (u32.const 0x4000_0000)) "string content out-of-bounds") + +;; utf8 -> latin1+utf16 -- initial utf8-string can't be too big +(component instance $A $A) +(assert_trap (invoke "utf8-latin1-utf16" (u32.const 0x7fff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "utf8-latin1-utf16" (u32.const 0x7fff_ffff)) "unreachable") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "utf8-latin1-utf16" (u32.const 0x8000_0000)) "string content out-of-bounds") + +;; utf8 -> latin1+utf16 -- mid-transcode inflation to utf16 has limits on size +;; which only shows up on the second realloc. 
+;; +;; here `chr(0x100).encode('utf8') == "\xc4\x80"` +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_return (invoke "allow-one-realloc")) +(assert_return (invoke "store8" (u32.const 0) (u8.const 0xc4))) +(assert_return (invoke "store8" (u32.const 1) (u8.const 0x80))) +(assert_trap (invoke "utf8-latin1-utf16" (u32.const 0x3fff_ffff)) "unreachable") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_return (invoke "allow-one-realloc")) +(assert_return (invoke "store8" (u32.const 0) (u8.const 0xc4))) +(assert_return (invoke "store8" (u32.const 1) (u8.const 0x80))) +(assert_trap (invoke "utf8-latin1-utf16" (u32.const 0x4000_0000)) "string content out-of-bounds") + +;; utf16 -> utf8 -- if all utf16 code units become 1 utf8 byte then up to +;; (1<<30)-1 utf16 codepoints are allowed +(component instance $A $A) +(assert_trap (invoke "utf16-utf8" (u32.const 0x3fff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "utf16-utf8" (u32.const 0x3fff_ffff)) "unreachable") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "utf16-utf8" (u32.const 0x4000_0000)) "string content out-of-bounds") + +;; utf16 -> utf8 -- if a utf16 code unit becomes more than one utf8 byte then +;; ((1<<31)-1)/3 utf16 codepoints are allowed +;; +;; "ÿ" in utf16 is two bytes, "\xff\x00", and encodes as a multi-byte value in +;; utf8 which causes encoding to inflate the utf-8 buffer to the maximum +;; length. This means that a single realloc happens, a few bytes are transcoded, +;; and then a second realloc happens. The boundary around invoking this second +;; realloc and testing for too-large a string is what's tested here.
+(component instance $A $A) +(assert_trap (invoke "utf16-utf8" (u32.const 715827882)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_return (invoke "allow-one-realloc")) +(assert_return (invoke "store8" (u32.const 0) (u8.const 0xff))) +(assert_return (invoke "store8" (u32.const 1) (u8.const 0x00))) +(assert_trap (invoke "utf16-utf8" (u32.const 715827882)) "unreachable") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_return (invoke "allow-one-realloc")) +(assert_return (invoke "store8" (u32.const 0) (u8.const 0xff))) +(assert_return (invoke "store8" (u32.const 1) (u8.const 0x00))) +(assert_trap (invoke "utf16-utf8" (u32.const 715827883)) "string content out-of-bounds") + +;; utf16 -> utf16 -- (1<<30)-1 utf16 codepoints are allowed +(component instance $A $A) +(assert_trap (invoke "utf16-utf16" (u32.const 0x3fff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "utf16-utf16" (u32.const 0x3fff_ffff)) "unreachable") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "utf16-utf16" (u32.const 0x4000_0000)) "string content out-of-bounds") + +;; utf16 -> latin1+utf16 -- initial utf16-string can't be too big +(component instance $A $A) +(assert_trap (invoke "utf16-latin1-utf16" (u32.const 0x3fff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "utf16-latin1-utf16" (u32.const 0x3fff_ffff)) "unreachable") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "utf16-latin1-utf16" (u32.const 0x4000_0000)) "string content out-of-bounds") + +;; utf16 -> latin1+utf16 -- mid-transcode inflation to utf16 has limits 
on size +;; which only shows up on the second realloc. +;; +;; here `chr(0x100).encode('utf-16-le') == "\x00\x01" +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_return (invoke "allow-one-realloc")) +(assert_return (invoke "store8" (u32.const 0) (u8.const 0x00))) +(assert_return (invoke "store8" (u32.const 1) (u8.const 0x01))) +(assert_trap (invoke "utf16-latin1-utf16" (u32.const 0x3fff_ffff)) "unreachable") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_return (invoke "allow-one-realloc")) +(assert_return (invoke "store8" (u32.const 0) (u8.const 0x00))) +(assert_return (invoke "store8" (u32.const 1) (u8.const 0x01))) +(assert_trap (invoke "utf16-latin1-utf16" (u32.const 0x4000_0000)) "string content out-of-bounds") + +;; latin1+utf16 -> utf8 / latin1 -> utf8 - if it's all single-byte utf8 +;; characters there's no actual limit. +(component instance $A $A) +(assert_trap (invoke "latin1-utf16-utf8" (u32.const 0x3fff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "latin1-utf16-utf8" (u32.const 0x3fff_ffff)) "unreachable") + +;; latin1+utf16 -> utf8 / latin1 -> utf8 - with a multi-byte character there's +;; a size limit. 
+(component instance $A $A) +(assert_trap (invoke "latin1-utf16-utf8" (u32.const 0x3fff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_return (invoke "allow-one-realloc")) +(assert_return (invoke "store8" (u32.const 0) (u8.const 0xff))) +(assert_trap (invoke "latin1-utf16-utf8" (u32.const 0x3fff_ffff)) "unreachable") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_return (invoke "allow-one-realloc")) +(assert_return (invoke "store8" (u32.const 0) (u8.const 0xff))) +(assert_trap (invoke "latin1-utf16-utf8" (u32.const 0x4000_0000)) "string content out-of-bounds") + +;; latin1+utf16 -> utf8 / utf16 -> utf8 - if it's all single-byte utf8 +;; characters there's no actual limit. (component instance $A $A) -(assert_trap (invoke "run" (u32.const 0x8000_0000)) "string content out-of-bounds") +(assert_trap (invoke "latin1-utf16-utf8" (u32.const 0x3fff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "latin1-utf16-utf8" (u32.const 0x3fff_ffff)) "unreachable") -;; In-bounds, and just under the limit. Should hit the `unreachable` in the -;; `realloc`. +;; latin1+utf16 -> utf8 / utf16 -> utf8 - string can be too large just like +;; normal utf16 -> utf8 path. 
+(component instance $A $A) +(assert_trap (invoke "latin1-utf16-utf8" (u32.const 0xbfff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "latin1-utf16-utf8" (u32.const 0xbfff_ffff)) "unreachable") (component instance $A $A) (assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) -(assert_trap (invoke "run" (u32.const 0x7fff_ffff)) "unreachable") +(assert_trap (invoke "latin1-utf16-utf8" (u32.const 0xc000_0000)) "string content out-of-bounds") -;; Size exceeds `(1<<31)-1` +;; latin1+utf16 -> utf8 / utf16 -> utf8 - with multi-byte characters we're +;; limited, unlike the single-byte case above. +;; +;; For more details see the utf16 -> utf8 case far above. Note that +;; `715827882 == 0x2aaaaaaa` and with the upper bit set that's 0xaaaaaaaa +(component instance $A $A) +(assert_trap (invoke "latin1-utf16-utf8" (u32.const 0xaaaaaaaa)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_return (invoke "allow-one-realloc")) +(assert_return (invoke "store8" (u32.const 0) (u8.const 0xff))) +(assert_trap (invoke "latin1-utf16-utf8" (u32.const 0xaaaaaaaa)) "unreachable") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_return (invoke "allow-one-realloc")) +(assert_return (invoke "store8" (u32.const 0) (u8.const 0xff))) +(assert_trap (invoke "latin1-utf16-utf8" (u32.const 0xaaaaaaab)) "string content out-of-bounds") + +;; latin1+utf16 -> utf16 / latin1 -> utf16 - simple inflation but the string +;; can be too big since it's doubling in byte size. 
+(component instance $A $A) +(assert_trap (invoke "latin1-utf16-utf16" (u32.const 0x3fff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "latin1-utf16-utf16" (u32.const 0x3fff_ffff)) "unreachable") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "latin1-utf16-utf16" (u32.const 0x4000_0000)) "string content out-of-bounds") + +;; latin1+utf16 -> utf16 / utf16 -> utf16 - simple inflation, but string can be +;; too large. +(component instance $A $A) +(assert_trap (invoke "latin1-utf16-utf16" (u32.const 0xbfff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "latin1-utf16-utf16" (u32.const 0xbfff_ffff)) "unreachable") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "latin1-utf16-utf16" (u32.const 0xc000_0000)) "string content out-of-bounds") + +;; latin1+utf16 -> latin1+utf16 - latin1 src is a simple copy with no limit. +(component instance $A $A) +(assert_trap (invoke "latin1-utf16-latin1-utf16" (u32.const 0x7fff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "latin1-utf16-latin1-utf16" (u32.const 0x7fff_ffff)) "unreachable") + +;; latin1+utf16 -> latin1+utf16 - utf16 src means that the string can be +;; too large, so test that here. 
+(component instance $A $A) +(assert_trap (invoke "latin1-utf16-latin1-utf16" (u32.const 0xbfff_ffff)) "string content out-of-bounds") +(component instance $A $A) +(assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) +(assert_trap (invoke "latin1-utf16-latin1-utf16" (u32.const 0xbfff_ffff)) "unreachable") (component instance $A $A) (assert_return (invoke "grow" (u32.const 65530)) (s32.const 1)) -(assert_trap (invoke "run" (u32.const 0x8000_0000)) "string content out-of-bounds") +(assert_trap (invoke "latin1-utf16-latin1-utf16" (u32.const 0xc000_0000)) "string content out-of-bounds")