From c0d0e406e62a2167f421ec0cdb653fd56600ddc0 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 8 Jan 2024 13:11:20 -0600 Subject: [PATCH 1/3] Change update of gcc on MinGW (#7760) Try not passing `-y -u` to `pacman` to avoid full system updates. Currently full system updates might update the `msys2-runtime` package before actually updating the package we requested, meaning that the package we asked for may never actually get updated. This is what's currently happening on CI, which is breaking because an update of gcc doesn't actually update gcc. I'm mostly reading the invocation in rust-lang/rust CI, seeing that it doesn't pass `-y -u`, and hoping that copying it here makes things work. prtest:full --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5eec5e795973..55dcc0c51795 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -431,7 +431,7 @@ jobs: if: matrix.target == 'x86_64-pc-windows-gnu' # Update binutils if MinGW due to https://github.com/rust-lang/rust/issues/112368 - - run: C:/msys64/usr/bin/pacman.exe -Syu --needed mingw-w64-x86_64-gcc --noconfirm + - run: C:/msys64/usr/bin/pacman.exe -S --needed mingw-w64-x86_64-gcc --noconfirm if: matrix.target == 'x86_64-pc-windows-gnu' - shell: pwsh run: echo "C:\msys64\mingw64\bin" >> $Env:GITHUB_PATH From 176dca67f11518ec2ed9aaddb2c0e704492a880c Mon Sep 17 00:00:00 2001 From: Tyler Rockwood Date: Mon, 8 Jan 2024 12:12:50 -0600 Subject: [PATCH 2/3] adopters: add Redpanda (#7758) Redpanda embeds Wasmtime to power our inline data transforms, allowing developers to transform data directly within the broker. --- ADOPTERS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ADOPTERS.md b/ADOPTERS.md index 581828946d9d..979bbf18fa12 100644 --- a/ADOPTERS.md +++ b/ADOPTERS.md @@ -14,5 +14,6 @@ Wasmtime is used in many different production use-cases. This list has grown sig | [Fermyon](https://fermyon.com) | [@tschneidereit](https://github.com/tschneidereit) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | Fermyon Cloud is a cloud application platform for WebAssembly-based serverless functions and microservices. | | [InfinyOn](https://infinyon.com/) | [@sehz](https://github.com/sehz) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | InfinyOn leverages the power of WebAssembly SmartModules to execute real-time data transformations. | | [Microsoft](https://microsoft.com/) | [@devigned](https://gist.github.com/devigned) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | Microsoft has had Wasmtime in preview for its WebAssembly System Interface (WASI) node pools in Azure Kubernetes Service since October 2021. | +| [Redpanda](https://redpanda.com/) | [@rockwotj](https://github.com/rockwotj) | ![beta](https://img.shields.io/badge/-beta-orange?style=flat) | Redpanda Data Transforms allow developers to transform data directly in the message broker. | | [Shopify](https://www.shopify.com/) | [@saulecabrera](https://github.com/saulecabrera) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | Shopify Functions allow developers to customize the backend logic of Shopify.
| | [SingleStore](https://www.singlestore.com/) | [@Kylebrown9](https://github.com/Kylebrown9) | ![production](https://img.shields.io/badge/-production-blue?style=flat) | SingleStoreDB Cloud embeds Wasmtime to bring developers' code to the data, safely and with speed and scale. | From 446a7f5e02a3875ccb30770c08d2219f61f11eeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl=20Cabrera?= Date: Mon, 8 Jan 2024 14:57:40 -0500 Subject: [PATCH 3/3] winch: Multi-Value Part 2: Blocks (#7707) * winch: Multi-Value Part 2: Blocks This commit adds support for the Multi-Value proposal for blocks. In general, this change introduces multiple building blocks to enable supporting arbitrary params and results in blocks: * `BlockType`: Introduces a block type to categorize each block. This makes it easier to categorize blocks per type and also makes it possible to defer the calculation of the `ABIResults` until they are actually needed rather than calculating everything upfront, even though they might not be needed (when in an unreachable state). * Push/pop operations are now frame-aware. Given that each `ControlStackFrame` contains all the information needed regarding params and results, this change moves the implementation of the push and pop operations to the `ControlStackFrame` struct. * `StackState`: this struct holds the entry and exit invariants of each block; these invariants are pre-computed when entering the block and used throughout code generation to handle params and results and to assert the respective invariants. In terms of the mechanics of the implementation: when entering each block, if there are results on the stack, the expected stack pointer offsets will be calculated via the `StackState`, and the `target_offset` will be used to create the block's `RetArea`. Note that when entering the block and calculating the `StackState` no space is actually reserved for the results; any increase in stack space is deferred until the results are popped from the value stack via `ControlStackFrame::pop_abi_results`. The trickiest bit of the implementation is handling constant values that need to be placed in the right location on the machine stack. Given that constants are generally not spilled, in order to keep the machine and value stacks in sync (spilled-values-wise), values must be shuffled to ensure that constants end up in the expected result locations. See the comment in `ControlStackFrame::adjust_stack_results` for more details.
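To make the `StackState` invariants described above concrete, here is a minimal, self-contained Rust sketch of the entry/exit arithmetic. The struct, the `stack_state` function, and all of its parameters are illustrative stand-ins, not Winch's actual API; the real `StackState` lives in `winch/codegen/src/codegen/control.rs` and works in terms of `SPOffset` and the shadow value stack.

    /// Hypothetical, simplified mirror of Winch's `StackState`.
    #[derive(Debug, PartialEq)]
    struct StackState {
        base_offset: u32,   // SP offset on entry, after subtracting the block params
        target_offset: u32, // base_offset plus the size of the stack results
        base_len: usize,    // value stack length minus the block params
        target_len: usize,  // base_len plus the number of block results
    }

    /// Entry/exit invariants of a block: params are subtracted from the
    /// current state, results are added on top of the resulting base.
    fn stack_state(
        current_sp: u32,         // machine stack pointer offset after spilling
        value_stack_len: usize,  // current length of the value stack
        param_sizes: &[u32],     // byte sizes of the block params
        result_count: usize,     // number of block results
        stack_results_size: u32, // bytes of results returned via the stack
    ) -> StackState {
        let params_size: u32 = param_sizes.iter().sum();
        let base_offset = current_sp - params_size;
        let base_len = value_stack_len - param_sizes.len();
        StackState {
            base_offset,
            target_offset: base_offset + stack_results_size,
            base_len,
            target_len: base_len + result_count,
        }
    }

    fn main() {
        // A block with one 4-byte param and two results, one of which (4
        // bytes) lives on the machine stack: `target_offset` is what would
        // seed the block's `RetArea`.
        let state = stack_state(16, 3, &[4], 2, 4);
        assert_eq!(state.base_offset, 12);
        assert_eq!(state.target_offset, 16);
        assert_eq!(state.target_len, 4);
        println!("{state:?}");
    }

Note that, per the description above, computing the state reserves no stack space by itself; growth toward `target_offset` is deferred until results are popped.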
* Review fixes --- fuzz/fuzz_targets/differential.rs | 2 - tests/misc_testsuite/winch/block.wast | 89 +++ tests/misc_testsuite/winch/br.wast | 41 ++ tests/misc_testsuite/winch/if.wast | 135 ++++ tests/misc_testsuite/winch/loop.wast | 161 +++++ winch/codegen/src/abi/mod.rs | 94 ++- winch/codegen/src/codegen/call.rs | 46 +- winch/codegen/src/codegen/context.rs | 117 +--- winch/codegen/src/codegen/control.rs | 933 +++++++++++++++++++------- winch/codegen/src/codegen/env.rs | 67 +- winch/codegen/src/codegen/mod.rs | 82 ++- winch/codegen/src/isa/aarch64/abi.rs | 10 +- winch/codegen/src/isa/x64/abi.rs | 11 +- winch/codegen/src/isa/x64/masm.rs | 2 +- winch/codegen/src/masm.rs | 22 +- winch/codegen/src/stack.rs | 22 +- winch/codegen/src/trampoline.rs | 13 +- winch/codegen/src/visitor.rs | 96 +-- 18 files changed, 1393 insertions(+), 550 deletions(-) diff --git a/fuzz/fuzz_targets/differential.rs b/fuzz/fuzz_targets/differential.rs index fa3f37dc1d93..fef388c85bb1 100644 --- a/fuzz/fuzz_targets/differential.rs +++ b/fuzz/fuzz_targets/differential.rs @@ -82,10 +82,8 @@ fn execute_one(data: &[u8]) -> Result<()> { // When fuzzing Winch, explicitly override the compiler strategy, which by // default its arbitrary implementation unconditionally returns // `Cranelift`. - // We also explicitly disable multi-value support. if fuzz_winch { config.wasmtime.compiler_strategy = CompilerStrategy::Winch; - config.module_config.config.multi_value_enabled = false; } // Choose an engine that Wasmtime will be differentially executed against. diff --git a/tests/misc_testsuite/winch/block.wast b/tests/misc_testsuite/winch/block.wast index 6a1a9c65e0da..aa5c966f57a6 100644 --- a/tests/misc_testsuite/winch/block.wast +++ b/tests/misc_testsuite/winch/block.wast @@ -16,6 +16,21 @@ (block (result i32) (i32.const 7)) ) + (func (export "multi") (result i32) + (block (call $dummy) (call $dummy) (call $dummy) (call $dummy)) + (block (result i32) + (call $dummy) (call $dummy) (call $dummy) (i32.const 7) (call $dummy) + ) + (drop) + + (block (result i32 i64 i32) + (call $dummy) (call $dummy) (call $dummy) (i32.const 8) (call $dummy) + (call $dummy) (call $dummy) (call $dummy) (i64.const 7) (call $dummy) + (call $dummy) (call $dummy) (call $dummy) (i32.const 9) (call $dummy) + ) + (drop) (drop) + ) + (func (export "nested") (result i32) (block (result i32) (block (call $dummy) (block) (nop)) @@ -158,6 +173,70 @@ (local.get 0) ) + (func (export "param") (result i32) + (i32.const 1) + (block (param i32) (result i32) + (i32.const 2) + (i32.add) + ) + ) + (func (export "params") (result i32) + (i32.const 1) + (i32.const 2) + (block (param i32 i32) (result i32) + (i32.add) + ) + ) + (func (export "params-id") (result i32) + (i32.const 1) + (i32.const 2) + (block (param i32 i32) (result i32 i32)) + (i32.add) + ) + (func (export "param-break") (result i32) + (i32.const 1) + (block (param i32) (result i32) + (i32.const 2) + (i32.add) + (br 0) + ) + ) + (func (export "params-break") (result i32) + (i32.const 1) + (i32.const 2) + (block (param i32 i32) (result i32) + (i32.add) + (br 0) + ) + ) + (func (export "params-id-break") (result i32) + (i32.const 1) + (i32.const 2) + (block (param i32 i32) (result i32 i32) (br 0)) + (i32.add) + ) + + (type $block-sig-1 (func)) + (type $block-sig-2 (func (result i32))) + (type $block-sig-3 (func (param $x i32))) + (type $block-sig-4 (func (param i32 f64 i32) (result i32 f64 i32))) + + (func (export "type-use") + (block (type $block-sig-1)) + (block (type $block-sig-2) (i32.const 0)) + (block (type 
$block-sig-3) (drop)) + (i32.const 0) (f64.const 0) (i32.const 0) + (block (type $block-sig-4)) + (drop) (drop) (drop) + (block (type $block-sig-2) (result i32) (i32.const 0)) + (block (type $block-sig-3) (param i32) (drop)) + (i32.const 0) (f64.const 0) (i32.const 0) + (block (type $block-sig-4) + (param i32) (param f64 i32) (result i32 f64) (result i32) + ) + (drop) (drop) (drop) + ) + (func (export "effects") (result i32) (local i32) (block @@ -174,6 +253,7 @@ (assert_return (invoke "empty")) (assert_return (invoke "singular") (i32.const 7)) +(assert_return (invoke "multi") (i32.const 8)) (assert_return (invoke "nested") (i32.const 9)) (assert_return (invoke "deep") (i32.const 150)) (assert_return (invoke "as-if-condition")) @@ -196,3 +276,12 @@ (assert_return (invoke "as-br_table-last") (i32.const 2)) (assert_return (invoke "break-bare") (i32.const 19)) (assert_return (invoke "break-repeated") (i32.const 18)) + +(assert_return (invoke "param") (i32.const 3)) +(assert_return (invoke "params") (i32.const 3)) +(assert_return (invoke "params-id") (i32.const 3)) +(assert_return (invoke "param-break") (i32.const 3)) +(assert_return (invoke "params-break") (i32.const 3)) +(assert_return (invoke "params-id-break") (i32.const 3)) + +(assert_return (invoke "type-use")) diff --git a/tests/misc_testsuite/winch/br.wast b/tests/misc_testsuite/winch/br.wast index 012c203a4e4a..a1e5b7c3447d 100644 --- a/tests/misc_testsuite/winch/br.wast +++ b/tests/misc_testsuite/winch/br.wast @@ -4,6 +4,16 @@ ;; Auxiliary definition (func $dummy) + (func (export "type-i32") (block (drop (i32.ctz (br 0))))) + (func (export "type-i64") (block (drop (i64.ctz (br 0))))) + (func (export "type-f32") (block (drop (f32.neg (br 0))))) + (func (export "type-f64") (block (drop (f64.neg (br 0))))) + (func (export "type-i32-i32") (block (drop (i32.add (br 0))))) + (func (export "type-i64-i64") (block (drop (i64.add (br 0))))) + (func (export "type-f32-f32") (block (drop (f32.add (br 0))))) + (func (export "type-f64-f64") (block (drop (f64.add (br 0))))) + + (func (export "type-i32-value") (result i32) (block (result i32) (i32.ctz (br 0 (i32.const 1)))) ) @@ -11,6 +21,18 @@ (block (result i64) (i64.ctz (br 0 (i64.const 2)))) ) + (func (export "type-f32-value") (result f32) + (block (result f32) (f32.neg (br 0 (f32.const 3)))) + ) + (func (export "type-f64-value") (result f64) + (block (result f64) (f64.neg (br 0 (f64.const 4)))) + ) + (func (export "type-f64-f64-value") (result f64 f64) + (block (result f64 f64) + (f64.add (br 0 (f64.const 4) (f64.const 5))) (f64.const 6) + ) + ) + (func (export "as-block-first") (block (br 0) (call $dummy)) ) @@ -61,6 +83,11 @@ (block (result i64) (return (br 0 (i64.const 7)))) ) + (func (export "as-return-values") (result i32 i64) + (i32.const 2) + (block (result i64) (return (br 0 (i32.const 1) (i64.const 7)))) + ) + (func (export "as-if-cond") (result i32) (block (result i32) (if (result i32) (br 0 (i32.const 2)) @@ -217,8 +244,21 @@ ) ) +(assert_return (invoke "type-i32")) +(assert_return (invoke "type-i64")) +(assert_return (invoke "type-f32")) +(assert_return (invoke "type-f64")) +(assert_return (invoke "type-i32-i32")) +(assert_return (invoke "type-i64-i64")) +(assert_return (invoke "type-f32-f32")) +(assert_return (invoke "type-f64-f64")) + (assert_return (invoke "type-i32-value") (i32.const 1)) (assert_return (invoke "type-i64-value") (i64.const 2)) +(assert_return (invoke "type-f32-value") (f32.const 3)) +(assert_return (invoke "type-f64-value") (f64.const 4)) +(assert_return (invoke 
"type-f64-f64-value") (f64.const 4) (f64.const 5)) + (assert_return (invoke "as-block-first")) (assert_return (invoke "as-block-mid")) (assert_return (invoke "as-block-last")) @@ -231,6 +271,7 @@ (assert_return (invoke "as-br_if-value") (i32.const 8)) (assert_return (invoke "as-br_if-value-cond") (i32.const 9)) (assert_return (invoke "as-return-value") (i64.const 7)) +(assert_return (invoke "as-return-values") (i32.const 2) (i64.const 7)) (assert_return (invoke "as-if-cond") (i32.const 2)) (assert_return (invoke "as-if-then" (i32.const 1) (i32.const 6)) (i32.const 3)) diff --git a/tests/misc_testsuite/winch/if.wast b/tests/misc_testsuite/winch/if.wast index 8f67dd9b673e..3604ec8a1de6 100644 --- a/tests/misc_testsuite/winch/if.wast +++ b/tests/misc_testsuite/winch/if.wast @@ -19,6 +19,86 @@ (if (result i32) (local.get 0) (then (i32.const 7)) (else (i32.const 8))) ) + (func (export "multi") (param i32) (result i32 i32) + (if (local.get 0) (then (call $dummy) (call $dummy) (call $dummy))) + (if (local.get 0) (then) (else (call $dummy) (call $dummy) (call $dummy))) + (if (result i32) (local.get 0) + (then (call $dummy) (call $dummy) (i32.const 8) (call $dummy)) + (else (call $dummy) (call $dummy) (i32.const 9) (call $dummy)) + ) + + (if (result i32 i64 i32) (local.get 0) + (then + (call $dummy) (call $dummy) (i32.const 1) (call $dummy) + (call $dummy) (call $dummy) (i64.const 2) (call $dummy) + (call $dummy) (call $dummy) (i32.const 3) (call $dummy) + ) + (else + (call $dummy) (call $dummy) (i32.const -1) (call $dummy) + (call $dummy) (call $dummy) (i64.const -2) (call $dummy) + (call $dummy) (call $dummy) (i32.const -3) (call $dummy) + ) + ) + (drop) (drop) + ) + + (func (export "break-multi-value") (param i32) (result i32 i32 i64) + (if (result i32 i32 i64) (local.get 0) + (then + (br 0 (i32.const 18) (i32.const -18) (i64.const 18)) + (i32.const 19) (i32.const -19) (i64.const 19) + ) + (else + (br 0 (i32.const -18) (i32.const 18) (i64.const -18)) + (i32.const -19) (i32.const 19) (i64.const -19) + ) + ) + ) + + (func (export "param") (param i32) (result i32) + (i32.const 1) + (if (param i32) (result i32) (local.get 0) + (then (i32.const 2) (i32.add)) + (else (i32.const -2) (i32.add)) + ) + ) + (func (export "params") (param i32) (result i32) + (i32.const 1) + (i32.const 2) + (if (param i32 i32) (result i32) (local.get 0) + (then (i32.add)) + (else (i32.sub)) + ) + ) + + (func (export "param-break") (param i32) (result i32) + (i32.const 1) + (if (param i32) (result i32) (local.get 0) + (then (i32.const 2) (i32.add) (br 0)) + (else (i32.const -2) (i32.add) (br 0)) + ) + ) + (func (export "params-break") (param i32) (result i32) + (i32.const 1) + (i32.const 2) + (if (param i32 i32) (result i32) (local.get 0) + (then (i32.add) (br 0)) + (else (i32.sub) (br 0)) + ) + ) + (func (export "params-id-break") (param i32) (result i32) + (i32.const 1) + (i32.const 2) + (if (param i32 i32) (result i32 i32) (local.get 0) (then (br 0))) + (i32.add) + ) + (func (export "params-id") (param i32) (result i32) + (i32.const 1) + (i32.const 2) + (if (param i32 i32) (result i32 i32) (local.get 0) (then)) + (i32.add) + ) + (func (export "nested") (param i32 i32) (result i32) (if (result i32) (local.get 0) (then @@ -239,6 +319,34 @@ (if (i32.const 0) (then (unreachable)) (else (br_table 0 (i32.const 0)) (unreachable))) (i32.const 19) ) + + (type $block-sig-1 (func)) + (type $block-sig-2 (func (result i32))) + (type $block-sig-3 (func (param $x i32))) + (type $block-sig-4 (func (param i32 f64 i32) (result i32 f64 
i32))) + + (func (export "type-use") + (if (type $block-sig-1) (i32.const 1) (then)) + (if (type $block-sig-2) (i32.const 1) + (then (i32.const 0)) (else (i32.const 2)) + ) + (if (type $block-sig-3) (i32.const 1) (then (drop)) (else (drop))) + (i32.const 0) (f64.const 0) (i32.const 0) + (if (type $block-sig-4) (i32.const 1) (then)) + (drop) (drop) (drop) + (if (type $block-sig-2) (result i32) (i32.const 1) + (then (i32.const 0)) (else (i32.const 2)) + ) + (if (type $block-sig-3) (param i32) (i32.const 1) + (then (drop)) (else (drop)) + ) + (i32.const 0) (f64.const 0) (i32.const 0) + (if (type $block-sig-4) + (param i32) (param f64 i32) (result i32 f64) (result i32) + (i32.const 1) (then) + ) + (drop) (drop) (drop) + ) ) (assert_return (invoke "empty" (i32.const 0))) @@ -251,6 +359,11 @@ (assert_return (invoke "singular" (i32.const 10)) (i32.const 7)) (assert_return (invoke "singular" (i32.const -10)) (i32.const 7)) +(assert_return (invoke "multi" (i32.const 0)) (i32.const 9) (i32.const -1)) +(assert_return (invoke "multi" (i32.const 1)) (i32.const 8) (i32.const 1)) +(assert_return (invoke "multi" (i32.const 13)) (i32.const 8) (i32.const 1)) +(assert_return (invoke "multi" (i32.const -5)) (i32.const 8) (i32.const 1)) + (assert_return (invoke "nested" (i32.const 0) (i32.const 0)) (i32.const 11)) (assert_return (invoke "nested" (i32.const 1) (i32.const 0)) (i32.const 10)) (assert_return (invoke "nested" (i32.const 0) (i32.const 1)) (i32.const 10)) @@ -302,6 +415,27 @@ (assert_return (invoke "break-value" (i32.const 1)) (i32.const 18)) (assert_return (invoke "break-value" (i32.const 0)) (i32.const 21)) +(assert_return (invoke "break-multi-value" (i32.const 0)) + (i32.const -18) (i32.const 18) (i64.const -18) +) +(assert_return (invoke "break-multi-value" (i32.const 1)) + (i32.const 18) (i32.const -18) (i64.const 18) +) + +(assert_return (invoke "param" (i32.const 0)) (i32.const -1)) +(assert_return (invoke "param" (i32.const 1)) (i32.const 3)) +(assert_return (invoke "params" (i32.const 0)) (i32.const -1)) +(assert_return (invoke "params" (i32.const 1)) (i32.const 3)) +(assert_return (invoke "params-id" (i32.const 0)) (i32.const 3)) +(assert_return (invoke "params-id" (i32.const 1)) (i32.const 3)) +(assert_return (invoke "param-break" (i32.const 0)) (i32.const -1)) +(assert_return (invoke "param-break" (i32.const 1)) (i32.const 3)) +(assert_return (invoke "params-break" (i32.const 0)) (i32.const -1)) +(assert_return (invoke "params-break" (i32.const 1)) (i32.const 3)) +(assert_return (invoke "params-id-break" (i32.const 0)) (i32.const 3)) +(assert_return (invoke "params-id-break" (i32.const 1)) (i32.const 3)) + + (assert_return (invoke "effects" (i32.const 1)) (i32.const -14)) (assert_return (invoke "effects" (i32.const 0)) (i32.const -6)) @@ -314,3 +448,4 @@ (assert_return (invoke "as-br_table-last" (i32.const 1)) (i32.const 2)) (assert_return (invoke "break-bare") (i32.const 19)) +(assert_return (invoke "type-use")) diff --git a/tests/misc_testsuite/winch/loop.wast b/tests/misc_testsuite/winch/loop.wast index 71cd6e752ee2..f012d8890315 100644 --- a/tests/misc_testsuite/winch/loop.wast +++ b/tests/misc_testsuite/winch/loop.wast @@ -15,6 +15,17 @@ (loop (result i32) (i32.const 7)) ) + (func (export "multi") (result i32) + (loop (call $dummy) (call $dummy) (call $dummy) (call $dummy)) + (loop (result i32) (call $dummy) (call $dummy) (i32.const 8) (call $dummy)) + (drop) + (loop (result i32 i64 i32) + (call $dummy) (call $dummy) (call $dummy) (i32.const 8) (call $dummy) + (call $dummy) (call 
$dummy) (call $dummy) (i64.const 7) (call $dummy) + (call $dummy) (call $dummy) (call $dummy) (i32.const 9) (call $dummy) + ) + (drop) (drop) + ) (func (export "nested") (result i32) (loop (result i32) @@ -53,6 +64,17 @@ (i32.const 19) ) + (func (export "break-multi-value") (result i32 i32 i64) + (block (result i32 i32 i64) + (i32.const 0) (i32.const 0) (i64.const 0) + (loop (param i32 i32 i64) + (block (br 2 (i32.const 18) (i32.const -18) (i64.const 18))) + (br 0 (i32.const 20) (i32.const -20) (i64.const 20)) + ) + (i32.const 19) (i32.const -19) (i64.const 19) + ) + ) + (func (export "deep") (result i32) (loop (result i32) (block (result i32) (loop (result i32) (block (result i32) @@ -156,6 +178,75 @@ (local.get 0) ) + (func (export "cont-inner") (result i32) + (local i32) + (local.set 0 (i32.const 0)) + (local.set 0 (i32.add (local.get 0) (loop (result i32) (loop (result i32) (br 1))))) + (local.set 0 (i32.add (local.get 0) (loop (result i32) (i32.ctz (br 0))))) + (local.set 0 (i32.add (local.get 0) (loop (result i32) (i32.ctz (loop (result i32) (br 1)))))) + (local.get 0) + ) + + (func (export "param") (result i32) + (i32.const 1) + (loop (param i32) (result i32) + (i32.const 2) + (i32.add) + ) + ) + (func (export "params") (result i32) + (i32.const 1) + (i32.const 2) + (loop (param i32 i32) (result i32) + (i32.add) + ) + ) + (func (export "params-id") (result i32) + (i32.const 1) + (i32.const 2) + (loop (param i32 i32) (result i32 i32)) + (i32.add) + ) + (func (export "param-break") (result i32) + (local $x i32) + (i32.const 1) + (loop (param i32) (result i32) + (i32.const 4) + (i32.add) + (local.tee $x) + (local.get $x) + (i32.const 10) + (i32.lt_u) + (br_if 0) + ) + ) + (func (export "params-break") (result i32) + (local $x i32) + (i32.const 1) + (i32.const 2) + (loop (param i32 i32) (result i32) + (i32.add) + (local.tee $x) + (i32.const 3) + (local.get $x) + (i32.const 10) + (i32.lt_u) + (br_if 0) + (drop) + ) + ) + (func (export "params-id-break") (result i32) + (local $x i32) + (local.set $x (i32.const 0)) + (i32.const 1) + (i32.const 2) + (loop (param i32 i32) (result i32 i32) + (local.set $x (i32.add (local.get $x) (i32.const 1))) + (br_if 0 (i32.lt_u (local.get $x) (i32.const 10))) + ) + (i32.add) + ) + (func $fx (export "effects") (result i32) (local i32) (block @@ -199,10 +290,55 @@ ) (local.get 1) ) + + (func (export "nesting") (param f32 f32) (result f32) + (local f32 f32) + (block + (loop + (br_if 1 (f32.eq (local.get 0) (f32.const 0))) + (local.set 2 (local.get 1)) + (block + (loop + (br_if 1 (f32.eq (local.get 2) (f32.const 0))) + (br_if 3 (f32.lt (local.get 2) (f32.const 0))) + (local.set 3 (f32.add (local.get 3) (local.get 2))) + (local.set 2 (f32.sub (local.get 2) (f32.const 2))) + (br 0) + ) + ) + (local.set 3 (f32.div (local.get 3) (local.get 0))) + (local.set 0 (f32.sub (local.get 0) (f32.const 1))) + (br 0) + ) + ) + (local.get 3) + ) + + (type $block-sig-1 (func)) + (type $block-sig-2 (func (result i32))) + (type $block-sig-3 (func (param $x i32))) + (type $block-sig-4 (func (param i32 f64 i32) (result i32 f64 i32))) + + (func (export "type-use") + (loop (type $block-sig-1)) + (loop (type $block-sig-2) (i32.const 0)) + (loop (type $block-sig-3) (drop)) + (i32.const 0) (f64.const 0) (i32.const 0) + (loop (type $block-sig-4)) + (drop) (drop) (drop) + (loop (type $block-sig-2) (result i32) (i32.const 0)) + (loop (type $block-sig-3) (param i32) (drop)) + (i32.const 0) (f64.const 0) (i32.const 0) + (loop (type $block-sig-4) + (param i32) (param f64 i32) (result i32 
f64) (result i32) + ) + (drop) (drop) (drop) + ) ) (assert_return (invoke "empty")) (assert_return (invoke "singular") (i32.const 7)) +(assert_return (invoke "multi") (i32.const 8)) (assert_return (invoke "nested") (i32.const 9)) (assert_return (invoke "deep") (i32.const 150)) (assert_return (invoke "as-if-condition")) @@ -219,6 +355,12 @@ (assert_return (invoke "as-binary-operand") (i32.const 12)) (assert_return (invoke "as-test-operand") (i32.const 0)) (assert_return (invoke "break-inner") (i32.const 0x1f)) +(assert_return (invoke "param") (i32.const 3)) +(assert_return (invoke "params") (i32.const 3)) +(assert_return (invoke "params-id") (i32.const 3)) +(assert_return (invoke "param-break") (i32.const 13)) +(assert_return (invoke "params-break") (i32.const 12)) +(assert_return (invoke "params-id-break") (i32.const 3)) (assert_return (invoke "effects") (i32.const 1)) (assert_return (invoke "while" (i64.const 0)) (i64.const 1)) (assert_return (invoke "while" (i64.const 1)) (i64.const 1)) @@ -232,6 +374,25 @@ (assert_return (invoke "for" (i64.const 3)) (i64.const 6)) (assert_return (invoke "for" (i64.const 5)) (i64.const 120)) (assert_return (invoke "for" (i64.const 20)) (i64.const 2432902008176640000)) +(assert_return (invoke "nesting" (f32.const 0) (f32.const 7)) (f32.const 0)) +(assert_return (invoke "nesting" (f32.const 7) (f32.const 0)) (f32.const 0)) +(assert_return (invoke "nesting" (f32.const 1) (f32.const 1)) (f32.const 1)) +(assert_return (invoke "nesting" (f32.const 1) (f32.const 2)) (f32.const 2)) +(assert_return (invoke "nesting" (f32.const 1) (f32.const 3)) (f32.const 4)) +(assert_return (invoke "nesting" (f32.const 1) (f32.const 4)) (f32.const 6)) +(assert_return (invoke "nesting" (f32.const 1) (f32.const 100)) (f32.const 2550)) +(assert_return (invoke "nesting" (f32.const 1) (f32.const 101)) (f32.const 2601)) +(assert_return (invoke "nesting" (f32.const 2) (f32.const 1)) (f32.const 1)) +(assert_return (invoke "nesting" (f32.const 3) (f32.const 1)) (f32.const 1)) +(assert_return (invoke "nesting" (f32.const 10) (f32.const 1)) (f32.const 1)) +(assert_return (invoke "nesting" (f32.const 2) (f32.const 2)) (f32.const 3)) +(assert_return (invoke "nesting" (f32.const 2) (f32.const 3)) (f32.const 4)) +(assert_return (invoke "nesting" (f32.const 7) (f32.const 4)) (f32.const 10.3095235825)) +(assert_return (invoke "nesting" (f32.const 7) (f32.const 100)) (f32.const 4381.54785156)) +(assert_return (invoke "nesting" (f32.const 7) (f32.const 101)) (f32.const 2601)) +(assert_return (invoke "break-multi-value") + (i32.const 18) (i32.const -18) (i64.const 18) +) (assert_return (invoke "break-bare") (i32.const 19)) (assert_return (invoke "break-repeated") (i32.const 18)) diff --git a/winch/codegen/src/abi/mod.rs b/winch/codegen/src/abi/mod.rs index 5083b5bd2776..dcfa732db693 100644 --- a/winch/codegen/src/abi/mod.rs +++ b/winch/codegen/src/abi/mod.rs @@ -1,5 +1,3 @@ -//! This module provides all the necessary building blocks for -//! implementing ISA specific ABIs. //! //! # Default ABI //! @@ -232,11 +230,25 @@ impl Default for ABIOperands { /// Machine stack location of the stack results. #[derive(Debug, Copy, Clone)] pub(crate) enum RetArea { - /// Addressed from SP at the given offset. + /// Addressed from the stack pointer at the given offset. SP(SPOffset), /// The address of the results base is stored at a particular, /// well known [LocalSlot]. Slot(LocalSlot), + /// The return area cannot be fully resolved ahead-of-time. 
+ /// If there are results on the stack, this is the default state to which + /// all return areas are initialized until they can be fully resolved to + /// either a [RetArea::SP] or [RetArea::Slot]. + /// + /// This allows a more explicit differentiation between the existence of + /// a return area versus no return area at all. + Uninit, +} + +impl Default for RetArea { + fn default() -> Self { + Self::Uninit + } } impl RetArea { @@ -260,6 +272,22 @@ impl RetArea { _ => unreachable!(), } } + + /// Returns true if the return area is addressed via the stack pointer. + pub fn is_sp(&self) -> bool { + match self { + Self::SP(_) => true, + _ => false, + } + } + + /// Returns true if the return area is uninitialized. + pub fn is_uninit(&self) -> bool { + match self { + Self::Uninit => true, + _ => false, + } + } } /// ABI-specific representation of an [`ABISig`]. @@ -267,33 +295,8 @@ impl RetArea { pub(crate) struct ABIResults { /// The result operands. operands: ABIOperands, -} - -/// Data about the [`ABIResults`]. -/// This struct is meant to be used once the [`ABIResults`] can be -/// materialized to a particular location in the machine stack, -/// if any. -#[derive(Debug, Clone)] -pub(crate) struct ABIResultsData { - /// The results. - pub results: ABIResults, - /// The return pointer, if any. - pub ret_area: Option<RetArea>, -} - -impl ABIResultsData { - /// Create a [`ABIResultsData`] without a stack results base. - pub fn wrap(results: ABIResults) -> Self { - Self { - results, - ret_area: None, - } - } - - /// Unwraps the stack results base. - pub fn unwrap_ret_area(&self) -> &RetArea { - self.ret_area.as_ref().unwrap() - } + /// The return area, if there are results on the stack. + ret_area: Option<RetArea>, } impl ABIResults { @@ -357,7 +360,8 @@ impl ABIResults { /// Create a new [`ABIResults`] from [`ABIOperands`]. pub fn new(operands: ABIOperands) -> Self { - Self { operands } + let ret_area = (operands.bytes > 0).then(|| RetArea::default()); + Self { operands, ret_area } } /// Returns a reference to a [HashSet], which includes @@ -376,6 +380,11 @@ impl ABIResults { self.operands.inner.len() } + /// Returns the length of results on the stack. + pub fn stack_operands_len(&self) -> usize { + self.operands().len() - self.regs().len() + } + /// Get the [`ABIOperand`] result in the nth position. #[cfg(test)] pub fn get(&self, n: usize) -> Option<&ABIOperand> { @@ -400,9 +409,28 @@ impl ABIResults { /// Returns true if the [`ABIResults`] require space on the machine stack /// for results. - pub fn has_stack_results(&self) -> bool { + pub fn on_stack(&self) -> bool { self.operands.bytes > 0 } + + /// Set the return area of the signature. + /// + /// # Panics + /// + /// This function will panic when trying to set a return area if there are + /// no results on the stack or when trying to set an uninitialized return area. + /// This method must only be used when the return area can be fully + /// materialized. + pub fn set_ret_area(&mut self, area: RetArea) { + debug_assert!(self.on_stack()); + debug_assert!(!area.is_uninit()); + self.ret_area = Some(area); + } + + /// Returns a reference to the return area, if any. + pub fn ret_area(&self) -> Option<&RetArea> { + self.ret_area.as_ref() + } } /// ABI-specific representation of an [`ABISig`]. @@ -587,7 +615,7 @@ impl ABISig { /// Returns true if the signature has results on the stack.
pub fn has_stack_results(&self) -> bool { - self.results.has_stack_results() + self.results.on_stack() } } diff --git a/winch/codegen/src/codegen/call.rs b/winch/codegen/src/codegen/call.rs index 9df0a1d11c15..a93048e26bbc 100644 --- a/winch/codegen/src/codegen/call.rs +++ b/winch/codegen/src/codegen/call.rs @@ -57,12 +57,12 @@ //! └──────────────────────────────────────────────────┘ ------> Stack pointer when emitting the call use crate::{ - abi::{ABIOperand, ABIResultsData, ABISig, RetArea, ABI}, + abi::{ABIOperand, ABISig, RetArea, ABI}, codegen::{ ptr_type_from_ptr_size, BuiltinFunction, BuiltinType, Callee, CalleeInfo, CodeGenContext, TypedReg, }, - masm::{CalleeKind, MacroAssembler, OperandSize, SPOffset}, + masm::{CalleeKind, MacroAssembler, MemMoveDirection, OperandSize, SPOffset}, reg::Reg, stack::Val, CallingConvention, @@ -92,15 +92,14 @@ impl FnCall { { let callee = resolve(context); let ptr_type = ptr_type_from_ptr_size(context.vmoffsets.ptr.size()); - let sig = Self::get_sig::(&callee, ptr_type); - let sig = sig.as_ref(); - let kind = Self::map(&context.vmoffsets, &callee, sig, context, masm); + let mut sig = Self::get_sig::(&callee, ptr_type); + let kind = Self::map(&context.vmoffsets, &callee, sig.as_ref(), context, masm); context.spill(masm); let ret_area = Self::make_ret_area(&sig, masm); let arg_stack_space = sig.params_stack_size(); let reserved_stack = masm.call(arg_stack_space, |masm| { - Self::assign(sig, ret_area.as_ref(), context, masm); + Self::assign(sig.as_ref(), ret_area.as_ref(), context, masm); kind }); @@ -109,14 +108,18 @@ impl FnCall { _ => {} } - Self::cleanup(sig, reserved_stack, ret_area, masm, context); + Self::cleanup(&mut sig, reserved_stack, ret_area, masm, context); } /// Calculates the return area for the callee, if any. fn make_ret_area<M: MacroAssembler>(callee_sig: &ABISig, masm: &mut M) -> Option<RetArea> { - callee_sig.results.has_stack_results().then(|| { - masm.reserve_stack(callee_sig.results_stack_size()); - RetArea::sp(masm.sp_offset()) + callee_sig.has_stack_results().then(|| { + let base = masm.sp_offset().as_u32(); + let end = base + callee_sig.results_stack_size(); + if end > base { + masm.reserve_stack(end - base); + } + RetArea::sp(SPOffset::from_u32(end)) }) } @@ -300,7 +303,7 @@ impl FnCall { /// Cleanup stack space, handle multiple results, and free registers after /// emitting the call. fn cleanup<M: MacroAssembler>( - sig: &ABISig, + sig: &mut Cow<'_, ABISig>, reserved_space: u32, ret_area: Option<RetArea>, masm: &mut M, @@ -330,16 +333,29 @@ impl FnCall { let result_bytes = sig.results_stack_size(); debug_assert!(sp.as_u32() >= stack_consumed + result_bytes); let dst = SPOffset::from_u32(sp.as_u32() - stack_consumed); - masm.memmove(sp, dst, result_bytes); + masm.memmove(sp, dst, result_bytes, MemMoveDirection::LowToHigh); } }; // Free the bytes consumed by the call. masm.free_stack(stack_consumed); - let mut results_data = ABIResultsData::wrap(sig.results.clone()); - results_data.ret_area = ret_area; + if let Some(area) = ret_area { + debug_assert!(!area.is_uninit()); + if stack_consumed > 0 { + sig.to_mut() + .results + .set_ret_area(RetArea::sp(masm.sp_offset())); + } else { + // If there's a return area and no memory was adjusted + // (memmoved), the offsets should be equal.
+ debug_assert_eq!(area.unwrap_sp(), masm.sp_offset()); + sig.to_mut().results.set_ret_area(area); + } + } - context.push_abi_results(&results_data, masm); + context.push_abi_results(&sig.results, masm, |results, _, _| { + results.ret_area().copied() + }); } } diff --git a/winch/codegen/src/codegen/context.rs b/winch/codegen/src/codegen/context.rs index 27cba2aae7eb..b6fd06b00325 100644 --- a/winch/codegen/src/codegen/context.rs +++ b/winch/codegen/src/codegen/context.rs @@ -2,7 +2,7 @@ use wasmtime_environ::{VMOffsets, WasmHeapType, WasmType}; use super::ControlStackFrame; use crate::{ - abi::{ABIOperand, ABIResultsData, RetArea, ABI}, + abi::{ABIOperand, ABIResults, RetArea, ABI}, codegen::BuiltinFunctions, frame::Frame, isa::reg::RegClass, @@ -156,7 +156,7 @@ impl<'a, 'builtins> CodeGenContext<'a, 'builtins> { if val.is_mem() { let mem = val.unwrap_mem(); - debug_assert!(mem.slot.offset.as_u32() == masm.sp_offset().as_u32()); + debug_assert_eq!(mem.slot.offset.as_u32(), masm.sp_offset().as_u32()); masm.pop(reg, val.ty().into()); } else { self.move_val_to_reg(&val, reg, masm); @@ -170,7 +170,7 @@ impl<'a, 'builtins> CodeGenContext<'a, 'builtins> { } /// Pops the value stack top and stores it at the specified address. - fn pop_to_addr(&mut self, masm: &mut M, addr: M::Address) { + pub fn pop_to_addr(&mut self, masm: &mut M, addr: M::Address) { let val = self.stack.pop().expect("a value at stack top"); let size: OperandSize = val.ty().into(); match val { @@ -363,14 +363,16 @@ impl<'a, 'builtins> CodeGenContext<'a, 'builtins> { M: MacroAssembler, F: FnMut(&mut M, &mut Self, &mut ControlStackFrame), { - let (_, target_sp) = dest.base_stack_len_and_sp(); + let state = dest.stack_state(); + let target_offset = state.target_offset; + let base_offset = state.base_offset; // Invariant: The SP, must be greater or equal to the target // SP, given that we haven't popped any results by this point // yet. But it may happen in the callback. - assert!(masm.sp_offset().as_u32() >= target_sp.as_u32()); + assert!(masm.sp_offset().as_u32() >= base_offset.as_u32()); f(masm, self, dest); - // The following snippet, pops the stack pointer and to ensure that it + // The following snippet, pops the stack pointer to ensure that it // is correctly placed according to the expectations of the destination // branch. // @@ -390,7 +392,7 @@ impl<'a, 'builtins> CodeGenContext<'a, 'builtins> { // return location according to the ABI (a register for single value // returns and a memory slot for 1+ returns). This could happen in the // callback invocation above if the callback invokes - // `CodeGenContext::pop_abi_results` (e.g. `br` instruction). + // `ControlStackFrame::pop_abi_results` (e.g. `br` instruction). // // After an unconditional jump, the compiler will enter in an // unreachable state; instead of immediately truncating the value stack @@ -399,78 +401,34 @@ impl<'a, 'builtins> CodeGenContext<'a, 'builtins> { // of the value stack once reachability is actually restored. At that // point, the right stack pointer offset will also be restored, which // should match the contents of the value stack. - masm.ensure_sp_for_jump(target_sp); + masm.ensure_sp_for_jump(target_offset); dest.set_as_target(); masm.jmp(*dest.label()); self.reachable = false; } - /// A combination of [Self::pop_abi_results] and [Self::push_abi_results] - /// to be used on conditional branches: br_if and br_table. 
- pub fn top_abi_results<M: MacroAssembler>(&mut self, result: &ABIResultsData, masm: &mut M) { - self.pop_abi_results(result, masm); - self.push_abi_results(result, masm); - } - - /// Handles the emission of the ABI result. This function is used at the end - /// of a block or function to pop the results from the value stack into the - /// corresponding ABI result location. - pub fn pop_abi_results<M: MacroAssembler>(&mut self, data: &ABIResultsData, masm: &mut M) { - let retptr = data - .results - .has_stack_results() - .then(|| match data.unwrap_ret_area() { - RetArea::Slot(slot) => { - let base = self - .without::<_, M, _>(data.results.regs(), masm, |cx, masm| cx.any_gpr(masm)); - let local_addr = masm.local_address(slot); - masm.load_ptr(local_addr, base); - Some(base) - } - _ => None, - }) - .flatten(); - - // Results are popped in reverse order, starting from registers, continuing - // to memory values in order to maintain the value stack ordering invariant. - // See comments in [ABIResults] for more details. - for operand in data.results.operands().iter().rev() { - match operand { - ABIOperand::Reg { reg, .. } => { - let TypedReg { reg, .. } = self.pop_to_reg(masm, Some(*reg)); - self.free_reg(reg); - } - ABIOperand::Stack { offset, .. } => { - let addr = match data.unwrap_ret_area() { - RetArea::SP(base) => { - let slot_offset = base.as_u32() - *offset; - masm.address_from_sp(SPOffset::from_u32(slot_offset)) - } - RetArea::Slot(_) => masm.address_at_reg(retptr.unwrap(), *offset), - }; - - self.pop_to_addr(masm, addr); - } - } - } - - if let Some(reg) = retptr { - self.free_reg(reg); - } - } + /// Push the ABI representation of the results stack. + pub fn push_abi_results<M, F>( + &mut self, + results: &ABIResults, + masm: &mut M, + mut calculate_ret_area: F, + ) where + M: MacroAssembler, + F: FnMut(&ABIResults, &mut CodeGenContext, &mut M) -> Option<RetArea>, + { + let area = results + .on_stack() + .then(|| calculate_ret_area(&results, self, masm).unwrap()); - /// Push ABI results into the value stack. This function is used at the end - /// of a block or after a function call to push the corresponding ABI - /// results into the value stack. - pub fn push_abi_results<M: MacroAssembler>(&mut self, data: &ABIResultsData, masm: &mut M) { - for operand in data.results.operands().iter() { + for operand in results.operands().iter() { match operand { ABIOperand::Reg { reg, ty, .. } => { assert!(self.regalloc.reg_available(*reg)); let typed_reg = TypedReg::new(*ty, self.reg(*reg, masm)); self.stack.push(typed_reg.into()); } - ABIOperand::Stack { ty, offset, size } => match data.unwrap_ret_area() { + ABIOperand::Stack { ty, offset, size } => match area.unwrap() { RetArea::SP(sp_offset) => { let slot = StackSlot::new(SPOffset::from_u32(sp_offset.as_u32() - offset), *size); @@ -490,7 +448,7 @@ impl<'a, 'builtins> CodeGenContext<'a, 'builtins> { /// This function is intended to only be used when restoring the code /// generation's reachability state, when handling an unreachable end or /// else. - fn truncate_stack_to(&mut self, target: usize) { + pub fn truncate_stack_to(&mut self, target: usize) { if self.stack.len() > target { self.drop_last(self.stack.len() - target, |regalloc, val| match val { Val::Reg(tr) => regalloc.free(tr.reg), @@ -499,29 +457,6 @@ impl<'a, 'builtins> CodeGenContext<'a, 'builtins> { } } - /// This function ensures that the state of the -- machine and value -- - /// stack is the right one when reaching a control frame branch in which - /// reachability is restored or when reaching the end of a function in an - /// unreachable state.
This function is intended to be called when handling - /// an unreachable else or end. - /// - /// This function will truncate the value stack to the length expected by - /// the control frame and will also set the stack pointer offset to - /// reflect the new length of the value stack. - pub fn ensure_stack_state( - &mut self, - masm: &mut M, - frame: &ControlStackFrame, - ) { - let (base_len, base_sp) = frame.base_stack_len_and_sp(); - masm.reset_stack_pointer(base_sp); - self.truncate_stack_to(base_len); - - // The size of the stack sometimes can be less given that locals are - // removed last, and not accounted as part of the [SPOffset]. - debug_assert!(self.stack.sizeof(self.stack.len()) <= base_sp.as_u32()); - } - /// Spill locals and registers to memory. // TODO: optimize the spill range; // At any point in the program, the stack might already contain memory diff --git a/winch/codegen/src/codegen/control.rs b/winch/codegen/src/codegen/control.rs index 43bb0b21b0f9..4387f3665356 100644 --- a/winch/codegen/src/codegen/control.rs +++ b/winch/codegen/src/codegen/control.rs @@ -1,39 +1,196 @@ //! Data structures for control flow emission. //! -//! As of the current implementation, Winch doesn't offer support for the -//! multi-value proposal, which in the context of control flow constructs it -//! means that blocks don't take any params and produce 0 or 1 return. The -//! intention is to implement support for multi-value across the compiler, when -//! that time comes, here are some general changes that will be needed for -//! control flow: -//! -//! * Consider having a copy of the block params on the side, and push them when -//! encountering an else or duplicate the block params upfront. If no else is -//! present, clean the extra copies from the stack. -//! -//! * Eagerly load the block params. Params can flow "downward" as the block -//! results in the case of an empty then or else block: -//! (module -//! (func (export "params") (param i32) (result i32) -//! (i32.const 2) -//! (if (param i32) (result i32) (local.get 0) -//! (then)) -//! (i32.const 3) -//! (i32.add) -//! ) -//! -//! As a future optimization, we could perform a look ahead to the next -//! instruction when reaching any of the comparison instructions. If the next -//! instruction is a control instruction, we could avoid emitting +//! Winch currently doesn't apply any sort of optimizations to control flow, but +//! as a future optimization, for starters, we could perform a look ahead to the +//! next instruction when reaching any of the comparison instructions. If the +//! next instruction is a control instruction, we could avoid emitting //! a [`crate::masm::MacroAssembler::cmp_with_set`] and instead emit //! a conditional jump inline when emitting the control flow instruction. -use super::{CodeGenContext, MacroAssembler, OperandSize}; +use super::{CodeGenContext, OperandSize, Reg, TypedReg}; use crate::{ - abi::ABIResultsData, - codegen::env::BlockTypeInfo, - masm::{IntCmpKind, SPOffset}, + abi::{ABIOperand, ABIResults, ABISig, RetArea, ABI}, + masm::{IntCmpKind, MacroAssembler, MemMoveDirection, RegImm, SPOffset}, + stack::Val, + CallingConvention, }; use cranelift_codegen::MachLabel; +use wasmtime_environ::{WasmFuncType, WasmType}; + +/// Categorization of the type of the block. +#[derive(Debug, Clone)] +pub(crate) enum BlockType { + /// Doesn't produce or consume any values. + Void, + /// Produces a single value. + Single(WasmType), + /// Consumes multiple values and produces multiple values. 
+ Func(WasmFuncType), + /// An already resolved ABI signature. + ABISig(ABISig), +} + +/// Holds all the information about the signature of the block. +#[derive(Debug, Clone)] +pub(crate) struct BlockSig { + /// The type of the block. + pub ty: BlockType, + /// ABI representation of the results of the block. + results: Option<ABIResults>, + /// ABI representation of the params of the block interpreted as results. + params: Option<ABIResults>, +} + +impl BlockSig { + /// Create a new [BlockSig]. + pub fn new(ty: BlockType) -> Self { + Self { + ty, + results: None, + params: None, + } + } + + /// Create a new [BlockSig] from an [ABISig]. + pub fn from_sig(sig: ABISig) -> Self { + Self { + ty: BlockType::sig(sig), + results: None, + params: None, + } + } + + /// Return the ABI representation of the results of the block. + /// This method will lazily initialize the results if not present. + pub fn results<M>(&mut self) -> &mut ABIResults + where + M: MacroAssembler, + { + if self.ty.is_sig() { + return match &mut self.ty { + BlockType::ABISig(sig) => &mut sig.results, + _ => unreachable!(), + }; + } + + if self.results.is_some() { + return self.results.as_mut().unwrap(); + } + + let results = match &self.ty { + BlockType::Void => <M::ABI as ABI>::abi_results(&[], &CallingConvention::Default), + BlockType::Single(ty) => { + <M::ABI as ABI>::abi_results(&[*ty], &CallingConvention::Default) + } + BlockType::Func(f) => { + <M::ABI as ABI>::abi_results(f.returns(), &CallingConvention::Default) + } + BlockType::ABISig(_) => unreachable!(), + }; + + self.results = Some(results); + self.results.as_mut().unwrap() + } + + /// Construct an ABI result representation of the params of the block. + /// This is needed for loops and for handling cases in which params flow as + /// the block's results, i.e. in the presence of an empty then or else. + pub fn params<M>(&mut self) -> &mut ABIResults + where + M: MacroAssembler, + { + if self.params.is_some() { + return self.params.as_mut().unwrap(); + } + + let params_as_results = match &self.ty { + BlockType::Void | BlockType::Single(_) => { + <M::ABI as ABI>::abi_results(&[], &CallingConvention::Default) + } + BlockType::Func(f) => { + <M::ABI as ABI>::abi_results(f.params(), &CallingConvention::Default) + } + // Once we have created a block type from a known signature, we + // can't modify its meaning. This should only be used for the + // function body block, in which case there's no need for treating + // params as results. + BlockType::ABISig(_) => unreachable!(), + }; + + self.params = Some(params_as_results); + self.params.as_mut().unwrap() + } + + /// Returns the signature param count. + pub fn param_count(&self) -> usize { + match &self.ty { + BlockType::Void | BlockType::Single(_) => 0, + BlockType::Func(f) => f.params().len(), + BlockType::ABISig(sig) => sig.params_without_retptr().len(), + } + } + + /// Returns the signature return count. + pub fn return_count(&self) -> usize { + match &self.ty { + BlockType::Void => 0, + BlockType::Single(_) => 1, + BlockType::Func(f) => f.returns().len(), + BlockType::ABISig(sig) => sig.results().len(), + } + } +} + +impl BlockType { + /// Create a [BlockType::Void]. + pub fn void() -> Self { + Self::Void + } + + /// Create a [BlockType::Single] from the given [WasmType]. + pub fn single(ty: WasmType) -> Self { + Self::Single(ty) + } + + /// Create a [BlockType::Func] from the given [WasmFuncType]. + pub fn func(ty: WasmFuncType) -> Self { + Self::Func(ty) + } + + /// Create a [BlockType::ABISig].
+ pub fn sig(sig: ABISig) -> Self { + Self::ABISig(sig) + } + + /// Returns true if the type of the block is [BlockType::ABISig]. + pub fn is_sig(&self) -> bool { + match self { + Self::ABISig(_) => true, + _ => false, + } + } +} + +/// The expected value and machine stack state when entering and exiting the block. +#[derive(Debug, Default, Copy, Clone)] +pub(crate) struct StackState { + /// The base stack pointer offset. + /// This offset is set when entering the block, after saving any live + /// registers and locals. + /// It is calculated by subtracting the size, in bytes, of any block params + /// from the current stack pointer offset. + pub base_offset: SPOffset, + /// The target stack pointer offset. + /// This offset is calculated by adding the size of the stack results + /// to the base stack pointer offset. + pub target_offset: SPOffset, + /// The base length of the value stack when entering the block. + /// This is the current length of the value stack minus any block parameters. + pub base_len: usize, + /// The target length of the value stack when exiting the block. + /// Calculated by adding the number of results to the base value stack + /// length. + pub target_len: usize, +} /// Holds all the metadata to support the emission /// of control flow instructions. @@ -44,42 +201,30 @@ pub(crate) enum ControlStackFrame { cont: MachLabel, /// The exit label of the block. exit: MachLabel, - /// Data about the block's results. - results_data: ABIResultsData, - /// Information about the parameters and returns of the block. - block_type_info: BlockTypeInfo, - /// The length of the value stack at the beginning of the If. - base_stack_len: usize, - /// The stack pointer offset at the beginning of the If. - base_sp: SPOffset, + /// The signature of the block. + sig: BlockSig, + /// The stack state of the block. + stack_state: StackState, /// Local reachability state when entering the block. reachable: bool, }, Else { /// The exit label of the block. exit: MachLabel, - /// Data about the block's results. - results_data: ABIResultsData, - /// Information about the parameters and returns of the block. - block_type_info: BlockTypeInfo, - /// The length of the value stack at the beginning of the Else. - base_stack_len: usize, - /// The stack pointer offset at the beginning of the Else. - base_sp: SPOffset, + /// The signature of the block. + sig: BlockSig, + /// The stack state of the block. + stack_state: StackState, /// Local reachability state when entering the block. reachable: bool, }, Block { /// The block exit label. exit: MachLabel, - /// The length of the value stack at the beginning of the block. - base_stack_len: usize, - /// Data about the block's results. - results_data: ABIResultsData, - /// Information about the parameters and returns of the block. - block_type_info: BlockTypeInfo, - /// The stack pointer offset at the beginning of the Block. - base_sp: SPOffset, + /// The signature of the block. + sig: BlockSig, + /// The stack state of the block. + stack_state: StackState, /// Exit state of the block. /// /// This flag is used to determine if a block is a branch /// target or not. is_branch_target: bool, }, Loop { /// The start of the Loop. head: MachLabel, - /// The length of the value stack at the beginning of the Loop. - base_stack_len: usize, - /// The stack pointer offset at the beginning of the Loop. - base_sp: SPOffset, - /// Information about the parameters and returns of the block. - block_type_info: BlockTypeInfo, + /// The stack state of the block.
+ stack_state: StackState, + /// The signature of the block. + sig: BlockSig, }, } impl ControlStackFrame { /// Returns [`ControlStackFrame`] for an if. pub fn r#if( - results_data: ABIResultsData, - block_type_info: BlockTypeInfo, + sig: BlockSig, masm: &mut M, context: &mut CodeGenContext, ) -> Self { let mut control = Self::If { cont: masm.get_label(), exit: masm.get_label(), - results_data, - block_type_info, + sig, reachable: context.reachable, - base_stack_len: 0, - base_sp: SPOffset::from_u32(0), + stack_state: Default::default(), }; control.emit(masm, context); control } - /// Creates a block that represents the base - /// block for the function body. - pub fn function_body_block( - results_data: ABIResultsData, - block_type_info: BlockTypeInfo, - masm: &mut M, - context: &mut CodeGenContext, - ) -> Self { - Self::Block { - base_stack_len: context.stack.len(), - results_data, - block_type_info, - is_branch_target: false, - exit: masm.get_label(), - base_sp: masm.sp_offset(), - } - } - /// Returns [`ControlStackFrame`] for a block. pub fn block( - results_data: ABIResultsData, - block_type_info: BlockTypeInfo, + sig: BlockSig, masm: &mut M, context: &mut CodeGenContext, ) -> Self { let mut control = Self::Block { - base_stack_len: 0, - results_data, - block_type_info, + sig, is_branch_target: false, exit: masm.get_label(), - base_sp: SPOffset::from_u32(0), + stack_state: Default::default(), }; control.emit(masm, context); @@ -161,15 +280,14 @@ impl ControlStackFrame { /// Returns [`ControlStackFrame`] for a loop. pub fn r#loop( - block_type_info: BlockTypeInfo, + sig: BlockSig, masm: &mut M, context: &mut CodeGenContext, ) -> Self { let mut control = Self::Loop { - base_stack_len: 0, - block_type_info, + stack_state: Default::default(), + sig, head: masm.get_label(), - base_sp: SPOffset::from_u32(0), }; control.emit(masm, context); @@ -177,49 +295,123 @@ impl ControlStackFrame { } fn init(&mut self, masm: &mut M, context: &mut CodeGenContext) { - assert!(self.block_type_info().param_count == 0); - assert!(self.block_type_info().result_count < 2); - // Save any live registers and locals. - context.spill(masm); - self.set_base_stack_len(context.stack.len()); - self.set_base_sp(masm.sp_offset()); - } - - fn set_base_stack_len(&mut self, len: usize) { - use ControlStackFrame::*; + self.calculate_stack_state(context, masm); + // If the block has stack results, immediately resolve the return area + // base. + if self.results::().on_stack() { + let results_base = self.stack_state().target_offset; + self.results::().set_ret_area(RetArea::sp(results_base)); + } - match self { - If { base_stack_len, .. } - | Block { base_stack_len, .. } - | Loop { base_stack_len, .. } => *base_stack_len = len, - _ => {} + if self.is_if() || self.is_loop() { + // Preemptively handle block params as results so that the params + // are correctly placed in memory. 
This is especially + // important for control flow joins with empty blocks: + // + //(module + // (func (export "params") (param i32) (result i32) + // (i32.const 2) + // (if (param i32) (result i32) (local.get 0) + // (then)) + // (i32.const 3) + // (i32.add) + // ) + //) + let base_offset = self.stack_state().base_offset; + if self.params::<M>().on_stack() { + let offset = base_offset.as_u32() + self.params::<M>().size(); + self.params::<M>() + .set_ret_area(RetArea::sp(SPOffset::from_u32(offset))); + } + Self::top_abi_results_impl( + self.params::<M>(), + context, + masm, + |params: &ABIResults, _, _| params.ret_area().copied(), + ); + } + } + /// Calculates the [StackState] of the block. + fn calculate_stack_state<M: MacroAssembler>( + &mut self, + context: &mut CodeGenContext, + masm: &mut M, + ) { use ControlStackFrame::*; + let sig = self.sig(); + // If the block type contains a full [ABISig], do not take into account + // the params, since these are the params of the function that is + // currently being compiled and the value stack doesn't currently + // contain any values anyway. + let param_count = if sig.ty.is_sig() { + 0 + } else { + sig.param_count() + }; + let return_count = sig.return_count(); + debug_assert!(context.stack.len() >= param_count); + let results_size = self.results::<M>().size(); + + // Save any live registers and locals. + context.spill(masm); + + let base_len = context.stack.len() - param_count; + let stack_consumed = context.stack.sizeof(param_count); + let current_sp = masm.sp_offset(); + let base_offset = SPOffset::from_u32(current_sp.as_u32() - stack_consumed); match self { - If { base_sp, .. } | Block { base_sp, .. } | Loop { base_sp, .. } => *base_sp = base, + If { stack_state, .. } | Block { stack_state, .. } | Loop { stack_state, .. } => { + stack_state.base_offset = base_offset; + stack_state.base_len = base_len; + stack_state.target_offset = SPOffset::from_u32(base_offset.as_u32() + results_size); + stack_state.target_len = base_len + return_count; + } _ => {} } } + /// This function ensures that the state of the -- machine and value -- + /// stack is the right one when reaching a control frame branch in which + /// reachability is restored or when reaching the end of a function in an + /// unreachable state. This function is intended to be called when handling + /// an unreachable else or end. + // + /// This function will truncate the value stack to the base length of + /// the control frame and will also set the stack pointer offset to reflect + /// the offset expected by the target branch. + /// + // NB: This method is assumed to be called *before* pushing any block + // results to the value stack, so that any excess values are cleaned up. + pub fn ensure_stack_state<M: MacroAssembler>( + &mut self, + masm: &mut M, + context: &mut CodeGenContext, + ) { + let state = self.stack_state(); + // This assumes that at jump sites, the machine stack pointer will be + // adjusted to match the expectations of the target branch (e.g. + // `target_offset`); after performing the jump, the MacroAssembler + // implementation will soft-reset the stack pointer offset to its + // original offset, ensuring that other parts of the program have access + // to the right offset; this is especially important in conditional + // branches.
+        // When restoring reachability we ensure that the MacroAssembler offset
+        // is set to match the expectations of the target branch, similar to how
+        // the machine stack pointer was adjusted at jump sites.
+        masm.reset_stack_pointer(state.target_offset);
+        // We use the base length, because this function is assumed to be called
+        // *before* pushing any results to the value stack. This way, any excess
+        // values will be discarded.
+        context.truncate_stack_to(state.base_len);
+    }
+
+    /// Returns the type information of the block.
+    pub fn sig(&self) -> &BlockSig {
        use ControlStackFrame::*;
        match self {
-            If {
-                block_type_info, ..
-            }
-            | Else {
-                block_type_info, ..
-            }
-            | Loop {
-                block_type_info, ..
-            }
-            | Block {
-                block_type_info, ..
-            } => block_type_info,
+            If { sig, .. } | Else { sig, .. } | Loop { sig, .. } | Block { sig, .. } => sig,
        }
    }

@@ -234,7 +426,13 @@ impl ControlStackFrame {
        match *self {
            If { cont, .. } => {
                // Pop the condition value.
-                let top = context.pop_to_reg(masm, None);
+                // Because in the case of Self::If, Self::init will top the
+                // branch params, we exclude any result registers from being
+                // used as the branch test.
+                let top =
+                    context.without::<_, _, _>(self.params::<M>().regs(), masm, |cx, masm| {
+                        cx.pop_to_reg(masm, None)
+                    });
                self.init(masm, context);
                masm.branch(
                    IntCmpKind::Eq,
@@ -257,57 +455,53 @@ impl ControlStackFrame {
    /// Handles the else branch if the current control stack frame is
    /// [`ControlStackFrame::If`].
    pub fn emit_else<M: MacroAssembler>(&mut self, masm: &mut M, context: &mut CodeGenContext) {
-        use ControlStackFrame::*;
-        match self {
-            If {
-                results_data,
-                base_stack_len,
-                exit,
-                block_type_info,
-                ..
-            } => {
-                assert!(
-                    (*base_stack_len + block_type_info.result_count - block_type_info.param_count)
-                        == context.stack.len()
-                );
-                // Before emitting an unconditional jump to the exit branch,
-                // we handle the result of the if-then block.
-                context.pop_abi_results(results_data, masm);
-                // Before binding the else branch, we emit the jump to the end
-                // label.
-                masm.jmp(*exit);
-                // Bind the else branch.
-                self.bind_else(masm, context.reachable);
-            }
-            _ => unreachable!(),
-        }
+        debug_assert!(self.is_if());
+        let state = self.stack_state();
+
+        debug_assert_eq!(state.target_len, context.stack.len());
+        self.pop_abi_results(context, masm, |results, _, _| results.ret_area().copied());
+        masm.jmp(*self.exit_label().unwrap());
+        self.bind_else(masm, context);
    }

    /// Binds the else branch label and converts `self` to
    /// [`ControlStackFrame::Else`].
-    pub fn bind_else<M: MacroAssembler>(&mut self, masm: &mut M, reachable: bool) {
+    pub fn bind_else<M: MacroAssembler>(&mut self, masm: &mut M, context: &mut CodeGenContext) {
        use ControlStackFrame::*;
        match self {
            If {
                cont,
-                results_data,
-                block_type_info,
-                base_stack_len,
-                base_sp,
+                sig,
+                stack_state,
                exit,
                ..
            } => {
                // Bind the else branch.
                masm.bind(*cont);
+                // Push the ABI results to the value stack, so that they are
+                // used as params for the else branch. At the beginning of the
+                // if block, any params are preemptively resolved as results;
+                // when reaching the else, all params are already materialized
+                // as stack results. As part of ensuring the right state when
+                // entering the else branch, the following snippet also soft
+                // resets the stack pointer so that it matches the expectations
+                // of the else branch: the stack pointer is expected to be at
+                // the base stack pointer, plus the params stack size in bytes.
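+                // For example (an illustrative sketch), in the following
+                // module the constant 1 is a block param to both arms, so it
+                // must be materialized before the else branch is bound:
+                //
+                // (module
+                //   (func (param i32) (result i32)
+                //     (i32.const 1)
+                //     (if (param i32) (result i32) (local.get 0)
+                //       (then (i32.const 2) (i32.add))
+                //       (else (i32.const 3) (i32.add)))))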
+                let params_size = sig.params::<M>().size();
+                context.push_abi_results::<M, _>(sig.params::<M>(), masm, |params, _, _| {
+                    params.ret_area().copied()
+                });
+                masm.reset_stack_pointer(SPOffset::from_u32(
+                    stack_state.base_offset.as_u32() + params_size,
+                ));
+
                // Update the stack control frame with an else control frame.
                *self = ControlStackFrame::Else {
                    exit: *exit,
-                    base_stack_len: *base_stack_len,
-                    reachable,
-                    base_sp: *base_sp,
-                    results_data: results_data.clone(),
-                    block_type_info: *block_type_info,
+                    stack_state: *stack_state,
+                    reachable: context.reachable,
+                    sig: sig.clone(),
                };
            }
            _ => unreachable!(),
@@ -318,52 +512,22 @@ impl ControlStackFrame {
    pub fn emit_end<M: MacroAssembler>(&mut self, masm: &mut M, context: &mut CodeGenContext) {
        use ControlStackFrame::*;
        match self {
-            If {
-                results_data,
-                base_stack_len,
-                block_type_info,
-                ..
-            }
-            | Else {
-                results_data,
-                base_stack_len,
-                block_type_info,
-                ..
-            }
-            | Block {
-                results_data,
-                base_stack_len,
-                block_type_info,
-                ..
-            } => {
-                assert!(
-                    (*base_stack_len + block_type_info.result_count - block_type_info.param_count)
-                        == context.stack.len()
-                );
+            If { stack_state, .. } | Else { stack_state, .. } | Block { stack_state, .. } => {
+                debug_assert_eq!(stack_state.target_len, context.stack.len());
                // Before binding the exit label, we handle the block results.
-                context.pop_abi_results(results_data, masm);
+                self.pop_abi_results(context, masm, |results, _, _| results.ret_area().copied());
                self.bind_end(masm, context);
            }
-            Loop {
-                block_type_info,
-                base_stack_len,
-                ..
-            } => {
-                assert!(
-                    (*base_stack_len + block_type_info.result_count - block_type_info.param_count)
-                        == context.stack.len()
-                );
+            Loop { stack_state, .. } => {
+                debug_assert_eq!(stack_state.target_len, context.stack.len());
            }
        }
    }

    /// Binds the exit label of the current control stack frame and pushes the
    /// ABI results to the value stack.
-    pub fn bind_end<M: MacroAssembler>(&self, masm: &mut M, context: &mut CodeGenContext) {
-        // Push the results to the value stack.
-        if let Some(data) = self.results() {
-            context.push_abi_results(data, masm);
-        }
+    pub fn bind_end<M: MacroAssembler>(&mut self, masm: &mut M, context: &mut CodeGenContext) {
+        self.push_abi_results(context, masm);
        self.bind_exit_label(masm);
    }

@@ -419,17 +583,344 @@ impl ControlStackFrame {
    /// Returns [`crate::abi::ABIResults`] of the control stack frame
    /// block.
-    pub fn results(&self) -> Option<&ABIResultsData> {
+    pub fn results<M>(&mut self) -> &mut ABIResults
+    where
+        M: MacroAssembler,
+    {
        use ControlStackFrame::*;
        match self {
-            If { results_data, .. } | Else { results_data, .. } | Block { results_data, .. } => {
-                Some(results_data)
+            If { sig, .. } | Else { sig, .. } | Block { sig, .. } => sig.results::<M>(),
+            Loop { sig, .. } => sig.params::<M>(),
+        }
+    }
+
+    /// Returns the block params interpreted as [crate::abi::ABIResults].
+    pub fn params<M>(&mut self) -> &mut ABIResults
+    where
+        M: MacroAssembler,
+    {
+        use ControlStackFrame::*;
+        match self {
+            If { sig, .. } | Else { sig, .. } | Block { sig, .. } | Loop { sig, .. } => {
+                sig.params::<M>()
            }
-            Loop { .. } => None,
        }
    }

+    /// Orchestrates how block results are handled.
+    /// Results are handled in reverse order, starting from register results
+    /// and continuing to memory values. This guarantees that the stack
+    /// ordering invariant is maintained. See [ABIResults] for more details.
+    ///
+    /// This function will iterate through each result and invoke the provided
+    /// callback if there are results on the stack.
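+    ///
+    /// For example (an illustrative sketch; how results are split between
+    /// registers and stack slots is decided by [ABIResults]): for a block
+    /// with results (i32 i64 i64) where one result is assigned to a register
+    /// and the other two to stack slots, the register result is popped off
+    /// the value stack first, and the two memory results are then written to
+    /// the block's return area.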
+    ///
+    /// Calculating the return area involves ensuring that there's enough stack
+    /// space to store the block's results. To make the process of handling
+    /// multiple results easier, this function will save all live registers and
+    /// locals right after handling any register results. This will ensure that
+    /// the top `n` values in the value stack are correctly placed in the memory
+    /// locations corresponding to multiple stack results. Once the iteration
+    /// over all the results is done, the stack result area of the block will be
+    /// updated.
+    pub fn pop_abi_results<M, F>(
+        &mut self,
+        context: &mut CodeGenContext,
+        masm: &mut M,
+        calculate_ret_area: F,
+    ) where
+        M: MacroAssembler,
+        F: FnMut(&ABIResults, &mut CodeGenContext, &mut M) -> Option<RetArea>,
+    {
+        Self::pop_abi_results_impl(self.results::<M>(), context, masm, calculate_ret_area)
+    }
+
+    /// Shared implementation for popping the ABI results.
+    /// This is needed because, in some cases, params must be interpreted and
+    /// used as the results of the block. When emitting code at control flow
+    /// joins, the block params are interpreted as results, to ensure that they
+    /// can correctly "flow" as the results of the block. This is especially
+    /// important in the presence of empty then, else and loop blocks. This
+    /// interpretation is an internal detail of the control module, and having
+    /// a shared implementation allows the caller to decide how the
+    /// results should be interpreted.
+    pub fn pop_abi_results_impl<M, F>(
+        results: &mut ABIResults,
+        context: &mut CodeGenContext,
+        masm: &mut M,
+        mut calculate_ret_area: F,
+    ) where
+        M: MacroAssembler,
+        F: FnMut(&ABIResults, &mut CodeGenContext, &mut M) -> Option<RetArea>,
+    {
+        let mut iter = results.operands().iter().rev().peekable();
+
+        while let Some(ABIOperand::Reg { reg, .. }) = iter.peek() {
+            let TypedReg { reg, .. } = context.pop_to_reg(masm, Some(*reg));
+            context.free_reg(reg);
+            iter.next().unwrap();
+        }
+
+        let ret_area = calculate_ret_area(results, context, masm);
+
+        let retptr = Self::maybe_load_retptr(ret_area.as_ref(), &results, context, masm);
+        if let Some(area) = ret_area {
+            if area.is_sp() {
+                Self::ensure_ret_area(&area, context, masm);
+            }
+        }
+
+        if let Some(retptr) = retptr {
+            while let Some(ABIOperand::Stack { offset, .. }) = iter.peek() {
+                let addr = masm.address_at_reg(retptr, *offset);
+                context.pop_to_addr(masm, addr);
+                iter.next().unwrap();
+            }
+            context.free_reg(retptr);
+        }
+
+        if let Some(area) = ret_area {
+            if area.is_sp() {
+                Self::adjust_stack_results(area, results, context, masm);
+            }
+        }
+    }
+
+    /// Convenience wrapper around [CodeGenContext::push_abi_results] using the
+    /// results of the current frame.
+    fn push_abi_results<M>(&mut self, context: &mut CodeGenContext, masm: &mut M)
+    where
+        M: MacroAssembler,
+    {
+        context.push_abi_results(self.results::<M>(), masm, |results, _, _| {
+            results.ret_area().copied()
+        })
+    }
+
+    /// Preemptively handles the ABI results of the current frame.
+    /// This function is meant to be used when emitting control flow with joins,
+    /// in which it's not possible to know at compile time which branch will be
+    /// taken.
+    pub fn top_abi_results<M, F>(
+        &mut self,
+        context: &mut CodeGenContext,
+        masm: &mut M,
+        calculate_ret_area: F,
+    ) where
+        M: MacroAssembler,
+        F: FnMut(&ABIResults, &mut CodeGenContext, &mut M) -> Option<RetArea>,
+    {
+        Self::top_abi_results_impl::<M, _>(self.results::<M>(), context, masm, calculate_ret_area)
+    }
+
+    /// Internal implementation of [Self::top_abi_results].
+    /// See [Self::pop_abi_results_impl] on why an internal implementation is
+    /// needed.
+    fn top_abi_results_impl<M, F>(
+        results: &mut ABIResults,
+        context: &mut CodeGenContext,
+        masm: &mut M,
+        mut calculate_ret_area: F,
+    ) where
+        M: MacroAssembler,
+        F: FnMut(&ABIResults, &mut CodeGenContext, &mut M) -> Option<RetArea>,
+    {
+        let mut area = None;
+        Self::pop_abi_results_impl::<M, _>(results, context, masm, |r, context, masm| {
+            area = calculate_ret_area(r, context, masm);
+            area
+        });
+        // Use the previously calculated area to ensure that the ret area is
+        // kept in sync between both operations.
+        context.push_abi_results::<M, _>(results, masm, |_, _, _| area);
+    }

+    // If the results on the stack are handled via the stack pointer, ensure
+    // that the stack results are correctly located. In general, since values
+    // in the value stack are spilled when exiting the block, the top `n`
+    // entries in the value stack, representing the `n` stack results of the
+    // block, are almost correctly located. However, since constants are not
+    // spilled, their presence complicates block exits. For this reason, the
+    // last step for finalizing multiple block results involves:
+    // * Scanning the value stack from oldest to newest memory values and
+    //   calculating the source and destination of each value; if the source
+    //   is closer to the stack pointer (greater) than the destination,
+    //   perform a memory move of the bytes to its destination, else stop,
+    //   because the memory values are in place.
+    // * Scanning the value stack from newest to oldest and calculating the
+    //   source and destination of each value; if the source is closer to the
+    //   frame pointer (less) than the destination, perform a memory move of
+    //   the bytes to its destination, else stop, because the memory values
+    //   are in place.
+    // * Lastly, iterate over the top `n` elements of the value stack and
+    //   spill any constant values, placing them in their respective memory
+    //   location.
+    //
+    // The implementation in Winch is inspired by how this is handled in
+    // SpiderMonkey's WebAssembly Baseline Compiler:
+    // https://wingolog.org/archives/2020/04/03/multi-value-webassembly-in-firefox-from-1-to-n
+    fn adjust_stack_results<M>(
+        ret_area: RetArea,
+        results: &ABIResults,
+        context: &mut CodeGenContext,
+        masm: &mut M,
+    ) where
+        M: MacroAssembler,
+    {
+        debug_assert!(ret_area.is_sp());
+        let results_offset = ret_area.unwrap_sp();
+
+        // Start iterating from memory values that are closer to the
+        // frame pointer (oldest entries first).
+        for (i, operand) in results.operands().iter().enumerate() {
+            if operand.is_reg() {
+                break;
+            }
+
+            let value_index = (context.stack.len() - results.stack_operands_len()) + i;
+            let val = context.stack.inner()[value_index];
+
+            match (val, operand) {
+                (Val::Memory(mem), ABIOperand::Stack { offset, size, .. }) => {
+                    let dst = results_offset.as_u32() - *offset;
+                    let src = mem.slot.offset;
+
+                    // Values are moved from lower (SP) to higher (FP)
+                    // addresses.
+                    if src.as_u32() <= dst {
+                        break;
+                    }
+
+                    masm.memmove(
+                        src,
+                        SPOffset::from_u32(dst),
+                        *size,
+                        MemMoveDirection::LowToHigh,
+                    );
+                }
+                _ => {}
+            }
+        }
+
+        // Start iterating from memory values that are closer to the
+        // stack pointer (newest entries first).
+        for (i, operand) in results
+            .operands()
+            .iter()
+            .rev()
+            // Skip any register results.
+            .skip(results.regs().len())
+            .enumerate()
+        {
+            let value_index = context.stack.len() - i - 1;
+            let val = context.stack.inner()[value_index];
+            match (val, operand) {
+                (Val::Memory(mem), ABIOperand::Stack { offset, size, .. }) => {
+                    let dst = results_offset.as_u32() - *offset;
+                    let src = mem.slot.offset;
+
+                    // Values are moved from higher (FP) to lower (SP)
+                    // addresses.
+                    if src.as_u32() >= dst {
+                        break;
+                    }
+
+                    masm.memmove(
+                        src,
+                        SPOffset::from_u32(dst),
+                        *size,
+                        MemMoveDirection::HighToLow,
+                    );
+                }
+                _ => {}
+            }
+        }
+
+        // Finally store any constants in the value stack in their respective
+        // locations.
+        for operand in results
+            .operands()
+            .iter()
+            .take(results.stack_operands_len())
+            .rev()
+        {
+            // Iterate from the newest to the oldest value, i.e. from the top
+            // to the bottom of the value stack, matching the reversed
+            // iteration over the stack operands.
+            match (operand, context.stack.peek().unwrap()) {
+                (ABIOperand::Stack { ty, offset, .. }, Val::I32(v)) => {
+                    let addr =
+                        masm.address_from_sp(SPOffset::from_u32(results_offset.as_u32() - *offset));
+                    masm.store(RegImm::i32(*v), addr, (*ty).into());
+                }
+                (ABIOperand::Stack { ty, offset, .. }, Val::I64(v)) => {
+                    let addr =
+                        masm.address_from_sp(SPOffset::from_u32(results_offset.as_u32() - *offset));
+                    masm.store(RegImm::i64(*v), addr, (*ty).into());
+                }
+                (ABIOperand::Stack { ty, offset, .. }, Val::F32(v)) => {
+                    let addr =
+                        masm.address_from_sp(SPOffset::from_u32(results_offset.as_u32() - *offset));
+                    masm.store(RegImm::f32(v.bits()), addr, (*ty).into());
+                }
+                (ABIOperand::Stack { ty, offset, .. }, Val::F64(v)) => {
+                    let addr =
+                        masm.address_from_sp(SPOffset::from_u32(results_offset.as_u32() - *offset));
+                    masm.store(RegImm::f64(v.bits()), addr, (*ty).into());
+                }
+                (_, v) => debug_assert!(v.is_mem()),
+            }
+
+            let _ = context.stack.pop().unwrap();
+        }
+
+        // Adjust any excess stack space: the stack space after handling the
+        // block's results should be the exact amount needed by the return area.
+        debug_assert!(masm.sp_offset().as_u32() >= results_offset.as_u32());
+        masm.free_stack(masm.sp_offset().as_u32() - results_offset.as_u32());
+    }
+
+    /// Ensures that there is enough space for return values on the stack.
+    /// This function is called at the end of all blocks and when branching
+    /// from within blocks.
+    fn ensure_ret_area<M>(ret_area: &RetArea, context: &mut CodeGenContext, masm: &mut M)
+    where
+        M: MacroAssembler,
+    {
+        debug_assert!(ret_area.is_sp());
+        // Save any live registers and locals when exiting the block to ensure
+        // that the respective values are correctly located in memory.
+        // See [Self::adjust_stack_results] for more details.
+        context.spill(masm);
+        if ret_area.unwrap_sp() > masm.sp_offset() {
+            masm.reserve_stack(ret_area.unwrap_sp().as_u32() - masm.sp_offset().as_u32())
+        }
+    }
+
+    /// Loads the return pointer, if it exists, into the next available register.
+    fn maybe_load_retptr<M>(
+        ret_area: Option<&RetArea>,
+        results: &ABIResults,
+        context: &mut CodeGenContext,
+        masm: &mut M,
+    ) -> Option<Reg>
+    where
+        M: MacroAssembler,
+    {
+        ret_area
+            .map(|area| match area {
+                RetArea::Slot(slot) => {
+                    let base = context
+                        .without::<_, M, _>(results.regs(), masm, |cx, masm| cx.any_gpr(masm));
+                    let local_addr = masm.local_address(&slot);
+                    masm.load_ptr(local_addr, base);
+                    Some(base)
+                }
+                _ => None,
+            })
+            .flatten()
+    }
+
    /// This function is used at the end of unreachable code handling
    /// to determine if the reachability status should be updated.
pub fn is_next_sequence_reachable(&self) -> bool { @@ -452,41 +943,17 @@ impl ControlStackFrame { } } - /// Returns the value stack length and stack pointer offset of the - /// control frame registered at entry. - pub fn base_stack_len_and_sp(&self) -> (usize, SPOffset) { + /// Returns a reference to the [StackState] of the block. + pub fn stack_state(&self) -> &StackState { use ControlStackFrame::*; match self { - If { - base_sp, - base_stack_len, - .. - } - | Else { - base_sp, - base_stack_len, - .. - } - | Block { - base_sp, - base_stack_len, - .. - } - | Loop { - base_sp, - base_stack_len, - .. - } => (*base_stack_len, *base_sp), + If { stack_state, .. } + | Else { stack_state, .. } + | Block { stack_state, .. } + | Loop { stack_state, .. } => stack_state, } } - /// Resolves how to handle results when the current frame is a - /// jump target Notably in the case of loops we don't take into - /// account the frame's results. - pub fn as_target_results(&self) -> Option<&ABIResultsData> { - self.results() - } - /// Returns true if the current frame is [ControlStackFrame::If]. pub fn is_if(&self) -> bool { match self { @@ -494,4 +961,12 @@ impl ControlStackFrame { _ => false, } } + + /// Returns true if the current frame is [ControlStackFrame::Loop]. + pub fn is_loop(&self) -> bool { + match self { + Self::Loop { .. } => true, + _ => false, + } + } } diff --git a/winch/codegen/src/codegen/env.rs b/winch/codegen/src/codegen/env.rs index 48a690cf189b..a65cad550bc4 100644 --- a/winch/codegen/src/codegen/env.rs +++ b/winch/codegen/src/codegen/env.rs @@ -1,8 +1,4 @@ -use crate::{ - abi::{ABIResults, ABIResultsData}, - codegen::{BuiltinFunction, OperandSize, ABI}, - CallingConvention, -}; +use crate::codegen::{control, BlockSig, BuiltinFunction, OperandSize}; use std::collections::{ hash_map::Entry::{Occupied, Vacant}, HashMap, @@ -68,34 +64,6 @@ pub struct CalleeInfo { pub index: FuncIndex, } -/// Holds information about a block's param and return count. -#[derive(Default, Debug, Copy, Clone)] -pub(crate) struct BlockTypeInfo { - /// Parameter count. - pub param_count: usize, - /// Result count. - pub result_count: usize, -} - -impl BlockTypeInfo { - /// Creates a [`BlockTypeInfo`] with one result. - pub fn with_single_result() -> Self { - Self { - param_count: 0, - result_count: 1, - } - } - - /// Creates a new [`BlockTypeInfo`] with the given param and result - /// count. - pub fn new(params: usize, results: usize) -> Self { - Self { - param_count: params, - result_count: results, - } - } -} - /// The function environment. /// /// Contains all information about the module and runtime that is accessible to @@ -167,41 +135,20 @@ impl<'a, 'translation, 'data, P: PtrSize> FuncEnv<'a, 'translation, 'data, P> { } } - pub(crate) fn resolve_block_type_info(&self, ty: BlockType) -> BlockTypeInfo { + /// Converts a [wasmparser::BlockType] into a [BlockSig]. + pub(crate) fn resolve_block_sig(&self, ty: BlockType) -> BlockSig { use BlockType::*; match ty { - Empty => BlockTypeInfo::default(), - Type(_) => BlockTypeInfo::with_single_result(), - FuncType(idx) => { - let sig_index = - self.translation.module.types[TypeIndex::from_u32(idx)].unwrap_function(); - let sig = &self.types[sig_index]; - BlockTypeInfo::new(sig.params().len(), sig.returns().len()) - } - } - } - - /// Resolves the type of the block in terms of [`wasmtime_environ::WasmType`]. - // TODO:: - // Profile this operation and if proven to be significantly expensive, - // intern ABIResultsData instead of recreating it every time. 
- pub(crate) fn resolve_block_results_data(&self, blockty: BlockType) -> ABIResultsData { - use BlockType::*; - match blockty { - Empty => ABIResultsData::wrap(ABIResults::default()), + Empty => BlockSig::new(control::BlockType::void()), Type(ty) => { let ty = self.convert_valtype(ty); - let results = ::abi_results(&[ty], &CallingConvention::Default); - ABIResultsData::wrap(results) + BlockSig::new(control::BlockType::single(ty)) } FuncType(idx) => { let sig_index = self.translation.module.types[TypeIndex::from_u32(idx)].unwrap_function(); - let results = ::abi_results( - &self.types[sig_index].returns(), - &CallingConvention::Default, - ); - ABIResultsData::wrap(results) + let sig = &self.types[sig_index]; + BlockSig::new(control::BlockType::func(sig.clone())) } } } diff --git a/winch/codegen/src/codegen/mod.rs b/winch/codegen/src/codegen/mod.rs index ddbc2bb1330e..5ae66bf84d80 100644 --- a/winch/codegen/src/codegen/mod.rs +++ b/winch/codegen/src/codegen/mod.rs @@ -1,8 +1,8 @@ use crate::{ - abi::{ABIOperand, ABIResultsData, ABISig, RetArea, ABI}, - codegen::BlockTypeInfo, + abi::{ABIOperand, ABISig, RetArea, ABI}, + codegen::BlockSig, isa::reg::Reg, - masm::{IntCmpKind, MacroAssembler, OperandSize, RegImm, TrapCode}, + masm::{IntCmpKind, MacroAssembler, OperandSize, RegImm, SPOffset, TrapCode}, stack::TypedReg, }; use anyhow::Result; @@ -27,7 +27,7 @@ where M: MacroAssembler, { /// The ABI-specific representation of the function signature, excluding results. - sig: ABISig, + pub sig: ABISig, /// The code generation context. pub context: CodeGenContext<'a, 'translation>, @@ -81,24 +81,24 @@ where self.masm.prologue(); self.masm.reserve_stack(self.context.frame.locals_size); - // If the function has multiple returns, assign the corresponding base. - let mut results_data = ABIResultsData::wrap(self.sig.results.clone()); - if self.sig.params.has_retptr() { - results_data.ret_area = - Some(RetArea::slot(self.context.frame.results_base_slot.unwrap())); - } // Once we have emitted the epilogue and reserved stack space for the locals, we push the // base control flow block. - self.control_frames - .push(ControlStackFrame::function_body_block( - results_data, - BlockTypeInfo::new( - self.sig.params_without_retptr().len(), - self.sig.results.len(), - ), - self.masm, - &mut self.context, - )); + self.control_frames.push(ControlStackFrame::block( + BlockSig::from_sig(self.sig.clone()), + self.masm, + &mut self.context, + )); + + // Set the return area of the results *after* initializing the block. In + // the function body block case, we'll treat the results as any other + // case, addressed from the stack pointer, and when ending the function + // the return area will be set to the return pointer. + if self.sig.params.has_retptr() { + self.sig + .results + .set_ret_area(RetArea::slot(self.context.frame.results_base_slot.unwrap())); + } + Ok(()) } @@ -117,34 +117,26 @@ where // if-then branch, but if the `if` was reachable at // entry, the if-else branch will be reachable. self.context.reachable = true; - self.context.ensure_stack_state(self.masm, &frame); - frame.bind_else(self.masm, self.context.reachable); + frame.ensure_stack_state(self.masm, &mut self.context); + frame.bind_else(self.masm, &mut self.context); } } pub fn handle_unreachable_end(&mut self) { - let frame = self.control_frames.pop().unwrap(); + let mut frame = self.control_frames.pop().unwrap(); // We just popped the outermost block. 
        let is_outermost = self.control_frames.len() == 0;
+
        if frame.is_next_sequence_reachable() {
            self.context.reachable = true;
-
-            self.context.ensure_stack_state(self.masm, &frame);
-            // If the current frame is the outermost frame, which corresponds to the
-            // current function's body, only bind the exit label as we don't need to
-            // push any more values to the value stack, else perform the entire `bind_end`
-            // process, which involves pushing results to the value stack.
-            if is_outermost {
-                frame.bind_exit_label(self.masm);
-            } else {
-                frame.bind_end(self.masm, &mut self.context);
-            }
+            frame.ensure_stack_state(self.masm, &mut self.context);
+            frame.bind_end(self.masm, &mut self.context);
        } else if is_outermost {
            // If we reach the end of the function in an unreachable
            // state, perform the necessary cleanup to leave the stack
            // and SP in the expected state. The compiler can enter
            // in this state through an infinite loop.
-            self.context.ensure_stack_state(self.masm, &frame);
+            frame.ensure_stack_state(self.masm, &mut self.context);
        }
    }

@@ -288,7 +280,25 @@ where
    /// Emit the usual function end instruction sequence.
    fn emit_end(&mut self) -> Result<()> {
-        assert!(self.context.stack.len() == 0);
+        // The implicit body block is treated as a normal block (it pushes its
+        // results to the value stack), so when reaching the end, we pop them
+        // using the current function's signature as a reference.
+        let base = SPOffset::from_u32(self.context.frame.locals_size);
+        if self.context.reachable {
+            ControlStackFrame::pop_abi_results_impl(
+                &mut self.sig.results,
+                &mut self.context,
+                self.masm,
+                |results, _, _| results.ret_area().copied(),
+            );
+        } else {
+            // If we reach the end of the function in an unreachable code
+            // state, simply truncate the value stack to the expected length.
+            // The compiler can enter this state through an infinite loop.
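+            // For example (illustrative), the `end` of the following function
+            // is only reachable in an unreachable state:
+            //
+            // (module
+            //   (func (result i32)
+            //     (loop (br 0))))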
+ self.context.truncate_stack_to(0); + self.masm.reset_stack_pointer(base); + } + debug_assert_eq!(self.context.stack.len(), 0); self.masm.epilogue(self.context.frame.locals_size); Ok(()) } diff --git a/winch/codegen/src/isa/aarch64/abi.rs b/winch/codegen/src/isa/aarch64/abi.rs index 4102e029c79d..c63927b73dae 100644 --- a/winch/codegen/src/isa/aarch64/abi.rs +++ b/winch/codegen/src/isa/aarch64/abi.rs @@ -96,19 +96,15 @@ impl ABI for Aarch64ABI { let mut params_index_env = RegIndexEnv::default(); let results = Self::abi_results(returns, call_conv); - let params = ABIParams::from::<_, Self>( - params, - 0, - results.has_stack_results(), - |ty, stack_offset| { + let params = + ABIParams::from::<_, Self>(params, 0, results.on_stack(), |ty, stack_offset| { Self::to_abi_operand( ty, stack_offset, &mut params_index_env, ParamsOrReturns::Params, ) - }, - ); + }); ABISig::new(params, results) } diff --git a/winch/codegen/src/isa/x64/abi.rs b/winch/codegen/src/isa/x64/abi.rs index b32b3fefbbb7..fe9009a376e0 100644 --- a/winch/codegen/src/isa/x64/abi.rs +++ b/winch/codegen/src/isa/x64/abi.rs @@ -118,7 +118,7 @@ impl ABI for X64ABI { let params = ABIParams::from::<_, Self>( params, params_stack_offset, - results.has_stack_results(), + results.on_stack(), |ty, stack_offset| { Self::to_abi_operand( ty, @@ -235,7 +235,14 @@ impl X64ABI { let next_stack = if params_or_returns == ParamsOrReturns::Params { align_to(stack_offset, slot_size) + slot_size } else { - align_to(stack_offset, ty_size) + ty_size + // For the default calling convention, we don't type-size align, + // given that results on the stack must match spills generated + // from within the compiler, which are not type-size aligned. + if call_conv.is_default() { + stack_offset + ty_size + } else { + align_to(stack_offset, ty_size) + ty_size + } }; (arg, next_stack) }; diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index e01eb6d344ea..459a64964851 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -710,7 +710,7 @@ impl Masm for MacroAssembler { } fn epilogue(&mut self, locals_size: u32) { - assert!(self.sp_offset == locals_size); + assert_eq!(self.sp_offset, locals_size); let rsp = rsp(); if locals_size > 0 { diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index 33d1cc940afa..a2e09b009a65 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -23,8 +23,21 @@ pub(crate) enum RemKind { Unsigned, } +/// The direction to perform the memory move. +#[derive(Debug, Clone, Eq, PartialEq)] +pub(crate) enum MemMoveDirection { + /// From high memory addresses to low memory addresses. + /// Invariant: the source location is closer to the FP than the destination + /// location, which will be closer to the SP. + HighToLow, + /// From low memory addresses to high memory addresses. + /// Invariant: the source location is closer to the SP than the destination + /// location, which will be closer to the FP. + LowToHigh, +} + /// Representation of the stack pointer offset. -#[derive(Copy, Clone, Eq, PartialEq, Debug, PartialOrd, Ord)] +#[derive(Copy, Clone, Eq, PartialEq, Debug, PartialOrd, Ord, Default)] pub struct SPOffset(u32); impl SPOffset { @@ -430,8 +443,11 @@ pub(crate) trait MacroAssembler { /// Performs a memory move of bytes from src to dest. /// Bytes are moved in blocks of 8 bytes, where possible. 
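+    /// For example (an illustrative sketch; the offsets are hypothetical):
+    /// moving 16 bytes from `src` at `SPOffset(32)` to `dst` at `SPOffset(16)`
+    /// is a `LowToHigh` move, since the destination lives at a higher machine
+    /// address (closer to the FP) than the source.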
- fn memmove(&mut self, src: SPOffset, dst: SPOffset, bytes: u32) { - debug_assert!(dst.as_u32() < src.as_u32()); + fn memmove(&mut self, src: SPOffset, dst: SPOffset, bytes: u32, direction: MemMoveDirection) { + match direction { + MemMoveDirection::LowToHigh => debug_assert!(dst.as_u32() < src.as_u32()), + MemMoveDirection::HighToLow => debug_assert!(dst.as_u32() > src.as_u32()), + } // At least 4 byte aligned. debug_assert!(bytes % 4 == 0); let mut remaining = bytes; diff --git a/winch/codegen/src/stack.rs b/winch/codegen/src/stack.rs index e1ade9231b92..7f8ec170c1f5 100644 --- a/winch/codegen/src/stack.rs +++ b/winch/codegen/src/stack.rs @@ -331,23 +331,6 @@ impl Stack { self.inner[partition..].into_iter() } - /// Duplicates the top `n` elements of the stack. - // Will be needed for control flow, it's just not integrated yet. - #[allow(dead_code)] - pub fn dup(&mut self, n: usize) { - let len = self.len(); - assert!(n <= len); - let partition = len - n; - - if n > 0 { - for e in partition..len { - if let Some(v) = self.inner.get(e) { - self.push(*v) - } - } - } - } - /// Pops the top element of the stack, if any. pub fn pop(&mut self) -> Option { self.inner.pop() @@ -396,6 +379,11 @@ impl Stack { &mut self.inner } + /// Get a reference to the inner stack representation. + pub fn inner(&self) -> &Vec { + &self.inner + } + /// Calculates the size of, in bytes, of the top n [Memory] entries /// in the value stack. pub fn sizeof(&self, top: usize) -> u32 { diff --git a/winch/codegen/src/trampoline.rs b/winch/codegen/src/trampoline.rs index bb2f399bda95..6d3e4bcfd75b 100644 --- a/winch/codegen/src/trampoline.rs +++ b/winch/codegen/src/trampoline.rs @@ -9,9 +9,6 @@ // loading/storing the VM context pointer. The real value of the operand size // and VM context type should be derived from the ABI's pointer size. This is // going to be relevant once 32-bit architectures are supported. -// -// TODO: Are guardrails needed for params/results? Especially when dealing -// with the array calling convention. use crate::{ abi::{ABIOperand, ABIParams, ABISig, RetArea, ABI}, codegen::ptr_type_from_ptr_size, @@ -156,7 +153,7 @@ where self.store_results_to_array(&wasm_sig, ret_area.as_ref()); - if wasm_sig.results.has_stack_results() { + if wasm_sig.has_stack_results() { self.masm.free_stack(wasm_sig.results.size()); } @@ -240,7 +237,7 @@ where self.masm.free_stack(reserved_stack); self.forward_results(&wasm_sig, &native_sig, ret_area.as_ref(), offsets.last()); - if wasm_sig.results.has_stack_results() { + if wasm_sig.has_stack_results() { self.masm.free_stack(wasm_sig.results.size()); } self.epilogue_with_callee_saved_restore(spill_size); @@ -250,7 +247,7 @@ where /// Creates the return area in the caller's frame. 
fn make_ret_area(&mut self, sig: &ABISig) -> Option { - sig.results.has_stack_results().then(|| { + sig.has_stack_results().then(|| { self.masm.reserve_stack(sig.results.size()); let offs = self.masm.sp_offset(); RetArea::sp(offs) @@ -291,7 +288,7 @@ where let results_spill = self.spill(callee_sig.results()); let mut spill_offsets_iter = results_spill.0.iter(); - let caller_retptr = caller_sig.results.has_stack_results().then(|| { + let caller_retptr = caller_sig.has_stack_results().then(|| { let fp = ::fp_reg(); let arg_base: u32 = ::arg_base_offset().into(); match caller_sig.params.unwrap_results_area_operand() { @@ -426,7 +423,7 @@ where self.masm.free_stack(reserved_stack); self.forward_results(&native_sig, &wasm_sig, ret_area.as_ref(), offsets.last()); - if native_sig.results.has_stack_results() { + if native_sig.has_stack_results() { self.masm.free_stack(native_sig.results.size()); } diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index 4afc9a895f58..280fb43cb566 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -4,11 +4,11 @@ //! which validates and dispatches to the corresponding //! machine code emitter. -use crate::abi::ABI; +use crate::abi::{RetArea, ABI}; use crate::codegen::{control_index, Callee, CodeGen, ControlStackFrame, FnCall}; use crate::masm::{ - DivKind, ExtendKind, FloatCmpKind, IntCmpKind, MacroAssembler, OperandSize, RegImm, RemKind, - RoundingMode, ShiftKind, + DivKind, ExtendKind, FloatCmpKind, IntCmpKind, MacroAssembler, MemMoveDirection, OperandSize, + RegImm, RemKind, RoundingMode, SPOffset, ShiftKind, }; use crate::stack::{TypedReg, Val}; use cranelift_codegen::ir::TrapCode; @@ -922,19 +922,7 @@ where self.handle_unreachable_end(); } else { let mut control = self.control_frames.pop().unwrap(); - let is_outermost = self.control_frames.len() == 0; - // If it's not the outermost control stack frame, emit the the full "end" sequence, - // which involves, popping results from the value stack, pushing results back to the - // value stack and binding the exit label. - // Else, pop values from the value stack and bind the exit label. 
-            if !is_outermost {
-                control.emit_end(self.masm, &mut self.context);
-            } else {
-                if let Some(data) = control.results() {
-                    self.context.pop_abi_results(data, self.masm);
-                }
-                control.bind_exit_label(self.masm);
-            }
+            control.emit_end(self.masm, &mut self.context);
        }
    }

@@ -1340,8 +1328,7 @@ where
    fn visit_if(&mut self, blockty: BlockType) {
        self.control_frames.push(ControlStackFrame::r#if(
-            self.env.resolve_block_results_data::<M::ABI>(blockty),
-            self.env.resolve_block_type_info(blockty),
+            self.env.resolve_block_sig(blockty),
            self.masm,
            &mut self.context,
        ));
@@ -1361,8 +1348,7 @@ where
    fn visit_block(&mut self, blockty: BlockType) {
        self.control_frames.push(ControlStackFrame::block(
-            self.env.resolve_block_results_data::<M::ABI>(blockty),
-            self.env.resolve_block_type_info(blockty),
+            self.env.resolve_block_sig(blockty),
            self.masm,
            &mut self.context,
        ));
    }

    fn visit_loop(&mut self, blockty: BlockType) {
        self.control_frames.push(ControlStackFrame::r#loop(
-            self.env.resolve_block_type_info(blockty),
+            self.env.resolve_block_sig(blockty),
            self.masm,
            &mut self.context,
        ));
    }

    fn visit_br(&mut self, depth: u32) {
        let index = control_index(depth, self.control_frames.len());
        let frame = &mut self.control_frames[index];
        self.context
            .unconditional_jump(frame, self.masm, |masm, cx, frame| {
-                if let Some(r) = frame.as_target_results() {
-                    cx.pop_abi_results(r, masm);
-                }
+                frame
+                    .pop_abi_results::<M, _>(cx, masm, |results, _, _| results.ret_area().copied());
            });
    }

    fn visit_br_if(&mut self, depth: u32) {
        let index = control_index(depth, self.control_frames.len());
        let frame = &mut self.control_frames[index];
        frame.set_as_target();

-        let top = if let Some(data) = frame.as_target_results() {
+        let top = {
            let top = self.context.without::<TypedReg, M, _>(
-                data.results.regs(),
+                frame.results::<M>().regs(),
                self.masm,
                |ctx, masm| ctx.pop_to_reg(masm, None),
            );
-            self.context.top_abi_results(data, self.masm);
+            frame.top_abi_results::<M, _>(
+                &mut self.context,
+                self.masm,
+                |results, context, masm| {
+                    // In the case of `br_if` there's a possibility that we'll
+                    // exit the block early or fall through; for a
+                    // fallthrough, we cannot rely on the pre-computed return
+                    // area; it must be recalculated so that any values that
+                    // are generated are correctly placed near the current
+                    // stack pointer.
+                    results.on_stack().then(|| {
+                        let stack_consumed = context.stack.sizeof(results.stack_operands_len());
+                        let base = masm.sp_offset().as_u32() - stack_consumed;
+                        let offs = base + results.size();
+                        RetArea::sp(SPOffset::from_u32(offs))
+                    })
+                },
+            );
            top
-        } else {
-            self.context.pop_to_reg(self.masm, None)
        };

        // Emit instructions to balance the machine stack if the frame has
        // a different offset.
        let current_sp_offset = self.masm.sp_offset();
-        let (_, frame_sp_offset) = frame.base_stack_len_and_sp();
-        let (label, cmp, needs_cleanup) = if current_sp_offset > frame_sp_offset {
+        let results_size = frame.results::<M>().size();
+        let state = frame.stack_state();
+        let (label, cmp, needs_cleanup) = if current_sp_offset > state.target_offset {
            (self.masm.get_label(), IntCmpKind::Eq, true)
        } else {
            (*frame.label(), IntCmpKind::Ne, false)
        };
@@ -1421,7 +1422,13 @@ where
        if needs_cleanup {
            // Emit instructions to balance the stack and jump if not falling
            // through.
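+            // For example (an illustrative sketch), in the following module
+            // the `br_if` may exit the block with 10 as its result or fall
+            // through with 10 still on the value stack; the jump path must
+            // balance the machine stack to the target's expectations while
+            // the fallthrough path keeps the current offsets:
+            //
+            // (module
+            //   (func (param i32) (result i32)
+            //     (block (result i32)
+            //       (i32.const 10)
+            //       (local.get 0)
+            //       (br_if 0))))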
- self.masm.ensure_sp_for_jump(frame_sp_offset); + self.masm.memmove( + current_sp_offset, + state.target_offset, + results_size, + MemMoveDirection::LowToHigh, + ); + self.masm.ensure_sp_for_jump(state.target_offset); self.masm.jmp(*frame.label()); // Restore sp_offset to what it was for falling through and emit @@ -1440,24 +1447,22 @@ where let labels: SmallVec<[_; 5]> = (0..len).map(|_| self.masm.get_label()).collect(); let default_index = control_index(targets.default(), self.control_frames.len()); - let default_result = self.control_frames[default_index].as_target_results(); + let default_frame = &mut self.control_frames[default_index]; + let default_result = default_frame.results::(); - let (index, tmp) = if let Some(data) = default_result { + let (index, tmp) = { let index_and_tmp = self.context.without::<(TypedReg, _), M, _>( - data.results.regs(), + default_result.regs(), self.masm, |cx, masm| (cx.pop_to_reg(masm, None), cx.any_gpr(masm)), ); // Materialize any constants or locals into their result representation, // so that when reachability is restored, they are correctly located. - self.context.top_abi_results(data, self.masm); + default_frame.top_abi_results::(&mut self.context, self.masm, |results, _, _| { + results.ret_area().copied() + }); index_and_tmp - } else { - ( - self.context.pop_to_reg(self.masm, None), - self.context.any_gpr(self.masm), - ) }; self.masm.jmp_table(&labels, index.into(), tmp); @@ -1486,8 +1491,8 @@ where self.masm.bind(*l); // Ensure that the stack pointer is correctly positioned before // jumping to the jump table code. - let (_, offset) = frame.base_stack_len_and_sp(); - self.masm.ensure_sp_for_jump(offset); + let state = frame.stack_state(); + self.masm.ensure_sp_for_jump(state.target_offset); self.masm.jmp(*frame.label()); frame.set_as_target(); } @@ -1508,9 +1513,8 @@ where let outermost = &mut self.control_frames[0]; self.context .unconditional_jump(outermost, self.masm, |masm, cx, frame| { - if let Some(data) = frame.as_target_results() { - cx.pop_abi_results(data, masm); - } + frame + .pop_abi_results::(cx, masm, |results, _, _| results.ret_area().copied()); }); }