diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index e61deed16aa..95c47e7752c 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -2788,69 +2788,79 @@ struct OptimizeInstructions // simple peephole optimizations - all we care about is a single instruction // at a time, and its inputs). bool areConsecutiveInputsEqual(Expression* left, Expression* right) { - // When we look for a tee/get pair, we can consider the fallthrough values - // for the first, as the fallthrough happens last (however, we must use - // NoTeeBrIf as we do not want to look through the tee). We cannot do this - // on the second, however, as there could be effects in the middle. - // TODO: Use effects here perhaps. - left = - Properties::getFallthrough(left, - getPassOptions(), - *getModule(), - Properties::FallthroughBehavior::NoTeeBrIf); - if (areMatchingTeeAndGet(left, right)) { - return true; + // The fallthrough expression of `left` produces its value. That value may + // depend on effects from other non-fallthrough expressions in `left`, but + // those expressions are generally executed before the fallthrough value and + // will affect the values of `left` and `right` equally, so we can ignore + // them. The exceptions are `local.tee` instructions and br_if conditions, + // which execute after the fallthrough and might affect only the value of + // `right`. + EffectAnalyzer interferingEffects(getPassOptions(), *getModule()); + bool matchingTeeAndGet = false; + while (true) { + left = + Properties::getFallthrough(left, + getPassOptions(), + *getModule(), + Properties::FallthroughBehavior::NoTeeBrIf); + if (auto* tee = left->dynCast()) { + assert(tee->isTee()); + // If `right` reads directly from this local.tee, then we know their + // values are the same. We know no children of this tee will be executed + // after it, so we need not look for further effects. But there might be + // interfering sets in previous br_if conditions, so we cannot just + // return here. + if (areMatchingTeeAndGet(left, right)) { + matchingTeeAndGet = true; + left = getFallthrough(left); + break; + } + interferingEffects.visit(tee); + left = tee->value; + continue; + } + if (auto* br = left->dynCast(); br && br->condition) { + assert(br->value); + interferingEffects.walk(br->condition); + left = br->value; + continue; + } + // We have found the real fallthrough expression. + break; } - // Ignore extraneous things and compare them syntactically. We can also - // look at the full fallthrough for both sides now. - auto* originalLeft = left; - left = getFallthrough(left); - auto* originalRight = right; - right = getFallthrough(right); - if (!ExpressionAnalyzer::equal(left, right)) { - return false; + // We similarly want to find the fallthrough expression of `right`. But this + // time, it is the expressions that execute before, not after, the + // fallthrough that can affect its value. + while (true) { + auto* next = Properties::getImmediateFallthrough( + right, getPassOptions(), *getModule()); + if (next == right) { + // We have found the fallthrough expression. + break; + } + // Gather the effects of all the non-fallthrough children of the + // container. + for (auto* child : ChildIterator(right)) { + if (child == next) { + // Skip children that execute after the fallthrough value, such as + // br_if conditions. + break; + } + interferingEffects.walk(child); + } + right = next; } - // We must also not have non-fallthrough effects that invalidate us, such as - // this situation: - // - // (local.get $x) - // (block - // (local.set $x ..) - // (local.get $x) - // ) - // - // The fallthroughs are identical, but the set may cause us to read a - // different value. - if (originalRight != right) { - // TODO: We could be more precise here and ignore right itself in - // originalRightEffects. - auto originalRightEffects = effects(originalRight); - auto rightEffects = effects(right); - if (originalRightEffects.invalidates(rightEffects)) { - return false; - } + // We have both fallthrough expressions. See if they look the same. + if (!matchingTeeAndGet && !ExpressionAnalyzer::equal(left, right)) { + return false; } - // The same, with left, as we can have this situation: - // - // (local.tee $x ..) - // (something using $x) - // ) - // (something using $x) - // - // The fallthroughs are identical, but the tee may cause us to read a - // different value. - if (originalLeft != left) { - auto originalLeftEffects = effects(originalLeft); - // |left == right| here (we would have exited early, otherwise, above), so - // we could compute either. Compute |left| as it might have better cache - // locality. - auto leftEffects = effects(left); - if (originalLeftEffects.invalidates(leftEffects)) { - return false; - } + // They do look the same! Make sure nothing executed in between them can + // affect the value of `right` and make it different from `left`. + if (interferingEffects.orderedBefore(effects(right))) { + return false; } // To be equal, they must also be known to return the same result diff --git a/test/lit/passes/optimize-instructions-struct-rmw.wast b/test/lit/passes/optimize-instructions-struct-rmw.wast index d8ea93ef4b4..4f196b53b5a 100644 --- a/test/lit/passes/optimize-instructions-struct-rmw.wast +++ b/test/lit/passes/optimize-instructions-struct-rmw.wast @@ -1,6 +1,6 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. -;; RUN: wasm-opt %s -all --optimize-instructions --preserve-type-order -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --optimize-instructions --preserve-type-order -S -o - | filecheck %s (module ;; CHECK: (type $i32 (shared (struct (field (mut i32))))) @@ -1476,3 +1476,675 @@ ) ) ) + +(module + ;; Test the effect analysis in areConsecutiveInputsEqual by optimizing (or + ;; not) cmpxchg operations with matching expected and replacement operands. + ;; CHECK: (type $struct (shared (struct (field (mut i32))))) + (type $struct (shared (struct (field (mut i32))))) + ;; CHECK: (type $array (shared (array (mut i32)))) + (type $array (shared (array (mut i32)))) + + ;; CHECK: (func $release-store-before (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $temp + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (array.atomic.set acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $release-store-before (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (local $temp i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + ;; This tee does not affect the value of the right-hand side, so we can + ;; ignore it. + (local.tee $temp + (block (result i32) + ;; This release store is not ordered before the struct.get, so we know + ;; it cannot influence the struct.get's value, so we can optimize. + (array.atomic.set acqrel $array (local.get $array) (i32.const 0) (i32.const 0)) + (struct.get $struct 0 (local.get $struct)) + ) + ) + (struct.get $struct 0 (local.get $struct)) + ) + ) + + ;; CHECK: (func $acquire-load-before (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (local $temp i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $temp + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.atomic.get acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $acquire-load-before (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (local $temp i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + ;; This tee does not affect the value of the right-hand side, so we can + ;; ignore it. + (local.tee $temp + (block (result i32) + ;; This acquire load is ordered before the struct.get, so can affect + ;; its value. However, it affects both struct.gets equally, so we can + ;; optimize anyway. (The code used to do a coarser check for ordering + ;; here that would prevent optimization.) + (drop (array.atomic.get acqrel $array (local.get $array) (i32.const 0))) + (struct.get $struct 0 (local.get $struct)) + ) + ) + (struct.get $struct 0 (local.get $struct)) + ) + ) + + ;; CHECK: (func $release-store-middle-lhs (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (array.atomic.set acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $release-store-middle-lhs (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (br_if $l + ;; This is the fallthrough value. + (struct.get $struct 0 (local.get $struct)) + ;; The condition is executed after the fallthrough, so its effects can + ;; affect the right-hand side in principle. However, the release store + ;; does not affect the value, so we can still optimize. + (block (result i32) + (array.atomic.set acqrel $array (local.get $array) (i32.const 0) (i32.const 0)) + (i32.const 0) + ) + ) + (struct.get $struct 0 (local.get $struct)) + ) + ) + ) + + ;; CHECK: (func $acquire-load-middle-lhs (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.atomic.get acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $acquire-load-middle-lhs (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (br_if $l + ;; This is the fallthrough value. + (struct.get $struct 0 (local.get $struct)) + ;; The acquire load could form a synchronization edge that forces the + ;; the right-hand struct.get to observe a different value, so it + ;; blocks optimization. + (block (result i32) + (drop (array.atomic.get acqrel $array (local.get $array) (i32.const 0))) + (i32.const 0) + ) + ) + (struct.get $struct 0 (local.get $struct)) + ) + ) + ) + + ;; CHECK: (func $release-store-middle-rhs (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (array.atomic.set acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $release-store-middle-rhs (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (struct.get $struct 0 (local.get $struct)) + (block (result i32) + ;; This release store is executed between the LHS and RHS struct.gets, + ;; but does not change the value observed by the RHS, so does not block + ;; optimization. + (array.atomic.set acqrel $array (local.get $array) (i32.const 0) (i32.const 0)) + (struct.get $struct 0 (local.get $struct)) + ) + ) + ) + + ;; CHECK: (func $acquire-load-middle-rhs (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.atomic.get acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $acquire-load-middle-rhs (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (struct.get $struct 0 (local.get $struct)) + (block (result i32) + ;; This acquire load is executed between the LHS and RHS struct.gets and + ;; can create a synchronization edge forcing the RHS to observe a + ;; different value, so it blocks optimization. + (drop (array.atomic.get acqrel $array (local.get $array) (i32.const 0))) + (struct.get $struct 0 (local.get $struct)) + ) + ) + ) + + ;; CHECK: (func $release-store-after (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (array.atomic.set acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $release-store-after (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (struct.get $struct 0 (local.get $struct)) + (br_if $l + ;; This is the RHS value. + (struct.get $struct 0 (local.get $struct)) + ;; The condition is executed after the RHS value is computed, so its + ;; effects cannot affect the values seen by the cmpxchg and can never + ;; block optimization. + ;; TODO: getImmediateFallthroughPtr "helpfully" only allows + ;; fallthrough via br_if when the value and condition can be + ;; reordered, so unnecessarily blocks this optimization. + (block (result i32) + (array.atomic.set acqrel $array (local.get $array) (i32.const 0) (i32.const 0)) + (i32.const 0) + ) + ) + ) + ) + ) + + ;; CHECK: (func $acquire-load-after (type $2) (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.atomic.get acqrel $array + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $acquire-load-after (param $struct (ref $struct)) (param $array (ref $array)) (result i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (struct.get $struct 0 (local.get $struct)) + (br_if $l + ;; This is the RHS value. + (struct.get $struct 0 (local.get $struct)) + ;; The condition is executed after the RHS value is computed, so its + ;; effects cannot affect the values seen by the cmpxchg and can never + ;; block optimization. + ;; TODO: getImmediateFallthroughPtr "helpfully" only allows + ;; fallthrough via br_if when the value and condition can be + ;; reordered. In this case the value and condition _can_ be reordered, + ;; but the reordering check is currently symmetric, so the + ;; optimization is still blocked. + (block (result i32) + (drop (array.atomic.get acqrel $array (local.get $array) (i32.const 0))) + (i32.const 0) + ) + ) + ) + ) + ) + + ;; CHECK: (func $outer-tee-match (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local $y i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (local.tee $y + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $outer-tee-match (param $struct (ref $struct)) (result i32) + (local $x i32) + (local $y i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + (local.tee $y + (i32.const 0) + ) + ) + ;; We recognize that this comes from the LHS tee and can optimize. + (local.get $x) + ) + ) + + ;; CHECK: (func $inner-tee-match (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local $y i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (local.tee $y + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $inner-tee-match (param $struct (ref $struct)) (result i32) + (local $x i32) + (local $y i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + (local.tee $y + (i32.const 0) + ) + ) + ;; We recognize that this comes from the LHS tee and can optimize. + (local.get $y) + ) + ) + + ;; CHECK: (func $br_if-after-tee (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $br_if-after-tee (param $struct (ref $struct)) (result i32) + (local $x i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (br_if $l + (local.tee $x + (i32.const 0) + ) + ;; The br_if condition is evaluated after the local.tee, so it + ;; interferes and blocks optimization. + (block (result i32) + (local.set $x (i32.const 2)) + (i32.const 0) + ) + ) + (local.get $x) + ) + ) + ) + + ;; CHECK: (func $tee-interference (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $tee-interference (param $struct (ref $struct)) (result i32) + (local $x i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + ;; The fallthrough values depends on $x, so the optimization is blocked. + (i32.add (local.get $x) (i32.const 1)) + ) + (i32.add (local.get $x) (i32.const 1)) + ) + ) + + ;; CHECK: (func $tee-set-before (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $tee-set-before (param $struct (ref $struct)) (result i32) + (local $x i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + (block (result i32) + ;; This set is executed before the tee, so it does not block + ;; optimization. + (local.set $x (i32.const 1)) + (i32.const 0) + ) + ) + (local.get $x) + ) + ) + + ;; CHECK: (func $tee-set-middle-lhs (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $struct 0 + ;; CHECK-NEXT: (block (result (ref $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $tee-set-middle-lhs (param $struct (ref $struct)) (result i32) + (local $x i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + (br_if $l + (i32.const 0) + ;; The condition is evaluated after the LHS value, so this would + ;; normally block optimization. However, it is evaluated before the + ;; local.tee, so actually it is fine. + (block (result i32) + (local.set $x (i32.const 1)) + (i32.const 0) + ) + ) + ) + (local.get $x) + ) + ) + ) + + ;; CHECK: (func $tee-set-middle-rhs (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $tee-set-middle-rhs (param $struct (ref $struct)) (result i32) + (local $x i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + (i32.const 1) + ) + (block (result i32) + ;; This set change the value of $x and blocks optimization. + (local.set $x (i32.const 2)) + (local.get $x) + ) + ) + ) + + ;; CHECK: (func $tee-set-after (type $3) (param $struct (ref $struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (block $l (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (local.tee $x + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (br_if $l + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $tee-set-after (param $struct (ref $struct)) (result i32) + (local $x i32) + (block $l (result i32) + (struct.atomic.rmw.cmpxchg acqrel acqrel $struct 0 + (local.get $struct) + (local.tee $x + (i32.const 1) + ) + (br_if $l + (local.get $x) + ;; The condition is evaluated after the value, so its effects do not + ;; block optimization. + ;; TODO: getImmediateFallthroughPtr once again blocks this + ;; optimization. + (block (result i32) + (local.set $x (i32.const 2)) + (i32.const 0) + ) + ) + ) + ) + ) +)