diff --git a/src/ir/CMakeLists.txt b/src/ir/CMakeLists.txt
index 919069770c5..5521529cd91 100644
--- a/src/ir/CMakeLists.txt
+++ b/src/ir/CMakeLists.txt
@@ -2,6 +2,7 @@ FILE(GLOB ir_HEADERS *.h)
 set(ir_SOURCES
   ExpressionAnalyzer.cpp
   ExpressionManipulator.cpp
+  constraint.cpp
   drop.cpp
   effects.cpp
   eh-utils.cpp
diff --git a/src/ir/abstract.h b/src/ir/abstract.h
index 04b04e34223..d0f3794908c 100644
--- a/src/ir/abstract.h
+++ b/src/ir/abstract.h
@@ -56,7 +56,8 @@ enum Op {
   GtS,
   GtU,
   GeS,
-  GeU
+  GeU,
+  Invalid
 };
 
 inline bool hasAnyRotateShift(BinaryOp op) {
diff --git a/src/ir/constraint.cpp b/src/ir/constraint.cpp
new file mode 100644
index 00000000000..3225c98f95a
--- /dev/null
+++ b/src/ir/constraint.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2026 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cassert>
+#include <optional>
+
+#include "ir/constraint.h"
+#include "ir/properties.h"
+#include "wasm.h"
+
+namespace wasm::constraint {
+
+namespace {
+
+// Core comparison of two constraints: whether a => b
+//
+// Returns a Result, or an empty option if we should keep working (i.e., a
+// result of Unknown means we are certain we can just return Unknown).
+std::optional<Result> checkPair(const Constraint& a, const Constraint& b) {
+  // A thing always implies itself.
+  if (a == b) {
+    return True;
+  }
+
+  // Comparisons of two constants.
+  if (auto* aConstant = std::get_if<Literal>(&a.value)) {
+    if (auto* bConstant = std::get_if<Literal>(&b.value)) {
+      switch (a.op) {
+        case Abstract::Eq: {
+          switch (b.op) {
+            case Abstract::Eq: {
+              // x == c vs x == c', and we already handled full equality
+              // earlier, hence c != c', and we found a contradiction.
+              assert(*aConstant != *bConstant);
+              return False;
+            }
+            case Abstract::Ne: {
+              // x == c vs x != c'. We can infer the result based on relating c
+              // and c'.
+              return *aConstant != *bConstant ? True : False;
+            }
+            default: {
+            }
+          }
+          break;
+        }
+        case Abstract::Ne: {
+          switch (b.op) {
+            case Abstract::Eq: {
+              // x != c vs x == c'. If c == c', we can infer.
+              if (*aConstant == *bConstant) {
+                return False;
+              }
+              return {};
+            }
+            case Abstract::Ne: {
+              // x != c vs x != c', and we already handled full equality
+              // earlier, hence c != c', and we can infer nothing.
+              assert(*aConstant != *bConstant);
+              return {};
+            }
+            default: {
+            }
+          }
+          break;
+        }
+        default: {
+        }
+      }
+    }
+  }
+
+  // This pair alone decides nothing; the caller may try other constraints.
+  return {};
+}
+
+} // anonymous namespace
+
+Result AndedConstraintSet::check(const Constraint& condition) const {
+  // Sometimes a single constraint is enough to determine the condition.
+  for (auto& c : *this) {
+    if (auto result = checkPair(c, condition)) {
+      return *result;
+    }
+  }
+
+  // TODO smarts for multiple constraints
+
+  // Otherwise, who knows.
+  return Unknown;
+}
+
+void AndedConstraintSet::fuzzyOr(const AndedConstraintSet& other) {
+  // The empty set has no constraints, so it is always true, and "true OR
+  // anything" is true as well. The only valid result is then the empty set:
+  // keeping the other side's constraints would claim something that does not
+  // hold on the path that knows nothing, breaking (X || Y) => fuzzyOr(X, Y).
+  if (empty()) {
+    return;
+  }
+  if (other.empty()) {
+    clear();
+    return;
+  }
+
+  // If this is already implied by current constraints, then it is redundant.
+  // E.g. if we are { x = 10 } and other is { x >= 0 } then all we need is
+  // { x >= 0 } as the result of the OR.
+  if (check(other) == True) {
+    *this = other;
+    return;
+  }
+  if (other.check(*this) == True) {
+    return;
+  }
+
+  // TODO smarts
+
+  // Otherwise, we don't know how to nicely OR these things, and expand to the
+  // trivial set of no constraints.
+  clear();
+}
+
+} // namespace wasm::constraint
diff --git a/src/ir/constraint.h b/src/ir/constraint.h
new file mode 100644
index 00000000000..9bdf0578ef8
--- /dev/null
+++ b/src/ir/constraint.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2026 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Constraints on the values of things, like x >= 0, x < 42, and x == y. Allows
+// inference whether other things are true given a set of constraints, like
+// { x == 10 } => { x >= 5 }.
+//
+
+#ifndef wasm_ir_constraint_h
+#define wasm_ir_constraint_h
+
+#include <cassert>
+#include <cstddef>
+#include <variant>
+
+#include "ir/abstract.h"
+#include "support/inplace_vector.h"
+#include "support/utilities.h"
+#include "wasm.h"
+
+namespace wasm::constraint {
+
+// A value in a constraint, either a local index or literal value.
+struct Value : public std::variant<Index, Literal> {
+  bool operator==(const Value&) const = default;
+};
+
+// A constraint: some operation and some value, like "is equal to 17" or "is
+// less than local 6".
+struct Constraint {
+  // The operation relating two values, and the values.
+  Abstract::Op op = Abstract::Invalid;
+  Value value;
+
+  bool operator==(const Constraint&) const = default;
+
+  // Whether this is a valid constraint (a default-constructed one is not).
+  operator bool() const { return op != Abstract::Invalid; }
+};
+
+// We limit constraints to a low number to ensure good performance even with
+// simple brute-force solving.
+// TODO: use a generic constraint solver..?
+inline constexpr std::size_t MaxConstraints = 3;
+
+// What we infer from one thing about another: true/false, or unknown.
+enum Result { True, False, Unknown };
+
+// A set of constraints connected by the logical "and" operation. That is, all
+// the constraints are simultaneously true about some value. In the examples in
+// the comments below, `x` is used for the thing all the constraints are talking
+// about, which looks like a local, but it could be a global or a struct field
+// or anything else in general.
+struct AndedConstraintSet : inplace_vector<Constraint, MaxConstraints> {
+  // Check a condition against this set, that is, whether the existing
+  // constraints prove that it must be true, false, or unknown: whether
+  //
+  //   { this } => { condition }
+  //
+  // https://en.wikipedia.org/wiki/Material_conditional#Truth_table
+  Result check(const Constraint& condition) const;
+
+  // Check an entire other set: True if we prove every constraint in it, False
+  // if we disprove every constraint in it, and Unknown otherwise (which
+  // includes a mix of True and False results).
+  Result check(const AndedConstraintSet& other) const {
+    if (other.empty()) {
+      // The empty set of constraints is always true.
+      return True;
+    }
+
+    Result result = Unknown;
+    for (auto& c : other) {
+      auto currResult = check(c);
+      if (currResult == Unknown) {
+        // If something is unknown, it all is.
+        return Unknown;
+      }
+      if (result == Unknown) {
+        // This is the first result
+        result = currResult;
+      } else if (result != currResult) {
+        // This is a later result, and different, so give up.
+        return Unknown;
+      }
+    }
+    return result;
+  }
+
+  bool full() const { return size() == MaxConstraints; }
+
+  // Add a constraint to the set, ANDed with the others. The caller must make
+  // sure not to add too many (i.e. it is invalid to call this when full()).
+  void and_(const Constraint& c) {
+    assert(!full());
+    push_back(c);
+  }
+
+  // Merge constraints using OR. We cannot represent such a thing directly
+  // (we only use AND), so we approximate it in a fuzzy way. For example, this
+  // would be valid:
+  //
+  //   fuzzyOr({ x == 5 }, { x == 10 }) == { x >= 5 && x <= 10 }
+  //
+  // Note how the result here still accepts the values 5 and 10, but it also
+  // allows more. Formally, this has the following mathematical property:
+  //
+  //   (X || Y) => fuzzyOr(X, Y)
+  //
+  // That is, if X or Y is true, the result of fuzzyOr is also true. But the
+  // reverse is not always the case: fuzzyOr may be true without X || Y being
+  // true (see the truth table linked above, and the value 8 in the example).
+  //
+  // Returning to the example, we can use this to optimize as follows: if
+  // two code paths reaching a location have x == 5 and x == 10, so the value in
+  // the merge location is either 5 or 10, then if we see some i32.ge_s that
+  // does x >= 0 then we can evaluate it with check():
+  //
+  //   { x >= 5 && x <= 10 }.check({ x >= 0 }) == True
+  //
+  // And it is valid to optimize that i32.ge_s into a constant 1, since
+  //
+  //   { x == 5 || x == 10 } =>
+  //   { x >= 5 && x <= 10 } =>
+  //   { x >= 0 }
+  //
+  // I.e. the constraints imply the truth of the thing we are evaluating.
+  //
+  // Note that the fuzziness here means that fuzzyOr() can do a better or a
+  // worse job. It is always valid for fuzzyOr to return { } or any other
+  // always-true thing (see the truth table linked above). But then:
+  //
+  //   { x == 5 || x == 10 } =>
+  //   { } =!!>
+  //   { x >= 0 }
+  //
+  // If we become too fuzzy, we lose the ability to imply anything useful.
+  //
+  // In particular, the empty set is always true, so ORing anything with it
+  // results in the empty set: if one path knows nothing, then after the merge
+  // we know nothing.
+  void fuzzyOr(const AndedConstraintSet& other);
+};
+
+} // namespace wasm::constraint
+
+#endif // wasm_ir_constraint_h
diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt
index 4bd358032d7..8db86712bba 100644
--- a/test/gtest/CMakeLists.txt
+++ b/test/gtest/CMakeLists.txt
@@ -9,6 +9,7 @@ set(unittest_SOURCES
   arena.cpp
   cast-check.cpp
   cfg.cpp
+  constraint.cpp
   dataflow.cpp
   delta_debugging.cpp
   dfa_minimization.cpp
diff --git a/test/gtest/constraint.cpp b/test/gtest/constraint.cpp
new file mode 100644
index 00000000000..b43ca3bde01
--- /dev/null
+++ b/test/gtest/constraint.cpp
@@ -0,0 +1,174 @@
+#include "ir/constraint.h"
+#include "ir/abstract.h"
+#include "gtest/gtest.h"
+
+using namespace wasm;
+using namespace wasm::Abstract;
+using namespace wasm::constraint;
+
+TEST(ConstraintTest, TestEmpty) {
+  // An empty constraint is invalid.
+  Constraint c;
+  EXPECT_FALSE(c);
+}
+
+TEST(ConstraintTest, TestEq) {
+  // Sets start empty.
+  AndedConstraintSet s;
+  EXPECT_TRUE(s.empty());
+
+  // x == 5 (we use "x" for the name of the thing being compared, in these
+  // comments).
+  Constraint c{Eq, Literal(int32_t(5))};
+
+  // We can't infer anything using an empty set.
+  EXPECT_EQ(s.check(c), Unknown);
+
+  // If we add it, then things check out: a thing always proves itself true.
+  s.and_(c);
+  EXPECT_EQ(s.size(), 1u);
+  EXPECT_EQ(s.check(c), True);
+
+  // x == 10, a different number: we can infer false.
+  EXPECT_EQ(s.check(Constraint{Eq, Literal(int32_t(10))}), False);
+
+  // x != 15: we can infer true.
+  EXPECT_EQ(s.check(Constraint{Ne, Literal(int32_t(15))}), True);
+
+  // x != 5: we can infer false.
+  EXPECT_EQ(s.check(Constraint{Ne, Literal(int32_t(5))}), False);
+}
+
+TEST(ConstraintTest, TestNe) {
+  AndedConstraintSet s;
+  // x != 5
+  Constraint c{Ne, Literal(int32_t(5))};
+  s.and_(c);
+
+  // Checks out versus itself.
+  EXPECT_EQ(s.check(c), True);
+
+  // x == 10: we don't know.
+  EXPECT_EQ(s.check(Constraint{Eq, Literal(int32_t(10))}), Unknown);
+
+  // x != 15: we don't know.
+  EXPECT_EQ(s.check(Constraint{Ne, Literal(int32_t(15))}), Unknown);
+
+  // x == 5: we can infer false.
+  EXPECT_EQ(s.check(Constraint{Eq, Literal(int32_t(5))}), False);
+}
+
+TEST(ConstraintTest, TestMulti) {
+  AndedConstraintSet s;
+  // x != 5 && x != 10
+  Constraint c{Ne, Literal(int32_t(5))};
+  Constraint d{Ne, Literal(int32_t(10))};
+  s.and_(c);
+  s.and_(d);
+
+  // Each checks out versus itself.
+  EXPECT_EQ(s.check(c), True);
+  EXPECT_EQ(s.check(d), True);
+
+  // x == 5: false.
+  EXPECT_EQ(s.check(Constraint{Eq, Literal(int32_t(5))}), False);
+
+  // x == 10: false.
+  EXPECT_EQ(s.check(Constraint{Eq, Literal(int32_t(10))}), False);
+
+  // x == 15: we don't know.
+  EXPECT_EQ(s.check(Constraint{Eq, Literal(int32_t(15))}), Unknown);
+
+  // x != 15: we don't know.
+  EXPECT_EQ(s.check(Constraint{Ne, Literal(int32_t(15))}), Unknown);
+}
+
+TEST(ConstraintTest, TestSets) {
+  // x == 5
+  Constraint c{Eq, Literal(int32_t(5))};
+
+  AndedConstraintSet s;
+
+  // Any set always proves itself to be true.
+  EXPECT_EQ(s.check(s), True);
+
+  // Ditto after adding something.
+  s.and_(c);
+  EXPECT_EQ(s.check(s), True);
+
+  // Another set, empty.
+  AndedConstraintSet t;
+
+  // Any set always proves an empty set to be true.
+  EXPECT_EQ(s.check(t), True);
+
+  // Make both sets contain the same stuff.
+  t.and_(c);
+  EXPECT_EQ(s.check(t), True);
+
+  // Now t has *different* stuff, x == 10, which given s is false.
+  t.clear();
+  t.and_(Constraint{Eq, Literal(int32_t(10))});
+  EXPECT_EQ(s.check(t), False);
+
+  // Same, with x != 10. Now we know it is true.
+  t.clear();
+  t.and_(Constraint{Ne, Literal(int32_t(10))});
+  EXPECT_EQ(s.check(t), True);
+
+  // In reverse, we can infer nothing: knowing x != 10 does not say if x == 5.
+  EXPECT_EQ(t.check(s), Unknown);
+}
+
+TEST(ConstraintTest, TestOrTrivial) {
+  // { x == 5 }
+  AndedConstraintSet s;
+  s.and_(Constraint{Eq, Literal(int32_t(5))});
+
+  // { }
+  AndedConstraintSet empty;
+
+  // The empty set is always true, so ORing anything with it is always true
+  // too, that is, we are left with the empty set.
+  auto t = s;
+  t.fuzzyOr(empty);
+  EXPECT_TRUE(t.empty());
+
+  // Flipped.
+  t = empty;
+  t.fuzzyOr(s);
+  EXPECT_TRUE(t.empty());
+
+  // ORing two empty sets leaves an empty set.
+  t = empty;
+  t.fuzzyOr(empty);
+  EXPECT_TRUE(t.empty());
+
+  // ORing with oneself changes nothing
+  t = s;
+  t.fuzzyOr(s);
+  EXPECT_EQ(t, s);
+}
+
+TEST(ConstraintTest, TestOrImplies) {
+  // { x == 5 }
+  AndedConstraintSet s;
+  s.and_(Constraint{Eq, Literal(int32_t(5))});
+
+  // { x != 10 }
+  AndedConstraintSet t;
+  t.and_(Constraint{Ne, Literal(int32_t(10))});
+
+  // ORing these leaves us with x != 10.
+  auto u = s;
+  u.fuzzyOr(t);
+  EXPECT_EQ(u, t);
+
+  // Flipped.
+  u = t;
+  u.fuzzyOr(s);
+  EXPECT_EQ(u, t);
+}
+
+// TODO: test a fuzzyOr of { x = 10 } and { x >= 0 }, once we support
+// inequalities