Skip to content

Commit

Permalink
Backport egraph fixes to 17.0.1 (#7889)
Browse files Browse the repository at this point in the history
* Guard recursion in `will_simplify_with_ireduce` (#7882)

Add a test to expose issues with unbounded recursion through `iadd`
during egraph rewrites, and bound the recursion of
`will_simplify_with_ireduce`.

Fixes #7874

Co-authored-by: Nick Fitzgerald <fitzgen@gmail.com>

* Cranelift: Use a fixpoint loop to compute the best value for each eclass (#7859)

* Cranelift: Use a fixpoint loop to compute the best value for each eclass

Fixes #7857

* Remove fixpoint loop early-continue optimization

* Add document describing optimization rule invariants

* Make select optimizations use subsume

* Remove invalid debug assert

* Remove now-unused methods

* Add commutative adds to cost tests

* Add missing subsume uses in egraph rules (#7879)

* Fix a few egraph rules that needed `subsume`

There were a few rules that dropped value references from the LHS
without using subsume. I think they were probably benign as they
produced constant results, but this change is in the spirit of our
revised guidelines for egraph rules.

* Augment egraph rule guideline 2 to talk about constants

* Update release notes

---------

Co-authored-by: Nick Fitzgerald <fitzgen@gmail.com>
  • Loading branch information
elliottt and fitzgen authored Feb 7, 2024
1 parent ab5a448 commit fff54cb
Show file tree
Hide file tree
Showing 10 changed files with 382 additions and 88 deletions.
15 changes: 15 additions & 0 deletions RELEASES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
--------------------------------------------------------------------------------

## 17.0.1

### Fixed

* Fix an egraph elaboration fuzzbug that was allowing values with dependencies
that shouldn't be duplicated to be chosen in a context that would make them
invalid.
[#7859](https://github.com/bytecodealliance/wasmtime/pull/7859)
[#7879](https://github.com/bytecodealliance/wasmtime/pull/7879)
* Fix an egraph rule bug that was allowing unconstrained recursion through the
DFG to run away on large functions.
[#7882](https://github.com/bytecodealliance/wasmtime/pull/7882)

--------------------------------------------------------------------------------

## 17.0.0

Released 2024-01-25
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/egraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -700,4 +700,5 @@ pub(crate) struct Stats {
pub(crate) elaborate_func: u64,
pub(crate) elaborate_func_pre_insts: u64,
pub(crate) elaborate_func_post_insts: u64,
pub(crate) elaborate_best_cost_fixpoint_iters: u64,
}
78 changes: 60 additions & 18 deletions cranelift/codegen/src/egraph/cost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ impl Cost {
const DEPTH_BITS: u8 = 8;
const DEPTH_MASK: u32 = (1 << Self::DEPTH_BITS) - 1;
const OP_COST_MASK: u32 = !Self::DEPTH_MASK;
const MAX_OP_COST: u32 = (Self::OP_COST_MASK >> Self::DEPTH_BITS) - 1;
const MAX_OP_COST: u32 = Self::OP_COST_MASK >> Self::DEPTH_BITS;

pub(crate) fn infinity() -> Cost {
// 2^32 - 1 is, uh, pretty close to infinite... (we use `Cost`
Expand All @@ -86,14 +86,16 @@ impl Cost {
Cost(0)
}

/// Construct a new finite cost from the given parts.
/// Construct a new `Cost` from the given parts.
///
/// The opcode cost is clamped to the maximum value representable.
fn new_finite(opcode_cost: u32, depth: u8) -> Cost {
let opcode_cost = std::cmp::min(opcode_cost, Self::MAX_OP_COST);
let cost = Cost((opcode_cost << Self::DEPTH_BITS) | u32::from(depth));
debug_assert_ne!(cost, Cost::infinity());
cost
/// If the opcode cost is greater than or equal to the maximum representable
/// opcode cost, then the resulting `Cost` saturates to infinity.
fn new(opcode_cost: u32, depth: u8) -> Cost {
if opcode_cost >= Self::MAX_OP_COST {
Self::infinity()
} else {
Cost(opcode_cost << Self::DEPTH_BITS | u32::from(depth))
}
}

fn depth(&self) -> u8 {
Expand All @@ -111,7 +113,7 @@ impl Cost {
/// that satisfies `inst_predicates::is_pure_for_egraph()`.
pub(crate) fn of_pure_op(op: Opcode, operand_costs: impl IntoIterator<Item = Self>) -> Self {
let c = pure_op_cost(op) + operand_costs.into_iter().sum();
Cost::new_finite(c.op_cost(), c.depth().saturating_add(1))
Cost::new(c.op_cost(), c.depth().saturating_add(1))
}
}

Expand All @@ -131,12 +133,9 @@ impl std::ops::Add<Cost> for Cost {
type Output = Cost;

fn add(self, other: Cost) -> Cost {
let op_cost = std::cmp::min(
self.op_cost().saturating_add(other.op_cost()),
Self::MAX_OP_COST,
);
let op_cost = self.op_cost().saturating_add(other.op_cost());
let depth = std::cmp::max(self.depth(), other.depth());
Cost::new_finite(op_cost, depth)
Cost::new(op_cost, depth)
}
}

Expand All @@ -147,11 +146,11 @@ impl std::ops::Add<Cost> for Cost {
fn pure_op_cost(op: Opcode) -> Cost {
match op {
// Constants.
Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost::new_finite(1, 0),
Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost::new(1, 0),

// Extends/reduces.
Opcode::Uextend | Opcode::Sextend | Opcode::Ireduce | Opcode::Iconcat | Opcode::Isplit => {
Cost::new_finite(2, 0)
Cost::new(2, 0)
}

// "Simple" arithmetic.
Expand All @@ -163,9 +162,52 @@ fn pure_op_cost(op: Opcode) -> Cost {
| Opcode::Bnot
| Opcode::Ishl
| Opcode::Ushr
| Opcode::Sshr => Cost::new_finite(3, 0),
| Opcode::Sshr => Cost::new(3, 0),

// Everything else (pure.)
_ => Cost::new_finite(4, 0),
_ => Cost::new(4, 0),
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn add_cost() {
let a = Cost::new(5, 2);
let b = Cost::new(37, 3);
assert_eq!(a + b, Cost::new(42, 3));
assert_eq!(b + a, Cost::new(42, 3));
}

#[test]
fn add_infinity() {
let a = Cost::new(5, 2);
let b = Cost::infinity();
assert_eq!(a + b, Cost::infinity());
assert_eq!(b + a, Cost::infinity());
}

#[test]
fn op_cost_saturates_to_infinity() {
let a = Cost::new(Cost::MAX_OP_COST - 10, 2);
let b = Cost::new(11, 2);
assert_eq!(a + b, Cost::infinity());
assert_eq!(b + a, Cost::infinity());
}

#[test]
fn depth_saturates_to_max_depth() {
let a = Cost::new(10, u8::MAX);
let b = Cost::new(10, 1);
assert_eq!(
Cost::of_pure_op(Opcode::Iconst, [a, b]),
Cost::new(21, u8::MAX)
);
assert_eq!(
Cost::of_pure_op(Opcode::Iconst, [b, a]),
Cost::new(21, u8::MAX)
);
}
}
149 changes: 111 additions & 38 deletions cranelift/codegen/src/egraph/elaborate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use super::Stats;
use crate::dominator_tree::DominatorTree;
use crate::fx::{FxHashMap, FxHashSet};
use crate::hash_map::Entry as HashEntry;
use crate::inst_predicates::is_pure_for_egraph;
use crate::ir::{Block, Function, Inst, Value, ValueDef};
use crate::loop_analysis::{Loop, LoopAnalysis};
use crate::scoped_hash_map::ScopedHashMap;
Expand Down Expand Up @@ -216,46 +217,112 @@ impl<'a> Elaborator<'a> {

fn compute_best_values(&mut self) {
let best = &mut self.value_to_best_value;
for (value, def) in self.func.dfg.values_and_defs() {
trace!("computing best for value {:?} def {:?}", value, def);
match def {
ValueDef::Union(x, y) => {
// Pick the best of the two options based on
// min-cost. This works because each element of `best`
// is a `(cost, value)` tuple; `cost` comes first so
// the natural comparison works based on cost, and
// breaks ties based on value number.
trace!(" -> best of {:?} and {:?}", best[x], best[y]);
best[value] = std::cmp::min(best[x], best[y]);
trace!(" -> {:?}", best[value]);
}
ValueDef::Param(_, _) => {
best[value] = BestEntry(Cost::zero(), value);
}
// If the Inst is inserted into the layout (which is,
// at this point, only the side-effecting skeleton),
// then it must be computed and thus we give it zero
// cost.
ValueDef::Result(inst, _) => {
if let Some(_) = self.func.layout.inst_block(inst) {
best[value] = BestEntry(Cost::zero(), value);
} else {
trace!(" -> value {}: result, computing cost", value);
let inst_data = &self.func.dfg.insts[inst];
// N.B.: at this point we know that the opcode is
// pure, so `pure_op_cost`'s precondition is
// satisfied.
let cost = Cost::of_pure_op(
inst_data.opcode(),
self.func.dfg.inst_values(inst).map(|value| best[value].0),

// Do a fixpoint loop to compute the best value for each eclass.
//
// The maximum number of iterations is the length of the longest chain
// of `vNN -> vMM` edges in the dataflow graph where `NN < MM`, so this
// is *technically* quadratic, but `cranelift-frontend` won't construct
// any such edges. NaN canonicalization will introduce some of these
// edges, but they are chains of only two or three edges. So in
// practice, we *never* do more than a handful of iterations here unless
// (a) we parsed the CLIF from text and the text was funkily numbered,
// which we don't really care about, or (b) the CLIF producer did
// something weird, in which case it is their responsibility to stop
// doing that.
trace!("Entering fixpoint loop to compute the best values for each eclass");
let mut keep_going = true;
while keep_going {
keep_going = false;
trace!(
"fixpoint iteration {}",
self.stats.elaborate_best_cost_fixpoint_iters
);
self.stats.elaborate_best_cost_fixpoint_iters += 1;

for (value, def) in self.func.dfg.values_and_defs() {
trace!("computing best for value {:?} def {:?}", value, def);
let orig_best_value = best[value];

match def {
ValueDef::Union(x, y) => {
// Pick the best of the two options based on
// min-cost. This works because each element of `best`
// is a `(cost, value)` tuple; `cost` comes first so
// the natural comparison works based on cost, and
// breaks ties based on value number.
best[value] = std::cmp::min(best[x], best[y]);
trace!(
" -> best of union({:?}, {:?}) = {:?}",
best[x],
best[y],
best[value]
);
best[value] = BestEntry(cost, value);
}
}
};
debug_assert_ne!(best[value].0, Cost::infinity());
debug_assert_ne!(best[value].1, Value::reserved_value());
trace!("best for eclass {:?}: {:?}", value, best[value]);
ValueDef::Param(_, _) => {
best[value] = BestEntry(Cost::zero(), value);
}
// If the Inst is inserted into the layout (which is,
// at this point, only the side-effecting skeleton),
// then it must be computed and thus we give it zero
// cost.
ValueDef::Result(inst, _) => {
if let Some(_) = self.func.layout.inst_block(inst) {
best[value] = BestEntry(Cost::zero(), value);
} else {
let inst_data = &self.func.dfg.insts[inst];
// N.B.: at this point we know that the opcode is
// pure, so `pure_op_cost`'s precondition is
// satisfied.
let cost = Cost::of_pure_op(
inst_data.opcode(),
self.func.dfg.inst_values(inst).map(|value| best[value].0),
);
best[value] = BestEntry(cost, value);
trace!(" -> cost of value {} = {:?}", value, cost);
}
}
};

// Keep on iterating the fixpoint loop while we are finding new
// best values.
keep_going |= orig_best_value != best[value];
}
}

if cfg!(any(feature = "trace-log", debug_assertions)) {
trace!("finished fixpoint loop to compute best value for each eclass");
for value in self.func.dfg.values() {
trace!("-> best for eclass {:?}: {:?}", value, best[value]);
debug_assert_ne!(best[value].1, Value::reserved_value());
// You might additionally be expecting an assert that the best
// cost is not infinity, however infinite cost *can* happen in
// practice. First, note that our cost function doesn't know
// about any shared structure in the dataflow graph, it only
// sums operand costs. (And trying to avoid that by deduping a
// single operation's operands is a losing game because you can
// always just add one indirection and go from `add(x, x)` to
// `add(foo(x), bar(x))` to hide the shared structure.) Given
// that blindness to sharing, we can make cost grow
// exponentially with a linear sequence of operations:
//
// v0 = iconst.i32 1 ;; cost = 1
// v1 = iadd v0, v0 ;; cost = 3 + 1 + 1
// v2 = iadd v1, v1 ;; cost = 3 + 5 + 5
// v3 = iadd v2, v2 ;; cost = 3 + 13 + 13
// v4 = iadd v3, v3 ;; cost = 3 + 29 + 29
// v5 = iadd v4, v4 ;; cost = 3 + 61 + 61
// v6 = iadd v5, v5 ;; cost = 3 + 125 + 125
// ;; etc...
//
// Such a chain can cause cost to saturate to infinity. How do
// we choose which e-node is best when there are multiple that
// have saturated to infinity? It doesn't matter. As long as
// invariant (2) for optimization rules is upheld by our rule
// set (see `cranelift/codegen/src/opts/README.md`) it is safe
// to choose *any* e-node in the e-class. At worst we will
// produce suboptimal code, but never an incorrectness.
}
}
}

Expand Down Expand Up @@ -606,7 +673,13 @@ impl<'a> Elaborator<'a> {
}
inst
};

// Place the inst just before `before`.
debug_assert!(
is_pure_for_egraph(self.func, inst),
"something has gone very wrong if we are elaborating effectful \
instructions, they should have remained in the skeleton"
);
self.func.layout.insert_inst(inst, before);

// Update the inst's arguments.
Expand Down
Loading

0 comments on commit fff54cb

Please sign in to comment.