Skip to content

Commit

Permalink
Cranelift: Fix union node bitpacking (#7465)
Browse files Browse the repository at this point in the history
* Cranelift: Fix union node bitpacking

It turns out we have just been taking the newest rewrite's value for a eclass
union and never actually comparing costs and taking the value with the minimum
cost. Whoops!

Fixing this made some test expectations fail, which we resolved by tweaking the
cost function to give materializing constants nonzero cost. This way we prefer
`-x` to `0 - x`.

We also made elaboration function break ties between values with the same cost
with the value index. It prefers larger value indices, since the original
value's index will be lower than all of its rewritten values' indices. This
heuristically prefers rewritten values because we hope our rewrites are all
improvements even when the cost function can't show that.

Co-Authored-By: Chris Fallin <chris@cfallin.org>
Co-Authored-By: Trevor Elliott <telliott@fastly.com>

* Add more information to assertion message

* Fix off-by-one bug in assertion

* Limit number of matches consumed from ISLE

We generally want to clamp down and avoid runaway behavior here.

But there also seems to be some sort of rustc/llvm bug on Rust 1.71 that is
causing iteration to wild here. This commit avoids that bug.

* Update test expectation

* prtest:full

---------

Co-authored-by: Chris Fallin <chris@cfallin.org>
Co-authored-by: Trevor Elliott <telliott@fastly.com>
  • Loading branch information
3 people committed Nov 7, 2023
1 parent a2d5b53 commit e39c6b7
Show file tree
Hide file tree
Showing 11 changed files with 70 additions and 128 deletions.
16 changes: 16 additions & 0 deletions cranelift/codegen/src/egraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,19 +230,35 @@ where
return orig_value;
}
isle_ctx.ctx.rewrite_depth += 1;
trace!(
"Incrementing rewrite depth; now {}",
isle_ctx.ctx.rewrite_depth
);

// Invoke the ISLE toplevel constructor, getting all new
// values produced as equivalents to this value.
trace!("Calling into ISLE with original value {}", orig_value);
isle_ctx.ctx.stats.rewrite_rule_invoked += 1;
let mut optimized_values =
crate::opts::generated_code::constructor_simplify(&mut isle_ctx, orig_value);
trace!(
" -> returned from ISLE, optimized values's size hint = {:?}",
optimized_values.size_hint()
);

// Create a union of all new values with the original (or
// maybe just one new value marked as "subsuming" the
// original, if present.)
let mut i = 0;
let mut union_value = orig_value;
while let Some(optimized_value) = optimized_values.next(&mut isle_ctx) {
i += 1;
const MATCHES_LIMIT: u32 = 5;
if i > MATCHES_LIMIT {
trace!("Reached maximum matches limit; too many optimized values, ignoring rest.");
break;
}

trace!(
"Returned from ISLE for {}, got {:?}",
orig_value,
Expand Down
11 changes: 7 additions & 4 deletions cranelift/codegen/src/egraph/cost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,13 @@ impl std::ops::Add<Cost> for Cost {
pub(crate) fn pure_op_cost(op: Opcode) -> Cost {
match op {
// Constants.
Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost(0),
Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost(1),

// Extends/reduces.
Opcode::Uextend | Opcode::Sextend | Opcode::Ireduce | Opcode::Iconcat | Opcode::Isplit => {
Cost(1)
Cost(2)
}

// "Simple" arithmetic.
Opcode::Iadd
| Opcode::Isub
Expand All @@ -84,8 +86,9 @@ pub(crate) fn pure_op_cost(op: Opcode) -> Cost {
| Opcode::Bnot
| Opcode::Ishl
| Opcode::Ushr
| Opcode::Sshr => Cost(2),
| Opcode::Sshr => Cost(3),

// Everything else (pure.)
_ => Cost(3),
_ => Cost(4),
}
}
34 changes: 28 additions & 6 deletions cranelift/codegen/src/egraph/elaborate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub(crate) struct Elaborator<'a> {
value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>,
/// Map from Value to the best (lowest-cost) Value in its eclass
/// (tree of union value-nodes).
value_to_best_value: SecondaryMap<Value, (Cost, Value)>,
value_to_best_value: SecondaryMap<Value, BestEntry>,
/// Stack of blocks and loops in current elaboration path.
loop_stack: SmallVec<[LoopStackEntry; 8]>,
/// The current block into which we are elaborating.
Expand All @@ -64,6 +64,28 @@ pub(crate) struct Elaborator<'a> {
stats: &'a mut Stats,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct BestEntry(Cost, Value);

impl PartialOrd for BestEntry {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
Some(self.cmp(other))
}
}

impl Ord for BestEntry {
#[inline]
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.0.cmp(&other.0).then_with(|| {
// Note that this comparison is reversed. When costs are equal,
// prefer the value with the bigger index. This is a heuristic that
// prefers results of rewrites to the original value, since we
// expect that our rewrites are generally improvements.
self.1.cmp(&other.1).reverse()
})
}
}

#[derive(Clone, Copy, Debug)]
struct ElaboratedValue {
in_block: Block,
Expand Down Expand Up @@ -120,7 +142,7 @@ impl<'a> Elaborator<'a> {
) -> Self {
let num_values = func.dfg.num_values();
let mut value_to_best_value =
SecondaryMap::with_default((Cost::infinity(), Value::reserved_value()));
SecondaryMap::with_default(BestEntry(Cost::infinity(), Value::reserved_value()));
value_to_best_value.resize(num_values);
Self {
func,
Expand Down Expand Up @@ -208,15 +230,15 @@ impl<'a> Elaborator<'a> {
trace!(" -> {:?}", best[value]);
}
ValueDef::Param(_, _) => {
best[value] = (Cost::zero(), value);
best[value] = BestEntry(Cost::zero(), value);
}
// If the Inst is inserted into the layout (which is,
// at this point, only the side-effecting skeleton),
// then it must be computed and thus we give it zero
// cost.
ValueDef::Result(inst, _) => {
if let Some(_) = self.func.layout.inst_block(inst) {
best[value] = (Cost::zero(), value);
best[value] = BestEntry(Cost::zero(), value);
} else {
trace!(" -> value {}: result, computing cost", value);
let inst_data = &self.func.dfg.insts[inst];
Expand All @@ -230,7 +252,7 @@ impl<'a> Elaborator<'a> {
.fold(pure_op_cost(inst_data.opcode()), |cost, value| {
cost + best[value].0
});
best[value] = (cost, value);
best[value] = BestEntry(cost, value);
}
}
};
Expand Down Expand Up @@ -319,7 +341,7 @@ impl<'a> Elaborator<'a> {
// value) here so we have a full view of the
// eclass.
trace!("looking up best value for {}", value);
let (_, best_value) = self.value_to_best_value[value];
let BestEntry(_, best_value) = self.value_to_best_value[value];
trace!("elaborate: value {} -> best {}", value, best_value);
debug_assert_ne!(best_value, Value::reserved_value());

Expand Down
11 changes: 8 additions & 3 deletions cranelift/codegen/src/ir/dfg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -551,10 +551,15 @@ struct ValueDataPacked(u64);
/// (and is implied by `mask`), by translating 2^32-1 (0xffffffff)
/// into 2^n-1 and panic'ing on 2^n..2^32-1.
fn encode_narrow_field(x: u32, bits: u8) -> u32 {
let max = (1 << bits) - 1;
if x == 0xffff_ffff {
(1 << bits) - 1
max
} else {
debug_assert!(x < (1 << bits));
debug_assert!(
x < max,
"{x} does not fit into {bits} bits (must be less than {max} to \
allow for a 0xffffffff sentinal)"
);
x
}
}
Expand Down Expand Up @@ -630,7 +635,7 @@ impl From<ValueData> for ValueDataPacked {
Self::make(Self::TAG_ALIAS, ty, 0, original.as_bits())
}
ValueData::Union { ty, x, y } => {
Self::make(Self::TAG_ALIAS, ty, x.as_bits(), y.as_bits())
Self::make(Self::TAG_UNION, ty, x.as_bits(), y.as_bits())
}
}
}
Expand Down
8 changes: 4 additions & 4 deletions cranelift/filetests/filetests/egraph/arithmetic.clif
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ target x86_64

function %f0(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 2
v1 = iconst.i32 4
v2 = imul v0, v1
; check: v5 = ishl v0, v4 ; v4 = 1
; check: return v5
; check: v3 = iconst.i32 2
; check: v4 = ishl v0, v3 ; v3 = 2
; check: return v4
return v2
}

Expand Down Expand Up @@ -60,7 +61,6 @@ block0(v0: i32):
v2 = imul v0, v1
return v2
; check: v3 = ineg v0
; check: v4 -> v3
; check: return v3
}

Expand Down
11 changes: 0 additions & 11 deletions cranelift/filetests/filetests/egraph/cprop-splat.clif
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ block0:
;
; block0:
; v3 = vconst.i8x16 const0
; v4 -> v3
; return v3 ; v3 = const0
; }

Expand All @@ -30,7 +29,6 @@ block0:
;
; block0:
; v3 = vconst.i8x16 const0
; v4 -> v3
; return v3 ; v3 = const0
; }

Expand All @@ -46,7 +44,6 @@ block0:
;
; block0:
; v3 = vconst.i16x8 const0
; v4 -> v3
; return v3 ; v3 = const0
; }

Expand All @@ -62,7 +59,6 @@ block0:
;
; block0:
; v3 = vconst.i16x8 const0
; v4 -> v3
; return v3 ; v3 = const0
; }

Expand All @@ -78,7 +74,6 @@ block0:
;
; block0:
; v3 = vconst.i32x4 const0
; v4 -> v3
; return v3 ; v3 = const0
; }

Expand All @@ -94,7 +89,6 @@ block0:
;
; block0:
; v3 = vconst.i32x4 const0
; v4 -> v3
; return v3 ; v3 = const0
; }

Expand All @@ -110,7 +104,6 @@ block0:
;
; block0:
; v3 = vconst.i64x2 const0
; v4 -> v3
; return v3 ; v3 = const0
; }

Expand All @@ -126,7 +119,6 @@ block0:
;
; block0:
; v3 = vconst.i64x2 const0
; v4 -> v3
; return v3 ; v3 = const0
; }

Expand All @@ -142,7 +134,6 @@ block0:
;
; block0:
; v3 = vconst.i8x16 const0
; v4 -> v3
; return v3 ; v3 = const0
; }

Expand All @@ -158,7 +149,6 @@ block0:
;
; block0:
; v3 = vconst.f32x4 const0
; v4 -> v3
; return v3 ; v3 = const0
; }

Expand All @@ -174,7 +164,6 @@ block0:
;
; block0:
; v3 = vconst.f64x2 const0
; v4 -> v3
; return v3 ; v3 = const0
; }

Loading

0 comments on commit e39c6b7

Please sign in to comment.