Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion cranelift/assembler-x64/meta/src/instructions/bitmanip.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::dsl::{Feature::*, Inst, Location::*, VexLength::*};
use crate::dsl::{Eflags::*, Feature::*, Inst, Location::*, VexLength::*};
use crate::dsl::{fmt, implicit, inst, r, rex, rw, vex, w};

#[rustfmt::skip] // Keeps instructions on a single line.
Expand All @@ -24,6 +24,13 @@ pub fn list() -> Vec<Inst> {
inst("popcntl", fmt("RM", [w(r32), r(rm32)]), rex([0xF3, 0x0F, 0xB8]).r(), _64b | compat | popcnt),
inst("popcntq", fmt("RM", [w(r64), r(rm64)]), rex([0xF3, 0x0F, 0xB8]).r().w(), _64b | popcnt),

inst("btw", fmt("MR", [r(rm16), r(r16)]).flags(W), rex([0x66, 0x0F, 0xA3]).r(), _64b | compat),
inst("btl", fmt("MR", [r(rm32), r(r32)]).flags(W), rex([0x0F, 0xA3]).r(), _64b | compat),
inst("btq", fmt("MR", [r(rm64), r(r64)]).flags(W), rex([0x0F, 0xA3]).w().r(), _64b),
inst("btw", fmt("MI", [r(rm16), r(imm8)]).flags(W), rex([0x66, 0x0F, 0xBA]).digit(4).ib(), _64b | compat),
inst("btl", fmt("MI", [r(rm32), r(imm8)]).flags(W), rex([0x0F, 0xBA]).digit(4).ib(), _64b | compat),
inst("btq", fmt("MI", [r(rm64), r(imm8)]).flags(W), rex([0x0F, 0xBA]).w().digit(4).ib(), _64b),

// Note that the Intel manual has different names for these
// instructions than Capstone gives them:
//
Expand Down
53 changes: 49 additions & 4 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3647,6 +3647,18 @@
(rule (x64_blsr $I32 src) (x64_blsrl_vm src))
(rule (x64_blsr $I64 src) (x64_blsrq_vm src))

;; Helper for creating `bt` instructions where the bit index is in a register.
;; Dispatches on the type to the 16-, 32-, or 64-bit `MR` form of the
;; instruction and yields the flags producer.
(decl x64_bt (Type GprMem Gpr) ProducesFlags)
(rule (x64_bt $I16 src1 src2) (x64_btw_mr src1 src2))
(rule (x64_bt $I32 src1 src2) (x64_btl_mr src1 src2))
(rule (x64_bt $I64 src1 src2) (x64_btq_mr src1 src2))

;; Helper for creating `bt` instructions where the bit index is an 8-bit
;; immediate. Dispatches on the type to the 16-, 32-, or 64-bit `MI` form of
;; the instruction and yields the flags producer.
(decl x64_bt_imm (Type GprMem u8) ProducesFlags)
(rule (x64_bt_imm $I16 src imm) (x64_btw_mi src imm))
(rule (x64_bt_imm $I32 src imm) (x64_btl_mi src imm))
(rule (x64_bt_imm $I64 src imm) (x64_btq_mi src imm))

;; Helper for creating `sarx` instructions.
(decl x64_sarx (Type GprMem Gpr) Gpr)
(rule (x64_sarx $I32 val amt) (x64_sarxl_rmv val amt))
Expand Down Expand Up @@ -4262,8 +4274,41 @@
(rule 2 (is_nonzero_cmp (vany_true vec)) (is_vany_true vec))
(rule 2 (is_nonzero_cmp (uextend val)) (is_nonzero_cmp val))
(rule 2 (is_nonzero_cmp (band a @ (value_type (ty_int (fits_in_64 ty))) b))
(is_nonzero_band ty a b))

;; Helper for `is_nonzero_cmp` which produces a condition testing whether
;; `a & b` is nonzero for an integer type that fits in 64 bits. The
;; lowest-priority rule here is the general fallback which uses `test` and
;; checks the `NZ` condition; higher-priority rules may select other
;; instructions.
(decl is_nonzero_band (Type Value Value) CondResult)
(rule 0 (is_nonzero_band ty a b) (CondResult.CC (x64_test ty a b) (CC.NZ)))

;; If a value is and'd with `1` shifted left by a variable amount `b` then
;; this can pattern-match to the native `bt` instruction. Note that to have
;; the same semantics this requires that `a` is in a register which forces
;; `bt` to use modulo semantics for the second operand `b`, thus `put_in_gpr`
;; is manually used.
(rule 1 (is_nonzero_band (ty_32_or_64 ty) a (ishl (u64_from_iconst 1) b))
(CondResult.CC (x64_bt ty (put_in_gpr a) b) (CC.B)))

;; If a 64-bit value is and'd with a constant that has exactly one bit set
;; then that matches `bt` as well, with the position of the set bit used as
;; the immediate operand.
(rule 1 (is_nonzero_band $I64 a (u64_from_iconst (bt_imm n)))
(CondResult.CC (x64_bt_imm $I64 a n) (CC.B)))

;; If what we're testing against is a 32-bit integer then this is a candidate
;; for both the `test` and `bt` instructions (only `bt` if the integer has one
;; bit set). According to [1] the `test` instruction has a higher throughput
;; at least historically than the `bt` instruction so here `test` is explicitly
;; favored over `bt`, even if `bt` were applicable. Note that LLVM appears to
;; favor `test` in this situation as well.
;;
;; [1]: https://github.com/bytecodealliance/wasmtime/pull/11128#discussion_r2164888415
(rule 2 (is_nonzero_band ty a b @ (i32_from_iconst _))
(CondResult.CC (x64_test ty a b) (CC.NZ)))

;; Extractor which tests whether the `u64` input has exactly one bit set and,
;; if so, yields the position of that bit; otherwise it does not match. Used
;; when lowering to `x64_bt_imm` above.
(decl bt_imm (u8) u64)
(extern extractor bt_imm bt_imm)

;; Lower a CondResult to a boolean value in a register.
(decl lower_cond_bool (CondResult) Gpr)
(rule (lower_cond_bool (CondResult.CC producer cc))
Expand Down Expand Up @@ -4329,10 +4374,10 @@

;; For direct equality comparisons to zero transform the other operand into a
;; nonzero comparison and then invert the whole conditional to test for zero.
(rule 5 (emit_cmp (IntCC.Equal) a (u64_from_iconst 0))
(cond_invert (is_nonzero_cmp a)))
(rule 6 (emit_cmp (IntCC.Equal) (u64_from_iconst 0) a)
(cond_invert (is_nonzero_cmp a)))
(rule 5 (emit_cmp (IntCC.Equal) a (u64_from_iconst 0)) (cond_invert (is_nonzero_cmp a)))
(rule 6 (emit_cmp (IntCC.Equal) (u64_from_iconst 0) a) (cond_invert (is_nonzero_cmp a)))
(rule 5 (emit_cmp (IntCC.NotEqual) a (u64_from_iconst 0)) (is_nonzero_cmp a))
(rule 6 (emit_cmp (IntCC.NotEqual) (u64_from_iconst 0) a) (is_nonzero_cmp a))

;; 128-bit strict equality/inequality can't be easily tested using subtraction
;; but we can quickly determine whether any bits are different instead.
Expand Down
8 changes: 8 additions & 0 deletions cranelift/codegen/src/isa/x64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1080,6 +1080,14 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
self.emit(&MInst::External { inst: inst.into() });
ret.to_reg()
}

/// Returns the index of the only set bit in `val`, or `None` when `val` has
/// zero bits or more than one bit set.
fn bt_imm(&mut self, val: u64) -> Option<u8> {
    // An unsigned integer has exactly one bit set iff it is a power of two.
    // The bit index of a `u64` is at most 63, so the conversion to `u8`
    // cannot fail.
    val.is_power_of_two()
        .then(|| u8::try_from(val.trailing_zeros()).unwrap())
}
}

impl IsleContext<'_, '_, MInst, X64Backend> {
Expand Down
Loading
Loading