Skip to content

Commit

Permalink
Merge with upstream (bytecodealliance#199)
Browse files Browse the repository at this point in the history
  • Loading branch information
dhil committed Jun 21, 2024
2 parents 93a812f + cb9d667 commit 711ed09
Show file tree
Hide file tree
Showing 27 changed files with 1,014 additions and 136 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3120,10 +3120,10 @@
(decl amode (Type Value i32) AMode)
(rule 0 (amode ty val offset)
(amode_no_more_iconst ty val offset))
(rule 1 (amode ty (iadd x (iconst (simm32 y))) offset)
(rule 1 (amode ty (iadd x (i32_from_iconst y)) offset)
(if-let new_offset (s32_add_fallible y offset))
(amode_no_more_iconst ty x new_offset))
(rule 2 (amode ty (iadd (iconst (simm32 x)) y) offset)
(rule 2 (amode ty (iadd (i32_from_iconst x) y) offset)
(if-let new_offset (s32_add_fallible x offset))
(amode_no_more_iconst ty y new_offset))

Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/riscv64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
align_to(clobbered_size, 16)
}

const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty()
pub(crate) const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty()
.with(px_reg(1))
.with(px_reg(5))
.with(px_reg(6))
Expand Down
4 changes: 2 additions & 2 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2393,10 +2393,10 @@
;;
;; We can't recurse into `amode` again since that could cause stack overflows.
;; See: https://github.com/bytecodealliance/wasmtime/pull/6968
(rule 1 (amode (iadd addr (iconst (simm32 y))) offset)
(rule 1 (amode (iadd addr (i32_from_iconst y)) offset)
(if-let new_offset (s32_add_fallible y offset))
(amode_inner addr new_offset))
(rule 2 (amode (iadd (iconst (simm32 x)) addr) offset)
(rule 2 (amode (iadd (i32_from_iconst x) addr) offset)
(if-let new_offset (s32_add_fallible x offset))
(amode_inner addr new_offset))

Expand Down
8 changes: 4 additions & 4 deletions cranelift/codegen/src/isa/riscv64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,14 @@ pub enum EmitVState {
#[derive(Default, Clone, Debug)]
pub struct EmitState {
/// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
stack_map: Option<StackMap>,
pub(crate) stack_map: Option<StackMap>,
/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
/// optimized away at compile time. See [cranelift_control].
ctrl_plane: ControlPlane,
pub(crate) ctrl_plane: ControlPlane,
/// Vector State
/// Controls the current state of the vector unit at the emission point.
vstate: EmitVState,
frame_layout: FrameLayout,
pub(crate) vstate: EmitVState,
pub(crate) frame_layout: FrameLayout,
}

impl EmitState {
Expand Down
36 changes: 33 additions & 3 deletions cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#[allow(unused)]
use crate::ir::LibCall;
use crate::isa::riscv64::inst::*;
use crate::isa::riscv64::{abi::DEFAULT_CLOBBERS, inst::*};
use std::borrow::Cow;

fn fa7() -> Reg {
Expand Down Expand Up @@ -2135,7 +2135,7 @@ fn riscv64_worst_case_instruction_size() {
let (flags, isa_flags) = make_test_flags();
let emit_info = EmitInfo::new(flags, isa_flags);

//there are all candidates potential generate a lot of bytes.
// These are all candidate instructions with potential to generate a lot of bytes.
let mut candidates: Vec<MInst> = vec![];

candidates.push(Inst::Popcnt {
Expand Down Expand Up @@ -2198,10 +2198,40 @@ fn riscv64_worst_case_instruction_size() {
}),
);

candidates.push(Inst::ReturnCallInd {
callee: a0(),
info: Box::new(ReturnCallInfo {
opcode: Opcode::ReturnCallIndirect,
new_stack_arg_size: 64,
uses: DEFAULT_CLOBBERS
.into_iter()
.map(|reg| CallArgPair {
vreg: reg.into(),
preg: reg.into(),
})
.collect(),
}),
});

let mut max: (u32, MInst) = (0, Inst::Nop0);
for i in candidates {
let mut buffer = MachBuffer::new();
i.emit(&mut buffer, &emit_info, &mut Default::default());
let mut emit_state = EmitState {
// This frame layout is important: it ensures that the ReturnCallIndirect
// instruction in this test becomes as large as practically possible.
frame_layout: FrameLayout {
tail_args_size: 64,
setup_area_size: 8192,
clobbered_callee_saves: DEFAULT_CLOBBERS
.into_iter()
.filter(|r| r.class() != RegClass::Vector)
.map(|r| Writable::from_reg(r.into()))
.collect(),
..Default::default()
},
..Default::default()
};
i.emit(&mut buffer, &emit_info, &mut emit_state);
let buffer = buffer.finish(&Default::default(), &mut Default::default());
let length = buffer.data().len() as u32;
if length > max.0 {
Expand Down
4 changes: 2 additions & 2 deletions cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -844,8 +844,8 @@ impl MachInst for Inst {
}

fn worst_case_size() -> CodeOffset {
// calculate by test function riscv64_worst_case_instruction_size()
124
// Our worst case size is determined by the riscv64_worst_case_instruction_size test
168
}

fn ref_type_regclass(_settings: &settings::Flags) -> RegClass {
Expand Down
9 changes: 7 additions & 2 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2465,20 +2465,25 @@

;; These rules should probably be handled in `gen_bitcast`, but it's convenient to have that return
;; a single register, instead of a `ValueRegs`
(rule 2 (lower (has_type $I128 (bitcast _ v @ (value_type (ty_vec_fits_in_register _)))))
(rule 3 (lower (has_type $I128 (bitcast _ v @ (value_type (ty_vec_fits_in_register _)))))
(value_regs
(gen_extractlane $I64X2 v 0)
(gen_extractlane $I64X2 v 1)))

;; Move the high half into a vector register, and then use vslide1up to move it up and
;; insert the lower half in one instruction.
(rule 1 (lower (has_type (ty_vec_fits_in_register _) (bitcast _ v @ (value_type $I128))))
(rule 2 (lower (has_type (ty_vec_fits_in_register _) (bitcast _ v @ (value_type $I128))))
(let ((lo XReg (value_regs_get v 0))
(hi XReg (value_regs_get v 1))
(vstate VState (vstate_from_type $I64X2))
(vec VReg (rv_vmv_sx hi vstate)))
(rv_vslide1up_vx vec vec lo (unmasked) vstate)))

;; `gen_bitcast` below only works with single register values, so handle I128
;; specially here.
(rule 1 (lower (has_type $I128 (bitcast _ v @ (value_type $I128))))
v)

(rule 0 (lower (has_type out_ty (bitcast _ v @ (value_type in_ty))))
(gen_bitcast v in_ty out_ty))

Expand Down
12 changes: 6 additions & 6 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1116,22 +1116,22 @@

(rule 0 (to_amode_add flags x y offset)
(amode_imm_reg_reg_shift flags x y offset))
(rule 1 (to_amode_add flags x (iconst (simm32 c)) offset)
(rule 1 (to_amode_add flags x (i32_from_iconst c) offset)
(if-let sum (s32_add_fallible offset c))
(amode_imm_reg flags x sum))
(rule 2 (to_amode_add flags (iconst (simm32 c)) x offset)
(rule 2 (to_amode_add flags (i32_from_iconst c) x offset)
(if-let sum (s32_add_fallible offset c))
(amode_imm_reg flags x sum))
(rule 3 (to_amode_add flags (iadd x (iconst (simm32 c))) y offset)
(rule 3 (to_amode_add flags (iadd x (i32_from_iconst c)) y offset)
(if-let sum (s32_add_fallible offset c))
(amode_imm_reg_reg_shift flags x y sum))
(rule 4 (to_amode_add flags (iadd (iconst (simm32 c)) x) y offset)
(rule 4 (to_amode_add flags (iadd (i32_from_iconst c) x) y offset)
(if-let sum (s32_add_fallible offset c))
(amode_imm_reg_reg_shift flags x y sum))
(rule 5 (to_amode_add flags x (iadd y (iconst (simm32 c))) offset)
(rule 5 (to_amode_add flags x (iadd y (i32_from_iconst c)) offset)
(if-let sum (s32_add_fallible offset c))
(amode_imm_reg_reg_shift flags x y sum))
(rule 6 (to_amode_add flags x (iadd (iconst (simm32 c)) y) offset)
(rule 6 (to_amode_add flags x (iadd (i32_from_iconst c) y) offset)
(if-let sum (s32_add_fallible offset c))
(amode_imm_reg_reg_shift flags x y sum))

Expand Down
44 changes: 29 additions & 15 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -979,9 +979,9 @@
(x64_imul ty y x))

;; lift out constants to use 3-operand form
(rule -3 (lower (has_type (ty_int_ref_16_to_64 ty) (imul x (iconst (simm32 y)))))
(rule -3 (lower (has_type (ty_int_ref_16_to_64 ty) (imul x (i32_from_iconst y))))
(x64_imul_imm ty x y))
(rule -2 (lower (has_type (ty_int_ref_16_to_64 ty) (imul (iconst (simm32 x)) y)))
(rule -2 (lower (has_type (ty_int_ref_16_to_64 ty) (imul (i32_from_iconst x) y)))
(x64_imul_imm ty y x))

;; `i128`.
Expand Down Expand Up @@ -2970,9 +2970,10 @@
(x64_movrm $I32 (to_amode flags address offset) value)))

;; IMM stores
(rule 2 (lower (store flags (has_type (fits_in_64 ty) (iconst (simm32 value))) address offset))
(rule 4 (lower (store flags value @ (value_type (fits_in_64 ty)) address offset))
(if-let (i32_from_iconst imm) value)
(side_effect
(x64_movimm_m ty (to_amode flags address offset) value)))
(x64_movimm_m ty (to_amode flags address offset) imm)))

;; F32 stores of values in XMM registers.
(rule 1 (lower (store flags
Expand Down Expand Up @@ -3308,6 +3309,12 @@
(rule 2 (lower_branch (brif (maybe_uextend (fcmp cc a b)) _ _) (two_targets then else))
(emit_side_effect (jmp_cond_fcmp (emit_fcmp cc a b) then else)))

(rule 2 (lower_branch (brif (maybe_uextend (vany_true a)) _ _) (two_targets then else))
(emit_side_effect (jmp_cond_icmp (emit_vany_true a) then else)))

(rule 2 (lower_branch (brif (maybe_uextend (vall_true a)) _ _) (two_targets then else))
(emit_side_effect (jmp_cond_icmp (emit_vall_true a) then else)))

(rule 1 (lower_branch (brif val @ (value_type $I128) _ _)
(two_targets then else))
(emit_side_effect (jmp_cond_icmp (cmp_zero_i128 (CC.Z) val) then else)))
Expand Down Expand Up @@ -4263,10 +4270,9 @@
;; TODO use Inst::gen_constant() instead.
(x64_xmm_load_const ty (const_to_vconst const)))

;; Special case for a zero-vector: don't load, xor instead.
(rule 1 (lower (has_type ty (vconst (u128_from_constant 0))))
(let ((dst Xmm (xmm_uninit_value)))
(x64_pxor dst dst)))
;; Special cases for known constant patterns to skip a 16-byte load.
(rule 1 (lower (has_type ty (vconst (u128_from_constant 0)))) (xmm_zero ty))
(rule 1 (lower (has_type ty (vconst (u128_from_constant -1)))) (vector_all_ones))

;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -4630,30 +4636,38 @@
;; 0xffff then every byte was equal to zero, so test if the comparison is
;; not-equal or NZ.
(rule (lower (vany_true val))
(lower_icmp_bool (emit_vany_true val)))

(decl emit_vany_true (Value) IcmpCondResult)
(rule (emit_vany_true val)
(let (
(any_byte_zero Xmm (x64_pcmpeqb val (xmm_zero $I8X16)))
(mask Gpr (x64_pmovmskb (OperandSize.Size32) any_byte_zero))
)
(with_flags (x64_cmp_imm (OperandSize.Size32) mask 0xffff)
(x64_setcc (CC.NZ)))))
(icmp_cond_result (x64_cmp_imm (OperandSize.Size32) mask 0xffff)
(CC.NZ))))

;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 1 (lower (vall_true val @ (value_type ty)))
(rule (lower (vall_true val))
(lower_icmp_bool (emit_vall_true val)))

(decl emit_vall_true (Value) IcmpCondResult)
(rule 1 (emit_vall_true val @ (value_type ty))
(if-let $true (use_sse41))
(let ((src Xmm val)
(zeros Xmm (xmm_zero ty))
(cmp Xmm (x64_pcmpeq (vec_int_type ty) src zeros)))
(with_flags (x64_ptest cmp cmp) (x64_setcc (CC.Z)))))
(icmp_cond_result (x64_ptest cmp cmp) (CC.Z))))

;; Perform an appropriately-sized lane-wise comparison with zero. If the
;; result is all 0s then all of them are true because nothing was equal to
;; zero.
(rule (lower (vall_true val @ (value_type ty)))
(rule (emit_vall_true val @ (value_type ty))
(let ((lanes_with_zero Xmm (x64_pcmpeq (vec_int_type ty) val (xmm_zero ty)))
(mask Gpr (x64_pmovmskb (OperandSize.Size32) lanes_with_zero)))
(with_flags (x64_test (OperandSize.Size32) mask mask)
(x64_setcc (CC.Z)))))
(icmp_cond_result (x64_test (OperandSize.Size32) mask mask)
(CC.Z))))

;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
9 changes: 4 additions & 5 deletions cranelift/codegen/src/machinst/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,10 @@ macro_rules! isle_lower_prelude_methods {
Some((constant << shift_amt) >> shift_amt)
}

/// Extracts the constant behind `val` as an `i32`.
///
/// Returns `None` when `i64_from_iconst` yields no constant for `val`, or
/// when that 64-bit constant does not fit in an `i32`.
fn i32_from_iconst(&mut self, val: Value) -> Option<i32> {
    let wide = self.i64_from_iconst(val)?;
    let narrowed: Result<i32, _> = wide.try_into();
    narrowed.ok()
}

fn zero_value(&mut self, value: Value) -> Option<Value> {
let insn = self.def_inst(value);
if insn.is_some() {
Expand Down Expand Up @@ -568,11 +572,6 @@ macro_rules! isle_lower_prelude_methods {
Some(value)
}

#[inline]
fn simm32(&mut self, x: Imm64) -> Option<i32> {
i64::from(x).try_into().ok()
}

#[inline]
fn uimm8(&mut self, x: Imm64) -> Option<u8> {
let x64: i64 = x.into();
Expand Down
13 changes: 5 additions & 8 deletions cranelift/codegen/src/prelude_lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,11 @@
(extractor (u64_from_iconst x)
(def_inst (iconst (u64_from_imm64 x))))

;; Extract a constant `i32` from a value defined by an `iconst`.
;; The constant is sign extended to 64 bits first; this extractor only matches
;; when that sign-extended value fits in an `i32`.
(decl i32_from_iconst (i32) Value)
(extern extractor i32_from_iconst i32_from_iconst)

;; Extract a constant `i64` from a value defined by an `iconst`.
;; The value is sign extended to 64 bits.
(decl i64_from_iconst (i64) Value)
Expand All @@ -302,14 +307,6 @@
(decl maybe_uextend (Value) Value)
(extern extractor maybe_uextend maybe_uextend)

;; Get a signed 32-bit immediate in an u32 from an Imm64, if possible.
;; Note that this checks that the raw i64 value from the Imm64 fits in i32,
;; so `-1_u32` will not actually match -- it's treated as `0xFFFF_FFFF_i64`,
;; which doesn't fit in an i32 and thus doesn't match the extractor.
;; An Imm64 of `-1_i64` *will* match, however.
(decl simm32 (i32) Imm64)
(extern extractor simm32 simm32)

;; Get an unsigned 8-bit immediate in a u8 from an Imm64, if possible.
(decl uimm8 (u8) Imm64)
(extern extractor uimm8 uimm8)
Expand Down
57 changes: 57 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/issue8847-1.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
;; Compile test case

test compile
target riscv64

function u1:0() tail {
ss0 = explicit_slot 50, align = 512
ss1 = explicit_slot 47, align = 4
ss2 = explicit_slot 34, align = 32
ss3 = explicit_slot 103, align = 1024
ss4 = explicit_slot 110, align = 512
ss5 = explicit_slot 126, align = 512
sig0 = (i64 sext, i64 sext, i64 sext, i64 sext, i64 sext, i64 sext, i64 sext, i8 uext, i16 uext, i16, i64 sext, i64 sext, i128 uext, i8 sext, f32) tail

block0:
v0 = iconst.i64 0xef31_de2a_2352_79ff
v3 = iconst.i16 0xffef
v164 = iconst.i64 0
v7 = uextend.i128 v164 ; v164 = 0
v14 = iconst.i8 203
v15 = f32const -0x1.979796p24
v112 = iconst.i8 0
v134 = iconst.i8 0
v147 = iconst.i8 0
v154 = iconst.i8 0
v156 = iconst.i32 0
v157 = iconst.i32 0
v163 = iconst.i64 0
brif v112, block40, block39 ; v112 = 0

block40:
trap user0

block39:
brif.i8 v134, block58, block57 ; v134 = 0

block58:
trap user0

block57:
brif.i8 v147, block68, block67 ; v147 = 0

block68:
trap user0

block67:
brif.i8 v154, block73, block72 ; v154 = 0

block73:
br_table v156, block1, [block1, block1] ; v156 = 0

block72:
br_table v157, block1, [block1, block1] ; v157 = 0

block1 cold:
return_call_indirect.i64 sig0, v163(v0, v0, v0, v0, v0, v0, v0, v14, v3, v3, v0, v0, v7, v14, v15) ; v163 = 0, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v14 = 203, v3 = 0xffef, v3 = 0xffef, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v14 = 203, v15 = -0x1.979796p24
}
Loading

0 comments on commit 711ed09

Please sign in to comment.