Skip to content

Commit

Permalink
riscv64: Implement SIMD shifts, v{all,any}_true and vhigh_bits (#…
Browse files Browse the repository at this point in the history
…6507)

* riscv64: Add SIMD shifts

* riscv64: Implement SIMD `vall_true`

* riscv64: Implement SIMD `vany_true`

* riscv64: Add SIMD `vhigh_bits`

* wasmtime: Enable more RISC-V SIMD tests
  • Loading branch information
afonso360 committed Jun 3, 2023
1 parent 176935e commit f7ae056
Show file tree
Hide file tree
Showing 25 changed files with 5,441 additions and 18 deletions.
4 changes: 0 additions & 4 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,12 +237,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
"almost_extmul",
"canonicalize_nan",
"cvt_from_uint",
"issue4807",
"issue_3327_bnot_lowering",
"load_splat_out_of_bounds",
"simd_align",
"simd_bit_shift",
"simd_boolean",
"simd_conversions",
"simd_f32x4",
"simd_f32x4_cmp",
Expand Down
17 changes: 17 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1573,10 +1573,19 @@

;; UImm5 Helpers

;; Helper to go directly from a `Value`, when it is an `iconst`, to a `UImm5`.
;;
;; The pattern chains three extractors: `iconst` on the value,
;; `u64_from_imm64` on its immediate and `uimm5_from_u64` on the resulting
;; `u64`. If any of them fails to match, this extractor does not match either.
(decl uimm5_from_value (UImm5) Value)
(extractor (uimm5_from_value n)
(iconst (u64_from_imm64 (uimm5_from_u64 n))))

;; Extract a `UImm5` from a `u8`.
;;
;; Declared `partial` and implemented by an external (Rust) extractor of the
;; same name, so it may fail to match for some inputs.
(decl pure partial uimm5_from_u8 (UImm5) u8)
(extern extractor uimm5_from_u8 uimm5_from_u8)

;; Extract a `UImm5` from a `u64`.
;;
;; Declared `partial` and implemented by an external (Rust) extractor of the
;; same name, so it may fail to match for some inputs.
(decl pure partial uimm5_from_u64 (UImm5) u64)
(extern extractor uimm5_from_u64 uimm5_from_u64)

;; Convert a `UImm5` into an `Imm5` using an external (Rust) constructor.
;; NOTE(review): the name suggests a bit-for-bit reinterpretation rather than
;; a value-preserving conversion; confirm against the Rust implementation.
(decl uimm5_bitcast_to_imm5 (UImm5) Imm5)
(extern constructor uimm5_bitcast_to_imm5 uimm5_bitcast_to_imm5)

Expand Down Expand Up @@ -1666,6 +1675,14 @@
(rv_and (value_regs_get x 0) (value_regs_get y 0)))


;; Bitwise AND of an `XReg` with a `u64` constant.
;;
;; Rule 1 has the higher priority: when the constant matches `imm12_from_u64`
;; it is passed directly to `rv_andi`.
(decl gen_andi (XReg u64) XReg)
(rule 1 (gen_andi x (imm12_from_u64 y))
(rv_andi x y))

;; Fallback: materialize the constant as an `$I64` value with `imm` and use
;; the register-register `rv_and`.
(rule 0 (gen_andi x y)
(rv_and x (imm $I64 y)))


(decl gen_or (Type ValueRegs ValueRegs) ValueRegs)
(rule 1 (gen_or $I128 x y)
(value_regs
Expand Down
36 changes: 29 additions & 7 deletions cranelift/codegen/src/isa/riscv64/inst/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,12 +275,15 @@ impl VecAluOpRRR {
VecAluOpRRR::VmulVV => 0b100101,
VecAluOpRRR::VmulhVV => 0b100111,
VecAluOpRRR::VmulhuVV | VecAluOpRRR::VfmulVV | VecAluOpRRR::VfmulVF => 0b100100,
VecAluOpRRR::VsllVV | VecAluOpRRR::VsllVX => 0b100101,
VecAluOpRRR::VsrlVV | VecAluOpRRR::VsrlVX => 0b101000,
VecAluOpRRR::VsraVV | VecAluOpRRR::VsraVX => 0b101001,
VecAluOpRRR::VandVV | VecAluOpRRR::VandVX => 0b001001,
VecAluOpRRR::VorVV | VecAluOpRRR::VorVX => 0b001010,
VecAluOpRRR::VxorVV | VecAluOpRRR::VxorVX => 0b001011,
VecAluOpRRR::VminuVV | VecAluOpRRR::VminuVX => 0b000100,
VecAluOpRRR::VminuVV | VecAluOpRRR::VminuVX | VecAluOpRRR::VredminuVS => 0b000100,
VecAluOpRRR::VminVV | VecAluOpRRR::VminVX => 0b000101,
VecAluOpRRR::VmaxuVV | VecAluOpRRR::VmaxuVX => 0b000110,
VecAluOpRRR::VmaxuVV | VecAluOpRRR::VmaxuVX | VecAluOpRRR::VredmaxuVS => 0b000110,
VecAluOpRRR::VmaxVV | VecAluOpRRR::VmaxVX => 0b000111,
VecAluOpRRR::VslidedownVX => 0b001111,
VecAluOpRRR::VfrsubVF => 0b100111,
Expand All @@ -293,6 +296,7 @@ impl VecAluOpRRR {
VecAluOpRRR::VssubuVV | VecAluOpRRR::VssubuVX => 0b100010,
VecAluOpRRR::VssubVV | VecAluOpRRR::VssubVX => 0b100011,
VecAluOpRRR::VfsgnjnVV => 0b001001,
VecAluOpRRR::VmsltVX => 0b011011,
}
}

Expand All @@ -304,6 +308,9 @@ impl VecAluOpRRR {
| VecAluOpRRR::VsubVV
| VecAluOpRRR::VssubVV
| VecAluOpRRR::VssubuVV
| VecAluOpRRR::VsllVV
| VecAluOpRRR::VsrlVV
| VecAluOpRRR::VsraVV
| VecAluOpRRR::VandVV
| VecAluOpRRR::VorVV
| VecAluOpRRR::VxorVV
Expand All @@ -312,16 +319,21 @@ impl VecAluOpRRR {
| VecAluOpRRR::VmaxuVV
| VecAluOpRRR::VmaxVV
| VecAluOpRRR::VmergeVVM => VecOpCategory::OPIVV,
VecAluOpRRR::VmulVV | VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhuVV => {
VecOpCategory::OPMVV
}
VecAluOpRRR::VmulVV
| VecAluOpRRR::VmulhVV
| VecAluOpRRR::VmulhuVV
| VecAluOpRRR::VredmaxuVS
| VecAluOpRRR::VredminuVS => VecOpCategory::OPMVV,
VecAluOpRRR::VaddVX
| VecAluOpRRR::VsaddVX
| VecAluOpRRR::VsadduVX
| VecAluOpRRR::VsubVX
| VecAluOpRRR::VssubVX
| VecAluOpRRR::VssubuVX
| VecAluOpRRR::VrsubVX
| VecAluOpRRR::VsllVX
| VecAluOpRRR::VsrlVX
| VecAluOpRRR::VsraVX
| VecAluOpRRR::VandVX
| VecAluOpRRR::VorVX
| VecAluOpRRR::VxorVX
Expand All @@ -330,7 +342,8 @@ impl VecAluOpRRR {
| VecAluOpRRR::VmaxuVX
| VecAluOpRRR::VmaxVX
| VecAluOpRRR::VslidedownVX
| VecAluOpRRR::VmergeVXM => VecOpCategory::OPIVX,
| VecAluOpRRR::VmergeVXM
| VecAluOpRRR::VmsltVX => VecOpCategory::OPIVX,
VecAluOpRRR::VfaddVV
| VecAluOpRRR::VfsubVV
| VecAluOpRRR::VfmulVV
Expand Down Expand Up @@ -385,6 +398,9 @@ impl VecAluOpRRImm5 {
match self {
VecAluOpRRImm5::VaddVI => 0b000000,
VecAluOpRRImm5::VrsubVI => 0b000011,
VecAluOpRRImm5::VsllVI => 0b100101,
VecAluOpRRImm5::VsrlVI => 0b101000,
VecAluOpRRImm5::VsraVI => 0b101001,
VecAluOpRRImm5::VandVI => 0b001001,
VecAluOpRRImm5::VorVI => 0b001010,
VecAluOpRRImm5::VxorVI => 0b001011,
Expand All @@ -399,6 +415,9 @@ impl VecAluOpRRImm5 {
match self {
VecAluOpRRImm5::VaddVI
| VecAluOpRRImm5::VrsubVI
| VecAluOpRRImm5::VsllVI
| VecAluOpRRImm5::VsrlVI
| VecAluOpRRImm5::VsraVI
| VecAluOpRRImm5::VandVI
| VecAluOpRRImm5::VorVI
| VecAluOpRRImm5::VxorVI
Expand All @@ -411,7 +430,10 @@ impl VecAluOpRRImm5 {

pub fn imm_is_unsigned(&self) -> bool {
match self {
VecAluOpRRImm5::VslidedownVI => true,
VecAluOpRRImm5::VsllVI
| VecAluOpRRImm5::VsrlVI
| VecAluOpRRImm5::VsraVI
| VecAluOpRRImm5::VslidedownVI => true,
VecAluOpRRImm5::VaddVI
| VecAluOpRRImm5::VrsubVI
| VecAluOpRRImm5::VandVI
Expand Down
76 changes: 76 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst_vector.isle
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@
(VmulVV)
(VmulhVV)
(VmulhuVV)
(VsllVV)
(VsrlVV)
(VsraVV)
(VandVV)
(VorVV)
(VxorVV)
Expand All @@ -112,6 +115,8 @@
(VfdivVV)
(VfsgnjnVV)
(VmergeVVM)
(VredmaxuVS)
(VredminuVS)

;; Vector-Scalar Opcodes
(VaddVX)
Expand All @@ -121,6 +126,9 @@
(VrsubVX)
(VssubVX)
(VssubuVX)
(VsllVX)
(VsrlVX)
(VsraVX)
(VandVX)
(VorVX)
(VxorVX)
Expand All @@ -137,6 +145,7 @@
(VfrdivVF)
(VmergeVXM)
(VfmergeVFM)
(VmsltVX)
))

;; Register-Imm ALU Ops
Expand All @@ -146,6 +155,9 @@
(VsaddVI)
(VsadduVI)
(VrsubVI)
(VsllVI)
(VsrlVI)
(VsraVI)
(VandVI)
(VorVI)
(VxorVI)
Expand Down Expand Up @@ -388,6 +400,51 @@
(rule (rv_vmulhu_vv vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VmulhuVV) vs2 vs1 mask vstate))

;; Helper for emitting the `vsll.vv` (vector-vector shift left logical)
;; instruction.
(decl rv_vsll_vv (VReg VReg VecOpMasking VState) VReg)
(rule (rv_vsll_vv vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VsllVV) vs2 vs1 mask vstate))

;; Helper for emitting the `vsll.vx` (vector-scalar shift left logical)
;; instruction.
;;
;; Note: the second operand is an `XReg`, even though the rule binds it to a
;; variable named `vs1`.
(decl rv_vsll_vx (VReg XReg VecOpMasking VState) VReg)
(rule (rv_vsll_vx vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VsllVX) vs2 vs1 mask vstate))

;; Helper for emitting the `vsll.vi` (vector-immediate shift left logical)
;; instruction. The shift amount is given as a `UImm5`.
(decl rv_vsll_vi (VReg UImm5 VecOpMasking VState) VReg)
(rule (rv_vsll_vi vs2 imm mask vstate)
(vec_alu_rr_uimm5 (VecAluOpRRImm5.VsllVI) vs2 imm mask vstate))

;; Helper for emitting the `vsrl.vv` (vector-vector shift right logical)
;; instruction.
(decl rv_vsrl_vv (VReg VReg VecOpMasking VState) VReg)
(rule (rv_vsrl_vv vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VsrlVV) vs2 vs1 mask vstate))

;; Helper for emitting the `vsrl.vx` (vector-scalar shift right logical)
;; instruction.
;;
;; Note: the second operand is an `XReg`, even though the rule binds it to a
;; variable named `vs1`.
(decl rv_vsrl_vx (VReg XReg VecOpMasking VState) VReg)
(rule (rv_vsrl_vx vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VsrlVX) vs2 vs1 mask vstate))

;; Helper for emitting the `vsrl.vi` (vector-immediate shift right logical)
;; instruction. The shift amount is given as a `UImm5`.
(decl rv_vsrl_vi (VReg UImm5 VecOpMasking VState) VReg)
(rule (rv_vsrl_vi vs2 imm mask vstate)
(vec_alu_rr_uimm5 (VecAluOpRRImm5.VsrlVI) vs2 imm mask vstate))

;; Helper for emitting the `vsra.vv` (vector-vector shift right arithmetic)
;; instruction.
(decl rv_vsra_vv (VReg VReg VecOpMasking VState) VReg)
(rule (rv_vsra_vv vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VsraVV) vs2 vs1 mask vstate))

;; Helper for emitting the `vsra.vx` (vector-scalar shift right arithmetic)
;; instruction.
;;
;; Note: the second operand is an `XReg`, even though the rule binds it to a
;; variable named `vs1`.
(decl rv_vsra_vx (VReg XReg VecOpMasking VState) VReg)
(rule (rv_vsra_vx vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VsraVX) vs2 vs1 mask vstate))

;; Helper for emitting the `vsra.vi` (vector-immediate shift right arithmetic)
;; instruction. The shift amount is given as a `UImm5`.
(decl rv_vsra_vi (VReg UImm5 VecOpMasking VState) VReg)
(rule (rv_vsra_vi vs2 imm mask vstate)
(vec_alu_rr_uimm5 (VecAluOpRRImm5.VsraVI) vs2 imm mask vstate))

;; Helper for emitting the `vand.vv` instruction.
(decl rv_vand_vv (VReg VReg VecOpMasking VState) VReg)
(rule (rv_vand_vv vs2 vs1 mask vstate)
Expand Down Expand Up @@ -647,6 +704,25 @@
(vec_alu_rr_imm5 (VecAluOpRRImm5.VmergeVIM) vs2 imm (masked mask) vstate))


;; Helper for emitting the `vredminu.vs` (unsigned minimum reduction)
;; instruction.
;;
;; vd[0] = minu( vs1[0] , vs2[*] )
;;
;; Only element 0 of `vs1` takes part; every element of `vs2` is reduced.
(decl rv_vredminu_vs (VReg VReg VecOpMasking VState) VReg)
(rule (rv_vredminu_vs vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VredminuVS) vs2 vs1 mask vstate))

;; Helper for emitting the `vredmaxu.vs` (unsigned maximum reduction)
;; instruction.
;;
;; vd[0] = maxu( vs1[0] , vs2[*] )
;;
;; Only element 0 of `vs1` takes part; every element of `vs2` is reduced.
(decl rv_vredmaxu_vs (VReg VReg VecOpMasking VState) VReg)
(rule (rv_vredmaxu_vs vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VredmaxuVS) vs2 vs1 mask vstate))

;; Helper for emitting the `vmslt.vx` (Vector Mask Set Less Than) instruction.
;;
;; Each element of `vs2` is compared (signed) against the scalar operand and
;; the result is written as a mask. Note that the scalar operand is an `XReg`,
;; even though the rule binds it to a variable named `vs1`.
(decl rv_vmslt_vx (VReg XReg VecOpMasking VState) VReg)
(rule (rv_vmslt_vx vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VmsltVX) vs2 vs1 mask vstate))

;;;; Multi-Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl gen_extractlane (Type VReg u8) Reg)
Expand Down
73 changes: 73 additions & 0 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,14 @@
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 (zero_reg) low)
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high))))

;; SIMD Cases
;; We don't need to mask anything since it is done by the instruction according to SEW.

;; General case: the shift amount is read from the first register of `y` and
;; passed to an unmasked `rv_vsll_vx`.
(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (ishl x y)))
(rv_vsll_vx x (value_regs_get y 0) (unmasked) ty))

;; Higher priority: when the shift amount matches `uimm5_from_value` (looking
;; through `maybe_uextend`), use the immediate form via `rv_vsll_vi` instead.
(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (ishl x (maybe_uextend (uimm5_from_value y)))))
(rv_vsll_vi x y (unmasked) ty))

;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -609,6 +617,14 @@
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low)
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 (zero_reg) high))))

;; SIMD Cases
;; We don't need to mask or extend anything since it is done by the instruction according to SEW.

;; General case: the shift amount is read from the first register of `y` and
;; passed to an unmasked `rv_vsrl_vx`.
(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (ushr x y)))
(rv_vsrl_vx x (value_regs_get y 0) (unmasked) ty))

;; Higher priority: when the shift amount matches `uimm5_from_value` (looking
;; through `maybe_uextend`), use the immediate form via `rv_vsrl_vi` instead.
(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (ushr x (maybe_uextend (uimm5_from_value y)))))
(rv_vsrl_vi x y (unmasked) ty))

;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -660,6 +676,15 @@
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low)
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high_replacement high))))

;; SIMD Cases
;; We don't need to mask or extend anything since it is done by the instruction according to SEW.

;; General case: the shift amount is read from the first register of `y` and
;; passed to an unmasked `rv_vsra_vx`.
(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (sshr x y)))
(rv_vsra_vx x (value_regs_get y 0) (unmasked) ty))

;; Higher priority: when the shift amount matches `uimm5_from_value` (looking
;; through `maybe_uextend`), use the immediate form via `rv_vsra_vi` instead.
(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (sshr x (maybe_uextend (uimm5_from_value y)))))
(rv_vsra_vi x y (unmasked) ty))


;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (rotl x y)))
Expand Down Expand Up @@ -1334,3 +1359,51 @@

(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (ssub_sat x (splat y))))
(rv_vssub_vx x y (unmasked) ty))

;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Here we do a Vector Reduce operation: take the unsigned minimum over all
;; lanes of the vector, with a fixed extra input of 1 to the reduction.
;; This way, if any lane is 0, the result will be 0. Otherwise, the result will
;; be 1.
;; The reduce operation leaves the result in the lowest lane; we then move it
;; into the destination X register.
(rule (lower (vall_true x @ (value_type (ty_vec_fits_in_register ty))))
(if-let one (imm5_from_i8 1))
;; We don't need to broadcast the immediate into all lanes, only into lane 0.
;; The broadcast via `rv_vmv_vi` is used anyway because it takes one fewer
;; instruction than going through a `vmv.s.x`.
(let ((fixed VReg (rv_vmv_vi one ty))
(min VReg (rv_vredminu_vs x fixed (unmasked) ty)))
(rv_vmv_xs min ty)))


;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Here we do a Vector Reduce operation. Get the unsigned maximum value of the
;; input vector register. Move the max to an X register, and do a `snez` on it
;; to ensure it is either 1 or 0.
;;
;; The input vector `x` is passed as both operands of the reduction, so no
;; separate initial value has to be materialized.
(rule (lower (vany_true x @ (value_type (ty_vec_fits_in_register ty))))
(let ((max VReg (rv_vredmaxu_vs x x (unmasked) ty))
(x_max XReg (rv_vmv_xs max ty)))
(rv_snez x_max)))


;;;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; To check if the MSB of a lane is set, we do a `vmslt` with zero. This sets
;; the mask bit to 1 if the value is negative (MSB 1) and 0 if not. We can then
;; just move that mask to an X Register.
;;
;; We must ensure that the move to the X register has a SEW with enough bits
;; to hold the full mask. Additionally, in some cases (e.g. i64x2) we are going
;; to read some tail bits. These are undefined, so we need to further mask them
;; off.
(rule (lower (vhigh_bits x @ (value_type (ty_vec_fits_in_register ty))))
(let ((mask VReg (rv_vmslt_vx x (zero_reg) (unmasked) ty))
;; Here we only need I64X1, but emit an AVL of 2 since it
;; saves one vector state change in the case of I64X2.
;;
;; TODO: For types that have more lanes than element bits, we can
;; use the original type as a VState and avoid a state change.
(x_mask XReg (rv_vmv_xs mask (vstate_from_type $I64X2))))
;; This AND with `ty_lane_mask` is the tail-bit masking described above.
(gen_andi x_mask (ty_lane_mask ty))))
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/riscv64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,10 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
UImm5::maybe_from_u8(arg0)
}
/// Converts a `u64` into a `UImm5`.
///
/// Returns `None` when the value does not fit in a `u8`, or when
/// `UImm5::maybe_from_u8` rejects the narrowed value.
#[inline]
fn uimm5_from_u64(&mut self, arg0: u64) -> Option<UImm5> {
    // Narrow first; a value that overflows `u8` is bailed out on right here.
    let narrowed: u8 = arg0.try_into().ok()?;
    UImm5::maybe_from_u8(narrowed)
}
#[inline]
fn writable_zero_reg(&mut self) -> WritableReg {
writable_zero_reg()
}
Expand Down
Loading

0 comments on commit f7ae056

Please sign in to comment.