Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

riscv64: Implement SIMD shifts, v{all,any}_true and vhigh_bits #6507

Merged
merged 5 commits into from
Jun 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,12 +237,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
"almost_extmul",
"canonicalize_nan",
"cvt_from_uint",
"issue4807",
"issue_3327_bnot_lowering",
"load_splat_out_of_bounds",
"simd_align",
"simd_bit_shift",
"simd_boolean",
"simd_conversions",
"simd_f32x4",
"simd_f32x4_cmp",
Expand Down
17 changes: 17 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1573,10 +1573,19 @@

;; UImm5 Helpers

;; Helper to go directly from a `Value`, when it's an `iconst`, to a `UImm5`.
;; The match fails when the constant is rejected by `uimm5_from_u64`.
(decl uimm5_from_value (UImm5) Value)
(extractor (uimm5_from_value n)
(iconst (u64_from_imm64 (uimm5_from_u64 n))))

;; Extract a `UImm5` from a `u8`.
;; Declared `partial`: the extraction may fail. Implemented externally in Rust.
(decl pure partial uimm5_from_u8 (UImm5) u8)
(extern extractor uimm5_from_u8 uimm5_from_u8)

;; Extract a `UImm5` from a `u64`.
;; Declared `partial`: the extraction may fail. Implemented externally in Rust.
(decl pure partial uimm5_from_u64 (UImm5) u64)
(extern extractor uimm5_from_u64 uimm5_from_u64)

;; Convert a `UImm5` into an `Imm5`. Implemented externally in Rust.
;; NOTE(review): going by the name, this presumably reinterprets the same
;; 5 bits as a signed immediate rather than converting the numeric value —
;; confirm against the Rust implementation.
(decl uimm5_bitcast_to_imm5 (UImm5) Imm5)
(extern constructor uimm5_bitcast_to_imm5 uimm5_bitcast_to_imm5)

Expand Down Expand Up @@ -1666,6 +1675,14 @@
(rv_and (value_regs_get x 0) (value_regs_get y 0)))


;; Bitwise AND of an X register with a `u64` constant.
;;
;; Emits a single `andi` when the constant is accepted by `imm12_from_u64`;
;; otherwise the constant is first loaded into a register and a
;; register-register `and` is emitted.
(decl gen_andi (XReg u64) XReg)
;; Preferred (higher priority): the constant can be used as an immediate.
(rule 1 (gen_andi x (imm12_from_u64 y))
(rv_andi x y))

;; Fallback: materialize the constant as an `$I64` value, then `and`.
(rule 0 (gen_andi x y)
(rv_and x (imm $I64 y)))


(decl gen_or (Type ValueRegs ValueRegs) ValueRegs)
(rule 1 (gen_or $I128 x y)
(value_regs
Expand Down
36 changes: 29 additions & 7 deletions cranelift/codegen/src/isa/riscv64/inst/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,12 +275,15 @@ impl VecAluOpRRR {
VecAluOpRRR::VmulVV => 0b100101,
VecAluOpRRR::VmulhVV => 0b100111,
VecAluOpRRR::VmulhuVV | VecAluOpRRR::VfmulVV | VecAluOpRRR::VfmulVF => 0b100100,
VecAluOpRRR::VsllVV | VecAluOpRRR::VsllVX => 0b100101,
VecAluOpRRR::VsrlVV | VecAluOpRRR::VsrlVX => 0b101000,
VecAluOpRRR::VsraVV | VecAluOpRRR::VsraVX => 0b101001,
VecAluOpRRR::VandVV | VecAluOpRRR::VandVX => 0b001001,
VecAluOpRRR::VorVV | VecAluOpRRR::VorVX => 0b001010,
VecAluOpRRR::VxorVV | VecAluOpRRR::VxorVX => 0b001011,
VecAluOpRRR::VminuVV | VecAluOpRRR::VminuVX => 0b000100,
VecAluOpRRR::VminuVV | VecAluOpRRR::VminuVX | VecAluOpRRR::VredminuVS => 0b000100,
VecAluOpRRR::VminVV | VecAluOpRRR::VminVX => 0b000101,
VecAluOpRRR::VmaxuVV | VecAluOpRRR::VmaxuVX => 0b000110,
VecAluOpRRR::VmaxuVV | VecAluOpRRR::VmaxuVX | VecAluOpRRR::VredmaxuVS => 0b000110,
VecAluOpRRR::VmaxVV | VecAluOpRRR::VmaxVX => 0b000111,
VecAluOpRRR::VslidedownVX => 0b001111,
VecAluOpRRR::VfrsubVF => 0b100111,
Expand All @@ -293,6 +296,7 @@ impl VecAluOpRRR {
VecAluOpRRR::VssubuVV | VecAluOpRRR::VssubuVX => 0b100010,
VecAluOpRRR::VssubVV | VecAluOpRRR::VssubVX => 0b100011,
VecAluOpRRR::VfsgnjnVV => 0b001001,
VecAluOpRRR::VmsltVX => 0b011011,
}
}

Expand All @@ -304,6 +308,9 @@ impl VecAluOpRRR {
| VecAluOpRRR::VsubVV
| VecAluOpRRR::VssubVV
| VecAluOpRRR::VssubuVV
| VecAluOpRRR::VsllVV
| VecAluOpRRR::VsrlVV
| VecAluOpRRR::VsraVV
| VecAluOpRRR::VandVV
| VecAluOpRRR::VorVV
| VecAluOpRRR::VxorVV
Expand All @@ -312,16 +319,21 @@ impl VecAluOpRRR {
| VecAluOpRRR::VmaxuVV
| VecAluOpRRR::VmaxVV
| VecAluOpRRR::VmergeVVM => VecOpCategory::OPIVV,
VecAluOpRRR::VmulVV | VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhuVV => {
VecOpCategory::OPMVV
}
VecAluOpRRR::VmulVV
| VecAluOpRRR::VmulhVV
| VecAluOpRRR::VmulhuVV
| VecAluOpRRR::VredmaxuVS
| VecAluOpRRR::VredminuVS => VecOpCategory::OPMVV,
VecAluOpRRR::VaddVX
| VecAluOpRRR::VsaddVX
| VecAluOpRRR::VsadduVX
| VecAluOpRRR::VsubVX
| VecAluOpRRR::VssubVX
| VecAluOpRRR::VssubuVX
| VecAluOpRRR::VrsubVX
| VecAluOpRRR::VsllVX
| VecAluOpRRR::VsrlVX
| VecAluOpRRR::VsraVX
| VecAluOpRRR::VandVX
| VecAluOpRRR::VorVX
| VecAluOpRRR::VxorVX
Expand All @@ -330,7 +342,8 @@ impl VecAluOpRRR {
| VecAluOpRRR::VmaxuVX
| VecAluOpRRR::VmaxVX
| VecAluOpRRR::VslidedownVX
| VecAluOpRRR::VmergeVXM => VecOpCategory::OPIVX,
| VecAluOpRRR::VmergeVXM
| VecAluOpRRR::VmsltVX => VecOpCategory::OPIVX,
VecAluOpRRR::VfaddVV
| VecAluOpRRR::VfsubVV
| VecAluOpRRR::VfmulVV
Expand Down Expand Up @@ -385,6 +398,9 @@ impl VecAluOpRRImm5 {
match self {
VecAluOpRRImm5::VaddVI => 0b000000,
VecAluOpRRImm5::VrsubVI => 0b000011,
VecAluOpRRImm5::VsllVI => 0b100101,
VecAluOpRRImm5::VsrlVI => 0b101000,
VecAluOpRRImm5::VsraVI => 0b101001,
VecAluOpRRImm5::VandVI => 0b001001,
VecAluOpRRImm5::VorVI => 0b001010,
VecAluOpRRImm5::VxorVI => 0b001011,
Expand All @@ -399,6 +415,9 @@ impl VecAluOpRRImm5 {
match self {
VecAluOpRRImm5::VaddVI
| VecAluOpRRImm5::VrsubVI
| VecAluOpRRImm5::VsllVI
| VecAluOpRRImm5::VsrlVI
| VecAluOpRRImm5::VsraVI
| VecAluOpRRImm5::VandVI
| VecAluOpRRImm5::VorVI
| VecAluOpRRImm5::VxorVI
Expand All @@ -411,7 +430,10 @@ impl VecAluOpRRImm5 {

pub fn imm_is_unsigned(&self) -> bool {
match self {
VecAluOpRRImm5::VslidedownVI => true,
VecAluOpRRImm5::VsllVI
| VecAluOpRRImm5::VsrlVI
| VecAluOpRRImm5::VsraVI
| VecAluOpRRImm5::VslidedownVI => true,
VecAluOpRRImm5::VaddVI
| VecAluOpRRImm5::VrsubVI
| VecAluOpRRImm5::VandVI
Expand Down
76 changes: 76 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst_vector.isle
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@
(VmulVV)
(VmulhVV)
(VmulhuVV)
(VsllVV)
(VsrlVV)
(VsraVV)
(VandVV)
(VorVV)
(VxorVV)
Expand All @@ -112,6 +115,8 @@
(VfdivVV)
(VfsgnjnVV)
(VmergeVVM)
(VredmaxuVS)
(VredminuVS)

;; Vector-Scalar Opcodes
(VaddVX)
Expand All @@ -121,6 +126,9 @@
(VrsubVX)
(VssubVX)
(VssubuVX)
(VsllVX)
(VsrlVX)
(VsraVX)
(VandVX)
(VorVX)
(VxorVX)
Expand All @@ -137,6 +145,7 @@
(VfrdivVF)
(VmergeVXM)
(VfmergeVFM)
(VmsltVX)
))

;; Register-Imm ALU Ops
Expand All @@ -146,6 +155,9 @@
(VsaddVI)
(VsadduVI)
(VrsubVI)
(VsllVI)
(VsrlVI)
(VsraVI)
(VandVI)
(VorVI)
(VxorVI)
Expand Down Expand Up @@ -388,6 +400,51 @@
(rule (rv_vmulhu_vv vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VmulhuVV) vs2 vs1 mask vstate))

;; Helper for emitting the `vsll.vv` instruction (vector shift left logical).
;;
;; Vector-vector form: both source operands are vector registers.
(decl rv_vsll_vv (VReg VReg VecOpMasking VState) VReg)
(rule (rv_vsll_vv vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VsllVV) vs2 vs1 mask vstate))

;; Helper for emitting the `vsll.vx` instruction (vector shift left logical).
;;
;; Vector-scalar form: note that the second operand, although bound to the
;; name `vs1` below, is an X register (`XReg`), not a vector register.
(decl rv_vsll_vx (VReg XReg VecOpMasking VState) VReg)
(rule (rv_vsll_vx vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VsllVX) vs2 vs1 mask vstate))

;; Helper for emitting the `vsll.vi` instruction (vector shift left logical).
;;
;; Vector-immediate form: the second operand is an unsigned 5-bit immediate
;; (`UImm5`).
(decl rv_vsll_vi (VReg UImm5 VecOpMasking VState) VReg)
(rule (rv_vsll_vi vs2 imm mask vstate)
(vec_alu_rr_uimm5 (VecAluOpRRImm5.VsllVI) vs2 imm mask vstate))

;; Helper for emitting the `vsrl.vv` instruction (vector shift right logical).
;;
;; Vector-vector form: both source operands are vector registers.
(decl rv_vsrl_vv (VReg VReg VecOpMasking VState) VReg)
(rule (rv_vsrl_vv vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VsrlVV) vs2 vs1 mask vstate))

;; Helper for emitting the `vsrl.vx` instruction (vector shift right logical).
;;
;; Vector-scalar form: note that the second operand, although bound to the
;; name `vs1` below, is an X register (`XReg`), not a vector register.
(decl rv_vsrl_vx (VReg XReg VecOpMasking VState) VReg)
(rule (rv_vsrl_vx vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VsrlVX) vs2 vs1 mask vstate))

;; Helper for emitting the `vsrl.vi` instruction (vector shift right logical).
;;
;; Vector-immediate form: the second operand is an unsigned 5-bit immediate
;; (`UImm5`).
(decl rv_vsrl_vi (VReg UImm5 VecOpMasking VState) VReg)
(rule (rv_vsrl_vi vs2 imm mask vstate)
(vec_alu_rr_uimm5 (VecAluOpRRImm5.VsrlVI) vs2 imm mask vstate))

;; Helper for emitting the `vsra.vv` instruction (vector shift right
;; arithmetic).
;;
;; Vector-vector form: both source operands are vector registers.
(decl rv_vsra_vv (VReg VReg VecOpMasking VState) VReg)
(rule (rv_vsra_vv vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VsraVV) vs2 vs1 mask vstate))

;; Helper for emitting the `vsra.vx` instruction (vector shift right
;; arithmetic).
;;
;; Vector-scalar form: note that the second operand, although bound to the
;; name `vs1` below, is an X register (`XReg`), not a vector register.
(decl rv_vsra_vx (VReg XReg VecOpMasking VState) VReg)
(rule (rv_vsra_vx vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VsraVX) vs2 vs1 mask vstate))

;; Helper for emitting the `vsra.vi` instruction (vector shift right
;; arithmetic).
;;
;; Vector-immediate form: the second operand is an unsigned 5-bit immediate
;; (`UImm5`).
(decl rv_vsra_vi (VReg UImm5 VecOpMasking VState) VReg)
(rule (rv_vsra_vi vs2 imm mask vstate)
(vec_alu_rr_uimm5 (VecAluOpRRImm5.VsraVI) vs2 imm mask vstate))

;; Helper for emitting the `vand.vv` instruction.
(decl rv_vand_vv (VReg VReg VecOpMasking VState) VReg)
(rule (rv_vand_vv vs2 vs1 mask vstate)
Expand Down Expand Up @@ -647,6 +704,25 @@
(vec_alu_rr_imm5 (VecAluOpRRImm5.VmergeVIM) vs2 imm (masked mask) vstate))


;; Helper for emitting the `vredminu.vs` instruction (unsigned minimum
;; reduction).
;;
;; vd[0] = minu( vs1[0] , vs2[*] )
;;
;; Only element 0 of `vs1` takes part; the result is produced in element 0
;; of the destination.
(decl rv_vredminu_vs (VReg VReg VecOpMasking VState) VReg)
(rule (rv_vredminu_vs vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VredminuVS) vs2 vs1 mask vstate))

;; Helper for emitting the `vredmaxu.vs` instruction (unsigned maximum
;; reduction).
;;
;; vd[0] = maxu( vs1[0] , vs2[*] )
;;
;; Only element 0 of `vs1` takes part; the result is produced in element 0
;; of the destination.
(decl rv_vredmaxu_vs (VReg VReg VecOpMasking VState) VReg)
(rule (rv_vredmaxu_vs vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VredmaxuVS) vs2 vs1 mask vstate))

;; Helper for emitting the `vmslt.vx` (Vector Mask Set Less Than) instruction.
;;
;; Vector-scalar form: note that the second operand, although bound to the
;; name `vs1` below, is an X register (`XReg`), not a vector register.
;; The returned `VReg` holds a mask rather than a regular data vector.
(decl rv_vmslt_vx (VReg XReg VecOpMasking VState) VReg)
(rule (rv_vmslt_vx vs2 vs1 mask vstate)
(vec_alu_rrr (VecAluOpRRR.VmsltVX) vs2 vs1 mask vstate))

;;;; Multi-Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl gen_extractlane (Type VReg u8) Reg)
Expand Down
73 changes: 73 additions & 0 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,14 @@
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 (zero_reg) low)
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high))))

;; SIMD Cases
;; We don't need to mask anything since it is done by the instruction according to SEW.

;; General case: the shift amount is taken from the first register of `y`
;; and passed as the scalar operand of `vsll.vx`.
(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (ishl x y)))
(rv_vsll_vx x (value_regs_get y 0) (unmasked) ty))

;; Constant shift amount (optionally wrapped in a `uextend`) that fits in a
;; `UImm5`: use the immediate form `vsll.vi`. This rule has a higher priority
;; than the register form above, so it is preferred whenever both match.
(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (ishl x (maybe_uextend (uimm5_from_value y)))))
(rv_vsll_vi x y (unmasked) ty))

;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -609,6 +617,14 @@
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low)
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 (zero_reg) high))))

;; SIMD Cases
;; We don't need to mask or extend anything since it is done by the instruction according to SEW.

;; General case: the shift amount is taken from the first register of `y`
;; and passed as the scalar operand of `vsrl.vx`.
(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (ushr x y)))
(rv_vsrl_vx x (value_regs_get y 0) (unmasked) ty))

;; Constant shift amount (optionally wrapped in a `uextend`) that fits in a
;; `UImm5`: use the immediate form `vsrl.vi`. This rule has a higher priority
;; than the register form above, so it is preferred whenever both match.
(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (ushr x (maybe_uextend (uimm5_from_value y)))))
(rv_vsrl_vi x y (unmasked) ty))

;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -660,6 +676,15 @@
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low)
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high_replacement high))))

;; SIMD Cases
;; We don't need to mask or extend anything since it is done by the instruction according to SEW.

;; General case: the shift amount is taken from the first register of `y`
;; and passed as the scalar operand of `vsra.vx`.
(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (sshr x y)))
(rv_vsra_vx x (value_regs_get y 0) (unmasked) ty))

;; Constant shift amount (optionally wrapped in a `uextend`) that fits in a
;; `UImm5`: use the immediate form `vsra.vi`. This rule has a higher priority
;; than the register form above, so it is preferred whenever both match.
(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (sshr x (maybe_uextend (uimm5_from_value y)))))
(rv_vsra_vi x y (unmasked) ty))


;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (rotl x y)))
Expand Down Expand Up @@ -1334,3 +1359,51 @@

;; When the second operand is a `splat`, skip the splat and feed its input
;; directly to the vector-scalar form of the saturating subtract.
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (ssub_sat x (splat y))))
(rv_vssub_vx x y (unmasked) ty))

;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Here we do a Vector Reduce operation. Get the unsigned minimum value of any
;; lane in the vector. The fixed input to the reduce operation is a 1.
;; This way, if any lane is 0, the result will be 0. Otherwise, the result will
;; be a 1.
;; The reduce operation leaves the result in the lowest lane; we then move it
;; into the destination X register.
(rule (lower (vall_true x @ (value_type (ty_vec_fits_in_register ty))))
(if-let one (imm5_from_i8 1))
;; We don't need to broadcast the immediate into all lanes, only into lane 0.
;; The broadcasting move is used anyway since it takes one less instruction
;; than going through a `vmv.s.x`.
(let ((fixed VReg (rv_vmv_vi one ty))
(min VReg (rv_vredminu_vs x fixed (unmasked) ty)))
(rv_vmv_xs min ty)))


;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Here we do a Vector Reduce operation. Get the unsigned maximum value of the
;; input vector register. Move the max to an X register, and do a `snez` on it
;; to ensure it's either 1 or 0.
;;
;; `x` is passed as both operands of the reduction: the second operand only
;; contributes its element 0, which is already one of the lanes being reduced,
;; so it cannot change the maximum.
(rule (lower (vany_true x @ (value_type (ty_vec_fits_in_register ty))))
(let ((max VReg (rv_vredmaxu_vs x x (unmasked) ty))
(x_max XReg (rv_vmv_xs max ty)))
(rv_snez x_max)))


;;;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; To check if the MSB of a lane is set, we do a `vmslt` with zero, this sets
;; the mask bit to 1 if the value is negative (MSB 1) and 0 if not. We can then
;; just move that mask to an X Register.
;;
;; We must ensure that the move to the X register has a SEW with enough bits
;; to hold the full mask. Additionally, in some cases (e.g. i64x2) we are going
;; to read some tail bits. These are undefined, so we need to further mask them
;; off.
(rule (lower (vhigh_bits x @ (value_type (ty_vec_fits_in_register ty))))
(let ((mask VReg (rv_vmslt_vx x (zero_reg) (unmasked) ty))
;; Here we only need I64X1, but emit an AVL of 2 since it
;; saves one vector state change in the case of I64X2.
;;
;; TODO: For types that have more lanes than element bits, we can
;; use the original type as a VState and avoid a state change.
(x_mask XReg (rv_vmv_xs mask (vstate_from_type $I64X2))))
;; Clear the undefined tail bits read above.
;; NOTE(review): presumably `ty_lane_mask` yields one set bit per lane of
;; `ty` — confirm against its definition.
(gen_andi x_mask (ty_lane_mask ty))))
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/riscv64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,10 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
UImm5::maybe_from_u8(arg0)
}
/// Narrows a `u64` into a `UImm5`.
///
/// Yields `None` when `arg0` does not fit in a `u8`, or when
/// `UImm5::maybe_from_u8` rejects the narrowed value.
#[inline]
fn uimm5_from_u64(&mut self, arg0: u64) -> Option<UImm5> {
    // Bail out early for anything above `u8::MAX`.
    let narrowed: u8 = arg0.try_into().ok()?;
    UImm5::maybe_from_u8(narrowed)
}
// Context hook that simply forwards to the free function of the same name
// (the unqualified call below does not resolve to this method).
#[inline]
fn writable_zero_reg(&mut self) -> WritableReg {
writable_zero_reg()
}
Expand Down
Loading