diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
index d580846863db..d525452b4dc7 100644
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -2638,6 +2638,11 @@
 ;; TODO: Port lower_splat_const() to ISLE.
 (extern constructor splat_const splat_const)
 
+;; Lower a FloatCC to a Cond.
+(decl fp_cond_code (FloatCC) Cond)
+;; TODO: Port lower_fp_condcode() to ISLE.
+(extern constructor fp_cond_code fp_cond_code)
+
 ;; Generate comparison to zero operator from input condition code
 (decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
 (extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
@@ -2966,3 +2971,100 @@
       (let ((dst WritableReg (temp_writable_reg $I64))
             (_ Unit (emit (abi_stackslot_addr dst stack_slot offset))))
         dst))
+
+;; Helper for emitting instruction sequences to perform a vector comparison.
+
+(decl vec_cmp_vc (Reg Reg VectorSize) Reg)
+(rule (vec_cmp_vc rn rm size)
+      (let ((dst Reg (vec_rrr (VecALUOp.Fcmeq) rn rn size))
+            (tmp Reg (vec_rrr (VecALUOp.Fcmeq) rm rm size))
+            (dst Reg (vec_rrr (VecALUOp.And) dst tmp size)))
+        dst))
+
+(decl vec_cmp (Reg Reg Type Cond) Reg)
+
+;; Floating point Vs / Vc
+(rule (vec_cmp rn rm ty (Cond.Vc))
+      (if (ty_vector_float ty))
+      (vec_cmp_vc rn rm (vector_size ty)))
+(rule (vec_cmp rn rm ty (Cond.Vs))
+      (if (ty_vector_float ty))
+      (let ((tmp Reg (vec_cmp_vc rn rm (vector_size ty))))
+        (vec_misc (VecMisc2.Not) tmp (vector_size ty))))
+
+;; 'Less than' operations are implemented by swapping the order of
+;; operands and using the 'greater than' instructions.
+;; 'Not equal' is implemented with 'equal' and inverting the result.
+
+;; Floating-point
+(rule (vec_cmp rn rm ty (Cond.Eq))
+      (if (ty_vector_float ty))
+      (vec_rrr (VecALUOp.Fcmeq) rn rm (vector_size ty)))
+(rule (vec_cmp rn rm ty (Cond.Ne))
+      (if (ty_vector_float ty))
+      (let ((tmp Reg (vec_rrr (VecALUOp.Fcmeq) rn rm (vector_size ty))))
+        (vec_misc (VecMisc2.Not) tmp (vector_size ty))))
+(rule (vec_cmp rn rm ty (Cond.Ge))
+      (if (ty_vector_float ty))
+      (vec_rrr (VecALUOp.Fcmge) rn rm (vector_size ty)))
+(rule (vec_cmp rn rm ty (Cond.Gt))
+      (if (ty_vector_float ty))
+      (vec_rrr (VecALUOp.Fcmgt) rn rm (vector_size ty)))
+;; Floating-point swapped-operands
+(rule (vec_cmp rn rm ty (Cond.Mi))
+      (if (ty_vector_float ty))
+      (vec_rrr (VecALUOp.Fcmgt) rm rn (vector_size ty)))
+(rule (vec_cmp rn rm ty (Cond.Ls))
+      (if (ty_vector_float ty))
+      (vec_rrr (VecALUOp.Fcmge) rm rn (vector_size ty)))
+
+;; Integer
+(rule (vec_cmp rn rm ty (Cond.Eq))
+      (if (ty_vector_not_float ty))
+      (vec_rrr (VecALUOp.Cmeq) rn rm (vector_size ty)))
+(rule (vec_cmp rn rm ty (Cond.Ne))
+      (if (ty_vector_not_float ty))
+      (let ((tmp Reg (vec_rrr (VecALUOp.Cmeq) rn rm (vector_size ty))))
+        (vec_misc (VecMisc2.Not) tmp (vector_size ty))))
+(rule (vec_cmp rn rm ty (Cond.Ge))
+      (if (ty_vector_not_float ty))
+      (vec_rrr (VecALUOp.Cmge) rn rm (vector_size ty)))
+(rule (vec_cmp rn rm ty (Cond.Gt))
+      (if (ty_vector_not_float ty))
+      (vec_rrr (VecALUOp.Cmgt) rn rm (vector_size ty)))
+(rule (vec_cmp rn rm ty (Cond.Hs))
+      (if (ty_vector_not_float ty))
+      (vec_rrr (VecALUOp.Cmhs) rn rm (vector_size ty)))
+(rule (vec_cmp rn rm ty (Cond.Hi))
+      (if (ty_vector_not_float ty))
+      (vec_rrr (VecALUOp.Cmhi) rn rm (vector_size ty)))
+;; Integer swapped-operands
+(rule (vec_cmp rn rm ty (Cond.Le))
+      (if (ty_vector_not_float ty))
+      (vec_rrr (VecALUOp.Cmge) rm rn (vector_size ty)))
+(rule (vec_cmp rn rm ty (Cond.Lt))
+      (if (ty_vector_not_float ty))
+      (vec_rrr (VecALUOp.Cmgt) rm rn (vector_size ty)))
+(rule (vec_cmp rn rm ty (Cond.Ls))
+      (if (ty_vector_not_float ty))
+      (vec_rrr (VecALUOp.Cmhs) rm rn (vector_size ty)))
+(rule (vec_cmp rn rm ty (Cond.Lo))
+      (if (ty_vector_not_float ty))
+      (vec_rrr (VecALUOp.Cmhi) rm rn (vector_size ty)))
+
+;; Helper for determining if any value in a vector is true.
+;; This operation is implemented by using umaxp to create a scalar value, which
+;; is then compared against zero.
+;;
+;; umaxp vn.4s, vm.4s, vm.4s
+;; mov xm, vn.d[0]
+;; cmp xm, #0
+(decl vanytrue (Reg Type) ProducesFlags)
+(rule (vanytrue src (ty_vec128 ty))
+      (let ((src Reg (vec_rrr (VecALUOp.Umaxp) src src (VectorSize.Size32x4)))
+            (src Reg (mov_from_vec src 0 (ScalarSize.Size64))))
+        (cmp_imm (OperandSize.Size64) src (u8_into_imm12 0))))
+(rule (vanytrue src ty)
+      (if (ty_vec64 ty))
+      (let ((src Reg (mov_from_vec src 0 (ScalarSize.Size64))))
+        (cmp_imm (OperandSize.Size64) src (u8_into_imm12 0))))
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index ec6117c008a5..22f6005d3b96 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -2349,13 +2349,15 @@ impl MachInstEmit for Inst {
             &Inst::VecDupFromFpu { rd, rn, size } => {
                 let rd = allocs.next_writable(rd);
                 let rn = allocs.next(rn);
-                let imm5 = match size {
-                    VectorSize::Size32x4 => 0b00100,
-                    VectorSize::Size64x2 => 0b01000,
+                let q = size.is_128bits() as u32;
+                let imm5 = match size.lane_size() {
+                    ScalarSize::Size32 => 0b00100,
+                    ScalarSize::Size64 => 0b01000,
                     _ => unimplemented!(),
                 };
                 sink.put4(
-                    0b010_01110000_00000_000001_00000_00000
+                    0b000_01110000_00000_000001_00000_00000
+                        | (q << 30)
                         | (imm5 << 16)
                         | (machreg_to_vec(rn) << 5)
                         | machreg_to_vec(rd.to_reg()),
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index 2571b877719c..3ea1a32f05a4 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -182,24 +182,9 @@
 
 ;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-;; This operation is implemented by using umaxp to create a scalar value, which
-;; is then compared against zero.
-;;
-;; umaxp vn.4s, vm.4s, vm.4s
-;; mov xm, vn.d[0]
-;; cmp xm, #0
-;; cset xm, ne
-(rule (lower (vany_true x @ (value_type (ty_vec128 ty))))
-      (let ((x1 Reg (vec_rrr (VecALUOp.Umaxp) x x (VectorSize.Size32x4)))
-            (x2 Reg (mov_from_vec x1 0 (ScalarSize.Size64))))
-        (with_flags (cmp_imm (OperandSize.Size64) x2 (u8_into_imm12 0))
-                    (materialize_bool_result (ty_bits ty) (Cond.Ne)))))
-
-(rule (lower (vany_true x @ (value_type ty)))
-      (if (ty_vec64 ty))
-      (let ((x1 Reg (mov_from_vec x 0 (ScalarSize.Size64))))
-        (with_flags (cmp_imm (OperandSize.Size64) x1 (u8_into_imm12 0))
-                    (materialize_bool_result (ty_bits ty) (Cond.Ne)))))
+(rule (lower (has_type out_ty (vany_true x @ (value_type in_ty))))
+      (with_flags (vanytrue x in_ty)
+                  (materialize_bool_result (ty_bits out_ty) (Cond.Ne))))
 
 ;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -1704,6 +1689,16 @@
             (vec_size VectorSize (vector_size ty)))
         (value_reg (float_cmp_zero_swap cond rn vec_size))))
 
+(rule (lower (has_type out_ty
+                       (fcmp cond x @ (value_type (ty_scalar_float in_ty)) y)))
+      (with_flags (fpu_cmp (scalar_size in_ty) x y)
+                  (materialize_bool_result
+                   (ty_bits out_ty)
+                   (fp_cond_code cond))))
+
+(rule (lower (has_type out_ty (fcmp cond x @ (value_type in_ty) y)))
+      (if (ty_vector_float in_ty))
+      (vec_cmp x y in_ty (fp_cond_code cond)))
 
 ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -1879,7 +1874,6 @@
         (atomic_cas flags addr src1 src2))))
   (atomic_cas_loop addr src1 src2 ty))
 
-
 ;;;; Rules for 'fvdemote' ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 (rule (lower (fvdemote x))
       (fcvtn x (ScalarSize.Size32)))
@@ -2345,20 +2339,20 @@
 ;; uses. Its second output is a flags output only ever meant to
 ;; check for overflow using the
 ;; `backend.unsigned_add_overflow_condition()` condition.
-;; 
+;;
 ;; Note that the CLIF validation will ensure that no flag-setting
 ;; operation comes between this IaddIfcout and its use (e.g., a
 ;; Trapif). Thus, we can rely on implicit communication through the
 ;; processor flags rather than explicitly generating flags into a
 ;; register. We simply use the variant of the add instruction that
 ;; sets flags (`adds`) here.
-;; 
+;;
 ;; Note that the second output (the flags) need not be generated,
 ;; because flags are never materialized into a register; the only
 ;; instructions that can use a value of type `iflags` or `fflags`
 ;; will look directly for the flags-producing instruction (which can
 ;; always be found, by construction) and merge it.
-;; 
+;;
 ;; Now handle the iadd as above, except use an AddS opcode that sets
 ;; flags.
 
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
index db1a59e5530f..231011bd9303 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -6,12 +6,14 @@ use generated_code::Context;
 
 // Types that the generated ISLE code uses via `use super::*`.
 use super::{
-    lower_constant_f128, lower_constant_f32, lower_constant_f64, writable_zero_reg, zero_reg,
-    AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind,
-    ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo,
-    MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg,
-    ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
+    lower_constant_f128, lower_constant_f32, lower_constant_f64, lower_fp_condcode,
+    writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo,
+    CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
+    Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
+    Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
+    NZCV,
 };
+use crate::ir::condcodes;
 use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm};
 use crate::isa::aarch64::lower::{lower_address, lower_pair_address, lower_splat_const};
 use crate::isa::aarch64::settings::Flags as IsaFlags;
@@ -520,6 +522,10 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
         rd.to_reg()
     }
 
+    fn fp_cond_code(&mut self, cc: &condcodes::FloatCC) -> Cond {
+        lower_fp_condcode(*cc)
+    }
+
     fn preg_sp(&mut self) -> PReg {
         super::regs::stack_reg().to_real_reg().unwrap().into()
     }
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 1202e8351087..320ea062457a 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -299,25 +299,7 @@ pub(crate) fn lower_insn_to_regs(
             lower_icmp(ctx, insn, condcode, IcmpOutput::Register(rd))?;
         }
 
-        Opcode::Fcmp => {
-            let condcode = ctx.data(insn).fp_cond_code().unwrap();
-            let cond = lower_fp_condcode(condcode);
-            let ty = ctx.input_ty(insn, 0);
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-
-            if !ty.is_vector() {
-                ctx.emit(Inst::FpuCmp {
-                    size: ScalarSize::from_ty(ty),
-                    rn,
-                    rm,
-                });
-                materialize_bool_result(ctx, insn, rd, cond);
-            } else {
-                lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
-            }
-        }
+        Opcode::Fcmp => implemented_in_isle(ctx),
 
         Opcode::Debugtrap => implemented_in_isle(ctx),
 
diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs
index 9532526f8b75..c918a909c295 100644
--- a/cranelift/codegen/src/machinst/isle.rs
+++ b/cranelift/codegen/src/machinst/isle.rs
@@ -397,6 +397,23 @@
             }
         }
 
+        fn ty_vector_float(&mut self, ty: Type) -> Option<Type> {
+            if ty.is_vector() && ty.lane_type().is_float() {
+                Some(ty)
+            } else {
+                None
+            }
+        }
+
+        #[inline]
+        fn ty_vector_not_float(&mut self, ty: Type) -> Option<Type> {
+            if ty.is_vector() && !ty.lane_type().is_float() {
+                Some(ty)
+            } else {
+                None
+            }
+        }
+
         #[inline]
         fn ty_vec64_ctor(&mut self, ty: Type) -> Option<Type> {
             if ty.is_vector() && ty.bits() == 64 {
diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index 98df224bcf23..69d7bd2f6665 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -381,6 +381,15 @@
 (decl ty_float_or_vec (Type) Type)
 (extern extractor ty_float_or_vec ty_float_or_vec)
 
+;; A pure constructor that only matches vector floating-point types.
+(decl pure ty_vector_float (Type) Type)
+(extern constructor ty_vector_float ty_vector_float)
+
+;; A pure constructor that only matches vector types whose lanes are
+;; not floating-point.
+(decl pure ty_vector_not_float (Type) Type)
+(extern constructor ty_vector_not_float ty_vector_not_float)
+
 ;; A pure constructor/extractor that only matches 64-bit vector types.
 (decl pure ty_vec64 (Type) Type)
 (extern constructor ty_vec64 ty_vec64_ctor)
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-logical-compile.clif b/cranelift/filetests/filetests/isa/aarch64/simd-logical-compile.clif
index a1fa05f26696..34a41ea5413b 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-logical-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-logical-compile.clif
@@ -22,7 +22,7 @@ block0(v0: b32x4):
 ; umaxp v3.4s, v0.4s, v0.4s
 ; mov x5, v3.d[0]
 ; subs xzr, x5, #0
-; csetm x0, ne
+; cset x0, ne
 ; ret
 
 function %vall_true_i64x2(i64x2) -> b1 {
diff --git a/cranelift/filetests/filetests/runtests/simd-conversion.clif b/cranelift/filetests/filetests/runtests/simd-conversion.clif
index 9648acda8c09..62cff744818e 100644
--- a/cranelift/filetests/filetests/runtests/simd-conversion.clif
+++ b/cranelift/filetests/filetests/runtests/simd-conversion.clif
@@ -39,3 +39,11 @@ block0(v0:f32x4):
 ; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0]
 ; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0]
 ; run: %fcvt_to_uint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0]
+
+function %fcvt_low_from_sint(i32x4) -> f64x2 {
+block0(v0: i32x4):
+    v1 = fcvt_low_from_sint.f64x2 v0
+    return v1
+}
+; run: %fcvt_low_from_sint([0 1 -1 65535]) == [0x0.0 0x1.0]
+; run: %fcvt_low_from_sint([-1 123456789 0 1]) == [-0x1.0 0x1.d6f3454p26]
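
A reading aid for the new `vec_cmp` rules and the `fp_cond_code` constructor: `fp_cond_code` simply defers to the backend's existing `lower_fp_condcode` helper, so the `Cond` values matched above follow the usual AArch64 FCMP flag mapping. The Rust sketch below is a paraphrase of that mapping, not part of this patch, and the function name `fp_cond_code_sketch` is purely illustrative; it shows why `Cond.Mi` and `Cond.Ls` appear as the swapped-operand ordered "less than" cases, and `Cond.Vc` / `Cond.Vs` as the ordered/unordered cases.

// Illustrative paraphrase of the FloatCC -> Cond mapping wrapped by
// `fp_cond_code` (see `lower_fp_condcode` in the aarch64 backend).
// Assumes the backend's `Cond` and Cranelift's `FloatCC` are in scope.
fn fp_cond_code_sketch(cc: FloatCC) -> Cond {
    match cc {
        FloatCC::Ordered => Cond::Vc,           // no NaN operand
        FloatCC::Unordered => Cond::Vs,         // at least one NaN operand
        FloatCC::Equal => Cond::Eq,
        FloatCC::NotEqual => Cond::Ne,          // also true when unordered
        FloatCC::LessThan => Cond::Mi,          // hence Fcmgt with swapped operands
        FloatCC::LessThanOrEqual => Cond::Ls,   // hence Fcmge with swapped operands
        FloatCC::GreaterThan => Cond::Gt,
        FloatCC::GreaterThanOrEqual => Cond::Ge,
        FloatCC::UnorderedOrLessThan => Cond::Lt,
        FloatCC::UnorderedOrLessThanOrEqual => Cond::Le,
        FloatCC::UnorderedOrGreaterThan => Cond::Hi,
        FloatCC::UnorderedOrGreaterThanOrEqual => Cond::Hs,
        // OrderedNotEqual / UnorderedOrEqual have no single-condition encoding.
        _ => unimplemented!(),
    }
}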