Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port AvgRound & SqmulRoundSat to ISLE (AArch64) #4639

Merged
merged 1 commit into from
Aug 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,8 @@ fn define_simd_arithmetic(
"avg_round",
r#"
Unsigned average with rounding: `a := (x + y + 1) // 2`

The addition does not lose any information (such as from overflow).
"#,
&formats.binary,
)
Expand Down
7 changes: 7 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1542,6 +1542,13 @@
(_ Unit (emit (MInst.VecLanes op dst src size))))
dst))

;; Helper for emitting `MInst.VecShiftImm` instructions.
;;
;; Arguments, in declaration order: the shift operation (`VecShiftImmOp`), the
;; immediate shift amount (`u8`), the source vector register, and the
;; `VectorSize` describing the lane layout. Returns the freshly allocated
;; destination register holding the shifted vector.
;;
;; Note that the helper takes `imm` before `src`, whereas the `MInst.VecShiftImm`
;; constructor takes `imm` last.
(decl vec_shift_imm (VecShiftImmOp u8 Reg VectorSize) Reg)
(rule (vec_shift_imm op imm src size)
;; The temporary is always typed `$I8X16`, independent of `size`.
;; NOTE(review): presumably only the register class matters here, not the
;; exact vector type -- confirm.
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.VecShiftImm op dst src size imm))))
dst))

;; Helper for emitting `MInst.VecDup` instructions.
(decl vec_dup (Reg VectorSize) Reg)
(rule (vec_dup src size)
Expand Down
29 changes: 16 additions & 13 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1976,31 +1976,34 @@ impl MachInstEmit for Inst {
} => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let (is_shr, template) = match op {
VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
let (is_shr, mut template) = match op {
VecShiftImmOp::Ushr => (true, 0b_001_011110_0000_000_000001_00000_00000_u32),
VecShiftImmOp::Sshr => (true, 0b_000_011110_0000_000_000001_00000_00000_u32),
VecShiftImmOp::Shl => (false, 0b_000_011110_0000_000_010101_00000_00000_u32),
};
if size.is_128bits() {
template |= 0b1 << 30;
}
let imm = imm as u32;
// Deal with the somewhat strange encoding scheme for, and limits on,
// the shift amount.
let immh_immb = match (size, is_shr) {
(VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
let immh_immb = match (size.lane_size(), is_shr) {
(ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
0b_1000_000_u32 | (64 - imm)
}
(VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
(ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
0b_0100_000_u32 | (32 - imm)
}
(VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
(ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
0b_0010_000_u32 | (16 - imm)
}
(VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
(ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
0b_0001_000_u32 | (8 - imm)
}
(VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
(VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
(VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
(VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
(ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
(ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
(ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
(ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
_ => panic!(
"aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
op, size, imm
Expand Down
180 changes: 180 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3946,6 +3946,18 @@ fn test_aarch64_binemit() {
"smax v8.4s, v12.4s, v14.4s",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Urhadd,
rd: writable_vreg(8),
rn: vreg(1),
rm: vreg(3),
size: VectorSize::Size8x8,
},
"2814232E",
"urhadd v8.8b, v1.8b, v3.8b",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Urhadd,
Expand All @@ -3958,6 +3970,18 @@ fn test_aarch64_binemit() {
"urhadd v8.16b, v1.16b, v3.16b",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Urhadd,
rd: writable_vreg(2),
rn: vreg(13),
rm: vreg(6),
size: VectorSize::Size16x4,
},
"A215662E",
"urhadd v2.4h, v13.4h, v6.4h",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Urhadd,
Expand All @@ -3970,6 +3994,18 @@ fn test_aarch64_binemit() {
"urhadd v2.8h, v13.8h, v6.8h",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Urhadd,
rd: writable_vreg(8),
rn: vreg(12),
rm: vreg(14),
size: VectorSize::Size32x2,
},
"8815AE2E",
"urhadd v8.2s, v12.2s, v14.2s",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Urhadd,
Expand Down Expand Up @@ -5123,6 +5159,126 @@ fn test_aarch64_binemit() {
"sshr v3.8h, v19.8h, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(25),
rn: vreg(6),
imm: 8,
size: VectorSize::Size8x8,
},
"D904082F",
"ushr v25.8b, v6.8b, #8",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(5),
rn: vreg(21),
imm: 1,
size: VectorSize::Size8x8,
},
"A5060F2F",
"ushr v5.8b, v21.8b, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(25),
rn: vreg(6),
imm: 8,
size: VectorSize::Size8x16,
},
"D904086F",
"ushr v25.16b, v6.16b, #8",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(5),
rn: vreg(21),
imm: 1,
size: VectorSize::Size8x16,
},
"A5060F6F",
"ushr v5.16b, v21.16b, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(25),
rn: vreg(6),
imm: 16,
size: VectorSize::Size16x4,
},
"D904102F",
"ushr v25.4h, v6.4h, #16",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(5),
rn: vreg(21),
imm: 1,
size: VectorSize::Size16x4,
},
"A5061F2F",
"ushr v5.4h, v21.4h, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(25),
rn: vreg(6),
imm: 16,
size: VectorSize::Size16x8,
},
"D904106F",
"ushr v25.8h, v6.8h, #16",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(5),
rn: vreg(21),
imm: 1,
size: VectorSize::Size16x8,
},
"A5061F6F",
"ushr v5.8h, v21.8h, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(25),
rn: vreg(6),
imm: 32,
size: VectorSize::Size32x2,
},
"D904202F",
"ushr v25.2s, v6.2s, #32",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(5),
rn: vreg(21),
imm: 1,
size: VectorSize::Size32x2,
},
"A5063F2F",
"ushr v5.2s, v21.2s, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
Expand All @@ -5147,6 +5303,30 @@ fn test_aarch64_binemit() {
"ushr v5.4s, v21.4s, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(25),
rn: vreg(6),
imm: 64,
size: VectorSize::Size64x2,
},
"D904406F",
"ushr v25.2d, v6.2d, #64",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Ushr,
rd: writable_vreg(5),
rn: vreg(21),
imm: 1,
size: VectorSize::Size64x2,
},
"A5067F6F",
"ushr v5.2d, v21.2d, #1",
));

insns.push((
Inst::VecShiftImm {
op: VecShiftImmOp::Shl,
Expand Down
21 changes: 21 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,27 @@
(rule (lower (has_type (fits_in_32 ty) (iabs x)))
(abs (OperandSize.Size32) (put_in_reg_sext32 x)))

;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `avg_round` on `$I64X2` is expanded into a short instruction sequence
;; instead of a single `Urhadd`.
;; NOTE(review): presumably because `urhadd` has no 64-bit lane arrangement --
;; confirm against the Arm ISA reference.
;;
;; The sequence computes, per lane,
;;
;;   (x >> 1) + (y >> 1) + ((x | y) & 1)
;;
;; which equals the rounding average `(x + y + 1) >> 1` without ever forming
;; the full-width sum `x + y + 1`, so no carry out of the 64-bit lane is lost.
(rule (lower (has_type $I64X2 (avg_round x y)))
;; `one`: vector with every lane set to 1, used as the low-bit mask.
(let ((one Reg (splat_const 1 (VectorSize.Size64x2)))
;; `c` (first binding): x | y.
(c Reg (orr_vec x y (VectorSize.Size64x2)))
;; `c` (rebinding): (x | y) & 1 -- the rounding term, 1 when either input
;; has its low bit set.
(c Reg (and_vec c one (VectorSize.Size64x2)))
;; Halve both inputs with an unsigned shift right by one; `x` and `y` are
;; rebound to the halved values from here on.
(x Reg (vec_shift_imm (VecShiftImmOp.Ushr) 1 x
(VectorSize.Size64x2)))
(y Reg (vec_shift_imm (VecShiftImmOp.Ushr) 1 y
(VectorSize.Size64x2)))
;; Sum of the halves.
(sum Reg (add_vec x y (VectorSize.Size64x2))))
;; Add the rounding term to obtain the final result.
(add_vec c sum (VectorSize.Size64x2))))

;; Remaining `avg_round` case: a single `Urhadd` vector instruction, with the
;; vector size derived from the result type.
;; NOTE(review): `lane_fits_in_32` presumably matches vector types whose lanes
;; are at most 32 bits wide -- confirm against its definition.
(rule (lower (has_type (lane_fits_in_32 ty) (avg_round x y)))
(vec_rrr (VecALUOp.Urhadd) x y (vector_size ty)))

;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `sqmul_round_sat` lowers to a single `Sqrdmulh` vector instruction, with the
;; vector size derived from the result type.
;; NOTE(review): this pattern accepts any `multi_lane` type. `Sqrdmulh` is
;; presumably only valid for 16- and 32-bit lanes, so this relies on other lane
;; widths being rejected before lowering -- confirm.
(rule (lower (has_type ty @ (multi_lane _ _) (sqmul_round_sat x y)))
(vec_rrr (VecALUOp.Sqrdmulh) x y (vector_size ty)))

;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty @ (multi_lane _ _) (fadd rn rm)))
Expand Down
45 changes: 2 additions & 43 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1502,27 +1502,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}

Opcode::Iabs => implemented_in_isle(ctx),
Opcode::AvgRound => {
let ty = ty.unwrap();

if ty.lane_bits() == 64 {
return Err(CodegenError::Unsupported(format!(
"AvgRound: Unsupported type: {:?}",
ty
)));
}

let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Urhadd,
rd,
rn,
rm,
size: VectorSize::from_ty(ty),
});
}
Opcode::AvgRound => implemented_in_isle(ctx),

Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => implemented_in_isle(ctx),

Expand Down Expand Up @@ -1583,28 +1563,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
},

Opcode::SqmulRoundSat => {
let ty = ty.unwrap();

if !ty.is_vector() || (ty.lane_type() != I16 && ty.lane_type() != I32) {
return Err(CodegenError::Unsupported(format!(
"SqmulRoundSat: Unsupported type: {:?}",
ty
)));
}

let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);

ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Sqrdmulh,
rd,
rn,
rm,
size: VectorSize::from_ty(ty),
});
}
Opcode::SqmulRoundSat => implemented_in_isle(ctx),

Opcode::FcvtLowFromSint => {
let ty = ty.unwrap();
Expand Down
Loading