From 71d2f9b8b64633a6b5028dbf97c609357144c356 Mon Sep 17 00:00:00 2001 From: Ujjwal Sharma Date: Thu, 3 Oct 2019 04:33:59 +0530 Subject: [PATCH] [codegen] legalize imul for 64-bit and 128-bit operands Add a legalization that legalizes imul.I64 for 32-bit ISAs and imul.I128 for 64-bit (and subsequently 32-bit) ISAs. Refs: https://github.com/bnjbvr/cranelift-x86/issues/4 --- cranelift-codegen/meta/src/shared/legalize.rs | 20 +++++++++++++++++++ filetests/isa/x86/imul-i128.clif | 20 +++++++++++++++++++ filetests/isa/x86/legalize-i128.clif | 20 +++++++++++++++++++ filetests/isa/x86/legalize-i64.clif | 15 ++++++++++++++ 4 files changed, 75 insertions(+) create mode 100644 filetests/isa/x86/imul-i128.clif create mode 100644 filetests/isa/x86/legalize-i128.clif diff --git a/cranelift-codegen/meta/src/shared/legalize.rs b/cranelift-codegen/meta/src/shared/legalize.rs index bf046f5dd..d9e0936f1 100644 --- a/cranelift-codegen/meta/src/shared/legalize.rs +++ b/cranelift-codegen/meta/src/shared/legalize.rs @@ -115,6 +115,7 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro let uextend = insts.by_name("uextend"); let uload8 = insts.by_name("uload8"); let uload16 = insts.by_name("uload16"); + let umulhi = insts.by_name("umulhi"); let ushr = insts.by_name("ushr"); let ushr_imm = insts.by_name("ushr_imm"); let urem = insts.by_name("urem"); @@ -336,6 +337,25 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro } } + // TODO(ryzokuken): explore the perf diff w/ x86_umulx and consider having a + // separate legalization for x86. + for &ty in &[I64, I128] { + narrow.legalize( + def!(a = imul.ty(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(al = imul(xl, yl)), + def!(a1 = imul(xh, yl)), + def!(a2 = imul(xl, yh)), + def!(a3 = iadd(a1, a2)), + def!(a4 = umulhi(xl, yl)), + def!(ah = iadd(a3, a4)), + def!(a = iconcat(al, ah)), + ], + ); + } + // Widen instructions with one input operand. 
for &op in &[bnot, popcnt] { for &int_ty in &[I8, I16] { diff --git a/filetests/isa/x86/imul-i128.clif b/filetests/isa/x86/imul-i128.clif new file mode 100644 index 000000000..2d683a32d --- /dev/null +++ b/filetests/isa/x86/imul-i128.clif @@ -0,0 +1,20 @@ +test run +target x86_64 haswell + +function %test_imul_i128() -> b1 { +ebb0: + v11 = iconst.i64 0xf2347ac4503f1e24 + v12 = iconst.i64 0x0098fe985354ab06 + v1 = iconcat v11, v12 + v21 = iconst.i64 0xf606ba453589ef89 + v22 = iconst.i64 0x042e1f3054ca7432 + v2 = iconcat v21, v22 + v31 = iconst.i64 0xbe2044b2742ebd44 + v32 = iconst.i64 0xa363ce3b6849f307 + v3 = iconcat v31, v32 + v4 = imul v1, v2 + v5 = icmp eq v3, v4 + return v5 +} + +; run diff --git a/filetests/isa/x86/legalize-i128.clif b/filetests/isa/x86/legalize-i128.clif new file mode 100644 index 000000000..17b0a9dc2 --- /dev/null +++ b/filetests/isa/x86/legalize-i128.clif @@ -0,0 +1,20 @@ +; Test the legalization of i128 instructions on x86_64. +test legalizer +target x86_64 haswell + +; regex: V=v\d+ + +function %imul(i128, i128) -> i128 { +ebb0(v1: i128, v2: i128): + v10 = imul v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb + ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb + ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb + ; nextln: $(v13=$V) = iadd $v11, $v12 + ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = iadd $v13, $v14 + ; nextln: v10 = iconcat $v10_lsb, $v10_msb + return v10 +} diff --git a/filetests/isa/x86/legalize-i64.clif b/filetests/isa/x86/legalize-i64.clif index 2723bd69b..797345817 100644 --- a/filetests/isa/x86/legalize-i64.clif +++ b/filetests/isa/x86/legalize-i64.clif @@ -26,6 +26,21 @@ ebb0(v1: i64, v2: i64): return v10 } +function %imul(i64, i64) -> i64 { +ebb0(v1: i64, v2: i64): + v10 = imul v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), 
$(v2_msb=$V) + ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb + ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb + ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb + ; nextln: $(v13=$V) = iadd $v11, $v12 + ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = iadd $v13, $v14 + ; nextln: v10 = iconcat $v10_lsb, $v10_msb + return v10 +} + function %icmp_eq(i64, i64) -> b1 { ebb0(v1: i64, v2: i64): v10 = icmp eq v1, v2