From 6d48ce593d454058335fa58df353d7915ffb3ec9 Mon Sep 17 00:00:00 2001 From: Akira Saitoh Date: Wed, 13 Jul 2022 14:21:11 +0900 Subject: [PATCH] AArch64: Add floating point min/max pairwise instructions This commit adds floating point min/max pairwise instructions and binary encoding unit tests. Signed-off-by: Akira Saitoh --- compiler/aarch64/codegen/ARM64Debug.cpp | 16 ++++ compiler/aarch64/codegen/OMRInstOpCode.enum | 16 ++++ compiler/aarch64/codegen/OpBinary.cpp | 16 ++++ .../aarch64/BinaryEncoder.cpp | 86 +++++++++++++++++++ 4 files changed, 134 insertions(+) diff --git a/compiler/aarch64/codegen/ARM64Debug.cpp b/compiler/aarch64/codegen/ARM64Debug.cpp index 41657d4c929..4ce6d1988bb 100644 --- a/compiler/aarch64/codegen/ARM64Debug.cpp +++ b/compiler/aarch64/codegen/ARM64Debug.cpp @@ -1003,6 +1003,22 @@ static const char *opCodeToNameMap[] = "vfaddp2d", "faddp2s", "faddp2d", + "vfmaxnmp4s", + "vfmaxnmp2d", + "fmaxnmp2s", + "fmaxnmp2d", + "vfmaxp4s", + "vfmaxp2d", + "fmaxp2s", + "fmaxp2d", + "vfminnmp4s", + "vfminnmp2d", + "fminnmp2s", + "fminnmp2d", + "vfminp4s", + "vfminp2d", + "fminp2s", + "fminp2d", "nop", }; diff --git a/compiler/aarch64/codegen/OMRInstOpCode.enum b/compiler/aarch64/codegen/OMRInstOpCode.enum index 4dc08a16ab8..a540d421128 100644 --- a/compiler/aarch64/codegen/OMRInstOpCode.enum +++ b/compiler/aarch64/codegen/OMRInstOpCode.enum @@ -999,6 +999,22 @@ vfaddp2d, /* 0x6E60D400 FADDP */ faddp2s, /* 0x7E30D800 FADDP(scalar) */ faddp2d, /* 0x7E70D800 FADDP(scalar) */ + vfmaxnmp4s, /* 0x6E20C400 FMAXNMP */ + vfmaxnmp2d, /* 0x6E60C400 FMAXNMP */ + fmaxnmp2s, /* 0x7E30C800 FMAXNMP(scalar) */ + fmaxnmp2d, /* 0x7E70C800 FMAXNMP(scalar) */ + vfmaxp4s, /* 0x6E20F400 FMAXP */ + vfmaxp2d, /* 0x6E60F400 FMAXP */ + fmaxp2s, /* 0x7E30F800 FMAXP(scalar) */ + fmaxp2d, /* 0x7E70F800 FMAXP(scalar) */ + vfminnmp4s, /* 0x6EA0C400 FMINNMP */ + vfminnmp2d, /* 0x6EE0C400 FMINNMP */ + fminnmp2s, /* 0x7EB0C800 FMINNMP(scalar) */ + fminnmp2d, /* 0x7EF0C800 FMINNMP(scalar) */ + vfminp4s, /* 0x6EA0F400 FMINP */ + vfminp2d, /* 0x6EE0F400 FMINP */ + fminp2s, /* 0x7EB0F800 FMINP(scalar) */ + fminp2d, /* 0x7EF0F800 FMINP(scalar) */ /* Hint instructions */ nop, /* 0xD503201F NOP */ /* Internal OpCodes */ diff --git a/compiler/aarch64/codegen/OpBinary.cpp b/compiler/aarch64/codegen/OpBinary.cpp index 878cbb63141..b29ee162165 100644 --- a/compiler/aarch64/codegen/OpBinary.cpp +++ b/compiler/aarch64/codegen/OpBinary.cpp @@ -1000,6 +1000,22 @@ const OMR::ARM64::InstOpCode::OpCodeBinaryEntry OMR::ARM64::InstOpCode::binaryEn 0x6E60D400, /* FADDP vfaddp2d */ 0x7E30D800, /* FADDP(scalar) faddp2s */ 0x7E70D800, /* FADDP(scalar) faddp2d */ + 0x6E20C400, /* FMAXNMP vfmaxnmp4s */ + 0x6E60C400, /* FMAXNMP vfmaxnmp2d */ + 0x7E30C800, /* FMAXNMP(scalar) fmaxnmp2s */ + 0x7E70C800, /* FMAXNMP(scalar) fmaxnmp2d */ + 0x6E20F400, /* FMAXP vfmaxp4s */ + 0x6E60F400, /* FMAXP vfmaxp2d */ + 0x7E30F800, /* FMAXP(scalar) fmaxp2s */ + 0x7E70F800, /* FMAXP(scalar) fmaxp2d */ + 0x6EA0C400, /* FMINNMP vfminnmp4s */ + 0x6EE0C400, /* FMINNMP vfminnmp2d */ + 0x7EB0C800, /* FMINNMP(scalar) fminnmp2s */ + 0x7EF0C800, /* FMINNMP(scalar) fminnmp2d */ + 0x6EA0F400, /* FMINP vfminp4s */ + 0x6EE0F400, /* FMINP vfminp2d */ + 0x7EB0F800, /* FMINP(scalar) fminp2s */ + 0x7EF0F800, /* FMINP(scalar) fminp2d */ /* Hint instructions */ 0xD503201F, /* NOP nop */ }; diff --git a/fvtest/compilerunittest/aarch64/BinaryEncoder.cpp b/fvtest/compilerunittest/aarch64/BinaryEncoder.cpp index 1771107dc57..c96ba6e00de 100644 --- a/fvtest/compilerunittest/aarch64/BinaryEncoder.cpp +++ b/fvtest/compilerunittest/aarch64/BinaryEncoder.cpp @@ -2162,3 +2162,89 @@ INSTANTIATE_TEST_CASE_P(VectorFmulElem, ARM64Trg1Src2IndexedElementEncodingTest, std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 0, "4fcf9000"), std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 1, "4fdf9800") )); + +INSTANTIATE_TEST_CASE_P(VectorFloatMinMaxPairwise, ARM64Trg1Src2EncodingTest, ::testing::Values( + std::make_tuple(TR::InstOpCode::vfmaxnmp4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6e20c40f"), + std::make_tuple(TR::InstOpCode::vfmaxnmp4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6e20c41f"), + std::make_tuple(TR::InstOpCode::vfmaxnmp4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6e20c5e0"), + std::make_tuple(TR::InstOpCode::vfmaxnmp4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6e20c7e0"), + std::make_tuple(TR::InstOpCode::vfmaxnmp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6e2fc400"), + std::make_tuple(TR::InstOpCode::vfmaxnmp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6e3fc400"), + std::make_tuple(TR::InstOpCode::vfmaxnmp2d, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6e60c40f"), + std::make_tuple(TR::InstOpCode::vfmaxnmp2d, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6e60c41f"), + std::make_tuple(TR::InstOpCode::vfmaxnmp2d, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6e60c5e0"), + std::make_tuple(TR::InstOpCode::vfmaxnmp2d, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6e60c7e0"), + std::make_tuple(TR::InstOpCode::vfmaxnmp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6e6fc400"), + std::make_tuple(TR::InstOpCode::vfmaxnmp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6e7fc400"), + std::make_tuple(TR::InstOpCode::vfmaxp4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6e20f40f"), + std::make_tuple(TR::InstOpCode::vfmaxp4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6e20f41f"), + std::make_tuple(TR::InstOpCode::vfmaxp4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6e20f5e0"), + std::make_tuple(TR::InstOpCode::vfmaxp4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6e20f7e0"), + std::make_tuple(TR::InstOpCode::vfmaxp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6e2ff400"), + std::make_tuple(TR::InstOpCode::vfmaxp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6e3ff400"), + std::make_tuple(TR::InstOpCode::vfmaxp2d, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6e60f40f"), + std::make_tuple(TR::InstOpCode::vfmaxp2d, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6e60f41f"), + std::make_tuple(TR::InstOpCode::vfmaxp2d, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6e60f5e0"), + std::make_tuple(TR::InstOpCode::vfmaxp2d, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6e60f7e0"), + std::make_tuple(TR::InstOpCode::vfmaxp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6e6ff400"), + std::make_tuple(TR::InstOpCode::vfmaxp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6e7ff400"), + std::make_tuple(TR::InstOpCode::vfminnmp4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6ea0c40f"), + std::make_tuple(TR::InstOpCode::vfminnmp4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6ea0c41f"), + std::make_tuple(TR::InstOpCode::vfminnmp4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6ea0c5e0"), + std::make_tuple(TR::InstOpCode::vfminnmp4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6ea0c7e0"), + std::make_tuple(TR::InstOpCode::vfminnmp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6eafc400"), + std::make_tuple(TR::InstOpCode::vfminnmp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6ebfc400"), + std::make_tuple(TR::InstOpCode::vfminnmp2d, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6ee0c40f"), + std::make_tuple(TR::InstOpCode::vfminnmp2d, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6ee0c41f"), + std::make_tuple(TR::InstOpCode::vfminnmp2d, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6ee0c5e0"), + std::make_tuple(TR::InstOpCode::vfminnmp2d, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6ee0c7e0"), + std::make_tuple(TR::InstOpCode::vfminnmp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6eefc400"), + std::make_tuple(TR::InstOpCode::vfminnmp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6effc400"), + std::make_tuple(TR::InstOpCode::vfminp4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6ea0f40f"), + std::make_tuple(TR::InstOpCode::vfminp4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6ea0f41f"), + std::make_tuple(TR::InstOpCode::vfminp4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6ea0f5e0"), + std::make_tuple(TR::InstOpCode::vfminp4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6ea0f7e0"), + std::make_tuple(TR::InstOpCode::vfminp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6eaff400"), + std::make_tuple(TR::InstOpCode::vfminp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6ebff400"), + std::make_tuple(TR::InstOpCode::vfminp2d, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6ee0f40f"), + std::make_tuple(TR::InstOpCode::vfminp2d, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6ee0f41f"), + std::make_tuple(TR::InstOpCode::vfminp2d, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6ee0f5e0"), + std::make_tuple(TR::InstOpCode::vfminp2d, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6ee0f7e0"), + std::make_tuple(TR::InstOpCode::vfminp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6eeff400"), + std::make_tuple(TR::InstOpCode::vfminp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6efff400") +)); + +INSTANTIATE_TEST_CASE_P(ScalarFloatMinMaxPairwise, ARM64Trg1Src1EncodingTest, ::testing::Values( + std::make_tuple(TR::InstOpCode::fmaxnmp2s, TR::RealRegister::v15, TR::RealRegister::v0, "7e30c80f"), + std::make_tuple(TR::InstOpCode::fmaxnmp2s, TR::RealRegister::v31, TR::RealRegister::v0, "7e30c81f"), + std::make_tuple(TR::InstOpCode::fmaxnmp2s, TR::RealRegister::v0, TR::RealRegister::v15, "7e30c9e0"), + std::make_tuple(TR::InstOpCode::fmaxnmp2s, TR::RealRegister::v0, TR::RealRegister::v31, "7e30cbe0"), + std::make_tuple(TR::InstOpCode::fmaxnmp2d, TR::RealRegister::v15, TR::RealRegister::v0, "7e70c80f"), + std::make_tuple(TR::InstOpCode::fmaxnmp2d, TR::RealRegister::v31, TR::RealRegister::v0, "7e70c81f"), + std::make_tuple(TR::InstOpCode::fmaxnmp2d, TR::RealRegister::v0, TR::RealRegister::v15, "7e70c9e0"), + std::make_tuple(TR::InstOpCode::fmaxnmp2d, TR::RealRegister::v0, TR::RealRegister::v31, "7e70cbe0"), + std::make_tuple(TR::InstOpCode::fmaxp2s, TR::RealRegister::v15, TR::RealRegister::v0, "7e30f80f"), + std::make_tuple(TR::InstOpCode::fmaxp2s, TR::RealRegister::v31, TR::RealRegister::v0, "7e30f81f"), + std::make_tuple(TR::InstOpCode::fmaxp2s, TR::RealRegister::v0, TR::RealRegister::v15, "7e30f9e0"), + std::make_tuple(TR::InstOpCode::fmaxp2s, TR::RealRegister::v0, TR::RealRegister::v31, "7e30fbe0"), + std::make_tuple(TR::InstOpCode::fmaxp2d, TR::RealRegister::v15, TR::RealRegister::v0, "7e70f80f"), + std::make_tuple(TR::InstOpCode::fmaxp2d, TR::RealRegister::v31, TR::RealRegister::v0, "7e70f81f"), + std::make_tuple(TR::InstOpCode::fmaxp2d, TR::RealRegister::v0, TR::RealRegister::v15, "7e70f9e0"), + std::make_tuple(TR::InstOpCode::fmaxp2d, TR::RealRegister::v0, TR::RealRegister::v31, "7e70fbe0"), + std::make_tuple(TR::InstOpCode::fminnmp2s, TR::RealRegister::v15, TR::RealRegister::v0, "7eb0c80f"), + std::make_tuple(TR::InstOpCode::fminnmp2s, TR::RealRegister::v31, TR::RealRegister::v0, "7eb0c81f"), + std::make_tuple(TR::InstOpCode::fminnmp2s, TR::RealRegister::v0, TR::RealRegister::v15, "7eb0c9e0"), + std::make_tuple(TR::InstOpCode::fminnmp2s, TR::RealRegister::v0, TR::RealRegister::v31, "7eb0cbe0"), + std::make_tuple(TR::InstOpCode::fminnmp2d, TR::RealRegister::v15, TR::RealRegister::v0, "7ef0c80f"), + std::make_tuple(TR::InstOpCode::fminnmp2d, TR::RealRegister::v31, TR::RealRegister::v0, "7ef0c81f"), + std::make_tuple(TR::InstOpCode::fminnmp2d, TR::RealRegister::v0, TR::RealRegister::v15, "7ef0c9e0"), + std::make_tuple(TR::InstOpCode::fminnmp2d, TR::RealRegister::v0, TR::RealRegister::v31, "7ef0cbe0"), + std::make_tuple(TR::InstOpCode::fminp2s, TR::RealRegister::v15, TR::RealRegister::v0, "7eb0f80f"), + std::make_tuple(TR::InstOpCode::fminp2s, TR::RealRegister::v31, TR::RealRegister::v0, "7eb0f81f"), + std::make_tuple(TR::InstOpCode::fminp2s, TR::RealRegister::v0, TR::RealRegister::v15, "7eb0f9e0"), + std::make_tuple(TR::InstOpCode::fminp2s, TR::RealRegister::v0, TR::RealRegister::v31, "7eb0fbe0"), + std::make_tuple(TR::InstOpCode::fminp2d, TR::RealRegister::v15, TR::RealRegister::v0, "7ef0f80f"), + std::make_tuple(TR::InstOpCode::fminp2d, TR::RealRegister::v31, TR::RealRegister::v0, "7ef0f81f"), + std::make_tuple(TR::InstOpCode::fminp2d, TR::RealRegister::v0, TR::RealRegister::v15, "7ef0f9e0"), + std::make_tuple(TR::InstOpCode::fminp2d, TR::RealRegister::v0, TR::RealRegister::v31, "7ef0fbe0") +));