From 705243ed26be70cc65772914188178282d42ae11 Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Wed, 6 Jul 2022 15:17:32 -0400 Subject: [PATCH 01/13] Fix logical bug when detecting SIMD encoding Signed-off-by: BradleyWood --- compiler/x/codegen/OMRInstOpCode.hpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/compiler/x/codegen/OMRInstOpCode.hpp b/compiler/x/codegen/OMRInstOpCode.hpp index 60dea2b4ce..61d8c343ea 100644 --- a/compiler/x/codegen/OMRInstOpCode.hpp +++ b/compiler/x/codegen/OMRInstOpCode.hpp @@ -479,11 +479,11 @@ class InstOpCode: public OMR::InstOpCode { supported = target->supportsFeature(OMR_FEATURE_X86_AVX512F); - if (flags & X86FeatureProp_EVEX128RequiresAVX512VL) + if (supported && flags & X86FeatureProp_EVEX128RequiresAVX512VL) supported = target->supportsFeature(OMR_FEATURE_X86_AVX512VL); - if (flags & X86FeatureProp_EVEX128RequiresAVX512BW) + if (supported && flags & X86FeatureProp_EVEX128RequiresAVX512BW) supported = target->supportsFeature(OMR_FEATURE_X86_AVX512BW); - if (flags & X86FeatureProp_EVEX128RequiresAVX512DQ) + if (supported && flags & X86FeatureProp_EVEX128RequiresAVX512DQ) supported = target->supportsFeature(OMR_FEATURE_X86_AVX512DQ); if (supported) @@ -492,7 +492,10 @@ class InstOpCode: public OMR::InstOpCode if (flags & X86FeatureProp_VEX128Supported) { - if (flags & X86FeatureProp_VEX128RequiresAVX ) + if (flags & X86FeatureProp_VEX128RequiresAVX && target->supportsFeature(OMR_FEATURE_X86_AVX)) + return OMR::X86::VEX_L128; + + if (flags & X86FeatureProp_VEX128RequiresAVX2 && target->supportsFeature(OMR_FEATURE_X86_AVX2)) return OMR::X86::VEX_L128; } @@ -511,11 +514,11 @@ class InstOpCode: public OMR::InstOpCode { supported = target->supportsFeature(OMR_FEATURE_X86_AVX512F); - if (flags & X86FeatureProp_EVEX256RequiresAVX512VL) + if (supported && flags & X86FeatureProp_EVEX256RequiresAVX512VL) supported = target->supportsFeature(OMR_FEATURE_X86_AVX512VL); - if (flags & 
X86FeatureProp_EVEX256RequiresAVX512BW) + if (supported && flags & X86FeatureProp_EVEX256RequiresAVX512BW) supported = target->supportsFeature(OMR_FEATURE_X86_AVX512BW); - if (flags & X86FeatureProp_EVEX256RequiresAVX512DQ) + if (supported && flags & X86FeatureProp_EVEX256RequiresAVX512DQ) supported = target->supportsFeature(OMR_FEATURE_X86_AVX512DQ); if (supported) @@ -526,7 +529,7 @@ class InstOpCode: public OMR::InstOpCode { supported = target->supportsFeature(OMR_FEATURE_X86_AVX); - if (flags & X86FeatureProp_VEX256RequiresAVX2) + if (supported && flags & X86FeatureProp_VEX256RequiresAVX2) supported = target->supportsFeature(OMR_FEATURE_X86_AVX2); if (supported) From f7b097ac609e9e23bedf25cda2f3a45a8603d718 Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Tue, 5 Jul 2022 16:22:45 -0400 Subject: [PATCH 02/13] Add x86 feature flag properties to sqrtpd Signed-off-by: BradleyWood --- compiler/x/codegen/UnaryEvaluator.cpp | 4 +-- compiler/x/codegen/X86Ops.ins | 20 ++++++++------- fvtest/compilerunittest/x/BinaryEncoder.cpp | 28 +++++++++++++++------ 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/compiler/x/codegen/UnaryEvaluator.cpp b/compiler/x/codegen/UnaryEvaluator.cpp index 122afabbb4..b6fe3a3aaf 100644 --- a/compiler/x/codegen/UnaryEvaluator.cpp +++ b/compiler/x/codegen/UnaryEvaluator.cpp @@ -52,8 +52,8 @@ TR::Register *OMR::X86::TreeEvaluator::unaryVectorArithmeticEvaluator(TR::Node * TR_ASSERT_FATAL_WITH_NODE(node, opcode.getVectorResultDataType().getVectorElementType() == TR::Double, "Only double vsqrt is currently supported"); - regRegOpcode = supportsAvx ? OMR::InstOpCode::VSQRTPDRegReg : OMR::InstOpCode::SQRTPDRegReg; - regMemOpcode = supportsAvx ? 
OMR::InstOpCode::VSQRTPDRegMem : OMR::InstOpCode::bad; + regRegOpcode = OMR::InstOpCode::SQRTPDRegReg; + regMemOpcode = OMR::InstOpCode::VSQRTPDRegMem; // SSE RegMem instruction requires 16-byte alignment break; default: diff --git a/compiler/x/codegen/X86Ops.ins b/compiler/x/codegen/X86Ops.ins index 42f74d6668..f519a81d81 100644 --- a/compiler/x/codegen/X86Ops.ins +++ b/compiler/x/codegen/X86Ops.ins @@ -2631,20 +2631,22 @@ INSTRUCTION(MOVSDMemReg, movsd, PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_SIMDSingleSource), FEATURES(0)), INSTRUCTION(SQRTPDRegReg, sqrtpd, - BINARY(VEX_L___, VEX_vNONE, PREFIX_66, REX__, ESCAPE_0F__, 0x51, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(0)), -INSTRUCTION(VSQRTPDRegReg, vsqrtpd, - BINARY(VEX_L128, VEX_vNONE, PREFIX_66, REX__, ESCAPE_0F__, 0x51, 0, ModRM_RM__, Immediate_0), + BINARY(VEX_L128, VEX_vNONE, PREFIX_66, REX_W, ESCAPE_0F__, 0x51, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource), - FEATURES(0)), + FEATURES(X86FeatureProp_MinTargetSupported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F)), INSTRUCTION(VSQRTPDRegMem, vsqrtpd, - BINARY(VEX_L128, VEX_vNONE, PREFIX_66, REX__, ESCAPE_0F__, 0x51, 0, ModRM_RM__, Immediate_0), + BINARY(VEX_L128, VEX_vNONE, PREFIX_66, REX_W, ESCAPE_0F__, 0x51, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget), 
PROPERTY1(IA32OpProp1_XMMTarget | IA32OpProp1_SourceIsMemRef | IA32OpProp1_SIMDSingleSource), - FEATURES(0)), + FEATURES(X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F)), INSTRUCTION(SQRTSSRegReg, sqrtss, BINARY(VEX_L128, VEX_vReg_, PREFIX_F3, REX__, ESCAPE_0F__, 0x51, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_SingleFP), diff --git a/fvtest/compilerunittest/x/BinaryEncoder.cpp b/fvtest/compilerunittest/x/BinaryEncoder.cpp index aa6bd1e146..5b5d56aced 100644 --- a/fvtest/compilerunittest/x/BinaryEncoder.cpp +++ b/fvtest/compilerunittest/x/BinaryEncoder.cpp @@ -120,8 +120,19 @@ INSTANTIATE_TEST_CASE_P(AVXSimdTest, XRegRegEncEncodingTest, ::testing::ValuesIn std::make_tuple(TR::InstOpCode::PADDBRegReg, TR::RealRegister::zmm1, TR::RealRegister::zmm2, OMR::X86::EVEX_L512, "62f17548fcca"), std::make_tuple(TR::InstOpCode::PADDBRegReg, TR::RealRegister::zmm2, TR::RealRegister::zmm1, OMR::X86::EVEX_L512, "62f16d48fcd1"), std::make_tuple(TR::InstOpCode::PADDBRegReg, TR::RealRegister::zmm1, TR::RealRegister::zmm1, OMR::X86::EVEX_L512, "62f17548fcc9"), - std::make_tuple(TR::InstOpCode::VSQRTPDRegReg, TR::RealRegister::xmm0, TR::RealRegister::xmm1, OMR::X86::VEX_L128, "c5f951c1"), - std::make_tuple(TR::InstOpCode::VSQRTPDRegReg, TR::RealRegister::xmm9, TR::RealRegister::xmm4, OMR::X86::VEX_L128, "c57951cc") + std::make_tuple(TR::InstOpCode::SQRTPDRegReg, TR::RealRegister::xmm0, TR::RealRegister::xmm1, OMR::X86::VEX_L128, "c4e1f951c1"), + std::make_tuple(TR::InstOpCode::SQRTPDRegReg, TR::RealRegister::xmm9, 
TR::RealRegister::xmm4, OMR::X86::VEX_L128, "c461f951cc"), + std::make_tuple(TR::InstOpCode::SQRTPDRegReg, TR::RealRegister::xmm0, TR::RealRegister::xmm1, OMR::X86::Legacy, "66480f51c1"), + std::make_tuple(TR::InstOpCode::SQRTPDRegReg, TR::RealRegister::xmm9, TR::RealRegister::xmm4, OMR::X86::Legacy, "664c0f51cc"), + std::make_tuple(TR::InstOpCode::SQRTPDRegReg, TR::RealRegister::xmm0, TR::RealRegister::xmm1, OMR::X86::EVEX_L128, "62f1fd0851c1"), + std::make_tuple(TR::InstOpCode::SQRTPDRegReg, TR::RealRegister::xmm0, TR::RealRegister::xmm1, OMR::X86::EVEX_L256, "62f1fd2851c1"), + std::make_tuple(TR::InstOpCode::SQRTPDRegReg, TR::RealRegister::xmm0, TR::RealRegister::xmm1, OMR::X86::EVEX_L512, "62f1fd4851c1"), + std::make_tuple(TR::InstOpCode::SQRTPSRegReg, TR::RealRegister::xmm9, TR::RealRegister::xmm4, OMR::X86::Legacy, "440f51cc"), + std::make_tuple(TR::InstOpCode::SQRTPSRegReg, TR::RealRegister::xmm0, TR::RealRegister::xmm1, OMR::X86::VEX_L128, "c5f851c1"), + std::make_tuple(TR::InstOpCode::SQRTPSRegReg, TR::RealRegister::xmm0, TR::RealRegister::xmm1, OMR::X86::VEX_L256, "c5fc51c1"), + std::make_tuple(TR::InstOpCode::SQRTPSRegReg, TR::RealRegister::xmm0, TR::RealRegister::xmm1, OMR::X86::EVEX_L128, "62f17c0851c1"), + std::make_tuple(TR::InstOpCode::SQRTPSRegReg, TR::RealRegister::xmm0, TR::RealRegister::xmm1, OMR::X86::EVEX_L256, "62f17c2851c1"), + std::make_tuple(TR::InstOpCode::SQRTPSRegReg, TR::RealRegister::xmm0, TR::RealRegister::xmm1, OMR::X86::EVEX_L512, "62f17c4851c1") ))); INSTANTIATE_TEST_CASE_P(AVXSimdRegRegVex128Test, XRegRegEncEncodingTest, ::testing::ValuesIn(*TRTest::MakeVector>( @@ -461,10 +472,7 @@ INSTANTIATE_TEST_CASE_P(Branch, XRegRegEncodingTest, ::testing::ValuesIn(*TRTest std::make_tuple(TR::InstOpCode::SBB8RegReg, TR::RealRegister::ecx, TR::RealRegister::eax, "481bc8"), std::make_tuple(TR::InstOpCode::SHRD4RegRegCL, TR::RealRegister::eax, TR::RealRegister::ecx, "0fadc8"), - std::make_tuple(TR::InstOpCode::SHRD4RegRegCL, 
TR::RealRegister::ecx, TR::RealRegister::eax, "0fadc1"), - - std::make_tuple(TR::InstOpCode::SQRTPDRegReg, TR::RealRegister::xmm0, TR::RealRegister::xmm1, "660f51c1"), - std::make_tuple(TR::InstOpCode::SQRTPDRegReg, TR::RealRegister::xmm9, TR::RealRegister::xmm4, "66440f51cc") + std::make_tuple(TR::InstOpCode::SHRD4RegRegCL, TR::RealRegister::ecx, TR::RealRegister::eax, "0fadc1") ))); class XRegMemEncEncodingTest : public TRTest::BinaryEncoderTest<>, public ::testing::WithParamInterface> {}; @@ -488,6 +496,10 @@ INSTANTIATE_TEST_CASE_P(X86RegMemEnc, XRegMemEncEncodingTest, ::testing::ValuesI std::make_tuple(TR::InstOpCode::MOVDQURegMem, TR::RealRegister::xmm1, TR::RealRegister::ecx, 0x0, OMR::X86::EVEX_L128, "62f17e086f09"), std::make_tuple(TR::InstOpCode::MOVDQURegMem, TR::RealRegister::xmm10, TR::RealRegister::eax, 0x0, OMR::X86::EVEX_L256, "62717e286f10"), std::make_tuple(TR::InstOpCode::MOVDQURegMem, TR::RealRegister::xmm10, TR::RealRegister::eax, 0x0, OMR::X86::EVEX_L512, "62717e486f10"), - std::make_tuple(TR::InstOpCode::VSQRTPDRegMem, TR::RealRegister::xmm1, TR::RealRegister::ecx, 0x8, OMR::X86::VEX_L128, "c5f9514908"), - std::make_tuple(TR::InstOpCode::VSQRTPDRegMem, TR::RealRegister::xmm10, TR::RealRegister::eax, 0x0, OMR::X86::VEX_L128, "c5795110") + std::make_tuple(TR::InstOpCode::VSQRTPDRegMem, TR::RealRegister::xmm1, TR::RealRegister::ecx, 0x8, OMR::X86::Legacy, "66480f514908"), + std::make_tuple(TR::InstOpCode::VSQRTPDRegMem, TR::RealRegister::xmm1, TR::RealRegister::ecx, 0x8, OMR::X86::VEX_L128, "c4e1f9514908"), + std::make_tuple(TR::InstOpCode::VSQRTPDRegMem, TR::RealRegister::xmm10, TR::RealRegister::eax, 0x0, OMR::X86::VEX_L256, "c461fd5110"), + std::make_tuple(TR::InstOpCode::VSQRTPDRegMem, TR::RealRegister::xmm10, TR::RealRegister::eax, 0x0, OMR::X86::EVEX_L128, "6271fd085110"), + std::make_tuple(TR::InstOpCode::VSQRTPDRegMem, TR::RealRegister::xmm10, TR::RealRegister::eax, 0x0, OMR::X86::EVEX_L256, "6271fd285110"), + 
std::make_tuple(TR::InstOpCode::VSQRTPDRegMem, TR::RealRegister::xmm10, TR::RealRegister::eax, 0x0, OMR::X86::EVEX_L512, "6271fd485110") ))); \ No newline at end of file From baae99727fae5ad56d6acac3d70926bce1b7d86d Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Thu, 30 Jun 2022 13:28:15 -0400 Subject: [PATCH 03/13] Support unary ops in getNativeSIMDOpcode() --- compiler/x/codegen/OMRTreeEvaluator.cpp | 80 ++++++++++++++++++++----- 1 file changed, 64 insertions(+), 16 deletions(-) diff --git a/compiler/x/codegen/OMRTreeEvaluator.cpp b/compiler/x/codegen/OMRTreeEvaluator.cpp index 437460bc62..a29e30ef88 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/codegen/OMRTreeEvaluator.cpp @@ -4028,9 +4028,9 @@ OMR::X86::TreeEvaluator::ibyteswapEvaluator(TR::Node *node, TR::CodeGenerator *c return target; } -enum BinaryArithmeticOps : uint32_t +enum ArithmeticOps : uint32_t { - BinaryArithmeticInvalid, + ArithmeticInvalid, BinaryArithmeticAdd, BinaryArithmeticSub, BinaryArithmeticMul, @@ -4038,7 +4038,12 @@ enum BinaryArithmeticOps : uint32_t BinaryArithmeticAnd, BinaryArithmeticOr, BinaryArithmeticXor, - NumBinaryArithmeticOps + NumBinaryArithmeticOps, + UnaryArithmeticMin, + UnaryArithmeticMax, + UnaryArithmeticAbs, + LastOp, + NumUnaryArithmeticOps = LastOp - NumBinaryArithmeticOps + 1 }; static const TR::InstOpCode::Mnemonic BinaryArithmeticOpCodesForReg[TR::NumOMRTypes][NumBinaryArithmeticOps] = @@ -4095,6 +4100,28 @@ static const TR::InstOpCode::Mnemonic VectorBinaryArithmeticOpCodesForMem[TR::Nu { TR::InstOpCode::bad, TR::InstOpCode::ADDPDRegMem, TR::InstOpCode::SUBPDRegMem, TR::InstOpCode::MULPDRegMem, TR::InstOpCode::DIVPDRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Double }; +static const TR::InstOpCode::Mnemonic VectorUnaryArithmeticOpCodesForReg[TR::NumVectorElementTypes][NumUnaryArithmeticOps] = + { + // Invalid, min, max, abs, + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, 
TR::InstOpCode::bad }, // Int8 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int16 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int32 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int64 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Float + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Double + }; + + +static const TR::InstOpCode::Mnemonic VectorUnaryArithmeticOpCodesForMem[TR::NumVectorElementTypes][NumUnaryArithmeticOps] = + { + // Invalid, min, max, abs, + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int8 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int16 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int32 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int64 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Float + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Double + }; static const TR::ILOpCodes MemoryLoadOpCodes[TR::NumOMRTypes] = { @@ -4111,32 +4138,42 @@ static const TR::ILOpCodes MemoryLoadOpCodes[TR::NumOMRTypes] = TR::InstOpCode OMR::X86::TreeEvaluator::getNativeSIMDOpcode(TR::ILOpCodes opcode, TR::DataType type, bool memForm) { - BinaryArithmeticOps arithmetic; + ArithmeticOps binaryOp = ArithmeticInvalid; + ArithmeticOps unaryOp = ArithmeticInvalid; if (OMR::ILOpCode::isVectorOpCode(opcode)) { switch (OMR::ILOpCode::getVectorOperation(opcode)) { case TR::vadd: - arithmetic = BinaryArithmeticAdd; + binaryOp = BinaryArithmeticAdd; break; case TR::vsub: - arithmetic = BinaryArithmeticSub; + binaryOp = BinaryArithmeticSub; break; case TR::vmul: - arithmetic 
= BinaryArithmeticMul; + binaryOp = BinaryArithmeticMul; break; case TR::vdiv: - arithmetic = BinaryArithmeticDiv; + binaryOp = BinaryArithmeticDiv; break; case TR::vand: - arithmetic = BinaryArithmeticAnd; + binaryOp = BinaryArithmeticAnd; break; case TR::vor: - arithmetic = BinaryArithmeticOr; + binaryOp = BinaryArithmeticOr; break; case TR::vxor: - arithmetic = BinaryArithmeticXor; + binaryOp = BinaryArithmeticXor; + break; + case TR::vmin: + unaryOp = UnaryArithmeticMin; + break; + case TR::vmax: + unaryOp = UnaryArithmeticMax; + break; + case TR::vabs: + unaryOp = UnaryArithmeticAbs; break; default: return TR::InstOpCode::bad; @@ -4148,14 +4185,25 @@ TR::InstOpCode OMR::X86::TreeEvaluator::getNativeSIMDOpcode(TR::ILOpCodes opcode return TR::InstOpCode::bad; } - TR::InstOpCode::Mnemonic memOpcode = VectorBinaryArithmeticOpCodesForMem[type.getVectorElementType() - 1][arithmetic]; - TR::InstOpCode::Mnemonic regOpcode = VectorBinaryArithmeticOpCodesForReg[type.getVectorElementType() - 1][arithmetic]; + TR::InstOpCode::Mnemonic memOpcode; + TR::InstOpCode::Mnemonic regOpcode; + + if (binaryOp != ArithmeticInvalid) + { + memOpcode = VectorBinaryArithmeticOpCodesForMem[type.getVectorElementType() - 1][binaryOp]; + regOpcode = VectorBinaryArithmeticOpCodesForReg[type.getVectorElementType() - 1][binaryOp]; + } + else + { + memOpcode = VectorUnaryArithmeticOpCodesForMem[type.getVectorElementType() - 1][unaryOp - NumBinaryArithmeticOps]; + regOpcode = VectorUnaryArithmeticOpCodesForReg[type.getVectorElementType() - 1][unaryOp - NumBinaryArithmeticOps]; + } if (memOpcode == TR::InstOpCode::bad) - TR_ASSERT_FATAL(regOpcode == TR::InstOpCode::bad, "Missing mem-source opcode for vector operation"); + TR_ASSERT_FATAL(regOpcode == TR::InstOpCode::bad, "Missing mem-source opcode for vector operation"); if (regOpcode == TR::InstOpCode::bad) - TR_ASSERT_FATAL(memOpcode == TR::InstOpCode::bad, "Missing reg-source opcode for vector operation"); + TR_ASSERT_FATAL(memOpcode == 
TR::InstOpCode::bad, "Missing reg-source opcode for vector operation"); return memForm ? memOpcode : regOpcode; } @@ -4227,7 +4275,7 @@ TR::Register* OMR::X86::TreeEvaluator::floatingPointBinaryArithmeticEvaluator(TR { TR::DataType type = node->getDataType(); TR::ILOpCodes opcode = node->getOpCodeValue(); - BinaryArithmeticOps arithmetic; + ArithmeticOps arithmetic; switch (opcode) { From 28a9fb658b1a91a6ddb0315f34e6cc96796d8d6c Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Thu, 30 Jun 2022 14:26:46 -0400 Subject: [PATCH 04/13] Add x86 opcodes for vabs, vmin, vmax --- compiler/x/codegen/X86Ops.ins | 154 ++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/compiler/x/codegen/X86Ops.ins b/compiler/x/codegen/X86Ops.ins index f519a81d81..5305436444 100644 --- a/compiler/x/codegen/X86Ops.ins +++ b/compiler/x/codegen/X86Ops.ins @@ -3402,6 +3402,44 @@ INSTRUCTION(PMULLDRegMem, pmulld, X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) ), +INSTRUCTION(PABSBRegReg, pabsb, + BINARY(VEX_L128, VEX_vNONE, PREFIX_66, REX__, ESCAPE_0F38, 0x1C, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource), + FEATURES(X86FeatureProp_SSE3Supported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX256RequiresAVX512BW | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F | 
X86FeatureProp_EVEX256RequiresAVX512BW) + ), +INSTRUCTION(PABSWRegReg, pabsw, + BINARY(VEX_L128, VEX_vNONE, PREFIX_66, REX__, ESCAPE_0F38, 0x1D, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource), + FEATURES(X86FeatureProp_SSE3Supported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX256RequiresAVX512BW | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512BW) + ), +INSTRUCTION(PABSDRegReg, pabsd, + BINARY(VEX_L128, VEX_vNONE, PREFIX_66, REX__, ESCAPE_0F38, 0x1E, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource), + FEATURES(X86FeatureProp_SSE3Supported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) + ), +INSTRUCTION(PABSQRegReg, pabsq, + BINARY(VEX_L128, VEX_vNONE, PREFIX_66, REX_W, ESCAPE_0F38, 0x1F, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | 
IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource), + FEATURES(X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) + ), INSTRUCTION(PADDBRegReg, paddb, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xfc, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), @@ -3672,6 +3710,122 @@ INSTRUCTION(PSRLDQRegImm1, psrldq, PROPERTY0(IA32OpProp_TargetRegisterInModRM | IA32OpProp_ByteImmediate | IA32OpProp_ModifiesTarget | IA32OpProp_UsesTarget), PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), FEATURES(0)), +INSTRUCTION(PMINSBRegReg, pminsb, + BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F38, 0x38, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + FEATURES(X86FeatureProp_SSE4_1Supported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX256RequiresAVX512BW | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512BW) + ), +INSTRUCTION(PMINSWRegReg, pminsw, + BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xEA, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | 
IA32OpProp1_XMMTarget), + FEATURES(X86FeatureProp_MinTargetSupported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX256RequiresAVX512BW | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512BW) + ), +INSTRUCTION(PMINSDRegReg, pminsd, + BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F38, 0x39, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + FEATURES(X86FeatureProp_SSE4_1Supported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) + ), +INSTRUCTION(PMINSQRegReg, pminsq, + BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F38, 0x39, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + FEATURES(X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) + ), 
+INSTRUCTION(MINPSRegReg, minps, + BINARY(VEX_L128, VEX_vReg_, PREFIX___, REX__, ESCAPE_0F__, 0x5D, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + FEATURES(X86FeatureProp_MinTargetSupported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) + ), +INSTRUCTION(MINPDRegReg, minpd, + BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0x5D, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + FEATURES(X86FeatureProp_MinTargetSupported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) + ), +INSTRUCTION(PMAXSBRegReg, pmaxsb, + BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F38, 0x3C, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + FEATURES(X86FeatureProp_SSE4_1Supported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | + 
X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX256RequiresAVX512BW | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512BW) + ), +INSTRUCTION(PMAXSWRegReg, pmaxsw, + BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xEE, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + FEATURES(X86FeatureProp_MinTargetSupported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX256RequiresAVX512BW | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512BW) + ), +INSTRUCTION(PMAXSDRegReg, pmaxsd, + BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F38, 0x3D, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + FEATURES(X86FeatureProp_SSE4_1Supported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | 
X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) + ), +INSTRUCTION(PMAXSQRegReg, pmaxsq, + BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F38, 0x3D, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + FEATURES(X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) + ), +INSTRUCTION(MAXPSRegReg, maxps, + BINARY(VEX_L128, VEX_vReg_, PREFIX___, REX__, ESCAPE_0F__, 0x5F, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + FEATURES(X86FeatureProp_MinTargetSupported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) + ), +INSTRUCTION(MAXPDRegReg, maxpd, + BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0x5F, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + FEATURES(X86FeatureProp_MinTargetSupported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | 
X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) + ), INSTRUCTION(PMOVZXxmm18Reg, pmovzx, BINARY(VEX_L128, VEX_vNONE, PREFIX_66, REX__, ESCAPE_0F38, 0x30, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_UsesTarget | IA32OpProp_SourceRegisterInModRM), From 8fa0882bb0728886380c1488d6a8509ef089825a Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Tue, 5 Jul 2022 12:15:31 -0400 Subject: [PATCH 05/13] Add x86 binary encoding tests for vmin, vmax Signed-off-by: BradleyWood --- fvtest/compilerunittest/x/BinaryEncoder.cpp | 76 +++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/fvtest/compilerunittest/x/BinaryEncoder.cpp b/fvtest/compilerunittest/x/BinaryEncoder.cpp index 5b5d56aced..3219a14c95 100644 --- a/fvtest/compilerunittest/x/BinaryEncoder.cpp +++ b/fvtest/compilerunittest/x/BinaryEncoder.cpp @@ -101,6 +101,82 @@ TEST_P(XRegRegEncEncodingTest, encode) { ASSERT_EQ(std::get<4>(GetParam()), encodeInstruction(instr)); } +INSTANTIATE_TEST_CASE_P(SIMDMinMaxTest, XRegRegEncEncodingTest, ::testing::ValuesIn(*TRTest::MakeVector>( + /* REX prefix 48 is unnecessary but not illegal */ + /* TODO: Remove it */ + std::make_tuple(TR::InstOpCode::PMINSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::Legacy, "660f3838c8"), + std::make_tuple(TR::InstOpCode::PMINSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::Legacy, "660feac8"), + std::make_tuple(TR::InstOpCode::PMINSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::Legacy, "660f3839c8"), + std::make_tuple(TR::InstOpCode::MINPSRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::Legacy, "0f5dc8"), + std::make_tuple(TR::InstOpCode::MINPDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::Legacy, "66480f5dc8"), + 
std::make_tuple(TR::InstOpCode::PMAXSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::Legacy, "660f383cc8"), + std::make_tuple(TR::InstOpCode::PMAXSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::Legacy, "660feec8"), + std::make_tuple(TR::InstOpCode::PMAXSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::Legacy, "660f383dc8"), + std::make_tuple(TR::InstOpCode::MAXPSRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::Legacy, "0f5fc8"), + std::make_tuple(TR::InstOpCode::MAXPDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::Legacy, "66480f5fc8"), + + std::make_tuple(TR::InstOpCode::PMINSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L128, "c4e27138c8"), + std::make_tuple(TR::InstOpCode::PMINSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L128, "c5f1eac8"), + std::make_tuple(TR::InstOpCode::PMINSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L128, "c4e27139c8"), + std::make_tuple(TR::InstOpCode::MINPSRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L128, "c5f05dc8"), + std::make_tuple(TR::InstOpCode::MINPDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L128, "c4e1f15dc8"), + std::make_tuple(TR::InstOpCode::PMAXSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L128, "c4e2713cc8"), + std::make_tuple(TR::InstOpCode::PMAXSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L128, "c5f1eec8"), + std::make_tuple(TR::InstOpCode::PMAXSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L128, "c4e2713dc8"), + std::make_tuple(TR::InstOpCode::MAXPSRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L128, "c5f05fc8"), + std::make_tuple(TR::InstOpCode::MAXPDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L128, "c4e1f15fc8"), + + std::make_tuple(TR::InstOpCode::PMINSBRegReg, 
TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L256, "c4e27538c8"), + std::make_tuple(TR::InstOpCode::PMINSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L256, "c5f5eac8"), + std::make_tuple(TR::InstOpCode::PMINSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L256, "c4e27539c8"), + std::make_tuple(TR::InstOpCode::MINPSRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L256, "c5f45dc8"), + std::make_tuple(TR::InstOpCode::MINPDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L256, "c4e1f55dc8"), + std::make_tuple(TR::InstOpCode::PMAXSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L256, "c4e2753cc8"), + std::make_tuple(TR::InstOpCode::PMAXSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L256, "c5f5eec8"), + std::make_tuple(TR::InstOpCode::PMAXSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L256, "c4e2753dc8"), + std::make_tuple(TR::InstOpCode::MAXPSRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L256, "c5f45fc8"), + std::make_tuple(TR::InstOpCode::MAXPDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L256, "c4e1f55fc8"), + + std::make_tuple(TR::InstOpCode::PMINSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f2750838c8"), + std::make_tuple(TR::InstOpCode::PMINSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f17508eac8"), + std::make_tuple(TR::InstOpCode::PMINSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f2750839c8"), + std::make_tuple(TR::InstOpCode::PMINSQRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f2f50839c8"), + std::make_tuple(TR::InstOpCode::MINPSRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f174085dc8"), + std::make_tuple(TR::InstOpCode::MINPDRegReg, TR::RealRegister::xmm1, 
TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f1f5085dc8"), + std::make_tuple(TR::InstOpCode::PMAXSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f275083cc8"), + std::make_tuple(TR::InstOpCode::PMAXSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f17508eec8"), + std::make_tuple(TR::InstOpCode::PMAXSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f275083dc8"), + std::make_tuple(TR::InstOpCode::PMAXSQRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f2f5083dc8"), + std::make_tuple(TR::InstOpCode::MAXPSRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f174085fc8"), + std::make_tuple(TR::InstOpCode::MAXPDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f1f5085fc8"), + + std::make_tuple(TR::InstOpCode::PMINSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f2752838c8"), + std::make_tuple(TR::InstOpCode::PMINSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f17528eac8"), + std::make_tuple(TR::InstOpCode::PMINSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f2752839c8"), + std::make_tuple(TR::InstOpCode::PMINSQRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f2f52839c8"), + std::make_tuple(TR::InstOpCode::MINPSRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f174285dc8"), + std::make_tuple(TR::InstOpCode::MINPDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f1f5285dc8"), + std::make_tuple(TR::InstOpCode::PMAXSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f275283cc8"), + std::make_tuple(TR::InstOpCode::PMAXSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f17528eec8"), + std::make_tuple(TR::InstOpCode::PMAXSDRegReg, 
TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f275283dc8"), + std::make_tuple(TR::InstOpCode::PMAXSQRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f2f5283dc8"), + std::make_tuple(TR::InstOpCode::MAXPSRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f174285fc8"), + std::make_tuple(TR::InstOpCode::MAXPDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f1f5285fc8"), + + std::make_tuple(TR::InstOpCode::PMINSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f2754838c8"), + std::make_tuple(TR::InstOpCode::PMINSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f17548eac8"), + std::make_tuple(TR::InstOpCode::PMINSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f2754839c8"), + std::make_tuple(TR::InstOpCode::PMINSQRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f2f54839c8"), + std::make_tuple(TR::InstOpCode::MINPSRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f174485dc8"), + std::make_tuple(TR::InstOpCode::MINPDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f1f5485dc8"), + std::make_tuple(TR::InstOpCode::PMAXSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f275483cc8"), + std::make_tuple(TR::InstOpCode::PMAXSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f17548eec8"), + std::make_tuple(TR::InstOpCode::PMAXSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f275483dc8"), + std::make_tuple(TR::InstOpCode::PMAXSQRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f2f5483dc8"), + std::make_tuple(TR::InstOpCode::MAXPSRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f174485fc8"), + 
std::make_tuple(TR::InstOpCode::MAXPDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f1f5485fc8") +))); + INSTANTIATE_TEST_CASE_P(AVXSimdTest, XRegRegEncEncodingTest, ::testing::ValuesIn(*TRTest::MakeVector>( std::make_tuple(TR::InstOpCode::PADDBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm2, OMR::X86::Legacy, "660ffcca"), std::make_tuple(TR::InstOpCode::PADDBRegReg, TR::RealRegister::xmm2, TR::RealRegister::xmm1, OMR::X86::Legacy, "660ffcd1"), From 0190b45673cbc8c06ec3f08b14dc2974a61a0f8e Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Tue, 5 Jul 2022 13:30:34 -0400 Subject: [PATCH 06/13] Add x86 binary encoding tests for vabs Signed-off-by: BradleyWood --- fvtest/compilerunittest/x/BinaryEncoder.cpp | 29 +++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/fvtest/compilerunittest/x/BinaryEncoder.cpp b/fvtest/compilerunittest/x/BinaryEncoder.cpp index 3219a14c95..9360ab8082 100644 --- a/fvtest/compilerunittest/x/BinaryEncoder.cpp +++ b/fvtest/compilerunittest/x/BinaryEncoder.cpp @@ -177,6 +177,35 @@ INSTANTIATE_TEST_CASE_P(SIMDMinMaxTest, XRegRegEncEncodingTest, ::testing::Value std::make_tuple(TR::InstOpCode::MAXPDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f1f5485fc8") ))); +INSTANTIATE_TEST_CASE_P(SIMDAbsTest, XRegRegEncEncodingTest, ::testing::ValuesIn(*TRTest::MakeVector>( + std::make_tuple(TR::InstOpCode::PABSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::Legacy, "660f381cc8"), + std::make_tuple(TR::InstOpCode::PABSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::Legacy, "660f381dc8"), + std::make_tuple(TR::InstOpCode::PABSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::Legacy, "660f381ec8"), + + std::make_tuple(TR::InstOpCode::PABSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L128, "c4e2791cc8"), + std::make_tuple(TR::InstOpCode::PABSWRegReg, TR::RealRegister::xmm1, 
TR::RealRegister::xmm0, OMR::X86::VEX_L128, "c4e2791dc8"), + std::make_tuple(TR::InstOpCode::PABSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L128, "c4e2791ec8"), + + std::make_tuple(TR::InstOpCode::PABSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L256, "c4e27d1cc8"), + std::make_tuple(TR::InstOpCode::PABSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L256, "c4e27d1dc8"), + std::make_tuple(TR::InstOpCode::PABSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::VEX_L256, "c4e27d1ec8"), + + std::make_tuple(TR::InstOpCode::PABSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f27d081cc8"), + std::make_tuple(TR::InstOpCode::PABSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f27d081dc8"), + std::make_tuple(TR::InstOpCode::PABSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f27d081ec8"), + std::make_tuple(TR::InstOpCode::PABSQRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L128, "62f2fd081fc8"), + + std::make_tuple(TR::InstOpCode::PABSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f27d281cc8"), + std::make_tuple(TR::InstOpCode::PABSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f27d281dc8"), + std::make_tuple(TR::InstOpCode::PABSDRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f27d281ec8"), + std::make_tuple(TR::InstOpCode::PABSQRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L256, "62f2fd281fc8"), + + std::make_tuple(TR::InstOpCode::PABSBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f27d481cc8"), + std::make_tuple(TR::InstOpCode::PABSWRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f27d481dc8"), + std::make_tuple(TR::InstOpCode::PABSDRegReg, TR::RealRegister::xmm1, 
TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f27d481ec8"), + std::make_tuple(TR::InstOpCode::PABSQRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm0, OMR::X86::EVEX_L512, "62f2fd481fc8") +))); + INSTANTIATE_TEST_CASE_P(AVXSimdTest, XRegRegEncEncodingTest, ::testing::ValuesIn(*TRTest::MakeVector>( std::make_tuple(TR::InstOpCode::PADDBRegReg, TR::RealRegister::xmm1, TR::RealRegister::xmm2, OMR::X86::Legacy, "660ffcca"), std::make_tuple(TR::InstOpCode::PADDBRegReg, TR::RealRegister::xmm2, TR::RealRegister::xmm1, OMR::X86::Legacy, "660ffcd1"), From 47098b0e69f82b28aa1b7fd2e3416f16b142462e Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Tue, 5 Jul 2022 13:32:41 -0400 Subject: [PATCH 07/13] Implement vabs, vmin, vmax IL opcodes on x86 --- compiler/x/amd64/codegen/OMRTreeEvaluator.cpp | 6 +-- compiler/x/codegen/OMRTreeEvaluator.cpp | 18 +++---- compiler/x/codegen/UnaryEvaluator.cpp | 50 ++++++++----------- compiler/x/i386/codegen/OMRTreeEvaluator.cpp | 6 +-- 4 files changed, 34 insertions(+), 46 deletions(-) diff --git a/compiler/x/amd64/codegen/OMRTreeEvaluator.cpp b/compiler/x/amd64/codegen/OMRTreeEvaluator.cpp index c83ad680c3..5bac7ce122 100644 --- a/compiler/x/amd64/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/amd64/codegen/OMRTreeEvaluator.cpp @@ -1472,19 +1472,19 @@ OMR::X86::AMD64::TreeEvaluator::vfmaEvaluator(TR::Node *node, TR::CodeGenerator TR::Register* OMR::X86::AMD64::TreeEvaluator::vabsEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - return TR::TreeEvaluator::unImpOpEvaluator(node, cg); + return TR::TreeEvaluator::unaryVectorArithmeticEvaluator(node, cg); } TR::Register* OMR::X86::AMD64::TreeEvaluator::vminEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - return TR::TreeEvaluator::unImpOpEvaluator(node, cg); + return TR::TreeEvaluator::vectorBinaryArithmeticEvaluator(node, cg); } TR::Register* OMR::X86::AMD64::TreeEvaluator::vmaxEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - return TR::TreeEvaluator::unImpOpEvaluator(node, cg); + 
return TR::TreeEvaluator::vectorBinaryArithmeticEvaluator(node, cg); } TR::Register* diff --git a/compiler/x/codegen/OMRTreeEvaluator.cpp b/compiler/x/codegen/OMRTreeEvaluator.cpp index a29e30ef88..b5866b4420 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/codegen/OMRTreeEvaluator.cpp @@ -4103,12 +4103,12 @@ static const TR::InstOpCode::Mnemonic VectorBinaryArithmeticOpCodesForMem[TR::Nu static const TR::InstOpCode::Mnemonic VectorUnaryArithmeticOpCodesForReg[TR::NumVectorElementTypes][NumUnaryArithmeticOps] = { // Invalid, min, max, abs, - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int8 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int16 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int32 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int64 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Float - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Double + { TR::InstOpCode::bad, TR::InstOpCode::PMINSBRegReg, TR::InstOpCode::PMAXSBRegReg, TR::InstOpCode::PABSBRegReg }, // Int8 + { TR::InstOpCode::bad, TR::InstOpCode::PMINSWRegReg, TR::InstOpCode::PMAXSWRegReg, TR::InstOpCode::PABSWRegReg }, // Int16 + { TR::InstOpCode::bad, TR::InstOpCode::PMINSDRegReg, TR::InstOpCode::PMAXSDRegReg, TR::InstOpCode::PABSDRegReg }, // Int32 + { TR::InstOpCode::bad, TR::InstOpCode::PMINSQRegReg, TR::InstOpCode::PMAXSQRegReg, TR::InstOpCode::PABSQRegReg }, // Int64 + { TR::InstOpCode::bad, TR::InstOpCode::MINPSRegReg, TR::InstOpCode::MAXPSRegReg, TR::InstOpCode::bad }, // Float + { TR::InstOpCode::bad, TR::InstOpCode::MINPDRegReg, TR::InstOpCode::MAXPDRegReg, TR::InstOpCode::bad }, // Double }; @@ -4199,12 +4199,6 @@ TR::InstOpCode OMR::X86::TreeEvaluator::getNativeSIMDOpcode(TR::ILOpCodes opcode 
regOpcode = VectorUnaryArithmeticOpCodesForReg[type.getVectorElementType() - 1][unaryOp - NumBinaryArithmeticOps]; } - if (memOpcode == TR::InstOpCode::bad) - TR_ASSERT_FATAL(regOpcode == TR::InstOpCode::bad, "Missing mem-source opcode for vector operation"); - - if (regOpcode == TR::InstOpCode::bad) - TR_ASSERT_FATAL(memOpcode == TR::InstOpCode::bad, "Missing reg-source opcode for vector operation"); - return memForm ? memOpcode : regOpcode; } diff --git a/compiler/x/codegen/UnaryEvaluator.cpp b/compiler/x/codegen/UnaryEvaluator.cpp index b6fe3a3aaf..bfb0e466ff 100644 --- a/compiler/x/codegen/UnaryEvaluator.cpp +++ b/compiler/x/codegen/UnaryEvaluator.cpp @@ -42,40 +42,34 @@ TR::Register *OMR::X86::TreeEvaluator::unaryVectorArithmeticEvaluator(TR::Node * TR::Register *resultReg = cg->allocateRegister(TR_VRF); bool supportsAvx = cg->comp()->target().cpu.supportsAVX(); - TR::InstOpCode::Mnemonic regRegOpcode; - TR::InstOpCode::Mnemonic regMemOpcode; + TR::InstOpCode regRegOpcode = TR::InstOpCode::bad; + TR::InstOpCode regMemOpcode = TR::InstOpCode::bad; TR::ILOpCode opcode = node->getOpCode(); + TR::DataType type = node->getType(); + OMR::X86::Encoding simdEncoding; - switch (opcode.getVectorOperation()) - { - case TR::vsqrt: - TR_ASSERT_FATAL_WITH_NODE(node, opcode.getVectorResultDataType().getVectorElementType() == TR::Double, - "Only double vsqrt is currently supported"); - - regRegOpcode = OMR::InstOpCode::SQRTPDRegReg; - regMemOpcode = OMR::InstOpCode::VSQRTPDRegMem; - // SSE RegMem instruction requires 16-byte alignment - break; - default: - TR_ASSERT_FATAL_WITH_NODE(node, 0, "Opcode not supported by unaryVectorArithmeticEvaluator"); - break; - } + regMemOpcode = TR::TreeEvaluator::getNativeSIMDOpcode(opcode.getOpCodeValue(), node->getType(), true).getMnemonic(); + node->setRegister(resultReg); - if (valueNode->getRegister() == NULL && valueNode->getReferenceCount() == 1 && regMemOpcode != TR::InstOpCode::bad) - { - TR::MemoryReference *mr = 
generateX86MemoryReference(valueNode, cg); - generateRegMemInstruction(regMemOpcode, node, resultReg, mr, cg); - mr->decNodeReferenceCounts(cg); - } - else + if (valueNode->getRegister() == NULL && valueNode->getReferenceCount() == 1 && regMemOpcode.getMnemonic() != TR::InstOpCode::bad) { - TR_ASSERT_FATAL(regRegOpcode != TR::InstOpCode::bad, "Illegal opcode for unary operation"); - TR::Register *valueReg = cg->evaluate(valueNode); - generateRegRegInstruction(regRegOpcode, node, resultReg, valueReg, cg); - cg->decReferenceCount(valueNode); + simdEncoding = regMemOpcode.getSIMDEncoding(&cg->comp()->target().cpu, type.getVectorLength()); + + if (simdEncoding != OMR::X86::Encoding::Bad) + { + TR::MemoryReference *mr = generateX86MemoryReference(valueNode, cg); + generateRegMemInstruction(regMemOpcode.getMnemonic(), node, resultReg, mr, cg, simdEncoding); + mr->decNodeReferenceCounts(cg); + return resultReg; + } } - node->setRegister(resultReg); + regRegOpcode = regRegOpcode.getMnemonic() != TR::InstOpCode::bad ? 
regRegOpcode : TR::TreeEvaluator::getNativeSIMDOpcode(opcode.getOpCodeValue(), node->getType(), false).getMnemonic(); + TR_ASSERT_FATAL_WITH_NODE(node, regRegOpcode.getMnemonic() != TR::InstOpCode::bad, "Opcode not supported by unaryVectorArithmeticEvaluator"); + simdEncoding = regRegOpcode.getSIMDEncoding(&cg->comp()->target().cpu, type.getVectorLength()); + TR::Register *valueReg = cg->evaluate(valueNode); + generateRegRegInstruction(regRegOpcode.getMnemonic(), node, resultReg, valueReg, cg, simdEncoding); + cg->decReferenceCount(valueNode); return resultReg; } diff --git a/compiler/x/i386/codegen/OMRTreeEvaluator.cpp b/compiler/x/i386/codegen/OMRTreeEvaluator.cpp index f10255291f..2331dcc82e 100644 --- a/compiler/x/i386/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/i386/codegen/OMRTreeEvaluator.cpp @@ -1551,19 +1551,19 @@ OMR::X86::I386::TreeEvaluator::vfmaEvaluator(TR::Node *node, TR::CodeGenerator * TR::Register* OMR::X86::I386::TreeEvaluator::vabsEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - return TR::TreeEvaluator::unImpOpEvaluator(node, cg); + return TR::TreeEvaluator::unaryVectorArithmeticEvaluator(node, cg); } TR::Register* OMR::X86::I386::TreeEvaluator::vminEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - return TR::TreeEvaluator::unImpOpEvaluator(node, cg); + return TR::TreeEvaluator::vectorBinaryArithmeticEvaluator(node, cg); } TR::Register* OMR::X86::I386::TreeEvaluator::vmaxEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - return TR::TreeEvaluator::unImpOpEvaluator(node, cg); + return TR::TreeEvaluator::vectorBinaryArithmeticEvaluator(node, cg); } TR::Register* From 0dacba140c8e18c2c66ad217d1eb8a7a3f660198 Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Tue, 5 Jul 2022 16:52:30 -0400 Subject: [PATCH 08/13] Implement vsqrt for F,D on x86 at all VL Signed-off-by: BradleyWood --- compiler/x/codegen/FPTreeEvaluator.cpp | 2 +- compiler/x/codegen/OMRCodeGenerator.cpp | 2 -- compiler/x/codegen/OMRTreeEvaluator.cpp | 32 
++++++++++++++----------- compiler/x/codegen/UnaryEvaluator.cpp | 2 +- compiler/x/codegen/X86Ops.ins | 17 +++++++++++++ 5 files changed, 37 insertions(+), 18 deletions(-) diff --git a/compiler/x/codegen/FPTreeEvaluator.cpp b/compiler/x/codegen/FPTreeEvaluator.cpp index c3e2b59f7b..91fd4730af 100644 --- a/compiler/x/codegen/FPTreeEvaluator.cpp +++ b/compiler/x/codegen/FPTreeEvaluator.cpp @@ -565,7 +565,7 @@ TR::Register *OMR::X86::TreeEvaluator::dsqrtEvaluator(TR::Node *node, TR::CodeGe TR::Register* OMR::X86::TreeEvaluator::vsqrtEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - TR_ASSERT_FATAL(node->getDataType().getVectorElementType() == TR::Double, "Unsupported datatype for vsqrt opcode"); + TR_ASSERT_FATAL(node->getDataType().getVectorElementType().isFloatingPoint(), "Unsupported datatype for vsqrt opcode"); return TR::TreeEvaluator::unaryVectorArithmeticEvaluator(node, cg); } diff --git a/compiler/x/codegen/OMRCodeGenerator.cpp b/compiler/x/codegen/OMRCodeGenerator.cpp index f78a3a3ea0..f4ba6a27d9 100644 --- a/compiler/x/codegen/OMRCodeGenerator.cpp +++ b/compiler/x/codegen/OMRCodeGenerator.cpp @@ -1036,8 +1036,6 @@ bool OMR::X86::CodeGenerator::getSupportsOpCodeForAutoSIMD(TR::CPU *cpu, TR::ILO return ot.getVectorLength() == TR::VectorLength128; else return false; - case TR::vsqrt: - return (et == TR::Double); /* * GRA does not work with vector registers on 32 bit due to a bug where xmm registers are not being assigned. 
diff --git a/compiler/x/codegen/OMRTreeEvaluator.cpp b/compiler/x/codegen/OMRTreeEvaluator.cpp index b5866b4420..357046a144 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/codegen/OMRTreeEvaluator.cpp @@ -4042,6 +4042,7 @@ enum ArithmeticOps : uint32_t UnaryArithmeticMin, UnaryArithmeticMax, UnaryArithmeticAbs, + UnaryArithmeticSqrt, LastOp, NumUnaryArithmeticOps = LastOp - NumBinaryArithmeticOps + 1 }; @@ -4102,25 +4103,25 @@ static const TR::InstOpCode::Mnemonic VectorBinaryArithmeticOpCodesForMem[TR::Nu static const TR::InstOpCode::Mnemonic VectorUnaryArithmeticOpCodesForReg[TR::NumVectorElementTypes][NumUnaryArithmeticOps] = { - // Invalid, min, max, abs, - { TR::InstOpCode::bad, TR::InstOpCode::PMINSBRegReg, TR::InstOpCode::PMAXSBRegReg, TR::InstOpCode::PABSBRegReg }, // Int8 - { TR::InstOpCode::bad, TR::InstOpCode::PMINSWRegReg, TR::InstOpCode::PMAXSWRegReg, TR::InstOpCode::PABSWRegReg }, // Int16 - { TR::InstOpCode::bad, TR::InstOpCode::PMINSDRegReg, TR::InstOpCode::PMAXSDRegReg, TR::InstOpCode::PABSDRegReg }, // Int32 - { TR::InstOpCode::bad, TR::InstOpCode::PMINSQRegReg, TR::InstOpCode::PMAXSQRegReg, TR::InstOpCode::PABSQRegReg }, // Int64 - { TR::InstOpCode::bad, TR::InstOpCode::MINPSRegReg, TR::InstOpCode::MAXPSRegReg, TR::InstOpCode::bad }, // Float - { TR::InstOpCode::bad, TR::InstOpCode::MINPDRegReg, TR::InstOpCode::MAXPDRegReg, TR::InstOpCode::bad }, // Double + // Invalid, min, max, abs, sqrt + { TR::InstOpCode::bad, TR::InstOpCode::PMINSBRegReg, TR::InstOpCode::PMAXSBRegReg, TR::InstOpCode::PABSBRegReg, TR::InstOpCode::bad }, // Int8 + { TR::InstOpCode::bad, TR::InstOpCode::PMINSWRegReg, TR::InstOpCode::PMAXSWRegReg, TR::InstOpCode::PABSWRegReg, TR::InstOpCode::bad }, // Int16 + { TR::InstOpCode::bad, TR::InstOpCode::PMINSDRegReg, TR::InstOpCode::PMAXSDRegReg, TR::InstOpCode::PABSDRegReg, TR::InstOpCode::bad }, // Int32 + { TR::InstOpCode::bad, TR::InstOpCode::PMINSQRegReg, TR::InstOpCode::PMAXSQRegReg, 
TR::InstOpCode::PABSQRegReg, TR::InstOpCode::bad }, // Int64 + { TR::InstOpCode::bad, TR::InstOpCode::MINPSRegReg, TR::InstOpCode::MAXPSRegReg, TR::InstOpCode::bad, TR::InstOpCode::SQRTPSRegReg }, // Float + { TR::InstOpCode::bad, TR::InstOpCode::MINPDRegReg, TR::InstOpCode::MAXPDRegReg, TR::InstOpCode::bad, TR::InstOpCode::SQRTPDRegReg }, // Double }; static const TR::InstOpCode::Mnemonic VectorUnaryArithmeticOpCodesForMem[TR::NumVectorElementTypes][NumUnaryArithmeticOps] = { - // Invalid, min, max, abs, - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int8 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int16 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int32 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int64 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Float - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Double + // Invalid, min, max, abs, sqrt + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int8 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int16 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int32 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int64 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::VSQRTPSRegMem }, // Float + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::VSQRTPDRegMem }, // Double }; static const TR::ILOpCodes MemoryLoadOpCodes[TR::NumOMRTypes] = @@ -4175,6 +4176,9 @@ TR::InstOpCode 
OMR::X86::TreeEvaluator::getNativeSIMDOpcode(TR::ILOpCodes opcode case TR::vabs: unaryOp = UnaryArithmeticAbs; break; + case TR::vsqrt: + unaryOp = UnaryArithmeticSqrt; + break; default: return TR::InstOpCode::bad; } diff --git a/compiler/x/codegen/UnaryEvaluator.cpp b/compiler/x/codegen/UnaryEvaluator.cpp index bfb0e466ff..c7ccfe1650 100644 --- a/compiler/x/codegen/UnaryEvaluator.cpp +++ b/compiler/x/codegen/UnaryEvaluator.cpp @@ -41,7 +41,6 @@ TR::Register *OMR::X86::TreeEvaluator::unaryVectorArithmeticEvaluator(TR::Node * TR::Node *valueNode = node->getChild(0); TR::Register *resultReg = cg->allocateRegister(TR_VRF); - bool supportsAvx = cg->comp()->target().cpu.supportsAVX(); TR::InstOpCode regRegOpcode = TR::InstOpCode::bad; TR::InstOpCode regMemOpcode = TR::InstOpCode::bad; TR::ILOpCode opcode = node->getOpCode(); @@ -50,6 +49,7 @@ TR::Register *OMR::X86::TreeEvaluator::unaryVectorArithmeticEvaluator(TR::Node * regMemOpcode = TR::TreeEvaluator::getNativeSIMDOpcode(opcode.getOpCodeValue(), node->getType(), true).getMnemonic(); node->setRegister(resultReg); + TR_ASSERT_FATAL_WITH_NODE(node, opcode.isVectorOpCode(), "unaryVectorArithmeticEvaluator expects a vector opcode"); if (valueNode->getRegister() == NULL && valueNode->getReferenceCount() == 1 && regMemOpcode.getMnemonic() != TR::InstOpCode::bad) { diff --git a/compiler/x/codegen/X86Ops.ins b/compiler/x/codegen/X86Ops.ins index 5305436444..3870ad8c43 100644 --- a/compiler/x/codegen/X86Ops.ins +++ b/compiler/x/codegen/X86Ops.ins @@ -2647,6 +2647,23 @@ INSTRUCTION(VSQRTPDRegMem, vsqrtpd, X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F)), +INSTRUCTION(SQRTPSRegReg, vsqrtps, + BINARY(VEX_L128, VEX_vNONE, PREFIX___, REX__, ESCAPE_0F__, 0x51, 0, ModRM_RM__, 
Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource), + FEATURES(X86FeatureProp_MinTargetSupported | + X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F)), +INSTRUCTION(VSQRTPSRegMem, vsqrtps, + BINARY(VEX_L128, VEX_vNONE, PREFIX___, REX__, ESCAPE_0F__, 0x51, 0, ModRM_RM__, Immediate_0), + PROPERTY0(IA32OpProp_ModifiesTarget), + PROPERTY1(IA32OpProp1_XMMTarget | IA32OpProp1_SourceIsMemRef | IA32OpProp1_SIMDSingleSource), + FEATURES(X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | + X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | + X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | + X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F)), INSTRUCTION(SQRTSSRegReg, sqrtss, BINARY(VEX_L128, VEX_vReg_, PREFIX_F3, REX__, ESCAPE_0F__, 0x51, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_SingleFP), From 0fea8c03279e94104daaa0f6a2d0bd423b25f04b Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Tue, 5 Jul 2022 17:12:59 -0400 Subject: [PATCH 09/13] Implement vneg on x86 at all VL Signed-off-by: BradleyWood --- compiler/x/codegen/OMRCodeGenerator.cpp | 12 ++++++++++-- compiler/x/codegen/UnaryEvaluator.cpp | 14 +++++++------- 2 files changed, 17 insertions(+), 9 
deletions(-) diff --git a/compiler/x/codegen/OMRCodeGenerator.cpp b/compiler/x/codegen/OMRCodeGenerator.cpp index f4ba6a27d9..d2b4386381 100644 --- a/compiler/x/codegen/OMRCodeGenerator.cpp +++ b/compiler/x/codegen/OMRCodeGenerator.cpp @@ -1010,8 +1010,16 @@ bool OMR::X86::CodeGenerator::getSupportsOpCodeForAutoSIMD(TR::CPU *cpu, TR::ILO switch (opcode.getVectorOperation()) { case TR::vneg: - if (ot.getVectorLength() == TR::VectorLength128) - return true; + switch (ot.getVectorLength()) { + case TR::VectorLength128: + return true; + case TR::VectorLength256: + return cpu->supportsFeature(OMR_FEATURE_X86_AVX2); + case TR::VectorLength512: + return cpu->supportsFeature(OMR_FEATURE_X86_AVX512F); + default: + return false; + } case TR::vload: case TR::vloadi: case TR::vstore: diff --git a/compiler/x/codegen/UnaryEvaluator.cpp b/compiler/x/codegen/UnaryEvaluator.cpp index c7ccfe1650..5a62d6eb29 100644 --- a/compiler/x/codegen/UnaryEvaluator.cpp +++ b/compiler/x/codegen/UnaryEvaluator.cpp @@ -136,17 +136,16 @@ TR::Register* OMR::X86::TreeEvaluator::vnegEvaluator(TR::Node *node, TR::CodeGenerator *cg) { TR::DataType type = node->getDataType(); - - TR_ASSERT_FATAL_WITH_NODE(node, type.getVectorLength() == TR::VectorLength128, - "Only 128-bit vectors are supported right now\n"); - TR::Node *valueNode = node->getChild(0); TR::Register *resultReg = cg->allocateRegister(TR_VRF); TR::Register *valueReg = cg->evaluate(valueNode); // -valueReg = 0 - valueReg - generateRegRegInstruction(TR::InstOpCode::PXORRegReg, node, resultReg, resultReg, cg); - TR::InstOpCode::Mnemonic subOpcode; + TR::InstOpCode opcode = TR::InstOpCode::PXORRegReg; + OMR::X86::Encoding pxorEncoding = opcode.getSIMDEncoding(&cg->comp()->target().cpu, type.getVectorLength()); + + generateRegRegInstruction(TR::InstOpCode::PXORRegReg, node, resultReg, resultReg, cg, pxorEncoding); + TR::InstOpCode subOpcode; switch (type.getVectorElementType()) { @@ -173,7 +172,8 @@ OMR::X86::TreeEvaluator::vnegEvaluator(TR::Node 
*node, TR::CodeGenerator *cg) break; } - generateRegRegInstruction(subOpcode, node, resultReg, valueReg, cg); + OMR::X86::Encoding subEncoding = subOpcode.getSIMDEncoding(&cg->comp()->target().cpu, type.getVectorLength()); + generateRegRegInstruction(subOpcode.getMnemonic(), node, resultReg, valueReg, cg, subEncoding); node->setRegister(resultReg); cg->decReferenceCount(valueNode); From ba992817e9ee76501986be0dcf2bb2112f4ec382 Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Wed, 6 Jul 2022 11:05:22 -0400 Subject: [PATCH 10/13] Implement binary vector tril tests at all VL The vector IL opcodes are now generated at all vector lengths using test data acquired from their corresponding scalar opcode. A few missing opcodes were added to the function OMR::ILOpCode::convertScalarToVector(...). The function was also simplified to reduce the number of dispatches. Signed-off-by: BradleyWood --- compiler/il/OMRILOps.hpp | 82 +- fvtest/compilertriltest/VectorTest.cpp | 1385 +++++++----------------- 2 files changed, 429 insertions(+), 1038 deletions(-) diff --git a/compiler/il/OMRILOps.hpp b/compiler/il/OMRILOps.hpp index 17e482eb5c..80061d00f4 100644 --- a/compiler/il/OMRILOps.hpp +++ b/compiler/il/OMRILOps.hpp @@ -883,6 +883,22 @@ class ILOpCode return TR::BadILOp; } + static TR::ILOpCodes indirectStoreOpCode(TR::DataType type) + { + switch(type) + { + case TR::Int8: return TR::bstorei; + case TR::Int16: return TR::sstorei; + case TR::Int32: return TR::istorei; + case TR::Int64: return TR::lstorei; + case TR::Address: return TR::astorei; + case TR::Float: return TR::fstorei; + case TR::Double: return TR::dstorei; + default: TR_ASSERT(0, "no load opcode for this datatype"); + } + return TR::BadILOp; + } + static TR::ILOpCodes absOpCode(TR::DataType type) { if (type.isVector()) return createVectorOpCode(TR::vabs, type); @@ -1453,110 +1469,138 @@ class ILOpCode if (!elementType.isVectorElement()) return TR::BadILOp; TR::DataTypes vectorType = 
TR::DataType::createVectorType(elementType.getDataType(), vectorLength); + TR::VectorOperation vectorOperation; switch (op) { + case TR::fsqrt: + case TR::dsqrt: + vectorOperation = TR::vsqrt; + break; + case TR::imin: + case TR::lmin: + case TR::fmin: + case TR::dmin: + vectorOperation = TR::vmin; + break; + case TR::imax: + case TR::lmax: + case TR::fmax: + case TR::dmax: + vectorOperation = TR::vmax; + break; case TR::bload: case TR::sload: case TR::iload: case TR::lload: case TR::fload: case TR::dload: - return ILOpCode::createVectorOpCode(TR::vload, vectorType); + vectorOperation = TR::vload; + break; case TR::bloadi: case TR::sloadi: case TR::iloadi: case TR::lloadi: case TR::floadi: case TR::dloadi: - return ILOpCode::createVectorOpCode(TR::vloadi, vectorType); + vectorOperation = TR::vloadi; + break; case TR::bstore: case TR::sstore: case TR::istore: case TR::lstore: case TR::fstore: case TR::dstore: - return ILOpCode::createVectorOpCode(TR::vstore, vectorType); + vectorOperation = TR::vstore; + break; case TR::bstorei: case TR::sstorei: case TR::istorei: case TR::lstorei: case TR::fstorei: case TR::dstorei: - return ILOpCode::createVectorOpCode(TR::vstorei, vectorType); - + vectorOperation = TR::vstorei; + break; case TR::badd: case TR::sadd: case TR::iadd: case TR::ladd: case TR::fadd: case TR::dadd: - return ILOpCode::createVectorOpCode(TR::vadd, vectorType); + vectorOperation = TR::vadd; + break; case TR::bsub: case TR::ssub: case TR::isub: case TR::lsub: case TR::fsub: case TR::dsub: - return ILOpCode::createVectorOpCode(TR::vsub, vectorType); + vectorOperation = TR::vsub; + break; case TR::bmul: case TR::smul: case TR::imul: case TR::lmul: case TR::fmul: case TR::dmul: - return ILOpCode::createVectorOpCode(TR::vmul, vectorType); + vectorOperation = TR::vmul; + break; case TR::bdiv: case TR::sdiv: case TR::idiv: case TR::ldiv: case TR::fdiv: case TR::ddiv: - return ILOpCode::createVectorOpCode(TR::vdiv, vectorType); + vectorOperation = TR::vdiv; + 
break; case TR::bconst: case TR::sconst: case TR::iconst: case TR::lconst: case TR::fconst: case TR::dconst: - return ILOpCode::createVectorOpCode(TR::vsplats, vectorType); + vectorOperation = TR::vsplats; + break; case TR::bneg: case TR::sneg: case TR::ineg: case TR::lneg: case TR::fneg: case TR::dneg: - return ILOpCode::createVectorOpCode(TR::vneg, vectorType); - + vectorOperation = TR::vneg; + break; case TR::iabs: case TR::labs: case TR::fabs: case TR::dabs: - return ILOpCode::createVectorOpCode(TR::vabs, vectorType); - + vectorOperation = TR::vabs; + break; case TR::bor: case TR::sor: case TR::ior: case TR::lor: - return ILOpCode::createVectorOpCode(TR::vor, vectorType); + vectorOperation = TR::vor; + break; case TR::band: case TR::sand: case TR::iand: case TR::land: - return ILOpCode::createVectorOpCode(TR::vand, vectorType); + vectorOperation = TR::vand; + break; case TR::bxor: case TR::sxor: case TR::ixor: case TR::lxor: - return ILOpCode::createVectorOpCode(TR::vxor, vectorType); + vectorOperation = TR::vxor; + break; case TR::l2d: return ILOpCode::createVectorOpCode(TR::vconv, TR::DataType::createVectorType(TR::Int64, vectorLength), TR::DataType::createVectorType(TR::Double, vectorLength)); default: return TR::BadILOp; - } - return TR::BadILOp; + + return ILOpCode::createVectorOpCode(vectorOperation, vectorType); } static TR::ILOpCodes getRotateOpCodeFromDt(TR::DataType type) diff --git a/fvtest/compilertriltest/VectorTest.cpp b/fvtest/compilertriltest/VectorTest.cpp index d81f2cfe34..310ba4d3c2 100644 --- a/fvtest/compilertriltest/VectorTest.cpp +++ b/fvtest/compilertriltest/VectorTest.cpp @@ -23,1124 +23,471 @@ #include "default_compiler.hpp" #include "compilerunittest/CompilerUnitTest.hpp" -class VectorTest : public TRTest::JitTest {}; - -class ParameterizedVectorTest : public VectorTest, public ::testing::WithParamInterface> {}; - -TEST_P(ParameterizedVectorTest, VLoadStore) { - TR::VectorLength vl = std::get<0>(GetParam()); - TR::DataTypes et = 
std::get<1>(GetParam()); - - SKIP_IF(vl > TR::NumVectorLengths, MissingImplementation) << "Vector length is not supported by the target platform"; - SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - - TR::DataType vt = TR::DataType::createVectorType(et, vl); - - TR::ILOpCode loadOp = TR::ILOpCode::createVectorOpCode(TR::vloadi, vt); - TR::ILOpCode storeOp = TR::ILOpCode::createVectorOpCode(TR::vstorei, vt); - TR::CPU cpu = TR::CPU::detect(privateOmrPortLibrary); - bool platformSupport = TR::CodeGenerator::getSupportsOpCodeForAutoSIMD(&cpu, loadOp) && TR::CodeGenerator::getSupportsOpCodeForAutoSIMD(&cpu, loadOp); - SKIP_IF(!platformSupport, MissingImplementation) << "Opcode is not supported by the target platform"; - - char inputTrees[1024]; - char *formatStr = "(method return= NoType args=[Address,Address] " - " (block " - " (vstorei%s offset=0 " - " (aload parm=0) " - " (vloadi%s (aload parm=1))) " - " (return))) "; - - sprintf(inputTrees, formatStr, vt.toString(), vt.toString()); - auto trees = parseString(inputTrees); - ASSERT_NOTNULL(trees); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - auto entry_point = compiler.getEntryPoint(); - - const uint8_t maxVectorLength = 64; - char output[maxVectorLength] = {0}; - char input[maxVectorLength] = {0}; - char zero[maxVectorLength] = {0}; - - for (int i = 0; i < maxVectorLength; i++) { - input[i] = i; - } - - entry_point(output, input); - - EXPECT_EQ(0, memcmp(input, output, TR::DataType::getSize(vt))); - EXPECT_EQ(0, memcmp(output + TR::DataType::getSize(vt), zero, maxVectorLength - TR::DataType::getSize(vt))); -} - -INSTANTIATE_TEST_CASE_P(VLoadStoreVectorTest, ParameterizedVectorTest, 
::testing::ValuesIn(*TRTest::MakeVector>( - std::make_tuple(TR::VectorLength128, TR::Int8), - std::make_tuple(TR::VectorLength128, TR::Int16), - std::make_tuple(TR::VectorLength128, TR::Int32), - std::make_tuple(TR::VectorLength128, TR::Int64), - std::make_tuple(TR::VectorLength128, TR::Float), - std::make_tuple(TR::VectorLength128, TR::Double), - std::make_tuple(TR::VectorLength256, TR::Int8), - std::make_tuple(TR::VectorLength256, TR::Int16), - std::make_tuple(TR::VectorLength256, TR::Int32), - std::make_tuple(TR::VectorLength256, TR::Int64), - std::make_tuple(TR::VectorLength256, TR::Float), - std::make_tuple(TR::VectorLength256, TR::Double), - std::make_tuple(TR::VectorLength512, TR::Int8), - std::make_tuple(TR::VectorLength512, TR::Int16), - std::make_tuple(TR::VectorLength512, TR::Int32), - std::make_tuple(TR::VectorLength512, TR::Int64), - std::make_tuple(TR::VectorLength512, TR::Float), - std::make_tuple(TR::VectorLength512, TR::Double) -))); - -TEST_F(VectorTest, VDoubleAdd) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Double offset=0 " - " (aload parm=0) " - " (vaddVector128Double " - " (vloadiVector128Double (aload parm=1)) " - " (vloadiVector128Double (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - double output[] = {0.0, 0.0}; - double inputA[] = {1.0, 2.0}; - double inputB[] = {1.0, 2.0}; - - entry_point(output,inputA,inputB); - EXPECT_DOUBLE_EQ(inputA[0] + inputB[0], output[0]); // Epsilon = 4ULP -- is this necessary? - EXPECT_DOUBLE_EQ(inputA[1] + inputB[1], output[1]); // Epsilon = 4ULP -- is this necessary? -} - -TEST_F(VectorTest, VInt8Add) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Int8 offset=0 " - " (aload parm=0) " - " (vaddVector128Int8 " - " (vloadiVector128Int8 (aload parm=1)) " - " (vloadiVector128Int8 (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - int8_t output[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - int8_t inputA[] = {7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, 7}; - int8_t inputB[] = {-14, -12, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12, 14, 1}; - - entry_point(output,inputA,inputB); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ(inputA[i] + inputB[i], output[i]); - } -} - -TEST_F(VectorTest, VInt16Add) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Int16 offset=0 " - " (aload parm=0) " - " (vaddVector128Int16 " - " (vloadiVector128Int16 (aload parm=1)) " - " (vloadiVector128Int16 (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - int16_t output[] = {0, 0, 0, 0, 0, 0, 0, 0}; - int16_t inputA[] = {60, 45, 30, 0, -3, -2, -1, 2}; - int16_t inputB[] = {-5, -10, -1, 13, 15, 10, 7, 5}; - - entry_point(output,inputA,inputB); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ(inputA[i] + inputB[i], output[i]); - } -} - -TEST_F(VectorTest, VFloatAdd) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Float offset=0 " - " (aload parm=0) " - " (vaddVector128Float " - " (vloadiVector128Float (aload parm=1)) " - " (vloadiVector128Float (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - float output[] = {0.0f, 0.0f, 0.0f, 0.0f}; - float inputA[] = {6.0f, 0.0f, -0.1f, 0.6f}; - float inputB[] = {-0.5f, 3.5f, 3.0f, 0.7f}; - - entry_point(output,inputA,inputB); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_FLOAT_EQ(inputA[i] + inputB[i], output[i]); // Epsilon = 4ULP -- is this necessary? - } -} - -TEST_F(VectorTest, VInt8Sub) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Int8 offset=0 " - " (aload parm=0) " - " (vsubVector128Int8 " - " (vloadiVector128Int8 (aload parm=1)) " - " (vloadiVector128Int8 (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - int8_t output[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - int8_t inputA[] = {7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, 9}; - int8_t inputB[] = {14, 12, 10, 8, 6, 4, 2, 0, -2, -4, -6, -8, -10, -12, -14, 1}; - - entry_point(output,inputA,inputB); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ(inputA[i] - inputB[i], output[i]); - } -} - -TEST_F(VectorTest, VInt16Sub) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Int16 offset=0 " - " (aload parm=0) " - " (vsubVector128Int16 " - " (vloadiVector128Int16 (aload parm=1)) " - " (vloadiVector128Int16 (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - int16_t output[] = {0, 0, 0, 0, 0, 0, 0, 0}; - int16_t inputA[] = {60, 45, 30, 0, -3, -2, -1, 9}; - int16_t inputB[] = {5, 10, 1, -13, -15, -10, -7, 2}; - - entry_point(output,inputA,inputB); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ(inputA[i] - inputB[i], output[i]); - } -} - -TEST_F(VectorTest, VFloatSub) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Float offset=0 " - " (aload parm=0) " - " (vsubVector128Float " - " (vloadiVector128Float (aload parm=1)) " - " (vloadiVector128Float (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - float output[] = {0.0f, 0.0f, 0.0f, 0.0f}; - float inputA[] = {6.0f, 0.0f, -0.1f, 2.0f}; - float inputB[] = {0.5f, -3.5f, -3.0f, 0.7f}; - - entry_point(output,inputA,inputB); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_FLOAT_EQ(inputA[i] - inputB[i], output[i]); // Epsilon = 4ULP -- is this necessary? - } -} - -TEST_F(VectorTest, VDoubleSub) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Double offset=0 " - " (aload parm=0) " - " (vsubVector128Double " - " (vloadiVector128Double (aload parm=1)) " - " (vloadiVector128Double (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - double output[] = {0.0, 0.0}; - double inputA[] = {1.0, -1.5}; - double inputB[] = {1.1, -3.0}; - - entry_point(output,inputA,inputB); - EXPECT_DOUBLE_EQ(inputA[0] - inputB[0], output[0]); // Epsilon = 4ULP -- is this necessary? - EXPECT_DOUBLE_EQ(inputA[1] - inputB[1], output[1]); // Epsilon = 4ULP -- is this necessary? -} - -TEST_F(VectorTest, VInt8Mul) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Int8 offset=0 " - " (aload parm=0) " - " (vmulVector128Int8 " - " (vloadiVector128Int8 (aload parm=1)) " - " (vloadiVector128Int8 (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - SKIP_ON_X86(MissingImplementation); - SKIP_ON_HAMMER(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - int8_t output[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - int8_t inputA[] = {7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, 7}; - int8_t inputB[] = {-14, -12, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12, -14, 1}; - - entry_point(output,inputA,inputB); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ(inputA[i] * inputB[i], output[i]); - } -} - -TEST_F(VectorTest, VInt16Mul) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Int16 offset=0 " - " (aload parm=0) " - " (vmulVector128Int16 " - " (vloadiVector128Int16 (aload parm=1)) " - " (vloadiVector128Int16 (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - int16_t output[] = {0, 0, 0, 0, 0, 0, 0, 0}; - int16_t inputA[] = {60, 45, 30, 0, -3, -2, -1, 2}; - int16_t inputB[] = {-5, -10, -1, 13, 15, 10, -7, 5}; - - entry_point(output,inputA,inputB); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ(inputA[i] * inputB[i], output[i]); - } -} - -TEST_F(VectorTest, VFloatMul) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Float offset=0 " - " (aload parm=0) " - " (vmulVector128Float " - " (vloadiVector128Float (aload parm=1)) " - " (vloadiVector128Float (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - float output[] = {0.0f, 0.0f, 0.0f, 0.0f}; - float inputA[] = {6.0f, 0.0f, -0.1f, 0.6f}; - float inputB[] = {-0.5f, 3.5f, -3.0f, 0.7f}; - - entry_point(output,inputA,inputB); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_FLOAT_EQ(inputA[i] * inputB[i], output[i]); // Epsilon = 4ULP -- is this necessary? - } -} - -TEST_F(VectorTest, VDoubleMul) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Double offset=0 " - " (aload parm=0) " - " (vmulVector128Double " - " (vloadiVector128Double (aload parm=1)) " - " (vloadiVector128Double (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - double output[] = {0.0, 0.0}; - double inputA[] = {1.0, -1.5}; - double inputB[] = {-1.1, -3.0}; - - entry_point(output,inputA,inputB); - EXPECT_DOUBLE_EQ(inputA[0] * inputB[0], output[0]); // Epsilon = 4ULP -- is this necessary? - EXPECT_DOUBLE_EQ(inputA[1] * inputB[1], output[1]); // Epsilon = 4ULP -- is this necessary? -} - -TEST_F(VectorTest, VInt8Div) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstorei type=VectorInt8 offset=0 " - " (aload parm=0) " - " (vdiv " - " (vloadi type=VectorInt8 (aload parm=1)) " - " (vloadi type=VectorInt8 (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - SKIP_ON_X86(MissingImplementation); - SKIP_ON_HAMMER(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - int8_t output[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - int8_t inputA[] = {-128, 32, -96, 99, 35, -88, 45, 100, 17, 86, -28, -100, 71, 80, 15, 2}; - int8_t inputB[] = {32, 64, -4, 7,15, 11, 9, -25, 5, 43, -5, 7, 3, 10, 4, 2}; - - entry_point(output,inputA,inputB); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ(inputA[i] / inputB[i], output[i]); - } -} - -TEST_F(VectorTest, VInt16Div) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstorei type=VectorInt16 offset=0 " - " (aload parm=0) " - " (vdiv " - " (vloadi type=VectorInt16 (aload parm=1)) " - " (vloadi type=VectorInt16 (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - SKIP_ON_X86(MissingImplementation); - SKIP_ON_HAMMER(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - int16_t output[] = {0, 0, 0, 0, 0, 0, 0, 0}; - int16_t inputA[] = {-1024, 32, -30000, 9999, 4096, -8888, 9086, 150}; - int16_t inputB[] = {32, 2929, -40, 75, 1024, 11, 1, -3}; - - entry_point(output,inputA,inputB); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ(inputA[i] / inputB[i], output[i]); - } -} - -TEST_F(VectorTest, VInt32Div) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstorei type=VectorInt32 offset=0 " - " (aload parm=0) " - " (vdiv " - " (vloadi type=VectorInt32 (aload parm=1)) " - " (vloadi type=VectorInt32 (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - SKIP_ON_X86(MissingImplementation); - SKIP_ON_HAMMER(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - int32_t output[] = {0, 0, 0, 0}; - int32_t inputA[] = {1992385, 32, -788811, 9999}; - int32_t inputB[] = {779, 2929, -4, 75}; - - entry_point(output,inputA,inputB); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ(inputA[i] / inputB[i], output[i]); - } -} - -TEST_F(VectorTest, VInt64Div) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstorei type=VectorInt64 offset=0 " - " (aload parm=0) " - " (vdiv " - " (vloadi type=VectorInt64 (aload parm=1)) " - " (vloadi type=VectorInt64 (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - SKIP_ON_X86(MissingImplementation); - SKIP_ON_HAMMER(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - int64_t output[] = {0, 0, 0, 0}; - int64_t inputA[] = {(int64_t)0x10ff339955820123L, (int64_t)0xff00295014747555L, -64, 9999}; - int64_t inputB[] = {(int64_t)0x8000111122223333L, (int64_t)0xffffffff29231233L, 8, 75}; - - entry_point(output,inputA,inputB); - entry_point(&output[2],&inputA[2],&inputB[2]); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ(inputA[i] / inputB[i], output[i]); - } -} - -TEST_F(VectorTest, VFloatDiv) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Float offset=0 " - " (aload parm=0) " - " (vdivVector128Float " - " (vloadiVector128Float (aload parm=1)) " - " (vloadiVector128Float (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - float output[] = {0.0f, 0.0f, 0.0f, 0.0f}; - float inputA[] = {6.0f, 0.0f, -9.0f, 0.6f}; - float inputB[] = {-0.5f, 3.5f, -3.0f, 0.7f}; - - entry_point(output,inputA,inputB); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_FLOAT_EQ(inputA[i] / inputB[i], output[i]); // Epsilon = 4ULP -- is this necessary? - } -} - -TEST_F(VectorTest, VDoubleDiv) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Double offset=0 " - " (aload parm=0) " - " (vdivVector128Double " - " (vloadiVector128Double (aload parm=1)) " - " (vloadiVector128Double (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - double output[] = {0.0, 0.0}; - double inputA[] = {12.0, -1.5}; - double inputB[] = {-4.0, -3.0}; - - entry_point(output,inputA,inputB); - EXPECT_DOUBLE_EQ(inputA[0] / inputB[0], output[0]); // Epsilon = 4ULP -- is this necessary? - EXPECT_DOUBLE_EQ(inputA[1] / inputB[1], output[1]); // Epsilon = 4ULP -- is this necessary? -} - -TEST_F(VectorTest, VInt8And) { - - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Int8 offset=0 " - " (aload parm=0) " - " (vandVector128Int8 " - " (vloadiVector128Int8 (aload parm=1)) " - " (vloadiVector128Int8 (aload parm=2)))) " - " (return))) "; - - auto trees = parseString(inputTrees); - - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. - SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - SKIP_ON_X86(MissingImplementation); - SKIP_ON_HAMMER(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; +#define MAX(x, y) (x > y) ? 
x : y +#define ABS(x) (x < 0) ? -x : x +class VectorTest : public TRTest::JitTest {}; - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD +class ParameterizedBinaryVectorArithmeticTest : public VectorTest, public ::testing::WithParamInterface> {}; + +int vectorSize(TR::VectorLength vl) { + switch (vl) { + case TR::VectorLength64: + return 8; + case TR::VectorLength128: + return 16; + case TR::VectorLength256: + return 32; + case TR::VectorLength512: + return 64; + default: + TR_ASSERT_FATAL(0, "Illegal vector length"); + return 0; + } +} - int8_t output[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - int8_t inputA[] = {7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, 7}; - int8_t inputB[] = {-14, -12, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12, -14, 1}; +int typeSize(TR::DataTypes dt) { + switch (dt) { + case TR::Int8: + return 1; + case TR::Int16: + return 2; + case TR::Int32: + return 4; + case TR::Int64: + return 8; + case TR::Float: + return 4; + case TR::Double: + return 8; + default: + TR_ASSERT_FATAL(0, "Illegal data type"); + return 0; + } +} - entry_point(output,inputA,inputB); +void compareResults(void *expected, void *actual, TR::DataTypes dt, TR::VectorLength vl) { + int lengthBytes = vectorSize(vl); + int elementBytes = typeSize(dt); + + for (int i = 0; i < lengthBytes; i += elementBytes) { + switch (dt) { + case TR::Int8: + EXPECT_EQ(*((int8_t *) expected), *((int8_t *) actual)); + break; + case TR::Int16: + EXPECT_EQ(*((int16_t *) expected), *((int16_t *) actual)); + break; + case TR::Int32: + EXPECT_EQ(*((int32_t *) expected), *((int32_t *) actual)); + break; + case TR::Int64: + EXPECT_EQ(*((int64_t *) expected), *((int64_t *) actual)); + break; + case TR::Float: + EXPECT_FLOAT_EQ(*((float *) expected), *((float *) actual)); + break; + case TR::Double: + EXPECT_DOUBLE_EQ(*((double *) expected), *((double *) actual)); + break; + default: + TR_ASSERT_FATAL(0, "Illegal type to compare"); + break; + } + 
expected = static_cast(expected) + elementBytes; + actual = static_cast(actual) + elementBytes; + } +} - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ(inputA[i] & inputB[i], output[i]); +void generateByType(void *output, TR::DataType dt, bool nonZero) { + switch (dt) { + case TR::Int8: + *((int8_t *) output) = -128 + static_cast(rand() % 255); + if (nonZero && *((int8_t *) output) == 0) *((int8_t *) output) = 1; + break; + case TR::Int16: + *((int16_t *) output) = -200 + static_cast(rand() % 400); + if (nonZero && *((int16_t *) output) == 0) *((int16_t *) output) = 1; + break; + case TR::Int32: + *((int32_t *) output) = -1000 + static_cast(rand() % 2000); + if (nonZero && *((int32_t *) output) == 0) *((int32_t *) output) = 1; + break; + case TR::Int64: + *((int64_t *) output) = -1000 + static_cast(rand() % 2000); + if (nonZero && *((int64_t *) output) == 0) *((int64_t *) output) = 1; + break; + case TR::Float: + *((float *) output) = static_cast(rand() / 1000.0); + break; + case TR::Double: + *((double *) output) = static_cast(rand() / 1000.0); + break; } } -TEST_F(VectorTest, VInt8Or) { +void generateIO(TR::ILOpCode scalarOpcode, TR::VectorLength vl, void *output, void *inputA, void *inputB) { + TR::ILOpCode vectorOpcode = OMR::ILOpCode::convertScalarToVector(scalarOpcode.getOpCodeValue(), vl); + TR::DataType elementType = vectorOpcode.getType().getVectorElementType(); + TR::ILOpCode storeOpcode = OMR::ILOpCode::indirectStoreOpCode(elementType); + TR::ILOpCode loadOpcode = OMR::ILOpCode::indirectLoadOpCode(elementType); + char inputTrees[1024]; - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Int8 offset=0 " - " (aload parm=0) " - " (vorVector128Int8 " - " (vloadiVector128Int8 (aload parm=1)) " - " (vloadiVector128Int8 (aload parm=2)))) " - " (return))) "; + if (inputB) { + std::snprintf(inputTrees, sizeof(inputTrees), + "(method return=NoType args=[Address, Address, 
Address]" + " (block" + " (%s" + " (aload parm=0)" + " (%s" + " (%s (aload parm=1))" + " (%s (aload parm=2))))" + " (return)))", + storeOpcode.getName(), + scalarOpcode.getName(), + loadOpcode.getName(), + loadOpcode.getName() + ); + } else { + std::snprintf(inputTrees, sizeof(inputTrees), + "(method return=NoType args=[Address, Address]" + " (block" + " (%s" + " (aload parm=0)" + " (%s" + " (%s (aload parm=1))))" + " (return)))", + storeOpcode.getName(), + scalarOpcode.getName(), + loadOpcode.getName() + ); + } auto trees = parseString(inputTrees); ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. - SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - SKIP_ON_X86(MissingImplementation); - SKIP_ON_HAMMER(MissingImplementation); Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - int8_t output[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - int8_t inputA[] = {7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, 7}; - int8_t inputB[] = {-14, -12, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12, -14, 1}; + ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - entry_point(output,inputA,inputB); + int numBytes = vectorSize(vl); + int elementSize = typeSize(elementType.getDataType()); - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ(inputA[i] | inputB[i], output[i]); - } -} + int numElements = numBytes / elementSize; -TEST_F(VectorTest, VInt8Xor) { + if 
(inputB) { + auto entry_point = compiler.getEntryPoint < void(*)(void * , void *, void *) > (); + void *aOff = inputA; + void *bOff = inputB; + void *outOff = output; - auto inputTrees = "(method return= NoType args=[Address,Address,Address] " - " (block " - " (vstoreiVector128Int8 offset=0 " - " (aload parm=0) " - " (vxorVector128Int8 " - " (vloadiVector128Int8 (aload parm=1)) " - " (vloadiVector128Int8 (aload parm=2)))) " - " (return))) "; + for (int i = 0; i < numElements; i++) { + generateByType(aOff, elementType, scalarOpcode.isDiv()); + generateByType(bOff, elementType, scalarOpcode.isDiv()); - auto trees = parseString(inputTrees); + entry_point(outOff, aOff, bOff); - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. - SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - SKIP_ON_X86(MissingImplementation); - SKIP_ON_HAMMER(MissingImplementation); + aOff = static_cast(aOff) + elementSize; + bOff = static_cast(bOff) + elementSize; + outOff = static_cast(outOff) + elementSize; + } + } else { + auto entry_point = compiler.getEntryPoint < void(*)(void * , void *) > (); + void *aOff = inputA; + void *outOff = output; - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; + for (int i = 0; i < numElements; i++) { + generateByType(aOff, elementType, scalarOpcode.isDiv()); + entry_point(outOff, aOff); + aOff = static_cast(aOff) + elementSize; + outOff = static_cast(outOff) + elementSize; + } + } +} - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD +void 
generateAndExecuteVectorTest(TR::ILOpCode vectorOpcode, void *expected, void *inputA, void *inputB) { + TR::VectorLength vl = vectorOpcode.getType().getVectorLength(); + TR::DataType elementType = vectorOpcode.getType().getVectorElementType(); + TR::DataType vt = TR::DataType::createVectorType(elementType.getDataType(), vl); + TR::ILOpCode loadOp = TR::ILOpCode::createVectorOpCode(TR::vloadi, vt); + TR::ILOpCode storeOp = TR::ILOpCode::createVectorOpCode(TR::vstorei, vt); - int8_t output[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - int8_t inputA[] = {7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, 7}; - int8_t inputB[] = {-14, -12, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12, -14, 1}; + char type[64]; + char inputTrees[1024]; - entry_point(output,inputA,inputB); + std::snprintf(type, sizeof(type), "Vector%i%s", vectorSize(vl) * 8, TR::DataType::getName(elementType)); - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ(inputA[i] ^ inputB[i], output[i]); + if (vectorOpcode.expectedChildCount() == 1) { + std::snprintf(inputTrees, sizeof(inputTrees), + "(method return= NoType args=[Address,Address] " + " (block " + " (%s%s offset=0 " + " (aload parm=0) " + " (%s%s " + " (%s%s (aload parm=1)))) " + " (return))) ", + + storeOp.getName(), + type, + vectorOpcode.getName(), + type, + loadOp.getName(), + type + ); + } else { + std::snprintf(inputTrees, sizeof(inputTrees), + "(method return= NoType args=[Address,Address,Address] " + " (block " + " (%s%s offset=0 " + " (aload parm=0) " + " (%s%s " + " (%s%s (aload parm=1)) " + " (%s%s (aload parm=2)))) " + " (return))) ", + + storeOp.getName(), + type, + vectorOpcode.getName(), + type, + loadOp.getName(), + type, + loadOp.getName(), + type + ); } -} - -TEST_F(VectorTest, VInt8Neg) { - - auto inputTrees = "(method return= NoType args=[Address,Address] " - " (block " - " (vstoreiVector128Int8 offset=0 " - " (aload parm=0) " - " (vnegVector128Int8 " - " (vloadiVector128Int8 (aload parm=1)))) 
" - " (return))) "; auto trees = parseString(inputTrees); - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. - SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); Tril::DefaultCompiler compiler(trees); ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - int8_t output[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - int8_t inputA[] = {7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, 7}; - - entry_point(output,inputA); - - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ((-1) * inputA[i], output[i]); + uint8_t result[64]; + if (vectorOpcode.expectedChildCount() == 1) { + auto entry_point = compiler.getEntryPoint < void(*)(void * , void *) > (); + entry_point(result, inputA); + } else { + auto entry_point = compiler.getEntryPoint < void(*)(void * , void *, void *) > (); + entry_point(result, inputA, inputB); } -} -TEST_F(VectorTest, VInt16Neg) { - - auto inputTrees = "(method return= NoType args=[Address,Address] " - " (block " - " (vstoreiVector128Int16 offset=0 " - " (aload parm=0) " - " (vnegVector128Int16 " - " (vloadiVector128Int16 (aload parm=1)))) " - " (return))) "; + compareResults(expected, result, elementType.getDataType(), vl); +} - auto trees = parseString(inputTrees); +TEST_P(ParameterizedBinaryVectorArithmeticTest, VLoadStore) { + TR::ILOpCode scalarOpcode = std::get<0>(GetParam()); + TR::VectorLength vl = std::get<1>(GetParam()); + TR::DataTypes et = scalarOpcode.getType().getDataType(); - ASSERT_NOTNULL(trees); - //TODO: 
Re-enable this test on S390 after issue #1843 is resolved. + SKIP_IF(vl > TR::NumVectorLengths, MissingImplementation) << "Vector length is not supported by the target platform"; SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; + TR::ILOpCode vectorOpcode = OMR::ILOpCode::convertScalarToVector(scalarOpcode.getOpCodeValue(), vl); + ASSERT_NE(TR::BadILOp, vectorOpcode.getOpCodeValue()); + TR::DataType elementType = vectorOpcode.getType().getVectorElementType(); + TR::DataType vt = TR::DataType::createVectorType(et, vl); + TR::ILOpCode loadOp = TR::ILOpCode::createVectorOpCode(TR::vloadi, vt); + TR::ILOpCode storeOp = TR::ILOpCode::createVectorOpCode(TR::vstorei, vt); + char type[64]; + std::snprintf(type, sizeof(type), "Vector%i%s", vectorSize(vl) * 8, TR::DataType::getName(elementType)); - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD + TR::CPU cpu = TR::CPU::detect(privateOmrPortLibrary); + bool platformSupport = TR::CodeGenerator::getSupportsOpCodeForAutoSIMD(&cpu, loadOp) && + TR::CodeGenerator::getSupportsOpCodeForAutoSIMD(&cpu, storeOp) && + TR::CodeGenerator::getSupportsOpCodeForAutoSIMD(&cpu, vectorOpcode); - int16_t output[] = {0, 0, 0, 0, 0, 0, 0, 0}; - int16_t inputA[] = {60, 45, 30, 0, -3, -2, -1, 2}; + SKIP_IF(!platformSupport, MissingImplementation) << "Opcode " << vectorOpcode.getName() << type << " is not supported by the target platform"; - entry_point(output,inputA); + uint8_t expected[128]; + uint8_t a[128]; + uint8_t b[128]; - for (int i = 0; i < 
(sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ((-1) * inputA[i], output[i]); - } + generateIO(scalarOpcode, vl, expected, a, vectorOpcode.expectedChildCount() == 1 ? NULL : b); + generateAndExecuteVectorTest(vectorOpcode, expected, a, b); } -TEST_F(VectorTest, VInt32Neg) { - - auto inputTrees = "(method return= NoType args=[Address,Address] " - " (block " - " (vstoreiVector128Int32 offset=0 " - " (aload parm=0) " - " (vnegVector128Int32 " - " (vloadiVector128Int32 (aload parm=1)))) " - " (return))) "; +#define ALL_VL(opcode) \ + std::make_tuple(opcode, TR::VectorLength128),\ + std::make_tuple(opcode, TR::VectorLength256),\ + std::make_tuple(opcode, TR::VectorLength512) - auto trees = parseString(inputTrees); +INSTANTIATE_TEST_CASE_P(VectorArithmetic, ParameterizedBinaryVectorArithmeticTest, ::testing::ValuesIn(*TRTest::MakeVector>( + /* vadd */ - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. - SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); + ALL_VL(TR::badd), + ALL_VL(TR::sadd), + ALL_VL(TR::iadd), + ALL_VL(TR::ladd), + ALL_VL(TR::dadd), + ALL_VL(TR::fadd), - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; + /* vsub */ + ALL_VL(TR::bsub), + ALL_VL(TR::ssub), + ALL_VL(TR::isub), + ALL_VL(TR::lsub), + ALL_VL(TR::fsub), + ALL_VL(TR::dsub), - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD + /* vmul */ - int32_t output[] = {0, 0, 0, 0}; - int32_t inputA[] = {567890, 1234, 0, -20}; + ALL_VL(TR::bmul), + ALL_VL(TR::smul), + ALL_VL(TR::imul), + ALL_VL(TR::lmul), + 
ALL_VL(TR::fmul), + ALL_VL(TR::dmul), - entry_point(output,inputA); + /* vdiv */ - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ((-1) * inputA[i], output[i]); - } -} + ALL_VL(TR::bdiv), + ALL_VL(TR::sdiv), + ALL_VL(TR::idiv), + ALL_VL(TR::ldiv), + ALL_VL(TR::fdiv), + ALL_VL(TR::ddiv), -TEST_F(VectorTest, VInt64Neg) { + /* vand */ - auto inputTrees = "(method return= NoType args=[Address,Address] " - " (block " - " (vstoreiVector128Int64 offset=0 " - " (aload parm=0) " - " (vnegVector128Int64 " - " (vloadiVector128Int64 (aload parm=1)))) " - " (return))) "; + ALL_VL(TR::band), + ALL_VL(TR::sand), + ALL_VL(TR::iand), + ALL_VL(TR::land), - auto trees = parseString(inputTrees); + /* vor */ - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. - SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_POWER(MissingImplementation); + ALL_VL(TR::bor), + ALL_VL(TR::sor), + ALL_VL(TR::ior), + ALL_VL(TR::lor), - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; + /* vxor */ + ALL_VL(TR::bxor), + ALL_VL(TR::sxor), + ALL_VL(TR::ixor), + ALL_VL(TR::lxor), - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD + /* vmin */ - int64_t output[] = {0, 0}; - int64_t inputA[] = {60, -123456}; + /* No opcode for bmin, smin */ - entry_point(output,inputA); + ALL_VL(TR::imin), + ALL_VL(TR::lmin), - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ((-1) * inputA[i], output[i]); - } -} + /* fmin is not supported */ + /* dmin is not supported */ -TEST_F(VectorTest, VFloatNeg) { + /* vmax */ - auto 
inputTrees = "(method return= NoType args=[Address,Address] " - " (block " - " (vstoreiVector128Float offset=0 " - " (aload parm=0) " - " (vnegVector128Float " - " (vloadiVector128Float (aload parm=1)))) " - " (return))) "; + /* No opcode for bmax, smax */ - auto trees = parseString(inputTrees); + ALL_VL(TR::imax), + ALL_VL(TR::lmax), - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. - SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); + /* fmax is not supported */ + /* dmax is not supported */ - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; + /* ABS */ + ALL_VL(TR::iabs), + ALL_VL(TR::labs), + ALL_VL(TR::fabs), + ALL_VL(TR::dabs), - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD + /* vsqrt */ - float output[] = {0.0f, 0.0f, 0.0f, 0.0f}; - float inputA[] = {6.0f, 0.0f, -9.0f, 0.6f}; + /* No opcode for bsqrt, ssqrt, isqrt, lsqrt */ + ALL_VL(TR::fsqrt), + ALL_VL(TR::dsqrt), - entry_point(output,inputA); + /* vneg */ - for (int i = 0; i < (sizeof(output) / sizeof(*output)); i++) { - EXPECT_EQ((-1) * inputA[i], output[i]); - } -} + ALL_VL(TR::bneg), + ALL_VL(TR::sneg), + ALL_VL(TR::ineg), + ALL_VL(TR::lneg), + ALL_VL(TR::fneg), + ALL_VL(TR::dneg) +))); -TEST_F(VectorTest, VDoubleNeg) { - auto inputTrees = "(method return= NoType args=[Address,Address] " - " (block " - " (vstoreiVector128Double offset=0 " - " (aload parm=0) " - " (vnegVector128Double " - " (vloadiVector128Double (aload parm=1)))) " - " (return))) "; +class ParameterizedVectorTest : public VectorTest, public ::testing::WithParamInterface> {}; - auto trees = 
parseString(inputTrees); +TEST_P(ParameterizedVectorTest, VLoadStore) { + TR::VectorLength vl = std::get<0>(GetParam()); + TR::DataTypes et = std::get<1>(GetParam()); - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. + SKIP_IF(vl > TR::NumVectorLengths, MissingImplementation) << "Vector length is not supported by the target platform"; SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - - Tril::DefaultCompiler compiler(trees); - ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; - - - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD - - double output[] = {0.0, 0.0}; - double inputA[] = {12.0, -1.5}; - entry_point(output,inputA); - EXPECT_DOUBLE_EQ((-1) * inputA[0], output[0]); // Epsilon = 4ULP -- is this necessary? - EXPECT_DOUBLE_EQ((-1) * inputA[1], output[1]); // Epsilon = 4ULP -- is this necessary? 
-} + TR::DataType vt = TR::DataType::createVectorType(et, vl); -TEST_F(VectorTest, VDoubleSQRT) { + TR::ILOpCode loadOp = TR::ILOpCode::createVectorOpCode(TR::vloadi, vt); + TR::ILOpCode storeOp = TR::ILOpCode::createVectorOpCode(TR::vstorei, vt); + TR::CPU cpu = TR::CPU::detect(privateOmrPortLibrary); + bool platformSupport = TR::CodeGenerator::getSupportsOpCodeForAutoSIMD(&cpu, loadOp) && TR::CodeGenerator::getSupportsOpCodeForAutoSIMD(&cpu, loadOp); + SKIP_IF(!platformSupport, MissingImplementation) << "Opcode is not supported by the target platform"; - auto inputTrees = "(method return= NoType args=[Address,Address] " - " (block " - " (vstoreiVector128Double offset=0 " - " (aload parm=0) " - " (vsqrtVector128Double " - " (vloadiVector128Double (aload parm=1)))) " - " (return))) "; + char inputTrees[1024]; + char *formatStr = "(method return= NoType args=[Address,Address] " + " (block " + " (vstorei%s offset=0 " + " (aload parm=0) " + " (vloadi%s (aload parm=1))) " + " (return))) "; + sprintf(inputTrees, formatStr, vt.toString(), vt.toString()); auto trees = parseString(inputTrees); - ASSERT_NOTNULL(trees); - //TODO: Re-enable this test on S390 after issue #1843 is resolved. 
- SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)"; - SKIP_ON_RISCV(MissingImplementation); - SKIP_ON_AARCH64(MissingImplementation); Tril::DefaultCompiler compiler(trees); ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; + auto entry_point = compiler.getEntryPoint(); - auto entry_point = compiler.getEntryPoint(); - // This test currently assumes 128bit SIMD + const uint8_t maxVectorLength = 64; + char output[maxVectorLength] = {0}; + char input[maxVectorLength] = {0}; + char zero[maxVectorLength] = {0}; - double output[] = {0.0, 0.0}; - double inputA[] = {16.0, 100}; + for (int i = 0; i < maxVectorLength; i++) { + input[i] = i; + } - entry_point(output,inputA); - EXPECT_DOUBLE_EQ(sqrt(inputA[0]), output[0]); // Epsilon = 4ULP -- is this necessary? - EXPECT_DOUBLE_EQ(sqrt(inputA[1]), output[1]); // Epsilon = 4ULP -- is this necessary? 
+ entry_point(output, input); + + EXPECT_EQ(0, memcmp(input, output, TR::DataType::getSize(vt))); + EXPECT_EQ(0, memcmp(output + TR::DataType::getSize(vt), zero, maxVectorLength - TR::DataType::getSize(vt))); } +INSTANTIATE_TEST_CASE_P(VLoadStoreVectorTest, ParameterizedVectorTest, ::testing::ValuesIn(*TRTest::MakeVector>( + std::make_tuple(TR::VectorLength128, TR::Int8), + std::make_tuple(TR::VectorLength128, TR::Int16), + std::make_tuple(TR::VectorLength128, TR::Int32), + std::make_tuple(TR::VectorLength128, TR::Int64), + std::make_tuple(TR::VectorLength128, TR::Float), + std::make_tuple(TR::VectorLength128, TR::Double), + std::make_tuple(TR::VectorLength256, TR::Int8), + std::make_tuple(TR::VectorLength256, TR::Int16), + std::make_tuple(TR::VectorLength256, TR::Int32), + std::make_tuple(TR::VectorLength256, TR::Int64), + std::make_tuple(TR::VectorLength256, TR::Float), + std::make_tuple(TR::VectorLength256, TR::Double), + std::make_tuple(TR::VectorLength512, TR::Int8), + std::make_tuple(TR::VectorLength512, TR::Int16), + std::make_tuple(TR::VectorLength512, TR::Int32), + std::make_tuple(TR::VectorLength512, TR::Int64), + std::make_tuple(TR::VectorLength512, TR::Float), + std::make_tuple(TR::VectorLength512, TR::Double) +))); + TEST_F(VectorTest, VInt8Not) { auto inputTrees = "(method return= NoType args=[Address,Address] " From 94acaebb00c1c0d2f51b5a210bdfc2c828f01415 Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Tue, 12 Jul 2022 17:10:30 -0400 Subject: [PATCH 11/13] Use reg-src if mem-src opcode not present Signed-off-by: BradleyWood --- compiler/x/codegen/OMRTreeEvaluator.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compiler/x/codegen/OMRTreeEvaluator.cpp b/compiler/x/codegen/OMRTreeEvaluator.cpp index 357046a144..1a72701cdd 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/codegen/OMRTreeEvaluator.cpp @@ -4232,6 +4232,12 @@ TR::Register* OMR::X86::TreeEvaluator::vectorBinaryArithmeticEvaluator(TR::Node* TR::InstOpCode 
nativeOpcode = getNativeSIMDOpcode(opcode, type, useRegMemForm); + if (useRegMemForm && nativeOpcode.getMnemonic() == TR::InstOpCode::bad) + { + useRegMemForm = false; + nativeOpcode = getNativeSIMDOpcode(opcode, type, useRegMemForm); + } + TR_ASSERT_FATAL(nativeOpcode.getMnemonic() != TR::InstOpCode::bad, "Unsupported vector operation for given element type: %s", type.getVectorElementType().toString()); From c8b4e35b79a2a077dbd4ca7b583611b9508837a3 Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Wed, 20 Jul 2022 13:58:29 -0400 Subject: [PATCH 12/13] Alias RegMem and RegReg vector instructions The information required for binary encoding of RegReg / RegMem instructions remains the same. There are some cases where different flags may be required specifically for mem source form, in which case these instructions should not be aliased. Otherwise, we may simplify the declaration of many instructions by using an alias instead. Signed-off-by: BradleyWood --- compiler/x/codegen/OMRInstOpCode.enum | 40 ++++ compiler/x/codegen/OMRInstOpCode.hpp | 4 + compiler/x/codegen/OMRTreeEvaluator.cpp | 12 +- compiler/x/codegen/X86Ops.ins | 296 +++--------------------- 4 files changed, 88 insertions(+), 264 deletions(-) diff --git a/compiler/x/codegen/OMRInstOpCode.enum b/compiler/x/codegen/OMRInstOpCode.enum index 5cef2b1061..5241e256ad 100644 --- a/compiler/x/codegen/OMRInstOpCode.enum +++ b/compiler/x/codegen/OMRInstOpCode.enum @@ -22,3 +22,43 @@ #define INSTRUCTION(name, mnemonic, binary, property0, property1, features) name #include "codegen/X86Ops.ins" #undef INSTRUCTION + +// Alias RegMem & RegReg form instructions +PMULLWRegMem = PMULLWRegReg, +PMULLDRegMem = PMULLDRegReg, +PADDBRegMem = PADDBRegReg, +PADDWRegMem = PADDWRegReg, +PADDDRegMem = PADDDRegReg, +PADDQRegMem = PADDQRegReg, +PSUBBRegMem = PSUBBRegReg, +PSUBWRegMem = PSUBWRegReg, +PSUBDRegMem = PSUBDRegReg, +PSUBQRegMem = PSUBQRegReg, +PANDRegMem = PANDRegReg, +PORRegMem = PORRegReg, +PXORRegMem = PXORRegReg, 
+ADDPSRegMem = ADDPSRegReg, +ADDPDRegMem = ADDPDRegReg, +DIVPSRegMem = DIVPSRegReg, +DIVPDRegMem = DIVPDRegReg, +MULPSRegMem = MULPSRegReg, +MULPDRegMem = MULPDRegReg, +MOVDQURegMem = MOVDQURegReg, +PABSBRegMem = PABSBRegReg, +PABSWRegMem = PABSWRegReg, +PABSDRegMem = PABSDRegReg, +PABSQRegMem = PABSQRegReg, +PMINSBRegMem = PMINSBRegReg, +PMINSWRegMem = PMINSWRegReg, +PMINSDRegMem = PMINSDRegReg, +PMINSQRegMem = PMINSQRegReg, +MINPSRegMem = MINPSRegReg, +MINPDRegMem = MINPDRegReg, +PMAXSBRegMem = PMAXSBRegReg, +PMAXSWRegMem = PMAXSWRegReg, +PMAXSDRegMem = PMAXSDRegReg, +PMAXSQRegMem = PMAXSQRegReg, +MAXPSRegMem = MAXPSRegReg, +MAXPDRegMem = MAXPDRegReg, +SUBPSRegMem = SUBPSRegReg, +SUBPDRegMem = SUBPDRegReg, diff --git a/compiler/x/codegen/OMRInstOpCode.hpp b/compiler/x/codegen/OMRInstOpCode.hpp index 61d8c343ea..8bec45f83b 100644 --- a/compiler/x/codegen/OMRInstOpCode.hpp +++ b/compiler/x/codegen/OMRInstOpCode.hpp @@ -74,6 +74,7 @@ namespace TR { class Register; } #define IA32OpProp_TargetRegisterInModRM 0x08000000 #define IA32OpProp_TargetRegisterIgnored 0x10000000 #define IA32OpProp_SourceRegisterInModRM 0x20000000 + #define IA32OpProp_SourceRegisterIgnored 0x40000000 #define IA32OpProp_BranchOp 0x80000000 @@ -102,6 +103,9 @@ namespace TR { class Register; } #define IA32OpProp1_NeedsLockPrefix 0x00004000 #define IA32OpProp1_CallOp 0x00010000 #define IA32OpProp1_SourceIsMemRef 0x00020000 + +// For cases when source operand can be a register or mem-ref +#define IA32OpProp1_SourceCanBeMemRef 0x00020000 #define IA32OpProp1_SourceRegIsImplicit 0x00040000 #define IA32OpProp1_TargetRegIsImplicit 0x00080000 #define IA32OpProp1_FusableCompare 0x00100000 diff --git a/compiler/x/codegen/OMRTreeEvaluator.cpp b/compiler/x/codegen/OMRTreeEvaluator.cpp index 1a72701cdd..b398da3efd 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/codegen/OMRTreeEvaluator.cpp @@ -4116,12 +4116,12 @@ static const TR::InstOpCode::Mnemonic 
VectorUnaryArithmeticOpCodesForReg[TR::Num static const TR::InstOpCode::Mnemonic VectorUnaryArithmeticOpCodesForMem[TR::NumVectorElementTypes][NumUnaryArithmeticOps] = { // Invalid, min, max, abs, sqrt - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int8 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int16 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int32 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int64 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::VSQRTPSRegMem }, // Float - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::VSQRTPDRegMem }, // Double + { TR::InstOpCode::bad, TR::InstOpCode::PMINSBRegMem, TR::InstOpCode::PMAXSBRegMem, TR::InstOpCode::PABSBRegMem, TR::InstOpCode::bad }, // Int8 + { TR::InstOpCode::bad, TR::InstOpCode::PMINSWRegMem, TR::InstOpCode::PMAXSWRegMem, TR::InstOpCode::PABSWRegMem, TR::InstOpCode::bad }, // Int16 + { TR::InstOpCode::bad, TR::InstOpCode::PMINSDRegMem, TR::InstOpCode::PMAXSDRegMem, TR::InstOpCode::PABSDRegMem, TR::InstOpCode::bad }, // Int32 + { TR::InstOpCode::bad, TR::InstOpCode::PMINSQRegMem, TR::InstOpCode::PMAXSQRegMem, TR::InstOpCode::PABSQRegMem, TR::InstOpCode::bad }, // Int64 + { TR::InstOpCode::bad, TR::InstOpCode::MINPSRegMem, TR::InstOpCode::MAXPSRegMem, TR::InstOpCode::bad, TR::InstOpCode::VSQRTPSRegMem }, // Float + { TR::InstOpCode::bad, TR::InstOpCode::MINPDRegMem, TR::InstOpCode::MAXPDRegMem, TR::InstOpCode::bad, TR::InstOpCode::VSQRTPDRegMem }, // Double }; static const TR::ILOpCodes MemoryLoadOpCodes[TR::NumOMRTypes] = diff --git a/compiler/x/codegen/X86Ops.ins b/compiler/x/codegen/X86Ops.ins index 3870ad8c43..9d75fc6486 100644 --- 
a/compiler/x/codegen/X86Ops.ins +++ b/compiler/x/codegen/X86Ops.ins @@ -377,17 +377,7 @@ INSTRUCTION(ADDSSRegMem, addss, INSTRUCTION(ADDPSRegReg, addps, BINARY(VEX_L128, VEX_vReg_, PREFIX___, REX__, ESCAPE_0F__, 0x58, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SingleFP | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(ADDPSRegMem, addps, - BINARY(VEX_L128, VEX_vReg_, PREFIX___, REX__, ESCAPE_0F__, 0x58, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SingleFP | IA32OpProp_IntSource | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -407,17 +397,7 @@ INSTRUCTION(ADDSDRegMem, addsd, INSTRUCTION(ADDPDRegReg, addpd, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0x58, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_DoubleFP | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - 
FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(ADDPDRegMem, addpd, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0x58, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_DoubleFP | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -1920,17 +1900,7 @@ INSTRUCTION(DIVSSRegMem, divss, INSTRUCTION(DIVPSRegReg, divps, BINARY(VEX_L128, VEX_vReg_, PREFIX___, REX__, ESCAPE_0F__, 0x5e, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SingleFP | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | 
X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(DIVPSRegMem, divps, - BINARY(VEX_L128, VEX_vReg_, PREFIX___, REX__, ESCAPE_0F__, 0x5e, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SingleFP | IA32OpProp_IntSource | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -1950,17 +1920,7 @@ INSTRUCTION(DIVSDRegMem, divsd, INSTRUCTION(DIVPDRegReg, divpd, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0x5e, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_DoubleFP | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(DIVPDRegMem, divpd, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0x5e, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_DoubleFP | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), 
FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -2150,17 +2110,7 @@ INSTRUCTION(MULSSRegMem, mulss, INSTRUCTION(MULPSRegReg, mulps, BINARY(VEX_L128, VEX_vReg_, PREFIX___, REX__, ESCAPE_0F__, 0x59, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SingleFP | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(MULPSRegMem, mulps, - BINARY(VEX_L128, VEX_vReg_, PREFIX___, REX__, ESCAPE_0F__, 0x59, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SingleFP | IA32OpProp_IntSource | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -2180,17 +2130,7 @@ INSTRUCTION(MULSDRegMem, mulsd, INSTRUCTION(MULPDRegReg, mulpd, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0x59, 
0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_DoubleFP | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(MULPDRegMem, mulpd, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0x59, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_DoubleFP | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -2697,17 +2637,7 @@ INSTRUCTION(MOVQReg8Reg, movq, INSTRUCTION(MOVDQURegReg, movdqu, BINARY(VEX_L128, VEX_vNONE, PREFIX_F3, REX__, ESCAPE_0F__, 0x6f, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | 
X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(MOVDQURegMem, movdqu, - BINARY(VEX_L128, VEX_vNONE, PREFIX_F3, REX__, ESCAPE_0F__, 0x6f, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -3382,17 +3312,7 @@ INSTRUCTION(PMOVZXWDRegMem, pmovzxwd, INSTRUCTION(PMULLWRegReg, pmullw, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xd5, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX256RequiresAVX512BW | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(PMULLWRegMem, pmullw, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xd5, 0, ModRM_RM__, 
Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | @@ -3402,17 +3322,7 @@ INSTRUCTION(PMULLWRegMem, pmullw, INSTRUCTION(PMULLDRegReg, pmulld, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F38, 0x40, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_SSE4_1Supported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(PMULLDRegMem, pmulld, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F38, 0x40, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_SSE4_1Supported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | 
X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -3422,7 +3332,7 @@ INSTRUCTION(PMULLDRegMem, pmulld, INSTRUCTION(PABSBRegReg, pabsb, BINARY(VEX_L128, VEX_vNONE, PREFIX_66, REX__, ESCAPE_0F38, 0x1C, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_SSE3Supported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | @@ -3432,7 +3342,7 @@ INSTRUCTION(PABSBRegReg, pabsb, INSTRUCTION(PABSWRegReg, pabsw, BINARY(VEX_L128, VEX_vNONE, PREFIX_66, REX__, ESCAPE_0F38, 0x1D, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_SSE3Supported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | @@ -3442,7 +3352,7 @@ INSTRUCTION(PABSWRegReg, pabsw, INSTRUCTION(PABSDRegReg, pabsd, BINARY(VEX_L128, VEX_vNONE, PREFIX_66, REX__, ESCAPE_0F38, 0x1E, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | 
IA32OpProp1_SIMDSingleSource), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_SSE3Supported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -3452,7 +3362,7 @@ INSTRUCTION(PABSDRegReg, pabsd, INSTRUCTION(PABSQRegReg, pabsq, BINARY(VEX_L128, VEX_vNONE, PREFIX_66, REX_W, ESCAPE_0F38, 0x1F, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SIMDSingleSource | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) @@ -3460,17 +3370,7 @@ INSTRUCTION(PABSQRegReg, pabsq, INSTRUCTION(PADDBRegReg, paddb, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xfc, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | 
X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX256RequiresAVX512BW | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(PADDBRegMem, paddb, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xfc, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | @@ -3480,17 +3380,7 @@ INSTRUCTION(PADDBRegMem, paddb, INSTRUCTION(PADDWRegReg, paddw, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xfd, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX256RequiresAVX512BW | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(PADDWRegMem, paddw, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xfd, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | 
IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | @@ -3500,17 +3390,7 @@ INSTRUCTION(PADDWRegMem, paddw, INSTRUCTION(PADDDRegReg, paddd, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xfe, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(PADDDRegMem, paddd, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xfe, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | 
X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -3520,17 +3400,7 @@ INSTRUCTION(PADDDRegMem, paddd, INSTRUCTION(PADDQRegReg, paddq, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0xd4, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(PADDQRegMem, paddq, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0xd4, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -3540,17 +3410,7 @@ INSTRUCTION(PADDQRegMem, paddq, INSTRUCTION(PSUBBRegReg, psubb, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xf8, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - 
X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX256RequiresAVX512BW | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(PSUBBRegMem, psubb, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xf8, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | @@ -3560,17 +3420,7 @@ INSTRUCTION(PSUBBRegMem, psubb, INSTRUCTION(PSUBWRegReg, psubw, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xf9, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | - X86FeatureProp_EVEX256Supported | 
X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX256RequiresAVX512BW | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(PSUBWRegMem, psubw, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xf9, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | @@ -3580,37 +3430,17 @@ INSTRUCTION(PSUBWRegMem, psubw, INSTRUCTION(PSUBDRegReg, psubd, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xfa, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) ), -INSTRUCTION(PSUBDRegMem, psubd, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xfa, 0, ModRM_RM__, Immediate_0), - 
PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F ) - ), INSTRUCTION(PSUBQRegReg, psubq, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0xfb, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(PSUBQRegMem, psubq, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0xfb, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | 
X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -3620,37 +3450,17 @@ INSTRUCTION(PSUBQRegMem, psubq, INSTRUCTION(PANDRegReg, pand, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xdb, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) ), -INSTRUCTION(PANDRegMem, pand, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xdb, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F ) - ), INSTRUCTION(PORRegReg, por, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xeb, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | 
IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(PORRegMem, por, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xeb, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -3660,7 +3470,7 @@ INSTRUCTION(PORRegMem, por, INSTRUCTION(PXORRegReg, pxor, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xef, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | 
X86FeatureProp_EVEX128RequiresAVX512VL | @@ -3672,16 +3482,6 @@ INSTRUCTION(VPXORDZmmZmm, vpxord, PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), PROPERTY1(IA32OpProp1_ZMMSource | IA32OpProp1_ZMMTarget), FEATURES(0)), -INSTRUCTION(PXORRegMem, pxor, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xef, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), INSTRUCTION(PTESTRegReg, ptest, BINARY(VEX_L___, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F38, 0x17, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), @@ -3730,7 +3530,7 @@ INSTRUCTION(PSRLDQRegImm1, psrldq, INSTRUCTION(PMINSBRegReg, pminsb, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F38, 0x38, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_SSE4_1Supported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | @@ 
-3740,7 +3540,7 @@ INSTRUCTION(PMINSBRegReg, pminsb, INSTRUCTION(PMINSWRegReg, pminsw, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xEA, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | @@ -3750,7 +3550,7 @@ INSTRUCTION(PMINSWRegReg, pminsw, INSTRUCTION(PMINSDRegReg, pminsd, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F38, 0x39, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_SSE4_1Supported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -3760,7 +3560,7 @@ INSTRUCTION(PMINSDRegReg, pminsd, INSTRUCTION(PMINSQRegReg, pminsq, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F38, 0x39, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | 
X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) @@ -3768,7 +3568,7 @@ INSTRUCTION(PMINSQRegReg, pminsq, INSTRUCTION(MINPSRegReg, minps, BINARY(VEX_L128, VEX_vReg_, PREFIX___, REX__, ESCAPE_0F__, 0x5D, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -3778,7 +3578,7 @@ INSTRUCTION(MINPSRegReg, minps, INSTRUCTION(MINPDRegReg, minpd, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0x5D, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -3788,7 +3588,7 @@ INSTRUCTION(MINPDRegReg, minpd, INSTRUCTION(PMAXSBRegReg, pmaxsb, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F38, 0x3C, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), 
FEATURES(X86FeatureProp_SSE4_1Supported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | @@ -3798,7 +3598,7 @@ INSTRUCTION(PMAXSBRegReg, pmaxsb, INSTRUCTION(PMAXSWRegReg, pmaxsw, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F__, 0xEE, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX128RequiresAVX512BW | @@ -3808,7 +3608,7 @@ INSTRUCTION(PMAXSWRegReg, pmaxsw, INSTRUCTION(PMAXSDRegReg, pmaxsd, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX__, ESCAPE_0F38, 0x3D, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_SSE4_1Supported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -3818,7 +3618,7 @@ INSTRUCTION(PMAXSDRegReg, pmaxsd, INSTRUCTION(PMAXSQRegReg, pmaxsq, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F38, 0x3D, 0, ModRM_RM__, Immediate_0), 
PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) @@ -3826,7 +3626,7 @@ INSTRUCTION(PMAXSQRegReg, pmaxsq, INSTRUCTION(MAXPSRegReg, maxps, BINARY(VEX_L128, VEX_vReg_, PREFIX___, REX__, ESCAPE_0F__, 0x5F, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -3836,7 +3636,7 @@ INSTRUCTION(MAXPSRegReg, maxps, INSTRUCTION(MAXPDRegReg, maxpd, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0x5F, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SourceRegisterInModRM), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -4821,17 +4621,7 @@ INSTRUCTION(SUBSSRegMem, subss, 
INSTRUCTION(SUBPSRegReg, subps, BINARY(VEX_L128, VEX_vReg_, PREFIX___, REX__, ESCAPE_0F__, 0x5c, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SingleFP | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(SUBPSRegMem, subps, - BINARY(VEX_L128, VEX_vReg_, PREFIX___, REX__, ESCAPE_0F__, 0x5c, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_SingleFP | IA32OpProp_IntSource | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | @@ -4851,17 +4641,7 @@ INSTRUCTION(SUBSDRegMem, subsd, INSTRUCTION(SUBPDRegReg, subpd, BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0x5c, 0, ModRM_RM__, Immediate_0), PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_DoubleFP | IA32OpProp_SourceRegisterInModRM | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget), - FEATURES(X86FeatureProp_MinTargetSupported | - X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | 
X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | - X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | - X86FeatureProp_EVEX256Supported | X86FeatureProp_EVEX256RequiresAVX512F | X86FeatureProp_EVEX256RequiresAVX512VL | - X86FeatureProp_EVEX512Supported | X86FeatureProp_EVEX512RequiresAVX512F) - ), -INSTRUCTION(SUBPDRegMem, subpd, - BINARY(VEX_L128, VEX_vReg_, PREFIX_66, REX_W, ESCAPE_0F__, 0x5c, 0, ModRM_RM__, Immediate_0), - PROPERTY0(IA32OpProp_ModifiesTarget | IA32OpProp_DoubleFP | IA32OpProp_UsesTarget), - PROPERTY1(IA32OpProp1_SourceIsMemRef | IA32OpProp1_XMMTarget), + PROPERTY1(IA32OpProp1_XMMSource | IA32OpProp1_XMMTarget | IA32OpProp1_SourceCanBeMemRef), FEATURES(X86FeatureProp_MinTargetSupported | X86FeatureProp_VEX128Supported | X86FeatureProp_VEX128RequiresAVX | X86FeatureProp_VEX256Supported | X86FeatureProp_VEX256RequiresAVX2 | X86FeatureProp_EVEX128Supported | X86FeatureProp_EVEX128RequiresAVX512F | X86FeatureProp_EVEX128RequiresAVX512VL | From dd3391d669a9f3d164de3089a285bd77e03357ab Mon Sep 17 00:00:00 2001 From: BradleyWood Date: Tue, 26 Jul 2022 11:58:04 -0400 Subject: [PATCH 13/13] Move vmin/vmax to binary opcode table Place vmin/vmax opcodes in the binary opcode table; leave float/double types unimplemented because they do not conform to Java's floating-point semantics. 
Signed-off-by: BradleyWood --- compiler/x/codegen/OMRTreeEvaluator.cpp | 104 ++++++++++++------------ 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/compiler/x/codegen/OMRTreeEvaluator.cpp b/compiler/x/codegen/OMRTreeEvaluator.cpp index b398da3efd..1b51d62ef9 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/codegen/OMRTreeEvaluator.cpp @@ -4038,9 +4038,9 @@ enum ArithmeticOps : uint32_t BinaryArithmeticAnd, BinaryArithmeticOr, BinaryArithmeticXor, + BinaryArithmeticMin, + BinaryArithmeticMax, NumBinaryArithmeticOps, - UnaryArithmeticMin, - UnaryArithmeticMax, UnaryArithmeticAbs, UnaryArithmeticSqrt, LastOp, @@ -4049,79 +4049,79 @@ enum ArithmeticOps : uint32_t static const TR::InstOpCode::Mnemonic BinaryArithmeticOpCodesForReg[TR::NumOMRTypes][NumBinaryArithmeticOps] = { - // Invalid, Add, Sub, Mul, Div, And, Or, Xor - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // NoType - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int8 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int16 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int32 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int64 - { TR::InstOpCode::bad, TR::InstOpCode::ADDSSRegReg, TR::InstOpCode::SUBSSRegReg, TR::InstOpCode::MULSSRegReg, TR::InstOpCode::DIVSSRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Float - { 
TR::InstOpCode::bad, TR::InstOpCode::ADDSDRegReg, TR::InstOpCode::SUBSDRegReg, TR::InstOpCode::MULSDRegReg, TR::InstOpCode::DIVSDRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Double - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Address - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Aggregate + // Invalid, Add, Sub, Mul, Div, And, Or, Xor min max + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // NoType + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int8 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int16 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int32 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int64 + { TR::InstOpCode::bad, TR::InstOpCode::ADDSSRegReg, TR::InstOpCode::SUBSSRegReg, TR::InstOpCode::MULSSRegReg, TR::InstOpCode::DIVSSRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, 
TR::InstOpCode::bad }, // Float + { TR::InstOpCode::bad, TR::InstOpCode::ADDSDRegReg, TR::InstOpCode::SUBSDRegReg, TR::InstOpCode::MULSDRegReg, TR::InstOpCode::DIVSDRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Double + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Address + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Aggregate }; static const TR::InstOpCode::Mnemonic VectorBinaryArithmeticOpCodesForReg[TR::NumVectorElementTypes][NumBinaryArithmeticOps] = { - // Invalid, Add, Sub, Mul, Div, And, Or, Xor - { TR::InstOpCode::bad, TR::InstOpCode::PADDBRegReg, TR::InstOpCode::PSUBBRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int8 - { TR::InstOpCode::bad, TR::InstOpCode::PADDWRegReg, TR::InstOpCode::PSUBWRegReg, TR::InstOpCode::PMULLWRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int16 - { TR::InstOpCode::bad, TR::InstOpCode::PADDDRegReg, TR::InstOpCode::PSUBDRegReg, TR::InstOpCode::PMULLDRegReg, TR::InstOpCode::bad, TR::InstOpCode::PANDRegReg, TR::InstOpCode::PORRegReg, TR::InstOpCode::PXORRegReg }, // Int32 - { TR::InstOpCode::bad, TR::InstOpCode::PADDQRegReg, TR::InstOpCode::PSUBQRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::PANDRegReg, TR::InstOpCode::PORRegReg, TR::InstOpCode::PXORRegReg }, // Int64 - { TR::InstOpCode::bad, TR::InstOpCode::ADDPSRegReg, TR::InstOpCode::SUBPSRegReg, TR::InstOpCode::MULPSRegReg, TR::InstOpCode::DIVPSRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Float - { 
TR::InstOpCode::bad, TR::InstOpCode::ADDPDRegReg, TR::InstOpCode::SUBPDRegReg, TR::InstOpCode::MULPDRegReg, TR::InstOpCode::DIVPDRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Double + // Invalid, Add, Sub, Mul, Div, And, Or, Xor min max + { TR::InstOpCode::bad, TR::InstOpCode::PADDBRegReg, TR::InstOpCode::PSUBBRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::PMINSBRegReg, TR::InstOpCode::PMAXSBRegReg }, // Int8 + { TR::InstOpCode::bad, TR::InstOpCode::PADDWRegReg, TR::InstOpCode::PSUBWRegReg, TR::InstOpCode::PMULLWRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::PMINSWRegReg, TR::InstOpCode::PMAXSWRegReg }, // Int16 + { TR::InstOpCode::bad, TR::InstOpCode::PADDDRegReg, TR::InstOpCode::PSUBDRegReg, TR::InstOpCode::PMULLDRegReg, TR::InstOpCode::bad, TR::InstOpCode::PANDRegReg, TR::InstOpCode::PORRegReg, TR::InstOpCode::PXORRegReg, TR::InstOpCode::PMINSDRegReg, TR::InstOpCode::PMAXSDRegReg }, // Int32 + { TR::InstOpCode::bad, TR::InstOpCode::PADDQRegReg, TR::InstOpCode::PSUBQRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::PANDRegReg, TR::InstOpCode::PORRegReg, TR::InstOpCode::PXORRegReg, TR::InstOpCode::PMINSQRegReg, TR::InstOpCode::PMAXSQRegReg }, // Int64 + { TR::InstOpCode::bad, TR::InstOpCode::ADDPSRegReg, TR::InstOpCode::SUBPSRegReg, TR::InstOpCode::MULPSRegReg, TR::InstOpCode::DIVPSRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Float + { TR::InstOpCode::bad, TR::InstOpCode::ADDPDRegReg, TR::InstOpCode::SUBPDRegReg, TR::InstOpCode::MULPDRegReg, TR::InstOpCode::DIVPDRegReg, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Double }; static const TR::InstOpCode::Mnemonic BinaryArithmeticOpCodesForMem[TR::NumOMRTypes][NumBinaryArithmeticOps] = { - // 
Invalid, Add, Sub, Mul, Div, And, Or, Xor - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // NoType - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int8 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int16 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int32 - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int64 - { TR::InstOpCode::bad, TR::InstOpCode::ADDSSRegMem, TR::InstOpCode::SUBSSRegMem, TR::InstOpCode::MULSSRegMem, TR::InstOpCode::DIVSSRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Float - { TR::InstOpCode::bad, TR::InstOpCode::ADDSDRegMem, TR::InstOpCode::SUBSDRegMem, TR::InstOpCode::MULSDRegMem, TR::InstOpCode::DIVSDRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Double - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Address - { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Aggregate + // Invalid, Add, Sub, Mul, Div, And, Or, Xor min max + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, 
TR::InstOpCode::bad, TR::InstOpCode::bad }, // NoType + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int8 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int16 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int32 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int64 + { TR::InstOpCode::bad, TR::InstOpCode::ADDSSRegMem, TR::InstOpCode::SUBSSRegMem, TR::InstOpCode::MULSSRegMem, TR::InstOpCode::DIVSSRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Float + { TR::InstOpCode::bad, TR::InstOpCode::ADDSDRegMem, TR::InstOpCode::SUBSDRegMem, TR::InstOpCode::MULSDRegMem, TR::InstOpCode::DIVSDRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Double + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Address + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Aggregate }; static const TR::InstOpCode::Mnemonic 
VectorBinaryArithmeticOpCodesForMem[TR::NumVectorElementTypes][NumBinaryArithmeticOps] = { - // Invalid, Add, Sub, Mul, Div, And, Or, Xor - { TR::InstOpCode::bad, TR::InstOpCode::PADDBRegMem, TR::InstOpCode::PSUBBRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int8 - { TR::InstOpCode::bad, TR::InstOpCode::PADDWRegMem, TR::InstOpCode::PSUBWRegMem, TR::InstOpCode::PMULLWRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Int16 - { TR::InstOpCode::bad, TR::InstOpCode::PADDDRegMem, TR::InstOpCode::PSUBDRegMem, TR::InstOpCode::PMULLDRegMem, TR::InstOpCode::bad, TR::InstOpCode::PANDRegMem, TR::InstOpCode::PORRegMem, TR::InstOpCode::PXORRegMem }, // Int32 - { TR::InstOpCode::bad, TR::InstOpCode::PADDQRegMem, TR::InstOpCode::PSUBQRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::PANDRegMem, TR::InstOpCode::PORRegMem, TR::InstOpCode::PXORRegMem }, // Int64 - { TR::InstOpCode::bad, TR::InstOpCode::ADDPSRegMem, TR::InstOpCode::SUBPSRegMem, TR::InstOpCode::MULPSRegMem, TR::InstOpCode::DIVPSRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Float - { TR::InstOpCode::bad, TR::InstOpCode::ADDPDRegMem, TR::InstOpCode::SUBPDRegMem, TR::InstOpCode::MULPDRegMem, TR::InstOpCode::DIVPDRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Double + // Invalid, Add, Sub, Mul, Div, And, Or, Xor min max + { TR::InstOpCode::bad, TR::InstOpCode::PADDBRegMem, TR::InstOpCode::PSUBBRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::PMINSBRegMem, TR::InstOpCode::PMAXSBRegMem }, // Int8 + { TR::InstOpCode::bad, TR::InstOpCode::PADDWRegMem, TR::InstOpCode::PSUBWRegMem, TR::InstOpCode::PMULLWRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::PMINSWRegMem, TR::InstOpCode::PMAXSWRegMem }, // 
Int16 + { TR::InstOpCode::bad, TR::InstOpCode::PADDDRegMem, TR::InstOpCode::PSUBDRegMem, TR::InstOpCode::PMULLDRegMem, TR::InstOpCode::bad, TR::InstOpCode::PANDRegMem, TR::InstOpCode::PORRegMem, TR::InstOpCode::PXORRegMem, TR::InstOpCode::PMINSDRegMem, TR::InstOpCode::PMAXSDRegMem }, // Int32 + { TR::InstOpCode::bad, TR::InstOpCode::PADDQRegMem, TR::InstOpCode::PSUBQRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::PANDRegMem, TR::InstOpCode::PORRegMem, TR::InstOpCode::PXORRegMem, TR::InstOpCode::PMINSQRegMem, TR::InstOpCode::PMAXSQRegMem }, // Int64 + { TR::InstOpCode::bad, TR::InstOpCode::ADDPSRegMem, TR::InstOpCode::SUBPSRegMem, TR::InstOpCode::MULPSRegMem, TR::InstOpCode::DIVPSRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Float + { TR::InstOpCode::bad, TR::InstOpCode::ADDPDRegMem, TR::InstOpCode::SUBPDRegMem, TR::InstOpCode::MULPDRegMem, TR::InstOpCode::DIVPDRegMem, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::bad }, // Double }; static const TR::InstOpCode::Mnemonic VectorUnaryArithmeticOpCodesForReg[TR::NumVectorElementTypes][NumUnaryArithmeticOps] = { - // Invalid, min, max, abs, sqrt - { TR::InstOpCode::bad, TR::InstOpCode::PMINSBRegReg, TR::InstOpCode::PMAXSBRegReg, TR::InstOpCode::PABSBRegReg, TR::InstOpCode::bad }, // Int8 - { TR::InstOpCode::bad, TR::InstOpCode::PMINSWRegReg, TR::InstOpCode::PMAXSWRegReg, TR::InstOpCode::PABSWRegReg, TR::InstOpCode::bad }, // Int16 - { TR::InstOpCode::bad, TR::InstOpCode::PMINSDRegReg, TR::InstOpCode::PMAXSDRegReg, TR::InstOpCode::PABSDRegReg, TR::InstOpCode::bad }, // Int32 - { TR::InstOpCode::bad, TR::InstOpCode::PMINSQRegReg, TR::InstOpCode::PMAXSQRegReg, TR::InstOpCode::PABSQRegReg, TR::InstOpCode::bad }, // Int64 - { TR::InstOpCode::bad, TR::InstOpCode::MINPSRegReg, TR::InstOpCode::MAXPSRegReg, TR::InstOpCode::bad, TR::InstOpCode::SQRTPSRegReg }, // Float - { 
TR::InstOpCode::bad, TR::InstOpCode::MINPDRegReg, TR::InstOpCode::MAXPDRegReg, TR::InstOpCode::bad, TR::InstOpCode::SQRTPDRegReg }, // Double + // Invalid, abs, sqrt + { TR::InstOpCode::bad, TR::InstOpCode::PABSBRegReg, TR::InstOpCode::bad }, // Int8 + { TR::InstOpCode::bad, TR::InstOpCode::PABSWRegReg, TR::InstOpCode::bad }, // Int16 + { TR::InstOpCode::bad, TR::InstOpCode::PABSDRegReg, TR::InstOpCode::bad }, // Int32 + { TR::InstOpCode::bad, TR::InstOpCode::PABSQRegReg, TR::InstOpCode::bad }, // Int64 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::SQRTPSRegReg }, // Float + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::SQRTPDRegReg }, // Double }; static const TR::InstOpCode::Mnemonic VectorUnaryArithmeticOpCodesForMem[TR::NumVectorElementTypes][NumUnaryArithmeticOps] = { - // Invalid, min, max, abs, sqrt - { TR::InstOpCode::bad, TR::InstOpCode::PMINSBRegMem, TR::InstOpCode::PMAXSBRegMem, TR::InstOpCode::PABSBRegMem, TR::InstOpCode::bad }, // Int8 - { TR::InstOpCode::bad, TR::InstOpCode::PMINSWRegMem, TR::InstOpCode::PMAXSWRegMem, TR::InstOpCode::PABSWRegMem, TR::InstOpCode::bad }, // Int16 - { TR::InstOpCode::bad, TR::InstOpCode::PMINSDRegMem, TR::InstOpCode::PMAXSDRegMem, TR::InstOpCode::PABSDRegMem, TR::InstOpCode::bad }, // Int32 - { TR::InstOpCode::bad, TR::InstOpCode::PMINSQRegMem, TR::InstOpCode::PMAXSQRegMem, TR::InstOpCode::PABSQRegMem, TR::InstOpCode::bad }, // Int64 - { TR::InstOpCode::bad, TR::InstOpCode::MINPSRegMem, TR::InstOpCode::MAXPSRegMem, TR::InstOpCode::bad, TR::InstOpCode::VSQRTPSRegMem }, // Float - { TR::InstOpCode::bad, TR::InstOpCode::MINPDRegMem, TR::InstOpCode::MAXPDRegMem, TR::InstOpCode::bad, TR::InstOpCode::VSQRTPDRegMem }, // Double + // Invalid, abs, sqrt + { TR::InstOpCode::bad, TR::InstOpCode::PABSBRegMem, TR::InstOpCode::bad }, // Int8 + { TR::InstOpCode::bad, TR::InstOpCode::PABSWRegMem, TR::InstOpCode::bad }, // Int16 + { TR::InstOpCode::bad, TR::InstOpCode::PABSDRegMem, TR::InstOpCode::bad }, // 
Int32 + { TR::InstOpCode::bad, TR::InstOpCode::PABSQRegMem, TR::InstOpCode::bad }, // Int64 + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::VSQRTPSRegMem }, // Float + { TR::InstOpCode::bad, TR::InstOpCode::bad, TR::InstOpCode::VSQRTPDRegMem }, // Double }; static const TR::ILOpCodes MemoryLoadOpCodes[TR::NumOMRTypes] = @@ -4168,10 +4168,10 @@ TR::InstOpCode OMR::X86::TreeEvaluator::getNativeSIMDOpcode(TR::ILOpCodes opcode binaryOp = BinaryArithmeticXor; break; case TR::vmin: - unaryOp = UnaryArithmeticMin; + binaryOp = BinaryArithmeticMin; break; case TR::vmax: - unaryOp = UnaryArithmeticMax; + binaryOp = BinaryArithmeticMax; break; case TR::vabs: unaryOp = UnaryArithmeticAbs;