From ea7c1811e5b886c6dcb21abd554dc48709f85d41 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 22 Oct 2023 01:28:57 +0200 Subject: [PATCH] Clean up --- src/coreclr/jit/importervectorization.cpp | 9 ++++++--- src/coreclr/jit/lowerxarch.cpp | 16 +++++++++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/importervectorization.cpp b/src/coreclr/jit/importervectorization.cpp index 89d6711252100..2987986ba1aa0 100644 --- a/src/coreclr/jit/importervectorization.cpp +++ b/src/coreclr/jit/importervectorization.cpp @@ -199,9 +199,12 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD( // Optimization: use a single load when byteLen equals simdSize. // For code simplicity we always create nodes for two vectors case. - const bool useSingleVector = simdSize == byteLen; - return gtNewSimdCmpOpAllNode(GT_EQ, TYP_UBYTE, useSingleVector ? xor1 : orr, gtNewZeroConNode(simdType), baseType, - simdSize); + if (simdSize == byteLen) + { + return gtNewSimdCmpOpAllNode(GT_EQ, TYP_UBYTE, vec1, cnsVec1, baseType, simdSize); + } + + return gtNewSimdCmpOpAllNode(GT_EQ, TYP_UBYTE, orr, gtNewZeroConNode(simdType), baseType, simdSize); // Codegen example for byteLen=40 and OrdinalIgnoreCase mode with AVX: // diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 3daa96268d721..9d35a2c6ebfd9 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -1780,10 +1780,24 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm GenTree* op2 = node->Op(2); GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::EQ : GenCondition::NE; - if (!varTypeIsFloating(simdBaseType) && (simdSize != 64) && op2->IsVectorZero() && + if (!varTypeIsFloating(simdBaseType) && (simdSize != 64) && comp->compOpportunisticallyDependsOn(InstructionSet_SSE41) && !op1->OperIsHWIntrinsic(NI_AVX512F_ConvertMaskToVector)) { + if (!op2->IsVectorZero()) + { + // Optimize "X == Y" to "(X ^ Y) == 0" + GenTree* zeroVec = comp->gtNewZeroConNode(simdType); + GenTree* xorVec = comp->gtNewSimdBinOpNode(GT_XOR, simdType, op1, op2, simdBaseJitType, simdSize); + node->Op(1) = xorVec; + node->Op(2) = zeroVec; + BlockRange().InsertBefore(node, xorVec); + BlockRange().InsertBefore(node, zeroVec); + + // We'll re-visit the comparison node again + return xorVec; + } + // On SSE4.1 or higher we can optimize comparisons against zero to // just use PTEST. We can't support it for floating-point, however, // as it has both +0.0 and -0.0 where +0.0 == -0.0