Skip to content

Commit 4fb4020

Browse files
Optimize Min/Max paths with AVX10.2 intrinsics (#112535)
Co-authored-by: Ruihan-Yin <ruihan.yin@intel.com>
1 parent 2b3b2d6 commit 4fb4020

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24366,6 +24366,12 @@ GenTree* Compiler::gtNewSimdMaxNode(
2436624366
#if defined(TARGET_XARCH)
2436724367
if (varTypeIsFloating(simdBaseType))
2436824368
{
24369+
if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
24370+
{
24371+
NamedIntrinsic minMaxIntrinsic = (simdSize == 64) ? NI_AVX10v2_V512_MinMax : NI_AVX10v2_MinMax;
24372+
return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x05), minMaxIntrinsic, simdBaseJitType,
24373+
simdSize);
24374+
}
2436924375
GenTree* op1Dup1 = fgMakeMultiUse(&op1);
2437024376
GenTree* op1Dup2 = gtCloneExpr(op1Dup1);
2437124377
GenTree* op1Dup3 = gtCloneExpr(op1Dup2);
@@ -24625,6 +24631,12 @@ GenTree* Compiler::gtNewSimdMinNode(
2462524631
#if defined(TARGET_XARCH)
2462624632
if (varTypeIsFloating(simdBaseType))
2462724633
{
24634+
if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
24635+
{
24636+
NamedIntrinsic minMaxIntrinsic = (simdSize == 64) ? NI_AVX10v2_V512_MinMax : NI_AVX10v2_MinMax;
24637+
return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x04), minMaxIntrinsic, simdBaseJitType,
24638+
simdSize);
24639+
}
2462824640
GenTree* op1Dup1 = fgMakeMultiUse(&op1);
2462924641
GenTree* op1Dup2 = gtCloneExpr(op1Dup1);
2463024642
GenTree* op1Dup3 = gtCloneExpr(op1Dup2);

src/coreclr/jit/importercalls.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9657,6 +9657,42 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
96579657
GenTree* op2 = impImplicitR4orR8Cast(impStackTop().val, callType);
96589658
GenTree* op1 = impImplicitR4orR8Cast(impStackTop(1).val, callType);
96599659

9660+
#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
9661+
// If Avx10.2 is enabled, the min/max operations can be done using the
9662+
// new minmax instructions, which are faster than using the combination
9663+
// of instructions for lower ISAs. We can use the minmax instructions for every isMax/isMagnitude/isNumber combination by encoding it in the control byte.
9664+
9665+
if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
9666+
{
9667+
impPopStack();
9668+
impPopStack();
9669+
/**
9670+
* ctrlByte A control byte (imm8) that specifies the type of min/max operation and sign behavior:
9671+
* - Bits [1:0] (Op-select): Determines the operation performed:
9672+
* - 0b00: minimum - Returns x if x ≤ y, otherwise y; NaN handling applies.
9673+
* - 0b01: maximum - Returns x if x ≥ y, otherwise y; NaN handling applies.
9674+
* - 0b10: minimumMagnitude - Compares absolute values, returns the smaller magnitude.
9675+
* - 0b11: maximumMagnitude - Compares absolute values, returns the larger magnitude.
9676+
* - Bit [4] (min/max mode): Selects which IEEE 754-2019 NaN-handling behavior the instruction uses:
9677+
* - 0: Standard min/max (propagates NaNs).
9678+
* - 1: Number-preferential min/max (returns the numeric operand when exactly one input is NaN).
9679+
* - Bits [3:2] (Sign control): Defines how the result’s sign is determined:
9680+
* - 0b00: Select sign from the first operand (src1).
9681+
* - 0b01: Select sign from the comparison result.
9682+
* - 0b10: Force result sign to 0 (positive).
9683+
* - 0b11: Force result sign to 1 (negative).
9684+
*/
9685+
uint8_t ctrlByte = 0x04; // Select sign from comparison result
9686+
ctrlByte |= isMax ? 0x01 : 0x00;
9687+
ctrlByte |= isMagnitude ? 0x02 : 0x00;
9688+
ctrlByte |= isNumber ? 0x10 : 0x00;
9689+
9690+
GenTree* retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(ctrlByte),
9691+
NI_AVX10v2_MinMaxScalar, callJitType, 16);
9692+
return gtNewSimdToScalarNode(genActualType(callType), retNode, callJitType, 16);
9693+
}
9694+
#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH
9695+
96609696
if (op2->IsCnsFltOrDbl())
96619697
{
96629698
cnsNode = op2->AsDblCon();

0 commit comments

Comments
 (0)