Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accelerate additional cross platform hardware intrinsics #61649

Merged
merged 12 commits into from
Jan 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 1 addition & 17 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3963,10 +3963,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
break;

case SIMDIntrinsicCast:
case SIMDIntrinsicConvertToSingle:
case SIMDIntrinsicConvertToInt32:
case SIMDIntrinsicConvertToDouble:
case SIMDIntrinsicConvertToInt64:
genSIMDIntrinsicUnOp(simdNode);
break;

Expand Down Expand Up @@ -4051,10 +4047,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
case SIMDIntrinsicCast:
result = INS_mov;
break;
case SIMDIntrinsicConvertToInt32:
case SIMDIntrinsicConvertToInt64:
result = INS_fcvtzs;
break;
case SIMDIntrinsicEqual:
result = INS_fcmeq;
break;
Expand All @@ -4081,10 +4073,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
case SIMDIntrinsicCast:
result = INS_mov;
break;
case SIMDIntrinsicConvertToDouble:
case SIMDIntrinsicConvertToSingle:
result = isUnsigned ? INS_ucvtf : INS_scvtf;
break;
case SIMDIntrinsicEqual:
result = INS_cmeq;
break;
Expand Down Expand Up @@ -4232,11 +4220,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
//
void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
{
assert((simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicCast) ||
(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicConvertToSingle) ||
(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicConvertToInt32) ||
(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicConvertToDouble) ||
(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicConvertToInt64));
assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicCast);

GenTree* op1 = simdNode->Op(1);
var_types baseType = simdNode->GetSimdBaseType();
Expand Down
156 changes: 145 additions & 11 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19273,21 +19273,59 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op,

NamedIntrinsic intrinsic = NI_Illegal;

#if defined(TARGET_XARCH)
if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
}
#endif // TARGET_XARCH

switch (op)
{
#if defined(TARGET_XARCH)
case GT_EQ:
{
intrinsic = (simdSize == 32) ? NI_Vector256_op_Equality : NI_Vector128_op_Equality;
break;
}

case GT_GE:
case GT_GT:
case GT_LE:
case GT_LT:
{
// We want to generate a comparison along the lines of
// GT_XX(op1, op2).As<T, TInteger>() == Vector128<TInteger>.AllBitsSet

NamedIntrinsic getAllBitsSet = NI_Illegal;

if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
intrinsic = NI_Vector256_op_Equality;
intrinsic = NI_Vector256_op_Equality;
getAllBitsSet = NI_Vector256_get_AllBitsSet;
}
else
{
intrinsic = NI_Vector128_op_Equality;
intrinsic = NI_Vector128_op_Equality;
getAllBitsSet = NI_Vector128_get_AllBitsSet;
}

op1 = gtNewSimdCmpOpNode(op, simdBaseType, op1, op2, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ false);

if (simdBaseType == TYP_FLOAT)
{
simdBaseType = TYP_INT;
simdBaseJitType = CORINFO_TYPE_INT;
}
else if (simdBaseType == TYP_DOUBLE)
{
simdBaseType = TYP_LONG;
simdBaseJitType = CORINFO_TYPE_LONG;
}

op2 = gtNewSimdHWIntrinsicNode(simdBaseType, getAllBitsSet, simdBaseJitType, simdSize);
break;
}
#elif defined(TARGET_ARM64)
Expand All @@ -19296,6 +19334,45 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op,
intrinsic = (simdSize == 8) ? NI_Vector64_op_Equality : NI_Vector128_op_Equality;
break;
}

case GT_GE:
case GT_GT:
case GT_LE:
case GT_LT:
{
// We want to generate a comparison along the lines of
// GT_XX(op1, op2).As<T, TInteger>() == Vector128<TInteger>.AllBitsSet

NamedIntrinsic getAllBitsSet = NI_Illegal;

if (simdSize == 8)
{
intrinsic = NI_Vector64_op_Equality;
getAllBitsSet = NI_Vector64_get_AllBitsSet;
}
else
{
intrinsic = NI_Vector128_op_Equality;
getAllBitsSet = NI_Vector128_get_AllBitsSet;
}

op1 = gtNewSimdCmpOpNode(op, simdBaseType, op1, op2, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ false);

if (simdBaseType == TYP_FLOAT)
{
simdBaseType = TYP_INT;
simdBaseJitType = CORINFO_TYPE_INT;
}
else if (simdBaseType == TYP_DOUBLE)
{
simdBaseType = TYP_LONG;
simdBaseJitType = CORINFO_TYPE_LONG;
}

op2 = gtNewSimdHWIntrinsicNode(simdBaseType, getAllBitsSet, simdBaseJitType, simdSize);
break;
}
#else
#error Unsupported platform
#endif // !TARGET_XARCH && !TARGET_ARM64
Expand Down Expand Up @@ -19335,24 +19412,81 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps op,

NamedIntrinsic intrinsic = NI_Illegal;

#if defined(TARGET_XARCH)
if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
}
#endif // TARGET_XARCH

switch (op)
{
#if defined(TARGET_XARCH)
case GT_NE:
case GT_EQ:
case GT_GE:
case GT_GT:
case GT_LE:
case GT_LT:
{
if (simdSize == 32)
// We want to generate a comparison along the lines of
// GT_XX(op1, op2).As<T, TInteger>() != Vector128<TInteger>.Zero

intrinsic = (simdSize == 32) ? NI_Vector256_op_Inequality : NI_Vector128_op_Inequality;

op1 = gtNewSimdCmpOpNode(op, simdBaseType, op1, op2, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ false);

if (simdBaseType == TYP_FLOAT)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
intrinsic = NI_Vector256_op_Inequality;
simdBaseType = TYP_INT;
simdBaseJitType = CORINFO_TYPE_INT;
}
else
else if (simdBaseType == TYP_DOUBLE)
{
intrinsic = NI_Vector128_op_Inequality;
simdBaseType = TYP_LONG;
simdBaseJitType = CORINFO_TYPE_LONG;
}

op2 = gtNewSimdZeroNode(simdBaseType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
break;
}

case GT_NE:
{
intrinsic = (simdSize == 32) ? NI_Vector256_op_Inequality : NI_Vector128_op_Inequality;
break;
}
#elif defined(TARGET_ARM64)
case GT_EQ:
case GT_GE:
case GT_GT:
case GT_LE:
case GT_LT:
{
// We want to generate a comparison along the lines of
// GT_XX(op1, op2).As<T, TInteger>() != Vector128<TInteger>.Zero

intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality;

op1 = gtNewSimdCmpOpNode(op, simdBaseType, op1, op2, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ false);

if (simdBaseType == TYP_FLOAT)
{
simdBaseType = TYP_INT;
simdBaseJitType = CORINFO_TYPE_INT;
}
else if (simdBaseType == TYP_DOUBLE)
{
simdBaseType = TYP_LONG;
simdBaseJitType = CORINFO_TYPE_LONG;
}

op2 = gtNewSimdZeroNode(simdBaseType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
break;
}

case GT_NE:
{
intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,

bool isIsaSupported = comp->compHWIntrinsicDependsOn(isa) && comp->compSupportsHWIntrinsic(isa);

if (strcmp(methodName, "get_IsSupported") == 0)
if ((strcmp(methodName, "get_IsSupported") == 0) || (strcmp(methodName, "get_IsHardwareAccelerated") == 0))
{
return isIsaSupported ? (comp->compExactlyDependsOn(isa) ? NI_IsSupported_True : NI_IsSupported_Dynamic)
: NI_IsSupported_False;
Expand Down
Loading