Skip to content

Commit

Permalink
Add lowering for get_One, get_AllBitsSet, CreateScalar(), CreateScalar…
Browse files Browse the repository at this point in the history
…Unsafe(). (#83402)

* Add lowering for get_One and get_AllBitsSet.

* Add lowering for CreateScalar() and CreateScalarUnsafe().

* Fix the incorrect switch statement and the disassembly output.
  • Loading branch information
DeepakRajendrakumaran committed Mar 15, 2023
1 parent f2a1411 commit 41cb2d3
Show file tree
Hide file tree
Showing 12 changed files with 90 additions and 27 deletions.
34 changes: 29 additions & 5 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,14 +491,38 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre

if (vecCon->IsAllBitsSet())
{
if ((attr != EA_32BYTE) || compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2))
switch (attr)
{
case EA_8BYTE:
case EA_16BYTE:
{
emit->emitIns_R_R(INS_pcmpeqd, attr, targetReg, targetReg);
return;
}
#if defined(FEATURE_SIMD)
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg);
#else
emit->emitIns_R_R(INS_pcmpeqd, attr, targetReg, targetReg);
case EA_32BYTE:
{
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2))
{
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg);
return;
}
break;
}

case EA_64BYTE:
{
assert(compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF));
return;
}
#endif // FEATURE_SIMD
break;

default:
{
unreached();
}
}
}

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17752,6 +17752,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vpsllvq:
case INS_vpsrlvd:
case INS_vpsrlvq:
case INS_vpternlogd:
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency += PERFSCORE_LATENCY_1C;
break;
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1195,7 +1195,9 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_Vector256_Create:
case NI_Vector512_Create:
case NI_Vector256_CreateScalar:
case NI_Vector512_CreateScalar:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
case NI_VectorT256_CreateBroadcast:
case NI_X86Base_BitScanForward:
case NI_X86Base_X64_BitScanForward:
Expand Down Expand Up @@ -1519,6 +1521,9 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_Vector256_get_AllBitsSet:
case NI_Vector256_get_One:
case NI_Vector256_get_Zero:
case NI_Vector512_get_AllBitsSet:
case NI_Vector512_get_One:
case NI_Vector512_get_Zero:
case NI_VectorT256_get_AllBitsSet:
case NI_VectorT256_get_One:
case NI_VectorT256_get_Zero:
Expand Down
42 changes: 27 additions & 15 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3743,6 +3743,9 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp)
case NI_Vector256_Create:
case NI_Vector256_CreateScalar:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_Create:
case NI_Vector512_CreateScalar:
case NI_Vector512_CreateScalarUnsafe:
#elif defined(TARGET_ARM64)
case NI_Vector64_Create:
case NI_Vector64_CreateScalar:
Expand Down Expand Up @@ -19228,6 +19231,7 @@ bool GenTree::isContainableHWIntrinsic() const

case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
{
// These HWIntrinsic operations are contained as part of scalar ops
return true;
Expand Down Expand Up @@ -21460,6 +21464,10 @@ GenTree* Compiler::gtNewSimdCreateScalarNode(
{
hwIntrinsicID = NI_Vector256_CreateScalar;
}
else if (simdSize == 64)
{
hwIntrinsicID = NI_Vector512_CreateScalar;
}
#elif defined(TARGET_ARM64)
if (simdSize == 8)
{
Expand Down Expand Up @@ -21600,6 +21608,10 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode(
{
hwIntrinsicID = NI_Vector256_CreateScalarUnsafe;
}
else if (simdSize == 64)
{
hwIntrinsicID = NI_Vector512_CreateScalarUnsafe;
}
#elif defined(TARGET_ARM64)
if (simdSize == 8)
{
Expand Down Expand Up @@ -21893,15 +21905,7 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(
// We don't guarantee a non-temporal load will actually occur, so fallback
// to regular aligned loads if the required ISA isn't supported.

if (simdSize == 64)
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
{
intrinsic = NI_AVX512F_LoadAlignedVector512NonTemporal;
isNonTemporal = true;
}
}
else if (simdSize == 32)
if (simdSize == 32)
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX2))
{
Expand All @@ -21914,6 +21918,14 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(
intrinsic = NI_AVX_LoadAlignedVector256;
}
}
else if (simdSize == 64)
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
{
intrinsic = NI_AVX512F_LoadAlignedVector512NonTemporal;
isNonTemporal = true;
}
}
else if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
intrinsic = NI_SSE41_LoadAlignedVector128NonTemporal;
Expand Down Expand Up @@ -23155,16 +23167,16 @@ GenTree* Compiler::gtNewSimdStoreAlignedNode(

NamedIntrinsic intrinsic = NI_Illegal;

if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_StoreAligned;
}
else if (simdSize == 32)
if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
intrinsic = NI_AVX_StoreAligned;
}
else if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_StoreAligned;
}
else if (simdBaseType != TYP_FLOAT)
{
intrinsic = NI_SSE2_StoreAligned;
Expand Down
5 changes: 4 additions & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -6355,7 +6355,9 @@ struct GenTreeVecCon : public GenTree
case NI_Vector256_Create:
case NI_Vector512_Create:
case NI_Vector256_CreateScalar:
case NI_Vector512_CreateScalar:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
#elif defined(TARGET_ARM64)
case NI_Vector64_Create:
case NI_Vector64_CreateScalar:
Expand All @@ -6371,7 +6373,8 @@ struct GenTreeVecCon : public GenTree
// CreateScalar leaves the upper bits as zero

#if defined(TARGET_XARCH)
if ((intrinsic != NI_Vector128_CreateScalar) && (intrinsic != NI_Vector256_CreateScalar))
if ((intrinsic != NI_Vector128_CreateScalar) && (intrinsic != NI_Vector256_CreateScalar) &&
(intrinsic != NI_Vector512_CreateScalar))
#elif defined(TARGET_ARM64)
if ((intrinsic != NI_Vector64_CreateScalar) && (intrinsic != NI_Vector128_CreateScalar))
#endif
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
{
case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
{
if (varTypeIsIntegral(baseType))
{
Expand Down
6 changes: 5 additions & 1 deletion src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,11 @@ HARDWARE_INTRINSIC(Vector256, Xor,
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Vector512 Intrinsics
HARDWARE_INTRINSIC(Vector512, Create, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, get_Zero, 64, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Vector512, CreateScalar, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, CreateScalarUnsafe, 64, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector512, get_AllBitsSet, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Vector512, get_One, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, get_Zero, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Vector512, Load, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, LoadAligned, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, LoadAlignedNonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1088,6 +1088,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_CreateScalar:
case NI_Vector256_CreateScalar:
case NI_Vector512_CreateScalar:
{
assert(sig->numArgs == 1);

Expand All @@ -1108,6 +1109,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
{
assert(sig->numArgs == 1);

Expand Down Expand Up @@ -1403,6 +1405,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_get_AllBitsSet:
case NI_Vector256_get_AllBitsSet:
case NI_Vector512_get_AllBitsSet:
{
assert(sig->numArgs == 0);
retNode = gtNewAllBitsSetConNode(retType);
Expand All @@ -1411,6 +1414,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_get_One:
case NI_Vector256_get_One:
case NI_Vector512_get_One:
{
assert(sig->numArgs == 0);
retNode = gtNewOneConNode(retType, simdBaseType);
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/instr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ const char* CodeGen::genInsDisplayName(emitter::instrDesc* id)
static char buf[4][TEMP_BUFFER_LEN];
const char* retbuf;

if (GetEmitter()->IsVexEncodedInstruction(ins) && !GetEmitter()->IsBMIInstruction(ins) &&
if (GetEmitter()->IsVexOrEvexEncodedInstruction(ins) && !GetEmitter()->IsBMIInstruction(ins) &&
!GetEmitter()->IsKInstruction(ins))
{
sprintf_s(buf[curBuf], TEMP_BUFFER_LEN, "v%s", insName);
Expand Down Expand Up @@ -700,6 +700,7 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op)
{
case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
{
// The hwintrinsic should be contained and its
// op1 should be either contained or spilled. This
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@ INST3(movdqu32, "movdqu32", IUM_WR, SSEFLT(0x7F), BAD_CODE,
INST3(movdqu64, "movdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_64Bit | INS_FLAGS_None)
INST3(vinsertf64x4, "insertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE4, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values
INST3(vinserti64x4, "inserti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE4, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values
INST3(vpternlogd, "pternlogd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction)
INST3(LAST_AVX512F_INSTRUCTION, "LAST_AVX512F_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)

INST3(FIRST_AVX512BW_INSTRUCTION, "FIRST_AVX512BW_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
Expand Down
13 changes: 9 additions & 4 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1046,6 +1046,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_Vector512_Create:
case NI_Vector128_CreateScalar:
case NI_Vector256_CreateScalar:
case NI_Vector512_CreateScalar:
{
// We don't directly support the Vector128/256/512 Create and CreateScalar methods in codegen
// and instead lower them to other intrinsic nodes in LowerHWIntrinsicCreate so we expect
Expand Down Expand Up @@ -1942,9 +1943,10 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
GenTree* tmp2 = nullptr;
GenTree* tmp3 = nullptr;

bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant<simd_t>(node, simdVal);
bool isCreateScalar = (intrinsicId == NI_Vector128_CreateScalar) || (intrinsicId == NI_Vector256_CreateScalar);
size_t argCnt = node->GetOperandCount();
bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant<simd_t>(node, simdVal);
bool isCreateScalar = (intrinsicId == NI_Vector128_CreateScalar) || (intrinsicId == NI_Vector256_CreateScalar) ||
(intrinsicId == NI_Vector512_CreateScalar);
size_t argCnt = node->GetOperandCount();

if (isConstant)
{
Expand Down Expand Up @@ -6688,6 +6690,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
{
case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
{
if (!varTypeIsIntegral(childNode->TypeGet()))
{
Expand Down Expand Up @@ -6834,6 +6837,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
{
case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
{
if (!supportsSIMDScalarLoads)
{
Expand Down Expand Up @@ -7063,7 +7067,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
NamedIntrinsic childNodeId = childNode->GetHWIntrinsicId();

if ((childNodeId == NI_Vector128_CreateScalarUnsafe) ||
(childNodeId == NI_Vector256_CreateScalarUnsafe))
(childNodeId == NI_Vector256_CreateScalarUnsafe) ||
(childNodeId == NI_Vector512_CreateScalarUnsafe))
{
// We have a very special case of BroadcastScalarToVector(CreateScalarUnsafe(op1))
//
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2019,6 +2019,7 @@ static GenTree* SkipContainedCreateScalarUnsafe(GenTree* node)
{
case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
{
return hwintrinsic->Op(1);
}
Expand Down Expand Up @@ -2127,6 +2128,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
case NI_Vector128_ToScalar:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector256_ToScalar:
case NI_Vector512_CreateScalarUnsafe:
{
assert(numArgs == 1);

Expand Down

0 comments on commit 41cb2d3

Please sign in to comment.