Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for AvxVnni instructions under Experimental. #51998

Merged
merged 12 commits into from
Jun 2, 2021
82 changes: 50 additions & 32 deletions src/coreclr/inc/corinfoinstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,22 +56,24 @@ enum CORINFO_InstructionSet
InstructionSet_POPCNT=16,
InstructionSet_Vector128=17,
InstructionSet_Vector256=18,
InstructionSet_X86Base_X64=19,
InstructionSet_SSE_X64=20,
InstructionSet_SSE2_X64=21,
InstructionSet_SSE3_X64=22,
InstructionSet_SSSE3_X64=23,
InstructionSet_SSE41_X64=24,
InstructionSet_SSE42_X64=25,
InstructionSet_AVX_X64=26,
InstructionSet_AVX2_X64=27,
InstructionSet_AES_X64=28,
InstructionSet_BMI1_X64=29,
InstructionSet_BMI2_X64=30,
InstructionSet_FMA_X64=31,
InstructionSet_LZCNT_X64=32,
InstructionSet_PCLMULQDQ_X64=33,
InstructionSet_POPCNT_X64=34,
InstructionSet_AVXVNNI=19,
InstructionSet_X86Base_X64=20,
InstructionSet_SSE_X64=21,
InstructionSet_SSE2_X64=22,
InstructionSet_SSE3_X64=23,
InstructionSet_SSSE3_X64=24,
InstructionSet_SSE41_X64=25,
InstructionSet_SSE42_X64=26,
InstructionSet_AVX_X64=27,
InstructionSet_AVX2_X64=28,
InstructionSet_AES_X64=29,
InstructionSet_BMI1_X64=30,
InstructionSet_BMI2_X64=31,
InstructionSet_FMA_X64=32,
InstructionSet_LZCNT_X64=33,
InstructionSet_PCLMULQDQ_X64=34,
InstructionSet_POPCNT_X64=35,
InstructionSet_AVXVNNI_X64=36,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_X86Base=1,
Expand All @@ -92,22 +94,24 @@ enum CORINFO_InstructionSet
InstructionSet_POPCNT=16,
InstructionSet_Vector128=17,
InstructionSet_Vector256=18,
InstructionSet_X86Base_X64=19,
InstructionSet_SSE_X64=20,
InstructionSet_SSE2_X64=21,
InstructionSet_SSE3_X64=22,
InstructionSet_SSSE3_X64=23,
InstructionSet_SSE41_X64=24,
InstructionSet_SSE42_X64=25,
InstructionSet_AVX_X64=26,
InstructionSet_AVX2_X64=27,
InstructionSet_AES_X64=28,
InstructionSet_BMI1_X64=29,
InstructionSet_BMI2_X64=30,
InstructionSet_FMA_X64=31,
InstructionSet_LZCNT_X64=32,
InstructionSet_PCLMULQDQ_X64=33,
InstructionSet_POPCNT_X64=34,
InstructionSet_AVXVNNI=19,
InstructionSet_X86Base_X64=20,
InstructionSet_SSE_X64=21,
InstructionSet_SSE2_X64=22,
InstructionSet_SSE3_X64=23,
InstructionSet_SSSE3_X64=24,
InstructionSet_SSE41_X64=25,
InstructionSet_SSE42_X64=26,
InstructionSet_AVX_X64=27,
InstructionSet_AVX2_X64=28,
InstructionSet_AES_X64=29,
InstructionSet_BMI1_X64=30,
InstructionSet_BMI2_X64=31,
InstructionSet_FMA_X64=32,
InstructionSet_LZCNT_X64=33,
InstructionSet_PCLMULQDQ_X64=34,
InstructionSet_POPCNT_X64=35,
InstructionSet_AVXVNNI_X64=36,
#endif // TARGET_X86

};
Expand Down Expand Up @@ -205,6 +209,8 @@ struct CORINFO_InstructionSetFlags
AddInstructionSet(InstructionSet_PCLMULQDQ_X64);
if (HasInstructionSet(InstructionSet_POPCNT))
AddInstructionSet(InstructionSet_POPCNT_X64);
if (HasInstructionSet(InstructionSet_AVXVNNI))
AddInstructionSet(InstructionSet_AVXVNNI_X64);
#endif // TARGET_AMD64
#ifdef TARGET_X86
#endif // TARGET_X86
Expand Down Expand Up @@ -342,6 +348,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_POPCNT);
if (resultflags.HasInstructionSet(InstructionSet_POPCNT_X64) && !resultflags.HasInstructionSet(InstructionSet_POPCNT))
resultflags.RemoveInstructionSet(InstructionSet_POPCNT_X64);
if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64))
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI))
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI_X64);
if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_SSE);
if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE))
Expand Down Expand Up @@ -530,6 +540,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "Vector128";
case InstructionSet_Vector256 :
return "Vector256";
case InstructionSet_AVXVNNI :
return "AVXVNNI";
case InstructionSet_AVXVNNI_X64 :
return "AVXVNNI_X64";
#endif // TARGET_AMD64
#ifdef TARGET_X86
case InstructionSet_X86Base :
Expand Down Expand Up @@ -568,6 +582,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "Vector128";
case InstructionSet_Vector256 :
return "Vector256";
case InstructionSet_AVXVNNI :
return "AVXVNNI";
#endif // TARGET_X86

default:
Expand Down Expand Up @@ -615,6 +631,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT;
case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
#endif // TARGET_AMD64
#ifdef TARGET_X86
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
Expand All @@ -633,6 +650,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT;
case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
#endif // TARGET_X86

default:
Expand Down
12 changes: 6 additions & 6 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 895f5d24-eb01-4aff-ad6c-1efc6a91498a */
0x895f5d24,
0xeb01,
0x4aff,
{0xad, 0x6c, 0x1e, 0xfc, 0x6a, 0x91, 0x49, 0x8a}
};
// Compile-time GUID constant named JITEEVersionIdentifier. The initializer
// below spells out, field by field, the GUID quoted in the comment on the
// opening line; keep the two in sync when the value is replaced.
// NOTE(review): presumably this value must be regenerated whenever the
// JIT-EE interface changes (here it changes alongside the renumbered
// CORINFO_InstructionSet values) -- confirm against the header's guidance.
constexpr GUID JITEEVersionIdentifier = { /* 1052f490-cad7-4610-99bb-6f2bd91a1d19 */
// First group of the GUID (32 bits).
0x1052f490,
// Second group (16 bits).
0xcad7,
// Third group (16 bits).
0x4610,
// Remaining two groups, written out byte by byte.
{0x99, 0xbb, 0x6f, 0x2b, 0xd9, 0x1a, 0x1d, 0x19}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
//
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_X86Base=22,
READYTORUN_INSTRUCTION_Dp=23,
READYTORUN_INSTRUCTION_Rdm=24,
READYTORUN_INSTRUCTION_AvxVnni=25,

};

Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2419,6 +2419,11 @@ void Compiler::compSetProcessor()
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX2);
}

if (!JitConfig.EnableAVXVNNI())
{
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVXVNNI);
}

if (!JitConfig.EnableLZCNT())
{
instructionSetFlags.RemoveInstructionSet(InstructionSet_LZCNT);
Expand Down
15 changes: 12 additions & 3 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ bool IsFMAInstruction(instruction ins)
return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION);
}

// IsAVXVNNIInstruction: reports whether `ins` lies in the inclusive range
// bounded by INS_FIRST_AVXVNNI_INSTRUCTION and INS_LAST_AVXVNNI_INSTRUCTION.
//
// Arguments:
//    ins - the instruction to classify
//
// Return Value:
//    true if `ins` is within the range, false otherwise.
bool IsAVXVNNIInstruction(instruction ins)
{
    // Anything ordered before the first marker is outside the range.
    if (ins < INS_FIRST_AVXVNNI_INSTRUCTION)
    {
        return false;
    }

    // Otherwise membership depends only on the upper bound.
    return !(ins > INS_LAST_AVXVNNI_INSTRUCTION);
}

bool IsBMIInstruction(instruction ins)
{
return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION);
Expand Down Expand Up @@ -6314,7 +6319,7 @@ void emitter::emitIns_SIMD_R_R_S_I(
void emitter::emitIns_SIMD_R_R_R_A(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir)
{
assert(IsFMAInstruction(ins));
assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins));
assert(UseVEXEncoding());

// Ensure we aren't overwriting op2
Expand Down Expand Up @@ -6395,7 +6400,7 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins,
void emitter::emitIns_SIMD_R_R_R_R(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg)
{
if (IsFMAInstruction(ins))
if (IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins))
{
assert(UseVEXEncoding());

Expand Down Expand Up @@ -6463,7 +6468,7 @@ void emitter::emitIns_SIMD_R_R_R_R(
void emitter::emitIns_SIMD_R_R_R_S(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs)
{
assert(IsFMAInstruction(ins));
assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins));
assert(UseVEXEncoding());

// Ensure we aren't overwriting op2
Expand Down Expand Up @@ -15633,6 +15638,10 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vfnmsub132ss:
case INS_vfnmsub213ss:
case INS_vfnmsub231ss:
case INS_vpdpbusd: // will be populated when the HW becomes publicly available
case INS_vpdpwssd: // will be populated when the HW becomes publicly available
case INS_vpdpbusds: // will be populated when the HW becomes publicly available
case INS_vpdpwssds: // will be populated when the HW becomes publicly available
// uops.info
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency += PERFSCORE_LATENCY_4C;
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
genHWIntrinsic_R_R_RM_R(node, ins);
break;
}
case NI_AVXVNNI_MultiplyWideningAndAdd:
case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
{
assert(targetReg != REG_NA);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These assertions are not really needed here: the same invariants are already checked at the beginning of genHWIntrinsic_R_R_R_RM.

assert(op1Reg != REG_NA);
assert(op2Reg != REG_NA);

genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3);
break;
}
default:
{
unreached();
Expand Down
8 changes: 7 additions & 1 deletion src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,13 @@ HARDWARE_INTRINSIC(AVX2, SubtractSaturate,
HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// AVXVNNI Intrinsics
HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg)
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa)
return InstructionSet_AVX_X64;
case InstructionSet_AVX2:
return InstructionSet_AVX2_X64;
case InstructionSet_AVXVNNI:
return InstructionSet_AVXVNNI_X64;
case InstructionSet_AES:
return InstructionSet_AES_X64;
case InstructionSet_BMI1:
Expand Down Expand Up @@ -80,6 +82,10 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className)
{
return InstructionSet_AVX2;
}
if (strcmp(className, "AvxVnni") == 0)
{
return InstructionSet_AVXVNNI;
}
}
else if (className[0] == 'S')
{
Expand Down Expand Up @@ -348,6 +354,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa)
case InstructionSet_AVX_X64:
case InstructionSet_AVX2:
case InstructionSet_AVX2_X64:
case InstructionSet_AVXVNNI:
case InstructionSet_AVXVNNI_X64:
case InstructionSet_BMI1:
case InstructionSet_BMI1_X64:
case InstructionSet_BMI2:
Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,13 @@ INST3(vfnmsub213ss, "fnmsub213ss", IUM_WR, BAD_CODE, BAD_CODE,
INST3(vfnmsub231ss, "fnmsub231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)

INST3(FIRST_AVXVNNI_INSTRUCTION, "FIRST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
INST3(vpdpbusd, "pdpbusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x50), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes
INST3(vpdpwssd, "pdpwssd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x52), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers
INST3(vpdpbusds, "pdpbusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x51), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes with Saturation
INST3(vpdpwssds, "pdpwssds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x53), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers with Saturation
INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)

// BMI1
INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ CONFIG_INTEGER(EnableSSE41, W("EnableSSE41"), 1) // Enable SSE41
CONFIG_INTEGER(EnableSSE42, W("EnableSSE42"), 1) // Enable SSE42
CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Enable AVX
CONFIG_INTEGER(EnableAVX2, W("EnableAVX2"), 1) // Enable AVX2
CONFIG_INTEGER(EnableAVXVNNI, W("EnableAVXVNNI"), 1) // Enable AVXVNNI
CONFIG_INTEGER(EnableFMA, W("EnableFMA"), 1) // Enable FMA
CONFIG_INTEGER(EnableAES, W("EnableAES"), 1) // Enable AES
CONFIG_INTEGER(EnableBMI1, W("EnableBMI1"), 1) // Enable BMI1
Expand Down
14 changes: 13 additions & 1 deletion src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6308,7 +6308,19 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
}
break;
}

case NI_AVXVNNI_MultiplyWideningAndAdd:
case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
{
if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional))
{
MakeSrcContained(node, op3);
}
else if (supportsRegOptional)
{
op3->SetRegOptional();
}
break;
}
case NI_BMI2_MultiplyNoFlags:
case NI_BMI2_X64_MultiplyNoFlags:
{
Expand Down
14 changes: 14 additions & 0 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2373,6 +2373,20 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
break;
}

case NI_AVXVNNI_MultiplyWideningAndAdd:
case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
{
assert(numArgs == 3);

tgtPrefUse = BuildUse(op1);
srcCount += 1;
srcCount += BuildDelayFreeUses(op2, op1);
srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1);

buildUses = false;
break;
}

case NI_AVX2_GatherVector128:
case NI_AVX2_GatherVector256:
{
Expand Down
Loading