Skip to content

Commit

Permalink
Add support for AvxVnni instructions under Experimental. (#51998)
Browse files Browse the repository at this point in the history
* Add support for AvxVnni instructions under Experimental.

* Add support for AvxVnni instructions

* Add preveiw feature attribute

* Handle operands in lsra

* Undo changes for Experimental

* Update JITEEVersionIdentifier and fix remaining issues

* Resolve Mono CI failure

* Disable tests

* Disable Vector128 tests

* Modify disable tests

Co-authored-by: Tanner Gooding <tagoo@outlook.com>
  • Loading branch information
weilinwa and tannergooding committed Jun 2, 2021
1 parent 190c69d commit ca889dd
Show file tree
Hide file tree
Showing 39 changed files with 4,543 additions and 75 deletions.
82 changes: 50 additions & 32 deletions src/coreclr/inc/corinfoinstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,22 +56,24 @@ enum CORINFO_InstructionSet
InstructionSet_POPCNT=16,
InstructionSet_Vector128=17,
InstructionSet_Vector256=18,
InstructionSet_X86Base_X64=19,
InstructionSet_SSE_X64=20,
InstructionSet_SSE2_X64=21,
InstructionSet_SSE3_X64=22,
InstructionSet_SSSE3_X64=23,
InstructionSet_SSE41_X64=24,
InstructionSet_SSE42_X64=25,
InstructionSet_AVX_X64=26,
InstructionSet_AVX2_X64=27,
InstructionSet_AES_X64=28,
InstructionSet_BMI1_X64=29,
InstructionSet_BMI2_X64=30,
InstructionSet_FMA_X64=31,
InstructionSet_LZCNT_X64=32,
InstructionSet_PCLMULQDQ_X64=33,
InstructionSet_POPCNT_X64=34,
InstructionSet_AVXVNNI=19,
InstructionSet_X86Base_X64=20,
InstructionSet_SSE_X64=21,
InstructionSet_SSE2_X64=22,
InstructionSet_SSE3_X64=23,
InstructionSet_SSSE3_X64=24,
InstructionSet_SSE41_X64=25,
InstructionSet_SSE42_X64=26,
InstructionSet_AVX_X64=27,
InstructionSet_AVX2_X64=28,
InstructionSet_AES_X64=29,
InstructionSet_BMI1_X64=30,
InstructionSet_BMI2_X64=31,
InstructionSet_FMA_X64=32,
InstructionSet_LZCNT_X64=33,
InstructionSet_PCLMULQDQ_X64=34,
InstructionSet_POPCNT_X64=35,
InstructionSet_AVXVNNI_X64=36,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_X86Base=1,
Expand All @@ -92,22 +94,24 @@ enum CORINFO_InstructionSet
InstructionSet_POPCNT=16,
InstructionSet_Vector128=17,
InstructionSet_Vector256=18,
InstructionSet_X86Base_X64=19,
InstructionSet_SSE_X64=20,
InstructionSet_SSE2_X64=21,
InstructionSet_SSE3_X64=22,
InstructionSet_SSSE3_X64=23,
InstructionSet_SSE41_X64=24,
InstructionSet_SSE42_X64=25,
InstructionSet_AVX_X64=26,
InstructionSet_AVX2_X64=27,
InstructionSet_AES_X64=28,
InstructionSet_BMI1_X64=29,
InstructionSet_BMI2_X64=30,
InstructionSet_FMA_X64=31,
InstructionSet_LZCNT_X64=32,
InstructionSet_PCLMULQDQ_X64=33,
InstructionSet_POPCNT_X64=34,
InstructionSet_AVXVNNI=19,
InstructionSet_X86Base_X64=20,
InstructionSet_SSE_X64=21,
InstructionSet_SSE2_X64=22,
InstructionSet_SSE3_X64=23,
InstructionSet_SSSE3_X64=24,
InstructionSet_SSE41_X64=25,
InstructionSet_SSE42_X64=26,
InstructionSet_AVX_X64=27,
InstructionSet_AVX2_X64=28,
InstructionSet_AES_X64=29,
InstructionSet_BMI1_X64=30,
InstructionSet_BMI2_X64=31,
InstructionSet_FMA_X64=32,
InstructionSet_LZCNT_X64=33,
InstructionSet_PCLMULQDQ_X64=34,
InstructionSet_POPCNT_X64=35,
InstructionSet_AVXVNNI_X64=36,
#endif // TARGET_X86

};
Expand Down Expand Up @@ -205,6 +209,8 @@ struct CORINFO_InstructionSetFlags
AddInstructionSet(InstructionSet_PCLMULQDQ_X64);
if (HasInstructionSet(InstructionSet_POPCNT))
AddInstructionSet(InstructionSet_POPCNT_X64);
if (HasInstructionSet(InstructionSet_AVXVNNI))
AddInstructionSet(InstructionSet_AVXVNNI_X64);
#endif // TARGET_AMD64
#ifdef TARGET_X86
#endif // TARGET_X86
Expand Down Expand Up @@ -342,6 +348,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_POPCNT);
if (resultflags.HasInstructionSet(InstructionSet_POPCNT_X64) && !resultflags.HasInstructionSet(InstructionSet_POPCNT))
resultflags.RemoveInstructionSet(InstructionSet_POPCNT_X64);
if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64))
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI))
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI_X64);
if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_SSE);
if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE))
Expand Down Expand Up @@ -530,6 +540,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "Vector128";
case InstructionSet_Vector256 :
return "Vector256";
case InstructionSet_AVXVNNI :
return "AVXVNNI";
case InstructionSet_AVXVNNI_X64 :
return "AVXVNNI_X64";
#endif // TARGET_AMD64
#ifdef TARGET_X86
case InstructionSet_X86Base :
Expand Down Expand Up @@ -568,6 +582,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "Vector128";
case InstructionSet_Vector256 :
return "Vector256";
case InstructionSet_AVXVNNI :
return "AVXVNNI";
#endif // TARGET_X86

default:
Expand Down Expand Up @@ -615,6 +631,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT;
case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
#endif // TARGET_AMD64
#ifdef TARGET_X86
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
Expand All @@ -633,6 +650,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT;
case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
#endif // TARGET_X86

default:
Expand Down
12 changes: 6 additions & 6 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 81a5e384-8ca5-4947-8b2e-1d76556728fd */
0x81a5e384,
0x8ca5,
0x4947,
{0x8b, 0x2e, 0x1d, 0x76, 0x55, 0x67, 0x28, 0xfd}
};
constexpr GUID JITEEVersionIdentifier = { /* 1052f490-cad7-4610-99bb-6f2bd91a1d19 */
0x1052f490,
0xcad7,
0x4610,
{0x99, 0xbb, 0x6f, 0x2b, 0xd9, 0x1a, 0x1d, 0x19}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
//
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_X86Base=22,
READYTORUN_INSTRUCTION_Dp=23,
READYTORUN_INSTRUCTION_Rdm=24,
READYTORUN_INSTRUCTION_AvxVnni=25,

};

Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2419,6 +2419,11 @@ void Compiler::compSetProcessor()
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX2);
}

if (!JitConfig.EnableAVXVNNI())
{
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVXVNNI);
}

if (!JitConfig.EnableLZCNT())
{
instructionSetFlags.RemoveInstructionSet(InstructionSet_LZCNT);
Expand Down
15 changes: 12 additions & 3 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ bool IsFMAInstruction(instruction ins)
return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION);
}

bool IsAVXVNNIInstruction(instruction ins)
{
return (ins >= INS_FIRST_AVXVNNI_INSTRUCTION) && (ins <= INS_LAST_AVXVNNI_INSTRUCTION);
}

bool IsBMIInstruction(instruction ins)
{
return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION);
Expand Down Expand Up @@ -6314,7 +6319,7 @@ void emitter::emitIns_SIMD_R_R_S_I(
void emitter::emitIns_SIMD_R_R_R_A(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir)
{
assert(IsFMAInstruction(ins));
assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins));
assert(UseVEXEncoding());

// Ensure we aren't overwriting op2
Expand Down Expand Up @@ -6395,7 +6400,7 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins,
void emitter::emitIns_SIMD_R_R_R_R(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg)
{
if (IsFMAInstruction(ins))
if (IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins))
{
assert(UseVEXEncoding());

Expand Down Expand Up @@ -6463,7 +6468,7 @@ void emitter::emitIns_SIMD_R_R_R_R(
void emitter::emitIns_SIMD_R_R_R_S(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs)
{
assert(IsFMAInstruction(ins));
assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins));
assert(UseVEXEncoding());

// Ensure we aren't overwriting op2
Expand Down Expand Up @@ -15633,6 +15638,10 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vfnmsub132ss:
case INS_vfnmsub213ss:
case INS_vfnmsub231ss:
case INS_vpdpbusd: // will be populated when the HW becomes publicly available
case INS_vpdpwssd: // will be populated when the HW becomes publicly available
case INS_vpdpbusds: // will be populated when the HW becomes publicly available
case INS_vpdpwssds: // will be populated when the HW becomes publicly available
// uops.info
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency += PERFSCORE_LATENCY_4C;
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
genHWIntrinsic_R_R_RM_R(node, ins);
break;
}
case NI_AVXVNNI_MultiplyWideningAndAdd:
case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
{
assert(targetReg != REG_NA);
assert(op1Reg != REG_NA);
assert(op2Reg != REG_NA);

genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3);
break;
}
default:
{
unreached();
Expand Down
8 changes: 7 additions & 1 deletion src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,13 @@ HARDWARE_INTRINSIC(AVX2, SubtractSaturate,
HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// AVXVNNI Intrinsics
HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg)
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa)
return InstructionSet_AVX_X64;
case InstructionSet_AVX2:
return InstructionSet_AVX2_X64;
case InstructionSet_AVXVNNI:
return InstructionSet_AVXVNNI_X64;
case InstructionSet_AES:
return InstructionSet_AES_X64;
case InstructionSet_BMI1:
Expand Down Expand Up @@ -80,6 +82,10 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className)
{
return InstructionSet_AVX2;
}
if (strcmp(className, "AvxVnni") == 0)
{
return InstructionSet_AVXVNNI;
}
}
else if (className[0] == 'S')
{
Expand Down Expand Up @@ -348,6 +354,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa)
case InstructionSet_AVX_X64:
case InstructionSet_AVX2:
case InstructionSet_AVX2_X64:
case InstructionSet_AVXVNNI:
case InstructionSet_AVXVNNI_X64:
case InstructionSet_BMI1:
case InstructionSet_BMI1_X64:
case InstructionSet_BMI2:
Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,13 @@ INST3(vfnmsub213ss, "fnmsub213ss", IUM_WR, BAD_CODE, BAD_CODE,
INST3(vfnmsub231ss, "fnmsub231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)

INST3(FIRST_AVXVNNI_INSTRUCTION, "FIRST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
INST3(vpdpbusd, "pdpbusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x50), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes
INST3(vpdpwssd, "pdpwssd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x52), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers
INST3(vpdpbusds, "pdpbusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x51), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes with Saturation
INST3(vpdpwssds, "pdpwssds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x53), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers with Saturation
INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)

// BMI1
INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ CONFIG_INTEGER(EnableSSE41, W("EnableSSE41"), 1) // Enable SSE41
CONFIG_INTEGER(EnableSSE42, W("EnableSSE42"), 1) // Enable SSE42
CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Enable AVX
CONFIG_INTEGER(EnableAVX2, W("EnableAVX2"), 1) // Enable AVX2
CONFIG_INTEGER(EnableAVXVNNI, W("EnableAVXVNNI"), 1) // Enable AVXVNNI
CONFIG_INTEGER(EnableFMA, W("EnableFMA"), 1) // Enable FMA
CONFIG_INTEGER(EnableAES, W("EnableAES"), 1) // Enable AES
CONFIG_INTEGER(EnableBMI1, W("EnableBMI1"), 1) // Enable BMI1
Expand Down
14 changes: 13 additions & 1 deletion src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6347,7 +6347,19 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
}
break;
}

case NI_AVXVNNI_MultiplyWideningAndAdd:
case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
{
if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional))
{
MakeSrcContained(node, op3);
}
else if (supportsRegOptional)
{
op3->SetRegOptional();
}
break;
}
case NI_BMI2_MultiplyNoFlags:
case NI_BMI2_X64_MultiplyNoFlags:
{
Expand Down
14 changes: 14 additions & 0 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2373,6 +2373,20 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
break;
}

case NI_AVXVNNI_MultiplyWideningAndAdd:
case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
{
assert(numArgs == 3);

tgtPrefUse = BuildUse(op1);
srcCount += 1;
srcCount += BuildDelayFreeUses(op2, op1);
srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1);

buildUses = false;
break;
}

case NI_AVX2_GatherVector128:
case NI_AVX2_GatherVector256:
{
Expand Down
Loading

0 comments on commit ca889dd

Please sign in to comment.