From 7865cb848b5d69f100753529bfaaf2b9f441ad89 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 3 Mar 2021 21:35:24 -0800 Subject: [PATCH 01/10] Add support for AvxVnni instructions under Experimental. --- src/coreclr/inc/corinfoinstructionset.h | 82 +-- src/coreclr/inc/readytoruninstructionset.h | 1 + src/coreclr/jit/compiler.cpp | 5 + src/coreclr/jit/emitxarch.cpp | 18 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 15 + src/coreclr/jit/hwintrinsiclistxarch.h | 8 +- src/coreclr/jit/hwintrinsicxarch.cpp | 8 + src/coreclr/jit/instrsxarch.h | 7 + src/coreclr/jit/jitconfigvalues.h | 1 + src/coreclr/jit/lowerxarch.cpp | 14 +- .../Runtime/ReadyToRunInstructionSet.cs | 1 + .../Runtime/ReadyToRunInstructionSetHelper.cs | 4 + .../JitInterface/CorInfoInstructionSet.cs | 80 +-- .../ThunkGenerator/InstructionSetDesc.txt | 2 + src/coreclr/vm/codeman.cpp | 9 + .../ILLink.Substitutions.NoX86Intrinsics.xml | 6 + .../System.Private.CoreLib.Shared.projitems | 2 + .../X86/Avx2.PlatformNotSupported.cs | 4 +- .../src/System/Runtime/Intrinsics/X86/Avx2.cs | 4 +- .../X86/AvxVnni.PlatformNotSupported.cs | 70 +++ .../System/Runtime/Intrinsics/X86/AvxVnni.cs | 72 +++ .../Directory.Build.props | 6 + ...System.Runtime.Intrinsics.Experimental.sln | 85 +++ .../System.Runtime.Intrinsics.Experimental.cs | 29 + ...tem.Runtime.Intrinsics.Experimental.csproj | 15 + ...tem.Runtime.Intrinsics.Experimental.csproj | 10 + .../ref/System.Runtime.Intrinsics.cs | 5 +- src/libraries/pkg/baseline/packageIndex.json | 8 + .../AvxVnni/MultiplyWideningAndAdd.Byte.cs | 501 +++++++++++++++++ .../AvxVnni/MultiplyWideningAndAdd.Int16.cs | 500 +++++++++++++++++ .../MultiplyWideningAndAddSaturate.Byte.cs | 504 +++++++++++++++++ .../MultiplyWideningAndAddSaturate.Int16.cs | 505 +++++++++++++++++ .../AvxVnni/MultiplyWideningAndAdd_r.csproj | 22 + .../AvxVnni/MultiplyWideningAndAdd_ro.csproj | 22 + .../X86/AvxVnni/Program.AvxVnni.cs | 21 + .../MultiplyWideningAndAdd.Byte.cs | 515 ++++++++++++++++++ .../MultiplyWideningAndAdd.Int16.cs | 500 +++++++++++++++++ .../MultiplyWideningAndAddSaturate.Byte.cs | 503 +++++++++++++++++ .../MultiplyWideningAndAddSaturate.Int16.cs | 503 +++++++++++++++++ .../MultiplyWideningAndAdd_r.csproj | 22 + .../MultiplyWideningAndAdd_ro.csproj | 22 + .../Program.AvxVnni_Vector128.cs | 21 + .../HardwareIntrinsics/X86/Shared/Program.cs | 1 + 43 files changed, 4659 insertions(+), 74 deletions(-) create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs create mode 100644 src/libraries/System.Runtime.Intrinsics.Experimental/Directory.Build.props create mode 100644 src/libraries/System.Runtime.Intrinsics.Experimental/System.Runtime.Intrinsics.Experimental.sln create mode 100644 src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.cs create mode 100644 src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.csproj create mode 100644 src/libraries/System.Runtime.Intrinsics.Experimental/src/System.Runtime.Intrinsics.Experimental.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Byte.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Int16.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Byte.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Int16.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/Program.AvxVnni.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Byte.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Int16.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Byte.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Int16.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_r.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_ro.csproj create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/Program.AvxVnni_Vector128.cs diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 5b7ec3f559c25..39003cb739852 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -56,22 +56,24 @@ enum CORINFO_InstructionSet InstructionSet_POPCNT=16, InstructionSet_Vector128=17, InstructionSet_Vector256=18, - InstructionSet_X86Base_X64=19, - InstructionSet_SSE_X64=20, - InstructionSet_SSE2_X64=21, - InstructionSet_SSE3_X64=22, - InstructionSet_SSSE3_X64=23, - InstructionSet_SSE41_X64=24, - InstructionSet_SSE42_X64=25, - InstructionSet_AVX_X64=26, - InstructionSet_AVX2_X64=27, - InstructionSet_AES_X64=28, - InstructionSet_BMI1_X64=29, - InstructionSet_BMI2_X64=30, - InstructionSet_FMA_X64=31, - InstructionSet_LZCNT_X64=32, - InstructionSet_PCLMULQDQ_X64=33, - InstructionSet_POPCNT_X64=34, + InstructionSet_AVXVNNI=19, + InstructionSet_X86Base_X64=20, + InstructionSet_SSE_X64=21, + InstructionSet_SSE2_X64=22, + InstructionSet_SSE3_X64=23, + InstructionSet_SSSE3_X64=24, + InstructionSet_SSE41_X64=25, + InstructionSet_SSE42_X64=26, + InstructionSet_AVX_X64=27, + InstructionSet_AVX2_X64=28, + InstructionSet_AES_X64=29, + InstructionSet_BMI1_X64=30, + InstructionSet_BMI2_X64=31, + InstructionSet_FMA_X64=32, + InstructionSet_LZCNT_X64=33, + InstructionSet_PCLMULQDQ_X64=34, + InstructionSet_POPCNT_X64=35, + InstructionSet_AVXVNNI_X64=36, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -92,22 +94,24 @@ enum CORINFO_InstructionSet InstructionSet_POPCNT=16, InstructionSet_Vector128=17, InstructionSet_Vector256=18, - InstructionSet_X86Base_X64=19, - InstructionSet_SSE_X64=20, - InstructionSet_SSE2_X64=21, - InstructionSet_SSE3_X64=22, - InstructionSet_SSSE3_X64=23, - InstructionSet_SSE41_X64=24, - InstructionSet_SSE42_X64=25, - InstructionSet_AVX_X64=26, - InstructionSet_AVX2_X64=27, - InstructionSet_AES_X64=28, - InstructionSet_BMI1_X64=29, - InstructionSet_BMI2_X64=30, - InstructionSet_FMA_X64=31, - InstructionSet_LZCNT_X64=32, - InstructionSet_PCLMULQDQ_X64=33, - InstructionSet_POPCNT_X64=34, + InstructionSet_AVXVNNI=19, + InstructionSet_X86Base_X64=20, + InstructionSet_SSE_X64=21, + InstructionSet_SSE2_X64=22, + InstructionSet_SSE3_X64=23, + InstructionSet_SSSE3_X64=24, + InstructionSet_SSE41_X64=25, + InstructionSet_SSE42_X64=26, + InstructionSet_AVX_X64=27, + InstructionSet_AVX2_X64=28, + InstructionSet_AES_X64=29, + InstructionSet_BMI1_X64=30, + InstructionSet_BMI2_X64=31, + InstructionSet_FMA_X64=32, + InstructionSet_LZCNT_X64=33, + InstructionSet_PCLMULQDQ_X64=34, + InstructionSet_POPCNT_X64=35, + InstructionSet_AVXVNNI_X64=36, #endif // TARGET_X86 }; @@ -205,6 +209,8 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_PCLMULQDQ_X64); if (HasInstructionSet(InstructionSet_POPCNT)) AddInstructionSet(InstructionSet_POPCNT_X64); + if (HasInstructionSet(InstructionSet_AVXVNNI)) + AddInstructionSet(InstructionSet_AVXVNNI_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -342,6 +348,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_POPCNT); if (resultflags.HasInstructionSet(InstructionSet_POPCNT_X64) && !resultflags.HasInstructionSet(InstructionSet_POPCNT)) resultflags.RemoveInstructionSet(InstructionSet_POPCNT_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) @@ -530,6 +540,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector128"; case InstructionSet_Vector256 : return "Vector256"; + case InstructionSet_AVXVNNI : + return "AVXVNNI"; + case InstructionSet_AVXVNNI_X64 : + return "AVXVNNI_X64"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : @@ -568,6 +582,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector128"; case InstructionSet_Vector256 : return "Vector256"; + case InstructionSet_AVXVNNI : + return "AVXVNNI"; #endif // TARGET_X86 default: @@ -615,6 +631,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT; case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ; case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT; + case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -633,6 +650,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT; case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ; case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT; + case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 9a4d0ba2ef9b1..1b66c6e520891 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -33,6 +33,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_X86Base=22, READYTORUN_INSTRUCTION_Dp=23, READYTORUN_INSTRUCTION_Rdm=24, + READYTORUN_INSTRUCTION_AvxVnni=25, }; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 621da35a7b0f9..26b80a8c47310 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2415,6 +2415,11 @@ void Compiler::compSetProcessor() instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX2); } + if (!JitConfig.EnableAVXVNNI()) + { + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVXVNNI); + } + if (!JitConfig.EnableLZCNT()) { instructionSetFlags.RemoveInstructionSet(InstructionSet_LZCNT); diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 18d37a489d5f3..0b6e31924033a 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -44,6 +44,11 @@ bool IsFMAInstruction(instruction ins) return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION); } +bool IsAVXVNNIInstruction(instruction ins) +{ + return (ins >= INS_FIRST_AVXVNNI_INSTRUCTION) && (ins <= INS_LAST_AVXVNNI_INSTRUCTION); +} + bool IsBMIInstruction(instruction ins) { return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION); @@ -6158,7 +6163,7 @@ void emitter::emitIns_SIMD_R_R_S_I( void emitter::emitIns_SIMD_R_R_R_A( instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir) { - assert(IsFMAInstruction(ins)); + assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins)); assert(UseVEXEncoding()); if (op1Reg != targetReg) @@ -6267,6 +6272,11 @@ void emitter::emitIns_SIMD_R_R_R_R( emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg); } + else if (IsAVXVNNIInstruction(ins)) + { + assert(UseVEXEncoding()); + emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg); + } else if (UseVEXEncoding()) { assert(isAvxBlendv(ins) || isSse41Blendv(ins)); @@ -6328,7 +6338,7 @@ void emitter::emitIns_SIMD_R_R_R_R( void emitter::emitIns_SIMD_R_R_R_S( instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs) { - assert(IsFMAInstruction(ins)); + assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins)); assert(UseVEXEncoding()); if (op1Reg != targetReg) @@ -15391,6 +15401,10 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_vfnmsub132ss: case INS_vfnmsub213ss: case INS_vfnmsub231ss: + case INS_vpdpbusd: //will be populated when the HW becomes publicly available + case INS_vpdpwssd: //will be populated when the HW becomes publicly available + case INS_vpdpbusds: //will be populated when the HW becomes publicly available + case INS_vpdpwssds: //will be populated when the HW becomes publicly available // uops.info result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency += PERFSCORE_LATENCY_4C; diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 42711acaed83a..6aa19ce937945 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -333,7 +333,22 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) genHWIntrinsic_R_R_RM_R(node, ins); break; } + case NI_AVXVNNI_MultiplyWideningAndAdd: + case NI_AVXVNNI_MultiplyWideningAndAddSaturate: + { + assert(targetReg != REG_NA); + assert(op1Reg != REG_NA); + assert(op2Reg != REG_NA); + + if (targetReg != op1Reg) + { + node->SetRegNum(op1Reg); + targetReg = op1Reg; + } + genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3); + break; + } default: { unreached(); diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index e5aec99897e4c..5fff3718ae204 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -567,7 +567,13 @@ HARDWARE_INTRINSIC(AVX2, SubtractSaturate, HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) - +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// AVXVNNI Intrinsics +HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 73a43ab0584c7..88e853a90eaae 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -36,6 +36,8 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_AVX_X64; case InstructionSet_AVX2: return InstructionSet_AVX2_X64; + case InstructionSet_AVXVNNI: + return InstructionSet_AVXVNNI_X64; case InstructionSet_AES: return InstructionSet_AES_X64; case InstructionSet_BMI1: @@ -80,6 +82,10 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) { return InstructionSet_AVX2; } + if (strcmp(className, "AvxVnni") == 0) + { + return InstructionSet_AVXVNNI; + } } else if (className[0] == 'S') { @@ -348,6 +354,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_AVX_X64: case InstructionSet_AVX2: case InstructionSet_AVX2_X64: + case InstructionSet_AVXVNNI: + case InstructionSet_AVXVNNI_X64: case InstructionSet_BMI1: case InstructionSet_BMI1_X64: case InstructionSet_BMI2: diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 25e5de73caf74..750f1b215036b 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -583,6 +583,13 @@ INST3(vfnmsub213ss, "fnmsub213ss", IUM_WR, BAD_CODE, BAD_CODE, INST3(vfnmsub231ss, "fnmsub231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_Flags_IsDstDstSrcAVXInstruction) // INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) +INST3(FIRST_AVXVNNI_INSTRUCTION, "FIRST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) +INST3(vpdpbusd, "pdpbusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x50), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes +INST3(vpdpwssd, "pdpwssd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x52), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers +INST3(vpdpbusds, "pdpbusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x51), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes with Saturation +INST3(vpdpwssds, "pdpwssds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x53), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers with Saturation +INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) + // BMI1 INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index ff52e6e050e11..5b66d4e4842c2 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -273,6 +273,7 @@ CONFIG_INTEGER(EnableSSE41, W("EnableSSE41"), 1) // Enable SSE41 CONFIG_INTEGER(EnableSSE42, W("EnableSSE42"), 1) // Enable SSE42 CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Enable AVX CONFIG_INTEGER(EnableAVX2, W("EnableAVX2"), 1) // Enable AVX2 +CONFIG_INTEGER(EnableAVXVNNI, W("EnableAVXVNNI"), 1) // Enable AVXVNNI CONFIG_INTEGER(EnableFMA, W("EnableFMA"), 1) // Enable FMA CONFIG_INTEGER(EnableAES, W("EnableAES"), 1) // Enable AES CONFIG_INTEGER(EnableBMI1, W("EnableBMI1"), 1) // Enable BMI1 diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 31279fb8b435a..1415615b38393 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -5609,7 +5609,19 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } break; } - + case NI_AVXVNNI_MultiplyWideningAndAdd: + case NI_AVXVNNI_MultiplyWideningAndAddSaturate: + { + if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + { + MakeSrcContained(node, op3); + } + else if (supportsRegOptional) + { + op3->SetRegOptional(); + } + break; + } case NI_BMI2_MultiplyNoFlags: case NI_BMI2_X64_MultiplyNoFlags: { diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index 5a1093e1dbce5..ea4aa13ded125 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -36,6 +36,7 @@ public enum ReadyToRunInstructionSet X86Base=22, Dp=23, Rdm=24, + AvxVnni=25, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 7b816cdd9e11d..ffc302a827567 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -86,6 +86,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_POPCNT_X64: return ReadyToRunInstructionSet.Popcnt; case InstructionSet.X64_Vector128: return null; case InstructionSet.X64_Vector256: return null; + case InstructionSet.X64_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni; + case InstructionSet.X64_AVXVNNI_X64: return ReadyToRunInstructionSet.AvxVnni; default: throw new Exception("Unknown instruction set"); } @@ -129,6 +131,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_POPCNT_X64: return null; case InstructionSet.X86_Vector128: return null; case InstructionSet.X86_Vector256: return null; + case InstructionSet.X86_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni; + case InstructionSet.X86_AVXVNNI_X64: return null; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 8a8111d514268..f1ffa0a0cd4eb 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -55,22 +55,24 @@ public enum InstructionSet X64_POPCNT=16, X64_Vector128=17, X64_Vector256=18, - X64_X86Base_X64=19, - X64_SSE_X64=20, - X64_SSE2_X64=21, - X64_SSE3_X64=22, - X64_SSSE3_X64=23, - X64_SSE41_X64=24, - X64_SSE42_X64=25, - X64_AVX_X64=26, - X64_AVX2_X64=27, - X64_AES_X64=28, - X64_BMI1_X64=29, - X64_BMI2_X64=30, - X64_FMA_X64=31, - X64_LZCNT_X64=32, - X64_PCLMULQDQ_X64=33, - X64_POPCNT_X64=34, + X64_AVXVNNI=19, + X64_X86Base_X64=20, + X64_SSE_X64=21, + X64_SSE2_X64=22, + X64_SSE3_X64=23, + X64_SSSE3_X64=24, + X64_SSE41_X64=25, + X64_SSE42_X64=26, + X64_AVX_X64=27, + X64_AVX2_X64=28, + X64_AES_X64=29, + X64_BMI1_X64=30, + X64_BMI2_X64=31, + X64_FMA_X64=32, + X64_LZCNT_X64=33, + X64_PCLMULQDQ_X64=34, + X64_POPCNT_X64=35, + X64_AVXVNNI_X64=36, X86_X86Base=1, X86_SSE=2, X86_SSE2=3, @@ -89,22 +91,24 @@ public enum InstructionSet X86_POPCNT=16, X86_Vector128=17, X86_Vector256=18, - X86_X86Base_X64=19, - X86_SSE_X64=20, - X86_SSE2_X64=21, - X86_SSE3_X64=22, - X86_SSSE3_X64=23, - X86_SSE41_X64=24, - X86_SSE42_X64=25, - X86_AVX_X64=26, - X86_AVX2_X64=27, - X86_AES_X64=28, - X86_BMI1_X64=29, - X86_BMI2_X64=30, - X86_FMA_X64=31, - X86_LZCNT_X64=32, - X86_PCLMULQDQ_X64=33, - X86_POPCNT_X64=34, + X86_AVXVNNI=19, + X86_X86Base_X64=20, + X86_SSE_X64=21, + X86_SSE2_X64=22, + X86_SSE3_X64=23, + X86_SSSE3_X64=24, + X86_SSE41_X64=25, + X86_SSE42_X64=26, + X86_AVX_X64=27, + X86_AVX2_X64=28, + X86_AES_X64=29, + X86_BMI1_X64=30, + X86_BMI2_X64=31, + X86_FMA_X64=32, + X86_LZCNT_X64=33, + X86_PCLMULQDQ_X64=34, + X86_POPCNT_X64=35, + X86_AVXVNNI_X64=36, } @@ -298,6 +302,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_POPCNT_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64)) resultflags.AddInstructionSet(InstructionSet.X64_POPCNT); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) @@ -449,6 +457,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ); if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64)) resultflags.AddInstructionSet(InstructionSet.X64_POPCNT); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) @@ -574,6 +584,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X64_POPCNT, true); yield return new InstructionSetInfo("Vector128", "", InstructionSet.X64_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X64_Vector256, false); + yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X64_AVXVNNI, true); break; case TargetArchitecture.X86: @@ -595,6 +606,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X86_POPCNT, true); yield return new InstructionSetInfo("Vector128", "", InstructionSet.X86_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X86_Vector256, false); + yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X86_AVXVNNI, true); break; } @@ -657,6 +669,8 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64); if (HasInstructionSet(InstructionSet.X64_POPCNT)) AddInstructionSet(InstructionSet.X64_POPCNT_X64); + if (HasInstructionSet(InstructionSet.X64_AVXVNNI)) + AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); break; case TargetArchitecture.X86: @@ -698,6 +712,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_LZCNT_X64); AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64); AddInstructionSet(InstructionSet.X64_POPCNT_X64); + AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); break; case TargetArchitecture.X86: @@ -717,6 +732,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_LZCNT_X64); AddInstructionSet(InstructionSet.X86_PCLMULQDQ_X64); AddInstructionSet(InstructionSet.X86_POPCNT_X64); + AddInstructionSet(InstructionSet.X86_AVXVNNI_X64); break; } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 6e64e7e2b02c0..8bfdc9a9d86a7 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -40,6 +40,7 @@ instructionset ,X86 ,Pclmulqdq , ,14 ,PCLMULQDQ,pclmul instructionset ,X86 ,Popcnt , ,15 ,POPCNT ,popcnt instructionset ,X86 , , , ,Vector128, instructionset ,X86 , , , ,Vector256, +instructionset ,X86 ,AvxVnni , ,25 ,AVXVNNI ,avxvnni instructionset64bit,X86 ,X86Base instructionset64bit,X86 ,SSE @@ -57,6 +58,7 @@ instructionset64bit,X86 ,FMA instructionset64bit,X86 ,LZCNT instructionset64bit,X86 ,PCLMULQDQ instructionset64bit,X86 ,POPCNT +instructionset64bit,X86 ,AVXVNNI implication ,X86 ,SSE ,X86Base implication ,X86 ,SSE2 ,SSE diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index f1cc4b674cf9d..17a2dd0944c3b 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1308,6 +1308,9 @@ void EEJitManager::SetCpuInfo() // CORJIT_FLAG_USE_AVX2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0): // CORJIT_FLAG_USE_AVX // AVX2 - EBX bit 5 + // CORJIT_FLAG_USE_AVXVNNI if the following feature bit is set (input EAX of 0x07 and input ECX of 1): + // CORJIT_FLAG_USE_AVX2 + // AVXVNNI - EAX bit 4 // CORJIT_FLAG_USE_AVX_512 is not currently set, but defined so that it can be used in future without // CORJIT_FLAG_USE_BMI1 if the following feature bit is set (input EAX of 0x07 and input ECX of 0): // BMI1 - EBX bit 3 @@ -1385,6 +1388,12 @@ void EEJitManager::SetCpuInfo() if ((cpuidInfo[EBX] & (1 << 5)) != 0) // AVX2 { CPUCompileFlags.Set(InstructionSet_AVX2); + + __cpuidex(cpuidInfo, 0x00000007, 0x00000001); + if ((cpuidInfo[EAX] & (1 << 4)) != 0) // AVX-VNNI + { + CPUCompileFlags.Set(InstructionSet_AVXVNNI); + } } } } diff --git a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml index df1020f4b8d5d..59d0783d0a789 100644 --- a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml +++ b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml @@ -18,6 +18,12 @@ + + + + + + diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index b8f3489e46526..b66f360bdb133 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -1944,6 +1944,7 @@ + @@ -1962,6 +1963,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs index 28ae4c77ff67a..2d72673b09260 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs @@ -13,13 +13,13 @@ namespace System.Runtime.Intrinsics.X86 [CLSCompliant(false)] public abstract class Avx2 : Avx { - internal Avx2() { } + protected internal Avx2() { } public static new bool IsSupported { [Intrinsic] get { return false; } } public new abstract class X64 : Avx.X64 { - internal X64() { } + protected internal X64() { } public static new bool IsSupported { [Intrinsic] get { return false; } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs index c6ab66867c1c2..5837efbba72f7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs @@ -12,14 +12,14 @@ namespace System.Runtime.Intrinsics.X86 [CLSCompliant(false)] public abstract class Avx2 : Avx { - internal Avx2() { } + protected internal Avx2() { } public static new bool IsSupported { get => IsSupported; } [Intrinsic] public new abstract class X64 : Avx.X64 { - internal X64() { } + protected internal X64() { } public static new bool IsSupported { get => IsSupported; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs new file mode 100644 index 0000000000000..d01010413de3c --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs @@ -0,0 +1,70 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + [CLSCompliant(false)] + public abstract class AvxVnni : Avx2 + { + internal AvxVnni() { } + + public static new bool IsSupported { [Intrinsic] get { return false; } } + + public new abstract class X64 : Avx2.X64 + { + internal X64() { } + + public static new bool IsSupported { [Intrinsic] get { return false; } } + } + + /// + /// __m128i _mm_dpbusd_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPBUSD xmm, xmm, xmm + /// + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m128i _mm_dpwssd_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPWSSD xmm, xmm, xmm + /// + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m256i _mm256_dpbusd_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPBUSD ymm, ymm, ymm + /// + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m256i _mm256_dpwssd_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPWSSD ymm, ymm, ymm + /// + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m128i _mm_dpbusds_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPBUSDS xmm, xmm, xmm + /// + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m128i _mm_dpwssds_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPWSSDS xmm, xmm, xmm + /// + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m256i _mm256_dpbusds_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPBUSDS ymm, ymm, ymm + /// + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m256i _mm256_dpwssds_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPWSSDS ymm, ymm, ymm + /// + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs new file mode 100644 index 0000000000000..8dcffed9384ed --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs @@ -0,0 +1,72 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + [Intrinsic] + [CLSCompliant(false)] + public abstract class AvxVnni : Avx2 + { + internal AvxVnni() { } + + public static new bool IsSupported { get => IsSupported; } + + [Intrinsic] + public new abstract class X64 : Avx2.X64 + { + internal X64() { } + + public static new bool IsSupported { get => IsSupported; } + } + + /// + /// __m128i _mm_dpbusd_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPBUSD xmm, xmm, xmm + /// + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + /// + /// __m128i _mm_dpwssd_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPWSSD xmm, xmm, xmm + /// + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + /// + /// __m256i _mm256_dpbusd_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPBUSD ymm, ymm, ymm + /// + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + /// + /// __m256i _mm256_dpwssd_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPWSSD ymm, ymm, ymm + /// + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + /// + /// __m128i _mm_dpbusds_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPBUSDS xmm, xmm, xmm + /// + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + /// + /// __m128i _mm_dpwssds_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPWSSDS xmm, xmm, xmm + /// + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + /// + /// __m256i _mm256_dpbusds_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPBUSDS ymm, ymm, ymm + /// + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + /// + /// __m256i _mm256_dpwssds_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPWSSDS ymm, ymm, ymm + /// + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + } +} diff --git a/src/libraries/System.Runtime.Intrinsics.Experimental/Directory.Build.props b/src/libraries/System.Runtime.Intrinsics.Experimental/Directory.Build.props new file mode 100644 index 0000000000000..ba1f965d83cae --- /dev/null +++ b/src/libraries/System.Runtime.Intrinsics.Experimental/Directory.Build.props @@ -0,0 +1,6 @@ + + + + Open + + diff --git a/src/libraries/System.Runtime.Intrinsics.Experimental/System.Runtime.Intrinsics.Experimental.sln b/src/libraries/System.Runtime.Intrinsics.Experimental/System.Runtime.Intrinsics.Experimental.sln new file mode 100644 index 0000000000000..87a1b5f0fe163 --- /dev/null +++ b/src/libraries/System.Runtime.Intrinsics.Experimental/System.Runtime.Intrinsics.Experimental.sln @@ -0,0 +1,85 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Private.CoreLib", "..\..\coreclr\System.Private.CoreLib\System.Private.CoreLib.csproj", "{5965CFFE-886A-418C-854F-5967D91DE914}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Runtime.Intrinsics.Experimental", "ref\System.Runtime.Intrinsics.Experimental.csproj", "{28B808CE-B1F8-4B05-9ADA-8884525BD87F}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Runtime.Intrinsics.Experimental", "src\System.Runtime.Intrinsics.Experimental.csproj", "{5AD79501-BEA5-48C7-B466-021A9DCB9D5C}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{47B32900-BEBD-49E4-A54B-7A4BC87F0E68}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "ref", "{65331E4B-EF91-413C-984B-51D110CE5AC6}" +EndProject +Global + GlobalSection(NestedProjects) = preSolution + {5965CFFE-886A-418C-854F-5967D91DE914} = {47B32900-BEBD-49E4-A54B-7A4BC87F0E68} + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C} = {47B32900-BEBD-49E4-A54B-7A4BC87F0E68} + {28B808CE-B1F8-4B05-9ADA-8884525BD87F} = {65331E4B-EF91-413C-984B-51D110CE5AC6} + EndGlobalSection + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + Checked|Any CPU = Checked|Any CPU + Checked|x64 = Checked|x64 + Checked|x86 = Checked|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {5965CFFE-886A-418C-854F-5967D91DE914}.Debug|Any CPU.ActiveCfg = Debug|x64 + {5965CFFE-886A-418C-854F-5967D91DE914}.Debug|Any CPU.Build.0 = Debug|x64 + {5965CFFE-886A-418C-854F-5967D91DE914}.Debug|x64.ActiveCfg = Debug|x64 + {5965CFFE-886A-418C-854F-5967D91DE914}.Debug|x64.Build.0 = Debug|x64 + {5965CFFE-886A-418C-854F-5967D91DE914}.Debug|x86.ActiveCfg = Debug|x86 + {5965CFFE-886A-418C-854F-5967D91DE914}.Debug|x86.Build.0 = Debug|x86 + {5965CFFE-886A-418C-854F-5967D91DE914}.Release|Any CPU.ActiveCfg = Release|x64 + {5965CFFE-886A-418C-854F-5967D91DE914}.Release|Any CPU.Build.0 = Release|x64 + {5965CFFE-886A-418C-854F-5967D91DE914}.Release|x64.ActiveCfg = Release|x64 + {5965CFFE-886A-418C-854F-5967D91DE914}.Release|x64.Build.0 = Release|x64 + {5965CFFE-886A-418C-854F-5967D91DE914}.Release|x86.ActiveCfg = Release|x86 + {5965CFFE-886A-418C-854F-5967D91DE914}.Release|x86.Build.0 = Release|x86 + {5965CFFE-886A-418C-854F-5967D91DE914}.Checked|Any CPU.ActiveCfg = Checked|x64 + {5965CFFE-886A-418C-854F-5967D91DE914}.Checked|Any CPU.Build.0 = Checked|x64 + {5965CFFE-886A-418C-854F-5967D91DE914}.Checked|x64.ActiveCfg = Checked|x64 + {5965CFFE-886A-418C-854F-5967D91DE914}.Checked|x64.Build.0 = Checked|x64 + {5965CFFE-886A-418C-854F-5967D91DE914}.Checked|x86.ActiveCfg = Checked|x86 + {5965CFFE-886A-418C-854F-5967D91DE914}.Checked|x86.Build.0 = Checked|x86 + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Debug|x64.ActiveCfg = Debug|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Debug|x64.Build.0 = Debug|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Debug|x86.ActiveCfg = Debug|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Debug|x86.Build.0 = Debug|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Release|Any CPU.Build.0 = Release|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Release|x64.ActiveCfg = Release|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Release|x64.Build.0 = Release|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Release|x86.ActiveCfg = Release|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Release|x86.Build.0 = Release|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Checked|Any CPU.ActiveCfg = Debug|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Checked|x64.ActiveCfg = Debug|Any CPU + {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Checked|x86.ActiveCfg = Debug|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Debug|x64.ActiveCfg = Debug|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Debug|x64.Build.0 = Debug|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Debug|x86.ActiveCfg = Debug|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Debug|x86.Build.0 = Debug|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Release|Any CPU.ActiveCfg = Release|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Release|Any CPU.Build.0 = Release|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Release|x64.ActiveCfg = Release|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Release|x64.Build.0 = Release|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Release|x86.ActiveCfg = Release|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Release|x86.Build.0 = Release|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Checked|x64.ActiveCfg = Debug|Any CPU + {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Checked|x86.ActiveCfg = Debug|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {9205DA5F-88A2-4045-9B31-9CC53CCF7550} + EndGlobalSection +EndGlobal diff --git a/src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.cs b/src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.cs new file mode 100644 index 0000000000000..5e46973b0c6a0 --- /dev/null +++ b/src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.cs @@ -0,0 +1,29 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// ------------------------------------------------------------------------------ +// Changes to this file must follow the https://aka.ms/api-review process. +// ------------------------------------------------------------------------------ + +namespace System.Runtime.Intrinsics.X86 +{ + [System.CLSCompliantAttribute(false)] + public abstract partial class AvxVnni : System.Runtime.Intrinsics.X86.Avx2 + { + internal AvxVnni() { } + public static new bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx2.X64 + { + internal X64() { } + public static new bool IsSupported { get { throw null; } } + } + } + +} diff --git a/src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.csproj b/src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.csproj new file mode 100644 index 0000000000000..e775a65cc70b8 --- /dev/null +++ b/src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.csproj @@ -0,0 +1,15 @@ + + + true + $(NetCoreAppCurrent) + enable + + + + + + + + + + \ No newline at end of file diff --git a/src/libraries/System.Runtime.Intrinsics.Experimental/src/System.Runtime.Intrinsics.Experimental.csproj b/src/libraries/System.Runtime.Intrinsics.Experimental/src/System.Runtime.Intrinsics.Experimental.csproj new file mode 100644 index 0000000000000..dd3dc2940a54b --- /dev/null +++ b/src/libraries/System.Runtime.Intrinsics.Experimental/src/System.Runtime.Intrinsics.Experimental.csproj @@ -0,0 +1,10 @@ + + + true + $(NetCoreAppCurrent) + enable + + + + + \ No newline at end of file diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 6cf453fd8d0c8..b0f2b804f8918 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -2967,7 +2967,7 @@ internal X64() { } [System.CLSCompliantAttribute(false)] public abstract partial class Avx2 : System.Runtime.Intrinsics.X86.Avx { - internal Avx2() { } + protected internal Avx2() { } public static new bool IsSupported { get { throw null; } } public static System.Runtime.Intrinsics.Vector256 Abs(System.Runtime.Intrinsics.Vector256 value) { throw null; } public static System.Runtime.Intrinsics.Vector256 Abs(System.Runtime.Intrinsics.Vector256 value) { throw null; } @@ -3364,10 +3364,11 @@ public unsafe static void MaskStore(ulong* address, System.Runtime.Intrinsics.Ve public static System.Runtime.Intrinsics.Vector256 Xor(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx.X64 { - internal X64() { } + protected internal X64() { } public static new bool IsSupported { get { throw null; } } } } + [System.CLSCompliantAttribute(false)] public abstract partial class Bmi1 : System.Runtime.Intrinsics.X86.X86Base { diff --git a/src/libraries/pkg/baseline/packageIndex.json b/src/libraries/pkg/baseline/packageIndex.json index e9d4d02764470..9afe37638cd13 100644 --- a/src/libraries/pkg/baseline/packageIndex.json +++ b/src/libraries/pkg/baseline/packageIndex.json @@ -5661,6 +5661,14 @@ "net5.0": "5.0.0.0" } }, + "System.Runtime.Intrinsics.Experimental":{ + "BaselineVersion": "6.0.0", + "InboxOn": {}, + "AssemblyVersionInPackageVersion": { + "5.0.0.0": "5.0.0", + "6.0.0.0": "6.0.0" + } + }, "System.Runtime.Loader": { "StableVersions": [ "4.0.0", diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Byte.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Byte.cs new file mode 100644 index 0000000000000..0a38e01b11e12 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Byte.cs @@ -0,0 +1,501 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddByte() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it. + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddByte + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alighment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector256 _fld0; + public Vector256 _fld1; + public Vector256 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddByte testClass) + { + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Byte); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(SByte); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Byte[] _data1 = new Byte[Op1ElementCount]; + private static SByte[] _data2 = new SByte[Op2ElementCount]; + + private static Vector256 _clsVar0; + private static Vector256 _clsVar1; + private static Vector256 _clsVar2; + + private Vector256 _fld0; + private Vector256 _fld1; + private Vector256 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddByte() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddByte() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr))); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector256 addend, Vector256 left, Vector256 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + outArray[i] = Math.Clamp((addend[i] + (right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2]) + + (right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4])), int.MinValue, int.MaxValue); + } + + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}(Vector256, Vector256): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Int16.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Int16.cs new file mode 100644 index 0000000000000..4907c183f800d --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Int16.cs @@ -0,0 +1,500 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddInt16() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it. + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddInt16 + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector256 _fld0; + public Vector256 _fld1; + public Vector256 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddInt16 testClass) + { + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Int16[] _data1 = new Int16[Op1ElementCount]; + private static Int16[] _data2 = new Int16[Op2ElementCount]; + + private static Vector256 _clsVar0; + private static Vector256 _clsVar1; + private static Vector256 _clsVar2; + + private Vector256 _fld0; + private Vector256 _fld1; + private Vector256 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddInt16() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddInt16() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr))); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector256 addend, Vector256 left, Vector256 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + outArray[i] = Math.Clamp((addend[i] + (right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2])), int.MinValue, int.MaxValue); + } + + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}(Vector256, Vector256): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Byte.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Byte.cs new file mode 100644 index 0000000000000..2e221073681d0 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Byte.cs @@ -0,0 +1,504 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddSaturateByte() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alighment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector256 _fld0; + public Vector256 _fld1; + public Vector256 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte testClass) + { + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Byte); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(SByte); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Byte[] _data1 = new Byte[Op1ElementCount]; + private static SByte[] _data2 = new SByte[Op2ElementCount]; + + private static Vector256 _clsVar0; + private static Vector256 _clsVar1; + private static Vector256 _clsVar2; + + private Vector256 _fld0; + private Vector256 _fld1; + private Vector256 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr))); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector256 addend, Vector256 left, Vector256 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + int addend2 = right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2] + right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4]; + int value = addend[i] + addend2; + int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value; + int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue : tmp; + outArray[i] = c; + } + + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}(Vector256, Vector256): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Int16.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Int16.cs new file mode 100644 index 0000000000000..3c755d8e97ba9 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Int16.cs @@ -0,0 +1,505 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.ComponentModel; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddSaturateInt16() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it. + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16 + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector256 _fld0; + public Vector256 _fld1; + public Vector256 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16 testClass) + { + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Int16[] _data1 = new Int16[Op1ElementCount]; + private static Int16[] _data2 = new Int16[Op2ElementCount]; + + private static Vector256 _clsVar0; + private static Vector256 _clsVar1; + private static Vector256 _clsVar2; + + private Vector256 _fld0; + private Vector256 _fld1; + private Vector256 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr))); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector256 addend, Vector256 left, Vector256 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + int addend2 = right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2]; + int value = addend[i] + addend2; + int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value; + int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue: tmp; + outArray[i] = c; + } + + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}(Vector256, Vector256): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_r.csproj new file mode 100644 index 0000000000000..721cd42651892 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_r.csproj @@ -0,0 +1,22 @@ + + + Exe + true + + true + + true + + + Embedded + + + + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_ro.csproj new file mode 100644 index 0000000000000..58b4945788725 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_ro.csproj @@ -0,0 +1,22 @@ + + + Exe + true + + true + + true + + + Embedded + True + + + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/Program.AvxVnni.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/Program.AvxVnni.cs new file mode 100644 index 0000000000000..ff68d04e248e0 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/Program.AvxVnni.cs @@ -0,0 +1,21 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + static Program() + { + TestList = new Dictionary() { + ["MultiplyWideningAndAdd.Byte"] = MultiplyWideningAndAddByte, + ["MultiplyWideningAndAdd.Int16"] = MultiplyWideningAndAddInt16, + ["MultiplyWideningAndAddSaturate.Byte"] = MultiplyWideningAndAddSaturateByte, + ["MultiplyWideningAndAddSaturate.Int16"] = MultiplyWideningAndAddSaturateInt16, + }; + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Byte.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Byte.cs new file mode 100644 index 0000000000000..fe05ed09cfd86 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Byte.cs @@ -0,0 +1,515 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddByte() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it. + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddByte + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alighment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector128 _fld0; + public Vector128 _fld1; + public Vector128 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddByte testClass) + { + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Byte); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(SByte); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Byte[] _data1 = new Byte[Op1ElementCount]; + private static SByte[] _data2 = new SByte[Op2ElementCount]; + + private static Vector128 _clsVar0; + private static Vector128 _clsVar1; + private static Vector128 _clsVar2; + + private Vector128 _fld0; + private Vector128 _fld1; + private Vector128 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddByte() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddByte() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result1 = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result1); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector128 addend, Vector128 left, Vector128 right, Vector128 result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.WriteUnaligned(ref Unsafe.As(ref outArray[0]), result); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + private void ValidateResult(Vector128 addend, Vector128 left, Vector128 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + outArray[i] = Math.Clamp((addend[i] + (right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2]) + + (right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4])), int.MinValue, int.MaxValue); + } + + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}(Vector128, Vector128): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Int16.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Int16.cs new file mode 100644 index 0000000000000..8adf4e588dd92 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Int16.cs @@ -0,0 +1,500 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddInt16() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddInt16 + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alighment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector128 _fld0; + public Vector128 _fld1; + public Vector128 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddInt16 testClass) + { + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Int16[] _data1 = new Int16[Op1ElementCount]; + private static Int16[] _data2 = new Int16[Op2ElementCount]; + + private static Vector128 _clsVar0; + private static Vector128 _clsVar1; + private static Vector128 _clsVar2; + + private Vector128 _fld0; + private Vector128 _fld1; + private Vector128 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddInt16() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddInt16() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr)) + ); + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector128 addend, Vector128 left, Vector128 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + outArray[i] = Math.Clamp((addend[i] + (right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2])), int.MinValue, int.MaxValue); + } + + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}(Vector128, Vector128): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Byte.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Byte.cs new file mode 100644 index 0000000000000..6b003ef003845 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Byte.cs @@ -0,0 +1,503 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddSaturateByte() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it. + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alighment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector128 _fld0; + public Vector128 _fld1; + public Vector128 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte testClass) + { + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Byte); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(SByte); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Byte[] _data1 = new Byte[Op1ElementCount]; + private static SByte[] _data2 = new SByte[Op2ElementCount]; + + private static Vector128 _clsVar0; + private static Vector128 _clsVar1; + private static Vector128 _clsVar2; + + private Vector128 _fld0; + private Vector128 _fld1; + private Vector128 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector128 addend, Vector128 left, Vector128 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + int addend2 = right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2] + right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4]; + int value = addend[i] + addend2; + int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value; + int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue : tmp; + outArray[i] = c; + } + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}(Vector128, Vector128): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Int16.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Int16.cs new file mode 100644 index 0000000000000..a96951f4ace85 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Int16.cs @@ -0,0 +1,503 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddSaturateInt16() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it. + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16 + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alighment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector128 _fld0; + public Vector128 _fld1; + public Vector128 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16 testClass) + { + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Int16[] _data1 = new Int16[Op1ElementCount]; + private static Int16[] _data2 = new Int16[Op2ElementCount]; + + private static Vector128 _clsVar0; + private static Vector128 _clsVar1; + private static Vector128 _clsVar2; + + private Vector128 _fld0; + private Vector128 _fld1; + private Vector128 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr)) + ); + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector128 addend, Vector128 left, Vector128 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + int addend2 = right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2]; + int value = addend[i] + addend2; + int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value; + int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue : tmp; + outArray[i] = c; + } + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}(Vector128, Vector128): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_r.csproj new file mode 100644 index 0000000000000..8274558730341 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_r.csproj @@ -0,0 +1,22 @@ + + + Exe + true + + true + + true + + + Embedded + + + + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_ro.csproj new file mode 100644 index 0000000000000..669831c75815b --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_ro.csproj @@ -0,0 +1,22 @@ + + + Exe + true + + true + + true + + + Embedded + True + + + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/Program.AvxVnni_Vector128.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/Program.AvxVnni_Vector128.cs new file mode 100644 index 0000000000000..ff68d04e248e0 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/Program.AvxVnni_Vector128.cs @@ -0,0 +1,21 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + static Program() + { + TestList = new Dictionary() { + ["MultiplyWideningAndAdd.Byte"] = MultiplyWideningAndAddByte, + ["MultiplyWideningAndAdd.Int16"] = MultiplyWideningAndAddInt16, + ["MultiplyWideningAndAddSaturate.Byte"] = MultiplyWideningAndAddSaturateByte, + ["MultiplyWideningAndAddSaturate.Int16"] = MultiplyWideningAndAddSaturateInt16, + }; + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/Program.cs b/src/tests/JIT/HardwareIntrinsics/X86/Shared/Program.cs index 884eff7b4465c..1d772682f8644 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/Program.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/Program.cs @@ -72,6 +72,7 @@ private static void PrintSupportedIsa() TestLibrary.TestFramework.LogInformation($" AES: {Aes.IsSupported}"); TestLibrary.TestFramework.LogInformation($" AVX: {Avx.IsSupported}"); TestLibrary.TestFramework.LogInformation($" AVX2: {Avx2.IsSupported}"); + TestLibrary.TestFramework.LogInformation($" AVXVNNI: {AvxVnni.IsSupported}"); TestLibrary.TestFramework.LogInformation($" BMI1: {Bmi1.IsSupported}"); TestLibrary.TestFramework.LogInformation($" BMI2: {Bmi2.IsSupported}"); TestLibrary.TestFramework.LogInformation($" FMA: {Fma.IsSupported}"); From 00a0a0bf44bf9ce80c20b37bf4e94dff86547326 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 3 Mar 2021 21:35:24 -0800 Subject: [PATCH 02/10] Add support for AvxVnni instructions --- .../ref/System.Runtime.Intrinsics.cs | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index b0f2b804f8918..4ca3fafe7054a 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -3369,6 +3369,26 @@ protected internal X64() { } } } + [System.CLSCompliantAttribute(false)] + public abstract class AvxVnni : System.Runtime.Intrinsics.X86.Avx2 + { + internal AvxVnni() { } + public static new bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx2.X64 + { + internal X64() { } + public static new bool IsSupported { get { throw null; } } + } + } + [System.CLSCompliantAttribute(false)] public abstract partial class Bmi1 : System.Runtime.Intrinsics.X86.X86Base { From 83ef75d7dbb24da2754006a66935f221307f5861 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Tue, 11 May 2021 14:19:03 -0700 Subject: [PATCH 03/10] Add preveiw feature attribute --- .../src/System/Runtime/Intrinsics/X86/AvxVnni.cs | 2 ++ .../System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs index 8dcffed9384ed..ca2880228de77 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs @@ -2,11 +2,13 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Runtime.CompilerServices; +using System.Runtime.Versioning; namespace System.Runtime.Intrinsics.X86 { [Intrinsic] [CLSCompliant(false)] + [RequiresPreviewFeatures] public abstract class AvxVnni : Avx2 { internal AvxVnni() { } diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 4ca3fafe7054a..0f096f8175528 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -3,6 +3,7 @@ // ------------------------------------------------------------------------------ // Changes to this file must follow the https://aka.ms/api-review process. // ------------------------------------------------------------------------------ +using System.Runtime.Versioning; namespace System.Runtime.Intrinsics { @@ -3370,6 +3371,7 @@ protected internal X64() { } } [System.CLSCompliantAttribute(false)] + [RequiresPreviewFeatures] public abstract class AvxVnni : System.Runtime.Intrinsics.X86.Avx2 { internal AvxVnni() { } From 12fd6bc4ccbdd95aab2a45889ba9b2c03602a6ab Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 12 May 2021 11:42:22 -0700 Subject: [PATCH 04/10] Handle operands in lsra --- src/coreclr/jit/emitxarch.cpp | 7 +------ src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 6 ------ src/coreclr/jit/lsraxarch.cpp | 14 ++++++++++++++ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 0b6e31924033a..835297c0df7a7 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -6256,7 +6256,7 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins, void emitter::emitIns_SIMD_R_R_R_R( instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg) { - if (IsFMAInstruction(ins)) + if (IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins)) { assert(UseVEXEncoding()); @@ -6272,11 +6272,6 @@ void emitter::emitIns_SIMD_R_R_R_R( emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg); } - else if (IsAVXVNNIInstruction(ins)) - { - assert(UseVEXEncoding()); - emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg); - } else if (UseVEXEncoding()) { assert(isAvxBlendv(ins) || isSse41Blendv(ins)); diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 6aa19ce937945..11b57d8972ec8 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -340,12 +340,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(op1Reg != REG_NA); assert(op2Reg != REG_NA); - if (targetReg != op1Reg) - { - node->SetRegNum(op1Reg); - targetReg = op1Reg; - } - genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3); break; } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index ad750fad953c8..d47c652e2b59c 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2458,6 +2458,20 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) break; } + case NI_AVXVNNI_MultiplyWideningAndAdd: + case NI_AVXVNNI_MultiplyWideningAndAddSaturate: + { + assert(numArgs == 3); + + tgtPrefUse = BuildUse(op1); + srcCount += 1; + srcCount += BuildDelayFreeUses(op2, op1); + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); + + buildUses = false; + break; + } + case NI_AVX2_GatherVector128: case NI_AVX2_GatherVector256: { From 31fd9cf9b1dc50da8e200409bbf19c6f172749d2 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 12 May 2021 21:20:53 -0700 Subject: [PATCH 05/10] Undo changes for Experimental --- .../X86/Avx2.PlatformNotSupported.cs | 4 +- .../src/System/Runtime/Intrinsics/X86/Avx2.cs | 4 +- .../Directory.Build.props | 6 -- ...System.Runtime.Intrinsics.Experimental.sln | 85 ------------------- .../System.Runtime.Intrinsics.Experimental.cs | 29 ------- ...tem.Runtime.Intrinsics.Experimental.csproj | 15 ---- ...tem.Runtime.Intrinsics.Experimental.csproj | 10 --- .../ref/System.Runtime.Intrinsics.cs | 4 +- src/libraries/pkg/baseline/packageIndex.json | 8 -- 9 files changed, 6 insertions(+), 159 deletions(-) delete mode 100644 src/libraries/System.Runtime.Intrinsics.Experimental/Directory.Build.props delete mode 100644 src/libraries/System.Runtime.Intrinsics.Experimental/System.Runtime.Intrinsics.Experimental.sln delete mode 100644 src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.cs delete mode 100644 src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.csproj delete mode 100644 src/libraries/System.Runtime.Intrinsics.Experimental/src/System.Runtime.Intrinsics.Experimental.csproj diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs index 2d72673b09260..28ae4c77ff67a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs @@ -13,13 +13,13 @@ namespace System.Runtime.Intrinsics.X86 [CLSCompliant(false)] public abstract class Avx2 : Avx { - protected internal Avx2() { } + internal Avx2() { } public static new bool IsSupported { [Intrinsic] get { return false; } } public new abstract class X64 : Avx.X64 { - protected internal X64() { } + internal X64() { } public static new bool IsSupported { [Intrinsic] get { return false; } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs index 5837efbba72f7..c6ab66867c1c2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs @@ -12,14 +12,14 @@ namespace System.Runtime.Intrinsics.X86 [CLSCompliant(false)] public abstract class Avx2 : Avx { - protected internal Avx2() { } + internal Avx2() { } public static new bool IsSupported { get => IsSupported; } [Intrinsic] public new abstract class X64 : Avx.X64 { - protected internal X64() { } + internal X64() { } public static new bool IsSupported { get => IsSupported; } } diff --git a/src/libraries/System.Runtime.Intrinsics.Experimental/Directory.Build.props b/src/libraries/System.Runtime.Intrinsics.Experimental/Directory.Build.props deleted file mode 100644 index ba1f965d83cae..0000000000000 --- a/src/libraries/System.Runtime.Intrinsics.Experimental/Directory.Build.props +++ /dev/null @@ -1,6 +0,0 @@ - - - - Open - - diff --git a/src/libraries/System.Runtime.Intrinsics.Experimental/System.Runtime.Intrinsics.Experimental.sln b/src/libraries/System.Runtime.Intrinsics.Experimental/System.Runtime.Intrinsics.Experimental.sln deleted file mode 100644 index 87a1b5f0fe163..0000000000000 --- a/src/libraries/System.Runtime.Intrinsics.Experimental/System.Runtime.Intrinsics.Experimental.sln +++ /dev/null @@ -1,85 +0,0 @@ -Microsoft Visual Studio Solution File, Format Version 12.00 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Private.CoreLib", "..\..\coreclr\System.Private.CoreLib\System.Private.CoreLib.csproj", "{5965CFFE-886A-418C-854F-5967D91DE914}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Runtime.Intrinsics.Experimental", "ref\System.Runtime.Intrinsics.Experimental.csproj", "{28B808CE-B1F8-4B05-9ADA-8884525BD87F}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Runtime.Intrinsics.Experimental", "src\System.Runtime.Intrinsics.Experimental.csproj", "{5AD79501-BEA5-48C7-B466-021A9DCB9D5C}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{47B32900-BEBD-49E4-A54B-7A4BC87F0E68}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "ref", "{65331E4B-EF91-413C-984B-51D110CE5AC6}" -EndProject -Global - GlobalSection(NestedProjects) = preSolution - {5965CFFE-886A-418C-854F-5967D91DE914} = {47B32900-BEBD-49E4-A54B-7A4BC87F0E68} - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C} = {47B32900-BEBD-49E4-A54B-7A4BC87F0E68} - {28B808CE-B1F8-4B05-9ADA-8884525BD87F} = {65331E4B-EF91-413C-984B-51D110CE5AC6} - EndGlobalSection - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU - Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Release|Any CPU = Release|Any CPU - Release|x64 = Release|x64 - Release|x86 = Release|x86 - Checked|Any CPU = Checked|Any CPU - Checked|x64 = Checked|x64 - Checked|x86 = Checked|x86 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {5965CFFE-886A-418C-854F-5967D91DE914}.Debug|Any CPU.ActiveCfg = Debug|x64 - {5965CFFE-886A-418C-854F-5967D91DE914}.Debug|Any CPU.Build.0 = Debug|x64 - {5965CFFE-886A-418C-854F-5967D91DE914}.Debug|x64.ActiveCfg = Debug|x64 - {5965CFFE-886A-418C-854F-5967D91DE914}.Debug|x64.Build.0 = Debug|x64 - {5965CFFE-886A-418C-854F-5967D91DE914}.Debug|x86.ActiveCfg = Debug|x86 - {5965CFFE-886A-418C-854F-5967D91DE914}.Debug|x86.Build.0 = Debug|x86 - {5965CFFE-886A-418C-854F-5967D91DE914}.Release|Any CPU.ActiveCfg = Release|x64 - {5965CFFE-886A-418C-854F-5967D91DE914}.Release|Any CPU.Build.0 = Release|x64 - {5965CFFE-886A-418C-854F-5967D91DE914}.Release|x64.ActiveCfg = Release|x64 - {5965CFFE-886A-418C-854F-5967D91DE914}.Release|x64.Build.0 = Release|x64 - {5965CFFE-886A-418C-854F-5967D91DE914}.Release|x86.ActiveCfg = Release|x86 - {5965CFFE-886A-418C-854F-5967D91DE914}.Release|x86.Build.0 = Release|x86 - {5965CFFE-886A-418C-854F-5967D91DE914}.Checked|Any CPU.ActiveCfg = Checked|x64 - {5965CFFE-886A-418C-854F-5967D91DE914}.Checked|Any CPU.Build.0 = Checked|x64 - {5965CFFE-886A-418C-854F-5967D91DE914}.Checked|x64.ActiveCfg = Checked|x64 - {5965CFFE-886A-418C-854F-5967D91DE914}.Checked|x64.Build.0 = Checked|x64 - {5965CFFE-886A-418C-854F-5967D91DE914}.Checked|x86.ActiveCfg = Checked|x86 - {5965CFFE-886A-418C-854F-5967D91DE914}.Checked|x86.Build.0 = Checked|x86 - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Debug|Any CPU.Build.0 = Debug|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Debug|x64.ActiveCfg = Debug|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Debug|x64.Build.0 = Debug|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Debug|x86.ActiveCfg = Debug|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Debug|x86.Build.0 = Debug|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Release|Any CPU.ActiveCfg = Release|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Release|Any CPU.Build.0 = Release|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Release|x64.ActiveCfg = Release|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Release|x64.Build.0 = Release|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Release|x86.ActiveCfg = Release|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Release|x86.Build.0 = Release|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Checked|x64.ActiveCfg = Debug|Any CPU - {28B808CE-B1F8-4B05-9ADA-8884525BD87F}.Checked|x86.ActiveCfg = Debug|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Debug|Any CPU.Build.0 = Debug|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Debug|x64.ActiveCfg = Debug|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Debug|x64.Build.0 = Debug|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Debug|x86.ActiveCfg = Debug|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Debug|x86.Build.0 = Debug|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Release|Any CPU.ActiveCfg = Release|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Release|Any CPU.Build.0 = Release|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Release|x64.ActiveCfg = Release|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Release|x64.Build.0 = Release|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Release|x86.ActiveCfg = Release|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Release|x86.Build.0 = Release|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Checked|x64.ActiveCfg = Debug|Any CPU - {5AD79501-BEA5-48C7-B466-021A9DCB9D5C}.Checked|x86.ActiveCfg = Debug|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {9205DA5F-88A2-4045-9B31-9CC53CCF7550} - EndGlobalSection -EndGlobal diff --git a/src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.cs b/src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.cs deleted file mode 100644 index 5e46973b0c6a0..0000000000000 --- a/src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.cs +++ /dev/null @@ -1,29 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// ------------------------------------------------------------------------------ -// Changes to this file must follow the https://aka.ms/api-review process. -// ------------------------------------------------------------------------------ - -namespace System.Runtime.Intrinsics.X86 -{ - [System.CLSCompliantAttribute(false)] - public abstract partial class AvxVnni : System.Runtime.Intrinsics.X86.Avx2 - { - internal AvxVnni() { } - public static new bool IsSupported { get { throw null; } } - public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } - public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } - public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } - public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } - public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } - public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } - public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } - public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } - public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx2.X64 - { - internal X64() { } - public static new bool IsSupported { get { throw null; } } - } - } - -} diff --git a/src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.csproj b/src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.csproj deleted file mode 100644 index e775a65cc70b8..0000000000000 --- a/src/libraries/System.Runtime.Intrinsics.Experimental/ref/System.Runtime.Intrinsics.Experimental.csproj +++ /dev/null @@ -1,15 +0,0 @@ - - - true - $(NetCoreAppCurrent) - enable - - - - - - - - - - \ No newline at end of file diff --git a/src/libraries/System.Runtime.Intrinsics.Experimental/src/System.Runtime.Intrinsics.Experimental.csproj b/src/libraries/System.Runtime.Intrinsics.Experimental/src/System.Runtime.Intrinsics.Experimental.csproj deleted file mode 100644 index dd3dc2940a54b..0000000000000 --- a/src/libraries/System.Runtime.Intrinsics.Experimental/src/System.Runtime.Intrinsics.Experimental.csproj +++ /dev/null @@ -1,10 +0,0 @@ - - - true - $(NetCoreAppCurrent) - enable - - - - - \ No newline at end of file diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 0f096f8175528..6ad7fc3eb9842 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -2968,7 +2968,7 @@ internal X64() { } [System.CLSCompliantAttribute(false)] public abstract partial class Avx2 : System.Runtime.Intrinsics.X86.Avx { - protected internal Avx2() { } + internal Avx2() { } public static new bool IsSupported { get { throw null; } } public static System.Runtime.Intrinsics.Vector256 Abs(System.Runtime.Intrinsics.Vector256 value) { throw null; } public static System.Runtime.Intrinsics.Vector256 Abs(System.Runtime.Intrinsics.Vector256 value) { throw null; } @@ -3365,7 +3365,7 @@ public unsafe static void MaskStore(ulong* address, System.Runtime.Intrinsics.Ve public static System.Runtime.Intrinsics.Vector256 Xor(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx.X64 { - protected internal X64() { } + internal X64() { } public static new bool IsSupported { get { throw null; } } } } diff --git a/src/libraries/pkg/baseline/packageIndex.json b/src/libraries/pkg/baseline/packageIndex.json index 9afe37638cd13..e9d4d02764470 100644 --- a/src/libraries/pkg/baseline/packageIndex.json +++ b/src/libraries/pkg/baseline/packageIndex.json @@ -5661,14 +5661,6 @@ "net5.0": "5.0.0.0" } }, - "System.Runtime.Intrinsics.Experimental":{ - "BaselineVersion": "6.0.0", - "InboxOn": {}, - "AssemblyVersionInPackageVersion": { - "5.0.0.0": "5.0.0", - "6.0.0.0": "6.0.0" - } - }, "System.Runtime.Loader": { "StableVersions": [ "4.0.0", From 35bdb57eeee6713e4d2cc0a9044a4c54c9f679ca Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Thu, 13 May 2021 16:03:38 -0700 Subject: [PATCH 06/10] Update JITEEVersionIdentifier and fix remaining issues --- src/coreclr/inc/jiteeversionguid.h | 12 ++++++------ src/coreclr/jit/emitxarch.cpp | 8 ++++---- .../X86/AvxVnni.PlatformNotSupported.cs | 18 ++++++++++-------- .../System/Runtime/Intrinsics/X86/AvxVnni.cs | 16 ++++++++-------- 4 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index b32d0edef000b..25746b4599294 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,12 +43,12 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 12234eca-dfc2-48bc-a320-6155cf25ce17 */ - 0x12234eca, - 0xdfc2, - 0x48bc, - {0xa3, 0x20, 0x61, 0x55, 0xcf, 0x25, 0xce, 0x17} -}; +constexpr GUID JITEEVersionIdentifier = { /* 1052f490-cad7-4610-99bb-6f2bd91a1d19 */ + 0x1052f490, + 0xcad7, + 0x4610, + {0x99, 0xbb, 0x6f, 0x2b, 0xd9, 0x1a, 0x1d, 0x19} + }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// // diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 835297c0df7a7..710b7a11edad3 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -15396,10 +15396,10 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_vfnmsub132ss: case INS_vfnmsub213ss: case INS_vfnmsub231ss: - case INS_vpdpbusd: //will be populated when the HW becomes publicly available - case INS_vpdpwssd: //will be populated when the HW becomes publicly available - case INS_vpdpbusds: //will be populated when the HW becomes publicly available - case INS_vpdpwssds: //will be populated when the HW becomes publicly available + case INS_vpdpbusd: // will be populated when the HW becomes publicly available + case INS_vpdpwssd: // will be populated when the HW becomes publicly available + case INS_vpdpbusds: // will be populated when the HW becomes publicly available + case INS_vpdpwssds: // will be populated when the HW becomes publicly available // uops.info result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency += PERFSCORE_LATENCY_4C; diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs index d01010413de3c..2edfd97a518ad 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs @@ -2,10 +2,12 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Runtime.CompilerServices; +using System.Runtime.Versioning; namespace System.Runtime.Intrinsics.X86 { [CLSCompliant(false)] + [RequiresPreviewFeatures] public abstract class AvxVnni : Avx2 { internal AvxVnni() { } @@ -21,49 +23,49 @@ internal X64() { } /// /// __m128i _mm_dpbusd_epi32 (__m128i src, __m128i a, __m128i b) - /// VPDPBUSD xmm, xmm, xmm + /// VPDPBUSD xmm, xmm, xmm/m128 /// public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_dpwssd_epi32 (__m128i src, __m128i a, __m128i b) - /// VPDPWSSD xmm, xmm, xmm + /// VPDPWSSD xmm, xmm, xmm/m128 /// public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_dpbusd_epi32 (__m256i src, __m256i a, __m256i b) - /// VPDPBUSD ymm, ymm, ymm + /// VPDPBUSD ymm, ymm, ymm/m256 /// public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_dpwssd_epi32 (__m256i src, __m256i a, __m256i b) - /// VPDPWSSD ymm, ymm, ymm + /// VPDPWSSD ymm, ymm, ymm/m256 /// public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_dpbusds_epi32 (__m128i src, __m128i a, __m128i b) - /// VPDPBUSDS xmm, xmm, xmm + /// VPDPBUSDS xmm, xmm, xmm/m128 /// public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m128i _mm_dpwssds_epi32 (__m128i src, __m128i a, __m128i b) - /// VPDPWSSDS xmm, xmm, xmm + /// VPDPWSSDS xmm, xmm, xmm/m128 /// public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_dpbusds_epi32 (__m256i src, __m256i a, __m256i b) - /// VPDPBUSDS ymm, ymm, ymm + /// VPDPBUSDS ymm, ymm, ymm/m256 /// public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } /// /// __m256i _mm256_dpwssds_epi32 (__m256i src, __m256i a, __m256i b) - /// VPDPWSSDS ymm, ymm, ymm + /// VPDPWSSDS ymm, ymm, ymm/m256 /// public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs index ca2880228de77..d6bb750ebc89a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs @@ -25,49 +25,49 @@ internal X64() { } /// /// __m128i _mm_dpbusd_epi32 (__m128i src, __m128i a, __m128i b) - /// VPDPBUSD xmm, xmm, xmm + /// VPDPBUSD xmm, xmm, xmm/m128 /// public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); /// /// __m128i _mm_dpwssd_epi32 (__m128i src, __m128i a, __m128i b) - /// VPDPWSSD xmm, xmm, xmm + /// VPDPWSSD xmm, xmm, xmm/m128 /// public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); /// /// __m256i _mm256_dpbusd_epi32 (__m256i src, __m256i a, __m256i b) - /// VPDPBUSD ymm, ymm, ymm + /// VPDPBUSD ymm, ymm, ymm/m256 /// public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); /// /// __m256i _mm256_dpwssd_epi32 (__m256i src, __m256i a, __m256i b) - /// VPDPWSSD ymm, ymm, ymm + /// VPDPWSSD ymm, ymm, ymm/m256 /// public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); /// /// __m128i _mm_dpbusds_epi32 (__m128i src, __m128i a, __m128i b) - /// VPDPBUSDS xmm, xmm, xmm + /// VPDPBUSDS xmm, xmm, xmm/m128 /// public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); /// /// __m128i _mm_dpwssds_epi32 (__m128i src, __m128i a, __m128i b) - /// VPDPWSSDS xmm, xmm, xmm + /// VPDPWSSDS xmm, xmm, xmm/m128 /// public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); /// /// __m256i _mm256_dpbusds_epi32 (__m256i src, __m256i a, __m256i b) - /// VPDPBUSDS ymm, ymm, ymm + /// VPDPBUSDS ymm, ymm, ymm/m256 /// public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); /// /// __m256i _mm256_dpwssds_epi32 (__m256i src, __m256i a, __m256i b) - /// VPDPWSSDS ymm, ymm, ymm + /// VPDPWSSDS ymm, ymm, ymm/m256 /// public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); } From e5568b6679162d11577f071d2fbbff399753136a Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Thu, 20 May 2021 14:25:31 -0700 Subject: [PATCH 07/10] Resolve Mono CI failure --- src/mono/mono/mini/simd-intrinsics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 53df6cb4f6b74..40df6910a9181 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -2082,6 +2082,7 @@ static const IntrinGroup supported_x86_intrinsics [] = { { "Aes", MONO_CPU_X86_AES, aes_methods, sizeof (aes_methods) }, { "Avx", MONO_CPU_X86_AVX, unsupported, sizeof (unsupported) }, { "Avx2", MONO_CPU_X86_AVX2, unsupported, sizeof (unsupported) }, + { "AvxVnni", 0, unsupported, sizeof (unsupported) }, { "Bmi1", MONO_CPU_X86_BMI1, bmi1_methods, sizeof (bmi1_methods) }, { "Bmi2", MONO_CPU_X86_BMI2, bmi2_methods, sizeof (bmi2_methods) }, { "Fma", MONO_CPU_X86_FMA, unsupported, sizeof (unsupported) }, From e10d64165b607d9ecf5d4369c86477a0f4ddeea5 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Fri, 21 May 2021 17:12:08 -0700 Subject: [PATCH 08/10] Disable tests --- src/tests/issues.targets | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tests/issues.targets b/src/tests/issues.targets index a0c2d9a4ef4f1..3bc9f1630691e 100644 --- a/src/tests/issues.targets +++ b/src/tests/issues.targets @@ -1028,6 +1028,9 @@ https://github.com/dotnet/runtime/issues/48190 + + Mono crashes when new unsupported intrinsic groups are added, https://github.com/dotnet/runtime/issues/53078 + Mono doesn't have a dynamic pgo or tiered compilation infrastructure From 297f85565a1573fb075e0b88a903d924d497e080 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Fri, 21 May 2021 17:21:17 -0700 Subject: [PATCH 09/10] Disable Vector128 tests --- src/tests/issues.targets | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tests/issues.targets b/src/tests/issues.targets index 3bc9f1630691e..80518bd8dc9f7 100644 --- a/src/tests/issues.targets +++ b/src/tests/issues.targets @@ -1031,6 +1031,9 @@ Mono crashes when new unsupported intrinsic groups are added, https://github.com/dotnet/runtime/issues/53078 + + Mono crashes when new unsupported intrinsic groups are added, https://github.com/dotnet/runtime/issues/53078 + Mono doesn't have a dynamic pgo or tiered compilation infrastructure From eebd83b8c14775dd665be1525cc85b5395b9ccbe Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Tue, 25 May 2021 15:19:00 -0700 Subject: [PATCH 10/10] Modify disable tests --- src/tests/issues.targets | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/issues.targets b/src/tests/issues.targets index 80518bd8dc9f7..de692d3feae0d 100644 --- a/src/tests/issues.targets +++ b/src/tests/issues.targets @@ -1028,10 +1028,10 @@ https://github.com/dotnet/runtime/issues/48190 - + Mono crashes when new unsupported intrinsic groups are added, https://github.com/dotnet/runtime/issues/53078 - + Mono crashes when new unsupported intrinsic groups are added, https://github.com/dotnet/runtime/issues/53078