-
Notifications
You must be signed in to change notification settings - Fork 4.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[API Implementation]: Extend System.Runtime.Intrinsics.X86 to support nint and nuint #63594
Conversation
…8Single` and `ConvertToNIntWithTruncation` (SSE)
Note regarding the This serves as a reminder for when your PR is modifying a ref *.cs file and adding/modifying public APIs, to please make sure the API implementation in the src *.cs file is documented with triple slash comments, so the PR reviewers can sign off that change. |
Tagging subscribers to this area: @dotnet/area-system-runtime-intrinsics Issue DetailsFixes #52021 Proposalnamespace System.Runtime.Intrinsics.X86
{
public abstract partial class Sse
{
public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, nint value);
public static nint ConvertToNInt(Vector128<float> value);
public static nint ConvertToNIntWithTruncation(Vector128<float> value);
}
public abstract partial class Sse2
{
public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, nint value);
public static Vector128<nint> ConvertScalarToVector128NInt(nint value);
public static Vector128<nuint> ConvertScalarToVector128NUInt(nuint value);
public static nint ConvertToNInt(Vector128<double> value);
public static nint ConvertToNIntWithTruncation(Vector128<double> value);
public static nint ConvertToNInt(Vector128<nint> value);
public static nuint ConvertToNUInt(Vector128<nuint> value);
public static unsafe void StoreNonTemporal(nint* address, nint value);
public static unsafe void StoreNonTemporal(nuint* address, nuint value);
public static Vector128<nint> Add(Vector128<nint> left, Vector128<nint> right);
public static Vector128<nuint> Add(Vector128<nuint> left, Vector128<nuint> right);
public static Vector128<nint> And(Vector128<nint> left, Vector128<nint> right);
public static Vector128<nuint> And(Vector128<nuint> left, Vector128<nuint> right);
public static Vector128<nint> AndNot(Vector128<nint> left, Vector128<nint> right);
public static Vector128<nuint> AndNot(Vector128<nuint> left, Vector128<nuint> right);
public static unsafe Vector128<nint> LoadVector128(nint* address);
public static unsafe Vector128<nuint> LoadVector128(nuint* address);
public static unsafe Vector128<nint> LoadAlignedVector128(nint* address);
public static unsafe Vector128<nuint> LoadAlignedVector128(nuint* address);
public static unsafe Vector128<nint> LoadScalarVector128(nint* address);
public static unsafe Vector128<nuint> LoadScalarVector128(nuint* address);
public static Vector128<nint> MoveScalar(Vector128<nint> value);
public static Vector128<nuint> MoveScalar(Vector128<nuint> value);
public static Vector128<nint> Or(Vector128<nint> left, Vector128<nint> right);
public static Vector128<nuint> Or(Vector128<nuint> left, Vector128<nuint> right);
public static Vector128<nint> ShiftLeftLogical(Vector128<nint> value, Vector128<nint> count);
public static Vector128<nuint> ShiftLeftLogical(Vector128<nuint> value, Vector128<nuint> count);
public static Vector128<nint> ShiftLeftLogical(Vector128<nint> value, byte count);
public static Vector128<nuint> ShiftLeftLogical(Vector128<nuint> value, byte count);
public static Vector128<nint> ShiftLeftLogical128BitLane(Vector128<nint> value, byte numBytes);
public static Vector128<nuint> ShiftLeftLogical128BitLane(Vector128<nuint> value, byte numBytes);
public static Vector128<nint> ShiftRightLogical(Vector128<nint> value, Vector128<nint> count);
public static Vector128<nuint> ShiftRightLogical(Vector128<nuint> value, Vector128<nuint> count);
public static Vector128<nint> ShiftRightLogical(Vector128<nint> value, byte count);
public static Vector128<nuint> ShiftRightLogical(Vector128<nuint> value, byte count);
public static Vector128<nint> ShiftRightLogical128BitLane(Vector128<nint> value, byte numBytes);
public static Vector128<nuint> ShiftRightLogical128BitLane(Vector128<nuint> value, byte numBytes);
public static unsafe void StoreScalar(nint* address, Vector128<nint> source);
public static unsafe void StoreScalar(nuint* address, Vector128<nuint> source);
public static unsafe void StoreAligned(nint* address, Vector128<nint> source);
public static unsafe void StoreAligned(nuint* address, Vector128<nuint> source);
public static unsafe void StoreAlignedNonTemporal(nint* address, Vector128<nint> source);
public static unsafe void StoreAlignedNonTemporal(nuint* address, Vector128<nuint> source);
public static unsafe void Store(nint* address, Vector128<nint> source);
public static unsafe void Store(nuint* address, Vector128<nuint> source);
public static Vector128<nint> Subtract(Vector128<nint> left, Vector128<nint> right);
public static Vector128<nuint> Subtract(Vector128<nuint> left, Vector128<nuint> right);
public static Vector128<nint> UnpackHigh(Vector128<nint> left, Vector128<nint> right);
public static Vector128<nuint> UnpackHigh(Vector128<nuint> left, Vector128<nuint> right);
public static Vector128<nint> UnpackLow(Vector128<nint> left, Vector128<nint> right);
public static Vector128<nuint> UnpackLow(Vector128<nuint> left, Vector128<nuint> right);
public static Vector128<nint> Xor(Vector128<nint> left, Vector128<nint> right);
public static Vector128<nuint> Xor(Vector128<nuint> left, Vector128<nuint> right);
}
public abstract partial class Sse3
{
public static unsafe Vector128<nint> LoadDquVector128(nint* address);
public static unsafe Vector128<nuint> LoadDquVector128(nuint* address);
}
public abstract partial class Ssse3
{
public static Vector128<nint> AlignRight(Vector128<nint> left, Vector128<nint> right, byte mask);
public static Vector128<nuint> AlignRight(Vector128<nuint> left, Vector128<nuint> right, byte mask);
}
public abstract partial class Sse41
{
public static nint Extract(Vector128<nint> value, byte index);
public static nuint Extract(Vector128<nuint> value, byte index);
public static Vector128<nint> Insert(Vector128<nint> value, nint data, byte index);
public static Vector128<nuint> Insert(Vector128<nuint> value, nuint data, byte index);
public static Vector128<nint> BlendVariable(Vector128<nint> left, Vector128<nint> right, Vector128<nint> mask);
public static Vector128<nuint> BlendVariable(Vector128<nuint> left, Vector128<nuint> right, Vector128<nuint> mask);
public static Vector128<nint> CompareEqual(Vector128<nint> left, Vector128<nint> right);
public static Vector128<nuint> CompareEqual(Vector128<nuint> left, Vector128<nuint> right);
public static Vector128<nint> ConvertToVector128NInt(Vector128<sbyte> value);
public static Vector128<nint> ConvertToVector128NInt(Vector128<byte> value);
public static Vector128<nint> ConvertToVector128NInt(Vector128<short> value);
public static Vector128<nint> ConvertToVector128NInt(Vector128<ushort> value);
public static Vector128<nint> ConvertToVector128NInt(Vector128<int> value);
public static Vector128<nint> ConvertToVector128NInt(Vector128<uint> value);
public static unsafe Vector128<nint> ConvertToVector128NInt(sbyte* address);
public static unsafe Vector128<nint> ConvertToVector128NInt(byte* address);
public static unsafe Vector128<nint> ConvertToVector128NInt(short* address);
public static unsafe Vector128<nint> ConvertToVector128NInt(ushort* address);
public static unsafe Vector128<nint> ConvertToVector128NInt(int* address);
public static unsafe Vector128<nint> ConvertToVector128NInt(uint* address);
public static Vector128<nint> Multiply(Vector128<int> left, Vector128<int> right);
public static unsafe Vector128<nint> LoadAlignedVector128NonTemporal(nint* address);
public static unsafe Vector128<nuint> LoadAlignedVector128NonTemporal(nuint* address);
public static bool TestC(Vector128<nint> left, Vector128<nint> right);
public static bool TestC(Vector128<nuint> left, Vector128<nuint> right);
public static bool TestNotZAndNotC(Vector128<nint> left, Vector128<nint> right);
public static bool TestNotZAndNotC(Vector128<nuint> left, Vector128<nuint> right);
public static bool TestZ(Vector128<nint> left, Vector128<nint> right);
public static bool TestZ(Vector128<nuint> left, Vector128<nuint> right);
}
public abstract partial class Sse42
{
public static Vector128<nint> CompareGreaterThan(Vector128<nint> left, Vector128<nint> right);
public static nuint Crc32(nuint crc, nuint data);
}
public abstract partial class Avx
{
public static Vector128<nint> ExtractVector128(Vector256<nint> value, byte index);
public static Vector128<nuint> ExtractVector128(Vector256<nuint> value, byte index);
public static Vector256<nint> InsertVector128(Vector256<nint> value, Vector128<nint> data, byte index);
public static Vector256<nuint> InsertVector128(Vector256<nuint> value, Vector128<nuint> data, byte index);
public static unsafe Vector256<nint> LoadVector256(nint* address);
public static unsafe Vector256<nuint> LoadVector256(nuint* address);
public static unsafe Vector256<nint> LoadAlignedVector256(nint* address);
public static unsafe Vector256<nuint> LoadAlignedVector256(nuint* address);
public static unsafe Vector256<nint> LoadDquVector256(nint* address);
public static unsafe Vector256<nuint> LoadDquVector256(nuint* address);
public static Vector256<nint> Permute2x128(Vector256<nint> left, Vector256<nint> right, byte control);
public static Vector256<nuint> Permute2x128(Vector256<nuint> left, Vector256<nuint> right, byte control);
public static unsafe void StoreAligned(nint* address, Vector256<nint> source);
public static unsafe void StoreAligned(nuint* address, Vector256<nuint> source);
public static unsafe void StoreAlignedNonTemporal(nint* address, Vector256<nint> source);
public static unsafe void StoreAlignedNonTemporal(nuint* address, Vector256<nuint> source);
public static unsafe void Store(nint* address, Vector256<nint> source);
public static unsafe void Store(nuint* address, Vector256<nuint> source);
public static bool TestC(Vector256<nint> left, Vector256<nint> right);
public static bool TestC(Vector256<nuint> left, Vector256<nuint> right);
public static bool TestNotZAndNotC(Vector256<nint> left, Vector256<nint> right);
public static bool TestNotZAndNotC(Vector256<nuint> left, Vector256<nuint> right);
public static bool TestZ(Vector256<nint> left, Vector256<nint> right);
public static bool TestZ(Vector256<nuint> left, Vector256<nuint> right);
}
public abstract partial class Avx2
{
public static Vector256<nint> Add(Vector256<nint> left, Vector256<nint> right);
public static Vector256<nuint> Add(Vector256<nuint> left, Vector256<nuint> right);
public static Vector256<nint> AlignRight(Vector256<nint> left, Vector256<nint> right, byte mask);
public static Vector256<nuint> AlignRight(Vector256<nuint> left, Vector256<nuint> right, byte mask);
public static Vector256<nint> And(Vector256<nint> left, Vector256<nint> right);
public static Vector256<nuint> And(Vector256<nuint> left, Vector256<nuint> right);
public static Vector256<nint> AndNot(Vector256<nint> left, Vector256<nint> right);
public static Vector256<nuint> AndNot(Vector256<nuint> left, Vector256<nuint> right);
public static Vector256<nint> BlendVariable(Vector256<nint> left, Vector256<nint> right, Vector256<nint> mask);
public static Vector256<nuint> BlendVariable(Vector256<nuint> left, Vector256<nuint> right, Vector256<nuint> mask);
public static Vector128<nint> BroadcastScalarToVector128(Vector128<nint> value);
public static Vector128<nuint> BroadcastScalarToVector128(Vector128<nuint> value);
public static unsafe Vector128<nint> BroadcastScalarToVector128(nint* source);
public static unsafe Vector128<nuint> BroadcastScalarToVector128(nuint* source);
public static Vector256<nint> BroadcastScalarToVector256(Vector128<nint> value);
public static Vector256<nuint> BroadcastScalarToVector256(Vector128<nuint> value);
public static unsafe Vector256<nint> BroadcastScalarToVector256(nint* source);
public static unsafe Vector256<nuint> BroadcastScalarToVector256(nuint* source);
public static unsafe Vector256<nint> BroadcastVector128ToVector256(nint* address);
public static unsafe Vector256<nuint> BroadcastVector128ToVector256(nuint* address);
public static Vector256<nint> CompareEqual(Vector256<nint> left, Vector256<nint> right);
public static Vector256<nuint> CompareEqual(Vector256<nuint> left, Vector256<nuint> right);
public static Vector256<nint> CompareGreaterThan(Vector256<nint> left, Vector256<nint> right);
public static Vector256<nint> ConvertToVector256NInt(Vector128<sbyte> value);
public static Vector256<nint> ConvertToVector256NInt(Vector128<byte> value);
public static Vector256<nint> ConvertToVector256NInt(Vector128<short> value);
public static Vector256<nint> ConvertToVector256NInt(Vector128<ushort> value);
public static Vector256<nint> ConvertToVector256NInt(Vector128<int> value);
public static Vector256<nint> ConvertToVector256NInt(Vector128<uint> value);
public static unsafe Vector256<nint> ConvertToVector256NInt(sbyte* address);
public static unsafe Vector256<nint> ConvertToVector256NInt(byte* address);
public static unsafe Vector256<nint> ConvertToVector256NInt(short* address);
public static unsafe Vector256<nint> ConvertToVector256NInt(ushort* address);
public static unsafe Vector256<nint> ConvertToVector256NInt(int* address);
public static unsafe Vector256<nint> ConvertToVector256NInt(uint* address);
public static new Vector128<nint> ExtractVector128(Vector256<nint> value, byte index);
public static new Vector128<nuint> ExtractVector128(Vector256<nuint> value, byte index);
public static unsafe Vector128<nint> GatherVector128(nint* baseAddress, Vector128<int> index, byte scale);
public static unsafe Vector128<nuint> GatherVector128(nuint* baseAddress, Vector128<int> index, byte scale);
public static unsafe Vector128<int> GatherVector128(int* baseAddress, Vector128<nint> index, byte scale);
public static unsafe Vector128<uint> GatherVector128(uint* baseAddress, Vector128<nint> index, byte scale);
public static unsafe Vector128<nint> GatherVector128(long* baseAddress, Vector128<nint> index, byte scale);
public static unsafe Vector128<nuint> GatherVector128(ulong* baseAddress, Vector128<nint> index, byte scale);
public static unsafe Vector128<nint> GatherVector128(nint* baseAddress, Vector128<nint> index, byte scale);
public static unsafe Vector128<nuint> GatherVector128(nuint* baseAddress, Vector128<nint> index, byte scale);
public static unsafe Vector128<float> GatherVector128(float* baseAddress, Vector128<nint> index, byte scale);
public static unsafe Vector128<double> GatherVector128(double* baseAddress, Vector128<nint> index, byte scale);
public static unsafe Vector256<nint> GatherVector256(nint* baseAddress, Vector128<int> index, byte scale);
public static unsafe Vector256<nuint> GatherVector256(nuint* baseAddress, Vector128<int> index, byte scale);
public static unsafe Vector128<int> GatherVector128(int* baseAddress, Vector256<nint> index, byte scale);
public static unsafe Vector128<uint> GatherVector128(uint* baseAddress, Vector256<nint> index, byte scale);
public static unsafe Vector256<nint> GatherVector256(long* baseAddress, Vector256<nint> index, byte scale);
public static unsafe Vector256<nuint> GatherVector256(ulong* baseAddress, Vector256<nint> index, byte scale);
public static unsafe Vector256<nint> GatherVector256(nint* baseAddress, Vector256<nint> index, byte scale);
public static unsafe Vector256<nuint> GatherVector256(nuint* baseAddress, Vector256<nint> index, byte scale);
public static unsafe Vector128<float> GatherVector128(float* baseAddress, Vector256<nint> index, byte scale);
public static unsafe Vector256<double> GatherVector256(double* baseAddress, Vector256<nint> index, byte scale);
public static unsafe Vector128<nint> GatherMaskVector128(Vector128<nint> source, nint* baseAddress, Vector128<int> index, Vector128<nint> mask, byte scale);
public static unsafe Vector128<nuint> GatherMaskVector128(Vector128<nuint> source, nuint* baseAddress, Vector128<int> index, Vector128<nuint> mask, byte scale);
public static unsafe Vector128<int> GatherMaskVector128(Vector128<int> source, int* baseAddress, Vector128<nint> index, Vector128<int> mask, byte scale);
public static unsafe Vector128<uint> GatherMaskVector128(Vector128<uint> source, uint* baseAddress, Vector128<nint> index, Vector128<uint> mask, byte scale);
public static unsafe Vector128<long> GatherMaskVector128(Vector128<long> source, long* baseAddress, Vector128<nint> index, Vector128<long> mask, byte scale);
public static unsafe Vector128<ulong> GatherMaskVector128(Vector128<ulong> source, ulong* baseAddress, Vector128<nint> index, Vector128<long> mask, byte scale);
public static unsafe Vector128<nint> GatherMaskVector128(Vector128<nint> source, nint* baseAddress, Vector128<nint> index, Vector128<nint> mask, byte scale);
public static unsafe Vector128<nuint> GatherMaskVector128(Vector128<nuint> source, nuint* baseAddress, Vector128<nint> index, Vector128<nuint> mask, byte scale);
public static unsafe Vector128<float> GatherMaskVector128(Vector128<float> source, float* baseAddress, Vector128<nint> index, Vector128<float> mask, byte scale);
public static unsafe Vector128<double> GatherMaskVector128(Vector128<double> source, double* baseAddress, Vector128<nint> index, Vector128<double> mask, byte scale);
public static unsafe Vector256<nint> GatherMaskVector256(Vector256<nint> source, nint* baseAddress, Vector128<int> index, Vector256<nint> mask, byte scale);
public static unsafe Vector256<nuint> GatherMaskVector256(Vector256<nuint> source, nuint* baseAddress, Vector128<int> index, Vector256<nuint> mask, byte scale);
public static unsafe Vector128<int> GatherMaskVector128(Vector128<int> source, int* baseAddress, Vector256<nint> index, Vector128<int> mask, byte scale);
public static unsafe Vector128<uint> GatherMaskVector128(Vector128<uint> source, uint* baseAddress, Vector256<nint> index, Vector128<uint> mask, byte scale);
public static unsafe Vector256<long> GatherMaskVector256(Vector256<long> source, long* baseAddress, Vector256<nint> index, Vector256<long> mask, byte scale);
public static unsafe Vector256<ulong> GatherMaskVector256(Vector256<ulong> source, ulong* baseAddress, Vector256<nint> index, Vector256<ulong> mask, byte scale);
public static unsafe Vector256<nint> GatherMaskVector256(Vector256<nint> source, nint* baseAddress, Vector256<nint> index, Vector256<nint> mask, byte scale);
public static unsafe Vector256<nuint> GatherMaskVector256(Vector256<nuint> source, nuint* baseAddress, Vector256<nint> index, Vector256<nuint> mask, byte scale);
public static unsafe Vector128<float> GatherMaskVector128(Vector128<float> source, float* baseAddress, Vector256<nint> index, Vector128<float> mask, byte scale);
public static unsafe Vector256<double> GatherMaskVector256(Vector256<double> source, double* baseAddress, Vector256<nint> index, Vector256<double> mask, byte scale);
public static new Vector256<nint> InsertVector128(Vector256<nint> value, Vector128<nint> data, byte index);
public static new Vector256<nuint> InsertVector128(Vector256<nuint> value, Vector128<nuint> data, byte index);
public static unsafe Vector256<nint> LoadAlignedVector256NonTemporal(nint* address);
public static unsafe Vector256<nuint> LoadAlignedVector256NonTemporal(nuint* address);
public static unsafe Vector128<nint> MaskLoad(nint* address, Vector128<nint> mask);
public static unsafe Vector128<nuint> MaskLoad(nuint* address, Vector128<nuint> mask);
public static unsafe Vector256<nint> MaskLoad(nint* address, Vector256<nint> mask);
public static unsafe Vector256<nuint> MaskLoad(nuint* address, Vector256<nuint> mask);
public static unsafe void MaskStore(nint* address, Vector128<nint> mask, Vector128<nint> source);
public static unsafe void MaskStore(nuint* address, Vector128<nuint> mask, Vector128<nuint> source);
public static unsafe void MaskStore(nint* address, Vector256<nint> mask, Vector256<nint> source);
public static unsafe void MaskStore(nuint* address, Vector256<nuint> mask, Vector256<nuint> source);
public static Vector256<nint> Or(Vector256<nint> left, Vector256<nint> right);
public static Vector256<nuint> Or(Vector256<nuint> left, Vector256<nuint> right);
public static new Vector256<nint> Permute2x128(Vector256<nint> left, Vector256<nint> right, byte control);
public static new Vector256<nuint> Permute2x128(Vector256<nuint> left, Vector256<nuint> right, byte control);
public static Vector256<nint> Permute4x64(Vector256<nint> value, byte control);
public static Vector256<nuint> Permute4x64(Vector256<nuint> value, byte control);
public static Vector256<nint> ShiftLeftLogical(Vector256<nint> value, Vector128<nint> count);
public static Vector256<nuint> ShiftLeftLogical(Vector256<nuint> value, Vector128<nuint> count);
public static Vector256<nint> ShiftLeftLogical(Vector256<nint> value, byte count);
public static Vector256<nuint> ShiftLeftLogical(Vector256<nuint> value, byte count);
public static Vector256<nint> ShiftLeftLogical128BitLane(Vector256<nint> value, byte numBytes);
public static Vector256<nuint> ShiftLeftLogical128BitLane(Vector256<nuint> value, byte numBytes);
public static Vector256<nint> ShiftLeftLogicalVariable(Vector256<nint> value, Vector256<nuint> count);
public static Vector256<nuint> ShiftLeftLogicalVariable(Vector256<nuint> value, Vector256<nuint> count);
public static Vector128<nint> ShiftLeftLogicalVariable(Vector128<nint> value, Vector128<nuint> count);
public static Vector128<nuint> ShiftLeftLogicalVariable(Vector128<nuint> value, Vector128<nuint> count);
public static Vector256<nint> ShiftRightLogical(Vector256<nint> value, Vector128<nint> count);
public static Vector256<nuint> ShiftRightLogical(Vector256<nuint> value, Vector128<nuint> count);
public static Vector256<nint> ShiftRightLogical(Vector256<nint> value, byte count);
public static Vector256<nuint> ShiftRightLogical(Vector256<nuint> value, byte count);
public static Vector256<nint> ShiftRightLogical128BitLane(Vector256<nint> value, byte numBytes);
public static Vector256<nuint> ShiftRightLogical128BitLane(Vector256<nuint> value, byte numBytes);
public static Vector256<nint> ShiftRightLogicalVariable(Vector256<nint> value, Vector256<nuint> count);
public static Vector256<nuint> ShiftRightLogicalVariable(Vector256<nuint> value, Vector256<nuint> count);
public static Vector128<nint> ShiftRightLogicalVariable(Vector128<nint> value, Vector128<nuint> count);
public static Vector128<nuint> ShiftRightLogicalVariable(Vector128<nuint> value, Vector128<nuint> count);
public static Vector256<nint> Subtract(Vector256<nint> left, Vector256<nint> right);
public static Vector256<nuint> Subtract(Vector256<nuint> left, Vector256<nuint> right);
public static Vector256<nint> UnpackHigh(Vector256<nint> left, Vector256<nint> right);
public static Vector256<nuint> UnpackHigh(Vector256<nuint> left, Vector256<nuint> right);
public static Vector256<nint> UnpackLow(Vector256<nint> left, Vector256<nint> right);
public static Vector256<nuint> UnpackLow(Vector256<nuint> left, Vector256<nuint> right);
public static Vector256<nint> Xor(Vector256<nint> left, Vector256<nint> right);
public static Vector256<nuint> Xor(Vector256<nuint> left, Vector256<nuint> right);
}
public abstract partial class Bmi1
{
public static nuint AndNot(nuint left, nuint right);
public static nuint BitFieldExtract(nuint value, byte start, byte length);
public static nuint BitFieldExtract(nuint value, ushort control);
public static nuint ExtractLowestSetBit(nuint value);
public static nuint GetMaskUpToLowestSetBit(nuint value);
public static nuint ResetLowestSetBit(nuint value);
public static nuint TrailingZeroCount(nuint value);
}
public abstract partial class Bmi2
{
public static nuint ZeroHighBits(nuint value, nuint index);
public static nuint MultiplyNoFlags(nuint left, nuint right);
public static unsafe nuint MultiplyNoFlags(nuint left, nuint right, nuint* low);
public static nuint ParallelBitDeposit(nuint value, nuint mask);
public static nuint ParallelBitExtract(nuint value, nuint mask);
}
public abstract partial class Lzcnt
{
public static nuint LeadingZeroCount(nuint value);
}
public abstract partial class Popcnt
{
public static nuint PopCount(nuint value);
}
} Current state of implementation
|
Co-Authored-By: Tanner Gooding <tagoo@outlook.com>
…llelBitDeposit` and `ParallelBitExtract` (BMI2)
* `AndNot` * `BitFieldExtract` * `ExtractLowestSetBit` * `GetMaskUpToLowestSetBit` * `ResetLowestSetBit` * `TrailingZeroCount`
* BlendVariable * CompareEqual * ConvertToVector128NInt * Extract * Insert * Multiply * LoadAlignedVector128NonTemporal * TestC * TestNotZAndNotC * TestZ
* ExtractVector128 * InsertVector128 * LoadAlignedVector256 * LoadDquVector256 * LoadVector256 * Permute2x128 * Store * StoreAligned * StoreAlignedNonTemporal * TestC * TestNotZAndNotC * TestZ
@deeprobin what is the status of this one -- I see you have errors:
this usually means that there's a publicly visible member in the implementation, but you have forgotten to add it to a ref/**cs file. |
@danmoseley I think this was mis-generated by the Reference Source Generator. I will adjust that. The implementation of this proposal is basically done. I will investigate this again the days. |
OK. I think we plan to rewrite the reference assembly generator (need a volunteer perhaps). But if you know of a specific bug it might be worth adding to dotnet/arcade#5717. |
Edit: I think it will be about next week |
* `MaskLoad` * `MaskStore` * `Or` * `Permute2x128` * `Permute4x64`
@danmoseley I still get some failures here. It does not exist in ref but also not in implementation? |
@ViktorHofer Do you know something about this behavior? |
I need to log off now. Maybe @ericstj can take a look? |
HARDWARE_INTRINSIC(SSE2, ConvertToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) | ||
HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) | ||
HARDWARE_INTRINSIC(SSE2, ConvertToVector128Int32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) | ||
HARDWARE_INTRINSIC(SSE2, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) | ||
HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This one shouldn't need to be touched, it's public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, Vector128<double> value)
@@ -499,11 +506,12 @@ HARDWARE_INTRINSIC(SSE41, CompareEqual, | |||
HARDWARE_INTRINSIC(SSE41, ConvertToVector128Int16, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) | |||
HARDWARE_INTRINSIC(SSE41, ConvertToVector128Int32, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) | |||
HARDWARE_INTRINSIC(SSE41, ConvertToVector128Int64, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) | |||
HARDWARE_INTRINSIC(SSE41, ConvertToVector128NInt, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovsxdq, INS_pmovzxdq, INS_pmovzxdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This one isn't correct and likely needs special handling.
The instructions are {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}
when nint
is 64-bits
but {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_movups, INS_movups, INS_invalid, INS_invalid, INS_invalid, INS_invalid}
when nint
is 32-bits.
noting that on 32-bits, TYP_INT
and TYP_UINT
are "copies" and not actual conversions
HARDWARE_INTRINSIC(SSE41, DotProduct, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_dppd}, HW_Category_IMM, HW_Flag_FullRangeIMM) | ||
HARDWARE_INTRINSIC(SSE41, Extract, 16, 2, {INS_pextrb, INS_pextrb, INS_invalid, INS_invalid, INS_pextrd, INS_pextrd, INS_invalid, INS_invalid, INS_extractps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics) | ||
HARDWARE_INTRINSIC(SSE41, Extract, 16, 2, {INS_pextrb, INS_pextrb, INS_invalid, INS_invalid, INS_pextrd, INS_pextrd, INS_pextrd, INS_pextrq, INS_extractps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be INS_pextrq, INS_pextrq
and not INS_pextrd, INS_pextrq
(not you have d
, not q
, for TYP_LONG
)
@@ -654,6 +662,7 @@ HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, | |||
HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int16, 32, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) | |||
HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int32, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) | |||
HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int64, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) | |||
HARDWARE_INTRINSIC(AVX2, ConvertToVector256NInt, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same general issue as https://github.com/dotnet/runtime/pull/63594/files#r877212176
HARDWARE_INTRINSIC(BMI1, ResetLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) | ||
HARDWARE_INTRINSIC(BMI1, TrailingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns) | ||
HARDWARE_INTRINSIC(BMI1, BitFieldExtract, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bextr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns|HW_Flag_SpecialImport) | ||
HARDWARE_INTRINSIC(BMI1, AndNot, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andn, INS_andn, INS_andn, INS_andn, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TYP_UINT
and TYP_ULONG
should likely remain INS_invalid
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Although looking at the APIs, it probably should've been TYP_INT
and TYP_LONG
that were invalid. There is a disconnect here that's probably being specially handled elsewhere.
|
||
namespace System.Runtime.Intrinsics; | ||
|
||
public sealed class ReflectionTester |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This file seems like a lot of code for testing these scenarios.
I'd expect we already have some of this functionality supported -or- that simply testing the "basic" scenario would be sufficient here.
@@ -11,9 +11,7 @@ | |||
<EnableLibraryImportGenerator>true</EnableLibraryImportGenerator> | |||
</PropertyGroup> | |||
<ItemGroup> | |||
<EmbeddedResource Include="$(MSBuildThisFileDirectory)ILLink.Substitutions.AggressiveTrimming.xml" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would be nice to not have these unrelated file edits.
/// 32-bit: | ||
/// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) | ||
/// VEXTRACTF128 xmm/m128, ymm, imm8 | ||
/// | ||
/// 64-bit: | ||
/// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8) | ||
/// VEXTRACTF128 xmm/m128, ymm, imm8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The format we use for Arm/Arm64 is:
__m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
x86: VEXTRACTF128 xmm/m128, ymm, imm8
x64: VEXTRACTF128 xmm/m128, ymm, imm8
In this case its identical for both so we can just use the normal form instead:
__m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
VEXTRACTF128 xmm/m128, ymm, imm8
/// <summary> | ||
/// __m128i _mm_i32gather_epi32 (int const* base_addr, __m128i vindex, const int scale) | ||
/// VPGATHERDD xmm, vm32x, xmm | ||
/// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown. | ||
/// </summary> | ||
public static unsafe Vector128<int> GatherVector128(int* baseAddress, Vector128<int> index, byte scale) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's changes here where I'd expect none. It looks like float
/double
got moved up and its messing up the diffs.
This would likely explain your build failures as well
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would, in general, expect this whole file to be "purely additive" (no lines modified/removed) with just the new nint
/nuint
APIs being additions
@deeprobin were you able to solve the issues you were seeing? |
Draft Pull Request was automatically closed for 30 days of inactivity. Please let us know if you'd like to reopen it. |
Proposal implementation of #52021 (closes #52021)
Proposal
Current state of implementation
/cc @tannergooding