Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Combine writes in WidenAsciiToUtf16 #88502

Closed
wants to merge 15 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;

namespace System.Text
{
Expand Down Expand Up @@ -77,5 +79,75 @@ internal static uint CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiDat
return numAsciiBytes;
}
}

/// <summary>
/// Writes the two halves of <paramref name="source"/> back to back, starting at
/// <paramref name="destination"/>, without requiring any particular alignment.
/// </summary>
/// <typeparam name="T">Unmanaged type of each half.</typeparam>
/// <param name="destination">First byte of the region that receives both halves.</param>
/// <param name="source">
/// The pair to store. <c>Lower</c> is written at offset 0 and <c>Upper</c> at offset
/// <c>Unsafe.SizeOf&lt;T&gt;()</c>.
/// </param>
/// <remarks>
/// The store order intentionally does not depend on <see cref="BitConverter.IsLittleEndian"/>.
/// This helper is meant for the (lower, upper) result of a vector widening operation, where
/// "lower" holds the lower-indexed source elements. Element index order follows memory order
/// on every platform, so the lower half always belongs at the lower address; swapping the
/// halves on big-endian targets would emit the second half of the data before the first.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void WritePairUnaligned<T>(ref byte destination, (T Lower, T Upper) source)
    where T : unmanaged
{
    Unsafe.WriteUnaligned(ref destination, source.Lower);

    // Unsafe.SizeOf<T>() yields the same value as sizeof(T) without needing an unsafe context.
    Unsafe.WriteUnaligned(ref Unsafe.Add(ref destination, Unsafe.SizeOf<T>()), source.Upper);
}

/// <summary>
/// Zero-extends each of the four bytes packed in <paramref name="value"/> to 16 bits and
/// stores the resulting 8 bytes at <paramref name="destination"/> with an unaligned write.
/// </summary>
/// <param name="destination">First of the four 16-bit slots to fill.</param>
/// <param name="value">Four packed 8-bit values.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void WriteUnalignedWidening(ref ushort destination, uint value)
{
    ref byte target = ref Unsafe.As<ushort, byte>(ref destination);

    if (AdvSimd.Arm64.IsSupported)
    {
        // Interleave the packed bytes with zero bytes, then store the low 64 bits.
        Vector128<byte> packed = AdvSimd.DuplicateToVector128(value).AsByte();
        Vector128<byte> interleaved = AdvSimd.Arm64.ZipLow(packed, Vector128<byte>.Zero);
        Unsafe.WriteUnaligned(ref target, interleaved.AsUInt64().ToScalar());
        return;
    }

    if (Vector128.IsHardwareAccelerated)
    {
        // Widen the low byte lanes to 16-bit lanes, then store the low 64 bits.
        Vector128<byte> packed = Vector128.CreateScalar(value).AsByte();
        Vector128<ushort> widened = Vector128.WidenLower(packed);
        Unsafe.WriteUnaligned(ref target, widened.AsUInt64().ToScalar());
        return;
    }

    if (UIntPtr.Size >= sizeof(ulong))
    {
        // Scalar bit-spreading on a 64-bit register:
        //   0x00000000_b3b2b1b0 -> 0x0000b3b2_0000b1b0 -> 0x00b300b2_00b100b0
        ulong spread = value;
        spread = (spread | (spread << 16)) & 0x0000FFFF_0000FFFFuL;
        spread = (spread | (spread << 8)) & 0x00FF00FF_00FF00FFuL;
        Unsafe.WriteUnaligned(ref target, spread);
        return;
    }

    // 32-bit scalar fallback: widen each 16-bit half separately. The half written at the
    // lower address is the low half on little-endian and the high half on big-endian.
    const int CharsPerHalf = sizeof(uint) / sizeof(ushort);
    ref ushort secondHalf = ref Unsafe.Add(ref destination, CharsPerHalf);

    if (BitConverter.IsLittleEndian)
    {
        WriteUnalignedWideningLower(ref destination, value);
        WriteUnalignedWideningUpper(ref secondHalf, value);
    }
    else
    {
        WriteUnalignedWideningUpper(ref destination, value);
        WriteUnalignedWideningLower(ref secondHalf, value);
    }
}

/// <summary>
/// Zero-extends the two low-order bytes of <paramref name="value"/> to 16 bits each and
/// stores the resulting 4 bytes at <paramref name="destination"/> with an unaligned write.
/// </summary>
/// <param name="destination">First of the two 16-bit slots to fill.</param>
/// <param name="value">Packed bytes; only bits 0-15 contribute to the result.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void WriteUnalignedWideningLower(ref ushort destination, uint value)
{
    // 0x....b1b0 -> 0x00b100b0: byte 0 stays in place, byte 1 moves up to bits 16-23.
    uint byte0 = value & 0x000000FFu;
    uint byte1 = (value & 0x0000FF00u) << 8;
    Unsafe.WriteUnaligned(ref Unsafe.As<ushort, byte>(ref destination), byte0 | byte1);
}

/// <summary>
/// Zero-extends the two high-order bytes of <paramref name="value"/> to 16 bits each and
/// stores the resulting 4 bytes at <paramref name="destination"/> with an unaligned write.
/// </summary>
/// <param name="destination">First of the two 16-bit slots to fill.</param>
/// <param name="value">Packed bytes; only bits 16-31 contribute to the result.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void WriteUnalignedWideningUpper(ref ushort destination, uint value)
{
    // 0xb3b2.... -> 0x00b300b2: byte 2 drops to bits 0-7, byte 3 drops to bits 16-23.
    uint byte2 = (value >> 16) & 0x000000FFu;
    uint byte3 = (value >> 8) & 0x00FF0000u;
    Unsafe.WriteUnaligned(ref Unsafe.As<ushort, byte>(ref destination), byte2 | byte3);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1650,7 +1650,7 @@ internal static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16B
// Intrinsified in mono interpreter
nuint currentOffset = 0;

if (BitConverter.IsLittleEndian && Vector128.IsHardwareAccelerated && elementCount >= (uint)Vector128<byte>.Count)
if (Vector128.IsHardwareAccelerated && elementCount >= (uint)Vector128<byte>.Count)
{
ushort* pCurrentWriteAddress = (ushort*)pUtf16Buffer;

Expand All @@ -1670,9 +1670,7 @@ internal static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16B
break;
}

(Vector256<ushort> utf16LowVector, Vector256<ushort> utf16HighVector) = Vector256.Widen(asciiVector);
utf16LowVector.Store(pCurrentWriteAddress);
utf16HighVector.Store(pCurrentWriteAddress + Vector256<ushort>.Count);
WritePairUnaligned(ref *(byte*)pCurrentWriteAddress, Vector256.Widen(asciiVector));

currentOffset += (nuint)Vector256<byte>.Count;
pCurrentWriteAddress += (nuint)Vector256<byte>.Count;
Expand All @@ -1694,9 +1692,7 @@ internal static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16B
break;
}

(Vector128<ushort> utf16LowVector, Vector128<ushort> utf16HighVector) = Vector128.Widen(asciiVector);
utf16LowVector.Store(pCurrentWriteAddress);
utf16HighVector.Store(pCurrentWriteAddress + Vector128<ushort>.Count);
WritePairUnaligned(ref *(byte*)pCurrentWriteAddress, Vector128.Widen(asciiVector));

currentOffset += (nuint)Vector128<byte>.Count;
pCurrentWriteAddress += (nuint)Vector128<byte>.Count;
Expand All @@ -1710,23 +1706,21 @@ internal static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16B
// Only bother vectorizing if we have enough data to do so.
if (elementCount >= SizeOfVector)
{
// Note use of SBYTE instead of BYTE below; we're using the two's-complement
// representation of negative integers to act as a surrogate for "is ASCII?".

nuint finalOffsetWhereCanLoop = elementCount - SizeOfVector;
do
{
Vector<sbyte> asciiVector = Unsafe.ReadUnaligned<Vector<sbyte>>(pAsciiBuffer + currentOffset);
if (Vector.LessThanAny(asciiVector, Vector<sbyte>.Zero))
Vector<byte> asciiVector = Unsafe.ReadUnaligned<Vector<byte>>(pAsciiBuffer + currentOffset);

// If the high bit of any byte is set, that byte is non-ASCII.
// In two's complement the high bit is the sign bit, so such a byte reads as a negative sbyte.
if (Vector.LessThanAny(Vector.AsVectorSByte(asciiVector), Vector<sbyte>.Zero))
{
break; // found non-ASCII data
// Found non-ASCII data.
break;
}

Vector.Widen(Vector.AsVectorByte(asciiVector), out Vector<ushort> utf16LowVector, out Vector<ushort> utf16HighVector);

// TODO: Is the below logic also valid for big-endian platforms?
Unsafe.WriteUnaligned(pUtf16Buffer + currentOffset, utf16LowVector);
Unsafe.WriteUnaligned(pUtf16Buffer + currentOffset + Vector<ushort>.Count, utf16HighVector);
Vector.Widen(asciiVector, out Vector<ushort> lower, out Vector<ushort> upper);
WritePairUnaligned(ref Unsafe.As<char, byte>(ref pUtf16Buffer[currentOffset]), (lower, upper));

currentOffset += SizeOfVector;
} while (currentOffset <= finalOffsetWhereCanLoop);
Expand All @@ -1752,7 +1746,7 @@ internal static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16B
goto FoundNonAsciiData;
}

WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref pUtf16Buffer[currentOffset], asciiData);
WriteUnalignedWidening(ref Unsafe.As<char, ushort>(ref pUtf16Buffer[currentOffset]), asciiData);
currentOffset += 4;
} while (currentOffset <= finalOffsetWhereCanLoop);
}
Expand All @@ -1771,17 +1765,7 @@ internal static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16B
goto FoundNonAsciiData;
}

if (BitConverter.IsLittleEndian)
{
pUtf16Buffer[currentOffset] = (char)(byte)asciiData;
pUtf16Buffer[currentOffset + 1] = (char)(asciiData >> 8);
}
else
{
pUtf16Buffer[currentOffset + 1] = (char)(byte)asciiData;
pUtf16Buffer[currentOffset] = (char)(asciiData >> 8);
}

WriteUnalignedWideningLower(ref Unsafe.As<char, ushort>(ref pUtf16Buffer[currentOffset]), asciiData);
currentOffset += 2;
}

Expand Down Expand Up @@ -1840,41 +1824,7 @@ internal static void WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref char outputB
{
Debug.Assert(AllBytesInUInt32AreAscii(value));

if (AdvSimd.Arm64.IsSupported)
{
Vector128<byte> vecNarrow = AdvSimd.DuplicateToVector128(value).AsByte();
Vector128<ulong> vecWide = AdvSimd.Arm64.ZipLow(vecNarrow, Vector128<byte>.Zero).AsUInt64();
Unsafe.WriteUnaligned(ref Unsafe.As<char, byte>(ref outputBuffer), vecWide.ToScalar());
}
else if (Vector128.IsHardwareAccelerated)
{
Vector128<byte> vecNarrow = Vector128.CreateScalar(value).AsByte();
Vector128<ulong> vecWide = Vector128.WidenLower(vecNarrow).AsUInt64();
Unsafe.WriteUnaligned(ref Unsafe.As<char, byte>(ref outputBuffer), vecWide.ToScalar());
}
else
{
if (BitConverter.IsLittleEndian)
{
outputBuffer = (char)(byte)value;
value >>= 8;
Unsafe.Add(ref outputBuffer, 1) = (char)(byte)value;
value >>= 8;
Unsafe.Add(ref outputBuffer, 2) = (char)(byte)value;
value >>= 8;
Unsafe.Add(ref outputBuffer, 3) = (char)value;
}
else
{
Unsafe.Add(ref outputBuffer, 3) = (char)(byte)value;
value >>= 8;
Unsafe.Add(ref outputBuffer, 2) = (char)(byte)value;
value >>= 8;
Unsafe.Add(ref outputBuffer, 1) = (char)(byte)value;
value >>= 8;
outputBuffer = (char)value;
}
}
WriteUnalignedWidening(ref Unsafe.As<char, ushort>(ref outputBuffer), value);
}
}
}