Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions src/System.Private.CoreLib/shared/System/Text/ASCIIUtility.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1009,10 +1009,14 @@ private static void NarrowFourUtf16CharsToAsciiAndWriteToBuffer(ref byte outputB
{
Debug.Assert(AllCharsInUInt64AreAscii(value));

if (Bmi2.X64.IsSupported)
if (Sse2.X64.IsSupported)
{
// BMI2 will work regardless of the processor's endianness.
Unsafe.WriteUnaligned(ref outputBuffer, (uint)Bmi2.X64.ParallelBitExtract(value, 0x00FF00FF_00FF00FFul));
// Narrows a vector of words [ w0 w1 w2 w3 ] to a vector of bytes
// [ b0 b1 b2 b3 b0 b1 b2 b3 ], then writes 4 bytes (32 bits) to the destination.

Vector128<short> vecWide = Sse2.X64.ConvertScalarToVector128UInt64(value).AsInt16();
Vector128<uint> vecNarrow = Sse2.PackUnsignedSaturate(vecWide, vecWide).AsUInt32();
Unsafe.WriteUnaligned<uint>(ref outputBuffer, Sse2.ConvertToUInt32(vecNarrow));
}
else
{
Expand Down Expand Up @@ -1694,14 +1698,16 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt
/// writes them to the output buffer with machine endianness.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref char outputBuffer, uint value)
internal static void WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref char outputBuffer, uint value)
{
Debug.Assert(AllBytesInUInt32AreAscii(value));

if (Bmi2.X64.IsSupported)
if (Sse2.X64.IsSupported)
{
// BMI2 will work regardless of the processor's endianness.
Unsafe.WriteUnaligned(ref Unsafe.As<char, byte>(ref outputBuffer), Bmi2.X64.ParallelBitDeposit(value, 0x00FF00FF_00FF00FFul));
Debug.Assert(BitConverter.IsLittleEndian, "SSE2 widening assumes little-endian.");
Vector128<byte> vecNarrow = Sse2.ConvertScalarToVector128UInt32(value).AsByte();
Vector128<ulong> vecWide = Sse2.UnpackLow(vecNarrow, Vector128<byte>.Zero).AsUInt64();
Unsafe.WriteUnaligned<ulong>(ref Unsafe.As<char, byte>(ref outputBuffer), Sse2.X64.ConvertToUInt64(vecWide));
}
else
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics.X86;
using Internal.Runtime.CompilerServices;

namespace System.Text.Unicode
Expand Down Expand Up @@ -61,47 +60,27 @@ private static uint ExtractCharFromFirstTwoByteSequence(uint value)
}

/// <summary>
/// Given a machine-endian DWORD which four bytes of UTF-8 data, interprets the input as a
/// Given a machine-endian DWORD which represents four bytes of UTF-8 data, interprets the input as a
/// four-byte UTF-8 sequence and returns the machine-endian DWORD of the UTF-16 representation.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint ExtractCharsFromFourByteSequence(uint value)
{
if (BitConverter.IsLittleEndian)
{
if (Bmi2.IsSupported)
{
// need to reverse endianness for bit manipulation to work correctly
value = BinaryPrimitives.ReverseEndianness(value);

// value = [ 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx ]
// want to return [ 110110wwwwxxxxxx 110111xxxxxxxxxx ]
// where wwww = uuuuu - 1

uint highSurrogateChar = Bmi2.ParallelBitExtract(value, 0b00000111_00111111_00110000_00000000u);
uint lowSurrogateChar = Bmi2.ParallelBitExtract(value, 0b00000000_00000000_00001111_00111111u);

uint combined = (lowSurrogateChar << 16) + highSurrogateChar;
combined -= 0x40u; // wwww = uuuuu - 1
combined += 0xDC00_D800u; // add surrogate markers
return combined;
}
else
{
// input is UTF8 [ 10xxxxxx 10yyyyyy 10uuzzzz 11110uuu ] = scalar 000uuuuu zzzzyyyy yyxxxxxx
// want to return UTF16 scalar 000uuuuuzzzzyyyyyyxxxxxx = [ 110111yy yyxxxxxx 110110ww wwzzzzyy ]
// where wwww = uuuuu - 1
uint retVal = (uint)(byte)value << 8; // retVal = [ 00000000 00000000 11110uuu 00000000 ]
retVal |= (value & 0x0000_3F00u) >> 6; // retVal = [ 00000000 00000000 11110uuu uuzzzz00 ]
retVal |= (value & 0x0030_0000u) >> 20; // retVal = [ 00000000 00000000 11110uuu uuzzzzyy ]
retVal |= (value & 0x3F00_0000u) >> 8; // retVal = [ 00000000 00xxxxxx 11110uuu uuzzzzyy ]
retVal |= (value & 0x000F_0000u) << 6; // retVal = [ 000000yy yyxxxxxx 11110uuu uuzzzzyy ]
retVal -= 0x0000_0040u; // retVal = [ 000000yy yyxxxxxx 111100ww wwzzzzyy ]
retVal -= 0x0000_2000u; // retVal = [ 000000yy yyxxxxxx 110100ww wwzzzzyy ]
retVal += 0x0000_0800u; // retVal = [ 000000yy yyxxxxxx 110110ww wwzzzzyy ]
retVal += 0xDC00_0000u; // retVal = [ 110111yy yyxxxxxx 110110ww wwzzzzyy ]
return retVal;
}
// input is UTF8 [ 10xxxxxx 10yyyyyy 10uuzzzz 11110uuu ] = scalar 000uuuuu zzzzyyyy yyxxxxxx
// want to return UTF16 scalar 000uuuuuzzzzyyyyyyxxxxxx = [ 110111yy yyxxxxxx 110110ww wwzzzzyy ]
// where wwww = uuuuu - 1
uint retVal = (uint)(byte)value << 8; // retVal = [ 00000000 00000000 11110uuu 00000000 ]
retVal |= (value & 0x0000_3F00u) >> 6; // retVal = [ 00000000 00000000 11110uuu uuzzzz00 ]
retVal |= (value & 0x0030_0000u) >> 20; // retVal = [ 00000000 00000000 11110uuu uuzzzzyy ]
retVal |= (value & 0x3F00_0000u) >> 8; // retVal = [ 00000000 00xxxxxx 11110uuu uuzzzzyy ]
retVal |= (value & 0x000F_0000u) << 6; // retVal = [ 000000yy yyxxxxxx 11110uuu uuzzzzyy ]
retVal -= 0x0000_0040u; // retVal = [ 000000yy yyxxxxxx 111100ww wwzzzzyy ]
retVal -= 0x0000_2000u; // retVal = [ 000000yy yyxxxxxx 110100ww wwzzzzyy ]
retVal += 0x0000_0800u; // retVal = [ 000000yy yyxxxxxx 110110ww wwzzzzyy ]
retVal += 0xDC00_0000u; // retVal = [ 110111yy yyxxxxxx 110110ww wwzzzzyy ]
return retVal;
}
else
{
Expand Down Expand Up @@ -135,37 +114,19 @@ private static uint ExtractFourUtf8BytesFromSurrogatePair(uint value)
// input = [ 110111yyyyxxxxxx 110110wwwwzzzzyy ] = scalar (000uuuuu zzzzyyyy yyxxxxxx)
// must return [ 10xxxxxx 10yyyyyy 10uuzzzz 11110uuu ], where wwww = uuuuu - 1

if (Bmi2.IsSupported)
{
// Since pdep and pext have high latencies and can only be dispatched to a single execution port, we want
// to use them conservatively. Here, we'll build up the scalar value (this would normally be pext) via simple
// logical and arithmetic operations, and use only pdep for the expensive step of exploding the scalar across
// all four output bytes.

uint unmaskedScalar = (value << 10) + (value >> 16) + ((0x40u) << 10) /* uuuuu = wwww + 1 */ - 0xDC00u /* remove low surrogate marker */;

// Now, unmaskedScalar = [ xxxxxx11 011uuuuu zzzzyyyy yyxxxxxx ]. There's a bit of unneeded junk at the beginning
// that should normally be masked out via an and, but we'll just direct pdep to ignore it.
value += 0x0000_0040u; // = [ 110111yyyyxxxxxx 11011uuuuuzzzzyy ]

uint exploded = Bmi2.ParallelBitDeposit(unmaskedScalar, 0b00000111_00111111_00111111_00111111u); // = [ 00000uuu 00uuzzzz 00yyyyyy 00xxxxxx ]
return BinaryPrimitives.ReverseEndianness(exploded + 0xF080_8080u); // = [ 10xxxxxx 10yyyyyy 10uuzzzz 11110uuu ]
}
else
{
value += 0x0000_0040u; // = [ 110111yyyyxxxxxx 11011uuuuuzzzzyy ]
uint tempA = BinaryPrimitives.ReverseEndianness(value & 0x003F_0700u); // = [ 00000000 00000uuu 00xxxxxx 00000000 ]
tempA = BitOperations.RotateLeft(tempA, 16); // = [ 00xxxxxx 00000000 00000000 00000uuu ]

uint tempA = BinaryPrimitives.ReverseEndianness(value & 0x003F_0700u); // = [ 00000000 00000uuu 00xxxxxx 00000000 ]
tempA = BitOperations.RotateLeft(tempA, 16); // = [ 00xxxxxx 00000000 00000000 00000uuu ]
uint tempB = (value & 0x00FCu) << 6; // = [ 00000000 00000000 00uuzzzz 00000000 ]
uint tempC = (value >> 6) & 0x000F_0000u; // = [ 00000000 0000yyyy 00000000 00000000 ]
tempC |= tempB;

uint tempB = (value & 0x00FCu) << 6; // = [ 00000000 00000000 00uuzzzz 00000000 ]
uint tempC = (value >> 6) & 0x000F_0000u; // = [ 00000000 0000yyyy 00000000 00000000 ]
tempC |= tempB;
uint tempD = (value & 0x03u) << 20; // = [ 00000000 00yy0000 00000000 00000000 ]
tempD |= 0x8080_80F0u;

uint tempD = (value & 0x03u) << 20; // = [ 00000000 00yy0000 00000000 00000000 ]
tempD |= 0x8080_80F0u;

return (tempD | tempA | tempC); // = [ 10xxxxxx 10yyyyyy 10uuzzzz 11110uuu ]
}
return (tempD | tempA | tempC); // = [ 10xxxxxx 10yyyyyy 10uuzzzz 11110uuu ]
}
else
{
Expand Down Expand Up @@ -756,43 +717,6 @@ private static bool UInt32ThirdByteIsAscii(uint value)
|| (!BitConverter.IsLittleEndian && ((value & 0x8000u) == 0));
}

/// <summary>
/// Takes a DWORD holding 4 packed ASCII bytes, zero-extends every byte to a 16-bit WORD,
/// and stores the resulting 4 chars (one QWORD) at <paramref name="outputBuffer"/> using
/// the machine's byte order.
/// </summary>
/// <param name="outputBuffer">Reference to the first of the four chars that will be written.</param>
/// <param name="value">The four source bytes, packed in machine-endian order.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void Widen4AsciiBytesToCharsAndWrite(ref char outputBuffer, uint value)
{
    if (Bmi2.X64.IsSupported)
    {
        // pdep scatters the four source bytes into the low byte of each 16-bit lane.
        // The result is stored as a single QWORD, so no per-endianness handling is needed here.
        ulong widened = Bmi2.X64.ParallelBitDeposit(value, 0x00FF00FF_00FF00FFul);
        Unsafe.WriteUnaligned(ref Unsafe.As<char, byte>(ref outputBuffer), widened);
        return;
    }

    if (BitConverter.IsLittleEndian)
    {
        // Least significant byte is the first char in memory.
        outputBuffer = (char)(byte)value;
        Unsafe.Add(ref outputBuffer, 1) = (char)(byte)(value >> 8);
        Unsafe.Add(ref outputBuffer, 2) = (char)(byte)(value >> 16);
        Unsafe.Add(ref outputBuffer, 3) = (char)(value >> 24);
    }
    else
    {
        // Least significant byte is the last char in memory.
        Unsafe.Add(ref outputBuffer, 3) = (char)(byte)value;
        Unsafe.Add(ref outputBuffer, 2) = (char)(byte)(value >> 8);
        Unsafe.Add(ref outputBuffer, 1) = (char)(byte)(value >> 16);
        outputBuffer = (char)(value >> 24);
    }
}

/// <summary>
/// Given a DWORD which represents a buffer of 2 packed UTF-16 values in machine endianness,
/// converts those scalar values to their 3-byte UTF-8 representation and writes the
Expand Down
Loading