Skip to content

Commit

Permalink
Improve vectorization of String.Split (#64899)
Browse files Browse the repository at this point in the history
  • Loading branch information
yesmey committed Mar 24, 2022
1 parent 7508080 commit b4e258a
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,24 @@ public int Length
/// <summary>
/// Stores <paramref name="item"/> at the current position and advances the position by one.
/// </summary>
/// <param name="item">The value to append.</param>
public void Append(T item)
{
    int pos = _pos;

    // One unsigned comparison decides whether the slot is inside the span.
    // The growth case is delegated to AddWithResize (marked NoInlining) so
    // that the resize logic stays out of this method's body.
    if ((uint)pos < (uint)_span.Length)
    {
        _span[pos] = item;
        _pos = pos + 1;
    }
    else
    {
        AddWithResize(item);
    }
}

// Uncommon path: kept out of line (NoInlining) so callers only carry the call.
// Grows the backing span, then stores the item in the slot that was
// one past the end before growing.
[MethodImpl(MethodImplOptions.NoInlining)]
private void AddWithResize(T item)
{
    Debug.Assert(_pos == _span.Length);

    // Capture the insertion index before Grow() runs.
    int insertAt = _pos;
    Grow();

    _span[insertAt] = item;
    _pos = insertAt + 1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1615,8 +1615,7 @@ private void MakeSeparatorList(ReadOnlySpan<char> separators, ref ValueListBuild
sep0 = separators[0];
sep1 = separators.Length > 1 ? separators[1] : sep0;
sep2 = separators.Length > 2 ? separators[2] : sep1;

if (Length >= 16 && Sse41.IsSupported)
if (Vector128.IsHardwareAccelerated && Length >= Vector128<ushort>.Count * 2)
{
MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1, sep2);
return;
Expand Down Expand Up @@ -1659,75 +1658,54 @@ private void MakeSeparatorList(ReadOnlySpan<char> separators, ref ValueListBuild
/// <summary>
/// Scans this string for the separators <paramref name="c"/>, <paramref name="c2"/> and
/// <paramref name="c3"/> and appends the index of every matching char to
/// <paramref name="sepListBuilder"/> in ascending order.
/// </summary>
/// <param name="sepListBuilder">Receives the indices of the separator chars.</param>
/// <param name="c">First separator char.</param>
/// <param name="c2">Second separator char.</param>
/// <param name="c3">Third separator char.</param>
/// <exception cref="PlatformNotSupportedException">
/// <see cref="Vector128.IsHardwareAccelerated"/> is <see langword="false"/>.
/// </exception>
private void MakeSeparatorListVectorized(ref ValueListBuilder<int> sepListBuilder, char c, char c2, char c3)
{
    // Redundant test so we won't prejit remainder of this method
    // on platforms where it is not supported
    if (!Vector128.IsHardwareAccelerated)
    {
        throw new PlatformNotSupportedException();
    }

    // The do/while below always loads one full vector before testing its condition,
    // so the string must hold at least Vector128<ushort>.Count chars.
    Debug.Assert(Length >= Vector128<ushort>.Count);

    nuint offset = 0;
    nuint lengthToExamine = (nuint)(uint)Length;

    ref ushort source = ref Unsafe.As<char, ushort>(ref _firstChar);

    // Broadcast each separator across all lanes once, outside the loop.
    Vector128<ushort> v1 = Vector128.Create((ushort)c);
    Vector128<ushort> v2 = Vector128.Create((ushort)c2);
    Vector128<ushort> v3 = Vector128.Create((ushort)c3);

    do
    {
        Vector128<ushort> vector = Vector128.LoadUnsafe(ref source, offset);
        Vector128<ushort> v1Eq = Vector128.Equals(vector, v1);
        Vector128<ushort> v2Eq = Vector128.Equals(vector, v2);
        Vector128<ushort> v3Eq = Vector128.Equals(vector, v3);
        Vector128<byte> cmp = (v1Eq | v2Eq | v3Eq).AsByte();

        if (cmp != Vector128<byte>.Zero)
        {
            // Skip every other bit: the comparison result is viewed as bytes, so each
            // matching 16-bit lane contributes two adjacent bits; keep only the even one.
            uint mask = cmp.ExtractMostSignificantBits() & 0x5555;
            do
            {
                // Bit positions count bytes; dividing by sizeof(char) gives the char index
                // within the current vector.
                uint bitPos = (uint)BitOperations.TrailingZeroCount(mask) / sizeof(char);
                sepListBuilder.Append((int)(offset + bitPos));
                mask = BitOperations.ResetLowestSetBit(mask);
            } while (mask != 0);
        }

        offset += (nuint)Vector128<ushort>.Count;
    } while (offset <= lengthToExamine - (nuint)Vector128<ushort>.Count);

    // Scalar loop for the chars left over after the last full vector.
    while (offset < lengthToExamine)
    {
        char curr = (char)Unsafe.Add(ref source, offset);
        if (curr == c || curr == c2 || curr == c3)
        {
            sepListBuilder.Append((int)offset);
        }

        offset++;
    }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,7 @@ public static void SplitNullCharArraySeparator_BindsToCharArrayOverload()
[InlineData("this, is, a, string, with some spaces", new[] { ',', 's', 'a' }, M, StringSplitOptions.RemoveEmptyEntries, new[] { "thi", " i", " ", " ", "tring", " with ", "ome ", "p", "ce" })]
[InlineData("this, is, a, string, with some spaces", new[] { ',', 's', 'a' }, M, StringSplitOptions.TrimEntries, new[] { "thi", "", "i", "", "", "", "", "tring", "with", "ome", "p", "ce", "" })]
[InlineData("this, is, a, string, with some spaces", new[] { ',', 's', 'a' }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "thi", "i", "tring", "with", "ome", "p", "ce" })]
[InlineData("this, is, a, very long string, with some spaces, commas and more spaces", new[] { ',', 's' }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "thi", "i", "a", "very long", "tring", "with", "ome", "pace", "comma", "and more", "pace" })]
public static void SplitCharArraySeparator(string value, char[] separators, int count, StringSplitOptions options, string[] expected)
{
Assert.Equal(expected, value.Split(separators, count, options));
Expand Down Expand Up @@ -561,6 +562,7 @@ public static void SplitCharArraySeparator(string value, char[] separators, int
[InlineData("this, is, a, string, with some spaces, ", new[] { ",", " s" }, M, StringSplitOptions.RemoveEmptyEntries, new[] { "this", " is", " a", "tring", " with", "ome", "paces", " " })]
[InlineData("this, is, a, string, with some spaces, ", new[] { ",", " s" }, M, StringSplitOptions.TrimEntries, new[] { "this", "is", "a", "", "tring", "with", "ome", "paces", "" })]
[InlineData("this, is, a, string, with some spaces, ", new[] { ",", " s" }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "this", "is", "a", "tring", "with", "ome", "paces" })]
[InlineData("this, is, a, very long string, with some spaces, commas and more spaces", new[] { ",", " s" }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "this", "is", "a", "very long", "tring", "with", "ome", "paces", "commas and more", "paces" })]
public static void SplitStringArraySeparator(string value, string[] separators, int count, StringSplitOptions options, string[] expected)
{
Assert.Equal(expected, value.Split(separators, count, options));
Expand Down

0 comments on commit b4e258a

Please sign in to comment.