Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vectorize IndexOf for OrdinalIgnoreCase #67758

Merged
merged 12 commits into from
Apr 9, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
using System.Diagnostics;
using System.Text.Unicode;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace System.Globalization
{
Expand Down Expand Up @@ -222,7 +224,6 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan<char> source, ReadOnly
// A non-linguistic search compares chars directly against one another, so large
// target strings can never be found inside small search spaces. This check also
// handles empty 'source' spans.

return -1;
}

Expand All @@ -236,7 +237,80 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan<char> source, ReadOnly
return CompareInfo.NlsIndexOfOrdinalCore(source, value, ignoreCase: true, fromBeginning: true);
}

return OrdinalCasing.IndexOf(source, value);
// if value starts with an ASCII char we can use a vectorized path
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
ref char valueRef = ref MemoryMarshal.GetReference(value);
char valueChar = valueRef;

if (!char.IsAscii(valueChar))
{
// Fall back to the implementation that handles a non-ASCII first char
return OrdinalCasing.IndexOf(source, value);
}


int valueTailLength = value.Length - 1;
ref char valueTail = ref Unsafe.Add(ref valueRef, 1);
ref char searchSpace = ref MemoryMarshal.GetReference(source);
int remainingSearchSpaceLength = source.Length - valueTailLength;

// hoist some expressions from the loop
char valueCharU = default;
char valueCharL = default;
bool isLetter = false;
nint offset = 0;

if ((uint)((valueChar | 0x20) - 'a') <= 'z' - 'a')
{
valueCharU = (char)(valueChar & ~0x20);
valueCharL = (char)(valueChar | 0x20);
isLetter = true;
}

while (remainingSearchSpaceLength > 0)
{
int relativeIndex;
if (isLetter)
{
relativeIndex =
SpanHelpers.IndexOfAny(
ref Unsafe.Add(ref searchSpace, offset),
valueCharU,
valueCharL,
remainingSearchSpaceLength);
}
else
{
relativeIndex =
SpanHelpers.IndexOf(
ref Unsafe.Add(ref searchSpace, offset),
valueChar,
remainingSearchSpaceLength);
}

// The quick search above looked for the first element of "value"; a negative index means it was not found.
if (relativeIndex < 0)
break;

remainingSearchSpaceLength -= relativeIndex;
offset += relativeIndex;
EgorBo marked this conversation as resolved.
Show resolved Hide resolved

if (remainingSearchSpaceLength <= 0)
break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there.

// Found the first element of "value". See if the tail matches.
if (EqualsIgnoreCase(
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
ref Unsafe.Add(ref searchSpace, (nuint)(offset + 1)),
ref valueTail,
valueTailLength))
{
return (int)offset; // The tail matched. Return a successful find.
}

remainingSearchSpaceLength--;
offset++;
}

return -1;
}

internal static int LastIndexOf(string source, string value, int startIndex, int count)
Expand Down