Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vectorize IndexOf for OrdinalIgnoreCase #67758

Merged
merged 12 commits into from
Apr 9, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System.Diagnostics;
using System.Text.Unicode;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace System.Globalization
{
Expand Down Expand Up @@ -236,7 +237,66 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan<char> source, ReadOnly
return CompareInfo.NlsIndexOfOrdinalCore(source, value, ignoreCase: true, fromBeginning: true);
}

return OrdinalCasing.IndexOf(source, value);
// if value starts with an ASCII char we can use a vectorized path
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
ref char valueRef = ref MemoryMarshal.GetReference(value);
char valueChar = valueRef;

if (!char.IsAscii(valueChar))
{
// Fall back to the version that handles a non-ASCII first char
return OrdinalCasing.IndexOf(source, value);
}

// hoist some expressions from the loop
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
int valueTailLength = value.Length - 1;
int searchSpaceLength = source.Length - valueTailLength;
ref char searchSpace = ref MemoryMarshal.GetReference(source);
char valueCharU = default;
char valueCharL = default;
nint offset = 0;
bool isLetter = false;

if ((uint)((valueChar | 0x20) - 'a') <= 'z' - 'a')
{
valueCharU = (char)(valueChar & ~0x20);
valueCharL = (char)(valueChar | 0x20);
isLetter = true;
}

do
{
int relativeIndex = isLetter ?
SpanHelpers.IndexOfAny(ref Unsafe.Add(ref searchSpace, offset), valueCharU, valueCharL, searchSpaceLength) :
SpanHelpers.IndexOf(ref Unsafe.Add(ref searchSpace, offset), valueChar, searchSpaceLength);

// The quick search above looked for the first element of "value"; stop if it was not found.
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
if (relativeIndex < 0)
{
break;
}

searchSpaceLength -= relativeIndex;
if (searchSpaceLength <= 0)
{
break;
}
offset += relativeIndex;
EgorBo marked this conversation as resolved.
Show resolved Hide resolved

// Found the first element of "value". See if the tail matches.
if (valueTailLength == 0 || // for single-char values we already matched first chars
EqualsIgnoreCase(
ref Unsafe.Add(ref searchSpace, (nuint)(offset + 1)),
ref Unsafe.Add(ref valueRef, 1), valueTailLength))
{
return (int)offset; // The tail matched. Return a successful find.
}

searchSpaceLength--;
offset++;
}
while (searchSpaceLength > 0);
stephentoub marked this conversation as resolved.
Show resolved Hide resolved

return -1;
}

internal static int LastIndexOf(string source, string value, int startIndex, int count)
Expand Down