Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vectorize IndexOf for OrdinalIgnoreCase #67758

Merged
merged 12 commits into from
Apr 9, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
using System.Diagnostics;
using System.Text.Unicode;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace System.Globalization
{
Expand Down Expand Up @@ -223,7 +225,7 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan<char> source, ReadOnly
// target strings can never be found inside small search spaces. This check also
// handles empty 'source' spans.

return -1;
goto NOT_FOUND;
}

if (GlobalizationMode.Invariant)
Expand All @@ -236,7 +238,51 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan<char> source, ReadOnly
return CompareInfo.NlsIndexOfOrdinalCore(source, value, ignoreCase: true, fromBeginning: true);
}

return OrdinalCasing.IndexOf(source, value);
// if value starts with an ASCII char we can use a vectorized path
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
ref char valueRef = ref MemoryMarshal.GetReference(value);
char valueChar = valueRef;

if (!char.IsAscii(valueChar))
{
// Fall back to the non-ASCII-friendly implementation
return OrdinalCasing.IndexOf(source, value);
}

ref char searchSpace = ref MemoryMarshal.GetReference(source);
int valueLength = value.Length;
int searchSpaceLength = source.Length;

do
{
// If valueChar is an ASCII letter ([a..z] or [A..Z]), search for both its lowercase and uppercase
// counterparts using IndexOfAny; otherwise use plain IndexOf.
int candidatePos = (uint)((valueChar | 0x20) - 'a') <= 'z' - 'a' ?
SpanHelpers.IndexOfAny(ref searchSpace, (char)(valueChar & ~0x20), (char)(valueChar | 0x20), searchSpaceLength) :
SpanHelpers.IndexOf(ref searchSpace, valueChar, searchSpaceLength);

if ((candidatePos == -1) || (searchSpaceLength - candidatePos) < valueLength)
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
{
// the whole input doesn't contain the first char or it does
// but there is no room left to fit the value
goto NOT_FOUND;
}

// Compare the current candidate using a comparison that handles both ASCII and non-ASCII chars.
// The first chars are already known to match, so skip them and compare the remaining valueLength - 1 chars.
if (EqualsIgnoreCase(
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
ref Unsafe.Add(ref searchSpace, (nuint)(candidatePos + 1)),
ref Unsafe.Add(ref valueRef, (nuint)1),
valueLength - 1))
{
return source.Length - searchSpaceLength + candidatePos;
}

searchSpace = Unsafe.Add(ref searchSpace, (nuint)(candidatePos + valueLength));
searchSpaceLength -= candidatePos + valueLength;
} while (searchSpaceLength >= valueLength);

NOT_FOUND:
return -1;
}

internal static int LastIndexOf(string source, string value, int startIndex, int count)
Expand Down