Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce ProbabilisticWithAsciiCharSearchValues overhead on non-ASCII texts #89224

Merged
merged 1 commit into from Jul 20, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -40,7 +40,9 @@ internal override int IndexOfAny(ReadOnlySpan<char> span)
{
int offset = 0;

- if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
+ // We check whether the first character is ASCII before calling into IndexOfAnyAsciiSearcher
+ // in order to minimize the overhead this fast-path has on non-ASCII texts.
+ if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count && char.IsAscii(span[0]))
{
// We are using IndexOfAnyAsciiSearcher to search for the first ASCII character in the set, or any non-ASCII character.
// We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate).
Expand Down Expand Up @@ -100,7 +102,9 @@ internal override int IndexOfAnyExcept(ReadOnlySpan<char> span)
{
int offset = 0;

- if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
+ // We check whether the first character is ASCII before calling into IndexOfAnyAsciiSearcher
+ // in order to minimize the overhead this fast-path has on non-ASCII texts.
+ if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count && char.IsAscii(span[0]))
{
// Do a regular IndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char.
offset = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
Expand Down Expand Up @@ -134,7 +138,9 @@ internal override int IndexOfAnyExcept(ReadOnlySpan<char> span)

internal override int LastIndexOfAny(ReadOnlySpan<char> span)
{
- if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
+ // We check whether the last character is ASCII before calling into IndexOfAnyAsciiSearcher
+ // in order to minimize the overhead this fast-path has on non-ASCII texts.
+ if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count && char.IsAscii(span[^1]))
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
{
// We are using IndexOfAnyAsciiSearcher to search for the last ASCII character in the set, or any non-ASCII character.
// We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate).
Expand Down Expand Up @@ -186,7 +192,9 @@ internal override int LastIndexOfAny(ReadOnlySpan<char> span)

internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span)
{
- if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
+ // We check whether the last character is ASCII before calling into IndexOfAnyAsciiSearcher
+ // in order to minimize the overhead this fast-path has on non-ASCII texts.
+ if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count && char.IsAscii(span[^1]))
{
// Do a regular LastIndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char.
int offset = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
Expand Down