From 0a362d22390bafdada47ed2f47c53cb780d18b7b Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Thu, 20 Jul 2023 01:26:27 +0200 Subject: [PATCH] Reduce ProbabilisticWithAsciiCharSearchValues overhead on non-ASCII texts --- .../ProbabilisticWithAsciiCharSearchValues.cs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs index 065f2cd5e893..8a8f09a0d327 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs @@ -40,7 +40,9 @@ internal override int IndexOfAny(ReadOnlySpan span) { int offset = 0; - if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count) + // We check whether the first character is ASCII before calling into IndexOfAnyAsciiSearcher + // in order to minimize the overhead this fast-path has on non-ASCII texts. + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count && char.IsAscii(span[0])) { // We are using IndexOfAnyAsciiSearcher to search for the first ASCII character in the set, or any non-ASCII character. // We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate). @@ -100,7 +102,9 @@ internal override int IndexOfAnyExcept(ReadOnlySpan span) { int offset = 0; - if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count) + // We check whether the first character is ASCII before calling into IndexOfAnyAsciiSearcher + // in order to minimize the overhead this fast-path has on non-ASCII texts. + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count && char.IsAscii(span[0])) { // Do a regular IndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char. offset = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized( @@ -134,7 +138,9 @@ internal override int IndexOfAnyExcept(ReadOnlySpan span) internal override int LastIndexOfAny(ReadOnlySpan span) { - if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count) + // We check whether the last character is ASCII before calling into IndexOfAnyAsciiSearcher + // in order to minimize the overhead this fast-path has on non-ASCII texts. + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count && char.IsAscii(span[^1])) { // We are using IndexOfAnyAsciiSearcher to search for the last ASCII character in the set, or any non-ASCII character. // We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate). @@ -186,7 +192,9 @@ internal override int LastIndexOfAny(ReadOnlySpan span) internal override int LastIndexOfAnyExcept(ReadOnlySpan span) { - if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count) + // We check whether the last character is ASCII before calling into IndexOfAnyAsciiSearcher + // in order to minimize the overhead this fast-path has on non-ASCII texts. + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count && char.IsAscii(span[^1])) { // Do a regular LastIndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char. int offset = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized(