From bc470fd41b86535e8fcdfff2db2328c3458077c6 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Sat, 15 Apr 2023 08:36:56 +0200 Subject: [PATCH 1/6] Add dedicated Ascii.IsValid path --- .../src/System/Text/Ascii.CaseConversion.cs | 28 +-- .../src/System/Text/Ascii.Utility.cs | 57 +++++ .../src/System/Text/Ascii.cs | 225 +++++++++++++++--- 3 files changed, 252 insertions(+), 58 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs index 9fa47f66fbde..c226161ec574 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs @@ -238,7 +238,7 @@ public static OperationStatus ToUpperInPlace(Span value, out int charsWrit // Unaligned read and check for non-ASCII data. Vector128 srcVector = Vector128.LoadUnsafe(ref *pSrc); - if (VectorContainsAnyNonAsciiData(srcVector)) + if (VectorContainsNonAsciiChar(srcVector)) { goto Drain64; } @@ -291,7 +291,7 @@ public static OperationStatus ToUpperInPlace(Span value, out int charsWrit // Unaligned read & check for non-ASCII data. srcVector = Vector128.LoadUnsafe(ref *pSrc, i); - if (VectorContainsAnyNonAsciiData(srcVector)) + if (VectorContainsNonAsciiChar(srcVector)) { goto Drain64; } @@ -463,30 +463,6 @@ public static OperationStatus ToUpperInPlace(Span value, out int charsWrit return i; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe bool VectorContainsAnyNonAsciiData(Vector128 vector) - where T : unmanaged - { - if (sizeof(T) == 1) - { - if (vector.ExtractMostSignificantBits() != 0) { return true; } - } - else if (sizeof(T) == 2) - { - if (VectorContainsNonAsciiChar(vector.AsUInt16())) - { - return true; - } - } - else - { - Debug.Fail("Unknown types provided."); - throw new NotSupportedException(); - } - - return false; - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe void Widen8To16AndAndWriteTo(Vector128 narrowVector, char* pDest, nuint destOffset) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs index e8413c3ffe3a..35561afe79e4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs @@ -41,6 +41,17 @@ private static bool AllCharsInUInt64AreAscii(ulong value) return (value & ~0x007F007F_007F007Ful) == 0; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool AllCharsInUInt64AreAscii(ulong value) + where T : unmanaged + { + Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(ushort)); + + return typeof(T) == typeof(byte) + ? AllBytesInUInt64AreAscii(value) + : AllCharsInUInt64AreAscii(value); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int GetIndexOfFirstNonAsciiByteInLane_AdvSimd(Vector128 value, Vector128 bitmask) { @@ -1432,6 +1443,52 @@ private static bool VectorContainsNonAsciiChar(Vector128 utf16Vector) } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool VectorContainsNonAsciiChar(Vector128 vector) + where T : unmanaged + { + Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(ushort)); + + return typeof(T) == typeof(byte) + ? VectorContainsNonAsciiChar(vector.AsByte()) + : VectorContainsNonAsciiChar(vector.AsUInt16()); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool AllCharsInVectorAreAscii(Vector128 vector) + where T : unmanaged + { + Debug.Assert(Avx.IsSupported); + Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(ushort)); + + if (typeof(T) == typeof(byte)) + { + return + Sse41.IsSupported ? Sse41.TestZ(vector.AsByte(), Vector128.Create((byte)0x80)) : + AdvSimd.Arm64.IsSupported ? AllBytesInUInt64AreAscii(AdvSimd.Arm64.MaxPairwise(vector.AsByte(), vector.AsByte()).AsUInt64().ToScalar()) : + vector.AsByte().ExtractMostSignificantBits() == 0; + } + else + { + return + Sse41.IsSupported ? Sse41.TestZ(vector.AsInt16(), Vector128.Create((short)-128)) : + AdvSimd.Arm64.IsSupported ? AllCharsInUInt64AreAscii(AdvSimd.Arm64.MaxPairwise(vector.AsUInt16(), vector.AsUInt16()).AsUInt64().ToScalar()) : + (vector.AsUInt16() & Vector128.Create((ushort)(ushort.MaxValue - 127))) == Vector128.Zero; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool AllCharsInVectorAreAscii(Vector256 vector) + where T : unmanaged + { + Debug.Assert(Avx.IsSupported); + Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(ushort)); + + return typeof(T) == typeof(byte) + ? Avx.TestZ(vector.AsByte(), Vector256.Create((byte)0x80)) + : Avx.TestZ(vector.AsInt16(), Vector256.Create((short)-128)); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector128 ExtractAsciiVector(Vector128 vectorFirst, Vector128 vectorSecond) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs index 8934bd91ada5..6101af8456b0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs @@ -2,7 +2,9 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; +using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; namespace System.Text { @@ -14,21 +16,9 @@ public static partial class Ascii /// The value to inspect. /// True if contains only ASCII bytes or is /// empty; False otherwise. - public static unsafe bool IsValid(ReadOnlySpan value) - { - if (value.IsEmpty) - { - return true; - } - - nuint bufferLength = (uint)value.Length; - fixed (byte* pBuffer = &MemoryMarshal.GetReference(value)) - { - nuint idxOfFirstNonAsciiElement = GetIndexOfFirstNonAsciiByte(pBuffer, bufferLength); - Debug.Assert(idxOfFirstNonAsciiElement <= bufferLength); - return idxOfFirstNonAsciiElement == bufferLength; - } - } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool IsValid(ReadOnlySpan value) => + IsValidCore(ref MemoryMarshal.GetReference(value), value.Length); /// /// Determines whether the provided value contains only ASCII chars. @@ -36,34 +26,205 @@ public static unsafe bool IsValid(ReadOnlySpan value) /// The value to inspect. /// True if contains only ASCII chars or is /// empty; False otherwise. - public static unsafe bool IsValid(ReadOnlySpan value) - { - if (value.IsEmpty) - { - return true; - } - - nuint bufferLength = (uint)value.Length; - fixed (char* pBuffer = &MemoryMarshal.GetReference(value)) - { - nuint idxOfFirstNonAsciiElement = GetIndexOfFirstNonAsciiChar(pBuffer, bufferLength); - Debug.Assert(idxOfFirstNonAsciiElement <= bufferLength); - return idxOfFirstNonAsciiElement == bufferLength; - } - } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool IsValid(ReadOnlySpan value) => + IsValidCore(ref Unsafe.As(ref MemoryMarshal.GetReference(value)), value.Length); /// /// Determines whether the provided value is ASCII byte. /// /// The value to inspect. /// True if is ASCII, False otherwise. - public static unsafe bool IsValid(byte value) => value <= 127; + public static bool IsValid(byte value) => value <= 127; /// /// Determines whether the provided value is ASCII char. /// /// The value to inspect. /// True if is ASCII, False otherwise. - public static unsafe bool IsValid(char value) => value <= 127; + public static bool IsValid(char value) => value <= 127; + + private static unsafe bool IsValidCore(ref T searchSpace, int length) where T : unmanaged + { + Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(ushort)); + + ref T searchSpaceEnd = ref Unsafe.Add(ref searchSpace, length); + + if (!Vector128.IsHardwareAccelerated || length < Vector128.Count) + { + int elementsPerUlong = sizeof(ulong) / sizeof(T); + + if (length < elementsPerUlong) + { + if (typeof(T) == typeof(byte) && length >= sizeof(uint)) + { + // Process byte inputs with lengths [4, 7] + return AllBytesInUInt32AreAscii( + Unsafe.ReadUnaligned(ref Unsafe.As(ref searchSpace)) | + Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.Subtract(ref searchSpaceEnd, sizeof(uint))))); + } + + // Process inputs with lengths [0, 3] + while (Unsafe.IsAddressLessThan(ref searchSpace, ref searchSpaceEnd)) + { + if (typeof(T) == typeof(byte) + ? (Unsafe.BitCast(searchSpace) > 127) + : (Unsafe.BitCast(searchSpace) > 127)) + { + return false; + } + + searchSpace = ref Unsafe.Add(ref searchSpace, 1); + } + + return true; + } + + // If vectorization isn't supported, process 16 bytes at a time. + if (!Vector128.IsHardwareAccelerated && length > 2 * elementsPerUlong) + { + ref T finalStart = ref Unsafe.Subtract(ref searchSpaceEnd, 2 * elementsPerUlong); + + do + { + if (!AllCharsInUInt64AreAscii( + Unsafe.ReadUnaligned(ref Unsafe.As(ref searchSpace)) | + Unsafe.ReadUnaligned(ref Unsafe.Add(ref Unsafe.As(ref searchSpace), sizeof(ulong))))) + { + return false; + } + + searchSpace = ref Unsafe.Add(ref searchSpace, 2 * elementsPerUlong); + } + while (Unsafe.IsAddressLessThan(ref searchSpace, ref finalStart)); + + searchSpace = ref finalStart; + } + + // Process the last [8, 16] bytes. + return AllCharsInUInt64AreAscii( + Unsafe.ReadUnaligned(ref Unsafe.As(ref searchSpace)) | + Unsafe.ReadUnaligned(ref Unsafe.Subtract(ref Unsafe.As(ref searchSpaceEnd), sizeof(ulong)))); + } + + // Process inputs with lengths [16, 32] bytes. + if (length <= 2 * Vector128.Count) + { + return AllCharsInVectorAreAscii( + Vector128.LoadUnsafe(ref searchSpace) | + Vector128.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, (nuint)Vector128.Count))); + } + + if (Vector256.IsHardwareAccelerated) + { + // Process inputs with lengths [33, 64] bytes. + if (length <= 2 * Vector256.Count) + { + return AllCharsInVectorAreAscii( + Vector256.LoadUnsafe(ref searchSpace) | + Vector256.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, (nuint)Vector256.Count))); + } + + // Process long inputs 128 bytes at a time. + if (length > 4 * Vector256.Count) + { + // Process the first 128 bytes. + if (!AllCharsInVectorAreAscii( + Vector256.LoadUnsafe(ref searchSpace) | + Vector256.LoadUnsafe(ref searchSpace, (nuint)Vector256.Count) | + Vector256.LoadUnsafe(ref searchSpace, 2 * (nuint)Vector256.Count) | + Vector256.LoadUnsafe(ref searchSpace, 3 * (nuint)Vector256.Count))) + { + return false; + } + + searchSpace = ref Unsafe.Add(ref searchSpace, 4 * Vector256.Count); + + // Try to opportunistically align the reads below. The input isn't pinned, so the GC + // is free to move the references. We're therefore assuming that reads may still be unaligned. + // They may also be unaligned if the input chars aren't 2-byte aligned. + nuint misalignedElements = ((nuint)Unsafe.AsPointer(ref searchSpace) & (nuint)(Vector256.Count - 1)) / (nuint)sizeof(T); + searchSpace = ref Unsafe.Subtract(ref searchSpace, misalignedElements); + + ref T finalStart = ref Unsafe.Subtract(ref searchSpaceEnd, 4 * Vector256.Count); + + while (Unsafe.IsAddressLessThan(ref searchSpace, ref finalStart)) + { + if (!AllCharsInVectorAreAscii( + Vector256.LoadUnsafe(ref searchSpace) | + Vector256.LoadUnsafe(ref searchSpace, (nuint)Vector256.Count) | + Vector256.LoadUnsafe(ref searchSpace, 2 * (nuint)Vector256.Count) | + Vector256.LoadUnsafe(ref searchSpace, 3 * (nuint)Vector256.Count))) + { + return false; + } + + searchSpace = ref Unsafe.Add(ref searchSpace, 4 * Vector256.Count); + } + + searchSpace = ref finalStart; + } + + // Process the last [1, 128] bytes. + // The search space has at least 2 * Vector256 bytes available to read. + // We process the first 2 and last 2 vectors, which may overlap. + return AllCharsInVectorAreAscii( + Vector256.LoadUnsafe(ref searchSpace) | + Vector256.LoadUnsafe(ref searchSpace, (nuint)Vector256.Count) | + Vector256.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, 2 * (nuint)Vector256.Count)) | + Vector256.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, (nuint)Vector256.Count))); + } + else + { + // Process long inputs 64 bytes at a time. + if (length > 4 * Vector128.Count) + { + // Process the first 64 bytes. + if (!AllCharsInVectorAreAscii( + Vector128.LoadUnsafe(ref searchSpace) | + Vector128.LoadUnsafe(ref searchSpace, (nuint)Vector128.Count) | + Vector128.LoadUnsafe(ref searchSpace, 2 * (nuint)Vector128.Count) | + Vector128.LoadUnsafe(ref searchSpace, 3 * (nuint)Vector128.Count))) + { + return false; + } + + searchSpace = ref Unsafe.Add(ref searchSpace, 4 * Vector128.Count); + + // Try to opportunistically align the reads below. The input isn't pinned, so the GC + // is free to move the references. We're therefore assuming that reads may still be unaligned. + // They may also be unaligned if the input chars aren't 2-byte aligned. + nuint misalignedElements = ((nuint)Unsafe.AsPointer(ref searchSpace) & (nuint)(Vector128.Count - 1)) / (nuint)sizeof(T); + searchSpace = ref Unsafe.Subtract(ref searchSpace, misalignedElements); + + ref T finalStart = ref Unsafe.Subtract(ref searchSpaceEnd, 4 * Vector128.Count); + + while (Unsafe.IsAddressLessThan(ref searchSpace, ref finalStart)) + { + if (!AllCharsInVectorAreAscii( + Vector128.LoadUnsafe(ref searchSpace) | + Vector128.LoadUnsafe(ref searchSpace, (nuint)Vector128.Count) | + Vector128.LoadUnsafe(ref searchSpace, 2 * (nuint)Vector128.Count) | + Vector128.LoadUnsafe(ref searchSpace, 3 * (nuint)Vector128.Count))) + { + return false; + } + + searchSpace = ref Unsafe.Add(ref searchSpace, 4 * Vector128.Count); + } + + searchSpace = ref finalStart; + } + + // Process the last [1, 64] bytes. + // The search space has at least 2 * Vector128 bytes available to read. + // We process the first 2 and last 2 vectors, which may overlap. + return AllCharsInVectorAreAscii( + Vector128.LoadUnsafe(ref searchSpace) | + Vector128.LoadUnsafe(ref searchSpace, (nuint)Vector128.Count) | + Vector128.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, 2 * (nuint)Vector128.Count)) | + Vector128.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, (nuint)Vector128.Count))); + } + } } } From 9a97e6386da6b35ff5e3a3808f23ea7e877d700e Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Sat, 15 Apr 2023 09:38:30 +0200 Subject: [PATCH 2/6] Remove wrong assert --- .../System.Private.CoreLib/src/System/Text/Ascii.Utility.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs index 35561afe79e4..4acf6e82baa6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs @@ -1458,9 +1458,9 @@ private static bool VectorContainsNonAsciiChar(Vector128 vector) private static bool AllCharsInVectorAreAscii(Vector128 vector) where T : unmanaged { - Debug.Assert(Avx.IsSupported); Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(ushort)); + // This is a copy of VectorContainsNonAsciiChar with an inverted condition. if (typeof(T) == typeof(byte)) { return From 5570632613e5e37c727b60e67523beee5e746490 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Wed, 19 Apr 2023 17:00:27 +0200 Subject: [PATCH 3/6] Use for loop instead of chasing end byref --- .../src/System/Text/Ascii.cs | 91 +++++++++---------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs index 6101af8456b0..0e8871426854 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs @@ -48,11 +48,9 @@ public static partial class Ascii { Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(ushort)); - ref T searchSpaceEnd = ref Unsafe.Add(ref searchSpace, length); - if (!Vector128.IsHardwareAccelerated || length < Vector128.Count) { - int elementsPerUlong = sizeof(ulong) / sizeof(T); + uint elementsPerUlong = (uint)(sizeof(ulong) / sizeof(T)); if (length < elementsPerUlong) { @@ -61,58 +59,57 @@ public static partial class Ascii // Process byte inputs with lengths [4, 7] return AllBytesInUInt32AreAscii( Unsafe.ReadUnaligned(ref Unsafe.As(ref searchSpace)) | - Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.Subtract(ref searchSpaceEnd, sizeof(uint))))); + Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.Add(ref searchSpace, length - sizeof(uint))))); } // Process inputs with lengths [0, 3] - while (Unsafe.IsAddressLessThan(ref searchSpace, ref searchSpaceEnd)) + for (int j = 0; j < length; j++) { if (typeof(T) == typeof(byte) - ? (Unsafe.BitCast(searchSpace) > 127) - : (Unsafe.BitCast(searchSpace) > 127)) + ? (Unsafe.BitCast(Unsafe.Add(ref searchSpace, j)) > 127) + : (Unsafe.BitCast(Unsafe.Add(ref searchSpace, j)) > 127)) { return false; } - - searchSpace = ref Unsafe.Add(ref searchSpace, 1); } return true; } + nuint i = 0; + // If vectorization isn't supported, process 16 bytes at a time. if (!Vector128.IsHardwareAccelerated && length > 2 * elementsPerUlong) { - ref T finalStart = ref Unsafe.Subtract(ref searchSpaceEnd, 2 * elementsPerUlong); + nuint finalStart = (nuint)length - 2 * elementsPerUlong; - do + for (; i < finalStart; i += 2 * elementsPerUlong) { if (!AllCharsInUInt64AreAscii( - Unsafe.ReadUnaligned(ref Unsafe.As(ref searchSpace)) | - Unsafe.ReadUnaligned(ref Unsafe.Add(ref Unsafe.As(ref searchSpace), sizeof(ulong))))) + Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.Add(ref searchSpace, i))) | + Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.Add(ref searchSpace, i + elementsPerUlong))))) { return false; } - - searchSpace = ref Unsafe.Add(ref searchSpace, 2 * elementsPerUlong); } - while (Unsafe.IsAddressLessThan(ref searchSpace, ref finalStart)); - searchSpace = ref finalStart; + i = finalStart; } // Process the last [8, 16] bytes. return AllCharsInUInt64AreAscii( - Unsafe.ReadUnaligned(ref Unsafe.As(ref searchSpace)) | - Unsafe.ReadUnaligned(ref Unsafe.Subtract(ref Unsafe.As(ref searchSpaceEnd), sizeof(ulong)))); + Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.Add(ref searchSpace, i))) | + Unsafe.ReadUnaligned(ref Unsafe.Subtract(ref Unsafe.As(ref Unsafe.Add(ref searchSpace, length)), sizeof(ulong)))); } + ref T searchSpaceEnd = ref Unsafe.Add(ref searchSpace, length); + // Process inputs with lengths [16, 32] bytes. if (length <= 2 * Vector128.Count) { return AllCharsInVectorAreAscii( Vector128.LoadUnsafe(ref searchSpace) | - Vector128.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, (nuint)Vector128.Count))); + Vector128.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, Vector128.Count))); } if (Vector256.IsHardwareAccelerated) @@ -122,7 +119,7 @@ public static partial class Ascii { return AllCharsInVectorAreAscii( Vector256.LoadUnsafe(ref searchSpace) | - Vector256.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, (nuint)Vector256.Count))); + Vector256.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, Vector256.Count))); } // Process long inputs 128 bytes at a time. @@ -138,31 +135,31 @@ public static partial class Ascii return false; } - searchSpace = ref Unsafe.Add(ref searchSpace, 4 * Vector256.Count); + nuint i = 4 * (nuint)Vector256.Count; // Try to opportunistically align the reads below. The input isn't pinned, so the GC // is free to move the references. We're therefore assuming that reads may still be unaligned. // They may also be unaligned if the input chars aren't 2-byte aligned. nuint misalignedElements = ((nuint)Unsafe.AsPointer(ref searchSpace) & (nuint)(Vector256.Count - 1)) / (nuint)sizeof(T); - searchSpace = ref Unsafe.Subtract(ref searchSpace, misalignedElements); + i -= misalignedElements; - ref T finalStart = ref Unsafe.Subtract(ref searchSpaceEnd, 4 * Vector256.Count); + nuint finalStart = (nuint)length - 4 * (nuint)Vector256.Count; - while (Unsafe.IsAddressLessThan(ref searchSpace, ref finalStart)) + for (; i < finalStart; i += 4 * (nuint)Vector256.Count) { + ref T current = ref Unsafe.Add(ref searchSpace, i); + if (!AllCharsInVectorAreAscii( - Vector256.LoadUnsafe(ref searchSpace) | - Vector256.LoadUnsafe(ref searchSpace, (nuint)Vector256.Count) | - Vector256.LoadUnsafe(ref searchSpace, 2 * (nuint)Vector256.Count) | - Vector256.LoadUnsafe(ref searchSpace, 3 * (nuint)Vector256.Count))) + Vector256.LoadUnsafe(ref current) | + Vector256.LoadUnsafe(ref current, (nuint)Vector256.Count) | + Vector256.LoadUnsafe(ref current, 2 * (nuint)Vector256.Count) | + Vector256.LoadUnsafe(ref current, 3 * (nuint)Vector256.Count))) { return false; } - - searchSpace = ref Unsafe.Add(ref searchSpace, 4 * Vector256.Count); } - searchSpace = ref finalStart; + searchSpace = ref Unsafe.Add(ref searchSpace, finalStart); } // Process the last [1, 128] bytes. @@ -171,8 +168,8 @@ public static partial class Ascii return AllCharsInVectorAreAscii( Vector256.LoadUnsafe(ref searchSpace) | Vector256.LoadUnsafe(ref searchSpace, (nuint)Vector256.Count) | - Vector256.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, 2 * (nuint)Vector256.Count)) | - Vector256.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, (nuint)Vector256.Count))); + Vector256.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, 2 * Vector256.Count)) | + Vector256.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, Vector256.Count))); } else { @@ -189,31 +186,33 @@ public static partial class Ascii return false; } + nuint i = 4 * (nuint)Vector128.Count; + searchSpace = ref Unsafe.Add(ref searchSpace, 4 * Vector128.Count); // Try to opportunistically align the reads below. The input isn't pinned, so the GC // is free to move the references. We're therefore assuming that reads may still be unaligned. // They may also be unaligned if the input chars aren't 2-byte aligned. nuint misalignedElements = ((nuint)Unsafe.AsPointer(ref searchSpace) & (nuint)(Vector128.Count - 1)) / (nuint)sizeof(T); - searchSpace = ref Unsafe.Subtract(ref searchSpace, misalignedElements); + i -= misalignedElements; - ref T finalStart = ref Unsafe.Subtract(ref searchSpaceEnd, 4 * Vector128.Count); + nuint finalStart = (nuint)length - 4 * (nuint)Vector128.Count; - while (Unsafe.IsAddressLessThan(ref searchSpace, ref finalStart)) + for (; i < finalStart; i += 4 * (nuint)Vector128.Count) { + ref T current = ref Unsafe.Add(ref searchSpace, i); + if (!AllCharsInVectorAreAscii( - Vector128.LoadUnsafe(ref searchSpace) | - Vector128.LoadUnsafe(ref searchSpace, (nuint)Vector128.Count) | - Vector128.LoadUnsafe(ref searchSpace, 2 * (nuint)Vector128.Count) | - Vector128.LoadUnsafe(ref searchSpace, 3 * (nuint)Vector128.Count))) + Vector128.LoadUnsafe(ref current) | + Vector128.LoadUnsafe(ref current, (nuint)Vector128.Count) | + Vector128.LoadUnsafe(ref current, 2 * (nuint)Vector128.Count) | + Vector128.LoadUnsafe(ref current, 3 * (nuint)Vector128.Count))) { return false; } - - searchSpace = ref Unsafe.Add(ref searchSpace, 4 * Vector128.Count); } - searchSpace = ref finalStart; + searchSpace = ref Unsafe.Add(ref searchSpace, finalStart); } // Process the last [1, 64] bytes. @@ -222,8 +221,8 @@ public static partial class Ascii return AllCharsInVectorAreAscii( Vector128.LoadUnsafe(ref searchSpace) | Vector128.LoadUnsafe(ref searchSpace, (nuint)Vector128.Count) | - Vector128.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, 2 * (nuint)Vector128.Count)) | - Vector128.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, (nuint)Vector128.Count))); + Vector128.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, 2 * Vector128.Count)) | + Vector128.LoadUnsafe(ref Unsafe.Subtract(ref searchSpaceEnd, Vector128.Count))); } } } From d3f100b3f7f344decd4e593e473a2c6e12c76f0d Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Wed, 19 Apr 2023 17:42:22 +0200 Subject: [PATCH 4/6] Add an extra assert --- src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs index 0e8871426854..8dfb35ed13c7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs @@ -142,6 +142,7 @@ public static partial class Ascii // They may also be unaligned if the input chars aren't 2-byte aligned. nuint misalignedElements = ((nuint)Unsafe.AsPointer(ref searchSpace) & (nuint)(Vector256.Count - 1)) / (nuint)sizeof(T); i -= misalignedElements; + Debug.Assert((int)i > 3 * Vector256.Count); nuint finalStart = (nuint)length - 4 * (nuint)Vector256.Count; @@ -195,6 +196,7 @@ public static partial class Ascii // They may also be unaligned if the input chars aren't 2-byte aligned. nuint misalignedElements = ((nuint)Unsafe.AsPointer(ref searchSpace) & (nuint)(Vector128.Count - 1)) / (nuint)sizeof(T); i -= misalignedElements; + Debug.Assert((int)i > 3 * Vector128.Count); nuint finalStart = (nuint)length - 4 * (nuint)Vector128.Count; From fbc19038c832518901af64abe14454d882345e00 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Wed, 19 Apr 2023 19:36:39 +0200 Subject: [PATCH 5/6] Use nuint in loop for [0, 3] elements --- src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs index 8dfb35ed13c7..bb762c73aec1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs @@ -63,7 +63,7 @@ public static partial class Ascii } // Process inputs with lengths [0, 3] - for (int j = 0; j < length; j++) + for (nuint j = 0; j < (uint)length; j++) { if (typeof(T) == typeof(byte) ? (Unsafe.BitCast(Unsafe.Add(ref searchSpace, j)) > 127) From 59211c215e7dc647b5fdf95c691d4764158f078a Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Wed, 19 Apr 2023 19:59:00 +0200 Subject: [PATCH 6/6] Remove leftover byref increment --- src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs index bb762c73aec1..0b9af8cf0c6a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs @@ -189,8 +189,6 @@ public static partial class Ascii nuint i = 4 * (nuint)Vector128.Count; - searchSpace = ref Unsafe.Add(ref searchSpace, 4 * Vector128.Count); - // Try to opportunistically align the reads below. The input isn't pinned, so the GC // is free to move the references. We're therefore assuming that reads may still be unaligned. // They may also be unaligned if the input chars aren't 2-byte aligned.