diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs index ee9f789a40fab..b0c1376611b9e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs @@ -6,6 +6,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; #if SYSTEM_PRIVATE_CORELIB @@ -882,7 +883,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt // is not enabled. Unsafe.SkipInit(out Vector128 nonAsciiUtf16DataMask); - if (Sse41.X64.IsSupported) + if (Sse41.X64.IsSupported || (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian)) { nonAsciiUtf16DataMask = Vector128.Create(unchecked((short)0xFF80)); // mask of non-ASCII bits in a UTF-16 char } @@ -940,10 +941,8 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt uint inputCharsRemaining = (uint)(pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer) + 2; uint minElementsRemaining = (uint)Math.Min(inputCharsRemaining, outputBytesRemaining); - if (Sse41.X64.IsSupported) + if (Sse41.X64.IsSupported || (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian)) { - Debug.Assert(BitConverter.IsLittleEndian, "SSE41 requires little-endian."); - // Try reading and writing 8 elements per iteration. uint maxIters = minElementsRemaining / 8; ulong possibleNonAsciiQWord; @@ -952,14 +951,30 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt for (i = 0; (uint)i < maxIters; i++) { utf16Data = Unsafe.ReadUnaligned>(pInputBuffer); - if (!Sse41.TestZ(utf16Data, nonAsciiUtf16DataMask)) + + if (AdvSimd.IsSupported) { - goto LoopTerminatedDueToNonAsciiDataInVectorLocal; - } + Vector128 isUtf16DataNonAscii = AdvSimd.CompareTest(utf16Data, nonAsciiUtf16DataMask); + bool hasNonAsciiDataInVector = AdvSimd.Arm64.MinPairwise(isUtf16DataNonAscii, isUtf16DataNonAscii).AsUInt64().ToScalar() != 0; - // narrow and write + if (hasNonAsciiDataInVector) + { + goto LoopTerminatedDueToNonAsciiDataInVectorLocal; + } - Sse2.StoreScalar((ulong*)pOutputBuffer /* unaligned */, Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt64()); + Vector64 lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(utf16Data); + AdvSimd.Store(pOutputBuffer, lower); + } + else + { + if (!Sse41.TestZ(utf16Data, nonAsciiUtf16DataMask)) + { + goto LoopTerminatedDueToNonAsciiDataInVectorLocal; + } + + // narrow and write + Sse2.StoreScalar((ulong*)pOutputBuffer /* unaligned */, Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt64()); + } pInputBuffer += 8; pOutputBuffer += 8; @@ -978,7 +993,16 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt } utf16Data = Vector128.CreateScalarUnsafe(possibleNonAsciiQWord).AsInt16(); - Unsafe.WriteUnaligned(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32())); + + if (AdvSimd.IsSupported) + { + Vector64 lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(utf16Data); + AdvSimd.StoreSelectedScalar((uint*)pOutputBuffer, lower.AsUInt32(), 0); + } + else + { + Unsafe.WriteUnaligned(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32())); + } pInputBuffer += 4; pOutputBuffer += 4; @@ -990,7 +1014,15 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt LoopTerminatedDueToNonAsciiDataInVectorLocal: outputBytesRemaining -= 8 * i; - possibleNonAsciiQWord = Sse2.X64.ConvertToUInt64(utf16Data.AsUInt64()); + + if (Sse2.X64.IsSupported) + { + possibleNonAsciiQWord = Sse2.X64.ConvertToUInt64(utf16Data.AsUInt64()); + } + else + { + possibleNonAsciiQWord = utf16Data.AsUInt64().ToScalar(); + } // Temporarily set 'possibleNonAsciiQWord' to be the low 64 bits of the vector, // then check whether it's all-ASCII. If so, narrow and write to the destination @@ -1000,7 +1032,15 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt if (Utf16Utility.AllCharsInUInt64AreAscii(possibleNonAsciiQWord)) // all chars in first QWORD are ASCII { - Unsafe.WriteUnaligned(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32())); + if (AdvSimd.IsSupported) + { + Vector64 lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(utf16Data); + AdvSimd.StoreSelectedScalar((uint*)pOutputBuffer, lower.AsUInt32(), 0); + } + else + { + Unsafe.WriteUnaligned(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32())); + } pInputBuffer += 4; pOutputBuffer += 4; outputBytesRemaining -= 4; diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs index c4c68e966eba2..fa4602b33f429 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs @@ -8,6 +8,7 @@ internal static class Vector64 public static Vector64 Create(ulong value) => throw new PlatformNotSupportedException(); public static Vector64 CreateScalar(uint value) => throw new PlatformNotSupportedException(); public static Vector64 AsByte(this Vector64 vector) where T : struct => throw new PlatformNotSupportedException(); + public static Vector64 AsUInt32(this Vector64 vector) where T : struct => throw new PlatformNotSupportedException(); } internal readonly struct Vector64 where T : struct