Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 0463e34

Browse files
eerhardt authored and jkotas committed
Vectorize String.IndexOf(char) and String.LastIndexOf(char) (#16392)
* Vectorize String.IndexOf(char) using the same algorithm as SpanHelpers IndexOf(byte).
* Respond to feedback.
* Vectorize String.LastIndexOf
  Clean up IndexOf vectorization.

Signed-off-by: dotnet-bot-corefx-mirror <dotnet-bot@microsoft.com>
1 parent 76b6432 commit 0463e34

File tree

1 file changed

+158
-6
lines changed

1 file changed

+158
-6
lines changed

src/Common/src/CoreLib/System/String.Searching.cs

Lines changed: 158 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,10 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System.Globalization;
6+
using System.Numerics;
7+
using System.Runtime.CompilerServices;
68
using System.Runtime.InteropServices;
9+
using Internal.Runtime.CompilerServices;
710

811
namespace System
912
{
@@ -63,24 +66,35 @@ public int IndexOf(char value, StringComparison comparisonType)
6366

6467
case StringComparison.OrdinalIgnoreCase:
6568
return CompareInfo.Invariant.IndexOf(this, value, CompareOptions.OrdinalIgnoreCase);
66-
69+
6770
default:
6871
throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType));
6972
}
7073
}
71-
74+
7275
public unsafe int IndexOf(char value, int startIndex, int count)
7376
{
74-
if (startIndex < 0 || startIndex > Length)
77+
if ((uint)startIndex > (uint)Length)
7578
throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
7679

77-
if (count < 0 || count > Length - startIndex)
80+
if ((uint)count > (uint)(Length - startIndex))
7881
throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
7982

8083
fixed (char* pChars = &_firstChar)
8184
{
8285
char* pCh = pChars + startIndex;
86+
char* pEndCh = pCh + count;
8387

88+
if (Vector.IsHardwareAccelerated && count >= Vector<ushort>.Count * 2)
89+
{
90+
unchecked
91+
{
92+
const int elementsPerByte = sizeof(ushort) / sizeof(byte);
93+
int unaligned = ((int)pCh & (Vector<byte>.Count - 1)) / elementsPerByte;
94+
count = ((Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1));
95+
}
96+
}
97+
SequentialScan:
8498
while (count >= 4)
8599
{
86100
if (*pCh == value) goto ReturnIndex;
@@ -101,6 +115,34 @@ public unsafe int IndexOf(char value, int startIndex, int count)
101115
pCh++;
102116
}
103117

118+
if (pCh < pEndCh)
119+
{
120+
count = (int)((pEndCh - pCh) & ~(Vector<ushort>.Count - 1));
121+
// Get comparison Vector
122+
Vector<ushort> vComparison = new Vector<ushort>(value);
123+
while (count > 0)
124+
{
125+
var vMatches = Vector.Equals(vComparison, Unsafe.ReadUnaligned<Vector<ushort>>(pCh));
126+
if (Vector<ushort>.Zero.Equals(vMatches))
127+
{
128+
pCh += Vector<ushort>.Count;
129+
count -= Vector<ushort>.Count;
130+
continue;
131+
}
132+
// Find offset of first match
133+
return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches);
134+
}
135+
136+
if (pCh < pEndCh)
137+
{
138+
unchecked
139+
{
140+
count = (int)(pEndCh - pCh);
141+
}
142+
goto SequentialScan;
143+
}
144+
}
145+
104146
return -1;
105147

106148
ReturnIndex3: pCh++;
@@ -111,6 +153,43 @@ public unsafe int IndexOf(char value, int startIndex, int count)
111153
}
112154
}
113155

156+
// Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138
//
// Returns the element index, within `match`, of the first non-zero ushort element.
// `match` is reinterpreted as a vector of ulong lanes; each ulong lane covers four
// ushort elements, so the result is (lane index * 4) + (element index inside that lane).
// The visible caller passes the result of Vector.Equals and only calls this after
// checking that the vector is not all zero. If every lane were zero the loop would end
// with i == Vector<ulong>.Count and candidate == 0, and the returned value would not be
// a valid element index.
157+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
158+
private static int LocateFirstFoundChar(Vector<ushort> match)
159+
{
160+
// View the same bits as 64-bit lanes so four chars can be tested per comparison.
var vector64 = Vector.AsVectorUInt64(match);
161+
ulong candidate = 0;
162+
int i = 0;
163+
// Pattern unrolled by jit https://github.com/dotnet/coreclr/pull/8001
164+
// Scan lanes from lowest to highest and stop at the first one containing a match.
for (; i < Vector<ulong>.Count; i++)
165+
{
166+
candidate = vector64[i];
167+
if (candidate != 0)
168+
{
169+
break;
170+
}
171+
}
172+
173+
// Single LEA instruction with jitted const (using function result)
174+
// i * 4 converts the ulong lane index into a ushort element index.
return i * 4 + LocateFirstFoundChar(candidate);
175+
}
176+
177+
// Returns the index (0..3) of the lowest 16-bit element of `match` that is set.
// Intended for lanes whose 16-bit elements are each either all ones or all zeros
// (the shape produced by Vector.Equals in the caller). `match` is expected to be
// non-zero; for match == 0 the expression does not yield an index in 0..3.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
178+
private static int LocateFirstFoundChar(ulong match)
179+
{
180+
// match - 1 and the multiplication below are meant to wrap, hence unchecked.
unchecked
181+
{
182+
// Flag least significant power of two bit
// match ^ (match - 1) sets the lowest set bit of match and every bit below it.
183+
var powerOfTwoFlag = match ^ (match - 1);
184+
// Shift all powers of two into the high byte and extract
// The product leaves the element index in the bits above bit 49; >> 49 extracts it.
185+
return (int)((powerOfTwoFlag * XorPowerOfTwoToHighChar) >> 49);
186+
}
187+
}
188+
189+
// Multiplier used by LocateFirstFoundChar(ulong). The expression evaluates to
// 0x0000_0001_0002_0004: (0x03 | 0x02 << 16 | 0x01 << 32) is 0x0000_0001_0002_0003, plus 1.
private const ulong XorPowerOfTwoToHighChar = (0x03ul |
190+
0x02ul << 16 |
191+
0x01ul << 32) + 1;
192+
114193
// Returns the index of the first occurrence of any specified character in the current instance.
115194
// The search starts at startIndex and runs to startIndex + count - 1.
116195
//
@@ -397,17 +476,27 @@ public unsafe int LastIndexOf(char value, int startIndex, int count)
397476
if (Length == 0)
398477
return -1;
399478

400-
if (startIndex < 0 || startIndex >= Length)
479+
if ((uint)startIndex >= (uint)Length)
401480
throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
402481

403-
if (count < 0 || count - 1 > startIndex)
482+
if ((uint)count > (uint)startIndex + 1)
404483
throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
405484

406485
fixed (char* pChars = &_firstChar)
407486
{
408487
char* pCh = pChars + startIndex;
488+
char* pEndCh = pCh - count;
409489

410490
//We search [startIndex..EndIndex]
491+
if (Vector.IsHardwareAccelerated && count >= Vector<ushort>.Count * 2)
492+
{
493+
unchecked
494+
{
495+
const int elementsPerByte = sizeof(ushort) / sizeof(byte);
496+
count = (((int)pCh & (Vector<byte>.Count - 1)) / elementsPerByte) + 1;
497+
}
498+
}
499+
SequentialScan:
411500
while (count >= 4)
412501
{
413502
if (*pCh == value) goto ReturnIndex;
@@ -428,6 +517,35 @@ public unsafe int LastIndexOf(char value, int startIndex, int count)
428517
pCh--;
429518
}
430519

520+
if (pCh > pEndCh)
521+
{
522+
count = (int)((pCh - pEndCh) & ~(Vector<ushort>.Count - 1));
523+
524+
// Get comparison Vector
525+
Vector<ushort> vComparison = new Vector<ushort>(value);
526+
while (count > 0)
527+
{
528+
char* pStart = pCh - Vector<ushort>.Count + 1;
529+
var vMatches = Vector.Equals(vComparison, Unsafe.ReadUnaligned<Vector<ushort>>(pStart));
530+
if (Vector<ushort>.Zero.Equals(vMatches))
531+
{
532+
pCh -= Vector<ushort>.Count;
533+
count -= Vector<ushort>.Count;
534+
continue;
535+
}
536+
// Find offset of last match
537+
return (int)(pStart - pChars) + LocateLastFoundChar(vMatches);
538+
}
539+
540+
if (pCh > pEndCh)
541+
{
542+
unchecked
543+
{
544+
count = (int)(pCh - pEndCh);
545+
}
546+
goto SequentialScan;
547+
}
548+
}
431549
return -1;
432550

433551
ReturnIndex3: pCh--;
@@ -438,6 +556,40 @@ public unsafe int LastIndexOf(char value, int startIndex, int count)
438556
}
439557
}
440558

559+
// Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138
//
// Returns the element index, within `match`, of the last non-zero ushort element.
// `match` is reinterpreted as a vector of ulong lanes (four ushort elements per lane)
// and the lanes are scanned from the highest index down to 0.
// The visible caller passes the result of Vector.Equals and only calls this after
// checking that the vector is not all zero. If every lane were zero the loop would end
// with i == -1 and candidate == 0.
560+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
561+
private static int LocateLastFoundChar(Vector<ushort> match)
562+
{
563+
// View the same bits as 64-bit lanes so four chars can be tested per comparison.
var vector64 = Vector.AsVectorUInt64(match);
564+
ulong candidate = 0;
565+
// Start at the highest lane because the last match is wanted.
int i = Vector<ulong>.Count - 1;
566+
// Pattern unrolled by jit https://github.com/dotnet/coreclr/pull/8001
567+
for (; i >= 0; i--)
568+
{
569+
candidate = vector64[i];
570+
if (candidate != 0)
571+
{
572+
break;
573+
}
574+
}
575+
576+
// Single LEA instruction with jitted const (using function result)
577+
// i * 4 converts the ulong lane index into a ushort element index.
return i * 4 + LocateLastFoundChar(candidate);
578+
}
579+
580+
// Returns the index (3 down to 0) of the highest 16-bit element of `match` whose top
// bit is set. Each iteration shifts the next lower element into the top position and
// decrements the index. For match == 0 the loop body never runs and 3 is returned,
// so callers are expected to pass a non-zero value.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
581+
private static int LocateLastFoundChar(ulong match)
582+
{
583+
// Find the most significant char that has its highest bit set
584+
int index = 3;
585+
// (long)match > 0 holds while the top bit is clear and some lower bit is still set;
// a set top bit makes the signed value negative and ends the loop.
// NOTE(review): this relies on the ulong -> long conversion not being overflow-checked;
// unlike LocateFirstFoundChar(ulong) there is no explicit unchecked block here - confirm
// the assembly is compiled with unchecked arithmetic.
while ((long)match > 0)
586+
{
587+
match = match << 16;
588+
index--;
589+
}
590+
return index;
591+
}
592+
441593
// Returns the index of the last occurrence of any specified character in the current instance.
442594
// The search starts at startIndex and runs backwards to startIndex - count + 1.
443595
// The character at position startIndex is included in the search. startIndex is the larger

0 commit comments

Comments
 (0)