Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Improve performance of string.IndexOfAny for 2 & 3 char searches #13219

Merged
merged 4 commits into from
Aug 8, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 15 additions & 14 deletions src/classlibnative/bcltype/stringnative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,8 +327,6 @@ FCIMPL4(INT32, COMString::IndexOfCharArray, StringObject* thisRef, CHARArray* va

if (thisRef == NULL)
FCThrow(kNullReferenceException);
if (valueRef == NULL)
FCThrowArgumentNull(W("anyOf"));

WCHAR *thisChars;
WCHAR *valueChars;
Expand All @@ -337,14 +335,6 @@ FCIMPL4(INT32, COMString::IndexOfCharArray, StringObject* thisRef, CHARArray* va

thisRef->RefInterpretGetStringValuesDangerousForGC(&thisChars, &thisLength);

if (startIndex < 0 || startIndex > thisLength) {
FCThrowArgumentOutOfRange(W("startIndex"), W("ArgumentOutOfRange_Index"));
}

if (count < 0 || count > thisLength - startIndex) {
FCThrowArgumentOutOfRange(W("count"), W("ArgumentOutOfRange_Count"));
}

int endIndex = startIndex + count;

valueLength = valueRef->GetNumComponents();
Expand Down Expand Up @@ -494,19 +484,30 @@ void InitializeProbabilisticMap(int* charMap, __in_ecount(length) const WCHAR* c
_ASSERTE(charArray != NULL);
_ASSERTE(length >= 0);

bool hasAscii = false;

for(int i = 0; i < length; ++i) {
int hi,lo;

WCHAR c = charArray[i];
int c = charArray[i];

hi = (c >> 8) & 0xFF;
lo = c & 0xFF;
hi = (c >> 8) & 0xFF;

int* value = &charMap[lo & PROBABILISTICMAP_BLOCK_INDEX_MASK];
SetBit(value, lo >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT);

value = &charMap[hi & PROBABILISTICMAP_BLOCK_INDEX_MASK];
SetBit(value, hi >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT);
if (hi > 0) {
value = &charMap[hi & PROBABILISTICMAP_BLOCK_INDEX_MASK];
SetBit(value, hi >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT);
}
else {
hasAscii = true;
}
}

if (hasAscii) {
charMap[0] |= 1;
}
}

Expand Down
92 changes: 91 additions & 1 deletion src/mscorlib/src/System/String.Searching.cs
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,98 @@ public int IndexOfAny(char[] anyOf, int startIndex)
}

[Pure]
public int IndexOfAny(char[] anyOf, int startIndex, int count)
{
if (anyOf == null)
{
throw new ArgumentNullException(nameof(anyOf));
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may not be common enough to warrant it (and thus may hurt the more common cases of 2/3), but I wonder if it's worthwhile adding here:

if (anyOf.Length == 1)
{
    return IndexOf(anyOf[0], startIndex, count);
}

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@stephentoub The only place I could think of that would be using a 1 char array was the Unix path handling code. That looked to be taken care of with the Path internal changes so I didn't bother with it.
But true to performance testing form the 2 char call is actually 10% better if it has another if before it. Not so if I change that to a switch though which is interesting. Will run a few more tests on the other paths.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope sorry it was the 3 char test that was 10% better. 2 char is 4% worse. Thoughts?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope sorry it was the 3 char test that was 10% better. 2 char is 4% worse. Thoughts?

These kind of odd variations are typically caused by things like code alignment or branch prediction that will vary from build to build. One has to run it under Intel profiler to tell what is going on. I typically just look at the JITed code whether it looks fine.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it's worthwhile adding here

I think it is worthwhile to add it, and also Length == 0. Otherwise, the API has unexpected perf cliff.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

😢 but don't optimize for them early? e.g.

if (anyOf.Length == 2)
{
    // Very common optimization for directory separators (/, \), quotes (", '), brackets, etc
    return IndexOfAny(anyOf[0], anyOf[1], startIndex, count);
}
else if (anyOf.Length == 3)
{
    return IndexOfAny(anyOf[0], anyOf[1], anyOf[2], startIndex, count);
}
else if (anyOf.Length > 3)
{
    return IndexOfCharArray(anyOf, startIndex, count);
}
else if (anyOf.Length == 1)
{
    return IndexOf(anyOf[0], startIndex, count);
}
else // anyOf.Length == 0
{
    return -1;
}

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That seems like a good compromise. Done.


if ((uint)startIndex > (uint)Length)
{
throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
}

if ((uint)count > (uint)(Length - startIndex))
{
throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
}

if (anyOf.Length == 2)
{
// Very common optimization for directory separators (/, \), quotes (", '), brackets, etc
return IndexOfAny(anyOf[0], anyOf[1], startIndex, count);
}
else if (anyOf.Length == 3)
{
return IndexOfAny(anyOf[0], anyOf[1], anyOf[2], startIndex, count);
}

return IndexOfCharArray(anyOf, startIndex, count);
}

private unsafe int IndexOfAny(char value1, char value2, int startIndex, int count)
{
fixed (char* pChars = &_firstChar)
{
char* pCh = pChars + startIndex;

while (count > 0)
{
if (*pCh == value1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This depends on JIT CSEing *pCh for best performance. The JIT will probably do it, but it is best to write hand-optimized code like this one to be as close as possible to the native code.

Why not:

 char c = *pCh; 
 if (c == value1 || c == value2)
     goto ReturnIndex;

like in the 3 argument overload?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I got the same time between the two variations so I just went with the simpler one. 3 char was better with a variable. Will change.

goto ReturnIndex;

if (*pCh == value2)
goto ReturnIndex;

// Possibly reads outside of count and can include null terminator
// Handled in the return logic
if (*(pCh + 1) == value1)
goto ReturnIndex1;

if (*(pCh + 1) == value2)
goto ReturnIndex1;

pCh += 2;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any reason why we only unroll 2x here? The other methods in this file, IndexOf and LastIndexOf both use 4x. My tests runs also show that switching to 4x cuts the time to 60-70% (actually, 8x is even faster on my machine but let's not go over the top...). Also, I would see value in keeping the two new explicit implementations for 2/3 elements char arrays as close as possible to the existing one for IndexOf() just with 2/3 if checks instead of just 1?!

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of the usages of IndexOfAny looked like they would be on short strings, path segments and the like. IndexOf use is much more varied and needs to have great performance on all string lengths.
It's a bit of a trade off as the more you unroll, the more time you spend outside that loop in slower code going a char at a time which hurts small string performance.
It's quite possible that with further analysis this function would be better off with 4x unroll but I locked in the win when it achieved its first two objectives. Which was to avoid the relative expensive probability map creation/comparison and to alleviate the need for the hard coded loops that have been popping up.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might want to look at that test code once you've got too much time: https://gist.github.com/dnickless/eb4807d698b7032b625a8d04794e90d8

count -= 2;
}

return -1;

ReturnIndex:
return (int)(pCh - pChars);

ReturnIndex1:
return (count == 1 ? -1 : (int)(pCh - pChars) + 1);
}
}

private unsafe int IndexOfAny(char value1, char value2, char value3, int startIndex, int count)
{
fixed (char* pChars = &_firstChar)
{
char* pCh = pChars + startIndex;

while (count > 0)
{
char c = *pCh;

if (c == value1 || c == value2 || c == value3)
goto ReturnIndex;

pCh++;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would 4x unrolling here not make sense, too? I didn't test this case but I would expect it to have a similar effect like with all other methods in this file.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3 char is a lot less common. This was really about giving a lot better performance with minimal code.

count--;
}

return -1;

ReturnIndex:
return (int)(pCh - pChars);
}
}

[MethodImplAttribute(MethodImplOptions.InternalCall)]
public extern int IndexOfAny(char[] anyOf, int startIndex, int count);
private extern int IndexOfCharArray(char[] anyOf, int startIndex, int count);


// Determines the position within this string of the first occurrence of the specified
Expand Down
2 changes: 1 addition & 1 deletion src/vm/ecalllist.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ FCFuncStart(gStringFuncs)
FCIntrinsic("get_Chars", COMString::GetCharAt, CORINFO_INTRINSIC_StringGetChar)
FCFuncElement("IsAscii", COMString::IsAscii)
FCFuncElement("CompareOrdinalHelper", COMString::CompareOrdinalEx)
FCFuncElement("IndexOfAny", COMString::IndexOfCharArray)
FCFuncElement("IndexOfCharArray", COMString::IndexOfCharArray)
FCFuncElement("LastIndexOfAny", COMString::LastIndexOfCharArray)
FCFuncElementSig("ReplaceInternal", &gsig_IM_Str_Str_RetStr, COMString::ReplaceString)
#ifdef FEATURE_COMINTEROP
Expand Down