Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid bloom filter checks for IndexOfAnyExcept in ProbabilisticMap #85203

Merged
merged 1 commit into from Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -34,30 +34,16 @@ public ProbabilisticCharSearchValues(scoped ReadOnlySpan<char> values)
// Forwards the membership query for `value` to ProbabilisticMap.Contains,
// handing it `_map` reinterpreted as a `ref uint` together with `_values`.
internal override bool ContainsCore(char value)
{
    ref uint charMap = ref Unsafe.As<ProbabilisticMap, uint>(ref _map);
    return ProbabilisticMap.Contains(ref charMap, _values, value);
}

// IndexOfAny override taking the search space as a span.
// NOTE(review): two candidate bodies follow the single `=>` below. This text looks like a flattened diff
// (see the "@@ -34,30 +34,16 @@" hunk header), so one of them is presumably the removed line and the other
// the added line - confirm against the real file.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal override int IndexOfAny(ReadOnlySpan<char> span) =>
// Candidate 1: goes through the private generic IndexOfAny<TNegator> instance helper with the DontNegate negator.
IndexOfAny<IndexOfAnyAsciiSearcher.DontNegate>(ref MemoryMarshal.GetReference(span), span.Length);
// Candidate 2: calls ProbabilisticMap.IndexOfAny directly, passing `_map` reinterpreted as a `ref uint` and `_values`.
ProbabilisticMap.IndexOfAny(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref MemoryMarshal.GetReference(span), span.Length, _values);

// IndexOfAnyExcept override taking the search space as a span.
// NOTE(review): two candidate bodies follow the single `=>` below; this looks like removed and added diff
// lines flattened together - confirm against the real file.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal override int IndexOfAnyExcept(ReadOnlySpan<char> span) =>
// Candidate 1: goes through the private generic IndexOfAny<TNegator> instance helper with the Negate negator.
IndexOfAny<IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length);
// Candidate 2: calls ProbabilisticMap.IndexOfAnySimpleLoop with the Negate negator; `_map` is not passed here,
// only the span reference, its length and `_values`.
ProbabilisticMap.IndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length, _values);

// LastIndexOfAny override taking the search space as a span.
// NOTE(review): two candidate bodies follow the single `=>` below; this looks like removed and added diff
// lines flattened together - confirm against the real file.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal override int LastIndexOfAny(ReadOnlySpan<char> span) =>
// Candidate 1: goes through the private generic LastIndexOfAny<TNegator> instance helper with the DontNegate negator.
LastIndexOfAny<IndexOfAnyAsciiSearcher.DontNegate>(ref MemoryMarshal.GetReference(span), span.Length);
// Candidate 2: calls ProbabilisticMap.LastIndexOfAny directly, passing `_map` reinterpreted as a `ref uint` and `_values`.
ProbabilisticMap.LastIndexOfAny(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref MemoryMarshal.GetReference(span), span.Length, _values);

// LastIndexOfAnyExcept override, followed by two private generic instance helpers.
// NOTE(review): this span looks like a flattened diff. The override has one body directly after its `=>`
// and a second stray call expression at the very end of the span, after the helpers. Presumably the helpers
// and the first body are the removed lines and the trailing call is the added body - confirm against the real file.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
// Candidate 1: goes through the private generic LastIndexOfAny<TNegator> helper below with the Negate negator.
LastIndexOfAny<IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length);

// Private helper: forwards to ProbabilisticMap.IndexOfAny<TNegator>, passing `_map` reinterpreted as a
// `ref uint`, the search-space reference and length, and `_values`. Marked NoInlining.
[MethodImpl(MethodImplOptions.NoInlining)]
private int IndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength)
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator =>
ProbabilisticMap.IndexOfAny<TNegator>(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref searchSpace, searchSpaceLength, _values);

// Private helper: same shape as the one above, but forwards to ProbabilisticMap.LastIndexOfAny<TNegator>.
[MethodImpl(MethodImplOptions.NoInlining)]
private int LastIndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength)
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator =>
ProbabilisticMap.LastIndexOfAny<TNegator>(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref searchSpace, searchSpaceLength, _values);
// Candidate 2 for LastIndexOfAnyExcept (it references `span`, which only that override declares): calls
// ProbabilisticMap.LastIndexOfAnySimpleLoop with the Negate negator and `_values`; `_map` is not passed.
ProbabilisticMap.LastIndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length, _values);
}
}
Expand Up @@ -207,116 +207,96 @@ private static bool ShouldUseSimpleLoop(int searchSpaceLength, int valuesLength)
|| (searchSpaceLength < 20 && searchSpaceLength < (valuesLength >> 1));
}

// Forwards to the generic IndexOfAny<TNegator> overload, selecting the
// SpanHelpers.DontNegate<char> negator; all four arguments are passed through unchanged.
public static int IndexOfAny(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
{
    return IndexOfAny<SpanHelpers.DontNegate<char>>(ref searchSpace, searchSpaceLength, ref values, valuesLength);
}

// Forwards to the generic IndexOfAny<TNegator> overload, selecting the
// SpanHelpers.Negate<char> negator; all four arguments are passed through unchanged.
public static int IndexOfAnyExcept(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
{
    return IndexOfAny<SpanHelpers.Negate<char>>(ref searchSpace, searchSpaceLength, ref values, valuesLength);
}

// Forwards to the generic LastIndexOfAny<TNegator> overload, selecting the
// SpanHelpers.DontNegate<char> negator; all four arguments are passed through unchanged.
public static int LastIndexOfAny(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
{
    return LastIndexOfAny<SpanHelpers.DontNegate<char>>(ref searchSpace, searchSpaceLength, ref values, valuesLength);
}

// Forwards to the generic LastIndexOfAny<TNegator> overload, selecting the
// SpanHelpers.Negate<char> negator; all four arguments are passed through unchanged.
public static int LastIndexOfAnyExcept(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
{
    return LastIndexOfAny<SpanHelpers.Negate<char>>(ref searchSpace, searchSpaceLength, ref values, valuesLength);
}

// Static IndexOfAny over raw refs: wraps `values`/`valuesLength` in a span and then picks a search strategy.
// NOTE(review): this span looks like a flattened diff. A generic `IndexOfAny<TNegator>` header and a
// non-generic `IndexOfAny` header both appear, statements of an inlined ref-walking loop are mixed with calls
// to IndexOfAnySimpleLoop / TryIndexOfAny / ProbabilisticIndexOfAny, and the braces do not balance as written.
// Confirm the real statement order against the actual file before relying on it.
private static int IndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
where TNegator : struct, SpanHelpers.INegator<char>
public static int IndexOfAny(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
{
var valuesSpan = new ReadOnlySpan<char>(ref values, valuesLength);

// If the search space is relatively short compared to the needle, do a simple O(n * m) search.
if (ShouldUseSimpleLoop(searchSpaceLength, valuesLength))
{
// Setup for the inlined loop variant: one-past-the-end reference and a moving cursor.
ref char searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength);
ref char cur = ref searchSpace;
// Helper-call variant of the same branch: delegates to IndexOfAnySimpleLoop with the DontNegate negator.
return IndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.DontNegate>(ref searchSpace, searchSpaceLength, valuesSpan);
}

// Inlined loop variant: visit each char until the cursor reaches the end reference.
while (!Unsafe.AreSame(ref cur, ref searchSpaceEnd))
{
char c = cur;
if (TNegator.NegateIfNeeded(Contains(valuesSpan, c)))
{
// Convert the byte distance from the start of the search space into a char index.
return (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref cur) / sizeof(char));
}
// If TryIndexOfAny reports success, its `index` out value is returned as the result.
if (IndexOfAnyAsciiSearcher.TryIndexOfAny(ref searchSpace, searchSpaceLength, valuesSpan, out int index))
{
return index;
}

cur = ref Unsafe.Add(ref cur, 1);
}
// Fallback: hand the search over to ProbabilisticIndexOfAny.
return ProbabilisticIndexOfAny(ref searchSpace, searchSpaceLength, ref values, valuesLength);
}

// Not-found result of the inlined loop variant.
return -1;
}
// Static IndexOfAnyExcept over raw refs: wraps `values`/`valuesLength` in a span, optionally tries
// IndexOfAnyAsciiSearcher, and otherwise falls back to another search routine.
// NOTE(review): this span looks like a flattened diff. The `typeof(TNegator)` conditional and the
// `ProbabilisticIndexOfAny<TNegator>` return reference a type parameter this non-generic signature does not
// declare, and two `if` heads / two `return` statements appear back to back. Confirm against the real file.
public static int IndexOfAnyExcept(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
{
var valuesSpan = new ReadOnlySpan<char>(ref values, valuesLength);

// Variant A of the condition: choose TryIndexOfAny or TryIndexOfAnyExcept depending on TNegator.
if (typeof(TNegator) == typeof(SpanHelpers.DontNegate<char>)
? IndexOfAnyAsciiSearcher.TryIndexOfAny(ref searchSpace, searchSpaceLength, valuesSpan, out int index)
: IndexOfAnyAsciiSearcher.TryIndexOfAnyExcept(ref searchSpace, searchSpaceLength, valuesSpan, out index))
// Variant B of the condition: TryIndexOfAnyExcept is only attempted when IsVectorizationSupported is true
// and ShouldUseSimpleLoop returns false for these lengths.
if (IndexOfAnyAsciiSearcher.IsVectorizationSupported &&
!ShouldUseSimpleLoop(searchSpaceLength, valuesLength) &&
IndexOfAnyAsciiSearcher.TryIndexOfAnyExcept(ref searchSpace, searchSpaceLength, valuesSpan, out int index))
{
return index;
}

// Fallback variant A: the probabilistic search with the generic negator.
return ProbabilisticIndexOfAny<TNegator>(ref searchSpace, searchSpaceLength, ref values, valuesLength);
// Fallback variant B: IndexOfAnySimpleLoop with the Negate negator (the probabilistic helper is not called).
return IndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(ref searchSpace, searchSpaceLength, valuesSpan);
}

// Static LastIndexOfAny over raw refs: wraps `values`/`valuesLength` in a span and then picks a search strategy.
// NOTE(review): this span looks like a flattened diff. A generic `LastIndexOfAny<TNegator>` header and a
// non-generic `LastIndexOfAny` header both appear, an inlined backwards loop is mixed with a call to
// LastIndexOfAnySimpleLoop, and a dangling `typeof(TNegator)` conditional precedes the final return.
// Confirm the real statement order against the actual file before relying on it.
private static int LastIndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
where TNegator : struct, SpanHelpers.INegator<char>
public static int LastIndexOfAny(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
{
var valuesSpan = new ReadOnlySpan<char>(ref values, valuesLength);

// If the search space is relatively short compared to the needle, do a simple O(n * m) search.
if (ShouldUseSimpleLoop(searchSpaceLength, valuesLength))
{
// Inlined loop variant: scan from the last index down to 0.
for (int i = searchSpaceLength - 1; i >= 0; i--)
{
char c = Unsafe.Add(ref searchSpace, i);
if (TNegator.NegateIfNeeded(Contains(valuesSpan, c)))
{
return i;
}
}
// Helper-call variant of the same branch: delegates to LastIndexOfAnySimpleLoop with the DontNegate negator.
return LastIndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.DontNegate>(ref searchSpace, searchSpaceLength, valuesSpan);
}

// Not-found result of the inlined loop variant.
return -1;
// If TryLastIndexOfAny reports success, its `index` out value is returned as the result.
if (IndexOfAnyAsciiSearcher.TryLastIndexOfAny(ref searchSpace, searchSpaceLength, valuesSpan, out int index))
{
return index;
}

// Generic variant of the condition: choose TryLastIndexOfAny or TryLastIndexOfAnyExcept depending on TNegator.
if (typeof(TNegator) == typeof(SpanHelpers.DontNegate<char>)
? IndexOfAnyAsciiSearcher.TryLastIndexOfAny(ref searchSpace, searchSpaceLength, valuesSpan, out int index)
: IndexOfAnyAsciiSearcher.TryLastIndexOfAnyExcept(ref searchSpace, searchSpaceLength, valuesSpan, out index))
// Fallback: hand the search over to ProbabilisticLastIndexOfAny.
return ProbabilisticLastIndexOfAny(ref searchSpace, searchSpaceLength, ref values, valuesLength);
}

// Static LastIndexOfAnyExcept over raw refs: wraps `values`/`valuesLength` in a span, optionally tries
// IndexOfAnyAsciiSearcher.TryLastIndexOfAnyExcept, and otherwise falls back to another search routine.
// NOTE(review): two `return` statements appear back to back at the end, and the first references a
// `TNegator` this non-generic signature does not declare; this looks like removed and added diff lines
// flattened together - confirm against the real file.
public static int LastIndexOfAnyExcept(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
{
var valuesSpan = new ReadOnlySpan<char>(ref values, valuesLength);

// TryLastIndexOfAnyExcept is only attempted when IsVectorizationSupported is true and
// ShouldUseSimpleLoop returns false for these lengths; on success its `index` out value is returned.
if (IndexOfAnyAsciiSearcher.IsVectorizationSupported &&
!ShouldUseSimpleLoop(searchSpaceLength, valuesLength) &&
IndexOfAnyAsciiSearcher.TryLastIndexOfAnyExcept(ref searchSpace, searchSpaceLength, valuesSpan, out int index))
{
return index;
}

// Fallback variant A: the probabilistic search with the generic negator.
return ProbabilisticLastIndexOfAny<TNegator>(ref searchSpace, searchSpaceLength, ref values, valuesLength);
// Fallback variant B: LastIndexOfAnySimpleLoop with the Negate negator (the probabilistic helper is not called).
return LastIndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(ref searchSpace, searchSpaceLength, valuesSpan);
}

// Builds a ProbabilisticMap from the values and runs the map-based IndexOfAny over the search space.
// NOTE(review): a generic header (with a `where TNegator` constraint) and a non-generic header both appear,
// and two `return` statements follow each other; this looks like removed and added diff lines flattened
// together - confirm against the real file.
[MethodImpl(MethodImplOptions.NoInlining)]
private static int ProbabilisticIndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
where TNegator : struct, SpanHelpers.INegator<char>
private static int ProbabilisticIndexOfAny(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
{
var valuesSpan = new ReadOnlySpan<char>(ref values, valuesLength);

// A fresh map is constructed on every call and then viewed through a `ref uint`.
var map = new ProbabilisticMap(valuesSpan);
ref uint charMap = ref Unsafe.As<ProbabilisticMap, uint>(ref map);

// Generic variant: maps the SpanHelpers negator onto the matching IndexOfAnyAsciiSearcher negator.
return typeof(TNegator) == typeof(SpanHelpers.DontNegate<char>)
? IndexOfAny<IndexOfAnyAsciiSearcher.DontNegate>(ref charMap, ref searchSpace, searchSpaceLength, valuesSpan)
: IndexOfAny<IndexOfAnyAsciiSearcher.Negate>(ref charMap, ref searchSpace, searchSpaceLength, valuesSpan);
// Non-generic variant: calls the map-based IndexOfAny overload directly.
return IndexOfAny(ref charMap, ref searchSpace, searchSpaceLength, valuesSpan);
}

// Builds a ProbabilisticMap from the values and runs the map-based LastIndexOfAny over the search space.
// NOTE(review): a generic header (with a `where TNegator` constraint) and a non-generic header both appear,
// and two `return` statements follow each other; this looks like removed and added diff lines flattened
// together - confirm against the real file.
[MethodImpl(MethodImplOptions.NoInlining)]
private static int ProbabilisticLastIndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
where TNegator : struct, SpanHelpers.INegator<char>
private static int ProbabilisticLastIndexOfAny(ref char searchSpace, int searchSpaceLength, ref char values, int valuesLength)
{
var valuesSpan = new ReadOnlySpan<char>(ref values, valuesLength);

// A fresh map is constructed on every call and then viewed through a `ref uint`.
var map = new ProbabilisticMap(valuesSpan);
ref uint charMap = ref Unsafe.As<ProbabilisticMap, uint>(ref map);

// Generic variant: maps the SpanHelpers negator onto the matching IndexOfAnyAsciiSearcher negator.
return typeof(TNegator) == typeof(SpanHelpers.DontNegate<char>)
? LastIndexOfAny<IndexOfAnyAsciiSearcher.DontNegate>(ref charMap, ref searchSpace, searchSpaceLength, valuesSpan)
: LastIndexOfAny<IndexOfAnyAsciiSearcher.Negate>(ref charMap, ref searchSpace, searchSpaceLength, valuesSpan);
// Non-generic variant: calls the map-based LastIndexOfAny overload directly.
return LastIndexOfAny(ref charMap, ref searchSpace, searchSpaceLength, valuesSpan);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static int IndexOfAny<TNegator>(ref uint charMap, ref char searchSpace, int searchSpaceLength, ReadOnlySpan<char> values)
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
internal static int IndexOfAny(ref uint charMap, ref char searchSpace, int searchSpaceLength, ReadOnlySpan<char> values)
{
if ((Sse41.IsSupported || AdvSimd.Arm64.IsSupported) && typeof(TNegator) == typeof(IndexOfAnyAsciiSearcher.DontNegate) && searchSpaceLength >= 16)
if ((Sse41.IsSupported || AdvSimd.Arm64.IsSupported) && searchSpaceLength >= 16)
{
return IndexOfAnyVectorized(ref charMap, ref searchSpace, searchSpaceLength, values);
}
Expand All @@ -327,7 +307,7 @@ internal static int IndexOfAny<TNegator>(ref uint charMap, ref char searchSpace,
while (!Unsafe.AreSame(ref cur, ref searchSpaceEnd))
{
int ch = cur;
if (TNegator.NegateIfNeeded(Contains(ref charMap, values, ch)))
if (Contains(ref charMap, values, ch))
{
return (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref cur) / sizeof(char));
}
Expand All @@ -339,13 +319,12 @@ internal static int IndexOfAny<TNegator>(ref uint charMap, ref char searchSpace,
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static int LastIndexOfAny<TNegator>(ref uint charMap, ref char searchSpace, int searchSpaceLength, ReadOnlySpan<char> values)
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
internal static int LastIndexOfAny(ref uint charMap, ref char searchSpace, int searchSpaceLength, ReadOnlySpan<char> values)
{
for (int i = searchSpaceLength - 1; i >= 0; i--)
{
int ch = Unsafe.Add(ref searchSpace, i);
if (TNegator.NegateIfNeeded(Contains(ref charMap, values, ch)))
if (Contains(ref charMap, values, ch))
{
return i;
}
Expand Down Expand Up @@ -461,5 +440,42 @@ private static int IndexOfAnyVectorized(ref uint charMap, ref char searchSpace,

return -1;
}

// Forward linear scan: returns the index of the first char in the search space for which
// TNegator.NegateIfNeeded(Contains(values, c)) is true, or -1 when no char qualifies.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static int IndexOfAnySimpleLoop<TNegator>(ref char searchSpace, int searchSpaceLength, ReadOnlySpan<char> values)
    where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
{
    // One-past-the-end sentinel; the cursor walks towards it one char at a time.
    ref char end = ref Unsafe.Add(ref searchSpace, searchSpaceLength);

    for (ref char current = ref searchSpace; !Unsafe.AreSame(ref current, ref end); current = ref Unsafe.Add(ref current, 1))
    {
        bool isMatch = TNegator.NegateIfNeeded(Contains(values, current));
        if (isMatch)
        {
            // Translate the byte distance from the start into a char index.
            nuint byteOffset = (nuint)Unsafe.ByteOffset(ref searchSpace, ref current);
            return (int)(byteOffset / sizeof(char));
        }
    }

    return -1;
}

// Backward linear scan: returns the highest index whose char makes
// TNegator.NegateIfNeeded(Contains(values, c)) true, or -1 when no char qualifies.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static int LastIndexOfAnySimpleLoop<TNegator>(ref char searchSpace, int searchSpaceLength, ReadOnlySpan<char> values)
    where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
{
    int index = searchSpaceLength;

    // Pre-decrement so the first char inspected is the one at searchSpaceLength - 1.
    while (--index >= 0)
    {
        char candidate = Unsafe.Add(ref searchSpace, index);
        bool isMatch = TNegator.NegateIfNeeded(Contains(values, candidate));
        if (isMatch)
        {
            return index;
        }
    }

    return -1;
}
}
}