Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework ProbabilisticMap character checks in SearchValues #101001

Merged
merged 3 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions src/libraries/System.Memory/tests/Span/SearchValues.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,31 @@ public static IEnumerable<object[]> Values_MemberData()
"\uFFFF\uFFFE\uFFFD\uFFFC\uFFFB\uFFFA",
"\uFFFF\uFFFE\uFFFD\uFFFC\uFFFB\uFFFB",
"\uFFFF\uFFFE\uFFFD\uFFFC\uFFFB\uFFF9",
new string('\u0080', 256) + '\u0082',
new string('\u0080', 100) + '\uF000',
new string('\u0080', 256) + '\uF000',
string.Concat(Enumerable.Range(128, 255).Select(i => (char)i)),
string.Concat(Enumerable.Range(128, 257).Select(i => (char)i)),
string.Concat(Enumerable.Range(128, 254).Select(i => (char)i)) + '\uF000',
string.Concat(Enumerable.Range(128, 256).Select(i => (char)i)) + '\uF000',
'\0' + string.Concat(Enumerable.Range(2, char.MaxValue - 1).Select(i => (char)i)),
};

return values.Select(v => new object[] { v, Encoding.Latin1.GetBytes(v) });
foreach (string value in values)
{
yield return Pair(value);
yield return Pair('a' + value);

// Test some more duplicates
if (value.Length > 0)
{
yield return Pair(value + value[0]);
yield return Pair(value[0] + value);
yield return Pair(value + value);
}
}

// Builds one theory-data row: the needle string followed by its Latin1-encoded bytes.
static object[] Pair(string value)
{
    byte[] latin1Bytes = Encoding.Latin1.GetBytes(value);
    return new object[] { value, latin1Bytes };
}
}

[Theory]
Expand Down Expand Up @@ -192,10 +214,12 @@ public static void SearchValues_Contains(string needle, byte[] byteNeedle)

// Checks that values.Contains(t) agrees with membership in needle for every
// value t from T.MaxValue down to 0.
static void Test<T>(ReadOnlySpan<T> needle, SearchValues<T> values) where T : struct, INumber<T>, IMinMaxValue<T>
{
// Copy the needle into a set so the loop below can do set-based membership lookups.
HashSet<T> needleSet = needle.ToArray().ToHashSet();

for (int i = int.CreateChecked(T.MaxValue); i >= 0; i--)
{
T t = T.CreateChecked(i);
// NOTE(review): this text comes from a diff rendering with the +/- markers stripped; the
// span-based assert below is presumably the removed line and the set-based assert its
// replacement — confirm against the pull request before relying on both being present.
Assert.Equal(needle.Contains(t), values.Contains(t));
Assert.Equal(needleSet.Contains(t), values.Contains(t));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any2SearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any3SearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\BitVector256.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\ProbabilisticMapState.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\ProbabilisticWithAsciiCharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any4SearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any5SearchValues.cs" />
Expand All @@ -445,7 +446,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\RangeByteSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\RangeCharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\ProbabilisticCharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Latin1CharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\BitmapCharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SearchValues.T.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SearchValuesDebugView.cs" />
Expand Down
Original file line number Diff line number Diff line change
@@ -1,34 +1,55 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace System.Buffers
{
internal sealed class Latin1CharSearchValues : SearchValues<char>
internal sealed class BitmapCharSearchValues : SearchValues<char>
{
private readonly BitVector256 _lookup;
private readonly uint[] _bitmap;

public Latin1CharSearchValues(ReadOnlySpan<char> values)
public BitmapCharSearchValues(ReadOnlySpan<char> values, int maxInclusive)
{
Debug.Assert(maxInclusive <= char.MaxValue);

_bitmap = new uint[maxInclusive / 32 + 1];

foreach (char c in values)
{
if (c > 255)
_bitmap[c >> 5] |= 1u << c;
}
}

internal override char[] GetValues()
{
var chars = new List<char>();
uint[] bitmap = _bitmap;

for (int i = 0; i < _bitmap.Length * 32; i++)
{
if (Contains(bitmap, i))
{
// The values were modified concurrent with the call to SearchValues.Create
ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion();
chars.Add((char)i);
}

_lookup.Set(c);
}
}

internal override char[] GetValues() => _lookup.GetCharValues();
return chars.ToArray();
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal override bool ContainsCore(char value) =>
_lookup.Contains256(value);
Contains(_bitmap, value);

/// <summary>
/// Tests whether the bit for <paramref name="value"/> is set in <paramref name="bitmap"/>,
/// where each array element holds the bits for 32 consecutive values.
/// </summary>
/// <param name="bitmap">The bit array to probe.</param>
/// <param name="value">The value whose bit is looked up.</param>
/// <returns>
/// <see langword="true"/> if the corresponding bit is set; <see langword="false"/> if it is clear
/// or if <paramref name="value"/> maps to an element outside the array.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool Contains(uint[] bitmap, int value)
{
    // value >> 5 selects the 32-bit element. Comparing as unsigned also rejects
    // negative values, which become very large indices after the cast.
    uint wordIndex = (uint)(value >> 5);
    if (wordIndex >= (uint)bitmap.Length)
    {
        return false;
    }

    // For a 32-bit left operand C# uses only the low 5 bits of the shift count,
    // so this mask picks the bit position within the selected element.
    uint bitMask = 1u << value;
    return (bitmap[wordIndex] & bitMask) != 0;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal override int IndexOfAny(ReadOnlySpan<char> span) =>
Expand All @@ -51,11 +72,12 @@ private int IndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength)
{
ref char searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength);
ref char cur = ref searchSpace;
uint[] bitmap = _bitmap;

while (!Unsafe.AreSame(ref cur, ref searchSpaceEnd))
{
char c = cur;
if (TNegator.NegateIfNeeded(_lookup.Contains256(c)))
if (TNegator.NegateIfNeeded(Contains(bitmap, c)))
{
return (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref cur) / sizeof(char));
}
Expand All @@ -69,16 +91,18 @@ private int IndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength)
private int LastIndexOfAny<TNegator>(ref char searchSpace, int searchSpaceLength)
where TNegator : struct, IndexOfAnyAsciiSearcher.INegator
{
for (int i = searchSpaceLength - 1; i >= 0; i--)
uint[] bitmap = _bitmap;

while (--searchSpaceLength >= 0)
{
char c = Unsafe.Add(ref searchSpace, i);
if (TNegator.NegateIfNeeded(_lookup.Contains256(c)))
char c = Unsafe.Add(ref searchSpace, searchSpaceLength);
if (TNegator.NegateIfNeeded(Contains(bitmap, c)))
{
return i;
break;
}
}

return -1;
return searchSpaceLength;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,31 +8,30 @@ namespace System.Buffers
{
internal sealed class ProbabilisticCharSearchValues : SearchValues<char>
{
private ProbabilisticMap _map;
private readonly string _values;
private ProbabilisticMapState _map;

public ProbabilisticCharSearchValues(scoped ReadOnlySpan<char> values)
public ProbabilisticCharSearchValues(ReadOnlySpan<char> values, int maxInclusive)
{
_values = new string(values);
_map = new ProbabilisticMap(_values);
_map = new ProbabilisticMapState(values, maxInclusive);
}

internal override char[] GetValues() => _values.ToCharArray();
internal override char[] GetValues() =>
_map.GetValues();

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal override bool ContainsCore(char value) =>
ProbabilisticMap.Contains(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), _values, value);
_map.FastContains(value);

internal override int IndexOfAny(ReadOnlySpan<char> span) =>
ProbabilisticMap.IndexOfAny(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref MemoryMarshal.GetReference(span), span.Length, _values);
ProbabilisticMap.IndexOfAny<SearchValues.TrueConst>(ref MemoryMarshal.GetReference(span), span.Length, ref _map);

internal override int IndexOfAnyExcept(ReadOnlySpan<char> span) =>
ProbabilisticMap.IndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length, _values);
ProbabilisticMapState.IndexOfAnySimpleLoop<SearchValues.TrueConst, IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length, ref _map);

internal override int LastIndexOfAny(ReadOnlySpan<char> span) =>
ProbabilisticMap.LastIndexOfAny(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), ref MemoryMarshal.GetReference(span), span.Length, _values);
ProbabilisticMap.LastIndexOfAny<SearchValues.TrueConst>(ref MemoryMarshal.GetReference(span), span.Length, ref _map);

internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
ProbabilisticMap.LastIndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length, _values);
ProbabilisticMapState.LastIndexOfAnySimpleLoop<SearchValues.TrueConst, IndexOfAnyAsciiSearcher.Negate>(ref MemoryMarshal.GetReference(span), span.Length, ref _map);
}
}
Loading
Loading