Skip to content

Commit

Permalink
Remove optimizeForReading overloads from FrozenDictionary/Set (#87988)
Browse files Browse the repository at this point in the history
We initially added these as a mitigation primarily for construction potentially being super slow.  Now that enough optimization has been done to mitigate those performance concerns, we can get rid of the original mitgation and just have the surface area we initially wanted.
  • Loading branch information
stephentoub committed Jun 26, 2023
1 parent f08f488 commit de6955b
Show file tree
Hide file tree
Showing 13 changed files with 144 additions and 449 deletions.
Expand Up @@ -9,8 +9,6 @@ namespace System.Collections.Frozen
public static partial class FrozenDictionary
{
public static System.Collections.Frozen.FrozenDictionary<TKey, TValue> ToFrozenDictionary<TKey, TValue>(this System.Collections.Generic.IEnumerable<System.Collections.Generic.KeyValuePair<TKey, TValue>> source, System.Collections.Generic.IEqualityComparer<TKey>? comparer = null) where TKey : notnull { throw null; }
public static System.Collections.Frozen.FrozenDictionary<TKey, TValue> ToFrozenDictionary<TKey, TValue>(this System.Collections.Generic.IEnumerable<System.Collections.Generic.KeyValuePair<TKey, TValue>> source, System.Collections.Generic.IEqualityComparer<TKey>? comparer, bool optimizeForReading) where TKey : notnull { throw null; }
public static System.Collections.Frozen.FrozenDictionary<TKey, TValue> ToFrozenDictionary<TKey, TValue>(this System.Collections.Generic.IEnumerable<System.Collections.Generic.KeyValuePair<TKey, TValue>> source, bool optimizeForReading) where TKey : notnull { throw null; }
public static System.Collections.Frozen.FrozenDictionary<TKey, TSource> ToFrozenDictionary<TSource, TKey>(this System.Collections.Generic.IEnumerable<TSource> source, System.Func<TSource, TKey> keySelector, System.Collections.Generic.IEqualityComparer<TKey>? comparer = null) where TKey : notnull { throw null; }
public static System.Collections.Frozen.FrozenDictionary<TKey, TElement> ToFrozenDictionary<TSource, TKey, TElement>(this System.Collections.Generic.IEnumerable<TSource> source, System.Func<TSource, TKey> keySelector, System.Func<TSource, TElement> elementSelector, System.Collections.Generic.IEqualityComparer<TKey>? comparer = null) where TKey : notnull { throw null; }
}
Expand Down Expand Up @@ -73,8 +71,6 @@ public partial struct Enumerator : System.Collections.Generic.IEnumerator<System
public static partial class FrozenSet
{
public static System.Collections.Frozen.FrozenSet<T> ToFrozenSet<T>(this System.Collections.Generic.IEnumerable<T> source, System.Collections.Generic.IEqualityComparer<T>? comparer = null) { throw null; }
public static System.Collections.Frozen.FrozenSet<T> ToFrozenSet<T>(this System.Collections.Generic.IEnumerable<T> source, System.Collections.Generic.IEqualityComparer<T>? comparer, bool optimizeForReading) { throw null; }
public static System.Collections.Frozen.FrozenSet<T> ToFrozenSet<T>(this System.Collections.Generic.IEnumerable<T> source, bool optimizeForReading) { throw null; }
}
public abstract partial class FrozenSet<T> : System.Collections.Generic.ICollection<T>, System.Collections.Generic.IEnumerable<T>, System.Collections.Generic.IReadOnlyCollection<T>, System.Collections.Generic.ISet<T>, System.Collections.ICollection, System.Collections.IEnumerable
{
Expand Down
Expand Up @@ -146,10 +146,6 @@ The System.Collections.Immutable library is built-in as part of the shared frame
<None Include="Interfaces.cd" />
</ItemGroup>

<ItemGroup Condition="$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net6.0'))">
<Compile Include="System\Collections\Frozen\WrappedDictionaryFrozenDictionary.cs" />
</ItemGroup>

<ItemGroup Condition="'$(TargetFramework)' == '$(NetCoreAppCurrent)'">
<Reference Include="System.Collections" />
<Reference Include="System.Linq" />
Expand Down
Expand Up @@ -12,8 +12,8 @@ namespace System.Collections.Frozen
internal sealed class DefaultFrozenDictionary<TKey, TValue> : KeysAndValuesFrozenDictionary<TKey, TValue>, IDictionary<TKey, TValue>
where TKey : notnull
{
internal DefaultFrozenDictionary(Dictionary<TKey, TValue> source, bool optimizeForReading)
: base(source, optimizeForReading)
internal DefaultFrozenDictionary(Dictionary<TKey, TValue> source)
: base(source)
{
}

Expand Down
Expand Up @@ -9,8 +9,8 @@ namespace System.Collections.Frozen
/// <typeparam name="T">The type of the values in the set.</typeparam>
internal sealed class DefaultFrozenSet<T> : ItemsFrozenSet<T, DefaultFrozenSet<T>.GSW>
{
internal DefaultFrozenSet(HashSet<T> source, bool optimizeForReading)
: base(source, optimizeForReading)
internal DefaultFrozenSet(HashSet<T> source)
: base(source)
{
}

Expand Down

Large diffs are not rendered by default.

Expand Up @@ -37,20 +37,19 @@ private FrozenHashTable(int[] hashCodes, Bucket[] buckets, ulong fastModMultipli

/// <summary>Initializes a frozen hash table.</summary>
/// <param name="hashCodes">Pre-calculated hash codes. When the method finishes, it assigns each value to destination index.</param>
/// <param name="optimizeForReading">true to spend additional effort tuning for subsequent read speed on the table; false to prioritize construction time.</param>
/// <param name="hashCodesAreUnique">True when the input hash codes are already unique (provided from a dictionary of integers for example).</param>
/// <remarks>
/// It will then determine the optimal number of hash buckets to allocate and will populate the
/// bucket table. The caller is responsible to consume the values written to <paramref name="hashCodes"/> and update the destination (if desired).
/// <see cref="FindMatchingEntries(int, out int, out int)"/> then uses this index to reference individual entries by indexing into <see cref="HashCodes"/>.
/// </remarks>
/// <returns>A frozen hash table.</returns>
public static FrozenHashTable Create(Span<int> hashCodes, bool optimizeForReading = true, bool hashCodesAreUnique = false)
public static FrozenHashTable Create(Span<int> hashCodes, bool hashCodesAreUnique = false)
{
// Determine how many buckets to use. This might be fewer than the number of entries
// if any entries have identical hashcodes (not just different hashcodes that might
// map to the same bucket).
int numBuckets = CalcNumBuckets(hashCodes, optimizeForReading, hashCodesAreUnique);
int numBuckets = CalcNumBuckets(hashCodes, hashCodesAreUnique);
ulong fastModMultiplier = HashHelpers.GetFastModMultiplier((uint)numBuckets);

// Create two spans:
Expand Down Expand Up @@ -144,7 +143,7 @@ public void FindMatchingEntries(int hashCode, out int startIndex, out int endInd
/// sizes, starting at the exact number of hash codes and incrementing the bucket count by 1 per trial,
/// this is a trade-off between speed of determining a good number of buckets and maximal density.
/// </remarks>
private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForReading, bool hashCodesAreUnique)
private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool hashCodesAreUnique)
{
Debug.Assert(hashCodes.Length != 0);
Debug.Assert(!hashCodesAreUnique || new HashSet<int>(hashCodes.ToArray()).Count == hashCodes.Length);
Expand All @@ -154,11 +153,6 @@ private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForR
const int MaxSmallBucketTableMultiplier = 16; // How large a bucket table should be allowed for small inputs?
const int MaxLargeBucketTableMultiplier = 3; // How large a bucket table should be allowed for large inputs?

if (!optimizeForReading)
{
return HashHelpers.GetPrime(hashCodes.Length);
}

// Filter out duplicate codes, since no increase in buckets will avoid collisions from duplicate input hash codes.
HashSet<int>? codes = null;
int uniqueCodesCount = hashCodes.Length;
Expand Down
Expand Up @@ -19,107 +19,45 @@ public static class FrozenSet
/// <param name="comparer">The comparer implementation to use to compare values for equality. If null, <see cref="EqualityComparer{T}.Default"/> is used.</param>
/// <typeparam name="T">The type of the values in the set.</typeparam>
/// <returns>A frozen set.</returns>
public static FrozenSet<T> ToFrozenSet<T>(this IEnumerable<T> source, IEqualityComparer<T>? comparer = null)
{
GetUniqueValues(source, comparer, out FrozenSet<T>? existing, out HashSet<T>? uniqueValues);

// Trimming note:
// This avoids delegating to ToFrozenSet(..., bool optimizeForReading) to avoid rooting
// ChooseImplementationOptimizedForReading, which in turn references many different concrete implementations.
return existing ??
ChooseImplementationOptimizedForConstruction(uniqueValues!);
}

/// <summary>Creates a <see cref="FrozenSet{T}"/> with the specified values.</summary>
/// <param name="source">The values to use to populate the set.</param>
/// <param name="optimizeForReading">
/// <see langword="true"/> to do more work as part of set construction to optimize for subsequent reading of the data;
/// <see langword="false"/> to prefer making construction more efficient. The default is <see langword="false"/>.
/// </param>
/// <typeparam name="T">The type of the values in the set.</typeparam>
/// <returns>A frozen set.</returns>
/// <remarks>
/// Frozen collections are immutable and may be optimized for situations where a collection is created very infrequently but
/// is used very frequently at runtime. Setting <paramref name="optimizeForReading"/> to <see langword="true"/> will result in a
/// relatively high cost to create the collection in exchange for improved performance when subsequently using the collection.
/// Using <see langword="true"/> is ideal for collections that are created once, potentially at the startup of a service, and then
/// used throughout the remainder of the lifetime of the service. Because of the high cost of creation, frozen collections should
/// only be initialized with trusted input.
/// </remarks>
public static FrozenSet<T> ToFrozenSet<T>(this IEnumerable<T> source, bool optimizeForReading) =>
ToFrozenSet(source, null, optimizeForReading);

/// <summary>Creates a <see cref="FrozenSet{T}"/> with the specified values.</summary>
/// <param name="source">The values to use to populate the set.</param>
/// <param name="comparer">The comparer implementation to use to compare values for equality. If null, <see cref="EqualityComparer{T}.Default"/> is used.</param>
/// <param name="optimizeForReading">
/// <see langword="true"/> to do more work as part of set construction to optimize for subsequent reading of the data;
/// <see langword="false"/> to prefer making construction more efficient. The default is <see langword="false"/>.
/// </param>
/// <typeparam name="T">The type of the values in the set.</typeparam>
/// <returns>A frozen set.</returns>
/// <remarks>
/// Frozen collections are immutable and may be optimized for situations where a collection is created very infrequently but
/// is used very frequently at runtime. Setting <paramref name="optimizeForReading"/> to <see langword="true"/> will result in a
/// relatively high cost to create the collection in exchange for improved performance when subsequently using the collection.
/// Using <see langword="true"/> is ideal for collections that are created once, potentially at the startup of a service, and then
/// used throughout the remainder of the lifetime of the service. Because of the high cost of creation, frozen collections should
/// only be initialized with trusted input.
/// </remarks>
public static FrozenSet<T> ToFrozenSet<T>(this IEnumerable<T> source, IEqualityComparer<T>? comparer, bool optimizeForReading)
{
GetUniqueValues(source, comparer, out FrozenSet<T>? existing, out HashSet<T>? uniqueValues);
return existing ?? (optimizeForReading ?
ChooseImplementationOptimizedForReading(uniqueValues!) :
ChooseImplementationOptimizedForConstruction(uniqueValues!));
}
public static FrozenSet<T> ToFrozenSet<T>(this IEnumerable<T> source, IEqualityComparer<T>? comparer = null) =>
GetExistingFrozenOrNewSet(source, comparer, out HashSet<T>? newSet) ??
CreateFromSet(newSet!);

/// <summary>Extracts from the source either an existing <see cref="FrozenSet{T}"/> instance or a <see cref="HashSet{T}"/> containing the values and the specified <paramref name="comparer"/>.</summary>
private static void GetUniqueValues<T>(
IEnumerable<T> source, IEqualityComparer<T>? comparer,
out FrozenSet<T>? existing, out HashSet<T>? uniqueValues)
private static FrozenSet<T>? GetExistingFrozenOrNewSet<T>(IEnumerable<T> source, IEqualityComparer<T>? comparer, out HashSet<T>? newSet)
{
ThrowHelper.ThrowIfNull(source);
comparer ??= EqualityComparer<T>.Default;

// If the source is already frozen with the same comparer, it can simply be returned.
if (source is FrozenSet<T> fs && fs.Comparer.Equals(comparer))
{
existing = fs;
uniqueValues = null;
return;
newSet = null;
return fs;
}

// Ensure we have a HashSet<> using the specified comparer such that all items
// are non-null and unique according to that comparer.
uniqueValues = source as HashSet<T>;
if (uniqueValues is null ||
(uniqueValues.Count != 0 && !uniqueValues.Comparer.Equals(comparer)))
newSet = source as HashSet<T>;
if (newSet is null ||
(newSet.Count != 0 && !newSet.Comparer.Equals(comparer)))
{
uniqueValues = new HashSet<T>(source, comparer);
newSet = new HashSet<T>(source, comparer);
}

if (uniqueValues.Count == 0)
if (newSet.Count == 0)
{
existing = ReferenceEquals(comparer, FrozenSet<T>.Empty.Comparer) ?
return ReferenceEquals(comparer, FrozenSet<T>.Empty.Comparer) ?
FrozenSet<T>.Empty :
new EmptyFrozenSet<T>(comparer);
uniqueValues = null;
return;
}

Debug.Assert(uniqueValues is not null);
Debug.Assert(uniqueValues.Comparer.Equals(comparer));

existing = null;
}

private static FrozenSet<T> ChooseImplementationOptimizedForConstruction<T>(HashSet<T> source)
{
return new DefaultFrozenSet<T>(source, optimizeForReading: false);
Debug.Assert(newSet is not null);
Debug.Assert(newSet.Comparer.Equals(comparer));
return null;
}

private static FrozenSet<T> ChooseImplementationOptimizedForReading<T>(HashSet<T> source)
private static FrozenSet<T> CreateFromSet<T>(HashSet<T> source)
{
IEqualityComparer<T> comparer = source.Comparer;

Expand Down Expand Up @@ -243,19 +181,19 @@ private static FrozenSet<T> ChooseImplementationOptimizedForReading<T>(HashSet<T
}

// No special-cases apply. Use the default frozen set.
return new DefaultFrozenSet<T>(source, optimizeForReading: true);
return new DefaultFrozenSet<T>(source);
}
}

/// <summary>Provides an immutable, read-only set optimized for fast lookup and enumeration.</summary>
/// <typeparam name="T">The type of the values in this set.</typeparam>
/// <remarks>
/// Frozen collections are immutable and are optimized for situations where a collection
/// is created very infrequently but is used very frequently at runtime. They have a relatively high
/// cost to create but provide excellent lookup performance. Thus, these are ideal for cases
/// where a collection is created once, potentially at the startup of an application, and used throughout
/// the remainder of the life of the application. Frozen collections should only be initialized with
/// trusted input.
/// <see cref="FrozenSet{T}"/> is immutable and is optimized for situations where a set
/// is created very infrequently but is used very frequently at run-time. It has a relatively high
/// cost to create but provides excellent lookup performance. Thus, it is ideal for cases
/// where a set is created once, potentially at the startup of an application, and is used throughout
/// the remainder of the life of the application. <see cref="FrozenSet{T}"/> should only be initialized
/// with trusted elements, as the details of the elements impacts construction time.
/// </remarks>
[DebuggerTypeProxy(typeof(ImmutableEnumerableDebuggerProxy<>))]
[DebuggerDisplay("Count = {Count}")]
Expand Down
Expand Up @@ -14,7 +14,7 @@ internal abstract class ItemsFrozenSet<T, TThisWrapper> : FrozenSetInternalBase<
private protected readonly FrozenHashTable _hashTable;
private protected readonly T[] _items;

protected ItemsFrozenSet(HashSet<T> source, bool optimizeForReading = true) : base(source.Comparer)
protected ItemsFrozenSet(HashSet<T> source) : base(source.Comparer)
{
Debug.Assert(source.Count != 0);

Expand All @@ -30,7 +30,7 @@ protected ItemsFrozenSet(HashSet<T> source, bool optimizeForReading = true) : ba
hashCodes[i] = entries[i] is T t ? Comparer.GetHashCode(t) : 0;
}

_hashTable = FrozenHashTable.Create(hashCodes, optimizeForReading);
_hashTable = FrozenHashTable.Create(hashCodes);

for (int srcIndex = 0; srcIndex < hashCodes.Length; srcIndex++)
{
Expand Down
Expand Up @@ -15,7 +15,7 @@ internal abstract class KeysAndValuesFrozenDictionary<TKey, TValue> : FrozenDict
private protected readonly TKey[] _keys;
private protected readonly TValue[] _values;

protected KeysAndValuesFrozenDictionary(Dictionary<TKey, TValue> source, bool optimizeForReading = true) : base(source.Comparer)
protected KeysAndValuesFrozenDictionary(Dictionary<TKey, TValue> source) : base(source.Comparer)
{
Debug.Assert(source.Count != 0);

Expand All @@ -32,7 +32,7 @@ protected KeysAndValuesFrozenDictionary(Dictionary<TKey, TValue> source, bool op
hashCodes[i] = Comparer.GetHashCode(entries[i].Key);
}

_hashTable = FrozenHashTable.Create(hashCodes, optimizeForReading);
_hashTable = FrozenHashTable.Create(hashCodes);

for (int srcIndex = 0; srcIndex < hashCodes.Length; srcIndex++)
{
Expand Down

0 comments on commit de6955b

Please sign in to comment.