Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster optimized frozen dictionary creation (2/n) #87630

Merged
merged 5 commits into from Jun 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -229,60 +229,60 @@ public static class FrozenDictionary
if (typeof(TKey) == typeof(string) &&
(ReferenceEquals(comparer, EqualityComparer<TKey>.Default) || ReferenceEquals(comparer, StringComparer.Ordinal) || ReferenceEquals(comparer, StringComparer.OrdinalIgnoreCase)))
{
Dictionary<string, TValue> stringEntries = (Dictionary<string, TValue>)(object)source;
IEqualityComparer<string> stringComparer = (IEqualityComparer<string>)(object)comparer;

// this array is needed for every strategy
string[] entries = (string[])(object)source.Keys.ToArray();
// keys and values are needed for every strategy
string[] keys = (string[])(object)source.Keys.ToArray();
TValue[] values = source.Values.ToArray();

// Calculate the minimum and maximum lengths of the strings in the dictionary. Several of the analyses need this.
int minLength = int.MaxValue, maxLength = 0;
foreach (string key in entries)
foreach (string key in keys)
{
if (key.Length < minLength) minLength = key.Length;
if (key.Length > maxLength) maxLength = key.Length;
}
Debug.Assert(minLength >= 0 && maxLength >= minLength);

// Try to create an implementation that uses length buckets, where each bucket contains up to only a few strings of the same length.
FrozenDictionary<string, TValue>? frozenDictionary = LengthBucketsFrozenDictionary<TValue>.CreateLengthBucketsFrozenDictionaryIfAppropriate(stringEntries, stringComparer, minLength, maxLength, entries);
FrozenDictionary<string, TValue>? frozenDictionary = LengthBucketsFrozenDictionary<TValue>.CreateLengthBucketsFrozenDictionaryIfAppropriate(keys, values, stringComparer, minLength, maxLength);
if (frozenDictionary is not null)
{
return (FrozenDictionary<TKey, TValue>)(object)frozenDictionary;
}

// Analyze the keys for unique substrings and create an implementation that minimizes the cost of hashing keys.
KeyAnalyzer.AnalysisResults analysis = KeyAnalyzer.Analyze(entries, ReferenceEquals(stringComparer, StringComparer.OrdinalIgnoreCase), minLength, maxLength);
KeyAnalyzer.AnalysisResults analysis = KeyAnalyzer.Analyze(keys, ReferenceEquals(stringComparer, StringComparer.OrdinalIgnoreCase), minLength, maxLength);
if (analysis.SubstringHashing)
{
if (analysis.RightJustifiedSubstring)
{
if (analysis.IgnoreCase)
{
frozenDictionary = analysis.AllAsciiIfIgnoreCase
? new OrdinalStringFrozenDictionary_RightJustifiedCaseInsensitiveAsciiSubstring<TValue>(stringEntries, entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex, analysis.HashCount)
: new OrdinalStringFrozenDictionary_RightJustifiedCaseInsensitiveSubstring<TValue>(stringEntries, entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex, analysis.HashCount);
? new OrdinalStringFrozenDictionary_RightJustifiedCaseInsensitiveAsciiSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex, analysis.HashCount)
: new OrdinalStringFrozenDictionary_RightJustifiedCaseInsensitiveSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex, analysis.HashCount);
}
else
{
frozenDictionary = analysis.HashCount == 1
? new OrdinalStringFrozenDictionary_RightJustifiedSingleChar<TValue>(stringEntries, entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex)
: new OrdinalStringFrozenDictionary_RightJustifiedSubstring<TValue>(stringEntries, entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex, analysis.HashCount);
? new OrdinalStringFrozenDictionary_RightJustifiedSingleChar<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex)
: new OrdinalStringFrozenDictionary_RightJustifiedSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex, analysis.HashCount);
}
}
else
{
if (analysis.IgnoreCase)
{
frozenDictionary = analysis.AllAsciiIfIgnoreCase
? new OrdinalStringFrozenDictionary_LeftJustifiedCaseInsensitiveAsciiSubstring<TValue>(stringEntries, entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex, analysis.HashCount)
: new OrdinalStringFrozenDictionary_LeftJustifiedCaseInsensitiveSubstring<TValue>(stringEntries, entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex, analysis.HashCount);
? new OrdinalStringFrozenDictionary_LeftJustifiedCaseInsensitiveAsciiSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex, analysis.HashCount)
: new OrdinalStringFrozenDictionary_LeftJustifiedCaseInsensitiveSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex, analysis.HashCount);
}
else
{
frozenDictionary = analysis.HashCount == 1
? new OrdinalStringFrozenDictionary_LeftJustifiedSingleChar<TValue>(stringEntries, entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex)
: new OrdinalStringFrozenDictionary_LeftJustifiedSubstring<TValue>(stringEntries, entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex, analysis.HashCount);
? new OrdinalStringFrozenDictionary_LeftJustifiedSingleChar<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex)
: new OrdinalStringFrozenDictionary_LeftJustifiedSubstring<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, analysis.HashIndex, analysis.HashCount);
}
}
}
Expand All @@ -291,12 +291,12 @@ public static class FrozenDictionary
if (analysis.IgnoreCase)
{
frozenDictionary = analysis.AllAsciiIfIgnoreCase
? new OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii<TValue>(stringEntries, entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff)
: new OrdinalStringFrozenDictionary_FullCaseInsensitive<TValue>(stringEntries, entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff);
? new OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff)
: new OrdinalStringFrozenDictionary_FullCaseInsensitive<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff);
}
else
{
frozenDictionary = new OrdinalStringFrozenDictionary_Full<TValue>(stringEntries, entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff);
frozenDictionary = new OrdinalStringFrozenDictionary_Full<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff);
}
}

Expand Down
Expand Up @@ -36,8 +36,7 @@ private FrozenHashTable(int[] hashCodes, Bucket[] buckets, ulong fastModMultipli
}

/// <summary>Initializes a frozen hash table.</summary>
/// <param name="entriesLength">The number of entries to track from the hash table.</param>
/// <param name="hashAtIndex">A delegate that produces a hash code for a given entry. It's passed the index of the entry and returns that entry's hash code.</param>
/// <param name="hashCodes">Pre-calculated hash codes.</param>
/// <param name="storeDestIndexFromSrcIndex">A delegate that assigns the index to a specific entry. It's passed the destination and source indices.</param>
/// <param name="optimizeForReading">true to spend additional effort tuning for subsequent read speed on the table; false to prioritize construction time.</param>
/// <remarks>
Expand All @@ -48,18 +47,8 @@ private FrozenHashTable(int[] hashCodes, Bucket[] buckets, ulong fastModMultipli
/// then uses this index to reference individual entries by indexing into <see cref="HashCodes"/>.
/// </remarks>
/// <returns>A frozen hash table.</returns>
public static FrozenHashTable Create(int entriesLength, Func<int, int> hashAtIndex, Action<int, int> storeDestIndexFromSrcIndex, bool optimizeForReading = true)
public static FrozenHashTable Create(ReadOnlySpan<int> hashCodes, Action<int, int> storeDestIndexFromSrcIndex, bool optimizeForReading = true)
{
Debug.Assert(entriesLength != 0);

// Calculate the hashcodes for every entry.
int[] arrayPoolHashCodes = ArrayPool<int>.Shared.Rent(entriesLength);
Span<int> hashCodes = arrayPoolHashCodes.AsSpan(0, entriesLength);
for (int i = 0; i < entriesLength; i++)
{
hashCodes[i] = hashAtIndex(i);
}

// Determine how many buckets to use. This might be fewer than the number of entries
// if any entries have identical hashcodes (not just different hashcodes that might
// map to the same bucket).
Expand Down Expand Up @@ -124,7 +113,6 @@ public static FrozenHashTable Create(int entriesLength, Func<int, int> hashAtInd
Debug.Assert(count == hashtableHashcodes.Length);

ArrayPool<int>.Shared.Return(arrayPoolBuckets);
ArrayPool<int>.Shared.Return(arrayPoolHashCodes);

return new FrozenHashTable(hashtableHashcodes, hashtableBuckets, fastModMultiplier);
}
Expand Down
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
Expand All @@ -27,10 +28,18 @@ internal Int32FrozenDictionary(Dictionary<int, TValue> source) : base(EqualityCo

_values = new TValue[entries.Length];

int[] arrayPoolHashCodes = ArrayPool<int>.Shared.Rent(entries.Length);
Span<int> hashCodes = arrayPoolHashCodes.AsSpan(0, entries.Length);
for (int i = 0; i < entries.Length; i++)
{
hashCodes[i] = entries[i].Key;
}

_hashTable = FrozenHashTable.Create(
entries.Length,
index => entries[index].Key,
hashCodes,
(destIndex, srcIndex) => _values[destIndex] = entries[srcIndex].Value);

ArrayPool<int>.Shared.Return(arrayPoolHashCodes);
}

/// <inheritdoc />
Expand Down
Expand Up @@ -26,9 +26,8 @@ internal Int32FrozenSet(HashSet<int> source) : base(EqualityComparer<int>.Defaul
source.CopyTo(entries);

_hashTable = FrozenHashTable.Create(
count,
index => entries[index],
delegate { });
new ReadOnlySpan<int>(entries, 0, count),
static delegate { });

ArrayPool<int>.Shared.Return(entries);
}
Expand Down
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;

Expand All @@ -22,11 +23,19 @@ protected ItemsFrozenSet(HashSet<T> source, bool optimizeForReading = true) : ba

_items = new T[entries.Length];

int[] arrayPoolHashCodes = ArrayPool<int>.Shared.Rent(entries.Length);
Span<int> hashCodes = arrayPoolHashCodes.AsSpan(0, entries.Length);
for (int i = 0; i < entries.Length; i++)
{
hashCodes[i] = entries[i] is T t ? Comparer.GetHashCode(t) : 0;
}

_hashTable = FrozenHashTable.Create(
entries.Length,
index => entries[index] is T t ? Comparer.GetHashCode(t) : 0,
hashCodes,
(destIndex, srcIndex) => _items[destIndex] = entries[srcIndex],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not entirely familiar with the implementation, but I'm curious: why are we doing delayed initialization for the _items array?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not delayed; the array is populated during the call to Create, which uses the (destIndex, srcIndex) callback to put each item in the right spot.

optimizeForReading);

ArrayPool<int>.Shared.Return(arrayPoolHashCodes);
}

/// <inheritdoc />
Expand Down
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;

Expand All @@ -24,15 +25,23 @@ protected KeysAndValuesFrozenDictionary(Dictionary<TKey, TValue> source, bool op
_keys = new TKey[entries.Length];
_values = new TValue[entries.Length];

int[] arrayPoolHashCodes = ArrayPool<int>.Shared.Rent(entries.Length);
Span<int> hashCodes = arrayPoolHashCodes.AsSpan(0, entries.Length);
for (int i = 0; i < entries.Length; i++)
{
hashCodes[i] = Comparer.GetHashCode(entries[i].Key);
}

_hashTable = FrozenHashTable.Create(
entries.Length,
index => Comparer.GetHashCode(entries[index].Key),
hashCodes,
(destIndex, srcIndex) =>
{
_keys[destIndex] = entries[srcIndex].Key;
_values[destIndex] = entries[srcIndex].Value;
},
optimizeForReading);

ArrayPool<int>.Shared.Return(arrayPoolHashCodes);
}

/// <inheritdoc />
Expand Down
Expand Up @@ -225,7 +225,7 @@ private static bool HasSufficientUniquenessFactor(HashSet<string> set, ReadOnlyS
{
set.Clear();

// SufficientUniquenessFactor of 95% is good enough.
// Sufficient uniqueness factor of 95% is good enough.
// Instead of ensuring that 95% of data is good, we stop when we know that at least 5% is bad.
int acceptableNonUniqueCount = uniqueStrings.Length / 20;

Expand Down
Expand Up @@ -36,25 +36,25 @@ internal sealed class LengthBucketsFrozenDictionary<TValue> : FrozenDictionary<s
}

internal static LengthBucketsFrozenDictionary<TValue>? CreateLengthBucketsFrozenDictionaryIfAppropriate(
Dictionary<string, TValue> source, IEqualityComparer<string> comparer, int minLength, int maxLength, string[] keys)
string[] keys, TValue[] values, IEqualityComparer<string> comparer, int minLength, int maxLength)
{
Debug.Assert(source.Count != 0);
Debug.Assert(keys.Length != 0 && keys.Length == values.Length);
Debug.Assert(comparer == EqualityComparer<string>.Default || comparer == StringComparer.Ordinal || comparer == StringComparer.OrdinalIgnoreCase);
Debug.Assert(minLength >= 0 && maxLength >= minLength);

// If without even looking at the keys we know that some bucket will exceed the max per-bucket
// limit (pigeon hole principle), we can early-exit out without doing any further work.
int spread = maxLength - minLength + 1;
if (source.Count / spread > MaxPerLength)
if (keys.Length / spread > MaxPerLength)
{
return null;
}

// Iterate through all of the inputs, bucketing them based on the length of the string.
var groupedByLength = new Dictionary<int, List<KeyValuePair<string, TValue>>>();
foreach (KeyValuePair<string, TValue> pair in source)
for (int i = 0; i < keys.Length; i++)
{
string s = pair.Key;
string s = keys[i];
Debug.Assert(s.Length >= minLength && s.Length <= maxLength);

#if NET6_0_OR_GREATER
Expand All @@ -72,7 +72,7 @@ internal sealed class LengthBucketsFrozenDictionary<TValue> : FrozenDictionary<s
return null;
}

list.Add(pair);
list.Add(new KeyValuePair<string, TValue>(s, values[i]));
}

// If there would be too much empty space in the lookup array, bail.
Expand All @@ -81,7 +81,6 @@ internal sealed class LengthBucketsFrozenDictionary<TValue> : FrozenDictionary<s
return null;
}

var values = new TValue[keys.Length];
var lengthBuckets = new KeyValuePair<string, int>[spread][];

// Iterate through each bucket, filling the keys/values arrays, and creating a lookup array such that
Expand Down