Skip to content

Commit 835596d

Browse files
authored
sketch comparison (#297)
* flat bench * benmethod * Revert "benmethod"
1 parent 3d500e9 commit 835596d

File tree

4 files changed

+400
-22
lines changed

4 files changed

+400
-22
lines changed

BitFaster.Caching.Benchmarks/BitFaster.Caching.Benchmarks.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
<PropertyGroup>
44
<OutputType>Exe</OutputType>
55
<TargetFrameworks>net48;net6.0</TargetFrameworks>
6+
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
67
</PropertyGroup>
78

89
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Threading.Tasks;
6+
7+
#if NETCOREAPP3_1_OR_GREATER
8+
using System.Runtime.Intrinsics;
9+
using System.Runtime.Intrinsics.X86;
10+
#endif
11+
12+
namespace BitFaster.Caching.Benchmarks.Lfu
13+
{
14+
/// <summary>
/// A frequency sketch that stores sixteen 4-bit counters in each element of a flat
/// <see cref="long"/> array. Each value is hashed to four table slots and one
/// counter in each slot is incremented; the estimate is the minimum of the four.
/// </summary>
/// <typeparam name="T">The type of the values being counted.</typeparam>
/// <typeparam name="I">
/// A struct whose <c>IsAvx2Supported</c> property selects between the AVX2 and the
/// scalar code paths.
/// </typeparam>
internal class CmSketchFlat<T, I> where I : struct, IsaProbe
{
    // A mixture of seeds from FNV-1a, CityHash, and Murmur3
    private static readonly ulong[] Seed = { 0xc3a5c85c97cb3127L, 0xb492b66fbe98f273L, 0x9ae16a3b2f90404fL, 0xcbf29ce484222325L };

    // Keeps the low three bits of every 4-bit counter; applied after shifting a slot
    // right by one so that a bit cannot leak in from the neighbouring counter.
    private const long ResetMask = 0x7777777777777777L;

    // Selects the lowest bit of every 4-bit counter (i.e. the counters holding odd values).
    private const long OneMask = 0x1111111111111111L;

    private int sampleSize;
    private int tableMask;
    private long[] table;
    private int size;

    private readonly IEqualityComparer<T> comparer;

    /// <summary>
    /// Initializes a new instance of the CmSketchFlat class with the specified maximum size and equality comparer.
    /// </summary>
    /// <param name="maximumSize">The maximum size.</param>
    /// <param name="comparer">The equality comparer used to compute hash codes for values.</param>
    public CmSketchFlat(long maximumSize, IEqualityComparer<T> comparer)
    {
        EnsureCapacity(maximumSize);
        this.comparer = comparer;
    }

    /// <summary>
    /// Gets the reset sample size.
    /// </summary>
    public int ResetSampleSize => this.sampleSize;

    /// <summary>
    /// Gets the size.
    /// </summary>
    public int Size => this.size;

    /// <summary>
    /// Estimate the frequency of the specified value.
    /// </summary>
    /// <param name="value">The value.</param>
    /// <returns>The estimated frequency of the value.</returns>
    public int EstimateFrequency(T value)
    {
#if !NETCOREAPP3_1_OR_GREATER
        return EstimateFrequencyStd(value);
#else

        I isa = default;

        if (isa.IsAvx2Supported)
        {
            return EstimateFrequencyAvx(value);
        }
        else
        {
            return EstimateFrequencyStd(value);
        }
#endif
    }

    /// <summary>
    /// Increment the count of the specified value.
    /// </summary>
    /// <param name="value">The value.</param>
    public void Increment(T value)
    {
#if !NETCOREAPP3_1_OR_GREATER
        IncrementStd(value);
#else

        I isa = default;

        if (isa.IsAvx2Supported)
        {
            IncrementAvx(value);
        }
        else
        {
            IncrementStd(value);
        }
#endif
    }

    /// <summary>
    /// Clears the count for all items.
    /// </summary>
    public void Clear()
    {
        // Zero the existing table in place rather than allocating a replacement.
        Array.Clear(table, 0, table.Length);
        size = 0;
    }

    /// <summary>
    /// Scalar frequency estimate: the minimum of the four 4-bit counters selected by the hash.
    /// </summary>
    private int EstimateFrequencyStd(T value)
    {
        int hash = Spread(comparer.GetHashCode(value));

        // The low two hash bits pick a group of four counters (0, 4, 8 or 12) within each slot.
        int start = (hash & 3) << 2;
        int frequency = int.MaxValue;

        for (int i = 0; i < 4; i++)
        {
            int index = IndexOf(hash, i);
            int count = (int)(((ulong)table[index] >> ((start + i) << 2)) & 0xfL);
            frequency = Math.Min(frequency, count);
        }

        return frequency;
    }

    /// <summary>
    /// Scalar increment: bumps one counter in each of four slots and triggers
    /// <see cref="Reset"/> when the number of counted increments reaches the sample size.
    /// </summary>
    private void IncrementStd(T value)
    {
        int hash = Spread(comparer.GetHashCode(value));
        int start = (hash & 3) << 2;

        // Loop unrolling improves throughput by 5m ops/s
        int index0 = IndexOf(hash, 0);
        int index1 = IndexOf(hash, 1);
        int index2 = IndexOf(hash, 2);
        int index3 = IndexOf(hash, 3);

        // The increment counts if at least one of the four counters was not saturated.
        bool added = IncrementAt(index0, start);
        added |= IncrementAt(index1, start + 1);
        added |= IncrementAt(index2, start + 2);
        added |= IncrementAt(index3, start + 3);

        if (added && (++size == sampleSize))
        {
            Reset();
        }
    }

    /// <summary>
    /// Increments the 4-bit counter <paramref name="j"/> of slot <paramref name="i"/> unless it is already 15.
    /// </summary>
    /// <returns>true if the counter was incremented; false if it was saturated.</returns>
    private bool IncrementAt(int i, int j)
    {
        int offset = j << 2;
        long mask = (0xfL << offset);

        if ((table[i] & mask) != mask)
        {
            table[i] += (1L << offset);
            return true;
        }

        return false;
    }

    /// <summary>
    /// Halves every counter in the table and reduces <see cref="size"/> accordingly.
    /// </summary>
    private void Reset()
    {
        // unroll, almost 2x faster
        int count0 = 0;
        int count1 = 0;
        int count2 = 0;
        int count3 = 0;

        // The table length is a power of two >= 4 (see EnsureCapacity), so i + 3 is always in range.
        for (int i = 0; i < table.Length; i += 4)
        {
            // Count the counters holding an odd value before they are halved.
            count0 += BitOps.BitCount(table[i] & OneMask);
            count1 += BitOps.BitCount(table[i + 1] & OneMask);
            count2 += BitOps.BitCount(table[i + 2] & OneMask);
            count3 += BitOps.BitCount(table[i + 3] & OneMask);

            // Halve all sixteen counters of the slot at once.
            table[i] = (long)((ulong)table[i] >> 1) & ResetMask;
            table[i + 1] = (long)((ulong)table[i + 1] >> 1) & ResetMask;
            table[i + 2] = (long)((ulong)table[i + 2] >> 1) & ResetMask;
            table[i + 3] = (long)((ulong)table[i + 3] >> 1) & ResetMask;
        }

        count0 = (count0 + count1) + (count2 + count3);

        size = (size - (count0 >> 2)) >> 1;
    }

    /// <summary>
    /// Allocates the counter table and derives the table mask and reset sample size.
    /// </summary>
    /// <param name="maximumSize">The requested maximum size.</param>
    private void EnsureCapacity(long maximumSize)
    {
        // Clamp while still in 64-bit so that out-of-range inputs are not truncated by the
        // cast. The lower bound of 4 is required because Reset() walks the table four
        // slots at a time.
        int maximum = (int)Math.Max(4L, Math.Min(maximumSize, int.MaxValue >> 1));

        table = new long[BitOps.CeilingPowerOfTwo(maximum)];
        tableMask = table.Length - 1;

        // Multiply in 64-bit and clamp: 10 * maximum overflows Int32 for large capacities,
        // which would otherwise yield a negative or tiny sample size.
        sampleSize = (maximumSize == 0) ? 10 : (int)Math.Min(10L * maximum, int.MaxValue);

        size = 0;
    }

    /// <summary>
    /// Computes the table slot for the <paramref name="i"/>-th hash function.
    /// </summary>
    private int IndexOf(int item, int i)
    {
        ulong hash = ((ulong)item + Seed[i]) * Seed[i];
        hash += (hash >> 32);
        return ((int)hash) & tableMask;
    }

    /// <summary>
    /// Applies a supplemental bit-mixing function to the supplied hash code.
    /// </summary>
    private int Spread(int x)
    {
        uint y = (uint)x;
        y = ((y >> 16) ^ y) * 0x45d9f3b;
        y = ((y >> 16) ^ y) * 0x45d9f3b;
        return (int)((y >> 16) ^ y);
    }

#if NETCOREAPP3_1_OR_GREATER
    /// <summary>
    /// AVX2 frequency estimate: gathers the four slots in one instruction and takes the
    /// horizontal minimum of the four selected counters.
    /// </summary>
    private unsafe int EstimateFrequencyAvx(T value)
    {
        int hash = Spread(comparer.GetHashCode(value));
        int start = (hash & 3) << 2;

        fixed (long* tablePtr = &table[0])
        {
            var tableVector = Avx2.GatherVector256(tablePtr, IndexesOfAvx(hash), 8).AsUInt64();

            // Per-lane bit offset: (start + lane) << 2
            Vector256<ulong> starts = Vector256.Create(0UL, 1UL, 2UL, 3UL);
            starts = Avx2.Add(starts, Vector256.Create((ulong)start));
            starts = Avx2.ShiftLeftLogical(starts, 2);

            tableVector = Avx2.ShiftRightLogicalVariable(tableVector, starts);
            tableVector = Avx2.And(tableVector, Vector256.Create(0xfUL));

            // Pack the low 32 bits of each 64-bit lane into the lower 128 bits.
            Vector256<int> permuteMask = Vector256.Create(0, 2, 4, 6, 1, 3, 5, 7);
            Vector128<ushort> lower = Avx2.PermuteVar8x32(tableVector.AsInt32(), permuteMask)
                .GetLower()
                .AsUInt16();

            // set the zeroed high parts of the long value to ushort.Max
            var masked = Avx2.Blend(lower, Vector128.Create(ushort.MaxValue), 0b10101010);
            return Avx2.MinHorizontal(masked).GetElement(0);
        }
    }

    /// <summary>
    /// AVX2 increment: computes the four per-slot increments with vector operations, applies
    /// them, and triggers <see cref="Reset"/> when the number of counted increments reaches
    /// the sample size.
    /// </summary>
    private unsafe void IncrementAvx(T value)
    {
        int hash = Spread(comparer.GetHashCode(value));
        int start = (hash & 3) << 2;

        Vector128<int> indexes = IndexesOfAvx(hash);

        fixed (long* tablePtr = &table[0])
        {
            var tableVector = Avx2.GatherVector256(tablePtr, indexes, 8);

            // offset = j << 2, where j [start+0, start+1, start+2, start+3]
            Vector256<ulong> offset = Vector256.Create((ulong)start);
            Vector256<ulong> add = Vector256.Create(0UL, 1UL, 2UL, 3UL);
            offset = Avx2.Add(offset, add);
            offset = Avx2.ShiftLeftLogical(offset, 2);

            // mask = (0xfL << offset)
            Vector256<long> fifteen = Vector256.Create(0xfL);
            Vector256<long> mask = Avx2.ShiftLeftLogicalVariable(fifteen, offset);

            // (table[i] & mask) != mask)
            // Note masked is 'equal' - therefore use AndNot below
            Vector256<long> masked = Avx2.CompareEqual(Avx2.And(tableVector, mask), mask);

            // 1L << offset
            Vector256<long> inc = Avx2.ShiftLeftLogicalVariable(Vector256.Create(1L), offset);

            // Mask to zero out non matches (add zero below) - first operand is NOT then AND result (order matters)
            inc = Avx2.AndNot(masked, inc);

            *(tablePtr + indexes.GetElement(0)) += inc.GetElement(0);
            *(tablePtr + indexes.GetElement(1)) += inc.GetElement(1);
            *(tablePtr + indexes.GetElement(2)) += inc.GetElement(2);
            *(tablePtr + indexes.GetElement(3)) += inc.GetElement(3);

            // Each lane of 'masked' is all ones when that counter was already saturated.
            // The increment counts when at least one counter was bumped, i.e. when not every
            // lane is saturated - the same rule as the 'added |= ...' chain in IncrementStd.
            bool wasInc = Avx2.MoveMask(masked.AsByte()) != -1;

            if (wasInc && (++size == sampleSize))
            {
                Reset();
            }
        }
    }

    /// <summary>
    /// Vectorized equivalent of calling <see cref="IndexOf"/> for i = 0..3.
    /// </summary>
    private Vector128<int> IndexesOfAvx(int item)
    {
        Vector256<ulong> VectorSeed = Vector256.Create(0xc3a5c85c97cb3127L, 0xb492b66fbe98f273L, 0x9ae16a3b2f90404fL, 0xcbf29ce484222325L);
        Vector256<ulong> hash = Vector256.Create((ulong)item);
        hash = Avx2.Add(hash, VectorSeed);
        hash = Multiply(hash, VectorSeed);

        Vector256<ulong> shift = Vector256.Create(32UL);
        Vector256<ulong> shifted = Avx2.ShiftRightLogicalVariable(hash, shift);
        hash = Avx2.Add(hash, shifted);

        // Move [a1, a2, b1, b2, c1, c2, d1, d2]
        // To   [a1, b1, c1, d1, a2, b2, c2, d2]
        // then GetLower() [a1, b1, c1, d1]
        Vector256<int> permuteMask = Vector256.Create(0, 2, 4, 6, 1, 3, 5, 7);
        Vector128<int> f = Avx2.PermuteVar8x32(hash.AsInt32(), permuteMask)
            .GetLower();

        Vector128<int> maskVector = Vector128.Create(tableMask);
        return Avx2.And(f, maskVector);
    }

    // taken from Agner Fog's vector library, see https://github.com/vectorclass/version2, vectori256.h
    /// <summary>
    /// Lane-wise 64-bit multiply (low 64 bits of each product) built from 32-bit multiplies.
    /// </summary>
    private static Vector256<ulong> Multiply(Vector256<ulong> a, Vector256<ulong> b)
    {
        // instruction does not exist. Split into 32-bit multiplies
        Vector256<int> bswap = Avx2.Shuffle(b.AsInt32(), 0xB1);                 // swap H<->L
        Vector256<int> prodlh = Avx2.MultiplyLow(a.AsInt32(), bswap);           // 32 bit L*H products
        Vector256<int> zero = Vector256.Create(0);                              // 0
        Vector256<int> prodlh2 = Avx2.HorizontalAdd(prodlh, zero);              // a0Lb0H+a0Hb0L,a1Lb1H+a1Hb1L,0,0
        Vector256<int> prodlh3 = Avx2.Shuffle(prodlh2, 0x73);                   // 0, a0Lb0H+a0Hb0L, 0, a1Lb1H+a1Hb1L
        Vector256<ulong> prodll = Avx2.Multiply(a.AsUInt32(), b.AsUInt32());    // a0Lb0L,a1Lb1L, 64 bit unsigned products
        return Avx2.Add(prodll.AsInt64(), prodlh3.AsInt64()).AsUInt64();        // a0Lb0L+(a0Lb0H+a0Hb0L)<<32, a1Lb1L+(a1Lb1H+a1Hb1L)<<32
    }
#endif
}
320+
}

0 commit comments

Comments
 (0)