diff --git a/BitFaster.Caching.Benchmarks/Lfu/SketchReset.cs b/BitFaster.Caching.Benchmarks/Lfu/SketchReset.cs
new file mode 100644
index 00000000..0c774061
--- /dev/null
+++ b/BitFaster.Caching.Benchmarks/Lfu/SketchReset.cs
@@ -0,0 +1,109 @@
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Jobs;
+
+namespace BitFaster.Caching.Benchmarks.Lfu
+{
+    /// <summary>
+    /// Compares a plain loop with 2x and 4x manually unrolled loops that shift every
+    /// word of a <c>long[]</c> table right by one bit and mask each 4-bit nibble.
+    /// </summary>
+    /// <remarks>
+    /// The unrolled variants have no tail handling, so <see cref="Size"/> must be a
+    /// multiple of the unroll factor; every value listed in <c>Params</c> is a multiple of 4.
+    /// </remarks>
+    [SimpleJob(RuntimeMoniker.Net60)]
+    public class SketchReset
+    {
+        // Compile-time constants: the compiler emits each mask as a literal at its use
+        // sites instead of loading a mutable static field inside the hot loops.
+        // ResetMask keeps the low three bits of every nibble, dropping the bit that the
+        // one-position right shift carries in from the neighbouring nibble.
+        private const long ResetMask = 0x7777777777777777L;
+
+        // OneMask selects the lowest bit of every nibble.
+        private const long OneMask = 0x1111111111111111L;
+
+        private long[] table;
+
+        /// <summary>Number of 64-bit words in the table under test.</summary>
+        [Params(4, 128, 8192, 1048576)]
+        public int Size { get; set; }
+
+        /// <summary>Allocates a zero-filled table of <see cref="Size"/> words.</summary>
+        [GlobalSetup]
+        public void Setup()
+        {
+            table = new long[Size];
+        }
+
+        /// <summary>Baseline: processes one word per iteration.</summary>
+        /// <returns>
+        /// The sum of <c>BitOps.BitCount(word &amp; OneMask)</c> over all words, each
+        /// sampled before the word is shifted.
+        /// </returns>
+        [Benchmark(Baseline = true)]
+        public int Reset1()
+        {
+            int count = 0;
+            for (int i = 0; i < table.Length; i++)
+            {
+                count += BitOps.BitCount(table[i] & OneMask);
+                table[i] = (long)((ulong)table[i] >> 1) & ResetMask;
+            }
+
+            return count;
+        }
+
+        /// <summary>
+        /// Processes two words per iteration with independent accumulators.
+        /// Requires <c>table.Length</c> to be even.
+        /// </summary>
+        /// <returns>The same total as <see cref="Reset1"/> for the same table contents.</returns>
+        [Benchmark]
+        public int Reset2()
+        {
+            int count0 = 0;
+            int count1 = 0;
+
+            for (int i = 0; i < table.Length; i += 2)
+            {
+                count0 += BitOps.BitCount(table[i] & OneMask);
+                count1 += BitOps.BitCount(table[i + 1] & OneMask);
+
+                table[i] = (long)((ulong)table[i] >> 1) & ResetMask;
+                table[i + 1] = (long)((ulong)table[i + 1] >> 1) & ResetMask;
+            }
+
+            return count0 + count1;
+        }
+
+        /// <summary>
+        /// Processes four words per iteration with independent accumulators.
+        /// Requires <c>table.Length</c> to be a multiple of 4.
+        /// </summary>
+        /// <returns>The same total as <see cref="Reset1"/> for the same table contents.</returns>
+        [Benchmark]
+        public int Reset4()
+        {
+            int count0 = 0;
+            int count1 = 0;
+            int count2 = 0;
+            int count3 = 0;
+
+            for (int i = 0; i < table.Length; i += 4)
+            {
+                count0 += BitOps.BitCount(table[i] & OneMask);
+                count1 += BitOps.BitCount(table[i + 1] & OneMask);
+                count2 += BitOps.BitCount(table[i + 2] & OneMask);
+                count3 += BitOps.BitCount(table[i + 3] & OneMask);
+
+                table[i] = (long)((ulong)table[i] >> 1) & ResetMask;
+                table[i + 1] = (long)((ulong)table[i + 1] >> 1) & ResetMask;
+                table[i + 2] = (long)((ulong)table[i + 2] >> 1) & ResetMask;
+                table[i + 3] = (long)((ulong)table[i + 3] >> 1) & ResetMask;
+            }
+
+            return (count0 + count1) + (count2 + count3);
+        }
+    }
+}
diff --git a/BitFaster.Caching/Lfu/CmSketch.cs b/BitFaster.Caching/Lfu/CmSketch.cs
index b0785be5..c55030f9 100644
--- a/BitFaster.Caching/Lfu/CmSketch.cs
+++ b/BitFaster.Caching/Lfu/CmSketch.cs
@@ -158,19 +158,36 @@ private bool IncrementAt(int i, int j)
 
         private void Reset()
         {
-            int count = 0;
-            for (int i = 0; i < table.Length; i++)
+            // Unrolled four words per pass (almost 2x faster). The stride assumes
+            // table.Length is a multiple of 4; EnsureCapacity clamps the minimum size
+            // to 4 before sizing the table with BitOps.CeilingPowerOfTwo.
+            int ones0 = 0, ones1 = 0, ones2 = 0, ones3 = 0;
+
+            for (int i = 0; i < table.Length; i += 4)
             {
-                count += BitOps.BitCount(table[i] & OneMask);
+                ones0 += BitOps.BitCount(table[i] & OneMask);
+                ones1 += BitOps.BitCount(table[i + 1] & OneMask);
+                ones2 += BitOps.BitCount(table[i + 2] & OneMask);
+                ones3 += BitOps.BitCount(table[i + 3] & OneMask);
+
                 table[i] = (long)((ulong)table[i] >> 1) & ResetMask;
+                table[i + 1] = (long)((ulong)table[i + 1] >> 1) & ResetMask;
+                table[i + 2] = (long)((ulong)table[i + 2] >> 1) & ResetMask;
+                table[i + 3] = (long)((ulong)table[i + 3] >> 1) & ResetMask;
             }
-            size = (size - (count >> 2)) >> 1;
+
+            // The four independent accumulators are combined only after the loop.
+            int onesTotal = (ones0 + ones1) + (ones2 + ones3);
+            size = (size - (onesTotal >> 2)) >> 1;
         }
 
         private void EnsureCapacity(long maximumSize)
         {
             int maximum = (int)Math.Min(maximumSize, int.MaxValue >> 1);
 
+            // clamp to 4 as min size
+            maximum = Math.Max(4, maximum);
+
             table = new long[(maximum == 0) ? 1 : BitOps.CeilingPowerOfTwo(maximum)];
             tableMask = Math.Max(0, table.Length - 1);
             sampleSize = (maximumSize == 0) ? 10 : (10 * maximum);