From c296b562b81145bd73d4104d78367af931034d9d Mon Sep 17 00:00:00 2001 From: alexpeck Date: Sun, 14 Jun 2020 19:25:55 -0700 Subject: [PATCH 1/3] zipf distribution --- .../BitFaster.Caching.Benchmarks.csproj | 5 ++ .../Lru/ZipDistribution.cs | 89 +++++++++++++++++++ BitFaster.Caching.Benchmarks/Program.cs | 2 +- BitFaster.Sampling/BitFaster.Sampling.csproj | 20 +++++ BitFaster.Sampling/Program.cs | 57 ++++++++++++ BitFaster.sln | 6 ++ README.md | 22 ++++- 7 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 BitFaster.Caching.Benchmarks/Lru/ZipDistribution.cs create mode 100644 BitFaster.Sampling/BitFaster.Sampling.csproj create mode 100644 BitFaster.Sampling/Program.cs diff --git a/BitFaster.Caching.Benchmarks/BitFaster.Caching.Benchmarks.csproj b/BitFaster.Caching.Benchmarks/BitFaster.Caching.Benchmarks.csproj index ca54f2dd..6a31dca0 100644 --- a/BitFaster.Caching.Benchmarks/BitFaster.Caching.Benchmarks.csproj +++ b/BitFaster.Caching.Benchmarks/BitFaster.Caching.Benchmarks.csproj @@ -5,8 +5,13 @@ netcoreapp3.1 + + 1701;1702;CS8002 + + + diff --git a/BitFaster.Caching.Benchmarks/Lru/ZipDistribution.cs b/BitFaster.Caching.Benchmarks/Lru/ZipDistribution.cs new file mode 100644 index 00000000..d69e1321 --- /dev/null +++ b/BitFaster.Caching.Benchmarks/Lru/ZipDistribution.cs @@ -0,0 +1,89 @@ +using System; +using System.Collections.Generic; +using System.Text; +using BenchmarkDotNet.Attributes; +using BitFaster.Caching.Lru; +using MathNet.Numerics.Distributions; +using MathNet.Numerics.Random; + +namespace BitFaster.Caching.Benchmarks.Lru +{ + public class ZipDistribution + { + const double s = 0.86; + const int n = 500; + const int sampleCount = 1000; + private static int[] samples; + + const int concurrencyLevel = 1; + const int cacheSize = 50; // 10% cache size + + private static readonly ClassicLru classicLru = new ClassicLru(concurrencyLevel, cacheSize, EqualityComparer.Default); + private static readonly ConcurrentLru concurrentLru = new 
ConcurrentLru(concurrencyLevel, cacheSize, EqualityComparer.Default); + private static readonly ConcurrentTLru concurrentTlru = new ConcurrentTLru(concurrencyLevel, cacheSize, EqualityComparer.Default, TimeSpan.FromMinutes(10)); + private static readonly FastConcurrentLru fastConcurrentLru = new FastConcurrentLru(concurrencyLevel, cacheSize, EqualityComparer.Default); + private static readonly FastConcurrentTLru fastConcurrentTLru = new FastConcurrentTLru(concurrencyLevel, cacheSize, EqualityComparer.Default, TimeSpan.FromMinutes(1)); + + [GlobalSetup] + public void GlobalSetup() + { + samples = new int[sampleCount]; + Zipf.Samples(samples, s, n); + } + + [Benchmark(Baseline = true, OperationsPerInvoke = sampleCount)] + public void ClassicLru() + { + Func func = x => x; + + for (int i = 0; i < sampleCount; i++) + { + classicLru.GetOrAdd(samples[i], func); + } + } + + [Benchmark(OperationsPerInvoke = sampleCount)] + public void FastConcurrentLru() + { + Func func = x => x; + + for (int i = 0; i < sampleCount; i++) + { + fastConcurrentLru.GetOrAdd(samples[i], func); + } + } + + [Benchmark(OperationsPerInvoke = sampleCount)] + public void ConcurrentLru() + { + Func func = x => x; + + for (int i = 0; i < sampleCount; i++) + { + concurrentLru.GetOrAdd(samples[i], func); + } + } + + [Benchmark(OperationsPerInvoke = sampleCount)] + public void FastConcurrentTLru() + { + Func func = x => x; + + for (int i = 0; i < sampleCount; i++) + { + fastConcurrentTLru.GetOrAdd(samples[i], func); + } + } + + [Benchmark(OperationsPerInvoke = sampleCount)] + public void ConcurrentTLru() + { + Func func = x => x; + + for (int i = 0; i < sampleCount; i++) + { + concurrentTlru.GetOrAdd(samples[i], func); + } + } + } +} diff --git a/BitFaster.Caching.Benchmarks/Program.cs b/BitFaster.Caching.Benchmarks/Program.cs index aa66d4ed..317eb34e 100644 --- a/BitFaster.Caching.Benchmarks/Program.cs +++ b/BitFaster.Caching.Benchmarks/Program.cs @@ -15,7 +15,7 @@ class Program static void Main(string[] 
args) { var summary = BenchmarkRunner - .Run(ManualConfig.Create(DefaultConfig.Instance) + .Run(ManualConfig.Create(DefaultConfig.Instance) .AddJob(Job.RyuJitX64)); } } diff --git a/BitFaster.Sampling/BitFaster.Sampling.csproj b/BitFaster.Sampling/BitFaster.Sampling.csproj new file mode 100644 index 00000000..71495a29 --- /dev/null +++ b/BitFaster.Sampling/BitFaster.Sampling.csproj @@ -0,0 +1,20 @@ + + + + Exe + netcoreapp3.1 + + + + 1701;1702;CS8002 + + + + + + + + + + + diff --git a/BitFaster.Sampling/Program.cs b/BitFaster.Sampling/Program.cs new file mode 100644 index 00000000..3f3bec04 --- /dev/null +++ b/BitFaster.Sampling/Program.cs @@ -0,0 +1,57 @@ +using System; +using System.Collections.Generic; +using BitFaster.Caching.Lru; +using MathNet.Numerics.Distributions; + +namespace BitFaster.Sampling +{ + class Program + { + // Test methodology from 2Q paper: + // http://www.vldb.org/conf/1994/P439.PDF + + // s = 0.5 and s = 0.86. + // If there are N items, the probability of accessing an item numbered i or less is (i / N)^s. + // A setting of s = 0.86 gives an 80 / 20 distribution, while a setting of s = 0.5 gives a less skewed + // distribution (about 45 / 20). + const double s = 0.86; + // const double s = 0.5; + + // Take 20,000 samples (the 2Q paper took 1 million) + const int sampleCount = 20000; + + // We simulated a database of 50,000 pages and + // buffer sizes ranging from 2,500 (5%) items to 20,000 + // (40%) items. 
+ const int n = 50000; + + const double cacheSizeRatio = 0.05; + + const int cacheSize = (int)(n * cacheSizeRatio); + + static void Main(string[] args) + { + Console.WriteLine($"Generating Zipfan distribution with {sampleCount} samples, s = {s}, N = {n}"); + + var samples = new int[sampleCount]; + Zipf.Samples(samples, s, n); + + var concurrentLru = new ConcurrentLru(1, cacheSize, EqualityComparer.Default); + var classicLru = new ClassicLru(1, cacheSize, EqualityComparer.Default); + + Func func = x => x; + Console.WriteLine($"Running {sampleCount} iterations"); + + for (int i = 0; i < sampleCount; i++) + { + concurrentLru.GetOrAdd(samples[i], func); + classicLru.GetOrAdd(samples[i], func); + } + + Console.WriteLine($"ConcurrentLru hit ratio {concurrentLru.HitRatio * 100.0}%"); + Console.WriteLine($"ClassicLru hit ratio {classicLru.HitRatio * 100.0}%"); + + Console.ReadLine(); + } + } +} diff --git a/BitFaster.sln b/BitFaster.sln index e58b6e73..1c48cf2f 100644 --- a/BitFaster.sln +++ b/BitFaster.sln @@ -14,6 +14,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BitFaster.Caching.UnitTests EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BitFaster.Caching.Benchmarks", "BitFaster.Caching.Benchmarks\BitFaster.Caching.Benchmarks.csproj", "{8CDE3FA5-B08A-4375-9EF0-F1F044B841C4}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BitFaster.Sampling", "BitFaster.Sampling\BitFaster.Sampling.csproj", "{EAAE8DD3-EA1C-4BDF-920B-A0C858E853CB}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -32,6 +34,10 @@ Global {8CDE3FA5-B08A-4375-9EF0-F1F044B841C4}.Debug|Any CPU.Build.0 = Debug|Any CPU {8CDE3FA5-B08A-4375-9EF0-F1F044B841C4}.Release|Any CPU.ActiveCfg = Release|Any CPU {8CDE3FA5-B08A-4375-9EF0-F1F044B841C4}.Release|Any CPU.Build.0 = Release|Any CPU + {EAAE8DD3-EA1C-4BDF-920B-A0C858E853CB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + 
{EAAE8DD3-EA1C-4BDF-920B-A0C858E853CB}.Debug|Any CPU.Build.0 = Debug|Any CPU + {EAAE8DD3-EA1C-4BDF-920B-A0C858E853CB}.Release|Any CPU.ActiveCfg = Release|Any CPU + {EAAE8DD3-EA1C-4BDF-920B-A0C858E853CB}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/README.md b/README.md index 2b61dcd2..c50e0928 100644 --- a/README.md +++ b/README.md @@ -77,9 +77,27 @@ Intel Core i7-5600U CPU 2.60GHz (Broadwell), 1 CPU, 4 logical and 2 physical cor Job=RyuJitX64 Jit=RyuJit Platform=X64 ~~~ -### Lookup speed +### Lookup keys with a Zipf distribution -Cache contains 6 items which are fetched repeatedly, no items are evicted. Representative of high hit rate scenario, when there are a low number of hot items. +Take 1000 samples of a [Zipfan distribution](https://en.wikipedia.org/wiki/Zipf%27s_law) over a set of keys of size $N$ and use the keys to lookup values in the cache. If there are $N$ items, the probability of accessing an item numbered $i$ or less is $(i / N)^s$. + +$s$ = 0.86 (yields approx 80/20 distribution) +$N$ = 500 + +Cache size = $N$ / 10 (so we can cache 10% of the total set). ConcurrentLru has approximately the same performance as ClassicLru in this single threaded test. + + +| Method | Mean | Error | StdDev | Ratio | RatioSD | +|------------------- |---------:|--------:|--------:|------:|--------:| +| ClassicLru | 176.1 ns | 2.74 ns | 2.56 ns | 1.00 | 0.00 | +| FastConcurrentLru | 178.0 ns | 2.76 ns | 2.45 ns | 1.01 | 0.02 | +| ConcurrentLru | 185.2 ns | 1.87 ns | 1.56 ns | 1.06 | 0.01 | +| FastConcurrentTLru | 435.7 ns | 2.88 ns | 2.41 ns | 2.48 | 0.03 | +| ConcurrentTLru | 425.1 ns | 8.46 ns | 7.91 ns | 2.41 | 0.07 | + +### Raw Lookup speed + +In this test the same items are fetched repeatedly, no items are evicted. Representative of high hit rate scenario, when there are a low number of hot items. 
- ConcurrentLru family does not move items in the queues, it is just marking as accessed for pure cache hits. - ClassicLru must maintain item order, and is internally splicing the fetched item to the head of the linked list. From aed4bdc1638be01569430fe68770f69662d56772 Mon Sep 17 00:00:00 2001 From: alexpeck Date: Sun, 14 Jun 2020 19:35:35 -0700 Subject: [PATCH 2/3] reformat --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c50e0928..46cfcf3c 100644 --- a/README.md +++ b/README.md @@ -79,12 +79,12 @@ Job=RyuJitX64 Jit=RyuJit Platform=X64 ### Lookup keys with a Zipf distribution -Take 1000 samples of a [Zipfan distribution](https://en.wikipedia.org/wiki/Zipf%27s_law) over a set of keys of size $N$ and use the keys to lookup values in the cache. If there are $N$ items, the probability of accessing an item numbered $i$ or less is $(i / N)^s$. +Take 1000 samples of a [Zipfan distribution](https://en.wikipedia.org/wiki/Zipf%27s_law) over a set of keys of size *N* and use the keys to lookup values in the cache. If there are $N$ items, the probability of accessing an item numbered *i* or less is (*i* / *N*)^*s*. -$s$ = 0.86 (yields approx 80/20 distribution) -$N$ = 500 +*s* = 0.86 (yields approx 80/20 distribution)
+*N* = 500 -Cache size = $N$ / 10 (so we can cache 10% of the total set). ConcurrentLru has approximately the same performance as ClassicLru in this single threaded test. +Cache size = *N* / 10 (so we can cache 10% of the total set). ConcurrentLru has approximately the same performance as ClassicLru in this single threaded test. | Method | Mean | Error | StdDev | Ratio | RatioSD | From c8b3f5256a9969ef38a86cecc8c5463e541f7e8f Mon Sep 17 00:00:00 2001 From: alexpeck Date: Sun, 14 Jun 2020 19:36:28 -0700 Subject: [PATCH 3/3] fix format --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 46cfcf3c..e1a972d2 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ Job=RyuJitX64 Jit=RyuJit Platform=X64 ### Lookup keys with a Zipf distribution -Take 1000 samples of a [Zipfan distribution](https://en.wikipedia.org/wiki/Zipf%27s_law) over a set of keys of size *N* and use the keys to lookup values in the cache. If there are $N$ items, the probability of accessing an item numbered *i* or less is (*i* / *N*)^*s*. +Take 1000 samples of a [Zipfan distribution](https://en.wikipedia.org/wiki/Zipf%27s_law) over a set of keys of size *N* and use the keys to lookup values in the cache. If there are *N* items, the probability of accessing an item numbered *i* or less is (*i* / *N*)^*s*. *s* = 0.86 (yields approx 80/20 distribution)
*N* = 500