diff --git a/Lightweight.Caching.Benchmarks/Lru/LruCycle.cs b/Lightweight.Caching.Benchmarks/Lru/LruCycle.cs index b973d63c..41c72be2 100644 --- a/Lightweight.Caching.Benchmarks/Lru/LruCycle.cs +++ b/Lightweight.Caching.Benchmarks/Lru/LruCycle.cs @@ -22,7 +22,7 @@ public class LruCycle private static readonly FastConcurrentLru fastConcurrentLru = new FastConcurrentLru(8, 9, EqualityComparer.Default); private static readonly FastConcurrentTLru fastConcurrentTLru = new FastConcurrentTLru(8, 9, EqualityComparer.Default, TimeSpan.FromMinutes(1)); - private static MemoryCache memoryCache = MemoryCache.Default; + private static MemoryCache memoryCache = System.Runtime.Caching.MemoryCache.Default; [GlobalSetup] public void GlobalSetup() @@ -60,7 +60,7 @@ public void GlobalSetup() } [Benchmark(Baseline = true, OperationsPerInvoke = 24)] - public void ConcurrentDictionaryGetOrAdd() + public void ConcurrentDictionary() { Func func = x => x; dictionary.GetOrAdd(1, func); @@ -93,7 +93,7 @@ public void ConcurrentDictionaryGetOrAdd() } [Benchmark(OperationsPerInvoke = 24)] - public void FastConcurrentLruGetOrAdd() + public void FastConcurrentLru() { // size is 9, so segment size is 3. 6 items will cause queue cycling // without eviction. Hot => cold when not accessed. @@ -128,7 +128,7 @@ public void FastConcurrentLruGetOrAdd() } [Benchmark(OperationsPerInvoke = 24)] - public void ConcurrentLruGetOrAdd() + public void ConcurrentLru() { // size is 9, so segment size is 3. 6 items will cause queue cycling // without eviction. Hot => cold when not accessed. @@ -163,7 +163,7 @@ public void ConcurrentLruGetOrAdd() } [Benchmark(OperationsPerInvoke = 24)] - public void FastConcurrentTLruGetOrAdd() + public void FastConcurrentTLru() { // size is 9, so segment size is 3. 6 items will cause queue cycling // without eviction. Hot => cold when not accessed. 
@@ -198,7 +198,7 @@ public void FastConcurrentTLruGetOrAdd() } [Benchmark(OperationsPerInvoke = 24)] - public void ConcurrentTLruGetOrAdd() + public void ConcurrentTLru() { // size is 9, so segment size is 3. 6 items will cause queue cycling // without eviction. Hot => cold when not accessed. @@ -233,7 +233,7 @@ public void ConcurrentTLruGetOrAdd() } [Benchmark(OperationsPerInvoke = 24)] - public void ClassicLruGetOrAdd() + public void ClassicLru() { // size is 9, so segment size is 3. 6 items will cause queue cycling // without eviction. Hot => cold when not accessed. @@ -268,7 +268,7 @@ public void ClassicLruGetOrAdd() } [Benchmark(OperationsPerInvoke = 24)] - public void MemoryCacheGetStringKey() + public void MemoryCache() { memoryCache.Get("1"); memoryCache.Get("2"); diff --git a/Lightweight.Caching.Benchmarks/Lru/LruGetOrAddTest.cs b/Lightweight.Caching.Benchmarks/Lru/LruGetOrAddTest.cs index 1a6a667a..d636a6a7 100644 --- a/Lightweight.Caching.Benchmarks/Lru/LruGetOrAddTest.cs +++ b/Lightweight.Caching.Benchmarks/Lru/LruGetOrAddTest.cs @@ -38,12 +38,6 @@ public void ConcurrentDictionaryGetOrAdd() dictionary.GetOrAdd(1, func); } - //[Benchmark()] - //public DateTime DateTimeUtcNow() - //{ - // return DateTime.UtcNow; - //} - [Benchmark()] public void FastConcurrentLruGetOrAdd() { @@ -79,12 +73,6 @@ public void ClassicLruGetOrAdd() classicLru.GetOrAdd(1, func); } - //[Benchmark()] - //public void MemoryCacheGetIntKey() - //{ - // memoryCache.Get(key.ToString()); - //} - [Benchmark()] public void MemoryCacheGetStringKey() { diff --git a/Lightweight.Caching.Benchmarks/Lru/MissHitHitRemove.cs b/Lightweight.Caching.Benchmarks/Lru/MissHitHitRemove.cs new file mode 100644 index 00000000..093c2263 --- /dev/null +++ b/Lightweight.Caching.Benchmarks/Lru/MissHitHitRemove.cs @@ -0,0 +1,120 @@ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Runtime.Caching; +using System.Text; +using BenchmarkDotNet.Attributes; +using 
Lightweight.Caching.Lru; + +namespace Lightweight.Caching.Benchmarks.Lru +{ + [MemoryDiagnoser] + public class MissHitHitRemove + { + const int capacity = 9; + const int arraySize = 16; + + private static readonly ConcurrentDictionary dictionary = new ConcurrentDictionary(8, capacity, EqualityComparer.Default); + + private static readonly ClassicLru classicLru = new ClassicLru(8, capacity, EqualityComparer.Default); + private static readonly ConcurrentLru concurrentLru = new ConcurrentLru(8, capacity, EqualityComparer.Default); + private static readonly ConcurrentTLru concurrentTlru = new ConcurrentTLru(8, capacity, EqualityComparer.Default, TimeSpan.FromMinutes(10)); + private static readonly FastConcurrentLru fastConcurrentLru = new FastConcurrentLru(8, capacity, EqualityComparer.Default); + private static readonly FastConcurrentTLru fastConcurrentTLru = new FastConcurrentTLru(8, capacity, EqualityComparer.Default, TimeSpan.FromMinutes(1)); + + private static MemoryCache memoryCache = System.Runtime.Caching.MemoryCache.Default; + + [Benchmark(Baseline = true)] + public void ConcurrentDictionary() + { + Func func = x => new byte[arraySize]; + dictionary.GetOrAdd(1, func); + + dictionary.GetOrAdd(1, func); + dictionary.GetOrAdd(1, func); + + dictionary.TryRemove(1, out var removed); + } + + [Benchmark()] + public void FastConcurrentLru() + { + Func func = x => new byte[arraySize]; + fastConcurrentLru.GetOrAdd(1, func); + + fastConcurrentLru.GetOrAdd(1, func); + fastConcurrentLru.GetOrAdd(1, func); + + fastConcurrentLru.TryRemove(1); + } + + [Benchmark()] + public void ConcurrentLru() + { + Func func = x => new byte[arraySize]; + concurrentLru.GetOrAdd(1, func); + + concurrentLru.GetOrAdd(1, func); + concurrentLru.GetOrAdd(1, func); + + concurrentLru.TryRemove(1); + } + + [Benchmark()] + public void FastConcurrentTlru() + { + Func func = x => new byte[arraySize]; + fastConcurrentTLru.GetOrAdd(1, func); + + fastConcurrentTLru.GetOrAdd(1, func); + 
fastConcurrentTLru.GetOrAdd(1, func); + + fastConcurrentTLru.TryRemove(1); + } + + [Benchmark()] + public void ConcurrentTlru() + { + Func func = x => new byte[arraySize]; + concurrentTlru.GetOrAdd(1, func); + + concurrentTlru.GetOrAdd(1, func); + concurrentTlru.GetOrAdd(1, func); + + concurrentTlru.TryRemove(1); + } + + [Benchmark()] + public void ClassicLru() + { + Func func = x => new byte[arraySize]; + classicLru.GetOrAdd(1, func); + + classicLru.GetOrAdd(1, func); + classicLru.GetOrAdd(1, func); + + classicLru.TryRemove(1); + } + + [Benchmark()] + public void MemoryCache() + { + if (memoryCache.Get("1") == null) + { + memoryCache.Set("1", new byte[arraySize], new CacheItemPolicy()); + } + + if (memoryCache.Get("1") == null) + { + memoryCache.Set("1", new byte[arraySize], new CacheItemPolicy()); + } + + if (memoryCache.Get("1") == null) + { + memoryCache.Set("1", new byte[arraySize], new CacheItemPolicy()); + } + + memoryCache.Remove("1"); + } + } +} diff --git a/Lightweight.Caching.Benchmarks/SegmentedLruTests.cs b/Lightweight.Caching.Benchmarks/PrimitiveBenchmarks.cs similarity index 91% rename from Lightweight.Caching.Benchmarks/SegmentedLruTests.cs rename to Lightweight.Caching.Benchmarks/PrimitiveBenchmarks.cs index b8ec653f..7c015eb3 100644 --- a/Lightweight.Caching.Benchmarks/SegmentedLruTests.cs +++ b/Lightweight.Caching.Benchmarks/PrimitiveBenchmarks.cs @@ -10,7 +10,7 @@ namespace Lightweight.Caching.Benchmarks { [MemoryDiagnoser] - public class SegmentedLruTests + public class PrimitiveBenchmarks { private static readonly ConcurrentDictionary dictionary = new ConcurrentDictionary(8, 9, EqualityComparer.Default); LinkedList intList = new LinkedList(new int[] { 1, 2, 3 }); @@ -45,6 +45,12 @@ public void LinkedListLockSwapFirstToLast() } } + [Benchmark()] + public DateTime DateTimeUtcNow() + { + return DateTime.UtcNow; + } + [Benchmark()] public void DictionaryGetOrAdd() { diff --git a/Lightweight.Caching.Benchmarks/Program.cs 
b/Lightweight.Caching.Benchmarks/Program.cs index 45a5f364..671ad617 100644 --- a/Lightweight.Caching.Benchmarks/Program.cs +++ b/Lightweight.Caching.Benchmarks/Program.cs @@ -15,7 +15,7 @@ class Program static void Main(string[] args) { var summary = BenchmarkRunner - .Run(ManualConfig.Create(DefaultConfig.Instance) + .Run(ManualConfig.Create(DefaultConfig.Instance) .AddJob(Job.RyuJitX64)); } } diff --git a/Lightweight.Caching.UnitTests/Lru/ConcurrentLruTests.cs b/Lightweight.Caching.UnitTests/Lru/ConcurrentLruTests.cs index a8a109b2..14b6b60c 100644 --- a/Lightweight.Caching.UnitTests/Lru/ConcurrentLruTests.cs +++ b/Lightweight.Caching.UnitTests/Lru/ConcurrentLruTests.cs @@ -141,14 +141,14 @@ public async Task WhenDifferentKeysAreRequesteValueIsCreatedForEachAsync() [Fact] public void WhenValuesAreNotReadAndMoreKeysRequestedThanCapacityCountDoesNotIncrease() { - int capacity = hotCap + coldCap; - for (int i = 0; i < capacity + 1; i++) + int hotColdCapacity = hotCap + coldCap; + for (int i = 0; i < hotColdCapacity + 1; i++) { lru.GetOrAdd(i, valueFactory.Create); } - lru.Count.Should().Be(capacity); - valueFactory.timesCalled.Should().Be(capacity + 1); + lru.Count.Should().Be(hotColdCapacity); + valueFactory.timesCalled.Should().Be(hotColdCapacity + 1); } [Fact] @@ -352,7 +352,21 @@ public void WhenKeyDoesNotExistTryRemoveReturnsFalse() lru.TryRemove(2).Should().BeFalse(); } - private class DisposableItem : IDisposable + [Fact] + public void WhenRepeatedlyAddingAndRemovingSameValueLruRemainsInConsistentState() + { + int capacity = hotCap + coldCap + warmCap; + for (int i = 0; i < capacity; i++) + { + // Because TryRemove leaves the item in the queue, when it is eventually removed + // from the cold queue, it should not remove the newly created value. 
+ lru.GetOrAdd(1, valueFactory.Create); + lru.TryGet(1, out var value).Should().BeTrue(); + lru.TryRemove(1); + } + } + + private class DisposableItem : IDisposable { public bool IsDisposed { get; private set; } diff --git a/Lightweight.Caching/Lightweight.Caching.csproj b/Lightweight.Caching/Lightweight.Caching.csproj index ca5d12ef..fe1d0c5c 100644 --- a/Lightweight.Caching/Lightweight.Caching.csproj +++ b/Lightweight.Caching/Lightweight.Caching.csproj @@ -11,7 +11,7 @@ 0.9.1 Copyright © Alex Peck 2020 - https://github.com/plexcake/Lightweight.Caching + https://github.com/bitfaster/Lightweight.Caching Cache diff --git a/Lightweight.Caching/Lru/LruItem.cs b/Lightweight.Caching/Lru/LruItem.cs index 93cc427f..debae1df 100644 --- a/Lightweight.Caching/Lru/LruItem.cs +++ b/Lightweight.Caching/Lru/LruItem.cs @@ -9,8 +9,9 @@ namespace Lightweight.Caching.Lru public class LruItem { private bool wasAccessed; + private bool wasRemoved; - public LruItem(K k, V v) + public LruItem(K k, V v) { this.Key = k; this.Value = v; @@ -25,5 +26,11 @@ public bool WasAccessed get => this.wasAccessed; set => this.wasAccessed = value; } - } + + public bool WasRemoved + { + get => this.wasRemoved; + set => this.wasRemoved = value; + } + } } diff --git a/Lightweight.Caching/Lru/TemplateConcurrentLru.cs b/Lightweight.Caching/Lru/TemplateConcurrentLru.cs index 8d9e7d83..c4258b86 100644 --- a/Lightweight.Caching/Lru/TemplateConcurrentLru.cs +++ b/Lightweight.Caching/Lru/TemplateConcurrentLru.cs @@ -167,19 +167,48 @@ public async Task GetOrAddAsync(K key, Func> valueFactory) public bool TryRemove(K key) { - if (this.dictionary.TryRemove(key, out var removedItem)) + // Possible race condition: + // Thread A TryRemove(1), removes LruItem1, has reference to removed item but not yet marked as removed + // Thread B GetOrAdd(1) => Adds LruItem1* + // Thread C GetOrAdd(2), Cycle, Move(LruItem1, Removed) + // + // Thread C can run and remove LruItem1* from this.dictionary before Thread A has marked 
LruItem1 as removed. + // + // In this situation, a subsequent attempt to fetch 1 will be a miss. The queues will still contain LruItem1*, + // and it will not be marked as removed. If key 1 is fetched while LruItem1* is still in the queue, there will + // be two queue entries for key 1, and neither is marked as removed. Thus when LruItem1 * ages out, it will + // incorrectly remove 1 from the dictionary, and this cycle can repeat. + if (this.dictionary.TryGetValue(key, out var existing)) { - // Mark as not accessed, it will later be cycled out of the queues because it can never be fetched - // from the dictionary. Note: Hot/Warm/Cold count will reflect the removed item until it is cycled - // from the queue. - removedItem.WasAccessed = false; + if (existing.WasRemoved) + { + return false; + } - if (removedItem.Value is IDisposable d) + lock (existing) { - d.Dispose(); + if (existing.WasRemoved) + { + return false; + } + + existing.WasRemoved = true; } - return true; + if (this.dictionary.TryRemove(key, out var removedItem)) + { + // Mark as not accessed, it will later be cycled out of the queues because it can never be fetched + // from the dictionary. Note: Hot/Warm/Cold count will reflect the removed item until it is cycled + // from the queue. + removedItem.WasAccessed = false; + + if (removedItem.Value is IDisposable d) + { + d.Dispose(); + } + + return true; + } } return false; @@ -290,11 +319,24 @@ private void Move(I item, ItemDestination where) Interlocked.Increment(ref this.coldCount); break; case ItemDestination.Remove: - if (this.dictionary.TryRemove(item.Key, out var removedItem)) - { - if (removedItem.Value is IDisposable d) - { - d.Dispose(); + if (!item.WasRemoved) + { + // avoid race where 2 threads could remove the same key - see TryRemove for details. 
+ lock (item) + { + if (item.WasRemoved) + { + break; + } + + if (this.dictionary.TryRemove(item.Key, out var removedItem)) + { + item.WasRemoved = true; + if (removedItem.Value is IDisposable d) + { + d.Dispose(); + } + } } } break; diff --git a/README.md b/README.md index a5671a4e..e1f38bcc 100644 --- a/README.md +++ b/README.md @@ -21,10 +21,6 @@ LRU implementations are intended as an alternative to the System.Runtime.Caching ## Lru Benchmarks -### Lookup speed with queue cycling - -Cache contains 6 items which are fetched repeatedly, no items are evicted. For LRU caches this measures time spent maintaining item access order. For all other classes (including MemoryCache), it is a pure lookup. FastConcurrentLru does not allocate and is approximately 10x faster than MemoryCache. - ~~~ BenchmarkDotNet=v0.12.1, OS=Windows 10.0.18363.900 (1909/November2018Update/19H2) Intel Core i7-5600U CPU 2.60GHz (Broadwell), 1 CPU, 4 logical and 2 physical cores @@ -35,6 +31,16 @@ Intel Core i7-5600U CPU 2.60GHz (Broadwell), 1 CPU, 4 logical and 2 physical cor Job=RyuJitX64 Jit=RyuJit Platform=X64 ~~~ +### Lookup speed + +Cache contains 6 items which are fetched repeatedly; no items are evicted. This is representative of a high hit rate scenario. + +- The ConcurrentLru family does not move items in the queues on a pure cache hit; it only marks the item as accessed. +- ClassicLru must maintain item order, so it internally splices the fetched item to the head of the linked list. +- MemoryCache and ConcurrentDictionary represent a pure lookup. This is the best case scenario for MemoryCache, since the lookup key is a string (if the key were a Guid, using MemoryCache would add string conversion overhead). + +FastConcurrentLru does not allocate and is approximately 10x faster than MemoryCache.
+ | Method | Mean | Error | StdDev | Ratio | Gen 0 | Allocated | |----------------------------- |----------:|---------:|---------:|------:|-------:|----------:| | ConcurrentDictionaryGetOrAdd | 18.72 ns | 0.289 ns | 0.641 ns | 1.00 | - | - | @@ -45,6 +51,23 @@ Job=RyuJitX64 Jit=RyuJit Platform=X64 | ClassicLruGetOrAdd | 75.67 ns | 1.513 ns | 1.554 ns | 3.99 | - | - | | MemoryCacheGetStringKey | 309.14 ns | 2.155 ns | 1.910 ns | 16.17 | 0.0153 | 32 B | +### Mixed workload + +Tests 4 operations: 1 miss (adding the item), 2 hits, then a remove. + +This test needs to be improved to provoke queue cycling. + + +| Method | Mean | Error | StdDev | Ratio | Gen 0 | Allocated | +|--------------------- |-----------:|---------:|---------:|------:|-------:|----------:| +| ConcurrentDictionary | 178.1 ns | 1.47 ns | 1.23 ns | 1.00 | 0.0381 | 80 B | +| FastConcurrentLru | 420.4 ns | 7.52 ns | 6.67 ns | 2.36 | 0.0534 | 112 B | +| ConcurrentLru | 423.7 ns | 3.17 ns | 2.64 ns | 2.38 | 0.0534 | 112 B | +| FastConcurrentTlru | 941.6 ns | 6.69 ns | 5.93 ns | 5.29 | 0.0572 | 120 B | +| ConcurrentTlru | 960.3 ns | 17.73 ns | 14.80 ns | 5.39 | 0.0572 | 120 B | +| ClassicLru | 363.5 ns | 3.65 ns | 3.23 ns | 2.04 | 0.0763 | 160 B | +| MemoryCache | 2,380.9 ns | 33.22 ns | 27.74 ns | 13.37 | 2.3460 | 4912 B | + ## Meta-programming using structs for JIT dead code removal and inlining TemplateConcurrentLru features injectable policies defined as structs. Since structs are subject to special JIT optimizations, the implementation is much faster than if these policies were defined as classes. Using this technique, 'Fast' versions without hit counting are within 30% of the speed of a ConcurrentDictionary.