diff --git a/BitFaster.Caching.HitRateAnalysis/Arc/Analysis.cs b/BitFaster.Caching.HitRateAnalysis/Arc/Analysis.cs
new file mode 100644
index 00000000..9e2d5cc6
--- /dev/null
+++ b/BitFaster.Caching.HitRateAnalysis/Arc/Analysis.cs
@@ -0,0 +1,77 @@
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using BitFaster.Caching.Lru;
+using CsvHelper;
+
+namespace BitFaster.Caching.HitRateAnalysis.Arc
+{
+    /// <summary>
+    /// Replays one key sequence against a ConcurrentLru and a ClassicLru of the
+    /// same capacity so that their hit rates can be compared.
+    /// </summary>
+    public class Analysis
+    {
+        private readonly ConcurrentLru<long, int> concurrentLru;
+        private readonly ClassicLru<long, int> classicLru;
+
+        /// <summary>
+        /// Creates both caches with the same constructor arguments.
+        /// </summary>
+        /// <param name="cacheSize">Capacity given to each cache.</param>
+        public Analysis(int cacheSize)
+        {
+            concurrentLru = new ConcurrentLru<long, int>(1, cacheSize, EqualityComparer<long>.Default);
+            classicLru = new ClassicLru<long, int>(1, cacheSize, EqualityComparer<long>.Default);
+        }
+
+        /// <summary>Capacity reported by the ConcurrentLru instance.</summary>
+        public int CacheSize => concurrentLru.Capacity;
+
+        /// <summary>ConcurrentLru hit ratio scaled to a percentage.</summary>
+        public double ConcurrentLruHitRate => concurrentLru.HitRatio * 100;
+
+        /// <summary>ClassicLru hit ratio scaled to a percentage.</summary>
+        public double ClassicLruHitRate => classicLru.HitRatio * 100;
+
+        /// <summary>
+        /// Looks up <paramref name="key"/> in both caches, adding it on a miss.
+        /// </summary>
+        public void TestKey(long key)
+        {
+            // The cached value is irrelevant; only hits and misses are measured.
+            concurrentLru.GetOrAdd(key, u => 1);
+            classicLru.GetOrAdd(key, u => 1);
+        }
+
+        /// <summary>
+        /// Writes the capacity and both hit rates to the console.
+        /// </summary>
+        public void Compare()
+        {
+            Console.WriteLine($"Size {concurrentLru.Capacity} Classic HitRate {FormatHits(classicLru.HitRatio)} Concurrent HitRate {FormatHits(concurrentLru.HitRatio)}");
+        }
+
+        // Formats a ratio as a percentage with two decimal places (current culture).
+        private static string FormatHits(double hitRate)
+        {
+            return string.Format("{0:N2}%", hitRate * 100.0);
+        }
+
+        /// <summary>
+        /// Writes <paramref name="results"/> to <paramref name="path"/> as CSV using the invariant culture.
+        /// </summary>
+        public static void WriteToFile(string path, IEnumerable<Analysis> results)
+        {
+            using (var writer = new StreamWriter(path))
+            using (var csv = new CsvWriter(writer, CultureInfo.InvariantCulture))
+            {
+                csv.WriteRecords(results);
+            }
+        }
+    }
+}
diff --git a/BitFaster.Caching.HitRateAnalysis/Arc/ArcDataFile.cs b/BitFaster.Caching.HitRateAnalysis/Arc/ArcDataFile.cs
new file mode 100644
index 00000000..4afaae48
--- /dev/null
+++ b/BitFaster.Caching.HitRateAnalysis/Arc/ArcDataFile.cs
@@ -0,0 +1,163 @@
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.IO.Compression;
+using System.Linq;
+using System.Net.Http;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace BitFaster.Caching.HitRateAnalysis.Arc
+{
+    /// <summary>
+    /// A gzip-compressed trace file that is downloaded and decompressed into the
+    /// working directory on demand, then enumerated as a sequence of block numbers.
+    /// </summary>
+    public class ArcDataFile
+    {
+        // Field separators accepted when splitting a trace line.
+        private static readonly char[] FieldSeparators = { ' ', '\t' };
+
+        // See https://researcher.watson.ibm.com/researcher/view_person_subpage.php?id=4700
+        private readonly Uri Uri;
+        private readonly string FilePath;
+
+        // Trace file taken from:
+        // Nimrod Megiddo and Dharmendra S. Modha, "ARC: A Self-Tuning, Low Overhead Replacement Cache," USENIX Conference on File and Storage Technologies (FAST 03), San Francisco, CA, pp. 115-130, March 31-April 2, 2003.
+
+        /// <summary>
+        /// Creates a data file whose local name is derived from the last segment of <paramref name="uri"/>.
+        /// </summary>
+        public ArcDataFile(Uri uri)
+        {
+            this.Uri = uri;
+            this.FilePath = ComputePath(uri);
+        }
+
+        // Returns the last URI segment with a trailing ".gz" removed, if present.
+        private static string ComputePath(Uri uri)
+        {
+            const string GzipExtension = ".gz";
+
+            string seg = uri.Segments.LastOrDefault();
+
+            if (seg == null)
+            {
+                throw new InvalidOperationException($"Cannot derive a file name from '{uri}'.");
+            }
+
+            // Ordinal comparison: this is a file extension, not linguistic text.
+            if (seg.EndsWith(GzipExtension, StringComparison.Ordinal))
+            {
+                seg = seg.Substring(0, seg.Length - GzipExtension.Length);
+            }
+
+            return seg;
+        }
+
+        /// <summary>
+        /// Downloads the compressed trace unless it is already on disk, then
+        /// decompresses it unless the decompressed file is already on disk.
+        /// </summary>
+        public async Task DownloadIfNotExistsAsync()
+        {
+            var zipped = FilePath + ".gz";
+
+            if (!File.Exists(zipped))
+            {
+                Console.WriteLine($"Downloading {Uri}...");
+
+                // Write to a temporary name and rename on success, so an interrupted
+                // download is never mistaken for a complete file on the next run.
+                var tempPath = zipped + ".tmp";
+
+                using (var client = new HttpClient())
+                using (var response = await client.GetAsync(Uri, HttpCompletionOption.ResponseHeadersRead))
+                {
+                    // Fail here rather than saving an HTTP error page as the data file.
+                    response.EnsureSuccessStatusCode();
+
+                    using (var fs = new FileStream(tempPath, FileMode.Create))
+                    {
+                        await response.Content.CopyToAsync(fs);
+                    }
+                }
+
+                File.Move(tempPath, zipped);
+            }
+
+            if (!File.Exists(FilePath))
+            {
+                Console.WriteLine($"Decompressing {Uri}...");
+
+                // Same temporary-then-rename approach, so a failed decompression is retried.
+                var tempPath = FilePath + ".tmp";
+
+                using (var originalFileStream = File.OpenRead(zipped))
+                using (var decompressionStream = new GZipStream(originalFileStream, CompressionMode.Decompress))
+                using (var decompressedFileStream = File.Create(tempPath))
+                {
+                    decompressionStream.CopyTo(decompressedFileStream);
+                }
+
+                File.Move(tempPath, FilePath);
+            }
+        }
+
+        /// <summary>
+        /// Lazily reads the decompressed trace and yields every block number it requests.
+        /// </summary>
+        /// <returns>For each parsable line, the values starting_block .. starting_block + number_of_blocks - 1.</returns>
+        public IEnumerable<long> EnumerateFileData()
+        {
+            // File Format:
+            // Every line in every file has four fields.
+            //
+            // First field: starting_block
+            // Second field: number_of_blocks(each block is 512 bytes)
+            //
+            // Third field: ignore
+            // Fourth field: request_number(starts at 0)
+            //
+            //
+            // Example: first line in P6.lis is
+            // 110765 64 0 0
+            //
+            //
+            // 110765 starting block
+            //
+            // 64 64 blocks each of 512 bytes
+            // so this represents 64 requests(each of a 512 byte page) from 110765 to 110828
+            //
+            // 0 ignore
+            //
+            // 0 request number(goes from 0 to n-1)
+
+            using StreamReader sr = new StreamReader(FilePath);
+
+            string line;
+            while ((line = sr.ReadLine()) != null)
+            {
+                // Tolerate repeated, leading or trailing separators between fields.
+                var chunks = line.Split(FieldSeparators, StringSplitOptions.RemoveEmptyEntries);
+
+                // Skip blank or truncated lines instead of indexing past the end of the array.
+                if (chunks.Length < 2)
+                {
+                    continue;
+                }
+
+                // The trace is machine-written, so parse with the invariant culture.
+                if (long.TryParse(chunks[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out var startBlock)
+                    && int.TryParse(chunks[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out var sequence))
+                {
+                    for (long i = startBlock; i < startBlock + sequence; i++)
+                    {
+                        yield return i;
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/BitFaster.Caching.HitRateAnalysis/Arc/Runner.cs b/BitFaster.Caching.HitRateAnalysis/Arc/Runner.cs
new file mode 100644
index 00000000..636a877d
--- /dev/null
+++ b/BitFaster.Caching.HitRateAnalysis/Arc/Runner.cs
@@ -0,0 +1,60 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace BitFaster.Caching.HitRateAnalysis.Arc
+{
+    /// <summary>
+    /// Replays the trace named by a RunnerConfig against every configured
+    /// Analysis, then prints and saves the resulting hit rates.
+    /// </summary>
+    public class Runner
+    {
+        private readonly RunnerConfig config;
+
+        public Runner(RunnerConfig config)
+        {
+            this.config = config ?? throw new ArgumentNullException(nameof(config));
+        }
+
+        /// <summary>
+        /// Ensures the trace file is available, feeds every key to each analysis,
+        /// prints a summary per analysis and writes all results to the configured CSV path.
+        /// </summary>
+        public async Task Run()
+        {
+            await this.config.File.DownloadIfNotExistsAsync();
+
+            Console.WriteLine("Running...");
+            int count = 0;
+            var sw = Stopwatch.StartNew();
+
+            foreach (var key in this.config.File.EnumerateFileData())
+            {
+                foreach (var a in this.config.Analysis)
+                {
+                    a.TestKey(key);
+                }
+
+                // Periodic progress output for long traces.
+                if (++count % 100000 == 0)
+                {
+                    Console.WriteLine($"Processed {count} keys...");
+                }
+            }
+
+            Console.WriteLine($"Tested {count} keys in {sw.Elapsed}");
+
+            foreach (var a in this.config.Analysis)
+            {
+                a.Compare();
+            }
+
+            Analysis.WriteToFile(this.config.Name, this.config.Analysis);
+        }
+    }
+}
diff --git a/BitFaster.Caching.HitRateAnalysis/Arc/RunnerConfig.cs b/BitFaster.Caching.HitRateAnalysis/Arc/RunnerConfig.cs
new file mode 100644
index 00000000..edd95f32
--- /dev/null
+++ b/BitFaster.Caching.HitRateAnalysis/Arc/RunnerConfig.cs
@@ -0,0 +1,40 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace BitFaster.Caching.HitRateAnalysis.Arc
+{
+    /// <summary>
+    /// Bundles the output CSV name, one Analysis per cache size and the trace
+    /// file for a single hit rate run.
+    /// </summary>
+    public class RunnerConfig
+    {
+        private readonly string name;
+        private readonly List<Analysis> analysis;
+        private readonly ArcDataFile file;
+
+        /// <param name="name">Path of the CSV file the results are written to.</param>
+        /// <param name="cacheSizes">One Analysis is created per entry.</param>
+        /// <param name="dataUri">Location of the compressed trace file.</param>
+        public RunnerConfig(string name, int[] cacheSizes, Uri dataUri)
+        {
+            this.name = name;
+            this.analysis = cacheSizes.Select(s => new Analysis(s)).ToList();
+            this.file = new ArcDataFile(dataUri);
+        }
+
+        public string Name => this.name;
+
+        public IEnumerable<Analysis> Analysis => this.analysis;
+
+        public ArcDataFile File => this.file;
+
+        // Predefined runs; readonly so the shared instances cannot be reassigned.
+        public static readonly RunnerConfig Database = new RunnerConfig("results.arc.database.csv", new[] { 1000000, 2000000, 3000000, 4000000, 5000000, 6000000, 7000000, 8000000 }, new Uri("https://github.com/bitfaster/cache-datasets/releases/download/v1.0/DS1.lis.gz"));
+        public static readonly RunnerConfig Search = new RunnerConfig("results.arc.search.csv", new[] { 100000, 200000, 300000, 400000, 500000, 600000, 700000, 800000 }, new Uri("https://github.com/bitfaster/cache-datasets/releases/download/v1.0/S3.lis.gz"));
+        public static readonly RunnerConfig Oltp = new RunnerConfig("results.arc.oltp.csv", new[] { 250, 500, 750, 1000, 1250, 1500, 1750, 2000 }, new Uri("https://github.com/bitfaster/cache-datasets/releases/download/v1.0/OLTP.lis.gz"));
+    }
+}
diff --git a/BitFaster.Caching.HitRateAnalysis/Glimpse/DataFile.cs b/BitFaster.Caching.HitRateAnalysis/Glimpse/DataFile.cs
index d67d14bd..6d5b3273 100644
--- a/BitFaster.Caching.HitRateAnalysis/Glimpse/DataFile.cs
+++ b/BitFaster.Caching.HitRateAnalysis/Glimpse/DataFile.cs
@@ -12,7 +12,7 @@ namespace BitFaster.Caching.HitRateAnalysis.Glimpse
     // TODO: dedupe
     public class DataFile
     {
-        private static readonly Uri Uri = new Uri("https://github.com/ben-manes/caffeine/raw/master/simulator/src/main/resources/com/github/benmanes/caffeine/cache/simulator/parser/lirs/gli.trace.gz");
+        private static readonly Uri Uri = new Uri("https://github.com/bitfaster/cache-datasets/releases/download/v1.0/gli.trace.gz");
         private static readonly string FilePath = "gli.trace";
 
         public static async Task DownloadIfNotExistsAsync()
diff --git a/BitFaster.Caching.HitRateAnalysis/Program.cs b/BitFaster.Caching.HitRateAnalysis/Program.cs
index be098525..dcf99a81 100644
--- a/BitFaster.Caching.HitRateAnalysis/Program.cs
+++ b/BitFaster.Caching.HitRateAnalysis/Program.cs
@@ -1,9 +1,13 @@
 using System;
 using System.Threading.Tasks;
+using BitFaster.Caching.HitRateAnalysis.Arc;
 
 var menu = new EasyConsole.Menu()
     .Add("Zipf", () => BitFaster.Caching.HitRateAnalysis.Zipfian.Runner.Run())
     .Add("Wikibench", () => BitFaster.Caching.HitRateAnalysis.Wikibench.Runner.Run().Wait())
-    .Add("Glimpse", () => BitFaster.Caching.HitRateAnalysis.Glimpse.Runner.Run().Wait());
+    .Add("Glimpse", () => BitFaster.Caching.HitRateAnalysis.Glimpse.Runner.Run().Wait())
+    .Add("Arc Database", () => new BitFaster.Caching.HitRateAnalysis.Arc.Runner(RunnerConfig.Database).Run().Wait())
+    .Add("Arc Search", () => new BitFaster.Caching.HitRateAnalysis.Arc.Runner(RunnerConfig.Search).Run().Wait())
+    .Add("Arc OLTP", () => new BitFaster.Caching.HitRateAnalysis.Arc.Runner(RunnerConfig.Oltp).Run().Wait());
 
 menu.Display();