From 97a646a86a138026b820d10c964369fe4d4a560c Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Thu, 16 Jan 2020 15:38:52 +0100 Subject: [PATCH] Update and improve benchmarks - upgrade to most recent versions of compared libs/tools - for easier profiling - wrap profiled tools and parameters into methods to get a separate stack - sleep 1 sec. at start to allow attaching a profiler - catch exceptions and keep going to benchmark remaining tools - webarchive-commons: add variant with disabled digesting - gzip: add variant with increased buffer size (64 kB) --- comparison/pom.xml | 8 +- comparison/src/Bench.java | 161 +++++++++++++++++++++++++------------- 2 files changed, 111 insertions(+), 58 deletions(-) diff --git a/comparison/pom.xml b/comparison/pom.xml index 65af863..9ca8d57 100644 --- a/comparison/pom.xml +++ b/comparison/pom.xml @@ -19,17 +19,17 @@ org.jwat jwat-warc - 1.1.0 + 1.1.1 org.netpreserve.commons webarchive-commons - 1.1.8 + 1.1.9 org.netpreserve jwarc - 0.1.0 + 0.8.4-SNAPSHOT compile @@ -39,4 +39,4 @@ 1.8 1.8 - \ No newline at end of file + diff --git a/comparison/src/Bench.java b/comparison/src/Bench.java index f15a382..d235ee9 100644 --- a/comparison/src/Bench.java +++ b/comparison/src/Bench.java @@ -14,75 +14,128 @@ import java.io.FileInputStream; import java.io.IOException; import java.nio.channels.FileChannel; +import java.nio.file.Files; import java.nio.file.Paths; +import java.util.function.Function; import java.util.zip.GZIPInputStream; public class Bench { - public static void main(String[] args) throws IOException { - String filename = args[0]; - while (true) { - if (filename.endsWith(".gz")) { - long start = System.currentTimeMillis(); - long count = 0; - byte[] buf = new byte[8192]; - try (GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(new File(filename)), 8192)) { - while (true) { - int n = gzis.read(buf); - if (n < 0) { - break; - } - } + @FunctionalInterface + public interface ThrowingFunction { + R apply(T t) throws 
E; + } + + private static void bench(String name, ThrowingFunction func, String filename) { + long start = System.currentTimeMillis(); + try { + String res = func.apply(filename); + System.out.println(name + " " + res + " in " + (System.currentTimeMillis() - start) + "ms"); + } catch(IOException e) { + System.out.println(name + " failed after " + (System.currentTimeMillis() - start) + "ms throwing " + e); + } + } + + private static String gzip(String filename, int bufferSize) throws IOException { + byte[] buf = new byte[bufferSize]; + try (GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(new File(filename)), bufferSize)) { + while (true) { + int n = gzis.read(buf); + if (n < 0) { + break; } - System.out.println("gzipinpustream in " + (System.currentTimeMillis() - start) + "ms"); } + } + return ""; + } - { - long start = System.currentTimeMillis(); - long count = 0; - try (ArchiveReader reader = WARCReaderFactory.get(new File(filename))) { - for (ArchiveRecord record : reader) { - count++; - } - } - System.out.println("webarchive-commons " + count + " in " + (System.currentTimeMillis() - start) + "ms"); + private static String gzip8k(String filename) throws IOException { + return gzip(filename, 8192); + } + + private static String gzip64k(String filename) throws IOException { + return gzip(filename, 65536); + } + + private static String webarchiveCommons(String filename) throws IOException { + long count = 0; + try (ArchiveReader reader = WARCReaderFactory.get(new File(filename))) { + for (ArchiveRecord record : reader) { + count++; } + } + return Long.toString(count); + } -// { -// long start = System.currentTimeMillis(); -// long count = 0; -// try (WarcReader reader = WarcReaderFactory.getReader(new FileInputStream(filename))) { -// for (WarcRecord record : reader) { -// count++; -// } -// } -// System.out.println("jwat " + count + " in " + (System.currentTimeMillis() - start) + "ms"); -// } - - { - long start = System.currentTimeMillis(); - long 
count = 0; - try (WarcReader reader = WarcReaderFactory.getReader(new FileInputStream(filename), 8192)) { - for (WarcRecord record : reader) { - count++; - } - } - System.out.println("jwat buff " + count + " in " + (System.currentTimeMillis() - start) + "ms"); + private static String webarchiveCommonsNoDigest(String filename) throws IOException { + long count = 0; + try (ArchiveReader reader = WARCReaderFactory.get(new File(filename))) { + reader.setDigest(false); + for (ArchiveRecord record : reader) { + count++; } + } + return Long.toString(count); + } - { - long start = System.currentTimeMillis(); - long count = 0; - try (org.netpreserve.jwarc.WarcReader reader = new org.netpreserve.jwarc.WarcReader(FileChannel.open(Paths.get(filename)))) { - for (org.netpreserve.jwarc.WarcRecord record : reader) { - count++; - } - } - System.out.println("jwarc " + count + " in " + (System.currentTimeMillis() - start) + "ms"); + private static String jwat(String filename) throws IOException { + long count = 0; + try (WarcReader reader = WarcReaderFactory.getReader(new FileInputStream(filename))) { + for (WarcRecord record : reader) { + count++; } + } + return Long.toString(count); + } - System.out.println(""); + private static String jwatBuff(String filename) throws IOException { + long count = 0; + try (WarcReader reader = WarcReaderFactory.getReader(new FileInputStream(filename), 8192)) { + for (WarcRecord record : reader) { + count++; + } + } + return Long.toString(count); + } + + private static String jwarc(String filename) throws IOException { + long count = 0; + try (org.netpreserve.jwarc.WarcReader reader = new org.netpreserve.jwarc.WarcReader(FileChannel.open(Paths.get(filename)))) { + for (org.netpreserve.jwarc.WarcRecord record : reader) { + count++; + } + } + return Long.toString(count); + } + + public static void main(String[] args) { + String filename = args[0]; + System.out.println("Benchmarking " + filename); + + int iterations = 3; + try { + Thread.sleep(1000); 
// sleep a short time to be able to attach a profiler + } catch(Exception e) { + } + + for (int i = 1; i <= iterations; i++) { + System.out.println("iteration " + i); + + if (filename.endsWith(".gz")) { + bench("gzipinputstream (buffer 8kB)", Bench::gzip8k, filename); + bench("gzipinputstream (buffer 64kB)", Bench::gzip64k, filename); + } + + bench("webarchive-commons", Bench::webarchiveCommons, filename); + bench("webarchive-commons (no digest check)", Bench::webarchiveCommonsNoDigest, filename); + + //bench("jwat", Bench::jwat, filename); + bench("jwat buff", Bench::jwatBuff, filename); + + bench("jwarc", Bench::jwarc, filename); + + System.out.println(""); } } }