Skip to content

Commit

Permalink
Update and improve benchmarks
Browse files Browse the repository at this point in the history
- upgrade to most recent versions of compared libs/tools
- for easier profiling
  - wrap profiled tools and parameters into methods to
    get a separate stack
  - sleep 1 sec. at start to allow a profiler to be attached
- catch exceptions and keep going to benchmark remaining tools
- webarchive-commons: add variant with disabled digesting
- gzip: add variant with increased buffer size (64 kB)
  • Loading branch information
sebastian-nagel committed Jan 20, 2020
1 parent d1d7b3a commit 97a646a
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 58 deletions.
8 changes: 4 additions & 4 deletions comparison/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@
<dependency>
<groupId>org.jwat</groupId>
<artifactId>jwat-warc</artifactId>
<version>1.1.0</version>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.netpreserve.commons</groupId>
<artifactId>webarchive-commons</artifactId>
<version>1.1.8</version>
<version>1.1.9</version>
</dependency>
<dependency>
<groupId>org.netpreserve</groupId>
<artifactId>jwarc</artifactId>
<version>0.1.0</version>
<version>0.8.4-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>
Expand All @@ -39,4 +39,4 @@
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
</project>
</project>
161 changes: 107 additions & 54 deletions comparison/src/Bench.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,75 +14,128 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.function.Function;
import java.util.zip.GZIPInputStream;

public class Bench {
public static void main(String[] args) throws IOException {
String filename = args[0];

while (true) {
if (filename.endsWith(".gz")) {
long start = System.currentTimeMillis();
long count = 0;
byte[] buf = new byte[8192];
try (GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(new File(filename)), 8192)) {
while (true) {
int n = gzis.read(buf);
if (n < 0) {
break;
}
}
/**
 * A one-argument function whose {@code apply} method is allowed to throw an exception of type {@code E}.
 *
 * <p>Note the type-parameter order: the result type {@code R} comes first and the
 * argument type {@code T} second.
 *
 * @param <R> type of the value returned by {@code apply}
 * @param <T> type of the argument passed to {@code apply}
 * @param <E> type of the exception {@code apply} is declared to throw
 */
@FunctionalInterface
public interface ThrowingFunction<R, T, E extends Exception> {
/** Applies this function to {@code t} and returns the result; may throw {@code E}. */
R apply(T t) throws E;
}

/**
 * Runs one benchmark and prints its result and elapsed time to standard output.
 *
 * <p>An {@link IOException} or any unchecked exception thrown by {@code func} is reported
 * on standard output, together with the time elapsed until the failure, and is not
 * rethrown, so the caller can go on to benchmark the remaining tools.
 *
 * @param name label printed in front of the result
 * @param func benchmark to run; it receives {@code filename} and returns a short result string
 * @param filename path handed to {@code func}
 */
private static void bench(String name, ThrowingFunction<String, String, IOException> func, String filename) {
    // nanoTime is monotonic; currentTimeMillis can jump when the system clock is adjusted.
    long start = System.nanoTime();
    try {
        String res = func.apply(filename);
        long elapsedMs = (System.nanoTime() - start) / 1_000_000;
        System.out.println(name + " " + res + " in " + elapsedMs + "ms");
    } catch (IOException | RuntimeException e) {
        // Unchecked exceptions are caught as well: a reader failing while iterating
        // must not abort the benchmarks that follow.
        long elapsedMs = (System.nanoTime() - start) / 1_000_000;
        System.out.println(name + " failed after " + elapsedMs + "ms throwing " + e);
    }
}

/**
 * Decompresses a gzip file from start to end, discarding the output.
 *
 * @param filename path of the gzip-compressed file to read
 * @param bufferSize size in bytes used both for the {@link GZIPInputStream} buffer and
 *     for the array the decompressed data is read into
 * @return always the empty string
 * @throws IOException if the file cannot be opened or read
 */
private static String gzip(String filename, int bufferSize) throws IOException {
    byte[] buf = new byte[bufferSize];
    // The file stream is a resource of its own so that it is closed even when the
    // GZIPInputStream constructor throws.
    try (FileInputStream fis = new FileInputStream(new File(filename));
            GZIPInputStream gzis = new GZIPInputStream(fis, bufferSize)) {
        while (gzis.read(buf) >= 0) {
            // decompressed bytes are intentionally discarded
        }
    }
    return "";
}

{
long start = System.currentTimeMillis();
long count = 0;
try (ArchiveReader reader = WARCReaderFactory.get(new File(filename))) {
for (ArchiveRecord record : reader) {
count++;
}
}
System.out.println("webarchive-commons " + count + " in " + (System.currentTimeMillis() - start) + "ms");
/**
 * Runs {@code gzip} on {@code filename} with a buffer size of 8192 bytes.
 *
 * @param filename path of the gzip-compressed file
 * @return whatever {@code gzip} returns
 * @throws IOException if {@code gzip} throws it
 */
private static String gzip8k(String filename) throws IOException {
    final int bufferSize = 8 * 1024;
    return gzip(filename, bufferSize);
}

/**
 * Runs {@code gzip} on {@code filename} with a buffer size of 65536 bytes.
 *
 * @param filename path of the gzip-compressed file
 * @return whatever {@code gzip} returns
 * @throws IOException if {@code gzip} throws it
 */
private static String gzip64k(String filename) throws IOException {
    final int bufferSize = 64 * 1024;
    return gzip(filename, bufferSize);
}

/**
 * Iterates over all records of the file with the reader returned by
 * {@code WARCReaderFactory.get} and counts them.
 *
 * @param filename path of the file to read
 * @return the number of records iterated, as a decimal string
 * @throws IOException if opening, reading or closing the reader throws it
 */
private static String webarchiveCommons(String filename) throws IOException {
    File warcFile = new File(filename);
    long records = 0;
    try (ArchiveReader archive = WARCReaderFactory.get(warcFile)) {
        // Records are only counted; their content is never touched here.
        for (ArchiveRecord ignored : archive) {
            records += 1;
        }
    }
    return String.valueOf(records);
}

// {
// long start = System.currentTimeMillis();
// long count = 0;
// try (WarcReader reader = WarcReaderFactory.getReader(new FileInputStream(filename))) {
// for (WarcRecord record : reader) {
// count++;
// }
// }
// System.out.println("jwat " + count + " in " + (System.currentTimeMillis() - start) + "ms");
// }

{
long start = System.currentTimeMillis();
long count = 0;
try (WarcReader reader = WarcReaderFactory.getReader(new FileInputStream(filename), 8192)) {
for (WarcRecord record : reader) {
count++;
}
}
System.out.println("jwat buff " + count + " in " + (System.currentTimeMillis() - start) + "ms");
/**
 * Same as the plain webarchive-commons benchmark, but calls {@code setDigest(false)}
 * on the reader before iterating over its records.
 *
 * @param filename path of the file to read
 * @return the number of records iterated, as a decimal string
 * @throws IOException if opening, reading or closing the reader throws it
 */
private static String webarchiveCommonsNoDigest(String filename) throws IOException {
    File warcFile = new File(filename);
    long records = 0;
    try (ArchiveReader archive = WARCReaderFactory.get(warcFile)) {
        archive.setDigest(false);
        // Records are only counted; their content is never touched here.
        for (ArchiveRecord ignored : archive) {
            records += 1;
        }
    }
    return String.valueOf(records);
}

{
long start = System.currentTimeMillis();
long count = 0;
try (org.netpreserve.jwarc.WarcReader reader = new org.netpreserve.jwarc.WarcReader(FileChannel.open(Paths.get(filename)))) {
for (org.netpreserve.jwarc.WarcRecord record : reader) {
count++;
}
}
System.out.println("jwarc " + count + " in " + (System.currentTimeMillis() - start) + "ms");
/**
 * Iterates over all records of the file with the reader returned by
 * {@code WarcReaderFactory.getReader(InputStream)} and counts them.
 *
 * @param filename path of the file to read
 * @return the number of records iterated, as a decimal string
 * @throws IOException if opening, reading or closing the file or reader throws it
 */
private static String jwat(String filename) throws IOException {
    long count = 0;
    // The file stream is a resource of its own so that it is closed even when
    // the factory call throws before a reader exists to close it.
    try (FileInputStream in = new FileInputStream(filename);
            WarcReader reader = WarcReaderFactory.getReader(in)) {
        for (WarcRecord record : reader) {
            count++;
        }
    }
    return Long.toString(count);
}

System.out.println("");
/**
 * Iterates over all records of the file with the reader returned by
 * {@code WarcReaderFactory.getReader(InputStream, int)} and counts them.
 *
 * @param filename path of the file to read
 * @return the number of records iterated, as a decimal string
 * @throws IOException if opening, reading or closing the file or reader throws it
 */
private static String jwatBuff(String filename) throws IOException {
    long count = 0;
    // The file stream is a resource of its own so that it is closed even when
    // the factory call throws before a reader exists to close it.
    // NOTE(review): 8192 is presumably the reader's buffer size in bytes; confirm against the JWAT API.
    try (FileInputStream in = new FileInputStream(filename);
            WarcReader reader = WarcReaderFactory.getReader(in, 8192)) {
        for (WarcRecord record : reader) {
            count++;
        }
    }
    return Long.toString(count);
}

/**
 * Iterates over all records of the file with {@code org.netpreserve.jwarc.WarcReader}
 * on top of a {@link FileChannel} and counts them.
 *
 * @param filename path of the file to read
 * @return the number of records iterated, as a decimal string
 * @throws IOException if opening, reading or closing the channel or reader throws it
 */
private static String jwarc(String filename) throws IOException {
    long count = 0;
    // The channel is a resource of its own so that it is closed even when the
    // reader's constructor throws before a reader exists to close it.
    try (FileChannel channel = FileChannel.open(Paths.get(filename));
            org.netpreserve.jwarc.WarcReader reader = new org.netpreserve.jwarc.WarcReader(channel)) {
        for (org.netpreserve.jwarc.WarcRecord record : reader) {
            count++;
        }
    }
    return Long.toString(count);
}

/**
 * Entry point: runs every benchmark against one file, three iterations in a row.
 *
 * <p>The gzip benchmarks are only run when the file name ends with {@code .gz}.
 * Before the first iteration the program sleeps for one second so that a profiler
 * can be attached.
 *
 * @param args {@code args[0]} is the path of the file to benchmark
 */
public static void main(String[] args) {
    if (args.length < 1) {
        // Fail with a usage hint instead of an ArrayIndexOutOfBoundsException.
        System.err.println("Usage: Bench <file>");
        System.exit(1);
    }
    String filename = args[0];
    System.out.println("Benchmarking " + filename);

    int iterations = 3;

    try {
        Thread.sleep(1000); // sleep a short time to be able to attach a profiler
    } catch (InterruptedException e) {
        // Restore the flag and stop: going on with the interrupt flag set would make
        // interruptible channel I/O fail in the benchmarks below.
        Thread.currentThread().interrupt();
        System.err.println("Interrupted before benchmarking started");
        return;
    }

    for (int i = 1; i <= iterations; i++) {
        System.out.println("iteration " + i);

        if (filename.endsWith(".gz")) {
            bench("gzipinputstream (buffer 8kB)", Bench::gzip8k, filename);
            bench("gzipinputstream (buffer 64kB)", Bench::gzip64k, filename);
        }

        bench("webarchive-commons", Bench::webarchiveCommons, filename);
        bench("webarchive-commons (no digest check)", Bench::webarchiveCommonsNoDigest, filename);

        //bench("jwat", Bench::jwat, filename);
        bench("jwat buff", Bench::jwatBuff, filename);

        bench("jwarc", Bench::jwarc, filename);

        System.out.println("");
    }
}
}

0 comments on commit 97a646a

Please sign in to comment.