Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update and complete benchmarks #19

Merged
merged 1 commit into from
Jan 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions comparison/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@
<dependency>
<groupId>org.jwat</groupId>
<artifactId>jwat-warc</artifactId>
<version>1.1.0</version>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.netpreserve.commons</groupId>
<artifactId>webarchive-commons</artifactId>
<version>1.1.8</version>
<version>1.1.9</version>
</dependency>
<dependency>
<groupId>org.netpreserve</groupId>
<artifactId>jwarc</artifactId>
<version>0.1.0</version>
<version>0.8.4-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>
Expand All @@ -39,4 +39,4 @@
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
</project>
</project>
161 changes: 107 additions & 54 deletions comparison/src/Bench.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,75 +14,128 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.function.Function;
import java.util.zip.GZIPInputStream;

public class Bench {
public static void main(String[] args) throws IOException {
String filename = args[0];

while (true) {
if (filename.endsWith(".gz")) {
long start = System.currentTimeMillis();
long count = 0;
byte[] buf = new byte[8192];
try (GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(new File(filename)), 8192)) {
while (true) {
int n = gzis.read(buf);
if (n < 0) {
break;
}
}
/**
 * A one-argument function whose implementation may throw a checked exception.
 *
 * <p>Note the order of the type parameters: the result type is declared first,
 * followed by the argument type and then the exception type.
 *
 * @param <R> type of the value produced by {@link #apply}
 * @param <T> type of the argument accepted by {@link #apply}
 * @param <E> type of exception that {@link #apply} is allowed to throw
 */
@FunctionalInterface
public interface ThrowingFunction<R, T, E extends Exception> {
    /**
     * Applies this function to the given argument.
     *
     * @param input the function argument
     * @return the function result
     * @throws E if the implementation fails
     */
    R apply(T input) throws E;
}

/**
 * Runs a single benchmark and prints its outcome together with the elapsed time.
 *
 * <p>The elapsed time is taken from {@link System#nanoTime()}, which is intended for
 * measuring intervals, and is reported in whole milliseconds. An {@link IOException}
 * thrown by the benchmark is reported on standard output instead of being propagated,
 * so one failing benchmark does not stop the ones that follow.
 *
 * @param name     label printed in front of the result
 * @param func     benchmark body; receives {@code filename} and returns a short result string
 * @param filename path handed to {@code func}
 */
private static void bench(String name, ThrowingFunction<String, String, IOException> func, String filename) {
    long start = System.nanoTime();
    try {
        String res = func.apply(filename);
        long elapsedMs = (System.nanoTime() - start) / 1_000_000L;
        System.out.println(name + " " + res + " in " + elapsedMs + "ms");
    } catch (IOException e) {
        long elapsedMs = (System.nanoTime() - start) / 1_000_000L;
        System.out.println(name + " failed after " + elapsedMs + "ms throwing " + e);
    }
}

/**
 * Baseline benchmark: decompresses the whole gzip file and discards the output.
 *
 * @param filename   path of the gzip-compressed file to read
 * @param bufferSize size in bytes used both for the read buffer and for the
 *                   {@link GZIPInputStream}'s internal buffer
 * @return always the empty string; this benchmark has no result to report
 * @throws IOException if the file cannot be opened or decompressed
 */
private static String gzip(String filename, int bufferSize) throws IOException {
    byte[] buf = new byte[bufferSize];
    // The file stream is its own resource so that it is closed even when the
    // GZIPInputStream constructor fails while reading the gzip header.
    try (FileInputStream fileIn = new FileInputStream(new File(filename));
         GZIPInputStream gzis = new GZIPInputStream(fileIn, bufferSize)) {
        while (gzis.read(buf) >= 0) {
            // Drain the stream; the decompressed bytes are not needed.
        }
    }
    return "";
}

{
long start = System.currentTimeMillis();
long count = 0;
try (ArchiveReader reader = WARCReaderFactory.get(new File(filename))) {
for (ArchiveRecord record : reader) {
count++;
}
}
System.out.println("webarchive-commons " + count + " in " + (System.currentTimeMillis() - start) + "ms");
/**
 * Benchmark body that drains the gzip file using an 8192-byte buffer.
 *
 * @param filename path of the gzip-compressed file to read
 * @return whatever {@code gzip} returns for this file
 * @throws IOException if reading the file fails
 */
private static String gzip8k(String filename) throws IOException {
    final int eightKiB = 8 * 1024;
    return gzip(filename, eightKiB);
}

/**
 * Benchmark body that drains the gzip file using a 65536-byte buffer.
 *
 * @param filename path of the gzip-compressed file to read
 * @return whatever {@code gzip} returns for this file
 * @throws IOException if reading the file fails
 */
private static String gzip64k(String filename) throws IOException {
    final int sixtyFourKiB = 64 * 1024;
    return gzip(filename, sixtyFourKiB);
}

/**
 * Benchmark body for webarchive-commons: iterates over every record of the
 * file and counts them.
 *
 * @param filename path of the WARC file to read
 * @return the number of records iterated, as a decimal string
 * @throws IOException if the reader cannot be opened or closed
 */
private static String webarchiveCommons(String filename) throws IOException {
    final File warcFile = new File(filename);
    long records = 0;
    try (ArchiveReader archive = WARCReaderFactory.get(warcFile)) {
        for (ArchiveRecord ignored : archive) {
            records += 1;
        }
    }
    return String.valueOf(records);
}

// {
// long start = System.currentTimeMillis();
// long count = 0;
// try (WarcReader reader = WarcReaderFactory.getReader(new FileInputStream(filename))) {
// for (WarcRecord record : reader) {
// count++;
// }
// }
// System.out.println("jwat " + count + " in " + (System.currentTimeMillis() - start) + "ms");
// }

{
long start = System.currentTimeMillis();
long count = 0;
try (WarcReader reader = WarcReaderFactory.getReader(new FileInputStream(filename), 8192)) {
for (WarcRecord record : reader) {
count++;
}
}
System.out.println("jwat buff " + count + " in " + (System.currentTimeMillis() - start) + "ms");
/**
 * Benchmark body for webarchive-commons with {@code setDigest(false)} applied
 * to the reader before iterating (presumably this turns off digest
 * computation; confirm against the library documentation).
 *
 * @param filename path of the WARC file to read
 * @return the number of records iterated, as a decimal string
 * @throws IOException if the reader cannot be opened or closed
 */
private static String webarchiveCommonsNoDigest(String filename) throws IOException {
    final File warcFile = new File(filename);
    long records = 0;
    try (ArchiveReader archive = WARCReaderFactory.get(warcFile)) {
        archive.setDigest(false);
        for (ArchiveRecord ignored : archive) {
            records += 1;
        }
    }
    return String.valueOf(records);
}

{
long start = System.currentTimeMillis();
long count = 0;
try (org.netpreserve.jwarc.WarcReader reader = new org.netpreserve.jwarc.WarcReader(FileChannel.open(Paths.get(filename)))) {
for (org.netpreserve.jwarc.WarcRecord record : reader) {
count++;
}
}
System.out.println("jwarc " + count + " in " + (System.currentTimeMillis() - start) + "ms");
/**
 * Benchmark body for JWAT using the factory's default buffering: iterates over
 * every record of the file and counts them.
 *
 * @param filename path of the WARC file to read
 * @return the number of records iterated, as a decimal string
 * @throws IOException if the file cannot be opened or the reader cannot be created or closed
 */
private static String jwat(String filename) throws IOException {
    long count = 0;
    // The file stream is its own resource so that it is closed even when
    // creating the reader fails.
    try (FileInputStream in = new FileInputStream(filename);
         WarcReader reader = WarcReaderFactory.getReader(in)) {
        for (WarcRecord record : reader) {
            count++;
        }
    }
    return Long.toString(count);
}

System.out.println("");
/**
 * Benchmark body for JWAT with an explicit buffer size of 8192 passed to the
 * reader factory: iterates over every record of the file and counts them.
 *
 * @param filename path of the WARC file to read
 * @return the number of records iterated, as a decimal string
 * @throws IOException if the file cannot be opened or the reader cannot be created or closed
 */
private static String jwatBuff(String filename) throws IOException {
    long count = 0;
    // The file stream is its own resource so that it is closed even when
    // creating the reader fails.
    try (FileInputStream in = new FileInputStream(filename);
         WarcReader reader = WarcReaderFactory.getReader(in, 8192)) {
        for (WarcRecord record : reader) {
            count++;
        }
    }
    return Long.toString(count);
}

/**
 * Benchmark body for jwarc: iterates over every record of the file, read
 * through a {@link FileChannel}, and counts them.
 *
 * @param filename path of the WARC file to read
 * @return the number of records iterated, as a decimal string
 * @throws IOException if the file cannot be opened or the reader cannot be created or closed
 */
private static String jwarc(String filename) throws IOException {
    long count = 0;
    // The channel is its own resource so that it is closed even when the
    // WarcReader constructor fails.
    try (FileChannel channel = FileChannel.open(Paths.get(filename));
         org.netpreserve.jwarc.WarcReader reader = new org.netpreserve.jwarc.WarcReader(channel)) {
        for (org.netpreserve.jwarc.WarcRecord record : reader) {
            count++;
        }
    }
    return Long.toString(count);
}

/**
 * Entry point: runs every benchmark against one file for a fixed number of
 * iterations and prints the timings to standard output.
 *
 * <p>The gzip baseline benchmarks are only run when the file name ends in
 * {@code .gz}.
 *
 * @param args {@code args[0]} is the path of the WARC file to benchmark
 */
public static void main(String[] args) {
    if (args == null || args.length < 1) {
        System.err.println("Usage: Bench <warc-file>");
        System.exit(1);
    }
    String filename = args[0];
    System.out.println("Benchmarking " + filename);

    final int iterations = 3;

    try {
        Thread.sleep(1000); // sleep a short time to be able to attach a profiler
    } catch (InterruptedException e) {
        // Restore the flag and stop: running on with the thread interrupted
        // would make interruptible channel reads fail.
        Thread.currentThread().interrupt();
        System.err.println("Interrupted before benchmarking started; aborting");
        return;
    }

    for (int i = 1; i <= iterations; i++) {
        System.out.println("iteration " + i);

        if (filename.endsWith(".gz")) {
            bench("gzipinputstream (buffer 8kB)", Bench::gzip8k, filename);
            bench("gzipinputstream (buffer 64kB)", Bench::gzip64k, filename);
        }

        bench("webarchive-commons", Bench::webarchiveCommons, filename);
        bench("webarchive-commons (no digest check)", Bench::webarchiveCommonsNoDigest, filename);

        //bench("jwat", Bench::jwat, filename);
        bench("jwat buff", Bench::jwatBuff, filename);

        bench("jwarc", Bench::jwarc, filename);

        System.out.println("");
    }
}
}