From eb386905a48d32b4cd62347c1a71e8b0b34d0660 Mon Sep 17 00:00:00 2001 From: Ryan Skraba Date: Thu, 29 Aug 2019 18:00:02 +0200 Subject: [PATCH 1/8] PARQUET-1644: Clean up some benchmark code and docs. --- parquet-benchmarks/README.md | 4 ++-- parquet-benchmarks/run.sh | 4 ++-- parquet-benchmarks/run_checksums.sh | 4 ++-- .../parquet/benchmarks/ReadBenchmarks.java | 16 +++++++++------- .../parquet/benchmarks/WriteBenchmarks.java | 16 +++++++++------- 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/parquet-benchmarks/README.md b/parquet-benchmarks/README.md index 8da067b09b..882b1cbf63 100644 --- a/parquet-benchmarks/README.md +++ b/parquet-benchmarks/README.md @@ -28,11 +28,11 @@ mvn --projects parquet-benchmarks -amd -DskipTests -Denforcer.skip=true clean pa Then, you can run all the benchmarks with the following command ``` -./parquet-benchmarks/run.sh -wi 5 -i 5 -f 3 -bm all +./parquet-benchmarks/run.sh -wi 5 -i 5 -f 3 ``` To understand what each command line argument means and for more arguments please see ``` java -jar parquet-benchmarks/target/parquet-benchmarks.jar -help -``` \ No newline at end of file +``` diff --git a/parquet-benchmarks/run.sh b/parquet-benchmarks/run.sh index 8aa1e69ab3..ad581180d9 100755 --- a/parquet-benchmarks/run.sh +++ b/parquet-benchmarks/run.sh @@ -21,10 +21,10 @@ SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P ) echo "Starting WRITE benchmarks" -java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* "$@" +java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.WriteBenchmarks "$@" echo "Generating test data" java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator generate echo "Data generated, starting READ benchmarks" -java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Read* "$@" +java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.WriteBenchmarks "$@" echo "Cleaning up generated data" java -cp 
${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator cleanup diff --git a/parquet-benchmarks/run_checksums.sh b/parquet-benchmarks/run_checksums.sh index e798488157..37bb4f5c42 100755 --- a/parquet-benchmarks/run_checksums.sh +++ b/parquet-benchmarks/run_checksums.sh @@ -23,6 +23,6 @@ SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P ) echo "Page level CRC checksum benchmarks" echo "Running write benchmarks" -java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*PageChecksumWriteBenchmarks -bm ss "$@" +java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.PageChecksumWriteBenchmarks -bm ss "$@" echo "Running read benchmarks" -java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*PageChecksumReadBenchmarks -bm ss "$@" +java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.PageChecksumReadBenchmarks -bm ss "$@" diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java index dba5544a5e..4a91234e6f 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java @@ -20,6 +20,8 @@ import org.apache.hadoop.fs.Path; import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.infra.Blackhole; import org.apache.parquet.example.data.Group; import org.apache.parquet.hadoop.ParquetReader; @@ -47,35 +49,35 @@ private void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOExc reader.close(); } - @Benchmark + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsDefaultBlockAndPageSizeUncompressed(Blackhole blackhole) throws IOException { read(file_1M, ONE_MILLION, blackhole); } - @Benchmark + @Benchmark 
@BenchmarkMode(Mode.SingleShotTime) public void read1MRowsBS256MPS4MUncompressed(Blackhole blackhole) throws IOException { read(file_1M_BS256M_PS4M, ONE_MILLION, blackhole); } - @Benchmark + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsBS256MPS8MUncompressed(Blackhole blackhole) throws IOException { read(file_1M_BS256M_PS8M, ONE_MILLION, blackhole); } - @Benchmark + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsBS512MPS4MUncompressed(Blackhole blackhole) throws IOException { read(file_1M_BS512M_PS4M, ONE_MILLION, blackhole); } - @Benchmark + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsBS512MPS8MUncompressed(Blackhole blackhole) throws IOException { @@ -90,14 +92,14 @@ public void read1MRowsBS512MPS8MUncompressed(Blackhole blackhole) // read(parquetFile_1M_LZO, ONE_MILLION, blackhole); // } - @Benchmark + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsDefaultBlockAndPageSizeSNAPPY(Blackhole blackhole) throws IOException { read(file_1M_SNAPPY, ONE_MILLION, blackhole); } - @Benchmark + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsDefaultBlockAndPageSizeGZIP(Blackhole blackhole) throws IOException { diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java index 5c26a845dc..aea4eac5f3 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java @@ -19,7 +19,9 @@ package org.apache.parquet.benchmarks; import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; @@ -44,7 +46,7 @@ public void cleanup() { 
dataGenerator.cleanup(); } - @Benchmark + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsDefaultBlockAndPageSizeUncompressed() throws IOException { @@ -58,7 +60,7 @@ public void write1MRowsDefaultBlockAndPageSizeUncompressed() ONE_MILLION); } - @Benchmark + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsBS256MPS4MUncompressed() throws IOException { @@ -72,7 +74,7 @@ public void write1MRowsBS256MPS4MUncompressed() ONE_MILLION); } - @Benchmark + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsBS256MPS8MUncompressed() throws IOException { @@ -86,7 +88,7 @@ public void write1MRowsBS256MPS8MUncompressed() ONE_MILLION); } - @Benchmark + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsBS512MPS4MUncompressed() throws IOException { @@ -100,7 +102,7 @@ public void write1MRowsBS512MPS4MUncompressed() ONE_MILLION); } - @Benchmark + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsBS512MPS8MUncompressed() throws IOException { @@ -129,7 +131,7 @@ public void write1MRowsBS512MPS8MUncompressed() // ONE_MILLION); // } - @Benchmark + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsDefaultBlockAndPageSizeSNAPPY() throws IOException { @@ -143,7 +145,7 @@ public void write1MRowsDefaultBlockAndPageSizeSNAPPY() ONE_MILLION); } - @Benchmark + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsDefaultBlockAndPageSizeGZIP() throws IOException { From e270c3a6d256599a12af47a8bf91f5dda6ddfc38 Mon Sep 17 00:00:00 2001 From: Ryan Skraba Date: Thu, 29 Aug 2019 19:06:22 +0200 Subject: [PATCH 2/8] Fix typo in read benchmark. 
--- parquet-benchmarks/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet-benchmarks/run.sh b/parquet-benchmarks/run.sh index ad581180d9..b3d1d52a51 100755 --- a/parquet-benchmarks/run.sh +++ b/parquet-benchmarks/run.sh @@ -25,6 +25,6 @@ java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchm echo "Generating test data" java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator generate echo "Data generated, starting READ benchmarks" -java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.WriteBenchmarks "$@" +java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.ReadBenchmarks "$@" echo "Cleaning up generated data" java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator cleanup From 98dd0b7b1c493dba5b76ad7d31e15d2f5836a7a9 Mon Sep 17 00:00:00 2001 From: Ryan Skraba Date: Thu, 5 Sep 2019 18:09:07 +0200 Subject: [PATCH 3/8] Only set-up the required state for a benchmark. Do not clean up resources after a benchmark, leave them for the next run. 
--- .../parquet/benchmarks/BenchmarkFiles.java | 2 ++ .../parquet/benchmarks/DataGenerator.java | 9 +------- .../benchmarks/FilteringBenchmarks.java | 2 +- .../benchmarks/PageChecksumDataGenerator.java | 23 +------------------ .../PageChecksumReadBenchmarks.java | 9 ++++---- .../PageChecksumWriteBenchmarks.java | 2 +- .../parquet/benchmarks/ReadBenchmarks.java | 16 +++++++++++++ .../parquet/benchmarks/WriteBenchmarks.java | 2 +- 8 files changed, 27 insertions(+), 38 deletions(-) diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkFiles.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkFiles.java index f039403bfc..24da8220ca 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkFiles.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkFiles.java @@ -25,6 +25,8 @@ public class BenchmarkFiles { public static final Configuration configuration = new Configuration(); public static final String TARGET_DIR = "target/tests/ParquetBenchmarks"; + public static final Path targetDir = new Path(TARGET_DIR ); + public static final Path file_1M = new Path(TARGET_DIR + "/PARQUET-1M"); //different block and page sizes diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/DataGenerator.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/DataGenerator.java index 42d9953e68..3b5db686fa 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/DataGenerator.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/DataGenerator.java @@ -115,14 +115,7 @@ public void generateData(Path outFile, Configuration configuration, ParquetPrope public void cleanup() { - deleteIfExists(configuration, file_1M); - deleteIfExists(configuration, file_1M_BS256M_PS4M); - deleteIfExists(configuration, file_1M_BS256M_PS8M); - deleteIfExists(configuration, file_1M_BS512M_PS4M); - deleteIfExists(configuration, 
file_1M_BS512M_PS8M); -// deleteIfExists(configuration, parquetFile_1M_LZO); - deleteIfExists(configuration, file_1M_SNAPPY); - deleteIfExists(configuration, file_1M_GZIP); + deleteIfExists(configuration, targetDir); } public static void main(String[] args) { diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/FilteringBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/FilteringBenchmarks.java index ac47f7bc2f..76802c5fd1 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/FilteringBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/FilteringBenchmarks.java @@ -87,7 +87,7 @@ @Warmup(iterations = 10, batchSize = 1) @Measurement(iterations = 50, batchSize = 1) @OutputTimeUnit(MILLISECONDS) -public class FilteringBenchmarks { +public class FilteringBenchmarks { private static final int RECORD_COUNT = 2_000_000; private static final Logger LOGGER = LoggerFactory.getLogger(FilteringBenchmarks.class); diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumDataGenerator.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumDataGenerator.java index 6c62cc6e6d..49ebdce8e3 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumDataGenerator.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumDataGenerator.java @@ -40,7 +40,7 @@ import static org.apache.parquet.benchmarks.BenchmarkUtils.exists; import static org.apache.parquet.hadoop.metadata.CompressionCodecName.*; -public class PageChecksumDataGenerator { +public class PageChecksumDataGenerator extends DataGenerator { private final MessageType SCHEMA = MessageTypeParser.parseMessageType( "message m {" + @@ -103,25 +103,4 @@ public void generateAll() { throw new RuntimeException(e); } } - - public void cleanup() { - deleteIfExists(configuration, file_100K_NOCHECKSUMS_UNCOMPRESSED); - 
deleteIfExists(configuration, file_100K_CHECKSUMS_UNCOMPRESSED); - deleteIfExists(configuration, file_100K_NOCHECKSUMS_GZIP); - deleteIfExists(configuration, file_100K_CHECKSUMS_GZIP); - deleteIfExists(configuration, file_100K_NOCHECKSUMS_SNAPPY); - deleteIfExists(configuration, file_100K_CHECKSUMS_SNAPPY); - deleteIfExists(configuration, file_1M_NOCHECKSUMS_UNCOMPRESSED); - deleteIfExists(configuration, file_1M_CHECKSUMS_UNCOMPRESSED); - deleteIfExists(configuration, file_1M_NOCHECKSUMS_GZIP); - deleteIfExists(configuration, file_1M_CHECKSUMS_GZIP); - deleteIfExists(configuration, file_1M_NOCHECKSUMS_SNAPPY); - deleteIfExists(configuration, file_1M_CHECKSUMS_SNAPPY); - deleteIfExists(configuration, file_10M_NOCHECKSUMS_UNCOMPRESSED); - deleteIfExists(configuration, file_10M_CHECKSUMS_UNCOMPRESSED); - deleteIfExists(configuration, file_10M_NOCHECKSUMS_GZIP); - deleteIfExists(configuration, file_10M_CHECKSUMS_GZIP); - deleteIfExists(configuration, file_10M_NOCHECKSUMS_SNAPPY); - deleteIfExists(configuration, file_10M_CHECKSUMS_SNAPPY); - } } diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumReadBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumReadBenchmarks.java index db23eeb672..b8c3ffee1f 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumReadBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumReadBenchmarks.java @@ -51,16 +51,15 @@ public class PageChecksumReadBenchmarks { private PageChecksumDataGenerator pageChecksumDataGenerator = new PageChecksumDataGenerator(); + /** + * This needs to be done exactly once. To avoid needlessly regenerating the files for reading, they aren't cleaned + * as part of the benchmark. If the files exist, a message will be printed and they will not be regenerated. 
+ */ @Setup(Level.Trial) public void setup() { pageChecksumDataGenerator.generateAll(); } - @Setup(Level.Trial) - public void cleanup() { - pageChecksumDataGenerator.cleanup(); - } - private void readFile(Path file, int nRows, boolean verifyChecksums, Blackhole blackhole) throws IOException { try (ParquetReader reader = ParquetReader.builder(new GroupReadSupport(), file) diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumWriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumWriteBenchmarks.java index c743dde01e..f02212a653 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumWriteBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumWriteBenchmarks.java @@ -57,7 +57,7 @@ public class PageChecksumWriteBenchmarks { private PageChecksumDataGenerator pageChecksumDataGenerator = new PageChecksumDataGenerator(); @Setup(Level.Iteration) - public void cleanup() { + public void setup() { pageChecksumDataGenerator.cleanup(); } diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java index 4a91234e6f..e53e71a371 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java @@ -21,7 +21,12 @@ import org.apache.hadoop.fs.Path; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.infra.Blackhole; import org.apache.parquet.example.data.Group; import 
org.apache.parquet.hadoop.ParquetReader; @@ -31,7 +36,9 @@ import java.io.IOException; +@State(Scope.Benchmark) public class ReadBenchmarks { + private void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOException { ParquetReader reader = ParquetReader.builder(new GroupReadSupport(), parquetFile).withConf(configuration).build(); @@ -49,6 +56,15 @@ private void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOExc reader.close(); } + /** + * This needs to be done exactly once. To avoid needlessly regenerating the files for reading, they aren't cleaned + * as part of the benchmark. If the files exist, a message will be printed and they will not be regenerated. + */ + @Setup(Level.Trial) + public void generateFilesForRead() { + new DataGenerator().generateAll(); + } + @Benchmark @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsDefaultBlockAndPageSizeUncompressed(Blackhole blackhole) throws IOException diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java index aea4eac5f3..5980b909eb 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java @@ -41,7 +41,7 @@ public class WriteBenchmarks { private DataGenerator dataGenerator = new DataGenerator(); @Setup(Level.Iteration) - public void cleanup() { + public void setup() { //clean existing test data at the beginning of each iteration dataGenerator.cleanup(); } From dee9f88368b6f47101b035abfbb5d0b1fe4f7501 Mon Sep 17 00:00:00 2001 From: Ryan Skraba Date: Thu, 5 Sep 2019 18:09:27 +0200 Subject: [PATCH 4/8] Add logger (same as parquet-cli). 
--- .../src/main/resources/log4j.properties | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 parquet-benchmarks/src/main/resources/log4j.properties diff --git a/parquet-benchmarks/src/main/resources/log4j.properties b/parquet-benchmarks/src/main/resources/log4j.properties new file mode 100644 index 0000000000..f4737c8808 --- /dev/null +++ b/parquet-benchmarks/src/main/resources/log4j.properties @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +log4j.rootLogger=INFO, stdout + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p :: %m [%C]%n From 4d8d2ecdcc72bc25216f757a22a4458d0a4a8a4b Mon Sep 17 00:00:00 2001 From: Ryan Skraba Date: Thu, 5 Sep 2019 18:09:50 +0200 Subject: [PATCH 5/8] Rewrite run.sh to run 'suites'. 
--- parquet-benchmarks/README.md | 36 +++++++++--- parquet-benchmarks/run.sh | 88 ++++++++++++++++++++++++++--- parquet-benchmarks/run_checksums.sh | 28 --------- 3 files changed, 108 insertions(+), 44 deletions(-) delete mode 100755 parquet-benchmarks/run_checksums.sh diff --git a/parquet-benchmarks/README.md b/parquet-benchmarks/README.md index 882b1cbf63..63101bd1b5 100644 --- a/parquet-benchmarks/README.md +++ b/parquet-benchmarks/README.md @@ -17,22 +17,42 @@ ~ under the License. --> -##Running Parquet Benchmarks +# Running Parquet Benchmarks -First, build the ``parquet-benchmarks`` module +The Parquet benchmarks in this module are run using the +[OpenJDK Java Microbenchmarking Harness](http://openjdk.java.net/projects/code-tools/jmh/). + +First, building the `parquet-benchmarks` module creates an uber-jar including the Parquet +classes and all dependencies, and a main class to launch the JMH tool. ``` mvn --projects parquet-benchmarks -amd -DskipTests -Denforcer.skip=true clean package ``` -Then, you can run all the benchmarks with the following command +JMH doesn't have the notion of "benchmark suites", but there are certain benchmarks that +make sense to group together or to run in isolation during development. The +`./parquet-benchmarks/run.sh` script can be used to launch all or some benchmarks: ``` -./parquet-benchmarks/run.sh -wi 5 -i 5 -f 3 -``` +# More information about the run script and the available arguments. +./parquet-benchmarks/run.sh -To understand what each command line argument means and for more arguments please see +# More information on the JMH options available. +./parquet-benchmarks/run.sh all -help +# Run every benchmark once (~20 minutes). +./parquet-benchmarks/run.sh all -wi 0 -i 1 -f 1 + +# A more rigorous run of all benchmarks, saving a report for comparison. 
+./parquet-benchmarks/run.sh all -wi 5 -i 5 -f 3 -rff /tmp/benchmark1.json + +# Run a benchmark "suite" built into the script, with JMH defaults (about 30 minutes) +./parquet-benchmarks/run.sh checksum + +# Running one specific benchmark using a regex. +./parquet-benchmarks/run.sh all org.apache.parquet.benchmarks.NestedNullWritingBenchmarks + +# Manually clean up any state left behind from a previous run. +./parquet-benchmarks/run.sh clean ``` -java -jar parquet-benchmarks/target/parquet-benchmarks.jar -help -``` + diff --git a/parquet-benchmarks/run.sh b/parquet-benchmarks/run.sh index b3d1d52a51..9ab1731aa3 100755 --- a/parquet-benchmarks/run.sh +++ b/parquet-benchmarks/run.sh @@ -20,11 +20,83 @@ SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P ) -echo "Starting WRITE benchmarks" -java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.WriteBenchmarks "$@" -echo "Generating test data" -java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator generate -echo "Data generated, starting READ benchmarks" -java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.ReadBenchmarks "$@" -echo "Cleaning up generated data" -java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator cleanup +BENCHMARK=$1; shift +JMH_OPTIONS="$@" + +if [ -z "$BENCHMARK" ]; then + + # Print usage if run without arguments. + cat << EOF +Runs Parquet JMH-based benchmarks. + +Usage: + run.sh [JMH_OPTIONS] + +Information on the JMH_OPTIONS can be found by running: run.sh all -help + + | Description +----------- | ---------- +all | Runs all benchmarks in the module (listed here and others). +build | (No benchmark run, shortcut to rebuild the JMH uber jar). +clean | (No benchmark run, shortcut to clean up any temporary files). +read | Reading files with different compression, page and block sizes. +write | Writing files. 
+checksum | Reading and writing with and without CRC checksums. +filter | Filtering column indexes + +Examples: + +# More information about the run script and the available arguments. +./parquet-benchmarks/run.sh + +# More information on the JMH options available. +./parquet-benchmarks/run.sh all -help + +# Run every benchmark once (~20 minutes). +./parquet-benchmarks/run.sh all -wi 0 -i 1 -f 1 + +# A more rigorous run of all benchmarks, saving a report for comparison. +./parquet-benchmarks/run.sh all -wi 5 -i 5 -f 3 -rff /tmp/benchmark1.json + +# Run a benchmark "suite" built into the script, with JMH defaults (about 30 minutes) +./parquet-benchmarks/run.sh checksum + +# Running one specific benchmark using a regex. +./parquet-benchmarks/run.sh all org.apache.parquet.benchmarks.NestedNullWritingBenchmarks + +EOF + +elif [ "$BENCHMARK" == "build" ]; then + + # Shortcut utility to rebuild the benchmark module only. + ( cd $SCRIPT_PATH && mvn -amd -DskipTests -Denforcer.skip=true clean package ) + +elif [ "$BENCHMARK" == "clean" ]; then + + # Shortcut utility to clean any state left behind from any previous run. + java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator cleanup java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.PageChecksumReadBenchmarks -bm ss "$@" + +else + + # Actually run a benchmark in the JMH harness. + + # Pick a regex if specified. 
+ BENCHMARK_REGEX="" + case "$BENCHMARK" in + "read") + BENCHMARK_REGEX="org.apache.parquet.benchmarks.ReadBenchmarks" + ;; + "write") + BENCHMARK_REGEX="org.apache.parquet.benchmarks.WriteBenchmarks" + ;; + "checksum") + BENCHMARK_REGEX="org.apache.parquet.benchmarks.PageChecksum.*" + ;; + "filter") + BENCHMARK_REGEX="org.apache.parquet.benchmarks.FilteringBenchmarks" + ;; + esac + + echo JMH command: java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar $BENCHMARK_REGEX $JMH_OPTIONS + java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar $BENCHMARK_REGEX $JMH_OPTIONS +fi diff --git a/parquet-benchmarks/run_checksums.sh b/parquet-benchmarks/run_checksums.sh deleted file mode 100755 index 37bb4f5c42..0000000000 --- a/parquet-benchmarks/run_checksums.sh +++ /dev/null @@ -1,28 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -# !/usr/bin/env bash - -SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P ) - -echo "Page level CRC checksum benchmarks" -echo "Running write benchmarks" -java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.PageChecksumWriteBenchmarks -bm ss "$@" -echo "Running read benchmarks" -java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.PageChecksumReadBenchmarks -bm ss "$@" From 7b02afd3dac550b5e51137476e4c48517d967188 Mon Sep 17 00:00:00 2001 From: Ryan Skraba Date: Fri, 6 Sep 2019 11:40:58 +0200 Subject: [PATCH 6/8] Fix typo with extra arguments on clean. --- parquet-benchmarks/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet-benchmarks/run.sh b/parquet-benchmarks/run.sh index 9ab1731aa3..cc9f334487 100755 --- a/parquet-benchmarks/run.sh +++ b/parquet-benchmarks/run.sh @@ -74,7 +74,7 @@ elif [ "$BENCHMARK" == "build" ]; then elif [ "$BENCHMARK" == "clean" ]; then # Shortcut utility to clean any state left behind from any previous run. - java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator cleanup java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.PageChecksumReadBenchmarks -bm ss "$@" + java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator cleanup else From cd0073c39ae419553383e088386512de171a13a8 Mon Sep 17 00:00:00 2001 From: Ryan Skraba Date: Tue, 24 Sep 2019 11:05:52 +0200 Subject: [PATCH 7/8] Annotations on one line. 
--- .../benchmarks/FilteringBenchmarks.java | 2 +- .../PageChecksumReadBenchmarks.java | 54 ++++++++++++------- .../PageChecksumWriteBenchmarks.java | 54 ++++++++++++------- .../parquet/benchmarks/ReadBenchmarks.java | 21 +++++--- .../parquet/benchmarks/WriteBenchmarks.java | 21 +++++--- 5 files changed, 101 insertions(+), 51 deletions(-) diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/FilteringBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/FilteringBenchmarks.java index 76802c5fd1..ac47f7bc2f 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/FilteringBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/FilteringBenchmarks.java @@ -87,7 +87,7 @@ @Warmup(iterations = 10, batchSize = 1) @Measurement(iterations = 50, batchSize = 1) @OutputTimeUnit(MILLISECONDS) -public class FilteringBenchmarks { +public class FilteringBenchmarks { private static final int RECORD_COUNT = 2_000_000; private static final Logger LOGGER = LoggerFactory.getLogger(FilteringBenchmarks.class); diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumReadBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumReadBenchmarks.java index b8c3ffee1f..be2ebe40f7 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumReadBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumReadBenchmarks.java @@ -81,96 +81,114 @@ private void readFile(Path file, int nRows, boolean verifyChecksums, Blackhole b // 100k rows, uncompressed, GZIP, Snappy - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read100KRowsUncompressedWithoutVerification(Blackhole blackhole) throws IOException { readFile(file_100K_CHECKSUMS_UNCOMPRESSED, 100 * ONE_K, false, blackhole); } - @Benchmark 
@BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read100KRowsUncompressedWithVerification(Blackhole blackhole) throws IOException { readFile(file_100K_CHECKSUMS_UNCOMPRESSED, 100 * ONE_K, true, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read100KRowsGzipWithoutVerification(Blackhole blackhole) throws IOException { readFile(file_100K_CHECKSUMS_GZIP, 100 * ONE_K, false, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read100KRowsGzipWithVerification(Blackhole blackhole) throws IOException { readFile(file_100K_CHECKSUMS_GZIP, 100 * ONE_K, true, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read100KRowsSnappyWithoutVerification(Blackhole blackhole) throws IOException { readFile(file_100K_CHECKSUMS_SNAPPY, 100 * ONE_K, false, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read100KRowsSnappyWithVerification(Blackhole blackhole) throws IOException { readFile(file_100K_CHECKSUMS_SNAPPY, 100 * ONE_K, true, blackhole); } // 1M rows, uncompressed, GZIP, Snappy - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsUncompressedWithoutVerification(Blackhole blackhole) throws IOException { readFile(file_1M_CHECKSUMS_UNCOMPRESSED, ONE_MILLION, false, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsUncompressedWithVerification(Blackhole blackhole) throws IOException { readFile(file_1M_CHECKSUMS_UNCOMPRESSED, ONE_MILLION, true, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void 
read1MRowsGzipWithoutVerification(Blackhole blackhole) throws IOException { readFile(file_1M_CHECKSUMS_GZIP, ONE_MILLION, false, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsGzipWithVerification(Blackhole blackhole) throws IOException { readFile(file_1M_CHECKSUMS_GZIP, ONE_MILLION, true, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsSnappyWithoutVerification(Blackhole blackhole) throws IOException { readFile(file_1M_CHECKSUMS_SNAPPY, ONE_MILLION, false, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsSnappyWithVerification(Blackhole blackhole) throws IOException { readFile(file_1M_CHECKSUMS_SNAPPY, ONE_MILLION, true, blackhole); } // 10M rows, uncompressed, GZIP, Snappy - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read10MRowsUncompressedWithoutVerification(Blackhole blackhole) throws IOException { readFile(file_10M_CHECKSUMS_UNCOMPRESSED, 10 * ONE_MILLION, false, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read10MRowsUncompressedWithVerification(Blackhole blackhole) throws IOException { readFile(file_10M_CHECKSUMS_UNCOMPRESSED, 10 * ONE_MILLION, true, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read10MRowsGzipWithoutVerification(Blackhole blackhole) throws IOException { readFile(file_10M_CHECKSUMS_GZIP, 10 * ONE_MILLION, false, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read10MRowsGzipWithVerification(Blackhole blackhole) throws IOException { readFile(file_10M_CHECKSUMS_GZIP, 10 * ONE_MILLION, 
true, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read10MRowsSnappyWithoutVerification(Blackhole blackhole) throws IOException { readFile(file_10M_CHECKSUMS_SNAPPY, 10 * ONE_MILLION, false, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read10MRowsSnappyWithVerification(Blackhole blackhole) throws IOException { readFile(file_10M_CHECKSUMS_SNAPPY, 10 * ONE_MILLION, true, blackhole); } diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumWriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumWriteBenchmarks.java index f02212a653..e892d53a76 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumWriteBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/PageChecksumWriteBenchmarks.java @@ -63,96 +63,114 @@ public void setup() { // 100k rows, uncompressed, GZIP, Snappy - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write100KRowsUncompressedWithoutChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_100K_NOCHECKSUMS_UNCOMPRESSED, 100 * ONE_K, false, UNCOMPRESSED); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write100KRowsUncompressedWithChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_100K_CHECKSUMS_UNCOMPRESSED, 100 * ONE_K, true, UNCOMPRESSED); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write100KRowsGzipWithoutChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_100K_NOCHECKSUMS_GZIP, 100 * ONE_K, false, GZIP); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + 
@BenchmarkMode(Mode.SingleShotTime) public void write100KRowsGzipWithChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_100K_CHECKSUMS_GZIP, 100 * ONE_K, true, GZIP); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write100KRowsSnappyWithoutChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_100K_NOCHECKSUMS_SNAPPY, 100 * ONE_K, false, SNAPPY); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write100KRowsSnappyWithChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_100K_CHECKSUMS_SNAPPY, 100 * ONE_K, true, SNAPPY); } // 1M rows, uncompressed, GZIP, Snappy - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsUncompressedWithoutChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_1M_NOCHECKSUMS_UNCOMPRESSED, ONE_MILLION, false, UNCOMPRESSED); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsUncompressedWithChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_1M_CHECKSUMS_UNCOMPRESSED, ONE_MILLION, true, UNCOMPRESSED); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsGzipWithoutChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_1M_NOCHECKSUMS_GZIP, ONE_MILLION, false, GZIP); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsGzipWithChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_1M_CHECKSUMS_GZIP, ONE_MILLION, true, GZIP); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsSnappyWithoutChecksums() 
throws IOException { pageChecksumDataGenerator.generateData(file_1M_NOCHECKSUMS_SNAPPY, ONE_MILLION, false, SNAPPY); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsSnappyWithChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_1M_CHECKSUMS_SNAPPY, ONE_MILLION, true, SNAPPY); } // 10M rows, uncompressed, GZIP, Snappy - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write10MRowsUncompressedWithoutChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_10M_NOCHECKSUMS_UNCOMPRESSED, 10 * ONE_MILLION, false, UNCOMPRESSED); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write10MRowsUncompressedWithChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_10M_CHECKSUMS_UNCOMPRESSED, 10 * ONE_MILLION, true, UNCOMPRESSED); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write10MRowsGzipWithoutChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_10M_NOCHECKSUMS_GZIP, 10 * ONE_MILLION, false, GZIP); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write10MRowsGzipWithChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_10M_CHECKSUMS_GZIP, 10 * ONE_MILLION, true, GZIP); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write10MRowsSnappyWithoutChecksums() throws IOException { pageChecksumDataGenerator.generateData(file_10M_NOCHECKSUMS_SNAPPY, 10 * ONE_MILLION, false, SNAPPY); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write10MRowsSnappyWithChecksums() throws IOException { 
pageChecksumDataGenerator.generateData(file_10M_CHECKSUMS_SNAPPY, 10 * ONE_MILLION, true, SNAPPY); } diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java index e53e71a371..e74204a69d 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java @@ -65,35 +65,40 @@ public void generateFilesForRead() { new DataGenerator().generateAll(); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsDefaultBlockAndPageSizeUncompressed(Blackhole blackhole) throws IOException { read(file_1M, ONE_MILLION, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsBS256MPS4MUncompressed(Blackhole blackhole) throws IOException { read(file_1M_BS256M_PS4M, ONE_MILLION, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsBS256MPS8MUncompressed(Blackhole blackhole) throws IOException { read(file_1M_BS256M_PS8M, ONE_MILLION, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsBS512MPS4MUncompressed(Blackhole blackhole) throws IOException { read(file_1M_BS512M_PS4M, ONE_MILLION, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsBS512MPS8MUncompressed(Blackhole blackhole) throws IOException { @@ -108,14 +113,16 @@ public void read1MRowsBS512MPS8MUncompressed(Blackhole blackhole) // read(parquetFile_1M_LZO, ONE_MILLION, blackhole); // } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void 
read1MRowsDefaultBlockAndPageSizeSNAPPY(Blackhole blackhole) throws IOException { read(file_1M_SNAPPY, ONE_MILLION, blackhole); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void read1MRowsDefaultBlockAndPageSizeGZIP(Blackhole blackhole) throws IOException { diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java index 5980b909eb..0a2d2c058b 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java @@ -46,7 +46,8 @@ public void setup() { dataGenerator.cleanup(); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsDefaultBlockAndPageSizeUncompressed() throws IOException { @@ -60,7 +61,8 @@ public void write1MRowsDefaultBlockAndPageSizeUncompressed() ONE_MILLION); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsBS256MPS4MUncompressed() throws IOException { @@ -74,7 +76,8 @@ public void write1MRowsBS256MPS4MUncompressed() ONE_MILLION); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsBS256MPS8MUncompressed() throws IOException { @@ -88,7 +91,8 @@ public void write1MRowsBS256MPS8MUncompressed() ONE_MILLION); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsBS512MPS4MUncompressed() throws IOException { @@ -102,7 +106,8 @@ public void write1MRowsBS512MPS4MUncompressed() ONE_MILLION); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsBS512MPS8MUncompressed() throws IOException { @@ -131,7 
+136,8 @@ public void write1MRowsBS512MPS8MUncompressed() // ONE_MILLION); // } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsDefaultBlockAndPageSizeSNAPPY() throws IOException { @@ -145,7 +151,8 @@ public void write1MRowsDefaultBlockAndPageSizeSNAPPY() ONE_MILLION); } - @Benchmark @BenchmarkMode(Mode.SingleShotTime) + @Benchmark + @BenchmarkMode(Mode.SingleShotTime) public void write1MRowsDefaultBlockAndPageSizeGZIP() throws IOException { From d383c26a944b842b912d8ab2a1b03bd09dbfe75b Mon Sep 17 00:00:00 2001 From: Ryan Skraba Date: Tue, 24 Sep 2019 11:06:48 +0200 Subject: [PATCH 8/8] Auto-build and auto-clean around benchmark run. --- parquet-benchmarks/run.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/parquet-benchmarks/run.sh b/parquet-benchmarks/run.sh index cc9f334487..ba407662d2 100755 --- a/parquet-benchmarks/run.sh +++ b/parquet-benchmarks/run.sh @@ -69,7 +69,7 @@ EOF elif [ "$BENCHMARK" == "build" ]; then # Shortcut utility to rebuild the benchmark module only. - ( cd $SCRIPT_PATH && mvn -amd -DskipTests -Denforcer.skip=true clean package ) + ( cd $SCRIPT_PATH && mvn -amd -DskipTests -Denforcer.skip=true clean package ) elif [ "$BENCHMARK" == "clean" ]; then @@ -80,6 +80,11 @@ else # Actually run a benchmark in the JMH harness. + # Build the benchmark uberjar if it doesn't already exist. + if [ ! -f ${SCRIPT_PATH}/target/parquet-benchmarks.jar ]; then + ${SCRIPT_PATH}/run.sh build + fi + # Pick a regex if specified. BENCHMARK_REGEX="" case "$BENCHMARK" in @@ -99,4 +104,7 @@ else echo JMH command: java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar $BENCHMARK_REGEX $JMH_OPTIONS java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar $BENCHMARK_REGEX $JMH_OPTIONS + + # Clean any data files generated by the benchmarks. + ${SCRIPT_PATH}/run.sh clean fi