From 131d17e5348bab8fea80192dd3ca573cb84e1888 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Fri, 6 Aug 2021 22:51:55 -0700 Subject: [PATCH] ORC-913: Support data/format/compress options in Spark benchmark --- .../orc/bench/core/convert/GenerateVariants.java | 2 +- .../org/apache/orc/bench/spark/SparkBenchmark.java | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java b/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java index 1c95434b8c..eb51627c2a 100644 --- a/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java +++ b/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java @@ -223,7 +223,7 @@ public static BatchReader createReader(Path root, } } - static CommandLine parseCommandLine(String[] args) throws ParseException { + public static CommandLine parseCommandLine(String[] args) throws ParseException { Options options = new Options() .addOption("h", "help", false, "Provide help") .addOption("c", "compress", true, "List of compression") diff --git a/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java b/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java index f01c140398..f9c817d342 100644 --- a/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java +++ b/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java @@ -19,6 +19,7 @@ package org.apache.orc.bench.spark; import com.google.auto.service.AutoService; +import org.apache.commons.cli.CommandLine; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -27,6 +28,7 @@ import org.apache.orc.bench.core.OrcBenchmark; import org.apache.orc.bench.core.IOCounters; import org.apache.orc.bench.core.Utilities; +import org.apache.orc.bench.core.convert.GenerateVariants; import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.avro.AvroFileFormat; import org.apache.spark.sql.catalyst.InternalRow; @@ -52,6 +54,7 @@ import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.OptionsBuilder; import scala.Function1; import java.io.IOException; @@ -87,7 +90,14 @@ public String getDescription() { @Override public void run(String[] args) throws Exception { - new Runner(Utilities.parseOptions(args, this.getClass())).run(); + CommandLine cmds = GenerateVariants.parseCommandLine(args); + new Runner(new OptionsBuilder() + .parent(Utilities.parseOptions(args, this.getClass())) + .param("compression", cmds.getOptionValue("compress", "none,gz,snappy").split(",")) + .param("dataset", cmds.getOptionValue("data", "taxi,sales,github").split(",")) + .param("format", cmds.getOptionValue("format", "orc,parquet,json").split(",")) + .build() + ).run(); } @State(Scope.Thread)