From 631df4b1944c73a4c63b7dcb9873358671a07baf Mon Sep 17 00:00:00 2001 From: seancxmao Date: Thu, 20 Sep 2018 22:48:21 +0800 Subject: [PATCH] [SPARK-25487][SQL][TEST] Refactor PrimitiveArrayBenchmark --- .../PrimitiveArrayBenchmark-results.txt | 13 +++++ .../benchmark/PrimitiveArrayBenchmark.scala | 47 +++++++++---------- 2 files changed, 35 insertions(+), 25 deletions(-) create mode 100644 sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt diff --git a/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt new file mode 100644 index 0000000000000..b06b5c092b61a --- /dev/null +++ b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt @@ -0,0 +1,13 @@ +================================================================================================ +Write primitive arrays in dataset +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz + +Write an array in Dataset: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Int 437 / 529 19.2 52.1 1.0X +Double 638 / 670 13.1 76.1 0.7X + + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala index e7c8f2717fd74..7f467d161081a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala @@ -17,21 +17,30 @@ package org.apache.spark.sql.execution.benchmark -import scala.concurrent.duration._ - -import org.apache.spark.SparkConf -import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.util.Benchmark +import 
org.apache.spark.sql.SparkSession +import org.apache.spark.util.{Benchmark, BenchmarkBase => FileBenchmarkBase} /** - * Benchmark [[PrimitiveArray]] for DataFrame and Dataset program using primitive array - * To run this: - * 1. replace ignore(...) with test(...) - * 2. build/sbt "sql/test-only *benchmark.PrimitiveArrayBenchmark" - * - * Benchmarks in this file are skipped in normal builds. + * Benchmark primitive arrays via DataFrame and Dataset program using primitive arrays + * To run this benchmark: + * 1. without sbt: bin/spark-submit --class <this class> <spark sql test jar> + * 2. build/sbt "sql/test:runMain <this class>" + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>" + * Results will be written to "benchmarks/PrimitiveArrayBenchmark-results.txt". */ -class PrimitiveArrayBenchmark extends BenchmarkBase { +object PrimitiveArrayBenchmark extends FileBenchmarkBase { + lazy val sparkSession = SparkSession.builder + .master("local[1]") + .appName("microbenchmark") + .config("spark.sql.shuffle.partitions", 1) + .config("spark.sql.autoBroadcastJoinThreshold", 1) + .getOrCreate() + + override def benchmark(): Unit = { + runBenchmark("Write primitive arrays in dataset") { + writeDatasetArray(4) + } + } def writeDatasetArray(iters: Int): Unit = { import sparkSession.implicits._ @@ -62,21 +71,9 @@ class PrimitiveArrayBenchmark extends BenchmarkBase { } } - val benchmark = new Benchmark("Write an array in Dataset", count * iters) + val benchmark = new Benchmark("Write an array in Dataset", count * iters, output = output) benchmark.addCase("Int ")(intArray) benchmark.addCase("Double")(doubleArray) benchmark.run - /* - OpenJDK 64-Bit Server VM 1.8.0_91-b14 on Linux 4.4.11-200.fc22.x86_64 - Intel Xeon E3-12xx v2 (Ivy Bridge) - Write an array in Dataset: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Int 352 / 401 23.8 42.0 1.0X - Double 821 / 885 10.2 97.9 0.4X - */ - } - - 
ignore("Write an array in Dataset") { - writeDatasetArray(4) } }