From 631df4b1944c73a4c63b7dcb9873358671a07baf Mon Sep 17 00:00:00 2001
From: seancxmao <seancxmao@gmail.com>
Date: Thu, 20 Sep 2018 22:48:21 +0800
Subject: [PATCH] [SPARK-25487][SQL][TEST] Refactor PrimitiveArrayBenchmark

---
 .../PrimitiveArrayBenchmark-results.txt       | 13 +++++
 .../benchmark/PrimitiveArrayBenchmark.scala   | 47 +++++++++----------
 2 files changed, 35 insertions(+), 25 deletions(-)
 create mode 100644 sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt
diff --git a/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt
new file mode 100644
index 0000000000000..b06b5c092b61a
--- /dev/null
+++ b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt
@@ -0,0 +1,13 @@
+================================================================================================
+Write primitive arrays in dataset
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6
+Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz
+
+Write an array in Dataset:               Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Int                                            437 /  529         19.2          52.1       1.0X
+Double                                         638 /  670         13.1          76.1       0.7X
+
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala
index e7c8f2717fd74..7f467d161081a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala
@@ -17,21 +17,30 @@
 
 package org.apache.spark.sql.execution.benchmark
 
-import scala.concurrent.duration._
-
-import org.apache.spark.SparkConf
-import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.util.Benchmark
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.util.{Benchmark, BenchmarkBase => FileBenchmarkBase}
 
 /**
- * Benchmark [[PrimitiveArray]] for DataFrame and Dataset program using primitive array
- * To run this:
- *  1. replace ignore(...) with test(...)
- *  2. build/sbt "sql/test-only *benchmark.PrimitiveArrayBenchmark"
- *
- * Benchmarks in this file are skipped in normal builds.
+ * Benchmark for DataFrame and Dataset programs that use primitive arrays.
+ * To run this benchmark:
+ * 1. without sbt: bin/spark-submit --class <this class> <spark sql test jar>
+ * 2. build/sbt "sql/test:runMain <this class>"
+ * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
+ *    Results will be written to "benchmarks/PrimitiveArrayBenchmark-results.txt".
  */
-class PrimitiveArrayBenchmark extends BenchmarkBase {
+object PrimitiveArrayBenchmark extends FileBenchmarkBase {
+  lazy val sparkSession = SparkSession.builder // lazy: the session is built on first access
+    .master("local[1]") // run locally with a single worker thread
+    .appName("microbenchmark")
+    .config("spark.sql.shuffle.partitions", 1)
+    .config("spark.sql.autoBroadcastJoinThreshold", 1) // NOTE(review): presumably 1 byte, to avoid broadcast joins - confirm
+    .getOrCreate() // returns an existing session if there is one, otherwise creates it
+
+  override def benchmark(): Unit = { // overrides the entry point declared by the benchmark base
+    runBenchmark("Write primitive arrays in dataset") { // string is the suite title
+      writeDatasetArray(4) // iters = 4
+    }
+  }
 
   def writeDatasetArray(iters: Int): Unit = {
     import sparkSession.implicits._
@@ -62,21 +71,9 @@ class PrimitiveArrayBenchmark extends BenchmarkBase {
       }
     }
 
-    val benchmark = new Benchmark("Write an array in Dataset", count * iters)
+    val benchmark = new Benchmark("Write an array in Dataset", count * iters, output = output)
     benchmark.addCase("Int   ")(intArray)
     benchmark.addCase("Double")(doubleArray)
     benchmark.run
-    /*
-    OpenJDK 64-Bit Server VM 1.8.0_91-b14 on Linux 4.4.11-200.fc22.x86_64
-    Intel Xeon E3-12xx v2 (Ivy Bridge)
-    Write an array in Dataset:               Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Int                                            352 /  401         23.8          42.0       1.0X
-    Double                                         821 /  885         10.2          97.9       0.4X
-    */
-  }
-
-  ignore("Write an array in Dataset") {
-    writeDatasetArray(4)
   }
 }