From 21f9c32dc685d7d3a3d637ac6b7ea0c66f2d8681 Mon Sep 17 00:00:00 2001 From: seancxmao Date: Thu, 20 Sep 2018 23:32:17 +0800 Subject: [PATCH] [SPARK-25489] Refactor UDTSerializationBenchmark --- .../UDTSerializationBenchmark-results.txt | 13 ++++ .../linalg/UDTSerializationBenchmark.scala | 70 ++++++++++--------- 2 files changed, 49 insertions(+), 34 deletions(-) create mode 100644 mllib/benchmarks/UDTSerializationBenchmark-results.txt diff --git a/mllib/benchmarks/UDTSerializationBenchmark-results.txt b/mllib/benchmarks/UDTSerializationBenchmark-results.txt new file mode 100644 index 0000000000000..169f4c60c748e --- /dev/null +++ b/mllib/benchmarks/UDTSerializationBenchmark-results.txt @@ -0,0 +1,13 @@ +================================================================================================ +VectorUDT de/serialization +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz + +VectorUDT de/serialization: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +serialize 144 / 206 0.0 143979.7 1.0X +deserialize 114 / 135 0.0 113802.6 1.3X + + diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala index e2976e1ab022b..1a2216ea070c4 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala @@ -17,53 +17,55 @@ package org.apache.spark.mllib.linalg -import org.apache.spark.benchmark.Benchmark +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder /** * Serialization benchmark for VectorUDT. 
+ * To run this benchmark: + * {{{ + * 1. without sbt: bin/spark-submit --class <this class> <spark mllib test jar> + * 2. build/sbt "mllib/test:runMain <this class>" + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "mllib/test:runMain <this class>" + * Results will be written to "benchmarks/UDTSerializationBenchmark-results.txt". + * }}} */ -object UDTSerializationBenchmark { +object UDTSerializationBenchmark extends BenchmarkBase { - def main(args: Array[String]): Unit = { - val iters = 1e2.toInt - val numRows = 1e3.toInt + override def benchmark(): Unit = { - val encoder = ExpressionEncoder[Vector].resolveAndBind() + runBenchmark("VectorUDT de/serialization") { + val iters = 1e2.toInt + val numRows = 1e3.toInt - val vectors = (1 to numRows).map { i => - Vectors.dense(Array.fill(1e5.toInt)(1.0 * i)) - }.toArray - val rows = vectors.map(encoder.toRow) + val encoder = ExpressionEncoder[Vector].resolveAndBind() - val benchmark = new Benchmark("VectorUDT de/serialization", numRows, iters) + val vectors = (1 to numRows).map { i => + Vectors.dense(Array.fill(1e5.toInt)(1.0 * i)) + }.toArray + val rows = vectors.map(encoder.toRow) - benchmark.addCase("serialize") { _ => - var sum = 0 - var i = 0 - while (i < numRows) { - sum += encoder.toRow(vectors(i)).numFields - i += 1 + val benchmark = new Benchmark("VectorUDT de/serialization", numRows, iters, output = output) + + benchmark.addCase("serialize") { _ => + var sum = 0 + var i = 0 + while (i < numRows) { + sum += encoder.toRow(vectors(i)).numFields + i += 1 + } } - } - benchmark.addCase("deserialize") { _ => - var sum = 0 - var i = 0 - while (i < numRows) { - sum += encoder.fromRow(rows(i)).numActives - i += 1 + benchmark.addCase("deserialize") { _ => + var sum = 0 + var i = 0 + while (i < numRows) { + sum += encoder.fromRow(rows(i)).numActives + i += 1 + } } - } - /* - OpenJDK 64-Bit Server VM 1.8.0_91-b14 on Linux 4.4.11-200.fc22.x86_64 - Intel Xeon E3-12xx v2 (Ivy Bridge) - VectorUDT de/serialization: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - 
------------------------------------------------------------------------------------------------ - serialize 265 / 318 0.0 265138.5 1.0X - deserialize 155 / 197 0.0 154611.4 1.7X - */ - benchmark.run() + benchmark.run() + } } }