From 3e6a058642043c9ed948da0c364ff8cfa005b4e6 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 6 Oct 2018 08:50:43 +0100 Subject: [PATCH 1/6] refactor HashByteArrayBenchmark --- .../HashByteArrayBenchmark-results.txt | 86 +++++++++++++ .../spark/sql/HashByteArrayBenchmark.scala | 118 ++++-------------- 2 files changed, 109 insertions(+), 95 deletions(-) create mode 100644 sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt diff --git a/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt b/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt new file mode 100644 index 0000000000000..9da21a2d7b1eb --- /dev/null +++ b/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt @@ -0,0 +1,86 @@ +================================================================================================ +Benchmark for MurMurHash 3 and xxHash64 +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Hash byte arrays with length 8: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Murmur3_x86_32 12 / 15 181.7 5.5 1.0X +xxHash 64-bit 14 / 18 146.0 6.8 0.8X +HiveHasher 11 / 13 183.0 5.5 1.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Hash byte arrays with length 16: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Murmur3_x86_32 18 / 22 113.4 8.8 1.0X +xxHash 64-bit 17 / 24 120.3 8.3 1.1X +HiveHasher 21 / 31 97.9 10.2 0.9X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Hash byte arrays with length 24: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------ +Murmur3_x86_32 25 / 27 84.8 11.8 1.0X +xxHash 64-bit 20 / 22 103.9 9.6 1.2X +HiveHasher 32 / 34 65.0 15.4 0.8X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Hash byte arrays with length 31: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Murmur3_x86_32 36 / 38 58.5 17.1 1.0X +xxHash 64-bit 29 / 31 73.2 13.7 1.3X +HiveHasher 43 / 46 48.7 20.5 0.8X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Hash byte arrays with length 95: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Murmur3_x86_32 87 / 92 24.2 41.3 1.0X +xxHash 64-bit 57 / 59 37.1 26.9 1.5X +HiveHasher 163 / 167 12.9 77.6 0.5X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Hash byte arrays with length 287: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Murmur3_x86_32 263 / 271 8.0 125.3 1.0X +xxHash 64-bit 95 / 100 22.1 45.3 2.8X +HiveHasher 537 / 540 3.9 256.0 0.5X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Hash byte arrays with length 1055: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Murmur3_x86_32 976 / 1065 2.1 465.2 1.0X +xxHash 64-bit 233 / 248 9.0 110.9 4.2X +HiveHasher 2054 / 2054 1.0 979.3 0.5X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Hash byte arrays 
with length 2079: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Murmur3_x86_32 1874 / 1878 1.1 893.8 1.0X +xxHash 64-bit 412 / 428 5.1 196.6 4.5X +HiveHasher 4044 / 4049 0.5 1928.5 0.5X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Hash byte arrays with length 8223: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Murmur3_x86_32 7744 / 7938 0.3 3692.8 1.0X +xxHash 64-bit 1871 / 1893 1.1 892.3 4.1X +HiveHasher 16043 / 16073 0.1 7650.1 0.5X + + diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala index a60eb20d9edef..cb468095aca19 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala @@ -19,15 +19,22 @@ package org.apache.spark.sql import java.util.Random -import org.apache.spark.benchmark.Benchmark +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.sql.catalyst.expressions.{HiveHasher, XXH64} import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.hash.Murmur3_x86_32 /** * Synthetic benchmark for MurMurHash 3 and xxHash64. + * To run this benchmark: + * {{{ + * 1. without sbt: bin/spark-submit --class + * 2. build/sbt "sql/test:runMain " + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * Results will be written to "benchmarks/HashByteArrayBenchmark-results.txt". 
+ * }}} */ -object HashByteArrayBenchmark { +object HashByteArrayBenchmark extends BenchmarkBase { def test(length: Int, seed: Long, numArrays: Int, iters: Int): Unit = { val random = new Random(seed) val arrays = Array.fill[Array[Byte]](numArrays) { @@ -36,8 +43,8 @@ object HashByteArrayBenchmark { bytes } - val benchmark = - new Benchmark("Hash byte arrays with length " + length, iters * numArrays.toLong) + val benchmark = new Benchmark( + "Hash byte arrays with length " + length, iters * numArrays.toLong, output = output) benchmark.addCase("Murmur3_x86_32") { _: Int => var sum = 0L for (_ <- 0L until iters) { @@ -74,96 +81,17 @@ object HashByteArrayBenchmark { benchmark.run() } - def main(args: Array[String]): Unit = { - /* - Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - Hash byte arrays with length 8: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Murmur3_x86_32 12 / 16 174.3 5.7 1.0X - xxHash 64-bit 17 / 22 120.0 8.3 0.7X - HiveHasher 13 / 15 162.1 6.2 0.9X - */ - test(8, 42L, 1 << 10, 1 << 11) - - /* - Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - Hash byte arrays with length 16: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Murmur3_x86_32 19 / 22 107.6 9.3 1.0X - xxHash 64-bit 20 / 24 104.6 9.6 1.0X - HiveHasher 24 / 28 87.0 11.5 0.8X - */ - test(16, 42L, 1 << 10, 1 << 11) - - /* - Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - Hash byte arrays with length 24: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Murmur3_x86_32 28 / 32 74.8 13.4 1.0X - xxHash 64-bit 24 / 29 87.3 11.5 1.2X - HiveHasher 36 / 41 57.7 17.3 0.8X - */ - test(24, 42L, 1 << 10, 1 << 11) - - // Add 31 to all arrays to create worse case alignment for xxHash. 
- /* - Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - Hash byte arrays with length 31: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Murmur3_x86_32 41 / 45 51.1 19.6 1.0X - xxHash 64-bit 36 / 44 58.8 17.0 1.2X - HiveHasher 49 / 54 42.6 23.5 0.8X - */ - test(31, 42L, 1 << 10, 1 << 11) - - /* - Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - Hash byte arrays with length 95: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Murmur3_x86_32 100 / 110 21.0 47.7 1.0X - xxHash 64-bit 74 / 78 28.2 35.5 1.3X - HiveHasher 189 / 196 11.1 90.3 0.5X - */ - test(64 + 31, 42L, 1 << 10, 1 << 11) - - /* - Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - Hash byte arrays with length 287: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Murmur3_x86_32 299 / 311 7.0 142.4 1.0X - xxHash 64-bit 113 / 122 18.5 54.1 2.6X - HiveHasher 620 / 624 3.4 295.5 0.5X - */ - test(256 + 31, 42L, 1 << 10, 1 << 11) - - /* - Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - Hash byte arrays with length 1055: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Murmur3_x86_32 1068 / 1070 2.0 509.1 1.0X - xxHash 64-bit 306 / 315 6.9 145.9 3.5X - HiveHasher 2316 / 2369 0.9 1104.3 0.5X - */ - test(1024 + 31, 42L, 1 << 10, 1 << 11) - - /* - Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - Hash byte arrays with length 2079: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Murmur3_x86_32 2252 / 2274 0.9 1074.1 1.0X - xxHash 64-bit 534 / 580 3.9 254.6 4.2X - HiveHasher 4739 / 4786 0.4 2259.8 0.5X - */ - test(2048 + 31, 42L, 1 << 10, 1 << 11) - - 
/* - Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz - Hash byte arrays with length 8223: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Murmur3_x86_32 9249 / 9586 0.2 4410.5 1.0X - xxHash 64-bit 2897 / 3241 0.7 1381.6 3.2X - HiveHasher 19392 / 20211 0.1 9246.6 0.5X - */ - test(8192 + 31, 42L, 1 << 10, 1 << 11) + override def runBenchmarkSuite(): Unit = { + runBenchmark("Benchmark for MurMurHash 3 and xxHash64") { + test(8, 42L, 1 << 10, 1 << 11) + test(16, 42L, 1 << 10, 1 << 11) + test(24, 42L, 1 << 10, 1 << 11) + test(31, 42L, 1 << 10, 1 << 11) + test(64 + 31, 42L, 1 << 10, 1 << 11) + test(256 + 31, 42L, 1 << 10, 1 << 11) + test(1024 + 31, 42L, 1 << 10, 1 << 11) + test(2048 + 31, 42L, 1 << 10, 1 << 11) + test(8192 + 31, 42L, 1 << 10, 1 << 11) + } } } From bdb0549c22bb49bff9aa358fa9dbdb81e7de217d Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 6 Oct 2018 17:38:15 +0100 Subject: [PATCH 2/6] Fix scala doc --- .../scala/org/apache/spark/sql/HashByteArrayBenchmark.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala index cb468095aca19..72426f37cef33 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala @@ -29,8 +29,9 @@ import org.apache.spark.unsafe.hash.Murmur3_x86_32 * To run this benchmark: * {{{ * 1. without sbt: bin/spark-submit --class - * 2. build/sbt "sql/test:runMain " - * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * 2. build/sbt "catalyst/test:runMain " + * 3. 
generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/test:runMain " * Results will be written to "benchmarks/HashByteArrayBenchmark-results.txt". * }}} */ From b5190d476762295415d80b9c47c6497d49295c26 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 6 Oct 2018 09:45:39 -0700 Subject: [PATCH 3/6] Update result (#17) --- .../HashByteArrayBenchmark-results.txt | 99 +++++++++---------- 1 file changed, 45 insertions(+), 54 deletions(-) diff --git a/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt b/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt index 9da21a2d7b1eb..a4304ee3b5f60 100644 --- a/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt +++ b/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt @@ -2,85 +2,76 @@ Benchmark for MurMurHash 3 and xxHash64 ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz - +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Hash byte arrays with length 8: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Murmur3_x86_32 12 / 15 181.7 5.5 1.0X -xxHash 64-bit 14 / 18 146.0 6.8 0.8X -HiveHasher 11 / 13 183.0 5.5 1.0X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Murmur3_x86_32 16 / 16 127.7 7.8 1.0X +xxHash 64-bit 23 / 23 90.7 11.0 0.7X +HiveHasher 16 / 16 134.8 7.4 1.1X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Hash byte arrays with length 16: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Murmur3_x86_32 18 / 22 113.4 
8.8 1.0X -xxHash 64-bit 17 / 24 120.3 8.3 1.1X -HiveHasher 21 / 31 97.9 10.2 0.9X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Murmur3_x86_32 26 / 26 79.5 12.6 1.0X +xxHash 64-bit 26 / 27 79.3 12.6 1.0X +HiveHasher 30 / 30 70.1 14.3 0.9X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Hash byte arrays with length 24: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Murmur3_x86_32 25 / 27 84.8 11.8 1.0X -xxHash 64-bit 20 / 22 103.9 9.6 1.2X -HiveHasher 32 / 34 65.0 15.4 0.8X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Murmur3_x86_32 36 / 36 58.1 17.2 1.0X +xxHash 64-bit 30 / 30 70.2 14.2 1.2X +HiveHasher 45 / 45 46.4 21.5 0.8X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Hash byte arrays with length 31: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Murmur3_x86_32 36 / 38 58.5 17.1 1.0X -xxHash 64-bit 29 / 31 73.2 13.7 1.3X -HiveHasher 43 / 46 48.7 20.5 0.8X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Murmur3_x86_32 50 / 50 41.8 23.9 1.0X +xxHash 64-bit 43 / 43 49.3 20.3 1.2X +HiveHasher 58 / 58 35.9 27.8 0.9X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Hash byte arrays with length 95: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Murmur3_x86_32 87 / 92 24.2 41.3 1.0X -xxHash 64-bit 57 / 59 37.1 26.9 1.5X -HiveHasher 163 / 167 12.9 77.6 0.5X - -Java 
HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Murmur3_x86_32 132 / 132 15.9 62.7 1.0X +xxHash 64-bit 79 / 79 26.7 37.5 1.7X +HiveHasher 198 / 199 10.6 94.6 0.7X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Hash byte arrays with length 287: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Murmur3_x86_32 263 / 271 8.0 125.3 1.0X -xxHash 64-bit 95 / 100 22.1 45.3 2.8X -HiveHasher 537 / 540 3.9 256.0 0.5X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Murmur3_x86_32 334 / 334 6.3 159.3 1.0X +xxHash 64-bit 126 / 126 16.7 59.9 2.7X +HiveHasher 633 / 634 3.3 302.0 0.5X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Hash byte arrays with length 1055: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Murmur3_x86_32 976 / 1065 2.1 465.2 1.0X -xxHash 64-bit 233 / 248 9.0 110.9 4.2X -HiveHasher 2054 / 2054 1.0 979.3 0.5X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Murmur3_x86_32 1149 / 1149 1.8 547.9 1.0X +xxHash 64-bit 327 / 327 6.4 155.9 3.5X +HiveHasher 2338 / 2346 0.9 1114.6 0.5X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Hash byte arrays with length 2079: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Murmur3_x86_32 1874 / 1878 1.1 893.8 1.0X -xxHash 64-bit 412 / 428 5.1 196.6 4.5X -HiveHasher 4044 / 4049 0.5 1928.5 0.5X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 
on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Murmur3_x86_32 2215 / 2216 0.9 1056.1 1.0X +xxHash 64-bit 554 / 554 3.8 264.0 4.0X +HiveHasher 4609 / 4609 0.5 2197.5 0.5X +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Hash byte arrays with length 8223: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -Murmur3_x86_32 7744 / 7938 0.3 3692.8 1.0X -xxHash 64-bit 1871 / 1893 1.1 892.3 4.1X -HiveHasher 16043 / 16073 0.1 7650.1 0.5X +Murmur3_x86_32 8633 / 8643 0.2 4116.3 1.0X +xxHash 64-bit 1891 / 1892 1.1 901.6 4.6X +HiveHasher 18206 / 18206 0.1 8681.3 0.5X From cc268caa70792cb1fa91bc3fd5e79687bc4cefde Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 6 Oct 2018 19:08:31 +0100 Subject: [PATCH 4/6] Fix scala doc --- .../scala/org/apache/spark/sql/HashByteArrayBenchmark.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala index 72426f37cef33..e042e805d8fd7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala @@ -28,7 +28,7 @@ import org.apache.spark.unsafe.hash.Murmur3_x86_32 * Synthetic benchmark for MurMurHash 3 and xxHash64. * To run this benchmark: * {{{ - * 1. without sbt: bin/spark-submit --class + * 1. without sbt: bin/spark-submit --class * 2. build/sbt "catalyst/test:runMain " * 3. 
generate result: * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/test:runMain " From 11f2bbec31749672ddaa3aeab5d9d0688ed5417d Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 7 Oct 2018 07:45:21 +0100 Subject: [PATCH 5/6] Fix scala doc --- .../scala/org/apache/spark/sql/HashByteArrayBenchmark.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala index e042e805d8fd7..8ef01ea643308 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala @@ -28,7 +28,8 @@ import org.apache.spark.unsafe.hash.Murmur3_x86_32 * Synthetic benchmark for MurMurHash 3 and xxHash64. * To run this benchmark: * {{{ - * 1. without sbt: bin/spark-submit --class + * 1. without sbt: + * bin/spark-submit --class --jars * 2. build/sbt "catalyst/test:runMain " * 3. generate result: * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/test:runMain " From 0a7741a491921b43e8f3a13af2e416cad1cb8e5c Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 7 Oct 2018 07:49:36 +0100 Subject: [PATCH 6/6] Fix scala doc --- .../scala/org/apache/spark/sql/HashByteArrayBenchmark.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala index 8ef01ea643308..7dc865d85af04 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala @@ -29,7 +29,7 @@ import org.apache.spark.unsafe.hash.Murmur3_x86_32 * To run this benchmark: * {{{ * 1. without sbt: - * bin/spark-submit --class --jars + * bin/spark-submit --class --jars * 2. 
build/sbt "catalyst/test:runMain <this class>" * 3. generate result: * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/test:runMain <this class>"