Skip to content
Permalink
Browse files

[SPARK-29518][SQL][TEST] Benchmark `date_part` for `INTERVAL`

### What changes were proposed in this pull request?
I extended `ExtractBenchmark` to support the `INTERVAL` type of the `source` parameter of the `date_part` function.

### Why are the changes needed?
- To detect performance regressions when the implementation of the `date_part` function changes in the future.
- To find out the current performance bottlenecks in `date_part` for the `INTERVAL` type.

### Does this PR introduce any user-facing change?
No

### How was this patch tested?
By running the benchmark and printing out the values produced for each `field` value.

Closes #26175 from MaxGekk/extract-interval-benchmark.

Authored-by: Maxim Gekk <max.gekk@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
  • Loading branch information
MaxGekk authored and HyukjinKwon committed Oct 22, 2019
1 parent 6ffec5e commit eef11ba9ef789c582f83c1796dbff0d234254fd6
@@ -98,3 +98,22 @@ MILLISECONDS of date 1744 1749
MICROSECONDS of date 1592 1594 1 6.3 159.2 0.6X
EPOCH of date 2368 2371 3 4.2 236.8 0.4X

Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.15
Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
Invoke date_part for interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
cast to interval 1365 1395 31 7.3 136.5 1.0X
MILLENNIUM of interval 1620 1651 27 6.2 162.0 0.8X
CENTURY of interval 1469 1487 22 6.8 146.9 0.9X
DECADE of interval 1462 1473 17 6.8 146.2 0.9X
YEAR of interval 1438 1447 8 7.0 143.8 0.9X
QUARTER of interval 1456 1458 3 6.9 145.6 0.9X
MONTH of interval 1440 1452 16 6.9 144.0 0.9X
DAY of interval 1478 1485 6 6.8 147.8 0.9X
HOUR of interval 1579 1580 3 6.3 157.9 0.9X
MINUTE of interval 1598 1605 11 6.3 159.8 0.9X
SECOND of interval 1571 1579 10 6.4 157.1 0.9X
MILLISECONDS of interval 1570 1577 6 6.4 157.0 0.9X
MICROSECONDS of interval 1484 1488 5 6.7 148.4 0.9X
EPOCH of interval 1521 1522 1 6.6 152.1 0.9X

@@ -61,8 +61,10 @@ object ExtractBenchmark extends SqlBasedBenchmark {
}

/**
 * Returns the SQL expression text that derives a value of the requested type from the
 * `id` column referenced in the generated query.
 *
 * @param from the target column type: "timestamp", "date" or "interval"
 * @return the SQL expression as a string
 * @throws IllegalArgumentException if `from` is not one of the supported column types
 */
private def castExpr(from: String): String = from match {
  case "timestamp" => "cast(id as timestamp)"
  case "date" => "cast(cast(id as timestamp) as date)"
  // The interval is written as the sum of a date subtraction and a timestamp subtraction,
  // both derived from `id`.
  case "interval" => "(cast(cast(id as timestamp) as date) - date'0001-01-01') + " +
    "(cast(id as timestamp) - timestamp'1000-01-01 01:02:03.123456')"
  // Keep the list of valid types in this message in sync with the cases above.
  case other => throw new IllegalArgumentException(
    s"Unsupported column type $other. " +
      "Valid column types are 'timestamp', 'date' and 'interval'")
}
@@ -74,8 +76,8 @@ object ExtractBenchmark extends SqlBasedBenchmark {
field: String,
from: String): Unit = {
val expr = func match {
case "extract" => s"EXTRACT($field FROM ${castExpr(from)})"
case "date_part" => s"DATE_PART('$field', ${castExpr(from)})"
case "extract" => s"EXTRACT($field FROM ${castExpr(from)}) AS $field"
case "date_part" => s"DATE_PART('$field', ${castExpr(from)}) AS $field"
case other => throw new IllegalArgumentException(
s"Unsupported function '$other'. Valid functions are 'extract' and 'date_part'.")
}
@@ -84,24 +86,36 @@ object ExtractBenchmark extends SqlBasedBenchmark {
}
}

/**
 * Per-column-type benchmark configuration.
 *
 * @param fields the field names to extract (e.g. "YEAR", "EPOCH")
 * @param func the SQL function names to benchmark (e.g. "extract", "date_part")
 * @param iterNum the row count handed to each `run` call for this column type
 */
private case class Settings(fields: Seq[String], func: Seq[String], iterNum: Long)

/**
 * Runs one benchmark per (column type, function) pair. Each benchmark first measures the
 * bare cast to the column type and then the extraction of every field configured for it.
 *
 * @param mainArgs command-line arguments; not used by this suite
 */
override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
  val N = 10000000L
  val datetimeFields = Seq(
    "MILLENNIUM", "CENTURY", "DECADE", "YEAR",
    "ISOYEAR", "QUARTER", "MONTH", "WEEK",
    "DAY", "DAYOFWEEK", "DOW", "ISODOW",
    "DOY", "HOUR", "MINUTE", "SECOND",
    "MILLISECONDS", "MICROSECONDS", "EPOCH")
  // The interval list omits the calendar-position fields (ISOYEAR, WEEK, DAYOFWEEK, DOW,
  // ISODOW, DOY) that appear in the datetime list.
  val intervalFields = Seq(
    "MILLENNIUM", "CENTURY", "DECADE", "YEAR",
    "QUARTER", "MONTH", "DAY",
    "HOUR", "MINUTE", "SECOND",
    "MILLISECONDS", "MICROSECONDS", "EPOCH")
  // Only `date_part` is benchmarked for intervals; timestamps and dates also use `extract`.
  val settings = Map(
    "timestamp" -> Settings(datetimeFields, Seq("extract", "date_part"), N),
    "date" -> Settings(datetimeFields, Seq("extract", "date_part"), N),
    "interval" -> Settings(intervalFields, Seq("date_part"), N))

  for {
    (dataType, Settings(fields, funcs, iterNum)) <- settings
    func <- funcs
  } {
    // Use the per-type row count for the benchmark itself as well as for its cases, so the
    // reported rate stays correct if a type is ever configured with a different `iterNum`.
    val benchmark = new Benchmark(s"Invoke $func for $dataType", iterNum, output = output)

    // Baseline case: cost of producing the column value without extracting any field.
    run(benchmark, iterNum, s"cast to $dataType", castExpr(dataType))
    fields.foreach(run(benchmark, func, iterNum, _, dataType))

    benchmark.run()
  }
}
}

0 comments on commit eef11ba

Please sign in to comment.
You can’t perform that action at this time.