From b56739b112d80bea82c04451e9861420570ed929 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 30 Dec 2022 21:04:58 -0800 Subject: [PATCH 1/2] [SPARK-41798][BUILD] Upgrade `hive-storage-api` to 2.8.1 --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index dbfac4bfb86dd..55bbdb456fc4b 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -100,7 +100,7 @@ hive-shims-0.23/2.3.9//hive-shims-0.23-2.3.9.jar hive-shims-common/2.3.9//hive-shims-common-2.3.9.jar hive-shims-scheduler/2.3.9//hive-shims-scheduler-2.3.9.jar hive-shims/2.3.9//hive-shims-2.3.9.jar -hive-storage-api/2.7.3//hive-storage-api-2.7.3.jar +hive-storage-api/2.8.1//hive-storage-api-2.8.1.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index caf6f1c3d7e8d..b90bf8eb59b2c 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -88,7 +88,7 @@ hive-shims-0.23/2.3.9//hive-shims-0.23-2.3.9.jar hive-shims-common/2.3.9//hive-shims-common-2.3.9.jar hive-shims-scheduler/2.3.9//hive-shims-scheduler-2.3.9.jar hive-shims/2.3.9//hive-shims-2.3.9.jar -hive-storage-api/2.7.3//hive-storage-api-2.7.3.jar +hive-storage-api/2.8.1//hive-storage-api-2.8.1.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar diff --git a/pom.xml b/pom.xml index c17833589c116..19bb76bc28deb 100644 --- a/pom.xml +++ b/pom.xml @@ -256,7 +256,7 @@ --> compile compile - 2.7.3 + 2.8.1 compile compile compile From f34a2055eb4b14bf9f0c4c60ba7bc2d8df9f8ec2 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 31 Dec 2022 00:42:39 -0800 Subject: [PATCH 2/2] Use toOldString from HIVE-24458 --- .../datasources/orc/OrcFilterSuite.scala | 19 ++++++++++--------- .../datasources/orc/OrcV1FilterSuite.scala | 5 ++++- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala index a7f9da84c1f1a..dc3d2a804607a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala @@ -24,7 +24,7 @@ import java.time.{Duration, LocalDateTime, Period} import scala.collection.JavaConverters._ -import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument} +import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument, SearchArgumentImpl} import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder import org.apache.spark.{SparkConf, SparkException} @@ -86,7 +86,8 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { (predicate: Predicate, stringExpr: String) (implicit df: DataFrame): Unit = { def checkLogicalOperator(filter: SearchArgument) = { - assert(filter.toString == stringExpr) + // HIVE-24458 changes toString output and provides `toOldString` for old style. + assert(filter.asInstanceOf[SearchArgumentImpl].toOldString == stringExpr) } checkFilterPredicate(df, predicate, checkLogicalOperator) } @@ -543,7 +544,7 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { OrcFilters.createFilter(schema, Array( LessThan("a", 10), StringContains("b", "prefix") - )).get.toString + )).get.asInstanceOf[SearchArgumentImpl].toOldString } // The `LessThan` should be converted while the whole inner `And` shouldn't @@ -554,7 +555,7 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { GreaterThan("a", 1), StringContains("b", "prefix") )) - )).get.toString + )).get.asInstanceOf[SearchArgumentImpl].toOldString } // Safely remove unsupported `StringContains` predicate and push down `LessThan` @@ -564,7 +565,7 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { LessThan("a", 10), StringContains("b", "prefix") ) - )).get.toString + )).get.asInstanceOf[SearchArgumentImpl].toOldString } // Safely remove unsupported `StringContains` predicate, push down `LessThan` and `GreaterThan`. @@ -578,7 +579,7 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { ), GreaterThan("a", 1) ) - )).get.toString + )).get.asInstanceOf[SearchArgumentImpl].toOldString } } @@ -601,7 +602,7 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { LessThan("a", 1) ) ) - )).get.toString + )).get.asInstanceOf[SearchArgumentImpl].toOldString } assertResult("leaf-0 = (LESS_THAN_EQUALS a 10), leaf-1 = (LESS_THAN a 1)," + @@ -617,7 +618,7 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { LessThan("a", 1) ) ) - )).get.toString + )).get.asInstanceOf[SearchArgumentImpl].toOldString } assert(OrcFilters.createFilter(schema, Array( @@ -639,7 +640,7 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { LessThan( "a", new java.math.BigDecimal(3.14, MathContext.DECIMAL64).setScale(2))) - ).get.toString + ).get.asInstanceOf[SearchArgumentImpl].toOldString } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala index d0032df488f47..4691fa580193d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala @@ -18,6 +18,8 @@ package org.apache.spark.sql.execution.datasources.orc import scala.collection.JavaConverters._ +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl + import org.apache.spark.SparkConf import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.catalyst.dsl.expressions._ @@ -74,7 +76,8 @@ class OrcV1FilterSuite extends OrcFilterSuite { (predicate: Predicate, stringExpr: String) (implicit df: DataFrame): Unit = { def checkLogicalOperator(filter: SearchArgument) = { - assert(filter.toString == stringExpr) + // HIVE-24458 changes toString format and provides `toOldString` for old style. + assert(filter.asInstanceOf[SearchArgumentImpl].toOldString == stringExpr) } checkFilterPredicate(df, predicate, checkLogicalOperator) }