Skip to content

Commit

Permalink
[SPARK-30238][SQL][2.4] hive partition pruning can only support strin…
Browse files Browse the repository at this point in the history
…g and integral types

backport #26871

-----

Check the partition column data type and only allow string and integral types in hive partition pruning.

Currently we only support string and integral types in hive partition pruning, but the check is done for literals. If the predicate is `InSet`, then there is no literal and we may pass an unsupported partition predicate to Hive and cause problems.

Closes #26876 from cloud-fan/backport.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
  • Loading branch information
cloud-fan authored and HyukjinKwon committed Dec 13, 2019
1 parent f4e95ca commit c411269
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 6 deletions.
Expand Up @@ -651,7 +651,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
}
}

object NonVarcharAttribute {
object SupportedAttribute {
// hive varchar is treated as catalyst string, but hive varchar can't be pushed down.
private val varcharKeys = table.getPartitionKeys.asScala
.filter(col => col.getType.startsWith(serdeConstants.VARCHAR_TYPE_NAME) ||
Expand All @@ -661,8 +661,10 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
def unapply(attr: Attribute): Option[String] = {
if (varcharKeys.contains(attr.name)) {
None
} else {
} else if (attr.dataType.isInstanceOf[IntegralType] || attr.dataType == StringType) {
Some(attr.name)
} else {
None
}
}
}
Expand All @@ -685,20 +687,20 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
}

def convert(expr: Expression): Option[String] = expr match {
case In(ExtractAttribute(NonVarcharAttribute(name)), ExtractableLiterals(values))
case In(ExtractAttribute(SupportedAttribute(name)), ExtractableLiterals(values))
if useAdvanced =>
Some(convertInToOr(name, values))

case InSet(ExtractAttribute(NonVarcharAttribute(name)), ExtractableValues(values))
case InSet(ExtractAttribute(SupportedAttribute(name)), ExtractableValues(values))
if useAdvanced =>
Some(convertInToOr(name, values))

case op @ SpecialBinaryComparison(
ExtractAttribute(NonVarcharAttribute(name)), ExtractableLiteral(value)) =>
ExtractAttribute(SupportedAttribute(name)), ExtractableLiteral(value)) =>
Some(s"$name ${op.symbol} $value")

case op @ SpecialBinaryComparison(
ExtractableLiteral(value), ExtractAttribute(NonVarcharAttribute(name))) =>
ExtractableLiteral(value), ExtractAttribute(SupportedAttribute(name))) =>
Some(s"$value ${op.symbol} $name")

case And(expr1, expr2) if useAdvanced =>
Expand Down
Expand Up @@ -2310,4 +2310,21 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
}
}

test("partition pruning should handle date correctly") {
withSQLConf(SQLConf.OPTIMIZER_INSET_CONVERSION_THRESHOLD.key -> "2") {
withTable("t") {
sql("CREATE TABLE t (i INT) PARTITIONED BY (j DATE)")
sql("INSERT INTO t PARTITION(j='1990-11-11') SELECT 1")
checkAnswer(sql("SELECT i, CAST(j AS STRING) FROM t"), Row(1, "1990-11-11"))
checkAnswer(
sql(
"""
|SELECT i, CAST(j AS STRING)
|FROM t
|WHERE j IN (DATE'1990-11-10', DATE'1990-11-11', DATE'1990-11-12')
|""".stripMargin),
Row(1, "1990-11-11"))
}
}
}
}

0 comments on commit c411269

Please sign in to comment.