Skip to content

Commit

Permalink
[SPARK-30238][SQL] hive partition pruning can only support string and…
Browse files Browse the repository at this point in the history
… integral types

### What changes were proposed in this pull request?

Check the partition column data type and only allow string and integral types in hive partition pruning.

### Why are the changes needed?

Currently we only support string and integral types in hive partition pruning, but the check is done for literals. If the predicate is `InSet`, then there is no literal and we may pass an unsupported partition predicate to Hive and cause problems.

### Does this PR introduce any user-facing change?

yes. fix a bug. A query fails before and can run now.

### How was this patch tested?

a new test

Closes #26871 from cloud-fan/bug.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
  • Loading branch information
cloud-fan authored and dongjoon-hyun committed Dec 12, 2019
1 parent cada5be commit 982f72f
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 6 deletions.
Expand Up @@ -703,7 +703,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
}
}

object NonVarcharAttribute {
object SupportedAttribute {
// hive varchar is treated as catalyst string, but hive varchar can't be pushed down.
private val varcharKeys = table.getPartitionKeys.asScala
.filter(col => col.getType.startsWith(serdeConstants.VARCHAR_TYPE_NAME) ||
Expand All @@ -713,8 +713,10 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
def unapply(attr: Attribute): Option[String] = {
if (varcharKeys.contains(attr.name)) {
None
} else {
} else if (attr.dataType.isInstanceOf[IntegralType] || attr.dataType == StringType) {
Some(attr.name)
} else {
None
}
}
}
Expand All @@ -737,20 +739,20 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
}

def convert(expr: Expression): Option[String] = expr match {
case In(ExtractAttribute(NonVarcharAttribute(name)), ExtractableLiterals(values))
case In(ExtractAttribute(SupportedAttribute(name)), ExtractableLiterals(values))
if useAdvanced =>
Some(convertInToOr(name, values))

case InSet(ExtractAttribute(NonVarcharAttribute(name)), ExtractableValues(values))
case InSet(ExtractAttribute(SupportedAttribute(name)), ExtractableValues(values))
if useAdvanced =>
Some(convertInToOr(name, values))

case op @ SpecialBinaryComparison(
ExtractAttribute(NonVarcharAttribute(name)), ExtractableLiteral(value)) =>
ExtractAttribute(SupportedAttribute(name)), ExtractableLiteral(value)) =>
Some(s"$name ${op.symbol} $value")

case op @ SpecialBinaryComparison(
ExtractableLiteral(value), ExtractAttribute(NonVarcharAttribute(name))) =>
ExtractableLiteral(value), ExtractAttribute(SupportedAttribute(name))) =>
Some(s"$value ${op.symbol} $name")

case And(expr1, expr2) if useAdvanced =>
Expand Down
Expand Up @@ -2473,4 +2473,22 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
}
}
}

test("partition pruning should handle date correctly") {
withSQLConf(SQLConf.OPTIMIZER_INSET_CONVERSION_THRESHOLD.key -> "2") {
withTable("t") {
sql("CREATE TABLE t (i INT) PARTITIONED BY (j DATE)")
sql("INSERT INTO t PARTITION(j='1990-11-11') SELECT 1")
checkAnswer(sql("SELECT i, CAST(j AS STRING) FROM t"), Row(1, "1990-11-11"))
checkAnswer(
sql(
"""
|SELECT i, CAST(j AS STRING)
|FROM t
|WHERE j IN (DATE'1990-11-10', DATE'1990-11-11', DATE'1990-11-12')
|""".stripMargin),
Row(1, "1990-11-11"))
}
}
}
}

0 comments on commit 982f72f

Please sign in to comment.