Skip to content

Commit

Permalink
[SPARK-13616][SQL] Let SQLBuilder convert logical plan without a proj…
Browse files Browse the repository at this point in the history
…ect on top of it

JIRA: https://issues.apache.org/jira/browse/SPARK-13616

## What changes were proposed in this pull request?

It is possibly that a logical plan has been removed `Project` from the top of it. Or the plan doesn't has a top `Project` from the beginning because it is not necessary. Currently the `SQLBuilder` can't convert such plans back to SQL. This change is to add this feature.

## How was this patch tested?

A test is added to `LogicalPlanToSQLSuite`.

Author: Liang-Chi Hsieh <viirya@gmail.com>

Closes #11466 from viirya/sqlbuilder-notopselect.
  • Loading branch information
viirya authored and rxin committed Mar 3, 2016
1 parent 9c274ac commit f87ce05
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
case e => e
}

val generatedSQL = toSQL(canonicalizedPlan)
val generatedSQL = toSQL(canonicalizedPlan, true)
logDebug(
s"""Built SQL query string successfully from given logical plan:
|
Expand All @@ -78,6 +78,27 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
}
}

private def toSQL(node: LogicalPlan, topNode: Boolean): String = {
if (topNode) {
node match {
case d: Distinct => toSQL(node)
case p: Project => toSQL(node)
case a: Aggregate => toSQL(node)
case s: Sort => toSQL(node)
case r: RepartitionByExpression => toSQL(node)
case _ =>
build(
"SELECT",
node.output.map(_.sql).mkString(", "),
"FROM",
toSQL(node)
)
}
} else {
toSQL(node)
}
}

private def toSQL(node: LogicalPlan): String = node match {
case Distinct(p: Project) =>
projectToSQL(p, isDistinct = true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package org.apache.spark.sql.hive

import scala.util.control.NonFatal

import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.test.SQLTestUtils

Expand Down Expand Up @@ -54,6 +56,33 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
sql("DROP TABLE IF EXISTS t0")
}

private def checkPlan(plan: LogicalPlan, sqlContext: SQLContext, expected: String): Unit = {
val convertedSQL = try new SQLBuilder(plan, sqlContext).toSQL catch {
case NonFatal(e) =>
fail(
s"""Cannot convert the following logical query plan back to SQL query string:
|
|# Original logical query plan:
|${plan.treeString}
""".stripMargin, e)
}

try {
checkAnswer(sql(convertedSQL), DataFrame(sqlContext, plan))
} catch { case cause: Throwable =>
fail(
s"""Failed to execute converted SQL string or got wrong answer:
|
|# Converted SQL query string:
|$convertedSQL
|
|# Original logical query plan:
|${plan.treeString}
""".stripMargin,
cause)
}
}

private def checkHiveQl(hiveQl: String): Unit = {
val df = sql(hiveQl)

Expand Down Expand Up @@ -157,6 +186,18 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
"SELECT x.key, COUNT(*) FROM parquet_t1 x JOIN parquet_t1 y ON x.key = y.key group by x.key")
}

test("join plan") {
val expectedSql = "SELECT x.key FROM parquet_t1 x JOIN parquet_t1 y ON x.key = y.key"

val df1 = sqlContext.table("parquet_t1").as("x")
val df2 = sqlContext.table("parquet_t1").as("y")
val joinPlan = df1.join(df2).queryExecution.analyzed

// Make sure we have a plain Join operator without Project on top of it.
assert(joinPlan.isInstanceOf[Join])
checkPlan(joinPlan, sqlContext, expectedSql)
}

test("case") {
checkHiveQl("SELECT CASE WHEN id % 2 > 0 THEN 0 WHEN id % 2 = 0 THEN 1 END FROM parquet_t0")
}
Expand Down

0 comments on commit f87ce05

Please sign in to comment.