-
Notifications
You must be signed in to change notification settings - Fork 28.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-12689][SQL] Migrate DDL parsing to the newly absorbed parser #10723
Changes from 5 commits
f886f67
c60cd9e
f2d6fa6
5a6cc4a
78f1b7c
1c145eb
d800c58
838f701
7e0f218
559083e
17ce6ae
4fc7a60
3e5a229
9922ccc
cbd3173
1df2b80
9ff32e0
7350f07
7d31844
8b7086e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,10 +16,13 @@ | |
*/ | ||
package org.apache.spark.sql.execution | ||
|
||
import org.apache.spark.sql.SaveMode | ||
import org.apache.spark.sql.catalyst.{CatalystQl, TableIdentifier} | ||
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation | ||
import org.apache.spark.sql.catalyst.parser.{ASTNode, ParserConf, SimpleParserConf} | ||
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation} | ||
import org.apache.spark.sql.execution.datasources._ | ||
import org.apache.spark.sql.types.StructType | ||
|
||
private[sql] class SparkQl(conf: ParserConf = SimpleParserConf()) extends CatalystQl(conf) { | ||
/** Check if a command should not be explained. */ | ||
|
@@ -42,6 +45,84 @@ private[sql] class SparkQl(conf: ParserConf = SimpleParserConf()) extends Cataly | |
getClauses(Seq("TOK_QUERY", "FORMATTED", "EXTENDED"), explainArgs) | ||
ExplainCommand(nodeToPlan(query), extended = extended.isDefined) | ||
|
||
case Token("TOK_REFRESHTABLE", nameParts :: Nil) => | ||
val tableIdent = extractTableIdent(nameParts) | ||
RefreshTable(tableIdent) | ||
|
||
case Token("TOK_CREATETABLEUSING", createTableArgs) => | ||
val clauses = getClauses( | ||
Seq("TEMPORARY", "TOK_IFNOTEXISTS", "TOK_TABNAME", "TOK_TABCOLLIST", | ||
"TOK_TABLEPROVIDER", "TOK_TABLEOPTIONS", "TOK_QUERY"), createTableArgs) | ||
|
||
val temp = clauses(0) | ||
val allowExisting = clauses(1) | ||
val Some(tabName) = clauses(2) | ||
val tableCols = clauses(3) | ||
val Some(tableProvider) = clauses(4) | ||
val tableOpts = clauses(5) | ||
val tableAs = clauses(6) | ||
|
||
val tableIdent: TableIdentifier = tabName match { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why don't we use |
||
case Token("TOK_TABNAME", Token(dbName, _) :: Token(tableName, _) :: Nil) => | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am personally not big on using wildcards in pattern matches. This prevents us from catching a grammar problem early. Since most of the wildcards are actually empty lists, why not match these? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't quite understand. I think I don't use wildcards in pattern matches here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As soon as you use an underscore in a pattern match, for example: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah, I see, just not sure where you are pointing to. I was wondering because I don't have a case _ branch. Thanks. |
||
new TableIdentifier(tableName, Some(dbName)) | ||
case Token("TOK_TABNAME", Token(tableName, _) :: Nil) => | ||
TableIdentifier(tableName) | ||
} | ||
|
||
val columns = tableCols.map { | ||
case Token("TOK_TABCOLLIST", fields) => StructType(fields.map(nodeToStructField)) | ||
} | ||
|
||
val provider = tableProvider match { | ||
case Token("TOK_TABLEPROVIDER", Token(provider, _) :: Nil) => provider | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah. One test failed because of this. I will fix it. |
||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could make more use of pattern matching here. For example:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah. I was using that to match. But the compiler kept throwing a syntax error. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just tried this:
...and this compiles. [edit: posted the wrong code initially] |
||
|
||
val options = tableOpts.map { opts => | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: You could also turn the option into a sequence, flatMap over it, and create the Map as a result:
(note: code is not tested) |
||
opts match { | ||
case Token("TOK_TABLEOPTIONS", options) => | ||
options.map { | ||
case Token("TOK_TABLEOPTION", Token(key, _) :: Token(value, _) :: Nil) => | ||
(key, value.replaceAll("^\'|^\"|\"$|\'$", "")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't know there is |
||
case _ => super.nodeToPlan(node) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you have a plan inside table options? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No. This line should be removed. |
||
}.asInstanceOf[Seq[(String, String)]].toMap | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this cast needed? |
||
} | ||
}.getOrElse(Map.empty[String, String]) | ||
|
||
val asClause = tableAs.map(nodeToPlan(_)) | ||
|
||
if (temp.isDefined && allowExisting.isDefined) { | ||
throw new DDLException( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we still need a DDL exception? Why not throw an analysis exception? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually here the thrown exception will be caught in |
||
"a CREATE TEMPORARY TABLE statement does not allow IF NOT EXISTS clause.") | ||
} | ||
|
||
if (asClause.isDefined) { | ||
val mode = if (allowExisting.isDefined) { | ||
SaveMode.Ignore | ||
} else if (temp.isDefined) { | ||
SaveMode.Overwrite | ||
} else { | ||
SaveMode.ErrorIfExists | ||
} | ||
|
||
CreateTableUsingAsSelect(tableIdent, | ||
provider, | ||
temp.isDefined, | ||
Array.empty[String], | ||
bucketSpec = None, | ||
mode, | ||
options, | ||
asClause.get) | ||
} else { | ||
CreateTableUsing( | ||
tableIdent, | ||
columns, | ||
provider, | ||
temp.isDefined, | ||
options, | ||
allowExisting.isDefined, | ||
managedIfNoPath = false) | ||
} | ||
|
||
case Token("TOK_DESCTABLE", describeArgs) => | ||
// Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL | ||
val Some(tableType) :: formatted :: extended :: pretty :: Nil = | ||
|
@@ -52,26 +133,30 @@ private[sql] class SparkQl(conf: ParserConf = SimpleParserConf()) extends Cataly | |
nodeToDescribeFallback(node) | ||
} else { | ||
tableType match { | ||
case Token("TOK_TABTYPE", Token("TOK_TABNAME", nameParts :: Nil) :: Nil) => | ||
case Token("TOK_TABTYPE", Token("TOK_TABNAME", nameParts) :: Nil) => | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why change this? You didn't touch the describe stuff in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. I think it has been incorrect from the beginning, but it was not tested because we didn't reach this code before. I've tested it locally. Once all three commands are migrated, we can see this passing tests. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if we parse the following SQL using the parse driver We would end up with the following AST:
This change would pick this up, and the old code didn't (I am sure I tested this though :S ). You can disable this in the DDL parser, to see if it works now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we add a test for this? The Hive test suite apparently misses this one. I could also address this in a different PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually we have a test for the describe table command in HiveQuerySuite. Do we need another test? |
||
nameParts match { | ||
case Token(".", dbName :: tableName :: Nil) => | ||
case Token(dbName, _) :: Token(tableName, _) :: Nil => | ||
// It is describing a table with the format like "describe db.table". | ||
// TODO: Actually, a user may mean tableName.columnName. Need to resolve this | ||
// issue. | ||
val tableIdent = extractTableIdent(nameParts) | ||
val tableIdent = TableIdentifier( | ||
cleanIdentifier(tableName), Some(cleanIdentifier(dbName))) | ||
datasources.DescribeCommand( | ||
UnresolvedRelation(tableIdent, None), isExtended = extended.isDefined) | ||
case Token(".", dbName :: tableName :: colName :: Nil) => | ||
case Token(dbName, _) :: Token(tableName, _) :: Token(colName, _) :: Nil => | ||
// It is describing a column with the format like "describe db.table column". | ||
nodeToDescribeFallback(node) | ||
case tableName => | ||
case tableName :: Nil => | ||
// It is describing a table with the format like "describe table". | ||
datasources.DescribeCommand( | ||
UnresolvedRelation(TableIdentifier(tableName.text), None), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cleanIdentifier? |
||
isExtended = extended.isDefined) | ||
case _ => | ||
nodeToDescribeFallback(node) | ||
} | ||
// All other cases. | ||
case _ => nodeToDescribeFallback(node) | ||
case _ => | ||
nodeToDescribeFallback(node) | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -316,7 +316,7 @@ class HiveContext private[hive]( | |
} | ||
|
||
protected[sql] override def parseSql(sql: String): LogicalPlan = { | ||
super.parseSql(substitutor.substitute(hiveconf, sql)) | ||
sqlParser.parsePlan(substitutor.substitute(hiveconf, sql)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about gradually moving functionality from the DDL parser to SparkQl? That would allow us to test this in the meantime. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
} | ||
|
||
override protected[sql] def executePlan(plan: LogicalPlan): this.QueryExecution = | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Providers can have dotted names, e.g.:
org.apache.spark.sql.avro
. Identifier won't work here. You would need something like:Identifier (DOT Identifier)*
I am not sure if we should support quoted identifiers here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like we just used identifiers without quotes for providers before.