New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-15279][SQL] Catch conflicting SerDe when creating table #13068
Changes from 7 commits
796adc3
62c27f2
bf6b23d
d4b5a32
bc2a7bd
4ffaf59
7e02a0a
5a1b2fa
87266e9
4589b6a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -771,6 +771,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { | |
compressed = false, | ||
serdeProperties = Map()) | ||
} | ||
validateRowFormatFileFormat(ctx.rowFormat, ctx.createFileFormat, ctx) | ||
val fileStorage = Option(ctx.createFileFormat).map(visitCreateFileFormat) | ||
.getOrElse(EmptyStorageFormat) | ||
val rowStorage = Option(ctx.rowFormat).map(visitRowFormat).getOrElse(EmptyStorageFormat) | ||
|
@@ -827,11 +828,13 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { | |
|
||
/** | ||
* Create a [[CatalogStorageFormat]] for creating tables. | ||
* | ||
* Format: STORED AS ... | ||
*/ | ||
override def visitCreateFileFormat( | ||
ctx: CreateFileFormatContext): CatalogStorageFormat = withOrigin(ctx) { | ||
(ctx.fileFormat, ctx.storageHandler) match { | ||
// Expected format: INPUTFORMAT input_format OUTPUTFORMAT output_format | ||
// Expected format: INPUTFORMAT input_format OUTPUTFORMAT output_format (SERDE serde) | ||
case (c: TableFileFormatContext, null) => | ||
visitTableFileFormat(c) | ||
// Expected format: SEQUENCEFILE | TEXTFILE | RCFILE | ORC | PARQUET | AVRO | ||
|
@@ -940,6 +943,43 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { | |
EmptyStorageFormat.copy(serdeProperties = entries.toMap) | ||
} | ||
|
||
/** | ||
* Throw a [[ParseException]] if the user specified incompatible SerDes through ROW FORMAT | ||
* and STORED AS. | ||
* | ||
* Nothing is checked unless both clauses are present. When both are present: | ||
* - STORED AS INPUTFORMAT ... OUTPUTFORMAT ... that also names a SERDE is rejected with any | ||
* ROW FORMAT; without a SERDE it is accepted. | ||
* - ROW FORMAT SERDE is accepted only with STORED AS SEQUENCEFILE, TEXTFILE or RCFILE. | ||
* - ROW FORMAT DELIMITED is accepted only with STORED AS TEXTFILE. | ||
* - Any other combination is rejected. | ||
* | ||
* Examples: | ||
* - ROW FORMAT SERDE 'x' STORED AS RCFILE is accepted. | ||
* - ROW FORMAT SERDE 'x' STORED AS PARQUET is rejected. | ||
* - ROW FORMAT DELIMITED ... STORED AS TEXTFILE is accepted. | ||
* - ROW FORMAT DELIMITED ... STORED AS SEQUENCEFILE is rejected. | ||
* | ||
* The file format name is compared case-insensitively. | ||
*/ | ||
private def validateRowFormatFileFormat( | ||
rowFormatCtx: RowFormatContext, | ||
createFileFormatCtx: CreateFileFormatContext, | ||
parentCtx: ParserRuleContext): Unit = { | ||
// Only the combination of both clauses can conflict. | ||
if (rowFormatCtx == null || createFileFormatCtx == null) { | ||
return | ||
} | ||
// Space-separated text of the STORED AS clause, used in the error messages below. | ||
val cff = (0 until createFileFormatCtx.getChildCount) | ||
.map { i => createFileFormatCtx.getChild(i).getText } | ||
.mkString(" ") | ||
(rowFormatCtx, createFileFormatCtx.fileFormat) match { | ||
case (_, ffTable: TableFileFormatContext) => | ||
if (visitTableFileFormat(ffTable).serde.isDefined) { | ||
throw operationNotAllowed(s"ROW FORMAT is not compatible with $cff", parentCtx) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we can be explicit that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, I like that actually! Hive doesn't support STORED AS serde too: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL. I actually have no idea why we accept it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why don't we support this? What's the problem? Can you give me an example? thx |
||
} | ||
case (_: RowFormatSerdeContext, ffGeneric: GenericFileFormatContext) => | ||
ffGeneric.identifier.getText.toLowerCase match { | ||
case ("sequencefile" | "textfile" | "rcfile") => // OK | ||
case _ => throw operationNotAllowed( | ||
s"ROW FORMAT SERDE is not compatible with $cff", parentCtx) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we can say something like |
||
} | ||
case (_: RowFormatDelimitedContext, ffGeneric: GenericFileFormatContext) => | ||
ffGeneric.identifier.getText.toLowerCase match { | ||
case "textfile" => // OK | ||
case _ => throw operationNotAllowed( | ||
s"ROW FORMAT DELIMITED is not compatible with $cff", parentCtx) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. isn't that what we talked about? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, sorry. I was thinking about what the error message should say. Yea, the semantic is what we talked about. |
||
} | ||
case _ => | ||
// should never happen | ||
throw operationNotAllowed(s"Unexpected combination of ROW FORMAT and $cff", parentCtx) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we also print out There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cff is the nicely formatted string |
||
} | ||
} | ||
|
||
/** | ||
* Create or replace a view. This creates a [[CreateViewCommand]] command. | ||
* | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Maybe it is good to add a few examples to this?)