New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-27442][SQL] Remove check field name when reading/writing data in parquet #35229
Changes from 6 commits
388be6d
4bf6a19
f3f4e4c
3d7114a
59e9030
d09f83d
5a9d993
5dc6741
b2d64af
b3f0f09
fe3aeb2
e11bcdb
5f3430f
bd5540e
310b00c
e6253a2
e3efd3f
8a1dc91
8883634
3d4adf9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -81,12 +81,16 @@ object DataSourceUtils extends PredicateHelper { | |
* in a driver side. | ||
*/ | ||
def verifySchema(format: FileFormat, schema: StructType): Unit = { | ||
checkFieldType(format, schema) | ||
checkFieldNames(format, schema) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this PR change anything? It looks like a refactoring that pulls part of `verifySchema` out into `checkFieldType`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Oh, sorry — one file was not included when I committed the change. |
||
} | ||
|
||
def checkFieldType(format: FileFormat, schema: StructType): Unit = { | ||
schema.foreach { field => | ||
if (!format.supportDataType(field.dataType)) { | ||
throw QueryCompilationErrors.dataTypeUnsupportedByDataSourceError(format.toString, field) | ||
} | ||
} | ||
checkFieldNames(format, schema) | ||
} | ||
|
||
// SPARK-24626: Metadata files and temporary files should not be | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4243,6 +4243,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark | |
checkAnswer(df3, df4) | ||
} | ||
} | ||
|
||
test("SPARK-27442: Spark support read parquet file with invalid char in field name") { | ||
withResourceTempPath("test-data/field_with_invalid_char.snappy.parquet") { dir => | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can write the parquet file in the test instead of generating it ahead of time. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Updated. |
||
val df = spark.read.parquet(dir.getAbsolutePath) | ||
checkAnswer(df, Row(1, 2, 3, 4, 5, 6) :: Row(2, 4, 6, 8, 10, 12) :: Nil) | ||
assert(df.schema.names.sameElements(Array("max(t)", "a b", "{", ".", "a.b", "a"))) | ||
checkAnswer(df.select("`max(t)`", "`a b`", "`{`", "`.`", "`a.b`") | ||
, Row(1, 2, 3, 4, 5) :: Row(2, 4, 6, 8, 10) :: Nil) | ||
checkAnswer(df.where("`a.b` > 8"), | ||
Row(2, 4, 6, 8, 10, 12) :: Nil) | ||
} | ||
} | ||
} | ||
|
||
case class Foo(bar: Option[String]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Hmm, I think this doesn't compile — please double-check.