apache · AngersZhuuuu · Jan 17, 2022 · Jan 18, 2022 · Jan 18, 2022 · Jan 18, 2022
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -434,7 +434,8 @@ case class DataSource(
           hs.partitionSchema,
           "in the partition schema",
           equality)
-        DataSourceUtils.verifySchema(hs.fileFormat, hs.dataSchema)
+        DataSourceUtils.verifySchema(hs.fileFormat, hs.dataSchema,
+          !hs.fileFormat.isInstanceOf[ParquetFileFormat])
       case _ =>
         SchemaUtils.checkSchemaColumnNameDuplication(
           relation.schema,

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
@@ -81,12 +81,26 @@ object DataSourceUtils extends PredicateHelper {
    * in a driver side.
    */
-  def verifySchema(format: FileFormat, schema: StructType): Unit = {
+  def verifySchema(
+      format: FileFormat,
+      schema: StructType,
+      shouldCheckFieldNames: Boolean = true): Unit = {
+    checkFieldType(format, schema)
+    // Callers may pass `false` to skip the field-name check; the default keeps it enabled.
+    if (shouldCheckFieldNames) {
+      checkFieldNames(format, schema)
+    }
+  }
+
+  /**
+   * Throws the error built by `dataTypeUnsupportedByDataSourceError` for the first field of
+   * `schema` whose data type is rejected by `format.supportDataType`.
+   */
+  def checkFieldType(format: FileFormat, schema: StructType): Unit = {
     schema.foreach { field =>
       if (!format.supportDataType(field.dataType)) {
         throw QueryCompilationErrors.dataTypeUnsupportedByDataSourceError(format.toString, field)
       }
     }
-    checkFieldNames(format, schema)
   }
 
   // SPARK-24626: Metadata files and temporary files should not be

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala
@@ -165,7 +165,7 @@ trait FileFormat {
   def supportDataType(dataType: DataType): Boolean = true
 
   /**
-   * Returns whether this format supports the given filed name in read/write path.
+   * Returns whether this format supports the given field name in the write path.
    * By default all field name is supported.
    */
   def supportFieldName(name: String): Boolean = true

diff --git a/sql/core/src/test/resources/test-data/field_with_invalid_char.snappy.parquet b/sql/core/src/test/resources/test-data/field_with_invalid_char.snappy.parquet
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4243,6 +4243,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
       checkAnswer(df3, df4)
     }
   }
+
+  test("SPARK-27442: Spark support read parquet file with invalid char in field name") {
+    // Column names in the fixture contain characters such as '(', ' ', '{' and '.'.
+    withResourceTempPath("test-data/field_with_invalid_char.snappy.parquet") { dir =>
+      val df = spark.read.parquet(dir.getAbsolutePath)
+      checkAnswer(df, Row(1, 2, 3, 4, 5, 6) :: Row(2, 4, 6, 8, 10, 12) :: Nil)
+      assert(df.schema.names.toSeq == Seq("max(t)", "a b", "{", ".", "a.b", "a"))
+      checkAnswer(df.select("`max(t)`", "`a b`", "`{`", "`.`", "`a.b`"),
+        Row(1, 2, 3, 4, 5) :: Row(2, 4, 6, 8, 10) :: Nil)
+      checkAnswer(df.where("`a.b` > 8"), Row(2, 4, 6, 8, 10, 12) :: Nil)
+    }
+  }
 }
 
 case class Foo(bar: Option[String])