diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
index 9e6f4447ca79..554e4e945b9a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
@@ -309,10 +309,13 @@ class ParquetToSparkSchemaConverter(
         } else {
           TimestampNTZType
         }
-      // SPARK-40819: NANOS are not supported as a Timestamp, convert to LongType without
-      // timezone awareness to address behaviour regression introduced by SPARK-34661
+      // SPARK-40819 & SPARK-44988: NANOS are not supported as a Timestamp; convert to LongType.
+      // The nanosAsLong config was originally intended as a migration flag, but since Spark
+      // doesn't have full NANOS support and real-world files exist with TIMESTAMP(NANOS,*),
+      // we always convert to LongType regardless of the config to prevent unreadable files.
+      // This handles both TIMESTAMP(NANOS,true) and TIMESTAMP(NANOS,false).
       case timestamp: TimestampLogicalTypeAnnotation
-        if timestamp.getUnit == TimeUnit.NANOS && nanosAsLong =>
+        if timestamp.getUnit == TimeUnit.NANOS =>
         LongType
       case time: TimeLogicalTypeAnnotation
         if time.getUnit == TimeUnit.MICROS && !time.isAdjustedToUTC =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
index 56076175d60e..197461003d33 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
@@ -1109,15 +1109,15 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
     }
   }
 
-  test("SPARK-40819: parquet file with TIMESTAMP(NANOS, true) (with default nanosAsLong=false)") {
+  test("SPARK-40819 & SPARK-44988: parquet file with TIMESTAMP(NANOS, true) " +
+    "(with default nanosAsLong=false)") {
+    // After SPARK-44988, TIMESTAMP(NANOS,*) is always converted to LongType regardless of
+    // the nanosAsLong config, to ensure such files remain readable.
+    val tsAttribute = "birthday"
     val testDataPath = testFile("test-data/timestamp-nanos.parquet")
-    checkError(
-      exception = intercept[AnalysisException] {
-        spark.read.parquet(testDataPath).collect()
-      },
-      condition = "PARQUET_TYPE_ILLEGAL",
-      parameters = Map("parquetType" -> "INT64 (TIMESTAMP(NANOS,true))")
-    )
+    val data = spark.read.parquet(testDataPath).select(tsAttribute)
+    assert(data.schema.fields.head.dataType == LongType)
+    assert(data.orderBy(desc(tsAttribute)).take(1).head.getAs[Long](0) == 1668537129123534758L)
   }
 
   test("SPARK-47261: parquet file with unsupported type") {
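
For reviewers who want to exercise the new behaviour outside the suite, below is a minimal, self-contained sketch (not part of the patch) of what the updated test asserts: a Parquet column written as INT64 (TIMESTAMP(NANOS,true)) now surfaces as LongType instead of failing with PARQUET_TYPE_ILLEGAL. The file path is a placeholder, the column name "birthday" matches the test fixture, and the timestamp_seconds downscaling at the end is one possible way for a caller to recover timestamp semantics; it is an illustration, not something this patch adds.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, desc, timestamp_seconds}
import org.apache.spark.sql.types.LongType

object NanosAsLongSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("timestamp-nanos-as-long")
      .master("local[*]")
      .getOrCreate()

    // Placeholder path: any file carrying a TIMESTAMP(NANOS,*) column works.
    val df = spark.read.parquet("/path/to/timestamp-nanos.parquet")

    // Post-SPARK-44988 the column is exposed as raw Int64 nanoseconds since the
    // epoch, readable without touching the legacy nanosAsLong flag.
    assert(df.schema("birthday").dataType == LongType)
    df.orderBy(desc("birthday")).show(1, truncate = false)

    // A caller needing timestamp semantics can downscale the raw nanos itself
    // (nanos -> fractional seconds -> TimestampType, dropping sub-microsecond digits).
    df.select(timestamp_seconds(col("birthday") / 1e9).as("birthday_ts"))
      .show(truncate = false)

    spark.stop()
  }
}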