```diff
@@ -309,10 +309,13 @@ class ParquetToSparkSchemaConverter(
         } else {
           TimestampNTZType
         }
-      // SPARK-40819: NANOS are not supported as a Timestamp, convert to LongType without
-      // timezone awareness to address behaviour regression introduced by SPARK-34661
+      // SPARK-40819 & SPARK-44988: NANOS are not supported as a Timestamp; convert to LongType.
+      // The nanosAsLong config was originally intended as a migration flag, but since Spark
+      // doesn't have full NANOS support and real-world files exist with TIMESTAMP(NANOS,*),
+      // we always convert to LongType regardless of the config to prevent unreadable files.
+      // This handles both TIMESTAMP(NANOS,true) and TIMESTAMP(NANOS,false).
       case timestamp: TimestampLogicalTypeAnnotation
-          if timestamp.getUnit == TimeUnit.NANOS && nanosAsLong =>
+          if timestamp.getUnit == TimeUnit.NANOS =>
         LongType
       case time: TimeLogicalTypeAnnotation
           if time.getUnit == TimeUnit.MICROS && !time.isAdjustedToUTC =>
```
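For context, here is a minimal standalone sketch of the rule this hunk encodes, assuming the parquet-mr `LogicalTypeAnnotation` API; `timestampToCatalyst` is a hypothetical helper for illustration, not Spark's actual converter method:

```scala
import org.apache.parquet.schema.LogicalTypeAnnotation.{TimeUnit, TimestampLogicalTypeAnnotation}
import org.apache.spark.sql.types.{DataType, LongType, TimestampNTZType, TimestampType}

// Sketch of the mapping after this change: TIMESTAMP(NANOS, *) always becomes
// LongType, with no dependence on the nanosAsLong flag; other units keep the
// UTC-adjusted vs. local-time distinction.
def timestampToCatalyst(ts: TimestampLogicalTypeAnnotation): DataType =
  if (ts.getUnit == TimeUnit.NANOS) {
    LongType // raw nanoseconds since the epoch; the caller converts as needed
  } else if (ts.isAdjustedToUTC) {
    TimestampType
  } else {
    TimestampNTZType
  }
```

The key design point is that the NANOS branch no longer consults `nanosAsLong`, so the mapping is total over timestamp units and a file can never become unreadable on that path.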
```diff
@@ -1109,15 +1109,15 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
     }
   }
 
-  test("SPARK-40819: parquet file with TIMESTAMP(NANOS, true) (with default nanosAsLong=false)") {
+  test("SPARK-40819 & SPARK-44988: parquet file with TIMESTAMP(NANOS, true) " +
+    "(with default nanosAsLong=false)") {
+    // After SPARK-44988, TIMESTAMP(NANOS,*) is always converted to LongType regardless of
+    // the nanosAsLong config to ensure files are readable.
     val tsAttribute = "birthday"
     val testDataPath = testFile("test-data/timestamp-nanos.parquet")
-    checkError(
-      exception = intercept[AnalysisException] {
-        spark.read.parquet(testDataPath).collect()
-      },
-      condition = "PARQUET_TYPE_ILLEGAL",
-      parameters = Map("parquetType" -> "INT64 (TIMESTAMP(NANOS,true))")
-    )
+    val data = spark.read.parquet(testDataPath).select(tsAttribute)
+    assert(data.schema.fields.head.dataType == LongType)
+    assert(data.orderBy(desc(tsAttribute)).take(1).head.getAs[Long](0) == 1668537129123534758L)
   }
 
   test("SPARK-47261: parquet file with unsupported type") {
```
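As a usage note, here is a hedged sketch of what a reader sees after this change, borrowing the fixture path and column name from the test above; the object name and the microsecond truncation are illustrative, not part of the PR:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.expr

// A TIMESTAMP(NANOS, *) column is now read as a Long holding nanoseconds
// since the epoch (before SPARK-44988 this read failed with
// PARQUET_TYPE_ILLEGAL under the default nanosAsLong=false). Truncating to
// microseconds with the built-in timestamp_micros SQL function yields a
// regular Spark timestamp, losing sub-microsecond precision.
object ReadNanosAsLong {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    val df = spark.read.parquet("test-data/timestamp-nanos.parquet")
    df.printSchema() // birthday: long
    df.withColumn("birthday_ts", expr("timestamp_micros(birthday DIV 1000)"))
      .show(truncate = false)
    spark.stop()
  }
}
```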