From 83dd40bf368ad77205e90c0ad06cb9202f4febc2 Mon Sep 17 00:00:00 2001
From: AbinayaJayaprakasam
Date: Wed, 26 Nov 2025 01:45:17 +0530
Subject: [PATCH 1/2] [SPARK-44988][SQL] Support reading Parquet TIMESTAMP(NANOS,false)

Convert TIMESTAMP(NANOS,*) to LongType regardless of the nanosAsLong config
so that Parquet files with nanosecond-precision timestamps can be read.

### What changes were proposed in this pull request?

Simplified the TIMESTAMP(NANOS) handling in ParquetSchemaConverter to always
convert to LongType, removing the nanosAsLong condition check that left
TIMESTAMP(NANOS,false) files unreadable.

### Why are the changes needed?

SPARK-40819 added spark.sql.legacy.parquet.nanosAsLong as a workaround for
TIMESTAMP(NANOS,true), but:
- It only worked for TIMESTAMP(NANOS,true), not for TIMESTAMP(NANOS,false)
- It required users to know about an obscure internal config flag
- It still required manual casting from Long to Timestamp

This fix makes all NANOS timestamps readable by default. Since Spark cannot
fully support nanosecond precision in its type system, converting to LongType
preserves the full precision while still allowing the files to be read.

### Does this PR introduce any user-facing change?

Yes. Parquet files with TIMESTAMP(NANOS,*) are now readable by default,
without any configuration. Values are read as LongType (nanoseconds since
epoch). Users can convert to a timestamp if needed:
`(col('nanos') / 1e9).cast('timestamp')`, as sketched below.
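For reference, a minimal end-to-end sketch (illustrative only, not part of
this patch): it writes a TIMESTAMP(NANOS,false) Parquet file with PyArrow,
then reads it back with Spark and converts the resulting long column. The
output path and the column name `nanos` are hypothetical, and a local Spark
session is assumed.

```python
import pyarrow as pa
import pyarrow.parquet as pq
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

# Write one nanosecond-precision value; a tz-naive Arrow timestamp is stored
# with isAdjustedToUTC=false. version="2.6" is passed so nanoseconds are kept
# rather than coerced to a coarser unit (assumed necessary on older PyArrow).
table = pa.table(
    {"nanos": pa.array([1668537129123534758], type=pa.timestamp("ns"))})
pq.write_table(table, "/tmp/timestamp-nanos.parquet", version="2.6")

spark = SparkSession.builder.getOrCreate()
df = spark.read.parquet("/tmp/timestamp-nanos.parquet")
df.printSchema()  # with this patch: nanos is read as long

# Nanoseconds since epoch -> timestamp (sub-microsecond digits are dropped).
df.select((col("nanos") / 1e9).cast("timestamp").alias("ts")).show(truncate=False)
```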
### How was this patch tested?

- Updated ParquetSchemaSuite test expectations (lines 1112-1121)
- All 110 tests in ParquetSchemaSuite pass
- Manually tested with a TIMESTAMP(NANOS,false) Parquet file generated via PyArrow

---
 .../parquet/ParquetSchemaConverter.scala           |  9 ++++++---
 .../datasources/parquet/ParquetSchemaSuite.scala   | 16 ++++++++--------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
index 9e6f4447ca79..554e4e945b9a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
@@ -309,10 +309,13 @@ class ParquetToSparkSchemaConverter(
         } else {
           TimestampNTZType
         }
-      // SPARK-40819: NANOS are not supported as a Timestamp, convert to LongType without
-      // timezone awareness to address behaviour regression introduced by SPARK-34661
+      // SPARK-40819 & SPARK-44988: NANOS are not supported as a Timestamp, convert to LongType.
+      // The nanosAsLong config was originally intended as a migration flag, but since Spark
+      // doesn't have full NANOS support and real-world files exist with TIMESTAMP(NANOS,*),
+      // we always convert to LongType regardless of the config to prevent unreadable files.
+      // This handles both TIMESTAMP(NANOS,true) and TIMESTAMP(NANOS,false).
       case timestamp: TimestampLogicalTypeAnnotation
-        if timestamp.getUnit == TimeUnit.NANOS && nanosAsLong =>
+        if timestamp.getUnit == TimeUnit.NANOS =>
         LongType
       case time: TimeLogicalTypeAnnotation
         if time.getUnit == TimeUnit.MICROS && !time.isAdjustedToUTC =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
index 56076175d60e..197461003d33 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
@@ -1109,15 +1109,15 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
     }
   }
 
-  test("SPARK-40819: parquet file with TIMESTAMP(NANOS, true) (with default nanosAsLong=false)") {
+  test("SPARK-40819 & SPARK-44988: parquet file with TIMESTAMP(NANOS, true) " +
+    "(with default nanosAsLong=false)") {
+    // After SPARK-44988, TIMESTAMP(NANOS,*) is always converted to LongType regardless of
+    // the nanosAsLong config to ensure files are readable
+    val tsAttribute = "birthday"
     val testDataPath = testFile("test-data/timestamp-nanos.parquet")
-    checkError(
-      exception = intercept[AnalysisException] {
-        spark.read.parquet(testDataPath).collect()
-      },
-      condition = "PARQUET_TYPE_ILLEGAL",
-      parameters = Map("parquetType" -> "INT64 (TIMESTAMP(NANOS,true))")
-    )
+    val data = spark.read.parquet(testDataPath).select(tsAttribute)
+    assert(data.schema.fields.head.dataType == LongType)
+    assert(data.orderBy(desc(tsAttribute)).take(1).head.getAs[Long](0) == 1668537129123534758L)
   }
 
   test("SPARK-47261: parquet file with unsupported type") {

From 059c360defa0d12be9985f6790dd13e34fec5d42 Mon Sep 17 00:00:00 2001
From: AbinayaJayaprakasam
Date: Wed, 26 Nov 2025 13:00:46 +0530
Subject: [PATCH 2/2] Retrigger CI