From 59e49edffd4e93ec7b25b941f39cd52d1d58426a Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 18 May 2026 10:44:39 +0200 Subject: [PATCH 01/10] Add TimestampNTZNanosType and TimestampLTZNanosType --- .../resources/error/error-conditions.json | 12 +++ .../spark/sql/errors/DataTypeErrors.scala | 26 ++++++ .../org/apache/spark/sql/types/DataType.scala | 10 ++- .../sql/types/TimestampLTZNanosType.scala | 63 ++++++++++++++ .../sql/types/TimestampNTZNanosType.scala | 63 ++++++++++++++ .../spark/sql/types/DataTypeSuite.scala | 87 +++++++++++++++++++ 6 files changed, 260 insertions(+), 1 deletion(-) create mode 100644 sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala create mode 100644 sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 889ecf9f7b08a..08acc574e9588 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -8503,6 +8503,18 @@ ], "sqlState" : "42000" }, + "UNSUPPORTED_TIMESTAMP_LTZ_NANOS_PRECISION" : { + "message" : [ + "The fractional seconds precision of TIMESTAMP_LTZ is out of the supported range [, ] for nanosecond-capable timestamps." + ], + "sqlState" : "0A001" + }, + "UNSUPPORTED_TIMESTAMP_NTZ_NANOS_PRECISION" : { + "message" : [ + "The fractional seconds precision of TIMESTAMP_NTZ is out of the supported range [, ] for nanosecond-capable timestamps." + ], + "sqlState" : "0A001" + }, "UNSUPPORTED_TIME_PRECISION" : { "message" : [ "The seconds precision of the TIME data type is out of the supported range [0, 6]." 
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala index 1e2b2e691cd31..82d20f5647e12 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala @@ -275,4 +275,30 @@ private[sql] object DataTypeErrors extends DataTypeErrorsBase { messageParameters = Map("precision" -> precision.toString), cause = null) } + + def unsupportedTimestampNtzPrecisionError( + precision: Int, + minPrecision: Int, + maxPrecision: Int): Throwable = { + new SparkException( + errorClass = "UNSUPPORTED_TIMESTAMP_NTZ_NANOS_PRECISION", + messageParameters = Map( + "precision" -> precision.toString, + "minPrecision" -> minPrecision.toString, + "maxPrecision" -> maxPrecision.toString), + cause = null) + } + + def unsupportedTimestampLtzPrecisionError( + precision: Int, + minPrecision: Int, + maxPrecision: Int): Throwable = { + new SparkException( + errorClass = "UNSUPPORTED_TIMESTAMP_LTZ_NANOS_PRECISION", + messageParameters = Map( + "precision" -> precision.toString, + "minPrecision" -> minPrecision.toString, + "maxPrecision" -> maxPrecision.toString), + cause = null) + } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala index 48a6514440dd3..e3d0e7c17b8cb 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -127,6 +127,8 @@ object DataType { private val CHAR_TYPE = """char\(\s*(\d+)\s*\)""".r private val VARCHAR_TYPE = """varchar\(\s*(\d+)\s*\)""".r private val STRING_WITH_COLLATION = """string\s+collate\s+(\w+)""".r + private val TIMESTAMP_LTZ_NANOS_TYPE = """timestamp_ltz\(\s*(\d+)\s*\)""".r + private val TIMESTAMP_NTZ_NANOS_TYPE = """timestamp_ntz\(\s*(\d+)\s*\)""".r private val GEOMETRY_TYPE = 
"""geometry\(\s*([\w]+:-?[\w]+)\s*\)""".r private val GEOGRAPHY_TYPE_CRS = """geography\(\s*(\w+:-?\w+)\s*\)""".r private val GEOGRAPHY_TYPE_ALG = """geography\(\s*(\w+)\s*\)""".r @@ -208,7 +210,11 @@ object DataType { YearMonthIntervalType(YEAR, MONTH), TimestampNTZType, VariantType) ++ - (TimeType.MIN_PRECISION to TimeType.MAX_PRECISION).map(TimeType(_))) + (TimeType.MIN_PRECISION to TimeType.MAX_PRECISION).map(TimeType(_)) ++ + (TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION) + .map(TimestampLTZNanosType(_)) ++ + (TimestampNTZNanosType.MIN_PRECISION to TimestampNTZNanosType.MAX_PRECISION) + .map(TimestampNTZNanosType(_))) .map(t => t.typeName -> t) .toMap } @@ -233,6 +239,8 @@ object DataType { case GEOGRAPHY_TYPE_CRS_ALG(crs, alg) => GeographyType(crs, alg) // For backwards compatibility, previously the type name of NullType is "null" case "null" => NullType + case TIMESTAMP_LTZ_NANOS_TYPE(precision) => TimestampLTZNanosType(precision.toInt) + case TIMESTAMP_NTZ_NANOS_TYPE(precision) => TimestampNTZNanosType(precision.toInt) case "timestamp_ltz" => TimestampType case other => otherTypes.getOrElse( diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala new file mode 100644 index 0000000000000..ea33274867d89 --- /dev/null +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.types + +import org.apache.spark.annotation.Unstable +import org.apache.spark.sql.errors.DataTypeErrors + +/** + * Timestamp with local time zone with fractional-second precision in the nanosecond-capable range + * (7 to 9 decimal digits). Values will use epoch microseconds plus nanoseconds within that + * microsecond; the time zone affects interpretation only. + * + * @param precision + * Number of digits of fractional seconds for this SQL type. The valid values are 7, 8, and 9 + * where 9 means nanosecond precision. + * + * @since 4.2.0 + */ +@Unstable +case class TimestampLTZNanosType(precision: Int) extends DatetimeType { + + if (precision < TimestampLTZNanosType.MIN_PRECISION || + precision > TimestampLTZNanosType.MAX_PRECISION) { + throw DataTypeErrors.unsupportedTimestampLtzPrecisionError( + precision, + TimestampLTZNanosType.MIN_PRECISION, + TimestampLTZNanosType.MAX_PRECISION) + } + + /** + * Default size used by Spark for row-size estimation. Values are represented logically as + * epoch microseconds (Long, 8 bytes) plus nanoseconds within that micro (Short, 2 bytes). 
+ */ + override def defaultSize: Int = 10 + + override def typeName: String = s"timestamp_ltz($precision)" + + private[spark] override def asNullable: TimestampLTZNanosType = this +} + +object TimestampLTZNanosType { + val MIN_PRECISION: Int = 7 + val MAX_PRECISION: Int = 9 + val NANOS_PRECISION: Int = 9 + val DEFAULT_PRECISION: Int = NANOS_PRECISION + + def apply(): TimestampLTZNanosType = new TimestampLTZNanosType(DEFAULT_PRECISION) +} diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala new file mode 100644 index 0000000000000..df147b6bf0ffd --- /dev/null +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.types + +import org.apache.spark.annotation.Unstable +import org.apache.spark.sql.errors.DataTypeErrors + +/** + * Timestamp without time zone with fractional-second precision in the nanosecond-capable range + * (7 to 9 decimal digits). Values will use epoch microseconds plus nanoseconds within that + * microsecond. 
+ * + * @param precision + * Number of digits of fractional seconds for this SQL type. The valid values are 7, 8, and 9 + * where 9 means nanosecond precision. + * + * @since 4.2.0 + */ +@Unstable +case class TimestampNTZNanosType(precision: Int) extends DatetimeType { + + if (precision < TimestampNTZNanosType.MIN_PRECISION || + precision > TimestampNTZNanosType.MAX_PRECISION) { + throw DataTypeErrors.unsupportedTimestampNtzPrecisionError( + precision, + TimestampNTZNanosType.MIN_PRECISION, + TimestampNTZNanosType.MAX_PRECISION) + } + + /** + * Default size used by Spark for row-size estimation. Values are represented logically as + * epoch microseconds (Long, 8 bytes) plus nanoseconds within that micro (Short, 2 bytes). + */ + override def defaultSize: Int = 10 + + override def typeName: String = s"timestamp_ntz($precision)" + + private[spark] override def asNullable: TimestampNTZNanosType = this +} + +object TimestampNTZNanosType { + val MIN_PRECISION: Int = 7 + val MAX_PRECISION: Int = 9 + val NANOS_PRECISION: Int = 9 + val DEFAULT_PRECISION: Int = NANOS_PRECISION + + def apply(): TimestampNTZNanosType = new TimestampNTZNanosType(DEFAULT_PRECISION) +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index ce4f5e89be2b8..51b9638fc165a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -255,6 +255,13 @@ class DataTypeSuite extends SparkFunSuite { checkDataTypeFromJson(TimestampNTZType) checkDataTypeFromDDL(TimestampNTZType) + checkDataTypeFromJson(TimestampLTZNanosType(TimestampLTZNanosType.MIN_PRECISION)) + checkDataTypeFromJson(TimestampLTZNanosType(8)) + checkDataTypeFromJson(TimestampLTZNanosType(TimestampLTZNanosType.MAX_PRECISION)) + checkDataTypeFromJson(TimestampNTZNanosType(TimestampNTZNanosType.MIN_PRECISION)) + 
checkDataTypeFromJson(TimestampNTZNanosType(8)) + checkDataTypeFromJson(TimestampNTZNanosType(TimestampNTZNanosType.MAX_PRECISION)) + checkDataTypeFromJson(StringType) checkDataTypeFromDDL(StringType) @@ -403,6 +410,10 @@ class DataTypeSuite extends SparkFunSuite { dayTimeIntervalTypes.foreach(checkDefaultSize(_, 8)) checkDefaultSize(TimeType(TimeType.MIN_PRECISION), 8) checkDefaultSize(TimeType(TimeType.MAX_PRECISION), 8) + checkDefaultSize(TimestampLTZNanosType(TimestampLTZNanosType.MIN_PRECISION), 10) + checkDefaultSize(TimestampLTZNanosType(TimestampLTZNanosType.MAX_PRECISION), 10) + checkDefaultSize(TimestampNTZNanosType(TimestampNTZNanosType.MIN_PRECISION), 10) + checkDefaultSize(TimestampNTZNanosType(TimestampNTZNanosType.MAX_PRECISION), 10) def checkEqualsIgnoreCompatibleNullability( from: DataType, @@ -1448,6 +1459,82 @@ class DataTypeSuite extends SparkFunSuite { parameters = Map("error" -> "'time'", "hint" -> "")) } + test("SPARK-56876: precisions of nanos-capable TIMESTAMP_LTZ and TIMESTAMP_NTZ types") { + TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION foreach { p => + assert(TimestampLTZNanosType(p).sql === s"TIMESTAMP_LTZ($p)") + assert(TimestampNTZNanosType(p).sql === s"TIMESTAMP_NTZ($p)") + } + + Seq(6, 10, Int.MinValue, Int.MaxValue).foreach { p => + checkError( + exception = intercept[SparkException] { + TimestampLTZNanosType(p) + }, + condition = "UNSUPPORTED_TIMESTAMP_LTZ_NANOS_PRECISION", + parameters = Map( + "precision" -> p.toString, + "minPrecision" -> "7", + "maxPrecision" -> "9")) + checkError( + exception = intercept[SparkException] { + TimestampNTZNanosType(p) + }, + condition = "UNSUPPORTED_TIMESTAMP_NTZ_NANOS_PRECISION", + parameters = Map( + "precision" -> p.toString, + "minPrecision" -> "7", + "maxPrecision" -> "9")) + } + } + + test("SPARK-56876: parse timestamp with nanosecond precision from JSON") { + TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION foreach { n => + 
assert(DataType.fromJson(s"\"timestamp_ltz($n)\"") === TimestampLTZNanosType(n)) + assert(DataType.fromJson(s"\"timestamp_ltz( $n)\"") === TimestampLTZNanosType(n)) + assert(DataType.fromJson(s"\"timestamp_ntz($n)\"") === TimestampNTZNanosType(n)) + assert(DataType.fromJson(s"\"timestamp_ntz($n )\"") === TimestampNTZNanosType(n)) + } + // JSON round-trip for nanos timestamp types inside struct, array, and map + val structWithNanos = StructType(Seq( + StructField("ntz", TimestampNTZNanosType(7)), + StructField("ltz", TimestampLTZNanosType(8)))) + assert(DataType.fromJson(structWithNanos.json) === structWithNanos) + val arrayOfNanos = ArrayType(TimestampNTZNanosType(9), containsNull = false) + assert(DataType.fromJson(arrayOfNanos.json) === arrayOfNanos) + val mapOfNanos = MapType(StringType, TimestampNTZNanosType(7), valueContainsNull = true) + assert(DataType.fromJson(mapOfNanos.json) === mapOfNanos) + + assert(DataType.fromJson("\"timestamp_ltz\"") === TimestampType) + assert(DataType.fromJson("\"timestamp_ntz\"") === TimestampNTZType) + checkError( + exception = intercept[SparkException] { + DataType.fromJson("\"timestamp_ltz(6)\"") + }, + condition = "UNSUPPORTED_TIMESTAMP_LTZ_NANOS_PRECISION", + parameters = Map( + "precision" -> "6", + "minPrecision" -> "7", + "maxPrecision" -> "9")) + checkError( + exception = intercept[SparkException] { + DataType.fromJson("\"timestamp_ntz(0)\"") + }, + condition = "UNSUPPORTED_TIMESTAMP_NTZ_NANOS_PRECISION", + parameters = Map( + "precision" -> "0", + "minPrecision" -> "7", + "maxPrecision" -> "9")) + checkError( + exception = intercept[SparkException] { + DataType.fromJson("\"timestamp_ntz(10)\"") + }, + condition = "UNSUPPORTED_TIMESTAMP_NTZ_NANOS_PRECISION", + parameters = Map( + "precision" -> "10", + "minPrecision" -> "7", + "maxPrecision" -> "9")) + } + test("singleton DataType equality after deserialization") { // Singleton DataTypes that use `case object` pattern matching (e.g., `case BinaryType =>`). 
// If a non-singleton instance is created (e.g., via Kryo deserialization which doesn't call From 86e157d9ff38a57bf26cab89a288fcde65f4f4a5 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 18 May 2026 15:18:26 +0200 Subject: [PATCH 02/10] Fix coding style --- .../apache/spark/sql/types/TimestampLTZNanosType.scala | 6 +++--- .../apache/spark/sql/types/TimestampNTZNanosType.scala | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala index ea33274867d89..878dc7182a857 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.errors.DataTypeErrors case class TimestampLTZNanosType(precision: Int) extends DatetimeType { if (precision < TimestampLTZNanosType.MIN_PRECISION || - precision > TimestampLTZNanosType.MAX_PRECISION) { + precision > TimestampLTZNanosType.MAX_PRECISION) { throw DataTypeErrors.unsupportedTimestampLtzPrecisionError( precision, TimestampLTZNanosType.MIN_PRECISION, @@ -43,8 +43,8 @@ case class TimestampLTZNanosType(precision: Int) extends DatetimeType { } /** - * Default size used by Spark for row-size estimation. Values are represented logically as - * epoch microseconds (Long, 8 bytes) plus nanoseconds within that micro (Short, 2 bytes). + * Default size used by Spark for row-size estimation. Values are represented logically as epoch + * microseconds (Long, 8 bytes) plus nanoseconds within that micro (Short, 2 bytes). 
*/ override def defaultSize: Int = 10 diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala index df147b6bf0ffd..d4295540e804c 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala @@ -21,8 +21,8 @@ import org.apache.spark.annotation.Unstable import org.apache.spark.sql.errors.DataTypeErrors /** - * Timestamp without time zone with fractional-second precision in the nanosecond-capable range - * (7 to 9 decimal digits). Values will use epoch microseconds plus nanoseconds within that + * Timestamp without time zone with fractional-second precision in the nanosecond-capable range (7 + * to 9 decimal digits). Values will use epoch microseconds plus nanoseconds within that * microsecond. * * @param precision @@ -35,7 +35,7 @@ import org.apache.spark.sql.errors.DataTypeErrors case class TimestampNTZNanosType(precision: Int) extends DatetimeType { if (precision < TimestampNTZNanosType.MIN_PRECISION || - precision > TimestampNTZNanosType.MAX_PRECISION) { + precision > TimestampNTZNanosType.MAX_PRECISION) { throw DataTypeErrors.unsupportedTimestampNtzPrecisionError( precision, TimestampNTZNanosType.MIN_PRECISION, @@ -43,8 +43,8 @@ case class TimestampNTZNanosType(precision: Int) extends DatetimeType { } /** - * Default size used by Spark for row-size estimation. Values are represented logically as - * epoch microseconds (Long, 8 bytes) plus nanoseconds within that micro (Short, 2 bytes). + * Default size used by Spark for row-size estimation. Values are represented logically as epoch + * microseconds (Long, 8 bytes) plus nanoseconds within that micro (Short, 2 bytes). 
*/ override def defaultSize: Int = 10 From 06ffd74e9e3ea103e175e5a367d865b97dbc93ae Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 18 May 2026 15:37:55 +0200 Subject: [PATCH 03/10] Improve error messages --- .../resources/error/error-conditions.json | 8 ++--- .../spark/sql/errors/DataTypeErrors.scala | 24 ++++--------- .../sql/types/TimestampLTZNanosType.scala | 5 +-- .../sql/types/TimestampNTZNanosType.scala | 5 +-- .../spark/sql/types/DataTypeSuite.scala | 35 ++++++------------- 5 files changed, 22 insertions(+), 55 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 08acc574e9588..508f90169f9ad 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -8503,15 +8503,15 @@ ], "sqlState" : "42000" }, - "UNSUPPORTED_TIMESTAMP_LTZ_NANOS_PRECISION" : { + "UNSUPPORTED_TIMESTAMP_LTZ_PRECISION" : { "message" : [ - "The fractional seconds precision of TIMESTAMP_LTZ is out of the supported range [, ] for nanosecond-capable timestamps." + "The seconds precision of TIMESTAMP_LTZ is out of the supported range [7, 9]." ], "sqlState" : "0A001" }, - "UNSUPPORTED_TIMESTAMP_NTZ_NANOS_PRECISION" : { + "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION" : { "message" : [ - "The fractional seconds precision of TIMESTAMP_NTZ is out of the supported range [, ] for nanosecond-capable timestamps." + "The seconds precision of TIMESTAMP_NTZ is out of the supported range [7, 9]." 
], "sqlState" : "0A001" }, diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala index 82d20f5647e12..16ec78821790f 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala @@ -276,29 +276,17 @@ private[sql] object DataTypeErrors extends DataTypeErrorsBase { cause = null) } - def unsupportedTimestampNtzPrecisionError( - precision: Int, - minPrecision: Int, - maxPrecision: Int): Throwable = { + def unsupportedTimestampNtzPrecisionError(precision: Int): Throwable = { new SparkException( - errorClass = "UNSUPPORTED_TIMESTAMP_NTZ_NANOS_PRECISION", - messageParameters = Map( - "precision" -> precision.toString, - "minPrecision" -> minPrecision.toString, - "maxPrecision" -> maxPrecision.toString), + errorClass = "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", + messageParameters = Map("precision" -> precision.toString), cause = null) } - def unsupportedTimestampLtzPrecisionError( - precision: Int, - minPrecision: Int, - maxPrecision: Int): Throwable = { + def unsupportedTimestampLtzPrecisionError(precision: Int): Throwable = { new SparkException( - errorClass = "UNSUPPORTED_TIMESTAMP_LTZ_NANOS_PRECISION", - messageParameters = Map( - "precision" -> precision.toString, - "minPrecision" -> minPrecision.toString, - "maxPrecision" -> maxPrecision.toString), + errorClass = "UNSUPPORTED_TIMESTAMP_LTZ_PRECISION", + messageParameters = Map("precision" -> precision.toString), cause = null) } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala index 878dc7182a857..52f534b02254c 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala @@ -36,10 
+36,7 @@ case class TimestampLTZNanosType(precision: Int) extends DatetimeType { if (precision < TimestampLTZNanosType.MIN_PRECISION || precision > TimestampLTZNanosType.MAX_PRECISION) { - throw DataTypeErrors.unsupportedTimestampLtzPrecisionError( - precision, - TimestampLTZNanosType.MIN_PRECISION, - TimestampLTZNanosType.MAX_PRECISION) + throw DataTypeErrors.unsupportedTimestampLtzPrecisionError(precision) } /** diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala index d4295540e804c..f75f6e8445296 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala @@ -36,10 +36,7 @@ case class TimestampNTZNanosType(precision: Int) extends DatetimeType { if (precision < TimestampNTZNanosType.MIN_PRECISION || precision > TimestampNTZNanosType.MAX_PRECISION) { - throw DataTypeErrors.unsupportedTimestampNtzPrecisionError( - precision, - TimestampNTZNanosType.MIN_PRECISION, - TimestampNTZNanosType.MAX_PRECISION) + throw DataTypeErrors.unsupportedTimestampNtzPrecisionError(precision) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index 51b9638fc165a..1cd075115d29b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -1470,20 +1470,14 @@ class DataTypeSuite extends SparkFunSuite { exception = intercept[SparkException] { TimestampLTZNanosType(p) }, - condition = "UNSUPPORTED_TIMESTAMP_LTZ_NANOS_PRECISION", - parameters = Map( - "precision" -> p.toString, - "minPrecision" -> "7", - "maxPrecision" -> "9")) + condition = "UNSUPPORTED_TIMESTAMP_LTZ_PRECISION", + parameters = Map("precision" -> p.toString)) 
checkError( exception = intercept[SparkException] { TimestampNTZNanosType(p) }, - condition = "UNSUPPORTED_TIMESTAMP_NTZ_NANOS_PRECISION", - parameters = Map( - "precision" -> p.toString, - "minPrecision" -> "7", - "maxPrecision" -> "9")) + condition = "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", + parameters = Map("precision" -> p.toString)) } } @@ -1510,29 +1504,20 @@ class DataTypeSuite extends SparkFunSuite { exception = intercept[SparkException] { DataType.fromJson("\"timestamp_ltz(6)\"") }, - condition = "UNSUPPORTED_TIMESTAMP_LTZ_NANOS_PRECISION", - parameters = Map( - "precision" -> "6", - "minPrecision" -> "7", - "maxPrecision" -> "9")) + condition = "UNSUPPORTED_TIMESTAMP_LTZ_PRECISION", + parameters = Map("precision" -> "6")) checkError( exception = intercept[SparkException] { DataType.fromJson("\"timestamp_ntz(0)\"") }, - condition = "UNSUPPORTED_TIMESTAMP_NTZ_NANOS_PRECISION", - parameters = Map( - "precision" -> "0", - "minPrecision" -> "7", - "maxPrecision" -> "9")) + condition = "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", + parameters = Map("precision" -> "0")) checkError( exception = intercept[SparkException] { DataType.fromJson("\"timestamp_ntz(10)\"") }, - condition = "UNSUPPORTED_TIMESTAMP_NTZ_NANOS_PRECISION", - parameters = Map( - "precision" -> "10", - "minPrecision" -> "7", - "maxPrecision" -> "9")) + condition = "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", + parameters = Map("precision" -> "10")) } test("singleton DataType equality after deserialization") { From e87f6ae21f1365fb3ffb2f0e91e76d600a9e3330 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 19 May 2026 14:38:44 +0200 Subject: [PATCH 04/10] Handle precision overflow in nanos timestamp JSON parsing Convert NumberFormatException from overflowing precision strings into UNSUPPORTED_TIMESTAMP_{LTZ,NTZ}_PRECISION with the original digit string preserved. 
Co-authored-by: Isaac --- .../apache/spark/sql/errors/DataTypeErrors.scala | 8 ++++---- .../org/apache/spark/sql/types/DataType.scala | 14 ++++++++++++-- .../spark/sql/types/TimestampLTZNanosType.scala | 2 +- .../spark/sql/types/TimestampNTZNanosType.scala | 2 +- .../apache/spark/sql/types/DataTypeSuite.scala | 16 ++++++++++++++++ 5 files changed, 34 insertions(+), 8 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala index 16ec78821790f..cb7e3aa3bed90 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala @@ -276,17 +276,17 @@ private[sql] object DataTypeErrors extends DataTypeErrorsBase { cause = null) } - def unsupportedTimestampNtzPrecisionError(precision: Int): Throwable = { + def unsupportedTimestampNtzPrecisionError(precision: String): Throwable = { new SparkException( errorClass = "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", - messageParameters = Map("precision" -> precision.toString), + messageParameters = Map("precision" -> precision), cause = null) } - def unsupportedTimestampLtzPrecisionError(precision: Int): Throwable = { + def unsupportedTimestampLtzPrecisionError(precision: String): Throwable = { new SparkException( errorClass = "UNSUPPORTED_TIMESTAMP_LTZ_PRECISION", - messageParameters = Map("precision" -> precision.toString), + messageParameters = Map("precision" -> precision), cause = null) } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala index e3d0e7c17b8cb..6e1d6645eb8e5 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -239,8 +239,18 @@ object DataType { case GEOGRAPHY_TYPE_CRS_ALG(crs, alg) => GeographyType(crs, alg) // For 
backwards compatibility, previously the type name of NullType is "null" case "null" => NullType - case TIMESTAMP_LTZ_NANOS_TYPE(precision) => TimestampLTZNanosType(precision.toInt) - case TIMESTAMP_NTZ_NANOS_TYPE(precision) => TimestampNTZNanosType(precision.toInt) + case TIMESTAMP_LTZ_NANOS_TYPE(precision) => + try TimestampLTZNanosType(precision.toInt) + catch { + case _: NumberFormatException => + throw DataTypeErrors.unsupportedTimestampLtzPrecisionError(precision) + } + case TIMESTAMP_NTZ_NANOS_TYPE(precision) => + try TimestampNTZNanosType(precision.toInt) + catch { + case _: NumberFormatException => + throw DataTypeErrors.unsupportedTimestampNtzPrecisionError(precision) + } case "timestamp_ltz" => TimestampType case other => otherTypes.getOrElse( diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala index 52f534b02254c..9b235bd1d4b91 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala @@ -36,7 +36,7 @@ case class TimestampLTZNanosType(precision: Int) extends DatetimeType { if (precision < TimestampLTZNanosType.MIN_PRECISION || precision > TimestampLTZNanosType.MAX_PRECISION) { - throw DataTypeErrors.unsupportedTimestampLtzPrecisionError(precision) + throw DataTypeErrors.unsupportedTimestampLtzPrecisionError(precision.toString) } /** diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala index f75f6e8445296..7e58f3b0777c7 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala @@ -36,7 +36,7 @@ case class TimestampNTZNanosType(precision: Int) extends DatetimeType { if (precision < 
TimestampNTZNanosType.MIN_PRECISION || precision > TimestampNTZNanosType.MAX_PRECISION) { - throw DataTypeErrors.unsupportedTimestampNtzPrecisionError(precision) + throw DataTypeErrors.unsupportedTimestampNtzPrecisionError(precision.toString) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index 1cd075115d29b..a8bfdc4515753 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -1518,6 +1518,22 @@ class DataTypeSuite extends SparkFunSuite { }, condition = "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", parameters = Map("precision" -> "10")) + + // Precision strings that overflow Int should surface as UNSUPPORTED_*_PRECISION + // (with the original digit string preserved), not as a raw NumberFormatException. + val overflowing = "9" * 20 + checkError( + exception = intercept[SparkException] { + DataType.fromJson(s"""\"timestamp_ltz($overflowing)\"""") + }, + condition = "UNSUPPORTED_TIMESTAMP_LTZ_PRECISION", + parameters = Map("precision" -> overflowing)) + checkError( + exception = intercept[SparkException] { + DataType.fromJson(s"""\"timestamp_ntz($overflowing)\"""") + }, + condition = "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", + parameters = Map("precision" -> overflowing)) } test("singleton DataType equality after deserialization") { From 14106e7c055d6bd3a01714b1b0d8683abd9a16e6 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 19 May 2026 14:45:39 +0200 Subject: [PATCH 05/10] Drop redundant nanos timestamp entries from otherTypes map The regex in nameToType already handles every valid precision for timestamp_ltz(n) / timestamp_ntz(n) and emits a precision-specific error for invalid ones, so the parallel enumeration was dead lookup. 
Co-authored-by: Isaac --- .../main/scala/org/apache/spark/sql/types/DataType.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala index 6e1d6645eb8e5..90b7a887133dd 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -210,11 +210,7 @@ object DataType { YearMonthIntervalType(YEAR, MONTH), TimestampNTZType, VariantType) ++ - (TimeType.MIN_PRECISION to TimeType.MAX_PRECISION).map(TimeType(_)) ++ - (TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION) - .map(TimestampLTZNanosType(_)) ++ - (TimestampNTZNanosType.MIN_PRECISION to TimestampNTZNanosType.MAX_PRECISION) - .map(TimestampNTZNanosType(_))) + (TimeType.MIN_PRECISION to TimeType.MAX_PRECISION).map(TimeType(_))) .map(t => t.typeName -> t) .toMap } From 8b1e2ab21912d03125f0a5657ae1da4595b6768f Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 19 May 2026 15:21:57 +0200 Subject: [PATCH 06/10] Clarify scaladoc on nanos timestamp types Anchor both types to their parameterless counterparts (TimestampType and TimestampNTZType) and state plainly that no time zone is stored, replacing the ambiguous "time zone affects interpretation only" phrase that could read as if the type carried a zone tag. 
Co-authored-by: Isaac --- .../apache/spark/sql/types/TimestampLTZNanosType.scala | 6 ++++-- .../apache/spark/sql/types/TimestampNTZNanosType.scala | 8 +++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala index 9b235bd1d4b91..427905101203b 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala @@ -22,8 +22,10 @@ import org.apache.spark.sql.errors.DataTypeErrors /** * Timestamp with local time zone with fractional-second precision in the nanosecond-capable range - * (7 to 9 decimal digits). Values will use epoch microseconds plus nanoseconds within that - * microsecond; the time zone affects interpretation only. + * (7 to 9 decimal digits). Represents a time instant analogous to `TimestampType`, but with + * sub-microsecond precision: valid range is [0001-01-01T00:00:00.000000000Z, + * 9999-12-31T23:59:59.999999999Z] in the proleptic Gregorian calendar at UTC+00:00. No time zone + * is stored; the session time zone is used when converting values to and from text. * * @param precision * Number of digits of fractional seconds for this SQL type. 
The valid values are 7, 8, and 9 diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala index 7e58f3b0777c7..d58d85b11a68a 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala @@ -21,9 +21,11 @@ import org.apache.spark.annotation.Unstable import org.apache.spark.sql.errors.DataTypeErrors /** - * Timestamp without time zone with fractional-second precision in the nanosecond-capable range (7 - * to 9 decimal digits). Values will use epoch microseconds plus nanoseconds within that - * microsecond. + * Timestamp without time zone with fractional-second precision in the nanosecond-capable range + * (7 to 9 decimal digits). Represents a local date-time analogous to `TimestampNTZType`, but with + * sub-microsecond precision: valid range is [0001-01-01T00:00:00.000000000, + * 9999-12-31T23:59:59.999999999] in the proleptic Gregorian calendar. The value is independent of + * any time zone. To represent an absolute point in time, use `TimestampLTZNanosType` instead. * * @param precision * Number of digits of fractional seconds for this SQL type. The valid values are 7, 8, and 9 From 4730b9b6a18fff93dc5d6f27b807776e4cf72bc7 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 19 May 2026 15:42:58 +0200 Subject: [PATCH 07/10] Cover malformed JSON forms and DRY the SPARK-56876 parser test Drive both timestamp_ltz and timestamp_ntz through a single loop and add coverage for malformed precision forms (negative, empty, non-numeric, uppercase) that fall through to INVALID_JSON_DATA_TYPE.
Co-authored-by: Isaac --- .../spark/sql/types/DataTypeSuite.scala | 80 +++++++++---------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index a8bfdc4515753..22fa7cd17dbc0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -1482,13 +1482,46 @@ class DataTypeSuite extends SparkFunSuite { } test("SPARK-56876: parse timestamp with nanosecond precision from JSON") { - TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION foreach { n => - assert(DataType.fromJson(s"\"timestamp_ltz($n)\"") === TimestampLTZNanosType(n)) - assert(DataType.fromJson(s"\"timestamp_ltz( $n)\"") === TimestampLTZNanosType(n)) - assert(DataType.fromJson(s"\"timestamp_ntz($n)\"") === TimestampNTZNanosType(n)) - assert(DataType.fromJson(s"\"timestamp_ntz($n )\"") === TimestampNTZNanosType(n)) + // (json-type-name, precision-error-condition, factory) + val variants = Seq[(String, String, Int => DataType)]( + ("timestamp_ltz", "UNSUPPORTED_TIMESTAMP_LTZ_PRECISION", TimestampLTZNanosType(_)), + ("timestamp_ntz", "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", TimestampNTZNanosType(_))) + val overflowing = "9" * 20 + + variants.foreach { case (name, precisionError, factory) => + // Happy path across valid precisions, tolerant of surrounding whitespace. + TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION foreach { n => + assert(DataType.fromJson(s"""\"$name($n)\"""") === factory(n)) + assert(DataType.fromJson(s"""\"$name( $n)\"""") === factory(n)) + assert(DataType.fromJson(s"""\"$name($n )\"""") === factory(n)) + } + + // Out-of-range precisions surface as UNSUPPORTED_TIMESTAMP_*_PRECISION. 
The overflowing + // case verifies the original digit string is preserved instead of leaking + // NumberFormatException. + Seq("0", "6", "10", overflowing).foreach { p => + checkError( + exception = intercept[SparkException] { + DataType.fromJson(s"""\"$name($p)\"""") + }, + condition = precisionError, + parameters = Map("precision" -> p)) + } + + // Malformed precision forms that don't match the regex fall through to + // INVALID_JSON_DATA_TYPE: negative, empty parens, non-numeric, and uppercase + // (JSON type-name convention is lowercase). + Seq(s"$name(-1)", s"$name()", s"$name(abc)", s"${name.toUpperCase}(7)").foreach { raw => + checkError( + exception = intercept[SparkIllegalArgumentException] { + DataType.fromJson(s"""\"$raw\"""") + }, + condition = "INVALID_JSON_DATA_TYPE", + parameters = Map("invalidType" -> raw)) + } } - // JSON round-trip for nanos timestamp types inside struct, array, and map + + // JSON round-trip for nanos timestamp types inside struct, array, and map. val structWithNanos = StructType(Seq( StructField("ntz", TimestampNTZNanosType(7)), StructField("ltz", TimestampLTZNanosType(8)))) @@ -1498,42 +1531,9 @@ class DataTypeSuite extends SparkFunSuite { val mapOfNanos = MapType(StringType, TimestampNTZNanosType(7), valueContainsNull = true) assert(DataType.fromJson(mapOfNanos.json) === mapOfNanos) + // Bare names without parens still map to the legacy single-precision types. 
assert(DataType.fromJson("\"timestamp_ltz\"") === TimestampType) assert(DataType.fromJson("\"timestamp_ntz\"") === TimestampNTZType) - checkError( - exception = intercept[SparkException] { - DataType.fromJson("\"timestamp_ltz(6)\"") - }, - condition = "UNSUPPORTED_TIMESTAMP_LTZ_PRECISION", - parameters = Map("precision" -> "6")) - checkError( - exception = intercept[SparkException] { - DataType.fromJson("\"timestamp_ntz(0)\"") - }, - condition = "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", - parameters = Map("precision" -> "0")) - checkError( - exception = intercept[SparkException] { - DataType.fromJson("\"timestamp_ntz(10)\"") - }, - condition = "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", - parameters = Map("precision" -> "10")) - - // Precision strings that overflow Int should surface as UNSUPPORTED_*_PRECISION - // (with the original digit string preserved), not as a raw NumberFormatException. - val overflowing = "9" * 20 - checkError( - exception = intercept[SparkException] { - DataType.fromJson(s"""\"timestamp_ltz($overflowing)\"""") - }, - condition = "UNSUPPORTED_TIMESTAMP_LTZ_PRECISION", - parameters = Map("precision" -> overflowing)) - checkError( - exception = intercept[SparkException] { - DataType.fromJson(s"""\"timestamp_ntz($overflowing)\"""") - }, - condition = "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", - parameters = Map("precision" -> overflowing)) } test("singleton DataType equality after deserialization") { From 89616ff2614327d32dc4ca82db9741146896b526 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 19 May 2026 15:48:48 +0200 Subject: [PATCH 08/10] Use Locale.ROOT in DataTypeSuite to satisfy scalastyle Co-authored-by: Isaac --- .../scala/org/apache/spark/sql/types/DataTypeSuite.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index 22fa7cd17dbc0..bbe9600e94c14 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.types +import java.util.Locale + import com.fasterxml.jackson.core.JsonParseException import org.json4s.jackson.JsonMethods @@ -1511,7 +1513,11 @@ class DataTypeSuite extends SparkFunSuite { // Malformed precision forms that don't match the regex fall through to // INVALID_JSON_DATA_TYPE: negative, empty parens, non-numeric, and uppercase // (JSON type-name convention is lowercase). - Seq(s"$name(-1)", s"$name()", s"$name(abc)", s"${name.toUpperCase}(7)").foreach { raw => + Seq( + s"$name(-1)", + s"$name()", + s"$name(abc)", + s"${name.toUpperCase(Locale.ROOT)}(7)").foreach { raw => checkError( exception = intercept[SparkIllegalArgumentException] { DataType.fromJson(s"""\"$raw\"""") From 63f2bc3a983d9afd30cc80f87382c02ed326572c Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 19 May 2026 17:26:20 +0200 Subject: [PATCH 09/10] Apply scalafmt to TimestampNTZNanosType scaladoc Co-authored-by: Isaac --- .../org/apache/spark/sql/types/TimestampNTZNanosType.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala index d58d85b11a68a..76d57dfbe5c65 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala @@ -21,8 +21,8 @@ import org.apache.spark.annotation.Unstable import org.apache.spark.sql.errors.DataTypeErrors /** - * Timestamp without time zone with fractional-second precision in the nanosecond-capable range - * (7 to 9 decimal digits). 
Represents a local date-time analogous to `TimestampNTZType`, but with + * Timestamp without time zone with fractional-second precision in the nanosecond-capable range (7 + * to 9 decimal digits). Represents a local date-time analogous to `TimestampNTZType`, but with * sub-microsecond precision: valid range is [0001-01-01T00:00:00.000000000, * 9999-12-31T23:59:59.999999999] in the proleptic Gregorian calendar. The value is independent of * any time zone. To represent an absolute point in time, use `TimestampLTZNanosType` instead. From 4714bd689b59e9fa5f00ea3e139d5910232009e4 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 20 May 2026 21:49:02 +0200 Subject: [PATCH 10/10] Collapse nanos timestamp precision errors into INVALID_TIMESTAMP_PRECISION Replace UNSUPPORTED_TIMESTAMP_{LTZ,NTZ}_PRECISION (sqlState 0A001 was "feature not supported") with a single INVALID_TIMESTAMP_PRECISION parameterized on <type>, sqlState 22023 ("invalid parameter value"). Message now points users at parameterless TIMESTAMP_LTZ / TIMESTAMP_NTZ for precision <= 6, addressing peter-toth's review comment. Co-authored-by: Isaac --- .../resources/error/error-conditions.json | 18 +++++---------- .../spark/sql/errors/DataTypeErrors.scala | 13 +++-------- .../org/apache/spark/sql/types/DataType.scala | 4 ++-- .../sql/types/TimestampLTZNanosType.scala | 2 +- .../sql/types/TimestampNTZNanosType.scala | 2 +- .../spark/sql/types/DataTypeSuite.scala | 22 +++++++++---------- 6 files changed, 24 insertions(+), 37 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 61a72c1ac6478..21e33bea1f390 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -4751,6 +4751,12 @@ ], "sqlState" : "42K0F" }, + "INVALID_TIMESTAMP_PRECISION" : { + "message" : [ + "The seconds precision <precision> of <type> is invalid.
Expected an integer in [7, 9], or parameterless <type> for precision <= 6." + ], + "sqlState" : "22023" + }, "INVALID_TIMEZONE" : { "message" : [ "The timezone: <timeZone> is invalid. The timezone must be either a region-based zone ID or a zone offset. Region IDs must have the form 'area/city', such as 'America/Los_Angeles'. Zone offsets must be in the format '(+|-)HH', '(+|-)HH:mm’ or '(+|-)HH:mm:ss', e.g '-08' , '+01:00' or '-13:33:33', and must be in the range from -18:00 to +18:00. 'Z' and 'UTC' are accepted as synonyms for '+00:00'." @@ -8534,18 +8540,6 @@ ], "sqlState" : "42000" }, - "UNSUPPORTED_TIMESTAMP_LTZ_PRECISION" : { - "message" : [ - "The seconds precision <precision> of TIMESTAMP_LTZ is out of the supported range [7, 9]." - ], - "sqlState" : "0A001" - }, - "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION" : { - "message" : [ - "The seconds precision <precision> of TIMESTAMP_NTZ is out of the supported range [7, 9]." - ], - "sqlState" : "0A001" - }, "UNSUPPORTED_TIME_PRECISION" : { "message" : [ "The seconds precision <precision> of the TIME data type is out of the supported range [0, 6]."
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala index cb7e3aa3bed90..6e8cb8077be81 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala @@ -276,17 +276,10 @@ private[sql] object DataTypeErrors extends DataTypeErrorsBase { cause = null) } - def unsupportedTimestampNtzPrecisionError(precision: String): Throwable = { + def invalidTimestampPrecisionError(precision: String, typeName: String): Throwable = { new SparkException( - errorClass = "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", - messageParameters = Map("precision" -> precision), - cause = null) - } - - def unsupportedTimestampLtzPrecisionError(precision: String): Throwable = { - new SparkException( - errorClass = "UNSUPPORTED_TIMESTAMP_LTZ_PRECISION", - messageParameters = Map("precision" -> precision), + errorClass = "INVALID_TIMESTAMP_PRECISION", + messageParameters = Map("precision" -> precision, "type" -> typeName), cause = null) } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala index 90b7a887133dd..fbd70cf8b899c 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -239,13 +239,13 @@ object DataType { try TimestampLTZNanosType(precision.toInt) catch { case _: NumberFormatException => - throw DataTypeErrors.unsupportedTimestampLtzPrecisionError(precision) + throw DataTypeErrors.invalidTimestampPrecisionError(precision, "TIMESTAMP_LTZ") } case TIMESTAMP_NTZ_NANOS_TYPE(precision) => try TimestampNTZNanosType(precision.toInt) catch { case _: NumberFormatException => - throw DataTypeErrors.unsupportedTimestampNtzPrecisionError(precision) + throw DataTypeErrors.invalidTimestampPrecisionError(precision, 
"TIMESTAMP_NTZ") } case "timestamp_ltz" => TimestampType case other => diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala index 427905101203b..7d65a492f544c 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampLTZNanosType.scala @@ -38,7 +38,7 @@ case class TimestampLTZNanosType(precision: Int) extends DatetimeType { if (precision < TimestampLTZNanosType.MIN_PRECISION || precision > TimestampLTZNanosType.MAX_PRECISION) { - throw DataTypeErrors.unsupportedTimestampLtzPrecisionError(precision.toString) + throw DataTypeErrors.invalidTimestampPrecisionError(precision.toString, "TIMESTAMP_LTZ") } /** diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala index 76d57dfbe5c65..722e0f2d25edc 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampNTZNanosType.scala @@ -38,7 +38,7 @@ case class TimestampNTZNanosType(precision: Int) extends DatetimeType { if (precision < TimestampNTZNanosType.MIN_PRECISION || precision > TimestampNTZNanosType.MAX_PRECISION) { - throw DataTypeErrors.unsupportedTimestampNtzPrecisionError(precision.toString) + throw DataTypeErrors.invalidTimestampPrecisionError(precision.toString, "TIMESTAMP_NTZ") } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index bbe9600e94c14..1a7524dbc5a73 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -1472,25 +1472,25 @@ class DataTypeSuite 
extends SparkFunSuite { exception = intercept[SparkException] { TimestampLTZNanosType(p) }, - condition = "UNSUPPORTED_TIMESTAMP_LTZ_PRECISION", - parameters = Map("precision" -> p.toString)) + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> p.toString, "type" -> "TIMESTAMP_LTZ")) checkError( exception = intercept[SparkException] { TimestampNTZNanosType(p) }, - condition = "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", - parameters = Map("precision" -> p.toString)) + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> p.toString, "type" -> "TIMESTAMP_NTZ")) } } test("SPARK-56876: parse timestamp with nanosecond precision from JSON") { - // (json-type-name, precision-error-condition, factory) + // (json-type-name, sql-type-name-in-error, factory) val variants = Seq[(String, String, Int => DataType)]( - ("timestamp_ltz", "UNSUPPORTED_TIMESTAMP_LTZ_PRECISION", TimestampLTZNanosType(_)), - ("timestamp_ntz", "UNSUPPORTED_TIMESTAMP_NTZ_PRECISION", TimestampNTZNanosType(_))) + ("timestamp_ltz", "TIMESTAMP_LTZ", TimestampLTZNanosType(_)), + ("timestamp_ntz", "TIMESTAMP_NTZ", TimestampNTZNanosType(_))) val overflowing = "9" * 20 - variants.foreach { case (name, precisionError, factory) => + variants.foreach { case (name, sqlTypeName, factory) => // Happy path across valid precisions, tolerant of surrounding whitespace. TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION foreach { n => assert(DataType.fromJson(s"""\"$name($n)\"""") === factory(n)) @@ -1498,7 +1498,7 @@ class DataTypeSuite extends SparkFunSuite { assert(DataType.fromJson(s"""\"$name($n )\"""") === factory(n)) } - // Out-of-range precisions surface as UNSUPPORTED_TIMESTAMP_*_PRECISION. The overflowing + // Out-of-range precisions surface as INVALID_TIMESTAMP_PRECISION. The overflowing // case verifies the original digit string is preserved instead of leaking // NumberFormatException. 
Seq("0", "6", "10", overflowing).foreach { p => @@ -1506,8 +1506,8 @@ class DataTypeSuite extends SparkFunSuite { exception = intercept[SparkException] { DataType.fromJson(s"""\"$name($p)\"""") }, - condition = precisionError, - parameters = Map("precision" -> p)) + condition = "INVALID_TIMESTAMP_PRECISION", + parameters = Map("precision" -> p, "type" -> sqlTypeName)) } // Malformed precision forms that don't match the regex fall through to