Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -4751,6 +4751,12 @@
],
"sqlState" : "42K0F"
},
"INVALID_TIMESTAMP_PRECISION" : {
"message" : [
"The seconds precision <precision> of <type> is invalid. Expected an integer in [7, 9], or parameterless <type> for precision <= 6."
],
"sqlState" : "22023"
},
"INVALID_TIMEZONE" : {
"message" : [
"The timezone: <timeZone> is invalid. The timezone must be either a region-based zone ID or a zone offset. Region IDs must have the form 'area/city', such as 'America/Los_Angeles'. Zone offsets must be in the format '(+|-)HH', '(+|-)HH:mm’ or '(+|-)HH:mm:ss', e.g '-08' , '+01:00' or '-13:33:33', and must be in the range from -18:00 to +18:00. 'Z' and 'UTC' are accepted as synonyms for '+00:00'."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,4 +275,11 @@ private[sql] object DataTypeErrors extends DataTypeErrorsBase {
messageParameters = Map("precision" -> precision.toString),
cause = null)
}

/**
 * Builds the error reported when a parameterized timestamp type is given an unsupported
 * fractional-seconds precision.
 *
 * @param precision the offending precision as text, passed as the `precision` message parameter
 * @param typeName the SQL type name, passed as the `type` message parameter
 * @return a `SparkException` with error class `INVALID_TIMESTAMP_PRECISION` and no cause
 */
def invalidTimestampPrecisionError(precision: String, typeName: String): Throwable = {
  val messageArgs = Map("precision" -> precision, "type" -> typeName)
  new SparkException(
    errorClass = "INVALID_TIMESTAMP_PRECISION",
    messageParameters = messageArgs,
    cause = null)
}
}
14 changes: 14 additions & 0 deletions sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ object DataType {
private val CHAR_TYPE = """char\(\s*(\d+)\s*\)""".r
private val VARCHAR_TYPE = """varchar\(\s*(\d+)\s*\)""".r
private val STRING_WITH_COLLATION = """string\s+collate\s+(\w+)""".r
private val TIMESTAMP_LTZ_NANOS_TYPE = """timestamp_ltz\(\s*(\d+)\s*\)""".r
private val TIMESTAMP_NTZ_NANOS_TYPE = """timestamp_ntz\(\s*(\d+)\s*\)""".r
private val GEOMETRY_TYPE = """geometry\(\s*([\w]+:-?[\w]+)\s*\)""".r
private val GEOGRAPHY_TYPE_CRS = """geography\(\s*(\w+:-?\w+)\s*\)""".r
private val GEOGRAPHY_TYPE_ALG = """geography\(\s*(\w+)\s*\)""".r
Expand Down Expand Up @@ -233,6 +235,18 @@ object DataType {
case GEOGRAPHY_TYPE_CRS_ALG(crs, alg) => GeographyType(crs, alg)
// For backwards compatibility, previously the type name of NullType is "null"
case "null" => NullType
case TIMESTAMP_LTZ_NANOS_TYPE(precision) =>
try TimestampLTZNanosType(precision.toInt)
catch {
case _: NumberFormatException =>
throw DataTypeErrors.invalidTimestampPrecisionError(precision, "TIMESTAMP_LTZ")
}
case TIMESTAMP_NTZ_NANOS_TYPE(precision) =>
try TimestampNTZNanosType(precision.toInt)
catch {
case _: NumberFormatException =>
throw DataTypeErrors.invalidTimestampPrecisionError(precision, "TIMESTAMP_NTZ")
}
case "timestamp_ltz" => TimestampType
case other =>
otherTypes.getOrElse(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.types

import org.apache.spark.annotation.Unstable
import org.apache.spark.sql.errors.DataTypeErrors

/**
 * Timestamp with local time zone with fractional-second precision in the nanosecond-capable range
 * (7 to 9 decimal digits). Represents a time instant analogous to `TimestampType`, but with
 * sub-microsecond precision: valid range is [0001-01-01T00:00:00.000000000Z,
 * 9999-12-31T23:59:59.999999999Z] in the proleptic Gregorian calendar at UTC+00:00. No time zone
 * is stored; the session time zone is used when converting values to and from text.
 *
 * The constructor validates `precision`: a value outside
 * [`TimestampLTZNanosType.MIN_PRECISION`, `TimestampLTZNanosType.MAX_PRECISION`] makes it throw
 * the error built by `DataTypeErrors.invalidTimestampPrecisionError` for the type name
 * "TIMESTAMP_LTZ".
 *
 * @param precision
 *   Number of digits of fractional seconds for this SQL type. The valid values are 7, 8, and 9
 *   where 9 means nanosecond precision.
 *
 * @since 4.2.0
 */
@Unstable
case class TimestampLTZNanosType(precision: Int) extends DatetimeType {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current timestamp type doesn't include "LTZ" in the name. Why not go with TimestampNanosType here?

Copy link
Copy Markdown
Member Author

@MaxGekk MaxGekk May 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First of all, because the SPIP https://docs.google.com/document/d/1DeW15QueI4PdRyPm6C6jsTZFmIjbXX2j4h-Ja5W_fsg/edit?usp=sharing defines this class with such name. Probably you might ask why I named it in this way in the SPIP. So, there are a few reasons:

  1. Pairs with TimestampNTZNanosType. Spark already has two SQL timestamp families: with local time zone (TimestampType / TIMESTAMP_LTZ) and without (TimestampNTZType / TIMESTAMP_NTZ). The nanosecond-capable types are the same split. Alone TimestampNanosType reads as “the” nano timestamp type and does not signal which semantics apply.

  2. Matches SQL and typeName. The class backs timestamp_ltz(p). TimestampLTZNanosType lines up with TimestampNTZNanosType and with the SPIP/SQL names; TimestampNanosType would mirror neither timestamp_ntz nor the explicit TIMESTAMP_LTZ(n) surface.

  3. Consistency with how Spark names the NTZ side. TimestampType omits “LTZ” for history (timestamp defaulted to session-local semantics), but TimestampNTZType is explicit because the second variant exists. For new APIs where both variants are first-class, being explicit on both sides avoids the ambiguity that already bites people (TimestampType vs “timestamp with TZ” in docs).

  4. Safer for pattern matches and downstream code. Much of the codebase branches TimestampType vs TimestampNTZType. TimestampLTZNanosType + TimestampNTZNanosType extend that model predictably; TimestampNanosType would be assumed LTZ-by-analogy-to-TimestampType, which is easy to get wrong in reviews and refactors.


// Validate eagerly in the constructor body: precisions outside [MIN_PRECISION, MAX_PRECISION]
// are rejected before an instance can be observed.
if (precision < TimestampLTZNanosType.MIN_PRECISION ||
precision > TimestampLTZNanosType.MAX_PRECISION) {
throw DataTypeErrors.invalidTimestampPrecisionError(precision.toString, "TIMESTAMP_LTZ")
}

/**
 * Default size used by Spark for row-size estimation. Values are represented logically as epoch
 * microseconds (Long, 8 bytes) plus nanoseconds within that micro (Short, 2 bytes).
 * The result is the same for every precision.
 */
override def defaultSize: Int = 10

/** Lower-case type name that embeds the precision, e.g. `timestamp_ltz(9)`. */
override def typeName: String = s"timestamp_ltz($precision)"

/** Returns this instance unchanged. */
private[spark] override def asNullable: TimestampLTZNanosType = this
}

/** Precision bounds and the no-argument factory for `TimestampLTZNanosType`. */
object TimestampLTZNanosType {

  /** Smallest accepted number of fractional-second digits. */
  val MIN_PRECISION: Int = 7

  /** Largest accepted number of fractional-second digits. */
  val MAX_PRECISION: Int = 9

  /** Number of fractional-second digits that corresponds to nanoseconds. */
  val NANOS_PRECISION: Int = 9

  /** Precision used by the no-argument factory; equal to [[NANOS_PRECISION]]. */
  val DEFAULT_PRECISION: Int = NANOS_PRECISION

  /** Creates the type with [[DEFAULT_PRECISION]] fractional-second digits. */
  def apply(): TimestampLTZNanosType = TimestampLTZNanosType(DEFAULT_PRECISION)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.types

import org.apache.spark.annotation.Unstable
import org.apache.spark.sql.errors.DataTypeErrors

/**
 * A timestamp type without time zone whose fractional seconds carry 7 to 9 decimal digits, that
 * is, precision finer than microseconds. It is the higher-precision counterpart of
 * `TimestampNTZType` and models a local date-time in the range
 * [0001-01-01T00:00:00.000000000, 9999-12-31T23:59:59.999999999] of the proleptic Gregorian
 * calendar. Values do not depend on any time zone; use `TimestampLTZNanosType` when an absolute
 * point in time is needed.
 *
 * Constructing an instance with a precision outside
 * [`TimestampNTZNanosType.MIN_PRECISION`, `TimestampNTZNanosType.MAX_PRECISION`] throws the error
 * built by `DataTypeErrors.invalidTimestampPrecisionError` for the type name "TIMESTAMP_NTZ".
 *
 * @param precision
 *   Count of fractional-second digits of this SQL type: 7, 8 or 9, with 9 denoting nanoseconds.
 *
 * @since 4.2.0
 */
@Unstable
case class TimestampNTZNanosType(precision: Int) extends DatetimeType {

  // Fail fast: an instance with an unsupported precision must never be observable.
  if (!(precision >= TimestampNTZNanosType.MIN_PRECISION &&
      precision <= TimestampNTZNanosType.MAX_PRECISION)) {
    throw DataTypeErrors.invalidTimestampPrecisionError(precision.toString, "TIMESTAMP_NTZ")
  }

  /**
   * Size in bytes that Spark assumes per value when estimating row sizes: the logical
   * representation is epoch microseconds (a Long, 8 bytes) followed by the nanoseconds within
   * that microsecond (a Short, 2 bytes), i.e. 10 bytes in total.
   */
  override def defaultSize: Int = java.lang.Long.BYTES + java.lang.Short.BYTES

  /** Lower-case type name that embeds the precision, e.g. `timestamp_ntz(9)`. */
  override def typeName: String = s"timestamp_ntz($precision)"

  /** Returns this instance unchanged. */
  private[spark] override def asNullable: TimestampNTZNanosType = this
}

/** Precision bounds and the no-argument factory for `TimestampNTZNanosType`. */
object TimestampNTZNanosType {

  /** Smallest accepted number of fractional-second digits. */
  val MIN_PRECISION: Int = 7

  /** Largest accepted number of fractional-second digits. */
  val MAX_PRECISION: Int = 9

  /** Number of fractional-second digits that corresponds to nanoseconds. */
  val NANOS_PRECISION: Int = 9

  /** Precision used by the no-argument factory; equal to [[NANOS_PRECISION]]. */
  val DEFAULT_PRECISION: Int = NANOS_PRECISION

  /** Creates the type with [[DEFAULT_PRECISION]] fractional-second digits. */
  def apply(): TimestampNTZNanosType = TimestampNTZNanosType(DEFAULT_PRECISION)
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.spark.sql.types

import java.util.Locale

import com.fasterxml.jackson.core.JsonParseException
import org.json4s.jackson.JsonMethods

Expand Down Expand Up @@ -255,6 +257,13 @@ class DataTypeSuite extends SparkFunSuite {
checkDataTypeFromJson(TimestampNTZType)
checkDataTypeFromDDL(TimestampNTZType)

checkDataTypeFromJson(TimestampLTZNanosType(TimestampLTZNanosType.MIN_PRECISION))
checkDataTypeFromJson(TimestampLTZNanosType(8))
checkDataTypeFromJson(TimestampLTZNanosType(TimestampLTZNanosType.MAX_PRECISION))
checkDataTypeFromJson(TimestampNTZNanosType(TimestampNTZNanosType.MIN_PRECISION))
checkDataTypeFromJson(TimestampNTZNanosType(8))
checkDataTypeFromJson(TimestampNTZNanosType(TimestampNTZNanosType.MAX_PRECISION))

checkDataTypeFromJson(StringType)
checkDataTypeFromDDL(StringType)

Expand Down Expand Up @@ -403,6 +412,10 @@ class DataTypeSuite extends SparkFunSuite {
dayTimeIntervalTypes.foreach(checkDefaultSize(_, 8))
checkDefaultSize(TimeType(TimeType.MIN_PRECISION), 8)
checkDefaultSize(TimeType(TimeType.MAX_PRECISION), 8)
checkDefaultSize(TimestampLTZNanosType(TimestampLTZNanosType.MIN_PRECISION), 10)
checkDefaultSize(TimestampLTZNanosType(TimestampLTZNanosType.MAX_PRECISION), 10)
checkDefaultSize(TimestampNTZNanosType(TimestampNTZNanosType.MIN_PRECISION), 10)
checkDefaultSize(TimestampNTZNanosType(TimestampNTZNanosType.MAX_PRECISION), 10)

def checkEqualsIgnoreCompatibleNullability(
from: DataType,
Expand Down Expand Up @@ -1448,6 +1461,87 @@ class DataTypeSuite extends SparkFunSuite {
parameters = Map("error" -> "'time'", "hint" -> ""))
}

test("SPARK-56876: precisions of nanos-capable TIMESTAMP_LTZ and TIMESTAMP_NTZ types") {
  // Every supported precision must be rendered in the SQL form of both types.
  val supported = TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION
  for (digits <- supported) {
    assert(TimestampLTZNanosType(digits).sql === s"TIMESTAMP_LTZ($digits)")
    assert(TimestampNTZNanosType(digits).sql === s"TIMESTAMP_NTZ($digits)")
  }

  // Asserts that building a type with `digits` fails with INVALID_TIMESTAMP_PRECISION and that
  // the error carries the rejected precision together with the SQL type name.
  def assertRejected(sqlName: String, digits: Int)(construct: Int => DataType): Unit = {
    checkError(
      exception = intercept[SparkException] {
        construct(digits)
      },
      condition = "INVALID_TIMESTAMP_PRECISION",
      parameters = Map("precision" -> digits.toString, "type" -> sqlName))
  }

  // The neighbours of the valid range and both Int extremes are rejected by both constructors.
  for (digits <- Seq(6, 10, Int.MinValue, Int.MaxValue)) {
    assertRejected("TIMESTAMP_LTZ", digits)(TimestampLTZNanosType(_))
    assertRejected("TIMESTAMP_NTZ", digits)(TimestampNTZNanosType(_))
  }
}

// Checks DataType.fromJson for the parameterized names "timestamp_ltz(p)" and
// "timestamp_ntz(p)": valid precisions, invalid precisions, malformed spellings, nesting inside
// complex types, and the bare names without parentheses.
test("SPARK-56876: parse timestamp with nanosecond precision from JSON") {
// (json-type-name, sql-type-name-in-error, factory)
val variants = Seq[(String, String, Int => DataType)](
("timestamp_ltz", "TIMESTAMP_LTZ", TimestampLTZNanosType(_)),
("timestamp_ntz", "TIMESTAMP_NTZ", TimestampNTZNanosType(_)))
// Twenty nines: a digit string that is far too large to be converted to an Int.
val overflowing = "9" * 20

variants.foreach { case (name, sqlTypeName, factory) =>
// Happy path across valid precisions, tolerant of surrounding whitespace.
// The LTZ bounds are used for both variants; the NTZ companion declares the same values.
TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION foreach { n =>
assert(DataType.fromJson(s"""\"$name($n)\"""") === factory(n))
assert(DataType.fromJson(s"""\"$name( $n)\"""") === factory(n))
assert(DataType.fromJson(s"""\"$name($n )\"""") === factory(n))
}

// Out-of-range precisions surface as INVALID_TIMESTAMP_PRECISION. The overflowing
// case verifies the original digit string is preserved instead of leaking
// NumberFormatException.
Seq("0", "6", "10", overflowing).foreach { p =>
checkError(
exception = intercept[SparkException] {
DataType.fromJson(s"""\"$name($p)\"""")
},
condition = "INVALID_TIMESTAMP_PRECISION",
parameters = Map("precision" -> p, "type" -> sqlTypeName))
}

// Malformed precision forms that don't match the regex fall through to
// INVALID_JSON_DATA_TYPE: negative, empty parens, non-numeric, and uppercase
// (JSON type-name convention is lowercase).
Seq(
s"$name(-1)",
s"$name()",
s"$name(abc)",
s"${name.toUpperCase(Locale.ROOT)}(7)").foreach { raw =>
checkError(
exception = intercept[SparkIllegalArgumentException] {
DataType.fromJson(s"""\"$raw\"""")
},
condition = "INVALID_JSON_DATA_TYPE",
parameters = Map("invalidType" -> raw))
}
}

// JSON round-trip for nanos timestamp types inside struct, array, and map.
val structWithNanos = StructType(Seq(
StructField("ntz", TimestampNTZNanosType(7)),
StructField("ltz", TimestampLTZNanosType(8))))
assert(DataType.fromJson(structWithNanos.json) === structWithNanos)
val arrayOfNanos = ArrayType(TimestampNTZNanosType(9), containsNull = false)
assert(DataType.fromJson(arrayOfNanos.json) === arrayOfNanos)
val mapOfNanos = MapType(StringType, TimestampNTZNanosType(7), valueContainsNull = true)
assert(DataType.fromJson(mapOfNanos.json) === mapOfNanos)

// Bare names without parens still map to the legacy single-precision types.
assert(DataType.fromJson("\"timestamp_ltz\"") === TimestampType)
assert(DataType.fromJson("\"timestamp_ntz\"") === TimestampNTZType)
}

test("singleton DataType equality after deserialization") {
// Singleton DataTypes that use `case object` pattern matching (e.g., `case BinaryType =>`).
// If a non-singleton instance is created (e.g., via Kryo deserialization which doesn't call
Expand Down