From 90618d2e477b2f27cf5155bbfb8f391c006df612 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 3 Jun 2026 11:12:17 +0200 Subject: [PATCH 01/11] [SPARK-57162][SQL] Add nanosecond-aware TimestampFormatter for parsing and formatting TimestampNanosVal ### What changes were proposed in this pull request? Extend the `TimestampFormatter` family with additive, nanosecond-aware parse and format methods that produce and consume `org.apache.spark.unsafe.types.TimestampNanosVal` (`epochMicros: Long` + `nanosWithinMicro: Short` in `[0, 999]`) at a target fractional precision `p` in `[7, 9]`: - New trait methods: `parseNanos` / `parseNanosOptional` (LTZ), `parseWithoutTimeZoneNanos` / `parseWithoutTimeZoneNanosOptional` (NTZ, plus a `final` `allowTimeZone = true` overload), and `formatNanos`. - `Iso8601TimestampFormatter`: `extractNanos` / `extractNanosNTZ` build the `Instant` / `LocalDateTime` and delegate to `SparkDateTimeUtils.instantToTimestampNanos` / `localDateTimeToTimestampNanos`; `formatNanos` floors sub-`precision` digits and renders the reconstructed instant. - `DefaultTimestampFormatter`: delegates to the SPARK-57032 nanos entry points. - `LegacyFastTimestampFormatter` / `LegacySimpleTimestampFormatter`: explicitly reject nanosecond precision under the `LEGACY` time parser policy (they cap at micro resolution). Sub-precision fractional digits are truncated (floored), consistent with SPARK-57032. All existing microsecond methods are unchanged (additive API). ### Why are the changes needed? Today `TimestampFormatter` is microsecond-only and discards the 7th-9th fractional digits. The JSON and CSV datasources drive all timestamp parsing/formatting through `TimestampFormatter`, so they cannot round-trip 7-9 digit fractions until the formatter is nanos-aware. This is the foundational unblocker for nanosecond support in those datasources (parent: SPARK-56822). ### Does this PR introduce _any_ user-facing change? No. 
The new formatter API is additive and gated for use behind `spark.sql.timestampNanosTypes.enabled` by its callers. ### How was this patch tested? New cases in `TimestampFormatterSuite`: parse/format round-trip for `p` in `[7, 9]` across ISO default and custom patterns (LTZ and NTZ); boundary values (`nanosWithinMicro` 0 and 999, pre-epoch instants, the 0001/1582/1970/9999 corpus); truncation rule; NTZ time-zone rejection; and LEGACY-mode rejection. ### Was this patch authored or co-authored using generative AI tooling? Generated-by: Cursor (Claude Opus 4.8) --- .../catalyst/util/TimestampFormatter.scala | 236 +++++++++++++++++- .../util/TimestampFormatterSuite.scala | 147 ++++++++++- 2 files changed, 380 insertions(+), 3 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index f09df4fcbee9b..e8d1bf50bbb68 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.errors.ExecutionErrors import org.apache.spark.sql.internal.LegacyBehaviorPolicy._ import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types.{Decimal, TimestampNTZType} -import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.unsafe.types.{TimestampNanosVal, UTF8String} sealed trait TimestampFormatter extends Serializable { @@ -158,6 +158,98 @@ sealed trait TimestampFormatter extends Serializable { // did not fail if timestamp contained zone-id or zone-offset component and instead ignored it. parseWithoutTimeZone(s, true) + /** + * Parses a timestamp in a string and converts it to a [[TimestampNanosVal]] (epoch microseconds + * plus a sub-microsecond remainder in `[0, 999]`) for `TIMESTAMP_LTZ(precision)`. 
Fractional + * digits beyond `precision` are truncated (floored), matching the cast/parse rule used by the + * microsecond path and `SparkDateTimeUtils`. + * + * @param s + * \- string with timestamp to parse + * @param precision + * \- the target fractional-second precision in `[7, 9]` + * @return + * the parsed value as a [[TimestampNanosVal]]. + */ + @throws(classOf[ParseException]) + @throws(classOf[DateTimeParseException]) + @throws(classOf[DateTimeException]) + def parseNanos(s: String, precision: Int): TimestampNanosVal + + /** + * Optional counterpart of [[parseNanos]]. The result is `None` on invalid input. + */ + def parseNanosOptional(s: String, precision: Int): Option[TimestampNanosVal] = + try { + Some(parseNanos(s, precision)) + } catch { + case _: Exception => None + } + + /** + * Parses a timestamp in a string and converts it to a [[TimestampNanosVal]] for + * `TIMESTAMP_NTZ(precision)`. The result is independent of time zones; a time zone component is + * discarded when `allowTimeZone` is `true` and rejected otherwise. Fractional digits beyond + * `precision` are truncated (floored). + * + * @param s + * \- string with timestamp to parse + * @param precision + * \- the target fractional-second precision in `[7, 9]` + * @param allowTimeZone + * \- indicates strict parsing of timezone + * @throws IllegalStateException + * The formatter for timestamp without time zone should always implement this method. The + * exception should never be hit. 
+ */ + @throws(classOf[ParseException]) + @throws(classOf[DateTimeParseException]) + @throws(classOf[DateTimeException]) + @throws(classOf[IllegalStateException]) + def parseWithoutTimeZoneNanos( + s: String, + precision: Int, + allowTimeZone: Boolean): TimestampNanosVal = + throw SparkException.internalError( + s"The method `parseWithoutTimeZoneNanos(s: String, precision: Int, allowTimeZone: " + + "Boolean)` should be implemented in the formatter of timestamp without time zone") + + /** + * Optional counterpart of [[parseWithoutTimeZoneNanos]]. The result is `None` on invalid input. + */ + @throws(classOf[ParseException]) + @throws(classOf[DateTimeParseException]) + @throws(classOf[DateTimeException]) + @throws(classOf[IllegalStateException]) + def parseWithoutTimeZoneNanosOptional( + s: String, + precision: Int, + allowTimeZone: Boolean): Option[TimestampNanosVal] = + try { + Some(parseWithoutTimeZoneNanos(s, precision, allowTimeZone)) + } catch { + case _: Exception => None + } + + /** + * Parses a timestamp in a string to a [[TimestampNanosVal]] for `TIMESTAMP_NTZ(precision)`. + * Zone-id and zone-offset components are ignored. + */ + @throws(classOf[ParseException]) + @throws(classOf[DateTimeParseException]) + @throws(classOf[DateTimeException]) + @throws(classOf[IllegalStateException]) + final def parseWithoutTimeZoneNanos(s: String, precision: Int): TimestampNanosVal = + parseWithoutTimeZoneNanos(s, precision, true) + + /** + * Formats a [[TimestampNanosVal]] to a string at the target fractional-second `precision` in + * `[7, 9]`. Sub-`precision` digits are truncated (floored) before rendering; the number of + * fractional digits actually emitted follows the formatter pattern (e.g. the count of `S` + * letters), consistent with the microsecond `format` overloads. 
+ */ + def formatNanos(v: TimestampNanosVal, precision: Int): String + def format(us: Long): String def format(ts: Timestamp): String def format(instant: Instant): String @@ -227,6 +319,42 @@ class Iso8601TimestampFormatter( } catch checkParsedDiff(s, legacyFormatter.parse) } + // `checkParsedDiff` only uses the legacy parse to decide whether to raise an upgrade exception + // and never returns its result, so the legacy formatter (microsecond-only) is fine here even on + // the nanos path. The returned `TimestampNanosVal.ZERO` is discarded. + protected def legacyNanosParse(str: String): TimestampNanosVal = { + legacyFormatter.parse(str) + TimestampNanosVal.ZERO + } + + override def parseNanosOptional(s: String, precision: Int): Option[TimestampNanosVal] = { + try { + val parsePosition = new ParsePosition(0) + val parsed = formatter.parseUnresolved(s, parsePosition) + if (parsed != null && s.length == parsePosition.getIndex) { + Some(extractNanos(parsed, precision)) + } else { + None + } + } catch { + case NonFatal(_) => None + } + } + + private def extractNanos(parsed: TemporalAccessor, precision: Int): TimestampNanosVal = { + val parsedZoneId = parsed.query(TemporalQueries.zone()) + val timeZoneId = if (parsedZoneId == null) zoneId else parsedZoneId + val zonedDateTime = toZonedDateTime(parsed, timeZoneId) + SparkDateTimeUtils.instantToTimestampNanos(zonedDateTime.toInstant, precision) + } + + override def parseNanos(s: String, precision: Int): TimestampNanosVal = { + try { + val parsed = formatter.parse(s) + extractNanos(parsed, precision) + } catch checkParsedDiff(s, legacyNanosParse) + } + override def parseWithoutTimeZoneOptional(s: String, allowTimeZone: Boolean): Option[Long] = { try { val parsePosition = new ParsePosition(0) @@ -260,6 +388,48 @@ class Iso8601TimestampFormatter( } catch checkParsedDiff(s, legacyFormatter.parse) } + override def parseWithoutTimeZoneNanosOptional( + s: String, + precision: Int, + allowTimeZone: Boolean): 
Option[TimestampNanosVal] = { + try { + val parsePosition = new ParsePosition(0) + val parsed = formatter.parseUnresolved(s, parsePosition) + if (parsed != null && s.length == parsePosition.getIndex) { + Some(extractNanosNTZ(s, parsed, precision, allowTimeZone)) + } else { + None + } + } catch { + case NonFatal(_) => None + } + } + + private def extractNanosNTZ( + s: String, + parsed: TemporalAccessor, + precision: Int, + allowTimeZone: Boolean): TimestampNanosVal = { + if (!allowTimeZone && parsed.query(TemporalQueries.zone()) != null) { + throw ExecutionErrors.cannotParseStringAsDataTypeError(pattern, s, TimestampNTZType) + } + val localDate = toLocalDate(parsed) + val localTime = toLocalTime(parsed) + SparkDateTimeUtils.localDateTimeToTimestampNanos( + LocalDateTime.of(localDate, localTime), + precision) + } + + override def parseWithoutTimeZoneNanos( + s: String, + precision: Int, + allowTimeZone: Boolean): TimestampNanosVal = { + try { + val parsed = formatter.parse(s) + extractNanosNTZ(s, parsed, precision, allowTimeZone) + } catch checkParsedDiff(s, legacyNanosParse) + } + override def format(instant: Instant): String = { try { zonedFormatter.format(instant) @@ -280,6 +450,16 @@ class Iso8601TimestampFormatter( localDateTime.format(formatter) } + override def formatNanos(v: TimestampNanosVal, precision: Int): String = { + // Floor sub-`precision` digits using the shared `SparkDateTimeUtils` truncation rule, then + // render the reconstructed instant. The number of fractional digits emitted follows the + // formatter pattern (count of `S` letters), consistent with the microsecond `format` paths. 
+ val truncated = SparkDateTimeUtils.instantToTimestampNanos( + SparkDateTimeUtils.timestampNanosToInstant(v), + precision) + format(SparkDateTimeUtils.timestampNanosToInstant(truncated)) + } + override def validatePatternString(checkLegacy: Boolean): Unit = { if (checkLegacy) { try { @@ -346,6 +526,36 @@ class DefaultTimestampFormatter( val utf8Value = UTF8String.fromString(s) SparkDateTimeUtils.stringToTimestampWithoutTimeZone(utf8Value, allowTimeZone) } + + override def parseNanos(s: String, precision: Int): TimestampNanosVal = { + try { + SparkDateTimeUtils.stringToTimestampLTZNanosAnsi(UTF8String.fromString(s), precision, zoneId) + } catch checkParsedDiff(s, legacyNanosParse) + } + + override def parseNanosOptional(s: String, precision: Int): Option[TimestampNanosVal] = + SparkDateTimeUtils.stringToTimestampLTZNanos(UTF8String.fromString(s), precision, zoneId) + + override def parseWithoutTimeZoneNanos( + s: String, + precision: Int, + allowTimeZone: Boolean): TimestampNanosVal = { + try { + val utf8Value = UTF8String.fromString(s) + SparkDateTimeUtils.stringToTimestampNTZNanos(utf8Value, precision, allowTimeZone).getOrElse { + throw ExecutionErrors.cannotParseStringAsDataTypeError( + TimestampFormatter.defaultPattern(), + s, + TimestampNTZType) + } + } catch checkParsedDiff(s, legacyNanosParse) + } + + override def parseWithoutTimeZoneNanosOptional( + s: String, + precision: Int, + allowTimeZone: Boolean): Option[TimestampNanosVal] = + SparkDateTimeUtils.stringToTimestampNTZNanos(UTF8String.fromString(s), precision, allowTimeZone) } /** @@ -491,6 +701,12 @@ class LegacyFastTimestampFormatter(pattern: String, zoneId: ZoneId, locale: Loca format(instantToMicros(instant)) } + override def parseNanos(s: String, precision: Int): TimestampNanosVal = + throw TimestampFormatter.legacyNanosUnsupported() + + override def formatNanos(v: TimestampNanosVal, precision: Int): String = + throw TimestampFormatter.legacyNanosUnsupported() + override def 
validatePatternString(checkLegacy: Boolean): Unit = fastDateFormat } @@ -532,6 +748,12 @@ class LegacySimpleTimestampFormatter( format(instantToMicros(instant)) } + override def parseNanos(s: String, precision: Int): TimestampNanosVal = + throw TimestampFormatter.legacyNanosUnsupported() + + override def formatNanos(v: TimestampNanosVal, precision: Int): String = + throw TimestampFormatter.legacyNanosUnsupported() + override def validatePatternString(checkLegacy: Boolean): Unit = sdf } @@ -548,6 +770,18 @@ object TimestampFormatter { def defaultPattern(): String = s"${DateFormatter.defaultPattern} ${TimeFormatter.defaultPattern}" + /** + * The legacy formatters (`FastDateFormat` / `SimpleDateFormat`) cap at millisecond/microsecond + * resolution and cannot represent the sub-microsecond remainder of a [[TimestampNanosVal]]. + * Nanosecond-capable timestamp types are therefore unsupported under the `LEGACY` time parser + * policy; callers gate the nanos path behind the corresponding config, so reaching this is a + * misconfiguration. + */ + def legacyNanosUnsupported(): SparkException = + SparkException.internalError( + "Nanosecond-precision timestamp parsing/formatting is not supported under the LEGACY " + + "time parser policy. 
Set spark.sql.legacy.timeParserPolicy to CORRECTED.") + private def getFormatter( format: Option[String], zoneId: ZoneId, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 558d7eda78b4a..9f187d4d0b9ee 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -17,15 +17,16 @@ package org.apache.spark.sql.catalyst.util -import java.time.{DateTimeException, LocalDateTime, ZoneId} +import java.time.{DateTimeException, Instant, LocalDateTime, ZoneId} import java.util.Locale import org.apache.spark.{SparkException, SparkUpgradeException} import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.catalyst.util.LegacyDateFormats.LENIENT_SIMPLE_DATE_FORMAT +import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._ import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} -import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.unsafe.types.{TimestampNanosVal, UTF8String} class TimestampFormatterSuite extends DatetimeFormatterSuite { @@ -559,4 +560,146 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { "'yyyy-MM-dd HH:mm:ss' as the target spark data type \"TIMESTAMP_NTZ\".")) ) } + + // The expected LTZ value: floor the sub-`precision` fractional digits, then split into + // (epochMicros, nanosWithinMicro). Mirrors `SparkDateTimeUtils.instantToTimestampNanos`. 
+ private def expectedLTZNanos(instant: Instant, precision: Int): TimestampNanosVal = { + val truncatedNano = nanoOfSecTruncator(precision)(instant.getNano) + instantToNanosVal(Instant.ofEpochSecond(instant.getEpochSecond, truncatedNano.toLong)) + } + + // The expected NTZ value (interpreted at UTC), with sub-`precision` digits floored. + private def expectedNTZNanos(ldt: LocalDateTime, precision: Int): TimestampNanosVal = { + localDateTimeToNanosVal(ldt.withNano(nanoOfSecTruncator(precision)(ldt.getNano))) + } + + private val nanosPattern = "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS" + + test("SPARK-57162: Iso8601 formatter parses strings into TimestampNanosVal (LTZ)") { + outstandingZoneIds.foreach { zoneId => + val formatter = TimestampFormatter(nanosPattern, zoneId, isParsing = true) + foreachNanosPrecision { precision => + specialNanosTs.foreach { ts => + val input = ts.replace(' ', 'T') + val expected = expectedLTZNanos(parseSpecialNanosLTZ(ts, zoneId), precision) + assert(formatter.parseNanos(input, precision) === expected) + assert(formatter.parseNanosOptional(input, precision).contains(expected)) + } + } + } + } + + test("SPARK-57162: Iso8601 formatter parses strings into TimestampNanosVal (NTZ)") { + // NTZ values are zone-independent (interpreted at UTC), so a single formatter zone suffices. 
+ val formatter = TimestampFormatter(nanosPattern, UTC, isParsing = true) + foreachNanosPrecision { precision => + specialNanosTs.foreach { ts => + val input = ts.replace(' ', 'T') + val expected = expectedNTZNanos(parseSpecialNanosNTZ(ts), precision) + assert(formatter.parseWithoutTimeZoneNanos(input, precision) === expected) + assert(formatter.parseWithoutTimeZoneNanos(input, precision, allowTimeZone = true) === + expected) + assert(formatter.parseWithoutTimeZoneNanosOptional(input, precision, allowTimeZone = true) + .contains(expected)) + } + } + } + + test("SPARK-57162: round-trip TimestampNanosVal -> string -> TimestampNanosVal") { + outstandingZoneIds.foreach { zoneId => + val parser = TimestampFormatter(nanosPattern, zoneId, isParsing = true) + val printer = TimestampFormatter(nanosPattern, zoneId, isParsing = false) + foreachNanosPrecision { precision => + specialNanosTs.foreach { ts => + val value = expectedLTZNanos(parseSpecialNanosLTZ(ts, zoneId), precision) + val formatted = printer.formatNanos(value, precision) + assert(parser.parseNanos(formatted, precision) === value) + } + } + } + } + + test("SPARK-57162: sub-precision fractional digits are truncated on parse") { + val formatter = TimestampFormatter(nanosPattern, UTC, isParsing = true) + val input = "1970-01-01T00:00:00.123456789" + Seq( + 9 -> nanosVal(123456L, 789), + 8 -> nanosVal(123456L, 780), + 7 -> nanosVal(123456L, 700)).foreach { case (precision, expected) => + assert(formatter.parseNanos(input, precision) === expected) + assert(formatter.parseWithoutTimeZoneNanos(input, precision) === expected) + } + } + + test("SPARK-57162: formatNanos truncates to precision and renders per pattern width") { + val value = nanosVal(123456L, 789) // 1970-01-01 00:00:00.123456789 at UTC + val fixed = TimestampFormatter(nanosPattern, UTC, isParsing = false) + assert(fixed.formatNanos(value, 9) === "1970-01-01T00:00:00.123456789") + assert(fixed.formatNanos(value, 8) === "1970-01-01T00:00:00.123456780") + 
assert(fixed.formatNanos(value, 7) === "1970-01-01T00:00:00.123456700") + + // The fraction formatter omits trailing zeros. + val fraction = TimestampFormatter.getFractionFormatter(UTC) + assert(fraction.formatNanos(value, 9) === "1970-01-01 00:00:00.123456789") + assert(fraction.formatNanos(value, 8) === "1970-01-01 00:00:00.12345678") + assert(fraction.formatNanos(value, 7) === "1970-01-01 00:00:00.1234567") + } + + test("SPARK-57162: NTZ nanos parse rejects a time zone when not allowed") { + val formatter = TimestampFormatter( + "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSSXXX", + UTC, + isParsing = true) + val input = "2018-12-02T10:11:12.123456789+01:00" + // When the zone component is allowed it is discarded and the local fields are kept. + val expected = expectedNTZNanos(LocalDateTime.of(2018, 12, 2, 10, 11, 12, 123456789), 9) + assert(formatter.parseWithoutTimeZoneNanos(input, 9, allowTimeZone = true) === expected) + + intercept[SparkException] { + formatter.parseWithoutTimeZoneNanos(input, 9, allowTimeZone = false) + } + assert(formatter.parseWithoutTimeZoneNanosOptional(input, 9, allowTimeZone = false).isEmpty) + } + + test("SPARK-57162: DefaultTimestampFormatter parses nanos without a pattern") { + outstandingZoneIds.foreach { zoneId => + val formatter = new DefaultTimestampFormatter( + zoneId, + locale = DateFormatter.defaultLocale, + legacyFormat = LegacyDateFormats.SIMPLE_DATE_FORMAT, + isParsing = true) + val ldt = LocalDateTime.of(2021, 8, 12, 18, 31, 50, 123456789) + val input = "2021-08-12T18:31:50.123456789" + foreachNanosPrecision { precision => + val expectedLtz = expectedLTZNanos(ldt.atZone(zoneId).toInstant, precision) + assert(formatter.parseNanos(input, precision) === expectedLtz) + assert(formatter.parseNanosOptional(input, precision).contains(expectedLtz)) + val expectedNtz = expectedNTZNanos(ldt, precision) + assert(formatter.parseWithoutTimeZoneNanos(input, precision) === expectedNtz) + assert(formatter.parseWithoutTimeZoneNanosOptional(input, 
precision, allowTimeZone = true) + .contains(expectedNtz)) + } + assert(formatter.parseNanosOptional("x123", 9).isEmpty) + assert(formatter.parseWithoutTimeZoneNanosOptional("x123", 9, allowTimeZone = true).isEmpty) + } + } + + test("SPARK-57162: legacy formatters reject nanosecond precision") { + val fast = new LegacyFastTimestampFormatter( + "yyyy-MM-dd HH:mm:ss.SSSSSS", + zoneId = UTC, + locale = DateFormatter.defaultLocale) + val simple = new LegacySimpleTimestampFormatter( + "yyyy-MM-dd HH:mm:ss.SSSSSS", + zoneId = UTC, + locale = DateFormatter.defaultLocale) + Seq[TimestampFormatter](fast, simple).foreach { formatter => + intercept[SparkException] { + formatter.parseNanos("2020-01-01 00:00:00.123456789", 9) + } + intercept[SparkException] { + formatter.formatNanos(nanosVal(0L, 1), 9) + } + } + } } From f272cdeeb435bbd3336b683a4f4b4f750ae1dfda Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 3 Jun 2026 11:30:19 +0200 Subject: [PATCH 02/11] [SPARK-57162][SQL] Normalize scaladoc @param descriptions in TimestampFormatter Drop the escaped leading dash from @param descriptions, capitalize the first letter, and inline each description on the same `@param` line where it fits. --- .../catalyst/util/TimestampFormatter.scala | 50 +++++++------------ 1 file changed, 17 insertions(+), 33 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index e8d1bf50bbb68..9db38b033d7cd 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -45,8 +45,7 @@ sealed trait TimestampFormatter extends Serializable { /** * Parses a timestamp in a string and converts it to microseconds. * - * @param s - * \- string with timestamp to parse + * @param s String with timestamp to parse * @return * microseconds since epoch. 
* @throws ParseException @@ -64,8 +63,7 @@ sealed trait TimestampFormatter extends Serializable { /** * Parses a timestamp in a string and converts it to an optional number of microseconds. * - * @param s - * \- string with timestamp to parse + * @param s String with timestamp to parse * @return * An optional number of microseconds since epoch. The result is None on invalid input. * @throws ParseException @@ -89,10 +87,8 @@ sealed trait TimestampFormatter extends Serializable { * Parses a timestamp in a string and converts it to microseconds since Unix Epoch in local * time. * - * @param s - * \- string with timestamp to parse - * @param allowTimeZone - * \- indicates strict parsing of timezone + * @param s String with timestamp to parse + * @param allowTimeZone Indicates strict parsing of timezone * @return * microseconds since epoch. * @throws ParseException @@ -118,10 +114,8 @@ sealed trait TimestampFormatter extends Serializable { * Parses a timestamp in a string and converts it to an optional number of microseconds since * Unix Epoch in local time. * - * @param s - * \- string with timestamp to parse - * @param allowTimeZone - * \- indicates strict parsing of timezone + * @param s String with timestamp to parse + * @param allowTimeZone Indicates strict parsing of timezone * @return * An optional number of microseconds since epoch. The result is None on invalid input. * @throws ParseException @@ -164,10 +158,8 @@ sealed trait TimestampFormatter extends Serializable { * digits beyond `precision` are truncated (floored), matching the cast/parse rule used by the * microsecond path and `SparkDateTimeUtils`. * - * @param s - * \- string with timestamp to parse - * @param precision - * \- the target fractional-second precision in `[7, 9]` + * @param s String with timestamp to parse + * @param precision The target fractional-second precision in `[7, 9]` * @return * the parsed value as a [[TimestampNanosVal]]. 
*/ @@ -192,12 +184,9 @@ sealed trait TimestampFormatter extends Serializable { * discarded when `allowTimeZone` is `true` and rejected otherwise. Fractional digits beyond * `precision` are truncated (floored). * - * @param s - * \- string with timestamp to parse - * @param precision - * \- the target fractional-second precision in `[7, 9]` - * @param allowTimeZone - * \- indicates strict parsing of timezone + * @param s String with timestamp to parse + * @param precision The target fractional-second precision in `[7, 9]` + * @param allowTimeZone Indicates strict parsing of timezone * @throws IllegalStateException * The formatter for timestamp without time zone should always implement this method. The * exception should never be hit. @@ -263,7 +252,7 @@ sealed trait TimestampFormatter extends Serializable { /** * Validates the pattern string. * @param checkLegacy - * if true and the pattern is invalid, check whether the pattern is valid for legacy + * If true and the pattern is invalid, check whether the pattern is valid for legacy * formatters and show hints for using legacy formatter. Otherwise, simply check the pattern * string. */ @@ -480,14 +469,10 @@ class Iso8601TimestampFormatter( * formatting, it uses the default pattern [[TimestampFormatter.defaultPattern()]]. In parsing, it * follows the CAST logic in conversion of strings to Catalyst's TimestampType. * - * @param zoneId - * The time zone ID in which timestamps should be formatted or parsed. - * @param locale - * The locale overrides the system locale and is used in formatting. - * @param legacyFormat - * Defines the formatter used for legacy timestamps. - * @param isParsing - * Whether the formatter is used for parsing (`true`) or for formatting (`false`). + * @param zoneId The time zone ID in which timestamps should be formatted or parsed. + * @param locale The locale overrides the system locale and is used in formatting. + * @param legacyFormat Defines the formatter used for legacy timestamps. 
+ * @param isParsing Whether the formatter is used for parsing (`true`) or for formatting (`false`). */ class DefaultTimestampFormatter( zoneId: ZoneId, @@ -564,8 +549,7 @@ class DefaultTimestampFormatter( * formatter does not output trailing zeros in the fraction. For example, the timestamp * `2019-03-05 15:00:01.123400` is formatted as the string `2019-03-05 15:00:01.1234`. * - * @param zoneId - * the time zone identifier in which the formatter parses or format timestamps + * @param zoneId The time zone identifier in which the formatter parses or format timestamps */ class FractionTimestampFormatter(zoneId: ZoneId) extends Iso8601TimestampFormatter( From b559c5c9cab282392b9044c75b719b49081da806 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 3 Jun 2026 11:47:38 +0200 Subject: [PATCH 03/11] [SPARK-57162][SQL] Use a user-facing error when nanos timestamps meet the LEGACY parser Replace the INTERNAL_ERROR thrown by the legacy formatters' parseNanos/formatNanos with a user-facing UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER error. The LEGACY time parser policy is user-configurable, so a caller may legitimately combine it with nanosecond-precision timestamps; that should surface as a proper unsupported-feature error suggesting CORRECTED, not an internal error. --- .../resources/error/error-conditions.json | 5 +++++ .../catalyst/util/TimestampFormatter.scala | 12 +++++----- .../spark/sql/errors/ExecutionErrors.scala | 6 +++++ .../util/TimestampFormatterSuite.scala | 22 +++++++++++++------ 4 files changed, 31 insertions(+), 14 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 2775e34808153..75d0568d05aa2 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -8365,6 +8365,11 @@ "Temporary views cannot be created with the WITH SCHEMA clause. 
Recreate the temporary view when the underlying schema changes, or use a persisted view." ] }, + "TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER" : { + "message" : [ + "Parsing or formatting nanosecond-precision timestamps (TIMESTAMP_LTZ/TIMESTAMP_NTZ with precision in [7, 9]) under the LEGACY time parser policy. Set <config> to CORRECTED." + ] + }, "TIME_TRAVEL" : { "message" : [ "Time travel on the relation: <relationId>." diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 9db38b033d7cd..8d153ffb93d14 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -29,7 +29,7 @@ import scala.util.control.NonFatal import org.apache.commons.lang3.time.FastDateFormat -import org.apache.spark.{SparkException, SparkIllegalArgumentException} +import org.apache.spark.{SparkException, SparkIllegalArgumentException, SparkUnsupportedOperationException} import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.LegacyDateFormats.{LegacyDateFormat, LENIENT_SIMPLE_DATE_FORMAT} import org.apache.spark.sql.catalyst.util.RebaseDateTime._ @@ -758,13 +758,11 @@ object TimestampFormatter { * The legacy formatters (`FastDateFormat` / `SimpleDateFormat`) cap at millisecond/microsecond * resolution and cannot represent the sub-microsecond remainder of a [[TimestampNanosVal]]. * Nanosecond-capable timestamp types are therefore unsupported under the `LEGACY` time parser - * policy; callers gate the nanos path behind the corresponding config, so reaching this is a - * misconfiguration. + * policy. This is a user-facing error (not an internal error) because the `LEGACY` policy is + * user-configurable and a caller may legitimately combine it with nanosecond timestamps.
*/ - def legacyNanosUnsupported(): SparkException = - SparkException.internalError( - "Nanosecond-precision timestamp parsing/formatting is not supported under the LEGACY " + - "time parser policy. Set spark.sql.legacy.timeParserPolicy to CORRECTED.") + def legacyNanosUnsupported(): SparkUnsupportedOperationException = + ExecutionErrors.nanosTimestampUnsupportedWithLegacyParserError() private def getFormatter( format: Option[String], diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala index e6e3fd847298b..58a82c7270a71 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala @@ -53,6 +53,12 @@ private[sql] trait ExecutionErrors extends DataTypeErrorsBase { e) } + def nanosTimestampUnsupportedWithLegacyParserError(): SparkUnsupportedOperationException = { + new SparkUnsupportedOperationException( + errorClass = "UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER", + messageParameters = Map("config" -> toSQLConf(SqlApiConf.LEGACY_TIME_PARSER_POLICY_KEY))) + } + def stateStoreHandleNotInitialized(): SparkRuntimeException = { new SparkRuntimeException( errorClass = "STATE_STORE_HANDLE_NOT_INITIALIZED", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 9f187d4d0b9ee..2a6b701e8eec2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.util import java.time.{DateTimeException, Instant, LocalDateTime, ZoneId} import java.util.Locale -import org.apache.spark.{SparkException, 
SparkUpgradeException} +import org.apache.spark.{SparkException, SparkUnsupportedOperationException, SparkUpgradeException} import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.catalyst.util.LegacyDateFormats.LENIENT_SIMPLE_DATE_FORMAT @@ -693,13 +693,21 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { "yyyy-MM-dd HH:mm:ss.SSSSSS", zoneId = UTC, locale = DateFormatter.defaultLocale) + val expectedParameters = Map( + "config" -> ("\"" + SQLConf.LEGACY_TIME_PARSER_POLICY.key + "\"")) Seq[TimestampFormatter](fast, simple).foreach { formatter => - intercept[SparkException] { - formatter.parseNanos("2020-01-01 00:00:00.123456789", 9) - } - intercept[SparkException] { - formatter.formatNanos(nanosVal(0L, 1), 9) - } + checkError( + exception = intercept[SparkUnsupportedOperationException] { + formatter.parseNanos("2020-01-01 00:00:00.123456789", 9) + }, + condition = "UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER", + parameters = expectedParameters) + checkError( + exception = intercept[SparkUnsupportedOperationException] { + formatter.formatNanos(nanosVal(0L, 1), 9) + }, + condition = "UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER", + parameters = expectedParameters) } } } From 19f1b7dac934cd164e71dfde2105df88aebf9008 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 3 Jun 2026 12:01:01 +0200 Subject: [PATCH 04/11] [SPARK-57162][SQL] Override parseWithoutTimeZoneNanos in legacy formatters to throw the user-facing error Co-authored-by: Max Gekk --- .../spark/sql/catalyst/util/TimestampFormatter.scala | 12 ++++++++++++ .../sql/catalyst/util/TimestampFormatterSuite.scala | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 8d153ffb93d14..b41ffdea19ef5 100644 
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -688,6 +688,12 @@ class LegacyFastTimestampFormatter(pattern: String, zoneId: ZoneId, locale: Loca override def parseNanos(s: String, precision: Int): TimestampNanosVal = throw TimestampFormatter.legacyNanosUnsupported() + override def parseWithoutTimeZoneNanos( + s: String, + precision: Int, + allowTimeZone: Boolean): TimestampNanosVal = + throw TimestampFormatter.legacyNanosUnsupported() + override def formatNanos(v: TimestampNanosVal, precision: Int): String = throw TimestampFormatter.legacyNanosUnsupported() @@ -735,6 +741,12 @@ class LegacySimpleTimestampFormatter( override def parseNanos(s: String, precision: Int): TimestampNanosVal = throw TimestampFormatter.legacyNanosUnsupported() + override def parseWithoutTimeZoneNanos( + s: String, + precision: Int, + allowTimeZone: Boolean): TimestampNanosVal = + throw TimestampFormatter.legacyNanosUnsupported() + override def formatNanos(v: TimestampNanosVal, precision: Int): String = throw TimestampFormatter.legacyNanosUnsupported() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 2a6b701e8eec2..2ef997e058642 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -708,6 +708,12 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { }, condition = "UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER", parameters = expectedParameters) + checkError( + exception = intercept[SparkUnsupportedOperationException] { + formatter.parseWithoutTimeZoneNanos("2020-01-01 00:00:00.123456789", 9) + }, + condition = 
"UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER", + parameters = expectedParameters) } } } From d7f9d472ecac6b67c0a1f6a0a218dd9372205271 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 3 Jun 2026 12:06:35 +0200 Subject: [PATCH 05/11] [SPARK-57162][SQL] Test that DefaultTimestampFormatter.formatNanos emits no fractional digits Co-authored-by: Max Gekk --- .../catalyst/util/TimestampFormatterSuite.scala | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 2ef997e058642..af001425cd9be 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -684,6 +684,21 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { } } + test("SPARK-57162: DefaultTimestampFormatter.formatNanos uses the default pattern (no fracs)") { + // DefaultTimestampFormatter inherits Iso8601TimestampFormatter.formatNanos, which renders via + // the default pattern "yyyy-MM-dd HH:mm:ss". That pattern has no S fields, so sub-second + // digits are not emitted. This is expected behaviour: DefaultTimestampFormatter is + // parse-oriented and callers that need fractional output should use FractionTimestampFormatter. 
+ val formatter = new DefaultTimestampFormatter( + UTC, + locale = DateFormatter.defaultLocale, + legacyFormat = LegacyDateFormats.SIMPLE_DATE_FORMAT, + isParsing = false) + val value = nanosVal(123456L, 789) // 1970-01-01 00:00:00.123456789 UTC + assert(formatter.formatNanos(value, 9) === "1970-01-01 00:00:00") + assert(formatter.formatNanos(value, 7) === "1970-01-01 00:00:00") + } + test("SPARK-57162: legacy formatters reject nanosecond precision") { val fast = new LegacyFastTimestampFormatter( "yyyy-MM-dd HH:mm:ss.SSSSSS", From 0496cc3c82beec52f86e3780e596bce50dcd26a6 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 3 Jun 2026 12:12:13 +0200 Subject: [PATCH 06/11] [SPARK-57162][SQL] Explain why legacy formatters override parseWithoutTimeZoneNanos Co-authored-by: Max Gekk --- .../apache/spark/sql/catalyst/util/TimestampFormatter.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index b41ffdea19ef5..bb4702d92389e 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -688,6 +688,8 @@ class LegacyFastTimestampFormatter(pattern: String, zoneId: ZoneId, locale: Loca override def parseNanos(s: String, precision: Int): TimestampNanosVal = throw TimestampFormatter.legacyNanosUnsupported() + // Without this override the trait default throws SparkException.internalError instead of the + // user-facing legacyNanosUnsupported error. 
override def parseWithoutTimeZoneNanos( s: String, precision: Int, @@ -741,6 +743,8 @@ class LegacySimpleTimestampFormatter( override def parseNanos(s: String, precision: Int): TimestampNanosVal = throw TimestampFormatter.legacyNanosUnsupported() + // Without this override the trait default throws SparkException.internalError instead of the + // user-facing legacyNanosUnsupported error. override def parseWithoutTimeZoneNanos( s: String, precision: Int, From 490f3ca1abef09b02b868ef41c9361c28a10db87 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 3 Jun 2026 16:05:10 +0200 Subject: [PATCH 07/11] [SPARK-57162][SQL] Revert scaladoc @param normalization in TimestampFormatter Reverts the @param reflow that reformatted pre-existing methods' scaladoc, which added unrelated diff noise. Restores the original dashed multi-line @param style consistent with the rest of the file. --- .../catalyst/util/TimestampFormatter.scala | 50 ++++++++++++------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index bb4702d92389e..c7417a280caec 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -45,7 +45,8 @@ sealed trait TimestampFormatter extends Serializable { /** * Parses a timestamp in a string and converts it to microseconds. * - * @param s String with timestamp to parse + * @param s + * \- string with timestamp to parse * @return * microseconds since epoch. * @throws ParseException @@ -63,7 +64,8 @@ sealed trait TimestampFormatter extends Serializable { /** * Parses a timestamp in a string and converts it to an optional number of microseconds. 
* - * @param s String with timestamp to parse + * @param s + * \- string with timestamp to parse * @return * An optional number of microseconds since epoch. The result is None on invalid input. * @throws ParseException @@ -87,8 +89,10 @@ sealed trait TimestampFormatter extends Serializable { * Parses a timestamp in a string and converts it to microseconds since Unix Epoch in local * time. * - * @param s String with timestamp to parse - * @param allowTimeZone Indicates strict parsing of timezone + * @param s + * \- string with timestamp to parse + * @param allowTimeZone + * \- indicates strict parsing of timezone * @return * microseconds since epoch. * @throws ParseException @@ -114,8 +118,10 @@ sealed trait TimestampFormatter extends Serializable { * Parses a timestamp in a string and converts it to an optional number of microseconds since * Unix Epoch in local time. * - * @param s String with timestamp to parse - * @param allowTimeZone Indicates strict parsing of timezone + * @param s + * \- string with timestamp to parse + * @param allowTimeZone + * \- indicates strict parsing of timezone * @return * An optional number of microseconds since epoch. The result is None on invalid input. * @throws ParseException @@ -158,8 +164,10 @@ sealed trait TimestampFormatter extends Serializable { * digits beyond `precision` are truncated (floored), matching the cast/parse rule used by the * microsecond path and `SparkDateTimeUtils`. * - * @param s String with timestamp to parse - * @param precision The target fractional-second precision in `[7, 9]` + * @param s + * \- string with timestamp to parse + * @param precision + * \- the target fractional-second precision in `[7, 9]` * @return * the parsed value as a [[TimestampNanosVal]]. */ @@ -184,9 +192,12 @@ sealed trait TimestampFormatter extends Serializable { * discarded when `allowTimeZone` is `true` and rejected otherwise. Fractional digits beyond * `precision` are truncated (floored). 
* - * @param s String with timestamp to parse - * @param precision The target fractional-second precision in `[7, 9]` - * @param allowTimeZone Indicates strict parsing of timezone + * @param s + * \- string with timestamp to parse + * @param precision + * \- the target fractional-second precision in `[7, 9]` + * @param allowTimeZone + * \- indicates strict parsing of timezone * @throws IllegalStateException * The formatter for timestamp without time zone should always implement this method. The * exception should never be hit. @@ -252,7 +263,7 @@ sealed trait TimestampFormatter extends Serializable { /** * Validates the pattern string. * @param checkLegacy - * If true and the pattern is invalid, check whether the pattern is valid for legacy + * if true and the pattern is invalid, check whether the pattern is valid for legacy * formatters and show hints for using legacy formatter. Otherwise, simply check the pattern * string. */ @@ -469,10 +480,14 @@ class Iso8601TimestampFormatter( * formatting, it uses the default pattern [[TimestampFormatter.defaultPattern()]]. In parsing, it * follows the CAST logic in conversion of strings to Catalyst's TimestampType. * - * @param zoneId The time zone ID in which timestamps should be formatted or parsed. - * @param locale The locale overrides the system locale and is used in formatting. - * @param legacyFormat Defines the formatter used for legacy timestamps. - * @param isParsing Whether the formatter is used for parsing (`true`) or for formatting (`false`). + * @param zoneId + * The time zone ID in which timestamps should be formatted or parsed. + * @param locale + * The locale overrides the system locale and is used in formatting. + * @param legacyFormat + * Defines the formatter used for legacy timestamps. + * @param isParsing + * Whether the formatter is used for parsing (`true`) or for formatting (`false`). 
*/ class DefaultTimestampFormatter( zoneId: ZoneId, @@ -549,7 +564,8 @@ class DefaultTimestampFormatter( * formatter does not output trailing zeros in the fraction. For example, the timestamp * `2019-03-05 15:00:01.123400` is formatted as the string `2019-03-05 15:00:01.1234`. * - * @param zoneId The time zone identifier in which the formatter parses or format timestamps + * @param zoneId + * the time zone identifier in which the formatter parses or format timestamps */ class FractionTimestampFormatter(zoneId: ZoneId) extends Iso8601TimestampFormatter( From df28319fe9415cae1ed3ed316839fa5bc86ffbf3 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 3 Jun 2026 16:11:51 +0200 Subject: [PATCH 08/11] [SPARK-57162][SQL] Add NTZ-aware formatWithoutTimeZoneNanos to TimestampFormatter formatNanos always routes through format(Instant) and applies the formatter's zoneId, making it LTZ-only; an NTZ value (a UTC-grid wall clock) rendered under a non-UTC zone came out shifted. Add an NTZ counterpart formatWithoutTimeZoneNanos that mirrors the microsecond format(LocalDateTime) path: it rebuilds the zone-independent local date-time via timestampNanosToLocalDateTime, floors sub-precision digits, and renders with the pattern only (no zoneId). The legacy formatters reject it with the same UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER error, and formatNanos is documented as LTZ-only. Adds a +01:00 regression test that the prior all-UTC NTZ cases could not catch. 
--- .../catalyst/util/TimestampFormatter.scala | 35 +++++++++++++++++-- .../util/TimestampFormatterSuite.scala | 28 +++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index c7417a280caec..635636d2d0453 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -244,12 +244,24 @@ sealed trait TimestampFormatter extends Serializable { /** * Formats a [[TimestampNanosVal]] to a string at the target fractional-second `precision` in - * `[7, 9]`. Sub-`precision` digits are truncated (floored) before rendering; the number of - * fractional digits actually emitted follows the formatter pattern (e.g. the count of `S` - * letters), consistent with the microsecond `format` overloads. + * `[7, 9]` for `TIMESTAMP_LTZ(precision)`. The value is rendered in the formatter's `zoneId` + * (it goes through the `format(instant: Instant)` path), so it must not be used for NTZ + * values; use [[formatWithoutTimeZoneNanos]] for those. Sub-`precision` digits are truncated + * (floored) before rendering; the number of fractional digits actually emitted follows the + * formatter pattern (e.g. the count of `S` letters), consistent with the microsecond `format` + * overloads. */ def formatNanos(v: TimestampNanosVal, precision: Int): String + /** + * NTZ counterpart of [[formatNanos]]: formats a [[TimestampNanosVal]] for + * `TIMESTAMP_NTZ(precision)` independently of any time zone. The value is rendered as its + * UTC-grid wall-clock local date-time, mirroring the microsecond `format(localDateTime: + * LocalDateTime)` path; unlike [[formatNanos]] it does not apply the formatter's `zoneId`. + * Sub-`precision` digits are truncated (floored) before rendering. 
+ */ + def formatWithoutTimeZoneNanos(v: TimestampNanosVal, precision: Int): String + def format(us: Long): String def format(ts: Timestamp): String def format(instant: Instant): String @@ -460,6 +472,17 @@ class Iso8601TimestampFormatter( format(SparkDateTimeUtils.timestampNanosToInstant(truncated)) } + override def formatWithoutTimeZoneNanos(v: TimestampNanosVal, precision: Int): String = { + // Floor sub-`precision` digits, then render the reconstructed local date-time via the + // pattern only (no `zoneId`), mirroring `format(localDateTime: LocalDateTime)` on the + // microsecond path. Routing an NTZ value through `formatNanos` / `format(Instant)` would + // apply the formatter's `zoneId` and shift the UTC-grid wall clock. + val truncated = SparkDateTimeUtils.localDateTimeToTimestampNanos( + SparkDateTimeUtils.timestampNanosToLocalDateTime(v), + precision) + format(SparkDateTimeUtils.timestampNanosToLocalDateTime(truncated)) + } + override def validatePatternString(checkLegacy: Boolean): Unit = { if (checkLegacy) { try { @@ -715,6 +738,9 @@ class LegacyFastTimestampFormatter(pattern: String, zoneId: ZoneId, locale: Loca override def formatNanos(v: TimestampNanosVal, precision: Int): String = throw TimestampFormatter.legacyNanosUnsupported() + override def formatWithoutTimeZoneNanos(v: TimestampNanosVal, precision: Int): String = + throw TimestampFormatter.legacyNanosUnsupported() + override def validatePatternString(checkLegacy: Boolean): Unit = fastDateFormat } @@ -770,6 +796,9 @@ class LegacySimpleTimestampFormatter( override def formatNanos(v: TimestampNanosVal, precision: Int): String = throw TimestampFormatter.legacyNanosUnsupported() + override def formatWithoutTimeZoneNanos(v: TimestampNanosVal, precision: Int): String = + throw TimestampFormatter.legacyNanosUnsupported() + override def validatePatternString(checkLegacy: Boolean): Unit = sdf } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index af001425cd9be..789eab32b19d9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -645,6 +645,28 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { assert(fraction.formatNanos(value, 7) === "1970-01-01 00:00:00.1234567") } + test("SPARK-57162: formatWithoutTimeZoneNanos is zone-independent (NTZ)") { + // Regression guard for an LTZ-only `formatNanos`: with a non-UTC formatter zone, the NTZ + // method must render the UTC-grid wall clock unchanged, whereas `formatNanos` (LTZ) routes + // through `format(Instant)` and shifts the value into the zone. All-UTC NTZ cases miss this. + val value = nanosVal(123456L, 789) // wall clock 1970-01-01 00:00:00.123456789 on the UTC grid + val zone = getZoneId("+01:00") + val printer = TimestampFormatter(nanosPattern, zone, isParsing = false) + // The 9-`S` pattern always emits 9 fractional digits; truncation zeros the low ones. + Seq( + 9 -> "1970-01-01T00:00:00.123456789", + 8 -> "1970-01-01T00:00:00.123456780", + 7 -> "1970-01-01T00:00:00.123456700").foreach { case (precision, expectedNtz) => + assert(printer.formatWithoutTimeZoneNanos(value, precision) === expectedNtz) + } + // LTZ rendering of the same value is shifted by the +01:00 offset. + assert(printer.formatNanos(value, 9) === "1970-01-01T01:00:00.123456789") + // The NTZ output round-trips through the matching NTZ parser regardless of formatter zone. 
+ val parser = TimestampFormatter(nanosPattern, zone, isParsing = true) + assert(parser.parseWithoutTimeZoneNanos( + printer.formatWithoutTimeZoneNanos(value, 9), 9) === value) + } + test("SPARK-57162: NTZ nanos parse rejects a time zone when not allowed") { val formatter = TimestampFormatter( "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSSXXX", @@ -729,6 +751,12 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { }, condition = "UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER", parameters = expectedParameters) + checkError( + exception = intercept[SparkUnsupportedOperationException] { + formatter.formatWithoutTimeZoneNanos(nanosVal(0L, 1), 9) + }, + condition = "UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER", + parameters = expectedParameters) } } } From fb35bfe4a4070c00a64e92cbb49c8cd590a2ee83 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 3 Jun 2026 16:23:08 +0200 Subject: [PATCH 09/11] [SPARK-57162][SQL] Enforce nanos precision bound in TimestampNanos converters instantToTimestampNanos / localDateTimeToTimestampNanos truncated sub-micro digits via truncateNanosWithinMicroToPrecision, which silently passed through any precision outside [7, 9]. The DefaultTimestampFormatter parse path already rejects out-of-range precision (the string helpers throw), but the Iso8601 path did not, so the contract was enforced inconsistently across subclasses. Make the shared truncation helper raise SparkException.internalError for precision outside [7, 9] so both the NTZ (LocalDateTime) and LTZ (Instant) converters - and thus both formatter subclasses, in parse and format directions - enforce the bound the same way. The check is an internal invariant, not a user-facing validation: precision always originates from a validated TimestampNTZNanosType / TimestampLTZNanosType (constructible only with p in [7, 9]), so it is unreachable from public APIs. 
--- .../catalyst/util/SparkDateTimeUtils.scala | 12 ++++++++--- .../catalyst/util/DateTimeUtilsSuite.scala | 20 ++++++++++++++++++- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala index d7200715f9374..90de715e2d705 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala @@ -215,14 +215,20 @@ trait SparkDateTimeUtils { * The input is the already-extracted `nanosWithinMicro` component (`0..999`), so truncation is * independent of the epoch sign of the original timestamp value. * - * Precisions outside `[7, 9]` are passed through unchanged because the surrounding timestamp - * nanos types validate the bound. + * `precision` is expected to originate from a validated `TimestampNTZNanosType` / + * `TimestampLTZNanosType` (which can only be constructed with `p` in [7, 9]), so it is not a + * user-reachable input here. An out-of-range value therefore indicates an internal caller bug + * and raises an internal error rather than silently retaining all sub-microsecond digits. 
*/ private def truncateNanosWithinMicroToPrecision(nanosWithinMicro: Int, precision: Int): Int = { precision match { case 7 => (nanosWithinMicro / 100) * 100 case 8 => (nanosWithinMicro / 10) * 10 - case _ => nanosWithinMicro + case 9 => nanosWithinMicro + case _ => + throw SparkException.internalError( + s"Fractional second precision $precision is out of range " + + s"[${TimestampNTZNanosType.MIN_PRECISION}, ${TimestampNTZNanosType.MAX_PRECISION}].") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 47eb4a1e3e3cb..2d9793e687a72 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -26,7 +26,7 @@ import java.util.concurrent.TimeUnit import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ -import org.apache.spark.{SparkArithmeticException, SparkDateTimeException, SparkFunSuite, SparkIllegalArgumentException} +import org.apache.spark.{SparkArithmeticException, SparkDateTimeException, SparkException, SparkFunSuite, SparkIllegalArgumentException} import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ @@ -1956,6 +1956,24 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { } } + test("SPARK-57162: nanos converters raise an internal error for precision outside [7, 9]") { + // `precision` is always sourced from a validated TimestampNTZNanosType/TimestampLTZNanosType + // (constructible only with p in [7, 9]), so an out-of-range value is an internal caller bug, + // not user input. Both the NTZ (LocalDateTime) and LTZ (Instant) converters must reject it. 
+ val ldt = LocalDateTime.parse("2019-02-26T16:56:00.123456789") + val instant = Instant.parse("2019-02-26T16:56:00.123456789Z") + Seq(6, 10).foreach { p => + checkError( + exception = intercept[SparkException](localDateTimeToTimestampNanos(ldt, p)), + condition = "INTERNAL_ERROR", + parameters = Map("message" -> s"Fractional second precision $p is out of range [7, 9].")) + checkError( + exception = intercept[SparkException](instantToTimestampNanos(instant, p)), + condition = "INTERNAL_ERROR", + parameters = Map("message" -> s"Fractional second precision $p is out of range [7, 9].")) + } + } + test("SPARK-57033: random roundtrip across precisions floors to the precision step") { val rnd = new scala.util.Random(0) val min = Instant.parse("0001-01-01T00:00:00Z").getEpochSecond From 52c876e12026f1f338fbe2436cfa00236f9d52fb Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 3 Jun 2026 16:29:33 +0200 Subject: [PATCH 10/11] [SPARK-57162][SQL] Apply scalafmt formatting to TimestampFormatter --- .../catalyst/util/TimestampFormatter.scala | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 635636d2d0453..5b5665fac61d4 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -245,11 +245,10 @@ sealed trait TimestampFormatter extends Serializable { /** * Formats a [[TimestampNanosVal]] to a string at the target fractional-second `precision` in * `[7, 9]` for `TIMESTAMP_LTZ(precision)`. The value is rendered in the formatter's `zoneId` - * (it goes through the `format(instant: Instant)` path), so it must not be used for NTZ - * values; use [[formatWithoutTimeZoneNanos]] for those. 
Sub-`precision` digits are truncated - * (floored) before rendering; the number of fractional digits actually emitted follows the - * formatter pattern (e.g. the count of `S` letters), consistent with the microsecond `format` - * overloads. + * (it goes through the `format(instant: Instant)` path), so it must not be used for NTZ values; + * use [[formatWithoutTimeZoneNanos]] for those. Sub-`precision` digits are truncated (floored) + * before rendering; the number of fractional digits actually emitted follows the formatter + * pattern (e.g. the count of `S` letters), consistent with the microsecond `format` overloads. */ def formatNanos(v: TimestampNanosVal, precision: Int): String @@ -552,7 +551,10 @@ class DefaultTimestampFormatter( override def parseNanos(s: String, precision: Int): TimestampNanosVal = { try { - SparkDateTimeUtils.stringToTimestampLTZNanosAnsi(UTF8String.fromString(s), precision, zoneId) + SparkDateTimeUtils.stringToTimestampLTZNanosAnsi( + UTF8String.fromString(s), + precision, + zoneId) } catch checkParsedDiff(s, legacyNanosParse) } @@ -565,12 +567,14 @@ class DefaultTimestampFormatter( allowTimeZone: Boolean): TimestampNanosVal = { try { val utf8Value = UTF8String.fromString(s) - SparkDateTimeUtils.stringToTimestampNTZNanos(utf8Value, precision, allowTimeZone).getOrElse { - throw ExecutionErrors.cannotParseStringAsDataTypeError( - TimestampFormatter.defaultPattern(), - s, - TimestampNTZType) - } + SparkDateTimeUtils + .stringToTimestampNTZNanos(utf8Value, precision, allowTimeZone) + .getOrElse { + throw ExecutionErrors.cannotParseStringAsDataTypeError( + TimestampFormatter.defaultPattern(), + s, + TimestampNTZType) + } } catch checkParsedDiff(s, legacyNanosParse) } @@ -578,7 +582,10 @@ class DefaultTimestampFormatter( s: String, precision: Int, allowTimeZone: Boolean): Option[TimestampNanosVal] = - SparkDateTimeUtils.stringToTimestampNTZNanos(UTF8String.fromString(s), precision, allowTimeZone) + SparkDateTimeUtils.stringToTimestampNTZNanos( + 
UTF8String.fromString(s), + precision, + allowTimeZone) } /** From d69af46be73a23b65421c1da966611892971a284 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 3 Jun 2026 22:33:33 +0200 Subject: [PATCH 11/11] [SPARK-57162][SQL] Make nanos parse *Optional methods abstract so legacy formatters surface the unsupported error The legacy formatters override the strict `parseNanos` / `parseWithoutTimeZoneNanos` to throw the user-facing UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER error, but inherited the trait default for `parseNanosOptional` / `parseWithoutTimeZoneNanosOptional`, which catches `case _: Exception => None`. Since `legacyNanosUnsupported()` is a non-fatal SparkUnsupportedOperationException, the optional path swallowed it and returned None - so under the LEGACY time parser policy the strict path errored while the optional path silently yielded null, conflating invalid input with an unsupported config combination. Remove the swallowing defaults: make `parseNanosOptional` and `parseWithoutTimeZoneNanosOptional` abstract in the sealed `TimestampFormatter` trait so every implementer must decide explicitly. The legacy formatters implement them by throwing `legacyNanosUnsupported()`; the Default/Iso8601 formatters already implement the None-on-invalid-input behavior. Extend the LEGACY-policy test to assert both optional variants raise the error instead of returning None. 
--- .../catalyst/util/TimestampFormatter.scala | 46 ++++++++++++++----- .../util/TimestampFormatterSuite.scala | 17 +++++++ 2 files changed, 51 insertions(+), 12 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 5b5665fac61d4..a340e9a3b9b22 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -178,13 +178,13 @@ sealed trait TimestampFormatter extends Serializable { /** * Optional counterpart of [[parseNanos]]. The result is `None` on invalid input. + * + * Intentionally abstract (unlike the microsecond [[parseOptional]]): a swallowing `try`/`catch` + * default would also mask the user-facing `TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER` error that + * the legacy formatters raise from [[parseNanos]], silently returning `None`. Each formatter + * must decide explicitly. */ - def parseNanosOptional(s: String, precision: Int): Option[TimestampNanosVal] = - try { - Some(parseNanos(s, precision)) - } catch { - case _: Exception => None - } + def parseNanosOptional(s: String, precision: Int): Option[TimestampNanosVal] /** * Parses a timestamp in a string and converts it to a [[TimestampNanosVal]] for @@ -216,6 +216,9 @@ sealed trait TimestampFormatter extends Serializable { /** * Optional counterpart of [[parseWithoutTimeZoneNanos]]. The result is `None` on invalid input. + * + * Intentionally abstract for the same reason as [[parseNanosOptional]]: a swallowing default + * would mask the legacy formatters' `TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER` error. 
*/ @throws(classOf[ParseException]) @throws(classOf[DateTimeParseException]) @@ -224,12 +227,7 @@ sealed trait TimestampFormatter extends Serializable { def parseWithoutTimeZoneNanosOptional( s: String, precision: Int, - allowTimeZone: Boolean): Option[TimestampNanosVal] = - try { - Some(parseWithoutTimeZoneNanos(s, precision, allowTimeZone)) - } catch { - case _: Exception => None - } + allowTimeZone: Boolean): Option[TimestampNanosVal] /** * Parses a timestamp in a string to a [[TimestampNanosVal]] for `TIMESTAMP_NTZ(precision)`. @@ -734,6 +732,12 @@ class LegacyFastTimestampFormatter(pattern: String, zoneId: ZoneId, locale: Loca override def parseNanos(s: String, precision: Int): TimestampNanosVal = throw TimestampFormatter.legacyNanosUnsupported() + // The `*Optional` nanos methods are abstract in the trait (no swallowing default), so the legacy + // formatters must implement them. They throw rather than return `None` so the unsupported-feature + // error is surfaced instead of being silently masked under the LEGACY time parser policy. + override def parseNanosOptional(s: String, precision: Int): Option[TimestampNanosVal] = + throw TimestampFormatter.legacyNanosUnsupported() + // Without this override the trait default throws SparkException.internalError instead of the // user-facing legacyNanosUnsupported error. 
override def parseWithoutTimeZoneNanos( @@ -742,6 +746,12 @@ class LegacyFastTimestampFormatter(pattern: String, zoneId: ZoneId, locale: Loca allowTimeZone: Boolean): TimestampNanosVal = throw TimestampFormatter.legacyNanosUnsupported() + override def parseWithoutTimeZoneNanosOptional( + s: String, + precision: Int, + allowTimeZone: Boolean): Option[TimestampNanosVal] = + throw TimestampFormatter.legacyNanosUnsupported() + override def formatNanos(v: TimestampNanosVal, precision: Int): String = throw TimestampFormatter.legacyNanosUnsupported() @@ -792,6 +802,12 @@ class LegacySimpleTimestampFormatter( override def parseNanos(s: String, precision: Int): TimestampNanosVal = throw TimestampFormatter.legacyNanosUnsupported() + // The `*Optional` nanos methods are abstract in the trait (no swallowing default), so the legacy + // formatters must implement them. They throw rather than return `None` so the unsupported-feature + // error is surfaced instead of being silently masked under the LEGACY time parser policy. + override def parseNanosOptional(s: String, precision: Int): Option[TimestampNanosVal] = + throw TimestampFormatter.legacyNanosUnsupported() + // Without this override the trait default throws SparkException.internalError instead of the // user-facing legacyNanosUnsupported error. 
override def parseWithoutTimeZoneNanos( @@ -800,6 +816,12 @@ class LegacySimpleTimestampFormatter( allowTimeZone: Boolean): TimestampNanosVal = throw TimestampFormatter.legacyNanosUnsupported() + override def parseWithoutTimeZoneNanosOptional( + s: String, + precision: Int, + allowTimeZone: Boolean): Option[TimestampNanosVal] = + throw TimestampFormatter.legacyNanosUnsupported() + override def formatNanos(v: TimestampNanosVal, precision: Int): String = throw TimestampFormatter.legacyNanosUnsupported() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 789eab32b19d9..105fee7193f2c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -739,6 +739,14 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { }, condition = "UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER", parameters = expectedParameters) + // The optional variants must surface the unsupported-feature error too, not swallow it and + // return None. These methods are abstract in the trait specifically to force this. 
+ checkError( + exception = intercept[SparkUnsupportedOperationException] { + formatter.parseNanosOptional("2020-01-01 00:00:00.123456789", 9) + }, + condition = "UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER", + parameters = expectedParameters) checkError( exception = intercept[SparkUnsupportedOperationException] { formatter.formatNanos(nanosVal(0L, 1), 9) @@ -751,6 +759,15 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { }, condition = "UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER", parameters = expectedParameters) + checkError( + exception = intercept[SparkUnsupportedOperationException] { + formatter.parseWithoutTimeZoneNanosOptional( + "2020-01-01 00:00:00.123456789", + 9, + allowTimeZone = true) + }, + condition = "UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_WITH_LEGACY_TIME_PARSER", + parameters = expectedParameters) checkError( exception = intercept[SparkUnsupportedOperationException] { formatter.formatWithoutTimeZoneNanos(nanosVal(0L, 1), 9)