Skip to content

[SPARK-31867][SQL][FOLLOWUP] Check result differences for datetime formatting #28736

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,15 @@ class Iso8601DateFormatter(
try {
val localDate = toLocalDate(formatter.parse(s))
localDateToDays(localDate)
} catch checkDiffResult(s, legacyFormatter.parse)
} catch checkParsedDiff(s, legacyFormatter.parse)
}
}

override def format(localDate: LocalDate): String = {
localDate.format(formatter)
try {
localDate.format(formatter)
} catch checkFormattedDiff(toJavaDate(localDateToDays(localDate)),
(d: Date) => format(d))
}

override def format(days: Int): String = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import java.time._
import java.time.chrono.IsoChronology
import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverStyle}
import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries}
import java.util.Locale
import java.util.{Date, Locale}

import com.google.common.cache.CacheBuilder

Expand Down Expand Up @@ -109,13 +109,17 @@ trait DateTimeFormatterHelper {
formatter
}

private def needConvertToSparkUpgradeException(e: Throwable): Boolean = e match {
case _: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION => true
case _ => false
}
// When legacy time parser policy set to EXCEPTION, check whether we will get different results
// between legacy parser and new parser. If new parser fails but legacy parser works, throw a
// SparkUpgradeException. On the contrary, if the legacy policy set to CORRECTED,
// DateTimeParseException will address by the caller side.
protected def checkDiffResult[T](
protected def checkParsedDiff[T](
s: String, legacyParseFunc: String => T): PartialFunction[Throwable, T] = {
case e: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION =>
case e if needConvertToSparkUpgradeException(e) =>
try {
legacyParseFunc(s)
} catch {
Expand All @@ -126,6 +130,25 @@ trait DateTimeFormatterHelper {
s"before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string.", e)
}

// When legacy time parser policy set to EXCEPTION, check whether we will get different results
// between legacy formatter and new formatter. If new formatter fails but legacy formatter works,
// throw a SparkUpgradeException. On the contrary, if the legacy policy set to CORRECTED,
// DateTimeParseException will address by the caller side.
protected def checkFormattedDiff[T <: Date](
d: T,
legacyFormatFunc: T => String): PartialFunction[Throwable, String] = {
case e if needConvertToSparkUpgradeException(e) =>
val resultCandidate = try {
legacyFormatFunc(d)
} catch {
case _: Throwable => throw e
}
throw new SparkUpgradeException("3.0", s"Fail to format it to '$resultCandidate' in the new" +
Copy link
Member

@gatorsmile gatorsmile Jul 2, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@HyukjinKwon Should we combine the JVM stacktrace for SparkUpgradeException in the python side?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1!

s" formatter. You can set ${SQLConf.LEGACY_TIME_PARSER_POLICY.key} to LEGACY to restore" +
" the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid" +
" datetime string.", e)
}

/**
* When the new DateTimeFormatter failed to initialize because of invalid datetime pattern, it
* will throw IllegalArgumentException. If the pattern can be recognized by the legacy formatter
Expand All @@ -137,7 +160,6 @@ trait DateTimeFormatterHelper {
* @param tryLegacyFormatter a func to capture exception, identically which forces a legacy
* datetime formatter to be initialized
*/

protected def checkLegacyFormatter(
pattern: String,
tryLegacyFormatter: => Unit): PartialFunction[Throwable, DateTimeFormatter] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,15 @@ class Iso8601TimestampFormatter(
val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)

Math.addExact(SECONDS.toMicros(epochSeconds), microsOfSecond)
} catch checkDiffResult(s, legacyFormatter.parse)
} catch checkParsedDiff(s, legacyFormatter.parse)
}
}

override def format(instant: Instant): String = {
formatter.withZone(zoneId).format(instant)
try {
formatter.withZone(zoneId).format(instant)
} catch checkFormattedDiff(toJavaTimestamp(instantToMicros(instant)),
(t: Timestamp) => format(t))
}

override def format(us: Long): String = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -418,4 +418,19 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
val t5 = f3.parse("AM")
assert(t5 === date(1970))
}

test("check result differences for datetime formatting") {
val formatter = TimestampFormatter("DD", UTC, isParsing = false)
assert(formatter.format(date(1970, 1, 3)) == "03")
assert(formatter.format(date(1970, 4, 9)) == "99")

if (System.getProperty("java.version").split("\\D+")(0).toInt < 9) {
// https://bugs.openjdk.java.net/browse/JDK-8079628
intercept[SparkUpgradeException] {
formatter.format(date(1970, 4, 10))
}
} else {
assert(formatter.format(date(1970, 4, 10)) == "100")
}
}
}