Skip to content

Commit

Permalink
tests
Browse files Browse the repository at this point in the history
  • Loading branch information
yaooqinn committed Jun 4, 2020
1 parent ca79e03 commit 7671d96
Show file tree
Hide file tree
Showing 16 changed files with 1,328 additions and 118 deletions.
Expand Up @@ -126,7 +126,7 @@ object DateFormatter {
zoneId: ZoneId,
locale: Locale = defaultLocale,
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
isParsing: Boolean = true): DateFormatter = {
isParsing: Boolean): DateFormatter = {
val pattern = format.getOrElse(defaultPattern)
if (SQLConf.get.legacyTimeParserPolicy == LEGACY) {
getLegacyFormatter(pattern, zoneId, locale, legacyFormat)
Expand Down Expand Up @@ -164,6 +164,6 @@ object DateFormatter {
}

def apply(zoneId: ZoneId): DateFormatter = {
getFormatter(None, zoneId)
getFormatter(None, zoneId, isParsing = false)
}
}
Expand Up @@ -97,7 +97,7 @@ trait DateTimeFormatterHelper {
protected def getOrCreateFormatter(
pattern: String,
locale: Locale,
isParsing: Boolean = false): DateTimeFormatter = {
isParsing: Boolean): DateTimeFormatter = {
val newPattern = convertIncompatiblePattern(pattern, isParsing)
val useVarLen = isParsing && newPattern.contains('S')
val key = (newPattern, locale, useVarLen)
Expand Down Expand Up @@ -261,7 +261,7 @@ private object DateTimeFormatterHelper {
* @param pattern The input pattern.
* @return The pattern for new parser
*/
def convertIncompatiblePattern(pattern: String, isParsing: Boolean = false): String = {
def convertIncompatiblePattern(pattern: String, isParsing: Boolean): String = {
val eraDesignatorContained = pattern.split("'").zipWithIndex.exists {
case (patternPart, index) =>
// Text can be quoted using single quotes; we only check the non-quoted parts.
Expand All @@ -271,7 +271,7 @@ private object DateTimeFormatterHelper {
case (patternPart, index) =>
if (index % 2 == 0) {
for (c <- patternPart if weekBasedLetters.contains(c)) {
throw new IllegalArgumentException(s"All week-based pattern are unsupported since" +
throw new IllegalArgumentException(s"All week-based patterns are unsupported since" +
s" Spark 3.0, detected: $c, Please use the SQL function EXTRACT instead")
}
for (c <- patternPart if unsupportedLetters.contains(c) ||
Expand Down
Expand Up @@ -62,11 +62,11 @@ class Iso8601TimestampFormatter(
zoneId: ZoneId,
locale: Locale,
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
needVarLengthSecondFraction: Boolean)
isParsing: Boolean)
extends TimestampFormatter with DateTimeFormatterHelper {
@transient
protected lazy val formatter: DateTimeFormatter =
getOrCreateFormatter(pattern, locale, needVarLengthSecondFraction)
getOrCreateFormatter(pattern, locale, isParsing)

@transient
protected lazy val legacyFormatter = TimestampFormatter.getLegacyFormatter(
Expand Down Expand Up @@ -122,7 +122,7 @@ class FractionTimestampFormatter(zoneId: ZoneId)
zoneId,
TimestampFormatter.defaultLocale,
LegacyDateFormats.FAST_DATE_FORMAT,
needVarLengthSecondFraction = false) {
isParsing = false) {

@transient
override protected lazy val formatter = DateTimeFormatterHelper.fractionFormatter
Expand Down Expand Up @@ -287,7 +287,7 @@ object TimestampFormatter {
zoneId: ZoneId,
locale: Locale = defaultLocale,
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
isParsing: Boolean = false): TimestampFormatter = {
isParsing: Boolean): TimestampFormatter = {
val pattern = format.getOrElse(defaultPattern)
if (SQLConf.get.legacyTimeParserPolicy == LEGACY) {
getLegacyFormatter(pattern, zoneId, locale, legacyFormat)
Expand Down Expand Up @@ -334,12 +334,12 @@ object TimestampFormatter {
def apply(
format: String,
zoneId: ZoneId,
isParsing: Boolean = false): TimestampFormatter = {
isParsing: Boolean): TimestampFormatter = {
getFormatter(Some(format), zoneId, isParsing = isParsing)
}

def apply(zoneId: ZoneId): TimestampFormatter = {
getFormatter(None, zoneId)
getFormatter(None, zoneId, isParsing = false)
}

def getFractionFormatter(zoneId: ZoneId): TimestampFormatter = {
Expand Down
Expand Up @@ -41,7 +41,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
private val JST_OPT = Option(JST.getId)

def toMillis(timestamp: String): Long = {
val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC)
val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, isParsing = true)
DateTimeUtils.microsToMillis(tf.parse(timestamp))
}
val date = "2015-04-08 13:10:15"
Expand Down
Expand Up @@ -22,6 +22,10 @@ import org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper._

class DateTimeFormatterHelperSuite extends SparkFunSuite {

private def convertIncompatiblePattern(pattern: String): String = {
DateTimeFormatterHelper.convertIncompatiblePattern(pattern, isParsing = false)
}

test("check incompatible pattern") {
assert(convertIncompatiblePattern("yyyy-MM-dd'T'HH:mm:ss.SSSz")
=== "uuuu-MM-dd'T'HH:mm:ss.SSSz")
Expand All @@ -43,7 +47,7 @@ class DateTimeFormatterHelperSuite extends SparkFunSuite {
}
unsupportedLettersForParsing.foreach { l =>
val e = intercept[IllegalArgumentException] {
convertIncompatiblePattern(s"$l", isParsing = true)
DateTimeFormatterHelper.convertIncompatiblePattern(s"$l", isParsing = true)
}
assert(e.getMessage === s"Illegal pattern character: $l")
assert(convertIncompatiblePattern(s"$l").nonEmpty)
Expand Down
Expand Up @@ -96,7 +96,7 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
2177456523456789L,
11858049903010203L).foreach { micros =>
outstandingZoneIds.foreach { zoneId =>
val timestamp = TimestampFormatter(pattern, zoneId).format(micros)
val timestamp = TimestampFormatter(pattern, zoneId, isParsing = false).format(micros)
val parsed = TimestampFormatter(
pattern, zoneId, isParsing = true).parse(timestamp)
assert(micros === parsed)
Expand All @@ -120,14 +120,14 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
val pattern = "yyyy-MM-dd'T'HH:mm:ss.SSSSSS"
val micros = TimestampFormatter(
pattern, zoneId, isParsing = true).parse(timestamp)
val formatted = TimestampFormatter(pattern, zoneId).format(micros)
val formatted = TimestampFormatter(pattern, zoneId, isParsing = false).format(micros)
assert(timestamp === formatted)
}
}
}

test("case insensitive parsing of am and pm") {
val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", UTC)
val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", UTC, isParsing = false)
val micros = formatter.parse("2009 Mar 20 11:30:01 am")
assert(micros === date(2009, 3, 20, 11, 30, 1))
}
Expand Down Expand Up @@ -157,8 +157,8 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
assert(TimestampFormatter(UTC).format(micros) === "-0099-01-01 00:00:00")
assert(TimestampFormatter(UTC).format(instant) === "-0099-01-01 00:00:00")
withDefaultTimeZone(UTC) { // toJavaTimestamp depends on the default time zone
assert(TimestampFormatter("yyyy-MM-dd HH:mm:SS G", UTC).format(toJavaTimestamp(micros))
=== "0100-01-01 00:00:00 BC")
assert(TimestampFormatter("yyyy-MM-dd HH:mm:SS G", UTC, isParsing = false)
.format(toJavaTimestamp(micros)) === "0100-01-01 00:00:00 BC")
}
}

Expand Down Expand Up @@ -209,7 +209,7 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
"2019-10-14T09:39:07.1", "2019-10-14T09:39:07.1")

try {
TimestampFormatter("yyyy/MM/dd HH_mm_ss.SSSSSS", zoneId, true)
TimestampFormatter("yyyy/MM/dd HH_mm_ss.SSSSSS", zoneId, isParsing = true)
.parse("2019/11/14 20#25#30.123456")
fail("Expected to throw an exception for the invalid input")
} catch {
Expand All @@ -222,7 +222,7 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
test("formatting timestamp strings up to microsecond precision") {
outstandingZoneIds.foreach { zoneId =>
def check(pattern: String, input: String, expected: String): Unit = {
val formatter = TimestampFormatter(pattern, zoneId)
val formatter = TimestampFormatter(pattern, zoneId, isParsing = false)
val timestamp = stringToTimestamp(UTF8String.fromString(input), zoneId).get
val actual = formatter.format(timestamp)
assert(actual === expected)
Expand Down Expand Up @@ -259,7 +259,7 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
}

test("SPARK-30958: parse timestamp with negative year") {
val formatter1 = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, true)
val formatter1 = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, isParsing = true)
assert(formatter1.parse("-1234-02-22 02:22:22") === date(-1234, 2, 22, 2, 22, 22))

def assertParsingError(f: => Unit): Unit = {
Expand All @@ -272,7 +272,7 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
}

// "yyyy" with "G" can't parse negative year or year 0000.
val formatter2 = TimestampFormatter("G yyyy-MM-dd HH:mm:ss", UTC, true)
val formatter2 = TimestampFormatter("G yyyy-MM-dd HH:mm:ss", UTC, isParsing = true)
assertParsingError(formatter2.parse("BC -1234-02-22 02:22:22"))
assertParsingError(formatter2.parse("AC 0000-02-22 02:22:22"))

Expand Down Expand Up @@ -318,7 +318,7 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
test("parsing hour with various patterns") {
def createFormatter(pattern: String): TimestampFormatter = {
// Use `SIMPLE_DATE_FORMAT` so that the legacy parser also fails on values outside the valid range.
TimestampFormatter(pattern, UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, false)
TimestampFormatter(pattern, UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, isParsing = true)
}

withClue("HH") {
Expand Down Expand Up @@ -377,41 +377,42 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
}

test("missing date fields") {
val formatter = TimestampFormatter("HH:mm:ss", UTC)
val formatter = TimestampFormatter("HH:mm:ss", UTC, isParsing = true)
val micros = formatter.parse("11:30:01")
assert(micros === date(1970, 1, 1, 11, 30, 1))
}

test("missing year field with invalid date") {
// Use `SIMPLE_DATE_FORMAT`, so that the legacy parser also fails with invalid date.
val formatter = TimestampFormatter("MM-dd", UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, false)
val formatter =
TimestampFormatter("MM-dd", UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, isParsing = true)
withDefaultTimeZone(UTC)(intercept[DateTimeException](formatter.parse("02-29")))
}

test("missing am/pm field") {
Seq("HH", "hh", "KK", "kk").foreach { hour =>
val formatter = TimestampFormatter(s"yyyy $hour:mm:ss", UTC)
val formatter = TimestampFormatter(s"yyyy $hour:mm:ss", UTC, isParsing = true)
val micros = formatter.parse("2009 11:30:01")
assert(micros === date(2009, 1, 1, 11, 30, 1))
}
}

test("missing time fields") {
val formatter = TimestampFormatter("yyyy HH", UTC)
val formatter = TimestampFormatter("yyyy HH", UTC, isParsing = true)
val micros = formatter.parse("2009 11")
assert(micros === date(2009, 1, 1, 11))
}

test("missing hour field") {
val f1 = TimestampFormatter("mm:ss a", UTC)
val f1 = TimestampFormatter("mm:ss a", UTC, isParsing = true)
val t1 = f1.parse("30:01 PM")
assert(t1 === date(1970, 1, 1, 12, 30, 1))
val t2 = f1.parse("30:01 AM")
assert(t2 === date(1970, 1, 1, 0, 30, 1))
val f2 = TimestampFormatter("mm:ss", UTC)
val f2 = TimestampFormatter("mm:ss", UTC, isParsing = true)
val t3 = f2.parse("30:01")
assert(t3 === date(1970, 1, 1, 0, 30, 1))
val f3 = TimestampFormatter("a", UTC)
val f3 = TimestampFormatter("a", UTC, isParsing = true)
val t4 = f3.parse("PM")
assert(t4 === date(1970, 1, 1, 12))
val t5 = f3.parse("AM")
Expand Down
@@ -0,0 +1,53 @@
--- TESTS FOR DATETIME FORMATTING FUNCTIONS WITH INVALID PATTERNS ---

-- Kept separate from datetime-formatting.sql because the text form
-- for patterns with 5 letters in SimpleDateFormat varies across JDK versions
select date_format('2018-11-17 13:33:33.333', 'GGGGG');
-- the pattern letter count cannot be greater than 10
select date_format('2018-11-17 13:33:33.333', 'yyyyyyyyyyy');
-- q/L in JDK 8 will fail when the count is more than 2
select date_format('2018-11-17 13:33:33.333', 'qqqqq');
select date_format('2018-11-17 13:33:33.333', 'QQQQQ');
select date_format('2018-11-17 13:33:33.333', 'MMMMM');
select date_format('2018-11-17 13:33:33.333', 'LLLLL');

select date_format('2018-11-17 13:33:33.333', 'EEEEE');
select date_format('2018-11-17 13:33:33.333', 'FF');
select date_format('2018-11-17 13:33:33.333', 'ddd');
-- DD is invalid if the day-of-year exceeds 100, but it becomes valid in Java 11
-- select date_format('2018-11-17 13:33:33.333', 'DD');
select date_format('2018-11-17 13:33:33.333', 'DDDD');
select date_format('2018-11-17 13:33:33.333', 'HHH');
select date_format('2018-11-17 13:33:33.333', 'hhh');
select date_format('2018-11-17 13:33:33.333', 'kkk');
select date_format('2018-11-17 13:33:33.333', 'KKK');
select date_format('2018-11-17 13:33:33.333', 'mmm');
select date_format('2018-11-17 13:33:33.333', 'sss');
select date_format('2018-11-17 13:33:33.333', 'SSSSSSSSSS');
select date_format('2018-11-17 13:33:33.333', 'aa');
select date_format('2018-11-17 13:33:33.333', 'V');
select date_format('2018-11-17 13:33:33.333', 'zzzzz');
select date_format('2018-11-17 13:33:33.333', 'XXXXXX');
select date_format('2018-11-17 13:33:33.333', 'ZZZZZZ');
select date_format('2018-11-17 13:33:33.333', 'OO');
select date_format('2018-11-17 13:33:33.333', 'xxxxxx');

select date_format('2018-11-17 13:33:33.333', 'A');
select date_format('2018-11-17 13:33:33.333', 'n');
select date_format('2018-11-17 13:33:33.333', 'N');
select date_format('2018-11-17 13:33:33.333', 'p');

-- disabled week-based patterns
select date_format('2018-11-17 13:33:33.333', 'Y');
select date_format('2018-11-17 13:33:33.333', 'w');
select date_format('2018-11-17 13:33:33.333', 'W');
select date_format('2018-11-17 13:33:33.333', 'u');
select date_format('2018-11-17 13:33:33.333', 'e');
select date_format('2018-11-17 13:33:33.333', 'c');

-- others
select date_format('2018-11-17 13:33:33.333', 'B');
select date_format('2018-11-17 13:33:33.333', 'C');
select date_format('2018-11-17 13:33:33.333', 'I');


@@ -0,0 +1,2 @@
--SET spark.sql.legacy.timeParserPolicy=LEGACY
--IMPORT datetime-formatting.sql
@@ -0,0 +1,68 @@
--- TESTS FOR DATETIME FORMATTING FUNCTIONS ---

create temporary view v as select col from values
(timestamp '1582-06-01 11:33:33.123UTC+080000'),
(timestamp '1970-01-01 00:00:00.000Europe/Paris'),
(timestamp '1970-12-31 23:59:59.999Asia/Srednekolymsk'),
(timestamp '1996-04-01 00:33:33.123Australia/Darwin'),
(timestamp '2018-11-17 13:33:33.123Z'),
(timestamp '2020-01-01 01:33:33.123Asia/Shanghai'),
(timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col);

select col, date_format(col, 'G GG GGG GGGG') from v;

select col, date_format(col, 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy yyyyyyyy yyyyyyyyy yyyyyyyyyy') from v;

select col, date_format(col, 'q qq') from v;

select col, date_format(col, 'Q QQ QQQ QQQQ') from v;

select col, date_format(col, 'M MM MMM MMMM') from v;

select col, date_format(col, 'L LL') from v;

select col, date_format(col, 'E EE EEE EEEE') from v;

select col, date_format(col, 'F') from v;

select col, date_format(col, 'd dd') from v;

select col, date_format(col, 'DD') from v where col = timestamp '2100-01-01 01:33:33.123America/Los_Angeles';
select col, date_format(col, 'D DDD') from v;

select col, date_format(col, 'H HH') from v;

select col, date_format(col, 'h hh') from v;

select col, date_format(col, 'k kk') from v;

select col, date_format(col, 'K KK') from v;

select col, date_format(col, 'm mm') from v;

select col, date_format(col, 's ss') from v;

select col, date_format(col, 'S SS SSS SSSS SSSSS SSSSSS SSSSSSS SSSSSSSS SSSSSSSSS') from v;

select col, date_format(col, 'a') from v;

select col, date_format(col, 'VV') from v;

select col, date_format(col, 'z zz zzz zzzz') from v;

select col, date_format(col, 'X XX XXX') from v;
select col, date_format(col, 'XXXX XXXXX') from v;

select col, date_format(col, 'Z ZZ ZZZ ZZZZ ZZZZZ') from v;

select col, date_format(col, 'O OOOO') from v;

select col, date_format(col, 'x xx xxx xxxx xxxx xxxxx') from v;

-- optional section pattern; when formatting, the section is still always output
select col, date_format(col, '[yyyy-MM-dd HH:mm:ss]') from v;

-- literals
select col, date_format(col, "姚123'GyYqQMLwWuEFDdhHmsSaVzZxXOV'") from v;
select col, date_format(col, "''") from v;
select col, date_format(col, '') from v;
4 changes: 0 additions & 4 deletions sql/core/src/test/resources/sql-tests/inputs/datetime.sql
Expand Up @@ -124,10 +124,6 @@ select to_timestamp("12.1234019-10-06S10:11", "ss.SSSSy-MM-dd'S'HH:mm");
select to_timestamp("2019-10-06S", "yyyy-MM-dd'S'");
select to_timestamp("S2019-10-06", "'S'yyyy-MM-dd");

select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuee');
select date_format(timestamp '2019-10-06', 'yyyy-MM-dd eecc');
select date_format(timestamp '2019-10-06', 'yyyy-MM-dd eeee');

select to_timestamp("2019-10-06T10:11:12'12", "yyyy-MM-dd'T'HH:mm:ss''SSSS"); -- middle
select to_timestamp("2019-10-06T10:11:12'", "yyyy-MM-dd'T'HH:mm:ss''"); -- tail
select to_timestamp("'2019-10-06T10:11:12", "''yyyy-MM-dd'T'HH:mm:ss"); -- head
Expand Down

0 comments on commit 7671d96

Please sign in to comment.