Skip to content

Commit

Permalink
[SPARK-9794] [SQL] Fix datetime parsing in SparkSQL.
Browse files Browse the repository at this point in the history
This fixes https://issues.apache.org/jira/browse/SPARK-9794 by using a real ISO 8601 parser (courtesy of the XML component of the standard Java library).

cc: angelini

Author: Kevin Cox <kevincox@kevincox.ca>

Closes #8396 from kevincox/kevincox-sql-time-parsing.
  • Loading branch information
kevincox authored and rxin committed Sep 16, 2015
1 parent 896edb5 commit d39f15e
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.util
import java.sql.{Date, Timestamp}
import java.text.{DateFormat, SimpleDateFormat}
import java.util.{TimeZone, Calendar}
import javax.xml.bind.DatatypeConverter;

import org.apache.spark.unsafe.types.UTF8String

Expand Down Expand Up @@ -109,30 +110,22 @@ object DateTimeUtils {
}

/**
 * Parses a date/time string into a `java.util.Date`.
 *
 * The input is classified by the checks below, in this order:
 *  - it contains "GMT" (e.g. `2000-01-01T00:00GMT+01:00`): the "GMT" marker is removed and
 *    the remaining text is parsed again by this method;
 *  - it contains no 'T': it is treated as a JDBC escape string and handed to
 *    `Timestamp.valueOf` when it contains a space, otherwise to `Date.valueOf`;
 *  - anything else is handed to `DatatypeConverter.parseDateTime`.
 *
 * @param s the text to parse
 * @return the parsed instant; for JDBC escape strings the concrete value is the
 *         `Timestamp` or `Date` produced by the corresponding `valueOf` call
 */
def stringToTime(s: String): java.util.Date = {
  val indexOfGMT = s.indexOf("GMT")
  if (indexOfGMT != -1) {
    // ISO8601 with a weird time zone specifier (2000-01-01T00:00GMT+01:00)
    val s0 = s.substring(0, indexOfGMT)
    val s1 = s.substring(indexOfGMT + 3)
    // Mapped to 2000-01-01T00:00+01:00
    stringToTime(s0 + s1)
  } else if (!s.contains('T')) {
    // JDBC escape string
    if (s.contains(' ')) {
      Timestamp.valueOf(s)
    } else {
      Date.valueOf(s)
    }
  } else {
    // Plain ISO8601: delegate to the XML datatype parser instead of hand-rewriting the string.
    DatatypeConverter.parseDateTime(s).getTime()
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,38 @@ class DateTimeUtilsSuite extends SparkFunSuite {
assert(stringToDate(UTF8String.fromString("2015-031-8")).isEmpty)
}

// Exercises stringToTime with explicit-offset ISO8601 input ("Z", "-04:00", and the
// "GMT-04:00" spelling) and with JDBC escape strings.
test("string to time") {
  // Tests with UTC.
  // The calendar is mutated in place for every case, so the reference itself is a val.
  val c = Calendar.getInstance(TimeZone.getTimeZone("UTC"))
  // set(year, month, ...) below does not touch milliseconds; zero them so the expected
  // instants fall on whole seconds.
  c.set(Calendar.MILLISECOND, 0)

  c.set(1900, 0, 1, 0, 0, 0)
  assert(stringToTime("1900-01-01T00:00:00GMT-00:00") === c.getTime())

  c.set(2000, 11, 30, 10, 0, 0)
  assert(stringToTime("2000-12-30T10:00:00Z") === c.getTime())

  // Tests with set time zone.
  c.setTimeZone(TimeZone.getTimeZone("GMT-04:00"))
  c.set(Calendar.MILLISECOND, 0)

  c.set(1900, 0, 1, 0, 0, 0)
  assert(stringToTime("1900-01-01T00:00:00-04:00") === c.getTime())

  c.set(1900, 0, 1, 0, 0, 0)
  assert(stringToTime("1900-01-01T00:00:00GMT-04:00") === c.getTime())

  // Tests with local time zone.
  c.setTimeZone(TimeZone.getDefault())
  c.set(Calendar.MILLISECOND, 0)

  // No 'T' and no space: expected value is built as a Date.
  c.set(2000, 11, 30, 0, 0, 0)
  assert(stringToTime("2000-12-30") === new Date(c.getTimeInMillis()))

  // No 'T' but a space: expected value is built as a Timestamp.
  c.set(2000, 11, 30, 10, 0, 0)
  assert(stringToTime("2000-12-30 10:00:00") === new Timestamp(c.getTimeInMillis()))
}

test("string to timestamp") {
var c = Calendar.getInstance()
c.set(1969, 11, 31, 16, 0, 0)
Expand Down

0 comments on commit d39f15e

Please sign in to comment.