From 882a55c2dfaa42e05a1a9860c27ff05b50859b57 Mon Sep 17 00:00:00 2001 From: Kemal Hadimli Date: Fri, 21 Jul 2023 13:09:08 +0100 Subject: [PATCH 1/2] fix: Scalar timestamp parsing This was broken with https://github.com/cloudquery/plugin-sdk/pull/1095 --- scalar/timestamp.go | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/scalar/timestamp.go b/scalar/timestamp.go index d3b26cf65c..321171dc4a 100644 --- a/scalar/timestamp.go +++ b/scalar/timestamp.go @@ -18,6 +18,7 @@ const defaultStringFormat = "2006-01-02 15:04:05.999999999 -0700 MST" // this is used by arrow string format (time is in UTC) const arrowStringFormat = "2006-01-02 15:04:05.999999999" +const arrowStringFormatNew = "2006-01-02 15:04:05.999999999Z" // const microsecFromUnixEpochToY2K = 946684800 * 1000000 @@ -140,24 +141,19 @@ func (s *Timestamp) DecodeText(src []byte) error { sbuf = sbuf[:len(defaultStringFormat)] } - // there is no good way of detecting format so we just try few of them - tim, err = time.Parse(time.RFC3339, sbuf) - if err == nil { - s.Value = tim.UTC() - s.Valid = true - return nil - } - tim, err = time.Parse(defaultStringFormat, sbuf) - if err == nil { - s.Value = tim.UTC() - s.Valid = true - return nil - } - tim, err = time.Parse(arrowStringFormat, sbuf) - if err == nil { - s.Value = tim.UTC() - s.Valid = true - return nil + // there is no good way of detecting format, so we just try few of them + for _, format := range []string{ + time.RFC3339, + defaultStringFormat, + arrowStringFormat, + arrowStringFormatNew, + } { + tim, err = time.Parse(format, sbuf) + if err == nil { + s.Value = tim.UTC() + s.Valid = true + return nil + } } return &ValidationError{Type: s.DataType(), Msg: "cannot parse timestamp", Value: sbuf, Err: err} } From 59c86fd9d54c8e00f15041aacd45f48e6205d5a1 Mon Sep 17 00:00:00 2001 From: Kemal Hadimli Date: Wed, 2 Aug 2023 15:09:51 +0100 Subject: [PATCH 2/2] Update test --- scalar/timestamp.go | 21 +++------- 
scalar/timestamp_test.go | 85 +++++++++++++++++++++++++++++++--------- 2 files changed, 73 insertions(+), 33 deletions(-) diff --git a/scalar/timestamp.go b/scalar/timestamp.go index 321171dc4a..371f676572 100644 --- a/scalar/timestamp.go +++ b/scalar/timestamp.go @@ -9,22 +9,13 @@ import ( "github.com/apache/arrow/go/v13/arrow" ) -// const pgTimestamptzHourFormat = "2006-01-02 15:04:05.999999999Z07" -// const pgTimestamptzMinuteFormat = "2006-01-02 15:04:05.999999999Z07:00" -// const pgTimestamptzSecondFormat = "2006-01-02 15:04:05.999999999Z07:00:00" - -// this is the default format used by time.Time.String() -const defaultStringFormat = "2006-01-02 15:04:05.999999999 -0700 MST" - -// this is used by arrow string format (time is in UTC) -const arrowStringFormat = "2006-01-02 15:04:05.999999999" -const arrowStringFormatNew = "2006-01-02 15:04:05.999999999Z" - -// const microsecFromUnixEpochToY2K = 946684800 * 1000000 - const ( -// negativeInfinityMicrosecondOffset = -9223372036854775808 -// infinityMicrosecondOffset = 9223372036854775807 + // this is the default format used by time.Time.String() + defaultStringFormat = "2006-01-02 15:04:05.999999999 -0700 MST" + + // these are used by Arrow string format (time is in UTC) + arrowStringFormat = "2006-01-02 15:04:05.999999999" + arrowStringFormatNew = "2006-01-02 15:04:05.999999999Z" ) type Timestamp struct { diff --git a/scalar/timestamp_test.go b/scalar/timestamp_test.go index d1832906b3..dc0e7145d4 100644 --- a/scalar/timestamp_test.go +++ b/scalar/timestamp_test.go @@ -1,6 +1,7 @@ package scalar import ( + "strconv" "testing" "time" @@ -69,26 +70,74 @@ func TestTimestampDoubleSet(t *testing.T) { } func TestAppendToBuilderTimestamp(t *testing.T) { - units := []arrow.TimeUnit{arrow.Second, arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond} - expected := []string{"1999-01-08 04:05:06Z", "1999-01-08 04:05:06.123Z", "1999-01-08 04:05:06.123456Z", "1999-01-08 04:05:06.123456789Z"} - for i, unit := range units { - 
timestamp := Timestamp{ - Type: &arrow.TimestampType{ - Unit: unit, - TimeZone: "UTC", - }, - } - err := timestamp.Set("1999-01-08 04:05:06.123456789") - if err != nil { - t.Fatal(err) - } + for idx, tc := range []struct { + Unit arrow.TimeUnit + Input string + Expected string + }{ + // Input format: arrowStringFormat + { + Unit: arrow.Second, + Input: "1999-01-08 04:05:06.123456789", + Expected: "1999-01-08 04:05:06Z", + }, + { + Unit: arrow.Millisecond, + Input: "1999-01-08 04:05:06.123456789", + Expected: "1999-01-08 04:05:06.123Z", + }, + { + Unit: arrow.Microsecond, + Input: "1999-01-08 04:05:06.123456789", + Expected: "1999-01-08 04:05:06.123456Z", + }, + { + Unit: arrow.Nanosecond, + Input: "1999-01-08 04:05:06.123456789", + Expected: "1999-01-08 04:05:06.123456789Z", + }, + // Input format: arrowStringFormatNew + { + Unit: arrow.Second, + Input: "1999-01-08 04:05:06.123456789Z", + Expected: "1999-01-08 04:05:06Z", + }, + { + Unit: arrow.Millisecond, + Input: "1999-01-08 04:05:06.123456789Z", + Expected: "1999-01-08 04:05:06.123Z", + }, + { + Unit: arrow.Microsecond, + Input: "1999-01-08 04:05:06.123456789Z", + Expected: "1999-01-08 04:05:06.123456Z", + }, + { + Unit: arrow.Nanosecond, + Input: "1999-01-08 04:05:06.123456789Z", + Expected: "1999-01-08 04:05:06.123456789Z", + }, + } { + tc := tc + t.Run(strconv.FormatInt(int64(idx), 10), func(t *testing.T) { + timestamp := Timestamp{ + Type: &arrow.TimestampType{ + Unit: tc.Unit, + TimeZone: "UTC", + }, + } + err := timestamp.Set(tc.Input) + if err != nil { + t.Fatal(err) + } - bldr := array.NewTimestampBuilder(memory.DefaultAllocator, timestamp.Type) - AppendToBuilder(bldr, &timestamp) + bldr := array.NewTimestampBuilder(memory.DefaultAllocator, timestamp.Type) + AppendToBuilder(bldr, &timestamp) - arr := bldr.NewArray().(*array.Timestamp) - actual := arr.ValueStr(0) + arr := bldr.NewArray().(*array.Timestamp) + actual := arr.ValueStr(0) - require.Equal(t, expected[i], actual) + require.Equal(t, tc.Expected, actual) + 
}) } }