From 2d32efeedad88743dd635ff562c65e072cfb44f7 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats Date: Thu, 25 May 2023 17:20:29 +0300 Subject: [PATCH] GH-35686:[Go] Add AppendTime to TimestampBuilder (#35687) ### Rationale for this change ### What changes are included in this PR? ### Are these changes tested? ### Are there any user-facing changes? * Closes: #35686 Lead-authored-by: candiduslynx Co-authored-by: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Signed-off-by: Matt Topol --- go/arrow/_tools/tmpl/main.go | 2 +- go/arrow/array/numeric.gen.go | 90 ------ go/arrow/array/numeric.gen.go.tmpl | 8 +- go/arrow/array/numericbuilder.gen.go | 229 -------------- go/arrow/array/numericbuilder.gen.go.tmpl | 14 +- go/arrow/array/numericbuilder.gen_test.go | 207 ------------- go/arrow/array/timestamp.go | 360 ++++++++++++++++++++++ go/arrow/array/timestamp_test.go | 235 ++++++++++++++ go/arrow/datatype_fixedwidth.go | 39 ++- go/arrow/numeric.tmpldata | 12 - go/arrow/type_traits_numeric.gen.go | 79 +---- go/arrow/type_traits_numeric.gen_test.go | 34 -- go/arrow/type_traits_test.go | 35 +++ go/arrow/type_traits_timestamp.go | 71 +++++ 14 files changed, 743 insertions(+), 672 deletions(-) create mode 100644 go/arrow/array/timestamp.go create mode 100644 go/arrow/array/timestamp_test.go create mode 100644 go/arrow/type_traits_timestamp.go diff --git a/go/arrow/_tools/tmpl/main.go b/go/arrow/_tools/tmpl/main.go index 0cabef3db2c8c..436d48fb63701 100644 --- a/go/arrow/_tools/tmpl/main.go +++ b/go/arrow/_tools/tmpl/main.go @@ -180,7 +180,7 @@ func process(data interface{}, specs []pathSpec) { } } - ioutil.WriteFile(spec.out, generated, fileMode(spec.in)) + os.WriteFile(spec.out, generated, fileMode(spec.in)) } } diff --git a/go/arrow/array/numeric.gen.go b/go/arrow/array/numeric.gen.go index 1849d6531ba78..aa14ca0ea2844 100644 --- a/go/arrow/array/numeric.gen.go +++ b/go/arrow/array/numeric.gen.go @@ -977,96 +977,6 @@ func arrayEqualUint8(left, right *Uint8) bool { 
return true } -// A type which represents an immutable sequence of arrow.Timestamp values. -type Timestamp struct { - array - values []arrow.Timestamp -} - -// NewTimestampData creates a new Timestamp. -func NewTimestampData(data arrow.ArrayData) *Timestamp { - a := &Timestamp{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Timestamp) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Timestamp) Value(i int) arrow.Timestamp { return a.values[i] } - -// Values returns the values. -func (a *Timestamp) TimestampValues() []arrow.Timestamp { return a.values } - -// String returns a string representation of the array. -func (a *Timestamp) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Timestamp) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.TimestampTraits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Timestamp) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.values[i].ToTime(a.DataType().(*arrow.TimestampType).Unit).Format("2006-01-02 15:04:05.999999999") -} - -func (a *Timestamp) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.values[i].ToTime(a.DataType().(*arrow.TimestampType).Unit).Format("2006-01-02 15:04:05.999999999") -} - -func (a *Timestamp) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } - - return json.Marshal(vals) -} - -func arrayEqualTimestamp(left, right *Timestamp) bool { - for i 
:= 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - // A type which represents an immutable sequence of arrow.Time32 values. type Time32 struct { array diff --git a/go/arrow/array/numeric.gen.go.tmpl b/go/arrow/array/numeric.gen.go.tmpl index adece058b84b1..b141276d756ac 100644 --- a/go/arrow/array/numeric.gen.go.tmpl +++ b/go/arrow/array/numeric.gen.go.tmpl @@ -90,9 +90,7 @@ func (a *{{.Name}}) ValueStr(i int) string { return a.values[i].FormattedString() {{else if or (eq .Name "Time32") (eq .Name "Time64") -}} return a.values[i].FormattedString(a.DataType().(*{{.QualifiedType}}Type).Unit) -{{else if or (eq .Name "Timestamp") -}} - return a.values[i].ToTime(a.DataType().(*{{.QualifiedType}}Type).Unit).Format("2006-01-02 15:04:05.999999999") -{{else if (eq .Name "Duration") -}} +{{else if (eq .Name "Duration") -}} // return value and suffix as a string such as "12345ms" return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*{{.QualifiedType}}Type).Unit) {{else if or (eq .Name "Int8") (eq .Name "Int16") (eq .Name "Int32") (eq .Name "Int64") -}} @@ -116,9 +114,7 @@ func (a *{{.Name}}) GetOneForMarshal(i int) interface{} { return a.values[i].ToTime().Format("2006-01-02") {{else if or (eq .Name "Time32") (eq .Name "Time64") -}} return a.values[i].ToTime(a.DataType().(*{{.QualifiedType}}Type).Unit).Format("15:04:05.999999999") -{{else if or (eq .Name "Timestamp") -}} - return a.values[i].ToTime(a.DataType().(*{{.QualifiedType}}Type).Unit).Format("2006-01-02 15:04:05.999999999") -{{else if (eq .Name "Duration") -}} +{{else if (eq .Name "Duration") -}} // return value and suffix as a string such as "12345ms" return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*{{.QualifiedType}}Type).Unit.String()) {{else if (eq .Size "1")}} diff --git a/go/arrow/array/numericbuilder.gen.go b/go/arrow/array/numericbuilder.gen.go index 17fee6688a5e2..b76a7292d788b 100644 --- 
a/go/arrow/array/numericbuilder.gen.go +++ b/go/arrow/array/numericbuilder.gen.go @@ -2264,232 +2264,6 @@ func (b *Uint8Builder) UnmarshalJSON(data []byte) error { return b.Unmarshal(dec) } -type TimestampBuilder struct { - builder - - dtype *arrow.TimestampType - data *memory.Buffer - rawData []arrow.Timestamp -} - -func NewTimestampBuilder(mem memory.Allocator, dtype *arrow.TimestampType) *TimestampBuilder { - return &TimestampBuilder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} -} - -func (b *TimestampBuilder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *TimestampBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *TimestampBuilder) Append(v arrow.Timestamp) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *TimestampBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *TimestampBuilder) AppendEmptyValue() { - b.Append(0) -} - -func (b *TimestampBuilder) UnsafeAppend(v arrow.Timestamp) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *TimestampBuilder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *TimestampBuilder) AppendValues(v []arrow.Timestamp, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.TimestampTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *TimestampBuilder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.TimestampTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.TimestampTraits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *TimestampBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *TimestampBuilder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.TimestampTraits.BytesRequired(n)) - b.rawData = arrow.TimestampTraits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a Timestamp array from the memory buffers used by the builder and resets the TimestampBuilder -// so it can be used to build a new array. -func (b *TimestampBuilder) NewArray() arrow.Array { - return b.NewTimestampArray() -} - -// NewTimestampArray creates a Timestamp array from the memory buffers used by the builder and resets the TimestampBuilder -// so it can be used to build a new array. 
-func (b *TimestampBuilder) NewTimestampArray() (a *Timestamp) { - data := b.newData() - a = NewTimestampData(data) - data.Release() - return -} - -func (b *TimestampBuilder) newData() (data *Data) { - bytesRequired := arrow.TimestampTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *TimestampBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := arrow.TimestampFromString(s, b.dtype.Unit) - if err != nil { - b.AppendNull() - return err - } - b.Append(v) - return nil -} - -func (b *TimestampBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - loc, _ := b.dtype.GetZone() - tm, _, err := arrow.TimestampFromStringInLocation(v, b.dtype.Unit, loc) - - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(arrow.Timestamp(0)), - Offset: dec.InputOffset(), - } - } - - b.Append(tm) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(arrow.Timestamp(0)), - Offset: dec.InputOffset(), - } - } - b.Append(arrow.Timestamp(n)) - case float64: - b.Append(arrow.Timestamp(v)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(arrow.Timestamp(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *TimestampBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *TimestampBuilder) UnmarshalJSON(data []byte) error { - dec := 
json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - type Time32Builder struct { builder @@ -2658,7 +2432,6 @@ func (b *Time32Builder) UnmarshalOne(dec *json.Decoder) error { b.AppendNull() case string: tm, err := arrow.Time32FromString(v, b.dtype.Unit) - if err != nil { return &json.UnmarshalTypeError{ Value: v, @@ -2883,7 +2656,6 @@ func (b *Time64Builder) UnmarshalOne(dec *json.Decoder) error { b.AppendNull() case string: tm, err := arrow.Time64FromString(v, b.dtype.Unit) - if err != nil { return &json.UnmarshalTypeError{ Value: v, @@ -3644,7 +3416,6 @@ var ( _ Builder = (*Uint16Builder)(nil) _ Builder = (*Int8Builder)(nil) _ Builder = (*Uint8Builder)(nil) - _ Builder = (*TimestampBuilder)(nil) _ Builder = (*Time32Builder)(nil) _ Builder = (*Time64Builder)(nil) _ Builder = (*Date32Builder)(nil) diff --git a/go/arrow/array/numericbuilder.gen.go.tmpl b/go/arrow/array/numericbuilder.gen.go.tmpl index d18829b2f0042..6eb12b5aeb478 100644 --- a/go/arrow/array/numericbuilder.gen.go.tmpl +++ b/go/arrow/array/numericbuilder.gen.go.tmpl @@ -217,13 +217,6 @@ func (b *{{.Name}}Builder) AppendValueFromString(s string) error { return err } b.Append(val) - {{else if or (eq .Name "Timestamp") -}} - v, err := arrow.TimestampFromString(s, b.dtype.Unit) - if err != nil { - b.AppendNull() - return err - } - b.Append(v) {{else if (eq .Name "Duration") -}} dur, err := time.ParseDuration(s) if err != nil { @@ -289,14 +282,9 @@ func (b *{{.Name}}Builder) UnmarshalOne(dec *json.Decoder) error { b.Append({{.QualifiedType}}(n)) case float64: b.Append({{.QualifiedType}}(v)) -{{else if or (eq .Name "Time32") (eq .Name "Time64") (eq .Name "Timestamp") -}} +{{else if or (eq .Name "Time32") (eq .Name "Time64") -}} case string: -{{if (eq .Name "Timestamp") -}} - loc, _ := 
b.dtype.GetZone() - tm, _, err := arrow.TimestampFromStringInLocation(v, b.dtype.Unit, loc) -{{else -}} tm, err := {{.QualifiedType}}FromString(v, b.dtype.Unit) -{{end}} if err != nil { return &json.UnmarshalTypeError{ Value: v, diff --git a/go/arrow/array/numericbuilder.gen_test.go b/go/arrow/array/numericbuilder.gen_test.go index 90c5a7f452304..246d3a7cdad9d 100644 --- a/go/arrow/array/numericbuilder.gen_test.go +++ b/go/arrow/array/numericbuilder.gen_test.go @@ -2047,213 +2047,6 @@ func TestUint8Builder_Resize(t *testing.T) { assert.Equal(t, 5, ab.Len()) } -func TestTimestampStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := &arrow.TimestampType{Unit: arrow.Second} - b := array.NewTimestampBuilder(mem, dt) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Timestamp) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewTimestampBuilder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Timestamp) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewTimestampBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.TimestampType{Unit: arrow.Second} - ab := array.NewTimestampBuilder(mem, dtype) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewTimestampArray - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewTimestampArray() - - // check state of builder after NewTimestampArray - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewTimestampArray did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewTimestampArray did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewTimestampArray did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []arrow.Timestamp{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.TimestampValues(), "unexpected TimestampValues") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.TimestampValues(), 10, "unexpected length of TimestampValues") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewTimestampArray() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []arrow.Timestamp{7, 8}, a.TimestampValues()) - assert.Len(t, a.TimestampValues(), 2) - - a.Release() - - var ( - want = []arrow.Timestamp{1, 2, 3, 4} - valids = 
[]bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewTimestampArray() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Timestamp); !ok { - t.Fatalf("could not type-assert to array.Timestamp") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Timestamp) - if !ok { - t.Fatalf("could not type-assert to array.Timestamp") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestTimestampBuilder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.TimestampType{Unit: arrow.Second} - ab := array.NewTimestampBuilder(mem, dtype) - defer ab.Release() - - exp := []arrow.Timestamp{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewTimestampArray() - assert.Equal(t, exp, a.TimestampValues()) - - a.Release() -} - -func TestTimestampBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.TimestampType{Unit: arrow.Second} - ab := array.NewTimestampBuilder(mem, dtype) - defer ab.Release() - - exp := []arrow.Timestamp{0, 1, 2, 3} - - ab.AppendValues([]arrow.Timestamp{}, nil) - a := ab.NewTimestampArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewTimestampArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]arrow.Timestamp{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewTimestampArray() - assert.Equal(t, exp, a.TimestampValues()) - a.Release() - - 
ab.AppendValues(exp, nil) - ab.AppendValues([]arrow.Timestamp{}, nil) - a = ab.NewTimestampArray() - assert.Equal(t, exp, a.TimestampValues()) - a.Release() -} - -func TestTimestampBuilder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.TimestampType{Unit: arrow.Second} - ab := array.NewTimestampBuilder(mem, dtype) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - func TestTime32StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) diff --git a/go/arrow/array/timestamp.go b/go/arrow/array/timestamp.go new file mode 100644 index 0000000000000..b508006274128 --- /dev/null +++ b/go/arrow/array/timestamp.go @@ -0,0 +1,360 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package array + +import ( + "bytes" + "fmt" + "reflect" + "strings" + "sync/atomic" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/bitutil" + "github.com/apache/arrow/go/v13/arrow/internal/debug" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/goccy/go-json" +) + +// Timestamp represents an immutable sequence of arrow.Timestamp values. +type Timestamp struct { + array + values []arrow.Timestamp +} + +// NewTimestampData creates a new Timestamp from Data. +func NewTimestampData(data arrow.ArrayData) *Timestamp { + a := &Timestamp{} + a.refCount = 1 + a.setData(data.(*Data)) + return a +} + +// Reset resets the array for re-use. +func (a *Timestamp) Reset(data *Data) { + a.setData(data) +} + +// Value returns the value at the specified index. +func (a *Timestamp) Value(i int) arrow.Timestamp { return a.values[i] } + +// TimestampValues returns the values. +func (a *Timestamp) TimestampValues() []arrow.Timestamp { return a.values } + +// String returns a string representation of the array. 
+func (a *Timestamp) String() string { + o := new(strings.Builder) + o.WriteString("[") + for i, v := range a.values { + if i > 0 { + fmt.Fprintf(o, " ") + } + switch { + case a.IsNull(i): + o.WriteString(NullValueStr) + default: + fmt.Fprintf(o, "%v", v) + } + } + o.WriteString("]") + return o.String() +} + +func (a *Timestamp) setData(data *Data) { + a.array.setData(data) + vals := data.buffers[1] + if vals != nil { + a.values = arrow.TimestampTraits.CastFromBytes(vals.Bytes()) + beg := a.array.data.offset + end := beg + a.array.data.length + a.values = a.values[beg:end] + } +} + +func (a *Timestamp) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return a.values[i].ToTime(a.DataType().(*arrow.TimestampType).Unit).Format("2006-01-02 15:04:05.999999999") +} + +func (a *Timestamp) GetOneForMarshal(i int) interface{} { + if a.IsNull(i) { + return nil + } + return a.values[i].ToTime(a.DataType().(*arrow.TimestampType).Unit).Format("2006-01-02 15:04:05.999999999") +} + +func (a *Timestamp) MarshalJSON() ([]byte, error) { + vals := make([]interface{}, a.Len()) + for i := range a.values { + vals[i] = a.GetOneForMarshal(i) + } + + return json.Marshal(vals) +} + +func arrayEqualTimestamp(left, right *Timestamp) bool { + for i := 0; i < left.Len(); i++ { + if left.IsNull(i) { + continue + } + if left.Value(i) != right.Value(i) { + return false + } + } + return true +} + +type TimestampBuilder struct { + builder + + dtype *arrow.TimestampType + data *memory.Buffer + rawData []arrow.Timestamp +} + +func NewTimestampBuilder(mem memory.Allocator, dtype *arrow.TimestampType) *TimestampBuilder { + return &TimestampBuilder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} +} + +func (b *TimestampBuilder) Type() arrow.DataType { return b.dtype } + +// Release decreases the reference count by 1. +// When the reference count goes to zero, the memory is freed. 
+func (b *TimestampBuilder) Release() { + debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") + + if atomic.AddInt64(&b.refCount, -1) == 0 { + if b.nullBitmap != nil { + b.nullBitmap.Release() + b.nullBitmap = nil + } + if b.data != nil { + b.data.Release() + b.data = nil + b.rawData = nil + } + } +} + +func (b *TimestampBuilder) AppendTime(t time.Time) { + ts, err := arrow.TimestampFromTime(t, b.dtype.Unit) + if err != nil { + panic(err) + } + b.Append(ts) +} + +func (b *TimestampBuilder) Append(v arrow.Timestamp) { + b.Reserve(1) + b.UnsafeAppend(v) +} + +func (b *TimestampBuilder) AppendNull() { + b.Reserve(1) + b.UnsafeAppendBoolToBitmap(false) +} + +func (b *TimestampBuilder) AppendEmptyValue() { + b.Append(0) +} + +func (b *TimestampBuilder) UnsafeAppend(v arrow.Timestamp) { + bitutil.SetBit(b.nullBitmap.Bytes(), b.length) + b.rawData[b.length] = v + b.length++ +} + +func (b *TimestampBuilder) UnsafeAppendBoolToBitmap(isValid bool) { + if isValid { + bitutil.SetBit(b.nullBitmap.Bytes(), b.length) + } else { + b.nulls++ + } + b.length++ +} + +// AppendValues will append the values in the v slice. The valid slice determines which values +// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, +// all values in v are appended and considered valid. 
+func (b *TimestampBuilder) AppendValues(v []arrow.Timestamp, valid []bool) { + if len(v) != len(valid) && len(valid) != 0 { + panic("len(v) != len(valid) && len(valid) != 0") + } + + if len(v) == 0 { + return + } + + b.Reserve(len(v)) + arrow.TimestampTraits.Copy(b.rawData[b.length:], v) + b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) +} + +func (b *TimestampBuilder) init(capacity int) { + b.builder.init(capacity) + + b.data = memory.NewResizableBuffer(b.mem) + bytesN := arrow.TimestampTraits.BytesRequired(capacity) + b.data.Resize(bytesN) + b.rawData = arrow.TimestampTraits.CastFromBytes(b.data.Bytes()) +} + +// Reserve ensures there is enough space for appending n elements +// by checking the capacity and calling Resize if necessary. +func (b *TimestampBuilder) Reserve(n int) { + b.builder.reserve(n, b.Resize) +} + +// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), +// additional memory will be allocated. If n is smaller, the allocated memory may be reduced. +func (b *TimestampBuilder) Resize(n int) { + nBuilder := n + if n < minBuilderCapacity { + n = minBuilderCapacity + } + + if b.capacity == 0 { + b.init(n) + } else { + b.builder.resize(nBuilder, b.init) + b.data.Resize(arrow.TimestampTraits.BytesRequired(n)) + b.rawData = arrow.TimestampTraits.CastFromBytes(b.data.Bytes()) + } +} + +// NewArray creates a Timestamp array from the memory buffers used by the builder and resets the TimestampBuilder +// so it can be used to build a new array. +func (b *TimestampBuilder) NewArray() arrow.Array { + return b.NewTimestampArray() +} + +// NewTimestampArray creates a Timestamp array from the memory buffers used by the builder and resets the TimestampBuilder +// so it can be used to build a new array.
+func (b *TimestampBuilder) NewTimestampArray() (a *Timestamp) { + data := b.newData() + a = NewTimestampData(data) + data.Release() + return +} + +func (b *TimestampBuilder) newData() (data *Data) { + bytesRequired := arrow.TimestampTraits.BytesRequired(b.length) + if bytesRequired > 0 && bytesRequired < b.data.Len() { + // trim buffers + b.data.Resize(bytesRequired) + } + data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) + b.reset() + + if b.data != nil { + b.data.Release() + b.data = nil + b.rawData = nil + } + + return +} + +func (b *TimestampBuilder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := arrow.TimestampFromString(s, b.dtype.Unit) + if err != nil { + b.AppendNull() + return err + } + b.Append(v) + return nil +} + +func (b *TimestampBuilder) UnmarshalOne(dec *json.Decoder) error { + t, err := dec.Token() + if err != nil { + return err + } + + switch v := t.(type) { + case nil: + b.AppendNull() + case string: + loc, _ := b.dtype.GetZone() + tm, _, err := arrow.TimestampFromStringInLocation(v, b.dtype.Unit, loc) + if err != nil { + return &json.UnmarshalTypeError{ + Value: v, + Type: reflect.TypeOf(arrow.Timestamp(0)), + Offset: dec.InputOffset(), + } + } + + b.Append(tm) + case json.Number: + n, err := v.Int64() + if err != nil { + return &json.UnmarshalTypeError{ + Value: v.String(), + Type: reflect.TypeOf(arrow.Timestamp(0)), + Offset: dec.InputOffset(), + } + } + b.Append(arrow.Timestamp(n)) + case float64: + b.Append(arrow.Timestamp(v)) + + default: + return &json.UnmarshalTypeError{ + Value: fmt.Sprint(t), + Type: reflect.TypeOf(arrow.Timestamp(0)), + Offset: dec.InputOffset(), + } + } + + return nil +} + +func (b *TimestampBuilder) Unmarshal(dec *json.Decoder) error { + for dec.More() { + if err := b.UnmarshalOne(dec); err != nil { + return err + } + } + return nil +} + +func (b *TimestampBuilder) UnmarshalJSON(data []byte) error { + dec := 
json.NewDecoder(bytes.NewReader(data)) + t, err := dec.Token() + if err != nil { + return err + } + + if delim, ok := t.(json.Delim); !ok || delim != '[' { + return fmt.Errorf("binary builder must unpack from json array, found %s", delim) + } + + return b.Unmarshal(dec) +} + +var ( + _ arrow.Array = (*Timestamp)(nil) + _ Builder = (*TimestampBuilder)(nil) +) diff --git a/go/arrow/array/timestamp_test.go b/go/arrow/array/timestamp_test.go new file mode 100644 index 0000000000000..27978976dbc9c --- /dev/null +++ b/go/arrow/array/timestamp_test.go @@ -0,0 +1,235 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package array_test + +import ( + "testing" + "time" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/stretchr/testify/assert" +) + +func TestTimestampStringRoundTrip(t *testing.T) { + // 1. 
create array + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + dt := &arrow.TimestampType{Unit: arrow.Second} + b := array.NewTimestampBuilder(mem, dt) + defer b.Release() + + b.Append(1) + b.Append(2) + b.Append(3) + b.AppendNull() + b.Append(5) + b.Append(6) + b.AppendNull() + b.Append(8) + b.Append(9) + b.Append(10) + + arr := b.NewArray().(*array.Timestamp) + defer arr.Release() + + // 2. create array via AppendValueFromString + b1 := array.NewTimestampBuilder(mem, dt) + defer b1.Release() + + for i := 0; i < arr.Len(); i++ { + assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) + } + + arr1 := b1.NewArray().(*array.Timestamp) + defer arr1.Release() + + assert.True(t, array.Equal(arr, arr1)) +} + +func TestNewTimestampBuilder(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + timestamp := time.Now() + dtype := &arrow.TimestampType{Unit: arrow.Second} + ab := array.NewTimestampBuilder(mem, dtype) + defer ab.Release() + + ab.Retain() + ab.Release() + + ab.Append(1) + ab.Append(2) + ab.Append(3) + ab.AppendNull() + ab.Append(5) + ab.Append(6) + ab.AppendNull() + ab.Append(8) + ab.Append(9) + ab.Append(10) + ab.AppendTime(timestamp) + + // check state of builder before NewTimestampArray + assert.Equal(t, 11, ab.Len(), "unexpected Len()") + assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") + + a := ab.NewTimestampArray() + + // check state of builder after NewTimestampArray + assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewTimestampArray did not reset state") + assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewTimestampArray did not reset state") + assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewTimestampArray did not reset state") + + // check state of array + assert.Equal(t, 2, a.NullN(), "unexpected null count") + assert.Equal(t, []arrow.Timestamp{1, 2, 3, 0, 5, 6, 0, 8, 9, 10, arrow.Timestamp(timestamp.Unix())}, 
a.TimestampValues(), "unexpected TimestampValues") + assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity + assert.Len(t, a.TimestampValues(), 11, "unexpected length of TimestampValues") + + a.Release() + + ab.Append(7) + ab.Append(8) + + a = ab.NewTimestampArray() + + assert.Equal(t, 0, a.NullN()) + assert.Equal(t, []arrow.Timestamp{7, 8}, a.TimestampValues()) + assert.Len(t, a.TimestampValues(), 2) + + a.Release() + + var ( + want = []arrow.Timestamp{1, 2, 3, 4} + valids = []bool{true, true, false, true} + ) + + ab.AppendValues(want, valids) + a = ab.NewTimestampArray() + + sub := array.MakeFromData(a.Data()) + defer sub.Release() + + if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { + t.Fatalf("invalid type: got=%q, want=%q", got, want) + } + + if _, ok := sub.(*array.Timestamp); !ok { + t.Fatalf("could not type-assert to array.Timestamp") + } + + if got, want := a.String(), `[1 2 (null) 4]`; got != want { + t.Fatalf("got=%q, want=%q", got, want) + } + + slice := array.NewSliceData(a.Data(), 2, 4) + defer slice.Release() + + sub1 := array.MakeFromData(slice) + defer sub1.Release() + + v, ok := sub1.(*array.Timestamp) + if !ok { + t.Fatalf("could not type-assert to array.Timestamp") + } + + if got, want := v.String(), `[(null) 4]`; got != want { + t.Fatalf("got=%q, want=%q", got, want) + } + + a.Release() +} + +func TestTimestampBuilder_AppendValues(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + dtype := &arrow.TimestampType{Unit: arrow.Second} + ab := array.NewTimestampBuilder(mem, dtype) + defer ab.Release() + + exp := []arrow.Timestamp{0, 1, 2, 3} + ab.AppendValues(exp, nil) + a := ab.NewTimestampArray() + assert.Equal(t, exp, a.TimestampValues()) + + a.Release() +} + +func TestTimestampBuilder_Empty(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + dtype := 
&arrow.TimestampType{Unit: arrow.Second} + ab := array.NewTimestampBuilder(mem, dtype) + defer ab.Release() + + exp := []arrow.Timestamp{0, 1, 2, 3} + + ab.AppendValues([]arrow.Timestamp{}, nil) + a := ab.NewTimestampArray() + assert.Zero(t, a.Len()) + a.Release() + + ab.AppendValues(nil, nil) + a = ab.NewTimestampArray() + assert.Zero(t, a.Len()) + a.Release() + + ab.AppendValues([]arrow.Timestamp{}, nil) + ab.AppendValues(exp, nil) + a = ab.NewTimestampArray() + assert.Equal(t, exp, a.TimestampValues()) + a.Release() + + ab.AppendValues(exp, nil) + ab.AppendValues([]arrow.Timestamp{}, nil) + a = ab.NewTimestampArray() + assert.Equal(t, exp, a.TimestampValues()) + a.Release() +} + +func TestTimestampBuilder_Resize(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + dtype := &arrow.TimestampType{Unit: arrow.Second} + ab := array.NewTimestampBuilder(mem, dtype) + defer ab.Release() + + assert.Equal(t, 0, ab.Cap()) + assert.Equal(t, 0, ab.Len()) + + ab.Reserve(63) + assert.Equal(t, 64, ab.Cap()) + assert.Equal(t, 0, ab.Len()) + + for i := 0; i < 63; i++ { + ab.Append(0) + } + assert.Equal(t, 64, ab.Cap()) + assert.Equal(t, 63, ab.Len()) + + ab.Resize(5) + assert.Equal(t, 5, ab.Len()) + + ab.Resize(32) + assert.Equal(t, 5, ab.Len()) +} diff --git a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go index 2a2bbc77a85a2..9f60149616296 100644 --- a/go/arrow/datatype_fixedwidth.go +++ b/go/arrow/datatype_fixedwidth.go @@ -106,7 +106,7 @@ func (d Date64) FormattedString() string { } // TimestampFromStringInLocation is like TimestampFromString, but treats the time instant -// as if it were in the passed timezone before converting to UTC for internal representation. +// as if it were in the provided timezone before converting to UTC for internal representation. 
func TimestampFromStringInLocation(val string, unit TimeUnit, loc *time.Location) (Timestamp, bool, error) { if len(val) < 10 { return 0, false, fmt.Errorf("%w: invalid timestamp string", ErrInvalid) @@ -167,17 +167,8 @@ func TimestampFromStringInLocation(val string, unit TimeUnit, loc *time.Location out = out.In(loc).UTC() } - switch unit { - case Second: - return Timestamp(out.Unix()), zoneFmt != "", nil - case Millisecond: - return Timestamp(out.Unix()*1e3 + int64(out.Nanosecond())/1e6), zoneFmt != "", nil - case Microsecond: - return Timestamp(out.Unix()*1e6 + int64(out.Nanosecond())/1e3), zoneFmt != "", nil - case Nanosecond: - return Timestamp(out.UnixNano()), zoneFmt != "", nil - } - return 0, zoneFmt != "", fmt.Errorf("%w: unexpected timestamp unit: %s", ErrInvalid, unit) + ts, err := TimestampFromTime(out, unit) + return ts, zoneFmt != "", err } // TimestampFromString parses a string and returns a timestamp for the given unit @@ -187,10 +178,10 @@ func TimestampFromStringInLocation(val string, unit TimeUnit, loc *time.Location // or a space, and [.zzzzzzzzz] can be either left out or up to 9 digits of // fractions of a second. // -// YYYY-MM-DD -// YYYY-MM-DD[T]HH -// YYYY-MM-DD[T]HH:MM -// YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzz] +// YYYY-MM-DD +// YYYY-MM-DD[T]HH +// YYYY-MM-DD[T]HH:MM +// YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzz] // // You can also optionally have an ending Z to indicate UTC or indicate a specific // timezone using ±HH, ±HHMM or ±HH:MM at the end of the string. 
@@ -206,6 +197,22 @@ func (t Timestamp) ToTime(unit TimeUnit) time.Time { return time.Unix(0, int64(t)*int64(unit.Multiplier())).UTC() } +// TimestampFromTime allows converting time.Time to Timestamp +func TimestampFromTime(val time.Time, unit TimeUnit) (Timestamp, error) { + switch unit { + case Second: + return Timestamp(val.Unix()), nil + case Millisecond: + return Timestamp(val.Unix()*1e3 + int64(val.Nanosecond())/1e6), nil + case Microsecond: + return Timestamp(val.Unix()*1e6 + int64(val.Nanosecond())/1e3), nil + case Nanosecond: + return Timestamp(val.UnixNano()), nil + default: + return 0, fmt.Errorf("%w: unexpected timestamp unit: %s", ErrInvalid, unit) + } +} + // Time32FromString parses a string to return a Time32 value in the given unit, // unit needs to be only seconds or milliseconds and the string should be in the // form of HH:MM or HH:MM:SS[.zzz] where the fractions of a second are optional. diff --git a/go/arrow/numeric.tmpldata b/go/arrow/numeric.tmpldata index 92cd48ba1078d..3c2d63b7cb2bb 100644 --- a/go/arrow/numeric.tmpldata +++ b/go/arrow/numeric.tmpldata @@ -78,18 +78,6 @@ "Default": "0", "Size": "1" }, - { - "Name": "Timestamp", - "name": "timestamp", - "Type": "Timestamp", - "QualifiedType": "arrow.Timestamp", - "InternalType": "int64", - "Default": "0", - "Size": "8", - "Opt": { - "Parametric": true - } - }, { "Name": "Time32", "name": "time32", diff --git a/go/arrow/type_traits_numeric.gen.go b/go/arrow/type_traits_numeric.gen.go index 4eac7158957a7..ba394b67e8303 100644 --- a/go/arrow/type_traits_numeric.gen.go +++ b/go/arrow/type_traits_numeric.gen.go @@ -27,22 +27,21 @@ import ( ) var ( - Int64Traits int64Traits - Uint64Traits uint64Traits - Float64Traits float64Traits - Int32Traits int32Traits - Uint32Traits uint32Traits - Float32Traits float32Traits - Int16Traits int16Traits - Uint16Traits uint16Traits - Int8Traits int8Traits - Uint8Traits uint8Traits - TimestampTraits timestampTraits - Time32Traits time32Traits - Time64Traits 
time64Traits - Date32Traits date32Traits - Date64Traits date64Traits - DurationTraits durationTraits + Int64Traits int64Traits + Uint64Traits uint64Traits + Float64Traits float64Traits + Int32Traits int32Traits + Uint32Traits uint32Traits + Float32Traits float32Traits + Int16Traits int16Traits + Uint16Traits uint16Traits + Int8Traits int8Traits + Uint8Traits uint8Traits + Time32Traits time32Traits + Time64Traits time64Traits + Date32Traits date32Traits + Date64Traits date64Traits + DurationTraits durationTraits ) // Int64 traits @@ -525,54 +524,6 @@ func (uint8Traits) CastToBytes(b []uint8) []byte { // Copy copies src to dst. func (uint8Traits) Copy(dst, src []uint8) { copy(dst, src) } -// Timestamp traits - -const ( - // TimestampSizeBytes specifies the number of bytes required to store a single Timestamp in memory - TimestampSizeBytes = int(unsafe.Sizeof(Timestamp(0))) -) - -type timestampTraits struct{} - -// BytesRequired returns the number of bytes required to store n elements in memory. -func (timestampTraits) BytesRequired(n int) int { return TimestampSizeBytes * n } - -// PutValue -func (timestampTraits) PutValue(b []byte, v Timestamp) { - endian.Native.PutUint64(b, uint64(v)) -} - -// CastFromBytes reinterprets the slice b to a slice of type Timestamp. -// -// NOTE: len(b) must be a multiple of TimestampSizeBytes. -func (timestampTraits) CastFromBytes(b []byte) []Timestamp { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - var res []Timestamp - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / TimestampSizeBytes - s.Cap = h.Cap / TimestampSizeBytes - - return res -} - -// CastToBytes reinterprets the slice b to a slice of bytes. 
-func (timestampTraits) CastToBytes(b []Timestamp) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * TimestampSizeBytes - s.Cap = h.Cap * TimestampSizeBytes - - return res -} - -// Copy copies src to dst. -func (timestampTraits) Copy(dst, src []Timestamp) { copy(dst, src) } - // Time32 traits const ( diff --git a/go/arrow/type_traits_numeric.gen_test.go b/go/arrow/type_traits_numeric.gen_test.go index 9c6cfaadd5921..abcc2aca928fa 100644 --- a/go/arrow/type_traits_numeric.gen_test.go +++ b/go/arrow/type_traits_numeric.gen_test.go @@ -365,40 +365,6 @@ func TestUint8Traits(t *testing.T) { } } -func TestTimestampTraits(t *testing.T) { - const N = 10 - b1 := arrow.TimestampTraits.CastToBytes([]arrow.Timestamp{ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - }) - - b2 := make([]byte, arrow.TimestampTraits.BytesRequired(N)) - for i := 0; i < N; i++ { - beg := i * arrow.TimestampSizeBytes - end := (i + 1) * arrow.TimestampSizeBytes - arrow.TimestampTraits.PutValue(b2[beg:end], arrow.Timestamp(i)) - } - - if !reflect.DeepEqual(b1, b2) { - v1 := arrow.TimestampTraits.CastFromBytes(b1) - v2 := arrow.TimestampTraits.CastFromBytes(b2) - t.Fatalf("invalid values:\nb1=%v\nb2=%v\nv1=%v\nv2=%v\n", b1, b2, v1, v2) - } - - v1 := arrow.TimestampTraits.CastFromBytes(b1) - for i, v := range v1 { - if got, want := v, arrow.Timestamp(i); got != want { - t.Fatalf("invalid value[%d]. 
got=%v, want=%v", i, got, want) - } - } - - v2 := make([]arrow.Timestamp, N) - arrow.TimestampTraits.Copy(v2, v1) - - if !reflect.DeepEqual(v1, v2) { - t.Fatalf("invalid values:\nv1=%v\nv2=%v\n", v1, v2) - } -} - func TestTime32Traits(t *testing.T) { const N = 10 b1 := arrow.Time32Traits.CastToBytes([]arrow.Time32{ diff --git a/go/arrow/type_traits_test.go b/go/arrow/type_traits_test.go index df6335dc69803..3b9571d3d3d49 100644 --- a/go/arrow/type_traits_test.go +++ b/go/arrow/type_traits_test.go @@ -17,6 +17,7 @@ package arrow_test import ( + "bytes" "fmt" "reflect" "testing" @@ -233,3 +234,37 @@ func TestMonthDayNanoIntervalTraits(t *testing.T) { t.Fatalf("invalid values:\nv1=%v\nv2=%v\n", v1, v2) } } + +func TestTimestampTraits(t *testing.T) { + const N = 10 + b1 := arrow.TimestampTraits.CastToBytes([]arrow.Timestamp{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + }) + + b2 := make([]byte, arrow.TimestampTraits.BytesRequired(N)) + for i := 0; i < N; i++ { + beg := i * arrow.TimestampSizeBytes + end := (i + 1) * arrow.TimestampSizeBytes + arrow.TimestampTraits.PutValue(b2[beg:end], arrow.Timestamp(i)) + } + + if !bytes.Equal(b1, b2) { + v1 := arrow.TimestampTraits.CastFromBytes(b1) + v2 := arrow.TimestampTraits.CastFromBytes(b2) + t.Fatalf("invalid values:\nb1=%v\nb2=%v\nv1=%v\nv2=%v\n", b1, b2, v1, v2) + } + + v1 := arrow.TimestampTraits.CastFromBytes(b1) + for i, v := range v1 { + if got, want := v, arrow.Timestamp(i); got != want { + t.Fatalf("invalid value[%d]. 
got=%v, want=%v", i, got, want) + } + } + + v2 := make([]arrow.Timestamp, N) + arrow.TimestampTraits.Copy(v2, v1) + + if !reflect.DeepEqual(v1, v2) { + t.Fatalf("invalid values:\nv1=%v\nv2=%v\n", v1, v2) + } +} diff --git a/go/arrow/type_traits_timestamp.go b/go/arrow/type_traits_timestamp.go new file mode 100644 index 0000000000000..ade215e629be5 --- /dev/null +++ b/go/arrow/type_traits_timestamp.go @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package arrow + +import ( + "reflect" + "unsafe" + + "github.com/apache/arrow/go/v13/arrow/endian" +) + +var TimestampTraits timestampTraits + +const ( + // TimestampSizeBytes specifies the number of bytes required to store a single Timestamp in memory + TimestampSizeBytes = int(unsafe.Sizeof(Timestamp(0))) +) + +type timestampTraits struct{} + +// BytesRequired returns the number of bytes required to store n elements in memory. +func (timestampTraits) BytesRequired(n int) int { return TimestampSizeBytes * n } + +func (timestampTraits) PutValue(b []byte, v Timestamp) { + endian.Native.PutUint64(b, uint64(v)) +} + +// CastFromBytes reinterprets the slice b to a slice of type Timestamp. +// +// NOTE: len(b) must be a multiple of TimestampSizeBytes. 
+func (timestampTraits) CastFromBytes(b []byte) []Timestamp { + h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + + var res []Timestamp + s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) + s.Data = h.Data + s.Len = h.Len / TimestampSizeBytes + s.Cap = h.Cap / TimestampSizeBytes + + return res +} + +// CastToBytes reinterprets the slice b to a slice of bytes. +func (timestampTraits) CastToBytes(b []Timestamp) []byte { + h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + + var res []byte + s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) + s.Data = h.Data + s.Len = h.Len * TimestampSizeBytes + s.Cap = h.Cap * TimestampSizeBytes + + return res +} + +// Copy copies src to dst. +func (timestampTraits) Copy(dst, src []Timestamp) { copy(dst, src) }