Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go/arrow/array/array_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func TestMakeFromData(t *testing.T) {
{name: "timestamp", d: &testDataType{arrow.TIMESTAMP}},
{name: "time32", d: &testDataType{arrow.TIME32}},
{name: "time64", d: &testDataType{arrow.TIME64}},
{name: "fixed_size_binary", d: &testDataType{arrow.FIXED_SIZE_BINARY}, size: 3},
{name: "fixed_size_binary", d: &testDataType{arrow.FIXED_SIZE_BINARY}},

{name: "list", d: &testDataType{arrow.LIST}, child: []*array.Data{
array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
Expand Down
45 changes: 13 additions & 32 deletions go/arrow/array/fixedsize_binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@ import (
// A type which represents an immutable sequence of fixed-length binary strings.
type FixedSizeBinary struct {
array
valueOffsets []int32
valueBytes []byte

valueBytes []byte
bytewidth int32
}

// NewFixedSizeBinaryData constructs a new fixed-size binary array from data.
func NewFixedSizeBinaryData(data *Data) *FixedSizeBinary {
a := &FixedSizeBinary{}
a := &FixedSizeBinary{bytewidth: int32(data.DataType().(arrow.FixedWidthDataType).BitWidth() / 8)}
a.refCount = 1
a.setData(data)
return a
Expand All @@ -41,14 +42,14 @@ func NewFixedSizeBinaryData(data *Data) *FixedSizeBinary {
// Value returns the fixed-size slice at index i. This value should not be mutated.
func (a *FixedSizeBinary) Value(i int) []byte {
i += a.array.data.offset
return a.valueBytes[a.valueOffsets[i]:a.valueOffsets[i+1]]
var (
bw = int(a.bytewidth)
beg = i * bw
end = (i + 1) * bw
)
return a.valueBytes[beg:end]
}

func (a *FixedSizeBinary) ValueOffset(i int) int { return int(a.valueOffsets[i]) }
func (a *FixedSizeBinary) ValueLen(i int) int { return int(a.valueOffsets[i+1] - a.valueOffsets[i]) }
func (a *FixedSizeBinary) ValueOffsets() []int32 { return a.valueOffsets }
func (a *FixedSizeBinary) ValueBytes() []byte { return a.valueBytes }

func (a *FixedSizeBinary) String() string {
o := new(strings.Builder)
o.WriteString("[")
Expand All @@ -68,32 +69,12 @@ func (a *FixedSizeBinary) String() string {
}

func (a *FixedSizeBinary) setData(data *Data) {
if len(data.buffers) != 3 {
panic("len(data.buffers) != 3")
}

a.array.setData(data)

if valueBytes := data.buffers[2]; valueBytes != nil {
a.valueBytes = valueBytes.Bytes()
vals := data.buffers[1]
if vals != nil {
a.valueBytes = vals.Bytes()
}

switch valueOffsets := data.buffers[1]; valueOffsets {
case nil:
// re-compute offsets
offsets := make([]int32, a.Len()+1)
bw := a.DataType().(arrow.FixedWidthDataType).BitWidth() / 8
for i := range offsets[1:] {
var delta int32
if a.IsValid(i) {
delta = int32(bw)
}
offsets[i+1] = offsets[i] + delta
}
a.valueOffsets = offsets
default:
a.valueOffsets = arrow.Int32Traits.CastFromBytes(valueOffsets.Bytes())
}
}

var (
Expand Down
8 changes: 6 additions & 2 deletions go/arrow/array/fixedsize_binary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ func TestFixedSizeBinary(t *testing.T) {
dtype := arrow.FixedSizeBinaryType{ByteWidth: 7}
b := array.NewFixedSizeBinaryBuilder(mem, &dtype)

zero := make([]byte, dtype.ByteWidth)

values := [][]byte{
[]byte("7654321"),
nil,
Expand All @@ -48,7 +50,9 @@ func TestFixedSizeBinary(t *testing.T) {
assert.Equal(t, 3, a.Len())
assert.Equal(t, 1, a.NullN())
assert.Equal(t, []byte("7654321"), a.Value(0))
assert.Equal(t, []byte{}, a.Value(1))
assert.Equal(t, zero, a.Value(1))
assert.Equal(t, true, a.IsNull(1))
assert.Equal(t, false, a.IsValid(1))
assert.Equal(t, []byte("AZERTYU"), a.Value(2))
a.Release()

Expand All @@ -58,7 +62,7 @@ func TestFixedSizeBinary(t *testing.T) {
assert.Equal(t, 3, a.Len())
assert.Equal(t, 1, a.NullN())
assert.Equal(t, []byte("7654321"), a.Value(0))
assert.Equal(t, []byte{}, a.Value(1))
assert.Equal(t, zero, a.Value(1))
assert.Equal(t, []byte("AZERTYU"), a.Value(2))
a.Release()

Expand Down
50 changes: 13 additions & 37 deletions go/arrow/array/fixedsize_binarybuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package array

import (
"fmt"
"sync/atomic"

"github.com/apache/arrow/go/arrow"
Expand All @@ -28,16 +29,14 @@ import (
type FixedSizeBinaryBuilder struct {
builder

dtype *arrow.FixedSizeBinaryType
offsets *int32BufferBuilder
values *byteBufferBuilder
dtype *arrow.FixedSizeBinaryType
values *byteBufferBuilder
}

func NewFixedSizeBinaryBuilder(mem memory.Allocator, dtype *arrow.FixedSizeBinaryType) *FixedSizeBinaryBuilder {
b := &FixedSizeBinaryBuilder{
builder: builder{refCount: 1, mem: mem},
dtype: dtype,
offsets: newInt32BufferBuilder(mem),
values: newByteBufferBuilder(mem),
}
return b
Expand All @@ -54,10 +53,6 @@ func (b *FixedSizeBinaryBuilder) Release() {
b.nullBitmap.Release()
b.nullBitmap = nil
}
if b.offsets != nil {
b.offsets.Release()
b.offsets = nil
}
if b.values != nil {
b.values.Release()
b.values = nil
Expand All @@ -72,14 +67,13 @@ func (b *FixedSizeBinaryBuilder) Append(v []byte) {
}

b.Reserve(1)
b.appendNextOffset()
b.values.Append(v)
b.UnsafeAppendBoolToBitmap(true)
}

func (b *FixedSizeBinaryBuilder) AppendNull() {
b.Reserve(1)
b.appendNextOffset()
b.values.Advance(b.dtype.ByteWidth)
b.UnsafeAppendBoolToBitmap(false)
}

Expand All @@ -97,25 +91,19 @@ func (b *FixedSizeBinaryBuilder) AppendValues(v [][]byte, valid []bool) {

b.Reserve(len(v))
for _, vv := range v {
b.appendNextOffset()
b.values.Append(vv)
switch len(vv) {
case 0:
b.values.Advance(b.dtype.ByteWidth)
case b.dtype.ByteWidth:
b.values.Append(vv)
default:
panic(fmt.Errorf("array: invalid binary length (got=%d, want=%d)", len(vv), b.dtype.ByteWidth))
}
}

b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
}

func (b *FixedSizeBinaryBuilder) Value(i int) []byte {
offsets := b.offsets.Values()
start := int(offsets[i])
var end int
if i == (b.length - 1) {
end = b.values.Len()
} else {
end = int(offsets[i+1])
}
return b.values.Bytes()[start:end]
}

func (b *FixedSizeBinaryBuilder) init(capacity int) {
b.builder.init(capacity)
b.values.resize(capacity * b.dtype.ByteWidth)
Expand All @@ -130,7 +118,6 @@ func (b *FixedSizeBinaryBuilder) Reserve(n int) {
// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may reduced.
func (b *FixedSizeBinaryBuilder) Resize(n int) {
b.offsets.resize((n + 1) * arrow.Int32SizeBytes)
b.builder.resize(n, b.init)
}

Expand All @@ -150,29 +137,18 @@ func (b *FixedSizeBinaryBuilder) NewFixedSizeBinaryArray() (a *FixedSizeBinary)
}

func (b *FixedSizeBinaryBuilder) newData() (data *Data) {
b.appendNextOffset()
values := b.values.Finish()
offsets := b.offsets.Finish()
data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, offsets, values}, nil, b.nulls, 0)
data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, values}, nil, b.nulls, 0)

if values != nil {
values.Release()
}
if offsets != nil {
offsets.Release()
}

b.builder.reset()

return
}

func (b *FixedSizeBinaryBuilder) appendNextOffset() {
numBytes := b.values.Len()
// TODO(alexandre): check binaryArrayMaximumCapacity?
b.offsets.AppendValue(int32(numBytes))
}

var (
_ Builder = (*FixedSizeBinaryBuilder)(nil)
)
9 changes: 0 additions & 9 deletions go/arrow/array/fixedsize_binarybuilder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,6 @@ func TestFixedSizeBinaryBuilder(t *testing.T) {
assert.Equal(t, 4, b.Len(), "unexpected Len()")
assert.Equal(t, 2, b.NullN(), "unexpected NullN()")

assert.Equal(t, b.Value(0), []byte("1234567"))
assert.Equal(t, b.Value(1), []byte{})
assert.Equal(t, b.Value(2), []byte("ABCDEFG"))
assert.Equal(t, b.Value(3), []byte{})

values := [][]byte{
[]byte("7654321"),
nil,
Expand All @@ -54,10 +49,6 @@ func TestFixedSizeBinaryBuilder(t *testing.T) {
assert.Equal(t, 7, b.Len(), "unexpected Len()")
assert.Equal(t, 3, b.NullN(), "unexpected NullN()")

assert.Equal(t, []byte("7654321"), b.Value(4))
assert.Equal(t, []byte{}, b.Value(5))
assert.Equal(t, []byte("AZERTYU"), b.Value(6))

a := b.NewFixedSizeBinaryArray()

// check state of builder after NewFixedSizeBinaryArray
Expand Down
50 changes: 50 additions & 0 deletions go/arrow/internal/arrdata/arrdata.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ func init() {
Records["strings"] = makeStringsRecords()
Records["fixed_size_lists"] = makeFixedSizeListsRecords()
Records["fixed_width_types"] = makeFixedWidthTypesRecords()
Records["fixed_size_binaries"] = makeFixedSizeBinariesRecords()

for k := range Records {
RecordNames = append(RecordNames, k)
Expand Down Expand Up @@ -398,6 +399,45 @@ func makeFixedWidthTypesRecords() []array.Record {
return recs
}

// fsb3 is a string holding one 3-byte fixed-size binary value. The distinct
// named type lets arrayOf tell a []fsb3 apart from other slices and build the
// column with a FixedSizeBinaryBuilder whose ByteWidth is 3.
type fsb3 string

// makeFixedSizeBinariesRecords returns three records that share a schema with
// a single nullable column, "fixed_size_binary_3", of 3-byte fixed-size binary
// values. Every record holds five values and uses the same validity mask, so
// the second and third entries of each column are null.
func makeFixedSizeBinariesRecords() []array.Record {
	mem := memory.NewGoAllocator()

	fields := []arrow.Field{
		{Name: "fixed_size_binary_3", Type: &arrow.FixedSizeBinaryType{ByteWidth: 3}, Nullable: true},
	}
	schema := arrow.NewSchema(fields, nil)

	// One row of raw values per record; valid is shared by all of them.
	valid := []bool{true, false, false, true, true}
	batches := [][]fsb3{
		{"001", "002", "003", "004", "005"},
		{"011", "012", "013", "014", "015"},
		{"021", "022", "023", "024", "025"},
	}

	// Build every column up front, one single-column chunk per record.
	chunks := make([][]array.Interface, 0, len(batches))
	for _, vals := range batches {
		chunks = append(chunks, []array.Interface{arrayOf(mem, vals, valid)})
	}

	// Drop this function's references to the columns once the records exist.
	// NOTE(review): presumably array.NewRecord retains its columns - confirm.
	defer func() {
		for i := range chunks {
			for j := range chunks[i] {
				chunks[i][j].Release()
			}
		}
	}()

	recs := make([]array.Record, len(chunks))
	for i := range chunks {
		// NOTE(review): -1 presumably asks NewRecord to infer the row count
		// from the columns - confirm against the array package docs.
		recs[i] = array.NewRecord(schema, chunks[i], -1)
	}

	return recs
}

func arrayOf(mem memory.Allocator, a interface{}, valids []bool) array.Interface {
if mem == nil {
mem = memory.NewGoAllocator()
Expand Down Expand Up @@ -567,6 +607,16 @@ func arrayOf(mem memory.Allocator, a interface{}, valids []bool) array.Interface
bldr.AppendValues(a, valids)
return bldr.NewArray()

case []fsb3:
bldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 3})
defer bldr.Release()
vs := make([][]byte, len(a))
for i, v := range a {
vs[i] = []byte(v)
}
bldr.AppendValues(vs, valids)
return bldr.NewArray()

default:
panic(fmt.Errorf("arrdata: invalid data slice type %T", a))
}
Expand Down
32 changes: 32 additions & 0 deletions go/arrow/ipc/cmd/arrow-cat/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,16 @@ record 3...
col[5] "timestamp": [20 (null) (null) 23 24]
col[6] "date32s": [-22 (null) (null) 21 22]
col[7] "date64s": [-22 (null) (null) 21 22]
`,
},
{
name: "fixed_size_binaries",
want: `record 1...
col[0] "fixed_size_binary_3": ["001" (null) (null) "004" "005"]
record 2...
col[0] "fixed_size_binary_3": ["011" (null) (null) "014" "015"]
record 3...
col[0] "fixed_size_binary_3": ["021" (null) (null) "024" "025"]
`,
},
} {
Expand Down Expand Up @@ -448,6 +458,28 @@ record 3/3...
col[5] "timestamp": [20 (null) (null) 23 24]
col[6] "date32s": [-22 (null) (null) 21 22]
col[7] "date64s": [-22 (null) (null) 21 22]
`,
},
{
stream: true,
name: "fixed_size_binaries",
want: `record 1...
col[0] "fixed_size_binary_3": ["001" (null) (null) "004" "005"]
record 2...
col[0] "fixed_size_binary_3": ["011" (null) (null) "014" "015"]
record 3...
col[0] "fixed_size_binary_3": ["021" (null) (null) "024" "025"]
`,
},
{
name: "fixed_size_binaries",
want: `version: V4
record 1/3...
col[0] "fixed_size_binary_3": ["001" (null) (null) "004" "005"]
record 2/3...
col[0] "fixed_size_binary_3": ["011" (null) (null) "014" "015"]
record 3/3...
col[0] "fixed_size_binary_3": ["021" (null) (null) "024" "025"]
`,
},
} {
Expand Down
Loading