From 549bc80b15a45376e9021da89ac64ba9603bd531 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 12:58:18 -0700 Subject: [PATCH 01/82] =?UTF-8?q?feat(arreflect):=20add=20arrow/arreflect?= =?UTF-8?q?=20package=20for=20Go=E2=86=94Arrow=20reflection?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a new opt-in sub-package arrow/arreflect that provides bidirectional conversion between Apache Arrow arrays and Go structs using reflection. Public API: - ToGo[T], ToGoSlice[T] — Arrow array → Go value/slice - FromGoSlice[T] — Go slice → Arrow array - RecordToSlice[T] — Arrow RecordBatch → Go slice - RecordFromSlice[T] — Go slice → Arrow RecordBatch - InferArrowSchema[T] — infer *arrow.Schema from Go struct - InferArrowType[T] — infer arrow.DataType from Go type Supported Arrow types: all primitives, Timestamp/Date/Time/Duration, Decimal32/64/128/256, Struct, List/LargeList/ListView/LargeListView, Map, FixedSizeList, Dictionary (via 'dict' tag), RunEndEncoded (via 'ree' tag). Struct field mapping is controlled via arrow struct tags: arrow:"name" — custom column name arrow:"-" — exclude field arrow:",dict" — dictionary-encode arrow:",ree" — run-end encode arrow:",listview" — use ListView instead of List arrow:",decimal(p,s)" — override decimal precision/scale Pointer fields map to nullable Arrow fields (null on nil pointer). Embedded struct fields are promoted following encoding/json rules. Struct metadata is cached per type using sync.Map. arrow/array is unchanged; arreflect depends on it but not vice versa. 
--- arrow/arreflect/doc.go | 34 + arrow/arreflect/reflect.go | 448 +++++++++++ arrow/arreflect/reflect_arrow_to_go.go | 519 +++++++++++++ arrow/arreflect/reflect_arrow_to_go_test.go | 815 ++++++++++++++++++++ arrow/arreflect/reflect_go_to_arrow.go | 813 +++++++++++++++++++ arrow/arreflect/reflect_go_to_arrow_test.go | 780 +++++++++++++++++++ arrow/arreflect/reflect_infer.go | 205 +++++ arrow/arreflect/reflect_infer_test.go | 415 ++++++++++ arrow/arreflect/reflect_integration_test.go | 514 ++++++++++++ arrow/arreflect/reflect_public_test.go | 430 +++++++++++ arrow/arreflect/reflect_test.go | 295 +++++++ 11 files changed, 5268 insertions(+) create mode 100644 arrow/arreflect/doc.go create mode 100644 arrow/arreflect/reflect.go create mode 100644 arrow/arreflect/reflect_arrow_to_go.go create mode 100644 arrow/arreflect/reflect_arrow_to_go_test.go create mode 100644 arrow/arreflect/reflect_go_to_arrow.go create mode 100644 arrow/arreflect/reflect_go_to_arrow_test.go create mode 100644 arrow/arreflect/reflect_infer.go create mode 100644 arrow/arreflect/reflect_infer_test.go create mode 100644 arrow/arreflect/reflect_integration_test.go create mode 100644 arrow/arreflect/reflect_public_test.go create mode 100644 arrow/arreflect/reflect_test.go diff --git a/arrow/arreflect/doc.go b/arrow/arreflect/doc.go new file mode 100644 index 00000000..c4ba67a0 --- /dev/null +++ b/arrow/arreflect/doc.go @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package arreflect provides utilities for converting between +// Apache Arrow arrays and Go structs using reflection. +// +// The primary entry points are the generic functions [ToGo], [ToGoSlice], +// [FromGoSlice], [RecordToSlice], and [RecordFromSlice], which convert +// between Arrow arrays/records and Go slices of structs. +// +// Schema inference is available via [InferArrowSchema] and [InferArrowType]. +// +// Arrow struct tags control field mapping: +// +// type MyRow struct { +// Name string `arrow:"name"` +// Score float64 `arrow:"score"` +// Skip string `arrow:"-"` +// Enc string `arrow:"enc,dict"` +// } +package arreflect diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go new file mode 100644 index 00000000..47f54f58 --- /dev/null +++ b/arrow/arreflect/reflect.go @@ -0,0 +1,448 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package arreflect + +import ( + "fmt" + "reflect" + "strconv" + "strings" + "sync" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" +) + +type tagOpts struct { + Name string + Skip bool + Dict bool + ListView bool + REE bool + DecimalPrecision int32 + DecimalScale int32 + HasDecimalOpts bool +} + +type fieldMeta struct { + Name string + Index []int + Type reflect.Type + Nullable bool + Opts tagOpts +} + +func parseTag(tag string) tagOpts { + if tag == "-" { + return tagOpts{Skip: true} + } + + var name, rest string + if idx := strings.Index(tag, ","); idx >= 0 { + name = tag[:idx] + rest = tag[idx+1:] + } else { + name = tag + rest = "" + } + + opts := tagOpts{Name: name} + + if rest == "" { + return opts + } + + parseOptions(&opts, rest) + return opts +} + +func parseOptions(opts *tagOpts, rest string) { + for len(rest) > 0 { + var token string + if idx := strings.Index(rest, ","); idx >= 0 { + token = rest[:idx] + rest = rest[idx+1:] + } else { + token = rest + rest = "" + } + token = strings.TrimSpace(token) + + if strings.HasPrefix(token, "decimal(") { + if strings.HasSuffix(token, ")") { + parseDecimalOpt(opts, token) + continue + } + next := token + for len(rest) > 0 { + var part string + if idx := strings.Index(rest, ","); idx >= 0 { + part = rest[:idx] + rest = rest[idx+1:] + } else { + part = rest + rest = "" + } + next = next + "," + strings.TrimSpace(part) + if strings.HasSuffix(next, ")") { + break + } + } + parseDecimalOpt(opts, next) + continue + } + + switch token { + case "dict": + opts.Dict = true + case "listview": + opts.ListView = true + case "ree": + opts.REE = true + } + } +} + +func parseDecimalOpt(opts *tagOpts, token string) { + inner := strings.TrimPrefix(token, "decimal(") + inner = strings.TrimSuffix(inner, ")") + parts := 
strings.SplitN(inner, ",", 2) + if len(parts) == 2 { + p, errP := strconv.ParseInt(strings.TrimSpace(parts[0]), 10, 32) + s, errS := strconv.ParseInt(strings.TrimSpace(parts[1]), 10, 32) + if errP == nil && errS == nil { + opts.HasDecimalOpts = true + opts.DecimalPrecision = int32(p) + opts.DecimalScale = int32(s) + } + } +} + +type structFieldsEntry struct { + index []int + field reflect.StructField + depth int +} + +func getStructFields(t reflect.Type) []fieldMeta { + for t.Kind() == reflect.Ptr { + t = t.Elem() + } + + if t.Kind() != reflect.Struct { + return nil + } + + type bfsEntry struct { + t reflect.Type + index []int + depth int + } + + type candidate struct { + meta fieldMeta + depth int + tagged bool + } + + nameMap := make(map[string][]candidate) + + queue := []bfsEntry{{t: t, index: nil, depth: 0}} + + visited := make(map[reflect.Type]bool) + + for len(queue) > 0 { + entry := queue[0] + queue = queue[1:] + + st := entry.t + for st.Kind() == reflect.Ptr { + st = st.Elem() + } + if st.Kind() != reflect.Struct { + continue + } + + if visited[st] { + continue + } + if entry.depth > 0 { + visited[st] = true + } + + for i := 0; i < st.NumField(); i++ { + sf := st.Field(i) + + fullIndex := make([]int, len(entry.index)+1) + copy(fullIndex, entry.index) + fullIndex[len(entry.index)] = i + + if !sf.IsExported() && !sf.Anonymous { + continue + } + + tagVal, hasTag := sf.Tag.Lookup("arrow") + var opts tagOpts + if hasTag { + opts = parseTag(tagVal) + } + + if opts.Skip { + continue + } + + arrowName := opts.Name + if arrowName == "" { + arrowName = sf.Name + } + + if sf.Anonymous && !hasTag { + ft := sf.Type + for ft.Kind() == reflect.Ptr { + ft = ft.Elem() + } + if ft.Kind() == reflect.Struct { + queue = append(queue, bfsEntry{ + t: ft, + index: fullIndex, + depth: entry.depth + 1, + }) + continue + } + } + + nullable := sf.Type.Kind() == reflect.Ptr + + tagged := hasTag && opts.Name != "" + + meta := fieldMeta{ + Name: arrowName, + Index: fullIndex, + Type: 
sf.Type, + Nullable: nullable, + Opts: opts, + } + + nameMap[arrowName] = append(nameMap[arrowName], candidate{ + meta: meta, + depth: entry.depth, + tagged: tagged, + }) + } + } + + type resolvedField struct { + meta fieldMeta + order int + } + + nameOrder := make(map[string]int) + orderCounter := 0 + + type bfsEntry2 struct { + t reflect.Type + index []int + depth int + } + queue2 := []bfsEntry2{{t: t, index: nil, depth: 0}} + visited2 := make(map[reflect.Type]bool) + + for len(queue2) > 0 { + entry := queue2[0] + queue2 = queue2[1:] + + st := entry.t + for st.Kind() == reflect.Ptr { + st = st.Elem() + } + if st.Kind() != reflect.Struct { + continue + } + if entry.depth > 0 { + if visited2[st] { + continue + } + visited2[st] = true + } + + for i := 0; i < st.NumField(); i++ { + sf := st.Field(i) + fullIndex := make([]int, len(entry.index)+1) + copy(fullIndex, entry.index) + fullIndex[len(entry.index)] = i + + if !sf.IsExported() && !sf.Anonymous { + continue + } + + tagVal, hasTag := sf.Tag.Lookup("arrow") + var opts tagOpts + if hasTag { + opts = parseTag(tagVal) + } + if opts.Skip { + continue + } + + arrowName := opts.Name + if arrowName == "" { + arrowName = sf.Name + } + + if sf.Anonymous && !hasTag { + ft := sf.Type + for ft.Kind() == reflect.Ptr { + ft = ft.Elem() + } + if ft.Kind() == reflect.Struct { + queue2 = append(queue2, bfsEntry2{ + t: ft, + index: fullIndex, + depth: entry.depth + 1, + }) + continue + } + } + + if _, seen := nameOrder[arrowName]; !seen { + nameOrder[arrowName] = orderCounter + orderCounter++ + } + } + } + + resolved := make([]resolvedField, 0, len(nameMap)) + for name, candidates := range nameMap { + minDepth := candidates[0].depth + for _, c := range candidates[1:] { + if c.depth < minDepth { + minDepth = c.depth + } + } + + var atMin []candidate + for _, c := range candidates { + if c.depth == minDepth { + atMin = append(atMin, c) + } + } + + var winner *candidate + if len(atMin) == 1 { + winner = &atMin[0] + } else { + var 
tagged []candidate + for _, c := range atMin { + if c.tagged { + tagged = append(tagged, c) + } + } + if len(tagged) == 1 { + winner = &tagged[0] + } + } + + if winner != nil { + order := nameOrder[name] + resolved = append(resolved, resolvedField{meta: winner.meta, order: order}) + } + } + + for i := 1; i < len(resolved); i++ { + for j := i; j > 0 && resolved[j].order < resolved[j-1].order; j-- { + resolved[j], resolved[j-1] = resolved[j-1], resolved[j] + } + } + + result := make([]fieldMeta, len(resolved)) + for i, r := range resolved { + result[i] = r.meta + } + return result +} + +var structFieldCache sync.Map + +func cachedStructFields(t reflect.Type) []fieldMeta { + for t.Kind() == reflect.Ptr { + t = t.Elem() + } + + if v, ok := structFieldCache.Load(t); ok { + return v.([]fieldMeta) + } + + fields := getStructFields(t) + v, _ := structFieldCache.LoadOrStore(t, fields) + return v.([]fieldMeta) +} + +func ToGo[T any](arr arrow.Array, i int) (T, error) { + var result T + v := reflect.ValueOf(&result).Elem() + if err := setValue(v, arr, i); err != nil { + var zero T + return zero, err + } + return result, nil +} + +func ToGoSlice[T any](arr arrow.Array) ([]T, error) { + n := arr.Len() + result := make([]T, n) + for i := 0; i < n; i++ { + v := reflect.ValueOf(&result[i]).Elem() + if err := setValue(v, arr, i); err != nil { + return nil, fmt.Errorf("ToGoSlice: index %d: %w", i, err) + } + } + return result, nil +} + +func FromGoSlice[T any](vals []T, mem memory.Allocator) (arrow.Array, error) { + if len(vals) == 0 { + dt, err := inferArrowType(reflect.TypeFor[T]()) + if err != nil { + return nil, fmt.Errorf("FromGoSlice: %w", err) + } + b := array.NewBuilder(mem, dt) + defer b.Release() + return b.NewArray(), nil + } + sv := reflect.ValueOf(vals) + return buildArray(sv, tagOpts{}, mem) +} + +func RecordToSlice[T any](rec arrow.Record) ([]T, error) { + sa := array.RecordToStructArray(rec) + defer sa.Release() + return ToGoSlice[T](sa) +} + +func RecordFromSlice[T 
any](vals []T, mem memory.Allocator) (arrow.Record, error) { + arr, err := FromGoSlice[T](vals, mem) + if err != nil { + return nil, err + } + defer arr.Release() + sa, ok := arr.(*array.Struct) + if !ok { + return nil, fmt.Errorf("RecordFromSlice: T must be a struct type, got %T", arr) + } + return array.RecordFromStructArray(sa, nil), nil +} diff --git a/arrow/arreflect/reflect_arrow_to_go.go b/arrow/arreflect/reflect_arrow_to_go.go new file mode 100644 index 00000000..cf0e5dfa --- /dev/null +++ b/arrow/arreflect/reflect_arrow_to_go.go @@ -0,0 +1,519 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package arreflect + +import ( + "fmt" + "reflect" + "time" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" +) + +func setValue(v reflect.Value, arr arrow.Array, i int) error { + if arr.IsNull(i) { + v.Set(reflect.Zero(v.Type())) + return nil + } + if v.Kind() == reflect.Ptr { + v.Set(reflect.New(v.Type().Elem())) + v = v.Elem() + } + + switch arr.DataType().ID() { + case arrow.BOOL: + a, ok := arr.(*array.Boolean) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Boolean, got %T", arr) + } + if v.Kind() != reflect.Bool { + return fmt.Errorf("arrow/reflect: cannot set bool into %s", v.Type()) + } + v.SetBool(a.Value(i)) + + case arrow.INT8, arrow.INT16, arrow.INT32, arrow.INT64, + arrow.UINT8, arrow.UINT16, arrow.UINT32, arrow.UINT64, + arrow.FLOAT32, arrow.FLOAT64: + return setPrimitiveValue(v, arr, i) + + case arrow.STRING: + a, ok := arr.(*array.String) + if !ok { + return fmt.Errorf("arrow/reflect: expected *String, got %T", arr) + } + if v.Kind() != reflect.String { + return fmt.Errorf("arrow/reflect: cannot set string into %s", v.Type()) + } + v.SetString(a.Value(i)) + + case arrow.LARGE_STRING: + a, ok := arr.(*array.LargeString) + if !ok { + return fmt.Errorf("arrow/reflect: expected *LargeString, got %T", arr) + } + if v.Kind() != reflect.String { + return fmt.Errorf("arrow/reflect: cannot set string into %s", v.Type()) + } + v.SetString(a.Value(i)) + + case arrow.BINARY: + a, ok := arr.(*array.Binary) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Binary, got %T", arr) + } + if v.Kind() != reflect.Slice || v.Type().Elem().Kind() != reflect.Uint8 { + return fmt.Errorf("arrow/reflect: cannot set []byte into %s", v.Type()) + } + v.SetBytes(a.Value(i)) + + case arrow.LARGE_BINARY: + a, ok := arr.(*array.LargeBinary) + if !ok { + return fmt.Errorf("arrow/reflect: expected *LargeBinary, got %T", arr) + } + if v.Kind() != reflect.Slice || v.Type().Elem().Kind() != reflect.Uint8 { + return 
fmt.Errorf("arrow/reflect: cannot set []byte into %s", v.Type()) + } + v.SetBytes(a.Value(i)) + + case arrow.TIMESTAMP, arrow.DATE32, arrow.DATE64, + arrow.TIME32, arrow.TIME64, arrow.DURATION: + return setTemporalValue(v, arr, i) + + case arrow.DECIMAL128, arrow.DECIMAL256, arrow.DECIMAL32, arrow.DECIMAL64: + return setDecimalValue(v, arr, i) + + case arrow.STRUCT: + a, ok := arr.(*array.Struct) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Struct, got %T", arr) + } + return setStructValue(v, a, i) + + case arrow.LIST, arrow.LARGE_LIST, arrow.LIST_VIEW, arrow.LARGE_LIST_VIEW: + a, ok := arr.(array.ListLike) + if !ok { + return fmt.Errorf("arrow/reflect: expected ListLike, got %T", arr) + } + return setListValue(v, a, i) + + case arrow.MAP: + a, ok := arr.(*array.Map) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Map, got %T", arr) + } + return setMapValue(v, a, i) + + case arrow.FIXED_SIZE_LIST: + a, ok := arr.(*array.FixedSizeList) + if !ok { + return fmt.Errorf("arrow/reflect: expected *FixedSizeList, got %T", arr) + } + return setFixedSizeListValue(v, a, i) + + case arrow.DICTIONARY: + a, ok := arr.(*array.Dictionary) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Dictionary, got %T", arr) + } + return setDictionaryValue(v, a, i) + + case arrow.RUN_END_ENCODED: + a, ok := arr.(*array.RunEndEncoded) + if !ok { + return fmt.Errorf("arrow/reflect: expected *RunEndEncoded, got %T", arr) + } + return setRunEndEncodedValue(v, a, i) + + default: + return fmt.Errorf("arrow/reflect: unsupported Arrow type %v for reflection", arr.DataType()) + } + return nil +} + +func setPrimitiveValue(v reflect.Value, arr arrow.Array, i int) error { + if v.Kind() == reflect.Ptr { + v.Set(reflect.New(v.Type().Elem())) + v = v.Elem() + } + + switch arr.DataType().ID() { + case arrow.INT8: + if v.Kind() != reflect.Int && v.Kind() != reflect.Int8 && v.Kind() != reflect.Int16 && + v.Kind() != reflect.Int32 && v.Kind() != reflect.Int64 { + return 
fmt.Errorf("arrow/reflect: cannot set int8 into %s", v.Type()) + } + v.SetInt(int64(arr.(*array.Int8).Value(i))) + case arrow.INT16: + if v.Kind() != reflect.Int && v.Kind() != reflect.Int8 && v.Kind() != reflect.Int16 && + v.Kind() != reflect.Int32 && v.Kind() != reflect.Int64 { + return fmt.Errorf("arrow/reflect: cannot set int16 into %s", v.Type()) + } + v.SetInt(int64(arr.(*array.Int16).Value(i))) + case arrow.INT32: + if v.Kind() != reflect.Int && v.Kind() != reflect.Int8 && v.Kind() != reflect.Int16 && + v.Kind() != reflect.Int32 && v.Kind() != reflect.Int64 { + return fmt.Errorf("arrow/reflect: cannot set int32 into %s", v.Type()) + } + v.SetInt(int64(arr.(*array.Int32).Value(i))) + case arrow.INT64: + if v.Kind() != reflect.Int && v.Kind() != reflect.Int8 && v.Kind() != reflect.Int16 && + v.Kind() != reflect.Int32 && v.Kind() != reflect.Int64 { + return fmt.Errorf("arrow/reflect: cannot set int64 into %s", v.Type()) + } + v.SetInt(arr.(*array.Int64).Value(i)) + case arrow.UINT8: + if v.Kind() != reflect.Uint && v.Kind() != reflect.Uint8 && v.Kind() != reflect.Uint16 && + v.Kind() != reflect.Uint32 && v.Kind() != reflect.Uint64 && v.Kind() != reflect.Uintptr { + return fmt.Errorf("arrow/reflect: cannot set uint8 into %s", v.Type()) + } + v.SetUint(uint64(arr.(*array.Uint8).Value(i))) + case arrow.UINT16: + if v.Kind() != reflect.Uint && v.Kind() != reflect.Uint8 && v.Kind() != reflect.Uint16 && + v.Kind() != reflect.Uint32 && v.Kind() != reflect.Uint64 && v.Kind() != reflect.Uintptr { + return fmt.Errorf("arrow/reflect: cannot set uint16 into %s", v.Type()) + } + v.SetUint(uint64(arr.(*array.Uint16).Value(i))) + case arrow.UINT32: + if v.Kind() != reflect.Uint && v.Kind() != reflect.Uint8 && v.Kind() != reflect.Uint16 && + v.Kind() != reflect.Uint32 && v.Kind() != reflect.Uint64 && v.Kind() != reflect.Uintptr { + return fmt.Errorf("arrow/reflect: cannot set uint32 into %s", v.Type()) + } + v.SetUint(uint64(arr.(*array.Uint32).Value(i))) + case arrow.UINT64: 
+ if v.Kind() != reflect.Uint && v.Kind() != reflect.Uint8 && v.Kind() != reflect.Uint16 && + v.Kind() != reflect.Uint32 && v.Kind() != reflect.Uint64 && v.Kind() != reflect.Uintptr { + return fmt.Errorf("arrow/reflect: cannot set uint64 into %s", v.Type()) + } + v.SetUint(arr.(*array.Uint64).Value(i)) + case arrow.FLOAT32: + if v.Kind() != reflect.Float32 && v.Kind() != reflect.Float64 { + return fmt.Errorf("arrow/reflect: cannot set float32 into %s", v.Type()) + } + v.SetFloat(float64(arr.(*array.Float32).Value(i))) + case arrow.FLOAT64: + if v.Kind() != reflect.Float32 && v.Kind() != reflect.Float64 { + return fmt.Errorf("arrow/reflect: cannot set float64 into %s", v.Type()) + } + v.SetFloat(arr.(*array.Float64).Value(i)) + default: + return fmt.Errorf("arrow/reflect: unsupported primitive type %v", arr.DataType()) + } + return nil +} + +func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { + if v.Kind() == reflect.Ptr { + v.Set(reflect.New(v.Type().Elem())) + v = v.Elem() + } + + switch arr.DataType().ID() { + case arrow.TIMESTAMP: + a, ok := arr.(*array.Timestamp) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Timestamp, got %T", arr) + } + if v.Type() != typeOfTime { + return fmt.Errorf("arrow/reflect: cannot set time.Time into %s", v.Type()) + } + unit := arr.DataType().(*arrow.TimestampType).Unit + t := a.Value(i).ToTime(unit) + v.Set(reflect.ValueOf(t)) + + case arrow.DATE32: + a, ok := arr.(*array.Date32) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Date32, got %T", arr) + } + if v.Type() != typeOfTime { + return fmt.Errorf("arrow/reflect: cannot set time.Time into %s", v.Type()) + } + t := a.Value(i).ToTime() + v.Set(reflect.ValueOf(t)) + + case arrow.DATE64: + a, ok := arr.(*array.Date64) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Date64, got %T", arr) + } + if v.Type() != typeOfTime { + return fmt.Errorf("arrow/reflect: cannot set time.Time into %s", v.Type()) + } + t := a.Value(i).ToTime() + 
v.Set(reflect.ValueOf(t)) + + case arrow.TIME32: + a, ok := arr.(*array.Time32) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Time32, got %T", arr) + } + if v.Type() != typeOfTime { + return fmt.Errorf("arrow/reflect: cannot set time.Time into %s", v.Type()) + } + unit := arr.DataType().(*arrow.Time32Type).Unit + t := a.Value(i).ToTime(unit) + v.Set(reflect.ValueOf(t)) + + case arrow.TIME64: + a, ok := arr.(*array.Time64) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Time64, got %T", arr) + } + if v.Type() != typeOfTime { + return fmt.Errorf("arrow/reflect: cannot set time.Time into %s", v.Type()) + } + unit := arr.DataType().(*arrow.Time64Type).Unit + t := a.Value(i).ToTime(unit) + v.Set(reflect.ValueOf(t)) + + case arrow.DURATION: + a, ok := arr.(*array.Duration) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Duration, got %T", arr) + } + if v.Type() != typeOfDuration { + return fmt.Errorf("arrow/reflect: cannot set time.Duration into %s", v.Type()) + } + unit := arr.DataType().(*arrow.DurationType).Unit + dur := time.Duration(a.Value(i)) * unit.Multiplier() + v.Set(reflect.ValueOf(dur)) + + default: + return fmt.Errorf("arrow/reflect: unsupported temporal type %v", arr.DataType()) + } + return nil +} + +func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { + if v.Kind() == reflect.Ptr { + v.Set(reflect.New(v.Type().Elem())) + v = v.Elem() + } + + switch arr.DataType().ID() { + case arrow.DECIMAL128: + a, ok := arr.(*array.Decimal128) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Decimal128, got %T", arr) + } + if v.Type() != typeOfDec128 { + return fmt.Errorf("arrow/reflect: cannot set decimal128.Num into %s", v.Type()) + } + num := a.Value(i) + v.Set(reflect.ValueOf(num)) + + case arrow.DECIMAL256: + a, ok := arr.(*array.Decimal256) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Decimal256, got %T", arr) + } + if v.Type() != typeOfDec256 { + return fmt.Errorf("arrow/reflect: cannot set 
decimal256.Num into %s", v.Type()) + } + num := a.Value(i) + v.Set(reflect.ValueOf(num)) + + case arrow.DECIMAL32: + a, ok := arr.(*array.Decimal32) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Decimal32, got %T", arr) + } + if v.Type() != typeOfDec32 { + return fmt.Errorf("arrow/reflect: cannot set decimal.Decimal32 into %s", v.Type()) + } + v.Set(reflect.ValueOf(a.Value(i))) + + case arrow.DECIMAL64: + a, ok := arr.(*array.Decimal64) + if !ok { + return fmt.Errorf("arrow/reflect: expected *Decimal64, got %T", arr) + } + if v.Type() != typeOfDec64 { + return fmt.Errorf("arrow/reflect: cannot set decimal.Decimal64 into %s", v.Type()) + } + v.Set(reflect.ValueOf(a.Value(i))) + + default: + return fmt.Errorf("arrow/reflect: unsupported decimal type %v", arr.DataType()) + } + return nil +} + +func setStructValue(v reflect.Value, sa *array.Struct, i int) error { + if sa.IsNull(i) { + if v.Kind() == reflect.Ptr { + v.Set(reflect.Zero(v.Type())) + } + return nil + } + if v.Kind() == reflect.Ptr { + v.Set(reflect.New(v.Type().Elem())) + v = v.Elem() + } + + if v.Kind() != reflect.Struct { + return fmt.Errorf("arrow/reflect: cannot set struct into %s", v.Type()) + } + + fields := cachedStructFields(v.Type()) + st := sa.DataType().(*arrow.StructType) + + for _, fm := range fields { + arrowIdx, found := st.FieldIdx(fm.Name) + if !found { + continue + } + if err := setValue(v.FieldByIndex(fm.Index), sa.Field(arrowIdx), i); err != nil { + return fmt.Errorf("arrow/reflect: field %q: %w", fm.Name, err) + } + } + return nil +} + +func setListValue(v reflect.Value, arr array.ListLike, i int) error { + if arr.IsNull(i) { + if v.Kind() == reflect.Ptr { + v.Set(reflect.Zero(v.Type())) + return nil + } + if v.Kind() == reflect.Slice { + v.Set(reflect.MakeSlice(v.Type(), 0, 0)) + } + return nil + } + if v.Kind() == reflect.Ptr { + v.Set(reflect.New(v.Type().Elem())) + v = v.Elem() + } + + if v.Kind() != reflect.Slice { + return fmt.Errorf("arrow/reflect: cannot set list 
into %s", v.Type()) + } + + start, end := arr.ValueOffsets(i) + child := arr.ListValues() + length := int(end - start) + + result := reflect.MakeSlice(v.Type(), length, length) + for j := 0; j < length; j++ { + if err := setValue(result.Index(j), child, int(start)+j); err != nil { + return fmt.Errorf("arrow/reflect: list element %d: %w", j, err) + } + } + v.Set(result) + return nil +} + +func setMapValue(v reflect.Value, arr *array.Map, i int) error { + if arr.IsNull(i) { + if v.Kind() == reflect.Ptr { + v.Set(reflect.Zero(v.Type())) + } + return nil + } + if v.Kind() == reflect.Ptr { + v.Set(reflect.New(v.Type().Elem())) + v = v.Elem() + } + + if v.Kind() != reflect.Map { + return fmt.Errorf("arrow/reflect: cannot set map into %s", v.Type()) + } + + start, end := arr.ValueOffsets(i) + keys := arr.Keys() + items := arr.Items() + keyType := v.Type().Key() + elemType := v.Type().Elem() + + result := reflect.MakeMap(v.Type()) + for j := int(start); j < int(end); j++ { + keyVal := reflect.New(keyType).Elem() + if err := setValue(keyVal, keys, j); err != nil { + return fmt.Errorf("arrow/reflect: map key %d: %w", j-int(start), err) + } + elemVal := reflect.New(elemType).Elem() + if err := setValue(elemVal, items, j); err != nil { + return fmt.Errorf("arrow/reflect: map value %d: %w", j-int(start), err) + } + result.SetMapIndex(keyVal, elemVal) + } + v.Set(result) + return nil +} + +func setFixedSizeListValue(v reflect.Value, arr *array.FixedSizeList, i int) error { + if arr.IsNull(i) { + if v.Kind() == reflect.Ptr { + v.Set(reflect.Zero(v.Type())) + } + return nil + } + if v.Kind() == reflect.Ptr { + v.Set(reflect.New(v.Type().Elem())) + v = v.Elem() + } + + n := int(arr.DataType().(*arrow.FixedSizeListType).Len()) + child := arr.ListValues() + start, _ := arr.ValueOffsets(i) + + switch v.Kind() { + case reflect.Array: + if v.Len() != n { + return fmt.Errorf("arrow/reflect: fixed-size list length %d does not match Go array length %d", n, v.Len()) + } + for k := 0; k < n; 
k++ { + if err := setValue(v.Index(k), child, int(start)+k); err != nil { + return fmt.Errorf("arrow/reflect: fixed-size list element %d: %w", k, err) + } + } + case reflect.Slice: + result := reflect.MakeSlice(v.Type(), n, n) + for k := 0; k < n; k++ { + if err := setValue(result.Index(k), child, int(start)+k); err != nil { + return fmt.Errorf("arrow/reflect: fixed-size list element %d: %w", k, err) + } + } + v.Set(result) + default: + return fmt.Errorf("arrow/reflect: cannot set fixed-size list into %s", v.Type()) + } + return nil +} + +func setDictionaryValue(v reflect.Value, arr *array.Dictionary, i int) error { + if arr.IsNull(i) { + if v.Kind() == reflect.Ptr { + v.Set(reflect.Zero(v.Type())) + } + return nil + } + return setValue(v, arr.Dictionary(), arr.GetValueIndex(i)) +} + +func setRunEndEncodedValue(v reflect.Value, arr *array.RunEndEncoded, i int) error { + if arr.IsNull(i) { + if v.Kind() == reflect.Ptr { + v.Set(reflect.Zero(v.Type())) + } + return nil + } + return setValue(v, arr.Values(), arr.GetPhysicalIndex(i)) +} diff --git a/arrow/arreflect/reflect_arrow_to_go_test.go b/arrow/arreflect/reflect_arrow_to_go_test.go new file mode 100644 index 00000000..f7886e2b --- /dev/null +++ b/arrow/arreflect/reflect_arrow_to_go_test.go @@ -0,0 +1,815 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package arreflect + +import ( + "reflect" + "testing" + "time" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/decimal" + "github.com/apache/arrow-go/v18/arrow/decimal128" + "github.com/apache/arrow-go/v18/arrow/decimal256" + "github.com/apache/arrow-go/v18/arrow/memory" +) + +func TestSetValue(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("bool", func(t *testing.T) { + b := array.NewBooleanBuilder(mem) + defer b.Release() + b.Append(true) + b.AppendNull() + arr := b.NewBooleanArray() + defer arr.Release() + + var got bool + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if !got { + t.Errorf("expected true, got false") + } + + got = true + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { + t.Fatal(err) + } + if got { + t.Errorf("expected false (null → zero), got true") + } + }) + + t.Run("string", func(t *testing.T) { + b := array.NewStringBuilder(mem) + defer b.Release() + b.Append("hello") + arr := b.NewStringArray() + defer arr.Release() + + var got string + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got != "hello" { + t.Errorf("expected hello, got %q", got) + } + }) + + t.Run("binary", func(t *testing.T) { + b := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + b.Append([]byte("data")) + arr := b.NewBinaryArray() + defer arr.Release() + + var got []byte + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if string(got) != "data" { + t.Errorf("expected data, got %q", got) + } + }) + + t.Run("unsupported type error", func(t *testing.T) { + b := array.NewBooleanBuilder(mem) + defer b.Release() + b.Append(true) + arr := b.NewBooleanArray() + defer arr.Release() + + var got int32 
+ err := setValue(reflect.ValueOf(&got).Elem(), arr, 0) + if err == nil { + t.Error("expected error for bool→int32 mismatch") + } + }) + + t.Run("pointer allocation", func(t *testing.T) { + b := array.NewStringBuilder(mem) + defer b.Release() + b.Append("ptr") + b.AppendNull() + arr := b.NewStringArray() + defer arr.Release() + + var got *string + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got == nil || *got != "ptr" { + t.Errorf("expected ptr, got %v", got) + } + + got = new(string) + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { + t.Fatal(err) + } + if got != nil { + t.Errorf("expected nil for null, got %v", got) + } + }) +} + +func TestSetPrimitiveValue(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("int32", func(t *testing.T) { + b := array.NewInt32Builder(mem) + defer b.Release() + b.Append(42) + b.AppendNull() + arr := b.NewInt32Array() + defer arr.Release() + + var got int32 + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got != 42 { + t.Errorf("expected 42, got %d", got) + } + + got = 99 + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { + t.Fatal(err) + } + if got != 0 { + t.Errorf("expected 0 for null, got %d", got) + } + }) + + t.Run("int64", func(t *testing.T) { + b := array.NewInt64Builder(mem) + defer b.Release() + b.Append(int64(1 << 40)) + arr := b.NewInt64Array() + defer arr.Release() + + var got int64 + if err := setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got != int64(1<<40) { + t.Errorf("expected large int64, got %d", got) + } + }) + + t.Run("uint8", func(t *testing.T) { + b := array.NewUint8Builder(mem) + defer b.Release() + b.Append(255) + arr := b.NewUint8Array() + defer arr.Release() + + var got uint8 + if err := setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got != 255 { + 
t.Errorf("expected 255, got %d", got) + } + }) + + t.Run("float64", func(t *testing.T) { + b := array.NewFloat64Builder(mem) + defer b.Release() + b.Append(3.14) + arr := b.NewFloat64Array() + defer arr.Release() + + var got float64 + if err := setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got != 3.14 { + t.Errorf("expected 3.14, got %f", got) + } + }) + + t.Run("type mismatch returns error", func(t *testing.T) { + b := array.NewInt32Builder(mem) + defer b.Release() + b.Append(10) + arr := b.NewInt32Array() + defer arr.Release() + + var got float64 + err := setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0) + if err == nil { + t.Error("expected error for int32→float64 mismatch") + } + }) +} + +func TestSetTemporalValue(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("timestamp", func(t *testing.T) { + dt := &arrow.TimestampType{Unit: arrow.Second} + b := array.NewTimestampBuilder(mem, dt) + defer b.Release() + now := time.Unix(1700000000, 0).UTC() + b.Append(arrow.Timestamp(now.Unix())) + arr := b.NewArray().(*array.Timestamp) + defer arr.Release() + + var got time.Time + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if !got.Equal(now) { + t.Errorf("expected %v, got %v", now, got) + } + }) + + t.Run("date32", func(t *testing.T) { + b := array.NewDate32Builder(mem) + defer b.Release() + b.Append(arrow.Date32(19000)) + arr := b.NewArray().(*array.Date32) + defer arr.Release() + + var got time.Time + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + expected := arrow.Date32(19000).ToTime() + if !got.Equal(expected) { + t.Errorf("expected %v, got %v", expected, got) + } + }) + + t.Run("duration", func(t *testing.T) { + dt := &arrow.DurationType{Unit: arrow.Second} + b := array.NewDurationBuilder(mem, dt) + defer b.Release() + b.Append(arrow.Duration(5)) + arr := b.NewArray().(*array.Duration) + defer arr.Release() + + 
var got time.Duration + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + expected := 5 * time.Second + if got != expected { + t.Errorf("expected %v, got %v", expected, got) + } + }) + + t.Run("null temporal", func(t *testing.T) { + dt := &arrow.TimestampType{Unit: arrow.Second} + b := array.NewTimestampBuilder(mem, dt) + defer b.Release() + b.AppendNull() + arr := b.NewArray().(*array.Timestamp) + defer arr.Release() + + var got *time.Time + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got != nil { + t.Errorf("expected nil for null timestamp pointer") + } + }) +} + +func TestSetDecimalValue(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("decimal128", func(t *testing.T) { + dt := &arrow.Decimal128Type{Precision: 10, Scale: 2} + b := array.NewDecimal128Builder(mem, dt) + defer b.Release() + num := decimal128.New(0, 12345) + b.Append(num) + b.AppendNull() + arr := b.NewDecimal128Array() + defer arr.Release() + + var got decimal128.Num + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got != num { + t.Errorf("expected %v, got %v", num, got) + } + + var gotPtr *decimal128.Num + if err := setValue(reflect.ValueOf(&gotPtr).Elem(), arr, 1); err != nil { + t.Fatal(err) + } + if gotPtr != nil { + t.Errorf("expected nil for null decimal128") + } + }) + + t.Run("decimal256", func(t *testing.T) { + dt := &arrow.Decimal256Type{Precision: 20, Scale: 4} + b := array.NewDecimal256Builder(mem, dt) + defer b.Release() + num := decimal256.New(0, 0, 0, 9876) + b.Append(num) + arr := b.NewDecimal256Array() + defer arr.Release() + + var got decimal256.Num + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got != num { + t.Errorf("expected %v, got %v", num, got) + } + }) + + t.Run("decimal32", func(t *testing.T) { + dt := &arrow.Decimal32Type{Precision: 9, Scale: 2} + b := 
array.NewDecimal32Builder(mem, dt) + defer b.Release() + num := decimal.Decimal32(12345) + b.Append(num) + b.AppendNull() + arr := b.NewArray().(*array.Decimal32) + defer arr.Release() + + var got decimal.Decimal32 + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got != num { + t.Errorf("expected %v, got %v", num, got) + } + + var gotPtr *decimal.Decimal32 + if err := setValue(reflect.ValueOf(&gotPtr).Elem(), arr, 1); err != nil { + t.Fatal(err) + } + if gotPtr != nil { + t.Errorf("expected nil for null decimal32") + } + }) + + t.Run("decimal64", func(t *testing.T) { + dt := &arrow.Decimal64Type{Precision: 18, Scale: 3} + b := array.NewDecimal64Builder(mem, dt) + defer b.Release() + num := decimal.Decimal64(987654321) + b.Append(num) + arr := b.NewArray().(*array.Decimal64) + defer arr.Release() + + var got decimal.Decimal64 + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got != num { + t.Errorf("expected %v, got %v", num, got) + } + }) +} + +func TestSetStructValue(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("basic struct", func(t *testing.T) { + nameArr := func() *array.String { + b := array.NewStringBuilder(mem) + defer b.Release() + b.Append("Alice") + b.Append("Bob") + return b.NewStringArray() + }() + defer nameArr.Release() + + ageArr := func() *array.Int32 { + b := array.NewInt32Builder(mem) + defer b.Release() + b.Append(30) + b.Append(25) + return b.NewInt32Array() + }() + defer ageArr.Release() + + sa, err := array.NewStructArray( + []arrow.Array{nameArr, ageArr}, + []string{"Name", "Age"}, + ) + if err != nil { + t.Fatal(err) + } + defer sa.Release() + + type Person struct { + Name string + Age int32 + } + + var got Person + if err := setValue(reflect.ValueOf(&got).Elem(), sa, 0); err != nil { + t.Fatal(err) + } + if got.Name != "Alice" || got.Age != 30 { + t.Errorf("expected Alice/30, got %+v", got) + } + + if err := 
setValue(reflect.ValueOf(&got).Elem(), sa, 1); err != nil { + t.Fatal(err) + } + if got.Name != "Bob" || got.Age != 25 { + t.Errorf("expected Bob/25, got %+v", got) + } + }) + + t.Run("arrow tag mapping", func(t *testing.T) { + nameArr := func() *array.String { + b := array.NewStringBuilder(mem) + defer b.Release() + b.Append("Charlie") + return b.NewStringArray() + }() + defer nameArr.Release() + + sa, err := array.NewStructArray( + []arrow.Array{nameArr}, + []string{"full_name"}, + ) + if err != nil { + t.Fatal(err) + } + defer sa.Release() + + type TaggedPerson struct { + FullName string `arrow:"full_name"` + } + + var got TaggedPerson + if err := setValue(reflect.ValueOf(&got).Elem(), sa, 0); err != nil { + t.Fatal(err) + } + if got.FullName != "Charlie" { + t.Errorf("expected Charlie, got %q", got.FullName) + } + }) + + t.Run("missing arrow field leaves go field zero", func(t *testing.T) { + nameArr := func() *array.String { + b := array.NewStringBuilder(mem) + defer b.Release() + b.Append("Dave") + return b.NewStringArray() + }() + defer nameArr.Release() + + sa, err := array.NewStructArray( + []arrow.Array{nameArr}, + []string{"Name"}, + ) + if err != nil { + t.Fatal(err) + } + defer sa.Release() + + type PersonWithExtra struct { + Name string + Email string + } + + var got PersonWithExtra + if err := setValue(reflect.ValueOf(&got).Elem(), sa, 0); err != nil { + t.Fatal(err) + } + if got.Name != "Dave" { + t.Errorf("expected Dave, got %q", got.Name) + } + if got.Email != "" { + t.Errorf("expected empty Email, got %q", got.Email) + } + }) +} + +func TestSetListValue(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("list of int32", func(t *testing.T) { + vb := array.NewInt32Builder(mem) + lb := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) + defer lb.Release() + + vb = lb.ValueBuilder().(*array.Int32Builder) + lb.Append(true) + vb.AppendValues([]int32{1, 2, 3}, nil) + lb.Append(true) + vb.AppendValues([]int32{4, 5}, nil) + lb.AppendNull() + + 
arr := lb.NewListArray() + defer arr.Release() + + var got []int32 + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(got, []int32{1, 2, 3}) { + t.Errorf("expected [1,2,3], got %v", got) + } + + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(got, []int32{4, 5}) { + t.Errorf("expected [4,5], got %v", got) + } + + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 2); err != nil { + t.Fatal(err) + } + if got != nil { + t.Errorf("expected nil slice for null list, got %v", got) + } + }) + + t.Run("nested list of lists", func(t *testing.T) { + inner := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) + defer inner.Release() + outer := array.NewListBuilder(mem, arrow.ListOf(arrow.PrimitiveTypes.Int32)) + defer outer.Release() + + innerVB := inner.ValueBuilder().(*array.Int32Builder) + + inner.Append(true) + innerVB.AppendValues([]int32{1, 2}, nil) + inner.Append(true) + innerVB.AppendValues([]int32{3}, nil) + innerArr := inner.NewListArray() + defer innerArr.Release() + + outerVB := outer.ValueBuilder().(*array.ListBuilder) + outerInnerVB := outerVB.ValueBuilder().(*array.Int32Builder) + outer.Append(true) + outerVB.Append(true) + outerInnerVB.AppendValues([]int32{10, 20}, nil) + outerVB.Append(true) + outerInnerVB.AppendValues([]int32{30}, nil) + + outerArr := outer.NewListArray() + defer outerArr.Release() + + var got [][]int32 + if err := setValue(reflect.ValueOf(&got).Elem(), outerArr, 0); err != nil { + t.Fatal(err) + } + if len(got) != 2 { + t.Fatalf("expected 2 inner slices, got %d", len(got)) + } + if !reflect.DeepEqual(got[0], []int32{10, 20}) { + t.Errorf("expected [10,20], got %v", got[0]) + } + if !reflect.DeepEqual(got[1], []int32{30}) { + t.Errorf("expected [30], got %v", got[1]) + } + }) +} + +func TestSetMapValue(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("map string to int32", func(t *testing.T) { + 
mb := array.NewMapBuilder(mem, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false) + defer mb.Release() + + kb := mb.KeyBuilder().(*array.StringBuilder) + ib := mb.ItemBuilder().(*array.Int32Builder) + + mb.Append(true) + kb.Append("a") + ib.Append(1) + kb.Append("b") + ib.Append(2) + + mb.Append(true) + kb.Append("x") + ib.Append(10) + + mb.AppendNull() + + arr := mb.NewMapArray() + defer arr.Release() + + var got map[string]int32 + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got["a"] != 1 || got["b"] != 2 { + t.Errorf("expected {a:1, b:2}, got %v", got) + } + + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { + t.Fatal(err) + } + if got["x"] != 10 { + t.Errorf("expected {x:10}, got %v", got) + } + + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 2); err != nil { + t.Fatal(err) + } + if got != nil { + t.Errorf("expected nil map for null, got %v", got) + } + }) +} + +func TestSetFixedSizeListValue(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("go array", func(t *testing.T) { + b := array.NewFixedSizeListBuilder(mem, 3, arrow.PrimitiveTypes.Int32) + defer b.Release() + vb := b.ValueBuilder().(*array.Int32Builder) + + b.Append(true) + vb.AppendValues([]int32{10, 20, 30}, nil) + b.Append(true) + vb.AppendValues([]int32{40, 50, 60}, nil) + b.AppendNull() + + arr := b.NewArray().(*array.FixedSizeList) + defer arr.Release() + + var got [3]int32 + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got != [3]int32{10, 20, 30} { + t.Errorf("expected [10,20,30], got %v", got) + } + + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { + t.Fatal(err) + } + if got != [3]int32{40, 50, 60} { + t.Errorf("expected [40,50,60], got %v", got) + } + + got = [3]int32{1, 2, 3} + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 2); err != nil { + t.Fatal(err) + } + if got != ([3]int32{}) { + t.Errorf("expected zero array 
for null, got %v", got) + } + }) + + t.Run("go slice", func(t *testing.T) { + b := array.NewFixedSizeListBuilder(mem, 2, arrow.PrimitiveTypes.Int32) + defer b.Release() + vb := b.ValueBuilder().(*array.Int32Builder) + + b.Append(true) + vb.AppendValues([]int32{7, 8}, nil) + + arr := b.NewArray().(*array.FixedSizeList) + defer arr.Release() + + var got []int32 + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(got, []int32{7, 8}) { + t.Errorf("expected [7,8], got %v", got) + } + }) + + t.Run("size mismatch returns error", func(t *testing.T) { + b := array.NewFixedSizeListBuilder(mem, 3, arrow.PrimitiveTypes.Int32) + defer b.Release() + vb := b.ValueBuilder().(*array.Int32Builder) + b.Append(true) + vb.AppendValues([]int32{1, 2, 3}, nil) + + arr := b.NewArray().(*array.FixedSizeList) + defer arr.Release() + + var got [2]int32 + err := setValue(reflect.ValueOf(&got).Elem(), arr, 0) + if err == nil { + t.Error("expected error for size mismatch") + } + }) +} + +func TestSetDictionaryValue(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("dictionary int8 to string", func(t *testing.T) { + dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String} + bldr := array.NewDictionaryBuilder(mem, dt) + defer bldr.Release() + db := bldr.(*array.BinaryDictionaryBuilder) + + db.AppendString("foo") + db.AppendString("bar") + db.AppendString("foo") + db.AppendNull() + + arr := bldr.NewDictionaryArray() + defer arr.Release() + + var got string + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got != "foo" { + t.Errorf("expected foo, got %q", got) + } + + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { + t.Fatal(err) + } + if got != "bar" { + t.Errorf("expected bar, got %q", got) + } + + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 2); err != nil { + t.Fatal(err) + } + if got != "foo" { + 
t.Errorf("expected foo, got %q", got) + } + + var gotPtr *string + if err := setValue(reflect.ValueOf(&gotPtr).Elem(), arr, 3); err != nil { + t.Fatal(err) + } + if gotPtr != nil { + t.Errorf("expected nil for null dictionary entry") + } + }) +} + +func TestSetRunEndEncodedValue(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("ree int32 to string", func(t *testing.T) { + b := array.NewRunEndEncodedBuilder(mem, arrow.PrimitiveTypes.Int32, arrow.BinaryTypes.String) + defer b.Release() + vb := b.ValueBuilder().(*array.StringBuilder) + + b.Append(3) + vb.Append("aaa") + b.Append(2) + vb.Append("bbb") + + arr := b.NewRunEndEncodedArray() + defer arr.Release() + + var got string + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if got != "aaa" { + t.Errorf("expected aaa at logical 0, got %q", got) + } + + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 2); err != nil { + t.Fatal(err) + } + if got != "aaa" { + t.Errorf("expected aaa at logical 2, got %q", got) + } + + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 3); err != nil { + t.Fatal(err) + } + if got != "bbb" { + t.Errorf("expected bbb at logical 3, got %q", got) + } + + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 4); err != nil { + t.Fatal(err) + } + if got != "bbb" { + t.Errorf("expected bbb at logical 4, got %q", got) + } + }) +} diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go new file mode 100644 index 00000000..b4378f11 --- /dev/null +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -0,0 +1,813 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package arreflect + +import ( + "fmt" + "reflect" + "time" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/decimal" + "github.com/apache/arrow-go/v18/arrow/decimal128" + "github.com/apache/arrow-go/v18/arrow/decimal256" + "github.com/apache/arrow-go/v18/arrow/memory" +) + +func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { + if vals.Kind() != reflect.Slice { + return nil, fmt.Errorf("buildArray: expected slice, got %v", vals.Kind()) + } + + elemType := vals.Type().Elem() + for elemType.Kind() == reflect.Ptr { + elemType = elemType.Elem() + } + + if opts.Dict { + return buildDictionaryArray(vals, mem) + } + if opts.REE { + return buildRunEndEncodedArray(vals, mem) + } + if opts.ListView { + return buildListViewArray(vals, mem) + } + + switch elemType.Kind() { + case reflect.Slice: + if elemType == typeOfByteSlice { + return buildPrimitiveArray(vals, mem) + } + return buildListArray(vals, mem) + + case reflect.Array: + return buildFixedSizeListArray(vals, mem) + + case reflect.Map: + return buildMapArray(vals, mem) + + case reflect.Struct: + switch elemType { + case typeOfTime: + return buildTemporalArray(vals, mem) + case typeOfDuration: + return buildTemporalArray(vals, mem) + case typeOfDec128: + return buildDecimalArray(vals, opts, mem) + case typeOfDec256: + return buildDecimalArray(vals, opts, mem) + default: + return buildStructArray(vals, mem) + } + + default: + if elemType == typeOfDec32 || elemType == typeOfDec64 { + 
return buildDecimalArray(vals, opts, mem) + } + return buildPrimitiveArray(vals, mem) + } +} + +func buildPrimitiveArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { + elemType := vals.Type().Elem() + for elemType.Kind() == reflect.Ptr { + elemType = elemType.Elem() + } + + dt, err := inferArrowType(elemType) + if err != nil { + return nil, fmt.Errorf("buildPrimitiveArray: %w", err) + } + + b := array.NewBuilder(mem, dt) + defer b.Release() + b.Reserve(vals.Len()) + + isPtr := vals.Type().Elem().Kind() == reflect.Ptr + + for i := 0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + b.AppendNull() + continue + } + v = v.Elem() + } + if err := appendPrimitiveValue(b, v, dt); err != nil { + return nil, err + } + } + + return b.NewArray(), nil +} + +func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) error { + switch dt.ID() { + case arrow.INT8: + b.(*array.Int8Builder).Append(int8(v.Int())) + case arrow.INT16: + b.(*array.Int16Builder).Append(int16(v.Int())) + case arrow.INT32: + b.(*array.Int32Builder).Append(int32(v.Int())) + case arrow.INT64: + b.(*array.Int64Builder).Append(int64(v.Int())) + case arrow.UINT8: + b.(*array.Uint8Builder).Append(uint8(v.Uint())) + case arrow.UINT16: + b.(*array.Uint16Builder).Append(uint16(v.Uint())) + case arrow.UINT32: + b.(*array.Uint32Builder).Append(uint32(v.Uint())) + case arrow.UINT64: + b.(*array.Uint64Builder).Append(uint64(v.Uint())) + case arrow.FLOAT32: + b.(*array.Float32Builder).Append(float32(v.Float())) + case arrow.FLOAT64: + b.(*array.Float64Builder).Append(float64(v.Float())) + case arrow.BOOL: + b.(*array.BooleanBuilder).Append(v.Bool()) + case arrow.STRING: + b.(*array.StringBuilder).Append(v.String()) + case arrow.BINARY: + b.(*array.BinaryBuilder).Append(v.Bytes()) + case arrow.TIMESTAMP: + t := v.Interface().(time.Time) + b.(*array.TimestampBuilder).Append(arrow.Timestamp(t.UnixNano())) + case arrow.DURATION: + d := 
v.Interface().(time.Duration) + b.(*array.DurationBuilder).Append(arrow.Duration(d.Nanoseconds())) + case arrow.DECIMAL128: + n := v.Interface().(decimal128.Num) + b.(*array.Decimal128Builder).Append(n) + case arrow.DECIMAL256: + n := v.Interface().(decimal256.Num) + b.(*array.Decimal256Builder).Append(n) + case arrow.DECIMAL32: + b.(*array.Decimal32Builder).Append(decimal.Decimal32(v.Int())) + case arrow.DECIMAL64: + b.(*array.Decimal64Builder).Append(decimal.Decimal64(v.Int())) + default: + return fmt.Errorf("appendPrimitiveValue: unsupported Arrow type %v", dt) + } + return nil +} + +func buildTemporalArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { + elemType := vals.Type().Elem() + for elemType.Kind() == reflect.Ptr { + elemType = elemType.Elem() + } + + isPtr := vals.Type().Elem().Kind() == reflect.Ptr + + switch elemType { + case typeOfTime: + dt := &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "UTC"} + tb := array.NewTimestampBuilder(mem, dt) + defer tb.Release() + tb.Reserve(vals.Len()) + for i := 0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + tb.AppendNull() + continue + } + v = v.Elem() + } + t := v.Interface().(time.Time) + tb.Append(arrow.Timestamp(t.UnixNano())) + } + return tb.NewArray(), nil + + case typeOfDuration: + dt := &arrow.DurationType{Unit: arrow.Nanosecond} + db := array.NewDurationBuilder(mem, dt) + defer db.Release() + db.Reserve(vals.Len()) + for i := 0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + db.AppendNull() + continue + } + v = v.Elem() + } + d := v.Interface().(time.Duration) + db.Append(arrow.Duration(d.Nanoseconds())) + } + return db.NewArray(), nil + + default: + return nil, fmt.Errorf("buildTemporalArray: unsupported type %v", elemType) + } +} + +func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { + elemType := vals.Type().Elem() + for elemType.Kind() == reflect.Ptr { + elemType = 
elemType.Elem() + } + + isPtr := vals.Type().Elem().Kind() == reflect.Ptr + + switch elemType { + case typeOfDec128: + precision, scale := int32(38), int32(0) + if opts.HasDecimalOpts { + precision = opts.DecimalPrecision + scale = opts.DecimalScale + } + dt := &arrow.Decimal128Type{Precision: precision, Scale: scale} + b := array.NewDecimal128Builder(mem, dt) + defer b.Release() + b.Reserve(vals.Len()) + for i := 0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + b.AppendNull() + continue + } + v = v.Elem() + } + n := v.Interface().(decimal128.Num) + b.Append(n) + } + return b.NewArray(), nil + + case typeOfDec256: + precision, scale := int32(76), int32(0) + if opts.HasDecimalOpts { + precision = opts.DecimalPrecision + scale = opts.DecimalScale + } + dt := &arrow.Decimal256Type{Precision: precision, Scale: scale} + b := array.NewDecimal256Builder(mem, dt) + defer b.Release() + b.Reserve(vals.Len()) + for i := 0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + b.AppendNull() + continue + } + v = v.Elem() + } + n := v.Interface().(decimal256.Num) + b.Append(n) + } + return b.NewArray(), nil + + case typeOfDec32: + precision, scale := int32(9), int32(0) + if opts.HasDecimalOpts { + precision = opts.DecimalPrecision + scale = opts.DecimalScale + } + dt := &arrow.Decimal32Type{Precision: precision, Scale: scale} + b := array.NewDecimal32Builder(mem, dt) + defer b.Release() + b.Reserve(vals.Len()) + for i := 0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + b.AppendNull() + continue + } + v = v.Elem() + } + b.Append(decimal.Decimal32(v.Int())) + } + return b.NewArray(), nil + + case typeOfDec64: + precision, scale := int32(18), int32(0) + if opts.HasDecimalOpts { + precision = opts.DecimalPrecision + scale = opts.DecimalScale + } + dt := &arrow.Decimal64Type{Precision: precision, Scale: scale} + b := array.NewDecimal64Builder(mem, dt) + defer b.Release() + b.Reserve(vals.Len()) + for i := 
0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + b.AppendNull() + continue + } + v = v.Elem() + } + b.Append(decimal.Decimal64(v.Int())) + } + return b.NewArray(), nil + + default: + return nil, fmt.Errorf("buildDecimalArray: unsupported type %v", elemType) + } +} + +func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { + elemType := vals.Type().Elem() + isPtr := elemType.Kind() == reflect.Ptr + for elemType.Kind() == reflect.Ptr { + elemType = elemType.Elem() + } + + st, err := inferStructType(elemType) + if err != nil { + return nil, fmt.Errorf("buildStructArray: %w", err) + } + + fields := cachedStructFields(elemType) + sb := array.NewStructBuilder(mem, st) + defer sb.Release() + sb.Reserve(vals.Len()) + + for i := 0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + sb.AppendNull() + continue + } + v = v.Elem() + } + sb.Append(true) + for fi, fm := range fields { + fv := v.FieldByIndex(fm.Index) + fb := sb.FieldBuilder(fi) + if err := appendValue(fb, fv, fm.Opts); err != nil { + return nil, fmt.Errorf("buildStructArray: field %q: %w", fm.Name, err) + } + } + } + + return sb.NewArray(), nil +} + +func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { + for v.Kind() == reflect.Ptr { + if v.IsNil() { + b.AppendNull() + return nil + } + v = v.Elem() + } + + switch tb := b.(type) { + case *array.Int8Builder: + tb.Append(int8(v.Int())) + case *array.Int16Builder: + tb.Append(int16(v.Int())) + case *array.Int32Builder: + tb.Append(int32(v.Int())) + case *array.Int64Builder: + tb.Append(int64(v.Int())) + case *array.Uint8Builder: + tb.Append(uint8(v.Uint())) + case *array.Uint16Builder: + tb.Append(uint16(v.Uint())) + case *array.Uint32Builder: + tb.Append(uint32(v.Uint())) + case *array.Uint64Builder: + tb.Append(uint64(v.Uint())) + case *array.Float32Builder: + tb.Append(float32(v.Float())) + case *array.Float64Builder: + tb.Append(float64(v.Float())) + case 
*array.BooleanBuilder: + tb.Append(v.Bool()) + case *array.StringBuilder: + tb.Append(v.String()) + case *array.BinaryBuilder: + if v.IsNil() { + tb.AppendNull() + } else { + tb.Append(v.Bytes()) + } + case *array.TimestampBuilder: + t := v.Interface().(time.Time) + tb.Append(arrow.Timestamp(t.UnixNano())) + case *array.DurationBuilder: + d := v.Interface().(time.Duration) + tb.Append(arrow.Duration(d.Nanoseconds())) + case *array.Decimal128Builder: + n := v.Interface().(decimal128.Num) + tb.Append(n) + case *array.Decimal256Builder: + n := v.Interface().(decimal256.Num) + tb.Append(n) + case *array.Decimal32Builder: + tb.Append(decimal.Decimal32(v.Int())) + case *array.Decimal64Builder: + tb.Append(decimal.Decimal64(v.Int())) + case *array.ListBuilder: + if v.Kind() == reflect.Slice && v.IsNil() { + tb.AppendNull() + } else { + tb.Append(true) + vb := tb.ValueBuilder() + for i := 0; i < v.Len(); i++ { + if err := appendValue(vb, v.Index(i), tagOpts{}); err != nil { + return err + } + } + } + case *array.FixedSizeListBuilder: + tb.Append(true) + vb := tb.ValueBuilder() + for i := 0; i < v.Len(); i++ { + if err := appendValue(vb, v.Index(i), tagOpts{}); err != nil { + return err + } + } + case *array.MapBuilder: + if v.IsNil() { + tb.AppendNull() + } else { + tb.Append(true) + kb := tb.KeyBuilder() + ib := tb.ItemBuilder() + for _, key := range v.MapKeys() { + if err := appendValue(kb, key, tagOpts{}); err != nil { + return err + } + if err := appendValue(ib, v.MapIndex(key), tagOpts{}); err != nil { + return err + } + } + } + case *array.StructBuilder: + elemType := v.Type() + fields := cachedStructFields(elemType) + tb.Append(true) + for fi, fm := range fields { + fv := v.FieldByIndex(fm.Index) + fb := tb.FieldBuilder(fi) + if err := appendValue(fb, fv, fm.Opts); err != nil { + return fmt.Errorf("appendValue: struct field %q: %w", fm.Name, err) + } + } + case *array.ListViewBuilder: + if v.Kind() == reflect.Slice && v.IsNil() { + tb.AppendNull() + } else { + 
tb.AppendWithSize(true, v.Len())
+			vb := tb.ValueBuilder()
+			for i := 0; i < v.Len(); i++ {
+				if err := appendValue(vb, v.Index(i), tagOpts{}); err != nil {
+					return err
+				}
+			}
+		}
+	default:
+		if db, ok := b.(array.DictionaryBuilder); ok {
+			return appendToDictBuilder(db, v)
+		}
+		return fmt.Errorf("appendValue: unsupported builder type %T", b)
+	}
+	return nil
+}
+
+// appendToDictBuilder appends the single Go value v to the typed dictionary
+// builder db.
+//
+// String and []byte values go to a binary dictionary builder (a nil slice is
+// appended as null). Signed, unsigned and floating-point values go to the
+// numeric builder of the matching family and are converted to its width.
+//
+// A value whose kind does not fit the builder produces an error instead of the
+// panic that reflect raises from Int, Uint, Float or Bytes on a wrong kind.
+// An unknown builder type also produces an error.
+func appendToDictBuilder(db array.DictionaryBuilder, v reflect.Value) error {
+	mismatch := func() error {
+		return fmt.Errorf("appendToDictBuilder: cannot append %s value to %T", v.Kind(), db)
+	}
+
+	// Validate the value kind once per builder family so the conversions in
+	// the switch below cannot panic.
+	switch db.(type) {
+	case *array.Int8DictionaryBuilder, *array.Int16DictionaryBuilder,
+		*array.Int32DictionaryBuilder, *array.Int64DictionaryBuilder:
+		if !v.CanInt() {
+			return mismatch()
+		}
+	case *array.Uint8DictionaryBuilder, *array.Uint16DictionaryBuilder,
+		*array.Uint32DictionaryBuilder, *array.Uint64DictionaryBuilder:
+		if !v.CanUint() {
+			return mismatch()
+		}
+	case *array.Float32DictionaryBuilder, *array.Float64DictionaryBuilder:
+		if !v.CanFloat() {
+			return mismatch()
+		}
+	}
+
+	switch bdb := db.(type) {
+	case *array.BinaryDictionaryBuilder:
+		switch v.Kind() {
+		case reflect.String:
+			return bdb.AppendString(v.String())
+		case reflect.Slice:
+			if v.IsNil() {
+				bdb.AppendNull()
+				return nil
+			}
+			// Value.Bytes panics unless the slice elements are bytes.
+			if v.Type().Elem().Kind() != reflect.Uint8 {
+				return mismatch()
+			}
+			return bdb.Append(v.Bytes())
+		}
+		return mismatch()
+	case *array.Int8DictionaryBuilder:
+		return bdb.Append(int8(v.Int()))
+	case *array.Int16DictionaryBuilder:
+		return bdb.Append(int16(v.Int()))
+	case *array.Int32DictionaryBuilder:
+		return bdb.Append(int32(v.Int()))
+	case *array.Int64DictionaryBuilder:
+		return bdb.Append(int64(v.Int()))
+	case *array.Uint8DictionaryBuilder:
+		return bdb.Append(uint8(v.Uint()))
+	case *array.Uint16DictionaryBuilder:
+		return bdb.Append(uint16(v.Uint()))
+	case *array.Uint32DictionaryBuilder:
+		return bdb.Append(uint32(v.Uint()))
+	case *array.Uint64DictionaryBuilder:
+		return bdb.Append(uint64(v.Uint()))
+	case *array.Float32DictionaryBuilder:
+		return bdb.Append(float32(v.Float()))
+	case *array.Float64DictionaryBuilder:
+		return bdb.Append(float64(v.Float()))
+	}
+	return fmt.Errorf("appendToDictBuilder: unsupported builder type %T", db)
+}
+
+// buildListArray converts vals, a slice of slices (or of pointers to slices),
+// into an Arrow List array allocated from mem.
+//
+// A nil pointer or a nil inner slice becomes a null list entry. Every other
+// inner slice becomes one list whose elements are appended through appendValue.
+// The element type is inferred from the inner element type with pointers
+// stripped.
+func buildListArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) {
+	innerSliceType := vals.Type().Elem()
+	isOuterPtr := innerSliceType.Kind() == reflect.Ptr
+	for innerSliceType.Kind() == reflect.Ptr {
+		innerSliceType = innerSliceType.Elem()
+	}
+
+	innerElemType := innerSliceType.Elem()
+	for innerElemType.Kind() == reflect.Ptr {
+		innerElemType = innerElemType.Elem()
+	}
+
+	elemDT, err := inferArrowType(innerElemType)
+	if err != nil {
+		return nil, fmt.Errorf("buildListArray: %w", err)
+	}
+
+	lb := array.NewListBuilder(mem, elemDT)
+	defer lb.Release()
+
+	vb := lb.ValueBuilder()
+
+	for i := 0; i < vals.Len(); i++ {
+		outer := vals.Index(i)
+		if isOuterPtr {
+			if outer.IsNil() {
+				lb.AppendNull()
+				continue
+			}
+			// Only one pointer level is dereferenced here.
+			outer = outer.Elem()
+		}
+		if outer.IsNil() {
+			lb.AppendNull()
+			continue
+		}
+		lb.Append(true)
+		for j := 0; j < outer.Len(); j++ {
+			if err := appendValue(vb, outer.Index(j), tagOpts{}); err != nil {
+				return nil, fmt.Errorf("buildListArray: element [%d][%d]: %w", i, j, err)
+			}
+		}
+	}
+
+	return lb.NewArray(), nil
+}
+
+// buildMapArray converts vals, a slice of Go maps (or of pointers to maps),
+// into an Arrow Map array allocated from mem.
+//
+// A nil pointer or a nil map becomes a null entry. Key and item types are
+// inferred from the Go key and value types with pointers stripped. Entries are
+// appended in Go map iteration order, which is unspecified, so the order of
+// key/value pairs inside one row is not deterministic.
+func buildMapArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) {
+	mapType := vals.Type().Elem()
+	isPtr := mapType.Kind() == reflect.Ptr
+	for mapType.Kind() == reflect.Ptr {
+		mapType = mapType.Elem()
+	}
+
+	keyType := mapType.Key()
+	valType := mapType.Elem()
+
+	for keyType.Kind() == reflect.Ptr {
+		keyType = keyType.Elem()
+	}
+	for valType.Kind() == reflect.Ptr {
+		valType = valType.Elem()
+	}
+
+	keyDT, err := inferArrowType(keyType)
+	if err != nil {
+		return nil, fmt.Errorf("buildMapArray: key type: %w", err)
+	}
+	valDT, err := inferArrowType(valType)
+	if err != nil {
+		return nil, fmt.Errorf("buildMapArray: value type: %w", err)
+	}
+
+	mb := array.NewMapBuilder(mem, keyDT, valDT, false)
+	defer mb.Release()
+
+	kb := mb.KeyBuilder()
+	ib := mb.ItemBuilder()
+
+	for i := 0; i < vals.Len(); i++ {
+		m := vals.Index(i)
+		if isPtr {
+			if m.IsNil() {
+				mb.AppendNull()
+				continue
+			}
+			m = m.Elem()
+		}
+		if m.IsNil() {
+			mb.AppendNull()
+			continue
+		}
+		mb.Append(true)
+		// MapRange visits each entry once without materialising the key
+		// slice or performing a second lookup per key.
+		for it := m.MapRange(); it.Next(); {
+			if err := appendValue(kb, it.Key(), tagOpts{}); err != nil {
+				return nil, fmt.Errorf("buildMapArray: key: %w", err)
+			}
+			if err := appendValue(ib, it.Value(), tagOpts{}); err != nil {
+				return nil, fmt.Errorf("buildMapArray: value: %w", err)
+			}
+		}
+	}
+
+	return mb.NewArray(), nil
+}
+
+// buildFixedSizeListArray converts vals, a slice of Go arrays (or of pointers
+// to arrays), into an Arrow FixedSizeList array allocated from mem.
+//
+// The list size is the Go array length. A nil pointer becomes a null entry.
+// An element type that is not an array after stripping pointers is an error.
+func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) {
+	elemType := vals.Type().Elem()
+	isPtr := elemType.Kind() == reflect.Ptr
+	for elemType.Kind() == reflect.Ptr {
+		elemType = elemType.Elem()
+	}
+
+	if elemType.Kind() != reflect.Array {
+		return nil, fmt.Errorf("buildFixedSizeListArray: expected array element, got %v", elemType.Kind())
+	}
+
+	n := int32(elemType.Len())
+	innerElemType := elemType.Elem()
+	for innerElemType.Kind() == reflect.Ptr {
+		innerElemType = innerElemType.Elem()
+	}
+
+	innerDT, err := inferArrowType(innerElemType)
+	if err != nil {
+		return nil, fmt.Errorf("buildFixedSizeListArray: %w", err)
+	}
+
+	fb := array.NewFixedSizeListBuilder(mem, n, innerDT)
+	defer fb.Release()
+
+	vb := fb.ValueBuilder()
+
+	for i := 0; i < vals.Len(); i++ {
+		elem := vals.Index(i)
+		if isPtr {
+			if elem.IsNil() {
+				fb.AppendNull()
+				continue
+			}
+			elem = elem.Elem()
+		}
+		fb.Append(true)
+		for j := 0; j < int(n); j++ {
+			if err := appendValue(vb, elem.Index(j), tagOpts{}); err != nil {
+				return nil, fmt.Errorf("buildFixedSizeListArray: element [%d][%d]: %w", i, j, err)
+			}
+		}
+	}
+
+	return fb.NewArray(), nil
+}
+
+// buildDictionaryArray dictionary-encodes vals into an Arrow Dictionary array
+// allocated from mem.
+//
+// The value type is inferred from the element type with pointers stripped and
+// the index type is always int32. The values are first materialised as a plain
+// array with buildPrimitiveArray and then fed to the dictionary builder with
+// AppendArray.
+func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) {
+	elemType := vals.Type().Elem()
+	for elemType.Kind() == reflect.Ptr {
+		elemType = elemType.Elem()
+	}
+
+	valDT, err := inferArrowType(elemType)
+	if err != nil {
+		return nil, fmt.Errorf("buildDictionaryArray: %w", err)
+	}
+
+	dt := &arrow.DictionaryType{
+		IndexType: arrow.PrimitiveTypes.Int32,
+		ValueType: valDT,
+	}
+	db := array.NewDictionaryBuilder(mem, dt)
+	defer db.Release()
+
+	rawArr, err := buildPrimitiveArray(vals, mem)
+	if err != nil {
+		return nil, fmt.Errorf("buildDictionaryArray: building raw values: %w", err)
+	}
+	defer rawArr.Release()
+
+	if err := db.AppendArray(rawArr); err != nil {
+		return nil, fmt.Errorf("buildDictionaryArray: AppendArray: %w", err)
+	}
+
+	return db.NewArray(), nil
+}
+
+// buildRunEndEncodedArray run-end encodes vals into an Arrow RunEndEncoded
+// array allocated from mem.
+//
+// Consecutive elements that are reflect.DeepEqual to the first element of the
+// current run are collapsed into that run. Run ends are int32, so an input
+// longer than the int32 range is rejected. Empty and non-empty inputs share one
+// code path: the values child is always produced by buildArray, so both get the
+// same child type for a given Go element type.
+func buildRunEndEncodedArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) {
+	// Run ends are narrowed to int32 below; refuse lengths that would wrap.
+	const maxRunEnd = 1<<31 - 1
+	n := vals.Len()
+	if n > maxRunEnd {
+		return nil, fmt.Errorf("buildRunEndEncodedArray: %d values exceed the int32 run-end range", n)
+	}
+
+	// Both slices start non-nil so an empty input still yields empty,
+	// correctly typed child arrays.
+	runEnds := make([]int32, 0)
+	runValues := reflect.MakeSlice(vals.Type(), 0, 0)
+	for start := 0; start < n; {
+		current := vals.Index(start)
+		first := current.Interface()
+		end := start + 1
+		for end < n && reflect.DeepEqual(first, vals.Index(end).Interface()) {
+			end++
+		}
+		runEnds = append(runEnds, int32(end))
+		runValues = reflect.Append(runValues, current)
+		start = end
+	}
+
+	runEndsArr, err := buildPrimitiveArray(reflect.ValueOf(runEnds), mem)
+	if err != nil {
+		return nil, fmt.Errorf("buildRunEndEncodedArray: run ends: %w", err)
+	}
+	defer runEndsArr.Release()
+
+	valuesArr, err := buildArray(runValues, tagOpts{}, mem)
+	if err != nil {
+		return nil, fmt.Errorf("buildRunEndEncodedArray: values: %w", err)
+	}
+	defer valuesArr.Release()
+
+	return array.NewRunEndEncodedArray(runEndsArr, valuesArr, n, 0), nil
+}
+
+// buildListViewArray converts vals, a slice of slices (or of pointers to
+// slices), into an Arrow ListView array allocated from mem.
+//
+// A nil pointer or a nil inner slice becomes a null entry. Every other inner
+// slice is appended with its length as the view size, followed by its elements
+// through appendValue.
+func buildListViewArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) {
+	innerSliceType := vals.Type().Elem()
+	isOuterPtr := innerSliceType.Kind() == reflect.Ptr
+	for innerSliceType.Kind() == reflect.Ptr {
+		innerSliceType = innerSliceType.Elem()
+	}
+
+	innerElemType := innerSliceType.Elem()
+	for innerElemType.Kind() == reflect.Ptr {
+		innerElemType = innerElemType.Elem()
+	}
+
+	elemDT, err := inferArrowType(innerElemType)
+	if err != nil {
+		return nil, fmt.Errorf("buildListViewArray: %w", err)
+	}
+
+	lvb := array.NewListViewBuilder(mem, elemDT)
+	defer lvb.Release()
+
+	vb := lvb.ValueBuilder()
+
+	for i := 0; i < vals.Len(); i++ {
+		outer := vals.Index(i)
+		if isOuterPtr {
+			if outer.IsNil() {
+				lvb.AppendNull()
+				continue
+			}
+			outer = outer.Elem()
+		}
+		if outer.IsNil() {
+			lvb.AppendNull()
+			continue
+		}
+		lvb.AppendWithSize(true, outer.Len())
+		for j := 0; j < outer.Len(); j++ {
+			if err := appendValue(vb, outer.Index(j), tagOpts{}); err != nil {
+				return nil, fmt.Errorf("buildListViewArray: element [%d][%d]: %w", i, j, err)
+			}
+		}
+	}
+
+	return lvb.NewArray(), nil
+}
diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go
new file mode 100644
index 00000000..6f184275
--- /dev/null
+++ b/arrow/arreflect/reflect_go_to_arrow_test.go
@@ -0,0 +1,780 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package arreflect
+
+import (
+	"reflect"
+	"testing"
+	"time"
+
+	"github.com/apache/arrow-go/v18/arrow"
+	"github.com/apache/arrow-go/v18/arrow/array"
+	"github.com/apache/arrow-go/v18/arrow/decimal"
+	"github.com/apache/arrow-go/v18/arrow/decimal128"
+	"github.com/apache/arrow-go/v18/arrow/decimal256"
+	"github.com/apache/arrow-go/v18/arrow/memory"
+)
+
+// TestBuildPrimitiveArray checks that buildArray maps slices of Go scalars
+// (signed/unsigned integers, floats, bool, string, []byte) and slices of
+// pointers to the matching Arrow array type, values and null positions.
+// The checked allocator asserts that all memory is released at the end.
+func TestBuildPrimitiveArray(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	t.Run("int32", func(t *testing.T) {
+		vals := []int32{1, 2, 3, 4, 5}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.Len() != 5 {
+			t.Errorf("expected 5, got %d", arr.Len())
+		}
+		if arr.DataType().ID() != arrow.INT32 {
+			t.Errorf("expected INT32, got %v", arr.DataType())
+		}
+		typed := arr.(*array.Int32)
+		for i, want := range vals {
+			if typed.Value(i) != want {
+				t.Errorf("[%d] want %d, got %d", i, want, typed.Value(i))
+			}
+		}
+	})
+
+	t.Run("string", func(t *testing.T) {
+		vals := []string{"hello", "world", "foo"}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.STRING {
+			t.Errorf("expected STRING, got %v", arr.DataType())
+		}
+		typed := arr.(*array.String)
+		for i, want := range vals {
+			if typed.Value(i) != want {
+				t.Errorf("[%d] want %q, got %q", i, want, typed.Value(i))
+			}
+		}
+	})
+
+	// A nil pointer element must surface as an Arrow null.
+	t.Run("pointer_with_null", func(t *testing.T) {
+		v1, v3 := int32(10), int32(30)
+		vals := []*int32{&v1, nil, &v3}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if !arr.IsNull(1) {
+			t.Error("expected index 1 to be null")
+		}
+		typed := arr.(*array.Int32)
+		if typed.Value(0) != 10 || typed.Value(2) != 30 {
+			t.Error("unexpected values")
+		}
+	})
+
+	t.Run("bool", func(t *testing.T) {
+		vals := []bool{true, false, true}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.BOOL {
+			t.Errorf("expected BOOL, got %v", arr.DataType())
+		}
+		typed := arr.(*array.Boolean)
+		if !typed.Value(0) || typed.Value(1) || !typed.Value(2) {
+			t.Error("unexpected bool values")
+		}
+	})
+
+	// [][]byte is expected to become BINARY rather than a list of uint8.
+	t.Run("binary", func(t *testing.T) {
+		vals := [][]byte{{1, 2, 3}, {4, 5}, {6}}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.BINARY {
+			t.Errorf("expected BINARY, got %v", arr.DataType())
+		}
+	})
+
+	// Only the resulting Arrow type ID is checked for each numeric width.
+	t.Run("numeric_types", func(t *testing.T) {
+		cases := []struct {
+			vals any
+			id   arrow.Type
+		}{
+			{[]int8{1, -2, 3}, arrow.INT8},
+			{[]int16{100, -200}, arrow.INT16},
+			{[]int64{1000, -2000}, arrow.INT64},
+			{[]uint8{1, 2, 3}, arrow.UINT8},
+			{[]uint16{1, 2}, arrow.UINT16},
+			{[]uint32{1, 2}, arrow.UINT32},
+			{[]uint64{1, 2}, arrow.UINT64},
+			{[]float32{1.0, 2.0}, arrow.FLOAT32},
+			{[]float64{1.1, 2.2}, arrow.FLOAT64},
+		}
+		for _, tc := range cases {
+			arr, err := buildArray(reflect.ValueOf(tc.vals), tagOpts{}, mem)
+			if err != nil {
+				t.Fatalf("type %v: %v", tc.id, err)
+			}
+			if arr.DataType().ID() != tc.id {
+				t.Errorf("expected %v, got %v", tc.id, arr.DataType())
+			}
+			arr.Release()
+		}
+	})
+}
+
+// TestBuildTemporalArray checks that time.Time slices become TIMESTAMP arrays
+// holding UnixNano values and that time.Duration slices become DURATION arrays
+// holding nanosecond counts.
+func TestBuildTemporalArray(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	t.Run("time_time", func(t *testing.T) {
+		now := time.Now().UTC()
+		vals := []time.Time{now, now.Add(time.Hour)}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.TIMESTAMP {
+			t.Errorf("expected TIMESTAMP, got %v", arr.DataType())
+		}
+		typed := arr.(*array.Timestamp)
+		for i, want := range vals {
+			if typed.Value(i) != arrow.Timestamp(want.UnixNano()) {
+				t.Errorf("[%d] timestamp mismatch", i)
+			}
+		}
+	})
+
+	t.Run("time_duration", func(t *testing.T) {
+		vals := []time.Duration{time.Second, time.Minute, time.Hour}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.DURATION {
+			t.Errorf("expected DURATION, got %v", arr.DataType())
+		}
+		typed := arr.(*array.Duration)
+		for i, want := range vals {
+			if typed.Value(i) != arrow.Duration(want.Nanoseconds()) {
+				t.Errorf("[%d] duration mismatch", i)
+			}
+		}
+	})
+}
+
+// TestBuildDecimalArray checks the Decimal32/64/128/256 mappings, including
+// that decimal precision and scale supplied through tagOpts end up on the
+// resulting Arrow type.
+func TestBuildDecimalArray(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	t.Run("decimal128", func(t *testing.T) {
+		vals := []decimal128.Num{
+			decimal128.New(0, 100),
+			decimal128.New(0, 200),
+			decimal128.New(0, 300),
+		}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.DECIMAL128 {
+			t.Errorf("expected DECIMAL128, got %v", arr.DataType())
+		}
+		typed := arr.(*array.Decimal128)
+		for i, want := range vals {
+			if typed.Value(i) != want {
+				t.Errorf("[%d] decimal128 mismatch", i)
+			}
+		}
+	})
+
+	t.Run("decimal256", func(t *testing.T) {
+		vals := []decimal256.Num{
+			decimal256.New(0, 0, 0, 100),
+			decimal256.New(0, 0, 0, 200),
+		}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.DECIMAL256 {
+			t.Errorf("expected DECIMAL256, got %v", arr.DataType())
+		}
+		typed := arr.(*array.Decimal256)
+		for i, want := range vals {
+			if typed.Value(i) != want {
+				t.Errorf("[%d] decimal256 mismatch", i)
+			}
+		}
+	})
+
+	t.Run("decimal128_custom_opts", func(t *testing.T) {
+		vals := []decimal128.Num{decimal128.New(0, 12345)}
+		opts := tagOpts{HasDecimalOpts: true, DecimalPrecision: 10, DecimalScale: 3}
+		arr, err := buildArray(reflect.ValueOf(vals), opts, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		dt := arr.DataType().(*arrow.Decimal128Type)
+		if dt.Precision != 10 || dt.Scale != 3 {
+			t.Errorf("expected p=10 s=3, got p=%d s=%d", dt.Precision, dt.Scale)
+		}
+	})
+
+	t.Run("decimal32", func(t *testing.T) {
+		vals := []decimal.Decimal32{100, 200, 300}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.DECIMAL32 {
+			t.Errorf("expected DECIMAL32, got %v", arr.DataType())
+		}
+		typed := arr.(*array.Decimal32)
+		for i, want := range vals {
+			if typed.Value(i) != want {
+				t.Errorf("[%d] decimal32 mismatch: got %v, want %v", i, typed.Value(i), want)
+			}
+		}
+	})
+
+	t.Run("decimal64", func(t *testing.T) {
+		vals := []decimal.Decimal64{1000, 2000}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.DECIMAL64 {
+			t.Errorf("expected DECIMAL64, got %v", arr.DataType())
+		}
+		typed := arr.(*array.Decimal64)
+		for i, want := range vals {
+			if typed.Value(i) != want {
+				t.Errorf("[%d] decimal64 mismatch: got %v, want %v", i, typed.Value(i), want)
+			}
+		}
+	})
+
+	t.Run("decimal32_custom_opts", func(t *testing.T) {
+		vals := []decimal.Decimal32{12345}
+		opts := tagOpts{HasDecimalOpts: true, DecimalPrecision: 9, DecimalScale: 2}
+		arr, err := buildArray(reflect.ValueOf(vals), opts, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		dt := arr.DataType().(*arrow.Decimal32Type)
+		if dt.Precision != 9 || dt.Scale != 2 {
+			t.Errorf("expected p=9 s=2, got p=%d s=%d", dt.Precision, dt.Scale)
+		}
+	})
+}
+
+// buildSimpleStruct is a flat two-field fixture for the struct tests.
+type buildSimpleStruct struct {
+	X int32
+	Y string
+}
+
+// buildNestedStruct embeds buildSimpleStruct as a named field to exercise
+// nested struct arrays.
+type buildNestedStruct struct {
+	A int32
+	B buildSimpleStruct
+}
+
+// buildNullableStruct uses pointer fields so individual fields can be null.
+type buildNullableStruct struct {
+	X *int32
+	Y *string
+}
+
+// TestBuildStructArray checks struct slices: field values, null rows for nil
+// struct pointers, null fields for nil pointer fields, and nested structs.
+func TestBuildStructArray(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	t.Run("simple", func(t *testing.T) {
+		vals := []buildSimpleStruct{
+			{X: 1, Y: "one"},
+			{X: 2, Y: "two"},
+			{X: 3, Y: "three"},
+		}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.STRUCT {
+			t.Fatalf("expected STRUCT, got %v", arr.DataType())
+		}
+		typed := arr.(*array.Struct)
+		if typed.Len() != 3 {
+			t.Errorf("expected 3, got %d", typed.Len())
+		}
+		xArr := typed.Field(0).(*array.Int32)
+		yArr := typed.Field(1).(*array.String)
+		for i, want := range vals {
+			if xArr.Value(i) != want.X {
+				t.Errorf("[%d] X: want %d, got %d", i, want.X, xArr.Value(i))
+			}
+			if yArr.Value(i) != want.Y {
+				t.Errorf("[%d] Y: want %q, got %q", i, want.Y, yArr.Value(i))
+			}
+		}
+	})
+
+	t.Run("pointer_null_row", func(t *testing.T) {
+		v1 := buildSimpleStruct{X: 42, Y: "answer"}
+		vals := []*buildSimpleStruct{&v1, nil}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.Len() != 2 {
+			t.Errorf("expected 2, got %d", arr.Len())
+		}
+		if !arr.IsNull(1) {
+			t.Error("expected index 1 to be null")
+		}
+	})
+
+	t.Run("nullable_fields", func(t *testing.T) {
+		x1 := int32(10)
+		y1 := "hello"
+		vals := []buildNullableStruct{
+			{X: &x1, Y: &y1},
+			{X: nil, Y: nil},
+		}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		typed := arr.(*array.Struct)
+		if !typed.Field(0).IsNull(1) {
+			t.Error("expected X[1] to be null")
+		}
+		if !typed.Field(1).IsNull(1) {
+			t.Error("expected Y[1] to be null")
+		}
+	})
+
+	t.Run("nested_struct", func(t *testing.T) {
+		vals := []buildNestedStruct{
+			{A: 1, B: buildSimpleStruct{X: 10, Y: "inner1"}},
+			{A: 2, B: buildSimpleStruct{X: 20, Y: "inner2"}},
+		}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.STRUCT {
+			t.Fatalf("expected STRUCT, got %v", arr.DataType())
+		}
+		typed := arr.(*array.Struct)
+		aArr := typed.Field(0).(*array.Int32)
+		if aArr.Value(0) != 1 || aArr.Value(1) != 2 {
+			t.Error("unexpected A values")
+		}
+		bArr := typed.Field(1).(*array.Struct)
+		bxArr := bArr.Field(0).(*array.Int32)
+		if bxArr.Value(0) != 10 || bxArr.Value(1) != 20 {
+			t.Error("unexpected B.X values")
+		}
+	})
+}
+
+// TestBuildListArray checks that slices of slices become LIST arrays, that a
+// nil inner slice becomes a null entry, and that nesting produces a list of
+// lists.
+func TestBuildListArray(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	t.Run("int32_lists", func(t *testing.T) {
+		vals := [][]int32{{1, 2, 3}, {4, 5}, {6}}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.LIST {
+			t.Fatalf("expected LIST, got %v", arr.DataType())
+		}
+		typed := arr.(*array.List)
+		if typed.Len() != 3 {
+			t.Errorf("expected 3, got %d", typed.Len())
+		}
+		if typed.ListValues().(*array.Int32).Len() != 6 {
+			t.Errorf("expected 6 total values")
+		}
+	})
+
+	t.Run("null_inner", func(t *testing.T) {
+		vals := [][]int32{{1, 2}, nil, {3}}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if !arr.IsNull(1) {
+			t.Error("expected index 1 to be null")
+		}
+	})
+
+	t.Run("string_lists", func(t *testing.T) {
+		vals := [][]string{{"a", "b"}, {"c"}}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.LIST {
+			t.Fatalf("expected LIST, got %v", arr.DataType())
+		}
+	})
+
+	t.Run("nested", func(t *testing.T) {
+		vals := [][][]int32{{{1, 2}, {3}}, {{4, 5, 6}}}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.LIST {
+			t.Fatalf("expected outer LIST, got %v", arr.DataType())
+		}
+		outer := arr.(*array.List)
+		if outer.Len() != 2 {
+			t.Errorf("expected 2 outer rows, got %d", outer.Len())
+		}
+		if outer.ListValues().DataType().ID() != arrow.LIST {
+			t.Fatalf("expected inner LIST, got %v", outer.ListValues().DataType())
+		}
+	})
+}
+
+// TestBuildMapArray checks that slices of Go maps become MAP arrays, that a
+// nil map becomes a null entry, and that every key/value pair is emitted.
+func TestBuildMapArray(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	t.Run("string_int32", func(t *testing.T) {
+		vals := []map[string]int32{
+			{"a": 1, "b": 2},
+			{"c": 3},
+		}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.MAP {
+			t.Fatalf("expected MAP, got %v", arr.DataType())
+		}
+		if arr.(*array.Map).Len() != 2 {
+			t.Errorf("expected 2, got %d", arr.Len())
+		}
+	})
+
+	t.Run("null_map", func(t *testing.T) {
+		vals := []map[string]int32{{"a": 1}, nil}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if !arr.IsNull(1) {
+			t.Error("expected index 1 to be null")
+		}
+	})
+
+	// Only the number of entries is asserted; their order is not.
+	t.Run("entry_count", func(t *testing.T) {
+		vals := []map[string]int32{{"x": 10, "y": 20, "z": 30}}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		kvArr := arr.(*array.Map).ListValues().(*array.Struct)
+		if kvArr.Len() != 3 {
+			t.Errorf("expected 3 key-value pairs, got %d", kvArr.Len())
+		}
+	})
+}
+
+// TestBuildFixedSizeListArray checks that slices of Go arrays become
+// FIXED_SIZE_LIST arrays whose list size equals the Go array length.
+func TestBuildFixedSizeListArray(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	t.Run("int32_n3", func(t *testing.T) {
+		vals := [][3]int32{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.FIXED_SIZE_LIST {
+			t.Fatalf("expected FIXED_SIZE_LIST, got %v", arr.DataType())
+		}
+		typed := arr.(*array.FixedSizeList)
+		if typed.Len() != 3 {
+			t.Errorf("expected 3, got %d", typed.Len())
+		}
+		if typed.DataType().(*arrow.FixedSizeListType).Len() != 3 {
+			t.Error("expected fixed size 3")
+		}
+		values := typed.ListValues().(*array.Int32)
+		if values.Len() != 9 {
+			t.Errorf("expected 9 values, got %d", values.Len())
+		}
+		// Spot-check the first element of each row in the flattened child.
+		if values.Value(0) != 1 || values.Value(3) != 4 || values.Value(6) != 7 {
+			t.Error("unexpected values")
+		}
+	})
+
+	t.Run("float64_n2", func(t *testing.T) {
+		vals := [][2]float64{{1.0, 2.0}, {3.0, 4.0}}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.FIXED_SIZE_LIST {
+			t.Fatalf("expected FIXED_SIZE_LIST, got %v", arr.DataType())
+		}
+		if arr.DataType().(*arrow.FixedSizeListType).Len() != 2 {
+			t.Error("expected fixed size 2")
+		}
+	})
+}
+
+// TestBuildDictionaryArray checks that tagOpts{Dict: true} yields a DICTIONARY
+// array with one dictionary entry per distinct value and an int32 index type.
+func TestBuildDictionaryArray(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	t.Run("string_dict", func(t *testing.T) {
+		vals := []string{"apple", "banana", "apple", "cherry", "banana", "apple"}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.DICTIONARY {
+			t.Fatalf("expected DICTIONARY, got %v", arr.DataType())
+		}
+		typed := arr.(*array.Dictionary)
+		if typed.Len() != 6 {
+			t.Errorf("expected 6, got %d", typed.Len())
+		}
+		if typed.Dictionary().Len() != 3 {
+			t.Errorf("expected 3 unique, got %d", typed.Dictionary().Len())
+		}
+	})
+
+	t.Run("int32_dict", func(t *testing.T) {
+		vals := []int32{1, 2, 1, 3, 2, 1}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.DICTIONARY {
+			t.Fatalf("expected DICTIONARY, got %v", arr.DataType())
+		}
+		typed := arr.(*array.Dictionary)
+		if typed.Len() != 6 {
+			t.Errorf("expected 6, got %d", typed.Len())
+		}
+		if typed.Dictionary().Len() != 3 {
+			t.Errorf("expected 3 unique, got %d", typed.Dictionary().Len())
+		}
+	})
+
+	t.Run("index_type_is_int32", func(t *testing.T) {
+		vals := []string{"x", "y", "z"}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		dt := arr.DataType().(*arrow.DictionaryType)
+		if dt.IndexType.ID() != arrow.INT32 {
+			t.Errorf("expected INT32 index, got %v", dt.IndexType)
+		}
+	})
+}
+
+// TestBuildRunEndEncodedArray checks that tagOpts{REE: true} yields a
+// RUN_END_ENCODED array with the expected logical length, run ends and run
+// values for repeated, single-run and all-distinct inputs.
+func TestBuildRunEndEncodedArray(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	t.Run("int32_runs", func(t *testing.T) {
+		vals := []int32{1, 1, 1, 2, 2, 3}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.RUN_END_ENCODED {
+			t.Fatalf("expected RUN_END_ENCODED, got %v", arr.DataType())
+		}
+		ree := arr.(*array.RunEndEncoded)
+		if ree.Len() != 6 {
+			t.Errorf("expected 6, got %d", ree.Len())
+		}
+		runEnds := ree.RunEndsArr().(*array.Int32)
+		if runEnds.Len() != 3 {
+			t.Errorf("expected 3 runs, got %d", runEnds.Len())
+		}
+		// Run ends are exclusive end offsets: [0,3) [3,5) [5,6).
+		if runEnds.Value(0) != 3 || runEnds.Value(1) != 5 || runEnds.Value(2) != 6 {
+			t.Errorf("unexpected run ends: %d %d %d",
+				runEnds.Value(0), runEnds.Value(1), runEnds.Value(2))
+		}
+		values := ree.Values().(*array.Int32)
+		if values.Len() != 3 {
+			t.Errorf("expected 3 values, got %d", values.Len())
+		}
+		if values.Value(0) != 1 || values.Value(1) != 2 || values.Value(2) != 3 {
+			t.Errorf("unexpected values: %d %d %d",
+				values.Value(0), values.Value(1), values.Value(2))
+		}
+	})
+
+	t.Run("string_runs", func(t *testing.T) {
+		vals := []string{"a", "a", "b", "b", "b", "c"}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.RUN_END_ENCODED {
+			t.Fatalf("expected RUN_END_ENCODED, got %v", arr.DataType())
+		}
+		ree := arr.(*array.RunEndEncoded)
+		if ree.Len() != 6 {
+			t.Errorf("expected 6, got %d", ree.Len())
+		}
+		if ree.RunEndsArr().Len() != 3 {
+			t.Errorf("expected 3 runs, got %d", ree.RunEndsArr().Len())
+		}
+	})
+
+	t.Run("single_run", func(t *testing.T) {
+		vals := []int32{42, 42, 42}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		ree := arr.(*array.RunEndEncoded)
+		if ree.Len() != 3 {
+			t.Errorf("expected 3, got %d", ree.Len())
+		}
+		runEnds := ree.RunEndsArr().(*array.Int32)
+		if runEnds.Len() != 1 || runEnds.Value(0) != 3 {
+			t.Errorf("expected 1 run ending at 3, got %d runs, end=%d",
+				runEnds.Len(), runEnds.Value(0))
+		}
+	})
+
+	t.Run("all_distinct", func(t *testing.T) {
+		vals := []int32{1, 2, 3, 4, 5}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		ree := arr.(*array.RunEndEncoded)
+		if ree.Len() != 5 {
+			t.Errorf("expected 5, got %d", ree.Len())
+		}
+		if ree.RunEndsArr().Len() != 5 {
+			t.Errorf("expected 5 runs for all-distinct, got %d", ree.RunEndsArr().Len())
+		}
+	})
+}
+
+// TestBuildListViewArray checks that tagOpts{ListView: true} yields a
+// LIST_VIEW array, that a nil inner slice becomes a null entry, and that all
+// child values are emitted.
+func TestBuildListViewArray(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	t.Run("int32_listview", func(t *testing.T) {
+		vals := [][]int32{{1, 2, 3}, {4, 5}, {6}}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.LIST_VIEW {
+			t.Fatalf("expected LIST_VIEW, got %v", arr.DataType())
+		}
+		typed := arr.(*array.ListView)
+		if typed.Len() != 3 {
+			t.Errorf("expected 3, got %d", typed.Len())
+		}
+	})
+
+	t.Run("null_entry", func(t *testing.T) {
+		vals := [][]int32{{1, 2}, nil, {3}}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if !arr.IsNull(1) {
+			t.Error("expected index 1 to be null")
+		}
+	})
+
+	t.Run("string_listview", func(t *testing.T) {
+		vals := [][]string{{"hello", "world"}, {"foo"}, {"a", "b", "c"}}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		if arr.DataType().ID() != arrow.LIST_VIEW {
+			t.Fatalf("expected LIST_VIEW, got %v", arr.DataType())
+		}
+		if arr.Len() != 3 {
+			t.Errorf("expected 3, got %d", arr.Len())
+		}
+	})
+
+	t.Run("total_values", func(t *testing.T) {
+		vals := [][]int32{{10, 20}, {30}}
+		arr, err := buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer arr.Release()
+		allVals := arr.(*array.ListView).ListValues().(*array.Int32)
+		if allVals.Len() != 3 {
+			t.Errorf("expected 3 total values, got %d", allVals.Len())
+		}
+	})
+}
diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go
new file mode 100644
index 00000000..2df6196a
--- /dev/null
+++ b/arrow/arreflect/reflect_infer.go
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package arreflect
+
+import (
+	"fmt"
+	"reflect"
+	"time"
+
+	"github.com/apache/arrow-go/v18/arrow"
+	"github.com/apache/arrow-go/v18/arrow/decimal"
+	"github.com/apache/arrow-go/v18/arrow/decimal128"
+	"github.com/apache/arrow-go/v18/arrow/decimal256"
+)
+
+// Reflect types of the Go types that map to a dedicated Arrow type even
+// though their reflect.Kind (struct, slice or integer) would otherwise select
+// a generic mapping.
+var (
+	typeOfTime      = reflect.TypeOf(time.Time{})
+	typeOfDuration  = reflect.TypeOf(time.Duration(0))
+	typeOfDec32     = reflect.TypeOf(decimal.Decimal32(0))
+	typeOfDec64     = reflect.TypeOf(decimal.Decimal64(0))
+	typeOfDec128    = reflect.TypeOf(decimal128.Num{})
+	typeOfDec256    = reflect.TypeOf(decimal256.Num{})
+	typeOfByteSlice = reflect.TypeOf([]byte{})
+)
+
+// inferPrimitiveArrowType maps a Go scalar type to its Arrow data type.
+//
+// Pointers are stripped first. Matching is by exact type identity, so only
+// the listed types are accepted; any other type, including int, uint and
+// user-defined named types, yields an error. time.Time maps to a nanosecond
+// timestamp in UTC, time.Duration to a nanosecond duration, and each decimal
+// type to its maximum precision with scale 0.
+func inferPrimitiveArrowType(t reflect.Type) (arrow.DataType, error) {
+	for t.Kind() == reflect.Ptr {
+		t = t.Elem()
+	}
+
+	switch t {
+	case reflect.TypeOf(int8(0)):
+		return arrow.PrimitiveTypes.Int8, nil
+	case reflect.TypeOf(int16(0)):
+		return arrow.PrimitiveTypes.Int16, nil
+	case reflect.TypeOf(int32(0)):
+		return arrow.PrimitiveTypes.Int32, nil
+	case reflect.TypeOf(int64(0)):
+		return arrow.PrimitiveTypes.Int64, nil
+	case reflect.TypeOf(uint8(0)):
+		return arrow.PrimitiveTypes.Uint8, nil
+	case reflect.TypeOf(uint16(0)):
+		return arrow.PrimitiveTypes.Uint16, nil
+	case reflect.TypeOf(uint32(0)):
+		return arrow.PrimitiveTypes.Uint32, nil
+	case reflect.TypeOf(uint64(0)):
+		return arrow.PrimitiveTypes.Uint64, nil
+	case reflect.TypeOf(float32(0)):
+		return arrow.PrimitiveTypes.Float32, nil
+	case reflect.TypeOf(float64(0)):
+		return arrow.PrimitiveTypes.Float64, nil
+	case reflect.TypeOf(false):
+		return arrow.FixedWidthTypes.Boolean, nil
+	case reflect.TypeOf(""):
+		return arrow.BinaryTypes.String, nil
+	case typeOfByteSlice:
+		return arrow.BinaryTypes.Binary, nil
+	case typeOfTime:
+		return &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "UTC"}, nil
+	case typeOfDuration:
+		return &arrow.DurationType{Unit: arrow.Nanosecond}, nil
+	case typeOfDec128:
+		return &arrow.Decimal128Type{Precision: 38, Scale: 0}, nil
+	case typeOfDec32:
+		return &arrow.Decimal32Type{Precision: 9, Scale: 0}, nil
+	case typeOfDec64:
+		return &arrow.Decimal64Type{Precision: 18, Scale: 0}, nil
+	case typeOfDec256:
+		return &arrow.Decimal256Type{Precision: 76, Scale: 0}, nil
+	default:
+		return nil, fmt.Errorf("unsupported Go type for Arrow inference: %v", t)
+	}
+}
+
+// inferArrowType maps an arbitrary Go type to its Arrow data type.
+//
+// Pointers are stripped first. Slices map to List, arrays to FixedSizeList,
+// maps to Map and structs to Struct, recursing into element, key and field
+// types. Everything else is delegated to inferPrimitiveArrowType.
+//
+// []byte, time.Time, decimal128.Num and decimal256.Num are checked before
+// the kind switch. Their kinds are slice and struct, so without that check
+// they would be inferred as a list of uint8 or as a struct built from their
+// internal layout instead of Binary, Timestamp and Decimal.
+func inferArrowType(t reflect.Type) (arrow.DataType, error) {
+	for t.Kind() == reflect.Ptr {
+		t = t.Elem()
+	}
+
+	switch t {
+	case typeOfByteSlice, typeOfTime, typeOfDec128, typeOfDec256:
+		return inferPrimitiveArrowType(t)
+	}
+
+	switch t.Kind() {
+	case reflect.Slice:
+		elemDT, err := inferArrowType(t.Elem())
+		if err != nil {
+			return nil, err
+		}
+		return arrow.ListOf(elemDT), nil
+
+	case reflect.Array:
+		elemDT, err := inferArrowType(t.Elem())
+		if err != nil {
+			return nil, err
+		}
+		return arrow.FixedSizeListOf(int32(t.Len()), elemDT), nil
+
+	case reflect.Map:
+		keyDT, err := inferArrowType(t.Key())
+		if err != nil {
+			return nil, err
+		}
+		valDT, err := inferArrowType(t.Elem())
+		if err != nil {
+			return nil, err
+		}
+		return arrow.MapOf(keyDT, valDT), nil
+
+	case reflect.Struct:
+		return inferStructType(t)
+
+	default:
+		return inferPrimitiveArrowType(t)
+	}
+}
+
+// inferStructType builds the Arrow struct type for the Go struct type t.
+//
+// Pointers are stripped first; a non-struct type is an error. The field list
+// comes from cachedStructFields (defined elsewhere in the package). Each
+// field's Arrow type is inferred from its Go type. When the field carries
+// decimal options and is one of the decimal types, the inferred type is
+// replaced by one with the requested precision and scale.
+func inferStructType(t reflect.Type) (*arrow.StructType, error) {
+	for t.Kind() == reflect.Ptr {
+		t = t.Elem()
+	}
+	if t.Kind() != reflect.Struct {
+		return nil, fmt.Errorf("inferStructType: expected struct, got %v", t)
+	}
+
+	fields := cachedStructFields(t)
+	arrowFields := make([]arrow.Field, 0, len(fields))
+
+	for _, fm := range fields {
+		// The pointer-free field type is what the decimal override matches on.
+		origType := fm.Type
+		for origType.Kind() == reflect.Ptr {
+			origType = origType.Elem()
+		}
+
+		dt, err := inferArrowType(fm.Type)
+		if err != nil {
+			return nil, fmt.Errorf("inferStructType: field %q: %w", fm.Name, err)
+		}
+
+		if fm.Opts.HasDecimalOpts {
+			switch origType {
+			case typeOfDec32:
+				dt = &arrow.Decimal32Type{Precision: fm.Opts.DecimalPrecision, Scale: fm.Opts.DecimalScale}
+			case typeOfDec64:
+				dt = &arrow.Decimal64Type{Precision: fm.Opts.DecimalPrecision, Scale: fm.Opts.DecimalScale}
+			case typeOfDec128:
+				dt = &arrow.Decimal128Type{
+					Precision: fm.Opts.DecimalPrecision,
+					Scale:     fm.Opts.DecimalScale,
+				}
+			case typeOfDec256:
+				dt = &arrow.Decimal256Type{
+					Precision: fm.Opts.DecimalPrecision,
+					Scale:     fm.Opts.DecimalScale,
+				}
+			}
+		}
+
+		arrowFields = append(arrowFields, arrow.Field{
+			Name:     fm.Name,
+			Type:     dt,
+			Nullable: fm.Nullable,
+		})
+	}
+
+	return arrow.StructOf(arrowFields...), nil
+}
+
+// InferArrowSchema returns the Arrow schema for the struct type T, with one
+// schema field per field of the inferred struct type and no schema metadata.
+// T may be a struct or a pointer to a struct; any other type is an error.
+func InferArrowSchema[T any]() (*arrow.Schema, error) {
+	t := reflect.TypeFor[T]()
+	for t.Kind() == reflect.Ptr {
+		t = t.Elem()
+	}
+	if t.Kind() != reflect.Struct {
+		return nil, fmt.Errorf("InferArrowSchema: T must be a struct type, got %v", t)
+	}
+	st, err := inferStructType(t)
+	if err != nil {
+		return nil, err
+	}
+	fields := make([]arrow.Field, st.NumFields())
+	for i := 0; i < st.NumFields(); i++ {
+		fields[i] = st.Field(i)
+	}
+	return arrow.NewSchema(fields, nil), nil
+}
+
+// InferArrowType returns the Arrow data type that corresponds to the Go type
+// T, or an error if T (or a type nested inside it) has no Arrow mapping.
+func InferArrowType[T any]() (arrow.DataType, error) {
+	t := reflect.TypeFor[T]()
+	return inferArrowType(t)
+}
diff --git a/arrow/arreflect/reflect_infer_test.go b/arrow/arreflect/reflect_infer_test.go
new file mode 100644
index 00000000..17262eab
--- /dev/null
+++ b/arrow/arreflect/reflect_infer_test.go
@@ -0,0 +1,415 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package arreflect

import (
	"reflect"
	"testing"
	"time"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/decimal"
	"github.com/apache/arrow-go/v18/arrow/decimal128"
	"github.com/apache/arrow-go/v18/arrow/decimal256"
)

// TestInferPrimitiveArrowType is a table-driven check of
// inferPrimitiveArrowType: every supported scalar Go type must yield the
// expected Arrow type ID, a pointer must be looked through, and an unsupported
// type (chan int) must produce an error.
func TestInferPrimitiveArrowType(t *testing.T) {
	cases := []struct {
		name    string
		goType  reflect.Type
		wantID  arrow.Type
		wantErr bool
	}{
		{"int8", reflect.TypeOf(int8(0)), arrow.INT8, false},
		{"int16", reflect.TypeOf(int16(0)), arrow.INT16, false},
		{"int32", reflect.TypeOf(int32(0)), arrow.INT32, false},
		{"int64", reflect.TypeOf(int64(0)), arrow.INT64, false},
		{"uint8", reflect.TypeOf(uint8(0)), arrow.UINT8, false},
		{"uint16", reflect.TypeOf(uint16(0)), arrow.UINT16, false},
		{"uint32", reflect.TypeOf(uint32(0)), arrow.UINT32, false},
		{"uint64", reflect.TypeOf(uint64(0)), arrow.UINT64, false},
		{"float32", reflect.TypeOf(float32(0)), arrow.FLOAT32, false},
		{"float64", reflect.TypeOf(float64(0)), arrow.FLOAT64, false},
		{"bool", reflect.TypeOf(false), arrow.BOOL, false},
		{"string", reflect.TypeOf(""), arrow.STRING, false},
		{"[]byte", reflect.TypeOf([]byte{}), arrow.BINARY, false},
		{"time.Time", reflect.TypeOf(time.Time{}), arrow.TIMESTAMP, false},
		{"time.Duration", reflect.TypeOf(time.Duration(0)), arrow.DURATION, false},
		{"decimal128.Num", reflect.TypeOf(decimal128.Num{}), arrow.DECIMAL128, false},
		{"decimal256.Num", reflect.TypeOf(decimal256.Num{}), arrow.DECIMAL256, false},
		{"decimal.Decimal32", reflect.TypeOf(decimal.Decimal32(0)), arrow.DECIMAL32, false},
		{"decimal.Decimal64", reflect.TypeOf(decimal.Decimal64(0)), arrow.DECIMAL64, false},
		{"*int32 pointer transparent", reflect.TypeOf((*int32)(nil)), arrow.INT32, false},
		{"chan int unsupported", reflect.TypeOf(make(chan int)), 0, true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := inferPrimitiveArrowType(tc.goType)
			if tc.wantErr {
				if err == nil {
					t.Fatalf("expected error, got nil (type: %v)", got)
				}
				return
			}
			if err != nil {
				t.Fatalf("unexpected error: %v", err)
			}
			if got.ID() != tc.wantID {
				t.Errorf("got ID %v, want %v", got.ID(), tc.wantID)
			}
		})
	}
}

// TestInferArrowType checks inferArrowType on composite Go types: slices map
// to LIST, arrays to FIXED_SIZE_LIST with the array length, maps to MAP,
// structs to STRUCT, nesting is followed, and a pointer to a slice is looked
// through.
func TestInferArrowType(t *testing.T) {
	t.Run("[]int32 is LIST", func(t *testing.T) {
		dt, err := inferArrowType(reflect.TypeOf([]int32{}))
		if err != nil {
			t.Fatal(err)
		}
		if dt.ID() != arrow.LIST {
			t.Errorf("got %v, want LIST", dt.ID())
		}
	})

	t.Run("[3]float64 is FIXED_SIZE_LIST size 3", func(t *testing.T) {
		dt, err := inferArrowType(reflect.TypeOf([3]float64{}))
		if err != nil {
			t.Fatal(err)
		}
		if dt.ID() != arrow.FIXED_SIZE_LIST {
			t.Errorf("got %v, want FIXED_SIZE_LIST", dt.ID())
		}
		fsl := dt.(*arrow.FixedSizeListType)
		if fsl.Len() != 3 {
			t.Errorf("got size %d, want 3", fsl.Len())
		}
	})

	t.Run("map[string]int64 is MAP", func(t *testing.T) {
		dt, err := inferArrowType(reflect.TypeOf(map[string]int64{}))
		if err != nil {
			t.Fatal(err)
		}
		if dt.ID() != arrow.MAP {
			t.Errorf("got %v, want MAP", dt.ID())
		}
	})

	t.Run("struct with 2 fields is STRUCT", func(t *testing.T) {
		type S struct {
			Name string
			Age  int32
		}
		dt, err := inferArrowType(reflect.TypeOf(S{}))
		if err != nil {
			t.Fatal(err)
		}
		if dt.ID() != arrow.STRUCT {
			t.Errorf("got %v, want STRUCT", dt.ID())
		}
		st := dt.(*arrow.StructType)
		if st.NumFields() != 2 {
			t.Errorf("got %d fields, want 2", st.NumFields())
		}
	})

	t.Run("[]map[string]struct{Score float64} nested", func(t *testing.T) {
		type Inner struct {
			Score float64
		}
		dt, err := inferArrowType(reflect.TypeOf([]map[string]Inner{}))
		if err != nil {
			t.Fatal(err)
		}
		if dt.ID() != arrow.LIST {
			t.Errorf("got %v, want LIST", dt.ID())
		}
		lt := dt.(*arrow.ListType)
		if lt.Elem().ID() != arrow.MAP {
			t.Errorf("list elem got %v, want MAP", lt.Elem().ID())
		}
		mt := lt.Elem().(*arrow.MapType)
		if mt.ValueType().ID() != arrow.STRUCT {
			t.Errorf("map value got %v, want STRUCT", mt.ValueType().ID())
		}
	})

	t.Run("*[]string pointer to slice is LIST", func(t *testing.T) {
		dt, err := inferArrowType(reflect.TypeOf((*[]string)(nil)))
		if err != nil {
			t.Fatal(err)
		}
		if dt.ID() != arrow.LIST {
			t.Errorf("got %v, want LIST", dt.ID())
		}
	})
}

// TestInferStructType checks inferStructType: field names and types, pointer
// fields being nullable, the `arrow:"-"` and custom-name tags, the
// decimal(p,s) tag for the 32-, 128- and 256-bit decimal types, and the error
// returned for a non-struct input.
func TestInferStructType(t *testing.T) {
	t.Run("simple struct field names and types", func(t *testing.T) {
		type S struct {
			Name  string
			Score float32
		}
		st, err := inferStructType(reflect.TypeOf(S{}))
		if err != nil {
			t.Fatal(err)
		}
		if st.NumFields() != 2 {
			t.Fatalf("got %d fields, want 2", st.NumFields())
		}
		if st.Field(0).Name != "Name" || st.Field(0).Type.ID() != arrow.STRING {
			t.Errorf("field 0: got %v/%v, want Name/STRING", st.Field(0).Name, st.Field(0).Type.ID())
		}
		if st.Field(1).Name != "Score" || st.Field(1).Type.ID() != arrow.FLOAT32 {
			t.Errorf("field 1: got %v/%v, want Score/FLOAT32", st.Field(1).Name, st.Field(1).Type.ID())
		}
	})

	t.Run("pointer fields are nullable", func(t *testing.T) {
		type S struct {
			ID    int32
			Label *string
		}
		st, err := inferStructType(reflect.TypeOf(S{}))
		if err != nil {
			t.Fatal(err)
		}
		if st.Field(0).Nullable {
			t.Errorf("ID should not be nullable")
		}
		if !st.Field(1).Nullable {
			t.Errorf("Label should be nullable")
		}
	})

	t.Run("arrow:\"-\" tagged field is excluded", func(t *testing.T) {
		type S struct {
			Keep   string
			Hidden int32 `arrow:"-"`
		}
		st, err := inferStructType(reflect.TypeOf(S{}))
		if err != nil {
			t.Fatal(err)
		}
		if st.NumFields() != 1 {
			t.Errorf("got %d fields, want 1", st.NumFields())
		}
		if st.Field(0).Name != "Keep" {
			t.Errorf("got field name %q, want Keep", st.Field(0).Name)
		}
	})

	t.Run("arrow custom name tag", func(t *testing.T) {
		type S struct {
			GoName int64 `arrow:"custom_name"`
		}
		st, err := inferStructType(reflect.TypeOf(S{}))
		if err != nil {
			t.Fatal(err)
		}
		if st.Field(0).Name != "custom_name" {
			t.Errorf("got %q, want custom_name", st.Field(0).Name)
		}
	})

	t.Run("decimal128 with precision/scale tag", func(t *testing.T) {
		type S struct {
			Amount decimal128.Num `arrow:",decimal(18,2)"`
		}
		st, err := inferStructType(reflect.TypeOf(S{}))
		if err != nil {
			t.Fatal(err)
		}
		dt := st.Field(0).Type
		if dt.ID() != arrow.DECIMAL128 {
			t.Fatalf("got %v, want DECIMAL128", dt.ID())
		}
		d128 := dt.(*arrow.Decimal128Type)
		if d128.Precision != 18 || d128.Scale != 2 {
			t.Errorf("got precision=%d scale=%d, want 18,2", d128.Precision, d128.Scale)
		}
	})

	t.Run("decimal256 with precision/scale tag", func(t *testing.T) {
		type S struct {
			Amount decimal256.Num `arrow:",decimal(40,5)"`
		}
		st, err := inferStructType(reflect.TypeOf(S{}))
		if err != nil {
			t.Fatal(err)
		}
		dt := st.Field(0).Type
		if dt.ID() != arrow.DECIMAL256 {
			t.Fatalf("got %v, want DECIMAL256", dt.ID())
		}
		d256 := dt.(*arrow.Decimal256Type)
		if d256.Precision != 40 || d256.Scale != 5 {
			t.Errorf("got precision=%d scale=%d, want 40,5", d256.Precision, d256.Scale)
		}
	})

	t.Run("decimal32 with precision/scale tag", func(t *testing.T) {
		type S struct {
			Amount decimal.Decimal32 `arrow:",decimal(9,2)"`
		}
		st, err := inferStructType(reflect.TypeOf(S{}))
		if err != nil {
			t.Fatal(err)
		}
		dt := st.Field(0).Type
		if dt.ID() != arrow.DECIMAL32 {
			t.Fatalf("got %v, want DECIMAL32", dt.ID())
		}
		d32 := dt.(*arrow.Decimal32Type)
		if d32.Precision != 9 || d32.Scale != 2 {
			t.Errorf("got precision=%d scale=%d, want 9,2", d32.Precision, d32.Scale)
		}
	})

	t.Run("non-struct returns error", func(t *testing.T) {
		_, err := inferStructType(reflect.TypeOf(42))
		if err == nil {
			t.Error("expected error for non-struct, got nil")
		}
	})
}

// TestInferArrowSchema checks the exported InferArrowSchema: field order,
// names and types, nullability of pointer fields, the `arrow:"-"` and
// custom-name tags, and the error for a non-struct type parameter.
func TestInferArrowSchema(t *testing.T) {
	t.Run("simple struct mixed fields", func(t *testing.T) {
		type S struct {
			Name  string
			Age   int32
			Score float64
		}
		schema, err := InferArrowSchema[S]()
		if err != nil {
			t.Fatal(err)
		}
		if schema.NumFields() != 3 {
			t.Fatalf("got %d fields, want 3", schema.NumFields())
		}
		if schema.Field(0).Name != "Name" || schema.Field(0).Type.ID() != arrow.STRING {
			t.Errorf("field 0: got %v/%v, want Name/STRING", schema.Field(0).Name, schema.Field(0).Type.ID())
		}
		if schema.Field(1).Name != "Age" || schema.Field(1).Type.ID() != arrow.INT32 {
			t.Errorf("field 1: got %v/%v, want Age/INT32", schema.Field(1).Name, schema.Field(1).Type.ID())
		}
		if schema.Field(2).Name != "Score" || schema.Field(2).Type.ID() != arrow.FLOAT64 {
			t.Errorf("field 2: got %v/%v, want Score/FLOAT64", schema.Field(2).Name, schema.Field(2).Type.ID())
		}
	})

	t.Run("pointer fields are nullable", func(t *testing.T) {
		type S struct {
			ID    int32
			Label *string
		}
		schema, err := InferArrowSchema[S]()
		if err != nil {
			t.Fatal(err)
		}
		if schema.Field(0).Nullable {
			t.Errorf("ID should not be nullable")
		}
		if !schema.Field(1).Nullable {
			t.Errorf("Label should be nullable")
		}
	})

	t.Run("arrow:\"-\" tag excludes field", func(t *testing.T) {
		type S struct {
			Keep   string
			Hidden int32 `arrow:"-"`
		}
		schema, err := InferArrowSchema[S]()
		if err != nil {
			t.Fatal(err)
		}
		if schema.NumFields() != 1 {
			t.Errorf("got %d fields, want 1", schema.NumFields())
		}
		if schema.Field(0).Name != "Keep" {
			t.Errorf("got field name %q, want Keep", schema.Field(0).Name)
		}
	})

	t.Run("arrow custom name tag", func(t *testing.T) {
		type S struct {
			GoName int64 `arrow:"custom_name"`
		}
		schema, err := InferArrowSchema[S]()
		if err != nil {
			t.Fatal(err)
		}
		if schema.Field(0).Name != "custom_name" {
			t.Errorf("got %q, want custom_name", schema.Field(0).Name)
		}
	})

	t.Run("non-struct type returns error", func(t *testing.T) {
		_, err := InferArrowSchema[int]()
		if err == nil {
			t.Error("expected error for non-struct, got nil")
		}
	})
}

// TestInferArrowTypePublic checks the exported InferArrowType for a scalar, a
// slice, a map and a struct type parameter.
func TestInferArrowTypePublic(t *testing.T) {
	t.Run("int32 is INT32", func(t *testing.T) {
		dt, err := InferArrowType[int32]()
		if err != nil {
			t.Fatal(err)
		}
		if dt.ID() != arrow.INT32 {
			t.Errorf("got %v, want INT32", dt.ID())
		}
	})

	t.Run("[]string is LIST", func(t *testing.T) {
		dt, err := InferArrowType[[]string]()
		if err != nil {
			t.Fatal(err)
		}
		if dt.ID() != arrow.LIST {
			t.Errorf("got %v, want LIST", dt.ID())
		}
	})

	t.Run("map[string]float64 is MAP", func(t *testing.T) {
		dt, err := InferArrowType[map[string]float64]()
		if err != nil {
			t.Fatal(err)
		}
		if dt.ID() != arrow.MAP {
			t.Errorf("got %v, want MAP", dt.ID())
		}
	})

	t.Run("struct{X int32} is STRUCT", func(t *testing.T) {
		type S struct{ X int32 }
		dt, err := InferArrowType[S]()
		if err != nil {
			t.Fatal(err)
		}
		if dt.ID() != arrow.STRUCT {
			t.Errorf("got %v, want STRUCT", dt.ID())
		}
	})
}
diff --git a/arrow/arreflect/reflect_integration_test.go b/arrow/arreflect/reflect_integration_test.go
new file mode 100644
index 00000000..6d206fe8
--- /dev/null
+++ b/arrow/arreflect/reflect_integration_test.go
@@ -0,0 +1,514 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package arreflect

import (
	"reflect"
	"testing"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/memory"
)

// integOrderItem is a nested fixture combining a string, a map and a
// fixed-size array field.
type integOrderItem struct {
	Product string
	Tags    map[string]string
	Ratings [5]float32
}

// integOrder is the top-level fixture for the nested round-trip test; Items
// is a slice of structs and may be nil.
type integOrder struct {
	ID    int64
	Items []integOrderItem
}

// integLargeRow is a flat two-column fixture used by the large-array,
// cache-reuse, record-batch and benchmark cases.
type integLargeRow struct {
	X int32
	Y float64
}

// integNullable has only pointer fields, used for the all-null case.
type integNullable struct {
	A *string
	B *int32
	C *float64
}

// integMixed mixes value and pointer fields of the same underlying types.
type integMixed struct {
	Required   string
	Optional   *string
	Count      int32
	MaybeCount *int32
}

// integBase is embedded in integExtended.
type integBase struct {
	ID int64
}

// integExtended embeds integBase and uses the rename and skip tags.
type integExtended struct {
	integBase
	Name string `arrow:"name"`
	Skip string `arrow:"-"`
}

// TestReflectIntegration runs end-to-end round trips through FromGoSlice /
// ToGoSlice and RecordFromSlice / RecordToSlice on the fixtures above, and
// compares InferArrowSchema against the type of a built array.
func TestReflectIntegration(t *testing.T) {
	mem := memory.NewGoAllocator()

	t.Run("complex nested round-trip", func(t *testing.T) {
		orders := []integOrder{
			{
				ID: 1001,
				Items: []integOrderItem{
					{Product: "widget", Tags: map[string]string{"color": "red"}, Ratings: [5]float32{4.5, 3.0, 5.0, 4.0, 3.5}},
					{Product: "gadget", Tags: map[string]string{"size": "large"}, Ratings: [5]float32{1.0, 2.0, 3.0, 4.0, 5.0}},
				},
			},
			{
				ID: 1002,
				Items: []integOrderItem{
					{Product: "thingamajig", Tags: map[string]string{"material": "steel", "finish": "matte"}, Ratings: [5]float32{5.0, 5.0, 5.0, 5.0, 5.0}},
				},
			},
			{
				ID:    1003,
				Items: nil,
			},
			{
				ID: 1004,
				Items: []integOrderItem{
					{Product: "doohickey", Tags: map[string]string{"brand": "acme"}, Ratings: [5]float32{2.5, 3.5, 4.5, 1.5, 0.5}},
					{Product: "whatchamacallit", Tags: map[string]string{"type": "premium"}, Ratings: [5]float32{3.0, 3.0, 3.0, 3.0, 3.0}},
					{Product: "thingy", Tags: map[string]string{"category": "misc"}, Ratings: [5]float32{1.0, 1.0, 1.0, 1.0, 1.0}},
				},
			},
			{
				ID: 1005,
				Items: []integOrderItem{
					{Product: "sprocket", Tags: map[string]string{"grade": "A"}, Ratings: [5]float32{4.0, 4.0, 4.0, 4.0, 4.0}},
				},
			},
		}

		arr, err := FromGoSlice(orders, mem)
		if err != nil {
			t.Fatalf("FromGoSlice: %v", err)
		}
		defer arr.Release()

		output, err := ToGoSlice[integOrder](arr)
		if err != nil {
			t.Fatalf("ToGoSlice: %v", err)
		}

		if len(output) != len(orders) {
			t.Fatalf("length mismatch: got %d, want %d", len(output), len(orders))
		}

		for i, want := range orders {
			got := output[i]
			if got.ID != want.ID {
				t.Errorf("[%d] ID: got %d, want %d", i, got.ID, want.ID)
			}
			// Lengths are compared rather than the slices themselves, so a
			// nil Items and an empty Items are treated as equal here.
			if len(got.Items) != len(want.Items) {
				t.Errorf("[%d] Items length: got %d, want %d", i, len(got.Items), len(want.Items))
				continue
			}
			for j, wantItem := range want.Items {
				gotItem := got.Items[j]
				if gotItem.Product != wantItem.Product {
					t.Errorf("[%d][%d] Product: got %q, want %q", i, j, gotItem.Product, wantItem.Product)
				}
				if !reflect.DeepEqual(gotItem.Ratings, wantItem.Ratings) {
					t.Errorf("[%d][%d] Ratings: got %v, want %v", i, j, gotItem.Ratings, wantItem.Ratings)
				}
				if !reflect.DeepEqual(gotItem.Tags, wantItem.Tags) {
					t.Errorf("[%d][%d] Tags: got %v, want %v", i, j, gotItem.Tags, wantItem.Tags)
				}
			}
		}
	})

	t.Run("large array round-trip", func(t *testing.T) {
		const n = 10000
		rows := make([]integLargeRow, n)
		for i := range rows {
			rows[i] = integLargeRow{X: int32(i), Y: float64(i) * 1.5}
		}

		arr, err := FromGoSlice(rows, mem)
		if err != nil {
			t.Fatalf("FromGoSlice: %v", err)
		}
		defer arr.Release()

		if arr.Len() != n {
			t.Fatalf("array length: got %d, want %d", arr.Len(), n)
		}

		output, err := ToGoSlice[integLargeRow](arr)
		if err != nil {
			t.Fatalf("ToGoSlice: %v", err)
		}

		if len(output) != n {
			t.Fatalf("output length: got %d, want %d", len(output), n)
		}
		for i, want := range rows {
			if output[i].X != want.X || output[i].Y != want.Y {
				t.Errorf("[%d] got %+v, want %+v", i, output[i], want)
			}
		}
	})

	t.Run("all-null fields", func(t *testing.T) {
		rows := []integNullable{
			{A: nil, B: nil, C: nil},
			{A: nil, B: nil, C: nil},
			{A: nil, B: nil, C: nil},
		}

		arr, err := FromGoSlice(rows, mem)
		if err != nil {
			t.Fatalf("FromGoSlice: %v", err)
		}
		defer arr.Release()

		output, err := ToGoSlice[integNullable](arr)
		if err != nil {
			t.Fatalf("ToGoSlice: %v", err)
		}

		if len(output) != 3 {
			t.Fatalf("length: got %d, want 3", len(output))
		}
		for i, got := range output {
			if got.A != nil {
				t.Errorf("[%d] A: expected nil, got non-nil", i)
			}
			if got.B != nil {
				t.Errorf("[%d] B: expected nil, got non-nil", i)
			}
			if got.C != nil {
				t.Errorf("[%d] C: expected nil, got non-nil", i)
			}
		}
	})

	t.Run("empty int32 slice", func(t *testing.T) {
		arr, err := FromGoSlice[int32]([]int32{}, mem)
		if err != nil {
			t.Fatalf("FromGoSlice: %v", err)
		}
		defer arr.Release()

		if arr.Len() != 0 {
			t.Errorf("array length: got %d, want 0", arr.Len())
		}

		output, err := ToGoSlice[int32](arr)
		if err != nil {
			t.Fatalf("ToGoSlice: %v", err)
		}
		// An empty array is expected to come back as an empty, non-nil slice.
		if output == nil {
			t.Error("ToGoSlice returned nil, want non-nil empty slice")
		}
		if len(output) != 0 {
			t.Errorf("output length: got %d, want 0", len(output))
		}
	})

	t.Run("empty struct slice", func(t *testing.T) {
		type simpleXY struct{ X int32 }
		arr, err := FromGoSlice[simpleXY]([]simpleXY{}, mem)
		if err != nil {
			t.Fatalf("FromGoSlice empty struct: %v", err)
		}
		defer arr.Release()

		if arr.Len() != 0 {
			t.Errorf("array length: got %d, want 0", arr.Len())
		}
		if arr.DataType().ID() != arrow.STRUCT {
			t.Errorf("expected STRUCT type for empty struct slice, got %v", arr.DataType())
		}
	})

	t.Run("mixed nullability round-trip", func(t *testing.T) {
		s1 := "hello"
		s2 := "world"
		c1 := int32(42)
		c3 := int32(99)

		rows := []integMixed{
			{Required: "first", Optional: &s1, Count: 10, MaybeCount: &c1},
			{Required: "second", Optional: nil, Count: 20, MaybeCount: nil},
			{Required: "third", Optional: &s2, Count: 30, MaybeCount: &c3},
			{Required: "fourth", Optional: nil, Count: 40, MaybeCount: nil},
		}

		arr, err := FromGoSlice(rows, mem)
		if err != nil {
			t.Fatalf("FromGoSlice: %v", err)
		}
		defer arr.Release()

		output, err := ToGoSlice[integMixed](arr)
		if err != nil {
			t.Fatalf("ToGoSlice: %v", err)
		}

		if len(output) != len(rows) {
			t.Fatalf("length: got %d, want %d", len(output), len(rows))
		}

		for i, want := range rows {
			got := output[i]
			if got.Required != want.Required {
				t.Errorf("[%d] Required: got %q, want %q", i, got.Required, want.Required)
			}
			if got.Count != want.Count {
				t.Errorf("[%d] Count: got %d, want %d", i, got.Count, want.Count)
			}
			// Pointer fields are compared by nil-ness first and then by
			// pointee, because the round trip yields fresh pointers.
			if (got.Optional == nil) != (want.Optional == nil) {
				t.Errorf("[%d] Optional nil mismatch: got nil=%v, want nil=%v", i, got.Optional == nil, want.Optional == nil)
			} else if got.Optional != nil && *got.Optional != *want.Optional {
				t.Errorf("[%d] Optional value: got %q, want %q", i, *got.Optional, *want.Optional)
			}
			if (got.MaybeCount == nil) != (want.MaybeCount == nil) {
				t.Errorf("[%d] MaybeCount nil mismatch: got nil=%v, want nil=%v", i, got.MaybeCount == nil, want.MaybeCount == nil)
			} else if got.MaybeCount != nil && *got.MaybeCount != *want.MaybeCount {
				t.Errorf("[%d] MaybeCount value: got %d, want %d", i, *got.MaybeCount, *want.MaybeCount)
			}
		}
	})

	t.Run("embedded struct with tags", func(t *testing.T) {
		rows := []integExtended{
			{integBase: integBase{ID: 1}, Name: "alice"},
			{integBase: integBase{ID: 2}, Name: "bob"},
			{integBase: integBase{ID: 3}, Name: "carol"},
		}

		arr, err := FromGoSlice(rows, mem)
		if err != nil {
			t.Fatalf("FromGoSlice: %v", err)
		}
		defer arr.Release()

		st, ok := arr.DataType().(*arrow.StructType)
		if !ok {
			t.Fatalf("expected StructType, got %T", arr.DataType())
		}

		// The embedded ID must be promoted, Name renamed to "name", and the
		// "-"-tagged Skip field absent.
		var hasID, hasName, hasSkip bool
		for i := 0; i < st.NumFields(); i++ {
			switch st.Field(i).Name {
			case "ID":
				hasID = true
			case "name":
				hasName = true
			case "Skip":
				hasSkip = true
			}
		}
		if !hasID {
			t.Error("expected field 'ID' in schema")
		}
		if !hasName {
			t.Error("expected field 'name' in schema")
		}
		if hasSkip {
			t.Error("unexpected field 'Skip' in schema (should be skipped by arrow:\"-\" tag)")
		}

		output, err := ToGoSlice[integExtended](arr)
		if err != nil {
			t.Fatalf("ToGoSlice: %v", err)
		}

		if len(output) != len(rows) {
			t.Fatalf("length: got %d, want %d", len(output), len(rows))
		}
		for i, want := range rows {
			got := output[i]
			if got.ID != want.ID {
				t.Errorf("[%d] ID: got %d, want %d", i, got.ID, want.ID)
			}
			if got.Name != want.Name {
				t.Errorf("[%d] Name: got %q, want %q", i, got.Name, want.Name)
			}
			if got.Skip != "" {
				t.Errorf("[%d] Skip: expected empty string, got %q", i, got.Skip)
			}
		}
	})

	t.Run("schema consistency", func(t *testing.T) {
		orders := []integOrder{
			{ID: 1, Items: []integOrderItem{{Product: "a", Tags: map[string]string{"k": "v"}, Ratings: [5]float32{1, 2, 3, 4, 5}}}},
		}

		schema, err := InferArrowSchema[integOrder]()
		if err != nil {
			t.Fatalf("InferArrowSchema: %v", err)
		}

		arr, err := FromGoSlice(orders, mem)
		if err != nil {
			t.Fatalf("FromGoSlice: %v", err)
		}
		defer arr.Release()

		st, ok := arr.DataType().(*arrow.StructType)
		if !ok {
			t.Fatalf("expected StructType, got %T", arr.DataType())
		}

		if st.NumFields() != schema.NumFields() {
			t.Fatalf("field count mismatch: array has %d, schema has %d", st.NumFields(), schema.NumFields())
		}

		// Only field names are compared; field types and nullability are
		// not checked by this subtest.
		for i := 0; i < schema.NumFields(); i++ {
			schemaField := schema.Field(i)
			structField := st.Field(i)
			if structField.Name != schemaField.Name {
				t.Errorf("field[%d] name: array has %q, schema has %q", i, structField.Name, schemaField.Name)
			}
		}
	})

	t.Run("cache reuse without corruption", func(t *testing.T) {
		// Two arrays of the same Go type are built back to back and then
		// both decoded, so the second build must not disturb the first.
		batch1 := make([]integLargeRow, 3)
		for i := range batch1 {
			batch1[i] = integLargeRow{X: int32(i + 1), Y: float64(i+1) * 2.0}
		}

		arr1, err := FromGoSlice(batch1, mem)
		if err != nil {
			t.Fatalf("FromGoSlice batch1: %v", err)
		}
		defer arr1.Release()

		batch2 := make([]integLargeRow, 5)
		for i := range batch2 {
			batch2[i] = integLargeRow{X: int32(i * 10), Y: float64(i) * 3.14}
		}

		arr2, err := FromGoSlice(batch2, mem)
		if err != nil {
			t.Fatalf("FromGoSlice batch2: %v", err)
		}
		defer arr2.Release()

		out1, err := ToGoSlice[integLargeRow](arr1)
		if err != nil {
			t.Fatalf("ToGoSlice batch1: %v", err)
		}
		out2, err := ToGoSlice[integLargeRow](arr2)
		if err != nil {
			t.Fatalf("ToGoSlice batch2: %v", err)
		}

		if len(out1) != len(batch1) {
			t.Fatalf("batch1 length: got %d, want %d", len(out1), len(batch1))
		}
		if len(out2) != len(batch2) {
			t.Fatalf("batch2 length: got %d, want %d", len(out2), len(batch2))
		}

		for i, want := range batch1 {
			if out1[i] != want {
				t.Errorf("batch1[%d]: got %+v, want %+v", i, out1[i], want)
			}
		}
		for i, want := range batch2 {
			if out2[i] != want {
				t.Errorf("batch2[%d]: got %+v, want %+v", i, out2[i], want)
			}
		}
	})

	t.Run("record batch round-trip", func(t *testing.T) {
		rows := []integLargeRow{
			{X: 10, Y: 1.1},
			{X: 20, Y: 2.2},
			{X: 30, Y: 3.3},
			{X: 40, Y: 4.4},
			{X: 50, Y: 5.5},
		}

		rec, err := RecordFromSlice(rows, mem)
		if err != nil {
			t.Fatalf("RecordFromSlice: %v", err)
		}
		defer rec.Release()

		if rec.NumRows() != int64(len(rows)) {
			t.Fatalf("NumRows: got %d, want %d", rec.NumRows(), len(rows))
		}

		output, err := RecordToSlice[integLargeRow](rec)
		if err != nil {
			t.Fatalf("RecordToSlice: %v", err)
		}

		if len(output) != len(rows) {
			t.Fatalf("output length: got %d, want %d", len(output), len(rows))
		}

		if !reflect.DeepEqual(rows, output) {
			t.Errorf("record round-trip mismatch:\n got: %v\n want: %v", output, rows)
		}
	})
}

// BenchmarkReflectFromGoSlice measures FromGoSlice on 1000 integLargeRow
// values, releasing the resulting array on every iteration.
func BenchmarkReflectFromGoSlice(b *testing.B) {
	mem := memory.NewGoAllocator()
	rows := make([]integLargeRow, 1000)
	for i := range rows {
		rows[i] = integLargeRow{X: int32(i), Y: float64(i) * 1.5}
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		arr, err := FromGoSlice(rows, mem)
		if err != nil {
			b.Fatal(err)
		}
		arr.Release()
	}
}

// BenchmarkReflectToGoSlice measures ToGoSlice on a 1000-row struct array
// that is built once, outside the timed region.
func BenchmarkReflectToGoSlice(b *testing.B) {
	mem := memory.NewGoAllocator()
	rows := make([]integLargeRow, 1000)
	for i := range rows {
		rows[i] = integLargeRow{X: int32(i), Y: float64(i) * 1.5}
	}

	arr, err := FromGoSlice(rows, mem)
	if err != nil {
		b.Fatal(err)
	}
	defer arr.Release()

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		out, err := ToGoSlice[integLargeRow](arr)
		if err != nil {
			b.Fatal(err)
		}
		_ = out
	}
}
diff --git a/arrow/arreflect/reflect_public_test.go b/arrow/arreflect/reflect_public_test.go
new file mode 100644
index 00000000..75a2d459
--- /dev/null
+++ b/arrow/arreflect/reflect_public_test.go
@@ -0,0 +1,430 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package arreflect

import (
	"testing"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/memory"
)

// TestToGo checks single-element extraction with ToGo for int32, string and
// struct arrays, and how a null element is surfaced: nil for a pointer target,
// the zero value for a non-pointer target.
func TestToGo(t *testing.T) {
	mem := memory.NewGoAllocator()

	t.Run("int32 element 0", func(t *testing.T) {
		b := array.NewInt32Builder(mem)
		defer b.Release()
		b.AppendValues([]int32{10, 20, 30}, nil)
		arr := b.NewInt32Array()
		defer arr.Release()

		got, err := ToGo[int32](arr, 0)
		if err != nil {
			t.Fatal(err)
		}
		if got != 10 {
			t.Errorf("expected 10, got %d", got)
		}
	})

	t.Run("string element 1", func(t *testing.T) {
		b := array.NewStringBuilder(mem)
		defer b.Release()
		b.AppendValues([]string{"hello", "world"}, nil)
		arr := b.NewStringArray()
		defer arr.Release()

		got, err := ToGo[string](arr, 1)
		if err != nil {
			t.Fatal(err)
		}
		if got != "world" {
			t.Errorf("expected world, got %q", got)
		}
	})

	t.Run("struct element 0", func(t *testing.T) {
		type Person struct {
			Name string
			Age  int32
		}
		vals := []Person{{"Alice", 30}, {"Bob", 25}}
		arr, err := FromGoSlice(vals, mem)
		if err != nil {
			t.Fatal(err)
		}
		defer arr.Release()

		got, err := ToGo[Person](arr, 0)
		if err != nil {
			t.Fatal(err)
		}
		if got.Name != "Alice" || got.Age != 30 {
			t.Errorf("expected {Alice 30}, got %+v", got)
		}
	})

	t.Run("null element to *int32 is nil", func(t *testing.T) {
		b := array.NewInt32Builder(mem)
		defer b.Release()
		b.AppendNull()
		arr := b.NewInt32Array()
		defer arr.Release()

		got, err := ToGo[*int32](arr, 0)
		if err != nil {
			t.Fatal(err)
		}
		if got != nil {
			t.Errorf("expected nil pointer for null, got %v", *got)
		}
	})

	t.Run("null element to int32 is zero", func(t *testing.T) {
		b := array.NewInt32Builder(mem)
		defer b.Release()
		b.AppendNull()
		arr := b.NewInt32Array()
		defer arr.Release()

		got, err := ToGo[int32](arr, 0)
		if err != nil {
			t.Fatal(err)
		}
		if got != 0 {
			t.Errorf("expected 0 for null, got %d", got)
		}
	})
}

// TestToGoSlice checks whole-array conversion with ToGoSlice for int32, string
// and struct arrays, and that an empty array yields an empty, non-nil slice.
func TestToGoSlice(t *testing.T) {
	mem := memory.NewGoAllocator()

	t.Run("[]int32", func(t *testing.T) {
		b := array.NewInt32Builder(mem)
		defer b.Release()
		b.AppendValues([]int32{1, 2, 3}, nil)
		arr := b.NewInt32Array()
		defer arr.Release()

		got, err := ToGoSlice[int32](arr)
		if err != nil {
			t.Fatal(err)
		}
		want := []int32{1, 2, 3}
		if len(got) != len(want) {
			t.Fatalf("expected len %d, got %d", len(want), len(got))
		}
		for i, v := range want {
			if got[i] != v {
				t.Errorf("index %d: expected %d, got %d", i, v, got[i])
			}
		}
	})

	t.Run("[]string", func(t *testing.T) {
		b := array.NewStringBuilder(mem)
		defer b.Release()
		b.AppendValues([]string{"foo", "bar", "baz"}, nil)
		arr := b.NewStringArray()
		defer arr.Release()

		got, err := ToGoSlice[string](arr)
		if err != nil {
			t.Fatal(err)
		}
		want := []string{"foo", "bar", "baz"}
		if len(got) != len(want) {
			t.Fatalf("expected len %d, got %d", len(want), len(got))
		}
		for i, v := range want {
			if got[i] != v {
				t.Errorf("index %d: expected %q, got %q", i, v, got[i])
			}
		}
	})

	t.Run("[]struct{Name string}", func(t *testing.T) {
		type Row struct {
			Name string
		}
		vals := []Row{{"Alice"}, {"Bob"}, {"Charlie"}}
		arr, err := FromGoSlice(vals, mem)
		if err != nil {
			t.Fatal(err)
		}
		defer arr.Release()

		got, err := ToGoSlice[Row](arr)
		if err != nil {
			t.Fatal(err)
		}
		if len(got) != len(vals) {
			t.Fatalf("expected len %d, got %d", len(vals), len(got))
		}
		for i, want := range vals {
			if got[i].Name != want.Name {
				t.Errorf("index %d: expected %q, got %q", i, want.Name, got[i].Name)
			}
		}
	})

	t.Run("empty array gives empty slice", func(t *testing.T) {
		b := array.NewInt32Builder(mem)
		defer b.Release()
		arr := b.NewInt32Array()
		defer arr.Release()

		got, err := ToGoSlice[int32](arr)
		if err != nil {
			t.Fatal(err)
		}
		if got == nil {
			t.Error("expected non-nil empty slice, got nil")
		}
		if len(got) != 0 {
			t.Errorf("expected len 0, got %d", len(got))
		}
	})
}

// TestFromGoSlice checks array construction with FromGoSlice for int32,
// string, struct and pointer element types (a nil pointer becomes a null) and
// for an empty input slice.
func TestFromGoSlice(t *testing.T) {
	mem := memory.NewGoAllocator()

	t.Run("[]int32", func(t *testing.T) {
		arr, err := FromGoSlice([]int32{1, 2, 3}, mem)
		if err != nil {
			t.Fatal(err)
		}
		defer arr.Release()

		if arr.Len() != 3 {
			t.Fatalf("expected len 3, got %d", arr.Len())
		}
		typed := arr.(*array.Int32)
		for i, want := range []int32{1, 2, 3} {
			if typed.Value(i) != want {
				t.Errorf("index %d: expected %d, got %d", i, want, typed.Value(i))
			}
		}
	})

	t.Run("[]string", func(t *testing.T) {
		arr, err := FromGoSlice([]string{"a", "b"}, mem)
		if err != nil {
			t.Fatal(err)
		}
		defer arr.Release()

		if arr.Len() != 2 {
			t.Fatalf("expected len 2, got %d", arr.Len())
		}
		typed := arr.(*array.String)
		if typed.Value(0) != "a" || typed.Value(1) != "b" {
			t.Errorf("expected [a b], got [%s %s]", typed.Value(0), typed.Value(1))
		}
	})

	t.Run("[]struct{Name string; Score float64}", func(t *testing.T) {
		type Row struct {
			Name  string
			Score float64
		}
		vals := []Row{{"Alice", 9.5}, {"Bob", 8.0}}
		arr, err := FromGoSlice(vals, mem)
		if err != nil {
			t.Fatal(err)
		}
		defer arr.Release()

		if arr.Len() != 2 {
			t.Fatalf("expected len 2, got %d", arr.Len())
		}
		got, err := ToGoSlice[Row](arr)
		if err != nil {
			t.Fatal(err)
		}
		for i, want := range vals {
			if got[i].Name != want.Name || got[i].Score != want.Score {
				t.Errorf("index %d: expected %+v, got %+v", i, want, got[i])
			}
		}
	})

	t.Run("[]*int32 with nil produces null", func(t *testing.T) {
		v := int32(42)
		arr, err := FromGoSlice([]*int32{&v, nil}, mem)
		if err != nil {
			t.Fatal(err)
		}
		defer arr.Release()

		if arr.Len() != 2 {
			t.Fatalf("expected len 2, got %d", arr.Len())
		}
		if arr.IsNull(1) == false {
			t.Error("expected index 1 to be null")
		}
		typed := arr.(*array.Int32)
		if typed.Value(0) != 42 {
			t.Errorf("expected 42 at index 0, got %d", typed.Value(0))
		}
	})

	t.Run("empty []int32 gives length-0 array", func(t *testing.T) {
		arr, err := FromGoSlice([]int32{}, mem)
		if err != nil {
			t.Fatal(err)
		}
		defer arr.Release()

		if arr.Len() != 0 {
			t.Errorf("expected len 0, got %d", arr.Len())
		}
	})
}

// TestRecordToSlice checks RecordToSlice against records assembled by hand
// with the array builders, including a zero-row record.
func TestRecordToSlice(t *testing.T) {
	mem := memory.NewGoAllocator()

	type Row struct {
		Name  string
		Score float64
	}

	// buildRecord assembles a two-column (Name, Score) record from rows. The
	// builders and column arrays are released here; the caller releases the
	// returned record.
	buildRecord := func(rows []Row) arrow.Record {
		schema := arrow.NewSchema([]arrow.Field{
			{Name: "Name", Type: arrow.BinaryTypes.String},
			{Name: "Score", Type: arrow.PrimitiveTypes.Float64},
		}, nil)
		nameB := array.NewStringBuilder(mem)
		defer nameB.Release()
		scoreB := array.NewFloat64Builder(mem)
		defer scoreB.Release()
		for _, r := range rows {
			nameB.Append(r.Name)
			scoreB.Append(r.Score)
		}
		nameArr := nameB.NewStringArray()
		defer nameArr.Release()
		scoreArr := scoreB.NewFloat64Array()
		defer scoreArr.Release()
		return array.NewRecord(schema, []arrow.Array{nameArr, scoreArr}, int64(len(rows)))
	}

	t.Run("basic 3-row record", func(t *testing.T) {
		want := []Row{{"Alice", 9.5}, {"Bob", 8.0}, {"Carol", 7.5}}
		rec := buildRecord(want)
		defer rec.Release()

		got, err := RecordToSlice[Row](rec)
		if err != nil {
			t.Fatal(err)
		}
		if len(got) != len(want) {
			t.Fatalf("expected len %d, got %d", len(want), len(got))
		}
		for i, w := range want {
			if got[i].Name != w.Name || got[i].Score != w.Score {
				t.Errorf("index %d: expected %+v, got %+v", i, w, got[i])
			}
		}
	})

	t.Run("empty record gives empty slice", func(t *testing.T) {
		rec := buildRecord(nil)
		defer rec.Release()

		got, err := RecordToSlice[Row](rec)
		if err != nil {
			t.Fatal(err)
		}
		if len(got) != 0 {
			t.Errorf("expected empty slice, got len %d", len(got))
		}
	})
}

// TestRecordFromSlice checks RecordFromSlice: schema and column values for a
// struct slice, the error for a non-struct element type, and a round trip
// through RecordToSlice.
func TestRecordFromSlice(t *testing.T) {
	mem := memory.NewGoAllocator()

	type Row struct {
		Name  string
		Score float64
	}

	t.Run("struct slice produces correct schema and values", func(t *testing.T) {
		vals := []Row{{"Alice", 9.5}, {"Bob", 8.0}}
		rec, err := RecordFromSlice(vals, mem)
		if err != nil {
			t.Fatal(err)
		}
		defer rec.Release()

		if rec.NumCols() != 2 {
			t.Fatalf("expected 2 cols, got %d", rec.NumCols())
		}
		if rec.NumRows() != 2 {
			t.Fatalf("expected 2 rows, got %d", rec.NumRows())
		}
		if rec.Schema().Field(0).Name != "Name" {
			t.Errorf("expected col 0 name 'Name', got %q", rec.Schema().Field(0).Name)
		}
		if rec.Schema().Field(1).Name != "Score" {
			t.Errorf("expected col 1 name 'Score', got %q", rec.Schema().Field(1).Name)
		}
		nameCol := rec.Column(0).(*array.String)
		if nameCol.Value(0) != "Alice" || nameCol.Value(1) != "Bob" {
			t.Errorf("unexpected name values: %q %q", nameCol.Value(0), nameCol.Value(1))
		}
		scoreCol := rec.Column(1).(*array.Float64)
		if scoreCol.Value(0) != 9.5 || scoreCol.Value(1) != 8.0 {
			t.Errorf("unexpected score values: %v %v", scoreCol.Value(0), scoreCol.Value(1))
		}
	})

	t.Run("non-struct T returns error", func(t *testing.T) {
		_, err := RecordFromSlice([]int32{1, 2, 3}, mem)
		if err == nil {
			t.Fatal("expected error for non-struct T, got nil")
		}
	})

	t.Run("round-trip RecordFromSlice then RecordToSlice", func(t *testing.T) {
		want := []Row{{"Alice", 9.5}, {"Bob", 8.0}, {"Carol", 7.5}}
		rec, err := RecordFromSlice(want, mem)
		if err != nil {
			t.Fatal(err)
		}
		defer rec.Release()

		got, err := RecordToSlice[Row](rec)
		if err != nil {
			t.Fatal(err)
		}
		if len(got) != len(want) {
			t.Fatalf("expected len %d, got %d", len(want), len(got))
		}
		for i, w := range want {
			if got[i].Name != w.Name || got[i].Score != w.Score {
				t.Errorf("index %d: expected %+v, got %+v", i, w, got[i])
			}
		}
	})
}
diff --git a/arrow/arreflect/reflect_test.go b/arrow/arreflect/reflect_test.go
new file mode 100644
index 00000000..87a4b807
--- /dev/null
+++ b/arrow/arreflect/reflect_test.go
@@ -0,0 +1,295 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or
more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package arreflect + +import ( + "reflect" + "testing" +) + +func TestParseTag(t *testing.T) { + tests := []struct { + input string + want tagOpts + }{ + { + input: "custom_name", + want: tagOpts{Name: "custom_name"}, + }, + { + input: "-", + want: tagOpts{Skip: true}, + }, + { + input: "-,", + want: tagOpts{Name: "-"}, + }, + { + input: "", + want: tagOpts{}, + }, + { + input: "name,dict", + want: tagOpts{Name: "name", Dict: true}, + }, + { + input: "name,listview", + want: tagOpts{Name: "name", ListView: true}, + }, + { + input: "name,ree", + want: tagOpts{Name: "name", REE: true}, + }, + { + input: "name,decimal(38,10)", + want: tagOpts{Name: "name", HasDecimalOpts: true, DecimalPrecision: 38, DecimalScale: 10}, + }, + { + input: ",decimal(18,2)", + want: tagOpts{Name: "", HasDecimalOpts: true, DecimalPrecision: 18, DecimalScale: 2}, + }, + { + input: "name,dict,ree", + want: tagOpts{Name: "name", Dict: true, REE: true}, + }, + { + input: "name,unknown_option", + want: tagOpts{Name: "name"}, + }, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got := parseTag(tt.input) + if got != tt.want { + t.Errorf("parseTag(%q) = %+v, want %+v", tt.input, got, tt.want) + } + }) + } +} + +func TestGetStructFields(t 
*testing.T) { + t.Run("simple struct", func(t *testing.T) { + type Simple struct { + Name string + Age int32 + } + fields := getStructFields(reflect.TypeOf(Simple{})) + if len(fields) != 2 { + t.Fatalf("expected 2 fields, got %d", len(fields)) + } + if fields[0].Name != "Name" { + t.Errorf("fields[0].Name = %q, want %q", fields[0].Name, "Name") + } + if fields[1].Name != "Age" { + t.Errorf("fields[1].Name = %q, want %q", fields[1].Name, "Age") + } + }) + + t.Run("struct with arrow tags", func(t *testing.T) { + type Tagged struct { + UserName string `arrow:"user_name"` + Score float64 `arrow:"score"` + Internal string `arrow:"-"` + } + fields := getStructFields(reflect.TypeOf(Tagged{})) + if len(fields) != 2 { + t.Fatalf("expected 2 fields, got %d: %v", len(fields), fields) + } + if fields[0].Name != "user_name" { + t.Errorf("fields[0].Name = %q, want %q", fields[0].Name, "user_name") + } + if fields[1].Name != "score" { + t.Errorf("fields[1].Name = %q, want %q", fields[1].Name, "score") + } + }) + + t.Run("unexported fields skipped", func(t *testing.T) { + type Mixed struct { + Exported string + unexported string //nolint:unused + } + fields := getStructFields(reflect.TypeOf(Mixed{})) + if len(fields) != 1 { + t.Fatalf("expected 1 field, got %d", len(fields)) + } + if fields[0].Name != "Exported" { + t.Errorf("fields[0].Name = %q, want %q", fields[0].Name, "Exported") + } + }) + + t.Run("pointer fields are nullable", func(t *testing.T) { + type WithPointers struct { + Required string + Optional *string + } + fields := getStructFields(reflect.TypeOf(WithPointers{})) + if len(fields) != 2 { + t.Fatalf("expected 2 fields, got %d", len(fields)) + } + if fields[0].Nullable { + t.Errorf("Required.Nullable = true, want false") + } + if !fields[1].Nullable { + t.Errorf("Optional.Nullable = false, want true") + } + }) + + t.Run("embedded struct promotion", func(t *testing.T) { + type Inner struct { + City string + Zip int32 + } + type Outer struct { + Name string + Inner + 
} + fields := getStructFields(reflect.TypeOf(Outer{})) + if len(fields) != 3 { + t.Fatalf("expected 3 fields, got %d: %v", len(fields), fields) + } + names := make([]string, len(fields)) + for i, f := range fields { + names[i] = f.Name + } + wantNames := []string{"Name", "City", "Zip"} + for i, want := range wantNames { + if names[i] != want { + t.Errorf("fields[%d].Name = %q, want %q", i, names[i], want) + } + } + }) + + t.Run("embedded struct conflict excluded", func(t *testing.T) { + type A struct{ ID string } + type B struct{ ID string } + type Conflicted struct { + A + B + } + fields := getStructFields(reflect.TypeOf(Conflicted{})) + if len(fields) != 0 { + t.Errorf("expected 0 fields due to conflict, got %d: %v", len(fields), fields) + } + }) + + t.Run("embedded with tag overrides promotion", func(t *testing.T) { + type Inner struct { + City string + Zip int32 + } + type HasTag struct { + Inner `arrow:"inner_struct"` + } + fields := getStructFields(reflect.TypeOf(HasTag{})) + if len(fields) != 1 { + t.Fatalf("expected 1 field, got %d: %v", len(fields), fields) + } + if fields[0].Name != "inner_struct" { + t.Errorf("fields[0].Name = %q, want %q", fields[0].Name, "inner_struct") + } + }) +} + +func TestCachedStructFields(t *testing.T) { + type S struct { + X int32 + Y string + } + + fields1 := cachedStructFields(reflect.TypeOf(S{})) + fields2 := cachedStructFields(reflect.TypeOf(S{})) + + if len(fields1) != len(fields2) { + t.Fatalf("cached call returned different lengths: %d vs %d", len(fields1), len(fields2)) + } + + for i := range fields1 { + if fields1[i].Name != fields2[i].Name { + t.Errorf("fields[%d].Name mismatch: %q vs %q", i, fields1[i].Name, fields2[i].Name) + } + } + + if len(fields1) != 2 { + t.Fatalf("expected 2 fields, got %d", len(fields1)) + } + if fields1[0].Name != "X" { + t.Errorf("fields1[0].Name = %q, want %q", fields1[0].Name, "X") + } + if fields1[1].Name != "Y" { + t.Errorf("fields1[1].Name = %q, want %q", fields1[1].Name, "Y") + } +} + 
+// ── shared test types used across reflect test files ────────────────────────── + +type testPrimitive struct { + I8 int8 + I16 int16 + I32 int32 + I64 int64 + U8 uint8 + U16 uint16 + U32 uint32 + U64 uint64 + F32 float32 + F64 float64 + B bool + S string + Blob []byte +} + +type testNested struct { + Name string + Scores []float64 + Tags map[string]string + Address struct { + City string + Zip int32 + } +} + +type testNullable struct { + Required string + Optional *string + MaybeInt *int32 +} + +type testEmbedded struct { + ID string + testEmbeddedInner +} + +type testEmbeddedInner struct { //nolint:unused + City string + Code int32 +} + +type testTagged struct { + UserName string `arrow:"user_name"` + Score float64 `arrow:"score"` + Hidden string `arrow:"-"` +} + +func TestHelpers(t *testing.T) { + // Verify shared test types are usable + _ = testPrimitive{I8: 1, I32: 2, S: "hi"} + _ = testNested{Name: "n", Scores: []float64{1.0}} + _ = testNullable{Required: "r"} + _ = testTagged{UserName: "u", Score: 3.14} + _ = testEmbedded{ID: "id"} +} From 429120ed4dbc00cea3cfb6b2bc3f7d79b01e3eaf Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 13:12:30 -0700 Subject: [PATCH 02/82] fix(arreflect): address code review findings from roborev #849 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - setListValue: null Arrow list now yields nil Go slice (was empty non-nil) - getStructFields: collapse two BFS passes into one with inline order tracking - inferPrimitiveArrowType: map int→INT64 and uint→UINT64 (was unsupported error) - tagOpts: add Temporal field for date32/date64/time32/time64 tag options, allowing time.Time struct fields to infer non-Timestamp Arrow types - appendToDictBuilder: fix error message to report value kind, not builder type - buildDictionaryArray: remove intermediate array allocation; iterate directly with appendToDictBuilder per element - TestSetListValue: add LargeListView subtest for 
LARGE_LIST_VIEW coverage --- arrow/arreflect/doc.go | 8 ++ arrow/arreflect/reflect.go | 99 ++++----------------- arrow/arreflect/reflect_arrow_to_go.go | 8 +- arrow/arreflect/reflect_arrow_to_go_test.go | 29 ++++++ arrow/arreflect/reflect_go_to_arrow.go | 24 +++-- arrow/arreflect/reflect_infer.go | 19 ++++ arrow/arreflect/reflect_infer_test.go | 16 ++++ 7 files changed, 107 insertions(+), 96 deletions(-) diff --git a/arrow/arreflect/doc.go b/arrow/arreflect/doc.go index c4ba67a0..2605f449 100644 --- a/arrow/arreflect/doc.go +++ b/arrow/arreflect/doc.go @@ -30,5 +30,13 @@ // Score float64 `arrow:"score"` // Skip string `arrow:"-"` // Enc string `arrow:"enc,dict"` +// T32 time.Time `arrow:"t32,time32"` // } +// +// Temporal type overrides for time.Time fields: +// +// arrow:"field,date32" — use Date32 instead of Timestamp +// arrow:"field,date64" — use Date64 instead of Timestamp +// arrow:"field,time32" — use Time32(ms) instead of Timestamp +// arrow:"field,time64" — use Time64(ns) instead of Timestamp package arreflect diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index 47f54f58..01d41a84 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -37,6 +37,7 @@ type tagOpts struct { DecimalPrecision int32 DecimalScale int32 HasDecimalOpts bool + Temporal string // "timestamp" (default), "date32", "date64", "time32", "time64" } type fieldMeta struct { @@ -114,6 +115,8 @@ func parseOptions(opts *tagOpts, rest string) { opts.ListView = true case "ree": opts.REE = true + case "date32", "date64", "time32", "time64", "timestamp": + opts.Temporal = token } } } @@ -158,12 +161,13 @@ func getStructFields(t reflect.Type) []fieldMeta { meta fieldMeta depth int tagged bool + order int } nameMap := make(map[string][]candidate) + orderCounter := 0 queue := []bfsEntry{{t: t, index: nil, depth: 0}} - visited := make(map[reflect.Type]bool) for len(queue) > 0 { @@ -227,7 +231,6 @@ func getStructFields(t reflect.Type) []fieldMeta { } nullable 
:= sf.Type.Kind() == reflect.Ptr - tagged := hasTag && opts.Name != "" meta := fieldMeta{ @@ -238,10 +241,20 @@ func getStructFields(t reflect.Type) []fieldMeta { Opts: opts, } - nameMap[arrowName] = append(nameMap[arrowName], candidate{ + // Assign insertion order on first encounter of this name. + existingCands := nameMap[arrowName] + order := orderCounter + if len(existingCands) > 0 { + order = existingCands[0].order + } else { + orderCounter++ + } + + nameMap[arrowName] = append(existingCands, candidate{ meta: meta, depth: entry.depth, tagged: tagged, + order: order, }) } } @@ -251,83 +264,8 @@ func getStructFields(t reflect.Type) []fieldMeta { order int } - nameOrder := make(map[string]int) - orderCounter := 0 - - type bfsEntry2 struct { - t reflect.Type - index []int - depth int - } - queue2 := []bfsEntry2{{t: t, index: nil, depth: 0}} - visited2 := make(map[reflect.Type]bool) - - for len(queue2) > 0 { - entry := queue2[0] - queue2 = queue2[1:] - - st := entry.t - for st.Kind() == reflect.Ptr { - st = st.Elem() - } - if st.Kind() != reflect.Struct { - continue - } - if entry.depth > 0 { - if visited2[st] { - continue - } - visited2[st] = true - } - - for i := 0; i < st.NumField(); i++ { - sf := st.Field(i) - fullIndex := make([]int, len(entry.index)+1) - copy(fullIndex, entry.index) - fullIndex[len(entry.index)] = i - - if !sf.IsExported() && !sf.Anonymous { - continue - } - - tagVal, hasTag := sf.Tag.Lookup("arrow") - var opts tagOpts - if hasTag { - opts = parseTag(tagVal) - } - if opts.Skip { - continue - } - - arrowName := opts.Name - if arrowName == "" { - arrowName = sf.Name - } - - if sf.Anonymous && !hasTag { - ft := sf.Type - for ft.Kind() == reflect.Ptr { - ft = ft.Elem() - } - if ft.Kind() == reflect.Struct { - queue2 = append(queue2, bfsEntry2{ - t: ft, - index: fullIndex, - depth: entry.depth + 1, - }) - continue - } - } - - if _, seen := nameOrder[arrowName]; !seen { - nameOrder[arrowName] = orderCounter - orderCounter++ - } - } - } - resolved 
:= make([]resolvedField, 0, len(nameMap)) - for name, candidates := range nameMap { + for _, candidates := range nameMap { minDepth := candidates[0].depth for _, c := range candidates[1:] { if c.depth < minDepth { @@ -358,8 +296,7 @@ func getStructFields(t reflect.Type) []fieldMeta { } if winner != nil { - order := nameOrder[name] - resolved = append(resolved, resolvedField{meta: winner.meta, order: order}) + resolved = append(resolved, resolvedField{meta: winner.meta, order: winner.order}) } } diff --git a/arrow/arreflect/reflect_arrow_to_go.go b/arrow/arreflect/reflect_arrow_to_go.go index cf0e5dfa..1d39b875 100644 --- a/arrow/arreflect/reflect_arrow_to_go.go +++ b/arrow/arreflect/reflect_arrow_to_go.go @@ -388,13 +388,7 @@ func setStructValue(v reflect.Value, sa *array.Struct, i int) error { func setListValue(v reflect.Value, arr array.ListLike, i int) error { if arr.IsNull(i) { - if v.Kind() == reflect.Ptr { - v.Set(reflect.Zero(v.Type())) - return nil - } - if v.Kind() == reflect.Slice { - v.Set(reflect.MakeSlice(v.Type(), 0, 0)) - } + v.Set(reflect.Zero(v.Type())) return nil } if v.Kind() == reflect.Ptr { diff --git a/arrow/arreflect/reflect_arrow_to_go_test.go b/arrow/arreflect/reflect_arrow_to_go_test.go index f7886e2b..10e384db 100644 --- a/arrow/arreflect/reflect_arrow_to_go_test.go +++ b/arrow/arreflect/reflect_arrow_to_go_test.go @@ -588,6 +588,35 @@ func TestSetListValue(t *testing.T) { t.Errorf("expected [30], got %v", got[1]) } }) + + t.Run("large list view of int32", func(t *testing.T) { + lvb := array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32) + defer lvb.Release() + vb := lvb.ValueBuilder().(*array.Int32Builder) + + lvb.AppendWithSize(true, 2) + vb.AppendValues([]int32{1, 2}, nil) + lvb.AppendWithSize(true, 1) + vb.AppendValues([]int32{3}, nil) + + arr := lvb.NewLargeListViewArray() + defer arr.Release() + + var got []int32 + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { + t.Fatal(err) + } + if 
!reflect.DeepEqual(got, []int32{1, 2}) { + t.Errorf("row 0: expected [1,2], got %v", got) + } + + if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(got, []int32{3}) { + t.Errorf("row 1: expected [3], got %v", got) + } + }) } func TestSetMapValue(t *testing.T) { diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index b4378f11..89b272cd 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -503,6 +503,8 @@ func appendToDictBuilder(db array.DictionaryBuilder, v reflect.Value) error { return nil } return bdb.Append(v.Bytes()) + default: + return fmt.Errorf("appendToDictBuilder: unsupported value kind %v for BinaryDictionaryBuilder", v.Kind()) } case *array.Int8DictionaryBuilder: return bdb.Append(int8(v.Int())) @@ -681,6 +683,7 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar } func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { + n := vals.Len() elemType := vals.Type().Elem() for elemType.Kind() == reflect.Ptr { elemType = elemType.Elem() @@ -698,16 +701,21 @@ func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array db := array.NewDictionaryBuilder(mem, dt) defer db.Release() - rawArr, err := buildPrimitiveArray(vals, mem) - if err != nil { - return nil, fmt.Errorf("buildDictionaryArray: building raw values: %w", err) - } - defer rawArr.Release() + isPtr := vals.Type().Elem().Kind() == reflect.Ptr - if err := db.AppendArray(rawArr); err != nil { - return nil, fmt.Errorf("buildDictionaryArray: AppendArray: %w", err) + for i := 0; i < n; i++ { + elem := vals.Index(i) + if isPtr { + if elem.IsNil() { + db.AppendNull() + continue + } + elem = elem.Elem() + } + if err := appendToDictBuilder(db, elem); err != nil { + return nil, fmt.Errorf("buildDictionaryArray[%d]: %w", i, err) + } } - return db.NewArray(), nil } diff --git 
a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index 2df6196a..01661bcc 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -35,6 +35,8 @@ var ( typeOfDec128 = reflect.TypeOf(decimal128.Num{}) typeOfDec256 = reflect.TypeOf(decimal256.Num{}) typeOfByteSlice = reflect.TypeOf([]byte{}) + typeOfInt = reflect.TypeOf(int(0)) + typeOfUint = reflect.TypeOf(uint(0)) ) func inferPrimitiveArrowType(t reflect.Type) (arrow.DataType, error) { @@ -51,6 +53,8 @@ func inferPrimitiveArrowType(t reflect.Type) (arrow.DataType, error) { return arrow.PrimitiveTypes.Int32, nil case reflect.TypeOf(int64(0)): return arrow.PrimitiveTypes.Int64, nil + case typeOfInt: + return arrow.PrimitiveTypes.Int64, nil case reflect.TypeOf(uint8(0)): return arrow.PrimitiveTypes.Uint8, nil case reflect.TypeOf(uint16(0)): @@ -59,6 +63,8 @@ func inferPrimitiveArrowType(t reflect.Type) (arrow.DataType, error) { return arrow.PrimitiveTypes.Uint32, nil case reflect.TypeOf(uint64(0)): return arrow.PrimitiveTypes.Uint64, nil + case typeOfUint: + return arrow.PrimitiveTypes.Uint64, nil case reflect.TypeOf(float32(0)): return arrow.PrimitiveTypes.Float32, nil case reflect.TypeOf(float64(0)): @@ -170,6 +176,19 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { } } + if origType == typeOfTime && fm.Opts.Temporal != "" { + switch fm.Opts.Temporal { + case "date32": + dt = arrow.FixedWidthTypes.Date32 + case "date64": + dt = arrow.FixedWidthTypes.Date64 + case "time32": + dt = &arrow.Time32Type{Unit: arrow.Millisecond} + case "time64": + dt = &arrow.Time64Type{Unit: arrow.Nanosecond} + } + } + arrowFields = append(arrowFields, arrow.Field{ Name: fm.Name, Type: dt, diff --git a/arrow/arreflect/reflect_infer_test.go b/arrow/arreflect/reflect_infer_test.go index 17262eab..994bb93c 100644 --- a/arrow/arreflect/reflect_infer_test.go +++ b/arrow/arreflect/reflect_infer_test.go @@ -38,10 +38,12 @@ func TestInferPrimitiveArrowType(t *testing.T) { 
{"int16", reflect.TypeOf(int16(0)), arrow.INT16, false}, {"int32", reflect.TypeOf(int32(0)), arrow.INT32, false}, {"int64", reflect.TypeOf(int64(0)), arrow.INT64, false}, + {"int", reflect.TypeOf(int(0)), arrow.INT64, false}, {"uint8", reflect.TypeOf(uint8(0)), arrow.UINT8, false}, {"uint16", reflect.TypeOf(uint16(0)), arrow.UINT16, false}, {"uint32", reflect.TypeOf(uint32(0)), arrow.UINT32, false}, {"uint64", reflect.TypeOf(uint64(0)), arrow.UINT64, false}, + {"uint", reflect.TypeOf(uint(0)), arrow.UINT64, false}, {"float32", reflect.TypeOf(float32(0)), arrow.FLOAT32, false}, {"float64", reflect.TypeOf(float64(0)), arrow.FLOAT64, false}, {"bool", reflect.TypeOf(false), arrow.BOOL, false}, @@ -289,6 +291,20 @@ func TestInferStructType(t *testing.T) { t.Error("expected error for non-struct, got nil") } }) + + t.Run("time.Time with date32 tag maps to DATE32", func(t *testing.T) { + type S struct { + Ts time.Time `arrow:",date32"` + } + st, err := inferStructType(reflect.TypeOf(S{})) + if err != nil { + t.Fatal(err) + } + dt := st.Field(0).Type + if dt.ID() != arrow.DATE32 { + t.Errorf("got %v, want DATE32", dt.ID()) + } + }) } func TestInferArrowSchema(t *testing.T) { From 12e024f429454a404f8a08b1fa7388dc4fe4706d Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 13:16:01 -0700 Subject: [PATCH 03/82] fix(arreflect): use reflect.Value.Equal for REE run detection on comparable types Replace reflect.DeepEqual with reflect.Value.Equal for comparable element types in buildRunEndEncodedArray. This avoids the interface{} boxing cost and is safe for all comparable types. Falls back to reflect.DeepEqual for non-comparable element types (slices, maps), with a note that function-valued elements cannot be run-compressed. 
--- arrow/arreflect/reflect_go_to_arrow.go | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 89b272cd..8a0cc197 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -739,11 +739,27 @@ func buildRunEndEncodedArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar val reflect.Value } + // For comparable element types use reflect.Value.Equal (fast, avoids boxing). + // For non-comparable types (e.g. slices, maps) fall back to reflect.DeepEqual, + // which handles structural equality but cannot compress runs of function values. + elemType := vals.Type().Elem() + for elemType.Kind() == reflect.Ptr { + elemType = elemType.Elem() + } + comparable := elemType.Comparable() + + equal := func(a, b reflect.Value) bool { + if comparable { + return a.Equal(b) + } + return reflect.DeepEqual(a.Interface(), b.Interface()) + } + var runs []run current := vals.Index(0) for i := 1; i < vals.Len(); i++ { next := vals.Index(i) - if !reflect.DeepEqual(current.Interface(), next.Interface()) { + if !equal(current, next) { runs = append(runs, run{end: int32(i), val: current}) current = next } From e2a7e6673b8f5886589262612fcaeabb0da758cd Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 13:22:06 -0700 Subject: [PATCH 04/82] fix(arreflect): complete temporal-tag write path and add roundtrip tests - appendValue: add Date32Builder, Date64Builder, Time32Builder, Time64Builder cases so temporal-tagged time.Time struct fields can be written to Arrow (previously fell through to unsupported-builder-type error) - buildTemporalArray: accept tagOpts and branch on opts.Temporal, allowing top-level []time.Time slices to produce Date32/Date64/Time32/Time64 arrays when an explicit temporal tag is provided - TestBuildTemporalTaggedArray: add roundtrip subtests for date32, date64, time32, and time64 to prevent regressions in 
the write path --- arrow/arreflect/reflect_go_to_arrow.go | 113 +++++++++++++++++--- arrow/arreflect/reflect_go_to_arrow_test.go | 91 ++++++++++++++++ 2 files changed, 187 insertions(+), 17 deletions(-) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 8a0cc197..944770eb 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -65,9 +65,9 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A case reflect.Struct: switch elemType { case typeOfTime: - return buildTemporalArray(vals, mem) + return buildTemporalArray(vals, opts, mem) case typeOfDuration: - return buildTemporalArray(vals, mem) + return buildTemporalArray(vals, opts, mem) case typeOfDec128: return buildDecimalArray(vals, opts, mem) case typeOfDec256: @@ -168,7 +168,7 @@ func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e return nil } -func buildTemporalArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { +func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { elemType := vals.Type().Elem() for elemType.Kind() == reflect.Ptr { elemType = elemType.Elem() @@ -178,23 +178,92 @@ func buildTemporalArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, switch elemType { case typeOfTime: - dt := &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "UTC"} - tb := array.NewTimestampBuilder(mem, dt) - defer tb.Release() - tb.Reserve(vals.Len()) - for i := 0; i < vals.Len(); i++ { - v := vals.Index(i) - if isPtr { - if v.IsNil() { - tb.AppendNull() - continue + switch opts.Temporal { + case "date32": + b := array.NewDate32Builder(mem) + defer b.Release() + b.Reserve(vals.Len()) + for i := 0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + b.AppendNull() + continue + } + v = v.Elem() } - v = v.Elem() + b.Append(arrow.Date32FromTime(v.Interface().(time.Time))) + } + 
return b.NewArray(), nil + case "date64": + b := array.NewDate64Builder(mem) + defer b.Release() + b.Reserve(vals.Len()) + for i := 0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + b.AppendNull() + continue + } + v = v.Elem() + } + b.Append(arrow.Date64FromTime(v.Interface().(time.Time))) + } + return b.NewArray(), nil + case "time32": + dt := &arrow.Time32Type{Unit: arrow.Millisecond} + b := array.NewTime32Builder(mem, dt) + defer b.Release() + b.Reserve(vals.Len()) + for i := 0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + b.AppendNull() + continue + } + v = v.Elem() + } + b.Append(arrow.Time32(v.Interface().(time.Time).UnixNano() / int64(dt.Unit.Multiplier()))) + } + return b.NewArray(), nil + case "time64": + dt := &arrow.Time64Type{Unit: arrow.Nanosecond} + b := array.NewTime64Builder(mem, dt) + defer b.Release() + b.Reserve(vals.Len()) + for i := 0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + b.AppendNull() + continue + } + v = v.Elem() + } + b.Append(arrow.Time64(v.Interface().(time.Time).UnixNano() / int64(dt.Unit.Multiplier()))) + } + return b.NewArray(), nil + default: + dt := &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "UTC"} + tb := array.NewTimestampBuilder(mem, dt) + defer tb.Release() + tb.Reserve(vals.Len()) + for i := 0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + tb.AppendNull() + continue + } + v = v.Elem() + } + t := v.Interface().(time.Time) + tb.Append(arrow.Timestamp(t.UnixNano())) } - t := v.Interface().(time.Time) - tb.Append(arrow.Timestamp(t.UnixNano())) + return tb.NewArray(), nil } - return tb.NewArray(), nil case typeOfDuration: dt := &arrow.DurationType{Unit: arrow.Nanosecond} @@ -410,6 +479,16 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { case *array.TimestampBuilder: t := v.Interface().(time.Time) tb.Append(arrow.Timestamp(t.UnixNano())) + case *array.Date32Builder: + 
tb.Append(arrow.Date32FromTime(v.Interface().(time.Time))) + case *array.Date64Builder: + tb.Append(arrow.Date64FromTime(v.Interface().(time.Time))) + case *array.Time32Builder: + unit := tb.Type().(*arrow.Time32Type).Unit + tb.Append(arrow.Time32(v.Interface().(time.Time).UnixNano() / int64(unit.Multiplier()))) + case *array.Time64Builder: + unit := tb.Type().(*arrow.Time64Type).Unit + tb.Append(arrow.Time64(v.Interface().(time.Time).UnixNano() / int64(unit.Multiplier()))) case *array.DurationBuilder: d := v.Interface().(time.Duration) tb.Append(arrow.Duration(d.Nanoseconds())) diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index 6f184275..807d5dfb 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -778,3 +778,94 @@ func TestBuildListViewArray(t *testing.T) { } }) } + +func TestBuildTemporalTaggedArray(t *testing.T) { + mem := memory.NewGoAllocator() + + // reference time-of-day: 2024-01-15 10:30:00 UTC + ref := time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC) + + t.Run("date32", func(t *testing.T) { + vals := []time.Time{ref, ref.AddDate(0, 0, 1)} + opts := tagOpts{Temporal: "date32"} + sv := reflect.ValueOf(vals) + arr, err := buildTemporalArray(sv, opts, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer arr.Release() + if arr.DataType().ID() != arrow.DATE32 { + t.Errorf("expected DATE32, got %v", arr.DataType().ID()) + } + if arr.Len() != 2 { + t.Errorf("expected len 2, got %d", arr.Len()) + } + // roundtrip: convert back and check date + d32arr := arr.(*array.Date32) + got0 := d32arr.Value(0).ToTime() + if got0.Year() != ref.Year() || got0.Month() != ref.Month() || got0.Day() != ref.Day() { + t.Errorf("date32 roundtrip: got %v, want %v", got0, ref) + } + }) + + t.Run("date64", func(t *testing.T) { + vals := []time.Time{ref} + opts := tagOpts{Temporal: "date64"} + sv := reflect.ValueOf(vals) + arr, err := 
buildTemporalArray(sv, opts, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer arr.Release() + if arr.DataType().ID() != arrow.DATE64 { + t.Errorf("expected DATE64, got %v", arr.DataType().ID()) + } + d64arr := arr.(*array.Date64) + got0 := d64arr.Value(0).ToTime() + if got0.Year() != ref.Year() || got0.Month() != ref.Month() || got0.Day() != ref.Day() { + t.Errorf("date64 roundtrip: got %v, want %v", got0, ref) + } + }) + + t.Run("time32", func(t *testing.T) { + vals := []time.Time{ref} + opts := tagOpts{Temporal: "time32"} + sv := reflect.ValueOf(vals) + arr, err := buildTemporalArray(sv, opts, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer arr.Release() + if arr.DataType().ID() != arrow.TIME32 { + t.Errorf("expected TIME32, got %v", arr.DataType().ID()) + } + if arr.Len() != 1 { + t.Errorf("expected len 1, got %d", arr.Len()) + } + t32arr := arr.(*array.Time32) + val := t32arr.Value(0) + if val == 0 { + t.Error("expected non-zero time32 value") + } + }) + + t.Run("time64", func(t *testing.T) { + vals := []time.Time{ref} + opts := tagOpts{Temporal: "time64"} + sv := reflect.ValueOf(vals) + arr, err := buildTemporalArray(sv, opts, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer arr.Release() + if arr.DataType().ID() != arrow.TIME64 { + t.Errorf("expected TIME64, got %v", arr.DataType().ID()) + } + t64arr := arr.(*array.Time64) + unit := arr.DataType().(*arrow.Time64Type).Unit + got0 := t64arr.Value(0).ToTime(unit) + if got0.Hour() != ref.Hour() || got0.Minute() != ref.Minute() || got0.Second() != ref.Second() { + t.Errorf("time64 roundtrip: got %v, want %v", got0, ref) + } + }) +} From 00494baf7d86b4d6869e87aeb3dba46d1dbaf0ed Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 13:24:36 -0700 Subject: [PATCH 05/82] fix(arreflect): use value-equality for pointer-typed REE elements The equal closure in buildRunEndEncodedArray was calling reflect.Value.Equal on the original 
pointer values, comparing addresses instead of pointed-to values. This caused []*T slices where multiple distinct pointers point to equal values to produce N single-element runs instead of being coalesced. Fix: dereference pointer chains inside the equal closure before comparing, handling nil pointers correctly (two nils are equal). Add pointer_value_equality subtest to TestBuildRunEndEncodedArray to confirm two *string pointers to equal values produce one coalesced run. --- arrow/arreflect/reflect_go_to_arrow.go | 9 ++++++++- arrow/arreflect/reflect_go_to_arrow_test.go | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 944770eb..579b0e0d 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -829,7 +829,14 @@ func buildRunEndEncodedArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar equal := func(a, b reflect.Value) bool { if comparable { - return a.Equal(b) + da, db := a, b + for da.Kind() == reflect.Ptr { + if da.IsNil() || db.IsNil() { + return da.IsNil() && db.IsNil() + } + da, db = da.Elem(), db.Elem() + } + return da.Equal(db) } return reflect.DeepEqual(a.Interface(), b.Interface()) } diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index 807d5dfb..ef39ad82 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -716,6 +716,25 @@ func TestBuildRunEndEncodedArray(t *testing.T) { t.Errorf("expected 5 runs for all-distinct, got %d", ree.RunEndsArr().Len()) } }) + + t.Run("pointer_value_equality", func(t *testing.T) { + // Two distinct *string pointers pointing to equal values "x" + // Should produce ONE run, not two (value equality, not address equality) + x1 := "x" + x2 := "x" + y := "y" + vals := []*string{&x1, &x2, &y} + arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: 
true}, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer arr.Release() + // "x","x" is one run; "y" is another → 2 runs total + ree := arr.(*array.RunEndEncoded) + if ree.RunEndsArr().Len() != 2 { + t.Errorf("expected 2 runs (x+x coalesced, y), got %d", ree.RunEndsArr().Len()) + } + }) } func TestBuildListViewArray(t *testing.T) { From 56c19db90cc67f55ee2a67ce4eb9a02e87130e95 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 13:29:47 -0700 Subject: [PATCH 06/82] fix(arreflect): use time-of-day for Time32/Time64 encoding, fix int32 overflow Time32 values are time-of-day since midnight, not Unix epoch timestamps. Using UnixNano() for 2024 dates gives ~1.7 trillion milliseconds, which silently overflows int32 max (2.1 billion) producing garbage values. Fix all four write locations to compute time-of-day from midnight: t.Sub(time.Date(Y,M,D,0,0,0,0,loc)).Nanoseconds() / unit.Multiplier() Applied to: - buildTemporalArray: time32 and time64 cases - appendValue: Time32Builder and Time64Builder cases Also strengthen the time32 roundtrip test to assert H/M/S match instead of just checking the value is non-zero (which passed even with overflow). 
--- arrow/arreflect/reflect_go_to_arrow.go | 16 ++++++++++++---- arrow/arreflect/reflect_go_to_arrow_test.go | 9 ++++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 579b0e0d..2b2e234e 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -225,7 +225,9 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) } v = v.Elem() } - b.Append(arrow.Time32(v.Interface().(time.Time).UnixNano() / int64(dt.Unit.Multiplier()))) + t := v.Interface().(time.Time) + midnight := time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location()) + b.Append(arrow.Time32(t.Sub(midnight).Nanoseconds() / int64(dt.Unit.Multiplier()))) } return b.NewArray(), nil case "time64": @@ -242,7 +244,9 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) } v = v.Elem() } - b.Append(arrow.Time64(v.Interface().(time.Time).UnixNano() / int64(dt.Unit.Multiplier()))) + t64 := v.Interface().(time.Time) + midnight64 := time.Date(t64.Year(), t64.Month(), t64.Day(), 0, 0, 0, 0, t64.Location()) + b.Append(arrow.Time64(t64.Sub(midnight64).Nanoseconds() / int64(dt.Unit.Multiplier()))) } return b.NewArray(), nil default: @@ -485,10 +489,14 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { tb.Append(arrow.Date64FromTime(v.Interface().(time.Time))) case *array.Time32Builder: unit := tb.Type().(*arrow.Time32Type).Unit - tb.Append(arrow.Time32(v.Interface().(time.Time).UnixNano() / int64(unit.Multiplier()))) + t := v.Interface().(time.Time) + midnight := time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location()) + tb.Append(arrow.Time32(t.Sub(midnight).Nanoseconds() / int64(unit.Multiplier()))) case *array.Time64Builder: unit := tb.Type().(*arrow.Time64Type).Unit - tb.Append(arrow.Time64(v.Interface().(time.Time).UnixNano() / int64(unit.Multiplier()))) + t := 
v.Interface().(time.Time) + midnight := time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location()) + tb.Append(arrow.Time64(t.Sub(midnight).Nanoseconds() / int64(unit.Multiplier()))) case *array.DurationBuilder: d := v.Interface().(time.Duration) tb.Append(arrow.Duration(d.Nanoseconds())) diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index ef39ad82..424f19a0 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -862,9 +862,12 @@ func TestBuildTemporalTaggedArray(t *testing.T) { t.Errorf("expected len 1, got %d", arr.Len()) } t32arr := arr.(*array.Time32) - val := t32arr.Value(0) - if val == 0 { - t.Error("expected non-zero time32 value") + unit := arr.DataType().(*arrow.Time32Type).Unit + got0 := t32arr.Value(0).ToTime(unit) + if got0.Hour() != ref.Hour() || got0.Minute() != ref.Minute() || got0.Second() != ref.Second() { + t.Errorf("time32 roundtrip: got hour=%d min=%d sec=%d, want hour=%d min=%d sec=%d", + got0.Hour(), got0.Minute(), got0.Second(), + ref.Hour(), ref.Minute(), ref.Second()) } }) From ddae670f004b468f47c79e8b2c7553cec91f99c2 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 13:34:47 -0700 Subject: [PATCH 07/82] refactor(arreflect): extract timeOfDayNanos helper, strengthen time tests - Extract timeOfDayNanos(t time.Time) int64 helper to eliminate the 4x duplicated midnight-computation pattern across buildTemporalArray and appendValue - Rename t64/midnight64 to t/midnight in the time64 loop for consistency with the time32 loop pattern - time32 test: add millisecond-precision assertion (regression guard) - time64 test: add nanosecond-precision roundtrip with 123456789ns reference value to verify full sub-second fidelity --- arrow/arreflect/reflect_go_to_arrow.go | 21 +++++++++------------ arrow/arreflect/reflect_go_to_arrow_test.go | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git 
a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 2b2e234e..6b6d3c9b 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -168,6 +168,11 @@ func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e return nil } +func timeOfDayNanos(t time.Time) int64 { + midnight := time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location()) + return t.Sub(midnight).Nanoseconds() +} + func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { elemType := vals.Type().Elem() for elemType.Kind() == reflect.Ptr { @@ -225,9 +230,7 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) } v = v.Elem() } - t := v.Interface().(time.Time) - midnight := time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location()) - b.Append(arrow.Time32(t.Sub(midnight).Nanoseconds() / int64(dt.Unit.Multiplier()))) + b.Append(arrow.Time32(timeOfDayNanos(v.Interface().(time.Time)) / int64(dt.Unit.Multiplier()))) } return b.NewArray(), nil case "time64": @@ -244,9 +247,7 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) } v = v.Elem() } - t64 := v.Interface().(time.Time) - midnight64 := time.Date(t64.Year(), t64.Month(), t64.Day(), 0, 0, 0, 0, t64.Location()) - b.Append(arrow.Time64(t64.Sub(midnight64).Nanoseconds() / int64(dt.Unit.Multiplier()))) + b.Append(arrow.Time64(timeOfDayNanos(v.Interface().(time.Time)) / int64(dt.Unit.Multiplier()))) } return b.NewArray(), nil default: @@ -489,14 +490,10 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { tb.Append(arrow.Date64FromTime(v.Interface().(time.Time))) case *array.Time32Builder: unit := tb.Type().(*arrow.Time32Type).Unit - t := v.Interface().(time.Time) - midnight := time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location()) - tb.Append(arrow.Time32(t.Sub(midnight).Nanoseconds() / int64(unit.Multiplier()))) 
+ tb.Append(arrow.Time32(timeOfDayNanos(v.Interface().(time.Time)) / int64(unit.Multiplier()))) case *array.Time64Builder: unit := tb.Type().(*arrow.Time64Type).Unit - t := v.Interface().(time.Time) - midnight := time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location()) - tb.Append(arrow.Time64(t.Sub(midnight).Nanoseconds() / int64(unit.Multiplier()))) + tb.Append(arrow.Time64(timeOfDayNanos(v.Interface().(time.Time)) / int64(unit.Multiplier()))) case *array.DurationBuilder: d := v.Interface().(time.Duration) tb.Append(arrow.Duration(d.Nanoseconds())) diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index 424f19a0..a4ef5c85 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -869,6 +869,11 @@ func TestBuildTemporalTaggedArray(t *testing.T) { got0.Hour(), got0.Minute(), got0.Second(), ref.Hour(), ref.Minute(), ref.Second()) } + // time32 uses millisecond unit — check sub-second precision to ms + refMs := ref.Truncate(time.Millisecond) + if got0.Nanosecond()/1e6 != refMs.Nanosecond()/1e6 { + t.Errorf("time32 millisecond: got %d ms, want %d ms", got0.Nanosecond()/1e6, refMs.Nanosecond()/1e6) + } }) t.Run("time64", func(t *testing.T) { @@ -889,5 +894,19 @@ func TestBuildTemporalTaggedArray(t *testing.T) { if got0.Hour() != ref.Hour() || got0.Minute() != ref.Minute() || got0.Second() != ref.Second() { t.Errorf("time64 roundtrip: got %v, want %v", got0, ref) } + // time64 uses nanosecond unit — verify full nanosecond precision + refWithNanos := time.Date(ref.Year(), ref.Month(), ref.Day(), ref.Hour(), ref.Minute(), ref.Second(), 123456789, ref.Location()) + sv64 := reflect.ValueOf([]time.Time{refWithNanos}) + arr64, err := buildTemporalArray(sv64, tagOpts{Temporal: "time64"}, mem) + if err != nil { + t.Fatalf("time64 with nanos: %v", err) + } + defer arr64.Release() + t64arr64 := arr64.(*array.Time64) + unit64 := 
arr64.DataType().(*arrow.Time64Type).Unit + got64 := t64arr64.Value(0).ToTime(unit64) + if got64.Nanosecond() != refWithNanos.Nanosecond() { + t.Errorf("time64 nanosecond: got %d, want %d", got64.Nanosecond(), refWithNanos.Nanosecond()) + } }) } From b6f76d95d5d4559f3c1af078426290e286e27a79 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 13:39:58 -0700 Subject: [PATCH 08/82] test(arreflect): fix vacuous time32 millisecond precision assertion The previous ms check compared ref.Truncate(ms).Nanosecond()/1e6 against itself (both always 0 since ref has zero sub-second component), so it never detected regressions. Replace with a roundtrip using a 500ms reference value that verifies sub-second fidelity is actually preserved through Time32 encoding. --- arrow/arreflect/reflect_go_to_arrow_test.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index a4ef5c85..b975f6e1 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -869,10 +869,18 @@ func TestBuildTemporalTaggedArray(t *testing.T) { got0.Hour(), got0.Minute(), got0.Second(), ref.Hour(), ref.Minute(), ref.Second()) } - // time32 uses millisecond unit — check sub-second precision to ms - refMs := ref.Truncate(time.Millisecond) - if got0.Nanosecond()/1e6 != refMs.Nanosecond()/1e6 { - t.Errorf("time32 millisecond: got %d ms, want %d ms", got0.Nanosecond()/1e6, refMs.Nanosecond()/1e6) + refWithMs := time.Date(ref.Year(), ref.Month(), ref.Day(), ref.Hour(), ref.Minute(), ref.Second(), 500_000_000, ref.Location()) + svMs := reflect.ValueOf([]time.Time{refWithMs}) + arrMs, err := buildTemporalArray(svMs, tagOpts{Temporal: "time32"}, mem) + if err != nil { + t.Fatalf("time32 with ms: %v", err) + } + defer arrMs.Release() + t32ms := arrMs.(*array.Time32) + unitMs := arrMs.DataType().(*arrow.Time32Type).Unit + gotMs := 
t32ms.Value(0).ToTime(unitMs) + if gotMs.Nanosecond()/1_000_000 != 500 { + t.Errorf("time32 millisecond: got %d ms, want 500 ms", gotMs.Nanosecond()/1_000_000) } }) From 3174a110bca4fde7a504b790b131cbd870dd1992 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 14:34:01 -0700 Subject: [PATCH 09/82] fix(arreflect): propagate dict/ree/listview tags in struct schema, misc fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - inferStructType: apply encoding opts after type inference — Dict-tagged fields wrap dt in DictionaryType, listview fields replace ListType with ListViewType, ree fields wrap with RunEndEncodedType(Int32, dt). This ensures NewStructBuilder creates the correct sub-builder for each field so appendValue dispatches correctly. - appendValue: add RunEndEncodedBuilder case that appends to ValueBuilder then calls Append(1) to record the logical element - reflect.go: remove unused structFieldsEntry type declaration (dead code) - TestSetTemporalValue: add time32 (ms unit, 10:30:00.500) and time64 (ns unit, 10:30:00.123456789) subtests for the Arrow→Go read path - appendValue FixedSizeList case: validate v.Len() matches builder's declared fixed size before iterating --- arrow/arreflect/reflect.go | 6 ---- arrow/arreflect/reflect_arrow_to_go_test.go | 39 +++++++++++++++++++++ arrow/arreflect/reflect_go_to_arrow.go | 13 +++++++ arrow/arreflect/reflect_infer.go | 11 ++++++ arrow/arreflect/reflect_infer_test.go | 36 +++++++++++++++++++ 5 files changed, 99 insertions(+), 6 deletions(-) diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index 01d41a84..0cfbcfdb 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -136,12 +136,6 @@ func parseDecimalOpt(opts *tagOpts, token string) { } } -type structFieldsEntry struct { - index []int - field reflect.StructField - depth int -} - func getStructFields(t reflect.Type) []fieldMeta { for t.Kind() == reflect.Ptr { t = t.Elem() 
diff --git a/arrow/arreflect/reflect_arrow_to_go_test.go b/arrow/arreflect/reflect_arrow_to_go_test.go index 10e384db..ac0b496c 100644 --- a/arrow/arreflect/reflect_arrow_to_go_test.go +++ b/arrow/arreflect/reflect_arrow_to_go_test.go @@ -292,6 +292,45 @@ func TestSetTemporalValue(t *testing.T) { t.Errorf("expected nil for null timestamp pointer") } }) + + t.Run("time32", func(t *testing.T) { + dt := &arrow.Time32Type{Unit: arrow.Millisecond} + b := array.NewTime32Builder(mem, dt) + defer b.Release() + // 10h30m0s500ms = (10*3600 + 30*60)*1000 + 500 = 37800500 ms + b.Append(arrow.Time32(37800500)) + arr := b.NewArray() + defer arr.Release() + + var got time.Time + v := reflect.ValueOf(&got).Elem() + if err := setValue(v, arr, 0); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.Hour() != 10 || got.Minute() != 30 || got.Second() != 0 || got.Nanosecond()/1_000_000 != 500 { + t.Errorf("time32: got %v, want 10:30:00.500", got) + } + }) + + t.Run("time64", func(t *testing.T) { + dt := &arrow.Time64Type{Unit: arrow.Nanosecond} + b := array.NewTime64Builder(mem, dt) + defer b.Release() + // 10h30m0s123456789ns + nanos := int64(10*3600+30*60)*1_000_000_000 + 123456789 + b.Append(arrow.Time64(nanos)) + arr := b.NewArray() + defer arr.Release() + + var got time.Time + v := reflect.ValueOf(&got).Elem() + if err := setValue(v, arr, 0); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.Hour() != 10 || got.Minute() != 30 || got.Nanosecond() != 123456789 { + t.Errorf("time64: got %v, want 10:30:00.123456789", got) + } + }) } func TestSetDecimalValue(t *testing.T) { diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 6b6d3c9b..3b5651ac 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -520,6 +520,10 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { } } case *array.FixedSizeListBuilder: + expectedLen := 
int(tb.Type().(*arrow.FixedSizeListType).Len()) + if v.Len() != expectedLen { + return fmt.Errorf("appendValue: fixed-size list length mismatch: got %d, want %d", v.Len(), expectedLen) + } tb.Append(true) vb := tb.ValueBuilder() for i := 0; i < v.Len(); i++ { @@ -566,6 +570,15 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { } } } + case *array.RunEndEncodedBuilder: + if v.Kind() == reflect.Ptr && v.IsNil() { + tb.AppendNull() + } else { + if err := appendValue(tb.ValueBuilder(), v, tagOpts{}); err != nil { + return err + } + tb.Append(1) + } default: if db, ok := b.(array.DictionaryBuilder); ok { return appendToDictBuilder(db, v) diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index 01661bcc..a02bf299 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -189,6 +189,17 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { } } + switch { + case fm.Opts.Dict: + dt = &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: dt} + case fm.Opts.ListView: + if lt, ok := dt.(*arrow.ListType); ok { + dt = arrow.ListViewOf(lt.Elem()) + } + case fm.Opts.REE: + dt = arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, dt) + } + arrowFields = append(arrowFields, arrow.Field{ Name: fm.Name, Type: dt, diff --git a/arrow/arreflect/reflect_infer_test.go b/arrow/arreflect/reflect_infer_test.go index 994bb93c..eac14141 100644 --- a/arrow/arreflect/reflect_infer_test.go +++ b/arrow/arreflect/reflect_infer_test.go @@ -429,3 +429,39 @@ func TestInferArrowTypePublic(t *testing.T) { } }) } + +func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { + t.Run("dict-tagged string field becomes DICTIONARY", func(t *testing.T) { + type S struct { + Name string `arrow:"name,dict"` + } + schema, err := InferArrowSchema[S]() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + f, ok := schema.FieldsByName("name") + if !ok || len(f) == 0 { + t.Fatal("field 'name' 
not found in schema") + } + if f[0].Type.ID() != arrow.DICTIONARY { + t.Errorf("got %v, want DICTIONARY", f[0].Type.ID()) + } + }) + + t.Run("listview-tagged []string field becomes LIST_VIEW", func(t *testing.T) { + type S struct { + Tags []string `arrow:"tags,listview"` + } + schema, err := InferArrowSchema[S]() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + f, ok := schema.FieldsByName("tags") + if !ok || len(f) == 0 { + t.Fatal("field 'tags' not found in schema") + } + if f[0].Type.ID() != arrow.LIST_VIEW { + t.Errorf("got %v, want LIST_VIEW", f[0].Type.ID()) + } + }) +} From d2b26131799496cce9b828b05f56b8d11d7314af Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 14:41:07 -0700 Subject: [PATCH 10/82] fix(arreflect): REE test, listview error on bad type, time64 second check - reflect_infer.go: listview case now returns fmt.Errorf when fm.Opts.ListView is set but inferred type is not *arrow.ListType, instead of silently no-oping - reflect_infer_test.go: add ree-tagged roundtrip subtest that exercises inferStructType REE path (schema shows RUN_END_ENCODED) and both appendValue RunEndEncodedBuilder and setValue RunEndEncoded paths via FromGoSlice/ToGoSlice roundtrip - reflect_arrow_to_go_test.go: time64 assertion now includes Second() check to catch seconds-level regressions (was only Hour/Minute/Nanosecond) --- arrow/arreflect/reflect_arrow_to_go_test.go | 2 +- arrow/arreflect/reflect_infer.go | 6 ++-- arrow/arreflect/reflect_infer_test.go | 38 +++++++++++++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/arrow/arreflect/reflect_arrow_to_go_test.go b/arrow/arreflect/reflect_arrow_to_go_test.go index ac0b496c..344dc2f8 100644 --- a/arrow/arreflect/reflect_arrow_to_go_test.go +++ b/arrow/arreflect/reflect_arrow_to_go_test.go @@ -327,7 +327,7 @@ func TestSetTemporalValue(t *testing.T) { if err := setValue(v, arr, 0); err != nil { t.Fatalf("unexpected error: %v", err) } - if got.Hour() != 10 || got.Minute() != 30 
|| got.Nanosecond() != 123456789 { + if got.Hour() != 10 || got.Minute() != 30 || got.Second() != 0 || got.Nanosecond() != 123456789 { t.Errorf("time64: got %v, want 10:30:00.123456789", got) } }) diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index a02bf299..43b234cc 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -193,9 +193,11 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { case fm.Opts.Dict: dt = &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: dt} case fm.Opts.ListView: - if lt, ok := dt.(*arrow.ListType); ok { - dt = arrow.ListViewOf(lt.Elem()) + lt, ok := dt.(*arrow.ListType) + if !ok { + return nil, fmt.Errorf("inferStructType: listview tag on field %q requires a slice type, got %v", fm.Name, dt) } + dt = arrow.ListViewOf(lt.Elem()) case fm.Opts.REE: dt = arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, dt) } diff --git a/arrow/arreflect/reflect_infer_test.go b/arrow/arreflect/reflect_infer_test.go index eac14141..19c87fc3 100644 --- a/arrow/arreflect/reflect_infer_test.go +++ b/arrow/arreflect/reflect_infer_test.go @@ -25,6 +25,7 @@ import ( "github.com/apache/arrow-go/v18/arrow/decimal" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/decimal256" + "github.com/apache/arrow-go/v18/arrow/memory" ) func TestInferPrimitiveArrowType(t *testing.T) { @@ -464,4 +465,41 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { t.Errorf("got %v, want LIST_VIEW", f[0].Type.ID()) } }) + + t.Run("ree-tagged field becomes RUN_END_ENCODED", func(t *testing.T) { + type REERow struct { + Val string `arrow:"val,ree"` + } + schema, err := InferArrowSchema[REERow]() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if schema.NumFields() != 1 { + t.Fatalf("expected 1 field, got %d", schema.NumFields()) + } + if schema.Field(0).Type.ID() != arrow.RUN_END_ENCODED { + t.Errorf("expected RUN_END_ENCODED, 
got %v", schema.Field(0).Type.ID()) + } + + // Roundtrip: exercises appendValue RunEndEncodedBuilder + setValue RunEndEncoded paths + mem := memory.NewGoAllocator() + rows := []REERow{{"hello"}, {"hello"}, {"world"}} + arr, err := FromGoSlice(rows, mem) + if err != nil { + t.Fatalf("FromGoSlice: %v", err) + } + defer arr.Release() + got, err := ToGoSlice[REERow](arr) + if err != nil { + t.Fatalf("ToGoSlice: %v", err) + } + if len(got) != len(rows) { + t.Fatalf("length mismatch: got %d, want %d", len(got), len(rows)) + } + for i, r := range rows { + if got[i] != r { + t.Errorf("[%d]: got %v, want %v", i, got[i], r) + } + } + }) } From 45e8575593cc37f28b6695cb5dc4f1b17ec07900 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 14:54:53 -0700 Subject: [PATCH 11/82] fix(arreflect): ree struct field error, dead code cleanup, UTC normalization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - inferStructType: return error for ree-tagged struct fields; the RunEndEncodedBuilder.Append creates a new run for every element so struct-field REE never compresses. Users should apply ree at the top-level via FromGoSlice instead. - reflect_arrow_to_go.go: remove dead arr.IsNull(i) checks from setStructValue, setMapValue, setFixedSizeListValue, setDictionaryValue, setRunEndEncodedValue, setListValue — setValue handles null before dispatching so these were unreachable - timeOfDayNanos: normalize t to UTC before computing midnight to avoid wrong results on DST spring-forward days in non-UTC locations - appendPrimitiveValue: remove dead arrow.TIMESTAMP case; time.Time is a struct so it always routes through buildTemporalArray, never here. 
Retain arrow.DURATION: time.Duration has kind=int64 and legitimately routes through buildPrimitiveArray --- arrow/arreflect/reflect_arrow_to_go.go | 34 ----------------------- arrow/arreflect/reflect_go_to_arrow.go | 6 ++-- arrow/arreflect/reflect_infer.go | 2 +- arrow/arreflect/reflect_infer_test.go | 38 +++++--------------------- 4 files changed, 10 insertions(+), 70 deletions(-) diff --git a/arrow/arreflect/reflect_arrow_to_go.go b/arrow/arreflect/reflect_arrow_to_go.go index 1d39b875..05c059eb 100644 --- a/arrow/arreflect/reflect_arrow_to_go.go +++ b/arrow/arreflect/reflect_arrow_to_go.go @@ -356,12 +356,6 @@ func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { } func setStructValue(v reflect.Value, sa *array.Struct, i int) error { - if sa.IsNull(i) { - if v.Kind() == reflect.Ptr { - v.Set(reflect.Zero(v.Type())) - } - return nil - } if v.Kind() == reflect.Ptr { v.Set(reflect.New(v.Type().Elem())) v = v.Elem() @@ -387,10 +381,6 @@ func setStructValue(v reflect.Value, sa *array.Struct, i int) error { } func setListValue(v reflect.Value, arr array.ListLike, i int) error { - if arr.IsNull(i) { - v.Set(reflect.Zero(v.Type())) - return nil - } if v.Kind() == reflect.Ptr { v.Set(reflect.New(v.Type().Elem())) v = v.Elem() @@ -415,12 +405,6 @@ func setListValue(v reflect.Value, arr array.ListLike, i int) error { } func setMapValue(v reflect.Value, arr *array.Map, i int) error { - if arr.IsNull(i) { - if v.Kind() == reflect.Ptr { - v.Set(reflect.Zero(v.Type())) - } - return nil - } if v.Kind() == reflect.Ptr { v.Set(reflect.New(v.Type().Elem())) v = v.Elem() @@ -453,12 +437,6 @@ func setMapValue(v reflect.Value, arr *array.Map, i int) error { } func setFixedSizeListValue(v reflect.Value, arr *array.FixedSizeList, i int) error { - if arr.IsNull(i) { - if v.Kind() == reflect.Ptr { - v.Set(reflect.Zero(v.Type())) - } - return nil - } if v.Kind() == reflect.Ptr { v.Set(reflect.New(v.Type().Elem())) v = v.Elem() @@ -493,21 +471,9 @@ func 
setFixedSizeListValue(v reflect.Value, arr *array.FixedSizeList, i int) err } func setDictionaryValue(v reflect.Value, arr *array.Dictionary, i int) error { - if arr.IsNull(i) { - if v.Kind() == reflect.Ptr { - v.Set(reflect.Zero(v.Type())) - } - return nil - } return setValue(v, arr.Dictionary(), arr.GetValueIndex(i)) } func setRunEndEncodedValue(v reflect.Value, arr *array.RunEndEncoded, i int) error { - if arr.IsNull(i) { - if v.Kind() == reflect.Ptr { - v.Set(reflect.Zero(v.Type())) - } - return nil - } return setValue(v, arr.Values(), arr.GetPhysicalIndex(i)) } diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 3b5651ac..346f9bfe 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -146,9 +146,6 @@ func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e b.(*array.StringBuilder).Append(v.String()) case arrow.BINARY: b.(*array.BinaryBuilder).Append(v.Bytes()) - case arrow.TIMESTAMP: - t := v.Interface().(time.Time) - b.(*array.TimestampBuilder).Append(arrow.Timestamp(t.UnixNano())) case arrow.DURATION: d := v.Interface().(time.Duration) b.(*array.DurationBuilder).Append(arrow.Duration(d.Nanoseconds())) @@ -169,7 +166,8 @@ func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e } func timeOfDayNanos(t time.Time) int64 { - midnight := time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location()) + t = t.UTC() + midnight := time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, time.UTC) return t.Sub(midnight).Nanoseconds() } diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index 43b234cc..f0514c2c 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -199,7 +199,7 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { } dt = arrow.ListViewOf(lt.Elem()) case fm.Opts.REE: - dt = arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, dt) + return 
nil, fmt.Errorf("inferStructType: ree tag on struct field %q is not supported; use ree at top-level via buildArray", fm.Name) } arrowFields = append(arrowFields, arrow.Field{ diff --git a/arrow/arreflect/reflect_infer_test.go b/arrow/arreflect/reflect_infer_test.go index 19c87fc3..c9c39072 100644 --- a/arrow/arreflect/reflect_infer_test.go +++ b/arrow/arreflect/reflect_infer_test.go @@ -18,6 +18,7 @@ package arreflect import ( "reflect" + "strings" "testing" "time" @@ -25,7 +26,6 @@ import ( "github.com/apache/arrow-go/v18/arrow/decimal" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/decimal256" - "github.com/apache/arrow-go/v18/arrow/memory" ) func TestInferPrimitiveArrowType(t *testing.T) { @@ -466,40 +466,16 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { } }) - t.Run("ree-tagged field becomes RUN_END_ENCODED", func(t *testing.T) { + t.Run("ree-tagged field on struct is unsupported", func(t *testing.T) { type REERow struct { Val string `arrow:"val,ree"` } - schema, err := InferArrowSchema[REERow]() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if schema.NumFields() != 1 { - t.Fatalf("expected 1 field, got %d", schema.NumFields()) - } - if schema.Field(0).Type.ID() != arrow.RUN_END_ENCODED { - t.Errorf("expected RUN_END_ENCODED, got %v", schema.Field(0).Type.ID()) - } - - // Roundtrip: exercises appendValue RunEndEncodedBuilder + setValue RunEndEncoded paths - mem := memory.NewGoAllocator() - rows := []REERow{{"hello"}, {"hello"}, {"world"}} - arr, err := FromGoSlice(rows, mem) - if err != nil { - t.Fatalf("FromGoSlice: %v", err) - } - defer arr.Release() - got, err := ToGoSlice[REERow](arr) - if err != nil { - t.Fatalf("ToGoSlice: %v", err) - } - if len(got) != len(rows) { - t.Fatalf("length mismatch: got %d, want %d", len(got), len(rows)) + _, err := InferArrowSchema[REERow]() + if err == nil { + t.Fatal("expected error for ree tag on struct field, got nil") } - for i, r := range rows { 
- if got[i] != r { - t.Errorf("[%d]: got %v, want %v", i, got[i], r) - } + if !strings.Contains(err.Error(), "ree tag on struct field") { + t.Errorf("unexpected error message: %v", err) } }) } From c6e62927cb24356166dd30ea08a99c45774449bb Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 14:59:30 -0700 Subject: [PATCH 12/82] refactor(arreflect): replace v.Interface().(T) with reflect.TypeAssert[T](v) Use Go 1.25's reflect.TypeAssert[T](v) (T, bool) in place of the v.Interface().(T) boxing pattern throughout reflect_go_to_arrow.go. Avoids the unnecessary any allocation from Interface() and makes the type constraint explicit at the call site. 19 call sites updated. --- arrow/arreflect/reflect_go_to_arrow.go | 46 +++++++++++++++----------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 346f9bfe..85264d9e 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -147,13 +147,13 @@ func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e case arrow.BINARY: b.(*array.BinaryBuilder).Append(v.Bytes()) case arrow.DURATION: - d := v.Interface().(time.Duration) + d, _ := reflect.TypeAssert[time.Duration](v) b.(*array.DurationBuilder).Append(arrow.Duration(d.Nanoseconds())) case arrow.DECIMAL128: - n := v.Interface().(decimal128.Num) + n, _ := reflect.TypeAssert[decimal128.Num](v) b.(*array.Decimal128Builder).Append(n) case arrow.DECIMAL256: - n := v.Interface().(decimal256.Num) + n, _ := reflect.TypeAssert[decimal256.Num](v) b.(*array.Decimal256Builder).Append(n) case arrow.DECIMAL32: b.(*array.Decimal32Builder).Append(decimal.Decimal32(v.Int())) @@ -195,7 +195,8 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) } v = v.Elem() } - b.Append(arrow.Date32FromTime(v.Interface().(time.Time))) + t, _ := reflect.TypeAssert[time.Time](v) + b.Append(arrow.Date32FromTime(t)) 
} return b.NewArray(), nil case "date64": @@ -211,7 +212,8 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) } v = v.Elem() } - b.Append(arrow.Date64FromTime(v.Interface().(time.Time))) + t, _ := reflect.TypeAssert[time.Time](v) + b.Append(arrow.Date64FromTime(t)) } return b.NewArray(), nil case "time32": @@ -228,7 +230,8 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) } v = v.Elem() } - b.Append(arrow.Time32(timeOfDayNanos(v.Interface().(time.Time)) / int64(dt.Unit.Multiplier()))) + t, _ := reflect.TypeAssert[time.Time](v) + b.Append(arrow.Time32(timeOfDayNanos(t) / int64(dt.Unit.Multiplier()))) } return b.NewArray(), nil case "time64": @@ -245,7 +248,8 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) } v = v.Elem() } - b.Append(arrow.Time64(timeOfDayNanos(v.Interface().(time.Time)) / int64(dt.Unit.Multiplier()))) + t, _ := reflect.TypeAssert[time.Time](v) + b.Append(arrow.Time64(timeOfDayNanos(t) / int64(dt.Unit.Multiplier()))) } return b.NewArray(), nil default: @@ -262,7 +266,7 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) } v = v.Elem() } - t := v.Interface().(time.Time) + t, _ := reflect.TypeAssert[time.Time](v) tb.Append(arrow.Timestamp(t.UnixNano())) } return tb.NewArray(), nil @@ -282,7 +286,7 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) } v = v.Elem() } - d := v.Interface().(time.Duration) + d, _ := reflect.TypeAssert[time.Duration](v) db.Append(arrow.Duration(d.Nanoseconds())) } return db.NewArray(), nil @@ -320,7 +324,7 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( } v = v.Elem() } - n := v.Interface().(decimal128.Num) + n, _ := reflect.TypeAssert[decimal128.Num](v) b.Append(n) } return b.NewArray(), nil @@ -344,7 +348,7 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( } v = v.Elem() } - n := 
v.Interface().(decimal256.Num) + n, _ := reflect.TypeAssert[decimal256.Num](v) b.Append(n) } return b.NewArray(), nil @@ -480,26 +484,30 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { tb.Append(v.Bytes()) } case *array.TimestampBuilder: - t := v.Interface().(time.Time) + t, _ := reflect.TypeAssert[time.Time](v) tb.Append(arrow.Timestamp(t.UnixNano())) case *array.Date32Builder: - tb.Append(arrow.Date32FromTime(v.Interface().(time.Time))) + t, _ := reflect.TypeAssert[time.Time](v) + tb.Append(arrow.Date32FromTime(t)) case *array.Date64Builder: - tb.Append(arrow.Date64FromTime(v.Interface().(time.Time))) + t, _ := reflect.TypeAssert[time.Time](v) + tb.Append(arrow.Date64FromTime(t)) case *array.Time32Builder: unit := tb.Type().(*arrow.Time32Type).Unit - tb.Append(arrow.Time32(timeOfDayNanos(v.Interface().(time.Time)) / int64(unit.Multiplier()))) + t, _ := reflect.TypeAssert[time.Time](v) + tb.Append(arrow.Time32(timeOfDayNanos(t) / int64(unit.Multiplier()))) case *array.Time64Builder: unit := tb.Type().(*arrow.Time64Type).Unit - tb.Append(arrow.Time64(timeOfDayNanos(v.Interface().(time.Time)) / int64(unit.Multiplier()))) + t, _ := reflect.TypeAssert[time.Time](v) + tb.Append(arrow.Time64(timeOfDayNanos(t) / int64(unit.Multiplier()))) case *array.DurationBuilder: - d := v.Interface().(time.Duration) + d, _ := reflect.TypeAssert[time.Duration](v) tb.Append(arrow.Duration(d.Nanoseconds())) case *array.Decimal128Builder: - n := v.Interface().(decimal128.Num) + n, _ := reflect.TypeAssert[decimal128.Num](v) tb.Append(n) case *array.Decimal256Builder: - n := v.Interface().(decimal256.Num) + n, _ := reflect.TypeAssert[decimal256.Num](v) tb.Append(n) case *array.Decimal32Builder: tb.Append(decimal.Decimal32(v.Int())) From f4b3c668be8ab21d952554c22bf6ebc7c9ba30c0 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 15:07:25 -0700 Subject: [PATCH 13/82] refactor(arreflect): rename public API, standardize errors, update docs MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit API renames (jobs 861/863): - ToGo[T](arr, i) → Get[T](arr, i) - ToGoSlice[T](arr) → ToSlice[T](arr) - FromGoSlice[T](vals, mem) → FromSlice[T](vals, mem) - InferArrowSchema[T]() → SchemaOf[T]() - InferArrowType[T]() → TypeOf[T]() RecordToSlice / RecordFromSlice unchanged. Error prefixes: all fmt.Errorf calls now use 'arreflect: ' prefix consistently (was a mix of 'arrow/reflect: ', 'buildXxx: ', etc.) Bug fix: RecordFromSlice error now prints Go type T via reflect.TypeFor[T]() instead of the Arrow array type from %T arr. REE error: 'via buildArray' (unexported) → 'via FromSlice' (public API). doc.go: added missing tag options listview, ree, and decimal(p,s) with brief descriptions. --- arrow/arreflect/doc.go | 12 +- arrow/arreflect/reflect.go | 16 +-- arrow/arreflect/reflect_arrow_to_go.go | 122 ++++++++++---------- arrow/arreflect/reflect_go_to_arrow.go | 54 ++++----- arrow/arreflect/reflect_infer.go | 16 +-- arrow/arreflect/reflect_infer_test.go | 24 ++-- arrow/arreflect/reflect_integration_test.go | 84 +++++++------- arrow/arreflect/reflect_public_test.go | 34 +++--- 8 files changed, 184 insertions(+), 178 deletions(-) diff --git a/arrow/arreflect/doc.go b/arrow/arreflect/doc.go index 2605f449..6eb242f7 100644 --- a/arrow/arreflect/doc.go +++ b/arrow/arreflect/doc.go @@ -17,11 +17,11 @@ // Package arreflect provides utilities for converting between // Apache Arrow arrays and Go structs using reflection. // -// The primary entry points are the generic functions [ToGo], [ToGoSlice], -// [FromGoSlice], [RecordToSlice], and [RecordFromSlice], which convert +// The primary entry points are the generic functions [Get], [ToSlice], +// [FromSlice], [RecordToSlice], and [RecordFromSlice], which convert // between Arrow arrays/records and Go slices of structs. // -// Schema inference is available via [InferArrowSchema] and [InferArrowType]. 
+// Schema inference is available via [SchemaOf] and [TypeOf]. // // Arrow struct tags control field mapping: // @@ -39,4 +39,10 @@ // arrow:"field,date64" — use Date64 instead of Timestamp // arrow:"field,time32" — use Time32(ms) instead of Timestamp // arrow:"field,time64" — use Time64(ns) instead of Timestamp +// +// Additional tag options: +// +// arrow:"field,listview" — use ListView instead of List for slice fields +// arrow:"field,ree" — run-end encoding at top-level only (struct fields not supported) +// arrow:"field,decimal(precision,scale)" — override decimal precision and scale (e.g., arrow:",decimal(18,2)") package arreflect diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index 0cfbcfdb..280919eb 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -323,7 +323,7 @@ func cachedStructFields(t reflect.Type) []fieldMeta { return v.([]fieldMeta) } -func ToGo[T any](arr arrow.Array, i int) (T, error) { +func Get[T any](arr arrow.Array, i int) (T, error) { var result T v := reflect.ValueOf(&result).Elem() if err := setValue(v, arr, i); err != nil { @@ -333,23 +333,23 @@ func ToGo[T any](arr arrow.Array, i int) (T, error) { return result, nil } -func ToGoSlice[T any](arr arrow.Array) ([]T, error) { +func ToSlice[T any](arr arrow.Array) ([]T, error) { n := arr.Len() result := make([]T, n) for i := 0; i < n; i++ { v := reflect.ValueOf(&result[i]).Elem() if err := setValue(v, arr, i); err != nil { - return nil, fmt.Errorf("ToGoSlice: index %d: %w", i, err) + return nil, fmt.Errorf("arreflect: index %d: %w", i, err) } } return result, nil } -func FromGoSlice[T any](vals []T, mem memory.Allocator) (arrow.Array, error) { +func FromSlice[T any](vals []T, mem memory.Allocator) (arrow.Array, error) { if len(vals) == 0 { dt, err := inferArrowType(reflect.TypeFor[T]()) if err != nil { - return nil, fmt.Errorf("FromGoSlice: %w", err) + return nil, fmt.Errorf("arreflect: %w", err) } b := array.NewBuilder(mem, dt) defer b.Release() 
@@ -362,18 +362,18 @@ func FromGoSlice[T any](vals []T, mem memory.Allocator) (arrow.Array, error) { func RecordToSlice[T any](rec arrow.Record) ([]T, error) { sa := array.RecordToStructArray(rec) defer sa.Release() - return ToGoSlice[T](sa) + return ToSlice[T](sa) } func RecordFromSlice[T any](vals []T, mem memory.Allocator) (arrow.Record, error) { - arr, err := FromGoSlice[T](vals, mem) + arr, err := FromSlice[T](vals, mem) if err != nil { return nil, err } defer arr.Release() sa, ok := arr.(*array.Struct) if !ok { - return nil, fmt.Errorf("RecordFromSlice: T must be a struct type, got %T", arr) + return nil, fmt.Errorf("arreflect: RecordFromSlice requires a struct type T, got %v", reflect.TypeFor[T]()) } return array.RecordFromStructArray(sa, nil), nil } diff --git a/arrow/arreflect/reflect_arrow_to_go.go b/arrow/arreflect/reflect_arrow_to_go.go index 05c059eb..8ef4e3a8 100644 --- a/arrow/arreflect/reflect_arrow_to_go.go +++ b/arrow/arreflect/reflect_arrow_to_go.go @@ -39,10 +39,10 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.BOOL: a, ok := arr.(*array.Boolean) if !ok { - return fmt.Errorf("arrow/reflect: expected *Boolean, got %T", arr) + return fmt.Errorf("arreflect: expected *Boolean, got %T", arr) } if v.Kind() != reflect.Bool { - return fmt.Errorf("arrow/reflect: cannot set bool into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set bool into %s", v.Type()) } v.SetBool(a.Value(i)) @@ -54,40 +54,40 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.STRING: a, ok := arr.(*array.String) if !ok { - return fmt.Errorf("arrow/reflect: expected *String, got %T", arr) + return fmt.Errorf("arreflect: expected *String, got %T", arr) } if v.Kind() != reflect.String { - return fmt.Errorf("arrow/reflect: cannot set string into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set string into %s", v.Type()) } v.SetString(a.Value(i)) case arrow.LARGE_STRING: a, ok := arr.(*array.LargeString) if !ok { - 
return fmt.Errorf("arrow/reflect: expected *LargeString, got %T", arr) + return fmt.Errorf("arreflect: expected *LargeString, got %T", arr) } if v.Kind() != reflect.String { - return fmt.Errorf("arrow/reflect: cannot set string into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set string into %s", v.Type()) } v.SetString(a.Value(i)) case arrow.BINARY: a, ok := arr.(*array.Binary) if !ok { - return fmt.Errorf("arrow/reflect: expected *Binary, got %T", arr) + return fmt.Errorf("arreflect: expected *Binary, got %T", arr) } if v.Kind() != reflect.Slice || v.Type().Elem().Kind() != reflect.Uint8 { - return fmt.Errorf("arrow/reflect: cannot set []byte into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set []byte into %s", v.Type()) } v.SetBytes(a.Value(i)) case arrow.LARGE_BINARY: a, ok := arr.(*array.LargeBinary) if !ok { - return fmt.Errorf("arrow/reflect: expected *LargeBinary, got %T", arr) + return fmt.Errorf("arreflect: expected *LargeBinary, got %T", arr) } if v.Kind() != reflect.Slice || v.Type().Elem().Kind() != reflect.Uint8 { - return fmt.Errorf("arrow/reflect: cannot set []byte into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set []byte into %s", v.Type()) } v.SetBytes(a.Value(i)) @@ -101,47 +101,47 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.STRUCT: a, ok := arr.(*array.Struct) if !ok { - return fmt.Errorf("arrow/reflect: expected *Struct, got %T", arr) + return fmt.Errorf("arreflect: expected *Struct, got %T", arr) } return setStructValue(v, a, i) case arrow.LIST, arrow.LARGE_LIST, arrow.LIST_VIEW, arrow.LARGE_LIST_VIEW: a, ok := arr.(array.ListLike) if !ok { - return fmt.Errorf("arrow/reflect: expected ListLike, got %T", arr) + return fmt.Errorf("arreflect: expected ListLike, got %T", arr) } return setListValue(v, a, i) case arrow.MAP: a, ok := arr.(*array.Map) if !ok { - return fmt.Errorf("arrow/reflect: expected *Map, got %T", arr) + return fmt.Errorf("arreflect: expected *Map, got %T", arr) } 
return setMapValue(v, a, i) case arrow.FIXED_SIZE_LIST: a, ok := arr.(*array.FixedSizeList) if !ok { - return fmt.Errorf("arrow/reflect: expected *FixedSizeList, got %T", arr) + return fmt.Errorf("arreflect: expected *FixedSizeList, got %T", arr) } return setFixedSizeListValue(v, a, i) case arrow.DICTIONARY: a, ok := arr.(*array.Dictionary) if !ok { - return fmt.Errorf("arrow/reflect: expected *Dictionary, got %T", arr) + return fmt.Errorf("arreflect: expected *Dictionary, got %T", arr) } return setDictionaryValue(v, a, i) case arrow.RUN_END_ENCODED: a, ok := arr.(*array.RunEndEncoded) if !ok { - return fmt.Errorf("arrow/reflect: expected *RunEndEncoded, got %T", arr) + return fmt.Errorf("arreflect: expected *RunEndEncoded, got %T", arr) } return setRunEndEncodedValue(v, a, i) default: - return fmt.Errorf("arrow/reflect: unsupported Arrow type %v for reflection", arr.DataType()) + return fmt.Errorf("arreflect: unsupported Arrow type %v for reflection", arr.DataType()) } return nil } @@ -156,63 +156,63 @@ func setPrimitiveValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.INT8: if v.Kind() != reflect.Int && v.Kind() != reflect.Int8 && v.Kind() != reflect.Int16 && v.Kind() != reflect.Int32 && v.Kind() != reflect.Int64 { - return fmt.Errorf("arrow/reflect: cannot set int8 into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set int8 into %s", v.Type()) } v.SetInt(int64(arr.(*array.Int8).Value(i))) case arrow.INT16: if v.Kind() != reflect.Int && v.Kind() != reflect.Int8 && v.Kind() != reflect.Int16 && v.Kind() != reflect.Int32 && v.Kind() != reflect.Int64 { - return fmt.Errorf("arrow/reflect: cannot set int16 into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set int16 into %s", v.Type()) } v.SetInt(int64(arr.(*array.Int16).Value(i))) case arrow.INT32: if v.Kind() != reflect.Int && v.Kind() != reflect.Int8 && v.Kind() != reflect.Int16 && v.Kind() != reflect.Int32 && v.Kind() != reflect.Int64 { - return fmt.Errorf("arrow/reflect: cannot set 
int32 into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set int32 into %s", v.Type()) } v.SetInt(int64(arr.(*array.Int32).Value(i))) case arrow.INT64: if v.Kind() != reflect.Int && v.Kind() != reflect.Int8 && v.Kind() != reflect.Int16 && v.Kind() != reflect.Int32 && v.Kind() != reflect.Int64 { - return fmt.Errorf("arrow/reflect: cannot set int64 into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set int64 into %s", v.Type()) } v.SetInt(arr.(*array.Int64).Value(i)) case arrow.UINT8: if v.Kind() != reflect.Uint && v.Kind() != reflect.Uint8 && v.Kind() != reflect.Uint16 && v.Kind() != reflect.Uint32 && v.Kind() != reflect.Uint64 && v.Kind() != reflect.Uintptr { - return fmt.Errorf("arrow/reflect: cannot set uint8 into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set uint8 into %s", v.Type()) } v.SetUint(uint64(arr.(*array.Uint8).Value(i))) case arrow.UINT16: if v.Kind() != reflect.Uint && v.Kind() != reflect.Uint8 && v.Kind() != reflect.Uint16 && v.Kind() != reflect.Uint32 && v.Kind() != reflect.Uint64 && v.Kind() != reflect.Uintptr { - return fmt.Errorf("arrow/reflect: cannot set uint16 into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set uint16 into %s", v.Type()) } v.SetUint(uint64(arr.(*array.Uint16).Value(i))) case arrow.UINT32: if v.Kind() != reflect.Uint && v.Kind() != reflect.Uint8 && v.Kind() != reflect.Uint16 && v.Kind() != reflect.Uint32 && v.Kind() != reflect.Uint64 && v.Kind() != reflect.Uintptr { - return fmt.Errorf("arrow/reflect: cannot set uint32 into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set uint32 into %s", v.Type()) } v.SetUint(uint64(arr.(*array.Uint32).Value(i))) case arrow.UINT64: if v.Kind() != reflect.Uint && v.Kind() != reflect.Uint8 && v.Kind() != reflect.Uint16 && v.Kind() != reflect.Uint32 && v.Kind() != reflect.Uint64 && v.Kind() != reflect.Uintptr { - return fmt.Errorf("arrow/reflect: cannot set uint64 into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set uint64 into %s", 
v.Type()) } v.SetUint(arr.(*array.Uint64).Value(i)) case arrow.FLOAT32: if v.Kind() != reflect.Float32 && v.Kind() != reflect.Float64 { - return fmt.Errorf("arrow/reflect: cannot set float32 into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set float32 into %s", v.Type()) } v.SetFloat(float64(arr.(*array.Float32).Value(i))) case arrow.FLOAT64: if v.Kind() != reflect.Float32 && v.Kind() != reflect.Float64 { - return fmt.Errorf("arrow/reflect: cannot set float64 into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set float64 into %s", v.Type()) } v.SetFloat(arr.(*array.Float64).Value(i)) default: - return fmt.Errorf("arrow/reflect: unsupported primitive type %v", arr.DataType()) + return fmt.Errorf("arreflect: unsupported primitive type %v", arr.DataType()) } return nil } @@ -227,10 +227,10 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.TIMESTAMP: a, ok := arr.(*array.Timestamp) if !ok { - return fmt.Errorf("arrow/reflect: expected *Timestamp, got %T", arr) + return fmt.Errorf("arreflect: expected *Timestamp, got %T", arr) } if v.Type() != typeOfTime { - return fmt.Errorf("arrow/reflect: cannot set time.Time into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set time.Time into %s", v.Type()) } unit := arr.DataType().(*arrow.TimestampType).Unit t := a.Value(i).ToTime(unit) @@ -239,10 +239,10 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.DATE32: a, ok := arr.(*array.Date32) if !ok { - return fmt.Errorf("arrow/reflect: expected *Date32, got %T", arr) + return fmt.Errorf("arreflect: expected *Date32, got %T", arr) } if v.Type() != typeOfTime { - return fmt.Errorf("arrow/reflect: cannot set time.Time into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set time.Time into %s", v.Type()) } t := a.Value(i).ToTime() v.Set(reflect.ValueOf(t)) @@ -250,10 +250,10 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.DATE64: a, ok := 
arr.(*array.Date64) if !ok { - return fmt.Errorf("arrow/reflect: expected *Date64, got %T", arr) + return fmt.Errorf("arreflect: expected *Date64, got %T", arr) } if v.Type() != typeOfTime { - return fmt.Errorf("arrow/reflect: cannot set time.Time into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set time.Time into %s", v.Type()) } t := a.Value(i).ToTime() v.Set(reflect.ValueOf(t)) @@ -261,10 +261,10 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.TIME32: a, ok := arr.(*array.Time32) if !ok { - return fmt.Errorf("arrow/reflect: expected *Time32, got %T", arr) + return fmt.Errorf("arreflect: expected *Time32, got %T", arr) } if v.Type() != typeOfTime { - return fmt.Errorf("arrow/reflect: cannot set time.Time into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set time.Time into %s", v.Type()) } unit := arr.DataType().(*arrow.Time32Type).Unit t := a.Value(i).ToTime(unit) @@ -273,10 +273,10 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.TIME64: a, ok := arr.(*array.Time64) if !ok { - return fmt.Errorf("arrow/reflect: expected *Time64, got %T", arr) + return fmt.Errorf("arreflect: expected *Time64, got %T", arr) } if v.Type() != typeOfTime { - return fmt.Errorf("arrow/reflect: cannot set time.Time into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set time.Time into %s", v.Type()) } unit := arr.DataType().(*arrow.Time64Type).Unit t := a.Value(i).ToTime(unit) @@ -285,17 +285,17 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.DURATION: a, ok := arr.(*array.Duration) if !ok { - return fmt.Errorf("arrow/reflect: expected *Duration, got %T", arr) + return fmt.Errorf("arreflect: expected *Duration, got %T", arr) } if v.Type() != typeOfDuration { - return fmt.Errorf("arrow/reflect: cannot set time.Duration into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set time.Duration into %s", v.Type()) } unit := 
arr.DataType().(*arrow.DurationType).Unit dur := time.Duration(a.Value(i)) * unit.Multiplier() v.Set(reflect.ValueOf(dur)) default: - return fmt.Errorf("arrow/reflect: unsupported temporal type %v", arr.DataType()) + return fmt.Errorf("arreflect: unsupported temporal type %v", arr.DataType()) } return nil } @@ -310,10 +310,10 @@ func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.DECIMAL128: a, ok := arr.(*array.Decimal128) if !ok { - return fmt.Errorf("arrow/reflect: expected *Decimal128, got %T", arr) + return fmt.Errorf("arreflect: expected *Decimal128, got %T", arr) } if v.Type() != typeOfDec128 { - return fmt.Errorf("arrow/reflect: cannot set decimal128.Num into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set decimal128.Num into %s", v.Type()) } num := a.Value(i) v.Set(reflect.ValueOf(num)) @@ -321,10 +321,10 @@ func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.DECIMAL256: a, ok := arr.(*array.Decimal256) if !ok { - return fmt.Errorf("arrow/reflect: expected *Decimal256, got %T", arr) + return fmt.Errorf("arreflect: expected *Decimal256, got %T", arr) } if v.Type() != typeOfDec256 { - return fmt.Errorf("arrow/reflect: cannot set decimal256.Num into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set decimal256.Num into %s", v.Type()) } num := a.Value(i) v.Set(reflect.ValueOf(num)) @@ -332,25 +332,25 @@ func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.DECIMAL32: a, ok := arr.(*array.Decimal32) if !ok { - return fmt.Errorf("arrow/reflect: expected *Decimal32, got %T", arr) + return fmt.Errorf("arreflect: expected *Decimal32, got %T", arr) } if v.Type() != typeOfDec32 { - return fmt.Errorf("arrow/reflect: cannot set decimal.Decimal32 into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set decimal.Decimal32 into %s", v.Type()) } v.Set(reflect.ValueOf(a.Value(i))) case arrow.DECIMAL64: a, ok := arr.(*array.Decimal64) if !ok { - return 
fmt.Errorf("arrow/reflect: expected *Decimal64, got %T", arr) + return fmt.Errorf("arreflect: expected *Decimal64, got %T", arr) } if v.Type() != typeOfDec64 { - return fmt.Errorf("arrow/reflect: cannot set decimal.Decimal64 into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set decimal.Decimal64 into %s", v.Type()) } v.Set(reflect.ValueOf(a.Value(i))) default: - return fmt.Errorf("arrow/reflect: unsupported decimal type %v", arr.DataType()) + return fmt.Errorf("arreflect: unsupported decimal type %v", arr.DataType()) } return nil } @@ -362,7 +362,7 @@ func setStructValue(v reflect.Value, sa *array.Struct, i int) error { } if v.Kind() != reflect.Struct { - return fmt.Errorf("arrow/reflect: cannot set struct into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set struct into %s", v.Type()) } fields := cachedStructFields(v.Type()) @@ -374,7 +374,7 @@ func setStructValue(v reflect.Value, sa *array.Struct, i int) error { continue } if err := setValue(v.FieldByIndex(fm.Index), sa.Field(arrowIdx), i); err != nil { - return fmt.Errorf("arrow/reflect: field %q: %w", fm.Name, err) + return fmt.Errorf("arreflect: field %q: %w", fm.Name, err) } } return nil @@ -387,7 +387,7 @@ func setListValue(v reflect.Value, arr array.ListLike, i int) error { } if v.Kind() != reflect.Slice { - return fmt.Errorf("arrow/reflect: cannot set list into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set list into %s", v.Type()) } start, end := arr.ValueOffsets(i) @@ -397,7 +397,7 @@ func setListValue(v reflect.Value, arr array.ListLike, i int) error { result := reflect.MakeSlice(v.Type(), length, length) for j := 0; j < length; j++ { if err := setValue(result.Index(j), child, int(start)+j); err != nil { - return fmt.Errorf("arrow/reflect: list element %d: %w", j, err) + return fmt.Errorf("arreflect: list element %d: %w", j, err) } } v.Set(result) @@ -411,7 +411,7 @@ func setMapValue(v reflect.Value, arr *array.Map, i int) error { } if v.Kind() != reflect.Map { - return 
fmt.Errorf("arrow/reflect: cannot set map into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set map into %s", v.Type()) } start, end := arr.ValueOffsets(i) @@ -424,11 +424,11 @@ func setMapValue(v reflect.Value, arr *array.Map, i int) error { for j := int(start); j < int(end); j++ { keyVal := reflect.New(keyType).Elem() if err := setValue(keyVal, keys, j); err != nil { - return fmt.Errorf("arrow/reflect: map key %d: %w", j-int(start), err) + return fmt.Errorf("arreflect: map key %d: %w", j-int(start), err) } elemVal := reflect.New(elemType).Elem() if err := setValue(elemVal, items, j); err != nil { - return fmt.Errorf("arrow/reflect: map value %d: %w", j-int(start), err) + return fmt.Errorf("arreflect: map value %d: %w", j-int(start), err) } result.SetMapIndex(keyVal, elemVal) } @@ -449,23 +449,23 @@ func setFixedSizeListValue(v reflect.Value, arr *array.FixedSizeList, i int) err switch v.Kind() { case reflect.Array: if v.Len() != n { - return fmt.Errorf("arrow/reflect: fixed-size list length %d does not match Go array length %d", n, v.Len()) + return fmt.Errorf("arreflect: fixed-size list length %d does not match Go array length %d", n, v.Len()) } for k := 0; k < n; k++ { if err := setValue(v.Index(k), child, int(start)+k); err != nil { - return fmt.Errorf("arrow/reflect: fixed-size list element %d: %w", k, err) + return fmt.Errorf("arreflect: fixed-size list element %d: %w", k, err) } } case reflect.Slice: result := reflect.MakeSlice(v.Type(), n, n) for k := 0; k < n; k++ { if err := setValue(result.Index(k), child, int(start)+k); err != nil { - return fmt.Errorf("arrow/reflect: fixed-size list element %d: %w", k, err) + return fmt.Errorf("arreflect: fixed-size list element %d: %w", k, err) } } v.Set(result) default: - return fmt.Errorf("arrow/reflect: cannot set fixed-size list into %s", v.Type()) + return fmt.Errorf("arreflect: cannot set fixed-size list into %s", v.Type()) } return nil } diff --git a/arrow/arreflect/reflect_go_to_arrow.go 
b/arrow/arreflect/reflect_go_to_arrow.go index 85264d9e..3a0d242d 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -31,7 +31,7 @@ import ( func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { if vals.Kind() != reflect.Slice { - return nil, fmt.Errorf("buildArray: expected slice, got %v", vals.Kind()) + return nil, fmt.Errorf("arreflect: expected slice, got %v", vals.Kind()) } elemType := vals.Type().Elem() @@ -92,7 +92,7 @@ func buildPrimitiveArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, dt, err := inferArrowType(elemType) if err != nil { - return nil, fmt.Errorf("buildPrimitiveArray: %w", err) + return nil, fmt.Errorf("arreflect: %w", err) } b := array.NewBuilder(mem, dt) @@ -160,7 +160,7 @@ func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e case arrow.DECIMAL64: b.(*array.Decimal64Builder).Append(decimal.Decimal64(v.Int())) default: - return fmt.Errorf("appendPrimitiveValue: unsupported Arrow type %v", dt) + return fmt.Errorf("arreflect: unsupported Arrow type %v", dt) } return nil } @@ -292,7 +292,7 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) return db.NewArray(), nil default: - return nil, fmt.Errorf("buildTemporalArray: unsupported type %v", elemType) + return nil, fmt.Errorf("arreflect: unsupported temporal type %v", elemType) } } @@ -400,7 +400,7 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( return b.NewArray(), nil default: - return nil, fmt.Errorf("buildDecimalArray: unsupported type %v", elemType) + return nil, fmt.Errorf("arreflect: unsupported decimal type %v", elemType) } } @@ -413,7 +413,7 @@ func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, er st, err := inferStructType(elemType) if err != nil { - return nil, fmt.Errorf("buildStructArray: %w", err) + return nil, fmt.Errorf("arreflect: %w", err) } fields := 
cachedStructFields(elemType) @@ -435,7 +435,7 @@ func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, er fv := v.FieldByIndex(fm.Index) fb := sb.FieldBuilder(fi) if err := appendValue(fb, fv, fm.Opts); err != nil { - return nil, fmt.Errorf("buildStructArray: field %q: %w", fm.Name, err) + return nil, fmt.Errorf("arreflect: struct field %q: %w", fm.Name, err) } } } @@ -528,7 +528,7 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { case *array.FixedSizeListBuilder: expectedLen := int(tb.Type().(*arrow.FixedSizeListType).Len()) if v.Len() != expectedLen { - return fmt.Errorf("appendValue: fixed-size list length mismatch: got %d, want %d", v.Len(), expectedLen) + return fmt.Errorf("arreflect: fixed-size list length mismatch: got %d, want %d", v.Len(), expectedLen) } tb.Append(true) vb := tb.ValueBuilder() @@ -561,7 +561,7 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { fv := v.FieldByIndex(fm.Index) fb := tb.FieldBuilder(fi) if err := appendValue(fb, fv, fm.Opts); err != nil { - return fmt.Errorf("appendValue: struct field %q: %w", fm.Name, err) + return fmt.Errorf("arreflect: struct field %q: %w", fm.Name, err) } } case *array.ListViewBuilder: @@ -589,7 +589,7 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { if db, ok := b.(array.DictionaryBuilder); ok { return appendToDictBuilder(db, v) } - return fmt.Errorf("appendValue: unsupported builder type %T", b) + return fmt.Errorf("arreflect: unsupported builder type %T", b) } return nil } @@ -607,7 +607,7 @@ func appendToDictBuilder(db array.DictionaryBuilder, v reflect.Value) error { } return bdb.Append(v.Bytes()) default: - return fmt.Errorf("appendToDictBuilder: unsupported value kind %v for BinaryDictionaryBuilder", v.Kind()) + return fmt.Errorf("arreflect: unsupported value kind %v for BinaryDictionaryBuilder", v.Kind()) } case *array.Int8DictionaryBuilder: return bdb.Append(int8(v.Int())) @@ -630,7 +630,7 @@ func 
appendToDictBuilder(db array.DictionaryBuilder, v reflect.Value) error { case *array.Float64DictionaryBuilder: return bdb.Append(float64(v.Float())) } - return fmt.Errorf("appendToDictBuilder: unsupported builder type %T", db) + return fmt.Errorf("arreflect: unsupported builder type %T", db) } func buildListArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { @@ -647,7 +647,7 @@ func buildListArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, erro elemDT, err := inferArrowType(innerElemType) if err != nil { - return nil, fmt.Errorf("buildListArray: %w", err) + return nil, fmt.Errorf("arreflect: %w", err) } lb := array.NewListBuilder(mem, elemDT) @@ -671,7 +671,7 @@ func buildListArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, erro lb.Append(true) for j := 0; j < outer.Len(); j++ { if err := appendValue(vb, outer.Index(j), tagOpts{}); err != nil { - return nil, fmt.Errorf("buildListArray: element [%d][%d]: %w", i, j, err) + return nil, fmt.Errorf("arreflect: list element [%d][%d]: %w", i, j, err) } } } @@ -698,11 +698,11 @@ func buildMapArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error keyDT, err := inferArrowType(keyType) if err != nil { - return nil, fmt.Errorf("buildMapArray: key type: %w", err) + return nil, fmt.Errorf("arreflect: map key type: %w", err) } valDT, err := inferArrowType(valType) if err != nil { - return nil, fmt.Errorf("buildMapArray: value type: %w", err) + return nil, fmt.Errorf("arreflect: map value type: %w", err) } mb := array.NewMapBuilder(mem, keyDT, valDT, false) @@ -727,10 +727,10 @@ func buildMapArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error mb.Append(true) for _, key := range m.MapKeys() { if err := appendValue(kb, key, tagOpts{}); err != nil { - return nil, fmt.Errorf("buildMapArray: key: %w", err) + return nil, fmt.Errorf("arreflect: map key: %w", err) } if err := appendValue(ib, m.MapIndex(key), tagOpts{}); err != nil { - return nil, 
fmt.Errorf("buildMapArray: value: %w", err) + return nil, fmt.Errorf("arreflect: map value: %w", err) } } } @@ -746,7 +746,7 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar } if elemType.Kind() != reflect.Array { - return nil, fmt.Errorf("buildFixedSizeListArray: expected array element, got %v", elemType.Kind()) + return nil, fmt.Errorf("arreflect: expected array element, got %v", elemType.Kind()) } n := int32(elemType.Len()) @@ -757,7 +757,7 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar innerDT, err := inferArrowType(innerElemType) if err != nil { - return nil, fmt.Errorf("buildFixedSizeListArray: %w", err) + return nil, fmt.Errorf("arreflect: %w", err) } fb := array.NewFixedSizeListBuilder(mem, n, innerDT) @@ -777,7 +777,7 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar fb.Append(true) for j := 0; j < int(n); j++ { if err := appendValue(vb, elem.Index(j), tagOpts{}); err != nil { - return nil, fmt.Errorf("buildFixedSizeListArray: element [%d][%d]: %w", i, j, err) + return nil, fmt.Errorf("arreflect: fixed-size list element [%d][%d]: %w", i, j, err) } } } @@ -794,7 +794,7 @@ func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array valDT, err := inferArrowType(elemType) if err != nil { - return nil, fmt.Errorf("buildDictionaryArray: %w", err) + return nil, fmt.Errorf("arreflect: %w", err) } dt := &arrow.DictionaryType{ @@ -816,7 +816,7 @@ func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array elem = elem.Elem() } if err := appendToDictBuilder(db, elem); err != nil { - return nil, fmt.Errorf("buildDictionaryArray[%d]: %w", i, err) + return nil, fmt.Errorf("arreflect: dictionary element [%d]: %w", i, err) } } return db.NewArray(), nil @@ -883,7 +883,7 @@ func buildRunEndEncodedArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar runEndsSlice := reflect.ValueOf(runEnds) runEndsArr, err := 
buildPrimitiveArray(runEndsSlice, mem) if err != nil { - return nil, fmt.Errorf("buildRunEndEncodedArray: run ends: %w", err) + return nil, fmt.Errorf("arreflect: run-end encoded run ends: %w", err) } defer runEndsArr.Release() @@ -893,7 +893,7 @@ func buildRunEndEncodedArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar } valuesArr, err := buildArray(runValues, tagOpts{}, mem) if err != nil { - return nil, fmt.Errorf("buildRunEndEncodedArray: values: %w", err) + return nil, fmt.Errorf("arreflect: run-end encoded values: %w", err) } defer valuesArr.Release() @@ -914,7 +914,7 @@ func buildListViewArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, elemDT, err := inferArrowType(innerElemType) if err != nil { - return nil, fmt.Errorf("buildListViewArray: %w", err) + return nil, fmt.Errorf("arreflect: %w", err) } lvb := array.NewListViewBuilder(mem, elemDT) @@ -938,7 +938,7 @@ func buildListViewArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, lvb.AppendWithSize(true, outer.Len()) for j := 0; j < outer.Len(); j++ { if err := appendValue(vb, outer.Index(j), tagOpts{}); err != nil { - return nil, fmt.Errorf("buildListViewArray: element [%d][%d]: %w", i, j, err) + return nil, fmt.Errorf("arreflect: list-view element [%d][%d]: %w", i, j, err) } } } diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index f0514c2c..26c9dc84 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -88,7 +88,7 @@ func inferPrimitiveArrowType(t reflect.Type) (arrow.DataType, error) { case typeOfDec256: return &arrow.Decimal256Type{Precision: 76, Scale: 0}, nil default: - return nil, fmt.Errorf("unsupported Go type for Arrow inference: %v", t) + return nil, fmt.Errorf("arreflect: unsupported Go type for Arrow inference: %v", t) } } @@ -140,7 +140,7 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { t = t.Elem() } if t.Kind() != reflect.Struct { - return nil, fmt.Errorf("inferStructType: 
expected struct, got %v", t) + return nil, fmt.Errorf("arreflect: expected struct, got %v", t) } fields := cachedStructFields(t) @@ -154,7 +154,7 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { dt, err := inferArrowType(fm.Type) if err != nil { - return nil, fmt.Errorf("inferStructType: field %q: %w", fm.Name, err) + return nil, fmt.Errorf("arreflect: struct field %q: %w", fm.Name, err) } if fm.Opts.HasDecimalOpts { @@ -195,11 +195,11 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { case fm.Opts.ListView: lt, ok := dt.(*arrow.ListType) if !ok { - return nil, fmt.Errorf("inferStructType: listview tag on field %q requires a slice type, got %v", fm.Name, dt) + return nil, fmt.Errorf("arreflect: listview tag on field %q requires a slice type, got %v", fm.Name, dt) } dt = arrow.ListViewOf(lt.Elem()) case fm.Opts.REE: - return nil, fmt.Errorf("inferStructType: ree tag on struct field %q is not supported; use ree at top-level via buildArray", fm.Name) + return nil, fmt.Errorf("arreflect: ree tag on struct field %q is not supported; use ree at top-level via FromSlice", fm.Name) } arrowFields = append(arrowFields, arrow.Field{ @@ -212,13 +212,13 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { return arrow.StructOf(arrowFields...), nil } -func InferArrowSchema[T any]() (*arrow.Schema, error) { +func SchemaOf[T any]() (*arrow.Schema, error) { t := reflect.TypeFor[T]() for t.Kind() == reflect.Ptr { t = t.Elem() } if t.Kind() != reflect.Struct { - return nil, fmt.Errorf("InferArrowSchema: T must be a struct type, got %v", t) + return nil, fmt.Errorf("arreflect: SchemaOf requires a struct type T, got %v", t) } st, err := inferStructType(t) if err != nil { @@ -231,7 +231,7 @@ func InferArrowSchema[T any]() (*arrow.Schema, error) { return arrow.NewSchema(fields, nil), nil } -func InferArrowType[T any]() (arrow.DataType, error) { +func TypeOf[T any]() (arrow.DataType, error) { t := reflect.TypeFor[T]() return 
inferArrowType(t) } diff --git a/arrow/arreflect/reflect_infer_test.go b/arrow/arreflect/reflect_infer_test.go index c9c39072..df2177db 100644 --- a/arrow/arreflect/reflect_infer_test.go +++ b/arrow/arreflect/reflect_infer_test.go @@ -315,7 +315,7 @@ func TestInferArrowSchema(t *testing.T) { Age int32 Score float64 } - schema, err := InferArrowSchema[S]() + schema, err := SchemaOf[S]() if err != nil { t.Fatal(err) } @@ -338,7 +338,7 @@ func TestInferArrowSchema(t *testing.T) { ID int32 Label *string } - schema, err := InferArrowSchema[S]() + schema, err := SchemaOf[S]() if err != nil { t.Fatal(err) } @@ -355,7 +355,7 @@ func TestInferArrowSchema(t *testing.T) { Keep string Hidden int32 `arrow:"-"` } - schema, err := InferArrowSchema[S]() + schema, err := SchemaOf[S]() if err != nil { t.Fatal(err) } @@ -371,7 +371,7 @@ func TestInferArrowSchema(t *testing.T) { type S struct { GoName int64 `arrow:"custom_name"` } - schema, err := InferArrowSchema[S]() + schema, err := SchemaOf[S]() if err != nil { t.Fatal(err) } @@ -381,7 +381,7 @@ func TestInferArrowSchema(t *testing.T) { }) t.Run("non-struct type returns error", func(t *testing.T) { - _, err := InferArrowSchema[int]() + _, err := SchemaOf[int]() if err == nil { t.Error("expected error for non-struct, got nil") } @@ -390,7 +390,7 @@ func TestInferArrowSchema(t *testing.T) { func TestInferArrowTypePublic(t *testing.T) { t.Run("int32 is INT32", func(t *testing.T) { - dt, err := InferArrowType[int32]() + dt, err := TypeOf[int32]() if err != nil { t.Fatal(err) } @@ -400,7 +400,7 @@ func TestInferArrowTypePublic(t *testing.T) { }) t.Run("[]string is LIST", func(t *testing.T) { - dt, err := InferArrowType[[]string]() + dt, err := TypeOf[[]string]() if err != nil { t.Fatal(err) } @@ -410,7 +410,7 @@ func TestInferArrowTypePublic(t *testing.T) { }) t.Run("map[string]float64 is MAP", func(t *testing.T) { - dt, err := InferArrowType[map[string]float64]() + dt, err := TypeOf[map[string]float64]() if err != nil { t.Fatal(err) } 
@@ -421,7 +421,7 @@ func TestInferArrowTypePublic(t *testing.T) { t.Run("struct{X int32} is STRUCT", func(t *testing.T) { type S struct{ X int32 } - dt, err := InferArrowType[S]() + dt, err := TypeOf[S]() if err != nil { t.Fatal(err) } @@ -436,7 +436,7 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { type S struct { Name string `arrow:"name,dict"` } - schema, err := InferArrowSchema[S]() + schema, err := SchemaOf[S]() if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -453,7 +453,7 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { type S struct { Tags []string `arrow:"tags,listview"` } - schema, err := InferArrowSchema[S]() + schema, err := SchemaOf[S]() if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -470,7 +470,7 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { type REERow struct { Val string `arrow:"val,ree"` } - _, err := InferArrowSchema[REERow]() + _, err := SchemaOf[REERow]() if err == nil { t.Fatal("expected error for ree tag on struct field, got nil") } diff --git a/arrow/arreflect/reflect_integration_test.go b/arrow/arreflect/reflect_integration_test.go index 6d206fe8..5dce1a33 100644 --- a/arrow/arreflect/reflect_integration_test.go +++ b/arrow/arreflect/reflect_integration_test.go @@ -101,15 +101,15 @@ func TestReflectIntegration(t *testing.T) { }, } - arr, err := FromGoSlice(orders, mem) + arr, err := FromSlice(orders, mem) if err != nil { - t.Fatalf("FromGoSlice: %v", err) + t.Fatalf("FromSlice: %v", err) } defer arr.Release() - output, err := ToGoSlice[integOrder](arr) + output, err := ToSlice[integOrder](arr) if err != nil { - t.Fatalf("ToGoSlice: %v", err) + t.Fatalf("ToSlice: %v", err) } if len(output) != len(orders) { @@ -147,9 +147,9 @@ func TestReflectIntegration(t *testing.T) { rows[i] = integLargeRow{X: int32(i), Y: float64(i) * 1.5} } - arr, err := FromGoSlice(rows, mem) + arr, err := FromSlice(rows, mem) if err != nil { - t.Fatalf("FromGoSlice: %v", err) + t.Fatalf("FromSlice: 
%v", err) } defer arr.Release() @@ -157,9 +157,9 @@ func TestReflectIntegration(t *testing.T) { t.Fatalf("array length: got %d, want %d", arr.Len(), n) } - output, err := ToGoSlice[integLargeRow](arr) + output, err := ToSlice[integLargeRow](arr) if err != nil { - t.Fatalf("ToGoSlice: %v", err) + t.Fatalf("ToSlice: %v", err) } if len(output) != n { @@ -179,15 +179,15 @@ func TestReflectIntegration(t *testing.T) { {A: nil, B: nil, C: nil}, } - arr, err := FromGoSlice(rows, mem) + arr, err := FromSlice(rows, mem) if err != nil { - t.Fatalf("FromGoSlice: %v", err) + t.Fatalf("FromSlice: %v", err) } defer arr.Release() - output, err := ToGoSlice[integNullable](arr) + output, err := ToSlice[integNullable](arr) if err != nil { - t.Fatalf("ToGoSlice: %v", err) + t.Fatalf("ToSlice: %v", err) } if len(output) != 3 { @@ -207,9 +207,9 @@ func TestReflectIntegration(t *testing.T) { }) t.Run("empty int32 slice", func(t *testing.T) { - arr, err := FromGoSlice[int32]([]int32{}, mem) + arr, err := FromSlice[int32]([]int32{}, mem) if err != nil { - t.Fatalf("FromGoSlice: %v", err) + t.Fatalf("FromSlice: %v", err) } defer arr.Release() @@ -217,12 +217,12 @@ func TestReflectIntegration(t *testing.T) { t.Errorf("array length: got %d, want 0", arr.Len()) } - output, err := ToGoSlice[int32](arr) + output, err := ToSlice[int32](arr) if err != nil { - t.Fatalf("ToGoSlice: %v", err) + t.Fatalf("ToSlice: %v", err) } if output == nil { - t.Error("ToGoSlice returned nil, want non-nil empty slice") + t.Error("ToSlice returned nil, want non-nil empty slice") } if len(output) != 0 { t.Errorf("output length: got %d, want 0", len(output)) @@ -231,9 +231,9 @@ func TestReflectIntegration(t *testing.T) { t.Run("empty struct slice", func(t *testing.T) { type simpleXY struct{ X int32 } - arr, err := FromGoSlice[simpleXY]([]simpleXY{}, mem) + arr, err := FromSlice[simpleXY]([]simpleXY{}, mem) if err != nil { - t.Fatalf("FromGoSlice empty struct: %v", err) + t.Fatalf("FromSlice empty struct: %v", err) } 
defer arr.Release() @@ -258,15 +258,15 @@ func TestReflectIntegration(t *testing.T) { {Required: "fourth", Optional: nil, Count: 40, MaybeCount: nil}, } - arr, err := FromGoSlice(rows, mem) + arr, err := FromSlice(rows, mem) if err != nil { - t.Fatalf("FromGoSlice: %v", err) + t.Fatalf("FromSlice: %v", err) } defer arr.Release() - output, err := ToGoSlice[integMixed](arr) + output, err := ToSlice[integMixed](arr) if err != nil { - t.Fatalf("ToGoSlice: %v", err) + t.Fatalf("ToSlice: %v", err) } if len(output) != len(rows) { @@ -301,9 +301,9 @@ func TestReflectIntegration(t *testing.T) { {integBase: integBase{ID: 3}, Name: "carol"}, } - arr, err := FromGoSlice(rows, mem) + arr, err := FromSlice(rows, mem) if err != nil { - t.Fatalf("FromGoSlice: %v", err) + t.Fatalf("FromSlice: %v", err) } defer arr.Release() @@ -333,9 +333,9 @@ func TestReflectIntegration(t *testing.T) { t.Error("unexpected field 'Skip' in schema (should be skipped by arrow:\"-\" tag)") } - output, err := ToGoSlice[integExtended](arr) + output, err := ToSlice[integExtended](arr) if err != nil { - t.Fatalf("ToGoSlice: %v", err) + t.Fatalf("ToSlice: %v", err) } if len(output) != len(rows) { @@ -360,14 +360,14 @@ func TestReflectIntegration(t *testing.T) { {ID: 1, Items: []integOrderItem{{Product: "a", Tags: map[string]string{"k": "v"}, Ratings: [5]float32{1, 2, 3, 4, 5}}}}, } - schema, err := InferArrowSchema[integOrder]() + schema, err := SchemaOf[integOrder]() if err != nil { - t.Fatalf("InferArrowSchema: %v", err) + t.Fatalf("SchemaOf: %v", err) } - arr, err := FromGoSlice(orders, mem) + arr, err := FromSlice(orders, mem) if err != nil { - t.Fatalf("FromGoSlice: %v", err) + t.Fatalf("FromSlice: %v", err) } defer arr.Release() @@ -395,9 +395,9 @@ func TestReflectIntegration(t *testing.T) { batch1[i] = integLargeRow{X: int32(i + 1), Y: float64(i+1) * 2.0} } - arr1, err := FromGoSlice(batch1, mem) + arr1, err := FromSlice(batch1, mem) if err != nil { - t.Fatalf("FromGoSlice batch1: %v", err) + 
t.Fatalf("FromSlice batch1: %v", err) } defer arr1.Release() @@ -406,19 +406,19 @@ func TestReflectIntegration(t *testing.T) { batch2[i] = integLargeRow{X: int32(i * 10), Y: float64(i) * 3.14} } - arr2, err := FromGoSlice(batch2, mem) + arr2, err := FromSlice(batch2, mem) if err != nil { - t.Fatalf("FromGoSlice batch2: %v", err) + t.Fatalf("FromSlice batch2: %v", err) } defer arr2.Release() - out1, err := ToGoSlice[integLargeRow](arr1) + out1, err := ToSlice[integLargeRow](arr1) if err != nil { - t.Fatalf("ToGoSlice batch1: %v", err) + t.Fatalf("ToSlice batch1: %v", err) } - out2, err := ToGoSlice[integLargeRow](arr2) + out2, err := ToSlice[integLargeRow](arr2) if err != nil { - t.Fatalf("ToGoSlice batch2: %v", err) + t.Fatalf("ToSlice batch2: %v", err) } if len(out1) != len(batch1) { @@ -482,7 +482,7 @@ func BenchmarkReflectFromGoSlice(b *testing.B) { } b.ResetTimer() for i := 0; i < b.N; i++ { - arr, err := FromGoSlice(rows, mem) + arr, err := FromSlice(rows, mem) if err != nil { b.Fatal(err) } @@ -497,7 +497,7 @@ func BenchmarkReflectToGoSlice(b *testing.B) { rows[i] = integLargeRow{X: int32(i), Y: float64(i) * 1.5} } - arr, err := FromGoSlice(rows, mem) + arr, err := FromSlice(rows, mem) if err != nil { b.Fatal(err) } @@ -505,7 +505,7 @@ func BenchmarkReflectToGoSlice(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - out, err := ToGoSlice[integLargeRow](arr) + out, err := ToSlice[integLargeRow](arr) if err != nil { b.Fatal(err) } diff --git a/arrow/arreflect/reflect_public_test.go b/arrow/arreflect/reflect_public_test.go index 75a2d459..193d88b1 100644 --- a/arrow/arreflect/reflect_public_test.go +++ b/arrow/arreflect/reflect_public_test.go @@ -34,7 +34,7 @@ func TestToGo(t *testing.T) { arr := b.NewInt32Array() defer arr.Release() - got, err := ToGo[int32](arr, 0) + got, err := Get[int32](arr, 0) if err != nil { t.Fatal(err) } @@ -50,7 +50,7 @@ func TestToGo(t *testing.T) { arr := b.NewStringArray() defer arr.Release() - got, err := ToGo[string](arr, 
1) + got, err := Get[string](arr, 1) if err != nil { t.Fatal(err) } @@ -65,13 +65,13 @@ func TestToGo(t *testing.T) { Age int32 } vals := []Person{{"Alice", 30}, {"Bob", 25}} - arr, err := FromGoSlice(vals, mem) + arr, err := FromSlice(vals, mem) if err != nil { t.Fatal(err) } defer arr.Release() - got, err := ToGo[Person](arr, 0) + got, err := Get[Person](arr, 0) if err != nil { t.Fatal(err) } @@ -87,7 +87,7 @@ func TestToGo(t *testing.T) { arr := b.NewInt32Array() defer arr.Release() - got, err := ToGo[*int32](arr, 0) + got, err := Get[*int32](arr, 0) if err != nil { t.Fatal(err) } @@ -103,7 +103,7 @@ func TestToGo(t *testing.T) { arr := b.NewInt32Array() defer arr.Release() - got, err := ToGo[int32](arr, 0) + got, err := Get[int32](arr, 0) if err != nil { t.Fatal(err) } @@ -123,7 +123,7 @@ func TestToGoSlice(t *testing.T) { arr := b.NewInt32Array() defer arr.Release() - got, err := ToGoSlice[int32](arr) + got, err := ToSlice[int32](arr) if err != nil { t.Fatal(err) } @@ -145,7 +145,7 @@ func TestToGoSlice(t *testing.T) { arr := b.NewStringArray() defer arr.Release() - got, err := ToGoSlice[string](arr) + got, err := ToSlice[string](arr) if err != nil { t.Fatal(err) } @@ -165,13 +165,13 @@ func TestToGoSlice(t *testing.T) { Name string } vals := []Row{{"Alice"}, {"Bob"}, {"Charlie"}} - arr, err := FromGoSlice(vals, mem) + arr, err := FromSlice(vals, mem) if err != nil { t.Fatal(err) } defer arr.Release() - got, err := ToGoSlice[Row](arr) + got, err := ToSlice[Row](arr) if err != nil { t.Fatal(err) } @@ -191,7 +191,7 @@ func TestToGoSlice(t *testing.T) { arr := b.NewInt32Array() defer arr.Release() - got, err := ToGoSlice[int32](arr) + got, err := ToSlice[int32](arr) if err != nil { t.Fatal(err) } @@ -208,7 +208,7 @@ func TestFromGoSlice(t *testing.T) { mem := memory.NewGoAllocator() t.Run("[]int32", func(t *testing.T) { - arr, err := FromGoSlice([]int32{1, 2, 3}, mem) + arr, err := FromSlice([]int32{1, 2, 3}, mem) if err != nil { t.Fatal(err) } @@ -226,7 +226,7 
@@ func TestFromGoSlice(t *testing.T) { }) t.Run("[]string", func(t *testing.T) { - arr, err := FromGoSlice([]string{"a", "b"}, mem) + arr, err := FromSlice([]string{"a", "b"}, mem) if err != nil { t.Fatal(err) } @@ -247,7 +247,7 @@ func TestFromGoSlice(t *testing.T) { Score float64 } vals := []Row{{"Alice", 9.5}, {"Bob", 8.0}} - arr, err := FromGoSlice(vals, mem) + arr, err := FromSlice(vals, mem) if err != nil { t.Fatal(err) } @@ -256,7 +256,7 @@ func TestFromGoSlice(t *testing.T) { if arr.Len() != 2 { t.Fatalf("expected len 2, got %d", arr.Len()) } - got, err := ToGoSlice[Row](arr) + got, err := ToSlice[Row](arr) if err != nil { t.Fatal(err) } @@ -269,7 +269,7 @@ func TestFromGoSlice(t *testing.T) { t.Run("[]*int32 with nil produces null", func(t *testing.T) { v := int32(42) - arr, err := FromGoSlice([]*int32{&v, nil}, mem) + arr, err := FromSlice([]*int32{&v, nil}, mem) if err != nil { t.Fatal(err) } @@ -288,7 +288,7 @@ func TestFromGoSlice(t *testing.T) { }) t.Run("empty []int32 gives length-0 array", func(t *testing.T) { - arr, err := FromGoSlice([]int32{}, mem) + arr, err := FromSlice([]int32{}, mem) if err != nil { t.Fatal(err) } From 3485a4a9ad0e9089dc789cbc49c522c7a4b8e709 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 15:14:22 -0700 Subject: [PATCH 14/82] refactor(arreflect): eliminate code duplication (-89 lines) Extract helpers to consolidate repeated patterns: reflect_go_to_arrow.go: - derefSliceElem(vals) (elemType, isPtr): replaces 5-site elemType+isPtr extraction at top of build functions - iterSlice(vals, isPtr, appendNull, appendVal): replaces 10 identical isPtr nil-check loop bodies across buildTemporalArray and buildDecimalArray - listBuildPreamble(vals): extracts shared 11-line setup from buildListArray and buildListViewArray (builders themselves stay separate as their Append semantics differ) reflect_arrow_to_go.go: - isIntKind/isUintKind/isFloatKind: extract the 8 multi-condition kind guard blocks in setPrimitiveValue to 
single-call predicates reflect_infer.go: - dec32/64/128/256DefaultPrecision constants: replace 3-site hardcoded precision values (38/76/9/18) No behavioral changes; all tests pass. --- arrow/arreflect/reflect_arrow_to_go.go | 40 ++--- arrow/arreflect/reflect_go_to_arrow.go | 209 +++++++------------------ arrow/arreflect/reflect_infer.go | 15 +- 3 files changed, 93 insertions(+), 171 deletions(-) diff --git a/arrow/arreflect/reflect_arrow_to_go.go b/arrow/arreflect/reflect_arrow_to_go.go index 8ef4e3a8..810c9bd4 100644 --- a/arrow/arreflect/reflect_arrow_to_go.go +++ b/arrow/arreflect/reflect_arrow_to_go.go @@ -25,6 +25,18 @@ import ( "github.com/apache/arrow-go/v18/arrow/array" ) +func isIntKind(k reflect.Kind) bool { + return k == reflect.Int || k == reflect.Int8 || k == reflect.Int16 || + k == reflect.Int32 || k == reflect.Int64 +} + +func isUintKind(k reflect.Kind) bool { + return k == reflect.Uint || k == reflect.Uint8 || k == reflect.Uint16 || + k == reflect.Uint32 || k == reflect.Uint64 || k == reflect.Uintptr +} + +func isFloatKind(k reflect.Kind) bool { return k == reflect.Float32 || k == reflect.Float64 } + func setValue(v reflect.Value, arr arrow.Array, i int) error { if arr.IsNull(i) { v.Set(reflect.Zero(v.Type())) @@ -154,60 +166,52 @@ func setPrimitiveValue(v reflect.Value, arr arrow.Array, i int) error { switch arr.DataType().ID() { case arrow.INT8: - if v.Kind() != reflect.Int && v.Kind() != reflect.Int8 && v.Kind() != reflect.Int16 && - v.Kind() != reflect.Int32 && v.Kind() != reflect.Int64 { + if !isIntKind(v.Kind()) { return fmt.Errorf("arreflect: cannot set int8 into %s", v.Type()) } v.SetInt(int64(arr.(*array.Int8).Value(i))) case arrow.INT16: - if v.Kind() != reflect.Int && v.Kind() != reflect.Int8 && v.Kind() != reflect.Int16 && - v.Kind() != reflect.Int32 && v.Kind() != reflect.Int64 { + if !isIntKind(v.Kind()) { return fmt.Errorf("arreflect: cannot set int16 into %s", v.Type()) } v.SetInt(int64(arr.(*array.Int16).Value(i))) case arrow.INT32: 
- if v.Kind() != reflect.Int && v.Kind() != reflect.Int8 && v.Kind() != reflect.Int16 && - v.Kind() != reflect.Int32 && v.Kind() != reflect.Int64 { + if !isIntKind(v.Kind()) { return fmt.Errorf("arreflect: cannot set int32 into %s", v.Type()) } v.SetInt(int64(arr.(*array.Int32).Value(i))) case arrow.INT64: - if v.Kind() != reflect.Int && v.Kind() != reflect.Int8 && v.Kind() != reflect.Int16 && - v.Kind() != reflect.Int32 && v.Kind() != reflect.Int64 { + if !isIntKind(v.Kind()) { return fmt.Errorf("arreflect: cannot set int64 into %s", v.Type()) } v.SetInt(arr.(*array.Int64).Value(i)) case arrow.UINT8: - if v.Kind() != reflect.Uint && v.Kind() != reflect.Uint8 && v.Kind() != reflect.Uint16 && - v.Kind() != reflect.Uint32 && v.Kind() != reflect.Uint64 && v.Kind() != reflect.Uintptr { + if !isUintKind(v.Kind()) { return fmt.Errorf("arreflect: cannot set uint8 into %s", v.Type()) } v.SetUint(uint64(arr.(*array.Uint8).Value(i))) case arrow.UINT16: - if v.Kind() != reflect.Uint && v.Kind() != reflect.Uint8 && v.Kind() != reflect.Uint16 && - v.Kind() != reflect.Uint32 && v.Kind() != reflect.Uint64 && v.Kind() != reflect.Uintptr { + if !isUintKind(v.Kind()) { return fmt.Errorf("arreflect: cannot set uint16 into %s", v.Type()) } v.SetUint(uint64(arr.(*array.Uint16).Value(i))) case arrow.UINT32: - if v.Kind() != reflect.Uint && v.Kind() != reflect.Uint8 && v.Kind() != reflect.Uint16 && - v.Kind() != reflect.Uint32 && v.Kind() != reflect.Uint64 && v.Kind() != reflect.Uintptr { + if !isUintKind(v.Kind()) { return fmt.Errorf("arreflect: cannot set uint32 into %s", v.Type()) } v.SetUint(uint64(arr.(*array.Uint32).Value(i))) case arrow.UINT64: - if v.Kind() != reflect.Uint && v.Kind() != reflect.Uint8 && v.Kind() != reflect.Uint16 && - v.Kind() != reflect.Uint32 && v.Kind() != reflect.Uint64 && v.Kind() != reflect.Uintptr { + if !isUintKind(v.Kind()) { return fmt.Errorf("arreflect: cannot set uint64 into %s", v.Type()) } v.SetUint(arr.(*array.Uint64).Value(i)) case arrow.FLOAT32: 
- if v.Kind() != reflect.Float32 && v.Kind() != reflect.Float64 { + if !isFloatKind(v.Kind()) { return fmt.Errorf("arreflect: cannot set float32 into %s", v.Type()) } v.SetFloat(float64(arr.(*array.Float32).Value(i))) case arrow.FLOAT64: - if v.Kind() != reflect.Float32 && v.Kind() != reflect.Float64 { + if !isFloatKind(v.Kind()) { return fmt.Errorf("arreflect: cannot set float64 into %s", v.Type()) } v.SetFloat(arr.(*array.Float64).Value(i)) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 3a0d242d..4bd7cb7a 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -85,10 +85,7 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A } func buildPrimitiveArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { - elemType := vals.Type().Elem() - for elemType.Kind() == reflect.Ptr { - elemType = elemType.Elem() - } + elemType, isPtr := derefSliceElem(vals) dt, err := inferArrowType(elemType) if err != nil { @@ -99,8 +96,6 @@ func buildPrimitiveArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, defer b.Release() b.Reserve(vals.Len()) - isPtr := vals.Type().Elem().Kind() == reflect.Ptr - for i := 0; i < vals.Len(); i++ { v := vals.Index(i) if isPtr { @@ -171,13 +166,41 @@ func timeOfDayNanos(t time.Time) int64 { return t.Sub(midnight).Nanoseconds() } -func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { - elemType := vals.Type().Elem() +func derefSliceElem(vals reflect.Value) (elemType reflect.Type, isPtr bool) { + elemType = vals.Type().Elem() + isPtr = elemType.Kind() == reflect.Ptr for elemType.Kind() == reflect.Ptr { elemType = elemType.Elem() } + return +} + +func iterSlice(vals reflect.Value, isPtr bool, appendNull func(), appendVal func(reflect.Value)) { + for i := 0; i < vals.Len(); i++ { + v := vals.Index(i) + if isPtr { + if v.IsNil() { + appendNull() + continue + } + 
v = v.Elem() + } + appendVal(v) + } +} - isPtr := vals.Type().Elem().Kind() == reflect.Ptr +func listBuildPreamble(vals reflect.Value) (elemDT arrow.DataType, isOuterPtr bool, err error) { + outerSliceType, isOuterPtr := derefSliceElem(vals) + innerElemType := outerSliceType.Elem() + for innerElemType.Kind() == reflect.Ptr { + innerElemType = innerElemType.Elem() + } + elemDT, err = inferArrowType(innerElemType) + return +} + +func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { + elemType, isPtr := derefSliceElem(vals) switch elemType { case typeOfTime: @@ -186,89 +209,49 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) b := array.NewDate32Builder(mem) defer b.Release() b.Reserve(vals.Len()) - for i := 0; i < vals.Len(); i++ { - v := vals.Index(i) - if isPtr { - if v.IsNil() { - b.AppendNull() - continue - } - v = v.Elem() - } + iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { t, _ := reflect.TypeAssert[time.Time](v) b.Append(arrow.Date32FromTime(t)) - } + }) return b.NewArray(), nil case "date64": b := array.NewDate64Builder(mem) defer b.Release() b.Reserve(vals.Len()) - for i := 0; i < vals.Len(); i++ { - v := vals.Index(i) - if isPtr { - if v.IsNil() { - b.AppendNull() - continue - } - v = v.Elem() - } + iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { t, _ := reflect.TypeAssert[time.Time](v) b.Append(arrow.Date64FromTime(t)) - } + }) return b.NewArray(), nil case "time32": dt := &arrow.Time32Type{Unit: arrow.Millisecond} b := array.NewTime32Builder(mem, dt) defer b.Release() b.Reserve(vals.Len()) - for i := 0; i < vals.Len(); i++ { - v := vals.Index(i) - if isPtr { - if v.IsNil() { - b.AppendNull() - continue - } - v = v.Elem() - } + iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { t, _ := reflect.TypeAssert[time.Time](v) b.Append(arrow.Time32(timeOfDayNanos(t) / int64(dt.Unit.Multiplier()))) - } + }) return b.NewArray(), nil case "time64": 
dt := &arrow.Time64Type{Unit: arrow.Nanosecond} b := array.NewTime64Builder(mem, dt) defer b.Release() b.Reserve(vals.Len()) - for i := 0; i < vals.Len(); i++ { - v := vals.Index(i) - if isPtr { - if v.IsNil() { - b.AppendNull() - continue - } - v = v.Elem() - } + iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { t, _ := reflect.TypeAssert[time.Time](v) b.Append(arrow.Time64(timeOfDayNanos(t) / int64(dt.Unit.Multiplier()))) - } + }) return b.NewArray(), nil default: dt := &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "UTC"} tb := array.NewTimestampBuilder(mem, dt) defer tb.Release() tb.Reserve(vals.Len()) - for i := 0; i < vals.Len(); i++ { - v := vals.Index(i) - if isPtr { - if v.IsNil() { - tb.AppendNull() - continue - } - v = v.Elem() - } + iterSlice(vals, isPtr, tb.AppendNull, func(v reflect.Value) { t, _ := reflect.TypeAssert[time.Time](v) tb.Append(arrow.Timestamp(t.UnixNano())) - } + }) return tb.NewArray(), nil } @@ -277,18 +260,10 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) db := array.NewDurationBuilder(mem, dt) defer db.Release() db.Reserve(vals.Len()) - for i := 0; i < vals.Len(); i++ { - v := vals.Index(i) - if isPtr { - if v.IsNil() { - db.AppendNull() - continue - } - v = v.Elem() - } + iterSlice(vals, isPtr, db.AppendNull, func(v reflect.Value) { d, _ := reflect.TypeAssert[time.Duration](v) db.Append(arrow.Duration(d.Nanoseconds())) - } + }) return db.NewArray(), nil default: @@ -297,16 +272,11 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) } func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { - elemType := vals.Type().Elem() - for elemType.Kind() == reflect.Ptr { - elemType = elemType.Elem() - } - - isPtr := vals.Type().Elem().Kind() == reflect.Ptr + elemType, isPtr := derefSliceElem(vals) switch elemType { case typeOfDec128: - precision, scale := int32(38), int32(0) + precision, scale := 
dec128DefaultPrecision, int32(0) if opts.HasDecimalOpts { precision = opts.DecimalPrecision scale = opts.DecimalScale @@ -315,22 +285,14 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( b := array.NewDecimal128Builder(mem, dt) defer b.Release() b.Reserve(vals.Len()) - for i := 0; i < vals.Len(); i++ { - v := vals.Index(i) - if isPtr { - if v.IsNil() { - b.AppendNull() - continue - } - v = v.Elem() - } + iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { n, _ := reflect.TypeAssert[decimal128.Num](v) b.Append(n) - } + }) return b.NewArray(), nil case typeOfDec256: - precision, scale := int32(76), int32(0) + precision, scale := dec256DefaultPrecision, int32(0) if opts.HasDecimalOpts { precision = opts.DecimalPrecision scale = opts.DecimalScale @@ -339,22 +301,14 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( b := array.NewDecimal256Builder(mem, dt) defer b.Release() b.Reserve(vals.Len()) - for i := 0; i < vals.Len(); i++ { - v := vals.Index(i) - if isPtr { - if v.IsNil() { - b.AppendNull() - continue - } - v = v.Elem() - } + iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { n, _ := reflect.TypeAssert[decimal256.Num](v) b.Append(n) - } + }) return b.NewArray(), nil case typeOfDec32: - precision, scale := int32(9), int32(0) + precision, scale := dec32DefaultPrecision, int32(0) if opts.HasDecimalOpts { precision = opts.DecimalPrecision scale = opts.DecimalScale @@ -363,21 +317,13 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( b := array.NewDecimal32Builder(mem, dt) defer b.Release() b.Reserve(vals.Len()) - for i := 0; i < vals.Len(); i++ { - v := vals.Index(i) - if isPtr { - if v.IsNil() { - b.AppendNull() - continue - } - v = v.Elem() - } + iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { b.Append(decimal.Decimal32(v.Int())) - } + }) return b.NewArray(), nil case typeOfDec64: - precision, scale := int32(18), int32(0) + precision, 
scale := dec64DefaultPrecision, int32(0) if opts.HasDecimalOpts { precision = opts.DecimalPrecision scale = opts.DecimalScale @@ -386,17 +332,9 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( b := array.NewDecimal64Builder(mem, dt) defer b.Release() b.Reserve(vals.Len()) - for i := 0; i < vals.Len(); i++ { - v := vals.Index(i) - if isPtr { - if v.IsNil() { - b.AppendNull() - continue - } - v = v.Elem() - } + iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { b.Append(decimal.Decimal64(v.Int())) - } + }) return b.NewArray(), nil default: @@ -634,18 +572,7 @@ func appendToDictBuilder(db array.DictionaryBuilder, v reflect.Value) error { } func buildListArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { - innerSliceType := vals.Type().Elem() - isOuterPtr := innerSliceType.Kind() == reflect.Ptr - for innerSliceType.Kind() == reflect.Ptr { - innerSliceType = innerSliceType.Elem() - } - - innerElemType := innerSliceType.Elem() - for innerElemType.Kind() == reflect.Ptr { - innerElemType = innerElemType.Elem() - } - - elemDT, err := inferArrowType(innerElemType) + elemDT, isOuterPtr, err := listBuildPreamble(vals) if err != nil { return nil, fmt.Errorf("arreflect: %w", err) } @@ -787,10 +714,7 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { n := vals.Len() - elemType := vals.Type().Elem() - for elemType.Kind() == reflect.Ptr { - elemType = elemType.Elem() - } + elemType, isPtr := derefSliceElem(vals) valDT, err := inferArrowType(elemType) if err != nil { @@ -804,8 +728,6 @@ func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array db := array.NewDictionaryBuilder(mem, dt) defer db.Release() - isPtr := vals.Type().Elem().Kind() == reflect.Ptr - for i := 0; i < n; i++ { elem := vals.Index(i) if isPtr { @@ -901,18 +823,7 @@ func buildRunEndEncodedArray(vals 
reflect.Value, mem memory.Allocator) (arrow.Ar } func buildListViewArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { - innerSliceType := vals.Type().Elem() - isOuterPtr := innerSliceType.Kind() == reflect.Ptr - for innerSliceType.Kind() == reflect.Ptr { - innerSliceType = innerSliceType.Elem() - } - - innerElemType := innerSliceType.Elem() - for innerElemType.Kind() == reflect.Ptr { - innerElemType = innerElemType.Elem() - } - - elemDT, err := inferArrowType(innerElemType) + elemDT, isOuterPtr, err := listBuildPreamble(vals) if err != nil { return nil, fmt.Errorf("arreflect: %w", err) } diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index 26c9dc84..1ad17a27 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -39,6 +39,13 @@ var ( typeOfUint = reflect.TypeOf(uint(0)) ) +const ( + dec32DefaultPrecision int32 = 9 + dec64DefaultPrecision int32 = 18 + dec128DefaultPrecision int32 = 38 + dec256DefaultPrecision int32 = 76 +) + func inferPrimitiveArrowType(t reflect.Type) (arrow.DataType, error) { for t.Kind() == reflect.Ptr { t = t.Elem() @@ -80,13 +87,13 @@ func inferPrimitiveArrowType(t reflect.Type) (arrow.DataType, error) { case typeOfDuration: return &arrow.DurationType{Unit: arrow.Nanosecond}, nil case typeOfDec128: - return &arrow.Decimal128Type{Precision: 38, Scale: 0}, nil + return &arrow.Decimal128Type{Precision: dec128DefaultPrecision, Scale: 0}, nil case typeOfDec32: - return &arrow.Decimal32Type{Precision: 9, Scale: 0}, nil + return &arrow.Decimal32Type{Precision: dec32DefaultPrecision, Scale: 0}, nil case typeOfDec64: - return &arrow.Decimal64Type{Precision: 18, Scale: 0}, nil + return &arrow.Decimal64Type{Precision: dec64DefaultPrecision, Scale: 0}, nil case typeOfDec256: - return &arrow.Decimal256Type{Precision: 76, Scale: 0}, nil + return &arrow.Decimal256Type{Precision: dec256DefaultPrecision, Scale: 0}, nil default: return nil, fmt.Errorf("arreflect: unsupported 
Go type for Arrow inference: %v", t) } From 4068ba73babe6b6173013262e32f63aebf276eac Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 15:25:05 -0700 Subject: [PATCH 15/82] fix(arreflect): TypeAssert ok check, deduplicate error prefixes, API improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Job 864 — reflect.TypeAssert silent corruption fix: Changed iterSlice signature to return error; added ok check at all 19 TypeAssert call sites in buildTemporalArray, buildDecimalArray, appendValue, appendPrimitiveValue. Type mismatch now returns an error instead of silently producing zero-value Arrow data. Job 865 — doubled arreflect: prefix fix: Removed the redundant arreflect: prefix from 18 re-wrapping sites that were adding it on top of errors already carrying the prefix. Context wrappers (field name, index) keep their context but drop the now-redundant package prefix. Job 866 — design review improvements: - inferStructType: add explicit case 'timestamp','': so behavior is intentional rather than accidental fallthrough to default - appendValue: add *array.LargeListBuilder and *array.LargeListViewBuilder cases to handle large variants that inference may not currently produce - ErrUnsupportedType, ErrTypeMismatch: add sentinel errors for errors.Is - FromSliceDefault[T], RecordFromSliceDefault[T]: convenience wrappers using memory.DefaultAllocator for tests and quick scripts - SchemaOf, TypeOf: improve godoc to explain when to use each --- arrow/arreflect/reflect.go | 18 +- arrow/arreflect/reflect_go_to_arrow.go | 232 ++++++++++++++++++------- arrow/arreflect/reflect_infer.go | 12 +- 3 files changed, 200 insertions(+), 62 deletions(-) diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index 280919eb..b3263286 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -17,6 +17,7 @@ package arreflect import ( + "errors" "fmt" "reflect" "strconv" @@ -28,6 +29,11 @@ import ( 
"github.com/apache/arrow-go/v18/arrow/memory" ) +var ( + ErrUnsupportedType = errors.New("arreflect: unsupported type") + ErrTypeMismatch = errors.New("arreflect: type mismatch") +) + type tagOpts struct { Name string Skip bool @@ -339,7 +345,7 @@ func ToSlice[T any](arr arrow.Array) ([]T, error) { for i := 0; i < n; i++ { v := reflect.ValueOf(&result[i]).Elem() if err := setValue(v, arr, i); err != nil { - return nil, fmt.Errorf("arreflect: index %d: %w", i, err) + return nil, fmt.Errorf("index %d: %w", i, err) } } return result, nil @@ -349,7 +355,7 @@ func FromSlice[T any](vals []T, mem memory.Allocator) (arrow.Array, error) { if len(vals) == 0 { dt, err := inferArrowType(reflect.TypeFor[T]()) if err != nil { - return nil, fmt.Errorf("arreflect: %w", err) + return nil, err } b := array.NewBuilder(mem, dt) defer b.Release() @@ -359,6 +365,10 @@ func FromSlice[T any](vals []T, mem memory.Allocator) (arrow.Array, error) { return buildArray(sv, tagOpts{}, mem) } +func FromSliceDefault[T any](vals []T) (arrow.Array, error) { + return FromSlice(vals, memory.DefaultAllocator) +} + func RecordToSlice[T any](rec arrow.Record) ([]T, error) { sa := array.RecordToStructArray(rec) defer sa.Release() @@ -377,3 +387,7 @@ func RecordFromSlice[T any](vals []T, mem memory.Allocator) (arrow.Record, error } return array.RecordFromStructArray(sa, nil), nil } + +func RecordFromSliceDefault[T any](vals []T) (arrow.Record, error) { + return RecordFromSlice(vals, memory.DefaultAllocator) +} diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 4bd7cb7a..eb9415cc 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -89,7 +89,7 @@ func buildPrimitiveArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, dt, err := inferArrowType(elemType) if err != nil { - return nil, fmt.Errorf("arreflect: %w", err) + return nil, err } b := array.NewBuilder(mem, dt) @@ -142,13 +142,22 @@ func 
appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e case arrow.BINARY: b.(*array.BinaryBuilder).Append(v.Bytes()) case arrow.DURATION: - d, _ := reflect.TypeAssert[time.Duration](v) + d, ok := reflect.TypeAssert[time.Duration](v) + if !ok { + return fmt.Errorf("arreflect: expected time.Duration, got %s", v.Type()) + } b.(*array.DurationBuilder).Append(arrow.Duration(d.Nanoseconds())) case arrow.DECIMAL128: - n, _ := reflect.TypeAssert[decimal128.Num](v) + n, ok := reflect.TypeAssert[decimal128.Num](v) + if !ok { + return fmt.Errorf("arreflect: expected decimal128.Num, got %s", v.Type()) + } b.(*array.Decimal128Builder).Append(n) case arrow.DECIMAL256: - n, _ := reflect.TypeAssert[decimal256.Num](v) + n, ok := reflect.TypeAssert[decimal256.Num](v) + if !ok { + return fmt.Errorf("arreflect: expected decimal256.Num, got %s", v.Type()) + } b.(*array.Decimal256Builder).Append(n) case arrow.DECIMAL32: b.(*array.Decimal32Builder).Append(decimal.Decimal32(v.Int())) @@ -175,7 +184,7 @@ func derefSliceElem(vals reflect.Value) (elemType reflect.Type, isPtr bool) { return } -func iterSlice(vals reflect.Value, isPtr bool, appendNull func(), appendVal func(reflect.Value)) { +func iterSlice(vals reflect.Value, isPtr bool, appendNull func(), appendVal func(reflect.Value) error) error { for i := 0; i < vals.Len(); i++ { v := vals.Index(i) if isPtr { @@ -185,8 +194,11 @@ func iterSlice(vals reflect.Value, isPtr bool, appendNull func(), appendVal func } v = v.Elem() } - appendVal(v) + if err := appendVal(v); err != nil { + return err + } } + return nil } func listBuildPreamble(vals reflect.Value) (elemDT arrow.DataType, isOuterPtr bool, err error) { @@ -209,49 +221,79 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) b := array.NewDate32Builder(mem) defer b.Release() b.Reserve(vals.Len()) - iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { - t, _ := reflect.TypeAssert[time.Time](v) + if err := iterSlice(vals, isPtr, 
b.AppendNull, func(v reflect.Value) error { + t, ok := reflect.TypeAssert[time.Time](v) + if !ok { + return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + } b.Append(arrow.Date32FromTime(t)) - }) + return nil + }); err != nil { + return nil, err + } return b.NewArray(), nil case "date64": b := array.NewDate64Builder(mem) defer b.Release() b.Reserve(vals.Len()) - iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { - t, _ := reflect.TypeAssert[time.Time](v) + if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { + t, ok := reflect.TypeAssert[time.Time](v) + if !ok { + return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + } b.Append(arrow.Date64FromTime(t)) - }) + return nil + }); err != nil { + return nil, err + } return b.NewArray(), nil case "time32": dt := &arrow.Time32Type{Unit: arrow.Millisecond} b := array.NewTime32Builder(mem, dt) defer b.Release() b.Reserve(vals.Len()) - iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { - t, _ := reflect.TypeAssert[time.Time](v) + if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { + t, ok := reflect.TypeAssert[time.Time](v) + if !ok { + return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + } b.Append(arrow.Time32(timeOfDayNanos(t) / int64(dt.Unit.Multiplier()))) - }) + return nil + }); err != nil { + return nil, err + } return b.NewArray(), nil case "time64": dt := &arrow.Time64Type{Unit: arrow.Nanosecond} b := array.NewTime64Builder(mem, dt) defer b.Release() b.Reserve(vals.Len()) - iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { - t, _ := reflect.TypeAssert[time.Time](v) + if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { + t, ok := reflect.TypeAssert[time.Time](v) + if !ok { + return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + } b.Append(arrow.Time64(timeOfDayNanos(t) / int64(dt.Unit.Multiplier()))) - }) + return nil + }); err != nil { + return 
nil, err + } return b.NewArray(), nil default: dt := &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "UTC"} tb := array.NewTimestampBuilder(mem, dt) defer tb.Release() tb.Reserve(vals.Len()) - iterSlice(vals, isPtr, tb.AppendNull, func(v reflect.Value) { - t, _ := reflect.TypeAssert[time.Time](v) + if err := iterSlice(vals, isPtr, tb.AppendNull, func(v reflect.Value) error { + t, ok := reflect.TypeAssert[time.Time](v) + if !ok { + return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + } tb.Append(arrow.Timestamp(t.UnixNano())) - }) + return nil + }); err != nil { + return nil, err + } return tb.NewArray(), nil } @@ -260,10 +302,16 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) db := array.NewDurationBuilder(mem, dt) defer db.Release() db.Reserve(vals.Len()) - iterSlice(vals, isPtr, db.AppendNull, func(v reflect.Value) { - d, _ := reflect.TypeAssert[time.Duration](v) + if err := iterSlice(vals, isPtr, db.AppendNull, func(v reflect.Value) error { + d, ok := reflect.TypeAssert[time.Duration](v) + if !ok { + return fmt.Errorf("arreflect: expected time.Duration, got %s", v.Type()) + } db.Append(arrow.Duration(d.Nanoseconds())) - }) + return nil + }); err != nil { + return nil, err + } return db.NewArray(), nil default: @@ -285,10 +333,16 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( b := array.NewDecimal128Builder(mem, dt) defer b.Release() b.Reserve(vals.Len()) - iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { - n, _ := reflect.TypeAssert[decimal128.Num](v) + if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { + n, ok := reflect.TypeAssert[decimal128.Num](v) + if !ok { + return fmt.Errorf("arreflect: expected decimal128.Num, got %s", v.Type()) + } b.Append(n) - }) + return nil + }); err != nil { + return nil, err + } return b.NewArray(), nil case typeOfDec256: @@ -301,10 +355,16 @@ func buildDecimalArray(vals reflect.Value, opts 
tagOpts, mem memory.Allocator) ( b := array.NewDecimal256Builder(mem, dt) defer b.Release() b.Reserve(vals.Len()) - iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { - n, _ := reflect.TypeAssert[decimal256.Num](v) + if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { + n, ok := reflect.TypeAssert[decimal256.Num](v) + if !ok { + return fmt.Errorf("arreflect: expected decimal256.Num, got %s", v.Type()) + } b.Append(n) - }) + return nil + }); err != nil { + return nil, err + } return b.NewArray(), nil case typeOfDec32: @@ -317,9 +377,12 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( b := array.NewDecimal32Builder(mem, dt) defer b.Release() b.Reserve(vals.Len()) - iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { + if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { b.Append(decimal.Decimal32(v.Int())) - }) + return nil + }); err != nil { + return nil, err + } return b.NewArray(), nil case typeOfDec64: @@ -332,9 +395,12 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( b := array.NewDecimal64Builder(mem, dt) defer b.Release() b.Reserve(vals.Len()) - iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) { + if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { b.Append(decimal.Decimal64(v.Int())) - }) + return nil + }); err != nil { + return nil, err + } return b.NewArray(), nil default: @@ -351,7 +417,7 @@ func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, er st, err := inferStructType(elemType) if err != nil { - return nil, fmt.Errorf("arreflect: %w", err) + return nil, err } fields := cachedStructFields(elemType) @@ -373,7 +439,7 @@ func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, er fv := v.FieldByIndex(fm.Index) fb := sb.FieldBuilder(fi) if err := appendValue(fb, fv, fm.Opts); err != nil { - return nil, fmt.Errorf("arreflect: struct field %q: 
%w", fm.Name, err) + return nil, fmt.Errorf("struct field %q: %w", fm.Name, err) } } } @@ -422,30 +488,54 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { tb.Append(v.Bytes()) } case *array.TimestampBuilder: - t, _ := reflect.TypeAssert[time.Time](v) + t, ok := reflect.TypeAssert[time.Time](v) + if !ok { + return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + } tb.Append(arrow.Timestamp(t.UnixNano())) case *array.Date32Builder: - t, _ := reflect.TypeAssert[time.Time](v) + t, ok := reflect.TypeAssert[time.Time](v) + if !ok { + return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + } tb.Append(arrow.Date32FromTime(t)) case *array.Date64Builder: - t, _ := reflect.TypeAssert[time.Time](v) + t, ok := reflect.TypeAssert[time.Time](v) + if !ok { + return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + } tb.Append(arrow.Date64FromTime(t)) case *array.Time32Builder: unit := tb.Type().(*arrow.Time32Type).Unit - t, _ := reflect.TypeAssert[time.Time](v) + t, ok := reflect.TypeAssert[time.Time](v) + if !ok { + return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + } tb.Append(arrow.Time32(timeOfDayNanos(t) / int64(unit.Multiplier()))) case *array.Time64Builder: unit := tb.Type().(*arrow.Time64Type).Unit - t, _ := reflect.TypeAssert[time.Time](v) + t, ok := reflect.TypeAssert[time.Time](v) + if !ok { + return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + } tb.Append(arrow.Time64(timeOfDayNanos(t) / int64(unit.Multiplier()))) case *array.DurationBuilder: - d, _ := reflect.TypeAssert[time.Duration](v) + d, ok := reflect.TypeAssert[time.Duration](v) + if !ok { + return fmt.Errorf("arreflect: expected time.Duration, got %s", v.Type()) + } tb.Append(arrow.Duration(d.Nanoseconds())) case *array.Decimal128Builder: - n, _ := reflect.TypeAssert[decimal128.Num](v) + n, ok := reflect.TypeAssert[decimal128.Num](v) + if !ok { + return fmt.Errorf("arreflect: expected decimal128.Num, 
got %s", v.Type()) + } tb.Append(n) case *array.Decimal256Builder: - n, _ := reflect.TypeAssert[decimal256.Num](v) + n, ok := reflect.TypeAssert[decimal256.Num](v) + if !ok { + return fmt.Errorf("arreflect: expected decimal256.Num, got %s", v.Type()) + } tb.Append(n) case *array.Decimal32Builder: tb.Append(decimal.Decimal32(v.Int())) @@ -499,7 +589,7 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { fv := v.FieldByIndex(fm.Index) fb := tb.FieldBuilder(fi) if err := appendValue(fb, fv, fm.Opts); err != nil { - return fmt.Errorf("arreflect: struct field %q: %w", fm.Name, err) + return fmt.Errorf("struct field %q: %w", fm.Name, err) } } case *array.ListViewBuilder: @@ -514,6 +604,30 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { } } } + case *array.LargeListBuilder: + if v.Kind() == reflect.Slice && v.IsNil() { + tb.AppendNull() + } else { + tb.Append(true) + vb := tb.ValueBuilder() + for i := 0; i < v.Len(); i++ { + if err := appendValue(vb, v.Index(i), tagOpts{}); err != nil { + return err + } + } + } + case *array.LargeListViewBuilder: + if v.Kind() == reflect.Slice && v.IsNil() { + tb.AppendNull() + } else { + tb.AppendWithSize(true, v.Len()) + vb := tb.ValueBuilder() + for i := 0; i < v.Len(); i++ { + if err := appendValue(vb, v.Index(i), tagOpts{}); err != nil { + return err + } + } + } case *array.RunEndEncodedBuilder: if v.Kind() == reflect.Ptr && v.IsNil() { tb.AppendNull() @@ -574,7 +688,7 @@ func appendToDictBuilder(db array.DictionaryBuilder, v reflect.Value) error { func buildListArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { elemDT, isOuterPtr, err := listBuildPreamble(vals) if err != nil { - return nil, fmt.Errorf("arreflect: %w", err) + return nil, err } lb := array.NewListBuilder(mem, elemDT) @@ -598,7 +712,7 @@ func buildListArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, erro lb.Append(true) for j := 0; j < outer.Len(); j++ { if err := appendValue(vb, 
outer.Index(j), tagOpts{}); err != nil { - return nil, fmt.Errorf("arreflect: list element [%d][%d]: %w", i, j, err) + return nil, fmt.Errorf("list element [%d][%d]: %w", i, j, err) } } } @@ -625,11 +739,11 @@ func buildMapArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error keyDT, err := inferArrowType(keyType) if err != nil { - return nil, fmt.Errorf("arreflect: map key type: %w", err) + return nil, fmt.Errorf("map key type: %w", err) } valDT, err := inferArrowType(valType) if err != nil { - return nil, fmt.Errorf("arreflect: map value type: %w", err) + return nil, fmt.Errorf("map value type: %w", err) } mb := array.NewMapBuilder(mem, keyDT, valDT, false) @@ -654,10 +768,10 @@ func buildMapArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error mb.Append(true) for _, key := range m.MapKeys() { if err := appendValue(kb, key, tagOpts{}); err != nil { - return nil, fmt.Errorf("arreflect: map key: %w", err) + return nil, fmt.Errorf("map key: %w", err) } if err := appendValue(ib, m.MapIndex(key), tagOpts{}); err != nil { - return nil, fmt.Errorf("arreflect: map value: %w", err) + return nil, fmt.Errorf("map value: %w", err) } } } @@ -684,7 +798,7 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar innerDT, err := inferArrowType(innerElemType) if err != nil { - return nil, fmt.Errorf("arreflect: %w", err) + return nil, err } fb := array.NewFixedSizeListBuilder(mem, n, innerDT) @@ -704,7 +818,7 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar fb.Append(true) for j := 0; j < int(n); j++ { if err := appendValue(vb, elem.Index(j), tagOpts{}); err != nil { - return nil, fmt.Errorf("arreflect: fixed-size list element [%d][%d]: %w", i, j, err) + return nil, fmt.Errorf("fixed-size list element [%d][%d]: %w", i, j, err) } } } @@ -718,7 +832,7 @@ func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array valDT, err := inferArrowType(elemType) if err != nil { - 
return nil, fmt.Errorf("arreflect: %w", err) + return nil, err } dt := &arrow.DictionaryType{ @@ -738,7 +852,7 @@ func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array elem = elem.Elem() } if err := appendToDictBuilder(db, elem); err != nil { - return nil, fmt.Errorf("arreflect: dictionary element [%d]: %w", i, err) + return nil, fmt.Errorf("dictionary element [%d]: %w", i, err) } } return db.NewArray(), nil @@ -805,7 +919,7 @@ func buildRunEndEncodedArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar runEndsSlice := reflect.ValueOf(runEnds) runEndsArr, err := buildPrimitiveArray(runEndsSlice, mem) if err != nil { - return nil, fmt.Errorf("arreflect: run-end encoded run ends: %w", err) + return nil, fmt.Errorf("run-end encoded run ends: %w", err) } defer runEndsArr.Release() @@ -815,7 +929,7 @@ func buildRunEndEncodedArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar } valuesArr, err := buildArray(runValues, tagOpts{}, mem) if err != nil { - return nil, fmt.Errorf("arreflect: run-end encoded values: %w", err) + return nil, fmt.Errorf("run-end encoded values: %w", err) } defer valuesArr.Release() @@ -825,7 +939,7 @@ func buildRunEndEncodedArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar func buildListViewArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { elemDT, isOuterPtr, err := listBuildPreamble(vals) if err != nil { - return nil, fmt.Errorf("arreflect: %w", err) + return nil, err } lvb := array.NewListViewBuilder(mem, elemDT) @@ -849,7 +963,7 @@ func buildListViewArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, lvb.AppendWithSize(true, outer.Len()) for j := 0; j < outer.Len(); j++ { if err := appendValue(vb, outer.Index(j), tagOpts{}); err != nil { - return nil, fmt.Errorf("arreflect: list-view element [%d][%d]: %w", i, j, err) + return nil, fmt.Errorf("list-view element [%d][%d]: %w", i, j, err) } } } diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go 
index 1ad17a27..2756ffea 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -161,7 +161,7 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { dt, err := inferArrowType(fm.Type) if err != nil { - return nil, fmt.Errorf("arreflect: struct field %q: %w", fm.Name, err) + return nil, fmt.Errorf("struct field %q: %w", fm.Name, err) } if fm.Opts.HasDecimalOpts { @@ -193,6 +193,7 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { dt = &arrow.Time32Type{Unit: arrow.Millisecond} case "time64": dt = &arrow.Time64Type{Unit: arrow.Nanosecond} + case "timestamp", "": } } @@ -219,6 +220,11 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { return arrow.StructOf(arrowFields...), nil } +// SchemaOf infers an *arrow.Schema from a Go struct type T. +// T must be a struct type; returns an error otherwise. +// For column-level type inspection without schema overhead, use [TypeOf]. +// Field names come from arrow struct tags or Go field names. +// Pointer fields are marked Nullable=true. func SchemaOf[T any]() (*arrow.Schema, error) { t := reflect.TypeFor[T]() for t.Kind() == reflect.Ptr { @@ -238,6 +244,10 @@ func SchemaOf[T any]() (*arrow.Schema, error) { return arrow.NewSchema(fields, nil), nil } +// TypeOf infers the Arrow DataType for a Go type T. +// For struct types, [SchemaOf] is preferred when the result will be used with +// arrow.Record or array.NewRecord; TypeOf returns an arrow.DataType that would +// require an additional cast to *arrow.StructType. 
func TypeOf[T any]() (arrow.DataType, error) { t := reflect.TypeFor[T]() return inferArrowType(t) From 4b116ce531e2df27f30fc5459430b48aed1d158b Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 15:31:46 -0700 Subject: [PATCH 16/82] refactor(arreflect): eliminate code smells per roborev #868 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reflect_go_to_arrow.go: - Move dec32/dec64 dispatch alongside dec128/dec256 in buildArray (same type family, was split across struct/default branches) - Extract decimalPrecisionScale(opts, defaultPrec) helper (4 sites) - Rename listBuildPreamble → inferListElemDT (descriptive name) - Extract buildListLikeArray(vals, mem, isView) shared implementation; buildListArray/buildListViewArray become one-line wrappers - Extract appendTemporalValue and appendDecimalValue from appendValue; appendValue delegates 10 temporal/decimal cases to two lines reflect.go: - Replace hand-rolled insertion sort with sort.Slice in getStructFields reflect_infer.go: - Extract applyDecimalOpts, applyTemporalOpts, applyEncodingOpts helpers; inferStructType loop body reduced to 5 clear lines --- arrow/arreflect/reflect.go | 9 +- arrow/arreflect/reflect_go_to_arrow.go | 224 ++++++++++++------------- arrow/arreflect/reflect_infer.go | 100 ++++++----- 3 files changed, 169 insertions(+), 164 deletions(-) diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index b3263286..aaede697 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" "reflect" + "sort" "strconv" "strings" "sync" @@ -300,11 +301,9 @@ func getStructFields(t reflect.Type) []fieldMeta { } } - for i := 1; i < len(resolved); i++ { - for j := i; j > 0 && resolved[j].order < resolved[j-1].order; j-- { - resolved[j], resolved[j-1] = resolved[j-1], resolved[j] - } - } + sort.Slice(resolved, func(i, j int) bool { + return resolved[i].order < resolved[j].order + }) result := 
make([]fieldMeta, len(resolved)) for i, r := range resolved { diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index eb9415cc..6b7f298b 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -49,6 +49,11 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A return buildListViewArray(vals, mem) } + switch elemType { + case typeOfDec32, typeOfDec64, typeOfDec128, typeOfDec256: + return buildDecimalArray(vals, opts, mem) + } + switch elemType.Kind() { case reflect.Slice: if elemType == typeOfByteSlice { @@ -64,22 +69,13 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A case reflect.Struct: switch elemType { - case typeOfTime: + case typeOfTime, typeOfDuration: return buildTemporalArray(vals, opts, mem) - case typeOfDuration: - return buildTemporalArray(vals, opts, mem) - case typeOfDec128: - return buildDecimalArray(vals, opts, mem) - case typeOfDec256: - return buildDecimalArray(vals, opts, mem) default: return buildStructArray(vals, mem) } default: - if elemType == typeOfDec32 || elemType == typeOfDec64 { - return buildDecimalArray(vals, opts, mem) - } return buildPrimitiveArray(vals, mem) } } @@ -201,7 +197,7 @@ func iterSlice(vals reflect.Value, isPtr bool, appendNull func(), appendVal func return nil } -func listBuildPreamble(vals reflect.Value) (elemDT arrow.DataType, isOuterPtr bool, err error) { +func inferListElemDT(vals reflect.Value) (elemDT arrow.DataType, isOuterPtr bool, err error) { outerSliceType, isOuterPtr := derefSliceElem(vals) innerElemType := outerSliceType.Elem() for innerElemType.Kind() == reflect.Ptr { @@ -319,16 +315,19 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) } } +func decimalPrecisionScale(opts tagOpts, defaultPrec int32) (precision, scale int32) { + if opts.HasDecimalOpts { + return opts.DecimalPrecision, opts.DecimalScale + } + return defaultPrec, 
0 +} + func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { elemType, isPtr := derefSliceElem(vals) switch elemType { case typeOfDec128: - precision, scale := dec128DefaultPrecision, int32(0) - if opts.HasDecimalOpts { - precision = opts.DecimalPrecision - scale = opts.DecimalScale - } + precision, scale := decimalPrecisionScale(opts, dec128DefaultPrecision) dt := &arrow.Decimal128Type{Precision: precision, Scale: scale} b := array.NewDecimal128Builder(mem, dt) defer b.Release() @@ -346,11 +345,7 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( return b.NewArray(), nil case typeOfDec256: - precision, scale := dec256DefaultPrecision, int32(0) - if opts.HasDecimalOpts { - precision = opts.DecimalPrecision - scale = opts.DecimalScale - } + precision, scale := decimalPrecisionScale(opts, dec256DefaultPrecision) dt := &arrow.Decimal256Type{Precision: precision, Scale: scale} b := array.NewDecimal256Builder(mem, dt) defer b.Release() @@ -368,11 +363,7 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( return b.NewArray(), nil case typeOfDec32: - precision, scale := dec32DefaultPrecision, int32(0) - if opts.HasDecimalOpts { - precision = opts.DecimalPrecision - scale = opts.DecimalScale - } + precision, scale := decimalPrecisionScale(opts, dec32DefaultPrecision) dt := &arrow.Decimal32Type{Precision: precision, Scale: scale} b := array.NewDecimal32Builder(mem, dt) defer b.Release() @@ -386,11 +377,7 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( return b.NewArray(), nil case typeOfDec64: - precision, scale := dec64DefaultPrecision, int32(0) - if opts.HasDecimalOpts { - precision = opts.DecimalPrecision - scale = opts.DecimalScale - } + precision, scale := decimalPrecisionScale(opts, dec64DefaultPrecision) dt := &arrow.Decimal64Type{Precision: precision, Scale: scale} b := array.NewDecimal64Builder(mem, dt) defer 
b.Release() @@ -447,46 +434,8 @@ func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, er return sb.NewArray(), nil } -func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { - for v.Kind() == reflect.Ptr { - if v.IsNil() { - b.AppendNull() - return nil - } - v = v.Elem() - } - +func appendTemporalValue(b array.Builder, v reflect.Value) error { switch tb := b.(type) { - case *array.Int8Builder: - tb.Append(int8(v.Int())) - case *array.Int16Builder: - tb.Append(int16(v.Int())) - case *array.Int32Builder: - tb.Append(int32(v.Int())) - case *array.Int64Builder: - tb.Append(int64(v.Int())) - case *array.Uint8Builder: - tb.Append(uint8(v.Uint())) - case *array.Uint16Builder: - tb.Append(uint16(v.Uint())) - case *array.Uint32Builder: - tb.Append(uint32(v.Uint())) - case *array.Uint64Builder: - tb.Append(uint64(v.Uint())) - case *array.Float32Builder: - tb.Append(float32(v.Float())) - case *array.Float64Builder: - tb.Append(float64(v.Float())) - case *array.BooleanBuilder: - tb.Append(v.Bool()) - case *array.StringBuilder: - tb.Append(v.String()) - case *array.BinaryBuilder: - if v.IsNil() { - tb.AppendNull() - } else { - tb.Append(v.Bytes()) - } case *array.TimestampBuilder: t, ok := reflect.TypeAssert[time.Time](v) if !ok { @@ -525,6 +474,14 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { return fmt.Errorf("arreflect: expected time.Duration, got %s", v.Type()) } tb.Append(arrow.Duration(d.Nanoseconds())) + default: + return fmt.Errorf("arreflect: unexpected temporal builder %T", b) + } + return nil +} + +func appendDecimalValue(b array.Builder, v reflect.Value) error { + switch tb := b.(type) { case *array.Decimal128Builder: n, ok := reflect.TypeAssert[decimal128.Num](v) if !ok { @@ -541,6 +498,57 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { tb.Append(decimal.Decimal32(v.Int())) case *array.Decimal64Builder: tb.Append(decimal.Decimal64(v.Int())) + default: + return 
fmt.Errorf("arreflect: unexpected decimal builder %T", b) + } + return nil +} + +func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { + for v.Kind() == reflect.Ptr { + if v.IsNil() { + b.AppendNull() + return nil + } + v = v.Elem() + } + + switch tb := b.(type) { + case *array.Int8Builder: + tb.Append(int8(v.Int())) + case *array.Int16Builder: + tb.Append(int16(v.Int())) + case *array.Int32Builder: + tb.Append(int32(v.Int())) + case *array.Int64Builder: + tb.Append(int64(v.Int())) + case *array.Uint8Builder: + tb.Append(uint8(v.Uint())) + case *array.Uint16Builder: + tb.Append(uint16(v.Uint())) + case *array.Uint32Builder: + tb.Append(uint32(v.Uint())) + case *array.Uint64Builder: + tb.Append(uint64(v.Uint())) + case *array.Float32Builder: + tb.Append(float32(v.Float())) + case *array.Float64Builder: + tb.Append(float64(v.Float())) + case *array.BooleanBuilder: + tb.Append(v.Bool()) + case *array.StringBuilder: + tb.Append(v.String()) + case *array.BinaryBuilder: + if v.IsNil() { + tb.AppendNull() + } else { + tb.Append(v.Bytes()) + } + case *array.TimestampBuilder, *array.Date32Builder, *array.Date64Builder, + *array.Time32Builder, *array.Time64Builder, *array.DurationBuilder: + return appendTemporalValue(b, v) + case *array.Decimal128Builder, *array.Decimal256Builder, *array.Decimal32Builder, *array.Decimal64Builder: + return appendDecimalValue(b, v) case *array.ListBuilder: if v.Kind() == reflect.Slice && v.IsNil() { tb.AppendNull() @@ -685,39 +693,60 @@ func appendToDictBuilder(db array.DictionaryBuilder, v reflect.Value) error { return fmt.Errorf("arreflect: unsupported builder type %T", db) } -func buildListArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { - elemDT, isOuterPtr, err := listBuildPreamble(vals) +type listBuilderLike interface { + array.Builder + ValueBuilder() array.Builder +} + +func buildListLikeArray(vals reflect.Value, mem memory.Allocator, isView bool) (arrow.Array, error) { + elemDT, isOuterPtr, err 
:= inferListElemDT(vals) if err != nil { return nil, err } - lb := array.NewListBuilder(mem, elemDT) - defer lb.Release() - - vb := lb.ValueBuilder() + var bldr listBuilderLike + var beginRow func(int) + if isView { + b := array.NewListViewBuilder(mem, elemDT) + bldr = b + beginRow = func(n int) { b.AppendWithSize(true, n) } + } else { + b := array.NewListBuilder(mem, elemDT) + bldr = b + beginRow = func(_ int) { b.Append(true) } + } + defer bldr.Release() + vb := bldr.ValueBuilder() for i := 0; i < vals.Len(); i++ { outer := vals.Index(i) if isOuterPtr { if outer.IsNil() { - lb.AppendNull() + bldr.AppendNull() continue } outer = outer.Elem() } if outer.IsNil() { - lb.AppendNull() + bldr.AppendNull() continue } - lb.Append(true) + beginRow(outer.Len()) for j := 0; j < outer.Len(); j++ { if err := appendValue(vb, outer.Index(j), tagOpts{}); err != nil { return nil, fmt.Errorf("list element [%d][%d]: %w", i, j, err) } } } + return bldr.NewArray(), nil +} - return lb.NewArray(), nil +func buildListArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { + return buildListLikeArray(vals, mem, false) +} + +func buildListViewArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { + return buildListLikeArray(vals, mem, true) } func buildMapArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { @@ -935,38 +964,3 @@ func buildRunEndEncodedArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar return array.NewRunEndEncodedArray(runEndsArr, valuesArr, vals.Len(), 0), nil } - -func buildListViewArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { - elemDT, isOuterPtr, err := listBuildPreamble(vals) - if err != nil { - return nil, err - } - - lvb := array.NewListViewBuilder(mem, elemDT) - defer lvb.Release() - - vb := lvb.ValueBuilder() - - for i := 0; i < vals.Len(); i++ { - outer := vals.Index(i) - if isOuterPtr { - if outer.IsNil() { - lvb.AppendNull() - continue - } - outer = outer.Elem() - } - if 
outer.IsNil() { - lvb.AppendNull() - continue - } - lvb.AppendWithSize(true, outer.Len()) - for j := 0; j < outer.Len(); j++ { - if err := appendValue(vb, outer.Index(j), tagOpts{}); err != nil { - return nil, fmt.Errorf("list-view element [%d][%d]: %w", i, j, err) - } - } - } - - return lvb.NewArray(), nil -} diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index 2756ffea..c061e6e1 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -142,6 +142,57 @@ func inferArrowType(t reflect.Type) (arrow.DataType, error) { } } +func applyDecimalOpts(dt arrow.DataType, origType reflect.Type, opts tagOpts) arrow.DataType { + if !opts.HasDecimalOpts { + return dt + } + prec, scale := opts.DecimalPrecision, opts.DecimalScale + switch origType { + case typeOfDec128: + return &arrow.Decimal128Type{Precision: prec, Scale: scale} + case typeOfDec256: + return &arrow.Decimal256Type{Precision: prec, Scale: scale} + case typeOfDec32: + return &arrow.Decimal32Type{Precision: prec, Scale: scale} + case typeOfDec64: + return &arrow.Decimal64Type{Precision: prec, Scale: scale} + } + return dt +} + +func applyTemporalOpts(dt arrow.DataType, origType reflect.Type, opts tagOpts) arrow.DataType { + if origType != typeOfTime || opts.Temporal == "" || opts.Temporal == "timestamp" { + return dt + } + switch opts.Temporal { + case "date32": + return arrow.FixedWidthTypes.Date32 + case "date64": + return arrow.FixedWidthTypes.Date64 + case "time32": + return &arrow.Time32Type{Unit: arrow.Millisecond} + case "time64": + return &arrow.Time64Type{Unit: arrow.Nanosecond} + } + return dt +} + +func applyEncodingOpts(dt arrow.DataType, fm fieldMeta) (arrow.DataType, error) { + switch { + case fm.Opts.Dict: + return &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: dt}, nil + case fm.Opts.ListView: + lt, ok := dt.(*arrow.ListType) + if !ok { + return nil, fmt.Errorf("arreflect: listview tag on field %q requires a 
slice type, got %v", fm.Name, dt) + } + return arrow.ListViewOf(lt.Elem()), nil + case fm.Opts.REE: + return nil, fmt.Errorf("arreflect: ree tag on struct field %q is not supported; use ree at top-level via FromSlice", fm.Name) + } + return dt, nil +} + func inferStructType(t reflect.Type) (*arrow.StructType, error) { for t.Kind() == reflect.Ptr { t = t.Elem() @@ -164,50 +215,11 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { return nil, fmt.Errorf("struct field %q: %w", fm.Name, err) } - if fm.Opts.HasDecimalOpts { - switch origType { - case typeOfDec32: - dt = &arrow.Decimal32Type{Precision: fm.Opts.DecimalPrecision, Scale: fm.Opts.DecimalScale} - case typeOfDec64: - dt = &arrow.Decimal64Type{Precision: fm.Opts.DecimalPrecision, Scale: fm.Opts.DecimalScale} - case typeOfDec128: - dt = &arrow.Decimal128Type{ - Precision: fm.Opts.DecimalPrecision, - Scale: fm.Opts.DecimalScale, - } - case typeOfDec256: - dt = &arrow.Decimal256Type{ - Precision: fm.Opts.DecimalPrecision, - Scale: fm.Opts.DecimalScale, - } - } - } - - if origType == typeOfTime && fm.Opts.Temporal != "" { - switch fm.Opts.Temporal { - case "date32": - dt = arrow.FixedWidthTypes.Date32 - case "date64": - dt = arrow.FixedWidthTypes.Date64 - case "time32": - dt = &arrow.Time32Type{Unit: arrow.Millisecond} - case "time64": - dt = &arrow.Time64Type{Unit: arrow.Nanosecond} - case "timestamp", "": - } - } - - switch { - case fm.Opts.Dict: - dt = &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: dt} - case fm.Opts.ListView: - lt, ok := dt.(*arrow.ListType) - if !ok { - return nil, fmt.Errorf("arreflect: listview tag on field %q requires a slice type, got %v", fm.Name, dt) - } - dt = arrow.ListViewOf(lt.Elem()) - case fm.Opts.REE: - return nil, fmt.Errorf("arreflect: ree tag on struct field %q is not supported; use ree at top-level via FromSlice", fm.Name) + dt = applyDecimalOpts(dt, origType, fm.Opts) + dt = applyTemporalOpts(dt, origType, fm.Opts) + dt, err = 
applyEncodingOpts(dt, fm) + if err != nil { + return nil, err } arrowFields = append(arrowFields, arrow.Field{ From 29a94c451906e38e399e4674ddf445879c24c685 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 15:47:06 -0700 Subject: [PATCH 17/82] fix(arreflect): wire sentinel errors and fix list-view error label ErrTypeMismatch and ErrUnsupportedType are now wrapped with %w at all relevant error origins (~82 sites across reflect_go_to_arrow.go, reflect_arrow_to_go.go, and reflect_infer.go), making errors.Is usable for callers implementing fallback logic. Error prefix convention: - Leaf errors: no arreflect: prefix, wrap sentinel with %w - Context wrappers (field/index names): no arreflect: prefix, wrap %w - Top-level entry points: keep arreflect: prefix buildListLikeArray: restore 'list-view element' label for error messages when isView=true (was incorrectly unified to 'list element'). --- arrow/arreflect/reflect_arrow_to_go.go | 108 ++++++++++++------------- arrow/arreflect/reflect_go_to_arrow.go | 61 +++++++------- arrow/arreflect/reflect_infer.go | 2 +- 3 files changed, 88 insertions(+), 83 deletions(-) diff --git a/arrow/arreflect/reflect_arrow_to_go.go b/arrow/arreflect/reflect_arrow_to_go.go index 810c9bd4..9f1087e5 100644 --- a/arrow/arreflect/reflect_arrow_to_go.go +++ b/arrow/arreflect/reflect_arrow_to_go.go @@ -51,10 +51,10 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.BOOL: a, ok := arr.(*array.Boolean) if !ok { - return fmt.Errorf("arreflect: expected *Boolean, got %T", arr) + return fmt.Errorf("expected *Boolean, got %T: %w", arr, ErrTypeMismatch) } if v.Kind() != reflect.Bool { - return fmt.Errorf("arreflect: cannot set bool into %s", v.Type()) + return fmt.Errorf("cannot set bool into %s: %w", v.Type(), ErrTypeMismatch) } v.SetBool(a.Value(i)) @@ -66,40 +66,40 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.STRING: a, ok := arr.(*array.String) if !ok { - return 
fmt.Errorf("arreflect: expected *String, got %T", arr) + return fmt.Errorf("expected *String, got %T: %w", arr, ErrTypeMismatch) } if v.Kind() != reflect.String { - return fmt.Errorf("arreflect: cannot set string into %s", v.Type()) + return fmt.Errorf("cannot set string into %s: %w", v.Type(), ErrTypeMismatch) } v.SetString(a.Value(i)) case arrow.LARGE_STRING: a, ok := arr.(*array.LargeString) if !ok { - return fmt.Errorf("arreflect: expected *LargeString, got %T", arr) + return fmt.Errorf("expected *LargeString, got %T: %w", arr, ErrTypeMismatch) } if v.Kind() != reflect.String { - return fmt.Errorf("arreflect: cannot set string into %s", v.Type()) + return fmt.Errorf("cannot set string into %s: %w", v.Type(), ErrTypeMismatch) } v.SetString(a.Value(i)) case arrow.BINARY: a, ok := arr.(*array.Binary) if !ok { - return fmt.Errorf("arreflect: expected *Binary, got %T", arr) + return fmt.Errorf("expected *Binary, got %T: %w", arr, ErrTypeMismatch) } if v.Kind() != reflect.Slice || v.Type().Elem().Kind() != reflect.Uint8 { - return fmt.Errorf("arreflect: cannot set []byte into %s", v.Type()) + return fmt.Errorf("cannot set []byte into %s: %w", v.Type(), ErrTypeMismatch) } v.SetBytes(a.Value(i)) case arrow.LARGE_BINARY: a, ok := arr.(*array.LargeBinary) if !ok { - return fmt.Errorf("arreflect: expected *LargeBinary, got %T", arr) + return fmt.Errorf("expected *LargeBinary, got %T: %w", arr, ErrTypeMismatch) } if v.Kind() != reflect.Slice || v.Type().Elem().Kind() != reflect.Uint8 { - return fmt.Errorf("arreflect: cannot set []byte into %s", v.Type()) + return fmt.Errorf("cannot set []byte into %s: %w", v.Type(), ErrTypeMismatch) } v.SetBytes(a.Value(i)) @@ -113,47 +113,47 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.STRUCT: a, ok := arr.(*array.Struct) if !ok { - return fmt.Errorf("arreflect: expected *Struct, got %T", arr) + return fmt.Errorf("expected *Struct, got %T: %w", arr, ErrTypeMismatch) } return setStructValue(v, a, i) case 
arrow.LIST, arrow.LARGE_LIST, arrow.LIST_VIEW, arrow.LARGE_LIST_VIEW: a, ok := arr.(array.ListLike) if !ok { - return fmt.Errorf("arreflect: expected ListLike, got %T", arr) + return fmt.Errorf("expected ListLike, got %T: %w", arr, ErrTypeMismatch) } return setListValue(v, a, i) case arrow.MAP: a, ok := arr.(*array.Map) if !ok { - return fmt.Errorf("arreflect: expected *Map, got %T", arr) + return fmt.Errorf("expected *Map, got %T: %w", arr, ErrTypeMismatch) } return setMapValue(v, a, i) case arrow.FIXED_SIZE_LIST: a, ok := arr.(*array.FixedSizeList) if !ok { - return fmt.Errorf("arreflect: expected *FixedSizeList, got %T", arr) + return fmt.Errorf("expected *FixedSizeList, got %T: %w", arr, ErrTypeMismatch) } return setFixedSizeListValue(v, a, i) case arrow.DICTIONARY: a, ok := arr.(*array.Dictionary) if !ok { - return fmt.Errorf("arreflect: expected *Dictionary, got %T", arr) + return fmt.Errorf("expected *Dictionary, got %T: %w", arr, ErrTypeMismatch) } return setDictionaryValue(v, a, i) case arrow.RUN_END_ENCODED: a, ok := arr.(*array.RunEndEncoded) if !ok { - return fmt.Errorf("arreflect: expected *RunEndEncoded, got %T", arr) + return fmt.Errorf("expected *RunEndEncoded, got %T: %w", arr, ErrTypeMismatch) } return setRunEndEncodedValue(v, a, i) default: - return fmt.Errorf("arreflect: unsupported Arrow type %v for reflection", arr.DataType()) + return fmt.Errorf("unsupported Arrow type %v for reflection: %w", arr.DataType(), ErrUnsupportedType) } return nil } @@ -167,56 +167,56 @@ func setPrimitiveValue(v reflect.Value, arr arrow.Array, i int) error { switch arr.DataType().ID() { case arrow.INT8: if !isIntKind(v.Kind()) { - return fmt.Errorf("arreflect: cannot set int8 into %s", v.Type()) + return fmt.Errorf("cannot set int8 into %s: %w", v.Type(), ErrTypeMismatch) } v.SetInt(int64(arr.(*array.Int8).Value(i))) case arrow.INT16: if !isIntKind(v.Kind()) { - return fmt.Errorf("arreflect: cannot set int16 into %s", v.Type()) + return fmt.Errorf("cannot set int16 
into %s: %w", v.Type(), ErrTypeMismatch) } v.SetInt(int64(arr.(*array.Int16).Value(i))) case arrow.INT32: if !isIntKind(v.Kind()) { - return fmt.Errorf("arreflect: cannot set int32 into %s", v.Type()) + return fmt.Errorf("cannot set int32 into %s: %w", v.Type(), ErrTypeMismatch) } v.SetInt(int64(arr.(*array.Int32).Value(i))) case arrow.INT64: if !isIntKind(v.Kind()) { - return fmt.Errorf("arreflect: cannot set int64 into %s", v.Type()) + return fmt.Errorf("cannot set int64 into %s: %w", v.Type(), ErrTypeMismatch) } v.SetInt(arr.(*array.Int64).Value(i)) case arrow.UINT8: if !isUintKind(v.Kind()) { - return fmt.Errorf("arreflect: cannot set uint8 into %s", v.Type()) + return fmt.Errorf("cannot set uint8 into %s: %w", v.Type(), ErrTypeMismatch) } v.SetUint(uint64(arr.(*array.Uint8).Value(i))) case arrow.UINT16: if !isUintKind(v.Kind()) { - return fmt.Errorf("arreflect: cannot set uint16 into %s", v.Type()) + return fmt.Errorf("cannot set uint16 into %s: %w", v.Type(), ErrTypeMismatch) } v.SetUint(uint64(arr.(*array.Uint16).Value(i))) case arrow.UINT32: if !isUintKind(v.Kind()) { - return fmt.Errorf("arreflect: cannot set uint32 into %s", v.Type()) + return fmt.Errorf("cannot set uint32 into %s: %w", v.Type(), ErrTypeMismatch) } v.SetUint(uint64(arr.(*array.Uint32).Value(i))) case arrow.UINT64: if !isUintKind(v.Kind()) { - return fmt.Errorf("arreflect: cannot set uint64 into %s", v.Type()) + return fmt.Errorf("cannot set uint64 into %s: %w", v.Type(), ErrTypeMismatch) } v.SetUint(arr.(*array.Uint64).Value(i)) case arrow.FLOAT32: if !isFloatKind(v.Kind()) { - return fmt.Errorf("arreflect: cannot set float32 into %s", v.Type()) + return fmt.Errorf("cannot set float32 into %s: %w", v.Type(), ErrTypeMismatch) } v.SetFloat(float64(arr.(*array.Float32).Value(i))) case arrow.FLOAT64: if !isFloatKind(v.Kind()) { - return fmt.Errorf("arreflect: cannot set float64 into %s", v.Type()) + return fmt.Errorf("cannot set float64 into %s: %w", v.Type(), ErrTypeMismatch) } 
v.SetFloat(arr.(*array.Float64).Value(i)) default: - return fmt.Errorf("arreflect: unsupported primitive type %v", arr.DataType()) + return fmt.Errorf("unsupported primitive type %v: %w", arr.DataType(), ErrUnsupportedType) } return nil } @@ -231,10 +231,10 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.TIMESTAMP: a, ok := arr.(*array.Timestamp) if !ok { - return fmt.Errorf("arreflect: expected *Timestamp, got %T", arr) + return fmt.Errorf("expected *Timestamp, got %T: %w", arr, ErrTypeMismatch) } if v.Type() != typeOfTime { - return fmt.Errorf("arreflect: cannot set time.Time into %s", v.Type()) + return fmt.Errorf("cannot set time.Time into %s: %w", v.Type(), ErrTypeMismatch) } unit := arr.DataType().(*arrow.TimestampType).Unit t := a.Value(i).ToTime(unit) @@ -243,10 +243,10 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.DATE32: a, ok := arr.(*array.Date32) if !ok { - return fmt.Errorf("arreflect: expected *Date32, got %T", arr) + return fmt.Errorf("expected *Date32, got %T: %w", arr, ErrTypeMismatch) } if v.Type() != typeOfTime { - return fmt.Errorf("arreflect: cannot set time.Time into %s", v.Type()) + return fmt.Errorf("cannot set time.Time into %s: %w", v.Type(), ErrTypeMismatch) } t := a.Value(i).ToTime() v.Set(reflect.ValueOf(t)) @@ -254,10 +254,10 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.DATE64: a, ok := arr.(*array.Date64) if !ok { - return fmt.Errorf("arreflect: expected *Date64, got %T", arr) + return fmt.Errorf("expected *Date64, got %T: %w", arr, ErrTypeMismatch) } if v.Type() != typeOfTime { - return fmt.Errorf("arreflect: cannot set time.Time into %s", v.Type()) + return fmt.Errorf("cannot set time.Time into %s: %w", v.Type(), ErrTypeMismatch) } t := a.Value(i).ToTime() v.Set(reflect.ValueOf(t)) @@ -265,10 +265,10 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.TIME32: a, ok := 
arr.(*array.Time32) if !ok { - return fmt.Errorf("arreflect: expected *Time32, got %T", arr) + return fmt.Errorf("expected *Time32, got %T: %w", arr, ErrTypeMismatch) } if v.Type() != typeOfTime { - return fmt.Errorf("arreflect: cannot set time.Time into %s", v.Type()) + return fmt.Errorf("cannot set time.Time into %s: %w", v.Type(), ErrTypeMismatch) } unit := arr.DataType().(*arrow.Time32Type).Unit t := a.Value(i).ToTime(unit) @@ -277,10 +277,10 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.TIME64: a, ok := arr.(*array.Time64) if !ok { - return fmt.Errorf("arreflect: expected *Time64, got %T", arr) + return fmt.Errorf("expected *Time64, got %T: %w", arr, ErrTypeMismatch) } if v.Type() != typeOfTime { - return fmt.Errorf("arreflect: cannot set time.Time into %s", v.Type()) + return fmt.Errorf("cannot set time.Time into %s: %w", v.Type(), ErrTypeMismatch) } unit := arr.DataType().(*arrow.Time64Type).Unit t := a.Value(i).ToTime(unit) @@ -289,17 +289,17 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.DURATION: a, ok := arr.(*array.Duration) if !ok { - return fmt.Errorf("arreflect: expected *Duration, got %T", arr) + return fmt.Errorf("expected *Duration, got %T: %w", arr, ErrTypeMismatch) } if v.Type() != typeOfDuration { - return fmt.Errorf("arreflect: cannot set time.Duration into %s", v.Type()) + return fmt.Errorf("cannot set time.Duration into %s: %w", v.Type(), ErrTypeMismatch) } unit := arr.DataType().(*arrow.DurationType).Unit dur := time.Duration(a.Value(i)) * unit.Multiplier() v.Set(reflect.ValueOf(dur)) default: - return fmt.Errorf("arreflect: unsupported temporal type %v", arr.DataType()) + return fmt.Errorf("unsupported temporal type %v: %w", arr.DataType(), ErrUnsupportedType) } return nil } @@ -314,10 +314,10 @@ func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.DECIMAL128: a, ok := arr.(*array.Decimal128) if !ok { - return fmt.Errorf("arreflect: 
expected *Decimal128, got %T", arr) + return fmt.Errorf("expected *Decimal128, got %T: %w", arr, ErrTypeMismatch) } if v.Type() != typeOfDec128 { - return fmt.Errorf("arreflect: cannot set decimal128.Num into %s", v.Type()) + return fmt.Errorf("cannot set decimal128.Num into %s: %w", v.Type(), ErrTypeMismatch) } num := a.Value(i) v.Set(reflect.ValueOf(num)) @@ -325,10 +325,10 @@ func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.DECIMAL256: a, ok := arr.(*array.Decimal256) if !ok { - return fmt.Errorf("arreflect: expected *Decimal256, got %T", arr) + return fmt.Errorf("expected *Decimal256, got %T: %w", arr, ErrTypeMismatch) } if v.Type() != typeOfDec256 { - return fmt.Errorf("arreflect: cannot set decimal256.Num into %s", v.Type()) + return fmt.Errorf("cannot set decimal256.Num into %s: %w", v.Type(), ErrTypeMismatch) } num := a.Value(i) v.Set(reflect.ValueOf(num)) @@ -336,25 +336,25 @@ func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { case arrow.DECIMAL32: a, ok := arr.(*array.Decimal32) if !ok { - return fmt.Errorf("arreflect: expected *Decimal32, got %T", arr) + return fmt.Errorf("expected *Decimal32, got %T: %w", arr, ErrTypeMismatch) } if v.Type() != typeOfDec32 { - return fmt.Errorf("arreflect: cannot set decimal.Decimal32 into %s", v.Type()) + return fmt.Errorf("cannot set decimal.Decimal32 into %s: %w", v.Type(), ErrTypeMismatch) } v.Set(reflect.ValueOf(a.Value(i))) case arrow.DECIMAL64: a, ok := arr.(*array.Decimal64) if !ok { - return fmt.Errorf("arreflect: expected *Decimal64, got %T", arr) + return fmt.Errorf("expected *Decimal64, got %T: %w", arr, ErrTypeMismatch) } if v.Type() != typeOfDec64 { - return fmt.Errorf("arreflect: cannot set decimal.Decimal64 into %s", v.Type()) + return fmt.Errorf("cannot set decimal.Decimal64 into %s: %w", v.Type(), ErrTypeMismatch) } v.Set(reflect.ValueOf(a.Value(i))) default: - return fmt.Errorf("arreflect: unsupported decimal type %v", arr.DataType()) + return 
fmt.Errorf("unsupported decimal type %v: %w", arr.DataType(), ErrUnsupportedType) } return nil } @@ -366,7 +366,7 @@ func setStructValue(v reflect.Value, sa *array.Struct, i int) error { } if v.Kind() != reflect.Struct { - return fmt.Errorf("arreflect: cannot set struct into %s", v.Type()) + return fmt.Errorf("cannot set struct into %s: %w", v.Type(), ErrTypeMismatch) } fields := cachedStructFields(v.Type()) @@ -391,7 +391,7 @@ func setListValue(v reflect.Value, arr array.ListLike, i int) error { } if v.Kind() != reflect.Slice { - return fmt.Errorf("arreflect: cannot set list into %s", v.Type()) + return fmt.Errorf("cannot set list into %s: %w", v.Type(), ErrTypeMismatch) } start, end := arr.ValueOffsets(i) @@ -415,7 +415,7 @@ func setMapValue(v reflect.Value, arr *array.Map, i int) error { } if v.Kind() != reflect.Map { - return fmt.Errorf("arreflect: cannot set map into %s", v.Type()) + return fmt.Errorf("cannot set map into %s: %w", v.Type(), ErrTypeMismatch) } start, end := arr.ValueOffsets(i) @@ -469,7 +469,7 @@ func setFixedSizeListValue(v reflect.Value, arr *array.FixedSizeList, i int) err } v.Set(result) default: - return fmt.Errorf("arreflect: cannot set fixed-size list into %s", v.Type()) + return fmt.Errorf("cannot set fixed-size list into %s: %w", v.Type(), ErrTypeMismatch) } return nil } diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 6b7f298b..e678bf4e 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -140,19 +140,19 @@ func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e case arrow.DURATION: d, ok := reflect.TypeAssert[time.Duration](v) if !ok { - return fmt.Errorf("arreflect: expected time.Duration, got %s", v.Type()) + return fmt.Errorf("expected time.Duration, got %s: %w", v.Type(), ErrTypeMismatch) } b.(*array.DurationBuilder).Append(arrow.Duration(d.Nanoseconds())) case arrow.DECIMAL128: n, ok := 
reflect.TypeAssert[decimal128.Num](v) if !ok { - return fmt.Errorf("arreflect: expected decimal128.Num, got %s", v.Type()) + return fmt.Errorf("expected decimal128.Num, got %s: %w", v.Type(), ErrTypeMismatch) } b.(*array.Decimal128Builder).Append(n) case arrow.DECIMAL256: n, ok := reflect.TypeAssert[decimal256.Num](v) if !ok { - return fmt.Errorf("arreflect: expected decimal256.Num, got %s", v.Type()) + return fmt.Errorf("expected decimal256.Num, got %s: %w", v.Type(), ErrTypeMismatch) } b.(*array.Decimal256Builder).Append(n) case arrow.DECIMAL32: @@ -160,7 +160,7 @@ func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e case arrow.DECIMAL64: b.(*array.Decimal64Builder).Append(decimal.Decimal64(v.Int())) default: - return fmt.Errorf("arreflect: unsupported Arrow type %v", dt) + return fmt.Errorf("unsupported Arrow type %v: %w", dt, ErrUnsupportedType) } return nil } @@ -220,7 +220,7 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { t, ok := reflect.TypeAssert[time.Time](v) if !ok { - return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) } b.Append(arrow.Date32FromTime(t)) return nil @@ -235,7 +235,7 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { t, ok := reflect.TypeAssert[time.Time](v) if !ok { - return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) } b.Append(arrow.Date64FromTime(t)) return nil @@ -251,7 +251,7 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { t, ok := reflect.TypeAssert[time.Time](v) if !ok { - return 
fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) } b.Append(arrow.Time32(timeOfDayNanos(t) / int64(dt.Unit.Multiplier()))) return nil @@ -267,7 +267,7 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { t, ok := reflect.TypeAssert[time.Time](v) if !ok { - return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) } b.Append(arrow.Time64(timeOfDayNanos(t) / int64(dt.Unit.Multiplier()))) return nil @@ -283,7 +283,7 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) if err := iterSlice(vals, isPtr, tb.AppendNull, func(v reflect.Value) error { t, ok := reflect.TypeAssert[time.Time](v) if !ok { - return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) } tb.Append(arrow.Timestamp(t.UnixNano())) return nil @@ -301,7 +301,7 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) if err := iterSlice(vals, isPtr, db.AppendNull, func(v reflect.Value) error { d, ok := reflect.TypeAssert[time.Duration](v) if !ok { - return fmt.Errorf("arreflect: expected time.Duration, got %s", v.Type()) + return fmt.Errorf("expected time.Duration, got %s: %w", v.Type(), ErrTypeMismatch) } db.Append(arrow.Duration(d.Nanoseconds())) return nil @@ -311,7 +311,7 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) return db.NewArray(), nil default: - return nil, fmt.Errorf("arreflect: unsupported temporal type %v", elemType) + return nil, fmt.Errorf("unsupported temporal type %v: %w", elemType, ErrUnsupportedType) } } @@ -335,7 +335,7 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( if err := 
iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { n, ok := reflect.TypeAssert[decimal128.Num](v) if !ok { - return fmt.Errorf("arreflect: expected decimal128.Num, got %s", v.Type()) + return fmt.Errorf("expected decimal128.Num, got %s: %w", v.Type(), ErrTypeMismatch) } b.Append(n) return nil @@ -353,7 +353,7 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { n, ok := reflect.TypeAssert[decimal256.Num](v) if !ok { - return fmt.Errorf("arreflect: expected decimal256.Num, got %s", v.Type()) + return fmt.Errorf("expected decimal256.Num, got %s: %w", v.Type(), ErrTypeMismatch) } b.Append(n) return nil @@ -391,7 +391,7 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( return b.NewArray(), nil default: - return nil, fmt.Errorf("arreflect: unsupported decimal type %v", elemType) + return nil, fmt.Errorf("unsupported decimal type %v: %w", elemType, ErrUnsupportedType) } } @@ -439,43 +439,43 @@ func appendTemporalValue(b array.Builder, v reflect.Value) error { case *array.TimestampBuilder: t, ok := reflect.TypeAssert[time.Time](v) if !ok { - return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) } tb.Append(arrow.Timestamp(t.UnixNano())) case *array.Date32Builder: t, ok := reflect.TypeAssert[time.Time](v) if !ok { - return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) } tb.Append(arrow.Date32FromTime(t)) case *array.Date64Builder: t, ok := reflect.TypeAssert[time.Time](v) if !ok { - return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) } tb.Append(arrow.Date64FromTime(t)) case *array.Time32Builder: unit := 
tb.Type().(*arrow.Time32Type).Unit t, ok := reflect.TypeAssert[time.Time](v) if !ok { - return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) } tb.Append(arrow.Time32(timeOfDayNanos(t) / int64(unit.Multiplier()))) case *array.Time64Builder: unit := tb.Type().(*arrow.Time64Type).Unit t, ok := reflect.TypeAssert[time.Time](v) if !ok { - return fmt.Errorf("arreflect: expected time.Time, got %s", v.Type()) + return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) } tb.Append(arrow.Time64(timeOfDayNanos(t) / int64(unit.Multiplier()))) case *array.DurationBuilder: d, ok := reflect.TypeAssert[time.Duration](v) if !ok { - return fmt.Errorf("arreflect: expected time.Duration, got %s", v.Type()) + return fmt.Errorf("expected time.Duration, got %s: %w", v.Type(), ErrTypeMismatch) } tb.Append(arrow.Duration(d.Nanoseconds())) default: - return fmt.Errorf("arreflect: unexpected temporal builder %T", b) + return fmt.Errorf("unexpected temporal builder %T: %w", b, ErrTypeMismatch) } return nil } @@ -485,13 +485,13 @@ func appendDecimalValue(b array.Builder, v reflect.Value) error { case *array.Decimal128Builder: n, ok := reflect.TypeAssert[decimal128.Num](v) if !ok { - return fmt.Errorf("arreflect: expected decimal128.Num, got %s", v.Type()) + return fmt.Errorf("expected decimal128.Num, got %s: %w", v.Type(), ErrTypeMismatch) } tb.Append(n) case *array.Decimal256Builder: n, ok := reflect.TypeAssert[decimal256.Num](v) if !ok { - return fmt.Errorf("arreflect: expected decimal256.Num, got %s", v.Type()) + return fmt.Errorf("expected decimal256.Num, got %s: %w", v.Type(), ErrTypeMismatch) } tb.Append(n) case *array.Decimal32Builder: @@ -499,7 +499,7 @@ func appendDecimalValue(b array.Builder, v reflect.Value) error { case *array.Decimal64Builder: tb.Append(decimal.Decimal64(v.Int())) default: - return fmt.Errorf("arreflect: unexpected decimal builder %T", b) + return 
fmt.Errorf("unexpected decimal builder %T: %w", b, ErrTypeMismatch) } return nil } @@ -649,7 +649,7 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { if db, ok := b.(array.DictionaryBuilder); ok { return appendToDictBuilder(db, v) } - return fmt.Errorf("arreflect: unsupported builder type %T", b) + return fmt.Errorf("unsupported builder type %T: %w", b, ErrUnsupportedType) } return nil } @@ -667,7 +667,7 @@ func appendToDictBuilder(db array.DictionaryBuilder, v reflect.Value) error { } return bdb.Append(v.Bytes()) default: - return fmt.Errorf("arreflect: unsupported value kind %v for BinaryDictionaryBuilder", v.Kind()) + return fmt.Errorf("unsupported value kind %v for BinaryDictionaryBuilder: %w", v.Kind(), ErrUnsupportedType) } case *array.Int8DictionaryBuilder: return bdb.Append(int8(v.Int())) @@ -690,7 +690,7 @@ func appendToDictBuilder(db array.DictionaryBuilder, v reflect.Value) error { case *array.Float64DictionaryBuilder: return bdb.Append(float64(v.Float())) } - return fmt.Errorf("arreflect: unsupported builder type %T", db) + return fmt.Errorf("unsupported builder type %T: %w", db, ErrUnsupportedType) } type listBuilderLike interface { @@ -704,6 +704,11 @@ func buildListLikeArray(vals reflect.Value, mem memory.Allocator, isView bool) ( return nil, err } + label := "list element" + if isView { + label = "list-view element" + } + var bldr listBuilderLike var beginRow func(int) if isView { @@ -734,7 +739,7 @@ func buildListLikeArray(vals reflect.Value, mem memory.Allocator, isView bool) ( beginRow(outer.Len()) for j := 0; j < outer.Len(); j++ { if err := appendValue(vb, outer.Index(j), tagOpts{}); err != nil { - return nil, fmt.Errorf("list element [%d][%d]: %w", i, j, err) + return nil, fmt.Errorf("%s [%d][%d]: %w", label, i, j, err) } } } diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index c061e6e1..615232ef 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ 
-95,7 +95,7 @@ func inferPrimitiveArrowType(t reflect.Type) (arrow.DataType, error) { case typeOfDec256: return &arrow.Decimal256Type{Precision: dec256DefaultPrecision, Scale: 0}, nil default: - return nil, fmt.Errorf("arreflect: unsupported Go type for Arrow inference: %v", t) + return nil, fmt.Errorf("unsupported Go type for Arrow inference %v: %w", t, ErrUnsupportedType) } } From bb2882c833f8aa9229724cf10eca256b17f40e5a Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 15:53:47 -0700 Subject: [PATCH 18/82] feat(arreflect): add GoTypeOf, GetAny, ToAnySlice for runtime type inference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For callers that don't know the Go type at compile time: GoTypeOf(dt arrow.DataType) (reflect.Type, error): Inverts the Arrow→Go mapping. All scalar, temporal, and decimal types map to their concrete Go equivalents. Composite types recurse: LIST/LARGE_LIST/LIST_VIEW → []T, FIXED_SIZE_LIST(N) → [N]T, MAP → map[K]V, STRUCT → dynamic anonymous struct via reflect.StructOf (exported field names, arrow: tags, nullable fields as *T). DICTIONARY and RUN_END_ENCODED delegate to their value/encoded type. Returns ErrUnsupportedType (wrappable via errors.Is) for unknown types. GetAny(arr, i) (any, error): Single-element dynamic accessor. Infers Go type via GoTypeOf, then delegates to the existing setValue path. Prefer Get[T] when T is known. ToAnySlice(arr) ([]any, error): Full-array dynamic converter. All elements share the inferred type. Prefer ToSlice[T] when T is known. 
--- arrow/arreflect/reflect.go | 38 ++++++++ arrow/arreflect/reflect_infer.go | 126 +++++++++++++++++++++++++ arrow/arreflect/reflect_infer_test.go | 77 +++++++++++++++ arrow/arreflect/reflect_public_test.go | 47 +++++++++ 4 files changed, 288 insertions(+) diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index aaede697..891760e4 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -390,3 +390,41 @@ func RecordFromSlice[T any](vals []T, mem memory.Allocator) (arrow.Record, error func RecordFromSliceDefault[T any](vals []T) (arrow.Record, error) { return RecordFromSlice(vals, memory.DefaultAllocator) } + +// GetAny converts a single element at index i of an Arrow array to a Go value, +// inferring the Go type from the Arrow DataType at runtime via [GoTypeOf]. +// Useful when the column type is not known at compile time. +// For typed access when T is known, prefer [Get]. +func GetAny(arr arrow.Array, i int) (any, error) { + goType, err := GoTypeOf(arr.DataType()) + if err != nil { + return nil, err + } + result := reflect.New(goType).Elem() + if err := setValue(result, arr, i); err != nil { + return nil, err + } + return result.Interface(), nil +} + +// ToAnySlice converts all elements of an Arrow array to Go values, +// inferring the Go type from the Arrow DataType at runtime via [GoTypeOf]. +// All elements share the same inferred Go type; null elements are nil (for +// nullable column types) or zero values. +// For typed access when T is known, prefer [ToSlice]. 
+func ToAnySlice(arr arrow.Array) ([]any, error) { + goType, err := GoTypeOf(arr.DataType()) + if err != nil { + return nil, err + } + n := arr.Len() + result := make([]any, n) + for i := 0; i < n; i++ { + v := reflect.New(goType).Elem() + if err := setValue(v, arr, i); err != nil { + return nil, fmt.Errorf("index %d: %w", i, err) + } + result[i] = v.Interface() + } + return result, nil +} diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index 615232ef..0624c806 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -19,6 +19,7 @@ package arreflect import ( "fmt" "reflect" + "strings" "time" "github.com/apache/arrow-go/v18/arrow" @@ -264,3 +265,128 @@ func TypeOf[T any]() (arrow.DataType, error) { t := reflect.TypeFor[T]() return inferArrowType(t) } + +// GoTypeOf returns the Go reflect.Type corresponding to the given Arrow DataType. +// For STRUCT types it constructs an anonymous struct type at runtime using +// [reflect.StructOf]; field names are exported (capitalised) with the original +// Arrow field name preserved in an arrow struct tag. Nullable Arrow fields +// (field.Nullable == true) become pointer types (*T). +// For DICTIONARY and RUN_END_ENCODED types it returns the Go type of the +// value/encoded type respectively (dictionaries are resolved transparently). 
+func GoTypeOf(dt arrow.DataType) (reflect.Type, error) { + switch dt.ID() { + case arrow.INT8: + return reflect.TypeOf(int8(0)), nil + case arrow.INT16: + return reflect.TypeOf(int16(0)), nil + case arrow.INT32: + return reflect.TypeOf(int32(0)), nil + case arrow.INT64: + return reflect.TypeOf(int64(0)), nil + case arrow.UINT8: + return reflect.TypeOf(uint8(0)), nil + case arrow.UINT16: + return reflect.TypeOf(uint16(0)), nil + case arrow.UINT32: + return reflect.TypeOf(uint32(0)), nil + case arrow.UINT64: + return reflect.TypeOf(uint64(0)), nil + case arrow.FLOAT32: + return reflect.TypeOf(float32(0)), nil + case arrow.FLOAT64: + return reflect.TypeOf(float64(0)), nil + case arrow.BOOL: + return reflect.TypeOf(false), nil + case arrow.STRING, arrow.LARGE_STRING: + return reflect.TypeOf(""), nil + case arrow.BINARY, arrow.LARGE_BINARY: + return typeOfByteSlice, nil + case arrow.TIMESTAMP, arrow.DATE32, arrow.DATE64, arrow.TIME32, arrow.TIME64: + return typeOfTime, nil + case arrow.DURATION: + return typeOfDuration, nil + case arrow.DECIMAL128: + return typeOfDec128, nil + case arrow.DECIMAL256: + return typeOfDec256, nil + case arrow.DECIMAL32: + return typeOfDec32, nil + case arrow.DECIMAL64: + return typeOfDec64, nil + + case arrow.LIST, arrow.LARGE_LIST, arrow.LIST_VIEW, arrow.LARGE_LIST_VIEW: + var elemDT arrow.DataType + switch t := dt.(type) { + case *arrow.ListType: + elemDT = t.Elem() + case *arrow.LargeListType: + elemDT = t.Elem() + case *arrow.ListViewType: + elemDT = t.Elem() + case *arrow.LargeListViewType: + elemDT = t.Elem() + default: + return nil, fmt.Errorf("unsupported Arrow type for Go inference: %v: %w", dt, ErrUnsupportedType) + } + elemType, err := GoTypeOf(elemDT) + if err != nil { + return nil, err + } + return reflect.SliceOf(elemType), nil + + case arrow.FIXED_SIZE_LIST: + fsl := dt.(*arrow.FixedSizeListType) + elemType, err := GoTypeOf(fsl.Elem()) + if err != nil { + return nil, err + } + return reflect.ArrayOf(int(fsl.Len()), 
elemType), nil + + case arrow.MAP: + mt := dt.(*arrow.MapType) + keyType, err := GoTypeOf(mt.KeyType()) + if err != nil { + return nil, err + } + valType, err := GoTypeOf(mt.ValueType()) + if err != nil { + return nil, err + } + return reflect.MapOf(keyType, valType), nil + + case arrow.STRUCT: + st := dt.(*arrow.StructType) + fields := make([]reflect.StructField, st.NumFields()) + for i := 0; i < st.NumFields(); i++ { + f := st.Field(i) + ft, err := GoTypeOf(f.Type) + if err != nil { + return nil, err + } + if f.Nullable { + ft = reflect.PointerTo(ft) + } + var exportedName string + if len(f.Name) == 0 { + exportedName = fmt.Sprintf("Field%d", i) + } else { + exportedName = strings.ToUpper(f.Name[:1]) + f.Name[1:] + } + fields[i] = reflect.StructField{ + Name: exportedName, + Type: ft, + Tag: reflect.StructTag(fmt.Sprintf(`arrow:%q`, f.Name)), + } + } + return reflect.StructOf(fields), nil + + case arrow.DICTIONARY: + return GoTypeOf(dt.(*arrow.DictionaryType).ValueType) + + case arrow.RUN_END_ENCODED: + return GoTypeOf(dt.(*arrow.RunEndEncodedType).Encoded()) + + default: + return nil, fmt.Errorf("unsupported Arrow type for Go inference: %v: %w", dt, ErrUnsupportedType) + } +} diff --git a/arrow/arreflect/reflect_infer_test.go b/arrow/arreflect/reflect_infer_test.go index df2177db..e07f99d1 100644 --- a/arrow/arreflect/reflect_infer_test.go +++ b/arrow/arreflect/reflect_infer_test.go @@ -17,6 +17,7 @@ package arreflect import ( + "errors" "reflect" "strings" "testing" @@ -479,3 +480,79 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { } }) } + +func TestGoTypeOf(t *testing.T) { + primitives := []struct { + dt arrow.DataType + want reflect.Type + }{ + {arrow.PrimitiveTypes.Int32, reflect.TypeOf(int32(0))}, + {arrow.PrimitiveTypes.Float64, reflect.TypeOf(float64(0))}, + {arrow.FixedWidthTypes.Boolean, reflect.TypeOf(bool(false))}, + {arrow.BinaryTypes.String, reflect.TypeOf("")}, + {arrow.BinaryTypes.Binary, reflect.TypeOf([]byte{})}, + 
{&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "UTC"}, reflect.TypeOf(time.Time{})}, + {&arrow.DurationType{Unit: arrow.Nanosecond}, reflect.TypeOf(time.Duration(0))}, + } + for _, tt := range primitives { + got, err := GoTypeOf(tt.dt) + if err != nil { + t.Errorf("GoTypeOf(%v): %v", tt.dt, err) + continue + } + if got != tt.want { + t.Errorf("GoTypeOf(%v) = %v, want %v", tt.dt, got, tt.want) + } + } + + st := arrow.StructOf( + arrow.Field{Name: "id", Type: arrow.PrimitiveTypes.Int64}, + arrow.Field{Name: "name", Type: arrow.BinaryTypes.String, Nullable: true}, + ) + structType, err := GoTypeOf(st) + if err != nil { + t.Fatalf("struct: %v", err) + } + if structType.Kind() != reflect.Struct { + t.Fatalf("want struct, got %v", structType.Kind()) + } + if structType.NumField() != 2 { + t.Fatalf("want 2 fields, got %d", structType.NumField()) + } + if structType.Field(1).Type.Kind() != reflect.Ptr { + t.Errorf("nullable field should be pointer") + } + if structType.Field(1).Type.Elem().Kind() != reflect.String { + t.Errorf("nullable field should be *string") + } + + listType, err := GoTypeOf(arrow.ListOf(arrow.PrimitiveTypes.Int32)) + if err != nil { + t.Fatalf("list: %v", err) + } + if listType.Kind() != reflect.Slice { + t.Fatalf("want slice, got %v", listType.Kind()) + } + if listType.Elem() != reflect.TypeOf(int32(0)) { + t.Errorf("list elem wrong") + } + + fslType, err := GoTypeOf(arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Float32)) + if err != nil { + t.Fatalf("fsl: %v", err) + } + if fslType.Kind() != reflect.Array { + t.Fatalf("want array, got %v", fslType.Kind()) + } + if fslType.Len() != 3 { + t.Errorf("array len want 3, got %d", fslType.Len()) + } + + _, err = GoTypeOf(arrow.Null) + if err == nil { + t.Error("expected error for unsupported type") + } + if !errors.Is(err, ErrUnsupportedType) { + t.Errorf("want ErrUnsupportedType, got %v", err) + } +} diff --git a/arrow/arreflect/reflect_public_test.go b/arrow/arreflect/reflect_public_test.go index 
193d88b1..88588c39 100644 --- a/arrow/arreflect/reflect_public_test.go +++ b/arrow/arreflect/reflect_public_test.go @@ -428,3 +428,50 @@ func TestRecordFromSlice(t *testing.T) { } }) } + +func TestGetAny(t *testing.T) { + mem := memory.NewGoAllocator() + b := array.NewInt32Builder(mem) + defer b.Release() + b.Append(42) + b.AppendNull() + arr := b.NewArray() + defer arr.Release() + + got, err := GetAny(arr, 0) + if err != nil { + t.Fatalf("GetAny(0): %v", err) + } + if v, ok := got.(int32); !ok || v != 42 { + t.Errorf("GetAny(0) = %v (%T), want int32(42)", got, got) + } + + got, err = GetAny(arr, 1) + if err != nil { + t.Fatalf("GetAny(1): %v", err) + } + if v, ok := got.(int32); !ok || v != 0 { + t.Errorf("GetAny(1) = %v, want int32(0)", got) + } +} + +func TestToAnySlice(t *testing.T) { + mem := memory.NewGoAllocator() + b := array.NewStringBuilder(mem) + defer b.Release() + b.Append("hello") + b.Append("world") + arr := b.NewArray() + defer arr.Release() + + got, err := ToAnySlice(arr) + if err != nil { + t.Fatalf("ToAnySlice: %v", err) + } + if len(got) != 2 { + t.Fatalf("len = %d, want 2", len(got)) + } + if got[0].(string) != "hello" || got[1].(string) != "world" { + t.Errorf("got %v, want [hello world]", got) + } +} From 765f03bb5d1c43ea9ea3363a007deeab728cd6e5 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 16:01:36 -0700 Subject: [PATCH 19/82] =?UTF-8?q?feat(arreflect):=20API=20polish=20?= =?UTF-8?q?=E2=80=94=20renames,=20options,=20nil=20allocator,=20RecordAt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renames (final stabilization): - Get[T] → At[T] (indexed access idiom) - SchemaOf[T] → InferSchema[T] (makes inference explicit) - TypeOf[T] → InferType[T] (consistent with InferSchema) - GoTypeOf → InferGoType (consistent with above) Encoding options for FromSlice/RecordFromSlice: - type Option func(*tagOpts) - WithDict(), WithListView(), WithREE(), WithDecimal(p,s) constructors - Unblocks 
Dict/ListView/REE for top-level slices (previously only reachable via struct field tags) - FromSlice[T](vals, mem, opts...Option) — variadic, backward compatible Nil allocator fallback: - FromSlice/RecordFromSlice: nil mem → memory.DefaultAllocator - FromSliceDefault/RecordFromSliceDefault removed (superseded) New API: - RecordAt[T](rec, i) — single-row accessor for Records; completes the symmetry with RecordToSlice/RecordFromSlice InferSchema godoc: removed misleading 'without schema overhead' phrase --- arrow/arreflect/reflect.go | 60 +++++++++++++++------ arrow/arreflect/reflect_infer.go | 34 ++++++------ arrow/arreflect/reflect_infer_test.go | 38 ++++++------- arrow/arreflect/reflect_integration_test.go | 2 +- arrow/arreflect/reflect_public_test.go | 10 ++-- 5 files changed, 86 insertions(+), 58 deletions(-) diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index 891760e4..7c6cb1e3 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -328,7 +328,7 @@ func cachedStructFields(t reflect.Type) []fieldMeta { return v.([]fieldMeta) } -func Get[T any](arr arrow.Array, i int) (T, error) { +func At[T any](arr arrow.Array, i int) (T, error) { var result T v := reflect.ValueOf(&result).Elem() if err := setValue(v, arr, i); err != nil { @@ -350,7 +350,31 @@ func ToSlice[T any](arr arrow.Array) ([]T, error) { return result, nil } -func FromSlice[T any](vals []T, mem memory.Allocator) (arrow.Array, error) { +// Option configures encoding behavior for [FromSlice] and [RecordFromSlice]. +type Option func(*tagOpts) + +// WithDict requests dictionary encoding for the top-level array. +func WithDict() Option { return func(o *tagOpts) { o.Dict = true } } + +// WithListView requests ListView encoding instead of List for slice types. +func WithListView() Option { return func(o *tagOpts) { o.ListView = true } } + +// WithREE requests run-end encoding for the top-level array. 
+func WithREE() Option { return func(o *tagOpts) { o.REE = true } } + +// WithDecimal sets the precision and scale for decimal types. +func WithDecimal(precision, scale int32) Option { + return func(o *tagOpts) { + o.DecimalPrecision = precision + o.DecimalScale = scale + o.HasDecimalOpts = true + } +} + +func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Array, error) { + if mem == nil { + mem = memory.DefaultAllocator + } if len(vals) == 0 { dt, err := inferArrowType(reflect.TypeFor[T]()) if err != nil { @@ -360,12 +384,12 @@ func FromSlice[T any](vals []T, mem memory.Allocator) (arrow.Array, error) { defer b.Release() return b.NewArray(), nil } + var tOpts tagOpts + for _, o := range opts { + o(&tOpts) + } sv := reflect.ValueOf(vals) - return buildArray(sv, tagOpts{}, mem) -} - -func FromSliceDefault[T any](vals []T) (arrow.Array, error) { - return FromSlice(vals, memory.DefaultAllocator) + return buildArray(sv, tOpts, mem) } func RecordToSlice[T any](rec arrow.Record) ([]T, error) { @@ -374,8 +398,8 @@ func RecordToSlice[T any](rec arrow.Record) ([]T, error) { return ToSlice[T](sa) } -func RecordFromSlice[T any](vals []T, mem memory.Allocator) (arrow.Record, error) { - arr, err := FromSlice[T](vals, mem) +func RecordFromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Record, error) { + arr, err := FromSlice[T](vals, mem, opts...) if err != nil { return nil, err } @@ -387,16 +411,20 @@ func RecordFromSlice[T any](vals []T, mem memory.Allocator) (arrow.Record, error return array.RecordFromStructArray(sa, nil), nil } -func RecordFromSliceDefault[T any](vals []T) (arrow.Record, error) { - return RecordFromSlice(vals, memory.DefaultAllocator) +// RecordAt converts the row at index i of an Arrow Record to a Go value of type T. +// T must be a struct type whose fields correspond to the record's columns. 
+func RecordAt[T any](rec arrow.Record, i int) (T, error) { + sa := array.RecordToStructArray(rec) + defer sa.Release() + return At[T](sa, i) } // GetAny converts a single element at index i of an Arrow array to a Go value, -// inferring the Go type from the Arrow DataType at runtime via [GoTypeOf]. +// inferring the Go type from the Arrow DataType at runtime via [InferGoType]. // Useful when the column type is not known at compile time. -// For typed access when T is known, prefer [Get]. +// For typed access when T is known, prefer [At]. func GetAny(arr arrow.Array, i int) (any, error) { - goType, err := GoTypeOf(arr.DataType()) + goType, err := InferGoType(arr.DataType()) if err != nil { return nil, err } @@ -408,12 +436,12 @@ func GetAny(arr arrow.Array, i int) (any, error) { } // ToAnySlice converts all elements of an Arrow array to Go values, -// inferring the Go type from the Arrow DataType at runtime via [GoTypeOf]. +// inferring the Go type from the Arrow DataType at runtime via [InferGoType]. // All elements share the same inferred Go type; null elements are nil (for // nullable column types) or zero values. // For typed access when T is known, prefer [ToSlice]. func ToAnySlice(arr arrow.Array) ([]any, error) { - goType, err := GoTypeOf(arr.DataType()) + goType, err := InferGoType(arr.DataType()) if err != nil { return nil, err } diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index 0624c806..08fefe40 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -233,18 +233,18 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { return arrow.StructOf(arrowFields...), nil } -// SchemaOf infers an *arrow.Schema from a Go struct type T. +// InferSchema infers an *arrow.Schema from a Go struct type T. // T must be a struct type; returns an error otherwise. -// For column-level type inspection without schema overhead, use [TypeOf]. 
+// For column-level Arrow type inspection, use [InferType]. // Field names come from arrow struct tags or Go field names. // Pointer fields are marked Nullable=true. -func SchemaOf[T any]() (*arrow.Schema, error) { +func InferSchema[T any]() (*arrow.Schema, error) { t := reflect.TypeFor[T]() for t.Kind() == reflect.Ptr { t = t.Elem() } if t.Kind() != reflect.Struct { - return nil, fmt.Errorf("arreflect: SchemaOf requires a struct type T, got %v", t) + return nil, fmt.Errorf("arreflect: InferSchema requires a struct type T, got %v", t) } st, err := inferStructType(t) if err != nil { @@ -257,23 +257,23 @@ func SchemaOf[T any]() (*arrow.Schema, error) { return arrow.NewSchema(fields, nil), nil } -// TypeOf infers the Arrow DataType for a Go type T. -// For struct types, [SchemaOf] is preferred when the result will be used with -// arrow.Record or array.NewRecord; TypeOf returns an arrow.DataType that would +// InferType infers the Arrow DataType for a Go type T. +// For struct types, [InferSchema] is preferred when the result will be used with +// arrow.Record or array.NewRecord; InferType returns an arrow.DataType that would // require an additional cast to *arrow.StructType. -func TypeOf[T any]() (arrow.DataType, error) { +func InferType[T any]() (arrow.DataType, error) { t := reflect.TypeFor[T]() return inferArrowType(t) } -// GoTypeOf returns the Go reflect.Type corresponding to the given Arrow DataType. +// InferGoType returns the Go reflect.Type corresponding to the given Arrow DataType. // For STRUCT types it constructs an anonymous struct type at runtime using // [reflect.StructOf]; field names are exported (capitalised) with the original // Arrow field name preserved in an arrow struct tag. Nullable Arrow fields // (field.Nullable == true) become pointer types (*T). // For DICTIONARY and RUN_END_ENCODED types it returns the Go type of the // value/encoded type respectively (dictionaries are resolved transparently). 
-func GoTypeOf(dt arrow.DataType) (reflect.Type, error) { +func InferGoType(dt arrow.DataType) (reflect.Type, error) { switch dt.ID() { case arrow.INT8: return reflect.TypeOf(int8(0)), nil @@ -328,7 +328,7 @@ func GoTypeOf(dt arrow.DataType) (reflect.Type, error) { default: return nil, fmt.Errorf("unsupported Arrow type for Go inference: %v: %w", dt, ErrUnsupportedType) } - elemType, err := GoTypeOf(elemDT) + elemType, err := InferGoType(elemDT) if err != nil { return nil, err } @@ -336,7 +336,7 @@ func GoTypeOf(dt arrow.DataType) (reflect.Type, error) { case arrow.FIXED_SIZE_LIST: fsl := dt.(*arrow.FixedSizeListType) - elemType, err := GoTypeOf(fsl.Elem()) + elemType, err := InferGoType(fsl.Elem()) if err != nil { return nil, err } @@ -344,11 +344,11 @@ func GoTypeOf(dt arrow.DataType) (reflect.Type, error) { case arrow.MAP: mt := dt.(*arrow.MapType) - keyType, err := GoTypeOf(mt.KeyType()) + keyType, err := InferGoType(mt.KeyType()) if err != nil { return nil, err } - valType, err := GoTypeOf(mt.ValueType()) + valType, err := InferGoType(mt.ValueType()) if err != nil { return nil, err } @@ -359,7 +359,7 @@ func GoTypeOf(dt arrow.DataType) (reflect.Type, error) { fields := make([]reflect.StructField, st.NumFields()) for i := 0; i < st.NumFields(); i++ { f := st.Field(i) - ft, err := GoTypeOf(f.Type) + ft, err := InferGoType(f.Type) if err != nil { return nil, err } @@ -381,10 +381,10 @@ func GoTypeOf(dt arrow.DataType) (reflect.Type, error) { return reflect.StructOf(fields), nil case arrow.DICTIONARY: - return GoTypeOf(dt.(*arrow.DictionaryType).ValueType) + return InferGoType(dt.(*arrow.DictionaryType).ValueType) case arrow.RUN_END_ENCODED: - return GoTypeOf(dt.(*arrow.RunEndEncodedType).Encoded()) + return InferGoType(dt.(*arrow.RunEndEncodedType).Encoded()) default: return nil, fmt.Errorf("unsupported Arrow type for Go inference: %v: %w", dt, ErrUnsupportedType) diff --git a/arrow/arreflect/reflect_infer_test.go b/arrow/arreflect/reflect_infer_test.go index 
e07f99d1..eb0e8d41 100644 --- a/arrow/arreflect/reflect_infer_test.go +++ b/arrow/arreflect/reflect_infer_test.go @@ -316,7 +316,7 @@ func TestInferArrowSchema(t *testing.T) { Age int32 Score float64 } - schema, err := SchemaOf[S]() + schema, err := InferSchema[S]() if err != nil { t.Fatal(err) } @@ -339,7 +339,7 @@ func TestInferArrowSchema(t *testing.T) { ID int32 Label *string } - schema, err := SchemaOf[S]() + schema, err := InferSchema[S]() if err != nil { t.Fatal(err) } @@ -356,7 +356,7 @@ func TestInferArrowSchema(t *testing.T) { Keep string Hidden int32 `arrow:"-"` } - schema, err := SchemaOf[S]() + schema, err := InferSchema[S]() if err != nil { t.Fatal(err) } @@ -372,7 +372,7 @@ func TestInferArrowSchema(t *testing.T) { type S struct { GoName int64 `arrow:"custom_name"` } - schema, err := SchemaOf[S]() + schema, err := InferSchema[S]() if err != nil { t.Fatal(err) } @@ -382,7 +382,7 @@ func TestInferArrowSchema(t *testing.T) { }) t.Run("non-struct type returns error", func(t *testing.T) { - _, err := SchemaOf[int]() + _, err := InferSchema[int]() if err == nil { t.Error("expected error for non-struct, got nil") } @@ -391,7 +391,7 @@ func TestInferArrowSchema(t *testing.T) { func TestInferArrowTypePublic(t *testing.T) { t.Run("int32 is INT32", func(t *testing.T) { - dt, err := TypeOf[int32]() + dt, err := InferType[int32]() if err != nil { t.Fatal(err) } @@ -401,7 +401,7 @@ func TestInferArrowTypePublic(t *testing.T) { }) t.Run("[]string is LIST", func(t *testing.T) { - dt, err := TypeOf[[]string]() + dt, err := InferType[[]string]() if err != nil { t.Fatal(err) } @@ -411,7 +411,7 @@ func TestInferArrowTypePublic(t *testing.T) { }) t.Run("map[string]float64 is MAP", func(t *testing.T) { - dt, err := TypeOf[map[string]float64]() + dt, err := InferType[map[string]float64]() if err != nil { t.Fatal(err) } @@ -422,7 +422,7 @@ func TestInferArrowTypePublic(t *testing.T) { t.Run("struct{X int32} is STRUCT", func(t *testing.T) { type S struct{ X int32 } - dt, err 
:= TypeOf[S]() + dt, err := InferType[S]() if err != nil { t.Fatal(err) } @@ -437,7 +437,7 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { type S struct { Name string `arrow:"name,dict"` } - schema, err := SchemaOf[S]() + schema, err := InferSchema[S]() if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -454,7 +454,7 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { type S struct { Tags []string `arrow:"tags,listview"` } - schema, err := SchemaOf[S]() + schema, err := InferSchema[S]() if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -471,7 +471,7 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { type REERow struct { Val string `arrow:"val,ree"` } - _, err := SchemaOf[REERow]() + _, err := InferSchema[REERow]() if err == nil { t.Fatal("expected error for ree tag on struct field, got nil") } @@ -495,13 +495,13 @@ func TestGoTypeOf(t *testing.T) { {&arrow.DurationType{Unit: arrow.Nanosecond}, reflect.TypeOf(time.Duration(0))}, } for _, tt := range primitives { - got, err := GoTypeOf(tt.dt) + got, err := InferGoType(tt.dt) if err != nil { - t.Errorf("GoTypeOf(%v): %v", tt.dt, err) + t.Errorf("InferGoType(%v): %v", tt.dt, err) continue } if got != tt.want { - t.Errorf("GoTypeOf(%v) = %v, want %v", tt.dt, got, tt.want) + t.Errorf("InferGoType(%v) = %v, want %v", tt.dt, got, tt.want) } } @@ -509,7 +509,7 @@ func TestGoTypeOf(t *testing.T) { arrow.Field{Name: "id", Type: arrow.PrimitiveTypes.Int64}, arrow.Field{Name: "name", Type: arrow.BinaryTypes.String, Nullable: true}, ) - structType, err := GoTypeOf(st) + structType, err := InferGoType(st) if err != nil { t.Fatalf("struct: %v", err) } @@ -526,7 +526,7 @@ func TestGoTypeOf(t *testing.T) { t.Errorf("nullable field should be *string") } - listType, err := GoTypeOf(arrow.ListOf(arrow.PrimitiveTypes.Int32)) + listType, err := InferGoType(arrow.ListOf(arrow.PrimitiveTypes.Int32)) if err != nil { t.Fatalf("list: %v", err) } @@ -537,7 +537,7 @@ func TestGoTypeOf(t 
*testing.T) { t.Errorf("list elem wrong") } - fslType, err := GoTypeOf(arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Float32)) + fslType, err := InferGoType(arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Float32)) if err != nil { t.Fatalf("fsl: %v", err) } @@ -548,7 +548,7 @@ func TestGoTypeOf(t *testing.T) { t.Errorf("array len want 3, got %d", fslType.Len()) } - _, err = GoTypeOf(arrow.Null) + _, err = InferGoType(arrow.Null) if err == nil { t.Error("expected error for unsupported type") } diff --git a/arrow/arreflect/reflect_integration_test.go b/arrow/arreflect/reflect_integration_test.go index 5dce1a33..574ef7b9 100644 --- a/arrow/arreflect/reflect_integration_test.go +++ b/arrow/arreflect/reflect_integration_test.go @@ -360,7 +360,7 @@ func TestReflectIntegration(t *testing.T) { {ID: 1, Items: []integOrderItem{{Product: "a", Tags: map[string]string{"k": "v"}, Ratings: [5]float32{1, 2, 3, 4, 5}}}}, } - schema, err := SchemaOf[integOrder]() + schema, err := InferSchema[integOrder]() if err != nil { t.Fatalf("SchemaOf: %v", err) } diff --git a/arrow/arreflect/reflect_public_test.go b/arrow/arreflect/reflect_public_test.go index 88588c39..a7344fb1 100644 --- a/arrow/arreflect/reflect_public_test.go +++ b/arrow/arreflect/reflect_public_test.go @@ -34,7 +34,7 @@ func TestToGo(t *testing.T) { arr := b.NewInt32Array() defer arr.Release() - got, err := Get[int32](arr, 0) + got, err := At[int32](arr, 0) if err != nil { t.Fatal(err) } @@ -50,7 +50,7 @@ func TestToGo(t *testing.T) { arr := b.NewStringArray() defer arr.Release() - got, err := Get[string](arr, 1) + got, err := At[string](arr, 1) if err != nil { t.Fatal(err) } @@ -71,7 +71,7 @@ func TestToGo(t *testing.T) { } defer arr.Release() - got, err := Get[Person](arr, 0) + got, err := At[Person](arr, 0) if err != nil { t.Fatal(err) } @@ -87,7 +87,7 @@ func TestToGo(t *testing.T) { arr := b.NewInt32Array() defer arr.Release() - got, err := Get[*int32](arr, 0) + got, err := At[*int32](arr, 0) if err != nil { 
t.Fatal(err) } @@ -103,7 +103,7 @@ func TestToGo(t *testing.T) { arr := b.NewInt32Array() defer arr.Release() - got, err := Get[int32](arr, 0) + got, err := At[int32](arr, 0) if err != nil { t.Fatal(err) } From d7f8fa49682fda19f1c094beab6c9c966c428bec Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 16:10:07 -0700 Subject: [PATCH 20/82] fix(arreflect): sentinel classification, MAP inference, rune safety, tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Job 872: - appendTemporalValue/appendDecimalValue default cases: use ErrUnsupportedType (unrecognized builder) not ErrTypeMismatch (value type mismatch) - reflect_arrow_to_go.go: fixed-size list length mismatch wraps ErrTypeMismatch, removes arreflect: prefix to match leaf-error convention - TestErrSentinels: 3 subtests verify errors.Is works end-to-end for both sentinels, including propagation through context wrappers Job 873: - InferGoType MAP: use mt.ItemField().Type for the map value type (was mt.ValueType() which returns the full entry *StructType, not the value type) - GoTypeOf struct field capitalization: use unicode.ToUpper on rune slice instead of strings.ToUpper on byte slice (correct for multi-byte UTF-8) - GetAny/ToAnySlice: document null-element-as-zero-value semantics; direct callers to arr.IsNull(i) for null detection - TestGetAnyComposite: struct/list/map end-to-end pipeline tests confirming GoTypeOf→GetAny correctly populates field values through setValue --- arrow/arreflect/reflect.go | 7 +- arrow/arreflect/reflect_arrow_to_go.go | 2 +- arrow/arreflect/reflect_go_to_arrow.go | 4 +- arrow/arreflect/reflect_infer.go | 7 +- arrow/arreflect/reflect_public_test.go | 162 +++++++++++++++++++++++++ 5 files changed, 174 insertions(+), 8 deletions(-) diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index 7c6cb1e3..ee00d557 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -422,6 +422,8 @@ func 
RecordAt[T any](rec arrow.Record, i int) (T, error) { // GetAny converts a single element at index i of an Arrow array to a Go value, // inferring the Go type from the Arrow DataType at runtime via [InferGoType]. // Useful when the column type is not known at compile time. +// Null elements are returned as the Go zero value of the inferred type; use +// arr.IsNull(i) to distinguish a null element from a genuine zero. // For typed access when T is known, prefer [At]. func GetAny(arr arrow.Array, i int) (any, error) { goType, err := InferGoType(arr.DataType()) @@ -437,8 +439,9 @@ func GetAny(arr arrow.Array, i int) (any, error) { // ToAnySlice converts all elements of an Arrow array to Go values, // inferring the Go type from the Arrow DataType at runtime via [InferGoType]. -// All elements share the same inferred Go type; null elements are nil (for -// nullable column types) or zero values. +// All elements share the same inferred Go type. Null elements are returned as +// the Go zero value of the inferred type; use arr.IsNull(i) to distinguish +// a null element from a genuine zero value. // For typed access when T is known, prefer [ToSlice]. 
func ToAnySlice(arr arrow.Array) ([]any, error) { goType, err := InferGoType(arr.DataType()) diff --git a/arrow/arreflect/reflect_arrow_to_go.go b/arrow/arreflect/reflect_arrow_to_go.go index 9f1087e5..76f9f4b7 100644 --- a/arrow/arreflect/reflect_arrow_to_go.go +++ b/arrow/arreflect/reflect_arrow_to_go.go @@ -453,7 +453,7 @@ func setFixedSizeListValue(v reflect.Value, arr *array.FixedSizeList, i int) err switch v.Kind() { case reflect.Array: if v.Len() != n { - return fmt.Errorf("arreflect: fixed-size list length %d does not match Go array length %d", n, v.Len()) + return fmt.Errorf("fixed-size list length %d does not match Go array length %d: %w", n, v.Len(), ErrTypeMismatch) } for k := 0; k < n; k++ { if err := setValue(v.Index(k), child, int(start)+k); err != nil { diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index e678bf4e..afe196bb 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -475,7 +475,7 @@ func appendTemporalValue(b array.Builder, v reflect.Value) error { } tb.Append(arrow.Duration(d.Nanoseconds())) default: - return fmt.Errorf("unexpected temporal builder %T: %w", b, ErrTypeMismatch) + return fmt.Errorf("unexpected temporal builder %T: %w", b, ErrUnsupportedType) } return nil } @@ -499,7 +499,7 @@ func appendDecimalValue(b array.Builder, v reflect.Value) error { case *array.Decimal64Builder: tb.Append(decimal.Decimal64(v.Int())) default: - return fmt.Errorf("unexpected decimal builder %T: %w", b, ErrTypeMismatch) + return fmt.Errorf("unexpected decimal builder %T: %w", b, ErrUnsupportedType) } return nil } diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index 08fefe40..d1578637 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -19,8 +19,8 @@ package arreflect import ( "fmt" "reflect" - "strings" "time" + "unicode" "github.com/apache/arrow-go/v18/arrow" 
"github.com/apache/arrow-go/v18/arrow/decimal" @@ -348,7 +348,7 @@ func InferGoType(dt arrow.DataType) (reflect.Type, error) { if err != nil { return nil, err } - valType, err := InferGoType(mt.ValueType()) + valType, err := InferGoType(mt.ItemField().Type) if err != nil { return nil, err } @@ -370,7 +370,8 @@ func InferGoType(dt arrow.DataType) (reflect.Type, error) { if len(f.Name) == 0 { exportedName = fmt.Sprintf("Field%d", i) } else { - exportedName = strings.ToUpper(f.Name[:1]) + f.Name[1:] + runes := []rune(f.Name) + exportedName = string(unicode.ToUpper(runes[0])) + string(runes[1:]) } fields[i] = reflect.StructField{ Name: exportedName, diff --git a/arrow/arreflect/reflect_public_test.go b/arrow/arreflect/reflect_public_test.go index a7344fb1..65a5b55b 100644 --- a/arrow/arreflect/reflect_public_test.go +++ b/arrow/arreflect/reflect_public_test.go @@ -17,6 +17,8 @@ package arreflect import ( + "errors" + "reflect" "testing" "github.com/apache/arrow-go/v18/arrow" @@ -475,3 +477,163 @@ func TestToAnySlice(t *testing.T) { t.Errorf("got %v, want [hello world]", got) } } + +func TestErrSentinels(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("ErrTypeMismatch via setValue wrong kind", func(t *testing.T) { + b := array.NewInt32Builder(mem) + defer b.Release() + b.Append(42) + arr := b.NewArray() + defer arr.Release() + + var got string + v := reflect.ValueOf(&got).Elem() + err := setValue(v, arr, 0) + if err == nil { + t.Fatal("expected error, got nil") + } + if !errors.Is(err, ErrTypeMismatch) { + t.Errorf("expected errors.Is(err, ErrTypeMismatch) = true, got false; err = %v", err) + } + }) + + t.Run("ErrUnsupportedType via InferGoType", func(t *testing.T) { + _, err := InferGoType(arrow.Null) + if err == nil { + t.Fatal("expected error, got nil") + } + if !errors.Is(err, ErrUnsupportedType) { + t.Errorf("expected errors.Is(err, ErrUnsupportedType) = true, got false; err = %v", err) + } + }) + + t.Run("ErrTypeMismatch propagates through struct field 
context wrapper", func(t *testing.T) { + st := arrow.StructOf(arrow.Field{Name: "name", Type: arrow.BinaryTypes.String}) + sb := array.NewStructBuilder(mem, st) + defer sb.Release() + sb.Append(true) + sb.FieldBuilder(0).(*array.StringBuilder).Append("hello") + arr := sb.NewArray() + defer arr.Release() + + type wrongType struct { + Name int32 `arrow:"name"` + } + _, err := At[wrongType](arr, 0) + if err == nil { + t.Fatal("expected error, got nil") + } + if !errors.Is(err, ErrTypeMismatch) { + t.Errorf("ErrTypeMismatch not found through context wrapper; err = %v", err) + } + }) +} + +func TestGetAnyComposite(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("struct", func(t *testing.T) { + st := arrow.StructOf( + arrow.Field{Name: "id", Type: arrow.PrimitiveTypes.Int32}, + arrow.Field{Name: "name", Type: arrow.BinaryTypes.String}, + ) + sb := array.NewStructBuilder(mem, st) + defer sb.Release() + sb.Append(true) + sb.FieldBuilder(0).(*array.Int32Builder).Append(99) + sb.FieldBuilder(1).(*array.StringBuilder).Append("alice") + arr := sb.NewArray() + defer arr.Release() + + got, err := GetAny(arr, 0) + if err != nil { + t.Fatalf("GetAny: %v", err) + } + + v := reflect.ValueOf(got) + if v.Kind() != reflect.Struct { + t.Fatalf("want struct, got %v", v.Kind()) + } + + vt := v.Type() + var idField, nameField reflect.Value + for i := 0; i < v.NumField(); i++ { + tag := vt.Field(i).Tag.Get("arrow") + switch tag { + case "id": + idField = v.Field(i) + case "name": + nameField = v.Field(i) + } + } + if !idField.IsValid() { + t.Fatal("id field not found") + } + if !nameField.IsValid() { + t.Fatal("name field not found") + } + if idField.Int() != 99 { + t.Errorf("id = %v, want 99", idField.Int()) + } + if nameField.String() != "alice" { + t.Errorf("name = %v, want alice", nameField.String()) + } + }) + + t.Run("list", func(t *testing.T) { + lb := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) + defer lb.Release() + lb.Append(true) + 
lb.ValueBuilder().(*array.Int32Builder).Append(1) + lb.ValueBuilder().(*array.Int32Builder).Append(2) + lb.ValueBuilder().(*array.Int32Builder).Append(3) + arr := lb.NewArray() + defer arr.Release() + + got, err := GetAny(arr, 0) + if err != nil { + t.Fatalf("GetAny: %v", err) + } + + v := reflect.ValueOf(got) + if v.Kind() != reflect.Slice { + t.Fatalf("want slice, got %v", v.Kind()) + } + if v.Len() != 3 { + t.Fatalf("want 3 elems, got %d", v.Len()) + } + if v.Index(0).Int() != 1 || v.Index(2).Int() != 3 { + t.Errorf("list = %v, want [1 2 3]", got) + } + }) + + t.Run("map", func(t *testing.T) { + mb := array.NewMapBuilder(mem, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false) + defer mb.Release() + mb.Append(true) + mb.KeyBuilder().(*array.StringBuilder).Append("x") + mb.ItemBuilder().(*array.Int32Builder).Append(7) + arr := mb.NewArray() + defer arr.Release() + + got, err := GetAny(arr, 0) + if err != nil { + t.Fatalf("GetAny: %v", err) + } + + v := reflect.ValueOf(got) + if v.Kind() != reflect.Map { + t.Fatalf("want map, got %v", v.Kind()) + } + key := reflect.ValueOf("x") + val := v.MapIndex(key) + if !val.IsValid() { + t.Fatal("key 'x' not found in map") + } + if val.Int() != 7 { + t.Errorf("map[x] = %v, want 7", val.Int()) + } + }) +} From 39d7944171d12b5d8e227c46c768c84b1294f850 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 7 Apr 2026 16:22:18 -0700 Subject: [PATCH 21/82] feat(arreflect): API fixes, options expansion, duplication cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jobs 874/878 — FromSlice empty-slice opts bug: Parse variadic opts before the len(vals)==0 early return so WithDecimal, WithDict, etc. are applied correctly for empty slices. 
Job 875 — API consistency: - doc.go: fix stale links [Get]->[At], [SchemaOf]->[InferSchema], [TypeOf]->[InferType] - GetAny -> AtAny (matches At/ToSlice naming convention) - WithTemporal(s string) Option: expose temporal type override for top-level slices e.g. FromSlice([]time.Time{...}, mem, WithTemporal("date32")) - RecordAtAny: runtime-typed single-row record accessor - TestGoTypeOf -> TestInferGoType: test name matches function name Job 876 — Duplication elimination (-65 lines): - D1: remove dead ptr-alloc blocks from 7 sub-functions (setValue already dereferences before dispatching; the blocks were unreachable) - D3: collapse STRING+LARGE_STRING / BINARY+LARGE_BINARY pairs using local stringer/byter interfaces (-20 lines) - D4: extract setTime helper for setTemporalValue (-17 lines net) Job 877 — Test quality: - Extract testMem() helper in reflect_public_test.go Job 878 — Coverage: - TestRecordAt: happy-path test for single-row record access --- arrow/arreflect/doc.go | 4 +- arrow/arreflect/reflect.go | 32 ++++++-- arrow/arreflect/reflect_arrow_to_go.go | 105 +++++-------------------- arrow/arreflect/reflect_infer_test.go | 2 +- arrow/arreflect/reflect_public_test.go | 66 ++++++++++++---- 5 files changed, 98 insertions(+), 111 deletions(-) diff --git a/arrow/arreflect/doc.go b/arrow/arreflect/doc.go index 6eb242f7..07690ca6 100644 --- a/arrow/arreflect/doc.go +++ b/arrow/arreflect/doc.go @@ -17,11 +17,11 @@ // Package arreflect provides utilities for converting between // Apache Arrow arrays and Go structs using reflection. // -// The primary entry points are the generic functions [Get], [ToSlice], +// The primary entry points are the generic functions [At], [ToSlice], // [FromSlice], [RecordToSlice], and [RecordFromSlice], which convert // between Arrow arrays/records and Go slices of structs. // -// Schema inference is available via [SchemaOf] and [TypeOf]. +// Schema inference is available via [InferSchema] and [InferType]. 
// // Arrow struct tags control field mapping: // diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index ee00d557..51e2d2d4 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -371,23 +371,34 @@ func WithDecimal(precision, scale int32) Option { } } +// WithTemporal overrides the Arrow temporal encoding for time.Time slices. +// Valid values: "date32", "date64", "time32", "time64", "timestamp" (default). +// Equivalent to tagging a struct field with arrow:",date32" etc. +func WithTemporal(temporal string) Option { + return func(o *tagOpts) { o.Temporal = temporal } +} + func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Array, error) { if mem == nil { mem = memory.DefaultAllocator } + var tOpts tagOpts + for _, o := range opts { + o(&tOpts) + } if len(vals) == 0 { dt, err := inferArrowType(reflect.TypeFor[T]()) if err != nil { return nil, err } + dt = applyTemporalOpts(dt, reflect.TypeFor[T](), tOpts) + if tOpts.Dict { + dt = &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: dt} + } b := array.NewBuilder(mem, dt) defer b.Release() return b.NewArray(), nil } - var tOpts tagOpts - for _, o := range opts { - o(&tOpts) - } sv := reflect.ValueOf(vals) return buildArray(sv, tOpts, mem) } @@ -419,13 +430,22 @@ func RecordAt[T any](rec arrow.Record, i int) (T, error) { return At[T](sa, i) } -// GetAny converts a single element at index i of an Arrow array to a Go value, +// RecordAtAny converts the row at index i of an Arrow Record to a Go value, +// inferring the Go type from the record's schema at runtime via [InferGoType]. +// Equivalent to AtAny on the struct array underlying the record. 
+func RecordAtAny(rec arrow.Record, i int) (any, error) { + sa := array.RecordToStructArray(rec) + defer sa.Release() + return AtAny(sa, i) +} + +// AtAny converts a single element at index i of an Arrow array to a Go value, // inferring the Go type from the Arrow DataType at runtime via [InferGoType]. // Useful when the column type is not known at compile time. // Null elements are returned as the Go zero value of the inferred type; use // arr.IsNull(i) to distinguish a null element from a genuine zero. // For typed access when T is known, prefer [At]. -func GetAny(arr arrow.Array, i int) (any, error) { +func AtAny(arr arrow.Array, i int) (any, error) { goType, err := InferGoType(arr.DataType()) if err != nil { return nil, err diff --git a/arrow/arreflect/reflect_arrow_to_go.go b/arrow/arreflect/reflect_arrow_to_go.go index 76f9f4b7..aa8b25d9 100644 --- a/arrow/arreflect/reflect_arrow_to_go.go +++ b/arrow/arreflect/reflect_arrow_to_go.go @@ -63,40 +63,22 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { arrow.FLOAT32, arrow.FLOAT64: return setPrimitiveValue(v, arr, i) - case arrow.STRING: - a, ok := arr.(*array.String) + case arrow.STRING, arrow.LARGE_STRING: + type stringer interface{ Value(int) string } + a, ok := arr.(stringer) if !ok { - return fmt.Errorf("expected *String, got %T: %w", arr, ErrTypeMismatch) + return fmt.Errorf("expected string array, got %T: %w", arr, ErrTypeMismatch) } if v.Kind() != reflect.String { return fmt.Errorf("cannot set string into %s: %w", v.Type(), ErrTypeMismatch) } v.SetString(a.Value(i)) - case arrow.LARGE_STRING: - a, ok := arr.(*array.LargeString) + case arrow.BINARY, arrow.LARGE_BINARY: + type byter interface{ Value(int) []byte } + a, ok := arr.(byter) if !ok { - return fmt.Errorf("expected *LargeString, got %T: %w", arr, ErrTypeMismatch) - } - if v.Kind() != reflect.String { - return fmt.Errorf("cannot set string into %s: %w", v.Type(), ErrTypeMismatch) - } - v.SetString(a.Value(i)) - - case arrow.BINARY: - 
a, ok := arr.(*array.Binary) - if !ok { - return fmt.Errorf("expected *Binary, got %T: %w", arr, ErrTypeMismatch) - } - if v.Kind() != reflect.Slice || v.Type().Elem().Kind() != reflect.Uint8 { - return fmt.Errorf("cannot set []byte into %s: %w", v.Type(), ErrTypeMismatch) - } - v.SetBytes(a.Value(i)) - - case arrow.LARGE_BINARY: - a, ok := arr.(*array.LargeBinary) - if !ok { - return fmt.Errorf("expected *LargeBinary, got %T: %w", arr, ErrTypeMismatch) + return fmt.Errorf("expected binary array, got %T: %w", arr, ErrTypeMismatch) } if v.Kind() != reflect.Slice || v.Type().Elem().Kind() != reflect.Uint8 { return fmt.Errorf("cannot set []byte into %s: %w", v.Type(), ErrTypeMismatch) @@ -159,11 +141,6 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { } func setPrimitiveValue(v reflect.Value, arr arrow.Array, i int) error { - if v.Kind() == reflect.Ptr { - v.Set(reflect.New(v.Type().Elem())) - v = v.Elem() - } - switch arr.DataType().ID() { case arrow.INT8: if !isIntKind(v.Kind()) { @@ -221,70 +198,53 @@ func setPrimitiveValue(v reflect.Value, arr arrow.Array, i int) error { return nil } -func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { - if v.Kind() == reflect.Ptr { - v.Set(reflect.New(v.Type().Elem())) - v = v.Elem() +func setTime(v reflect.Value, t time.Time) error { + if v.Type() != typeOfTime { + return fmt.Errorf("cannot set time.Time into %s: %w", v.Type(), ErrTypeMismatch) } + v.Set(reflect.ValueOf(t)) + return nil +} +func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { switch arr.DataType().ID() { case arrow.TIMESTAMP: a, ok := arr.(*array.Timestamp) if !ok { return fmt.Errorf("expected *Timestamp, got %T: %w", arr, ErrTypeMismatch) } - if v.Type() != typeOfTime { - return fmt.Errorf("cannot set time.Time into %s: %w", v.Type(), ErrTypeMismatch) - } unit := arr.DataType().(*arrow.TimestampType).Unit - t := a.Value(i).ToTime(unit) - v.Set(reflect.ValueOf(t)) + return setTime(v, a.Value(i).ToTime(unit)) 
case arrow.DATE32: a, ok := arr.(*array.Date32) if !ok { return fmt.Errorf("expected *Date32, got %T: %w", arr, ErrTypeMismatch) } - if v.Type() != typeOfTime { - return fmt.Errorf("cannot set time.Time into %s: %w", v.Type(), ErrTypeMismatch) - } - t := a.Value(i).ToTime() - v.Set(reflect.ValueOf(t)) + return setTime(v, a.Value(i).ToTime()) case arrow.DATE64: a, ok := arr.(*array.Date64) if !ok { return fmt.Errorf("expected *Date64, got %T: %w", arr, ErrTypeMismatch) } - if v.Type() != typeOfTime { - return fmt.Errorf("cannot set time.Time into %s: %w", v.Type(), ErrTypeMismatch) - } - t := a.Value(i).ToTime() - v.Set(reflect.ValueOf(t)) + return setTime(v, a.Value(i).ToTime()) case arrow.TIME32: a, ok := arr.(*array.Time32) if !ok { return fmt.Errorf("expected *Time32, got %T: %w", arr, ErrTypeMismatch) } - if v.Type() != typeOfTime { - return fmt.Errorf("cannot set time.Time into %s: %w", v.Type(), ErrTypeMismatch) - } unit := arr.DataType().(*arrow.Time32Type).Unit - t := a.Value(i).ToTime(unit) - v.Set(reflect.ValueOf(t)) + return setTime(v, a.Value(i).ToTime(unit)) case arrow.TIME64: a, ok := arr.(*array.Time64) if !ok { return fmt.Errorf("expected *Time64, got %T: %w", arr, ErrTypeMismatch) } - if v.Type() != typeOfTime { - return fmt.Errorf("cannot set time.Time into %s: %w", v.Type(), ErrTypeMismatch) - } unit := arr.DataType().(*arrow.Time64Type).Unit - t := a.Value(i).ToTime(unit) - v.Set(reflect.ValueOf(t)) + return setTime(v, a.Value(i).ToTime(unit)) case arrow.DURATION: a, ok := arr.(*array.Duration) @@ -305,11 +265,6 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { } func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { - if v.Kind() == reflect.Ptr { - v.Set(reflect.New(v.Type().Elem())) - v = v.Elem() - } - switch arr.DataType().ID() { case arrow.DECIMAL128: a, ok := arr.(*array.Decimal128) @@ -360,11 +315,6 @@ func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { } func setStructValue(v 
reflect.Value, sa *array.Struct, i int) error { - if v.Kind() == reflect.Ptr { - v.Set(reflect.New(v.Type().Elem())) - v = v.Elem() - } - if v.Kind() != reflect.Struct { return fmt.Errorf("cannot set struct into %s: %w", v.Type(), ErrTypeMismatch) } @@ -385,11 +335,6 @@ func setStructValue(v reflect.Value, sa *array.Struct, i int) error { } func setListValue(v reflect.Value, arr array.ListLike, i int) error { - if v.Kind() == reflect.Ptr { - v.Set(reflect.New(v.Type().Elem())) - v = v.Elem() - } - if v.Kind() != reflect.Slice { return fmt.Errorf("cannot set list into %s: %w", v.Type(), ErrTypeMismatch) } @@ -409,11 +354,6 @@ func setListValue(v reflect.Value, arr array.ListLike, i int) error { } func setMapValue(v reflect.Value, arr *array.Map, i int) error { - if v.Kind() == reflect.Ptr { - v.Set(reflect.New(v.Type().Elem())) - v = v.Elem() - } - if v.Kind() != reflect.Map { return fmt.Errorf("cannot set map into %s: %w", v.Type(), ErrTypeMismatch) } @@ -441,11 +381,6 @@ func setMapValue(v reflect.Value, arr *array.Map, i int) error { } func setFixedSizeListValue(v reflect.Value, arr *array.FixedSizeList, i int) error { - if v.Kind() == reflect.Ptr { - v.Set(reflect.New(v.Type().Elem())) - v = v.Elem() - } - n := int(arr.DataType().(*arrow.FixedSizeListType).Len()) child := arr.ListValues() start, _ := arr.ValueOffsets(i) diff --git a/arrow/arreflect/reflect_infer_test.go b/arrow/arreflect/reflect_infer_test.go index eb0e8d41..d5449b42 100644 --- a/arrow/arreflect/reflect_infer_test.go +++ b/arrow/arreflect/reflect_infer_test.go @@ -481,7 +481,7 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { }) } -func TestGoTypeOf(t *testing.T) { +func TestInferGoType(t *testing.T) { primitives := []struct { dt arrow.DataType want reflect.Type diff --git a/arrow/arreflect/reflect_public_test.go b/arrow/arreflect/reflect_public_test.go index 65a5b55b..f0ed4407 100644 --- a/arrow/arreflect/reflect_public_test.go +++ b/arrow/arreflect/reflect_public_test.go @@ -26,8 
+26,10 @@ import ( "github.com/apache/arrow-go/v18/arrow/memory" ) +func testMem() memory.Allocator { return memory.NewGoAllocator() } + func TestToGo(t *testing.T) { - mem := memory.NewGoAllocator() + mem := testMem() t.Run("int32 element 0", func(t *testing.T) { b := array.NewInt32Builder(mem) @@ -116,7 +118,7 @@ func TestToGo(t *testing.T) { } func TestToGoSlice(t *testing.T) { - mem := memory.NewGoAllocator() + mem := testMem() t.Run("[]int32", func(t *testing.T) { b := array.NewInt32Builder(mem) @@ -207,7 +209,7 @@ func TestToGoSlice(t *testing.T) { } func TestFromGoSlice(t *testing.T) { - mem := memory.NewGoAllocator() + mem := testMem() t.Run("[]int32", func(t *testing.T) { arr, err := FromSlice([]int32{1, 2, 3}, mem) @@ -431,7 +433,7 @@ func TestRecordFromSlice(t *testing.T) { }) } -func TestGetAny(t *testing.T) { +func TestAtAny(t *testing.T) { mem := memory.NewGoAllocator() b := array.NewInt32Builder(mem) defer b.Release() @@ -440,20 +442,20 @@ func TestGetAny(t *testing.T) { arr := b.NewArray() defer arr.Release() - got, err := GetAny(arr, 0) + got, err := AtAny(arr, 0) if err != nil { - t.Fatalf("GetAny(0): %v", err) + t.Fatalf("AtAny(0): %v", err) } if v, ok := got.(int32); !ok || v != 42 { - t.Errorf("GetAny(0) = %v (%T), want int32(42)", got, got) + t.Errorf("AtAny(0) = %v (%T), want int32(42)", got, got) } - got, err = GetAny(arr, 1) + got, err = AtAny(arr, 1) if err != nil { - t.Fatalf("GetAny(1): %v", err) + t.Fatalf("AtAny(1): %v", err) } if v, ok := got.(int32); !ok || v != 0 { - t.Errorf("GetAny(1) = %v, want int32(0)", got) + t.Errorf("AtAny(1) = %v, want int32(0)", got) } } @@ -531,7 +533,37 @@ func TestErrSentinels(t *testing.T) { }) } -func TestGetAnyComposite(t *testing.T) { +func TestRecordAt(t *testing.T) { + mem := memory.NewGoAllocator() + type Row struct { + Name string `arrow:"name"` + Score float64 `arrow:"score"` + } + rows := []Row{{"alice", 9.5}, {"bob", 7.0}} + rec, err := RecordFromSlice(rows, mem) + if err != nil { + 
t.Fatalf("RecordFromSlice: %v", err) + } + defer rec.Release() + + got, err := RecordAt[Row](rec, 0) + if err != nil { + t.Fatalf("RecordAt(0): %v", err) + } + if got != rows[0] { + t.Errorf("RecordAt(0) = %v, want %v", got, rows[0]) + } + + got, err = RecordAt[Row](rec, 1) + if err != nil { + t.Fatalf("RecordAt(1): %v", err) + } + if got != rows[1] { + t.Errorf("RecordAt(1) = %v, want %v", got, rows[1]) + } +} + +func TestAtAnyComposite(t *testing.T) { mem := memory.NewGoAllocator() t.Run("struct", func(t *testing.T) { @@ -547,9 +579,9 @@ func TestGetAnyComposite(t *testing.T) { arr := sb.NewArray() defer arr.Release() - got, err := GetAny(arr, 0) + got, err := AtAny(arr, 0) if err != nil { - t.Fatalf("GetAny: %v", err) + t.Fatalf("AtAny: %v", err) } v := reflect.ValueOf(got) @@ -592,9 +624,9 @@ func TestGetAnyComposite(t *testing.T) { arr := lb.NewArray() defer arr.Release() - got, err := GetAny(arr, 0) + got, err := AtAny(arr, 0) if err != nil { - t.Fatalf("GetAny: %v", err) + t.Fatalf("AtAny: %v", err) } v := reflect.ValueOf(got) @@ -618,9 +650,9 @@ func TestGetAnyComposite(t *testing.T) { arr := mb.NewArray() defer arr.Release() - got, err := GetAny(arr, 0) + got, err := AtAny(arr, 0) if err != nil { - t.Fatalf("GetAny: %v", err) + t.Fatalf("AtAny: %v", err) } v := reflect.ValueOf(got) From 86d41a85ff7704591d74246d4072a0c7ebf9dfc1 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 12:40:16 -0400 Subject: [PATCH 22/82] refactor(arreflect): address review findings, reduce duplication, add RecordToAnySlice Code review fixes (job 879): - FromSlice empty-slice path now applies all encoding opts (decimal, temporal, dict, listview, ree) instead of only temporal/dict - WithTemporal validates input, returns ErrUnsupportedType on invalid values - Add TestRecordAtAny and TestRecordToAnySlice for untested API surface API (job 881): - Add RecordToAnySlice for Record{Func} symmetry with ToAnySlice Duplication (job 880): - Extract asTime/asDuration helpers, 
eliminating 13 identical TypeAssert blocks - Collapse 4 list-like builder cases in appendValue into appendListElement Refactoring (job 883): - Split getStructFields into collectFieldCandidates + resolveFieldCandidates - Replace fragile decimal comma-reassembly with paren-aware splitTagTokens - Cache 12 primitive reflect.Type vars, replace inline reflect.TypeOf calls - Use listLike interface for Elem() in InferGoType (replaces 4-way switch) Test quality (job 882): - Extract setValueAt[T] generic helper, replacing 24 boilerplate blocks - Consolidate testMem() usage across public and integration tests --- arrow/arreflect/reflect.go | 150 ++++++++++------- arrow/arreflect/reflect_arrow_to_go_test.go | 124 ++++---------- arrow/arreflect/reflect_go_to_arrow.go | 173 ++++++++++---------- arrow/arreflect/reflect_infer.go | 75 +++++---- arrow/arreflect/reflect_integration_test.go | 7 +- arrow/arreflect/reflect_public_test.go | 90 +++++++++- 6 files changed, 336 insertions(+), 283 deletions(-) diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index 51e2d2d4..67b33151 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -79,42 +79,35 @@ func parseTag(tag string) tagOpts { return opts } -func parseOptions(opts *tagOpts, rest string) { - for len(rest) > 0 { - var token string - if idx := strings.Index(rest, ","); idx >= 0 { - token = rest[:idx] - rest = rest[idx+1:] - } else { - token = rest - rest = "" +func splitTagTokens(rest string) []string { + var tokens []string + depth := 0 + start := 0 + for i := 0; i < len(rest); i++ { + switch rest[i] { + case '(': + depth++ + case ')': + depth-- + case ',': + if depth == 0 { + tokens = append(tokens, strings.TrimSpace(rest[start:i])) + start = i + 1 + } } - token = strings.TrimSpace(token) + } + if start < len(rest) { + tokens = append(tokens, strings.TrimSpace(rest[start:])) + } + return tokens +} - if strings.HasPrefix(token, "decimal(") { - if strings.HasSuffix(token, ")") { - 
parseDecimalOpt(opts, token) - continue - } - next := token - for len(rest) > 0 { - var part string - if idx := strings.Index(rest, ","); idx >= 0 { - part = rest[:idx] - rest = rest[idx+1:] - } else { - part = rest - rest = "" - } - next = next + "," + strings.TrimSpace(part) - if strings.HasSuffix(next, ")") { - break - } - } - parseDecimalOpt(opts, next) +func parseOptions(opts *tagOpts, rest string) { + for _, token := range splitTagTokens(rest) { + if strings.HasPrefix(token, "decimal(") && strings.HasSuffix(token, ")") { + parseDecimalOpt(opts, token) continue } - switch token { case "dict": opts.Dict = true @@ -143,28 +136,25 @@ func parseDecimalOpt(opts *tagOpts, token string) { } } -func getStructFields(t reflect.Type) []fieldMeta { - for t.Kind() == reflect.Ptr { - t = t.Elem() - } - - if t.Kind() != reflect.Struct { - return nil - } +type bfsEntry struct { + t reflect.Type + index []int + depth int +} - type bfsEntry struct { - t reflect.Type - index []int - depth int - } +type candidate struct { + meta fieldMeta + depth int + tagged bool + order int +} - type candidate struct { - meta fieldMeta - depth int - tagged bool - order int - } +type resolvedField struct { + meta fieldMeta + order int +} +func collectFieldCandidates(t reflect.Type) map[string][]candidate { nameMap := make(map[string][]candidate) orderCounter := 0 @@ -242,7 +232,6 @@ func getStructFields(t reflect.Type) []fieldMeta { Opts: opts, } - // Assign insertion order on first encounter of this name. 
existingCands := nameMap[arrowName] order := orderCounter if len(existingCands) > 0 { @@ -260,11 +249,10 @@ func getStructFields(t reflect.Type) []fieldMeta { } } - type resolvedField struct { - meta fieldMeta - order int - } + return nameMap +} +func resolveFieldCandidates(nameMap map[string][]candidate) []fieldMeta { resolved := make([]resolvedField, 0, len(nameMap)) for _, candidates := range nameMap { minDepth := candidates[0].depth @@ -312,6 +300,18 @@ func getStructFields(t reflect.Type) []fieldMeta { return result } +func getStructFields(t reflect.Type) []fieldMeta { + for t.Kind() == reflect.Ptr { + t = t.Elem() + } + + if t.Kind() != reflect.Struct { + return nil + } + + return resolveFieldCandidates(collectFieldCandidates(t)) +} + var structFieldCache sync.Map func cachedStructFields(t reflect.Type) []fieldMeta { @@ -374,10 +374,22 @@ func WithDecimal(precision, scale int32) Option { // WithTemporal overrides the Arrow temporal encoding for time.Time slices. // Valid values: "date32", "date64", "time32", "time64", "timestamp" (default). // Equivalent to tagging a struct field with arrow:",date32" etc. +// Invalid values cause FromSlice to return an error. 
func WithTemporal(temporal string) Option { return func(o *tagOpts) { o.Temporal = temporal } } +var validTemporalOpts = map[string]bool{ + "": true, "timestamp": true, "date32": true, "date64": true, "time32": true, "time64": true, +} + +func validateTemporalOpt(temporal string) error { + if !validTemporalOpts[temporal] { + return fmt.Errorf("arreflect: invalid WithTemporal value %q; valid values are date32, date64, time32, time64, timestamp: %w", temporal, ErrUnsupportedType) + } + return nil +} + func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Array, error) { if mem == nil { mem = memory.DefaultAllocator @@ -386,12 +398,25 @@ func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Arr for _, o := range opts { o(&tOpts) } + if err := validateTemporalOpt(tOpts.Temporal); err != nil { + return nil, err + } if len(vals) == 0 { - dt, err := inferArrowType(reflect.TypeFor[T]()) + goType := reflect.TypeFor[T]() + dt, err := inferArrowType(goType) if err != nil { return nil, err } - dt = applyTemporalOpts(dt, reflect.TypeFor[T](), tOpts) + dt = applyDecimalOpts(dt, goType, tOpts) + dt = applyTemporalOpts(dt, goType, tOpts) + if tOpts.ListView { + if lt, ok := dt.(*arrow.ListType); ok { + dt = arrow.ListViewOf(lt.Elem()) + } + } + if tOpts.REE { + dt = arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, dt) + } if tOpts.Dict { dt = &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: dt} } @@ -439,6 +464,15 @@ func RecordAtAny(rec arrow.Record, i int) (any, error) { return AtAny(sa, i) } +// RecordToAnySlice converts all rows of an Arrow Record to Go values, +// inferring the Go type at runtime via [InferGoType]. +// Equivalent to ToAnySlice on the struct array underlying the record. 
+func RecordToAnySlice(rec arrow.Record) ([]any, error) { + sa := array.RecordToStructArray(rec) + defer sa.Release() + return ToAnySlice(sa) +} + // AtAny converts a single element at index i of an Arrow array to a Go value, // inferring the Go type from the Arrow DataType at runtime via [InferGoType]. // Useful when the column type is not known at compile time. diff --git a/arrow/arreflect/reflect_arrow_to_go_test.go b/arrow/arreflect/reflect_arrow_to_go_test.go index 344dc2f8..dda3c384 100644 --- a/arrow/arreflect/reflect_arrow_to_go_test.go +++ b/arrow/arreflect/reflect_arrow_to_go_test.go @@ -29,6 +29,15 @@ import ( "github.com/apache/arrow-go/v18/arrow/memory" ) +func setValueAt[T any](t *testing.T, arr arrow.Array, i int) T { + t.Helper() + var got T + if err := setValue(reflect.ValueOf(&got).Elem(), arr, i); err != nil { + t.Fatal(err) + } + return got +} + func TestSetValue(t *testing.T) { mem := memory.NewGoAllocator() @@ -40,10 +49,7 @@ func TestSetValue(t *testing.T) { arr := b.NewBooleanArray() defer arr.Release() - var got bool - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[bool](t, arr, 0) if !got { t.Errorf("expected true, got false") } @@ -64,10 +70,7 @@ func TestSetValue(t *testing.T) { arr := b.NewStringArray() defer arr.Release() - var got string - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[string](t, arr, 0) if got != "hello" { t.Errorf("expected hello, got %q", got) } @@ -80,10 +83,7 @@ func TestSetValue(t *testing.T) { arr := b.NewBinaryArray() defer arr.Release() - var got []byte - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[[]byte](t, arr, 0) if string(got) != "data" { t.Errorf("expected data, got %q", got) } @@ -111,10 +111,7 @@ func TestSetValue(t *testing.T) { arr := b.NewStringArray() defer arr.Release() - var got *string - if err := 
setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[*string](t, arr, 0) if got == nil || *got != "ptr" { t.Errorf("expected ptr, got %v", got) } @@ -140,10 +137,7 @@ func TestSetPrimitiveValue(t *testing.T) { arr := b.NewInt32Array() defer arr.Release() - var got int32 - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[int32](t, arr, 0) if got != 42 { t.Errorf("expected 42, got %d", got) } @@ -232,10 +226,7 @@ func TestSetTemporalValue(t *testing.T) { arr := b.NewArray().(*array.Timestamp) defer arr.Release() - var got time.Time - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[time.Time](t, arr, 0) if !got.Equal(now) { t.Errorf("expected %v, got %v", now, got) } @@ -248,10 +239,7 @@ func TestSetTemporalValue(t *testing.T) { arr := b.NewArray().(*array.Date32) defer arr.Release() - var got time.Time - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[time.Time](t, arr, 0) expected := arrow.Date32(19000).ToTime() if !got.Equal(expected) { t.Errorf("expected %v, got %v", expected, got) @@ -266,10 +254,7 @@ func TestSetTemporalValue(t *testing.T) { arr := b.NewArray().(*array.Duration) defer arr.Release() - var got time.Duration - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[time.Duration](t, arr, 0) expected := 5 * time.Second if got != expected { t.Errorf("expected %v, got %v", expected, got) @@ -284,10 +269,7 @@ func TestSetTemporalValue(t *testing.T) { arr := b.NewArray().(*array.Timestamp) defer arr.Release() - var got *time.Time - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[*time.Time](t, arr, 0) if got != nil { t.Errorf("expected nil for null timestamp pointer") } @@ -346,18 +328,12 @@ func 
TestSetDecimalValue(t *testing.T) { arr := b.NewDecimal128Array() defer arr.Release() - var got decimal128.Num - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[decimal128.Num](t, arr, 0) if got != num { t.Errorf("expected %v, got %v", num, got) } - var gotPtr *decimal128.Num - if err := setValue(reflect.ValueOf(&gotPtr).Elem(), arr, 1); err != nil { - t.Fatal(err) - } + gotPtr := setValueAt[*decimal128.Num](t, arr, 1) if gotPtr != nil { t.Errorf("expected nil for null decimal128") } @@ -372,10 +348,7 @@ func TestSetDecimalValue(t *testing.T) { arr := b.NewDecimal256Array() defer arr.Release() - var got decimal256.Num - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[decimal256.Num](t, arr, 0) if got != num { t.Errorf("expected %v, got %v", num, got) } @@ -391,18 +364,12 @@ func TestSetDecimalValue(t *testing.T) { arr := b.NewArray().(*array.Decimal32) defer arr.Release() - var got decimal.Decimal32 - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[decimal.Decimal32](t, arr, 0) if got != num { t.Errorf("expected %v, got %v", num, got) } - var gotPtr *decimal.Decimal32 - if err := setValue(reflect.ValueOf(&gotPtr).Elem(), arr, 1); err != nil { - t.Fatal(err) - } + gotPtr := setValueAt[*decimal.Decimal32](t, arr, 1) if gotPtr != nil { t.Errorf("expected nil for null decimal32") } @@ -417,10 +384,7 @@ func TestSetDecimalValue(t *testing.T) { arr := b.NewArray().(*array.Decimal64) defer arr.Release() - var got decimal.Decimal64 - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[decimal.Decimal64](t, arr, 0) if got != num { t.Errorf("expected %v, got %v", num, got) } @@ -564,10 +528,7 @@ func TestSetListValue(t *testing.T) { arr := lb.NewListArray() defer arr.Release() - var got []int32 - if err := 
setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[[]int32](t, arr, 0) if !reflect.DeepEqual(got, []int32{1, 2, 3}) { t.Errorf("expected [1,2,3], got %v", got) } @@ -641,10 +602,7 @@ func TestSetListValue(t *testing.T) { arr := lvb.NewLargeListViewArray() defer arr.Release() - var got []int32 - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[[]int32](t, arr, 0) if !reflect.DeepEqual(got, []int32{1, 2}) { t.Errorf("row 0: expected [1,2], got %v", got) } @@ -683,10 +641,7 @@ func TestSetMapValue(t *testing.T) { arr := mb.NewMapArray() defer arr.Release() - var got map[string]int32 - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[map[string]int32](t, arr, 0) if got["a"] != 1 || got["b"] != 2 { t.Errorf("expected {a:1, b:2}, got %v", got) } @@ -724,10 +679,7 @@ func TestSetFixedSizeListValue(t *testing.T) { arr := b.NewArray().(*array.FixedSizeList) defer arr.Release() - var got [3]int32 - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[[3]int32](t, arr, 0) if got != [3]int32{10, 20, 30} { t.Errorf("expected [10,20,30], got %v", got) } @@ -759,10 +711,7 @@ func TestSetFixedSizeListValue(t *testing.T) { arr := b.NewArray().(*array.FixedSizeList) defer arr.Release() - var got []int32 - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[[]int32](t, arr, 0) if !reflect.DeepEqual(got, []int32{7, 8}) { t.Errorf("expected [7,8], got %v", got) } @@ -803,10 +752,7 @@ func TestSetDictionaryValue(t *testing.T) { arr := bldr.NewDictionaryArray() defer arr.Release() - var got string - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[string](t, arr, 0) if got != "foo" { t.Errorf("expected foo, got %q", got) } @@ -825,10 +771,7 @@ 
func TestSetDictionaryValue(t *testing.T) { t.Errorf("expected foo, got %q", got) } - var gotPtr *string - if err := setValue(reflect.ValueOf(&gotPtr).Elem(), arr, 3); err != nil { - t.Fatal(err) - } + gotPtr := setValueAt[*string](t, arr, 3) if gotPtr != nil { t.Errorf("expected nil for null dictionary entry") } @@ -851,10 +794,7 @@ func TestSetRunEndEncodedValue(t *testing.T) { arr := b.NewRunEndEncodedArray() defer arr.Release() - var got string - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } + got := setValueAt[string](t, arr, 0) if got != "aaa" { t.Errorf("expected aaa at logical 0, got %q", got) } diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index afe196bb..b1e36008 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -138,9 +138,9 @@ func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e case arrow.BINARY: b.(*array.BinaryBuilder).Append(v.Bytes()) case arrow.DURATION: - d, ok := reflect.TypeAssert[time.Duration](v) - if !ok { - return fmt.Errorf("expected time.Duration, got %s: %w", v.Type(), ErrTypeMismatch) + d, err := asDuration(v) + if err != nil { + return err } b.(*array.DurationBuilder).Append(arrow.Duration(d.Nanoseconds())) case arrow.DECIMAL128: @@ -171,6 +171,22 @@ func timeOfDayNanos(t time.Time) int64 { return t.Sub(midnight).Nanoseconds() } +func asTime(v reflect.Value) (time.Time, error) { + t, ok := reflect.TypeAssert[time.Time](v) + if !ok { + return time.Time{}, fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) + } + return t, nil +} + +func asDuration(v reflect.Value) (time.Duration, error) { + d, ok := reflect.TypeAssert[time.Duration](v) + if !ok { + return 0, fmt.Errorf("expected time.Duration, got %s: %w", v.Type(), ErrTypeMismatch) + } + return d, nil +} + func derefSliceElem(vals reflect.Value) (elemType reflect.Type, isPtr bool) { elemType = 
vals.Type().Elem() isPtr = elemType.Kind() == reflect.Ptr @@ -218,9 +234,9 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) defer b.Release() b.Reserve(vals.Len()) if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { - t, ok := reflect.TypeAssert[time.Time](v) - if !ok { - return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) + t, err := asTime(v) + if err != nil { + return err } b.Append(arrow.Date32FromTime(t)) return nil @@ -233,9 +249,9 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) defer b.Release() b.Reserve(vals.Len()) if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { - t, ok := reflect.TypeAssert[time.Time](v) - if !ok { - return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) + t, err := asTime(v) + if err != nil { + return err } b.Append(arrow.Date64FromTime(t)) return nil @@ -249,9 +265,9 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) defer b.Release() b.Reserve(vals.Len()) if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { - t, ok := reflect.TypeAssert[time.Time](v) - if !ok { - return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) + t, err := asTime(v) + if err != nil { + return err } b.Append(arrow.Time32(timeOfDayNanos(t) / int64(dt.Unit.Multiplier()))) return nil @@ -265,9 +281,9 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) defer b.Release() b.Reserve(vals.Len()) if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { - t, ok := reflect.TypeAssert[time.Time](v) - if !ok { - return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) + t, err := asTime(v) + if err != nil { + return err } b.Append(arrow.Time64(timeOfDayNanos(t) / int64(dt.Unit.Multiplier()))) return nil @@ -281,9 +297,9 @@ func buildTemporalArray(vals 
reflect.Value, opts tagOpts, mem memory.Allocator) defer tb.Release() tb.Reserve(vals.Len()) if err := iterSlice(vals, isPtr, tb.AppendNull, func(v reflect.Value) error { - t, ok := reflect.TypeAssert[time.Time](v) - if !ok { - return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) + t, err := asTime(v) + if err != nil { + return err } tb.Append(arrow.Timestamp(t.UnixNano())) return nil @@ -299,9 +315,9 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) defer db.Release() db.Reserve(vals.Len()) if err := iterSlice(vals, isPtr, db.AppendNull, func(v reflect.Value) error { - d, ok := reflect.TypeAssert[time.Duration](v) - if !ok { - return fmt.Errorf("expected time.Duration, got %s: %w", v.Type(), ErrTypeMismatch) + d, err := asDuration(v) + if err != nil { + return err } db.Append(arrow.Duration(d.Nanoseconds())) return nil @@ -437,41 +453,41 @@ func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, er func appendTemporalValue(b array.Builder, v reflect.Value) error { switch tb := b.(type) { case *array.TimestampBuilder: - t, ok := reflect.TypeAssert[time.Time](v) - if !ok { - return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) + t, err := asTime(v) + if err != nil { + return err } tb.Append(arrow.Timestamp(t.UnixNano())) case *array.Date32Builder: - t, ok := reflect.TypeAssert[time.Time](v) - if !ok { - return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) + t, err := asTime(v) + if err != nil { + return err } tb.Append(arrow.Date32FromTime(t)) case *array.Date64Builder: - t, ok := reflect.TypeAssert[time.Time](v) - if !ok { - return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) + t, err := asTime(v) + if err != nil { + return err } tb.Append(arrow.Date64FromTime(t)) case *array.Time32Builder: unit := tb.Type().(*arrow.Time32Type).Unit - t, ok := reflect.TypeAssert[time.Time](v) - if !ok { - return 
fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) + t, err := asTime(v) + if err != nil { + return err } tb.Append(arrow.Time32(timeOfDayNanos(t) / int64(unit.Multiplier()))) case *array.Time64Builder: unit := tb.Type().(*arrow.Time64Type).Unit - t, ok := reflect.TypeAssert[time.Time](v) - if !ok { - return fmt.Errorf("expected time.Time, got %s: %w", v.Type(), ErrTypeMismatch) + t, err := asTime(v) + if err != nil { + return err } tb.Append(arrow.Time64(timeOfDayNanos(t) / int64(unit.Multiplier()))) case *array.DurationBuilder: - d, ok := reflect.TypeAssert[time.Duration](v) - if !ok { - return fmt.Errorf("expected time.Duration, got %s: %w", v.Type(), ErrTypeMismatch) + d, err := asDuration(v) + if err != nil { + return err } tb.Append(arrow.Duration(d.Nanoseconds())) default: @@ -549,18 +565,8 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { return appendTemporalValue(b, v) case *array.Decimal128Builder, *array.Decimal256Builder, *array.Decimal32Builder, *array.Decimal64Builder: return appendDecimalValue(b, v) - case *array.ListBuilder: - if v.Kind() == reflect.Slice && v.IsNil() { - tb.AppendNull() - } else { - tb.Append(true) - vb := tb.ValueBuilder() - for i := 0; i < v.Len(); i++ { - if err := appendValue(vb, v.Index(i), tagOpts{}); err != nil { - return err - } - } - } + case *array.ListBuilder, *array.LargeListBuilder, *array.ListViewBuilder, *array.LargeListViewBuilder: + return appendListElement(b, v) case *array.FixedSizeListBuilder: expectedLen := int(tb.Type().(*arrow.FixedSizeListType).Len()) if v.Len() != expectedLen { @@ -600,42 +606,6 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { return fmt.Errorf("struct field %q: %w", fm.Name, err) } } - case *array.ListViewBuilder: - if v.Kind() == reflect.Slice && v.IsNil() { - tb.AppendNull() - } else { - tb.AppendWithSize(true, v.Len()) - vb := tb.ValueBuilder() - for i := 0; i < v.Len(); i++ { - if err := appendValue(vb, 
v.Index(i), tagOpts{}); err != nil { - return err - } - } - } - case *array.LargeListBuilder: - if v.Kind() == reflect.Slice && v.IsNil() { - tb.AppendNull() - } else { - tb.Append(true) - vb := tb.ValueBuilder() - for i := 0; i < v.Len(); i++ { - if err := appendValue(vb, v.Index(i), tagOpts{}); err != nil { - return err - } - } - } - case *array.LargeListViewBuilder: - if v.Kind() == reflect.Slice && v.IsNil() { - tb.AppendNull() - } else { - tb.AppendWithSize(true, v.Len()) - vb := tb.ValueBuilder() - for i := 0; i < v.Len(); i++ { - if err := appendValue(vb, v.Index(i), tagOpts{}); err != nil { - return err - } - } - } case *array.RunEndEncodedBuilder: if v.Kind() == reflect.Ptr && v.IsNil() { tb.AppendNull() @@ -698,6 +668,35 @@ type listBuilderLike interface { ValueBuilder() array.Builder } +func appendListElement(b array.Builder, v reflect.Value) error { + type listAppender interface { + AppendNull() + ValueBuilder() array.Builder + } + la := b.(listAppender) + if v.Kind() == reflect.Slice && v.IsNil() { + la.AppendNull() + return nil + } + switch lb := b.(type) { + case *array.ListViewBuilder: + lb.AppendWithSize(true, v.Len()) + case *array.LargeListViewBuilder: + lb.AppendWithSize(true, v.Len()) + case *array.ListBuilder: + lb.Append(true) + case *array.LargeListBuilder: + lb.Append(true) + } + vb := la.ValueBuilder() + for i := 0; i < v.Len(); i++ { + if err := appendValue(vb, v.Index(i), tagOpts{}); err != nil { + return err + } + } + return nil +} + func buildListLikeArray(vals reflect.Value, mem memory.Allocator, isView bool) (arrow.Array, error) { elemDT, isOuterPtr, err := inferListElemDT(vals) if err != nil { diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index d1578637..b49855d5 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -38,6 +38,18 @@ var ( typeOfByteSlice = reflect.TypeOf([]byte{}) typeOfInt = reflect.TypeOf(int(0)) typeOfUint = reflect.TypeOf(uint(0)) + typeOfInt8 = 
reflect.TypeOf(int8(0)) + typeOfInt16 = reflect.TypeOf(int16(0)) + typeOfInt32 = reflect.TypeOf(int32(0)) + typeOfInt64 = reflect.TypeOf(int64(0)) + typeOfUint8 = reflect.TypeOf(uint8(0)) + typeOfUint16 = reflect.TypeOf(uint16(0)) + typeOfUint32 = reflect.TypeOf(uint32(0)) + typeOfUint64 = reflect.TypeOf(uint64(0)) + typeOfFloat32 = reflect.TypeOf(float32(0)) + typeOfFloat64 = reflect.TypeOf(float64(0)) + typeOfBool = reflect.TypeOf(false) + typeOfString = reflect.TypeOf("") ) const ( @@ -53,33 +65,33 @@ func inferPrimitiveArrowType(t reflect.Type) (arrow.DataType, error) { } switch t { - case reflect.TypeOf(int8(0)): + case typeOfInt8: return arrow.PrimitiveTypes.Int8, nil - case reflect.TypeOf(int16(0)): + case typeOfInt16: return arrow.PrimitiveTypes.Int16, nil - case reflect.TypeOf(int32(0)): + case typeOfInt32: return arrow.PrimitiveTypes.Int32, nil - case reflect.TypeOf(int64(0)): + case typeOfInt64: return arrow.PrimitiveTypes.Int64, nil case typeOfInt: return arrow.PrimitiveTypes.Int64, nil - case reflect.TypeOf(uint8(0)): + case typeOfUint8: return arrow.PrimitiveTypes.Uint8, nil - case reflect.TypeOf(uint16(0)): + case typeOfUint16: return arrow.PrimitiveTypes.Uint16, nil - case reflect.TypeOf(uint32(0)): + case typeOfUint32: return arrow.PrimitiveTypes.Uint32, nil - case reflect.TypeOf(uint64(0)): + case typeOfUint64: return arrow.PrimitiveTypes.Uint64, nil case typeOfUint: return arrow.PrimitiveTypes.Uint64, nil - case reflect.TypeOf(float32(0)): + case typeOfFloat32: return arrow.PrimitiveTypes.Float32, nil - case reflect.TypeOf(float64(0)): + case typeOfFloat64: return arrow.PrimitiveTypes.Float64, nil - case reflect.TypeOf(false): + case typeOfBool: return arrow.FixedWidthTypes.Boolean, nil - case reflect.TypeOf(""): + case typeOfString: return arrow.BinaryTypes.String, nil case typeOfByteSlice: return arrow.BinaryTypes.Binary, nil @@ -276,29 +288,29 @@ func InferType[T any]() (arrow.DataType, error) { func InferGoType(dt arrow.DataType) 
(reflect.Type, error) { switch dt.ID() { case arrow.INT8: - return reflect.TypeOf(int8(0)), nil + return typeOfInt8, nil case arrow.INT16: - return reflect.TypeOf(int16(0)), nil + return typeOfInt16, nil case arrow.INT32: - return reflect.TypeOf(int32(0)), nil + return typeOfInt32, nil case arrow.INT64: - return reflect.TypeOf(int64(0)), nil + return typeOfInt64, nil case arrow.UINT8: - return reflect.TypeOf(uint8(0)), nil + return typeOfUint8, nil case arrow.UINT16: - return reflect.TypeOf(uint16(0)), nil + return typeOfUint16, nil case arrow.UINT32: - return reflect.TypeOf(uint32(0)), nil + return typeOfUint32, nil case arrow.UINT64: - return reflect.TypeOf(uint64(0)), nil + return typeOfUint64, nil case arrow.FLOAT32: - return reflect.TypeOf(float32(0)), nil + return typeOfFloat32, nil case arrow.FLOAT64: - return reflect.TypeOf(float64(0)), nil + return typeOfFloat64, nil case arrow.BOOL: - return reflect.TypeOf(false), nil + return typeOfBool, nil case arrow.STRING, arrow.LARGE_STRING: - return reflect.TypeOf(""), nil + return typeOfString, nil case arrow.BINARY, arrow.LARGE_BINARY: return typeOfByteSlice, nil case arrow.TIMESTAMP, arrow.DATE32, arrow.DATE64, arrow.TIME32, arrow.TIME64: @@ -315,19 +327,12 @@ func InferGoType(dt arrow.DataType) (reflect.Type, error) { return typeOfDec64, nil case arrow.LIST, arrow.LARGE_LIST, arrow.LIST_VIEW, arrow.LARGE_LIST_VIEW: - var elemDT arrow.DataType - switch t := dt.(type) { - case *arrow.ListType: - elemDT = t.Elem() - case *arrow.LargeListType: - elemDT = t.Elem() - case *arrow.ListViewType: - elemDT = t.Elem() - case *arrow.LargeListViewType: - elemDT = t.Elem() - default: + type listLike interface{ Elem() arrow.DataType } + ll, ok := dt.(listLike) + if !ok { return nil, fmt.Errorf("unsupported Arrow type for Go inference: %v: %w", dt, ErrUnsupportedType) } + elemDT := ll.Elem() elemType, err := InferGoType(elemDT) if err != nil { return nil, err diff --git a/arrow/arreflect/reflect_integration_test.go 
b/arrow/arreflect/reflect_integration_test.go index 574ef7b9..4838d14c 100644 --- a/arrow/arreflect/reflect_integration_test.go +++ b/arrow/arreflect/reflect_integration_test.go @@ -21,7 +21,6 @@ import ( "testing" "github.com/apache/arrow-go/v18/arrow" - "github.com/apache/arrow-go/v18/arrow/memory" ) type integOrderItem struct { @@ -64,7 +63,7 @@ type integExtended struct { } func TestReflectIntegration(t *testing.T) { - mem := memory.NewGoAllocator() + mem := testMem() t.Run("complex nested round-trip", func(t *testing.T) { orders := []integOrder{ @@ -475,7 +474,7 @@ func TestReflectIntegration(t *testing.T) { } func BenchmarkReflectFromGoSlice(b *testing.B) { - mem := memory.NewGoAllocator() + mem := testMem() rows := make([]integLargeRow, 1000) for i := range rows { rows[i] = integLargeRow{X: int32(i), Y: float64(i) * 1.5} @@ -491,7 +490,7 @@ func BenchmarkReflectFromGoSlice(b *testing.B) { } func BenchmarkReflectToGoSlice(b *testing.B) { - mem := memory.NewGoAllocator() + mem := testMem() rows := make([]integLargeRow, 1000) for i := range rows { rows[i] = integLargeRow{X: int32(i), Y: float64(i) * 1.5} diff --git a/arrow/arreflect/reflect_public_test.go b/arrow/arreflect/reflect_public_test.go index f0ed4407..5d5b94b0 100644 --- a/arrow/arreflect/reflect_public_test.go +++ b/arrow/arreflect/reflect_public_test.go @@ -305,7 +305,7 @@ func TestFromGoSlice(t *testing.T) { } func TestRecordToSlice(t *testing.T) { - mem := memory.NewGoAllocator() + mem := testMem() type Row struct { Name string @@ -366,7 +366,7 @@ func TestRecordToSlice(t *testing.T) { } func TestRecordFromSlice(t *testing.T) { - mem := memory.NewGoAllocator() + mem := testMem() type Row struct { Name string @@ -434,7 +434,7 @@ func TestRecordFromSlice(t *testing.T) { } func TestAtAny(t *testing.T) { - mem := memory.NewGoAllocator() + mem := testMem() b := array.NewInt32Builder(mem) defer b.Release() b.Append(42) @@ -460,7 +460,7 @@ func TestAtAny(t *testing.T) { } func TestToAnySlice(t 
*testing.T) { - mem := memory.NewGoAllocator() + mem := testMem() b := array.NewStringBuilder(mem) defer b.Release() b.Append("hello") @@ -481,7 +481,7 @@ func TestToAnySlice(t *testing.T) { } func TestErrSentinels(t *testing.T) { - mem := memory.NewGoAllocator() + mem := testMem() t.Run("ErrTypeMismatch via setValue wrong kind", func(t *testing.T) { b := array.NewInt32Builder(mem) @@ -534,7 +534,7 @@ func TestErrSentinels(t *testing.T) { } func TestRecordAt(t *testing.T) { - mem := memory.NewGoAllocator() + mem := testMem() type Row struct { Name string `arrow:"name"` Score float64 `arrow:"score"` @@ -563,8 +563,84 @@ func TestRecordAt(t *testing.T) { } } +func TestRecordAtAny(t *testing.T) { + mem := testMem() + type Row struct { + Name string `arrow:"name"` + Score float64 `arrow:"score"` + } + rows := []Row{{"alice", 9.5}, {"bob", 7.0}} + rec, err := RecordFromSlice(rows, mem) + if err != nil { + t.Fatalf("RecordFromSlice: %v", err) + } + defer rec.Release() + + got, err := RecordAtAny(rec, 0) + if err != nil { + t.Fatalf("RecordAtAny(0): %v", err) + } + v := reflect.ValueOf(got) + if v.Kind() != reflect.Struct { + t.Fatalf("expected struct, got %s", v.Kind()) + } + var nameField, scoreField reflect.Value + for i := 0; i < v.NumField(); i++ { + tag := v.Type().Field(i).Tag.Get("arrow") + switch tag { + case "name": + nameField = v.Field(i) + case "score": + scoreField = v.Field(i) + } + } + if nameField.String() != "alice" { + t.Errorf("name = %q, want %q", nameField.String(), "alice") + } + if scoreField.Float() != 9.5 { + t.Errorf("score = %v, want 9.5", scoreField.Float()) + } +} + +func TestRecordToAnySlice(t *testing.T) { + mem := testMem() + type Row struct { + Name string `arrow:"name"` + Score float64 `arrow:"score"` + } + rows := []Row{{"alice", 9.5}, {"bob", 7.0}} + rec, err := RecordFromSlice(rows, mem) + if err != nil { + t.Fatalf("RecordFromSlice: %v", err) + } + defer rec.Release() + + got, err := RecordToAnySlice(rec) + if err != nil { + 
t.Fatalf("RecordToAnySlice: %v", err) + } + if len(got) != 2 { + t.Fatalf("len = %d, want 2", len(got)) + } + for i, row := range got { + v := reflect.ValueOf(row) + if v.Kind() != reflect.Struct { + t.Fatalf("row %d: expected struct, got %s", i, v.Kind()) + } + var nameField reflect.Value + for fi := 0; fi < v.NumField(); fi++ { + if v.Type().Field(fi).Tag.Get("arrow") == "name" { + nameField = v.Field(fi) + } + } + if nameField.String() != rows[i].Name { + t.Errorf("row %d name = %q, want %q", i, nameField.String(), rows[i].Name) + } + } +} + func TestAtAnyComposite(t *testing.T) { - mem := memory.NewGoAllocator() + mem := testMem() t.Run("struct", func(t *testing.T) { st := arrow.StructOf( From 079eb28d175bae033df098f27a7b376f8e093dab Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 12:45:38 -0400 Subject: [PATCH 23/82] fix(arreflect): guard test reflect.Value, add list builder default, pin ListView empty-slice - TestRecordAtAny: add IsValid() guards on nameField/scoreField to prevent panic if field lookup misses - appendListElement: add default error case to inner switch preventing silent builder corruption on unexpected list builder type - TestFromGoSlice: add empty-slice WithListView pinning test verifying LIST_VIEW type is produced --- arrow/arreflect/reflect_go_to_arrow.go | 2 ++ arrow/arreflect/reflect_public_test.go | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index b1e36008..c52753e7 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -687,6 +687,8 @@ func appendListElement(b array.Builder, v reflect.Value) error { lb.Append(true) case *array.LargeListBuilder: lb.Append(true) + default: + return fmt.Errorf("unexpected list builder type %T: %w", b, ErrUnsupportedType) } vb := la.ValueBuilder() for i := 0; i < v.Len(); i++ { diff --git a/arrow/arreflect/reflect_public_test.go 
b/arrow/arreflect/reflect_public_test.go index 5d5b94b0..72f392c7 100644 --- a/arrow/arreflect/reflect_public_test.go +++ b/arrow/arreflect/reflect_public_test.go @@ -302,6 +302,18 @@ func TestFromGoSlice(t *testing.T) { t.Errorf("expected len 0, got %d", arr.Len()) } }) + + t.Run("empty slice with WithListView", func(t *testing.T) { + arr, err := FromSlice([][]int32{}, mem, WithListView()) + if err != nil { + t.Fatal(err) + } + defer arr.Release() + + if arr.DataType().ID() != arrow.LIST_VIEW { + t.Errorf("expected LIST_VIEW, got %v", arr.DataType()) + } + }) } func TestRecordToSlice(t *testing.T) { @@ -594,6 +606,12 @@ func TestRecordAtAny(t *testing.T) { scoreField = v.Field(i) } } + if !nameField.IsValid() { + t.Fatal("name field not found") + } + if !scoreField.IsValid() { + t.Fatal("score field not found") + } if nameField.String() != "alice" { t.Errorf("name = %q, want %q", nameField.String(), "alice") } From 1b3cdb7f4b53b493a9ad3070986a94dd874bab73 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 14:16:41 -0400 Subject: [PATCH 24/82] test(arreflect): convert to testify, add godoc examples and struct-array ToAnySlice test - Convert all 6 test files from raw t.Errorf/t.Fatalf to testify assert/require - Add example_test.go with 11 testable examples for godoc: FromSlice (primitive, struct, WithDict, WithDecimal), ToSlice, RecordFromSlice, RecordToSlice, At, InferSchema, ToAnySlice (basic + nullable fields) - Add TestToAnySliceStructArray covering InferGoType -> reflect.StructOf -> ToAnySlice pipeline with nullable fields --- arrow/arreflect/example_test.go | 312 +++++++++++ arrow/arreflect/reflect_arrow_to_go_test.go | 339 +++--------- arrow/arreflect/reflect_go_to_arrow_test.go | 581 ++++++-------------- arrow/arreflect/reflect_infer_test.go | 363 ++++-------- arrow/arreflect/reflect_integration_test.go | 265 +++------ arrow/arreflect/reflect_public_test.go | 502 +++++++---------- arrow/arreflect/reflect_test.go | 91 +-- 7 files changed, 945 
insertions(+), 1508 deletions(-) create mode 100644 arrow/arreflect/example_test.go diff --git a/arrow/arreflect/example_test.go b/arrow/arreflect/example_test.go new file mode 100644 index 00000000..95346f63 --- /dev/null +++ b/arrow/arreflect/example_test.go @@ -0,0 +1,312 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package arreflect_test + +import ( + "fmt" + "reflect" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/arreflect" + "github.com/apache/arrow-go/v18/arrow/decimal128" + "github.com/apache/arrow-go/v18/arrow/memory" +) + +func ExampleFromSlice() { + mem := memory.NewGoAllocator() + + arr, err := arreflect.FromSlice([]int32{10, 20, 30}, mem) + if err != nil { + panic(err) + } + defer arr.Release() + + fmt.Println("Type:", arr.DataType()) + fmt.Println("Len:", arr.Len()) + for i := 0; i < arr.Len(); i++ { + fmt.Println(arr.(*array.Int32).Value(i)) + } + // Output: + // Type: int32 + // Len: 3 + // 10 + // 20 + // 30 +} + +func ExampleFromSlice_structSlice() { + mem := memory.NewGoAllocator() + + type Row struct { + Name string `arrow:"name"` + Score float64 `arrow:"score"` + } + + arr, err := arreflect.FromSlice([]Row{ + {"alice", 9.5}, + {"bob", 7.0}, + }, mem) + if err != nil { + panic(err) + } + defer arr.Release() + + sa := arr.(*array.Struct) + fmt.Println("Type:", sa.DataType()) + fmt.Println("Names:", sa.Field(0)) + fmt.Println("Scores:", sa.Field(1)) + // Output: + // Type: struct + // Names: ["alice" "bob"] + // Scores: [9.5 7] +} + +func ExampleFromSlice_withDecimal() { + mem := memory.NewGoAllocator() + + vals := []decimal128.Num{ + decimal128.FromI64(12345), + decimal128.FromI64(-67890), + } + arr, err := arreflect.FromSlice(vals, mem, arreflect.WithDecimal(10, 2)) + if err != nil { + panic(err) + } + defer arr.Release() + + fmt.Println("Type:", arr.DataType()) + fmt.Println("Len:", arr.Len()) + // Output: + // Type: decimal(10, 2) + // Len: 2 +} + +func ExampleToSlice() { + mem := memory.NewGoAllocator() + + b := array.NewFloat64Builder(mem) + defer b.Release() + b.Append(1.1) + b.Append(2.2) + b.Append(3.3) + arr := b.NewArray() + defer arr.Release() + + vals, err := arreflect.ToSlice[float64](arr) + if err != nil { + panic(err) + } + fmt.Println(vals) + // Output: + // 
[1.1 2.2 3.3] +} + +type Measurement struct { + Sensor string `arrow:"sensor"` + Value float64 `arrow:"value"` +} + +func ExampleRecordFromSlice() { + mem := memory.NewGoAllocator() + + rows := []Measurement{ + {"temp-1", 23.5}, + {"temp-2", 19.8}, + } + rec, err := arreflect.RecordFromSlice(rows, mem) + if err != nil { + panic(err) + } + defer rec.Release() + + fmt.Println("Schema:", rec.Schema()) + fmt.Println("Rows:", rec.NumRows()) + fmt.Println("Col 0:", rec.Column(0)) + fmt.Println("Col 1:", rec.Column(1)) + // Output: + // Schema: schema: + // fields: 2 + // - sensor: type=utf8 + // - value: type=float64 + // Rows: 2 + // Col 0: ["temp-1" "temp-2"] + // Col 1: [23.5 19.8] +} + +func ExampleRecordToSlice() { + mem := memory.NewGoAllocator() + + rows := []Measurement{ + {"temp-1", 23.5}, + {"temp-2", 19.8}, + } + rec, err := arreflect.RecordFromSlice(rows, mem) + if err != nil { + panic(err) + } + defer rec.Release() + + got, err := arreflect.RecordToSlice[Measurement](rec) + if err != nil { + panic(err) + } + for _, m := range got { + fmt.Printf("%s: %.1f\n", m.Sensor, m.Value) + } + // Output: + // temp-1: 23.5 + // temp-2: 19.8 +} + +func ExampleAt() { + mem := memory.NewGoAllocator() + + b := array.NewStringBuilder(mem) + defer b.Release() + b.Append("alpha") + b.Append("beta") + b.Append("gamma") + arr := b.NewArray() + defer arr.Release() + + val, err := arreflect.At[string](arr, 1) + if err != nil { + panic(err) + } + fmt.Println(val) + // Output: + // beta +} + +func ExampleInferSchema() { + type Event struct { + ID int64 `arrow:"id"` + Name string `arrow:"name"` + Score float64 `arrow:"score"` + Comment *string `arrow:"comment"` + } + + schema, err := arreflect.InferSchema[Event]() + if err != nil { + panic(err) + } + fmt.Println(schema) + // Output: + // schema: + // fields: 4 + // - id: type=int64 + // - name: type=utf8 + // - score: type=float64 + // - comment: type=utf8, nullable +} + +func ExampleFromSlice_withDict() { + mem := 
memory.NewGoAllocator() + + arr, err := arreflect.FromSlice( + []string{"red", "green", "red", "blue", "green"}, + mem, + arreflect.WithDict(), + ) + if err != nil { + panic(err) + } + defer arr.Release() + + fmt.Println("Type:", arr.DataType()) + dict := arr.(*array.Dictionary) + fmt.Println("Indices:", dict.Indices()) + fmt.Println("Dictionary:", dict.Dictionary()) + // Output: + // Type: dictionary + // Indices: [0 1 0 2 1] + // Dictionary: ["red" "green" "blue"] +} + +func ExampleToAnySlice() { + mem := memory.NewGoAllocator() + + st := arrow.StructOf( + arrow.Field{Name: "city", Type: arrow.BinaryTypes.String}, + arrow.Field{Name: "pop", Type: arrow.PrimitiveTypes.Int64}, + ) + sb := array.NewStructBuilder(mem, st) + defer sb.Release() + + sb.Append(true) + sb.FieldBuilder(0).(*array.StringBuilder).Append("Tokyo") + sb.FieldBuilder(1).(*array.Int64Builder).Append(14000000) + + sb.Append(true) + sb.FieldBuilder(0).(*array.StringBuilder).Append("Paris") + sb.FieldBuilder(1).(*array.Int64Builder).Append(2200000) + + arr := sb.NewArray() + defer arr.Release() + + rows, err := arreflect.ToAnySlice(arr) + if err != nil { + panic(err) + } + for _, row := range rows { + fmt.Println(row) + } + // Output: + // {Tokyo 14000000} + // {Paris 2200000} +} + +func ExampleToAnySlice_nullableFields() { + mem := memory.NewGoAllocator() + + st := arrow.StructOf( + arrow.Field{Name: "name", Type: arrow.BinaryTypes.String, Nullable: false}, + arrow.Field{Name: "score", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, + ) + sb := array.NewStructBuilder(mem, st) + defer sb.Release() + + sb.Append(true) + sb.FieldBuilder(0).(*array.StringBuilder).Append("alice") + sb.FieldBuilder(1).(*array.Float64Builder).Append(9.5) + + sb.Append(true) + sb.FieldBuilder(0).(*array.StringBuilder).Append("bob") + sb.FieldBuilder(1).(*array.Float64Builder).AppendNull() + + arr := sb.NewArray() + defer arr.Release() + + rows, err := arreflect.ToAnySlice(arr) + if err != nil { + panic(err) + } + for 
_, row := range rows { + v := reflect.ValueOf(row) + name := v.FieldByIndex([]int{0}).String() + scoreField := v.FieldByIndex([]int{1}) + if scoreField.IsNil() { + fmt.Printf("%s: \n", name) + } else { + fmt.Printf("%s: %.1f\n", name, scoreField.Elem().Float()) + } + } + // Output: + // alice: 9.5 + // bob: +} diff --git a/arrow/arreflect/reflect_arrow_to_go_test.go b/arrow/arreflect/reflect_arrow_to_go_test.go index dda3c384..0d5b37a0 100644 --- a/arrow/arreflect/reflect_arrow_to_go_test.go +++ b/arrow/arreflect/reflect_arrow_to_go_test.go @@ -27,14 +27,14 @@ import ( "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/decimal256" "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func setValueAt[T any](t *testing.T, arr arrow.Array, i int) T { t.Helper() var got T - if err := setValue(reflect.ValueOf(&got).Elem(), arr, i); err != nil { - t.Fatal(err) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, i)) return got } @@ -50,17 +50,11 @@ func TestSetValue(t *testing.T) { defer arr.Release() got := setValueAt[bool](t, arr, 0) - if !got { - t.Errorf("expected true, got false") - } + assert.True(t, got, "expected true, got false") got = true - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { - t.Fatal(err) - } - if got { - t.Errorf("expected false (null → zero), got true") - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + assert.False(t, got, "expected false (null → zero), got true") }) t.Run("string", func(t *testing.T) { @@ -71,9 +65,7 @@ func TestSetValue(t *testing.T) { defer arr.Release() got := setValueAt[string](t, arr, 0) - if got != "hello" { - t.Errorf("expected hello, got %q", got) - } + assert.Equal(t, "hello", got) }) t.Run("binary", func(t *testing.T) { @@ -84,9 +76,7 @@ func TestSetValue(t *testing.T) { defer arr.Release() got := setValueAt[[]byte](t, arr, 0) - if string(got) != 
"data" { - t.Errorf("expected data, got %q", got) - } + assert.Equal(t, "data", string(got)) }) t.Run("unsupported type error", func(t *testing.T) { @@ -98,9 +88,7 @@ func TestSetValue(t *testing.T) { var got int32 err := setValue(reflect.ValueOf(&got).Elem(), arr, 0) - if err == nil { - t.Error("expected error for bool→int32 mismatch") - } + assert.Error(t, err, "expected error for bool→int32 mismatch") }) t.Run("pointer allocation", func(t *testing.T) { @@ -112,17 +100,13 @@ func TestSetValue(t *testing.T) { defer arr.Release() got := setValueAt[*string](t, arr, 0) - if got == nil || *got != "ptr" { - t.Errorf("expected ptr, got %v", got) + if assert.NotNil(t, got) { + assert.Equal(t, "ptr", *got) } got = new(string) - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { - t.Fatal(err) - } - if got != nil { - t.Errorf("expected nil for null, got %v", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + assert.Nil(t, got, "expected nil for null, got %v", got) }) } @@ -138,17 +122,11 @@ func TestSetPrimitiveValue(t *testing.T) { defer arr.Release() got := setValueAt[int32](t, arr, 0) - if got != 42 { - t.Errorf("expected 42, got %d", got) - } + assert.Equal(t, int32(42), got) got = 99 - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { - t.Fatal(err) - } - if got != 0 { - t.Errorf("expected 0 for null, got %d", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + assert.Equal(t, int32(0), got, "expected 0 for null, got %d", got) }) t.Run("int64", func(t *testing.T) { @@ -159,12 +137,8 @@ func TestSetPrimitiveValue(t *testing.T) { defer arr.Release() var got int64 - if err := setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } - if got != int64(1<<40) { - t.Errorf("expected large int64, got %d", got) - } + require.NoError(t, setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0)) + assert.Equal(t, int64(1<<40), got) }) t.Run("uint8", 
func(t *testing.T) { @@ -175,12 +149,8 @@ func TestSetPrimitiveValue(t *testing.T) { defer arr.Release() var got uint8 - if err := setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } - if got != 255 { - t.Errorf("expected 255, got %d", got) - } + require.NoError(t, setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0)) + assert.Equal(t, uint8(255), got) }) t.Run("float64", func(t *testing.T) { @@ -191,12 +161,8 @@ func TestSetPrimitiveValue(t *testing.T) { defer arr.Release() var got float64 - if err := setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0); err != nil { - t.Fatal(err) - } - if got != 3.14 { - t.Errorf("expected 3.14, got %f", got) - } + require.NoError(t, setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0)) + assert.Equal(t, 3.14, got) }) t.Run("type mismatch returns error", func(t *testing.T) { @@ -208,9 +174,7 @@ func TestSetPrimitiveValue(t *testing.T) { var got float64 err := setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0) - if err == nil { - t.Error("expected error for int32→float64 mismatch") - } + assert.Error(t, err, "expected error for int32→float64 mismatch") }) } @@ -227,9 +191,7 @@ func TestSetTemporalValue(t *testing.T) { defer arr.Release() got := setValueAt[time.Time](t, arr, 0) - if !got.Equal(now) { - t.Errorf("expected %v, got %v", now, got) - } + assert.True(t, got.Equal(now), "expected %v, got %v", now, got) }) t.Run("date32", func(t *testing.T) { @@ -241,9 +203,7 @@ func TestSetTemporalValue(t *testing.T) { got := setValueAt[time.Time](t, arr, 0) expected := arrow.Date32(19000).ToTime() - if !got.Equal(expected) { - t.Errorf("expected %v, got %v", expected, got) - } + assert.True(t, got.Equal(expected), "expected %v, got %v", expected, got) }) t.Run("duration", func(t *testing.T) { @@ -256,9 +216,7 @@ func TestSetTemporalValue(t *testing.T) { got := setValueAt[time.Duration](t, arr, 0) expected := 5 * time.Second - if got != expected { - t.Errorf("expected %v, got %v", 
expected, got) - } + assert.Equal(t, expected, got) }) t.Run("null temporal", func(t *testing.T) { @@ -270,9 +228,7 @@ func TestSetTemporalValue(t *testing.T) { defer arr.Release() got := setValueAt[*time.Time](t, arr, 0) - if got != nil { - t.Errorf("expected nil for null timestamp pointer") - } + assert.Nil(t, got, "expected nil for null timestamp pointer") }) t.Run("time32", func(t *testing.T) { @@ -286,12 +242,9 @@ func TestSetTemporalValue(t *testing.T) { var got time.Time v := reflect.ValueOf(&got).Elem() - if err := setValue(v, arr, 0); err != nil { - t.Fatalf("unexpected error: %v", err) - } - if got.Hour() != 10 || got.Minute() != 30 || got.Second() != 0 || got.Nanosecond()/1_000_000 != 500 { - t.Errorf("time32: got %v, want 10:30:00.500", got) - } + require.NoError(t, setValue(v, arr, 0)) + assert.True(t, got.Hour() == 10 && got.Minute() == 30 && got.Second() == 0 && got.Nanosecond()/1_000_000 == 500, + "time32: got %v, want 10:30:00.500", got) }) t.Run("time64", func(t *testing.T) { @@ -306,12 +259,9 @@ func TestSetTemporalValue(t *testing.T) { var got time.Time v := reflect.ValueOf(&got).Elem() - if err := setValue(v, arr, 0); err != nil { - t.Fatalf("unexpected error: %v", err) - } - if got.Hour() != 10 || got.Minute() != 30 || got.Second() != 0 || got.Nanosecond() != 123456789 { - t.Errorf("time64: got %v, want 10:30:00.123456789", got) - } + require.NoError(t, setValue(v, arr, 0)) + assert.True(t, got.Hour() == 10 && got.Minute() == 30 && got.Second() == 0 && got.Nanosecond() == 123456789, + "time64: got %v, want 10:30:00.123456789", got) }) } @@ -329,14 +279,10 @@ func TestSetDecimalValue(t *testing.T) { defer arr.Release() got := setValueAt[decimal128.Num](t, arr, 0) - if got != num { - t.Errorf("expected %v, got %v", num, got) - } + assert.Equal(t, num, got) gotPtr := setValueAt[*decimal128.Num](t, arr, 1) - if gotPtr != nil { - t.Errorf("expected nil for null decimal128") - } + assert.Nil(t, gotPtr, "expected nil for null decimal128") }) 
t.Run("decimal256", func(t *testing.T) { @@ -349,9 +295,7 @@ func TestSetDecimalValue(t *testing.T) { defer arr.Release() got := setValueAt[decimal256.Num](t, arr, 0) - if got != num { - t.Errorf("expected %v, got %v", num, got) - } + assert.Equal(t, num, got) }) t.Run("decimal32", func(t *testing.T) { @@ -365,14 +309,10 @@ func TestSetDecimalValue(t *testing.T) { defer arr.Release() got := setValueAt[decimal.Decimal32](t, arr, 0) - if got != num { - t.Errorf("expected %v, got %v", num, got) - } + assert.Equal(t, num, got) gotPtr := setValueAt[*decimal.Decimal32](t, arr, 1) - if gotPtr != nil { - t.Errorf("expected nil for null decimal32") - } + assert.Nil(t, gotPtr, "expected nil for null decimal32") }) t.Run("decimal64", func(t *testing.T) { @@ -385,9 +325,7 @@ func TestSetDecimalValue(t *testing.T) { defer arr.Release() got := setValueAt[decimal.Decimal64](t, arr, 0) - if got != num { - t.Errorf("expected %v, got %v", num, got) - } + assert.Equal(t, num, got) }) } @@ -417,9 +355,7 @@ func TestSetStructValue(t *testing.T) { []arrow.Array{nameArr, ageArr}, []string{"Name", "Age"}, ) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer sa.Release() type Person struct { @@ -428,19 +364,13 @@ func TestSetStructValue(t *testing.T) { } var got Person - if err := setValue(reflect.ValueOf(&got).Elem(), sa, 0); err != nil { - t.Fatal(err) - } - if got.Name != "Alice" || got.Age != 30 { - t.Errorf("expected Alice/30, got %+v", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), sa, 0)) + assert.Equal(t, "Alice", got.Name) + assert.Equal(t, int32(30), got.Age) - if err := setValue(reflect.ValueOf(&got).Elem(), sa, 1); err != nil { - t.Fatal(err) - } - if got.Name != "Bob" || got.Age != 25 { - t.Errorf("expected Bob/25, got %+v", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), sa, 1)) + assert.Equal(t, "Bob", got.Name) + assert.Equal(t, int32(25), got.Age) }) t.Run("arrow tag mapping", func(t *testing.T) { @@ -456,9 
+386,7 @@ func TestSetStructValue(t *testing.T) { []arrow.Array{nameArr}, []string{"full_name"}, ) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer sa.Release() type TaggedPerson struct { @@ -466,12 +394,8 @@ func TestSetStructValue(t *testing.T) { } var got TaggedPerson - if err := setValue(reflect.ValueOf(&got).Elem(), sa, 0); err != nil { - t.Fatal(err) - } - if got.FullName != "Charlie" { - t.Errorf("expected Charlie, got %q", got.FullName) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), sa, 0)) + assert.Equal(t, "Charlie", got.FullName) }) t.Run("missing arrow field leaves go field zero", func(t *testing.T) { @@ -487,9 +411,7 @@ func TestSetStructValue(t *testing.T) { []arrow.Array{nameArr}, []string{"Name"}, ) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer sa.Release() type PersonWithExtra struct { @@ -498,15 +420,9 @@ func TestSetStructValue(t *testing.T) { } var got PersonWithExtra - if err := setValue(reflect.ValueOf(&got).Elem(), sa, 0); err != nil { - t.Fatal(err) - } - if got.Name != "Dave" { - t.Errorf("expected Dave, got %q", got.Name) - } - if got.Email != "" { - t.Errorf("expected empty Email, got %q", got.Email) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), sa, 0)) + assert.Equal(t, "Dave", got.Name) + assert.Equal(t, "", got.Email) }) } @@ -529,23 +445,13 @@ func TestSetListValue(t *testing.T) { defer arr.Release() got := setValueAt[[]int32](t, arr, 0) - if !reflect.DeepEqual(got, []int32{1, 2, 3}) { - t.Errorf("expected [1,2,3], got %v", got) - } + assert.Equal(t, []int32{1, 2, 3}, got) - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { - t.Fatal(err) - } - if !reflect.DeepEqual(got, []int32{4, 5}) { - t.Errorf("expected [4,5], got %v", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + assert.Equal(t, []int32{4, 5}, got) - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 2); err != nil { - t.Fatal(err) - } - if got 
!= nil { - t.Errorf("expected nil slice for null list, got %v", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 2)) + assert.Nil(t, got, "expected nil slice for null list, got %v", got) }) t.Run("nested list of lists", func(t *testing.T) { @@ -575,18 +481,10 @@ func TestSetListValue(t *testing.T) { defer outerArr.Release() var got [][]int32 - if err := setValue(reflect.ValueOf(&got).Elem(), outerArr, 0); err != nil { - t.Fatal(err) - } - if len(got) != 2 { - t.Fatalf("expected 2 inner slices, got %d", len(got)) - } - if !reflect.DeepEqual(got[0], []int32{10, 20}) { - t.Errorf("expected [10,20], got %v", got[0]) - } - if !reflect.DeepEqual(got[1], []int32{30}) { - t.Errorf("expected [30], got %v", got[1]) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), outerArr, 0)) + require.Len(t, got, 2, "expected 2 inner slices, got %d", len(got)) + assert.Equal(t, []int32{10, 20}, got[0]) + assert.Equal(t, []int32{30}, got[1]) }) t.Run("large list view of int32", func(t *testing.T) { @@ -603,16 +501,10 @@ func TestSetListValue(t *testing.T) { defer arr.Release() got := setValueAt[[]int32](t, arr, 0) - if !reflect.DeepEqual(got, []int32{1, 2}) { - t.Errorf("row 0: expected [1,2], got %v", got) - } + assert.Equal(t, []int32{1, 2}, got, "row 0: expected [1,2], got %v", got) - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { - t.Fatal(err) - } - if !reflect.DeepEqual(got, []int32{3}) { - t.Errorf("row 1: expected [3], got %v", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + assert.Equal(t, []int32{3}, got, "row 1: expected [3], got %v", got) }) } @@ -642,23 +534,14 @@ func TestSetMapValue(t *testing.T) { defer arr.Release() got := setValueAt[map[string]int32](t, arr, 0) - if got["a"] != 1 || got["b"] != 2 { - t.Errorf("expected {a:1, b:2}, got %v", got) - } + assert.Equal(t, int32(1), got["a"]) + assert.Equal(t, int32(2), got["b"]) - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 
1); err != nil { - t.Fatal(err) - } - if got["x"] != 10 { - t.Errorf("expected {x:10}, got %v", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + assert.Equal(t, int32(10), got["x"]) - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 2); err != nil { - t.Fatal(err) - } - if got != nil { - t.Errorf("expected nil map for null, got %v", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 2)) + assert.Nil(t, got, "expected nil map for null, got %v", got) }) } @@ -680,24 +563,14 @@ func TestSetFixedSizeListValue(t *testing.T) { defer arr.Release() got := setValueAt[[3]int32](t, arr, 0) - if got != [3]int32{10, 20, 30} { - t.Errorf("expected [10,20,30], got %v", got) - } + assert.Equal(t, [3]int32{10, 20, 30}, got) - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { - t.Fatal(err) - } - if got != [3]int32{40, 50, 60} { - t.Errorf("expected [40,50,60], got %v", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + assert.Equal(t, [3]int32{40, 50, 60}, got) got = [3]int32{1, 2, 3} - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 2); err != nil { - t.Fatal(err) - } - if got != ([3]int32{}) { - t.Errorf("expected zero array for null, got %v", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 2)) + assert.Equal(t, [3]int32{}, got, "expected zero array for null, got %v", got) }) t.Run("go slice", func(t *testing.T) { @@ -712,9 +585,7 @@ func TestSetFixedSizeListValue(t *testing.T) { defer arr.Release() got := setValueAt[[]int32](t, arr, 0) - if !reflect.DeepEqual(got, []int32{7, 8}) { - t.Errorf("expected [7,8], got %v", got) - } + assert.Equal(t, []int32{7, 8}, got) }) t.Run("size mismatch returns error", func(t *testing.T) { @@ -729,9 +600,7 @@ func TestSetFixedSizeListValue(t *testing.T) { var got [2]int32 err := setValue(reflect.ValueOf(&got).Elem(), arr, 0) - if err == nil { - t.Error("expected error for size mismatch") - } + 
assert.Error(t, err, "expected error for size mismatch") }) } @@ -753,28 +622,16 @@ func TestSetDictionaryValue(t *testing.T) { defer arr.Release() got := setValueAt[string](t, arr, 0) - if got != "foo" { - t.Errorf("expected foo, got %q", got) - } + assert.Equal(t, "foo", got) - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 1); err != nil { - t.Fatal(err) - } - if got != "bar" { - t.Errorf("expected bar, got %q", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + assert.Equal(t, "bar", got) - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 2); err != nil { - t.Fatal(err) - } - if got != "foo" { - t.Errorf("expected foo, got %q", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 2)) + assert.Equal(t, "foo", got) gotPtr := setValueAt[*string](t, arr, 3) - if gotPtr != nil { - t.Errorf("expected nil for null dictionary entry") - } + assert.Nil(t, gotPtr, "expected nil for null dictionary entry") }) } @@ -795,29 +652,15 @@ func TestSetRunEndEncodedValue(t *testing.T) { defer arr.Release() got := setValueAt[string](t, arr, 0) - if got != "aaa" { - t.Errorf("expected aaa at logical 0, got %q", got) - } + assert.Equal(t, "aaa", got, "expected aaa at logical 0, got %q", got) - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 2); err != nil { - t.Fatal(err) - } - if got != "aaa" { - t.Errorf("expected aaa at logical 2, got %q", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 2)) + assert.Equal(t, "aaa", got, "expected aaa at logical 2, got %q", got) - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 3); err != nil { - t.Fatal(err) - } - if got != "bbb" { - t.Errorf("expected bbb at logical 3, got %q", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 3)) + assert.Equal(t, "bbb", got, "expected bbb at logical 3, got %q", got) - if err := setValue(reflect.ValueOf(&got).Elem(), arr, 4); err != nil { - t.Fatal(err) - } - if got != "bbb" { - 
t.Errorf("expected bbb at logical 4, got %q", got) - } + require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 4)) + assert.Equal(t, "bbb", got, "expected bbb at logical 4, got %q", got) }) } diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index b975f6e1..709b0d5c 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -27,6 +27,8 @@ import ( "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/decimal256" "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestBuildPrimitiveArray(t *testing.T) { @@ -36,39 +38,25 @@ func TestBuildPrimitiveArray(t *testing.T) { t.Run("int32", func(t *testing.T) { vals := []int32{1, 2, 3, 4, 5} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.Len() != 5 { - t.Errorf("expected 5, got %d", arr.Len()) - } - if arr.DataType().ID() != arrow.INT32 { - t.Errorf("expected INT32, got %v", arr.DataType()) - } + assert.Equal(t, 5, arr.Len()) + assert.Equal(t, arrow.INT32, arr.DataType().ID()) typed := arr.(*array.Int32) for i, want := range vals { - if typed.Value(i) != want { - t.Errorf("[%d] want %d, got %d", i, want, typed.Value(i)) - } + assert.Equal(t, want, typed.Value(i), "[%d] value mismatch", i) } }) t.Run("string", func(t *testing.T) { vals := []string{"hello", "world", "foo"} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.STRING { - t.Errorf("expected STRING, got %v", arr.DataType()) - } + assert.Equal(t, arrow.STRING, arr.DataType().ID()) typed := arr.(*array.String) for i, want := range vals { - if typed.Value(i) != want { - t.Errorf("[%d] want %q, got %q", i, want, 
typed.Value(i)) - } + assert.Equal(t, want, typed.Value(i), "[%d] value mismatch", i) } }) @@ -76,45 +64,29 @@ func TestBuildPrimitiveArray(t *testing.T) { v1, v3 := int32(10), int32(30) vals := []*int32{&v1, nil, &v3} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if !arr.IsNull(1) { - t.Error("expected index 1 to be null") - } + assert.True(t, arr.IsNull(1), "expected index 1 to be null") typed := arr.(*array.Int32) - if typed.Value(0) != 10 || typed.Value(2) != 30 { - t.Error("unexpected values") - } + assert.True(t, typed.Value(0) == 10 && typed.Value(2) == 30, "unexpected values") }) t.Run("bool", func(t *testing.T) { vals := []bool{true, false, true} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.BOOL { - t.Errorf("expected BOOL, got %v", arr.DataType()) - } + assert.Equal(t, arrow.BOOL, arr.DataType().ID()) typed := arr.(*array.Boolean) - if !typed.Value(0) || typed.Value(1) || !typed.Value(2) { - t.Error("unexpected bool values") - } + assert.True(t, typed.Value(0) && !typed.Value(1) && typed.Value(2), "unexpected bool values") }) t.Run("binary", func(t *testing.T) { vals := [][]byte{{1, 2, 3}, {4, 5}, {6}} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.BINARY { - t.Errorf("expected BINARY, got %v", arr.DataType()) - } + assert.Equal(t, arrow.BINARY, arr.DataType().ID()) }) t.Run("numeric_types", func(t *testing.T) { @@ -134,12 +106,8 @@ func TestBuildPrimitiveArray(t *testing.T) { } for _, tc := range cases { arr, err := buildArray(reflect.ValueOf(tc.vals), tagOpts{}, mem) - if err != nil { - t.Fatalf("type %v: %v", tc.id, err) - } - if arr.DataType().ID() != tc.id { - t.Errorf("expected %v, got 
%v", tc.id, arr.DataType()) - } + require.NoError(t, err, "type %v", tc.id) + assert.Equal(t, tc.id, arr.DataType().ID(), "expected %v, got %v", tc.id, arr.DataType()) arr.Release() } }) @@ -153,36 +121,24 @@ func TestBuildTemporalArray(t *testing.T) { now := time.Now().UTC() vals := []time.Time{now, now.Add(time.Hour)} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.TIMESTAMP { - t.Errorf("expected TIMESTAMP, got %v", arr.DataType()) - } + assert.Equal(t, arrow.TIMESTAMP, arr.DataType().ID()) typed := arr.(*array.Timestamp) for i, want := range vals { - if typed.Value(i) != arrow.Timestamp(want.UnixNano()) { - t.Errorf("[%d] timestamp mismatch", i) - } + assert.Equal(t, arrow.Timestamp(want.UnixNano()), typed.Value(i), "[%d] timestamp mismatch", i) } }) t.Run("time_duration", func(t *testing.T) { vals := []time.Duration{time.Second, time.Minute, time.Hour} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.DURATION { - t.Errorf("expected DURATION, got %v", arr.DataType()) - } + assert.Equal(t, arrow.DURATION, arr.DataType().ID()) typed := arr.(*array.Duration) for i, want := range vals { - if typed.Value(i) != arrow.Duration(want.Nanoseconds()) { - t.Errorf("[%d] duration mismatch", i) - } + assert.Equal(t, arrow.Duration(want.Nanoseconds()), typed.Value(i), "[%d] duration mismatch", i) } }) } @@ -198,18 +154,12 @@ func TestBuildDecimalArray(t *testing.T) { decimal128.New(0, 300), } arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.DECIMAL128 { - t.Errorf("expected DECIMAL128, got %v", arr.DataType()) - } + assert.Equal(t, arrow.DECIMAL128, arr.DataType().ID()) typed := 
arr.(*array.Decimal128) for i, want := range vals { - if typed.Value(i) != want { - t.Errorf("[%d] decimal128 mismatch", i) - } + assert.Equal(t, want, typed.Value(i), "[%d] decimal128 mismatch", i) } }) @@ -219,18 +169,12 @@ func TestBuildDecimalArray(t *testing.T) { decimal256.New(0, 0, 0, 200), } arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.DECIMAL256 { - t.Errorf("expected DECIMAL256, got %v", arr.DataType()) - } + assert.Equal(t, arrow.DECIMAL256, arr.DataType().ID()) typed := arr.(*array.Decimal256) for i, want := range vals { - if typed.Value(i) != want { - t.Errorf("[%d] decimal256 mismatch", i) - } + assert.Equal(t, want, typed.Value(i), "[%d] decimal256 mismatch", i) } }) @@ -238,49 +182,34 @@ func TestBuildDecimalArray(t *testing.T) { vals := []decimal128.Num{decimal128.New(0, 12345)} opts := tagOpts{HasDecimalOpts: true, DecimalPrecision: 10, DecimalScale: 3} arr, err := buildArray(reflect.ValueOf(vals), opts, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() dt := arr.DataType().(*arrow.Decimal128Type) - if dt.Precision != 10 || dt.Scale != 3 { - t.Errorf("expected p=10 s=3, got p=%d s=%d", dt.Precision, dt.Scale) - } + assert.Equal(t, int32(10), dt.Precision, "expected p=10, got p=%d", dt.Precision) + assert.Equal(t, int32(3), dt.Scale, "expected s=3, got s=%d", dt.Scale) }) t.Run("decimal32", func(t *testing.T) { vals := []decimal.Decimal32{100, 200, 300} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.DECIMAL32 { - t.Errorf("expected DECIMAL32, got %v", arr.DataType()) - } + assert.Equal(t, arrow.DECIMAL32, arr.DataType().ID()) typed := arr.(*array.Decimal32) for i, want := range vals { - if typed.Value(i) != want { - t.Errorf("[%d] decimal32 mismatch: 
got %v, want %v", i, typed.Value(i), want) - } + assert.Equal(t, want, typed.Value(i), "[%d] decimal32 mismatch", i) } }) t.Run("decimal64", func(t *testing.T) { vals := []decimal.Decimal64{1000, 2000} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.DECIMAL64 { - t.Errorf("expected DECIMAL64, got %v", arr.DataType()) - } + assert.Equal(t, arrow.DECIMAL64, arr.DataType().ID()) typed := arr.(*array.Decimal64) for i, want := range vals { - if typed.Value(i) != want { - t.Errorf("[%d] decimal64 mismatch: got %v, want %v", i, typed.Value(i), want) - } + assert.Equal(t, want, typed.Value(i), "[%d] decimal64 mismatch", i) } }) @@ -288,14 +217,11 @@ func TestBuildDecimalArray(t *testing.T) { vals := []decimal.Decimal32{12345} opts := tagOpts{HasDecimalOpts: true, DecimalPrecision: 9, DecimalScale: 2} arr, err := buildArray(reflect.ValueOf(vals), opts, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() dt := arr.DataType().(*arrow.Decimal32Type) - if dt.Precision != 9 || dt.Scale != 2 { - t.Errorf("expected p=9 s=2, got p=%d s=%d", dt.Precision, dt.Scale) - } + assert.Equal(t, int32(9), dt.Precision, "expected p=9, got p=%d", dt.Precision) + assert.Equal(t, int32(2), dt.Scale, "expected s=2, got s=%d", dt.Scale) }) } @@ -325,26 +251,16 @@ func TestBuildStructArray(t *testing.T) { {X: 3, Y: "three"}, } arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.STRUCT { - t.Fatalf("expected STRUCT, got %v", arr.DataType()) - } + require.Equal(t, arrow.STRUCT, arr.DataType().ID(), "expected STRUCT, got %v", arr.DataType()) typed := arr.(*array.Struct) - if typed.Len() != 3 { - t.Errorf("expected 3, got %d", typed.Len()) - } + assert.Equal(t, 3, typed.Len()) xArr := typed.Field(0).(*array.Int32) 
yArr := typed.Field(1).(*array.String) for i, want := range vals { - if xArr.Value(i) != want.X { - t.Errorf("[%d] X: want %d, got %d", i, want.X, xArr.Value(i)) - } - if yArr.Value(i) != want.Y { - t.Errorf("[%d] Y: want %q, got %q", i, want.Y, yArr.Value(i)) - } + assert.Equal(t, want.X, xArr.Value(i), "[%d] X mismatch", i) + assert.Equal(t, want.Y, yArr.Value(i), "[%d] Y mismatch", i) } }) @@ -352,16 +268,10 @@ func TestBuildStructArray(t *testing.T) { v1 := buildSimpleStruct{X: 42, Y: "answer"} vals := []*buildSimpleStruct{&v1, nil} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.Len() != 2 { - t.Errorf("expected 2, got %d", arr.Len()) - } - if !arr.IsNull(1) { - t.Error("expected index 1 to be null") - } + assert.Equal(t, 2, arr.Len()) + assert.True(t, arr.IsNull(1), "expected index 1 to be null") }) t.Run("nullable_fields", func(t *testing.T) { @@ -372,17 +282,11 @@ func TestBuildStructArray(t *testing.T) { {X: nil, Y: nil}, } arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() typed := arr.(*array.Struct) - if !typed.Field(0).IsNull(1) { - t.Error("expected X[1] to be null") - } - if !typed.Field(1).IsNull(1) { - t.Error("expected Y[1] to be null") - } + assert.True(t, typed.Field(0).IsNull(1), "expected X[1] to be null") + assert.True(t, typed.Field(1).IsNull(1), "expected Y[1] to be null") }) t.Run("nested_struct", func(t *testing.T) { @@ -391,23 +295,15 @@ func TestBuildStructArray(t *testing.T) { {A: 2, B: buildSimpleStruct{X: 20, Y: "inner2"}}, } arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.STRUCT { - t.Fatalf("expected STRUCT, got %v", arr.DataType()) - } + require.Equal(t, arrow.STRUCT, arr.DataType().ID(), "expected STRUCT, 
got %v", arr.DataType()) typed := arr.(*array.Struct) aArr := typed.Field(0).(*array.Int32) - if aArr.Value(0) != 1 || aArr.Value(1) != 2 { - t.Error("unexpected A values") - } + assert.True(t, aArr.Value(0) == 1 && aArr.Value(1) == 2, "unexpected A values") bArr := typed.Field(1).(*array.Struct) bxArr := bArr.Field(0).(*array.Int32) - if bxArr.Value(0) != 10 || bxArr.Value(1) != 20 { - t.Error("unexpected B.X values") - } + assert.True(t, bxArr.Value(0) == 10 && bxArr.Value(1) == 20, "unexpected B.X values") }) } @@ -418,63 +314,39 @@ func TestBuildListArray(t *testing.T) { t.Run("int32_lists", func(t *testing.T) { vals := [][]int32{{1, 2, 3}, {4, 5}, {6}} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.LIST { - t.Fatalf("expected LIST, got %v", arr.DataType()) - } + require.Equal(t, arrow.LIST, arr.DataType().ID(), "expected LIST, got %v", arr.DataType()) typed := arr.(*array.List) - if typed.Len() != 3 { - t.Errorf("expected 3, got %d", typed.Len()) - } - if typed.ListValues().(*array.Int32).Len() != 6 { - t.Errorf("expected 6 total values") - } + assert.Equal(t, 3, typed.Len()) + assert.Equal(t, 6, typed.ListValues().(*array.Int32).Len(), "expected 6 total values") }) t.Run("null_inner", func(t *testing.T) { vals := [][]int32{{1, 2}, nil, {3}} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if !arr.IsNull(1) { - t.Error("expected index 1 to be null") - } + assert.True(t, arr.IsNull(1), "expected index 1 to be null") }) t.Run("string_lists", func(t *testing.T) { vals := [][]string{{"a", "b"}, {"c"}} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.LIST { - t.Fatalf("expected LIST, got %v", arr.DataType()) - } 
+ require.Equal(t, arrow.LIST, arr.DataType().ID(), "expected LIST, got %v", arr.DataType()) }) t.Run("nested", func(t *testing.T) { vals := [][][]int32{{{1, 2}, {3}}, {{4, 5, 6}}} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.LIST { - t.Fatalf("expected outer LIST, got %v", arr.DataType()) - } + require.Equal(t, arrow.LIST, arr.DataType().ID(), "expected outer LIST, got %v", arr.DataType()) outer := arr.(*array.List) - if outer.Len() != 2 { - t.Errorf("expected 2 outer rows, got %d", outer.Len()) - } - if outer.ListValues().DataType().ID() != arrow.LIST { - t.Fatalf("expected inner LIST, got %v", outer.ListValues().DataType()) - } + assert.Equal(t, 2, outer.Len(), "expected 2 outer rows, got %d", outer.Len()) + require.Equal(t, arrow.LIST, outer.ListValues().DataType().ID(), "expected inner LIST, got %v", outer.ListValues().DataType()) }) } @@ -488,41 +360,27 @@ func TestBuildMapArray(t *testing.T) { {"c": 3}, } arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.MAP { - t.Fatalf("expected MAP, got %v", arr.DataType()) - } - if arr.(*array.Map).Len() != 2 { - t.Errorf("expected 2, got %d", arr.Len()) - } + require.Equal(t, arrow.MAP, arr.DataType().ID(), "expected MAP, got %v", arr.DataType()) + assert.Equal(t, 2, arr.(*array.Map).Len()) }) t.Run("null_map", func(t *testing.T) { vals := []map[string]int32{{"a": 1}, nil} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if !arr.IsNull(1) { - t.Error("expected index 1 to be null") - } + assert.True(t, arr.IsNull(1), "expected index 1 to be null") }) t.Run("entry_count", func(t *testing.T) { vals := []map[string]int32{{"x": 10, "y": 20, "z": 30}} arr, err := 
buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() kvArr := arr.(*array.Map).ListValues().(*array.Struct) - if kvArr.Len() != 3 { - t.Errorf("expected 3 key-value pairs, got %d", kvArr.Len()) - } + assert.Equal(t, 3, kvArr.Len(), "expected 3 key-value pairs, got %d", kvArr.Len()) }) } @@ -533,42 +391,24 @@ func TestBuildFixedSizeListArray(t *testing.T) { t.Run("int32_n3", func(t *testing.T) { vals := [][3]int32{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.FIXED_SIZE_LIST { - t.Fatalf("expected FIXED_SIZE_LIST, got %v", arr.DataType()) - } + require.Equal(t, arrow.FIXED_SIZE_LIST, arr.DataType().ID(), "expected FIXED_SIZE_LIST, got %v", arr.DataType()) typed := arr.(*array.FixedSizeList) - if typed.Len() != 3 { - t.Errorf("expected 3, got %d", typed.Len()) - } - if typed.DataType().(*arrow.FixedSizeListType).Len() != 3 { - t.Error("expected fixed size 3") - } + assert.Equal(t, 3, typed.Len()) + assert.Equal(t, int32(3), typed.DataType().(*arrow.FixedSizeListType).Len(), "expected fixed size 3") values := typed.ListValues().(*array.Int32) - if values.Len() != 9 { - t.Errorf("expected 9 values, got %d", values.Len()) - } - if values.Value(0) != 1 || values.Value(3) != 4 || values.Value(6) != 7 { - t.Error("unexpected values") - } + assert.Equal(t, 9, values.Len()) + assert.True(t, values.Value(0) == 1 && values.Value(3) == 4 && values.Value(6) == 7, "unexpected values") }) t.Run("float64_n2", func(t *testing.T) { vals := [][2]float64{{1.0, 2.0}, {3.0, 4.0}} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.FIXED_SIZE_LIST { - t.Fatalf("expected FIXED_SIZE_LIST, got %v", arr.DataType()) - } - if 
arr.DataType().(*arrow.FixedSizeListType).Len() != 2 { - t.Error("expected fixed size 2") - } + require.Equal(t, arrow.FIXED_SIZE_LIST, arr.DataType().ID(), "expected FIXED_SIZE_LIST, got %v", arr.DataType()) + assert.Equal(t, int32(2), arr.DataType().(*arrow.FixedSizeListType).Len(), "expected fixed size 2") }) } @@ -579,52 +419,32 @@ func TestBuildDictionaryArray(t *testing.T) { t.Run("string_dict", func(t *testing.T) { vals := []string{"apple", "banana", "apple", "cherry", "banana", "apple"} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.DICTIONARY { - t.Fatalf("expected DICTIONARY, got %v", arr.DataType()) - } + require.Equal(t, arrow.DICTIONARY, arr.DataType().ID(), "expected DICTIONARY, got %v", arr.DataType()) typed := arr.(*array.Dictionary) - if typed.Len() != 6 { - t.Errorf("expected 6, got %d", typed.Len()) - } - if typed.Dictionary().Len() != 3 { - t.Errorf("expected 3 unique, got %d", typed.Dictionary().Len()) - } + assert.Equal(t, 6, typed.Len()) + assert.Equal(t, 3, typed.Dictionary().Len(), "expected 3 unique, got %d", typed.Dictionary().Len()) }) t.Run("int32_dict", func(t *testing.T) { vals := []int32{1, 2, 1, 3, 2, 1} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.DICTIONARY { - t.Fatalf("expected DICTIONARY, got %v", arr.DataType()) - } + require.Equal(t, arrow.DICTIONARY, arr.DataType().ID(), "expected DICTIONARY, got %v", arr.DataType()) typed := arr.(*array.Dictionary) - if typed.Len() != 6 { - t.Errorf("expected 6, got %d", typed.Len()) - } - if typed.Dictionary().Len() != 3 { - t.Errorf("expected 3 unique, got %d", typed.Dictionary().Len()) - } + assert.Equal(t, 6, typed.Len()) + assert.Equal(t, 3, typed.Dictionary().Len(), "expected 3 unique, got %d", 
typed.Dictionary().Len()) }) t.Run("index_type_is_int32", func(t *testing.T) { vals := []string{"x", "y", "z"} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() dt := arr.DataType().(*arrow.DictionaryType) - if dt.IndexType.ID() != arrow.INT32 { - t.Errorf("expected INT32 index, got %v", dt.IndexType) - } + assert.Equal(t, arrow.INT32, dt.IndexType.ID(), "expected INT32 index, got %v", dt.IndexType) }) } @@ -635,105 +455,64 @@ func TestBuildRunEndEncodedArray(t *testing.T) { t.Run("int32_runs", func(t *testing.T) { vals := []int32{1, 1, 1, 2, 2, 3} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.RUN_END_ENCODED { - t.Fatalf("expected RUN_END_ENCODED, got %v", arr.DataType()) - } + require.Equal(t, arrow.RUN_END_ENCODED, arr.DataType().ID(), "expected RUN_END_ENCODED, got %v", arr.DataType()) ree := arr.(*array.RunEndEncoded) - if ree.Len() != 6 { - t.Errorf("expected 6, got %d", ree.Len()) - } + assert.Equal(t, 6, ree.Len()) runEnds := ree.RunEndsArr().(*array.Int32) - if runEnds.Len() != 3 { - t.Errorf("expected 3 runs, got %d", runEnds.Len()) - } - if runEnds.Value(0) != 3 || runEnds.Value(1) != 5 || runEnds.Value(2) != 6 { - t.Errorf("unexpected run ends: %d %d %d", - runEnds.Value(0), runEnds.Value(1), runEnds.Value(2)) - } + assert.Equal(t, 3, runEnds.Len(), "expected 3 runs, got %d", runEnds.Len()) + assert.True(t, runEnds.Value(0) == 3 && runEnds.Value(1) == 5 && runEnds.Value(2) == 6, + "unexpected run ends: %d %d %d", runEnds.Value(0), runEnds.Value(1), runEnds.Value(2)) values := ree.Values().(*array.Int32) - if values.Len() != 3 { - t.Errorf("expected 3 values, got %d", values.Len()) - } - if values.Value(0) != 1 || values.Value(1) != 2 || values.Value(2) != 3 { - t.Errorf("unexpected values: %d %d %d", - 
values.Value(0), values.Value(1), values.Value(2)) - } + assert.Equal(t, 3, values.Len(), "expected 3 values, got %d", values.Len()) + assert.True(t, values.Value(0) == 1 && values.Value(1) == 2 && values.Value(2) == 3, + "unexpected values: %d %d %d", values.Value(0), values.Value(1), values.Value(2)) }) t.Run("string_runs", func(t *testing.T) { vals := []string{"a", "a", "b", "b", "b", "c"} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.RUN_END_ENCODED { - t.Fatalf("expected RUN_END_ENCODED, got %v", arr.DataType()) - } + require.Equal(t, arrow.RUN_END_ENCODED, arr.DataType().ID(), "expected RUN_END_ENCODED, got %v", arr.DataType()) ree := arr.(*array.RunEndEncoded) - if ree.Len() != 6 { - t.Errorf("expected 6, got %d", ree.Len()) - } - if ree.RunEndsArr().Len() != 3 { - t.Errorf("expected 3 runs, got %d", ree.RunEndsArr().Len()) - } + assert.Equal(t, 6, ree.Len()) + assert.Equal(t, 3, ree.RunEndsArr().Len(), "expected 3 runs, got %d", ree.RunEndsArr().Len()) }) t.Run("single_run", func(t *testing.T) { vals := []int32{42, 42, 42} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() ree := arr.(*array.RunEndEncoded) - if ree.Len() != 3 { - t.Errorf("expected 3, got %d", ree.Len()) - } + assert.Equal(t, 3, ree.Len()) runEnds := ree.RunEndsArr().(*array.Int32) - if runEnds.Len() != 1 || runEnds.Value(0) != 3 { - t.Errorf("expected 1 run ending at 3, got %d runs, end=%d", - runEnds.Len(), runEnds.Value(0)) - } + assert.True(t, runEnds.Len() == 1 && runEnds.Value(0) == 3, + "expected 1 run ending at 3, got %d runs, end=%d", runEnds.Len(), runEnds.Value(0)) }) t.Run("all_distinct", func(t *testing.T) { vals := []int32{1, 2, 3, 4, 5} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem) - if err != nil { - 
t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() ree := arr.(*array.RunEndEncoded) - if ree.Len() != 5 { - t.Errorf("expected 5, got %d", ree.Len()) - } - if ree.RunEndsArr().Len() != 5 { - t.Errorf("expected 5 runs for all-distinct, got %d", ree.RunEndsArr().Len()) - } + assert.Equal(t, 5, ree.Len()) + assert.Equal(t, 5, ree.RunEndsArr().Len(), "expected 5 runs for all-distinct, got %d", ree.RunEndsArr().Len()) }) t.Run("pointer_value_equality", func(t *testing.T) { - // Two distinct *string pointers pointing to equal values "x" - // Should produce ONE run, not two (value equality, not address equality) x1 := "x" x2 := "x" y := "y" vals := []*string{&x1, &x2, &y} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } + require.NoError(t, err, "unexpected error") defer arr.Release() - // "x","x" is one run; "y" is another → 2 runs total ree := arr.(*array.RunEndEncoded) - if ree.RunEndsArr().Len() != 2 { - t.Errorf("expected 2 runs (x+x coalesced, y), got %d", ree.RunEndsArr().Len()) - } + assert.Equal(t, 2, ree.RunEndsArr().Len(), "expected 2 runs (x+x coalesced, y), got %d", ree.RunEndsArr().Len()) }) } @@ -744,64 +523,43 @@ func TestBuildListViewArray(t *testing.T) { t.Run("int32_listview", func(t *testing.T) { vals := [][]int32{{1, 2, 3}, {4, 5}, {6}} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.LIST_VIEW { - t.Fatalf("expected LIST_VIEW, got %v", arr.DataType()) - } + require.Equal(t, arrow.LIST_VIEW, arr.DataType().ID(), "expected LIST_VIEW, got %v", arr.DataType()) typed := arr.(*array.ListView) - if typed.Len() != 3 { - t.Errorf("expected 3, got %d", typed.Len()) - } + assert.Equal(t, 3, typed.Len()) }) t.Run("null_entry", func(t *testing.T) { vals := [][]int32{{1, 2}, nil, {3}} arr, err := 
buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if !arr.IsNull(1) { - t.Error("expected index 1 to be null") - } + assert.True(t, arr.IsNull(1), "expected index 1 to be null") }) t.Run("string_listview", func(t *testing.T) { vals := [][]string{{"hello", "world"}, {"foo"}, {"a", "b", "c"}} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.LIST_VIEW { - t.Fatalf("expected LIST_VIEW, got %v", arr.DataType()) - } - if arr.Len() != 3 { - t.Errorf("expected 3, got %d", arr.Len()) - } + require.Equal(t, arrow.LIST_VIEW, arr.DataType().ID(), "expected LIST_VIEW, got %v", arr.DataType()) + assert.Equal(t, 3, arr.Len()) }) t.Run("total_values", func(t *testing.T) { vals := [][]int32{{10, 20}, {30}} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() allVals := arr.(*array.ListView).ListValues().(*array.Int32) - if allVals.Len() != 3 { - t.Errorf("expected 3 total values, got %d", allVals.Len()) - } + assert.Equal(t, 3, allVals.Len(), "expected 3 total values, got %d", allVals.Len()) }) } func TestBuildTemporalTaggedArray(t *testing.T) { mem := memory.NewGoAllocator() - // reference time-of-day: 2024-01-15 10:30:00 UTC ref := time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC) t.Run("date32", func(t *testing.T) { @@ -809,22 +567,14 @@ func TestBuildTemporalTaggedArray(t *testing.T) { opts := tagOpts{Temporal: "date32"} sv := reflect.ValueOf(vals) arr, err := buildTemporalArray(sv, opts, mem) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } + require.NoError(t, err, "unexpected error") defer arr.Release() - if arr.DataType().ID() != arrow.DATE32 { - t.Errorf("expected DATE32, got %v", arr.DataType().ID()) - } - if arr.Len() 
!= 2 { - t.Errorf("expected len 2, got %d", arr.Len()) - } - // roundtrip: convert back and check date + assert.Equal(t, arrow.DATE32, arr.DataType().ID()) + assert.Equal(t, 2, arr.Len()) d32arr := arr.(*array.Date32) got0 := d32arr.Value(0).ToTime() - if got0.Year() != ref.Year() || got0.Month() != ref.Month() || got0.Day() != ref.Day() { - t.Errorf("date32 roundtrip: got %v, want %v", got0, ref) - } + assert.True(t, got0.Year() == ref.Year() && got0.Month() == ref.Month() && got0.Day() == ref.Day(), + "date32 roundtrip: got %v, want %v", got0, ref) }) t.Run("date64", func(t *testing.T) { @@ -832,18 +582,13 @@ func TestBuildTemporalTaggedArray(t *testing.T) { opts := tagOpts{Temporal: "date64"} sv := reflect.ValueOf(vals) arr, err := buildTemporalArray(sv, opts, mem) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } + require.NoError(t, err, "unexpected error") defer arr.Release() - if arr.DataType().ID() != arrow.DATE64 { - t.Errorf("expected DATE64, got %v", arr.DataType().ID()) - } + assert.Equal(t, arrow.DATE64, arr.DataType().ID()) d64arr := arr.(*array.Date64) got0 := d64arr.Value(0).ToTime() - if got0.Year() != ref.Year() || got0.Month() != ref.Month() || got0.Day() != ref.Day() { - t.Errorf("date64 roundtrip: got %v, want %v", got0, ref) - } + assert.True(t, got0.Year() == ref.Year() && got0.Month() == ref.Month() && got0.Day() == ref.Day(), + "date64 roundtrip: got %v, want %v", got0, ref) }) t.Run("time32", func(t *testing.T) { @@ -851,37 +596,26 @@ func TestBuildTemporalTaggedArray(t *testing.T) { opts := tagOpts{Temporal: "time32"} sv := reflect.ValueOf(vals) arr, err := buildTemporalArray(sv, opts, mem) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } + require.NoError(t, err, "unexpected error") defer arr.Release() - if arr.DataType().ID() != arrow.TIME32 { - t.Errorf("expected TIME32, got %v", arr.DataType().ID()) - } - if arr.Len() != 1 { - t.Errorf("expected len 1, got %d", arr.Len()) - } + assert.Equal(t, arrow.TIME32, 
arr.DataType().ID()) + assert.Equal(t, 1, arr.Len()) t32arr := arr.(*array.Time32) unit := arr.DataType().(*arrow.Time32Type).Unit got0 := t32arr.Value(0).ToTime(unit) - if got0.Hour() != ref.Hour() || got0.Minute() != ref.Minute() || got0.Second() != ref.Second() { - t.Errorf("time32 roundtrip: got hour=%d min=%d sec=%d, want hour=%d min=%d sec=%d", - got0.Hour(), got0.Minute(), got0.Second(), - ref.Hour(), ref.Minute(), ref.Second()) - } + assert.True(t, got0.Hour() == ref.Hour() && got0.Minute() == ref.Minute() && got0.Second() == ref.Second(), + "time32 roundtrip: got hour=%d min=%d sec=%d, want hour=%d min=%d sec=%d", + got0.Hour(), got0.Minute(), got0.Second(), + ref.Hour(), ref.Minute(), ref.Second()) refWithMs := time.Date(ref.Year(), ref.Month(), ref.Day(), ref.Hour(), ref.Minute(), ref.Second(), 500_000_000, ref.Location()) svMs := reflect.ValueOf([]time.Time{refWithMs}) arrMs, err := buildTemporalArray(svMs, tagOpts{Temporal: "time32"}, mem) - if err != nil { - t.Fatalf("time32 with ms: %v", err) - } + require.NoError(t, err, "time32 with ms") defer arrMs.Release() t32ms := arrMs.(*array.Time32) unitMs := arrMs.DataType().(*arrow.Time32Type).Unit gotMs := t32ms.Value(0).ToTime(unitMs) - if gotMs.Nanosecond()/1_000_000 != 500 { - t.Errorf("time32 millisecond: got %d ms, want 500 ms", gotMs.Nanosecond()/1_000_000) - } + assert.Equal(t, 500, gotMs.Nanosecond()/1_000_000, "time32 millisecond: got %d ms, want 500 ms", gotMs.Nanosecond()/1_000_000) }) t.Run("time64", func(t *testing.T) { @@ -889,32 +623,23 @@ func TestBuildTemporalTaggedArray(t *testing.T) { opts := tagOpts{Temporal: "time64"} sv := reflect.ValueOf(vals) arr, err := buildTemporalArray(sv, opts, mem) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } + require.NoError(t, err, "unexpected error") defer arr.Release() - if arr.DataType().ID() != arrow.TIME64 { - t.Errorf("expected TIME64, got %v", arr.DataType().ID()) - } + assert.Equal(t, arrow.TIME64, arr.DataType().ID()) t64arr := 
arr.(*array.Time64) unit := arr.DataType().(*arrow.Time64Type).Unit got0 := t64arr.Value(0).ToTime(unit) - if got0.Hour() != ref.Hour() || got0.Minute() != ref.Minute() || got0.Second() != ref.Second() { - t.Errorf("time64 roundtrip: got %v, want %v", got0, ref) - } - // time64 uses nanosecond unit — verify full nanosecond precision + assert.True(t, got0.Hour() == ref.Hour() && got0.Minute() == ref.Minute() && got0.Second() == ref.Second(), + "time64 roundtrip: got %v, want %v", got0, ref) refWithNanos := time.Date(ref.Year(), ref.Month(), ref.Day(), ref.Hour(), ref.Minute(), ref.Second(), 123456789, ref.Location()) sv64 := reflect.ValueOf([]time.Time{refWithNanos}) arr64, err := buildTemporalArray(sv64, tagOpts{Temporal: "time64"}, mem) - if err != nil { - t.Fatalf("time64 with nanos: %v", err) - } + require.NoError(t, err, "time64 with nanos") defer arr64.Release() t64arr64 := arr64.(*array.Time64) unit64 := arr64.DataType().(*arrow.Time64Type).Unit got64 := t64arr64.Value(0).ToTime(unit64) - if got64.Nanosecond() != refWithNanos.Nanosecond() { - t.Errorf("time64 nanosecond: got %d, want %d", got64.Nanosecond(), refWithNanos.Nanosecond()) - } + assert.Equal(t, refWithNanos.Nanosecond(), got64.Nanosecond(), + "time64 nanosecond: got %d, want %d", got64.Nanosecond(), refWithNanos.Nanosecond()) }) } diff --git a/arrow/arreflect/reflect_infer_test.go b/arrow/arreflect/reflect_infer_test.go index d5449b42..614caa3f 100644 --- a/arrow/arreflect/reflect_infer_test.go +++ b/arrow/arreflect/reflect_infer_test.go @@ -17,7 +17,6 @@ package arreflect import ( - "errors" "reflect" "strings" "testing" @@ -27,6 +26,8 @@ import ( "github.com/apache/arrow-go/v18/arrow/decimal" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/decimal256" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestInferPrimitiveArrowType(t *testing.T) { @@ -65,17 +66,11 @@ func TestInferPrimitiveArrowType(t *testing.T) { 
t.Run(tc.name, func(t *testing.T) { got, err := inferPrimitiveArrowType(tc.goType) if tc.wantErr { - if err == nil { - t.Fatalf("expected error, got nil (type: %v)", got) - } + require.Error(t, err, "expected error, got nil (type: %v)", got) return } - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if got.ID() != tc.wantID { - t.Errorf("got ID %v, want %v", got.ID(), tc.wantID) - } + require.NoError(t, err) + assert.Equal(t, tc.wantID, got.ID()) }) } } @@ -83,36 +78,22 @@ func TestInferPrimitiveArrowType(t *testing.T) { func TestInferArrowType(t *testing.T) { t.Run("[]int32 is LIST", func(t *testing.T) { dt, err := inferArrowType(reflect.TypeOf([]int32{})) - if err != nil { - t.Fatal(err) - } - if dt.ID() != arrow.LIST { - t.Errorf("got %v, want LIST", dt.ID()) - } + require.NoError(t, err) + assert.Equal(t, arrow.LIST, dt.ID()) }) t.Run("[3]float64 is FIXED_SIZE_LIST size 3", func(t *testing.T) { dt, err := inferArrowType(reflect.TypeOf([3]float64{})) - if err != nil { - t.Fatal(err) - } - if dt.ID() != arrow.FIXED_SIZE_LIST { - t.Errorf("got %v, want FIXED_SIZE_LIST", dt.ID()) - } + require.NoError(t, err) + assert.Equal(t, arrow.FIXED_SIZE_LIST, dt.ID()) fsl := dt.(*arrow.FixedSizeListType) - if fsl.Len() != 3 { - t.Errorf("got size %d, want 3", fsl.Len()) - } + assert.Equal(t, int32(3), fsl.Len()) }) t.Run("map[string]int64 is MAP", func(t *testing.T) { dt, err := inferArrowType(reflect.TypeOf(map[string]int64{})) - if err != nil { - t.Fatal(err) - } - if dt.ID() != arrow.MAP { - t.Errorf("got %v, want MAP", dt.ID()) - } + require.NoError(t, err) + assert.Equal(t, arrow.MAP, dt.ID()) }) t.Run("struct with 2 fields is STRUCT", func(t *testing.T) { @@ -121,16 +102,10 @@ func TestInferArrowType(t *testing.T) { Age int32 } dt, err := inferArrowType(reflect.TypeOf(S{})) - if err != nil { - t.Fatal(err) - } - if dt.ID() != arrow.STRUCT { - t.Errorf("got %v, want STRUCT", dt.ID()) - } + require.NoError(t, err) + assert.Equal(t, arrow.STRUCT, dt.ID()) st 
:= dt.(*arrow.StructType) - if st.NumFields() != 2 { - t.Errorf("got %d fields, want 2", st.NumFields()) - } + assert.Equal(t, 2, st.NumFields()) }) t.Run("[]map[string]struct{Score float64} nested", func(t *testing.T) { @@ -138,30 +113,18 @@ func TestInferArrowType(t *testing.T) { Score float64 } dt, err := inferArrowType(reflect.TypeOf([]map[string]Inner{})) - if err != nil { - t.Fatal(err) - } - if dt.ID() != arrow.LIST { - t.Errorf("got %v, want LIST", dt.ID()) - } + require.NoError(t, err) + assert.Equal(t, arrow.LIST, dt.ID()) lt := dt.(*arrow.ListType) - if lt.Elem().ID() != arrow.MAP { - t.Errorf("list elem got %v, want MAP", lt.Elem().ID()) - } + assert.Equal(t, arrow.MAP, lt.Elem().ID()) mt := lt.Elem().(*arrow.MapType) - if mt.ValueType().ID() != arrow.STRUCT { - t.Errorf("map value got %v, want STRUCT", mt.ValueType().ID()) - } + assert.Equal(t, arrow.STRUCT, mt.ValueType().ID()) }) t.Run("*[]string pointer to slice is LIST", func(t *testing.T) { dt, err := inferArrowType(reflect.TypeOf((*[]string)(nil))) - if err != nil { - t.Fatal(err) - } - if dt.ID() != arrow.LIST { - t.Errorf("got %v, want LIST", dt.ID()) - } + require.NoError(t, err) + assert.Equal(t, arrow.LIST, dt.ID()) }) } @@ -172,18 +135,12 @@ func TestInferStructType(t *testing.T) { Score float32 } st, err := inferStructType(reflect.TypeOf(S{})) - if err != nil { - t.Fatal(err) - } - if st.NumFields() != 2 { - t.Fatalf("got %d fields, want 2", st.NumFields()) - } - if st.Field(0).Name != "Name" || st.Field(0).Type.ID() != arrow.STRING { - t.Errorf("field 0: got %v/%v, want Name/STRING", st.Field(0).Name, st.Field(0).Type.ID()) - } - if st.Field(1).Name != "Score" || st.Field(1).Type.ID() != arrow.FLOAT32 { - t.Errorf("field 1: got %v/%v, want Score/FLOAT32", st.Field(1).Name, st.Field(1).Type.ID()) - } + require.NoError(t, err) + require.Equal(t, 2, st.NumFields()) + assert.Equal(t, "Name", st.Field(0).Name) + assert.Equal(t, arrow.STRING, st.Field(0).Type.ID()) + assert.Equal(t, "Score", 
st.Field(1).Name) + assert.Equal(t, arrow.FLOAT32, st.Field(1).Type.ID()) }) t.Run("pointer fields are nullable", func(t *testing.T) { @@ -192,15 +149,9 @@ func TestInferStructType(t *testing.T) { Label *string } st, err := inferStructType(reflect.TypeOf(S{})) - if err != nil { - t.Fatal(err) - } - if st.Field(0).Nullable { - t.Errorf("ID should not be nullable") - } - if !st.Field(1).Nullable { - t.Errorf("Label should be nullable") - } + require.NoError(t, err) + assert.False(t, st.Field(0).Nullable, "ID should not be nullable") + assert.True(t, st.Field(1).Nullable, "Label should be nullable") }) t.Run("arrow:\"-\" tagged field is excluded", func(t *testing.T) { @@ -209,15 +160,9 @@ func TestInferStructType(t *testing.T) { Hidden int32 `arrow:"-"` } st, err := inferStructType(reflect.TypeOf(S{})) - if err != nil { - t.Fatal(err) - } - if st.NumFields() != 1 { - t.Errorf("got %d fields, want 1", st.NumFields()) - } - if st.Field(0).Name != "Keep" { - t.Errorf("got field name %q, want Keep", st.Field(0).Name) - } + require.NoError(t, err) + assert.Equal(t, 1, st.NumFields()) + assert.Equal(t, "Keep", st.Field(0).Name) }) t.Run("arrow custom name tag", func(t *testing.T) { @@ -225,12 +170,8 @@ func TestInferStructType(t *testing.T) { GoName int64 `arrow:"custom_name"` } st, err := inferStructType(reflect.TypeOf(S{})) - if err != nil { - t.Fatal(err) - } - if st.Field(0).Name != "custom_name" { - t.Errorf("got %q, want custom_name", st.Field(0).Name) - } + require.NoError(t, err) + assert.Equal(t, "custom_name", st.Field(0).Name) }) t.Run("decimal128 with precision/scale tag", func(t *testing.T) { @@ -238,17 +179,12 @@ func TestInferStructType(t *testing.T) { Amount decimal128.Num `arrow:",decimal(18,2)"` } st, err := inferStructType(reflect.TypeOf(S{})) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) dt := st.Field(0).Type - if dt.ID() != arrow.DECIMAL128 { - t.Fatalf("got %v, want DECIMAL128", dt.ID()) - } + require.Equal(t, arrow.DECIMAL128, 
dt.ID()) d128 := dt.(*arrow.Decimal128Type) - if d128.Precision != 18 || d128.Scale != 2 { - t.Errorf("got precision=%d scale=%d, want 18,2", d128.Precision, d128.Scale) - } + assert.Equal(t, int32(18), d128.Precision) + assert.Equal(t, int32(2), d128.Scale) }) t.Run("decimal256 with precision/scale tag", func(t *testing.T) { @@ -256,17 +192,12 @@ func TestInferStructType(t *testing.T) { Amount decimal256.Num `arrow:",decimal(40,5)"` } st, err := inferStructType(reflect.TypeOf(S{})) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) dt := st.Field(0).Type - if dt.ID() != arrow.DECIMAL256 { - t.Fatalf("got %v, want DECIMAL256", dt.ID()) - } + require.Equal(t, arrow.DECIMAL256, dt.ID()) d256 := dt.(*arrow.Decimal256Type) - if d256.Precision != 40 || d256.Scale != 5 { - t.Errorf("got precision=%d scale=%d, want 40,5", d256.Precision, d256.Scale) - } + assert.Equal(t, int32(40), d256.Precision) + assert.Equal(t, int32(5), d256.Scale) }) t.Run("decimal32 with precision/scale tag", func(t *testing.T) { @@ -274,24 +205,17 @@ func TestInferStructType(t *testing.T) { Amount decimal.Decimal32 `arrow:",decimal(9,2)"` } st, err := inferStructType(reflect.TypeOf(S{})) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) dt := st.Field(0).Type - if dt.ID() != arrow.DECIMAL32 { - t.Fatalf("got %v, want DECIMAL32", dt.ID()) - } + require.Equal(t, arrow.DECIMAL32, dt.ID()) d32 := dt.(*arrow.Decimal32Type) - if d32.Precision != 9 || d32.Scale != 2 { - t.Errorf("got precision=%d scale=%d, want 9,2", d32.Precision, d32.Scale) - } + assert.Equal(t, int32(9), d32.Precision) + assert.Equal(t, int32(2), d32.Scale) }) t.Run("non-struct returns error", func(t *testing.T) { _, err := inferStructType(reflect.TypeOf(42)) - if err == nil { - t.Error("expected error for non-struct, got nil") - } + assert.Error(t, err, "expected error for non-struct, got nil") }) t.Run("time.Time with date32 tag maps to DATE32", func(t *testing.T) { @@ -299,13 +223,9 @@ func 
TestInferStructType(t *testing.T) { Ts time.Time `arrow:",date32"` } st, err := inferStructType(reflect.TypeOf(S{})) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) dt := st.Field(0).Type - if dt.ID() != arrow.DATE32 { - t.Errorf("got %v, want DATE32", dt.ID()) - } + assert.Equal(t, arrow.DATE32, dt.ID()) }) } @@ -317,21 +237,14 @@ func TestInferArrowSchema(t *testing.T) { Score float64 } schema, err := InferSchema[S]() - if err != nil { - t.Fatal(err) - } - if schema.NumFields() != 3 { - t.Fatalf("got %d fields, want 3", schema.NumFields()) - } - if schema.Field(0).Name != "Name" || schema.Field(0).Type.ID() != arrow.STRING { - t.Errorf("field 0: got %v/%v, want Name/STRING", schema.Field(0).Name, schema.Field(0).Type.ID()) - } - if schema.Field(1).Name != "Age" || schema.Field(1).Type.ID() != arrow.INT32 { - t.Errorf("field 1: got %v/%v, want Age/INT32", schema.Field(1).Name, schema.Field(1).Type.ID()) - } - if schema.Field(2).Name != "Score" || schema.Field(2).Type.ID() != arrow.FLOAT64 { - t.Errorf("field 2: got %v/%v, want Score/FLOAT64", schema.Field(2).Name, schema.Field(2).Type.ID()) - } + require.NoError(t, err) + require.Equal(t, 3, schema.NumFields()) + assert.Equal(t, "Name", schema.Field(0).Name) + assert.Equal(t, arrow.STRING, schema.Field(0).Type.ID()) + assert.Equal(t, "Age", schema.Field(1).Name) + assert.Equal(t, arrow.INT32, schema.Field(1).Type.ID()) + assert.Equal(t, "Score", schema.Field(2).Name) + assert.Equal(t, arrow.FLOAT64, schema.Field(2).Type.ID()) }) t.Run("pointer fields are nullable", func(t *testing.T) { @@ -340,15 +253,9 @@ func TestInferArrowSchema(t *testing.T) { Label *string } schema, err := InferSchema[S]() - if err != nil { - t.Fatal(err) - } - if schema.Field(0).Nullable { - t.Errorf("ID should not be nullable") - } - if !schema.Field(1).Nullable { - t.Errorf("Label should be nullable") - } + require.NoError(t, err) + assert.False(t, schema.Field(0).Nullable, "ID should not be nullable") + assert.True(t, 
schema.Field(1).Nullable, "Label should be nullable") }) t.Run("arrow:\"-\" tag excludes field", func(t *testing.T) { @@ -357,15 +264,9 @@ func TestInferArrowSchema(t *testing.T) { Hidden int32 `arrow:"-"` } schema, err := InferSchema[S]() - if err != nil { - t.Fatal(err) - } - if schema.NumFields() != 1 { - t.Errorf("got %d fields, want 1", schema.NumFields()) - } - if schema.Field(0).Name != "Keep" { - t.Errorf("got field name %q, want Keep", schema.Field(0).Name) - } + require.NoError(t, err) + assert.Equal(t, 1, schema.NumFields()) + assert.Equal(t, "Keep", schema.Field(0).Name) }) t.Run("arrow custom name tag", func(t *testing.T) { @@ -373,62 +274,40 @@ func TestInferArrowSchema(t *testing.T) { GoName int64 `arrow:"custom_name"` } schema, err := InferSchema[S]() - if err != nil { - t.Fatal(err) - } - if schema.Field(0).Name != "custom_name" { - t.Errorf("got %q, want custom_name", schema.Field(0).Name) - } + require.NoError(t, err) + assert.Equal(t, "custom_name", schema.Field(0).Name) }) t.Run("non-struct type returns error", func(t *testing.T) { _, err := InferSchema[int]() - if err == nil { - t.Error("expected error for non-struct, got nil") - } + assert.Error(t, err, "expected error for non-struct, got nil") }) } func TestInferArrowTypePublic(t *testing.T) { t.Run("int32 is INT32", func(t *testing.T) { dt, err := InferType[int32]() - if err != nil { - t.Fatal(err) - } - if dt.ID() != arrow.INT32 { - t.Errorf("got %v, want INT32", dt.ID()) - } + require.NoError(t, err) + assert.Equal(t, arrow.INT32, dt.ID()) }) t.Run("[]string is LIST", func(t *testing.T) { dt, err := InferType[[]string]() - if err != nil { - t.Fatal(err) - } - if dt.ID() != arrow.LIST { - t.Errorf("got %v, want LIST", dt.ID()) - } + require.NoError(t, err) + assert.Equal(t, arrow.LIST, dt.ID()) }) t.Run("map[string]float64 is MAP", func(t *testing.T) { dt, err := InferType[map[string]float64]() - if err != nil { - t.Fatal(err) - } - if dt.ID() != arrow.MAP { - t.Errorf("got %v, want MAP", 
dt.ID()) - } + require.NoError(t, err) + assert.Equal(t, arrow.MAP, dt.ID()) }) t.Run("struct{X int32} is STRUCT", func(t *testing.T) { type S struct{ X int32 } dt, err := InferType[S]() - if err != nil { - t.Fatal(err) - } - if dt.ID() != arrow.STRUCT { - t.Errorf("got %v, want STRUCT", dt.ID()) - } + require.NoError(t, err) + assert.Equal(t, arrow.STRUCT, dt.ID()) }) } @@ -438,16 +317,10 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { Name string `arrow:"name,dict"` } schema, err := InferSchema[S]() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } + require.NoError(t, err) f, ok := schema.FieldsByName("name") - if !ok || len(f) == 0 { - t.Fatal("field 'name' not found in schema") - } - if f[0].Type.ID() != arrow.DICTIONARY { - t.Errorf("got %v, want DICTIONARY", f[0].Type.ID()) - } + require.True(t, ok && len(f) > 0, "field 'name' not found in schema") + assert.Equal(t, arrow.DICTIONARY, f[0].Type.ID()) }) t.Run("listview-tagged []string field becomes LIST_VIEW", func(t *testing.T) { @@ -455,16 +328,10 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { Tags []string `arrow:"tags,listview"` } schema, err := InferSchema[S]() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } + require.NoError(t, err) f, ok := schema.FieldsByName("tags") - if !ok || len(f) == 0 { - t.Fatal("field 'tags' not found in schema") - } - if f[0].Type.ID() != arrow.LIST_VIEW { - t.Errorf("got %v, want LIST_VIEW", f[0].Type.ID()) - } + require.True(t, ok && len(f) > 0, "field 'tags' not found in schema") + assert.Equal(t, arrow.LIST_VIEW, f[0].Type.ID()) }) t.Run("ree-tagged field on struct is unsupported", func(t *testing.T) { @@ -472,12 +339,8 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { Val string `arrow:"val,ree"` } _, err := InferSchema[REERow]() - if err == nil { - t.Fatal("expected error for ree tag on struct field, got nil") - } - if !strings.Contains(err.Error(), "ree tag on struct field") { - 
t.Errorf("unexpected error message: %v", err) - } + require.Error(t, err, "expected error for ree tag on struct field, got nil") + assert.True(t, strings.Contains(err.Error(), "ree tag on struct field"), "unexpected error message: %v", err) }) } @@ -496,12 +359,8 @@ func TestInferGoType(t *testing.T) { } for _, tt := range primitives { got, err := InferGoType(tt.dt) - if err != nil { - t.Errorf("InferGoType(%v): %v", tt.dt, err) - continue - } - if got != tt.want { - t.Errorf("InferGoType(%v) = %v, want %v", tt.dt, got, tt.want) + if assert.NoError(t, err, "InferGoType(%v)", tt.dt) { + assert.Equal(t, tt.want, got, "InferGoType(%v)", tt.dt) } } @@ -510,49 +369,23 @@ func TestInferGoType(t *testing.T) { arrow.Field{Name: "name", Type: arrow.BinaryTypes.String, Nullable: true}, ) structType, err := InferGoType(st) - if err != nil { - t.Fatalf("struct: %v", err) - } - if structType.Kind() != reflect.Struct { - t.Fatalf("want struct, got %v", structType.Kind()) - } - if structType.NumField() != 2 { - t.Fatalf("want 2 fields, got %d", structType.NumField()) - } - if structType.Field(1).Type.Kind() != reflect.Ptr { - t.Errorf("nullable field should be pointer") - } - if structType.Field(1).Type.Elem().Kind() != reflect.String { - t.Errorf("nullable field should be *string") - } + require.NoError(t, err, "struct") + require.Equal(t, reflect.Struct, structType.Kind()) + require.Equal(t, 2, structType.NumField()) + assert.Equal(t, reflect.Ptr, structType.Field(1).Type.Kind(), "nullable field should be pointer") + assert.Equal(t, reflect.String, structType.Field(1).Type.Elem().Kind(), "nullable field should be *string") listType, err := InferGoType(arrow.ListOf(arrow.PrimitiveTypes.Int32)) - if err != nil { - t.Fatalf("list: %v", err) - } - if listType.Kind() != reflect.Slice { - t.Fatalf("want slice, got %v", listType.Kind()) - } - if listType.Elem() != reflect.TypeOf(int32(0)) { - t.Errorf("list elem wrong") - } + require.NoError(t, err, "list") + require.Equal(t, 
reflect.Slice, listType.Kind()) + assert.Equal(t, reflect.TypeOf(int32(0)), listType.Elem(), "list elem wrong") fslType, err := InferGoType(arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Float32)) - if err != nil { - t.Fatalf("fsl: %v", err) - } - if fslType.Kind() != reflect.Array { - t.Fatalf("want array, got %v", fslType.Kind()) - } - if fslType.Len() != 3 { - t.Errorf("array len want 3, got %d", fslType.Len()) - } + require.NoError(t, err, "fsl") + require.Equal(t, reflect.Array, fslType.Kind()) + assert.Equal(t, 3, fslType.Len(), "array len want 3") _, err = InferGoType(arrow.Null) - if err == nil { - t.Error("expected error for unsupported type") - } - if !errors.Is(err, ErrUnsupportedType) { - t.Errorf("want ErrUnsupportedType, got %v", err) - } + require.Error(t, err, "expected error for unsupported type") + assert.ErrorIs(t, err, ErrUnsupportedType) } diff --git a/arrow/arreflect/reflect_integration_test.go b/arrow/arreflect/reflect_integration_test.go index 4838d14c..002b811c 100644 --- a/arrow/arreflect/reflect_integration_test.go +++ b/arrow/arreflect/reflect_integration_test.go @@ -17,10 +17,11 @@ package arreflect import ( - "reflect" "testing" "github.com/apache/arrow-go/v18/arrow" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) type integOrderItem struct { @@ -101,39 +102,23 @@ func TestReflectIntegration(t *testing.T) { } arr, err := FromSlice(orders, mem) - if err != nil { - t.Fatalf("FromSlice: %v", err) - } + require.NoError(t, err, "FromSlice") defer arr.Release() output, err := ToSlice[integOrder](arr) - if err != nil { - t.Fatalf("ToSlice: %v", err) - } + require.NoError(t, err, "ToSlice") - if len(output) != len(orders) { - t.Fatalf("length mismatch: got %d, want %d", len(output), len(orders)) - } + require.Len(t, output, len(orders)) for i, want := range orders { got := output[i] - if got.ID != want.ID { - t.Errorf("[%d] ID: got %d, want %d", i, got.ID, want.ID) - } - if len(got.Items) != len(want.Items) { - 
t.Errorf("[%d] Items length: got %d, want %d", i, len(got.Items), len(want.Items)) - continue - } - for j, wantItem := range want.Items { - gotItem := got.Items[j] - if gotItem.Product != wantItem.Product { - t.Errorf("[%d][%d] Product: got %q, want %q", i, j, gotItem.Product, wantItem.Product) - } - if !reflect.DeepEqual(gotItem.Ratings, wantItem.Ratings) { - t.Errorf("[%d][%d] Ratings: got %v, want %v", i, j, gotItem.Ratings, wantItem.Ratings) - } - if !reflect.DeepEqual(gotItem.Tags, wantItem.Tags) { - t.Errorf("[%d][%d] Tags: got %v, want %v", i, j, gotItem.Tags, wantItem.Tags) + assert.Equal(t, want.ID, got.ID, "[%d] ID", i) + if assert.Len(t, got.Items, len(want.Items), "[%d] Items length", i) { + for j, wantItem := range want.Items { + gotItem := got.Items[j] + assert.Equal(t, wantItem.Product, gotItem.Product, "[%d][%d] Product", i, j) + assert.Equal(t, wantItem.Ratings, gotItem.Ratings, "[%d][%d] Ratings", i, j) + assert.Equal(t, wantItem.Tags, gotItem.Tags, "[%d][%d] Tags", i, j) } } } @@ -147,27 +132,18 @@ func TestReflectIntegration(t *testing.T) { } arr, err := FromSlice(rows, mem) - if err != nil { - t.Fatalf("FromSlice: %v", err) - } + require.NoError(t, err, "FromSlice") defer arr.Release() - if arr.Len() != n { - t.Fatalf("array length: got %d, want %d", arr.Len(), n) - } + require.Equal(t, n, arr.Len()) output, err := ToSlice[integLargeRow](arr) - if err != nil { - t.Fatalf("ToSlice: %v", err) - } + require.NoError(t, err, "ToSlice") - if len(output) != n { - t.Fatalf("output length: got %d, want %d", len(output), n) - } + require.Len(t, output, n) for i, want := range rows { - if output[i].X != want.X || output[i].Y != want.Y { - t.Errorf("[%d] got %+v, want %+v", i, output[i], want) - } + assert.Equal(t, want.X, output[i].X, "[%d] X", i) + assert.Equal(t, want.Y, output[i].Y, "[%d] Y", i) } }) @@ -179,69 +155,41 @@ func TestReflectIntegration(t *testing.T) { } arr, err := FromSlice(rows, mem) - if err != nil { - t.Fatalf("FromSlice: %v", err) - 
} + require.NoError(t, err, "FromSlice") defer arr.Release() output, err := ToSlice[integNullable](arr) - if err != nil { - t.Fatalf("ToSlice: %v", err) - } + require.NoError(t, err, "ToSlice") - if len(output) != 3 { - t.Fatalf("length: got %d, want 3", len(output)) - } + require.Len(t, output, 3) for i, got := range output { - if got.A != nil { - t.Errorf("[%d] A: expected nil, got non-nil", i) - } - if got.B != nil { - t.Errorf("[%d] B: expected nil, got non-nil", i) - } - if got.C != nil { - t.Errorf("[%d] C: expected nil, got non-nil", i) - } + assert.Nil(t, got.A, "[%d] A: expected nil", i) + assert.Nil(t, got.B, "[%d] B: expected nil", i) + assert.Nil(t, got.C, "[%d] C: expected nil", i) } }) t.Run("empty int32 slice", func(t *testing.T) { arr, err := FromSlice[int32]([]int32{}, mem) - if err != nil { - t.Fatalf("FromSlice: %v", err) - } + require.NoError(t, err, "FromSlice") defer arr.Release() - if arr.Len() != 0 { - t.Errorf("array length: got %d, want 0", arr.Len()) - } + assert.Equal(t, 0, arr.Len()) output, err := ToSlice[int32](arr) - if err != nil { - t.Fatalf("ToSlice: %v", err) - } - if output == nil { - t.Error("ToSlice returned nil, want non-nil empty slice") - } - if len(output) != 0 { - t.Errorf("output length: got %d, want 0", len(output)) - } + require.NoError(t, err, "ToSlice") + assert.NotNil(t, output, "ToSlice returned nil, want non-nil empty slice") + assert.Len(t, output, 0) }) t.Run("empty struct slice", func(t *testing.T) { type simpleXY struct{ X int32 } arr, err := FromSlice[simpleXY]([]simpleXY{}, mem) - if err != nil { - t.Fatalf("FromSlice empty struct: %v", err) - } + require.NoError(t, err, "FromSlice empty struct") defer arr.Release() - if arr.Len() != 0 { - t.Errorf("array length: got %d, want 0", arr.Len()) - } - if arr.DataType().ID() != arrow.STRUCT { - t.Errorf("expected STRUCT type for empty struct slice, got %v", arr.DataType()) - } + assert.Equal(t, 0, arr.Len()) + assert.Equal(t, arrow.STRUCT, arr.DataType().ID()) }) 
t.Run("mixed nullability round-trip", func(t *testing.T) { @@ -258,37 +206,27 @@ func TestReflectIntegration(t *testing.T) { } arr, err := FromSlice(rows, mem) - if err != nil { - t.Fatalf("FromSlice: %v", err) - } + require.NoError(t, err, "FromSlice") defer arr.Release() output, err := ToSlice[integMixed](arr) - if err != nil { - t.Fatalf("ToSlice: %v", err) - } + require.NoError(t, err, "ToSlice") - if len(output) != len(rows) { - t.Fatalf("length: got %d, want %d", len(output), len(rows)) - } + require.Len(t, output, len(rows)) for i, want := range rows { got := output[i] - if got.Required != want.Required { - t.Errorf("[%d] Required: got %q, want %q", i, got.Required, want.Required) - } - if got.Count != want.Count { - t.Errorf("[%d] Count: got %d, want %d", i, got.Count, want.Count) - } - if (got.Optional == nil) != (want.Optional == nil) { - t.Errorf("[%d] Optional nil mismatch: got nil=%v, want nil=%v", i, got.Optional == nil, want.Optional == nil) - } else if got.Optional != nil && *got.Optional != *want.Optional { - t.Errorf("[%d] Optional value: got %q, want %q", i, *got.Optional, *want.Optional) + assert.Equal(t, want.Required, got.Required, "[%d] Required", i) + assert.Equal(t, want.Count, got.Count, "[%d] Count", i) + if assert.Equal(t, want.Optional == nil, got.Optional == nil, "[%d] Optional nil mismatch", i) { + if got.Optional != nil { + assert.Equal(t, *want.Optional, *got.Optional, "[%d] Optional value", i) + } } - if (got.MaybeCount == nil) != (want.MaybeCount == nil) { - t.Errorf("[%d] MaybeCount nil mismatch: got nil=%v, want nil=%v", i, got.MaybeCount == nil, want.MaybeCount == nil) - } else if got.MaybeCount != nil && *got.MaybeCount != *want.MaybeCount { - t.Errorf("[%d] MaybeCount value: got %d, want %d", i, *got.MaybeCount, *want.MaybeCount) + if assert.Equal(t, want.MaybeCount == nil, got.MaybeCount == nil, "[%d] MaybeCount nil mismatch", i) { + if got.MaybeCount != nil { + assert.Equal(t, *want.MaybeCount, *got.MaybeCount, "[%d] 
MaybeCount value", i) + } } } }) @@ -301,15 +239,11 @@ func TestReflectIntegration(t *testing.T) { } arr, err := FromSlice(rows, mem) - if err != nil { - t.Fatalf("FromSlice: %v", err) - } + require.NoError(t, err, "FromSlice") defer arr.Release() st, ok := arr.DataType().(*arrow.StructType) - if !ok { - t.Fatalf("expected StructType, got %T", arr.DataType()) - } + require.True(t, ok, "expected StructType, got %T", arr.DataType()) var hasID, hasName, hasSkip bool for i := 0; i < st.NumFields(); i++ { @@ -322,35 +256,19 @@ func TestReflectIntegration(t *testing.T) { hasSkip = true } } - if !hasID { - t.Error("expected field 'ID' in schema") - } - if !hasName { - t.Error("expected field 'name' in schema") - } - if hasSkip { - t.Error("unexpected field 'Skip' in schema (should be skipped by arrow:\"-\" tag)") - } + assert.True(t, hasID, "expected field 'ID' in schema") + assert.True(t, hasName, "expected field 'name' in schema") + assert.False(t, hasSkip, "unexpected field 'Skip' in schema (should be skipped by arrow:\"-\" tag)") output, err := ToSlice[integExtended](arr) - if err != nil { - t.Fatalf("ToSlice: %v", err) - } + require.NoError(t, err, "ToSlice") - if len(output) != len(rows) { - t.Fatalf("length: got %d, want %d", len(output), len(rows)) - } + require.Len(t, output, len(rows)) for i, want := range rows { got := output[i] - if got.ID != want.ID { - t.Errorf("[%d] ID: got %d, want %d", i, got.ID, want.ID) - } - if got.Name != want.Name { - t.Errorf("[%d] Name: got %q, want %q", i, got.Name, want.Name) - } - if got.Skip != "" { - t.Errorf("[%d] Skip: expected empty string, got %q", i, got.Skip) - } + assert.Equal(t, want.ID, got.ID, "[%d] ID", i) + assert.Equal(t, want.Name, got.Name, "[%d] Name", i) + assert.Equal(t, "", got.Skip, "[%d] Skip: expected empty string", i) } }) @@ -360,31 +278,21 @@ func TestReflectIntegration(t *testing.T) { } schema, err := InferSchema[integOrder]() - if err != nil { - t.Fatalf("SchemaOf: %v", err) - } + require.NoError(t, 
err, "SchemaOf") arr, err := FromSlice(orders, mem) - if err != nil { - t.Fatalf("FromSlice: %v", err) - } + require.NoError(t, err, "FromSlice") defer arr.Release() st, ok := arr.DataType().(*arrow.StructType) - if !ok { - t.Fatalf("expected StructType, got %T", arr.DataType()) - } + require.True(t, ok, "expected StructType, got %T", arr.DataType()) - if st.NumFields() != schema.NumFields() { - t.Fatalf("field count mismatch: array has %d, schema has %d", st.NumFields(), schema.NumFields()) - } + require.Equal(t, schema.NumFields(), st.NumFields()) for i := 0; i < schema.NumFields(); i++ { schemaField := schema.Field(i) structField := st.Field(i) - if structField.Name != schemaField.Name { - t.Errorf("field[%d] name: array has %q, schema has %q", i, structField.Name, schemaField.Name) - } + assert.Equal(t, schemaField.Name, structField.Name, "field[%d] name", i) } }) @@ -395,9 +303,7 @@ func TestReflectIntegration(t *testing.T) { } arr1, err := FromSlice(batch1, mem) - if err != nil { - t.Fatalf("FromSlice batch1: %v", err) - } + require.NoError(t, err, "FromSlice batch1") defer arr1.Release() batch2 := make([]integLargeRow, 5) @@ -406,36 +312,22 @@ func TestReflectIntegration(t *testing.T) { } arr2, err := FromSlice(batch2, mem) - if err != nil { - t.Fatalf("FromSlice batch2: %v", err) - } + require.NoError(t, err, "FromSlice batch2") defer arr2.Release() out1, err := ToSlice[integLargeRow](arr1) - if err != nil { - t.Fatalf("ToSlice batch1: %v", err) - } + require.NoError(t, err, "ToSlice batch1") out2, err := ToSlice[integLargeRow](arr2) - if err != nil { - t.Fatalf("ToSlice batch2: %v", err) - } + require.NoError(t, err, "ToSlice batch2") - if len(out1) != len(batch1) { - t.Fatalf("batch1 length: got %d, want %d", len(out1), len(batch1)) - } - if len(out2) != len(batch2) { - t.Fatalf("batch2 length: got %d, want %d", len(out2), len(batch2)) - } + require.Len(t, out1, len(batch1)) + require.Len(t, out2, len(batch2)) for i, want := range batch1 { - if out1[i] != 
want { - t.Errorf("batch1[%d]: got %+v, want %+v", i, out1[i], want) - } + assert.Equal(t, want, out1[i], "batch1[%d]", i) } for i, want := range batch2 { - if out2[i] != want { - t.Errorf("batch2[%d]: got %+v, want %+v", i, out2[i], want) - } + assert.Equal(t, want, out2[i], "batch2[%d]", i) } }) @@ -449,27 +341,16 @@ func TestReflectIntegration(t *testing.T) { } rec, err := RecordFromSlice(rows, mem) - if err != nil { - t.Fatalf("RecordFromSlice: %v", err) - } + require.NoError(t, err, "RecordFromSlice") defer rec.Release() - if rec.NumRows() != int64(len(rows)) { - t.Fatalf("NumRows: got %d, want %d", rec.NumRows(), len(rows)) - } + require.Equal(t, int64(len(rows)), rec.NumRows()) output, err := RecordToSlice[integLargeRow](rec) - if err != nil { - t.Fatalf("RecordToSlice: %v", err) - } + require.NoError(t, err, "RecordToSlice") - if len(output) != len(rows) { - t.Fatalf("output length: got %d, want %d", len(output), len(rows)) - } - - if !reflect.DeepEqual(rows, output) { - t.Errorf("record round-trip mismatch:\n got: %v\n want: %v", output, rows) - } + require.Len(t, output, len(rows)) + assert.Equal(t, rows, output) }) } diff --git a/arrow/arreflect/reflect_public_test.go b/arrow/arreflect/reflect_public_test.go index 72f392c7..abb3063c 100644 --- a/arrow/arreflect/reflect_public_test.go +++ b/arrow/arreflect/reflect_public_test.go @@ -17,13 +17,14 @@ package arreflect import ( - "errors" "reflect" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func testMem() memory.Allocator { return memory.NewGoAllocator() } @@ -39,12 +40,8 @@ func TestToGo(t *testing.T) { defer arr.Release() got, err := At[int32](arr, 0) - if err != nil { - t.Fatal(err) - } - if got != 10 { - t.Errorf("expected 10, got %d", got) - } + require.NoError(t, err) + assert.Equal(t, int32(10), got) }) t.Run("string element 
1", func(t *testing.T) { @@ -55,12 +52,8 @@ func TestToGo(t *testing.T) { defer arr.Release() got, err := At[string](arr, 1) - if err != nil { - t.Fatal(err) - } - if got != "world" { - t.Errorf("expected world, got %q", got) - } + require.NoError(t, err) + assert.Equal(t, "world", got) }) t.Run("struct element 0", func(t *testing.T) { @@ -70,18 +63,13 @@ func TestToGo(t *testing.T) { } vals := []Person{{"Alice", 30}, {"Bob", 25}} arr, err := FromSlice(vals, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() got, err := At[Person](arr, 0) - if err != nil { - t.Fatal(err) - } - if got.Name != "Alice" || got.Age != 30 { - t.Errorf("expected {Alice 30}, got %+v", got) - } + require.NoError(t, err) + assert.Equal(t, "Alice", got.Name) + assert.Equal(t, int32(30), got.Age) }) t.Run("null element to *int32 is nil", func(t *testing.T) { @@ -92,12 +80,8 @@ func TestToGo(t *testing.T) { defer arr.Release() got, err := At[*int32](arr, 0) - if err != nil { - t.Fatal(err) - } - if got != nil { - t.Errorf("expected nil pointer for null, got %v", *got) - } + require.NoError(t, err) + assert.Nil(t, got) }) t.Run("null element to int32 is zero", func(t *testing.T) { @@ -108,12 +92,8 @@ func TestToGo(t *testing.T) { defer arr.Release() got, err := At[int32](arr, 0) - if err != nil { - t.Fatal(err) - } - if got != 0 { - t.Errorf("expected 0 for null, got %d", got) - } + require.NoError(t, err) + assert.Equal(t, int32(0), got) }) } @@ -128,17 +108,11 @@ func TestToGoSlice(t *testing.T) { defer arr.Release() got, err := ToSlice[int32](arr) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) want := []int32{1, 2, 3} - if len(got) != len(want) { - t.Fatalf("expected len %d, got %d", len(want), len(got)) - } + require.Len(t, got, len(want)) for i, v := range want { - if got[i] != v { - t.Errorf("index %d: expected %d, got %d", i, v, got[i]) - } + assert.Equal(t, v, got[i], "index %d", i) } }) @@ -150,17 +124,11 @@ func TestToGoSlice(t 
*testing.T) { defer arr.Release() got, err := ToSlice[string](arr) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) want := []string{"foo", "bar", "baz"} - if len(got) != len(want) { - t.Fatalf("expected len %d, got %d", len(want), len(got)) - } + require.Len(t, got, len(want)) for i, v := range want { - if got[i] != v { - t.Errorf("index %d: expected %q, got %q", i, v, got[i]) - } + assert.Equal(t, v, got[i], "index %d", i) } }) @@ -170,22 +138,14 @@ func TestToGoSlice(t *testing.T) { } vals := []Row{{"Alice"}, {"Bob"}, {"Charlie"}} arr, err := FromSlice(vals, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() got, err := ToSlice[Row](arr) - if err != nil { - t.Fatal(err) - } - if len(got) != len(vals) { - t.Fatalf("expected len %d, got %d", len(vals), len(got)) - } + require.NoError(t, err) + require.Len(t, got, len(vals)) for i, want := range vals { - if got[i].Name != want.Name { - t.Errorf("index %d: expected %q, got %q", i, want.Name, got[i].Name) - } + assert.Equal(t, want.Name, got[i].Name, "index %d", i) } }) @@ -196,15 +156,9 @@ func TestToGoSlice(t *testing.T) { defer arr.Release() got, err := ToSlice[int32](arr) - if err != nil { - t.Fatal(err) - } - if got == nil { - t.Error("expected non-nil empty slice, got nil") - } - if len(got) != 0 { - t.Errorf("expected len 0, got %d", len(got)) - } + require.NoError(t, err) + assert.NotNil(t, got, "expected non-nil empty slice, got nil") + assert.Len(t, got, 0) }) } @@ -213,36 +167,25 @@ func TestFromGoSlice(t *testing.T) { t.Run("[]int32", func(t *testing.T) { arr, err := FromSlice([]int32{1, 2, 3}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.Len() != 3 { - t.Fatalf("expected len 3, got %d", arr.Len()) - } + require.Equal(t, 3, arr.Len()) typed := arr.(*array.Int32) for i, want := range []int32{1, 2, 3} { - if typed.Value(i) != want { - t.Errorf("index %d: expected %d, got %d", i, want, typed.Value(i)) - } + 
assert.Equal(t, want, typed.Value(i), "index %d", i) } }) t.Run("[]string", func(t *testing.T) { arr, err := FromSlice([]string{"a", "b"}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.Len() != 2 { - t.Fatalf("expected len 2, got %d", arr.Len()) - } + require.Equal(t, 2, arr.Len()) typed := arr.(*array.String) - if typed.Value(0) != "a" || typed.Value(1) != "b" { - t.Errorf("expected [a b], got [%s %s]", typed.Value(0), typed.Value(1)) - } + assert.Equal(t, "a", typed.Value(0)) + assert.Equal(t, "b", typed.Value(1)) }) t.Run("[]struct{Name string; Score float64}", func(t *testing.T) { @@ -252,67 +195,44 @@ func TestFromGoSlice(t *testing.T) { } vals := []Row{{"Alice", 9.5}, {"Bob", 8.0}} arr, err := FromSlice(vals, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.Len() != 2 { - t.Fatalf("expected len 2, got %d", arr.Len()) - } + require.Equal(t, 2, arr.Len()) got, err := ToSlice[Row](arr) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) for i, want := range vals { - if got[i].Name != want.Name || got[i].Score != want.Score { - t.Errorf("index %d: expected %+v, got %+v", i, want, got[i]) - } + assert.Equal(t, want.Name, got[i].Name, "index %d Name", i) + assert.Equal(t, want.Score, got[i].Score, "index %d Score", i) } }) t.Run("[]*int32 with nil produces null", func(t *testing.T) { v := int32(42) arr, err := FromSlice([]*int32{&v, nil}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.Len() != 2 { - t.Fatalf("expected len 2, got %d", arr.Len()) - } - if arr.IsNull(1) == false { - t.Error("expected index 1 to be null") - } + require.Equal(t, 2, arr.Len()) + assert.True(t, arr.IsNull(1), "expected index 1 to be null") typed := arr.(*array.Int32) - if typed.Value(0) != 42 { - t.Errorf("expected 42 at index 0, got %d", typed.Value(0)) - } + assert.Equal(t, int32(42), typed.Value(0)) }) t.Run("empty []int32 gives 
length-0 array", func(t *testing.T) { arr, err := FromSlice([]int32{}, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.Len() != 0 { - t.Errorf("expected len 0, got %d", arr.Len()) - } + assert.Equal(t, 0, arr.Len()) }) t.Run("empty slice with WithListView", func(t *testing.T) { arr, err := FromSlice([][]int32{}, mem, WithListView()) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer arr.Release() - if arr.DataType().ID() != arrow.LIST_VIEW { - t.Errorf("expected LIST_VIEW, got %v", arr.DataType()) - } + assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) }) } @@ -350,16 +270,11 @@ func TestRecordToSlice(t *testing.T) { defer rec.Release() got, err := RecordToSlice[Row](rec) - if err != nil { - t.Fatal(err) - } - if len(got) != len(want) { - t.Fatalf("expected len %d, got %d", len(want), len(got)) - } + require.NoError(t, err) + require.Len(t, got, len(want)) for i, w := range want { - if got[i].Name != w.Name || got[i].Score != w.Score { - t.Errorf("index %d: expected %+v, got %+v", i, w, got[i]) - } + assert.Equal(t, w.Name, got[i].Name, "index %d Name", i) + assert.Equal(t, w.Score, got[i].Score, "index %d Score", i) } }) @@ -368,12 +283,8 @@ func TestRecordToSlice(t *testing.T) { defer rec.Release() got, err := RecordToSlice[Row](rec) - if err != nil { - t.Fatal(err) - } - if len(got) != 0 { - t.Errorf("expected empty slice, got len %d", len(got)) - } + require.NoError(t, err) + assert.Len(t, got, 0) }) } @@ -388,59 +299,38 @@ func TestRecordFromSlice(t *testing.T) { t.Run("struct slice produces correct schema and values", func(t *testing.T) { vals := []Row{{"Alice", 9.5}, {"Bob", 8.0}} rec, err := RecordFromSlice(vals, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer rec.Release() - if rec.NumCols() != 2 { - t.Fatalf("expected 2 cols, got %d", rec.NumCols()) - } - if rec.NumRows() != 2 { - t.Fatalf("expected 2 rows, got %d", rec.NumRows()) - } - if 
rec.Schema().Field(0).Name != "Name" { - t.Errorf("expected col 0 name 'Name', got %q", rec.Schema().Field(0).Name) - } - if rec.Schema().Field(1).Name != "Score" { - t.Errorf("expected col 1 name 'Score', got %q", rec.Schema().Field(1).Name) - } + require.Equal(t, int64(2), rec.NumCols()) + require.Equal(t, int64(2), rec.NumRows()) + assert.Equal(t, "Name", rec.Schema().Field(0).Name) + assert.Equal(t, "Score", rec.Schema().Field(1).Name) nameCol := rec.Column(0).(*array.String) - if nameCol.Value(0) != "Alice" || nameCol.Value(1) != "Bob" { - t.Errorf("unexpected name values: %q %q", nameCol.Value(0), nameCol.Value(1)) - } + assert.Equal(t, "Alice", nameCol.Value(0)) + assert.Equal(t, "Bob", nameCol.Value(1)) scoreCol := rec.Column(1).(*array.Float64) - if scoreCol.Value(0) != 9.5 || scoreCol.Value(1) != 8.0 { - t.Errorf("unexpected score values: %v %v", scoreCol.Value(0), scoreCol.Value(1)) - } + assert.Equal(t, 9.5, scoreCol.Value(0)) + assert.Equal(t, 8.0, scoreCol.Value(1)) }) t.Run("non-struct T returns error", func(t *testing.T) { _, err := RecordFromSlice([]int32{1, 2, 3}, mem) - if err == nil { - t.Fatal("expected error for non-struct T, got nil") - } + require.Error(t, err) }) t.Run("round-trip RecordFromSlice then RecordToSlice", func(t *testing.T) { want := []Row{{"Alice", 9.5}, {"Bob", 8.0}, {"Carol", 7.5}} rec, err := RecordFromSlice(want, mem) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer rec.Release() got, err := RecordToSlice[Row](rec) - if err != nil { - t.Fatal(err) - } - if len(got) != len(want) { - t.Fatalf("expected len %d, got %d", len(want), len(got)) - } + require.NoError(t, err) + require.Len(t, got, len(want)) for i, w := range want { - if got[i].Name != w.Name || got[i].Score != w.Score { - t.Errorf("index %d: expected %+v, got %+v", i, w, got[i]) - } + assert.Equal(t, w.Name, got[i].Name, "index %d Name", i) + assert.Equal(t, w.Score, got[i].Score, "index %d Score", i) } }) } @@ -455,20 +345,16 @@ func 
TestAtAny(t *testing.T) { defer arr.Release() got, err := AtAny(arr, 0) - if err != nil { - t.Fatalf("AtAny(0): %v", err) - } - if v, ok := got.(int32); !ok || v != 42 { - t.Errorf("AtAny(0) = %v (%T), want int32(42)", got, got) - } + require.NoError(t, err, "AtAny(0)") + v, ok := got.(int32) + assert.True(t, ok, "AtAny(0): expected int32 type, got %T", got) + assert.Equal(t, int32(42), v, "AtAny(0) value") got, err = AtAny(arr, 1) - if err != nil { - t.Fatalf("AtAny(1): %v", err) - } - if v, ok := got.(int32); !ok || v != 0 { - t.Errorf("AtAny(1) = %v, want int32(0)", got) - } + require.NoError(t, err, "AtAny(1)") + v, ok = got.(int32) + assert.True(t, ok, "AtAny(1): expected int32 type, got %T", got) + assert.Equal(t, int32(0), v, "AtAny(1) value") } func TestToAnySlice(t *testing.T) { @@ -481,15 +367,10 @@ func TestToAnySlice(t *testing.T) { defer arr.Release() got, err := ToAnySlice(arr) - if err != nil { - t.Fatalf("ToAnySlice: %v", err) - } - if len(got) != 2 { - t.Fatalf("len = %d, want 2", len(got)) - } - if got[0].(string) != "hello" || got[1].(string) != "world" { - t.Errorf("got %v, want [hello world]", got) - } + require.NoError(t, err, "ToAnySlice") + require.Len(t, got, 2) + assert.Equal(t, "hello", got[0].(string)) + assert.Equal(t, "world", got[1].(string)) } func TestErrSentinels(t *testing.T) { @@ -505,22 +386,14 @@ func TestErrSentinels(t *testing.T) { var got string v := reflect.ValueOf(&got).Elem() err := setValue(v, arr, 0) - if err == nil { - t.Fatal("expected error, got nil") - } - if !errors.Is(err, ErrTypeMismatch) { - t.Errorf("expected errors.Is(err, ErrTypeMismatch) = true, got false; err = %v", err) - } + require.Error(t, err) + assert.ErrorIs(t, err, ErrTypeMismatch) }) t.Run("ErrUnsupportedType via InferGoType", func(t *testing.T) { _, err := InferGoType(arrow.Null) - if err == nil { - t.Fatal("expected error, got nil") - } - if !errors.Is(err, ErrUnsupportedType) { - t.Errorf("expected errors.Is(err, ErrUnsupportedType) = true, got 
false; err = %v", err) - } + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) }) t.Run("ErrTypeMismatch propagates through struct field context wrapper", func(t *testing.T) { @@ -536,12 +409,8 @@ func TestErrSentinels(t *testing.T) { Name int32 `arrow:"name"` } _, err := At[wrongType](arr, 0) - if err == nil { - t.Fatal("expected error, got nil") - } - if !errors.Is(err, ErrTypeMismatch) { - t.Errorf("ErrTypeMismatch not found through context wrapper; err = %v", err) - } + require.Error(t, err) + assert.ErrorIs(t, err, ErrTypeMismatch) }) } @@ -553,26 +422,16 @@ func TestRecordAt(t *testing.T) { } rows := []Row{{"alice", 9.5}, {"bob", 7.0}} rec, err := RecordFromSlice(rows, mem) - if err != nil { - t.Fatalf("RecordFromSlice: %v", err) - } + require.NoError(t, err, "RecordFromSlice") defer rec.Release() got, err := RecordAt[Row](rec, 0) - if err != nil { - t.Fatalf("RecordAt(0): %v", err) - } - if got != rows[0] { - t.Errorf("RecordAt(0) = %v, want %v", got, rows[0]) - } + require.NoError(t, err, "RecordAt(0)") + assert.Equal(t, rows[0], got) got, err = RecordAt[Row](rec, 1) - if err != nil { - t.Fatalf("RecordAt(1): %v", err) - } - if got != rows[1] { - t.Errorf("RecordAt(1) = %v, want %v", got, rows[1]) - } + require.NoError(t, err, "RecordAt(1)") + assert.Equal(t, rows[1], got) } func TestRecordAtAny(t *testing.T) { @@ -583,19 +442,13 @@ func TestRecordAtAny(t *testing.T) { } rows := []Row{{"alice", 9.5}, {"bob", 7.0}} rec, err := RecordFromSlice(rows, mem) - if err != nil { - t.Fatalf("RecordFromSlice: %v", err) - } + require.NoError(t, err, "RecordFromSlice") defer rec.Release() got, err := RecordAtAny(rec, 0) - if err != nil { - t.Fatalf("RecordAtAny(0): %v", err) - } + require.NoError(t, err, "RecordAtAny(0)") v := reflect.ValueOf(got) - if v.Kind() != reflect.Struct { - t.Fatalf("expected struct, got %s", v.Kind()) - } + require.Equal(t, reflect.Struct, v.Kind()) var nameField, scoreField reflect.Value for i := 0; i < v.NumField(); i++ { 
tag := v.Type().Field(i).Tag.Get("arrow") @@ -606,18 +459,10 @@ func TestRecordAtAny(t *testing.T) { scoreField = v.Field(i) } } - if !nameField.IsValid() { - t.Fatal("name field not found") - } - if !scoreField.IsValid() { - t.Fatal("score field not found") - } - if nameField.String() != "alice" { - t.Errorf("name = %q, want %q", nameField.String(), "alice") - } - if scoreField.Float() != 9.5 { - t.Errorf("score = %v, want 9.5", scoreField.Float()) - } + require.True(t, nameField.IsValid(), "name field not found") + require.True(t, scoreField.IsValid(), "score field not found") + assert.Equal(t, "alice", nameField.String()) + assert.Equal(t, 9.5, scoreField.Float()) } func TestRecordToAnySlice(t *testing.T) { @@ -628,32 +473,22 @@ func TestRecordToAnySlice(t *testing.T) { } rows := []Row{{"alice", 9.5}, {"bob", 7.0}} rec, err := RecordFromSlice(rows, mem) - if err != nil { - t.Fatalf("RecordFromSlice: %v", err) - } + require.NoError(t, err, "RecordFromSlice") defer rec.Release() got, err := RecordToAnySlice(rec) - if err != nil { - t.Fatalf("RecordToAnySlice: %v", err) - } - if len(got) != 2 { - t.Fatalf("len = %d, want 2", len(got)) - } + require.NoError(t, err, "RecordToAnySlice") + require.Len(t, got, 2) for i, row := range got { v := reflect.ValueOf(row) - if v.Kind() != reflect.Struct { - t.Fatalf("row %d: expected struct, got %s", i, v.Kind()) - } + require.Equal(t, reflect.Struct, v.Kind(), "row %d", i) var nameField reflect.Value for fi := 0; fi < v.NumField(); fi++ { if v.Type().Field(fi).Tag.Get("arrow") == "name" { nameField = v.Field(fi) } } - if nameField.String() != rows[i].Name { - t.Errorf("row %d name = %q, want %q", i, nameField.String(), rows[i].Name) - } + assert.Equal(t, rows[i].Name, nameField.String(), "row %d name", i) } } @@ -674,14 +509,10 @@ func TestAtAnyComposite(t *testing.T) { defer arr.Release() got, err := AtAny(arr, 0) - if err != nil { - t.Fatalf("AtAny: %v", err) - } + require.NoError(t, err, "AtAny") v := reflect.ValueOf(got) - 
if v.Kind() != reflect.Struct { - t.Fatalf("want struct, got %v", v.Kind()) - } + require.Equal(t, reflect.Struct, v.Kind()) vt := v.Type() var idField, nameField reflect.Value @@ -694,18 +525,10 @@ func TestAtAnyComposite(t *testing.T) { nameField = v.Field(i) } } - if !idField.IsValid() { - t.Fatal("id field not found") - } - if !nameField.IsValid() { - t.Fatal("name field not found") - } - if idField.Int() != 99 { - t.Errorf("id = %v, want 99", idField.Int()) - } - if nameField.String() != "alice" { - t.Errorf("name = %v, want alice", nameField.String()) - } + require.True(t, idField.IsValid(), "id field not found") + require.True(t, nameField.IsValid(), "name field not found") + assert.Equal(t, int64(99), idField.Int()) + assert.Equal(t, "alice", nameField.String()) }) t.Run("list", func(t *testing.T) { @@ -719,20 +542,13 @@ func TestAtAnyComposite(t *testing.T) { defer arr.Release() got, err := AtAny(arr, 0) - if err != nil { - t.Fatalf("AtAny: %v", err) - } + require.NoError(t, err, "AtAny") v := reflect.ValueOf(got) - if v.Kind() != reflect.Slice { - t.Fatalf("want slice, got %v", v.Kind()) - } - if v.Len() != 3 { - t.Fatalf("want 3 elems, got %d", v.Len()) - } - if v.Index(0).Int() != 1 || v.Index(2).Int() != 3 { - t.Errorf("list = %v, want [1 2 3]", got) - } + require.Equal(t, reflect.Slice, v.Kind()) + require.Equal(t, 3, v.Len()) + assert.Equal(t, int64(1), v.Index(0).Int()) + assert.Equal(t, int64(3), v.Index(2).Int()) }) t.Run("map", func(t *testing.T) { @@ -745,21 +561,89 @@ func TestAtAnyComposite(t *testing.T) { defer arr.Release() got, err := AtAny(arr, 0) - if err != nil { - t.Fatalf("AtAny: %v", err) - } + require.NoError(t, err, "AtAny") v := reflect.ValueOf(got) - if v.Kind() != reflect.Map { - t.Fatalf("want map, got %v", v.Kind()) - } + require.Equal(t, reflect.Map, v.Kind()) key := reflect.ValueOf("x") val := v.MapIndex(key) - if !val.IsValid() { - t.Fatal("key 'x' not found in map") + require.True(t, val.IsValid(), "key 'x' not found in 
map") + assert.Equal(t, int64(7), val.Int()) + }) +} + +func TestToAnySliceStructArray(t *testing.T) { + mem := testMem() + st := arrow.StructOf( + arrow.Field{Name: "id", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, + arrow.Field{Name: "label", Type: arrow.BinaryTypes.String, Nullable: false}, + arrow.Field{Name: "score", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, + ) + sb := array.NewStructBuilder(mem, st) + defer sb.Release() + + sb.Append(true) + sb.FieldBuilder(0).(*array.Int64Builder).Append(1) + sb.FieldBuilder(1).(*array.StringBuilder).Append("alpha") + sb.FieldBuilder(2).(*array.Float64Builder).Append(9.5) + + sb.Append(true) + sb.FieldBuilder(0).(*array.Int64Builder).Append(2) + sb.FieldBuilder(1).(*array.StringBuilder).Append("beta") + sb.FieldBuilder(2).(*array.Float64Builder).Append(3.14) + + sb.Append(true) + sb.FieldBuilder(0).(*array.Int64Builder).Append(3) + sb.FieldBuilder(1).(*array.StringBuilder).Append("gamma") + sb.FieldBuilder(2).(*array.Float64Builder).AppendNull() + + arr := sb.NewArray() + defer arr.Release() + + got, err := ToAnySlice(arr) + require.NoError(t, err, "ToAnySlice") + require.Len(t, got, 3) + + type expected struct { + id int64 + label string + score float64 + } + want := []expected{ + {1, "alpha", 9.5}, + {2, "beta", 3.14}, + {3, "gamma", 0}, + } + + for i, row := range got { + v := reflect.ValueOf(row) + require.Equal(t, reflect.Struct, v.Kind(), "row %d", i) + require.Equal(t, 3, v.NumField(), "row %d", i) + + var id, label, score reflect.Value + for fi := 0; fi < v.NumField(); fi++ { + switch v.Type().Field(fi).Tag.Get("arrow") { + case "id": + id = v.Field(fi) + case "label": + label = v.Field(fi) + case "score": + score = v.Field(fi) + } } - if val.Int() != 7 { - t.Errorf("map[x] = %v, want 7", val.Int()) + require.True(t, id.IsValid() && label.IsValid() && score.IsValid(), "row %d: missing field(s)", i) + assert.Equal(t, want[i].id, id.Int(), "row %d id", i) + assert.Equal(t, want[i].label, 
label.String(), "row %d label", i) + if score.Kind() == reflect.Ptr { + if i == 2 { + assert.True(t, score.IsNil(), "row 2 score: want nil") + } else { + if assert.False(t, score.IsNil(), "row %d score: unexpected nil", i) { + assert.Equal(t, want[i].score, score.Elem().Float(), "row %d score", i) + } + } + } else { + assert.Equal(t, want[i].score, score.Float(), "row %d score", i) } - }) + } } diff --git a/arrow/arreflect/reflect_test.go b/arrow/arreflect/reflect_test.go index 87a4b807..d07976aa 100644 --- a/arrow/arreflect/reflect_test.go +++ b/arrow/arreflect/reflect_test.go @@ -19,6 +19,9 @@ package arreflect import ( "reflect" "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestParseTag(t *testing.T) { @@ -75,9 +78,7 @@ func TestParseTag(t *testing.T) { for _, tt := range tests { t.Run(tt.input, func(t *testing.T) { got := parseTag(tt.input) - if got != tt.want { - t.Errorf("parseTag(%q) = %+v, want %+v", tt.input, got, tt.want) - } + assert.Equal(t, tt.want, got, "parseTag(%q)", tt.input) }) } } @@ -89,15 +90,9 @@ func TestGetStructFields(t *testing.T) { Age int32 } fields := getStructFields(reflect.TypeOf(Simple{})) - if len(fields) != 2 { - t.Fatalf("expected 2 fields, got %d", len(fields)) - } - if fields[0].Name != "Name" { - t.Errorf("fields[0].Name = %q, want %q", fields[0].Name, "Name") - } - if fields[1].Name != "Age" { - t.Errorf("fields[1].Name = %q, want %q", fields[1].Name, "Age") - } + require.Len(t, fields, 2) + assert.Equal(t, "Name", fields[0].Name) + assert.Equal(t, "Age", fields[1].Name) }) t.Run("struct with arrow tags", func(t *testing.T) { @@ -107,15 +102,9 @@ func TestGetStructFields(t *testing.T) { Internal string `arrow:"-"` } fields := getStructFields(reflect.TypeOf(Tagged{})) - if len(fields) != 2 { - t.Fatalf("expected 2 fields, got %d: %v", len(fields), fields) - } - if fields[0].Name != "user_name" { - t.Errorf("fields[0].Name = %q, want %q", fields[0].Name, "user_name") - } - if 
fields[1].Name != "score" { - t.Errorf("fields[1].Name = %q, want %q", fields[1].Name, "score") - } + require.Len(t, fields, 2) + assert.Equal(t, "user_name", fields[0].Name) + assert.Equal(t, "score", fields[1].Name) }) t.Run("unexported fields skipped", func(t *testing.T) { @@ -124,12 +113,8 @@ func TestGetStructFields(t *testing.T) { unexported string //nolint:unused } fields := getStructFields(reflect.TypeOf(Mixed{})) - if len(fields) != 1 { - t.Fatalf("expected 1 field, got %d", len(fields)) - } - if fields[0].Name != "Exported" { - t.Errorf("fields[0].Name = %q, want %q", fields[0].Name, "Exported") - } + require.Len(t, fields, 1) + assert.Equal(t, "Exported", fields[0].Name) }) t.Run("pointer fields are nullable", func(t *testing.T) { @@ -138,15 +123,9 @@ func TestGetStructFields(t *testing.T) { Optional *string } fields := getStructFields(reflect.TypeOf(WithPointers{})) - if len(fields) != 2 { - t.Fatalf("expected 2 fields, got %d", len(fields)) - } - if fields[0].Nullable { - t.Errorf("Required.Nullable = true, want false") - } - if !fields[1].Nullable { - t.Errorf("Optional.Nullable = false, want true") - } + require.Len(t, fields, 2) + assert.False(t, fields[0].Nullable, "Required.Nullable = true, want false") + assert.True(t, fields[1].Nullable, "Optional.Nullable = false, want true") }) t.Run("embedded struct promotion", func(t *testing.T) { @@ -159,18 +138,14 @@ func TestGetStructFields(t *testing.T) { Inner } fields := getStructFields(reflect.TypeOf(Outer{})) - if len(fields) != 3 { - t.Fatalf("expected 3 fields, got %d: %v", len(fields), fields) - } + require.Len(t, fields, 3) names := make([]string, len(fields)) for i, f := range fields { names[i] = f.Name } wantNames := []string{"Name", "City", "Zip"} for i, want := range wantNames { - if names[i] != want { - t.Errorf("fields[%d].Name = %q, want %q", i, names[i], want) - } + assert.Equal(t, want, names[i], "fields[%d].Name", i) } }) @@ -182,9 +157,7 @@ func TestGetStructFields(t *testing.T) { B } 
fields := getStructFields(reflect.TypeOf(Conflicted{})) - if len(fields) != 0 { - t.Errorf("expected 0 fields due to conflict, got %d: %v", len(fields), fields) - } + assert.Len(t, fields, 0, "expected 0 fields due to conflict") }) t.Run("embedded with tag overrides promotion", func(t *testing.T) { @@ -196,12 +169,8 @@ func TestGetStructFields(t *testing.T) { Inner `arrow:"inner_struct"` } fields := getStructFields(reflect.TypeOf(HasTag{})) - if len(fields) != 1 { - t.Fatalf("expected 1 field, got %d: %v", len(fields), fields) - } - if fields[0].Name != "inner_struct" { - t.Errorf("fields[0].Name = %q, want %q", fields[0].Name, "inner_struct") - } + require.Len(t, fields, 1) + assert.Equal(t, "inner_struct", fields[0].Name) }) } @@ -214,25 +183,15 @@ func TestCachedStructFields(t *testing.T) { fields1 := cachedStructFields(reflect.TypeOf(S{})) fields2 := cachedStructFields(reflect.TypeOf(S{})) - if len(fields1) != len(fields2) { - t.Fatalf("cached call returned different lengths: %d vs %d", len(fields1), len(fields2)) - } + require.Len(t, fields2, len(fields1), "cached call returned different lengths") for i := range fields1 { - if fields1[i].Name != fields2[i].Name { - t.Errorf("fields[%d].Name mismatch: %q vs %q", i, fields1[i].Name, fields2[i].Name) - } + assert.Equal(t, fields1[i].Name, fields2[i].Name, "fields[%d].Name mismatch", i) } - if len(fields1) != 2 { - t.Fatalf("expected 2 fields, got %d", len(fields1)) - } - if fields1[0].Name != "X" { - t.Errorf("fields1[0].Name = %q, want %q", fields1[0].Name, "X") - } - if fields1[1].Name != "Y" { - t.Errorf("fields1[1].Name = %q, want %q", fields1[1].Name, "Y") - } + require.Len(t, fields1, 2) + assert.Equal(t, "X", fields1[0].Name) + assert.Equal(t, "Y", fields1[1].Name) } // ── shared test types used across reflect test files ────────────────────────── From debea625587f4c0a5b48bce85594dae8e08c180f Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 14:22:54 -0400 Subject: [PATCH 25/82] 
fix(arreflect): prevent panics in InferGoType and nil []byte inconsistency - InferGoType STRUCT: detect duplicate exported field names after capitalization and return error instead of panicking in reflect.StructOf - InferGoType MAP: check keyType.Comparable() before reflect.MapOf to prevent panic on non-comparable key types (e.g. Arrow LIST keys) - appendPrimitiveValue BINARY: add nil guard matching appendValue to treat nil []byte as null consistently across top-level and struct paths --- arrow/arreflect/reflect_go_to_arrow.go | 6 +++++- arrow/arreflect/reflect_infer.go | 8 ++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index c52753e7..e9f877c5 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -136,7 +136,11 @@ func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e case arrow.STRING: b.(*array.StringBuilder).Append(v.String()) case arrow.BINARY: - b.(*array.BinaryBuilder).Append(v.Bytes()) + if v.IsNil() { + b.(*array.BinaryBuilder).AppendNull() + } else { + b.(*array.BinaryBuilder).Append(v.Bytes()) + } case arrow.DURATION: d, err := asDuration(v) if err != nil { diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index b49855d5..f88be44c 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -353,6 +353,9 @@ func InferGoType(dt arrow.DataType) (reflect.Type, error) { if err != nil { return nil, err } + if !keyType.Comparable() { + return nil, fmt.Errorf("arreflect: InferGoType: MAP key type %v is not comparable in Go: %w", mt.KeyType(), ErrUnsupportedType) + } valType, err := InferGoType(mt.ItemField().Type) if err != nil { return nil, err @@ -362,6 +365,7 @@ func InferGoType(dt arrow.DataType) (reflect.Type, error) { case arrow.STRUCT: st := dt.(*arrow.StructType) fields := make([]reflect.StructField, st.NumFields()) + seen 
:= make(map[string]string, st.NumFields()) for i := 0; i < st.NumFields(); i++ { f := st.Field(i) ft, err := InferGoType(f.Type) @@ -378,6 +382,10 @@ func InferGoType(dt arrow.DataType) (reflect.Type, error) { runes := []rune(f.Name) exportedName = string(unicode.ToUpper(runes[0])) + string(runes[1:]) } + if origName, dup := seen[exportedName]; dup { + return nil, fmt.Errorf("arreflect: InferGoType: field names %q and %q both export as %q: %w", origName, f.Name, exportedName, ErrUnsupportedType) + } + seen[exportedName] = f.Name fields[i] = reflect.StructField{ Name: exportedName, Type: ft, From 37fe93e64c5e5d6f9760545b155c1f3abe04339a Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 14:35:48 -0400 Subject: [PATCH 26/82] fix(arreflect): address review findings from jobs 892-894 - Add LARGE_STRING/LARGE_BINARY cases to appendPrimitiveValue - Example nullable test: use tag-based field lookup instead of index - Split 12 compound assert.True patterns into individual assertions - Split compound require.True for field validity into per-field checks - Add regression tests: nil []byte null, MAP non-comparable key error, STRUCT duplicate exported name error --- arrow/arreflect/example_test.go | 12 +++- arrow/arreflect/reflect_go_to_arrow.go | 4 +- arrow/arreflect/reflect_go_to_arrow_test.go | 62 ++++++++++++++------- arrow/arreflect/reflect_infer_test.go | 19 +++++++ arrow/arreflect/reflect_public_test.go | 4 +- 5 files changed, 75 insertions(+), 26 deletions(-) diff --git a/arrow/arreflect/example_test.go b/arrow/arreflect/example_test.go index 95346f63..5d3b4de5 100644 --- a/arrow/arreflect/example_test.go +++ b/arrow/arreflect/example_test.go @@ -298,8 +298,16 @@ func ExampleToAnySlice_nullableFields() { } for _, row := range rows { v := reflect.ValueOf(row) - name := v.FieldByIndex([]int{0}).String() - scoreField := v.FieldByIndex([]int{1}) + var name string + var scoreField reflect.Value + for i := 0; i < v.NumField(); i++ { + switch 
v.Type().Field(i).Tag.Get("arrow") { + case "name": + name = v.Field(i).String() + case "score": + scoreField = v.Field(i) + } + } if scoreField.IsNil() { fmt.Printf("%s: \n", name) } else { diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index e9f877c5..76b93407 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -133,9 +133,9 @@ func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e b.(*array.Float64Builder).Append(float64(v.Float())) case arrow.BOOL: b.(*array.BooleanBuilder).Append(v.Bool()) - case arrow.STRING: + case arrow.STRING, arrow.LARGE_STRING: b.(*array.StringBuilder).Append(v.String()) - case arrow.BINARY: + case arrow.BINARY, arrow.LARGE_BINARY: if v.IsNil() { b.(*array.BinaryBuilder).AppendNull() } else { diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index 709b0d5c..b8fc8865 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -68,7 +68,8 @@ func TestBuildPrimitiveArray(t *testing.T) { defer arr.Release() assert.True(t, arr.IsNull(1), "expected index 1 to be null") typed := arr.(*array.Int32) - assert.True(t, typed.Value(0) == 10 && typed.Value(2) == 30, "unexpected values") + assert.Equal(t, int32(10), typed.Value(0)) + assert.Equal(t, int32(30), typed.Value(2)) }) t.Run("bool", func(t *testing.T) { @@ -78,7 +79,9 @@ func TestBuildPrimitiveArray(t *testing.T) { defer arr.Release() assert.Equal(t, arrow.BOOL, arr.DataType().ID()) typed := arr.(*array.Boolean) - assert.True(t, typed.Value(0) && !typed.Value(1) && typed.Value(2), "unexpected bool values") + assert.True(t, typed.Value(0), "expected Value(0) to be true") + assert.False(t, typed.Value(1), "expected Value(1) to be false") + assert.True(t, typed.Value(2), "expected Value(2) to be true") }) t.Run("binary", func(t *testing.T) { @@ -300,10 +303,12 @@ func 
TestBuildStructArray(t *testing.T) { require.Equal(t, arrow.STRUCT, arr.DataType().ID(), "expected STRUCT, got %v", arr.DataType()) typed := arr.(*array.Struct) aArr := typed.Field(0).(*array.Int32) - assert.True(t, aArr.Value(0) == 1 && aArr.Value(1) == 2, "unexpected A values") + assert.Equal(t, int32(1), aArr.Value(0)) + assert.Equal(t, int32(2), aArr.Value(1)) bArr := typed.Field(1).(*array.Struct) bxArr := bArr.Field(0).(*array.Int32) - assert.True(t, bxArr.Value(0) == 10 && bxArr.Value(1) == 20, "unexpected B.X values") + assert.Equal(t, int32(10), bxArr.Value(0)) + assert.Equal(t, int32(20), bxArr.Value(1)) }) } @@ -399,7 +404,9 @@ func TestBuildFixedSizeListArray(t *testing.T) { assert.Equal(t, int32(3), typed.DataType().(*arrow.FixedSizeListType).Len(), "expected fixed size 3") values := typed.ListValues().(*array.Int32) assert.Equal(t, 9, values.Len()) - assert.True(t, values.Value(0) == 1 && values.Value(3) == 4 && values.Value(6) == 7, "unexpected values") + assert.Equal(t, int32(1), values.Value(0)) + assert.Equal(t, int32(4), values.Value(3)) + assert.Equal(t, int32(7), values.Value(6)) }) t.Run("float64_n2", func(t *testing.T) { @@ -462,12 +469,14 @@ func TestBuildRunEndEncodedArray(t *testing.T) { assert.Equal(t, 6, ree.Len()) runEnds := ree.RunEndsArr().(*array.Int32) assert.Equal(t, 3, runEnds.Len(), "expected 3 runs, got %d", runEnds.Len()) - assert.True(t, runEnds.Value(0) == 3 && runEnds.Value(1) == 5 && runEnds.Value(2) == 6, - "unexpected run ends: %d %d %d", runEnds.Value(0), runEnds.Value(1), runEnds.Value(2)) + assert.Equal(t, int32(3), runEnds.Value(0)) + assert.Equal(t, int32(5), runEnds.Value(1)) + assert.Equal(t, int32(6), runEnds.Value(2)) values := ree.Values().(*array.Int32) assert.Equal(t, 3, values.Len(), "expected 3 values, got %d", values.Len()) - assert.True(t, values.Value(0) == 1 && values.Value(1) == 2 && values.Value(2) == 3, - "unexpected values: %d %d %d", values.Value(0), values.Value(1), values.Value(2)) + 
assert.Equal(t, int32(1), values.Value(0)) + assert.Equal(t, int32(2), values.Value(1)) + assert.Equal(t, int32(3), values.Value(2)) }) t.Run("string_runs", func(t *testing.T) { @@ -489,8 +498,8 @@ func TestBuildRunEndEncodedArray(t *testing.T) { ree := arr.(*array.RunEndEncoded) assert.Equal(t, 3, ree.Len()) runEnds := ree.RunEndsArr().(*array.Int32) - assert.True(t, runEnds.Len() == 1 && runEnds.Value(0) == 3, - "expected 1 run ending at 3, got %d runs, end=%d", runEnds.Len(), runEnds.Value(0)) + assert.Equal(t, 1, runEnds.Len()) + assert.Equal(t, int32(3), runEnds.Value(0)) }) t.Run("all_distinct", func(t *testing.T) { @@ -573,8 +582,9 @@ func TestBuildTemporalTaggedArray(t *testing.T) { assert.Equal(t, 2, arr.Len()) d32arr := arr.(*array.Date32) got0 := d32arr.Value(0).ToTime() - assert.True(t, got0.Year() == ref.Year() && got0.Month() == ref.Month() && got0.Day() == ref.Day(), - "date32 roundtrip: got %v, want %v", got0, ref) + assert.Equal(t, ref.Year(), got0.Year()) + assert.Equal(t, ref.Month(), got0.Month()) + assert.Equal(t, ref.Day(), got0.Day()) }) t.Run("date64", func(t *testing.T) { @@ -587,8 +597,9 @@ func TestBuildTemporalTaggedArray(t *testing.T) { assert.Equal(t, arrow.DATE64, arr.DataType().ID()) d64arr := arr.(*array.Date64) got0 := d64arr.Value(0).ToTime() - assert.True(t, got0.Year() == ref.Year() && got0.Month() == ref.Month() && got0.Day() == ref.Day(), - "date64 roundtrip: got %v, want %v", got0, ref) + assert.Equal(t, ref.Year(), got0.Year()) + assert.Equal(t, ref.Month(), got0.Month()) + assert.Equal(t, ref.Day(), got0.Day()) }) t.Run("time32", func(t *testing.T) { @@ -603,10 +614,9 @@ func TestBuildTemporalTaggedArray(t *testing.T) { t32arr := arr.(*array.Time32) unit := arr.DataType().(*arrow.Time32Type).Unit got0 := t32arr.Value(0).ToTime(unit) - assert.True(t, got0.Hour() == ref.Hour() && got0.Minute() == ref.Minute() && got0.Second() == ref.Second(), - "time32 roundtrip: got hour=%d min=%d sec=%d, want hour=%d min=%d sec=%d", - 
got0.Hour(), got0.Minute(), got0.Second(), - ref.Hour(), ref.Minute(), ref.Second()) + assert.Equal(t, ref.Hour(), got0.Hour()) + assert.Equal(t, ref.Minute(), got0.Minute()) + assert.Equal(t, ref.Second(), got0.Second()) refWithMs := time.Date(ref.Year(), ref.Month(), ref.Day(), ref.Hour(), ref.Minute(), ref.Second(), 500_000_000, ref.Location()) svMs := reflect.ValueOf([]time.Time{refWithMs}) arrMs, err := buildTemporalArray(svMs, tagOpts{Temporal: "time32"}, mem) @@ -629,8 +639,9 @@ func TestBuildTemporalTaggedArray(t *testing.T) { t64arr := arr.(*array.Time64) unit := arr.DataType().(*arrow.Time64Type).Unit got0 := t64arr.Value(0).ToTime(unit) - assert.True(t, got0.Hour() == ref.Hour() && got0.Minute() == ref.Minute() && got0.Second() == ref.Second(), - "time64 roundtrip: got %v, want %v", got0, ref) + assert.Equal(t, ref.Hour(), got0.Hour()) + assert.Equal(t, ref.Minute(), got0.Minute()) + assert.Equal(t, ref.Second(), got0.Second()) refWithNanos := time.Date(ref.Year(), ref.Month(), ref.Day(), ref.Hour(), ref.Minute(), ref.Second(), 123456789, ref.Location()) sv64 := reflect.ValueOf([]time.Time{refWithNanos}) arr64, err := buildTemporalArray(sv64, tagOpts{Temporal: "time64"}, mem) @@ -643,3 +654,12 @@ func TestBuildTemporalTaggedArray(t *testing.T) { "time64 nanosecond: got %d, want %d", got64.Nanosecond(), refWithNanos.Nanosecond()) }) } + +func TestNilByteSliceIsNull(t *testing.T) { + mem := memory.NewGoAllocator() + arr, err := FromSlice([][]byte{[]byte("hello"), nil}, mem) + require.NoError(t, err) + defer arr.Release() + assert.False(t, arr.IsNull(0), "non-nil byte slice should not be null") + assert.True(t, arr.IsNull(1), "nil byte slice should be null") +} diff --git a/arrow/arreflect/reflect_infer_test.go b/arrow/arreflect/reflect_infer_test.go index 614caa3f..d6eded42 100644 --- a/arrow/arreflect/reflect_infer_test.go +++ b/arrow/arreflect/reflect_infer_test.go @@ -389,3 +389,22 @@ func TestInferGoType(t *testing.T) { require.Error(t, err, "expected 
error for unsupported type") assert.ErrorIs(t, err, ErrUnsupportedType) } + +func TestInferGoTypeMapNonComparableKey(t *testing.T) { + t.Run("MAP with non-comparable key returns error", func(t *testing.T) { + dt := arrow.MapOf(arrow.ListOf(arrow.PrimitiveTypes.Int32), arrow.BinaryTypes.String) + _, err := InferGoType(dt) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) +} + +func TestInferGoTypeStructDuplicateExportedNames(t *testing.T) { + t.Run("STRUCT with colliding exported names returns error", func(t *testing.T) { + st := arrow.StructOf( + arrow.Field{Name: "foo", Type: arrow.PrimitiveTypes.Int32}, + arrow.Field{Name: "Foo", Type: arrow.PrimitiveTypes.Int64}, + ) + _, err := InferGoType(st) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) +} diff --git a/arrow/arreflect/reflect_public_test.go b/arrow/arreflect/reflect_public_test.go index abb3063c..ae8d2f10 100644 --- a/arrow/arreflect/reflect_public_test.go +++ b/arrow/arreflect/reflect_public_test.go @@ -631,7 +631,9 @@ func TestToAnySliceStructArray(t *testing.T) { score = v.Field(fi) } } - require.True(t, id.IsValid() && label.IsValid() && score.IsValid(), "row %d: missing field(s)", i) + require.True(t, id.IsValid(), "row %d: id field not found", i) + require.True(t, label.IsValid(), "row %d: label field not found", i) + require.True(t, score.IsValid(), "row %d: score field not found", i) assert.Equal(t, want[i].id, id.Int(), "row %d id", i) assert.Equal(t, want[i].label, label.String(), "row %d label", i) if score.Kind() == reflect.Ptr { From bf6d5a3be8748798f7c853fc93b6b144cb591ee8 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 14:42:50 -0400 Subject: [PATCH 27/82] fix(arreflect): guard WithListView dispatch, deref ptr in empty-slice opts, drop dead LARGE_BINARY case - WithListView now returns error if element type is not slice-of-slices, preventing panic in buildListViewArray/inferListElemDT - Empty-slice path dereferences pointer goType before applyDecimalOpts and 
applyTemporalOpts so *decimal128.Num and *time.Time match - Remove unreachable LARGE_STRING/LARGE_BINARY from appendPrimitiveValue since inference never produces those types for top-level slices --- arrow/arreflect/reflect.go | 8 ++++++-- arrow/arreflect/reflect_go_to_arrow.go | 7 +++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index 67b33151..e563bdbb 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -407,8 +407,12 @@ func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Arr if err != nil { return nil, err } - dt = applyDecimalOpts(dt, goType, tOpts) - dt = applyTemporalOpts(dt, goType, tOpts) + derefType := goType + for derefType.Kind() == reflect.Ptr { + derefType = derefType.Elem() + } + dt = applyDecimalOpts(dt, derefType, tOpts) + dt = applyTemporalOpts(dt, derefType, tOpts) if tOpts.ListView { if lt, ok := dt.(*arrow.ListType); ok { dt = arrow.ListViewOf(lt.Elem()) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 76b93407..708046a9 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -46,6 +46,9 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A return buildRunEndEncodedArray(vals, mem) } if opts.ListView { + if elemType.Kind() != reflect.Slice || elemType == typeOfByteSlice { + return nil, fmt.Errorf("arreflect: WithListView requires a slice-of-slices element type, got %s: %w", elemType, ErrUnsupportedType) + } return buildListViewArray(vals, mem) } @@ -133,9 +136,9 @@ func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e b.(*array.Float64Builder).Append(float64(v.Float())) case arrow.BOOL: b.(*array.BooleanBuilder).Append(v.Bool()) - case arrow.STRING, arrow.LARGE_STRING: + case arrow.STRING: b.(*array.StringBuilder).Append(v.String()) - case arrow.BINARY, arrow.LARGE_BINARY: 
+ case arrow.BINARY: if v.IsNil() { b.(*array.BinaryBuilder).AppendNull() } else { From 40ad519e9c0de938d60820d0c85bcedc0b29c040 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 14:49:26 -0400 Subject: [PATCH 28/82] fix(arreflect): validate InferGoType field names, remove dead REE branch, error on invalid WithListView - InferGoType STRUCT: reject field names that produce invalid Go identifiers (digit-prefixed, underscore-prefixed) with ErrUnsupportedType - appendValue REE: remove unreachable nil-pointer branch (ptr already dereferenced by loop at top of appendValue) - FromSlice empty-slice path: return error when WithListView applied to non-list type instead of silently ignoring --- arrow/arreflect/reflect.go | 6 ++++-- arrow/arreflect/reflect_go_to_arrow.go | 10 +++------- arrow/arreflect/reflect_infer.go | 6 +++++- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index e563bdbb..f469363d 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -414,9 +414,11 @@ func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Arr dt = applyDecimalOpts(dt, derefType, tOpts) dt = applyTemporalOpts(dt, derefType, tOpts) if tOpts.ListView { - if lt, ok := dt.(*arrow.ListType); ok { - dt = arrow.ListViewOf(lt.Elem()) + lt, ok := dt.(*arrow.ListType) + if !ok { + return nil, fmt.Errorf("arreflect: WithListView requires a slice-of-slices element type, got %s: %w", goType, ErrUnsupportedType) } + dt = arrow.ListViewOf(lt.Elem()) } if tOpts.REE { dt = arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, dt) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 708046a9..6b3204cb 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -614,14 +614,10 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { } } case *array.RunEndEncodedBuilder: - if 
v.Kind() == reflect.Ptr && v.IsNil() { - tb.AppendNull() - } else { - if err := appendValue(tb.ValueBuilder(), v, tagOpts{}); err != nil { - return err - } - tb.Append(1) + if err := appendValue(tb.ValueBuilder(), v, tagOpts{}); err != nil { + return err } + tb.Append(1) default: if db, ok := b.(array.DictionaryBuilder); ok { return appendToDictBuilder(db, v) diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index f88be44c..5beec5f4 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -380,7 +380,11 @@ func InferGoType(dt arrow.DataType) (reflect.Type, error) { exportedName = fmt.Sprintf("Field%d", i) } else { runes := []rune(f.Name) - exportedName = string(unicode.ToUpper(runes[0])) + string(runes[1:]) + runes[0] = unicode.ToUpper(runes[0]) + exportedName = string(runes) + } + if !unicode.IsLetter(rune(exportedName[0])) { + return nil, fmt.Errorf("arreflect: InferGoType: field name %q produces invalid Go identifier %q: %w", f.Name, exportedName, ErrUnsupportedType) } if origName, dup := seen[exportedName]; dup { return nil, fmt.Errorf("arreflect: InferGoType: field names %q and %q both export as %q: %w", origName, f.Name, exportedName, ErrUnsupportedType) From 2682e33e0b124ac52bc8de9a7c612b92f416d386 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 14:56:27 -0400 Subject: [PATCH 29/82] fix(arreflect): use rune not byte for identifier check, use buildArray for empty REE values - InferGoType STRUCT: check runes[0] directly instead of casting exportedName[0] to rune, preventing misclassification of multi-byte Unicode field names - buildRunEndEncodedArray: use buildArray for empty values array instead of buildPrimitiveArray, so non-primitive element types route correctly --- arrow/arreflect/reflect_go_to_arrow.go | 2 +- arrow/arreflect/reflect_infer.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arrow/arreflect/reflect_go_to_arrow.go 
b/arrow/arreflect/reflect_go_to_arrow.go index 6b3204cb..f060bbbd 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -903,7 +903,7 @@ func buildRunEndEncodedArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar return nil, err } defer runEndsArr.Release() - valuesArr, err := buildPrimitiveArray(reflect.MakeSlice(vals.Type(), 0, 0), mem) + valuesArr, err := buildArray(reflect.MakeSlice(vals.Type(), 0, 0), tagOpts{}, mem) if err != nil { return nil, err } diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index 5beec5f4..6261fa6c 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -381,11 +381,11 @@ func InferGoType(dt arrow.DataType) (reflect.Type, error) { } else { runes := []rune(f.Name) runes[0] = unicode.ToUpper(runes[0]) + if !unicode.IsLetter(runes[0]) { + return nil, fmt.Errorf("arreflect: InferGoType: field name %q produces invalid Go identifier: %w", f.Name, ErrUnsupportedType) + } exportedName = string(runes) } - if !unicode.IsLetter(rune(exportedName[0])) { - return nil, fmt.Errorf("arreflect: InferGoType: field name %q produces invalid Go identifier %q: %w", f.Name, exportedName, ErrUnsupportedType) - } if origName, dup := seen[exportedName]; dup { return nil, fmt.Errorf("arreflect: InferGoType: field names %q and %q both export as %q: %w", origName, f.Name, exportedName, ErrUnsupportedType) } From 7abf9d957f3ed377c3d04a8911aa060cabe73299 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 15:07:31 -0400 Subject: [PATCH 30/82] fix(arreflect): correct REE append order, match Dict/REE priority, add coverage tests - appendValue REE: swap to Append(1) before ValueBuilder to match documented Arrow builder contract - FromSlice empty-slice: Dict takes priority over REE (matching buildArray) to prevent schema inconsistency between empty and non-empty paths - Add tests: WithTemporal invalid value error, empty-slice WithREE type --- 
arrow/arreflect/reflect.go | 5 ++--- arrow/arreflect/reflect_go_to_arrow.go | 2 +- arrow/arreflect/reflect_public_test.go | 14 ++++++++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index f469363d..4b7f164d 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -420,11 +420,10 @@ func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Arr } dt = arrow.ListViewOf(lt.Elem()) } - if tOpts.REE { - dt = arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, dt) - } if tOpts.Dict { dt = &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: dt} + } else if tOpts.REE { + dt = arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, dt) } b := array.NewBuilder(mem, dt) defer b.Release() diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index f060bbbd..fe218b2b 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -614,10 +614,10 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { } } case *array.RunEndEncodedBuilder: + tb.Append(1) if err := appendValue(tb.ValueBuilder(), v, tagOpts{}); err != nil { return err } - tb.Append(1) default: if db, ok := b.(array.DictionaryBuilder); ok { return appendToDictBuilder(db, v) diff --git a/arrow/arreflect/reflect_public_test.go b/arrow/arreflect/reflect_public_test.go index ae8d2f10..c344499e 100644 --- a/arrow/arreflect/reflect_public_test.go +++ b/arrow/arreflect/reflect_public_test.go @@ -19,6 +19,7 @@ package arreflect import ( "reflect" "testing" + "time" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" @@ -234,6 +235,19 @@ func TestFromGoSlice(t *testing.T) { assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) }) + + t.Run("empty slice with WithREE", func(t *testing.T) { + arr, err := FromSlice([]int32{}, mem, WithREE()) + require.NoError(t, err) + defer 
arr.Release() + + assert.Equal(t, arrow.RUN_END_ENCODED, arr.DataType().ID()) + }) + + t.Run("WithTemporal invalid value returns error", func(t *testing.T) { + _, err := FromSlice([]time.Time{}, mem, WithTemporal("invalid")) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) } func TestRecordToSlice(t *testing.T) { From 8f712e4ff305259be7c1771151b5f5a47efcbc68 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 15:14:45 -0400 Subject: [PATCH 31/82] fix(arreflect): guard FixedSizeList type mismatch, reject unsupported dict value types - appendValue FixedSizeListBuilder: check v.Kind is slice or array before calling Len(), returning ErrTypeMismatch instead of panic - buildDictionaryArray: early error for temporal/decimal/struct value types that appendToDictBuilder cannot handle, with clear message --- arrow/arreflect/reflect_go_to_arrow.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index fe218b2b..83ef6c4f 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -575,6 +575,9 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { case *array.ListBuilder, *array.LargeListBuilder, *array.ListViewBuilder, *array.LargeListViewBuilder: return appendListElement(b, v) case *array.FixedSizeListBuilder: + if v.Kind() != reflect.Slice && v.Kind() != reflect.Array { + return fmt.Errorf("arreflect: cannot set fixed-size list from %s: %w", v.Type(), ErrTypeMismatch) + } expectedLen := int(tb.Type().(*arrow.FixedSizeListType).Len()) if v.Len() != expectedLen { return fmt.Errorf("arreflect: fixed-size list length mismatch: got %d, want %d", v.Len(), expectedLen) @@ -873,6 +876,15 @@ func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array return nil, err } + switch valDT.ID() { + case arrow.INT8, arrow.INT16, arrow.INT32, arrow.INT64, + arrow.UINT8, arrow.UINT16, arrow.UINT32, 
arrow.UINT64, + arrow.FLOAT32, arrow.FLOAT64, + arrow.STRING, arrow.BINARY, arrow.BOOL: + default: + return nil, fmt.Errorf("arreflect: dictionary encoding not supported for %s: %w", valDT, ErrUnsupportedType) + } + dt := &arrow.DictionaryType{ IndexType: arrow.PrimitiveTypes.Int32, ValueType: valDT, From 19b6a487597375bc57d71ea8c87f5083d7d08d0e Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 15:23:16 -0400 Subject: [PATCH 32/82] fix(arreflect): correct empty-slice ListView element type, validate Dict on empty path - FromSlice empty-slice WithListView: infer inner element type from Go type directly instead of using outer Arrow ListType, matching the non-empty buildListLikeArray path - Extract validateDictValueType helper and call it on empty-slice Dict path so unsupported types error consistently - Strengthen empty-slice WithListView test to assert element type is INT32 --- arrow/arreflect/reflect.go | 16 +++++++++++++--- arrow/arreflect/reflect_go_to_arrow.go | 21 ++++++++++++++------- arrow/arreflect/reflect_public_test.go | 1 + 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index 4b7f164d..2b1af807 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -414,13 +414,23 @@ func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Arr dt = applyDecimalOpts(dt, derefType, tOpts) dt = applyTemporalOpts(dt, derefType, tOpts) if tOpts.ListView { - lt, ok := dt.(*arrow.ListType) - if !ok { + if derefType.Kind() != reflect.Slice || derefType == typeOfByteSlice { return nil, fmt.Errorf("arreflect: WithListView requires a slice-of-slices element type, got %s: %w", goType, ErrUnsupportedType) } - dt = arrow.ListViewOf(lt.Elem()) + innerElem := derefType.Elem() + for innerElem.Kind() == reflect.Ptr { + innerElem = innerElem.Elem() + } + innerDT, err := inferArrowType(innerElem) + if err != nil { + return nil, err + } + dt = 
arrow.ListViewOf(innerDT) } if tOpts.Dict { + if err := validateDictValueType(dt); err != nil { + return nil, err + } dt = &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: dt} } else if tOpts.REE { dt = arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, dt) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 83ef6c4f..4f9e21c4 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -867,6 +867,18 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar return fb.NewArray(), nil } +func validateDictValueType(dt arrow.DataType) error { + switch dt.ID() { + case arrow.INT8, arrow.INT16, arrow.INT32, arrow.INT64, + arrow.UINT8, arrow.UINT16, arrow.UINT32, arrow.UINT64, + arrow.FLOAT32, arrow.FLOAT64, + arrow.STRING, arrow.BINARY, arrow.BOOL: + return nil + default: + return fmt.Errorf("arreflect: dictionary encoding not supported for %s: %w", dt, ErrUnsupportedType) + } +} + func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { n := vals.Len() elemType, isPtr := derefSliceElem(vals) @@ -876,13 +888,8 @@ func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array return nil, err } - switch valDT.ID() { - case arrow.INT8, arrow.INT16, arrow.INT32, arrow.INT64, - arrow.UINT8, arrow.UINT16, arrow.UINT32, arrow.UINT64, - arrow.FLOAT32, arrow.FLOAT64, - arrow.STRING, arrow.BINARY, arrow.BOOL: - default: - return nil, fmt.Errorf("arreflect: dictionary encoding not supported for %s: %w", valDT, ErrUnsupportedType) + if err := validateDictValueType(valDT); err != nil { + return nil, err } dt := &arrow.DictionaryType{ diff --git a/arrow/arreflect/reflect_public_test.go b/arrow/arreflect/reflect_public_test.go index c344499e..d991b1ce 100644 --- a/arrow/arreflect/reflect_public_test.go +++ b/arrow/arreflect/reflect_public_test.go @@ -234,6 +234,7 @@ func TestFromGoSlice(t *testing.T) { 
defer arr.Release() assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) + assert.Equal(t, arrow.INT32, arr.DataType().(*arrow.ListViewType).Elem().ID()) }) t.Run("empty slice with WithREE", func(t *testing.T) { From 3b9d97f36fb21981c2fdcef4e36ad9bc185531a4 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 15:30:24 -0400 Subject: [PATCH 33/82] fix(arreflect): remove BOOL from dict value whitelist, add bool dict error test arrow.NewDictionaryBuilder panics on BOOL value type; remove it from validateDictValueType allowlist and add test confirming the error. --- arrow/arreflect/reflect_go_to_arrow.go | 2 +- arrow/arreflect/reflect_go_to_arrow_test.go | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 4f9e21c4..2b5c9fb2 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -872,7 +872,7 @@ func validateDictValueType(dt arrow.DataType) error { case arrow.INT8, arrow.INT16, arrow.INT32, arrow.INT64, arrow.UINT8, arrow.UINT16, arrow.UINT32, arrow.UINT64, arrow.FLOAT32, arrow.FLOAT64, - arrow.STRING, arrow.BINARY, arrow.BOOL: + arrow.STRING, arrow.BINARY: return nil default: return fmt.Errorf("arreflect: dictionary encoding not supported for %s: %w", dt, ErrUnsupportedType) diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index b8fc8865..1b764b67 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -453,6 +453,11 @@ func TestBuildDictionaryArray(t *testing.T) { dt := arr.DataType().(*arrow.DictionaryType) assert.Equal(t, arrow.INT32, dt.IndexType.ID(), "expected INT32 index, got %v", dt.IndexType) }) + + t.Run("bool_dict_returns_error", func(t *testing.T) { + _, err := buildArray(reflect.ValueOf([]bool{true, false}), tagOpts{Dict: true}, mem) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) } func 
TestBuildRunEndEncodedArray(t *testing.T) { From 2f2bfd4b883b01783f7a035a29d918d7eda71deb Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 15:38:20 -0400 Subject: [PATCH 34/82] fix(arreflect): remove dead REE appendValue case, add int/uint and tag fallback tests - Remove unreachable RunEndEncodedBuilder case from appendValue (REE struct fields are rejected by applyEncodingOpts before reaching it) - Add []int and []uint cases to numeric_types test table - Add parseTag test pinning silent fallback for unrecognized temporal tag (Date32 with wrong case produces empty Temporal) --- arrow/arreflect/reflect_go_to_arrow.go | 5 ----- arrow/arreflect/reflect_go_to_arrow_test.go | 2 ++ arrow/arreflect/reflect_test.go | 4 ++++ 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 2b5c9fb2..f7758287 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -616,11 +616,6 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { return fmt.Errorf("struct field %q: %w", fm.Name, err) } } - case *array.RunEndEncodedBuilder: - tb.Append(1) - if err := appendValue(tb.ValueBuilder(), v, tagOpts{}); err != nil { - return err - } default: if db, ok := b.(array.DictionaryBuilder); ok { return appendToDictBuilder(db, v) diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index 1b764b67..4e785446 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -106,6 +106,8 @@ func TestBuildPrimitiveArray(t *testing.T) { {[]uint64{1, 2}, arrow.UINT64}, {[]float32{1.0, 2.0}, arrow.FLOAT32}, {[]float64{1.1, 2.2}, arrow.FLOAT64}, + {[]int{1, -2, 3}, arrow.INT64}, + {[]uint{1, 2, 3}, arrow.UINT64}, } for _, tc := range cases { arr, err := buildArray(reflect.ValueOf(tc.vals), tagOpts{}, mem) diff --git 
a/arrow/arreflect/reflect_test.go b/arrow/arreflect/reflect_test.go index d07976aa..0df476ad 100644 --- a/arrow/arreflect/reflect_test.go +++ b/arrow/arreflect/reflect_test.go @@ -73,6 +73,10 @@ func TestParseTag(t *testing.T) { input: "name,unknown_option", want: tagOpts{Name: "name"}, }, + { + input: `field,Date32`, + want: tagOpts{Name: "field"}, + }, } for _, tt := range tests { From dc839f4784c769e42bf8b8b2a0e1ddb0089fc777 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 16:14:48 -0400 Subject: [PATCH 35/82] fix(arreflect): validate full field name as Go identifier, guard dict tag on struct fields - InferGoType STRUCT: validate every rune of exported name (not just first) so hyphens, dots, spaces in Arrow field names return error instead of panicking in reflect.StructOf - applyEncodingOpts Dict: call validateDictValueType so dict-tagged struct fields with non-primitive types get a clear error - Add table-driven test for invalid identifier field names --- arrow/arreflect/reflect_infer.go | 13 +++++++++++-- arrow/arreflect/reflect_infer_test.go | 19 +++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/arrow/arreflect/reflect_infer.go b/arrow/arreflect/reflect_infer.go index 6261fa6c..a3ea813f 100644 --- a/arrow/arreflect/reflect_infer.go +++ b/arrow/arreflect/reflect_infer.go @@ -193,6 +193,9 @@ func applyTemporalOpts(dt arrow.DataType, origType reflect.Type, opts tagOpts) a func applyEncodingOpts(dt arrow.DataType, fm fieldMeta) (arrow.DataType, error) { switch { case fm.Opts.Dict: + if err := validateDictValueType(dt); err != nil { + return nil, fmt.Errorf("arreflect: dict tag on field %q: %w", fm.Name, err) + } return &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: dt}, nil case fm.Opts.ListView: lt, ok := dt.(*arrow.ListType) @@ -381,8 +384,14 @@ func InferGoType(dt arrow.DataType) (reflect.Type, error) { } else { runes := []rune(f.Name) runes[0] = unicode.ToUpper(runes[0]) - if 
!unicode.IsLetter(runes[0]) { - return nil, fmt.Errorf("arreflect: InferGoType: field name %q produces invalid Go identifier: %w", f.Name, ErrUnsupportedType) + for j, r := range runes { + if j == 0 { + if !unicode.IsLetter(r) { + return nil, fmt.Errorf("arreflect: InferGoType: field name %q produces invalid Go identifier: %w", f.Name, ErrUnsupportedType) + } + } else if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' { + return nil, fmt.Errorf("arreflect: InferGoType: field name %q produces invalid Go identifier: %w", f.Name, ErrUnsupportedType) + } } exportedName = string(runes) } diff --git a/arrow/arreflect/reflect_infer_test.go b/arrow/arreflect/reflect_infer_test.go index d6eded42..736320ba 100644 --- a/arrow/arreflect/reflect_infer_test.go +++ b/arrow/arreflect/reflect_infer_test.go @@ -408,3 +408,22 @@ func TestInferGoTypeStructDuplicateExportedNames(t *testing.T) { assert.ErrorIs(t, err, ErrUnsupportedType) }) } + +func TestInferGoTypeStructInvalidIdentifier(t *testing.T) { + cases := []struct { + name string + fieldName string + }{ + {"hyphenated", "my-field"}, + {"space", "a b"}, + {"dot", "first.name"}, + {"digit prefix", "1st"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + st := arrow.StructOf(arrow.Field{Name: tc.fieldName, Type: arrow.PrimitiveTypes.Int32}) + _, err := InferGoType(st) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + } +} From 937e0433d431af436565e61277773255b0fd9a16 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 16:25:15 -0400 Subject: [PATCH 36/82] fix(arreflect): thread opts through buildRunEndEncodedArray, use checked allocator - buildRunEndEncodedArray now accepts tagOpts and passes them (with REE=false) to buildArray for values, so WithTemporal/WithDecimal are applied to REE value arrays instead of being silently dropped - TestBuildTemporalTaggedArray: use memory.NewCheckedAllocator with AssertSize to detect allocation leaks --- arrow/arreflect/reflect_go_to_arrow.go | 10 
++++++---- arrow/arreflect/reflect_go_to_arrow_test.go | 3 ++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index f7758287..9f0cef5e 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -43,7 +43,7 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A return buildDictionaryArray(vals, mem) } if opts.REE { - return buildRunEndEncodedArray(vals, mem) + return buildRunEndEncodedArray(vals, opts, mem) } if opts.ListView { if elemType.Kind() != reflect.Slice || elemType == typeOfByteSlice { @@ -910,14 +910,16 @@ func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array return db.NewArray(), nil } -func buildRunEndEncodedArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { +func buildRunEndEncodedArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { + valOpts := opts + valOpts.REE = false if vals.Len() == 0 { runEndsArr, err := buildPrimitiveArray(reflect.MakeSlice(reflect.TypeOf([]int32{}), 0, 0), mem) if err != nil { return nil, err } defer runEndsArr.Release() - valuesArr, err := buildArray(reflect.MakeSlice(vals.Type(), 0, 0), tagOpts{}, mem) + valuesArr, err := buildArray(reflect.MakeSlice(vals.Type(), 0, 0), valOpts, mem) if err != nil { return nil, err } @@ -979,7 +981,7 @@ func buildRunEndEncodedArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar for i, r := range runs { runValues.Index(i).Set(r.val) } - valuesArr, err := buildArray(runValues, tagOpts{}, mem) + valuesArr, err := buildArray(runValues, valOpts, mem) if err != nil { return nil, fmt.Errorf("run-end encoded values: %w", err) } diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index 4e785446..b9a416e9 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ 
b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -574,7 +574,8 @@ func TestBuildListViewArray(t *testing.T) { } func TestBuildTemporalTaggedArray(t *testing.T) { - mem := memory.NewGoAllocator() + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) ref := time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC) From 4f62a23da0c9d7d10a12b45d58d5c718f0f7a474 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 16:27:44 -0400 Subject: [PATCH 37/82] fix(arreflect): remove dead typeOfDuration from buildArray and buildTemporalArray time.Duration has reflect.Kind Int64, not Struct, so the typeOfDuration case inside the reflect.Struct switch in buildArray was unreachable. Duration slices correctly route through buildPrimitiveArray. --- arrow/arreflect/reflect_go_to_arrow.go | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 9f0cef5e..12b8afc4 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -72,7 +72,7 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A case reflect.Struct: switch elemType { - case typeOfTime, typeOfDuration: + case typeOfTime: return buildTemporalArray(vals, opts, mem) default: return buildStructArray(vals, mem) @@ -316,23 +316,6 @@ func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) return tb.NewArray(), nil } - case typeOfDuration: - dt := &arrow.DurationType{Unit: arrow.Nanosecond} - db := array.NewDurationBuilder(mem, dt) - defer db.Release() - db.Reserve(vals.Len()) - if err := iterSlice(vals, isPtr, db.AppendNull, func(v reflect.Value) error { - d, err := asDuration(v) - if err != nil { - return err - } - db.Append(arrow.Duration(d.Nanoseconds())) - return nil - }); err != nil { - return nil, err - } - return db.NewArray(), nil - default: return nil, fmt.Errorf("unsupported temporal 
type %v: %w", elemType, ErrUnsupportedType) } From 4fbb131212189b155678299993c0cb7e2594c8c8 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 16:31:36 -0400 Subject: [PATCH 38/82] fix(arreflect): clear ListView in REE valOpts, add REE+temporal test - buildRunEndEncodedArray: also clear valOpts.ListView to prevent spurious error when REE and ListView are both set - Add ree_with_temporal_date32 test confirming REE values array gets DATE32 type when WithTemporal is combined with WithREE --- arrow/arreflect/reflect_go_to_arrow.go | 1 + arrow/arreflect/reflect_go_to_arrow_test.go | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 12b8afc4..0a452686 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -896,6 +896,7 @@ func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array func buildRunEndEncodedArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { valOpts := opts valOpts.REE = false + valOpts.ListView = false if vals.Len() == 0 { runEndsArr, err := buildPrimitiveArray(reflect.MakeSlice(reflect.TypeOf([]int32{}), 0, 0), mem) if err != nil { diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index b9a416e9..b5b411d7 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -530,6 +530,18 @@ func TestBuildRunEndEncodedArray(t *testing.T) { ree := arr.(*array.RunEndEncoded) assert.Equal(t, 2, ree.RunEndsArr().Len(), "expected 2 runs (x+x coalesced, y), got %d", ree.RunEndsArr().Len()) }) + + t.Run("ree_with_temporal_date32", func(t *testing.T) { + t1 := time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC) + t2 := time.Date(2024, 6, 15, 0, 0, 0, 0, time.UTC) + vals := []time.Time{t1, t1, t2} + arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true, Temporal: 
"date32"}, mem) + require.NoError(t, err) + defer arr.Release() + ree := arr.(*array.RunEndEncoded) + assert.Equal(t, 3, ree.Len()) + assert.Equal(t, arrow.DATE32, ree.Values().DataType().ID()) + }) } func TestBuildListViewArray(t *testing.T) { From 2162e389cc081eaae1b98595da129a686024defb Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 16:47:53 -0400 Subject: [PATCH 39/82] fix(arreflect): checked listAppender assertion, multi-ptr deref in list builder, listview roundtrip test - appendListElement: use two-value type assertion for listAppender interface to return error instead of panic on unexpected builder type - buildListLikeArray: loop pointer dereference to support multi-level pointer elements ([]**[]T) instead of single-level only - Add listview_struct_field_roundtrip integration test covering FromSlice -> ToSlice with a listview-tagged slice field --- arrow/arreflect/reflect_go_to_arrow.go | 16 ++++++++++----- arrow/arreflect/reflect_integration_test.go | 22 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 0a452686..7b3ffebb 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -657,7 +657,10 @@ func appendListElement(b array.Builder, v reflect.Value) error { AppendNull() ValueBuilder() array.Builder } - la := b.(listAppender) + la, ok := b.(listAppender) + if !ok { + return fmt.Errorf("unexpected list builder type %T: %w", b, ErrUnsupportedType) + } if v.Kind() == reflect.Slice && v.IsNil() { la.AppendNull() return nil @@ -684,7 +687,7 @@ func appendListElement(b array.Builder, v reflect.Value) error { } func buildListLikeArray(vals reflect.Value, mem memory.Allocator, isView bool) (arrow.Array, error) { - elemDT, isOuterPtr, err := inferListElemDT(vals) + elemDT, _, err := inferListElemDT(vals) if err != nil { return nil, err } @@ -710,14 +713,17 @@ func 
buildListLikeArray(vals reflect.Value, mem memory.Allocator, isView bool) ( vb := bldr.ValueBuilder() for i := 0; i < vals.Len(); i++ { outer := vals.Index(i) - if isOuterPtr { + for outer.Kind() == reflect.Ptr { if outer.IsNil() { bldr.AppendNull() - continue + break } outer = outer.Elem() } - if outer.IsNil() { + if outer.Kind() == reflect.Ptr { + continue + } + if outer.Kind() == reflect.Slice && outer.IsNil() { bldr.AppendNull() continue } diff --git a/arrow/arreflect/reflect_integration_test.go b/arrow/arreflect/reflect_integration_test.go index 002b811c..30a91e10 100644 --- a/arrow/arreflect/reflect_integration_test.go +++ b/arrow/arreflect/reflect_integration_test.go @@ -20,6 +20,7 @@ import ( "testing" "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -352,6 +353,27 @@ func TestReflectIntegration(t *testing.T) { require.Len(t, output, len(rows)) assert.Equal(t, rows, output) }) + + t.Run("listview_struct_field_roundtrip", func(t *testing.T) { + type Row struct { + Name string `arrow:"name"` + Tags []string `arrow:"tags,listview"` + } + rows := []Row{ + {"alice", []string{"admin", "user"}}, + {"bob", []string{"guest"}}, + } + arr, err := FromSlice(rows, nil) + require.NoError(t, err) + defer arr.Release() + + sa := arr.(*array.Struct) + require.Equal(t, arrow.LIST_VIEW, sa.Field(1).DataType().ID()) + + output, err := ToSlice[Row](arr) + require.NoError(t, err) + assert.Equal(t, rows, output) + }) } func BenchmarkReflectFromGoSlice(b *testing.B) { From 38926fb0326ce4adca25d0ed1b7aa83522fa16b5 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 16:53:25 -0400 Subject: [PATCH 40/82] fix(arreflect): error on non-slice after ptr deref in list builder, add nil pointer list test - buildListLikeArray: return ErrTypeMismatch if element is not slice/array after full pointer dereference, preventing panic on mistyped inputs - Add 
nil_pointer_list_element test with []*[]int32{&a, nil, &a} verifying null detection through pointer dereference loop --- arrow/arreflect/reflect_go_to_arrow.go | 3 +++ arrow/arreflect/reflect_go_to_arrow_test.go | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 7b3ffebb..e6d28f38 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -727,6 +727,9 @@ func buildListLikeArray(vals reflect.Value, mem memory.Allocator, isView bool) ( bldr.AppendNull() continue } + if outer.Kind() != reflect.Slice && outer.Kind() != reflect.Array { + return nil, fmt.Errorf("arreflect: %s [%d]: expected slice, got %s: %w", label, i, outer.Type(), ErrTypeMismatch) + } beginRow(outer.Len()) for j := 0; j < outer.Len(); j++ { if err := appendValue(vb, outer.Index(j), tagOpts{}); err != nil { diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index b5b411d7..a1ea0777 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -583,6 +583,18 @@ func TestBuildListViewArray(t *testing.T) { allVals := arr.(*array.ListView).ListValues().(*array.Int32) assert.Equal(t, 3, allVals.Len(), "expected 3 total values, got %d", allVals.Len()) }) + + t.Run("nil_pointer_list_element", func(t *testing.T) { + a := []int32{1, 2} + vals := []*[]int32{&a, nil, &a} + arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, 3, arr.Len()) + assert.False(t, arr.IsNull(0)) + assert.True(t, arr.IsNull(1)) + assert.False(t, arr.IsNull(2)) + }) } func TestBuildTemporalTaggedArray(t *testing.T) { From ed35ef8fbc309fa607c00d43cd2e9ba1eecbddfd Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 16:58:35 -0400 Subject: [PATCH 41/82] fix(arreflect): place nil-pointer list tests in correct test 
functions - Move nil_pointer_list_element to TestBuildListArray (exercises isView=false) - Add nil_pointer_listview_element in TestBuildListViewArray with ListView:true opts (exercises isView=true) --- arrow/arreflect/reflect_go_to_arrow_test.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index a1ea0777..67628850 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -337,6 +337,19 @@ func TestBuildListArray(t *testing.T) { assert.True(t, arr.IsNull(1), "expected index 1 to be null") }) + t.Run("nil_pointer_list_element", func(t *testing.T) { + a := []int32{1, 2} + vals := []*[]int32{&a, nil, &a} + arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, arrow.LIST, arr.DataType().ID()) + assert.Equal(t, 3, arr.Len()) + assert.False(t, arr.IsNull(0)) + assert.True(t, arr.IsNull(1)) + assert.False(t, arr.IsNull(2)) + }) + t.Run("string_lists", func(t *testing.T) { vals := [][]string{{"a", "b"}, {"c"}} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) @@ -584,12 +597,13 @@ func TestBuildListViewArray(t *testing.T) { assert.Equal(t, 3, allVals.Len(), "expected 3 total values, got %d", allVals.Len()) }) - t.Run("nil_pointer_list_element", func(t *testing.T) { + t.Run("nil_pointer_listview_element", func(t *testing.T) { a := []int32{1, 2} vals := []*[]int32{&a, nil, &a} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) + arr, err := buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem) require.NoError(t, err) defer arr.Release() + assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) assert.Equal(t, 3, arr.Len()) assert.False(t, arr.IsNull(0)) assert.True(t, arr.IsNull(1)) From 2dfb0404a8ee4ec4ce136985c8e79a7a42e4b264 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 
17:06:22 -0400 Subject: [PATCH 42/82] fix(arreflect): nil FixedSizeList appends null, multi-level ptr deref in dict builder - appendValue FixedSizeListBuilder: nil non-pointer slice now appends null instead of returning length-mismatch error, matching list/listview - buildDictionaryArray: loop pointer dereference to support multi-level pointer elements ([]**string) instead of single deref --- arrow/arreflect/reflect_go_to_arrow.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index e6d28f38..73d0f463 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -561,6 +561,10 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { if v.Kind() != reflect.Slice && v.Kind() != reflect.Array { return fmt.Errorf("arreflect: cannot set fixed-size list from %s: %w", v.Type(), ErrTypeMismatch) } + if v.Kind() == reflect.Slice && v.IsNil() { + tb.AppendNull() + return nil + } expectedLen := int(tb.Type().(*arrow.FixedSizeListType).Len()) if v.Len() != expectedLen { return fmt.Errorf("arreflect: fixed-size list length mismatch: got %d, want %d", v.Len(), expectedLen) @@ -868,7 +872,7 @@ func validateDictValueType(dt arrow.DataType) error { func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { n := vals.Len() - elemType, isPtr := derefSliceElem(vals) + elemType, _ := derefSliceElem(vals) valDT, err := inferArrowType(elemType) if err != nil { @@ -888,13 +892,16 @@ func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array for i := 0; i < n; i++ { elem := vals.Index(i) - if isPtr { + for elem.Kind() == reflect.Ptr { if elem.IsNil() { db.AppendNull() - continue + break } elem = elem.Elem() } + if elem.Kind() == reflect.Ptr { + continue + } if err := appendToDictBuilder(db, elem); err != nil { return nil, fmt.Errorf("dictionary element [%d]: %w", i, 
err) } From c9cd4ee90bbc781a4085a726835a814cf2eb1fdf Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 17:10:50 -0400 Subject: [PATCH 43/82] test(arreflect): add nil FixedSizeList and pointer dict test coverage - TestBuildFixedSizeListArray: add nil_slice_appends_null exercising the nil-slice null path through appendValue - TestBuildDictionaryArray: add pointer_string_with_nil exercising multi-level ptr deref with nil entries in dict builder --- arrow/arreflect/reflect_go_to_arrow_test.go | 32 +++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index 67628850..06297cba 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -432,6 +432,26 @@ func TestBuildFixedSizeListArray(t *testing.T) { require.Equal(t, arrow.FIXED_SIZE_LIST, arr.DataType().ID(), "expected FIXED_SIZE_LIST, got %v", arr.DataType()) assert.Equal(t, int32(2), arr.DataType().(*arrow.FixedSizeListType).Len(), "expected fixed size 2") }) + + t.Run("nil_slice_appends_null", func(t *testing.T) { + dt := arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Int32) + bldr := array.NewFixedSizeListBuilder(mem, int32(3), arrow.PrimitiveTypes.Int32) + defer bldr.Release() + + var nilSlice []int32 + err := appendValue(bldr, reflect.ValueOf(&nilSlice).Elem(), tagOpts{}) + require.NoError(t, err) + + bldr.Append(true) + vb := bldr.ValueBuilder().(*array.Int32Builder) + vb.AppendValues([]int32{1, 2, 3}, nil) + + arr := bldr.NewArray() + defer arr.Release() + _ = dt + assert.True(t, arr.IsNull(0), "nil slice should be null") + assert.False(t, arr.IsNull(1), "non-nil should not be null") + }) } func TestBuildDictionaryArray(t *testing.T) { @@ -473,6 +493,18 @@ func TestBuildDictionaryArray(t *testing.T) { _, err := buildArray(reflect.ValueOf([]bool{true, false}), tagOpts{Dict: true}, mem) assert.ErrorIs(t, err, ErrUnsupportedType) }) + + 
t.Run("pointer_string_with_nil", func(t *testing.T) { + s := "hello" + vals := []*string{&s, nil, &s} + arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, 3, arr.Len()) + assert.False(t, arr.IsNull(0)) + assert.True(t, arr.IsNull(1)) + assert.False(t, arr.IsNull(2)) + }) } func TestBuildRunEndEncodedArray(t *testing.T) { From 5bf71587c3f3974bd388f74bbd39bc10f5f77d1b Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 14 Apr 2026 17:15:59 -0400 Subject: [PATCH 44/82] test(arreflect): remove dead var, add dict dedup assertion and multi-level ptr test - Remove unused dt variable in nil_slice_appends_null test - pointer_string_with_nil: add DICTIONARY type and dedup assertions - Add multi_level_pointer_string test with []**string exercising the loop-based pointer dereference in buildDictionaryArray --- arrow/arreflect/reflect_go_to_arrow_test.go | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index 06297cba..f2d9693a 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -434,7 +434,6 @@ func TestBuildFixedSizeListArray(t *testing.T) { }) t.Run("nil_slice_appends_null", func(t *testing.T) { - dt := arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Int32) bldr := array.NewFixedSizeListBuilder(mem, int32(3), arrow.PrimitiveTypes.Int32) defer bldr.Release() @@ -448,7 +447,6 @@ func TestBuildFixedSizeListArray(t *testing.T) { arr := bldr.NewArray() defer arr.Release() - _ = dt assert.True(t, arr.IsNull(0), "nil slice should be null") assert.False(t, arr.IsNull(1), "non-nil should not be null") }) @@ -500,6 +498,24 @@ func TestBuildDictionaryArray(t *testing.T) { arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem) require.NoError(t, err) defer arr.Release() + typed := 
arr.(*array.Dictionary) + assert.Equal(t, arrow.DICTIONARY, arr.DataType().ID()) + assert.Equal(t, 3, arr.Len()) + assert.False(t, arr.IsNull(0)) + assert.True(t, arr.IsNull(1)) + assert.False(t, arr.IsNull(2)) + assert.Equal(t, 1, typed.Dictionary().Len(), "expected 1 unique value") + }) + + t.Run("multi_level_pointer_string", func(t *testing.T) { + s := "world" + ps := &s + var nilPs *string + vals := []**string{&ps, &nilPs, &ps} + arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, arrow.DICTIONARY, arr.DataType().ID()) assert.Equal(t, 3, arr.Len()) assert.False(t, arr.IsNull(0)) assert.True(t, arr.IsNull(1)) From 4f8cc074c9ba38d37ea7b56b3f3253e2dd439931 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 Apr 2026 16:22:18 -0400 Subject: [PATCH 45/82] test(arreflect): add dict dedup assertion to multi-level pointer test --- arrow/arreflect/reflect_go_to_arrow_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/arreflect/reflect_go_to_arrow_test.go index f2d9693a..c7d9036e 100644 --- a/arrow/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/arreflect/reflect_go_to_arrow_test.go @@ -515,11 +515,13 @@ func TestBuildDictionaryArray(t *testing.T) { arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem) require.NoError(t, err) defer arr.Release() + typed := arr.(*array.Dictionary) assert.Equal(t, arrow.DICTIONARY, arr.DataType().ID()) assert.Equal(t, 3, arr.Len()) assert.False(t, arr.IsNull(0)) assert.True(t, arr.IsNull(1)) assert.False(t, arr.IsNull(2)) + assert.Equal(t, 1, typed.Dictionary().Len(), "expected 1 unique value") }) } From 33ed82d67ac13feef40fa2d43fe1b2b1a237841b Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 Apr 2026 16:47:05 -0400 Subject: [PATCH 46/82] fix(arreflect): drop dead inferListElemDT return, validate WithTemporal element type, add duration struct test - 
inferListElemDT: remove unused isOuterPtr from return signature - FromSlice: WithTemporal now returns ErrUnsupportedType when element type is not time.Time, preventing silent discard on e.g. []string - Add duration_struct_field_roundtrip integration test exercising the appendTemporalValue DurationBuilder path via struct fields --- arrow/arreflect/reflect.go | 10 ++++++++++ arrow/arreflect/reflect_go_to_arrow.go | 6 +++--- arrow/arreflect/reflect_integration_test.go | 22 +++++++++++++++++++++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/arrow/arreflect/reflect.go b/arrow/arreflect/reflect.go index 2b1af807..10a54161 100644 --- a/arrow/arreflect/reflect.go +++ b/arrow/arreflect/reflect.go @@ -401,6 +401,16 @@ func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Arr if err := validateTemporalOpt(tOpts.Temporal); err != nil { return nil, err } + if tOpts.Temporal != "" && tOpts.Temporal != "timestamp" { + goType := reflect.TypeFor[T]() + deref := goType + for deref.Kind() == reflect.Ptr { + deref = deref.Elem() + } + if deref != typeOfTime { + return nil, fmt.Errorf("arreflect: WithTemporal requires a time.Time element type, got %s: %w", deref, ErrUnsupportedType) + } + } if len(vals) == 0 { goType := reflect.TypeFor[T]() dt, err := inferArrowType(goType) diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/arreflect/reflect_go_to_arrow.go index 73d0f463..84734639 100644 --- a/arrow/arreflect/reflect_go_to_arrow.go +++ b/arrow/arreflect/reflect_go_to_arrow.go @@ -220,8 +220,8 @@ func iterSlice(vals reflect.Value, isPtr bool, appendNull func(), appendVal func return nil } -func inferListElemDT(vals reflect.Value) (elemDT arrow.DataType, isOuterPtr bool, err error) { - outerSliceType, isOuterPtr := derefSliceElem(vals) +func inferListElemDT(vals reflect.Value) (elemDT arrow.DataType, err error) { + outerSliceType, _ := derefSliceElem(vals) innerElemType := outerSliceType.Elem() for innerElemType.Kind() == reflect.Ptr { 
innerElemType = innerElemType.Elem() @@ -691,7 +691,7 @@ func appendListElement(b array.Builder, v reflect.Value) error { } func buildListLikeArray(vals reflect.Value, mem memory.Allocator, isView bool) (arrow.Array, error) { - elemDT, _, err := inferListElemDT(vals) + elemDT, err := inferListElemDT(vals) if err != nil { return nil, err } diff --git a/arrow/arreflect/reflect_integration_test.go b/arrow/arreflect/reflect_integration_test.go index 30a91e10..adc0ed01 100644 --- a/arrow/arreflect/reflect_integration_test.go +++ b/arrow/arreflect/reflect_integration_test.go @@ -18,6 +18,7 @@ package arreflect import ( "testing" + "time" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" @@ -374,6 +375,27 @@ func TestReflectIntegration(t *testing.T) { require.NoError(t, err) assert.Equal(t, rows, output) }) + + t.Run("duration_struct_field_roundtrip", func(t *testing.T) { + type Row struct { + Name string `arrow:"name"` + Elapsed time.Duration `arrow:"elapsed"` + } + rows := []Row{ + {"fast", 100 * time.Millisecond}, + {"slow", 5 * time.Second}, + } + arr, err := FromSlice(rows, nil) + require.NoError(t, err) + defer arr.Release() + + sa := arr.(*array.Struct) + assert.Equal(t, arrow.DURATION, sa.Field(1).DataType().ID()) + + output, err := ToSlice[Row](arr) + require.NoError(t, err) + assert.Equal(t, rows, output) + }) } func BenchmarkReflectFromGoSlice(b *testing.B) { From 034a9dd8809721b4e3d9cb9c0d01af2b6de680cb Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 Apr 2026 16:48:39 -0400 Subject: [PATCH 47/82] refactor(arreflect): move package from arrow/arreflect to arrow/array/arreflect Users now import: github.com/apache/arrow-go/v18/arrow/array/arreflect --- arrow/{ => array}/arreflect/doc.go | 0 arrow/{ => array}/arreflect/example_test.go | 2 +- arrow/{ => array}/arreflect/reflect.go | 0 arrow/{ => array}/arreflect/reflect_arrow_to_go.go | 0 arrow/{ => array}/arreflect/reflect_arrow_to_go_test.go | 0 arrow/{ => 
array}/arreflect/reflect_go_to_arrow.go | 0 arrow/{ => array}/arreflect/reflect_go_to_arrow_test.go | 0 arrow/{ => array}/arreflect/reflect_infer.go | 0 arrow/{ => array}/arreflect/reflect_infer_test.go | 0 arrow/{ => array}/arreflect/reflect_integration_test.go | 0 arrow/{ => array}/arreflect/reflect_public_test.go | 0 arrow/{ => array}/arreflect/reflect_test.go | 0 12 files changed, 1 insertion(+), 1 deletion(-) rename arrow/{ => array}/arreflect/doc.go (100%) rename arrow/{ => array}/arreflect/example_test.go (99%) rename arrow/{ => array}/arreflect/reflect.go (100%) rename arrow/{ => array}/arreflect/reflect_arrow_to_go.go (100%) rename arrow/{ => array}/arreflect/reflect_arrow_to_go_test.go (100%) rename arrow/{ => array}/arreflect/reflect_go_to_arrow.go (100%) rename arrow/{ => array}/arreflect/reflect_go_to_arrow_test.go (100%) rename arrow/{ => array}/arreflect/reflect_infer.go (100%) rename arrow/{ => array}/arreflect/reflect_infer_test.go (100%) rename arrow/{ => array}/arreflect/reflect_integration_test.go (100%) rename arrow/{ => array}/arreflect/reflect_public_test.go (100%) rename arrow/{ => array}/arreflect/reflect_test.go (100%) diff --git a/arrow/arreflect/doc.go b/arrow/array/arreflect/doc.go similarity index 100% rename from arrow/arreflect/doc.go rename to arrow/array/arreflect/doc.go diff --git a/arrow/arreflect/example_test.go b/arrow/array/arreflect/example_test.go similarity index 99% rename from arrow/arreflect/example_test.go rename to arrow/array/arreflect/example_test.go index 5d3b4de5..439d9dff 100644 --- a/arrow/arreflect/example_test.go +++ b/arrow/array/arreflect/example_test.go @@ -22,7 +22,7 @@ import ( "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" - "github.com/apache/arrow-go/v18/arrow/arreflect" + "github.com/apache/arrow-go/v18/arrow/array/arreflect" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/memory" ) diff --git a/arrow/arreflect/reflect.go 
b/arrow/array/arreflect/reflect.go similarity index 100% rename from arrow/arreflect/reflect.go rename to arrow/array/arreflect/reflect.go diff --git a/arrow/arreflect/reflect_arrow_to_go.go b/arrow/array/arreflect/reflect_arrow_to_go.go similarity index 100% rename from arrow/arreflect/reflect_arrow_to_go.go rename to arrow/array/arreflect/reflect_arrow_to_go.go diff --git a/arrow/arreflect/reflect_arrow_to_go_test.go b/arrow/array/arreflect/reflect_arrow_to_go_test.go similarity index 100% rename from arrow/arreflect/reflect_arrow_to_go_test.go rename to arrow/array/arreflect/reflect_arrow_to_go_test.go diff --git a/arrow/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go similarity index 100% rename from arrow/arreflect/reflect_go_to_arrow.go rename to arrow/array/arreflect/reflect_go_to_arrow.go diff --git a/arrow/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go similarity index 100% rename from arrow/arreflect/reflect_go_to_arrow_test.go rename to arrow/array/arreflect/reflect_go_to_arrow_test.go diff --git a/arrow/arreflect/reflect_infer.go b/arrow/array/arreflect/reflect_infer.go similarity index 100% rename from arrow/arreflect/reflect_infer.go rename to arrow/array/arreflect/reflect_infer.go diff --git a/arrow/arreflect/reflect_infer_test.go b/arrow/array/arreflect/reflect_infer_test.go similarity index 100% rename from arrow/arreflect/reflect_infer_test.go rename to arrow/array/arreflect/reflect_infer_test.go diff --git a/arrow/arreflect/reflect_integration_test.go b/arrow/array/arreflect/reflect_integration_test.go similarity index 100% rename from arrow/arreflect/reflect_integration_test.go rename to arrow/array/arreflect/reflect_integration_test.go diff --git a/arrow/arreflect/reflect_public_test.go b/arrow/array/arreflect/reflect_public_test.go similarity index 100% rename from arrow/arreflect/reflect_public_test.go rename to arrow/array/arreflect/reflect_public_test.go diff --git 
a/arrow/arreflect/reflect_test.go b/arrow/array/arreflect/reflect_test.go similarity index 100% rename from arrow/arreflect/reflect_test.go rename to arrow/array/arreflect/reflect_test.go From a7d77735822850931324aec694a98a28ba8f47bf Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 Apr 2026 16:50:20 -0400 Subject: [PATCH 48/82] test(arreflect): add WithTemporal non-time type test, note timestamp exclusion - Add test: FromSlice([]string{}, WithTemporal(date32)) returns ErrUnsupportedType, covering the element-type validation guard - Add inline comment explaining why timestamp is excluded from the non-time.Time check (it is already a no-op via applyTemporalOpts) --- arrow/array/arreflect/reflect.go | 1 + arrow/array/arreflect/reflect_public_test.go | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/arrow/array/arreflect/reflect.go b/arrow/array/arreflect/reflect.go index 10a54161..d110c8c2 100644 --- a/arrow/array/arreflect/reflect.go +++ b/arrow/array/arreflect/reflect.go @@ -401,6 +401,7 @@ func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Arr if err := validateTemporalOpt(tOpts.Temporal); err != nil { return nil, err } + // "timestamp" is excluded: it is a no-op for non-time.Time types via applyTemporalOpts. 
if tOpts.Temporal != "" && tOpts.Temporal != "timestamp" { goType := reflect.TypeFor[T]() deref := goType diff --git a/arrow/array/arreflect/reflect_public_test.go b/arrow/array/arreflect/reflect_public_test.go index d991b1ce..3238ff5d 100644 --- a/arrow/array/arreflect/reflect_public_test.go +++ b/arrow/array/arreflect/reflect_public_test.go @@ -249,6 +249,11 @@ func TestFromGoSlice(t *testing.T) { _, err := FromSlice([]time.Time{}, mem, WithTemporal("invalid")) assert.ErrorIs(t, err, ErrUnsupportedType) }) + + t.Run("WithTemporal on non-time type returns error", func(t *testing.T) { + _, err := FromSlice([]string{}, mem, WithTemporal("date32")) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) } func TestRecordToSlice(t *testing.T) { From ab4e790033a69d4aec3a4595eaeadbbb9084fbf9 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 Apr 2026 17:01:53 -0400 Subject: [PATCH 49/82] fix(arreflect): full multi-level ptr deref in all builder loops, add tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace single-level if-isPtr-Elem() with for-Kind-Ptr-deref loop in: buildPrimitiveArray, iterSlice, buildStructArray, buildMapArray, buildFixedSizeListArray — matching buildDictionaryArray/buildListLikeArray. Add multi_level_pointer_int32 and multi_level_pointer_struct tests. 
--- arrow/array/arreflect/reflect_go_to_arrow.go | 55 ++++++++++++++----- .../arreflect/reflect_go_to_arrow_test.go | 34 +++++++++--- 2 files changed, 67 insertions(+), 22 deletions(-) diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index 84734639..7f048293 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -98,11 +98,16 @@ func buildPrimitiveArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, for i := 0; i < vals.Len(); i++ { v := vals.Index(i) if isPtr { - if v.IsNil() { - b.AppendNull() + for v.Kind() == reflect.Ptr { + if v.IsNil() { + b.AppendNull() + break + } + v = v.Elem() + } + if v.Kind() == reflect.Ptr { continue } - v = v.Elem() } if err := appendPrimitiveValue(b, v, dt); err != nil { return nil, err @@ -207,11 +212,16 @@ func iterSlice(vals reflect.Value, isPtr bool, appendNull func(), appendVal func for i := 0; i < vals.Len(); i++ { v := vals.Index(i) if isPtr { - if v.IsNil() { - appendNull() + for v.Kind() == reflect.Ptr { + if v.IsNil() { + appendNull() + break + } + v = v.Elem() + } + if v.Kind() == reflect.Ptr { continue } - v = v.Elem() } if err := appendVal(v); err != nil { return err @@ -421,11 +431,16 @@ func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, er for i := 0; i < vals.Len(); i++ { v := vals.Index(i) if isPtr { - if v.IsNil() { - sb.AppendNull() + for v.Kind() == reflect.Ptr { + if v.IsNil() { + sb.AppendNull() + break + } + v = v.Elem() + } + if v.Kind() == reflect.Ptr { continue } - v = v.Elem() } sb.Append(true) for fi, fm := range fields { @@ -787,11 +802,16 @@ func buildMapArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error for i := 0; i < vals.Len(); i++ { m := vals.Index(i) if isPtr { - if m.IsNil() { - mb.AppendNull() + for m.Kind() == reflect.Ptr { + if m.IsNil() { + mb.AppendNull() + break + } + m = m.Elem() + } + if m.Kind() == reflect.Ptr { continue } - 
m = m.Elem() } if m.IsNil() { mb.AppendNull() @@ -841,11 +861,16 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar for i := 0; i < vals.Len(); i++ { elem := vals.Index(i) if isPtr { - if elem.IsNil() { - fb.AppendNull() + for elem.Kind() == reflect.Ptr { + if elem.IsNil() { + fb.AppendNull() + break + } + elem = elem.Elem() + } + if elem.Kind() == reflect.Ptr { continue } - elem = elem.Elem() } fb.Append(true) for j := 0; j < int(n); j++ { diff --git a/arrow/array/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go index c7d9036e..5e9f1c4f 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/array/arreflect/reflect_go_to_arrow_test.go @@ -48,16 +48,19 @@ func TestBuildPrimitiveArray(t *testing.T) { } }) - t.Run("string", func(t *testing.T) { - vals := []string{"hello", "world", "foo"} + t.Run("multi_level_pointer_int32", func(t *testing.T) { + v := int32(42) + pv := &v + var nilPv *int32 + vals := []**int32{&pv, &nilPv, &pv} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) require.NoError(t, err) defer arr.Release() - assert.Equal(t, arrow.STRING, arr.DataType().ID()) - typed := arr.(*array.String) - for i, want := range vals { - assert.Equal(t, want, typed.Value(i), "[%d] value mismatch", i) - } + assert.Equal(t, 3, arr.Len()) + assert.False(t, arr.IsNull(0)) + assert.True(t, arr.IsNull(1)) + assert.False(t, arr.IsNull(2)) + assert.Equal(t, int32(42), arr.(*array.Int32).Value(0)) }) t.Run("pointer_with_null", func(t *testing.T) { @@ -312,6 +315,23 @@ func TestBuildStructArray(t *testing.T) { assert.Equal(t, int32(10), bxArr.Value(0)) assert.Equal(t, int32(20), bxArr.Value(1)) }) + + t.Run("multi_level_pointer_struct", func(t *testing.T) { + type S struct { + X int32 + } + s := S{X: 99} + ps := &s + var nilPs *S + vals := []**S{&ps, &nilPs, &ps} + arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) + require.NoError(t, err) + defer arr.Release() + 
assert.Equal(t, 3, arr.Len()) + assert.False(t, arr.IsNull(0)) + assert.True(t, arr.IsNull(1)) + assert.False(t, arr.IsNull(2)) + }) } func TestBuildListArray(t *testing.T) { From 768935b010259b6184a5eea8caa80317a75d2f28 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 Apr 2026 17:10:50 -0400 Subject: [PATCH 50/82] test(arreflect): strengthen multi-level ptr assertions, add map/fixedsizelist/list ptr tests - multi_level_pointer_struct: assert field values (X==99) not just nulls - Add multi_level_pointer_map, multi_level_pointer_fixed_size_list, multi_level_pointer_list subtests exercising the for-Kind-Ptr loop in buildMapArray, buildFixedSizeListArray, buildListArray --- .../arreflect/reflect_go_to_arrow_test.go | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/arrow/array/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go index 5e9f1c4f..78b9655d 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/array/arreflect/reflect_go_to_arrow_test.go @@ -331,6 +331,10 @@ func TestBuildStructArray(t *testing.T) { assert.False(t, arr.IsNull(0)) assert.True(t, arr.IsNull(1)) assert.False(t, arr.IsNull(2)) + sa := arr.(*array.Struct) + xArr := sa.Field(0).(*array.Int32) + assert.Equal(t, int32(99), xArr.Value(0)) + assert.Equal(t, int32(99), xArr.Value(2)) }) } @@ -370,6 +374,20 @@ func TestBuildListArray(t *testing.T) { assert.False(t, arr.IsNull(2)) }) + t.Run("multi_level_pointer_list", func(t *testing.T) { + a := []int32{1, 2} + pa := &a + var nilPa *[]int32 + vals := []**[]int32{&pa, &nilPa, &pa} + arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, 3, arr.Len()) + assert.False(t, arr.IsNull(0)) + assert.True(t, arr.IsNull(1)) + assert.False(t, arr.IsNull(2)) + }) + t.Run("string_lists", func(t *testing.T) { vals := [][]string{{"a", "b"}, {"c"}} arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) @@ 
-422,6 +440,20 @@ func TestBuildMapArray(t *testing.T) { kvArr := arr.(*array.Map).ListValues().(*array.Struct) assert.Equal(t, 3, kvArr.Len(), "expected 3 key-value pairs, got %d", kvArr.Len()) }) + + t.Run("multi_level_pointer_map", func(t *testing.T) { + m := map[string]int32{"x": 1} + pm := &m + var nilPm *map[string]int32 + vals := []**map[string]int32{&pm, &nilPm, &pm} + arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, 3, arr.Len()) + assert.False(t, arr.IsNull(0)) + assert.True(t, arr.IsNull(1)) + assert.False(t, arr.IsNull(2)) + }) } func TestBuildFixedSizeListArray(t *testing.T) { @@ -470,6 +502,20 @@ func TestBuildFixedSizeListArray(t *testing.T) { assert.True(t, arr.IsNull(0), "nil slice should be null") assert.False(t, arr.IsNull(1), "non-nil should not be null") }) + + t.Run("multi_level_pointer_fixed_size_list", func(t *testing.T) { + a := [3]int32{1, 2, 3} + pa := &a + var nilPa *[3]int32 + vals := []**[3]int32{&pa, &nilPa, &pa} + arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, 3, arr.Len()) + assert.False(t, arr.IsNull(0)) + assert.True(t, arr.IsNull(1)) + assert.False(t, arr.IsNull(2)) + }) } func TestBuildDictionaryArray(t *testing.T) { From c4867d743c09132456c0883d1c06b470f5c1487e Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 Apr 2026 17:27:45 -0400 Subject: [PATCH 51/82] refactor(arreflect): consolidate duplication, extract helpers, fix deprecated APIs Production code: - Replace 5 inline ptr-deref loops with iterSlice calls (buildPrimitiveArray, buildStructArray, buildMapArray, buildFixedSizeListArray, buildDictionaryArray) - Consolidate buildTemporalArray: extract temporalBuilder helper, delegate per-element work to appendTemporalValue - Remove double-dispatch in appendListElement: eliminate local listAppender interface, use single concrete type switch - Extract 
inferGoStructType and exportedFieldName from InferGoType - Replace deprecated arrow.Record with arrow.RecordBatch in all public API signatures - Replace deprecated array.NewRecord with array.NewRecordBatch - Replace deprecated MapType.ValueType() with MapType.ItemField().Type - Fix unused variable in reflect_arrow_to_go_test.go Test fixtures: - Add reflect_helpers_test.go with checkedMem, assertMultiLevelPtrNullPattern, makeStringArray, makeInt32Array, makeStructArray helpers - Adopt checkedMem(t) in 11 test functions - Adopt assertMultiLevelPtrNullPattern in 8 multi-level ptr subtests - Adopt makeStringArray/makeInt32Array/makeStructArray in TestSetStructValue Net: -110 lines --- arrow/array/arreflect/reflect.go | 10 +- .../arreflect/reflect_arrow_to_go_test.go | 62 +---- arrow/array/arreflect/reflect_go_to_arrow.go | 261 ++++++------------ .../arreflect/reflect_go_to_arrow_test.go | 73 ++--- arrow/array/arreflect/reflect_helpers_test.go | 70 +++++ arrow/array/arreflect/reflect_infer.go | 88 +++--- arrow/array/arreflect/reflect_infer_test.go | 2 +- arrow/array/arreflect/reflect_public_test.go | 4 +- 8 files changed, 230 insertions(+), 340 deletions(-) create mode 100644 arrow/array/arreflect/reflect_helpers_test.go diff --git a/arrow/array/arreflect/reflect.go b/arrow/array/arreflect/reflect.go index d110c8c2..5a4daa7e 100644 --- a/arrow/array/arreflect/reflect.go +++ b/arrow/array/arreflect/reflect.go @@ -454,13 +454,13 @@ func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Arr return buildArray(sv, tOpts, mem) } -func RecordToSlice[T any](rec arrow.Record) ([]T, error) { +func RecordToSlice[T any](rec arrow.RecordBatch) ([]T, error) { sa := array.RecordToStructArray(rec) defer sa.Release() return ToSlice[T](sa) } -func RecordFromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Record, error) { +func RecordFromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.RecordBatch, error) { arr, err := 
FromSlice[T](vals, mem, opts...) if err != nil { return nil, err @@ -475,7 +475,7 @@ func RecordFromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arr // RecordAt converts the row at index i of an Arrow Record to a Go value of type T. // T must be a struct type whose fields correspond to the record's columns. -func RecordAt[T any](rec arrow.Record, i int) (T, error) { +func RecordAt[T any](rec arrow.RecordBatch, i int) (T, error) { sa := array.RecordToStructArray(rec) defer sa.Release() return At[T](sa, i) @@ -484,7 +484,7 @@ func RecordAt[T any](rec arrow.Record, i int) (T, error) { // RecordAtAny converts the row at index i of an Arrow Record to a Go value, // inferring the Go type from the record's schema at runtime via [InferGoType]. // Equivalent to AtAny on the struct array underlying the record. -func RecordAtAny(rec arrow.Record, i int) (any, error) { +func RecordAtAny(rec arrow.RecordBatch, i int) (any, error) { sa := array.RecordToStructArray(rec) defer sa.Release() return AtAny(sa, i) @@ -493,7 +493,7 @@ func RecordAtAny(rec arrow.Record, i int) (any, error) { // RecordToAnySlice converts all rows of an Arrow Record to Go values, // inferring the Go type at runtime via [InferGoType]. // Equivalent to ToAnySlice on the struct array underlying the record. 
-func RecordToAnySlice(rec arrow.Record) ([]any, error) { +func RecordToAnySlice(rec arrow.RecordBatch) ([]any, error) { sa := array.RecordToStructArray(rec) defer sa.Release() return ToAnySlice(sa) diff --git a/arrow/array/arreflect/reflect_arrow_to_go_test.go b/arrow/array/arreflect/reflect_arrow_to_go_test.go index 0d5b37a0..0c2ac19d 100644 --- a/arrow/array/arreflect/reflect_arrow_to_go_test.go +++ b/arrow/array/arreflect/reflect_arrow_to_go_test.go @@ -333,30 +333,9 @@ func TestSetStructValue(t *testing.T) { mem := memory.NewGoAllocator() t.Run("basic struct", func(t *testing.T) { - nameArr := func() *array.String { - b := array.NewStringBuilder(mem) - defer b.Release() - b.Append("Alice") - b.Append("Bob") - return b.NewStringArray() - }() - defer nameArr.Release() - - ageArr := func() *array.Int32 { - b := array.NewInt32Builder(mem) - defer b.Release() - b.Append(30) - b.Append(25) - return b.NewInt32Array() - }() - defer ageArr.Release() - - sa, err := array.NewStructArray( - []arrow.Array{nameArr, ageArr}, - []string{"Name", "Age"}, - ) - require.NoError(t, err) - defer sa.Release() + nameArr := makeStringArray(t, mem, "Alice", "Bob") + ageArr := makeInt32Array(t, mem, 30, 25) + sa := makeStructArray(t, []arrow.Array{nameArr, ageArr}, []string{"Name", "Age"}) type Person struct { Name string @@ -374,20 +353,8 @@ func TestSetStructValue(t *testing.T) { }) t.Run("arrow tag mapping", func(t *testing.T) { - nameArr := func() *array.String { - b := array.NewStringBuilder(mem) - defer b.Release() - b.Append("Charlie") - return b.NewStringArray() - }() - defer nameArr.Release() - - sa, err := array.NewStructArray( - []arrow.Array{nameArr}, - []string{"full_name"}, - ) - require.NoError(t, err) - defer sa.Release() + nameArr := makeStringArray(t, mem, "Charlie") + sa := makeStructArray(t, []arrow.Array{nameArr}, []string{"full_name"}) type TaggedPerson struct { FullName string `arrow:"full_name"` @@ -399,20 +366,8 @@ func TestSetStructValue(t *testing.T) { }) 
t.Run("missing arrow field leaves go field zero", func(t *testing.T) { - nameArr := func() *array.String { - b := array.NewStringBuilder(mem) - defer b.Release() - b.Append("Dave") - return b.NewStringArray() - }() - defer nameArr.Release() - - sa, err := array.NewStructArray( - []arrow.Array{nameArr}, - []string{"Name"}, - ) - require.NoError(t, err) - defer sa.Release() + nameArr := makeStringArray(t, mem, "Dave") + sa := makeStructArray(t, []arrow.Array{nameArr}, []string{"Name"}) type PersonWithExtra struct { Name string @@ -430,11 +385,10 @@ func TestSetListValue(t *testing.T) { mem := memory.NewGoAllocator() t.Run("list of int32", func(t *testing.T) { - vb := array.NewInt32Builder(mem) lb := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) defer lb.Release() - vb = lb.ValueBuilder().(*array.Int32Builder) + vb := lb.ValueBuilder().(*array.Int32Builder) lb.Append(true) vb.AppendValues([]int32{1, 2, 3}, nil) lb.Append(true) diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index 7f048293..2d0d2494 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -95,25 +95,11 @@ func buildPrimitiveArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, defer b.Release() b.Reserve(vals.Len()) - for i := 0; i < vals.Len(); i++ { - v := vals.Index(i) - if isPtr { - for v.Kind() == reflect.Ptr { - if v.IsNil() { - b.AppendNull() - break - } - v = v.Elem() - } - if v.Kind() == reflect.Ptr { - continue - } - } - if err := appendPrimitiveValue(b, v, dt); err != nil { - return nil, err - } + if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { + return appendPrimitiveValue(b, v, dt) + }); err != nil { + return nil, err } - return b.NewArray(), nil } @@ -240,95 +226,35 @@ func inferListElemDT(vals reflect.Value) (elemDT arrow.DataType, err error) { return } +func temporalBuilder(opts tagOpts, mem memory.Allocator) array.Builder { + 
switch opts.Temporal { + case "date32": + return array.NewDate32Builder(mem) + case "date64": + return array.NewDate64Builder(mem) + case "time32": + return array.NewTime32Builder(mem, &arrow.Time32Type{Unit: arrow.Millisecond}) + case "time64": + return array.NewTime64Builder(mem, &arrow.Time64Type{Unit: arrow.Nanosecond}) + default: + return array.NewTimestampBuilder(mem, &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "UTC"}) + } +} + func buildTemporalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { elemType, isPtr := derefSliceElem(vals) - - switch elemType { - case typeOfTime: - switch opts.Temporal { - case "date32": - b := array.NewDate32Builder(mem) - defer b.Release() - b.Reserve(vals.Len()) - if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { - t, err := asTime(v) - if err != nil { - return err - } - b.Append(arrow.Date32FromTime(t)) - return nil - }); err != nil { - return nil, err - } - return b.NewArray(), nil - case "date64": - b := array.NewDate64Builder(mem) - defer b.Release() - b.Reserve(vals.Len()) - if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { - t, err := asTime(v) - if err != nil { - return err - } - b.Append(arrow.Date64FromTime(t)) - return nil - }); err != nil { - return nil, err - } - return b.NewArray(), nil - case "time32": - dt := &arrow.Time32Type{Unit: arrow.Millisecond} - b := array.NewTime32Builder(mem, dt) - defer b.Release() - b.Reserve(vals.Len()) - if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { - t, err := asTime(v) - if err != nil { - return err - } - b.Append(arrow.Time32(timeOfDayNanos(t) / int64(dt.Unit.Multiplier()))) - return nil - }); err != nil { - return nil, err - } - return b.NewArray(), nil - case "time64": - dt := &arrow.Time64Type{Unit: arrow.Nanosecond} - b := array.NewTime64Builder(mem, dt) - defer b.Release() - b.Reserve(vals.Len()) - if err := iterSlice(vals, isPtr, b.AppendNull, 
func(v reflect.Value) error { - t, err := asTime(v) - if err != nil { - return err - } - b.Append(arrow.Time64(timeOfDayNanos(t) / int64(dt.Unit.Multiplier()))) - return nil - }); err != nil { - return nil, err - } - return b.NewArray(), nil - default: - dt := &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "UTC"} - tb := array.NewTimestampBuilder(mem, dt) - defer tb.Release() - tb.Reserve(vals.Len()) - if err := iterSlice(vals, isPtr, tb.AppendNull, func(v reflect.Value) error { - t, err := asTime(v) - if err != nil { - return err - } - tb.Append(arrow.Timestamp(t.UnixNano())) - return nil - }); err != nil { - return nil, err - } - return tb.NewArray(), nil - } - - default: + if elemType != typeOfTime { return nil, fmt.Errorf("unsupported temporal type %v: %w", elemType, ErrUnsupportedType) } + b := temporalBuilder(opts, mem) + defer b.Release() + b.Reserve(vals.Len()) + if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { + return appendTemporalValue(b, v) + }); err != nil { + return nil, err + } + return b.NewArray(), nil } func decimalPrecisionScale(opts tagOpts, defaultPrec int32) (precision, scale int32) { @@ -428,28 +354,18 @@ func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, er defer sb.Release() sb.Reserve(vals.Len()) - for i := 0; i < vals.Len(); i++ { - v := vals.Index(i) - if isPtr { - for v.Kind() == reflect.Ptr { - if v.IsNil() { - sb.AppendNull() - break - } - v = v.Elem() - } - if v.Kind() == reflect.Ptr { - continue - } - } + if err := iterSlice(vals, isPtr, sb.AppendNull, func(v reflect.Value) error { sb.Append(true) for fi, fm := range fields { fv := v.FieldByIndex(fm.Index) fb := sb.FieldBuilder(fi) if err := appendValue(fb, fv, fm.Opts); err != nil { - return nil, fmt.Errorf("struct field %q: %w", fm.Name, err) + return fmt.Errorf("struct field %q: %w", fm.Name, err) } } + return nil + }); err != nil { + return nil, err } return sb.NewArray(), nil @@ -672,31 +588,40 @@ type 
listBuilderLike interface { } func appendListElement(b array.Builder, v reflect.Value) error { - type listAppender interface { - AppendNull() - ValueBuilder() array.Builder - } - la, ok := b.(listAppender) - if !ok { - return fmt.Errorf("unexpected list builder type %T: %w", b, ErrUnsupportedType) - } - if v.Kind() == reflect.Slice && v.IsNil() { - la.AppendNull() - return nil - } + isNil := v.Kind() == reflect.Slice && v.IsNil() + var vb array.Builder switch lb := b.(type) { - case *array.ListViewBuilder: - lb.AppendWithSize(true, v.Len()) - case *array.LargeListViewBuilder: - lb.AppendWithSize(true, v.Len()) case *array.ListBuilder: + if isNil { + lb.AppendNull() + return nil + } lb.Append(true) + vb = lb.ValueBuilder() case *array.LargeListBuilder: + if isNil { + lb.AppendNull() + return nil + } lb.Append(true) + vb = lb.ValueBuilder() + case *array.ListViewBuilder: + if isNil { + lb.AppendNull() + return nil + } + lb.AppendWithSize(true, v.Len()) + vb = lb.ValueBuilder() + case *array.LargeListViewBuilder: + if isNil { + lb.AppendNull() + return nil + } + lb.AppendWithSize(true, v.Len()) + vb = lb.ValueBuilder() default: return fmt.Errorf("unexpected list builder type %T: %w", b, ErrUnsupportedType) } - vb := la.ValueBuilder() for i := 0; i < v.Len(); i++ { if err := appendValue(vb, v.Index(i), tagOpts{}); err != nil { return err @@ -799,33 +724,23 @@ func buildMapArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error kb := mb.KeyBuilder() ib := mb.ItemBuilder() - for i := 0; i < vals.Len(); i++ { - m := vals.Index(i) - if isPtr { - for m.Kind() == reflect.Ptr { - if m.IsNil() { - mb.AppendNull() - break - } - m = m.Elem() - } - if m.Kind() == reflect.Ptr { - continue - } - } + if err := iterSlice(vals, isPtr, mb.AppendNull, func(m reflect.Value) error { if m.IsNil() { mb.AppendNull() - continue + return nil } mb.Append(true) for _, key := range m.MapKeys() { if err := appendValue(kb, key, tagOpts{}); err != nil { - return nil, fmt.Errorf("map key: 
%w", err) + return fmt.Errorf("map key: %w", err) } if err := appendValue(ib, m.MapIndex(key), tagOpts{}); err != nil { - return nil, fmt.Errorf("map value: %w", err) + return fmt.Errorf("map value: %w", err) } } + return nil + }); err != nil { + return nil, err } return mb.NewArray(), nil @@ -858,26 +773,16 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar vb := fb.ValueBuilder() - for i := 0; i < vals.Len(); i++ { - elem := vals.Index(i) - if isPtr { - for elem.Kind() == reflect.Ptr { - if elem.IsNil() { - fb.AppendNull() - break - } - elem = elem.Elem() - } - if elem.Kind() == reflect.Ptr { - continue - } - } + if err := iterSlice(vals, isPtr, fb.AppendNull, func(elem reflect.Value) error { fb.Append(true) for j := 0; j < int(n); j++ { if err := appendValue(vb, elem.Index(j), tagOpts{}); err != nil { - return nil, fmt.Errorf("fixed-size list element [%d][%d]: %w", i, j, err) + return fmt.Errorf("fixed-size list element [%d]: %w", j, err) } } + return nil + }); err != nil { + return nil, err } return fb.NewArray(), nil @@ -896,8 +801,7 @@ func validateDictValueType(dt arrow.DataType) error { } func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { - n := vals.Len() - elemType, _ := derefSliceElem(vals) + elemType, isPtr := derefSliceElem(vals) valDT, err := inferArrowType(elemType) if err != nil { @@ -915,21 +819,10 @@ func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array db := array.NewDictionaryBuilder(mem, dt) defer db.Release() - for i := 0; i < n; i++ { - elem := vals.Index(i) - for elem.Kind() == reflect.Ptr { - if elem.IsNil() { - db.AppendNull() - break - } - elem = elem.Elem() - } - if elem.Kind() == reflect.Ptr { - continue - } - if err := appendToDictBuilder(db, elem); err != nil { - return nil, fmt.Errorf("dictionary element [%d]: %w", i, err) - } + if err := iterSlice(vals, isPtr, db.AppendNull, func(elem reflect.Value) error { + return 
appendToDictBuilder(db, elem) + }); err != nil { + return nil, err } return db.NewArray(), nil } diff --git a/arrow/array/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go index 78b9655d..1b722a2f 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/array/arreflect/reflect_go_to_arrow_test.go @@ -32,8 +32,7 @@ import ( ) func TestBuildPrimitiveArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) + mem := checkedMem(t) t.Run("int32", func(t *testing.T) { vals := []int32{1, 2, 3, 4, 5} @@ -56,10 +55,7 @@ func TestBuildPrimitiveArray(t *testing.T) { arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) require.NoError(t, err) defer arr.Release() - assert.Equal(t, 3, arr.Len()) - assert.False(t, arr.IsNull(0)) - assert.True(t, arr.IsNull(1)) - assert.False(t, arr.IsNull(2)) + assertMultiLevelPtrNullPattern(t, arr) assert.Equal(t, int32(42), arr.(*array.Int32).Value(0)) }) @@ -122,8 +118,7 @@ func TestBuildPrimitiveArray(t *testing.T) { } func TestBuildTemporalArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) + mem := checkedMem(t) t.Run("time_time", func(t *testing.T) { now := time.Now().UTC() @@ -152,8 +147,7 @@ func TestBuildTemporalArray(t *testing.T) { } func TestBuildDecimalArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) + mem := checkedMem(t) t.Run("decimal128", func(t *testing.T) { vals := []decimal128.Num{ @@ -249,8 +243,7 @@ type buildNullableStruct struct { } func TestBuildStructArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) + mem := checkedMem(t) t.Run("simple", func(t *testing.T) { vals := []buildSimpleStruct{ @@ -327,10 +320,7 @@ func TestBuildStructArray(t *testing.T) { arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) 
require.NoError(t, err) defer arr.Release() - assert.Equal(t, 3, arr.Len()) - assert.False(t, arr.IsNull(0)) - assert.True(t, arr.IsNull(1)) - assert.False(t, arr.IsNull(2)) + assertMultiLevelPtrNullPattern(t, arr) sa := arr.(*array.Struct) xArr := sa.Field(0).(*array.Int32) assert.Equal(t, int32(99), xArr.Value(0)) @@ -339,8 +329,7 @@ func TestBuildStructArray(t *testing.T) { } func TestBuildListArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) + mem := checkedMem(t) t.Run("int32_lists", func(t *testing.T) { vals := [][]int32{{1, 2, 3}, {4, 5}, {6}} @@ -368,10 +357,7 @@ func TestBuildListArray(t *testing.T) { require.NoError(t, err) defer arr.Release() assert.Equal(t, arrow.LIST, arr.DataType().ID()) - assert.Equal(t, 3, arr.Len()) - assert.False(t, arr.IsNull(0)) - assert.True(t, arr.IsNull(1)) - assert.False(t, arr.IsNull(2)) + assertMultiLevelPtrNullPattern(t, arr) }) t.Run("multi_level_pointer_list", func(t *testing.T) { @@ -382,10 +368,7 @@ func TestBuildListArray(t *testing.T) { arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) require.NoError(t, err) defer arr.Release() - assert.Equal(t, 3, arr.Len()) - assert.False(t, arr.IsNull(0)) - assert.True(t, arr.IsNull(1)) - assert.False(t, arr.IsNull(2)) + assertMultiLevelPtrNullPattern(t, arr) }) t.Run("string_lists", func(t *testing.T) { @@ -409,8 +392,7 @@ func TestBuildListArray(t *testing.T) { } func TestBuildMapArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) + mem := checkedMem(t) t.Run("string_int32", func(t *testing.T) { vals := []map[string]int32{ @@ -449,16 +431,12 @@ func TestBuildMapArray(t *testing.T) { arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) require.NoError(t, err) defer arr.Release() - assert.Equal(t, 3, arr.Len()) - assert.False(t, arr.IsNull(0)) - assert.True(t, arr.IsNull(1)) - assert.False(t, arr.IsNull(2)) + 
assertMultiLevelPtrNullPattern(t, arr) }) } func TestBuildFixedSizeListArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) + mem := checkedMem(t) t.Run("int32_n3", func(t *testing.T) { vals := [][3]int32{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}} @@ -511,16 +489,12 @@ func TestBuildFixedSizeListArray(t *testing.T) { arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) require.NoError(t, err) defer arr.Release() - assert.Equal(t, 3, arr.Len()) - assert.False(t, arr.IsNull(0)) - assert.True(t, arr.IsNull(1)) - assert.False(t, arr.IsNull(2)) + assertMultiLevelPtrNullPattern(t, arr) }) } func TestBuildDictionaryArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) + mem := checkedMem(t) t.Run("string_dict", func(t *testing.T) { vals := []string{"apple", "banana", "apple", "cherry", "banana", "apple"} @@ -583,17 +557,13 @@ func TestBuildDictionaryArray(t *testing.T) { defer arr.Release() typed := arr.(*array.Dictionary) assert.Equal(t, arrow.DICTIONARY, arr.DataType().ID()) - assert.Equal(t, 3, arr.Len()) - assert.False(t, arr.IsNull(0)) - assert.True(t, arr.IsNull(1)) - assert.False(t, arr.IsNull(2)) + assertMultiLevelPtrNullPattern(t, arr) assert.Equal(t, 1, typed.Dictionary().Len(), "expected 1 unique value") }) } func TestBuildRunEndEncodedArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) + mem := checkedMem(t) t.Run("int32_runs", func(t *testing.T) { vals := []int32{1, 1, 1, 2, 2, 3} @@ -674,8 +644,7 @@ func TestBuildRunEndEncodedArray(t *testing.T) { } func TestBuildListViewArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) + mem := checkedMem(t) t.Run("int32_listview", func(t *testing.T) { vals := [][]int32{{1, 2, 3}, {4, 5}, {6}} @@ -720,16 +689,12 @@ func TestBuildListViewArray(t *testing.T) { require.NoError(t, err) defer 
arr.Release() assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) - assert.Equal(t, 3, arr.Len()) - assert.False(t, arr.IsNull(0)) - assert.True(t, arr.IsNull(1)) - assert.False(t, arr.IsNull(2)) + assertMultiLevelPtrNullPattern(t, arr) }) } func TestBuildTemporalTaggedArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) + mem := checkedMem(t) ref := time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC) diff --git a/arrow/array/arreflect/reflect_helpers_test.go b/arrow/array/arreflect/reflect_helpers_test.go new file mode 100644 index 00000000..9c5e42fe --- /dev/null +++ b/arrow/array/arreflect/reflect_helpers_test.go @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package arreflect + +import ( + "testing" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func checkedMem(t *testing.T) *memory.CheckedAllocator { + t.Helper() + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + t.Cleanup(func() { mem.AssertSize(t, 0) }) + return mem +} + +func assertMultiLevelPtrNullPattern(t *testing.T, arr arrow.Array) { + t.Helper() + assert.Equal(t, 3, arr.Len()) + assert.False(t, arr.IsNull(0), "index 0 should not be null") + assert.True(t, arr.IsNull(1), "index 1 should be null") + assert.False(t, arr.IsNull(2), "index 2 should not be null") +} + +func makeStringArray(t *testing.T, mem memory.Allocator, vals ...string) *array.String { + t.Helper() + b := array.NewStringBuilder(mem) + defer b.Release() + b.AppendValues(vals, nil) + a := b.NewStringArray() + t.Cleanup(a.Release) + return a +} + +func makeInt32Array(t *testing.T, mem memory.Allocator, vals ...int32) *array.Int32 { + t.Helper() + b := array.NewInt32Builder(mem) + defer b.Release() + b.AppendValues(vals, nil) + a := b.NewInt32Array() + t.Cleanup(a.Release) + return a +} + +func makeStructArray(t *testing.T, arrays []arrow.Array, names []string) *array.Struct { + t.Helper() + sa, err := array.NewStructArray(arrays, names) + require.NoError(t, err) + t.Cleanup(sa.Release) + return sa +} diff --git a/arrow/array/arreflect/reflect_infer.go b/arrow/array/arreflect/reflect_infer.go index a3ea813f..47e6ad94 100644 --- a/arrow/array/arreflect/reflect_infer.go +++ b/arrow/array/arreflect/reflect_infer.go @@ -366,46 +366,7 @@ func InferGoType(dt arrow.DataType) (reflect.Type, error) { return reflect.MapOf(keyType, valType), nil case arrow.STRUCT: - st := dt.(*arrow.StructType) - fields := make([]reflect.StructField, st.NumFields()) - seen := make(map[string]string, st.NumFields()) - for i := 0; i < 
st.NumFields(); i++ { - f := st.Field(i) - ft, err := InferGoType(f.Type) - if err != nil { - return nil, err - } - if f.Nullable { - ft = reflect.PointerTo(ft) - } - var exportedName string - if len(f.Name) == 0 { - exportedName = fmt.Sprintf("Field%d", i) - } else { - runes := []rune(f.Name) - runes[0] = unicode.ToUpper(runes[0]) - for j, r := range runes { - if j == 0 { - if !unicode.IsLetter(r) { - return nil, fmt.Errorf("arreflect: InferGoType: field name %q produces invalid Go identifier: %w", f.Name, ErrUnsupportedType) - } - } else if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' { - return nil, fmt.Errorf("arreflect: InferGoType: field name %q produces invalid Go identifier: %w", f.Name, ErrUnsupportedType) - } - } - exportedName = string(runes) - } - if origName, dup := seen[exportedName]; dup { - return nil, fmt.Errorf("arreflect: InferGoType: field names %q and %q both export as %q: %w", origName, f.Name, exportedName, ErrUnsupportedType) - } - seen[exportedName] = f.Name - fields[i] = reflect.StructField{ - Name: exportedName, - Type: ft, - Tag: reflect.StructTag(fmt.Sprintf(`arrow:%q`, f.Name)), - } - } - return reflect.StructOf(fields), nil + return inferGoStructType(dt.(*arrow.StructType)) case arrow.DICTIONARY: return InferGoType(dt.(*arrow.DictionaryType).ValueType) @@ -417,3 +378,50 @@ func InferGoType(dt arrow.DataType) (reflect.Type, error) { return nil, fmt.Errorf("unsupported Arrow type for Go inference: %v: %w", dt, ErrUnsupportedType) } } + +func exportedFieldName(name string, index int) (string, error) { + if len(name) == 0 { + return fmt.Sprintf("Field%d", index), nil + } + runes := []rune(name) + runes[0] = unicode.ToUpper(runes[0]) + for j, r := range runes { + if j == 0 { + if !unicode.IsLetter(r) { + return "", fmt.Errorf("arreflect: InferGoType: field name %q produces invalid Go identifier: %w", name, ErrUnsupportedType) + } + } else if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' { + return "", 
fmt.Errorf("arreflect: InferGoType: field name %q produces invalid Go identifier: %w", name, ErrUnsupportedType) + } + } + return string(runes), nil +} + +func inferGoStructType(st *arrow.StructType) (reflect.Type, error) { + fields := make([]reflect.StructField, st.NumFields()) + seen := make(map[string]string, st.NumFields()) + for i := 0; i < st.NumFields(); i++ { + f := st.Field(i) + ft, err := InferGoType(f.Type) + if err != nil { + return nil, err + } + if f.Nullable { + ft = reflect.PointerTo(ft) + } + exportedName, err := exportedFieldName(f.Name, i) + if err != nil { + return nil, err + } + if origName, dup := seen[exportedName]; dup { + return nil, fmt.Errorf("arreflect: InferGoType: field names %q and %q both export as %q: %w", origName, f.Name, exportedName, ErrUnsupportedType) + } + seen[exportedName] = f.Name + fields[i] = reflect.StructField{ + Name: exportedName, + Type: ft, + Tag: reflect.StructTag(fmt.Sprintf(`arrow:%q`, f.Name)), + } + } + return reflect.StructOf(fields), nil +} diff --git a/arrow/array/arreflect/reflect_infer_test.go b/arrow/array/arreflect/reflect_infer_test.go index 736320ba..67310899 100644 --- a/arrow/array/arreflect/reflect_infer_test.go +++ b/arrow/array/arreflect/reflect_infer_test.go @@ -118,7 +118,7 @@ func TestInferArrowType(t *testing.T) { lt := dt.(*arrow.ListType) assert.Equal(t, arrow.MAP, lt.Elem().ID()) mt := lt.Elem().(*arrow.MapType) - assert.Equal(t, arrow.STRUCT, mt.ValueType().ID()) + assert.Equal(t, arrow.STRUCT, mt.ItemField().Type.ID()) }) t.Run("*[]string pointer to slice is LIST", func(t *testing.T) { diff --git a/arrow/array/arreflect/reflect_public_test.go b/arrow/array/arreflect/reflect_public_test.go index 3238ff5d..e1a0d145 100644 --- a/arrow/array/arreflect/reflect_public_test.go +++ b/arrow/array/arreflect/reflect_public_test.go @@ -264,7 +264,7 @@ func TestRecordToSlice(t *testing.T) { Score float64 } - buildRecord := func(rows []Row) arrow.Record { + buildRecord := func(rows []Row) 
arrow.RecordBatch { schema := arrow.NewSchema([]arrow.Field{ {Name: "Name", Type: arrow.BinaryTypes.String}, {Name: "Score", Type: arrow.PrimitiveTypes.Float64}, @@ -281,7 +281,7 @@ func TestRecordToSlice(t *testing.T) { defer nameArr.Release() scoreArr := scoreB.NewFloat64Array() defer scoreArr.Release() - return array.NewRecord(schema, []arrow.Array{nameArr, scoreArr}, int64(len(rows))) + return array.NewRecordBatch(schema, []arrow.Array{nameArr, scoreArr}, int64(len(rows))) } t.Run("basic 3-row record", func(t *testing.T) { From 790e3d0c8b07190d2e773e0d5a6438c397719f8e Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 Apr 2026 17:36:29 -0400 Subject: [PATCH 52/82] fix(arreflect): restore outer-element index in fixed-size list error messages Track index via closure-captured counter so iterSlice-based buildFixedSizeListArray preserves the [i][j] error format. --- arrow/array/arreflect/reflect_go_to_arrow.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index 2d0d2494..0e131f84 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -773,13 +773,15 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar vb := fb.ValueBuilder() + idx := 0 if err := iterSlice(vals, isPtr, fb.AppendNull, func(elem reflect.Value) error { fb.Append(true) for j := 0; j < int(n); j++ { if err := appendValue(vb, elem.Index(j), tagOpts{}); err != nil { - return fmt.Errorf("fixed-size list element [%d]: %w", j, err) + return fmt.Errorf("fixed-size list element [%d][%d]: %w", idx, j, err) } } + idx++ return nil }); err != nil { return nil, err From 3abfcb12707148fd8e61837682a3481366a5dd22 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 Apr 2026 17:46:46 -0400 Subject: [PATCH 53/82] =?UTF-8?q?refactor(arreflect):=20second-pass=20cons?= 
=?UTF-8?q?olidation=20=E2=80=94=20helpers,=20iterSlice=20clarity,=20dead?= =?UTF-8?q?=20code=20removal?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Production code: - iterSlice: replace implicit break/continue nil-detection with explicit wasNull boolean for self-documenting control flow - Extract appendStructFields eliminating duplication between buildStructArray and appendValue StructBuilder case - Promote inline listLike interface to package-level listElemTyper - Remove 4 dead decimal cases from appendPrimitiveValue (unreachable; decimal types route through buildDecimalArray) Test helpers (reflect_helpers_test.go): - Add mustBuildArray/mustBuildDefault — adopted at 48 sites in reflect_go_to_arrow_test.go, replacing 3-line build+check+defer - Add setValueInto[T] — adopted at 21 sites in reflect_arrow_to_go_test.go - Migrate reflect_arrow_to_go_test.go from NewGoAllocator to checkedMem(t) (10 functions now have leak detection) Net: -95 lines --- .../arreflect/reflect_arrow_to_go_test.go | 63 +++--- arrow/array/arreflect/reflect_go_to_arrow.go | 52 ++--- .../arreflect/reflect_go_to_arrow_test.go | 188 +++++------------- arrow/array/arreflect/reflect_helpers_test.go | 19 ++ arrow/array/arreflect/reflect_infer.go | 5 +- 5 files changed, 116 insertions(+), 211 deletions(-) diff --git a/arrow/array/arreflect/reflect_arrow_to_go_test.go b/arrow/array/arreflect/reflect_arrow_to_go_test.go index 0c2ac19d..233eecb6 100644 --- a/arrow/array/arreflect/reflect_arrow_to_go_test.go +++ b/arrow/array/arreflect/reflect_arrow_to_go_test.go @@ -26,7 +26,6 @@ import ( "github.com/apache/arrow-go/v18/arrow/decimal" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/decimal256" - "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -34,12 +33,12 @@ import ( func setValueAt[T any](t *testing.T, arr arrow.Array, i int) T { t.Helper() 
var got T - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, i)) + setValueInto(t, &got, arr, i) return got } func TestSetValue(t *testing.T) { - mem := memory.NewGoAllocator() + mem := checkedMem(t) t.Run("bool", func(t *testing.T) { b := array.NewBooleanBuilder(mem) @@ -53,7 +52,7 @@ func TestSetValue(t *testing.T) { assert.True(t, got, "expected true, got false") got = true - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + setValueInto(t, &got, arr, 1) assert.False(t, got, "expected false (null → zero), got true") }) @@ -105,13 +104,13 @@ func TestSetValue(t *testing.T) { } got = new(string) - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + setValueInto(t, &got, arr, 1) assert.Nil(t, got, "expected nil for null, got %v", got) }) } func TestSetPrimitiveValue(t *testing.T) { - mem := memory.NewGoAllocator() + mem := checkedMem(t) t.Run("int32", func(t *testing.T) { b := array.NewInt32Builder(mem) @@ -125,7 +124,7 @@ func TestSetPrimitiveValue(t *testing.T) { assert.Equal(t, int32(42), got) got = 99 - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + setValueInto(t, &got, arr, 1) assert.Equal(t, int32(0), got, "expected 0 for null, got %d", got) }) @@ -179,7 +178,7 @@ func TestSetPrimitiveValue(t *testing.T) { } func TestSetTemporalValue(t *testing.T) { - mem := memory.NewGoAllocator() + mem := checkedMem(t) t.Run("timestamp", func(t *testing.T) { dt := &arrow.TimestampType{Unit: arrow.Second} @@ -266,7 +265,7 @@ func TestSetTemporalValue(t *testing.T) { } func TestSetDecimalValue(t *testing.T) { - mem := memory.NewGoAllocator() + mem := checkedMem(t) t.Run("decimal128", func(t *testing.T) { dt := &arrow.Decimal128Type{Precision: 10, Scale: 2} @@ -330,7 +329,7 @@ func TestSetDecimalValue(t *testing.T) { } func TestSetStructValue(t *testing.T) { - mem := memory.NewGoAllocator() + mem := checkedMem(t) t.Run("basic struct", func(t *testing.T) { nameArr := makeStringArray(t, mem, "Alice", "Bob") 
@@ -343,11 +342,11 @@ func TestSetStructValue(t *testing.T) { } var got Person - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), sa, 0)) + setValueInto(t, &got, sa, 0) assert.Equal(t, "Alice", got.Name) assert.Equal(t, int32(30), got.Age) - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), sa, 1)) + setValueInto(t, &got, sa, 1) assert.Equal(t, "Bob", got.Name) assert.Equal(t, int32(25), got.Age) }) @@ -361,7 +360,7 @@ func TestSetStructValue(t *testing.T) { } var got TaggedPerson - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), sa, 0)) + setValueInto(t, &got, sa, 0) assert.Equal(t, "Charlie", got.FullName) }) @@ -375,14 +374,14 @@ func TestSetStructValue(t *testing.T) { } var got PersonWithExtra - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), sa, 0)) + setValueInto(t, &got, sa, 0) assert.Equal(t, "Dave", got.Name) assert.Equal(t, "", got.Email) }) } func TestSetListValue(t *testing.T) { - mem := memory.NewGoAllocator() + mem := checkedMem(t) t.Run("list of int32", func(t *testing.T) { lb := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) @@ -401,10 +400,10 @@ func TestSetListValue(t *testing.T) { got := setValueAt[[]int32](t, arr, 0) assert.Equal(t, []int32{1, 2, 3}, got) - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + setValueInto(t, &got, arr, 1) assert.Equal(t, []int32{4, 5}, got) - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 2)) + setValueInto(t, &got, arr, 2) assert.Nil(t, got, "expected nil slice for null list, got %v", got) }) @@ -435,7 +434,7 @@ func TestSetListValue(t *testing.T) { defer outerArr.Release() var got [][]int32 - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), outerArr, 0)) + setValueInto(t, &got, outerArr, 0) require.Len(t, got, 2, "expected 2 inner slices, got %d", len(got)) assert.Equal(t, []int32{10, 20}, got[0]) assert.Equal(t, []int32{30}, got[1]) @@ -457,13 +456,13 @@ func TestSetListValue(t *testing.T) { got := setValueAt[[]int32](t, arr, 
0) assert.Equal(t, []int32{1, 2}, got, "row 0: expected [1,2], got %v", got) - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + setValueInto(t, &got, arr, 1) assert.Equal(t, []int32{3}, got, "row 1: expected [3], got %v", got) }) } func TestSetMapValue(t *testing.T) { - mem := memory.NewGoAllocator() + mem := checkedMem(t) t.Run("map string to int32", func(t *testing.T) { mb := array.NewMapBuilder(mem, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false) @@ -491,16 +490,16 @@ func TestSetMapValue(t *testing.T) { assert.Equal(t, int32(1), got["a"]) assert.Equal(t, int32(2), got["b"]) - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + setValueInto(t, &got, arr, 1) assert.Equal(t, int32(10), got["x"]) - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 2)) + setValueInto(t, &got, arr, 2) assert.Nil(t, got, "expected nil map for null, got %v", got) }) } func TestSetFixedSizeListValue(t *testing.T) { - mem := memory.NewGoAllocator() + mem := checkedMem(t) t.Run("go array", func(t *testing.T) { b := array.NewFixedSizeListBuilder(mem, 3, arrow.PrimitiveTypes.Int32) @@ -519,11 +518,11 @@ func TestSetFixedSizeListValue(t *testing.T) { got := setValueAt[[3]int32](t, arr, 0) assert.Equal(t, [3]int32{10, 20, 30}, got) - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + setValueInto(t, &got, arr, 1) assert.Equal(t, [3]int32{40, 50, 60}, got) got = [3]int32{1, 2, 3} - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 2)) + setValueInto(t, &got, arr, 2) assert.Equal(t, [3]int32{}, got, "expected zero array for null, got %v", got) }) @@ -559,7 +558,7 @@ func TestSetFixedSizeListValue(t *testing.T) { } func TestSetDictionaryValue(t *testing.T) { - mem := memory.NewGoAllocator() + mem := checkedMem(t) t.Run("dictionary int8 to string", func(t *testing.T) { dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String} @@ -578,10 +577,10 @@ func 
TestSetDictionaryValue(t *testing.T) { got := setValueAt[string](t, arr, 0) assert.Equal(t, "foo", got) - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 1)) + setValueInto(t, &got, arr, 1) assert.Equal(t, "bar", got) - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 2)) + setValueInto(t, &got, arr, 2) assert.Equal(t, "foo", got) gotPtr := setValueAt[*string](t, arr, 3) @@ -590,7 +589,7 @@ func TestSetDictionaryValue(t *testing.T) { } func TestSetRunEndEncodedValue(t *testing.T) { - mem := memory.NewGoAllocator() + mem := checkedMem(t) t.Run("ree int32 to string", func(t *testing.T) { b := array.NewRunEndEncodedBuilder(mem, arrow.PrimitiveTypes.Int32, arrow.BinaryTypes.String) @@ -608,13 +607,13 @@ func TestSetRunEndEncodedValue(t *testing.T) { got := setValueAt[string](t, arr, 0) assert.Equal(t, "aaa", got, "expected aaa at logical 0, got %q", got) - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 2)) + setValueInto(t, &got, arr, 2) assert.Equal(t, "aaa", got, "expected aaa at logical 2, got %q", got) - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 3)) + setValueInto(t, &got, arr, 3) assert.Equal(t, "bbb", got, "expected bbb at logical 3, got %q", got) - require.NoError(t, setValue(reflect.ValueOf(&got).Elem(), arr, 4)) + setValueInto(t, &got, arr, 4) assert.Equal(t, "bbb", got, "expected bbb at logical 4, got %q", got) }) } diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index 0e131f84..f0f80f8f 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -141,22 +141,6 @@ func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) e return err } b.(*array.DurationBuilder).Append(arrow.Duration(d.Nanoseconds())) - case arrow.DECIMAL128: - n, ok := reflect.TypeAssert[decimal128.Num](v) - if !ok { - return fmt.Errorf("expected decimal128.Num, got %s: %w", v.Type(), 
ErrTypeMismatch) - } - b.(*array.Decimal128Builder).Append(n) - case arrow.DECIMAL256: - n, ok := reflect.TypeAssert[decimal256.Num](v) - if !ok { - return fmt.Errorf("expected decimal256.Num, got %s: %w", v.Type(), ErrTypeMismatch) - } - b.(*array.Decimal256Builder).Append(n) - case arrow.DECIMAL32: - b.(*array.Decimal32Builder).Append(decimal.Decimal32(v.Int())) - case arrow.DECIMAL64: - b.(*array.Decimal64Builder).Append(decimal.Decimal64(v.Int())) default: return fmt.Errorf("unsupported Arrow type %v: %w", dt, ErrUnsupportedType) } @@ -198,14 +182,16 @@ func iterSlice(vals reflect.Value, isPtr bool, appendNull func(), appendVal func for i := 0; i < vals.Len(); i++ { v := vals.Index(i) if isPtr { + wasNull := false for v.Kind() == reflect.Ptr { if v.IsNil() { appendNull() + wasNull = true break } v = v.Elem() } - if v.Kind() == reflect.Ptr { + if wasNull { continue } } @@ -337,6 +323,16 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( } } +func appendStructFields(sb *array.StructBuilder, v reflect.Value, fields []fieldMeta) error { + sb.Append(true) + for fi, fm := range fields { + if err := appendValue(sb.FieldBuilder(fi), v.FieldByIndex(fm.Index), fm.Opts); err != nil { + return fmt.Errorf("struct field %q: %w", fm.Name, err) + } + } + return nil +} + func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { elemType := vals.Type().Elem() isPtr := elemType.Kind() == reflect.Ptr @@ -355,15 +351,7 @@ func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, er sb.Reserve(vals.Len()) if err := iterSlice(vals, isPtr, sb.AppendNull, func(v reflect.Value) error { - sb.Append(true) - for fi, fm := range fields { - fv := v.FieldByIndex(fm.Index) - fb := sb.FieldBuilder(fi) - if err := appendValue(fb, fv, fm.Opts); err != nil { - return fmt.Errorf("struct field %q: %w", fm.Name, err) - } - } - return nil + return appendStructFields(sb, v, fields) }); err != nil { return nil, err } @@ 
-524,16 +512,8 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { } } case *array.StructBuilder: - elemType := v.Type() - fields := cachedStructFields(elemType) - tb.Append(true) - for fi, fm := range fields { - fv := v.FieldByIndex(fm.Index) - fb := tb.FieldBuilder(fi) - if err := appendValue(fb, fv, fm.Opts); err != nil { - return fmt.Errorf("struct field %q: %w", fm.Name, err) - } - } + fields := cachedStructFields(v.Type()) + return appendStructFields(tb, v, fields) default: if db, ok := b.(array.DictionaryBuilder); ok { return appendToDictBuilder(db, v) diff --git a/arrow/array/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go index 1b722a2f..e4117e17 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/array/arreflect/reflect_go_to_arrow_test.go @@ -36,9 +36,7 @@ func TestBuildPrimitiveArray(t *testing.T) { t.Run("int32", func(t *testing.T) { vals := []int32{1, 2, 3, 4, 5} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assert.Equal(t, 5, arr.Len()) assert.Equal(t, arrow.INT32, arr.DataType().ID()) typed := arr.(*array.Int32) @@ -52,9 +50,7 @@ func TestBuildPrimitiveArray(t *testing.T) { pv := &v var nilPv *int32 vals := []**int32{&pv, &nilPv, &pv} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assertMultiLevelPtrNullPattern(t, arr) assert.Equal(t, int32(42), arr.(*array.Int32).Value(0)) }) @@ -62,9 +58,7 @@ func TestBuildPrimitiveArray(t *testing.T) { t.Run("pointer_with_null", func(t *testing.T) { v1, v3 := int32(10), int32(30) vals := []*int32{&v1, nil, &v3} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assert.True(t, arr.IsNull(1), "expected index 1 to be 
null") typed := arr.(*array.Int32) assert.Equal(t, int32(10), typed.Value(0)) @@ -73,9 +67,7 @@ func TestBuildPrimitiveArray(t *testing.T) { t.Run("bool", func(t *testing.T) { vals := []bool{true, false, true} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assert.Equal(t, arrow.BOOL, arr.DataType().ID()) typed := arr.(*array.Boolean) assert.True(t, typed.Value(0), "expected Value(0) to be true") @@ -85,9 +77,7 @@ func TestBuildPrimitiveArray(t *testing.T) { t.Run("binary", func(t *testing.T) { vals := [][]byte{{1, 2, 3}, {4, 5}, {6}} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assert.Equal(t, arrow.BINARY, arr.DataType().ID()) }) @@ -123,9 +113,7 @@ func TestBuildTemporalArray(t *testing.T) { t.Run("time_time", func(t *testing.T) { now := time.Now().UTC() vals := []time.Time{now, now.Add(time.Hour)} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assert.Equal(t, arrow.TIMESTAMP, arr.DataType().ID()) typed := arr.(*array.Timestamp) for i, want := range vals { @@ -135,9 +123,7 @@ func TestBuildTemporalArray(t *testing.T) { t.Run("time_duration", func(t *testing.T) { vals := []time.Duration{time.Second, time.Minute, time.Hour} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assert.Equal(t, arrow.DURATION, arr.DataType().ID()) typed := arr.(*array.Duration) for i, want := range vals { @@ -155,9 +141,7 @@ func TestBuildDecimalArray(t *testing.T) { decimal128.New(0, 200), decimal128.New(0, 300), } - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) 
assert.Equal(t, arrow.DECIMAL128, arr.DataType().ID()) typed := arr.(*array.Decimal128) for i, want := range vals { @@ -170,9 +154,7 @@ func TestBuildDecimalArray(t *testing.T) { decimal256.New(0, 0, 0, 100), decimal256.New(0, 0, 0, 200), } - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assert.Equal(t, arrow.DECIMAL256, arr.DataType().ID()) typed := arr.(*array.Decimal256) for i, want := range vals { @@ -183,9 +165,7 @@ func TestBuildDecimalArray(t *testing.T) { t.Run("decimal128_custom_opts", func(t *testing.T) { vals := []decimal128.Num{decimal128.New(0, 12345)} opts := tagOpts{HasDecimalOpts: true, DecimalPrecision: 10, DecimalScale: 3} - arr, err := buildArray(reflect.ValueOf(vals), opts, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, opts, mem) dt := arr.DataType().(*arrow.Decimal128Type) assert.Equal(t, int32(10), dt.Precision, "expected p=10, got p=%d", dt.Precision) assert.Equal(t, int32(3), dt.Scale, "expected s=3, got s=%d", dt.Scale) @@ -193,9 +173,7 @@ func TestBuildDecimalArray(t *testing.T) { t.Run("decimal32", func(t *testing.T) { vals := []decimal.Decimal32{100, 200, 300} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assert.Equal(t, arrow.DECIMAL32, arr.DataType().ID()) typed := arr.(*array.Decimal32) for i, want := range vals { @@ -205,9 +183,7 @@ func TestBuildDecimalArray(t *testing.T) { t.Run("decimal64", func(t *testing.T) { vals := []decimal.Decimal64{1000, 2000} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assert.Equal(t, arrow.DECIMAL64, arr.DataType().ID()) typed := arr.(*array.Decimal64) for i, want := range vals { @@ -218,9 +194,7 @@ func TestBuildDecimalArray(t *testing.T) { 
t.Run("decimal32_custom_opts", func(t *testing.T) { vals := []decimal.Decimal32{12345} opts := tagOpts{HasDecimalOpts: true, DecimalPrecision: 9, DecimalScale: 2} - arr, err := buildArray(reflect.ValueOf(vals), opts, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, opts, mem) dt := arr.DataType().(*arrow.Decimal32Type) assert.Equal(t, int32(9), dt.Precision, "expected p=9, got p=%d", dt.Precision) assert.Equal(t, int32(2), dt.Scale, "expected s=2, got s=%d", dt.Scale) @@ -251,9 +225,7 @@ func TestBuildStructArray(t *testing.T) { {X: 2, Y: "two"}, {X: 3, Y: "three"}, } - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) require.Equal(t, arrow.STRUCT, arr.DataType().ID(), "expected STRUCT, got %v", arr.DataType()) typed := arr.(*array.Struct) assert.Equal(t, 3, typed.Len()) @@ -268,9 +240,7 @@ func TestBuildStructArray(t *testing.T) { t.Run("pointer_null_row", func(t *testing.T) { v1 := buildSimpleStruct{X: 42, Y: "answer"} vals := []*buildSimpleStruct{&v1, nil} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assert.Equal(t, 2, arr.Len()) assert.True(t, arr.IsNull(1), "expected index 1 to be null") }) @@ -282,9 +252,7 @@ func TestBuildStructArray(t *testing.T) { {X: &x1, Y: &y1}, {X: nil, Y: nil}, } - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) typed := arr.(*array.Struct) assert.True(t, typed.Field(0).IsNull(1), "expected X[1] to be null") assert.True(t, typed.Field(1).IsNull(1), "expected Y[1] to be null") @@ -295,9 +263,7 @@ func TestBuildStructArray(t *testing.T) { {A: 1, B: buildSimpleStruct{X: 10, Y: "inner1"}}, {A: 2, B: buildSimpleStruct{X: 20, Y: "inner2"}}, } - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, 
mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) require.Equal(t, arrow.STRUCT, arr.DataType().ID(), "expected STRUCT, got %v", arr.DataType()) typed := arr.(*array.Struct) aArr := typed.Field(0).(*array.Int32) @@ -317,9 +283,7 @@ func TestBuildStructArray(t *testing.T) { ps := &s var nilPs *S vals := []**S{&ps, &nilPs, &ps} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assertMultiLevelPtrNullPattern(t, arr) sa := arr.(*array.Struct) xArr := sa.Field(0).(*array.Int32) @@ -333,9 +297,7 @@ func TestBuildListArray(t *testing.T) { t.Run("int32_lists", func(t *testing.T) { vals := [][]int32{{1, 2, 3}, {4, 5}, {6}} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) require.Equal(t, arrow.LIST, arr.DataType().ID(), "expected LIST, got %v", arr.DataType()) typed := arr.(*array.List) assert.Equal(t, 3, typed.Len()) @@ -344,18 +306,14 @@ func TestBuildListArray(t *testing.T) { t.Run("null_inner", func(t *testing.T) { vals := [][]int32{{1, 2}, nil, {3}} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assert.True(t, arr.IsNull(1), "expected index 1 to be null") }) t.Run("nil_pointer_list_element", func(t *testing.T) { a := []int32{1, 2} vals := []*[]int32{&a, nil, &a} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assert.Equal(t, arrow.LIST, arr.DataType().ID()) assertMultiLevelPtrNullPattern(t, arr) }) @@ -365,25 +323,19 @@ func TestBuildListArray(t *testing.T) { pa := &a var nilPa *[]int32 vals := []**[]int32{&pa, &nilPa, &pa} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer 
arr.Release() + arr := mustBuildDefault(t, vals, mem) assertMultiLevelPtrNullPattern(t, arr) }) t.Run("string_lists", func(t *testing.T) { vals := [][]string{{"a", "b"}, {"c"}} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) require.Equal(t, arrow.LIST, arr.DataType().ID(), "expected LIST, got %v", arr.DataType()) }) t.Run("nested", func(t *testing.T) { vals := [][][]int32{{{1, 2}, {3}}, {{4, 5, 6}}} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) require.Equal(t, arrow.LIST, arr.DataType().ID(), "expected outer LIST, got %v", arr.DataType()) outer := arr.(*array.List) assert.Equal(t, 2, outer.Len(), "expected 2 outer rows, got %d", outer.Len()) @@ -399,26 +351,20 @@ func TestBuildMapArray(t *testing.T) { {"a": 1, "b": 2}, {"c": 3}, } - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) require.Equal(t, arrow.MAP, arr.DataType().ID(), "expected MAP, got %v", arr.DataType()) assert.Equal(t, 2, arr.(*array.Map).Len()) }) t.Run("null_map", func(t *testing.T) { vals := []map[string]int32{{"a": 1}, nil} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assert.True(t, arr.IsNull(1), "expected index 1 to be null") }) t.Run("entry_count", func(t *testing.T) { vals := []map[string]int32{{"x": 10, "y": 20, "z": 30}} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) kvArr := arr.(*array.Map).ListValues().(*array.Struct) assert.Equal(t, 3, kvArr.Len(), "expected 3 key-value pairs, got %d", kvArr.Len()) }) @@ -428,9 +374,7 @@ func TestBuildMapArray(t *testing.T) { pm := &m var 
nilPm *map[string]int32 vals := []**map[string]int32{&pm, &nilPm, &pm} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assertMultiLevelPtrNullPattern(t, arr) }) } @@ -440,9 +384,7 @@ func TestBuildFixedSizeListArray(t *testing.T) { t.Run("int32_n3", func(t *testing.T) { vals := [][3]int32{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) require.Equal(t, arrow.FIXED_SIZE_LIST, arr.DataType().ID(), "expected FIXED_SIZE_LIST, got %v", arr.DataType()) typed := arr.(*array.FixedSizeList) assert.Equal(t, 3, typed.Len()) @@ -456,9 +398,7 @@ func TestBuildFixedSizeListArray(t *testing.T) { t.Run("float64_n2", func(t *testing.T) { vals := [][2]float64{{1.0, 2.0}, {3.0, 4.0}} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) require.Equal(t, arrow.FIXED_SIZE_LIST, arr.DataType().ID(), "expected FIXED_SIZE_LIST, got %v", arr.DataType()) assert.Equal(t, int32(2), arr.DataType().(*arrow.FixedSizeListType).Len(), "expected fixed size 2") }) @@ -486,9 +426,7 @@ func TestBuildFixedSizeListArray(t *testing.T) { pa := &a var nilPa *[3]int32 vals := []**[3]int32{&pa, &nilPa, &pa} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildDefault(t, vals, mem) assertMultiLevelPtrNullPattern(t, arr) }) } @@ -498,9 +436,7 @@ func TestBuildDictionaryArray(t *testing.T) { t.Run("string_dict", func(t *testing.T) { vals := []string{"apple", "banana", "apple", "cherry", "banana", "apple"} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{Dict: true}, mem) require.Equal(t, 
arrow.DICTIONARY, arr.DataType().ID(), "expected DICTIONARY, got %v", arr.DataType()) typed := arr.(*array.Dictionary) assert.Equal(t, 6, typed.Len()) @@ -509,9 +445,7 @@ func TestBuildDictionaryArray(t *testing.T) { t.Run("int32_dict", func(t *testing.T) { vals := []int32{1, 2, 1, 3, 2, 1} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{Dict: true}, mem) require.Equal(t, arrow.DICTIONARY, arr.DataType().ID(), "expected DICTIONARY, got %v", arr.DataType()) typed := arr.(*array.Dictionary) assert.Equal(t, 6, typed.Len()) @@ -520,9 +454,7 @@ func TestBuildDictionaryArray(t *testing.T) { t.Run("index_type_is_int32", func(t *testing.T) { vals := []string{"x", "y", "z"} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{Dict: true}, mem) dt := arr.DataType().(*arrow.DictionaryType) assert.Equal(t, arrow.INT32, dt.IndexType.ID(), "expected INT32 index, got %v", dt.IndexType) }) @@ -535,9 +467,7 @@ func TestBuildDictionaryArray(t *testing.T) { t.Run("pointer_string_with_nil", func(t *testing.T) { s := "hello" vals := []*string{&s, nil, &s} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{Dict: true}, mem) typed := arr.(*array.Dictionary) assert.Equal(t, arrow.DICTIONARY, arr.DataType().ID()) assert.Equal(t, 3, arr.Len()) @@ -552,9 +482,7 @@ func TestBuildDictionaryArray(t *testing.T) { ps := &s var nilPs *string vals := []**string{&ps, &nilPs, &ps} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{Dict: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{Dict: true}, mem) typed := arr.(*array.Dictionary) assert.Equal(t, arrow.DICTIONARY, arr.DataType().ID()) 
assertMultiLevelPtrNullPattern(t, arr) @@ -567,9 +495,7 @@ func TestBuildRunEndEncodedArray(t *testing.T) { t.Run("int32_runs", func(t *testing.T) { vals := []int32{1, 1, 1, 2, 2, 3} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{REE: true}, mem) require.Equal(t, arrow.RUN_END_ENCODED, arr.DataType().ID(), "expected RUN_END_ENCODED, got %v", arr.DataType()) ree := arr.(*array.RunEndEncoded) assert.Equal(t, 6, ree.Len()) @@ -587,9 +513,7 @@ func TestBuildRunEndEncodedArray(t *testing.T) { t.Run("string_runs", func(t *testing.T) { vals := []string{"a", "a", "b", "b", "b", "c"} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{REE: true}, mem) require.Equal(t, arrow.RUN_END_ENCODED, arr.DataType().ID(), "expected RUN_END_ENCODED, got %v", arr.DataType()) ree := arr.(*array.RunEndEncoded) assert.Equal(t, 6, ree.Len()) @@ -598,9 +522,7 @@ func TestBuildRunEndEncodedArray(t *testing.T) { t.Run("single_run", func(t *testing.T) { vals := []int32{42, 42, 42} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{REE: true}, mem) ree := arr.(*array.RunEndEncoded) assert.Equal(t, 3, ree.Len()) runEnds := ree.RunEndsArr().(*array.Int32) @@ -610,9 +532,7 @@ func TestBuildRunEndEncodedArray(t *testing.T) { t.Run("all_distinct", func(t *testing.T) { vals := []int32{1, 2, 3, 4, 5} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{REE: true}, mem) ree := arr.(*array.RunEndEncoded) assert.Equal(t, 5, ree.Len()) assert.Equal(t, 5, ree.RunEndsArr().Len(), "expected 5 runs for all-distinct, got %d", ree.RunEndsArr().Len()) @@ -623,9 +543,7 @@ func 
TestBuildRunEndEncodedArray(t *testing.T) { x2 := "x" y := "y" vals := []*string{&x1, &x2, &y} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true}, mem) - require.NoError(t, err, "unexpected error") - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{REE: true}, mem) ree := arr.(*array.RunEndEncoded) assert.Equal(t, 2, ree.RunEndsArr().Len(), "expected 2 runs (x+x coalesced, y), got %d", ree.RunEndsArr().Len()) }) @@ -634,9 +552,7 @@ func TestBuildRunEndEncodedArray(t *testing.T) { t1 := time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC) t2 := time.Date(2024, 6, 15, 0, 0, 0, 0, time.UTC) vals := []time.Time{t1, t1, t2} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{REE: true, Temporal: "date32"}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{REE: true, Temporal: "date32"}, mem) ree := arr.(*array.RunEndEncoded) assert.Equal(t, 3, ree.Len()) assert.Equal(t, arrow.DATE32, ree.Values().DataType().ID()) @@ -648,9 +564,7 @@ func TestBuildListViewArray(t *testing.T) { t.Run("int32_listview", func(t *testing.T) { vals := [][]int32{{1, 2, 3}, {4, 5}, {6}} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{ListView: true}, mem) require.Equal(t, arrow.LIST_VIEW, arr.DataType().ID(), "expected LIST_VIEW, got %v", arr.DataType()) typed := arr.(*array.ListView) assert.Equal(t, 3, typed.Len()) @@ -658,26 +572,20 @@ func TestBuildListViewArray(t *testing.T) { t.Run("null_entry", func(t *testing.T) { vals := [][]int32{{1, 2}, nil, {3}} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{ListView: true}, mem) assert.True(t, arr.IsNull(1), "expected index 1 to be null") }) t.Run("string_listview", func(t *testing.T) { vals := [][]string{{"hello", "world"}, {"foo"}, {"a", "b", "c"}} 
- arr, err := buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{ListView: true}, mem) require.Equal(t, arrow.LIST_VIEW, arr.DataType().ID(), "expected LIST_VIEW, got %v", arr.DataType()) assert.Equal(t, 3, arr.Len()) }) t.Run("total_values", func(t *testing.T) { vals := [][]int32{{10, 20}, {30}} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{ListView: true}, mem) allVals := arr.(*array.ListView).ListValues().(*array.Int32) assert.Equal(t, 3, allVals.Len(), "expected 3 total values, got %d", allVals.Len()) }) @@ -685,9 +593,7 @@ func TestBuildListViewArray(t *testing.T) { t.Run("nil_pointer_listview_element", func(t *testing.T) { a := []int32{1, 2} vals := []*[]int32{&a, nil, &a} - arr, err := buildArray(reflect.ValueOf(vals), tagOpts{ListView: true}, mem) - require.NoError(t, err) - defer arr.Release() + arr := mustBuildArray(t, vals, tagOpts{ListView: true}, mem) assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) assertMultiLevelPtrNullPattern(t, arr) }) diff --git a/arrow/array/arreflect/reflect_helpers_test.go b/arrow/array/arreflect/reflect_helpers_test.go index 9c5e42fe..6f1f241f 100644 --- a/arrow/array/arreflect/reflect_helpers_test.go +++ b/arrow/array/arreflect/reflect_helpers_test.go @@ -17,6 +17,7 @@ package arreflect import ( + "reflect" "testing" "github.com/apache/arrow-go/v18/arrow" @@ -33,6 +34,11 @@ func checkedMem(t *testing.T) *memory.CheckedAllocator { return mem } +func setValueInto[T any](t *testing.T, dst *T, arr arrow.Array, i int) { + t.Helper() + require.NoError(t, setValue(reflect.ValueOf(dst).Elem(), arr, i)) +} + func assertMultiLevelPtrNullPattern(t *testing.T, arr arrow.Array) { t.Helper() assert.Equal(t, 3, arr.Len()) @@ -68,3 +74,16 @@ func makeStructArray(t *testing.T, arrays []arrow.Array, names []string) 
*array. t.Cleanup(sa.Release) return sa } + +func mustBuildArray(t *testing.T, vals any, opts tagOpts, mem memory.Allocator) arrow.Array { + t.Helper() + arr, err := buildArray(reflect.ValueOf(vals), opts, mem) + require.NoError(t, err) + t.Cleanup(arr.Release) + return arr +} + +func mustBuildDefault(t *testing.T, vals any, mem memory.Allocator) arrow.Array { + t.Helper() + return mustBuildArray(t, vals, tagOpts{}, mem) +} diff --git a/arrow/array/arreflect/reflect_infer.go b/arrow/array/arreflect/reflect_infer.go index 47e6ad94..34ae26be 100644 --- a/arrow/array/arreflect/reflect_infer.go +++ b/arrow/array/arreflect/reflect_infer.go @@ -59,6 +59,8 @@ const ( dec256DefaultPrecision int32 = 76 ) +type listElemTyper interface{ Elem() arrow.DataType } + func inferPrimitiveArrowType(t reflect.Type) (arrow.DataType, error) { for t.Kind() == reflect.Ptr { t = t.Elem() @@ -330,8 +332,7 @@ func InferGoType(dt arrow.DataType) (reflect.Type, error) { return typeOfDec64, nil case arrow.LIST, arrow.LARGE_LIST, arrow.LIST_VIEW, arrow.LARGE_LIST_VIEW: - type listLike interface{ Elem() arrow.DataType } - ll, ok := dt.(listLike) + ll, ok := dt.(listElemTyper) if !ok { return nil, fmt.Errorf("unsupported Arrow type for Go inference: %v: %w", dt, ErrUnsupportedType) } From 39ccd63d17f8ea12d56796521fecb345a04ef45f Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 Apr 2026 17:54:17 -0400 Subject: [PATCH 54/82] fix(arreflect): increment idx counter on null path in fixed-size list error messages The appendNullIdx wrapper increments the closure-captured counter on the null path as well, so the counter advances for null and non-null elements alike and error messages report the correct outer slice index when null elements precede the failing element.
--- arrow/array/arreflect/reflect_go_to_arrow.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index f0f80f8f..5c5e97f9 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -754,7 +754,8 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar vb := fb.ValueBuilder() idx := 0 - if err := iterSlice(vals, isPtr, fb.AppendNull, func(elem reflect.Value) error { + appendNullIdx := func() { fb.AppendNull(); idx++ } + if err := iterSlice(vals, isPtr, appendNullIdx, func(elem reflect.Value) error { fb.Append(true) for j := 0; j < int(n); j++ { if err := appendValue(vb, elem.Index(j), tagOpts{}); err != nil { From 30cad4888cc1da11880aa8a754699497d1609338 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 Apr 2026 18:04:59 -0400 Subject: [PATCH 55/82] =?UTF-8?q?refactor(arreflect):=20third-pass=20conso?= =?UTF-8?q?lidation=20=E2=80=94=20eliminate=20appendPrimitiveValue,=20coll?= =?UTF-8?q?apse=20decimal=20builder,=20assertArray[T]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reflect_go_to_arrow.go: - Delete appendPrimitiveValue (~43 lines); buildPrimitiveArray now delegates to appendValue directly - Collapse buildDecimalArray from 4 inline iterSlice blocks to single switch (builder creation) + shared iterSlice delegating to appendDecimalValue (~50 lines saved) - Hoist nil-check in appendListElement before type switch, eliminating 4 duplicate isNil guards (~8 lines saved) reflect_arrow_to_go.go: - Add assertArray[T] generic helper replacing 13 identical 3-line type-assertion guards across setValue, setTemporalValue, setDecimalValue Net: -90 lines --- arrow/array/arreflect/reflect_arrow_to_go.go | 105 ++++++++------ arrow/array/arreflect/reflect_go_to_arrow.go | 145 +++---------------- 2 files changed, 80 insertions(+), 170 
deletions(-) diff --git a/arrow/array/arreflect/reflect_arrow_to_go.go b/arrow/array/arreflect/reflect_arrow_to_go.go index aa8b25d9..eb5aa3e1 100644 --- a/arrow/array/arreflect/reflect_arrow_to_go.go +++ b/arrow/array/arreflect/reflect_arrow_to_go.go @@ -25,6 +25,15 @@ import ( "github.com/apache/arrow-go/v18/arrow/array" ) +func assertArray[T any](arr arrow.Array) (*T, error) { + a, ok := any(arr).(*T) + if !ok { + var zero T + return nil, fmt.Errorf("expected *%T, got %T: %w", zero, arr, ErrTypeMismatch) + } + return a, nil +} + func isIntKind(k reflect.Kind) bool { return k == reflect.Int || k == reflect.Int8 || k == reflect.Int16 || k == reflect.Int32 || k == reflect.Int64 @@ -49,9 +58,9 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { switch arr.DataType().ID() { case arrow.BOOL: - a, ok := arr.(*array.Boolean) - if !ok { - return fmt.Errorf("expected *Boolean, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Boolean](arr) + if err != nil { + return err } if v.Kind() != reflect.Bool { return fmt.Errorf("cannot set bool into %s: %w", v.Type(), ErrTypeMismatch) @@ -93,9 +102,9 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { return setDecimalValue(v, arr, i) case arrow.STRUCT: - a, ok := arr.(*array.Struct) - if !ok { - return fmt.Errorf("expected *Struct, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Struct](arr) + if err != nil { + return err } return setStructValue(v, a, i) @@ -107,30 +116,30 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { return setListValue(v, a, i) case arrow.MAP: - a, ok := arr.(*array.Map) - if !ok { - return fmt.Errorf("expected *Map, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Map](arr) + if err != nil { + return err } return setMapValue(v, a, i) case arrow.FIXED_SIZE_LIST: - a, ok := arr.(*array.FixedSizeList) - if !ok { - return fmt.Errorf("expected *FixedSizeList, got %T: %w", arr, ErrTypeMismatch) + a, err := 
assertArray[array.FixedSizeList](arr) + if err != nil { + return err } return setFixedSizeListValue(v, a, i) case arrow.DICTIONARY: - a, ok := arr.(*array.Dictionary) - if !ok { - return fmt.Errorf("expected *Dictionary, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Dictionary](arr) + if err != nil { + return err } return setDictionaryValue(v, a, i) case arrow.RUN_END_ENCODED: - a, ok := arr.(*array.RunEndEncoded) - if !ok { - return fmt.Errorf("expected *RunEndEncoded, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.RunEndEncoded](arr) + if err != nil { + return err } return setRunEndEncodedValue(v, a, i) @@ -209,47 +218,47 @@ func setTime(v reflect.Value, t time.Time) error { func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { switch arr.DataType().ID() { case arrow.TIMESTAMP: - a, ok := arr.(*array.Timestamp) - if !ok { - return fmt.Errorf("expected *Timestamp, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Timestamp](arr) + if err != nil { + return err } unit := arr.DataType().(*arrow.TimestampType).Unit return setTime(v, a.Value(i).ToTime(unit)) case arrow.DATE32: - a, ok := arr.(*array.Date32) - if !ok { - return fmt.Errorf("expected *Date32, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Date32](arr) + if err != nil { + return err } return setTime(v, a.Value(i).ToTime()) case arrow.DATE64: - a, ok := arr.(*array.Date64) - if !ok { - return fmt.Errorf("expected *Date64, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Date64](arr) + if err != nil { + return err } return setTime(v, a.Value(i).ToTime()) case arrow.TIME32: - a, ok := arr.(*array.Time32) - if !ok { - return fmt.Errorf("expected *Time32, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Time32](arr) + if err != nil { + return err } unit := arr.DataType().(*arrow.Time32Type).Unit return setTime(v, a.Value(i).ToTime(unit)) case arrow.TIME64: - a, ok := arr.(*array.Time64) - if 
!ok { - return fmt.Errorf("expected *Time64, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Time64](arr) + if err != nil { + return err } unit := arr.DataType().(*arrow.Time64Type).Unit return setTime(v, a.Value(i).ToTime(unit)) case arrow.DURATION: - a, ok := arr.(*array.Duration) - if !ok { - return fmt.Errorf("expected *Duration, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Duration](arr) + if err != nil { + return err } if v.Type() != typeOfDuration { return fmt.Errorf("cannot set time.Duration into %s: %w", v.Type(), ErrTypeMismatch) @@ -267,9 +276,9 @@ func setTemporalValue(v reflect.Value, arr arrow.Array, i int) error { func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { switch arr.DataType().ID() { case arrow.DECIMAL128: - a, ok := arr.(*array.Decimal128) - if !ok { - return fmt.Errorf("expected *Decimal128, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Decimal128](arr) + if err != nil { + return err } if v.Type() != typeOfDec128 { return fmt.Errorf("cannot set decimal128.Num into %s: %w", v.Type(), ErrTypeMismatch) @@ -278,9 +287,9 @@ func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { v.Set(reflect.ValueOf(num)) case arrow.DECIMAL256: - a, ok := arr.(*array.Decimal256) - if !ok { - return fmt.Errorf("expected *Decimal256, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Decimal256](arr) + if err != nil { + return err } if v.Type() != typeOfDec256 { return fmt.Errorf("cannot set decimal256.Num into %s: %w", v.Type(), ErrTypeMismatch) @@ -289,9 +298,9 @@ func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { v.Set(reflect.ValueOf(num)) case arrow.DECIMAL32: - a, ok := arr.(*array.Decimal32) - if !ok { - return fmt.Errorf("expected *Decimal32, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Decimal32](arr) + if err != nil { + return err } if v.Type() != typeOfDec32 { return fmt.Errorf("cannot set decimal.Decimal32 into 
%s: %w", v.Type(), ErrTypeMismatch) @@ -299,9 +308,9 @@ func setDecimalValue(v reflect.Value, arr arrow.Array, i int) error { v.Set(reflect.ValueOf(a.Value(i))) case arrow.DECIMAL64: - a, ok := arr.(*array.Decimal64) - if !ok { - return fmt.Errorf("expected *Decimal64, got %T: %w", arr, ErrTypeMismatch) + a, err := assertArray[array.Decimal64](arr) + if err != nil { + return err } if v.Type() != typeOfDec64 { return fmt.Errorf("cannot set decimal.Decimal64 into %s: %w", v.Type(), ErrTypeMismatch) diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index 5c5e97f9..c2862f06 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -96,57 +96,13 @@ func buildPrimitiveArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, b.Reserve(vals.Len()) if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { - return appendPrimitiveValue(b, v, dt) + return appendValue(b, v, tagOpts{}) }); err != nil { return nil, err } return b.NewArray(), nil } -func appendPrimitiveValue(b array.Builder, v reflect.Value, dt arrow.DataType) error { - switch dt.ID() { - case arrow.INT8: - b.(*array.Int8Builder).Append(int8(v.Int())) - case arrow.INT16: - b.(*array.Int16Builder).Append(int16(v.Int())) - case arrow.INT32: - b.(*array.Int32Builder).Append(int32(v.Int())) - case arrow.INT64: - b.(*array.Int64Builder).Append(int64(v.Int())) - case arrow.UINT8: - b.(*array.Uint8Builder).Append(uint8(v.Uint())) - case arrow.UINT16: - b.(*array.Uint16Builder).Append(uint16(v.Uint())) - case arrow.UINT32: - b.(*array.Uint32Builder).Append(uint32(v.Uint())) - case arrow.UINT64: - b.(*array.Uint64Builder).Append(uint64(v.Uint())) - case arrow.FLOAT32: - b.(*array.Float32Builder).Append(float32(v.Float())) - case arrow.FLOAT64: - b.(*array.Float64Builder).Append(float64(v.Float())) - case arrow.BOOL: - b.(*array.BooleanBuilder).Append(v.Bool()) - case arrow.STRING: - 
b.(*array.StringBuilder).Append(v.String()) - case arrow.BINARY: - if v.IsNil() { - b.(*array.BinaryBuilder).AppendNull() - } else { - b.(*array.BinaryBuilder).Append(v.Bytes()) - } - case arrow.DURATION: - d, err := asDuration(v) - if err != nil { - return err - } - b.(*array.DurationBuilder).Append(arrow.Duration(d.Nanoseconds())) - default: - return fmt.Errorf("unsupported Arrow type %v: %w", dt, ErrUnsupportedType) - } - return nil -} - func timeOfDayNanos(t time.Time) int64 { t = t.UTC() midnight := time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, time.UTC) @@ -253,74 +209,31 @@ func decimalPrecisionScale(opts tagOpts, defaultPrec int32) (precision, scale in func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { elemType, isPtr := derefSliceElem(vals) + var b array.Builder switch elemType { case typeOfDec128: - precision, scale := decimalPrecisionScale(opts, dec128DefaultPrecision) - dt := &arrow.Decimal128Type{Precision: precision, Scale: scale} - b := array.NewDecimal128Builder(mem, dt) - defer b.Release() - b.Reserve(vals.Len()) - if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { - n, ok := reflect.TypeAssert[decimal128.Num](v) - if !ok { - return fmt.Errorf("expected decimal128.Num, got %s: %w", v.Type(), ErrTypeMismatch) - } - b.Append(n) - return nil - }); err != nil { - return nil, err - } - return b.NewArray(), nil - + p, s := decimalPrecisionScale(opts, dec128DefaultPrecision) + b = array.NewDecimal128Builder(mem, &arrow.Decimal128Type{Precision: p, Scale: s}) case typeOfDec256: - precision, scale := decimalPrecisionScale(opts, dec256DefaultPrecision) - dt := &arrow.Decimal256Type{Precision: precision, Scale: scale} - b := array.NewDecimal256Builder(mem, dt) - defer b.Release() - b.Reserve(vals.Len()) - if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { - n, ok := reflect.TypeAssert[decimal256.Num](v) - if !ok { - return fmt.Errorf("expected 
decimal256.Num, got %s: %w", v.Type(), ErrTypeMismatch) - } - b.Append(n) - return nil - }); err != nil { - return nil, err - } - return b.NewArray(), nil - + p, s := decimalPrecisionScale(opts, dec256DefaultPrecision) + b = array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: p, Scale: s}) case typeOfDec32: - precision, scale := decimalPrecisionScale(opts, dec32DefaultPrecision) - dt := &arrow.Decimal32Type{Precision: precision, Scale: scale} - b := array.NewDecimal32Builder(mem, dt) - defer b.Release() - b.Reserve(vals.Len()) - if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { - b.Append(decimal.Decimal32(v.Int())) - return nil - }); err != nil { - return nil, err - } - return b.NewArray(), nil - + p, s := decimalPrecisionScale(opts, dec32DefaultPrecision) + b = array.NewDecimal32Builder(mem, &arrow.Decimal32Type{Precision: p, Scale: s}) case typeOfDec64: - precision, scale := decimalPrecisionScale(opts, dec64DefaultPrecision) - dt := &arrow.Decimal64Type{Precision: precision, Scale: scale} - b := array.NewDecimal64Builder(mem, dt) - defer b.Release() - b.Reserve(vals.Len()) - if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { - b.Append(decimal.Decimal64(v.Int())) - return nil - }); err != nil { - return nil, err - } - return b.NewArray(), nil - + p, s := decimalPrecisionScale(opts, dec64DefaultPrecision) + b = array.NewDecimal64Builder(mem, &arrow.Decimal64Type{Precision: p, Scale: s}) default: return nil, fmt.Errorf("unsupported decimal type %v: %w", elemType, ErrUnsupportedType) } + defer b.Release() + b.Reserve(vals.Len()) + if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { + return appendDecimalValue(b, v) + }); err != nil { + return nil, err + } + return b.NewArray(), nil } func appendStructFields(sb *array.StructBuilder, v reflect.Value, fields []fieldMeta) error { @@ -568,35 +481,23 @@ type listBuilderLike interface { } func appendListElement(b array.Builder, v 
reflect.Value) error { - isNil := v.Kind() == reflect.Slice && v.IsNil() + if v.Kind() == reflect.Slice && v.IsNil() { + b.AppendNull() + return nil + } + var vb array.Builder switch lb := b.(type) { case *array.ListBuilder: - if isNil { - lb.AppendNull() - return nil - } lb.Append(true) vb = lb.ValueBuilder() case *array.LargeListBuilder: - if isNil { - lb.AppendNull() - return nil - } lb.Append(true) vb = lb.ValueBuilder() case *array.ListViewBuilder: - if isNil { - lb.AppendNull() - return nil - } lb.AppendWithSize(true, v.Len()) vb = lb.ValueBuilder() case *array.LargeListViewBuilder: - if isNil { - lb.AppendNull() - return nil - } lb.AppendWithSize(true, v.Len()) vb = lb.ValueBuilder() default: From 2e35ae5e38a813f81322ec022b01583cd48d5208 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 Apr 2026 18:24:39 -0400 Subject: [PATCH 56/82] =?UTF-8?q?refactor(arreflect):=20final=20cleanup=20?= =?UTF-8?q?=E2=80=94=20extract=20buildEmptyTyped,=20delete=20dead=20types,?= =?UTF-8?q?=20adopt=20derefSliceElem?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reflect.go: - Extract buildEmptyTyped from FromSlice empty-slice branch, eliminating 37-line duplication of type inference + option application - Replace validateTemporalOpt map with idiomatic switch (zero-alloc) reflect_go_to_arrow.go: - Replace 3 inline deref patterns with derefSliceElem calls reflect_arrow_to_go.go: - Extract fillFixedSizeList eliminating duplicated loop between reflect.Array and reflect.Slice cases reflect_test.go: - Delete 5 unused test struct types and TestHelpers (-58 lines dead code) reflect_public_test.go: - Add fieldValueByTag helper, replace 4 inline tag-search loops Net: -88 lines --- arrow/array/arreflect/reflect.go | 85 ++++++++++---------- arrow/array/arreflect/reflect_arrow_to_go.go | 21 ++--- arrow/array/arreflect/reflect_go_to_arrow.go | 18 +---- arrow/array/arreflect/reflect_public_test.go | 55 ++++--------- 
arrow/array/arreflect/reflect_test.go | 59 -------------- 5 files changed, 75 insertions(+), 163 deletions(-) diff --git a/arrow/array/arreflect/reflect.go b/arrow/array/arreflect/reflect.go index 5a4daa7e..49630763 100644 --- a/arrow/array/arreflect/reflect.go +++ b/arrow/array/arreflect/reflect.go @@ -379,15 +379,51 @@ func WithTemporal(temporal string) Option { return func(o *tagOpts) { o.Temporal = temporal } } -var validTemporalOpts = map[string]bool{ - "": true, "timestamp": true, "date32": true, "date64": true, "time32": true, "time64": true, -} - func validateTemporalOpt(temporal string) error { - if !validTemporalOpts[temporal] { + switch temporal { + case "", "timestamp", "date32", "date64", "time32", "time64": + return nil + default: return fmt.Errorf("arreflect: invalid WithTemporal value %q; valid values are date32, date64, time32, time64, timestamp: %w", temporal, ErrUnsupportedType) } - return nil +} + +func buildEmptyTyped(goType reflect.Type, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { + dt, err := inferArrowType(goType) + if err != nil { + return nil, err + } + derefType := goType + for derefType.Kind() == reflect.Ptr { + derefType = derefType.Elem() + } + dt = applyDecimalOpts(dt, derefType, opts) + dt = applyTemporalOpts(dt, derefType, opts) + if opts.ListView { + if derefType.Kind() != reflect.Slice || derefType == typeOfByteSlice { + return nil, fmt.Errorf("arreflect: WithListView requires a slice-of-slices element type, got %s: %w", goType, ErrUnsupportedType) + } + innerElem := derefType.Elem() + for innerElem.Kind() == reflect.Ptr { + innerElem = innerElem.Elem() + } + innerDT, err := inferArrowType(innerElem) + if err != nil { + return nil, err + } + dt = arrow.ListViewOf(innerDT) + } + if opts.Dict { + if err := validateDictValueType(dt); err != nil { + return nil, err + } + dt = &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: dt} + } else if opts.REE { + dt = 
arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, dt) + } + b := array.NewBuilder(mem, dt) + defer b.Release() + return b.NewArray(), nil } func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Array, error) { @@ -413,42 +449,7 @@ func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Arr } } if len(vals) == 0 { - goType := reflect.TypeFor[T]() - dt, err := inferArrowType(goType) - if err != nil { - return nil, err - } - derefType := goType - for derefType.Kind() == reflect.Ptr { - derefType = derefType.Elem() - } - dt = applyDecimalOpts(dt, derefType, tOpts) - dt = applyTemporalOpts(dt, derefType, tOpts) - if tOpts.ListView { - if derefType.Kind() != reflect.Slice || derefType == typeOfByteSlice { - return nil, fmt.Errorf("arreflect: WithListView requires a slice-of-slices element type, got %s: %w", goType, ErrUnsupportedType) - } - innerElem := derefType.Elem() - for innerElem.Kind() == reflect.Ptr { - innerElem = innerElem.Elem() - } - innerDT, err := inferArrowType(innerElem) - if err != nil { - return nil, err - } - dt = arrow.ListViewOf(innerDT) - } - if tOpts.Dict { - if err := validateDictValueType(dt); err != nil { - return nil, err - } - dt = &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: dt} - } else if tOpts.REE { - dt = arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, dt) - } - b := array.NewBuilder(mem, dt) - defer b.Release() - return b.NewArray(), nil + return buildEmptyTyped(reflect.TypeFor[T](), tOpts, mem) } sv := reflect.ValueOf(vals) return buildArray(sv, tOpts, mem) diff --git a/arrow/array/arreflect/reflect_arrow_to_go.go b/arrow/array/arreflect/reflect_arrow_to_go.go index eb5aa3e1..f03a2697 100644 --- a/arrow/array/arreflect/reflect_arrow_to_go.go +++ b/arrow/array/arreflect/reflect_arrow_to_go.go @@ -389,6 +389,15 @@ func setMapValue(v reflect.Value, arr *array.Map, i int) error { return nil } +func fillFixedSizeList(dst reflect.Value, child arrow.Array, start, n int) 
error { + for k := 0; k < n; k++ { + if err := setValue(dst.Index(k), child, start+k); err != nil { + return fmt.Errorf("arreflect: fixed-size list element %d: %w", k, err) + } + } + return nil +} + func setFixedSizeListValue(v reflect.Value, arr *array.FixedSizeList, i int) error { n := int(arr.DataType().(*arrow.FixedSizeListType).Len()) child := arr.ListValues() @@ -399,17 +408,11 @@ func setFixedSizeListValue(v reflect.Value, arr *array.FixedSizeList, i int) err if v.Len() != n { return fmt.Errorf("fixed-size list length %d does not match Go array length %d: %w", n, v.Len(), ErrTypeMismatch) } - for k := 0; k < n; k++ { - if err := setValue(v.Index(k), child, int(start)+k); err != nil { - return fmt.Errorf("arreflect: fixed-size list element %d: %w", k, err) - } - } + return fillFixedSizeList(v, child, int(start), n) case reflect.Slice: result := reflect.MakeSlice(v.Type(), n, n) - for k := 0; k < n; k++ { - if err := setValue(result.Index(k), child, int(start)+k); err != nil { - return fmt.Errorf("arreflect: fixed-size list element %d: %w", k, err) - } + if err := fillFixedSizeList(result, child, int(start), n); err != nil { + return err } v.Set(result) default: diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index c2862f06..a1408c52 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -247,11 +247,7 @@ func appendStructFields(sb *array.StructBuilder, v reflect.Value, fields []field } func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { - elemType := vals.Type().Elem() - isPtr := elemType.Kind() == reflect.Ptr - for elemType.Kind() == reflect.Ptr { - elemType = elemType.Elem() - } + elemType, isPtr := derefSliceElem(vals) st, err := inferStructType(elemType) if err != nil { @@ -574,11 +570,7 @@ func buildListViewArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, } func buildMapArray(vals reflect.Value, 
mem memory.Allocator) (arrow.Array, error) { - mapType := vals.Type().Elem() - isPtr := mapType.Kind() == reflect.Ptr - for mapType.Kind() == reflect.Ptr { - mapType = mapType.Elem() - } + mapType, isPtr := derefSliceElem(vals) keyType := mapType.Key() valType := mapType.Elem() @@ -628,11 +620,7 @@ func buildMapArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error } func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { - elemType := vals.Type().Elem() - isPtr := elemType.Kind() == reflect.Ptr - for elemType.Kind() == reflect.Ptr { - elemType = elemType.Elem() - } + elemType, isPtr := derefSliceElem(vals) if elemType.Kind() != reflect.Array { return nil, fmt.Errorf("arreflect: expected array element, got %v", elemType.Kind()) diff --git a/arrow/array/arreflect/reflect_public_test.go b/arrow/array/arreflect/reflect_public_test.go index e1a0d145..87f542b7 100644 --- a/arrow/array/arreflect/reflect_public_test.go +++ b/arrow/array/arreflect/reflect_public_test.go @@ -30,6 +30,15 @@ import ( func testMem() memory.Allocator { return memory.NewGoAllocator() } +func fieldValueByTag(v reflect.Value, tag string) reflect.Value { + for i := 0; i < v.NumField(); i++ { + if v.Type().Field(i).Tag.Get("arrow") == tag { + return v.Field(i) + } + } + return reflect.Value{} +} + func TestToGo(t *testing.T) { mem := testMem() @@ -469,16 +478,8 @@ func TestRecordAtAny(t *testing.T) { require.NoError(t, err, "RecordAtAny(0)") v := reflect.ValueOf(got) require.Equal(t, reflect.Struct, v.Kind()) - var nameField, scoreField reflect.Value - for i := 0; i < v.NumField(); i++ { - tag := v.Type().Field(i).Tag.Get("arrow") - switch tag { - case "name": - nameField = v.Field(i) - case "score": - scoreField = v.Field(i) - } - } + nameField := fieldValueByTag(v, "name") + scoreField := fieldValueByTag(v, "score") require.True(t, nameField.IsValid(), "name field not found") require.True(t, scoreField.IsValid(), "score field not found") 
assert.Equal(t, "alice", nameField.String()) @@ -502,12 +503,7 @@ func TestRecordToAnySlice(t *testing.T) { for i, row := range got { v := reflect.ValueOf(row) require.Equal(t, reflect.Struct, v.Kind(), "row %d", i) - var nameField reflect.Value - for fi := 0; fi < v.NumField(); fi++ { - if v.Type().Field(fi).Tag.Get("arrow") == "name" { - nameField = v.Field(fi) - } - } + nameField := fieldValueByTag(v, "name") assert.Equal(t, rows[i].Name, nameField.String(), "row %d name", i) } } @@ -534,17 +530,8 @@ func TestAtAnyComposite(t *testing.T) { v := reflect.ValueOf(got) require.Equal(t, reflect.Struct, v.Kind()) - vt := v.Type() - var idField, nameField reflect.Value - for i := 0; i < v.NumField(); i++ { - tag := vt.Field(i).Tag.Get("arrow") - switch tag { - case "id": - idField = v.Field(i) - case "name": - nameField = v.Field(i) - } - } + idField := fieldValueByTag(v, "id") + nameField := fieldValueByTag(v, "name") require.True(t, idField.IsValid(), "id field not found") require.True(t, nameField.IsValid(), "name field not found") assert.Equal(t, int64(99), idField.Int()) @@ -640,17 +627,9 @@ func TestToAnySliceStructArray(t *testing.T) { require.Equal(t, reflect.Struct, v.Kind(), "row %d", i) require.Equal(t, 3, v.NumField(), "row %d", i) - var id, label, score reflect.Value - for fi := 0; fi < v.NumField(); fi++ { - switch v.Type().Field(fi).Tag.Get("arrow") { - case "id": - id = v.Field(fi) - case "label": - label = v.Field(fi) - case "score": - score = v.Field(fi) - } - } + id := fieldValueByTag(v, "id") + label := fieldValueByTag(v, "label") + score := fieldValueByTag(v, "score") require.True(t, id.IsValid(), "row %d: id field not found", i) require.True(t, label.IsValid(), "row %d: label field not found", i) require.True(t, score.IsValid(), "row %d: score field not found", i) diff --git a/arrow/array/arreflect/reflect_test.go b/arrow/array/arreflect/reflect_test.go index 0df476ad..a170eb5c 100644 --- a/arrow/array/arreflect/reflect_test.go +++ 
b/arrow/array/arreflect/reflect_test.go @@ -197,62 +197,3 @@ func TestCachedStructFields(t *testing.T) { assert.Equal(t, "X", fields1[0].Name) assert.Equal(t, "Y", fields1[1].Name) } - -// ── shared test types used across reflect test files ────────────────────────── - -type testPrimitive struct { - I8 int8 - I16 int16 - I32 int32 - I64 int64 - U8 uint8 - U16 uint16 - U32 uint32 - U64 uint64 - F32 float32 - F64 float64 - B bool - S string - Blob []byte -} - -type testNested struct { - Name string - Scores []float64 - Tags map[string]string - Address struct { - City string - Zip int32 - } -} - -type testNullable struct { - Required string - Optional *string - MaybeInt *int32 -} - -type testEmbedded struct { - ID string - testEmbeddedInner -} - -type testEmbeddedInner struct { //nolint:unused - City string - Code int32 -} - -type testTagged struct { - UserName string `arrow:"user_name"` - Score float64 `arrow:"score"` - Hidden string `arrow:"-"` -} - -func TestHelpers(t *testing.T) { - // Verify shared test types are usable - _ = testPrimitive{I8: 1, I32: 2, S: "hi"} - _ = testNested{Name: "n", Scores: []float64{1.0}} - _ = testNullable{Required: "r"} - _ = testTagged{UserName: "u", Score: 3.14} - _ = testEmbedded{ID: "id"} -} From 095a046c1744ce6990c2a5ee79a4b7dde306eaa5 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 Apr 2026 18:31:57 -0400 Subject: [PATCH 57/82] refactor(arreflect): remove dead opts tagOpts parameter from appendValue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameter was never read — all routing is determined by the concrete builder type passed in, which already encodes dict/listview/temporal intent via applyEncodingOpts. All 10 call sites passed tagOpts{} or fm.Opts (ignored), now pass only (builder, value). 
--- arrow/array/arreflect/reflect_go_to_arrow.go | 22 +++++++++---------- .../arreflect/reflect_go_to_arrow_test.go | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index a1408c52..e7b50879 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -96,7 +96,7 @@ func buildPrimitiveArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, b.Reserve(vals.Len()) if err := iterSlice(vals, isPtr, b.AppendNull, func(v reflect.Value) error { - return appendValue(b, v, tagOpts{}) + return appendValue(b, v) }); err != nil { return nil, err } @@ -239,7 +239,7 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( func appendStructFields(sb *array.StructBuilder, v reflect.Value, fields []fieldMeta) error { sb.Append(true) for fi, fm := range fields { - if err := appendValue(sb.FieldBuilder(fi), v.FieldByIndex(fm.Index), fm.Opts); err != nil { + if err := appendValue(sb.FieldBuilder(fi), v.FieldByIndex(fm.Index)); err != nil { return fmt.Errorf("struct field %q: %w", fm.Name, err) } } @@ -338,7 +338,7 @@ func appendDecimalValue(b array.Builder, v reflect.Value) error { return nil } -func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { +func appendValue(b array.Builder, v reflect.Value) error { for v.Kind() == reflect.Ptr { if v.IsNil() { b.AppendNull() @@ -400,7 +400,7 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { tb.Append(true) vb := tb.ValueBuilder() for i := 0; i < v.Len(); i++ { - if err := appendValue(vb, v.Index(i), tagOpts{}); err != nil { + if err := appendValue(vb, v.Index(i)); err != nil { return err } } @@ -412,10 +412,10 @@ func appendValue(b array.Builder, v reflect.Value, opts tagOpts) error { kb := tb.KeyBuilder() ib := tb.ItemBuilder() for _, key := range v.MapKeys() { - if err := appendValue(kb, key, tagOpts{}); 
err != nil { + if err := appendValue(kb, key); err != nil { return err } - if err := appendValue(ib, v.MapIndex(key), tagOpts{}); err != nil { + if err := appendValue(ib, v.MapIndex(key)); err != nil { return err } } @@ -500,7 +500,7 @@ func appendListElement(b array.Builder, v reflect.Value) error { return fmt.Errorf("unexpected list builder type %T: %w", b, ErrUnsupportedType) } for i := 0; i < v.Len(); i++ { - if err := appendValue(vb, v.Index(i), tagOpts{}); err != nil { + if err := appendValue(vb, v.Index(i)); err != nil { return err } } @@ -553,7 +553,7 @@ func buildListLikeArray(vals reflect.Value, mem memory.Allocator, isView bool) ( } beginRow(outer.Len()) for j := 0; j < outer.Len(); j++ { - if err := appendValue(vb, outer.Index(j), tagOpts{}); err != nil { + if err := appendValue(vb, outer.Index(j)); err != nil { return nil, fmt.Errorf("%s [%d][%d]: %w", label, i, j, err) } } @@ -604,10 +604,10 @@ func buildMapArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error } mb.Append(true) for _, key := range m.MapKeys() { - if err := appendValue(kb, key, tagOpts{}); err != nil { + if err := appendValue(kb, key); err != nil { return fmt.Errorf("map key: %w", err) } - if err := appendValue(ib, m.MapIndex(key), tagOpts{}); err != nil { + if err := appendValue(ib, m.MapIndex(key)); err != nil { return fmt.Errorf("map value: %w", err) } } @@ -647,7 +647,7 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar if err := iterSlice(vals, isPtr, appendNullIdx, func(elem reflect.Value) error { fb.Append(true) for j := 0; j < int(n); j++ { - if err := appendValue(vb, elem.Index(j), tagOpts{}); err != nil { + if err := appendValue(vb, elem.Index(j)); err != nil { return fmt.Errorf("fixed-size list element [%d][%d]: %w", idx, j, err) } } diff --git a/arrow/array/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go index e4117e17..d8a34fbd 100644 --- 
a/arrow/array/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/array/arreflect/reflect_go_to_arrow_test.go @@ -408,7 +408,7 @@ func TestBuildFixedSizeListArray(t *testing.T) { defer bldr.Release() var nilSlice []int32 - err := appendValue(bldr, reflect.ValueOf(&nilSlice).Elem(), tagOpts{}) + err := appendValue(bldr, reflect.ValueOf(&nilSlice).Elem()) require.NoError(t, err) bldr.Append(true) From 4bb3fd947bf657ff6d46e2e6cf8bfe69e2b762af Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 21 Apr 2026 14:04:14 -0400 Subject: [PATCH 58/82] fix(arreflect): address Copilot review findings on PR #771 - setValue: loop while reflect.Ptr so **T and deeper destinations are allocated correctly, matching documented multi-level pointer semantics. - setValue: clone strings via strings.Clone and copy []byte into a fresh slice so Go values remain valid after the Arrow array is released. - Introduce fieldByIndexSafe that walks the index path with nil checks at each pointer dereference; use it in setStructValue (Arrow->Go) and appendStructFields (Go->Arrow) to avoid a reflect panic on nil embedded pointers. Go->Arrow appends null; Arrow->Go leaves the field at zero. - exportedFieldName: prefix names starting with non-letter runes (e.g. '_id', '1st') with 'X' to produce a valid exported identifier; the original Arrow name is preserved in the struct tag. - FromSlice: validateOptions rejects conflicting encoding options (WithDict, WithREE, WithListView) with ErrUnsupportedType instead of silently picking a precedence. Add tests covering each fix: multi-level ptr still works, strings and []byte survive array.Release, nil embedded pointer produces nulls / zero values instead of panicking, non-letter-prefix field names map to exported identifiers, and conflicting encoding options return errors. 
--- arrow/array/arreflect/reflect.go | 33 ++++++++++++ arrow/array/arreflect/reflect_arrow_to_go.go | 21 ++++++-- .../arreflect/reflect_arrow_to_go_test.go | 53 +++++++++++++++++++ arrow/array/arreflect/reflect_go_to_arrow.go | 7 ++- .../arreflect/reflect_go_to_arrow_test.go | 43 +++++++++++++++ arrow/array/arreflect/reflect_infer.go | 16 ++++-- arrow/array/arreflect/reflect_infer_test.go | 16 +++++- arrow/array/arreflect/reflect_public_test.go | 19 +++++++ 8 files changed, 196 insertions(+), 12 deletions(-) diff --git a/arrow/array/arreflect/reflect.go b/arrow/array/arreflect/reflect.go index 49630763..a24d967f 100644 --- a/arrow/array/arreflect/reflect.go +++ b/arrow/array/arreflect/reflect.go @@ -328,6 +328,19 @@ func cachedStructFields(t reflect.Type) []fieldMeta { return v.([]fieldMeta) } +func fieldByIndexSafe(v reflect.Value, index []int) (reflect.Value, bool) { + for _, idx := range index { + if v.Kind() == reflect.Ptr { + if v.IsNil() { + return reflect.Value{}, false + } + v = v.Elem() + } + v = v.Field(idx) + } + return v, true +} + func At[T any](arr arrow.Array, i int) (T, error) { var result T v := reflect.ValueOf(&result).Elem() @@ -388,6 +401,23 @@ func validateTemporalOpt(temporal string) error { } } +func validateOptions(opts tagOpts) error { + n := 0 + if opts.Dict { + n++ + } + if opts.REE { + n++ + } + if opts.ListView { + n++ + } + if n > 1 { + return fmt.Errorf("arreflect: conflicting options: only one of WithDict, WithREE, WithListView may be specified: %w", ErrUnsupportedType) + } + return nil +} + func buildEmptyTyped(goType reflect.Type, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { dt, err := inferArrowType(goType) if err != nil { @@ -434,6 +464,9 @@ func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Arr for _, o := range opts { o(&tOpts) } + if err := validateOptions(tOpts); err != nil { + return nil, err + } if err := validateTemporalOpt(tOpts.Temporal); err != nil { return nil, err } diff --git 
a/arrow/array/arreflect/reflect_arrow_to_go.go b/arrow/array/arreflect/reflect_arrow_to_go.go index f03a2697..7fe39b7e 100644 --- a/arrow/array/arreflect/reflect_arrow_to_go.go +++ b/arrow/array/arreflect/reflect_arrow_to_go.go @@ -19,6 +19,7 @@ package arreflect import ( "fmt" "reflect" + "strings" "time" "github.com/apache/arrow-go/v18/arrow" @@ -51,8 +52,10 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { v.Set(reflect.Zero(v.Type())) return nil } - if v.Kind() == reflect.Ptr { - v.Set(reflect.New(v.Type().Elem())) + for v.Kind() == reflect.Ptr { + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } v = v.Elem() } @@ -81,7 +84,7 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { if v.Kind() != reflect.String { return fmt.Errorf("cannot set string into %s: %w", v.Type(), ErrTypeMismatch) } - v.SetString(a.Value(i)) + v.SetString(strings.Clone(a.Value(i))) case arrow.BINARY, arrow.LARGE_BINARY: type byter interface{ Value(int) []byte } @@ -92,7 +95,10 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { if v.Kind() != reflect.Slice || v.Type().Elem().Kind() != reflect.Uint8 { return fmt.Errorf("cannot set []byte into %s: %w", v.Type(), ErrTypeMismatch) } - v.SetBytes(a.Value(i)) + src := a.Value(i) + dst := make([]byte, len(src)) + copy(dst, src) + v.SetBytes(dst) case arrow.TIMESTAMP, arrow.DATE32, arrow.DATE64, arrow.TIME32, arrow.TIME64, arrow.DURATION: @@ -336,7 +342,12 @@ func setStructValue(v reflect.Value, sa *array.Struct, i int) error { if !found { continue } - if err := setValue(v.FieldByIndex(fm.Index), sa.Field(arrowIdx), i); err != nil { + fv, ok := fieldByIndexSafe(v, fm.Index) + if !ok { + // embedded pointer is nil; leave the field at its zero value + continue + } + if err := setValue(fv, sa.Field(arrowIdx), i); err != nil { return fmt.Errorf("arreflect: field %q: %w", fm.Name, err) } } diff --git a/arrow/array/arreflect/reflect_arrow_to_go_test.go b/arrow/array/arreflect/reflect_arrow_to_go_test.go 
index 233eecb6..1f8be5a4 100644 --- a/arrow/array/arreflect/reflect_arrow_to_go_test.go +++ b/arrow/array/arreflect/reflect_arrow_to_go_test.go @@ -378,6 +378,59 @@ func TestSetStructValue(t *testing.T) { assert.Equal(t, "Dave", got.Name) assert.Equal(t, "", got.Email) }) + + t.Run("nil embedded pointer leaves promoted fields zero", func(t *testing.T) { + // Regression: reflect.Value.FieldByIndex panics on nil embedded pointer; + // the walker must stop and leave promoted fields at their zero value. + nameArr := makeStringArray(t, mem, "Alice") + cityArr := makeStringArray(t, mem, "NYC") + sa := makeStructArray(t, []arrow.Array{nameArr, cityArr}, []string{"Name", "City"}) + + type Inner struct { + City string + } + type Outer struct { + Name string + *Inner + } + + var got Outer + setValueInto(t, &got, sa, 0) + assert.Equal(t, "Alice", got.Name) + assert.Nil(t, got.Inner, "nil embedded pointer should remain nil; promoted City left at zero value") + }) +} + +func TestSetValueClonesStringAndBytes(t *testing.T) { + // Regression: String.Value / Binary.Value return views into the array's + // backing buffer. setValue must copy so Go values outlive the array. 
+ mem := checkedMem(t) + + t.Run("string", func(t *testing.T) { + sb := array.NewStringBuilder(mem) + sb.Append("hello world") + arr := sb.NewStringArray() + sb.Release() + + var got string + setValueInto(t, &got, arr, 0) + assert.Equal(t, "hello world", got) + arr.Release() + assert.Equal(t, "hello world", got, "string must survive Arrow array release") + }) + + t.Run("bytes", func(t *testing.T) { + bb := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + bb.Append([]byte{0x01, 0x02, 0x03, 0x04}) + arr := bb.NewBinaryArray() + bb.Release() + + var got []byte + setValueInto(t, &got, arr, 0) + assert.Equal(t, []byte{0x01, 0x02, 0x03, 0x04}, got) + arr.Release() + assert.Equal(t, []byte{0x01, 0x02, 0x03, 0x04}, got, "[]byte must survive Arrow array release") + }) } func TestSetListValue(t *testing.T) { diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index e7b50879..7f363bf0 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -239,7 +239,12 @@ func buildDecimalArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) ( func appendStructFields(sb *array.StructBuilder, v reflect.Value, fields []fieldMeta) error { sb.Append(true) for fi, fm := range fields { - if err := appendValue(sb.FieldBuilder(fi), v.FieldByIndex(fm.Index)); err != nil { + fv, ok := fieldByIndexSafe(v, fm.Index) + if !ok { + sb.FieldBuilder(fi).AppendNull() + continue + } + if err := appendValue(sb.FieldBuilder(fi), fv); err != nil { return fmt.Errorf("struct field %q: %w", fm.Name, err) } } diff --git a/arrow/array/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go index d8a34fbd..f5c031bf 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/array/arreflect/reflect_go_to_arrow_test.go @@ -290,6 +290,49 @@ func TestBuildStructArray(t *testing.T) { assert.Equal(t, int32(99), xArr.Value(0)) assert.Equal(t, int32(99), 
xArr.Value(2)) }) + + t.Run("nil_embedded_pointer_promoted_field", func(t *testing.T) { + // Regression: reflect.Value.FieldByIndex panics when traversing a nil + // embedded pointer; promoted fields must become null instead. + type Inner struct { + City string + Zip int32 + } + type Outer struct { + Name string + *Inner + } + vals := []Outer{ + {Name: "Alice", Inner: &Inner{City: "NYC", Zip: 10001}}, + {Name: "Bob", Inner: nil}, + {Name: "Carol", Inner: &Inner{City: "LA", Zip: 90001}}, + } + arr := mustBuildDefault(t, vals, mem) + require.Equal(t, arrow.STRUCT, arr.DataType().ID()) + sa := arr.(*array.Struct) + require.Equal(t, 3, sa.Len()) + require.Equal(t, 3, sa.NumField(), "expected 3 promoted fields (Name, City, Zip)") + + nameArr := sa.Field(0).(*array.String) + cityArr := sa.Field(1).(*array.String) + zipArr := sa.Field(2).(*array.Int32) + + assert.Equal(t, "Alice", nameArr.Value(0)) + assert.False(t, cityArr.IsNull(0)) + assert.Equal(t, "NYC", cityArr.Value(0)) + assert.False(t, zipArr.IsNull(0)) + assert.Equal(t, int32(10001), zipArr.Value(0)) + + assert.Equal(t, "Bob", nameArr.Value(1)) + assert.True(t, cityArr.IsNull(1), "City should be null when *Inner is nil") + assert.True(t, zipArr.IsNull(1), "Zip should be null when *Inner is nil") + + assert.Equal(t, "Carol", nameArr.Value(2)) + assert.False(t, cityArr.IsNull(2)) + assert.Equal(t, "LA", cityArr.Value(2)) + assert.False(t, zipArr.IsNull(2)) + assert.Equal(t, int32(90001), zipArr.Value(2)) + }) } func TestBuildListArray(t *testing.T) { diff --git a/arrow/array/arreflect/reflect_infer.go b/arrow/array/arreflect/reflect_infer.go index 34ae26be..83a05ce1 100644 --- a/arrow/array/arreflect/reflect_infer.go +++ b/arrow/array/arreflect/reflect_infer.go @@ -385,13 +385,19 @@ func exportedFieldName(name string, index int) (string, error) { return fmt.Sprintf("Field%d", index), nil } runes := []rune(name) - runes[0] = unicode.ToUpper(runes[0]) + // If the first rune is not a letter (e.g. 
'_', digit), prefix with "X" + // to produce a valid exported Go identifier while preserving the original + // name in the struct tag. + if !unicode.IsLetter(runes[0]) { + runes = append([]rune{'X'}, runes...) + } else { + runes[0] = unicode.ToUpper(runes[0]) + } for j, r := range runes { if j == 0 { - if !unicode.IsLetter(r) { - return "", fmt.Errorf("arreflect: InferGoType: field name %q produces invalid Go identifier: %w", name, ErrUnsupportedType) - } - } else if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' { + continue + } + if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' { return "", fmt.Errorf("arreflect: InferGoType: field name %q produces invalid Go identifier: %w", name, ErrUnsupportedType) } } diff --git a/arrow/array/arreflect/reflect_infer_test.go b/arrow/array/arreflect/reflect_infer_test.go index 67310899..0d65da67 100644 --- a/arrow/array/arreflect/reflect_infer_test.go +++ b/arrow/array/arreflect/reflect_infer_test.go @@ -417,7 +417,6 @@ func TestInferGoTypeStructInvalidIdentifier(t *testing.T) { {"hyphenated", "my-field"}, {"space", "a b"}, {"dot", "first.name"}, - {"digit prefix", "1st"}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { @@ -426,4 +425,19 @@ func TestInferGoTypeStructInvalidIdentifier(t *testing.T) { assert.ErrorIs(t, err, ErrUnsupportedType) }) } + + t.Run("non-letter prefix mapped", func(t *testing.T) { + for _, tc := range []struct { + name string + expected string + }{ + {"_id", "X_id"}, + {"1st", "X1st"}, + } { + st := arrow.StructOf(arrow.Field{Name: tc.name, Type: arrow.PrimitiveTypes.Int32}) + goType, err := InferGoType(st) + assert.NoError(t, err) + assert.Equal(t, tc.expected, goType.Field(0).Name) + } + }) } diff --git a/arrow/array/arreflect/reflect_public_test.go b/arrow/array/arreflect/reflect_public_test.go index 87f542b7..69b29870 100644 --- a/arrow/array/arreflect/reflect_public_test.go +++ b/arrow/array/arreflect/reflect_public_test.go @@ -263,6 +263,25 @@ func 
TestFromGoSlice(t *testing.T) { _, err := FromSlice([]string{}, mem, WithTemporal("date32")) assert.ErrorIs(t, err, ErrUnsupportedType) }) + + t.Run("conflicting options return error", func(t *testing.T) { + cases := []struct { + name string + opts []Option + }{ + {"WithDict+WithREE", []Option{WithDict(), WithREE()}}, + {"WithDict+WithListView", []Option{WithDict(), WithListView()}}, + {"WithREE+WithListView", []Option{WithREE(), WithListView()}}, + {"all three", []Option{WithDict(), WithREE(), WithListView()}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + _, err := FromSlice([]int32{1, 2, 3}, mem, tc.opts...) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + } + }) } func TestRecordToSlice(t *testing.T) { From 29f8ebed5a72de20c19b45ea002b5f67505b503a Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 21 Apr 2026 16:31:37 -0400 Subject: [PATCH 59/82] test(arreflect): expand coverage of error paths and uncovered branches Add targeted tests for primitive set-value kinds, dictionary/list-element builder dispatch, InferGoType primitives and composites, applyTemporal / applyDecimal branch coverage, buildEmptyTyped error and happy paths, and error paths for asTime/asDuration/appendTemporalValue, appendDecimalValue, buildMapArray, buildRunEndEncodedArray (empty, nil-pointer equality, DeepEqual for non-comparable elements), AtAny InferGoType errors, and fillFixedSizeList child-type mismatches. Package coverage rises from 88.0% to 91.4% and every previously sub-80% function now exceeds 80%. 
--- .../arreflect/reflect_arrow_to_go_test.go | 283 ++++++++++++++++++ .../arreflect/reflect_go_to_arrow_test.go | 272 +++++++++++++++++ arrow/array/arreflect/reflect_infer_test.go | 193 ++++++++++++ arrow/array/arreflect/reflect_public_test.go | 9 + arrow/array/arreflect/reflect_test.go | 104 +++++++ 5 files changed, 861 insertions(+) diff --git a/arrow/array/arreflect/reflect_arrow_to_go_test.go b/arrow/array/arreflect/reflect_arrow_to_go_test.go index 1f8be5a4..1a17b33a 100644 --- a/arrow/array/arreflect/reflect_arrow_to_go_test.go +++ b/arrow/array/arreflect/reflect_arrow_to_go_test.go @@ -175,6 +175,141 @@ func TestSetPrimitiveValue(t *testing.T) { err := setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0) assert.Error(t, err, "expected error for int32→float64 mismatch") }) + + t.Run("int8", func(t *testing.T) { + b := array.NewInt8Builder(mem) + defer b.Release() + b.Append(-42) + arr := b.NewArray().(*array.Int8) + defer arr.Release() + + var got int8 + require.NoError(t, setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0)) + assert.Equal(t, int8(-42), got) + + var bad float32 + assert.ErrorIs(t, setPrimitiveValue(reflect.ValueOf(&bad).Elem(), arr, 0), ErrTypeMismatch) + }) + + t.Run("int16", func(t *testing.T) { + b := array.NewInt16Builder(mem) + defer b.Release() + b.Append(-1234) + arr := b.NewArray().(*array.Int16) + defer arr.Release() + + var got int16 + require.NoError(t, setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0)) + assert.Equal(t, int16(-1234), got) + + var bad float32 + assert.ErrorIs(t, setPrimitiveValue(reflect.ValueOf(&bad).Elem(), arr, 0), ErrTypeMismatch) + }) + + t.Run("int64 mismatch", func(t *testing.T) { + b := array.NewInt64Builder(mem) + defer b.Release() + b.Append(1) + arr := b.NewArray().(*array.Int64) + defer arr.Release() + + var bad string + assert.ErrorIs(t, setPrimitiveValue(reflect.ValueOf(&bad).Elem(), arr, 0), ErrTypeMismatch) + }) + + t.Run("uint16", func(t *testing.T) { + b := 
array.NewUint16Builder(mem) + defer b.Release() + b.Append(65535) + arr := b.NewArray().(*array.Uint16) + defer arr.Release() + + var got uint16 + require.NoError(t, setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0)) + assert.Equal(t, uint16(65535), got) + + var bad int32 + assert.ErrorIs(t, setPrimitiveValue(reflect.ValueOf(&bad).Elem(), arr, 0), ErrTypeMismatch) + }) + + t.Run("uint32", func(t *testing.T) { + b := array.NewUint32Builder(mem) + defer b.Release() + b.Append(4_000_000_000) + arr := b.NewArray().(*array.Uint32) + defer arr.Release() + + var got uint32 + require.NoError(t, setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0)) + assert.Equal(t, uint32(4_000_000_000), got) + + var bad int32 + assert.ErrorIs(t, setPrimitiveValue(reflect.ValueOf(&bad).Elem(), arr, 0), ErrTypeMismatch) + }) + + t.Run("uint64", func(t *testing.T) { + b := array.NewUint64Builder(mem) + defer b.Release() + b.Append(1 << 63) + arr := b.NewArray().(*array.Uint64) + defer arr.Release() + + var got uint64 + require.NoError(t, setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0)) + assert.Equal(t, uint64(1<<63), got) + + var bad float64 + assert.ErrorIs(t, setPrimitiveValue(reflect.ValueOf(&bad).Elem(), arr, 0), ErrTypeMismatch) + }) + + t.Run("uint8 mismatch", func(t *testing.T) { + b := array.NewUint8Builder(mem) + defer b.Release() + b.Append(1) + arr := b.NewArray().(*array.Uint8) + defer arr.Release() + + var bad int8 + assert.ErrorIs(t, setPrimitiveValue(reflect.ValueOf(&bad).Elem(), arr, 0), ErrTypeMismatch) + }) + + t.Run("float32", func(t *testing.T) { + b := array.NewFloat32Builder(mem) + defer b.Release() + b.Append(2.5) + arr := b.NewArray().(*array.Float32) + defer arr.Release() + + var got float32 + require.NoError(t, setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0)) + assert.Equal(t, float32(2.5), got) + + var bad int32 + assert.ErrorIs(t, setPrimitiveValue(reflect.ValueOf(&bad).Elem(), arr, 0), ErrTypeMismatch) + }) + + t.Run("float64 mismatch", 
func(t *testing.T) { + b := array.NewFloat64Builder(mem) + defer b.Release() + b.Append(1.0) + arr := b.NewArray().(*array.Float64) + defer arr.Release() + + var bad int32 + assert.ErrorIs(t, setPrimitiveValue(reflect.ValueOf(&bad).Elem(), arr, 0), ErrTypeMismatch) + }) + + t.Run("unsupported primitive type returns error", func(t *testing.T) { + b := array.NewBooleanBuilder(mem) + defer b.Release() + b.Append(true) + arr := b.NewArray().(*array.Boolean) + defer arr.Release() + + var got bool + err := setPrimitiveValue(reflect.ValueOf(&got).Elem(), arr, 0) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) } func TestSetTemporalValue(t *testing.T) { @@ -262,6 +397,56 @@ func TestSetTemporalValue(t *testing.T) { assert.True(t, got.Hour() == 10 && got.Minute() == 30 && got.Second() == 0 && got.Nanosecond() == 123456789, "time64: got %v, want 10:30:00.123456789", got) }) + + t.Run("date64", func(t *testing.T) { + b := array.NewDate64Builder(mem) + defer b.Release() + ms := int64(1705276800000) + b.Append(arrow.Date64(ms)) + arr := b.NewArray().(*array.Date64) + defer arr.Release() + + got := setValueAt[time.Time](t, arr, 0) + expected := arrow.Date64(ms).ToTime() + assert.True(t, got.Equal(expected), "date64: expected %v, got %v", expected, got) + }) + + t.Run("type mismatch into non-time returns error", func(t *testing.T) { + b := array.NewTimestampBuilder(mem, &arrow.TimestampType{Unit: arrow.Second}) + defer b.Release() + b.Append(arrow.Timestamp(0)) + arr := b.NewArray().(*array.Timestamp) + defer arr.Release() + + var bad int64 + err := setTemporalValue(reflect.ValueOf(&bad).Elem(), arr, 0) + assert.ErrorIs(t, err, ErrTypeMismatch) + }) + + t.Run("duration into non-duration returns error", func(t *testing.T) { + dt := &arrow.DurationType{Unit: arrow.Second} + b := array.NewDurationBuilder(mem, dt) + defer b.Release() + b.Append(arrow.Duration(1)) + arr := b.NewArray().(*array.Duration) + defer arr.Release() + + var bad int64 + err := 
setTemporalValue(reflect.ValueOf(&bad).Elem(), arr, 0) + assert.ErrorIs(t, err, ErrTypeMismatch) + }) + + t.Run("unsupported temporal type returns error", func(t *testing.T) { + b := array.NewInt32Builder(mem) + defer b.Release() + b.Append(1) + arr := b.NewArray().(*array.Int32) + defer arr.Release() + + var got time.Time + err := setTemporalValue(reflect.ValueOf(&got).Elem(), arr, 0) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) } func TestSetDecimalValue(t *testing.T) { @@ -326,6 +511,72 @@ func TestSetDecimalValue(t *testing.T) { got := setValueAt[decimal.Decimal64](t, arr, 0) assert.Equal(t, num, got) }) + + t.Run("type mismatch into wrong decimal kind returns error", func(t *testing.T) { + b128 := array.NewDecimal128Builder(mem, &arrow.Decimal128Type{Precision: 10, Scale: 2}) + defer b128.Release() + b128.Append(decimal128.New(0, 1)) + arr128 := b128.NewDecimal128Array() + defer arr128.Release() + + var got256 decimal256.Num + assert.ErrorIs(t, setDecimalValue(reflect.ValueOf(&got256).Elem(), arr128, 0), ErrTypeMismatch) + + var got32 decimal.Decimal32 + assert.ErrorIs(t, setDecimalValue(reflect.ValueOf(&got32).Elem(), arr128, 0), ErrTypeMismatch) + + b256 := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 20, Scale: 4}) + defer b256.Release() + b256.Append(decimal256.New(0, 0, 0, 1)) + arr256 := b256.NewDecimal256Array() + defer arr256.Release() + + var got128 decimal128.Num + assert.ErrorIs(t, setDecimalValue(reflect.ValueOf(&got128).Elem(), arr256, 0), ErrTypeMismatch) + + b32 := array.NewDecimal32Builder(mem, &arrow.Decimal32Type{Precision: 9, Scale: 2}) + defer b32.Release() + b32.Append(decimal.Decimal32(1)) + arr32 := b32.NewArray().(*array.Decimal32) + defer arr32.Release() + + var got64 decimal.Decimal64 + assert.ErrorIs(t, setDecimalValue(reflect.ValueOf(&got64).Elem(), arr32, 0), ErrTypeMismatch) + + b64 := array.NewDecimal64Builder(mem, &arrow.Decimal64Type{Precision: 18, Scale: 3}) + defer b64.Release() + 
b64.Append(decimal.Decimal64(1)) + arr64 := b64.NewArray().(*array.Decimal64) + defer arr64.Release() + + var badF float64 + assert.ErrorIs(t, setDecimalValue(reflect.ValueOf(&badF).Elem(), arr64, 0), ErrTypeMismatch) + }) + + t.Run("unsupported decimal type returns error", func(t *testing.T) { + b := array.NewInt32Builder(mem) + defer b.Release() + b.Append(1) + arr := b.NewArray().(*array.Int32) + defer arr.Release() + + var got decimal128.Num + err := setDecimalValue(reflect.ValueOf(&got).Elem(), arr, 0) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) +} + +func TestAssertArrayTypeMismatch(t *testing.T) { + mem := checkedMem(t) + b := array.NewInt32Builder(mem) + defer b.Release() + b.Append(1) + arr := b.NewInt32Array() + defer arr.Release() + + _, err := assertArray[array.Float64](arr) + require.Error(t, err) + assert.ErrorIs(t, err, ErrTypeMismatch) } func TestSetStructValue(t *testing.T) { @@ -608,6 +859,38 @@ func TestSetFixedSizeListValue(t *testing.T) { err := setValue(reflect.ValueOf(&got).Elem(), arr, 0) assert.Error(t, err, "expected error for size mismatch") }) + + t.Run("child element type mismatch errors", func(t *testing.T) { + b := array.NewFixedSizeListBuilder(mem, 3, arrow.PrimitiveTypes.Int32) + defer b.Release() + vb := b.ValueBuilder().(*array.Int32Builder) + b.Append(true) + vb.AppendValues([]int32{1, 2, 3}, nil) + + arr := b.NewArray().(*array.FixedSizeList) + defer arr.Release() + + var got [3]string + err := setValue(reflect.ValueOf(&got).Elem(), arr, 0) + require.Error(t, err) + assert.ErrorIs(t, err, ErrTypeMismatch) + }) + + t.Run("child element type mismatch on slice errors", func(t *testing.T) { + b := array.NewFixedSizeListBuilder(mem, 2, arrow.PrimitiveTypes.Int32) + defer b.Release() + vb := b.ValueBuilder().(*array.Int32Builder) + b.Append(true) + vb.AppendValues([]int32{10, 20}, nil) + + arr := b.NewArray().(*array.FixedSizeList) + defer arr.Release() + + var got []string + err := setValue(reflect.ValueOf(&got).Elem(), arr, 0) 
+ require.Error(t, err) + assert.ErrorIs(t, err, ErrTypeMismatch) + }) } func TestSetDictionaryValue(t *testing.T) { diff --git a/arrow/array/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go index f5c031bf..88758378 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/array/arreflect/reflect_go_to_arrow_test.go @@ -739,3 +739,275 @@ func TestNilByteSliceIsNull(t *testing.T) { assert.False(t, arr.IsNull(0), "non-nil byte slice should not be null") assert.True(t, arr.IsNull(1), "nil byte slice should be null") } + +func TestAppendToDictBuilderAllTypes(t *testing.T) { + mem := checkedMem(t) + + cases := []struct { + name string + run func(t *testing.T) + }{ + {"int8", func(t *testing.T) { + arr := mustBuildArray(t, []int8{1, 2, 1, 3}, tagOpts{Dict: true}, mem) + assert.Equal(t, arrow.DICTIONARY, arr.DataType().ID()) + assert.Equal(t, 3, arr.(*array.Dictionary).Dictionary().Len()) + }}, + {"int16", func(t *testing.T) { + arr := mustBuildArray(t, []int16{1, 2, 1, 3}, tagOpts{Dict: true}, mem) + assert.Equal(t, 3, arr.(*array.Dictionary).Dictionary().Len()) + }}, + {"int64", func(t *testing.T) { + arr := mustBuildArray(t, []int64{1, 2, 1, 3}, tagOpts{Dict: true}, mem) + assert.Equal(t, 3, arr.(*array.Dictionary).Dictionary().Len()) + }}, + {"uint8", func(t *testing.T) { + arr := mustBuildArray(t, []uint8{1, 2, 1, 3}, tagOpts{Dict: true}, mem) + assert.Equal(t, 3, arr.(*array.Dictionary).Dictionary().Len()) + }}, + {"uint16", func(t *testing.T) { + arr := mustBuildArray(t, []uint16{1, 2, 1, 3}, tagOpts{Dict: true}, mem) + assert.Equal(t, 3, arr.(*array.Dictionary).Dictionary().Len()) + }}, + {"uint32", func(t *testing.T) { + arr := mustBuildArray(t, []uint32{1, 2, 1, 3}, tagOpts{Dict: true}, mem) + assert.Equal(t, 3, arr.(*array.Dictionary).Dictionary().Len()) + }}, + {"uint64", func(t *testing.T) { + arr := mustBuildArray(t, []uint64{1, 2, 1, 3}, tagOpts{Dict: true}, mem) + assert.Equal(t, 3, 
arr.(*array.Dictionary).Dictionary().Len()) + }}, + {"float32", func(t *testing.T) { + arr := mustBuildArray(t, []float32{1.1, 2.2, 1.1, 3.3}, tagOpts{Dict: true}, mem) + assert.Equal(t, 3, arr.(*array.Dictionary).Dictionary().Len()) + }}, + {"float64", func(t *testing.T) { + arr := mustBuildArray(t, []float64{1.1, 2.2, 1.1, 3.3}, tagOpts{Dict: true}, mem) + assert.Equal(t, 3, arr.(*array.Dictionary).Dictionary().Len()) + }}, + {"binary bytes", func(t *testing.T) { + arr := mustBuildArray(t, [][]byte{[]byte("a"), []byte("b"), []byte("a")}, tagOpts{Dict: true}, mem) + assert.Equal(t, 2, arr.(*array.Dictionary).Dictionary().Len()) + }}, + {"binary nil is null", func(t *testing.T) { + arr := mustBuildArray(t, [][]byte{[]byte("a"), nil, []byte("a")}, tagOpts{Dict: true}, mem) + assert.True(t, arr.IsNull(1)) + assert.Equal(t, 1, arr.(*array.Dictionary).Dictionary().Len()) + }}, + } + for _, tc := range cases { + t.Run(tc.name, tc.run) + } + + t.Run("binary with unsupported kind returns error", func(t *testing.T) { + db := array.NewDictionaryBuilder(mem, &arrow.DictionaryType{ + IndexType: arrow.PrimitiveTypes.Int32, ValueType: arrow.BinaryTypes.Binary, + }).(*array.BinaryDictionaryBuilder) + defer db.Release() + err := appendToDictBuilder(db, reflect.ValueOf(int32(7))) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + + t.Run("unsupported dict builder type returns error", func(t *testing.T) { + db := array.NewDictionaryBuilder(mem, &arrow.DictionaryType{ + IndexType: arrow.PrimitiveTypes.Int32, + ValueType: &arrow.Decimal128Type{Precision: 10, Scale: 2}, + }) + defer db.Release() + err := appendToDictBuilder(db, reflect.ValueOf(decimal128.New(0, 1))) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) +} + +func TestAppendListElementDirect(t *testing.T) { + mem := checkedMem(t) + + t.Run("nil slice appends null", func(t *testing.T) { + lb := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) + defer lb.Release() + var empty []int32 + require.NoError(t, 
appendListElement(lb, reflect.ValueOf(empty))) + arr := lb.NewArray() + defer arr.Release() + assert.True(t, arr.IsNull(0)) + }) + + t.Run("large list builder", func(t *testing.T) { + lb := array.NewLargeListBuilder(mem, arrow.PrimitiveTypes.Int32) + defer lb.Release() + require.NoError(t, appendListElement(lb, reflect.ValueOf([]int32{1, 2, 3}))) + arr := lb.NewArray().(*array.LargeList) + defer arr.Release() + assert.Equal(t, 1, arr.Len()) + vb := arr.ListValues().(*array.Int32) + assert.Equal(t, 3, vb.Len()) + }) + + t.Run("list view builder", func(t *testing.T) { + lb := array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32) + defer lb.Release() + require.NoError(t, appendListElement(lb, reflect.ValueOf([]int32{4, 5}))) + arr := lb.NewArray().(*array.ListView) + defer arr.Release() + assert.Equal(t, 1, arr.Len()) + }) + + t.Run("large list view builder", func(t *testing.T) { + lb := array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32) + defer lb.Release() + require.NoError(t, appendListElement(lb, reflect.ValueOf([]int32{6}))) + arr := lb.NewArray().(*array.LargeListView) + defer arr.Release() + assert.Equal(t, 1, arr.Len()) + }) + + t.Run("unexpected builder type returns error", func(t *testing.T) { + b := array.NewInt32Builder(mem) + defer b.Release() + err := appendListElement(b, reflect.ValueOf([]int32{1})) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) +} + +func TestBuildRunEndEncodedArrayExtras(t *testing.T) { + mem := checkedMem(t) + + t.Run("empty_slice_direct", func(t *testing.T) { + empty := reflect.MakeSlice(reflect.TypeOf([]int32{}), 0, 0) + arr, err := buildRunEndEncodedArray(empty, tagOpts{REE: true}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, 0, arr.Len()) + assert.Equal(t, arrow.RUN_END_ENCODED, arr.DataType().ID()) + }) + + t.Run("nil_pointer_runs_collapse", func(t *testing.T) { + s := "x" + vals := []*string{nil, nil, &s, nil} + arr := mustBuildArray(t, vals, tagOpts{REE: true}, mem) + ree := 
arr.(*array.RunEndEncoded) + assert.Equal(t, 4, ree.Len()) + assert.Equal(t, 3, ree.RunEndsArr().Len(), + "expected 3 runs (nil,nil + x + nil), got %d", ree.RunEndsArr().Len()) + }) + + t.Run("nil_and_non_nil_pointer_are_not_equal", func(t *testing.T) { + s := "x" + vals := []*string{nil, &s} + arr := mustBuildArray(t, vals, tagOpts{REE: true}, mem) + ree := arr.(*array.RunEndEncoded) + assert.Equal(t, 2, ree.RunEndsArr().Len(), + "expected 2 runs (nil != &x), got %d", ree.RunEndsArr().Len()) + }) + + t.Run("non_comparable_elem_uses_deep_equal", func(t *testing.T) { + vals := [][]int32{{1, 2}, {1, 2}, {3}} + arr := mustBuildArray(t, vals, tagOpts{REE: true}, mem) + ree := arr.(*array.RunEndEncoded) + assert.Equal(t, 3, ree.Len()) + assert.Equal(t, 2, ree.RunEndsArr().Len(), + "expected 2 runs via DeepEqual, got %d", ree.RunEndsArr().Len()) + }) +} + +func TestBuildMapArrayExtras(t *testing.T) { + mem := checkedMem(t) + + t.Run("pointer_key_type", func(t *testing.T) { + k1, k2 := "a", "b" + vals := []map[*string]int32{{&k1: 1, &k2: 2}} + arr := mustBuildDefault(t, vals, mem) + require.Equal(t, arrow.MAP, arr.DataType().ID()) + assert.Equal(t, 1, arr.Len()) + }) + + t.Run("pointer_value_type", func(t *testing.T) { + v1, v2 := int32(1), int32(2) + vals := []map[string]*int32{{"a": &v1, "b": &v2}} + arr := mustBuildDefault(t, vals, mem) + require.Equal(t, arrow.MAP, arr.DataType().ID()) + assert.Equal(t, 1, arr.Len()) + }) + + t.Run("unsupported_key_type_errors", func(t *testing.T) { + vals := []map[complex64]int32{{1 + 2i: 1}} + _, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + + t.Run("unsupported_value_type_errors", func(t *testing.T) { + vals := []map[string]complex64{{"a": 1 + 2i}} + _, err := buildArray(reflect.ValueOf(vals), tagOpts{}, mem) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) +} + +func TestAppendTemporalValueErrors(t *testing.T) { + mem := 
checkedMem(t) + notATime := reflect.ValueOf(int32(42)) + + builderCases := []struct { + name string + builder array.Builder + }{ + {"timestamp", array.NewTimestampBuilder(mem, &arrow.TimestampType{Unit: arrow.Nanosecond})}, + {"date32", array.NewDate32Builder(mem)}, + {"date64", array.NewDate64Builder(mem)}, + {"time32", array.NewTime32Builder(mem, &arrow.Time32Type{Unit: arrow.Millisecond})}, + {"time64", array.NewTime64Builder(mem, &arrow.Time64Type{Unit: arrow.Nanosecond})}, + } + for _, tc := range builderCases { + t.Run(tc.name+"_requires_time_Time", func(t *testing.T) { + defer tc.builder.Release() + err := appendTemporalValue(tc.builder, notATime) + require.Error(t, err) + assert.ErrorIs(t, err, ErrTypeMismatch) + }) + } + + t.Run("duration_requires_time_Duration", func(t *testing.T) { + b := array.NewDurationBuilder(mem, &arrow.DurationType{Unit: arrow.Nanosecond}) + defer b.Release() + err := appendTemporalValue(b, notATime) + require.Error(t, err) + assert.ErrorIs(t, err, ErrTypeMismatch) + }) + + t.Run("unexpected_builder_type", func(t *testing.T) { + b := array.NewInt32Builder(mem) + defer b.Release() + err := appendTemporalValue(b, reflect.ValueOf(time.Now())) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) +} + +func TestAppendDecimalValueErrors(t *testing.T) { + mem := checkedMem(t) + notDecimal := reflect.ValueOf("not a decimal") + + t.Run("decimal128_wrong_type", func(t *testing.T) { + b := array.NewDecimal128Builder(mem, &arrow.Decimal128Type{Precision: 10, Scale: 2}) + defer b.Release() + err := appendDecimalValue(b, notDecimal) + require.Error(t, err) + assert.ErrorIs(t, err, ErrTypeMismatch) + }) + + t.Run("decimal256_wrong_type", func(t *testing.T) { + b := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 40, Scale: 2}) + defer b.Release() + err := appendDecimalValue(b, notDecimal) + require.Error(t, err) + assert.ErrorIs(t, err, ErrTypeMismatch) + }) + + t.Run("unexpected_builder_type", func(t 
*testing.T) { + b := array.NewInt32Builder(mem) + defer b.Release() + err := appendDecimalValue(b, reflect.ValueOf(decimal128.New(0, 1))) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) +} diff --git a/arrow/array/arreflect/reflect_infer_test.go b/arrow/array/arreflect/reflect_infer_test.go index 0d65da67..c1af8011 100644 --- a/arrow/array/arreflect/reflect_infer_test.go +++ b/arrow/array/arreflect/reflect_infer_test.go @@ -441,3 +441,196 @@ func TestInferGoTypeStructInvalidIdentifier(t *testing.T) { } }) } + +func TestInferGoTypeAllPrimitives(t *testing.T) { + cases := []struct { + name string + dt arrow.DataType + want reflect.Type + }{ + {"int8", arrow.PrimitiveTypes.Int8, reflect.TypeOf(int8(0))}, + {"int16", arrow.PrimitiveTypes.Int16, reflect.TypeOf(int16(0))}, + {"int64", arrow.PrimitiveTypes.Int64, reflect.TypeOf(int64(0))}, + {"uint8", arrow.PrimitiveTypes.Uint8, reflect.TypeOf(uint8(0))}, + {"uint16", arrow.PrimitiveTypes.Uint16, reflect.TypeOf(uint16(0))}, + {"uint32", arrow.PrimitiveTypes.Uint32, reflect.TypeOf(uint32(0))}, + {"uint64", arrow.PrimitiveTypes.Uint64, reflect.TypeOf(uint64(0))}, + {"float32", arrow.PrimitiveTypes.Float32, reflect.TypeOf(float32(0))}, + {"large_string", arrow.BinaryTypes.LargeString, reflect.TypeOf("")}, + {"large_binary", arrow.BinaryTypes.LargeBinary, reflect.TypeOf([]byte{})}, + {"date32", arrow.FixedWidthTypes.Date32, reflect.TypeOf(time.Time{})}, + {"date64", arrow.FixedWidthTypes.Date64, reflect.TypeOf(time.Time{})}, + {"time32_ms", &arrow.Time32Type{Unit: arrow.Millisecond}, reflect.TypeOf(time.Time{})}, + {"time64_ns", &arrow.Time64Type{Unit: arrow.Nanosecond}, reflect.TypeOf(time.Time{})}, + {"decimal32", &arrow.Decimal32Type{Precision: 9, Scale: 2}, reflect.TypeOf(decimal.Decimal32(0))}, + {"decimal64", &arrow.Decimal64Type{Precision: 18, Scale: 3}, reflect.TypeOf(decimal.Decimal64(0))}, + {"decimal128", &arrow.Decimal128Type{Precision: 10, Scale: 2}, reflect.TypeOf(decimal128.Num{})}, + 
{"decimal256", &arrow.Decimal256Type{Precision: 20, Scale: 4}, reflect.TypeOf(decimal256.Num{})}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got, err := InferGoType(tc.dt) + require.NoError(t, err) + assert.Equal(t, tc.want, got) + }) + } +} + +func TestInferGoTypeCompositeTypes(t *testing.T) { + t.Run("large_list", func(t *testing.T) { + got, err := InferGoType(arrow.LargeListOf(arrow.PrimitiveTypes.Int64)) + require.NoError(t, err) + assert.Equal(t, reflect.Slice, got.Kind()) + assert.Equal(t, reflect.Int64, got.Elem().Kind()) + }) + + t.Run("list_view", func(t *testing.T) { + got, err := InferGoType(arrow.ListViewOf(arrow.PrimitiveTypes.Int32)) + require.NoError(t, err) + assert.Equal(t, reflect.Slice, got.Kind()) + assert.Equal(t, reflect.Int32, got.Elem().Kind()) + }) + + t.Run("large_list_view", func(t *testing.T) { + got, err := InferGoType(arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)) + require.NoError(t, err) + assert.Equal(t, reflect.Slice, got.Kind()) + }) + + t.Run("list with unsupported element returns error", func(t *testing.T) { + _, err := InferGoType(arrow.ListOf(arrow.Null)) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + + t.Run("fixed size list with unsupported element returns error", func(t *testing.T) { + _, err := InferGoType(arrow.FixedSizeListOf(3, arrow.Null)) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + + t.Run("map with unsupported key returns error", func(t *testing.T) { + _, err := InferGoType(arrow.MapOf(arrow.Null, arrow.BinaryTypes.String)) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + + t.Run("map with unsupported value returns error", func(t *testing.T) { + _, err := InferGoType(arrow.MapOf(arrow.BinaryTypes.String, arrow.Null)) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + + t.Run("map with comparable key builds map type", func(t *testing.T) { + got, err := InferGoType(arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32)) + require.NoError(t, err) + 
assert.Equal(t, reflect.Map, got.Kind()) + assert.Equal(t, reflect.String, got.Key().Kind()) + assert.Equal(t, reflect.Int32, got.Elem().Kind()) + }) + + t.Run("dictionary unwraps to value type", func(t *testing.T) { + dt := &arrow.DictionaryType{ + IndexType: arrow.PrimitiveTypes.Int32, + ValueType: arrow.BinaryTypes.String, + } + got, err := InferGoType(dt) + require.NoError(t, err) + assert.Equal(t, reflect.String, got.Kind()) + }) + + t.Run("run end encoded unwraps to encoded type", func(t *testing.T) { + dt := arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int64) + got, err := InferGoType(dt) + require.NoError(t, err) + assert.Equal(t, reflect.Int64, got.Kind()) + }) +} + +func TestApplyTemporalOptsAllBranches(t *testing.T) { + timeType := reflect.TypeOf(time.Time{}) + base := arrow.FixedWidthTypes.Timestamp_ns + + t.Run("non-time type returns dt unchanged", func(t *testing.T) { + got := applyTemporalOpts(base, reflect.TypeOf(int32(0)), tagOpts{Temporal: "date32"}) + assert.Equal(t, base, got) + }) + + t.Run("empty temporal returns dt unchanged", func(t *testing.T) { + got := applyTemporalOpts(base, timeType, tagOpts{Temporal: ""}) + assert.Equal(t, base, got) + }) + + t.Run("timestamp returns dt unchanged", func(t *testing.T) { + got := applyTemporalOpts(base, timeType, tagOpts{Temporal: "timestamp"}) + assert.Equal(t, base, got) + }) + + t.Run("date32", func(t *testing.T) { + got := applyTemporalOpts(base, timeType, tagOpts{Temporal: "date32"}) + assert.Equal(t, arrow.DATE32, got.ID()) + }) + + t.Run("date64", func(t *testing.T) { + got := applyTemporalOpts(base, timeType, tagOpts{Temporal: "date64"}) + assert.Equal(t, arrow.DATE64, got.ID()) + }) + + t.Run("time32", func(t *testing.T) { + got := applyTemporalOpts(base, timeType, tagOpts{Temporal: "time32"}) + assert.Equal(t, arrow.TIME32, got.ID()) + }) + + t.Run("time64", func(t *testing.T) { + got := applyTemporalOpts(base, timeType, tagOpts{Temporal: "time64"}) + assert.Equal(t, 
arrow.TIME64, got.ID()) + }) + + t.Run("unknown temporal falls through", func(t *testing.T) { + got := applyTemporalOpts(base, timeType, tagOpts{Temporal: "bogus"}) + assert.Equal(t, base, got) + }) +} + +func TestApplyDecimalOptsAllBranches(t *testing.T) { + base := arrow.BinaryTypes.String + opts := tagOpts{HasDecimalOpts: true, DecimalPrecision: 18, DecimalScale: 4} + + t.Run("no_decimal_opts_returns_dt_unchanged", func(t *testing.T) { + got := applyDecimalOpts(base, reflect.TypeOf(decimal128.Num{}), tagOpts{}) + assert.Equal(t, base, got) + }) + + t.Run("decimal128", func(t *testing.T) { + got := applyDecimalOpts(base, reflect.TypeOf(decimal128.Num{}), opts) + dt, ok := got.(*arrow.Decimal128Type) + require.True(t, ok, "expected *arrow.Decimal128Type, got %T", got) + assert.Equal(t, int32(18), dt.Precision) + assert.Equal(t, int32(4), dt.Scale) + }) + + t.Run("decimal256", func(t *testing.T) { + got := applyDecimalOpts(base, reflect.TypeOf(decimal256.Num{}), opts) + dt, ok := got.(*arrow.Decimal256Type) + require.True(t, ok, "expected *arrow.Decimal256Type, got %T", got) + assert.Equal(t, int32(18), dt.Precision) + assert.Equal(t, int32(4), dt.Scale) + }) + + t.Run("decimal32", func(t *testing.T) { + got := applyDecimalOpts(base, reflect.TypeOf(decimal.Decimal32(0)), opts) + dt, ok := got.(*arrow.Decimal32Type) + require.True(t, ok, "expected *arrow.Decimal32Type, got %T", got) + assert.Equal(t, int32(18), dt.Precision) + assert.Equal(t, int32(4), dt.Scale) + }) + + t.Run("decimal64", func(t *testing.T) { + got := applyDecimalOpts(base, reflect.TypeOf(decimal.Decimal64(0)), opts) + dt, ok := got.(*arrow.Decimal64Type) + require.True(t, ok, "expected *arrow.Decimal64Type, got %T", got) + assert.Equal(t, int32(18), dt.Precision) + assert.Equal(t, int32(4), dt.Scale) + }) + + t.Run("non_decimal_type_returns_dt_unchanged", func(t *testing.T) { + got := applyDecimalOpts(base, reflect.TypeOf(int32(0)), opts) + assert.Equal(t, base, got) + }) +} diff --git 
a/arrow/array/arreflect/reflect_public_test.go b/arrow/array/arreflect/reflect_public_test.go index 69b29870..0b04848d 100644 --- a/arrow/array/arreflect/reflect_public_test.go +++ b/arrow/array/arreflect/reflect_public_test.go @@ -405,6 +405,15 @@ func TestAtAny(t *testing.T) { assert.Equal(t, int32(0), v, "AtAny(1) value") } +func TestAtAnyErrors(t *testing.T) { + arr := array.NewNull(1) + defer arr.Release() + + _, err := AtAny(arr, 0) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) +} + func TestToAnySlice(t *testing.T) { mem := testMem() b := array.NewStringBuilder(mem) diff --git a/arrow/array/arreflect/reflect_test.go b/arrow/array/arreflect/reflect_test.go index a170eb5c..f5d23163 100644 --- a/arrow/array/arreflect/reflect_test.go +++ b/arrow/array/arreflect/reflect_test.go @@ -19,7 +19,9 @@ package arreflect import ( "reflect" "testing" + "time" + "github.com/apache/arrow-go/v18/arrow" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -176,6 +178,33 @@ func TestGetStructFields(t *testing.T) { require.Len(t, fields, 1) assert.Equal(t, "inner_struct", fields[0].Name) }) + + t.Run("pointer to struct is dereferenced", func(t *testing.T) { + type Simple struct { + X int32 + Y string + } + fields := getStructFields(reflect.TypeOf(&Simple{})) + require.Len(t, fields, 2) + assert.Equal(t, "X", fields[0].Name) + assert.Equal(t, "Y", fields[1].Name) + }) + + t.Run("multi-level pointer to struct is dereferenced", func(t *testing.T) { + type Simple struct { + X int32 + } + var pp **Simple + fields := getStructFields(reflect.TypeOf(pp)) + require.Len(t, fields, 1) + assert.Equal(t, "X", fields[0].Name) + }) + + t.Run("non-struct type returns nil", func(t *testing.T) { + assert.Nil(t, getStructFields(reflect.TypeOf(int32(0)))) + assert.Nil(t, getStructFields(reflect.TypeOf(""))) + assert.Nil(t, getStructFields(reflect.TypeOf([]int32{}))) + }) } func TestCachedStructFields(t *testing.T) { @@ -197,3 +226,78 @@ func 
TestCachedStructFields(t *testing.T) { assert.Equal(t, "X", fields1[0].Name) assert.Equal(t, "Y", fields1[1].Name) } + +func TestBuildEmptyTyped(t *testing.T) { + mem := checkedMem(t) + + t.Run("unsupported_type_returns_error", func(t *testing.T) { + _, err := buildEmptyTyped(reflect.TypeOf((chan int)(nil)), tagOpts{}, mem) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + + t.Run("pointer_element_type_is_dereferenced", func(t *testing.T) { + arr, err := buildEmptyTyped(reflect.TypeOf((*int32)(nil)), tagOpts{}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, 0, arr.Len()) + assert.Equal(t, arrow.INT32, arr.DataType().ID()) + }) + + t.Run("multi_level_pointer_element_type", func(t *testing.T) { + arr, err := buildEmptyTyped(reflect.TypeOf((**int32)(nil)), tagOpts{}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, 0, arr.Len()) + assert.Equal(t, arrow.INT32, arr.DataType().ID()) + }) + + t.Run("listview_on_non_slice_type_errors", func(t *testing.T) { + _, err := buildEmptyTyped(reflect.TypeOf(int32(0)), tagOpts{ListView: true}, mem) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + + t.Run("listview_on_byte_slice_errors", func(t *testing.T) { + _, err := buildEmptyTyped(reflect.TypeOf([]byte(nil)), tagOpts{ListView: true}, mem) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + + t.Run("listview_with_slice_of_pointers_derefs_inner", func(t *testing.T) { + arr, err := buildEmptyTyped(reflect.TypeOf([]*int32(nil)), tagOpts{ListView: true}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, 0, arr.Len()) + assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) + }) + + t.Run("listview_happy_path", func(t *testing.T) { + arr, err := buildEmptyTyped(reflect.TypeOf([]int32(nil)), tagOpts{ListView: true}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) + }) + + 
t.Run("dict_with_unsupported_value_type_errors", func(t *testing.T) { + _, err := buildEmptyTyped(reflect.TypeOf(time.Time{}), tagOpts{Dict: true}, mem) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + + t.Run("dict_happy_path", func(t *testing.T) { + arr, err := buildEmptyTyped(reflect.TypeOf(""), tagOpts{Dict: true}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, 0, arr.Len()) + assert.Equal(t, arrow.DICTIONARY, arr.DataType().ID()) + }) + + t.Run("ree_happy_path", func(t *testing.T) { + arr, err := buildEmptyTyped(reflect.TypeOf(int32(0)), tagOpts{REE: true}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, 0, arr.Len()) + assert.Equal(t, arrow.RUN_END_ENCODED, arr.DataType().ID()) + }) +} From 49018a5ea4a4fd932282cdeb4d277710ca6c8f52 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 21 Apr 2026 16:50:30 -0400 Subject: [PATCH 60/82] fix(arreflect): address review findings from job 363 - Scale Timestamp/Duration appends by unit multiplier in appendTemporalValue so non-nanosecond builders roundtrip correctly, matching Time32/Time64. - Surface malformed decimal(p,s) struct tags via DecimalParseErr on tagOpts, checked in validateOptions and wired into struct-field parsing, replacing the previous silent fallback to defaults. - Reject WithTemporal("timestamp") on non-time.Time element types, consistent with date32/date64/time32/time64. 
--- arrow/array/arreflect/reflect.go | 33 ++++++++---- arrow/array/arreflect/reflect_go_to_arrow.go | 6 ++- .../arreflect/reflect_go_to_arrow_test.go | 52 +++++++++++++++++++ arrow/array/arreflect/reflect_infer.go | 3 ++ arrow/array/arreflect/reflect_public_test.go | 15 ++++++ arrow/array/arreflect/reflect_test.go | 34 ++++++++++++ 6 files changed, 130 insertions(+), 13 deletions(-) diff --git a/arrow/array/arreflect/reflect.go b/arrow/array/arreflect/reflect.go index a24d967f..028c25aa 100644 --- a/arrow/array/arreflect/reflect.go +++ b/arrow/array/arreflect/reflect.go @@ -45,6 +45,7 @@ type tagOpts struct { DecimalScale int32 HasDecimalOpts bool Temporal string // "timestamp" (default), "date32", "date64", "time32", "time64" + DecimalParseErr string // diagnostic set when decimal(p,s) tag fails to parse; surfaced by validateOptions } type fieldMeta struct { @@ -125,15 +126,23 @@ func parseDecimalOpt(opts *tagOpts, token string) { inner := strings.TrimPrefix(token, "decimal(") inner = strings.TrimSuffix(inner, ")") parts := strings.SplitN(inner, ",", 2) - if len(parts) == 2 { - p, errP := strconv.ParseInt(strings.TrimSpace(parts[0]), 10, 32) - s, errS := strconv.ParseInt(strings.TrimSpace(parts[1]), 10, 32) - if errP == nil && errS == nil { - opts.HasDecimalOpts = true - opts.DecimalPrecision = int32(p) - opts.DecimalScale = int32(s) - } - } + if len(parts) != 2 { + opts.DecimalParseErr = fmt.Sprintf("invalid decimal tag %q: expected decimal(precision,scale)", token) + return + } + p, errP := strconv.ParseInt(strings.TrimSpace(parts[0]), 10, 32) + if errP != nil { + opts.DecimalParseErr = fmt.Sprintf("invalid decimal tag %q: precision %q is not an integer", token, strings.TrimSpace(parts[0])) + return + } + s, errS := strconv.ParseInt(strings.TrimSpace(parts[1]), 10, 32) + if errS != nil { + opts.DecimalParseErr = fmt.Sprintf("invalid decimal tag %q: scale %q is not an integer", token, strings.TrimSpace(parts[1])) + return + } + opts.HasDecimalOpts = true + 
opts.DecimalPrecision = int32(p) + opts.DecimalScale = int32(s) } type bfsEntry struct { @@ -402,6 +411,9 @@ func validateTemporalOpt(temporal string) error { } func validateOptions(opts tagOpts) error { + if opts.DecimalParseErr != "" { + return fmt.Errorf("arreflect: %s: %w", opts.DecimalParseErr, ErrUnsupportedType) + } n := 0 if opts.Dict { n++ @@ -470,8 +482,7 @@ func FromSlice[T any](vals []T, mem memory.Allocator, opts ...Option) (arrow.Arr if err := validateTemporalOpt(tOpts.Temporal); err != nil { return nil, err } - // "timestamp" is excluded: it is a no-op for non-time.Time types via applyTemporalOpts. - if tOpts.Temporal != "" && tOpts.Temporal != "timestamp" { + if tOpts.Temporal != "" { goType := reflect.TypeFor[T]() deref := goType for deref.Kind() == reflect.Ptr { diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index 7f363bf0..a0055e14 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -276,11 +276,12 @@ func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, er func appendTemporalValue(b array.Builder, v reflect.Value) error { switch tb := b.(type) { case *array.TimestampBuilder: + unit := tb.Type().(*arrow.TimestampType).Unit t, err := asTime(v) if err != nil { return err } - tb.Append(arrow.Timestamp(t.UnixNano())) + tb.Append(arrow.Timestamp(t.UnixNano() / int64(unit.Multiplier()))) case *array.Date32Builder: t, err := asTime(v) if err != nil { @@ -308,11 +309,12 @@ func appendTemporalValue(b array.Builder, v reflect.Value) error { } tb.Append(arrow.Time64(timeOfDayNanos(t) / int64(unit.Multiplier()))) case *array.DurationBuilder: + unit := tb.Type().(*arrow.DurationType).Unit d, err := asDuration(v) if err != nil { return err } - tb.Append(arrow.Duration(d.Nanoseconds())) + tb.Append(arrow.Duration(d.Nanoseconds() / int64(unit.Multiplier()))) default: return fmt.Errorf("unexpected temporal builder %T: %w", 
b, ErrUnsupportedType) } diff --git a/arrow/array/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go index 88758378..fde81719 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/array/arreflect/reflect_go_to_arrow_test.go @@ -1011,3 +1011,55 @@ func TestAppendDecimalValueErrors(t *testing.T) { assert.ErrorIs(t, err, ErrUnsupportedType) }) } + +func TestAppendTemporalValueUnitHandling(t *testing.T) { + mem := checkedMem(t) + ref := time.Date(2024, 1, 15, 12, 34, 56, 789_000_000, time.UTC) + + timestampCases := []struct { + name string + unit arrow.TimeUnit + }{ + {"timestamp_second", arrow.Second}, + {"timestamp_millisecond", arrow.Millisecond}, + {"timestamp_microsecond", arrow.Microsecond}, + {"timestamp_nanosecond", arrow.Nanosecond}, + } + for _, tc := range timestampCases { + t.Run(tc.name, func(t *testing.T) { + dt := &arrow.TimestampType{Unit: tc.unit} + b := array.NewTimestampBuilder(mem, dt) + defer b.Release() + require.NoError(t, appendTemporalValue(b, reflect.ValueOf(ref))) + arr := b.NewArray().(*array.Timestamp) + defer arr.Release() + got := int64(arr.Value(0)) + want := ref.UnixNano() / int64(tc.unit.Multiplier()) + assert.Equal(t, want, got, "%s: stored value should be scaled by unit", tc.name) + }) + } + + durationCases := []struct { + name string + unit arrow.TimeUnit + d time.Duration + }{ + {"duration_second", arrow.Second, 90 * time.Second}, + {"duration_millisecond", arrow.Millisecond, 1500 * time.Millisecond}, + {"duration_microsecond", arrow.Microsecond, 2500 * time.Microsecond}, + {"duration_nanosecond", arrow.Nanosecond, 12345 * time.Nanosecond}, + } + for _, tc := range durationCases { + t.Run(tc.name, func(t *testing.T) { + dt := &arrow.DurationType{Unit: tc.unit} + b := array.NewDurationBuilder(mem, dt) + defer b.Release() + require.NoError(t, appendTemporalValue(b, reflect.ValueOf(tc.d))) + arr := b.NewArray().(*array.Duration) + defer arr.Release() + got := 
int64(arr.Value(0)) + want := tc.d.Nanoseconds() / int64(tc.unit.Multiplier()) + assert.Equal(t, want, got, "%s: stored value should be scaled by unit", tc.name) + }) + } +} diff --git a/arrow/array/arreflect/reflect_infer.go b/arrow/array/arreflect/reflect_infer.go index 83a05ce1..66779feb 100644 --- a/arrow/array/arreflect/reflect_infer.go +++ b/arrow/array/arreflect/reflect_infer.go @@ -223,6 +223,9 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { arrowFields := make([]arrow.Field, 0, len(fields)) for _, fm := range fields { + if err := validateOptions(fm.Opts); err != nil { + return nil, fmt.Errorf("struct field %q: %w", fm.Name, err) + } origType := fm.Type for origType.Kind() == reflect.Ptr { origType = origType.Elem() diff --git a/arrow/array/arreflect/reflect_public_test.go b/arrow/array/arreflect/reflect_public_test.go index 0b04848d..de8b59a4 100644 --- a/arrow/array/arreflect/reflect_public_test.go +++ b/arrow/array/arreflect/reflect_public_test.go @@ -23,6 +23,7 @@ import ( "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -264,6 +265,20 @@ func TestFromGoSlice(t *testing.T) { assert.ErrorIs(t, err, ErrUnsupportedType) }) + t.Run("WithTemporal timestamp on non-time type returns error", func(t *testing.T) { + _, err := FromSlice([]string{}, mem, WithTemporal("timestamp")) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + + t.Run("struct field with malformed decimal tag returns error", func(t *testing.T) { + type BadDecimal struct { + Amount decimal128.Num `arrow:",decimal(18,two)"` + } + _, err := FromSlice([]BadDecimal{}, mem) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + t.Run("conflicting options return error", func(t *testing.T) { cases := []struct { name string diff --git 
a/arrow/array/arreflect/reflect_test.go b/arrow/array/arreflect/reflect_test.go index f5d23163..a79055d3 100644 --- a/arrow/array/arreflect/reflect_test.go +++ b/arrow/array/arreflect/reflect_test.go @@ -301,3 +301,37 @@ func TestBuildEmptyTyped(t *testing.T) { assert.Equal(t, arrow.RUN_END_ENCODED, arr.DataType().ID()) }) } + +func TestParseDecimalOpt(t *testing.T) { + t.Run("valid_tag_sets_precision_and_scale", func(t *testing.T) { + got := parseTag(",decimal(18,2)") + assert.True(t, got.HasDecimalOpts) + assert.Equal(t, int32(18), got.DecimalPrecision) + assert.Equal(t, int32(2), got.DecimalScale) + assert.Empty(t, got.DecimalParseErr) + }) + + t.Run("non_integer_precision_records_error", func(t *testing.T) { + got := parseTag(",decimal(abc,2)") + assert.False(t, got.HasDecimalOpts) + assert.NotEmpty(t, got.DecimalParseErr) + }) + + t.Run("non_integer_scale_records_error", func(t *testing.T) { + got := parseTag(",decimal(18,two)") + assert.False(t, got.HasDecimalOpts) + assert.NotEmpty(t, got.DecimalParseErr) + }) + + t.Run("missing_scale_records_error", func(t *testing.T) { + got := parseTag(",decimal(18)") + assert.False(t, got.HasDecimalOpts) + assert.NotEmpty(t, got.DecimalParseErr) + }) + + t.Run("validateOptions_surfaces_parse_error", func(t *testing.T) { + err := validateOptions(tagOpts{DecimalParseErr: "bad decimal tag"}) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) +} From 50d1eb41ead370698465be4d582af28e9c89af4f Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 13:21:30 -0400 Subject: [PATCH 61/82] generalize ParseErr --- arrow/array/arreflect/reflect.go | 24 ++++++++------------ arrow/array/arreflect/reflect_go_to_arrow.go | 4 ++-- arrow/array/arreflect/reflect_test.go | 14 ++++++------ 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/arrow/array/arreflect/reflect.go b/arrow/array/arreflect/reflect.go index 028c25aa..b3a43b2a 100644 --- a/arrow/array/arreflect/reflect.go +++ 
b/arrow/array/arreflect/reflect.go @@ -45,7 +45,7 @@ type tagOpts struct { DecimalScale int32 HasDecimalOpts bool Temporal string // "timestamp" (default), "date32", "date64", "time32", "time64" - DecimalParseErr string // diagnostic set when decimal(p,s) tag fails to parse; surfaced by validateOptions + ParseErr string // diagnostic set when decimal(p,s) tag fails to parse; surfaced by validateOptions } type fieldMeta struct { @@ -61,15 +61,7 @@ func parseTag(tag string) tagOpts { return tagOpts{Skip: true} } - var name, rest string - if idx := strings.Index(tag, ","); idx >= 0 { - name = tag[:idx] - rest = tag[idx+1:] - } else { - name = tag - rest = "" - } - + name, rest, _ := strings.Cut(tag, ",") opts := tagOpts{Name: name} if rest == "" { @@ -118,6 +110,8 @@ func parseOptions(opts *tagOpts, rest string) { opts.REE = true case "date32", "date64", "time32", "time64", "timestamp": opts.Temporal = token + default: + opts.ParseErr = fmt.Sprintf("unknown option %q", token) } } } @@ -127,17 +121,17 @@ func parseDecimalOpt(opts *tagOpts, token string) { inner = strings.TrimSuffix(inner, ")") parts := strings.SplitN(inner, ",", 2) if len(parts) != 2 { - opts.DecimalParseErr = fmt.Sprintf("invalid decimal tag %q: expected decimal(precision,scale)", token) + opts.ParseErr = fmt.Sprintf("invalid decimal tag %q: expected decimal(precision,scale)", token) return } p, errP := strconv.ParseInt(strings.TrimSpace(parts[0]), 10, 32) if errP != nil { - opts.DecimalParseErr = fmt.Sprintf("invalid decimal tag %q: precision %q is not an integer", token, strings.TrimSpace(parts[0])) + opts.ParseErr = fmt.Sprintf("invalid decimal tag %q: precision %q is not an integer", token, strings.TrimSpace(parts[0])) return } s, errS := strconv.ParseInt(strings.TrimSpace(parts[1]), 10, 32) if errS != nil { - opts.DecimalParseErr = fmt.Sprintf("invalid decimal tag %q: scale %q is not an integer", token, strings.TrimSpace(parts[1])) + opts.ParseErr = fmt.Sprintf("invalid decimal tag %q: scale %q 
is not an integer", token, strings.TrimSpace(parts[1])) return } opts.HasDecimalOpts = true @@ -411,8 +405,8 @@ func validateTemporalOpt(temporal string) error { } func validateOptions(opts tagOpts) error { - if opts.DecimalParseErr != "" { - return fmt.Errorf("arreflect: %s: %w", opts.DecimalParseErr, ErrUnsupportedType) + if opts.ParseErr != "" { + return fmt.Errorf("arreflect: %s: %w", opts.ParseErr, ErrUnsupportedType) } n := 0 if opts.Dict { diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index a0055e14..a7d2984e 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -377,9 +377,9 @@ func appendValue(b array.Builder, v reflect.Value) error { tb.Append(float64(v.Float())) case *array.BooleanBuilder: tb.Append(v.Bool()) - case *array.StringBuilder: + case array.StringLikeBuilder: tb.Append(v.String()) - case *array.BinaryBuilder: + case array.BinaryLikeBuilder: if v.IsNil() { tb.AppendNull() } else { diff --git a/arrow/array/arreflect/reflect_test.go b/arrow/array/arreflect/reflect_test.go index a79055d3..ae3d194d 100644 --- a/arrow/array/arreflect/reflect_test.go +++ b/arrow/array/arreflect/reflect_test.go @@ -73,11 +73,11 @@ func TestParseTag(t *testing.T) { }, { input: "name,unknown_option", - want: tagOpts{Name: "name"}, + want: tagOpts{Name: "name", ParseErr: "unknown option \"unknown_option\""}, }, { input: `field,Date32`, - want: tagOpts{Name: "field"}, + want: tagOpts{Name: "field", ParseErr: "unknown option \"Date32\""}, }, } @@ -308,29 +308,29 @@ func TestParseDecimalOpt(t *testing.T) { assert.True(t, got.HasDecimalOpts) assert.Equal(t, int32(18), got.DecimalPrecision) assert.Equal(t, int32(2), got.DecimalScale) - assert.Empty(t, got.DecimalParseErr) + assert.Empty(t, got.ParseErr) }) t.Run("non_integer_precision_records_error", func(t *testing.T) { got := parseTag(",decimal(abc,2)") assert.False(t, got.HasDecimalOpts) - assert.NotEmpty(t, 
got.DecimalParseErr) + assert.NotEmpty(t, got.ParseErr) }) t.Run("non_integer_scale_records_error", func(t *testing.T) { got := parseTag(",decimal(18,two)") assert.False(t, got.HasDecimalOpts) - assert.NotEmpty(t, got.DecimalParseErr) + assert.NotEmpty(t, got.ParseErr) }) t.Run("missing_scale_records_error", func(t *testing.T) { got := parseTag(",decimal(18)") assert.False(t, got.HasDecimalOpts) - assert.NotEmpty(t, got.DecimalParseErr) + assert.NotEmpty(t, got.ParseErr) }) t.Run("validateOptions_surfaces_parse_error", func(t *testing.T) { - err := validateOptions(tagOpts{DecimalParseErr: "bad decimal tag"}) + err := validateOptions(tagOpts{ParseErr: "bad decimal tag"}) require.Error(t, err) assert.ErrorIs(t, err, ErrUnsupportedType) }) From 9ea66db6009e8f36ba60f81ca31e81be59a4af1b Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 13:31:11 -0400 Subject: [PATCH 62/82] feat(arreflect): add Large field to tagOpts, parse large tag, add WithLarge option --- arrow/array/arreflect/reflect.go | 7 +++++++ arrow/array/arreflect/reflect_test.go | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/arrow/array/arreflect/reflect.go b/arrow/array/arreflect/reflect.go index b3a43b2a..4ec40634 100644 --- a/arrow/array/arreflect/reflect.go +++ b/arrow/array/arreflect/reflect.go @@ -41,6 +41,7 @@ type tagOpts struct { Dict bool ListView bool REE bool + Large bool DecimalPrecision int32 DecimalScale int32 HasDecimalOpts bool @@ -108,6 +109,8 @@ func parseOptions(opts *tagOpts, rest string) { opts.ListView = true case "ree": opts.REE = true + case "large": + opts.Large = true case "date32", "date64", "time32", "time64", "timestamp": opts.Temporal = token default: @@ -395,6 +398,10 @@ func WithTemporal(temporal string) Option { return func(o *tagOpts) { o.Temporal = temporal } } +// WithLarge requests Large type variants (LARGE_STRING, LARGE_BINARY, LARGE_LIST, +// LARGE_LIST_VIEW) for the top-level array and recursively for nested types. 
+func WithLarge() Option { return func(o *tagOpts) { o.Large = true } } + func validateTemporalOpt(temporal string) error { switch temporal { case "", "timestamp", "date32", "date64", "time32", "time64": diff --git a/arrow/array/arreflect/reflect_test.go b/arrow/array/arreflect/reflect_test.go index ae3d194d..07f83690 100644 --- a/arrow/array/arreflect/reflect_test.go +++ b/arrow/array/arreflect/reflect_test.go @@ -79,6 +79,18 @@ func TestParseTag(t *testing.T) { input: `field,Date32`, want: tagOpts{Name: "field", ParseErr: "unknown option \"Date32\""}, }, + { + input: "name,large", + want: tagOpts{Name: "name", Large: true}, + }, + { + input: "name,large,listview", + want: tagOpts{Name: "name", Large: true, ListView: true}, + }, + { + input: "name,large,dict", + want: tagOpts{Name: "name", Large: true, Dict: true}, + }, } for _, tt := range tests { From a2912524cf6dbaea63bfe23b29554e54e184c4ed Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 13:37:24 -0400 Subject: [PATCH 63/82] feat(arreflect): add applyLargeOpts and hasLargeableType --- arrow/array/arreflect/reflect_infer.go | 56 ++++++++++++++ arrow/array/arreflect/reflect_infer_test.go | 85 +++++++++++++++++++++ 2 files changed, 141 insertions(+) diff --git a/arrow/array/arreflect/reflect_infer.go b/arrow/array/arreflect/reflect_infer.go index 66779feb..4a89ea45 100644 --- a/arrow/array/arreflect/reflect_infer.go +++ b/arrow/array/arreflect/reflect_infer.go @@ -192,6 +192,62 @@ func applyTemporalOpts(dt arrow.DataType, origType reflect.Type, opts tagOpts) a return dt } +func applyLargeOpts(dt arrow.DataType) arrow.DataType { + switch dt.ID() { + case arrow.STRING: + return arrow.BinaryTypes.LargeString + case arrow.BINARY: + return arrow.BinaryTypes.LargeBinary + case arrow.LIST: + return arrow.LargeListOf(applyLargeOpts(dt.(*arrow.ListType).Elem())) + case arrow.LIST_VIEW: + return arrow.LargeListViewOf(applyLargeOpts(dt.(*arrow.ListViewType).Elem())) + case arrow.LARGE_LIST: + return 
arrow.LargeListOf(applyLargeOpts(dt.(*arrow.LargeListType).Elem())) + case arrow.LARGE_LIST_VIEW: + return arrow.LargeListViewOf(applyLargeOpts(dt.(*arrow.LargeListViewType).Elem())) + case arrow.FIXED_SIZE_LIST: + fsl := dt.(*arrow.FixedSizeListType) + return arrow.FixedSizeListOf(fsl.Len(), applyLargeOpts(fsl.Elem())) + case arrow.MAP: + mt := dt.(*arrow.MapType) + return arrow.MapOf(applyLargeOpts(mt.KeyType()), applyLargeOpts(mt.ItemField().Type)) + case arrow.STRUCT: + st := dt.(*arrow.StructType) + fields := make([]arrow.Field, st.NumFields()) + for i := 0; i < st.NumFields(); i++ { + f := st.Field(i) + f.Type = applyLargeOpts(f.Type) + fields[i] = f + } + return arrow.StructOf(fields...) + default: + return dt + } +} + +func hasLargeableType(dt arrow.DataType) bool { + switch dt.ID() { + case arrow.STRING, arrow.BINARY, arrow.LIST, arrow.LIST_VIEW: + return true + case arrow.STRUCT: + st := dt.(*arrow.StructType) + for i := 0; i < st.NumFields(); i++ { + if hasLargeableType(st.Field(i).Type) { + return true + } + } + return false + case arrow.FIXED_SIZE_LIST: + return hasLargeableType(dt.(*arrow.FixedSizeListType).Elem()) + case arrow.MAP: + mt := dt.(*arrow.MapType) + return hasLargeableType(mt.KeyType()) || hasLargeableType(mt.ItemField().Type) + default: + return false + } +} + func applyEncodingOpts(dt arrow.DataType, fm fieldMeta) (arrow.DataType, error) { switch { case fm.Opts.Dict: diff --git a/arrow/array/arreflect/reflect_infer_test.go b/arrow/array/arreflect/reflect_infer_test.go index c1af8011..8e9f0d73 100644 --- a/arrow/array/arreflect/reflect_infer_test.go +++ b/arrow/array/arreflect/reflect_infer_test.go @@ -634,3 +634,88 @@ func TestApplyDecimalOptsAllBranches(t *testing.T) { assert.Equal(t, base, got) }) } + +func TestApplyLargeOpts(t *testing.T) { + cases := []struct { + name string + input arrow.DataType + want arrow.Type + }{ + {"string→large_string", arrow.BinaryTypes.String, arrow.LARGE_STRING}, + {"binary→large_binary", 
arrow.BinaryTypes.Binary, arrow.LARGE_BINARY}, + {"list→large_list", arrow.ListOf(arrow.BinaryTypes.String), arrow.LARGE_LIST}, + {"list_view→large_list_view", arrow.ListViewOf(arrow.BinaryTypes.Binary), arrow.LARGE_LIST_VIEW}, + {"int64 unchanged", arrow.PrimitiveTypes.Int64, arrow.INT64}, + {"float32 unchanged", arrow.PrimitiveTypes.Float32, arrow.FLOAT32}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := applyLargeOpts(tc.input) + assert.Equal(t, tc.want, got.ID()) + }) + } + + t.Run("list elem is large_binary", func(t *testing.T) { + got := applyLargeOpts(arrow.ListOf(arrow.BinaryTypes.Binary)) + ll, ok := got.(*arrow.LargeListType) + require.True(t, ok) + assert.Equal(t, arrow.LARGE_BINARY, ll.Elem().ID()) + }) + + t.Run("fixed_size_list recurses", func(t *testing.T) { + got := applyLargeOpts(arrow.FixedSizeListOf(3, arrow.BinaryTypes.String)) + fsl, ok := got.(*arrow.FixedSizeListType) + require.True(t, ok) + assert.Equal(t, arrow.LARGE_STRING, fsl.Elem().ID()) + }) + + t.Run("map recurses", func(t *testing.T) { + got := applyLargeOpts(arrow.MapOf(arrow.BinaryTypes.String, arrow.BinaryTypes.Binary)) + mt, ok := got.(*arrow.MapType) + require.True(t, ok) + assert.Equal(t, arrow.LARGE_STRING, mt.KeyType().ID()) + assert.Equal(t, arrow.LARGE_BINARY, mt.ItemField().Type.ID()) + }) + + t.Run("struct recurses into fields", func(t *testing.T) { + st := arrow.StructOf( + arrow.Field{Name: "name", Type: arrow.BinaryTypes.String}, + arrow.Field{Name: "count", Type: arrow.PrimitiveTypes.Int64}, + ) + got := applyLargeOpts(st) + gst, ok := got.(*arrow.StructType) + require.True(t, ok) + assert.Equal(t, arrow.LARGE_STRING, gst.Field(0).Type.ID()) + assert.Equal(t, arrow.INT64, gst.Field(1).Type.ID()) + }) +} + +func TestHasLargeableType(t *testing.T) { + assert.True(t, hasLargeableType(arrow.BinaryTypes.String)) + assert.True(t, hasLargeableType(arrow.BinaryTypes.Binary)) + assert.True(t, 
hasLargeableType(arrow.ListOf(arrow.PrimitiveTypes.Int64))) + assert.True(t, hasLargeableType(arrow.ListViewOf(arrow.PrimitiveTypes.Int64))) + assert.False(t, hasLargeableType(arrow.PrimitiveTypes.Int64)) + assert.False(t, hasLargeableType(arrow.PrimitiveTypes.Float32)) + + t.Run("struct with string field is true", func(t *testing.T) { + st := arrow.StructOf(arrow.Field{Name: "x", Type: arrow.BinaryTypes.String}) + assert.True(t, hasLargeableType(st)) + }) + t.Run("struct with only ints is false", func(t *testing.T) { + st := arrow.StructOf(arrow.Field{Name: "x", Type: arrow.PrimitiveTypes.Int32}) + assert.False(t, hasLargeableType(st)) + }) + t.Run("fixed_size_list is true", func(t *testing.T) { + assert.True(t, hasLargeableType(arrow.FixedSizeListOf(4, arrow.BinaryTypes.String))) + }) + t.Run("fixed_size_list is false", func(t *testing.T) { + assert.False(t, hasLargeableType(arrow.FixedSizeListOf(4, arrow.PrimitiveTypes.Int32))) + }) + t.Run("map with string key is true", func(t *testing.T) { + assert.True(t, hasLargeableType(arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64))) + }) + t.Run("map with no strings is false", func(t *testing.T) { + assert.False(t, hasLargeableType(arrow.MapOf(arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int64))) + }) +} From 7d810f53c90639482d342da13f99319e07ce293b Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 13:42:06 -0400 Subject: [PATCH 64/82] feat(arreflect): thread large through inferStructType and applyEncodingOpts --- arrow/array/arreflect/reflect_infer.go | 12 ++++++--- arrow/array/arreflect/reflect_infer_test.go | 28 +++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/arrow/array/arreflect/reflect_infer.go b/arrow/array/arreflect/reflect_infer.go index 4a89ea45..23a454c2 100644 --- a/arrow/array/arreflect/reflect_infer.go +++ b/arrow/array/arreflect/reflect_infer.go @@ -256,11 +256,14 @@ func applyEncodingOpts(dt arrow.DataType, fm fieldMeta) 
(arrow.DataType, error) } return &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: dt}, nil case fm.Opts.ListView: - lt, ok := dt.(*arrow.ListType) - if !ok { + switch lt := dt.(type) { + case *arrow.ListType: + return arrow.ListViewOf(lt.Elem()), nil + case *arrow.LargeListType: + return arrow.LargeListViewOf(lt.Elem()), nil + default: return nil, fmt.Errorf("arreflect: listview tag on field %q requires a slice type, got %v", fm.Name, dt) } - return arrow.ListViewOf(lt.Elem()), nil case fm.Opts.REE: return nil, fmt.Errorf("arreflect: ree tag on struct field %q is not supported; use ree at top-level via FromSlice", fm.Name) } @@ -294,6 +297,9 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { dt = applyDecimalOpts(dt, origType, fm.Opts) dt = applyTemporalOpts(dt, origType, fm.Opts) + if fm.Opts.Large { + dt = applyLargeOpts(dt) + } dt, err = applyEncodingOpts(dt, fm) if err != nil { return nil, err diff --git a/arrow/array/arreflect/reflect_infer_test.go b/arrow/array/arreflect/reflect_infer_test.go index 8e9f0d73..8a61c703 100644 --- a/arrow/array/arreflect/reflect_infer_test.go +++ b/arrow/array/arreflect/reflect_infer_test.go @@ -690,6 +690,34 @@ func TestApplyLargeOpts(t *testing.T) { }) } +func TestInferStructTypeWithLarge(t *testing.T) { + type Row struct { + Name string `arrow:",large"` + Count int64 + } + st, err := inferStructType(reflect.TypeOf(Row{})) + require.NoError(t, err) + assert.Equal(t, arrow.LARGE_STRING, st.Field(0).Type.ID(), "Name should be LARGE_STRING") + assert.Equal(t, arrow.INT64, st.Field(1).Type.ID(), "Count should be INT64") +} + +func TestApplyEncodingOptsLargeListview(t *testing.T) { + // large+listview: applyLargeOpts turns LIST→LARGE_LIST first, then + // applyEncodingOpts should emit LARGE_LIST_VIEW + fm := fieldMeta{ + Name: "tags", + Type: reflect.TypeOf([]string{}), + Opts: tagOpts{Large: true, ListView: true}, + } + dt := applyLargeOpts(arrow.ListOf(arrow.BinaryTypes.LargeString)) + // 
dt is now LARGE_LIST + got, err := applyEncodingOpts(dt, fm) + require.NoError(t, err) + assert.Equal(t, arrow.LARGE_LIST_VIEW, got.ID()) + llv := got.(*arrow.LargeListViewType) + assert.Equal(t, arrow.LARGE_STRING, llv.Elem().ID()) +} + func TestHasLargeableType(t *testing.T) { assert.True(t, hasLargeableType(arrow.BinaryTypes.String)) assert.True(t, hasLargeableType(arrow.BinaryTypes.Binary)) From 1c7ef89d0627b6298c8f07d8ecee77efec7c6e03 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 13:49:36 -0400 Subject: [PATCH 65/82] feat(arreflect): thread large opts through builder functions --- arrow/array/arreflect/reflect_go_to_arrow.go | 64 +++++++++++++------ .../arreflect/reflect_go_to_arrow_test.go | 57 +++++++++++++++++ 2 files changed, 101 insertions(+), 20 deletions(-) diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index a7d2984e..9bc52b7f 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -40,7 +40,7 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A } if opts.Dict { - return buildDictionaryArray(vals, mem) + return buildDictionaryArray(vals, opts, mem) } if opts.REE { return buildRunEndEncodedArray(vals, opts, mem) @@ -49,7 +49,7 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A if elemType.Kind() != reflect.Slice || elemType == typeOfByteSlice { return nil, fmt.Errorf("arreflect: WithListView requires a slice-of-slices element type, got %s: %w", elemType, ErrUnsupportedType) } - return buildListViewArray(vals, mem) + return buildListViewArray(vals, opts, mem) } switch elemType { @@ -60,36 +60,39 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A switch elemType.Kind() { case reflect.Slice: if elemType == typeOfByteSlice { - return buildPrimitiveArray(vals, mem) + return buildPrimitiveArray(vals, opts, mem) } - return 
buildListArray(vals, mem) + return buildListArray(vals, opts, mem) case reflect.Array: return buildFixedSizeListArray(vals, mem) case reflect.Map: - return buildMapArray(vals, mem) + return buildMapArray(vals, opts, mem) case reflect.Struct: switch elemType { case typeOfTime: return buildTemporalArray(vals, opts, mem) default: - return buildStructArray(vals, mem) + return buildStructArray(vals, opts, mem) } default: - return buildPrimitiveArray(vals, mem) + return buildPrimitiveArray(vals, opts, mem) } } -func buildPrimitiveArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { +func buildPrimitiveArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { elemType, isPtr := derefSliceElem(vals) dt, err := inferArrowType(elemType) if err != nil { return nil, err } + if opts.Large { + dt = applyLargeOpts(dt) + } b := array.NewBuilder(mem, dt) defer b.Release() @@ -251,13 +254,16 @@ func appendStructFields(sb *array.StructBuilder, v reflect.Value, fields []field return nil } -func buildStructArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { +func buildStructArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { elemType, isPtr := derefSliceElem(vals) st, err := inferStructType(elemType) if err != nil { return nil, err } + if opts.Large { + st = applyLargeOpts(st).(*arrow.StructType) + } fields := cachedStructFields(elemType) sb := array.NewStructBuilder(mem, st) @@ -514,11 +520,14 @@ func appendListElement(b array.Builder, v reflect.Value) error { return nil } -func buildListLikeArray(vals reflect.Value, mem memory.Allocator, isView bool) (arrow.Array, error) { +func buildListLikeArray(vals reflect.Value, mem memory.Allocator, opts tagOpts, isView bool) (arrow.Array, error) { elemDT, err := inferListElemDT(vals) if err != nil { return nil, err } + if opts.Large { + elemDT = applyLargeOpts(elemDT) + } label := "list element" if isView { @@ -527,11 +536,20 @@ func 
buildListLikeArray(vals reflect.Value, mem memory.Allocator, isView bool) ( var bldr listBuilderLike var beginRow func(int) - if isView { + switch { + case isView && opts.Large: + b := array.NewLargeListViewBuilder(mem, elemDT) + bldr = b + beginRow = func(n int) { b.AppendWithSize(true, n) } + case isView: b := array.NewListViewBuilder(mem, elemDT) bldr = b beginRow = func(n int) { b.AppendWithSize(true, n) } - } else { + case opts.Large: + b := array.NewLargeListBuilder(mem, elemDT) + bldr = b + beginRow = func(_ int) { b.Append(true) } + default: b := array.NewListBuilder(mem, elemDT) bldr = b beginRow = func(_ int) { b.Append(true) } @@ -568,15 +586,15 @@ func buildListLikeArray(vals reflect.Value, mem memory.Allocator, isView bool) ( return bldr.NewArray(), nil } -func buildListArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { - return buildListLikeArray(vals, mem, false) +func buildListArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { + return buildListLikeArray(vals, mem, opts, false) } -func buildListViewArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { - return buildListLikeArray(vals, mem, true) +func buildListViewArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { + return buildListLikeArray(vals, mem, opts, true) } -func buildMapArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { +func buildMapArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { mapType, isPtr := derefSliceElem(vals) keyType := mapType.Key() @@ -597,6 +615,10 @@ func buildMapArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error if err != nil { return nil, fmt.Errorf("map value type: %w", err) } + if opts.Large { + keyDT = applyLargeOpts(keyDT) + valDT = applyLargeOpts(valDT) + } mb := array.NewMapBuilder(mem, keyDT, valDT, false) defer mb.Release() @@ -679,13 +701,15 @@ func validateDictValueType(dt arrow.DataType) 
error { } } -func buildDictionaryArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { +func buildDictionaryArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { elemType, isPtr := derefSliceElem(vals) valDT, err := inferArrowType(elemType) if err != nil { return nil, err } + // large is intentionally NOT applied here: Dictionary is + // unimplemented in the Arrow library (NewDictionaryBuilder panics). if err := validateDictValueType(valDT); err != nil { return nil, err @@ -711,7 +735,7 @@ func buildRunEndEncodedArray(vals reflect.Value, opts tagOpts, mem memory.Alloca valOpts.REE = false valOpts.ListView = false if vals.Len() == 0 { - runEndsArr, err := buildPrimitiveArray(reflect.MakeSlice(reflect.TypeOf([]int32{}), 0, 0), mem) + runEndsArr, err := buildPrimitiveArray(reflect.MakeSlice(reflect.TypeOf([]int32{}), 0, 0), tagOpts{}, mem) if err != nil { return nil, err } @@ -768,7 +792,7 @@ func buildRunEndEncodedArray(vals reflect.Value, opts tagOpts, mem memory.Alloca runEnds[i] = r.end } runEndsSlice := reflect.ValueOf(runEnds) - runEndsArr, err := buildPrimitiveArray(runEndsSlice, mem) + runEndsArr, err := buildPrimitiveArray(runEndsSlice, tagOpts{}, mem) if err != nil { return nil, fmt.Errorf("run-end encoded run ends: %w", err) } diff --git a/arrow/array/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go index fde81719..99c1bc78 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/array/arreflect/reflect_go_to_arrow_test.go @@ -1012,6 +1012,63 @@ func TestAppendDecimalValueErrors(t *testing.T) { }) } +func TestBuildLargeTypes(t *testing.T) { + mem := checkedMem(t) + largeOpts := tagOpts{Large: true} + + t.Run("string→LARGE_STRING", func(t *testing.T) { + arr := mustBuildArray(t, []string{"a", "b", "c"}, largeOpts, mem) + assert.Equal(t, arrow.LARGE_STRING, arr.DataType().ID()) + ls := arr.(*array.LargeString) + assert.Equal(t, "a", ls.Value(0)) + 
assert.Equal(t, "b", ls.Value(1)) + assert.Equal(t, "c", ls.Value(2)) + }) + + t.Run("[]byte→LARGE_BINARY", func(t *testing.T) { + arr := mustBuildArray(t, [][]byte{{1, 2}, {3}}, largeOpts, mem) + assert.Equal(t, arrow.LARGE_BINARY, arr.DataType().ID()) + }) + + t.Run("[]string→LARGE_LIST", func(t *testing.T) { + arr := mustBuildArray(t, [][]string{{"a", "b"}, {"c"}}, largeOpts, mem) + assert.Equal(t, arrow.LARGE_LIST, arr.DataType().ID()) + ll := arr.DataType().(*arrow.LargeListType) + assert.Equal(t, arrow.LARGE_STRING, ll.Elem().ID()) + }) + + t.Run("[][]byte→LARGE_LIST", func(t *testing.T) { + arr := mustBuildArray(t, [][][]byte{{{1}, {2}}, {{3}}}, largeOpts, mem) + assert.Equal(t, arrow.LARGE_LIST, arr.DataType().ID()) + ll := arr.DataType().(*arrow.LargeListType) + assert.Equal(t, arrow.LARGE_BINARY, ll.Elem().ID()) + }) + + t.Run("listview+large→LARGE_LIST_VIEW", func(t *testing.T) { + opts := tagOpts{Large: true, ListView: true} + arr := mustBuildArray(t, [][]string{{"x"}, {"y", "z"}}, opts, mem) + assert.Equal(t, arrow.LARGE_LIST_VIEW, arr.DataType().ID()) + llv := arr.DataType().(*arrow.LargeListViewType) + assert.Equal(t, arrow.LARGE_STRING, llv.Elem().ID()) + }) + + t.Run("map with large", func(t *testing.T) { + arr := mustBuildArray(t, []map[string]string{{"k": "v"}}, largeOpts, mem) + assert.Equal(t, arrow.MAP, arr.DataType().ID()) + mt := arr.DataType().(*arrow.MapType) + assert.Equal(t, arrow.LARGE_STRING, mt.KeyType().ID()) + assert.Equal(t, arrow.LARGE_STRING, mt.ItemField().Type.ID()) + }) + + t.Run("dict+large on string→Dictionary (large ignored for dict)", func(t *testing.T) { + opts := tagOpts{Large: true, Dict: true} + arr := mustBuildArray(t, []string{"a", "b", "a"}, opts, mem) + assert.Equal(t, arrow.DICTIONARY, arr.DataType().ID()) + dt := arr.DataType().(*arrow.DictionaryType) + assert.Equal(t, arrow.STRING, dt.ValueType.ID()) // large not applied, library limitation + }) +} + func TestAppendTemporalValueUnitHandling(t *testing.T) { mem 
:= checkedMem(t) ref := time.Date(2024, 1, 15, 12, 34, 56, 789_000_000, time.UTC) From 90efaf4bd833f6783f2c1027aa2fb97562147480 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 13:50:37 -0400 Subject: [PATCH 66/82] fix(arreflect): silence unused opts param in buildDictionaryArray --- arrow/array/arreflect/reflect_go_to_arrow.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index 9bc52b7f..2ec6a3d2 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -701,7 +701,7 @@ func validateDictValueType(dt arrow.DataType) error { } } -func buildDictionaryArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { +func buildDictionaryArray(vals reflect.Value, _ tagOpts, mem memory.Allocator) (arrow.Array, error) { elemType, isPtr := derefSliceElem(vals) valDT, err := inferArrowType(elemType) From 6d1be8db2ca54e5ae6e64ca6a9ba7cde3b71c758 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 13:54:19 -0400 Subject: [PATCH 67/82] fix(arreflect): thread opts into buildFixedSizeListArray and document idempotent large on struct --- arrow/array/arreflect/reflect_go_to_arrow.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index 2ec6a3d2..321e2f5a 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -65,7 +65,7 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A return buildListArray(vals, opts, mem) case reflect.Array: - return buildFixedSizeListArray(vals, mem) + return buildFixedSizeListArray(vals, opts, mem) case reflect.Map: return buildMapArray(vals, opts, mem) @@ -262,6 +262,8 @@ func buildStructArray(vals reflect.Value, opts tagOpts, mem 
memory.Allocator) (a return nil, err } if opts.Large { + // applyLargeOpts is idempotent, so per-field "large" tags already applied + // by inferStructType are safe to walk again here. st = applyLargeOpts(st).(*arrow.StructType) } @@ -648,7 +650,7 @@ func buildMapArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arro return mb.NewArray(), nil } -func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Array, error) { +func buildFixedSizeListArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { elemType, isPtr := derefSliceElem(vals) if elemType.Kind() != reflect.Array { @@ -665,6 +667,9 @@ func buildFixedSizeListArray(vals reflect.Value, mem memory.Allocator) (arrow.Ar if err != nil { return nil, err } + if opts.Large { + innerDT = applyLargeOpts(innerDT) + } fb := array.NewFixedSizeListBuilder(mem, n, innerDT) defer fb.Release() From 615de8e4e893c0d50b34d7ac8b55e69b7854cd34 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 13:57:33 -0400 Subject: [PATCH 68/82] feat(arreflect): validate large option at entrypoints --- arrow/array/arreflect/reflect.go | 6 ++++ arrow/array/arreflect/reflect_go_to_arrow.go | 10 ++++++ .../arreflect/reflect_go_to_arrow_test.go | 35 +++++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/arrow/array/arreflect/reflect.go b/arrow/array/arreflect/reflect.go index 4ec40634..6940a462 100644 --- a/arrow/array/arreflect/reflect.go +++ b/arrow/array/arreflect/reflect.go @@ -442,6 +442,12 @@ func buildEmptyTyped(goType reflect.Type, opts tagOpts, mem memory.Allocator) (a } dt = applyDecimalOpts(dt, derefType, opts) dt = applyTemporalOpts(dt, derefType, opts) + if opts.Large { + if !hasLargeableType(dt) { + return nil, fmt.Errorf("arreflect: large option has no effect on type %s: %w", dt, ErrUnsupportedType) + } + dt = applyLargeOpts(dt) + } if opts.ListView { if derefType.Kind() != reflect.Slice || derefType == typeOfByteSlice { return nil, 
fmt.Errorf("arreflect: WithListView requires a slice-of-slices element type, got %s: %w", goType, ErrUnsupportedType) diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index 321e2f5a..f10ce5ff 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -39,6 +39,16 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A elemType = elemType.Elem() } + if opts.Large { + dt, err := inferArrowType(elemType) + if err != nil { + return nil, err + } + if !hasLargeableType(dt) { + return nil, fmt.Errorf("arreflect: large option has no effect on type %s: %w", dt, ErrUnsupportedType) + } + } + if opts.Dict { return buildDictionaryArray(vals, opts, mem) } diff --git a/arrow/array/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go index 99c1bc78..ed63d760 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/array/arreflect/reflect_go_to_arrow_test.go @@ -1120,3 +1120,38 @@ func TestAppendTemporalValueUnitHandling(t *testing.T) { }) } } + +func TestWithLargeErrors(t *testing.T) { + mem := checkedMem(t) + + t.Run("large on int64 slice errors", func(t *testing.T) { + _, err := FromSlice([]int64{1, 2, 3}, mem, WithLarge()) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + assert.Contains(t, err.Error(), "large option has no effect") + }) + + t.Run("large on float32 slice errors", func(t *testing.T) { + _, err := FromSlice([]float32{1.0}, mem, WithLarge()) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + + t.Run("large on struct with no string fields errors", func(t *testing.T) { + type NoStrings struct { + X int32 + Y float64 + } + _, err := FromSlice([]NoStrings{{1, 2.0}}, mem, WithLarge()) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + assert.Contains(t, err.Error(), "large option has no effect") + }) + + 
t.Run("large on string slice succeeds", func(t *testing.T) { + arr, err := FromSlice([]string{"a"}, mem, WithLarge()) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, arrow.LARGE_STRING, arr.DataType().ID()) + }) +} From 11b3eff489a8a5f7e13bbba4e747f4b38c2d056e Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 14:03:36 -0400 Subject: [PATCH 69/82] docs(arreflect): add WithLarge and large struct tag examples --- arrow/array/arreflect/example_test.go | 47 +++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/arrow/array/arreflect/example_test.go b/arrow/array/arreflect/example_test.go index 439d9dff..26bcfbe3 100644 --- a/arrow/array/arreflect/example_test.go +++ b/arrow/array/arreflect/example_test.go @@ -318,3 +318,50 @@ func ExampleToAnySlice_nullableFields() { // alice: 9.5 // bob: } + +func ExampleWithLarge() { + mem := memory.NewGoAllocator() + + arr, err := arreflect.FromSlice([]string{"hello", "world"}, mem, arreflect.WithLarge()) + if err != nil { + panic(err) + } + defer arr.Release() + + fmt.Println("Type:", arr.DataType()) + fmt.Println("Len:", arr.Len()) + // Output: + // Type: large_utf8 + // Len: 2 +} + +func ExampleFromSlice_largeStruct() { + type Event struct { + Name string `arrow:"name,large"` + Code int32 `arrow:"code"` + } + + schema, err := arreflect.InferSchema[Event]() + if err != nil { + panic(err) + } + fmt.Println("Schema:", schema) + + mem := memory.NewGoAllocator() + arr, err := arreflect.FromSlice([]Event{{"click", 1}, {"view", 2}}, mem) + if err != nil { + panic(err) + } + defer arr.Release() + + sa := arr.(*array.Struct) + fmt.Println("Name type:", sa.Field(0).DataType()) + fmt.Println("Code type:", sa.Field(1).DataType()) + // Output: + // Schema: schema: + // fields: 2 + // - name: type=large_utf8 + // - code: type=int32 + // Name type: large_utf8 + // Code type: int32 +} From 6674bb97079a79492d47d437b27342a8391cc3c1 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 
14:03:46 -0400 Subject: [PATCH 70/82] test(arreflect): add WithLarge round-trip tests --- arrow/array/arreflect/reflect_public_test.go | 36 ++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/arrow/array/arreflect/reflect_public_test.go b/arrow/array/arreflect/reflect_public_test.go index de8b59a4..b3ef829c 100644 --- a/arrow/array/arreflect/reflect_public_test.go +++ b/arrow/array/arreflect/reflect_public_test.go @@ -691,3 +691,39 @@ func TestToAnySliceStructArray(t *testing.T) { } } } + +func TestWithLargeRoundTrip(t *testing.T) { + mem := testMem() + + t.Run("[]string WithLarge round-trips via ToSlice", func(t *testing.T) { + input := []string{"alpha", "beta", "gamma"} + arr, err := FromSlice(input, mem, WithLarge()) + require.NoError(t, err) + defer arr.Release() + + assert.Equal(t, arrow.LARGE_STRING, arr.DataType().ID()) + + got, err := ToSlice[string](arr) + require.NoError(t, err) + assert.Equal(t, input, got) + }) + + t.Run("struct with large tag round-trips", func(t *testing.T) { + type Row struct { + Label string `arrow:"label,large"` + Count int32 `arrow:"count"` + } + input := []Row{{"a", 1}, {"b", 2}} + arr, err := FromSlice(input, mem) + require.NoError(t, err) + defer arr.Release() + + sa := arr.(*array.Struct) + assert.Equal(t, arrow.LARGE_STRING, sa.Field(0).DataType().ID()) + assert.Equal(t, arrow.INT32, sa.Field(1).DataType().ID()) + + got, err := ToSlice[Row](arr) + require.NoError(t, err) + assert.Equal(t, input, got) + }) +} From 9d69407b64af6723d52f0522d43df6aa4161dcb3 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 14:13:38 -0400 Subject: [PATCH 71/82] fix(arreflect): produce LARGE_LIST_VIEW in buildEmptyTyped when Large+ListView both set --- arrow/array/arreflect/reflect.go | 7 ++++++- arrow/array/arreflect/reflect_test.go | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/arrow/array/arreflect/reflect.go b/arrow/array/arreflect/reflect.go index 6940a462..d6f5e974 
100644 --- a/arrow/array/arreflect/reflect.go +++ b/arrow/array/arreflect/reflect.go @@ -460,7 +460,12 @@ func buildEmptyTyped(goType reflect.Type, opts tagOpts, mem memory.Allocator) (a if err != nil { return nil, err } - dt = arrow.ListViewOf(innerDT) + if opts.Large { + innerDT = applyLargeOpts(innerDT) + dt = arrow.LargeListViewOf(innerDT) + } else { + dt = arrow.ListViewOf(innerDT) + } } if opts.Dict { if err := validateDictValueType(dt); err != nil { diff --git a/arrow/array/arreflect/reflect_test.go b/arrow/array/arreflect/reflect_test.go index 07f83690..7fe00318 100644 --- a/arrow/array/arreflect/reflect_test.go +++ b/arrow/array/arreflect/reflect_test.go @@ -312,6 +312,28 @@ func TestBuildEmptyTyped(t *testing.T) { assert.Equal(t, 0, arr.Len()) assert.Equal(t, arrow.RUN_END_ENCODED, arr.DataType().ID()) }) + + t.Run("large_string_empty", func(t *testing.T) { + arr, err := buildEmptyTyped(reflect.TypeOf(""), tagOpts{Large: true}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, arrow.LARGE_STRING, arr.DataType().ID()) + }) + + t.Run("large_listview_empty", func(t *testing.T) { + arr, err := buildEmptyTyped(reflect.TypeOf([]string(nil)), tagOpts{Large: true, ListView: true}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, arrow.LARGE_LIST_VIEW, arr.DataType().ID()) + llv := arr.DataType().(*arrow.LargeListViewType) + assert.Equal(t, arrow.LARGE_STRING, llv.Elem().ID()) + }) + + t.Run("large_on_int_errors", func(t *testing.T) { + _, err := buildEmptyTyped(reflect.TypeOf(int32(0)), tagOpts{Large: true}, mem) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) } func TestParseDecimalOpt(t *testing.T) { From 266fb4c161735d1dfbe329bab9129a4bcca0d35b Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 14:16:54 -0400 Subject: [PATCH 72/82] test(arreflect): add StringView/BinaryView builder and unknown-tag public error tests --- .../arreflect/reflect_go_to_arrow_test.go | 37 
+++++++++++++++++++ arrow/array/arreflect/reflect_public_test.go | 19 ++++++++++ 2 files changed, 56 insertions(+) diff --git a/arrow/array/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go index ed63d760..84078228 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/array/arreflect/reflect_go_to_arrow_test.go @@ -1155,3 +1155,40 @@ func TestWithLargeErrors(t *testing.T) { assert.Equal(t, arrow.LARGE_STRING, arr.DataType().ID()) }) } + +func TestAppendValueViewBuilders(t *testing.T) { + mem := checkedMem(t) + + t.Run("StringViewBuilder appends string value", func(t *testing.T) { + b := array.NewStringViewBuilder(mem) + defer b.Release() + err := appendValue(b, reflect.ValueOf("hello")) + require.NoError(t, err) + arr := b.NewArray() + defer arr.Release() + assert.Equal(t, 1, arr.Len()) + assert.Equal(t, "hello", arr.(*array.StringView).Value(0)) + }) + + t.Run("BinaryViewBuilder appends binary value", func(t *testing.T) { + b := array.NewBinaryViewBuilder(mem) + defer b.Release() + err := appendValue(b, reflect.ValueOf([]byte{1, 2, 3})) + require.NoError(t, err) + arr := b.NewArray() + defer arr.Release() + assert.Equal(t, 1, arr.Len()) + assert.Equal(t, []byte{1, 2, 3}, arr.(*array.BinaryView).Value(0)) + }) + + t.Run("BinaryViewBuilder appends null for nil slice", func(t *testing.T) { + b := array.NewBinaryViewBuilder(mem) + defer b.Release() + var nilSlice []byte + err := appendValue(b, reflect.ValueOf(nilSlice)) + require.NoError(t, err) + arr := b.NewArray() + defer arr.Release() + assert.True(t, arr.IsNull(0)) + }) +} diff --git a/arrow/array/arreflect/reflect_public_test.go b/arrow/array/arreflect/reflect_public_test.go index b3ef829c..d075ea7a 100644 --- a/arrow/array/arreflect/reflect_public_test.go +++ b/arrow/array/arreflect/reflect_public_test.go @@ -727,3 +727,22 @@ func TestWithLargeRoundTrip(t *testing.T) { assert.Equal(t, input, got) }) } + +func TestUnknownTagOptionError(t *testing.T) { 
+ type Bad struct { + Name string `arrow:"name,unknown_option"` + } + mem := testMem() + + t.Run("FromSlice surfaces ErrUnsupportedType for unknown tag", func(t *testing.T) { + _, err := FromSlice([]Bad{{"x"}}, mem) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) + + t.Run("InferSchema surfaces ErrUnsupportedType for unknown tag", func(t *testing.T) { + _, err := InferSchema[Bad]() + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) +} From 62df4515732fbfdeb7d06225296af30cba58a3a2 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 15:33:30 -0400 Subject: [PATCH 73/82] =?UTF-8?q?feat(arreflect):=20rename=20ListView?= =?UTF-8?q?=E2=86=92View=20in=20tagOpts,=20token,=20and=20WithListView?= =?UTF-8?q?=E2=86=92WithView?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arrow/array/arreflect/reflect.go | 19 ++++++++++--------- arrow/array/arreflect/reflect_test.go | 18 +++++++++--------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arrow/array/arreflect/reflect.go b/arrow/array/arreflect/reflect.go index d6f5e974..a5980143 100644 --- a/arrow/array/arreflect/reflect.go +++ b/arrow/array/arreflect/reflect.go @@ -39,7 +39,7 @@ type tagOpts struct { Name string Skip bool Dict bool - ListView bool + View bool REE bool Large bool DecimalPrecision int32 @@ -105,8 +105,8 @@ func parseOptions(opts *tagOpts, rest string) { switch token { case "dict": opts.Dict = true - case "listview": - opts.ListView = true + case "view": + opts.View = true case "ree": opts.REE = true case "large": @@ -375,8 +375,9 @@ type Option func(*tagOpts) // WithDict requests dictionary encoding for the top-level array. func WithDict() Option { return func(o *tagOpts) { o.Dict = true } } -// WithListView requests ListView encoding instead of List for slice types. 
-func WithListView() Option { return func(o *tagOpts) { o.ListView = true } } +// WithView requests view-type encoding (STRING_VIEW, BINARY_VIEW, LIST_VIEW) +// for the top-level array and recursively for nested types. +func WithView() Option { return func(o *tagOpts) { o.View = true } } // WithREE requests run-end encoding for the top-level array. func WithREE() Option { return func(o *tagOpts) { o.REE = true } } @@ -422,11 +423,11 @@ func validateOptions(opts tagOpts) error { if opts.REE { n++ } - if opts.ListView { + if opts.View { n++ } if n > 1 { - return fmt.Errorf("arreflect: conflicting options: only one of WithDict, WithREE, WithListView may be specified: %w", ErrUnsupportedType) + return fmt.Errorf("arreflect: conflicting options: only one of WithDict, WithREE, WithView may be specified: %w", ErrUnsupportedType) } return nil } @@ -448,9 +449,9 @@ func buildEmptyTyped(goType reflect.Type, opts tagOpts, mem memory.Allocator) (a } dt = applyLargeOpts(dt) } - if opts.ListView { + if opts.View { if derefType.Kind() != reflect.Slice || derefType == typeOfByteSlice { - return nil, fmt.Errorf("arreflect: WithListView requires a slice-of-slices element type, got %s: %w", goType, ErrUnsupportedType) + return nil, fmt.Errorf("arreflect: WithView requires a slice-of-slices element type, got %s: %w", goType, ErrUnsupportedType) } innerElem := derefType.Elem() for innerElem.Kind() == reflect.Ptr { diff --git a/arrow/array/arreflect/reflect_test.go b/arrow/array/arreflect/reflect_test.go index 7fe00318..84f8bad5 100644 --- a/arrow/array/arreflect/reflect_test.go +++ b/arrow/array/arreflect/reflect_test.go @@ -52,8 +52,8 @@ func TestParseTag(t *testing.T) { want: tagOpts{Name: "name", Dict: true}, }, { - input: "name,listview", - want: tagOpts{Name: "name", ListView: true}, + input: "name,view", + want: tagOpts{Name: "name", View: true}, }, { input: "name,ree", @@ -84,8 +84,8 @@ func TestParseTag(t *testing.T) { want: tagOpts{Name: "name", Large: true}, }, { - input: 
"name,large,listview", - want: tagOpts{Name: "name", Large: true, ListView: true}, + input: "name,large,view", + want: tagOpts{Name: "name", Large: true, View: true}, }, { input: "name,large,dict", @@ -265,19 +265,19 @@ func TestBuildEmptyTyped(t *testing.T) { }) t.Run("listview_on_non_slice_type_errors", func(t *testing.T) { - _, err := buildEmptyTyped(reflect.TypeOf(int32(0)), tagOpts{ListView: true}, mem) + _, err := buildEmptyTyped(reflect.TypeOf(int32(0)), tagOpts{View: true}, mem) require.Error(t, err) assert.ErrorIs(t, err, ErrUnsupportedType) }) t.Run("listview_on_byte_slice_errors", func(t *testing.T) { - _, err := buildEmptyTyped(reflect.TypeOf([]byte(nil)), tagOpts{ListView: true}, mem) + _, err := buildEmptyTyped(reflect.TypeOf([]byte(nil)), tagOpts{View: true}, mem) require.Error(t, err) assert.ErrorIs(t, err, ErrUnsupportedType) }) t.Run("listview_with_slice_of_pointers_derefs_inner", func(t *testing.T) { - arr, err := buildEmptyTyped(reflect.TypeOf([]*int32(nil)), tagOpts{ListView: true}, mem) + arr, err := buildEmptyTyped(reflect.TypeOf([]*int32(nil)), tagOpts{View: true}, mem) require.NoError(t, err) defer arr.Release() assert.Equal(t, 0, arr.Len()) @@ -285,7 +285,7 @@ func TestBuildEmptyTyped(t *testing.T) { }) t.Run("listview_happy_path", func(t *testing.T) { - arr, err := buildEmptyTyped(reflect.TypeOf([]int32(nil)), tagOpts{ListView: true}, mem) + arr, err := buildEmptyTyped(reflect.TypeOf([]int32(nil)), tagOpts{View: true}, mem) require.NoError(t, err) defer arr.Release() assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) @@ -321,7 +321,7 @@ func TestBuildEmptyTyped(t *testing.T) { }) t.Run("large_listview_empty", func(t *testing.T) { - arr, err := buildEmptyTyped(reflect.TypeOf([]string(nil)), tagOpts{Large: true, ListView: true}, mem) + arr, err := buildEmptyTyped(reflect.TypeOf([]string(nil)), tagOpts{Large: true, View: true}, mem) require.NoError(t, err) defer arr.Release() assert.Equal(t, arrow.LARGE_LIST_VIEW, arr.DataType().ID()) 
From e2a7f288a1a3b5119521533887b48358d0a6fcd9 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 15:37:42 -0400 Subject: [PATCH 74/82] feat(arreflect): add applyViewOpts and hasViewableType --- arrow/array/arreflect/reflect_infer.go | 56 ++++++++++++++ arrow/array/arreflect/reflect_infer_test.go | 84 +++++++++++++++++++++ 2 files changed, 140 insertions(+) diff --git a/arrow/array/arreflect/reflect_infer.go b/arrow/array/arreflect/reflect_infer.go index 23a454c2..d11d36a1 100644 --- a/arrow/array/arreflect/reflect_infer.go +++ b/arrow/array/arreflect/reflect_infer.go @@ -248,6 +248,62 @@ func hasLargeableType(dt arrow.DataType) bool { } } +func applyViewOpts(dt arrow.DataType) arrow.DataType { + switch dt.ID() { + case arrow.STRING: + return arrow.BinaryTypes.StringView + case arrow.BINARY: + return arrow.BinaryTypes.BinaryView + case arrow.LIST: + return arrow.ListViewOf(applyViewOpts(dt.(*arrow.ListType).Elem())) + case arrow.LIST_VIEW: + return arrow.ListViewOf(applyViewOpts(dt.(*arrow.ListViewType).Elem())) + case arrow.LARGE_LIST: + return arrow.LargeListViewOf(applyViewOpts(dt.(*arrow.LargeListType).Elem())) + case arrow.LARGE_LIST_VIEW: + return arrow.LargeListViewOf(applyViewOpts(dt.(*arrow.LargeListViewType).Elem())) + case arrow.FIXED_SIZE_LIST: + fsl := dt.(*arrow.FixedSizeListType) + return arrow.FixedSizeListOf(fsl.Len(), applyViewOpts(fsl.Elem())) + case arrow.MAP: + mt := dt.(*arrow.MapType) + return arrow.MapOf(applyViewOpts(mt.KeyType()), applyViewOpts(mt.ItemField().Type)) + case arrow.STRUCT: + st := dt.(*arrow.StructType) + fields := make([]arrow.Field, st.NumFields()) + for i := 0; i < st.NumFields(); i++ { + f := st.Field(i) + f.Type = applyViewOpts(f.Type) + fields[i] = f + } + return arrow.StructOf(fields...) 
+ default: + return dt + } +} + +func hasViewableType(dt arrow.DataType) bool { + switch dt.ID() { + case arrow.STRING, arrow.BINARY, arrow.LIST: + return true + case arrow.STRUCT: + st := dt.(*arrow.StructType) + for i := 0; i < st.NumFields(); i++ { + if hasViewableType(st.Field(i).Type) { + return true + } + } + return false + case arrow.FIXED_SIZE_LIST: + return hasViewableType(dt.(*arrow.FixedSizeListType).Elem()) + case arrow.MAP: + mt := dt.(*arrow.MapType) + return hasViewableType(mt.KeyType()) || hasViewableType(mt.ItemField().Type) + default: + return false + } +} + func applyEncodingOpts(dt arrow.DataType, fm fieldMeta) (arrow.DataType, error) { switch { case fm.Opts.Dict: diff --git a/arrow/array/arreflect/reflect_infer_test.go b/arrow/array/arreflect/reflect_infer_test.go index 8a61c703..78b61dec 100644 --- a/arrow/array/arreflect/reflect_infer_test.go +++ b/arrow/array/arreflect/reflect_infer_test.go @@ -747,3 +747,87 @@ func TestHasLargeableType(t *testing.T) { assert.False(t, hasLargeableType(arrow.MapOf(arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int64))) }) } + +func TestApplyViewOpts(t *testing.T) { + cases := []struct { + name string + input arrow.DataType + want arrow.Type + }{ + {"string→string_view", arrow.BinaryTypes.String, arrow.STRING_VIEW}, + {"binary→binary_view", arrow.BinaryTypes.Binary, arrow.BINARY_VIEW}, + {"list→list_view", arrow.ListOf(arrow.BinaryTypes.String), arrow.LIST_VIEW}, + {"int64 unchanged", arrow.PrimitiveTypes.Int64, arrow.INT64}, + {"float32 unchanged", arrow.PrimitiveTypes.Float32, arrow.FLOAT32}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := applyViewOpts(tc.input) + assert.Equal(t, tc.want, got.ID()) + }) + } + + t.Run("list elem is string_view", func(t *testing.T) { + got := applyViewOpts(arrow.ListOf(arrow.BinaryTypes.String)) + lv, ok := got.(*arrow.ListViewType) + require.True(t, ok) + assert.Equal(t, arrow.STRING_VIEW, lv.Elem().ID()) + }) + + t.Run("fixed_size_list 
recurses", func(t *testing.T) { + got := applyViewOpts(arrow.FixedSizeListOf(3, arrow.BinaryTypes.String)) + fsl, ok := got.(*arrow.FixedSizeListType) + require.True(t, ok) + assert.Equal(t, arrow.STRING_VIEW, fsl.Elem().ID()) + }) + + t.Run("map recurses", func(t *testing.T) { + got := applyViewOpts(arrow.MapOf(arrow.BinaryTypes.String, arrow.BinaryTypes.Binary)) + mt, ok := got.(*arrow.MapType) + require.True(t, ok) + assert.Equal(t, arrow.STRING_VIEW, mt.KeyType().ID()) + assert.Equal(t, arrow.BINARY_VIEW, mt.ItemField().Type.ID()) + }) + + t.Run("struct recurses into fields", func(t *testing.T) { + st := arrow.StructOf( + arrow.Field{Name: "name", Type: arrow.BinaryTypes.String}, + arrow.Field{Name: "count", Type: arrow.PrimitiveTypes.Int64}, + ) + got := applyViewOpts(st) + gst, ok := got.(*arrow.StructType) + require.True(t, ok) + assert.Equal(t, arrow.STRING_VIEW, gst.Field(0).Type.ID()) + assert.Equal(t, arrow.INT64, gst.Field(1).Type.ID()) + }) + + t.Run("list_view is idempotent", func(t *testing.T) { + got := applyViewOpts(arrow.ListViewOf(arrow.BinaryTypes.String)) + lv, ok := got.(*arrow.ListViewType) + require.True(t, ok) + assert.Equal(t, arrow.STRING_VIEW, lv.Elem().ID()) + }) +} + +func TestHasViewableType(t *testing.T) { + assert.True(t, hasViewableType(arrow.BinaryTypes.String)) + assert.True(t, hasViewableType(arrow.BinaryTypes.Binary)) + assert.True(t, hasViewableType(arrow.ListOf(arrow.PrimitiveTypes.Int64))) + assert.False(t, hasViewableType(arrow.PrimitiveTypes.Int64)) + assert.False(t, hasViewableType(arrow.PrimitiveTypes.Float32)) + + t.Run("struct with string field is true", func(t *testing.T) { + st := arrow.StructOf(arrow.Field{Name: "x", Type: arrow.BinaryTypes.String}) + assert.True(t, hasViewableType(st)) + }) + t.Run("struct with only ints is false", func(t *testing.T) { + st := arrow.StructOf(arrow.Field{Name: "x", Type: arrow.PrimitiveTypes.Int32}) + assert.False(t, hasViewableType(st)) + }) + t.Run("fixed_size_list is true", 
func(t *testing.T) { + assert.True(t, hasViewableType(arrow.FixedSizeListOf(4, arrow.BinaryTypes.String))) + }) + t.Run("map with string key is true", func(t *testing.T) { + assert.True(t, hasViewableType(arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64))) + }) +} From 2ca6425cc3a8858d44782723aa0e59a5cbc896a4 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 15:44:25 -0400 Subject: [PATCH 75/82] feat(arreflect): thread view through inferStructType, applyEncodingOpts, and buildEmptyTyped --- arrow/array/arreflect/reflect.go | 36 ++++++++++------- arrow/array/arreflect/reflect_infer.go | 12 ++---- arrow/array/arreflect/reflect_infer_test.go | 43 ++++++++++++++------- arrow/array/arreflect/reflect_test.go | 31 +++++++++++---- 4 files changed, 75 insertions(+), 47 deletions(-) diff --git a/arrow/array/arreflect/reflect.go b/arrow/array/arreflect/reflect.go index a5980143..248350b5 100644 --- a/arrow/array/arreflect/reflect.go +++ b/arrow/array/arreflect/reflect.go @@ -450,22 +450,28 @@ func buildEmptyTyped(goType reflect.Type, opts tagOpts, mem memory.Allocator) (a dt = applyLargeOpts(dt) } if opts.View { - if derefType.Kind() != reflect.Slice || derefType == typeOfByteSlice { - return nil, fmt.Errorf("arreflect: WithView requires a slice-of-slices element type, got %s: %w", goType, ErrUnsupportedType) - } - innerElem := derefType.Elem() - for innerElem.Kind() == reflect.Ptr { - innerElem = innerElem.Elem() - } - innerDT, err := inferArrowType(innerElem) - if err != nil { - return nil, err - } - if opts.Large { - innerDT = applyLargeOpts(innerDT) - dt = arrow.LargeListViewOf(innerDT) + if derefType.Kind() == reflect.Slice && derefType != typeOfByteSlice { + // slice-of-slices: build a LIST_VIEW or LARGE_LIST_VIEW + innerElem := derefType.Elem() + for innerElem.Kind() == reflect.Ptr { + innerElem = innerElem.Elem() + } + innerDT, err := inferArrowType(innerElem) + if err != nil { + return nil, err + } + innerDT = applyViewOpts(innerDT) + 
if opts.Large { + dt = arrow.LargeListViewOf(innerDT) + } else { + dt = arrow.ListViewOf(innerDT) + } } else { - dt = arrow.ListViewOf(innerDT) + // primitive/string/binary: apply view recursively + if !hasViewableType(dt) { + return nil, fmt.Errorf("arreflect: view option has no effect on type %s: %w", dt, ErrUnsupportedType) + } + dt = applyViewOpts(dt) } } if opts.Dict { diff --git a/arrow/array/arreflect/reflect_infer.go b/arrow/array/arreflect/reflect_infer.go index d11d36a1..52bbce4f 100644 --- a/arrow/array/arreflect/reflect_infer.go +++ b/arrow/array/arreflect/reflect_infer.go @@ -311,15 +311,6 @@ func applyEncodingOpts(dt arrow.DataType, fm fieldMeta) (arrow.DataType, error) return nil, fmt.Errorf("arreflect: dict tag on field %q: %w", fm.Name, err) } return &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: dt}, nil - case fm.Opts.ListView: - switch lt := dt.(type) { - case *arrow.ListType: - return arrow.ListViewOf(lt.Elem()), nil - case *arrow.LargeListType: - return arrow.LargeListViewOf(lt.Elem()), nil - default: - return nil, fmt.Errorf("arreflect: listview tag on field %q requires a slice type, got %v", fm.Name, dt) - } case fm.Opts.REE: return nil, fmt.Errorf("arreflect: ree tag on struct field %q is not supported; use ree at top-level via FromSlice", fm.Name) } @@ -356,6 +347,9 @@ func inferStructType(t reflect.Type) (*arrow.StructType, error) { if fm.Opts.Large { dt = applyLargeOpts(dt) } + if fm.Opts.View { + dt = applyViewOpts(dt) + } dt, err = applyEncodingOpts(dt, fm) if err != nil { return nil, err diff --git a/arrow/array/arreflect/reflect_infer_test.go b/arrow/array/arreflect/reflect_infer_test.go index 78b61dec..2ac3b6f9 100644 --- a/arrow/array/arreflect/reflect_infer_test.go +++ b/arrow/array/arreflect/reflect_infer_test.go @@ -323,9 +323,9 @@ func TestInferArrowSchemaStructFieldEncoding(t *testing.T) { assert.Equal(t, arrow.DICTIONARY, f[0].Type.ID()) }) - t.Run("listview-tagged []string field becomes LIST_VIEW", 
func(t *testing.T) { + t.Run("view-tagged []string field becomes LIST_VIEW", func(t *testing.T) { type S struct { - Tags []string `arrow:"tags,listview"` + Tags []string `arrow:"tags,view"` } schema, err := InferSchema[S]() require.NoError(t, err) @@ -701,21 +701,34 @@ func TestInferStructTypeWithLarge(t *testing.T) { assert.Equal(t, arrow.INT64, st.Field(1).Type.ID(), "Count should be INT64") } -func TestApplyEncodingOptsLargeListview(t *testing.T) { - // large+listview: applyLargeOpts turns LIST→LARGE_LIST first, then - // applyEncodingOpts should emit LARGE_LIST_VIEW - fm := fieldMeta{ - Name: "tags", - Type: reflect.TypeOf([]string{}), - Opts: tagOpts{Large: true, ListView: true}, +func TestApplyViewOptsViewCombinations(t *testing.T) { + t.Run("view+large: LARGE_LIST→LARGE_LIST_VIEW", func(t *testing.T) { + dt := applyLargeOpts(arrow.ListOf(arrow.BinaryTypes.String)) + // dt is now LARGE_LIST + got := applyViewOpts(dt) + assert.Equal(t, arrow.LARGE_LIST_VIEW, got.ID()) + }) + + t.Run("view only: LIST→LIST_VIEW", func(t *testing.T) { + dt := arrow.ListOf(arrow.BinaryTypes.String) + got := applyViewOpts(dt) + assert.Equal(t, arrow.LIST_VIEW, got.ID()) + lv := got.(*arrow.ListViewType) + assert.Equal(t, arrow.STRING_VIEW, lv.Elem().ID()) + }) +} + +func TestInferStructTypeWithView(t *testing.T) { + type Row struct { + Name string `arrow:",view"` + Tags []string `arrow:"tags,view"` } - dt := applyLargeOpts(arrow.ListOf(arrow.BinaryTypes.LargeString)) - // dt is now LARGE_LIST - got, err := applyEncodingOpts(dt, fm) + st, err := inferStructType(reflect.TypeOf(Row{})) require.NoError(t, err) - assert.Equal(t, arrow.LARGE_LIST_VIEW, got.ID()) - llv := got.(*arrow.LargeListViewType) - assert.Equal(t, arrow.LARGE_STRING, llv.Elem().ID()) + assert.Equal(t, arrow.STRING_VIEW, st.Field(0).Type.ID(), "Name should be STRING_VIEW") + assert.Equal(t, arrow.LIST_VIEW, st.Field(1).Type.ID(), "Tags should be LIST_VIEW") + lv := st.Field(1).Type.(*arrow.ListViewType) + 
assert.Equal(t, arrow.STRING_VIEW, lv.Elem().ID()) } func TestHasLargeableType(t *testing.T) { diff --git a/arrow/array/arreflect/reflect_test.go b/arrow/array/arreflect/reflect_test.go index 84f8bad5..3f335b7d 100644 --- a/arrow/array/arreflect/reflect_test.go +++ b/arrow/array/arreflect/reflect_test.go @@ -264,19 +264,20 @@ func TestBuildEmptyTyped(t *testing.T) { assert.Equal(t, arrow.INT32, arr.DataType().ID()) }) - t.Run("listview_on_non_slice_type_errors", func(t *testing.T) { + t.Run("view_on_non_slice_type_errors", func(t *testing.T) { _, err := buildEmptyTyped(reflect.TypeOf(int32(0)), tagOpts{View: true}, mem) require.Error(t, err) assert.ErrorIs(t, err, ErrUnsupportedType) }) - t.Run("listview_on_byte_slice_errors", func(t *testing.T) { - _, err := buildEmptyTyped(reflect.TypeOf([]byte(nil)), tagOpts{View: true}, mem) - require.Error(t, err) - assert.ErrorIs(t, err, ErrUnsupportedType) + t.Run("view_happy_path_binary", func(t *testing.T) { + arr, err := buildEmptyTyped(reflect.TypeOf([]byte(nil)), tagOpts{View: true}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, arrow.BINARY_VIEW, arr.DataType().ID()) }) - t.Run("listview_with_slice_of_pointers_derefs_inner", func(t *testing.T) { + t.Run("view_with_slice_of_pointers_derefs_inner", func(t *testing.T) { arr, err := buildEmptyTyped(reflect.TypeOf([]*int32(nil)), tagOpts{View: true}, mem) require.NoError(t, err) defer arr.Release() @@ -284,13 +285,27 @@ func TestBuildEmptyTyped(t *testing.T) { assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) }) - t.Run("listview_happy_path", func(t *testing.T) { + t.Run("view_happy_path_list", func(t *testing.T) { arr, err := buildEmptyTyped(reflect.TypeOf([]int32(nil)), tagOpts{View: true}, mem) require.NoError(t, err) defer arr.Release() assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) }) + t.Run("view_happy_path_string", func(t *testing.T) { + arr, err := buildEmptyTyped(reflect.TypeOf(""), tagOpts{View: true}, mem) + require.NoError(t, 
err) + defer arr.Release() + assert.Equal(t, arrow.STRING_VIEW, arr.DataType().ID()) + }) + + t.Run("large_view_empty", func(t *testing.T) { + arr, err := buildEmptyTyped(reflect.TypeOf([]string(nil)), tagOpts{Large: true, View: true}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, arrow.LARGE_LIST_VIEW, arr.DataType().ID()) + }) + t.Run("dict_with_unsupported_value_type_errors", func(t *testing.T) { _, err := buildEmptyTyped(reflect.TypeOf(time.Time{}), tagOpts{Dict: true}, mem) require.Error(t, err) @@ -326,7 +341,7 @@ func TestBuildEmptyTyped(t *testing.T) { defer arr.Release() assert.Equal(t, arrow.LARGE_LIST_VIEW, arr.DataType().ID()) llv := arr.DataType().(*arrow.LargeListViewType) - assert.Equal(t, arrow.LARGE_STRING, llv.Elem().ID()) + assert.Equal(t, arrow.STRING_VIEW, llv.Elem().ID()) }) t.Run("large_on_int_errors", func(t *testing.T) { From cdb5172049a29dc4d4ef9bd7efc8f91845676b97 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 15:48:39 -0400 Subject: [PATCH 76/82] fix(arreflect): remove stale large_listview_empty duplicate, fold assertion into large_view_empty --- arrow/array/arreflect/reflect_test.go | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/arrow/array/arreflect/reflect_test.go b/arrow/array/arreflect/reflect_test.go index 3f335b7d..f5752e5a 100644 --- a/arrow/array/arreflect/reflect_test.go +++ b/arrow/array/arreflect/reflect_test.go @@ -304,6 +304,8 @@ func TestBuildEmptyTyped(t *testing.T) { require.NoError(t, err) defer arr.Release() assert.Equal(t, arrow.LARGE_LIST_VIEW, arr.DataType().ID()) + llv := arr.DataType().(*arrow.LargeListViewType) + assert.Equal(t, arrow.STRING_VIEW, llv.Elem().ID()) }) t.Run("dict_with_unsupported_value_type_errors", func(t *testing.T) { @@ -335,15 +337,6 @@ func TestBuildEmptyTyped(t *testing.T) { assert.Equal(t, arrow.LARGE_STRING, arr.DataType().ID()) }) - t.Run("large_listview_empty", func(t *testing.T) { - arr, err := 
buildEmptyTyped(reflect.TypeOf([]string(nil)), tagOpts{Large: true, View: true}, mem) - require.NoError(t, err) - defer arr.Release() - assert.Equal(t, arrow.LARGE_LIST_VIEW, arr.DataType().ID()) - llv := arr.DataType().(*arrow.LargeListViewType) - assert.Equal(t, arrow.STRING_VIEW, llv.Elem().ID()) - }) - t.Run("large_on_int_errors", func(t *testing.T) { _, err := buildEmptyTyped(reflect.TypeOf(int32(0)), tagOpts{Large: true}, mem) require.Error(t, err) From e44aa8f3b91c346dd62b1a6e8915397c37b157e2 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 15:52:58 -0400 Subject: [PATCH 77/82] feat(arreflect): thread view opts through builder functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renames opts.ListView → opts.View, adds applyViewOpts calls in buildPrimitiveArray, buildListLikeArray, buildStructArray, buildMapArray, and buildFixedSizeListArray, and adds a view-validation guard in buildArray. Updates TestBuildListViewArray → TestBuildViewArray with STRING_VIEW/BINARY_VIEW subtests; replaces listview+large subtest with view+large. 
--- arrow/array/arreflect/reflect_go_to_arrow.go | 32 ++++++++- .../arreflect/reflect_go_to_arrow_test.go | 65 +++++++++++-------- 2 files changed, 67 insertions(+), 30 deletions(-) diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index f10ce5ff..23d110a6 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -49,15 +49,25 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A } } + if opts.View { + dt, err := inferArrowType(elemType) + if err != nil { + return nil, err + } + if !hasViewableType(dt) { + return nil, fmt.Errorf("arreflect: view option has no effect on type %s: %w", dt, ErrUnsupportedType) + } + } + if opts.Dict { return buildDictionaryArray(vals, opts, mem) } if opts.REE { return buildRunEndEncodedArray(vals, opts, mem) } - if opts.ListView { + if opts.View { if elemType.Kind() != reflect.Slice || elemType == typeOfByteSlice { - return nil, fmt.Errorf("arreflect: WithListView requires a slice-of-slices element type, got %s: %w", elemType, ErrUnsupportedType) + return buildPrimitiveArray(vals, opts, mem) } return buildListViewArray(vals, opts, mem) } @@ -103,6 +113,9 @@ func buildPrimitiveArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) if opts.Large { dt = applyLargeOpts(dt) } + if opts.View { + dt = applyViewOpts(dt) + } b := array.NewBuilder(mem, dt) defer b.Release() @@ -276,6 +289,9 @@ func buildStructArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (a // by inferStructType are safe to walk again here. 
st = applyLargeOpts(st).(*arrow.StructType) } + if opts.View { + st = applyViewOpts(st).(*arrow.StructType) + } fields := cachedStructFields(elemType) sb := array.NewStructBuilder(mem, st) @@ -540,6 +556,9 @@ func buildListLikeArray(vals reflect.Value, mem memory.Allocator, opts tagOpts, if opts.Large { elemDT = applyLargeOpts(elemDT) } + if opts.View { + elemDT = applyViewOpts(elemDT) + } label := "list element" if isView { @@ -631,6 +650,10 @@ func buildMapArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arro keyDT = applyLargeOpts(keyDT) valDT = applyLargeOpts(valDT) } + if opts.View { + keyDT = applyViewOpts(keyDT) + valDT = applyViewOpts(valDT) + } mb := array.NewMapBuilder(mem, keyDT, valDT, false) defer mb.Release() @@ -680,6 +703,9 @@ func buildFixedSizeListArray(vals reflect.Value, opts tagOpts, mem memory.Alloca if opts.Large { innerDT = applyLargeOpts(innerDT) } + if opts.View { + innerDT = applyViewOpts(innerDT) + } fb := array.NewFixedSizeListBuilder(mem, n, innerDT) defer fb.Release() @@ -748,7 +774,7 @@ func buildDictionaryArray(vals reflect.Value, _ tagOpts, mem memory.Allocator) ( func buildRunEndEncodedArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.Array, error) { valOpts := opts valOpts.REE = false - valOpts.ListView = false + valOpts.View = false if vals.Len() == 0 { runEndsArr, err := buildPrimitiveArray(reflect.MakeSlice(reflect.TypeOf([]int32{}), 0, 0), tagOpts{}, mem) if err != nil { diff --git a/arrow/array/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go index 84078228..9d58aa47 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/array/arreflect/reflect_go_to_arrow_test.go @@ -602,43 +602,56 @@ func TestBuildRunEndEncodedArray(t *testing.T) { }) } -func TestBuildListViewArray(t *testing.T) { +func TestBuildViewArray(t *testing.T) { mem := checkedMem(t) - t.Run("int32_listview", func(t *testing.T) { - vals := [][]int32{{1, 2, 3}, {4, 5}, {6}} 
- arr := mustBuildArray(t, vals, tagOpts{ListView: true}, mem) - require.Equal(t, arrow.LIST_VIEW, arr.DataType().ID(), "expected LIST_VIEW, got %v", arr.DataType()) + t.Run("string→STRING_VIEW", func(t *testing.T) { + arr := mustBuildArray(t, []string{"a", "b"}, tagOpts{View: true}, mem) + assert.Equal(t, arrow.STRING_VIEW, arr.DataType().ID()) + sv := arr.(*array.StringView) + assert.Equal(t, "a", sv.Value(0)) + assert.Equal(t, "b", sv.Value(1)) + }) + + t.Run("[]byte→BINARY_VIEW", func(t *testing.T) { + arr := mustBuildArray(t, [][]byte{{1, 2}, {3}}, tagOpts{View: true}, mem) + assert.Equal(t, arrow.BINARY_VIEW, arr.DataType().ID()) + }) + + t.Run("int32_view", func(t *testing.T) { + vals := [][]int32{{1, 2, 3}, {4, 5}} + arr := mustBuildArray(t, vals, tagOpts{View: true}, mem) + assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) typed := arr.(*array.ListView) - assert.Equal(t, 3, typed.Len()) + assert.Equal(t, 2, typed.Len()) }) - t.Run("null_entry", func(t *testing.T) { - vals := [][]int32{{1, 2}, nil, {3}} - arr := mustBuildArray(t, vals, tagOpts{ListView: true}, mem) - assert.True(t, arr.IsNull(1), "expected index 1 to be null") + t.Run("nil_outer_listview", func(t *testing.T) { + var nilSlice [][]int32 + arr := mustBuildArray(t, nilSlice, tagOpts{View: true}, mem) + assert.Equal(t, 0, arr.Len()) }) t.Run("string_listview", func(t *testing.T) { - vals := [][]string{{"hello", "world"}, {"foo"}, {"a", "b", "c"}} - arr := mustBuildArray(t, vals, tagOpts{ListView: true}, mem) - require.Equal(t, arrow.LIST_VIEW, arr.DataType().ID(), "expected LIST_VIEW, got %v", arr.DataType()) - assert.Equal(t, 3, arr.Len()) + vals := [][]string{{"a", "b"}, {"c"}} + arr := mustBuildArray(t, vals, tagOpts{View: true}, mem) + assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) + lv := arr.DataType().(*arrow.ListViewType) + assert.Equal(t, arrow.STRING_VIEW, lv.Elem().ID()) }) - t.Run("total_values", func(t *testing.T) { - vals := [][]int32{{10, 20}, {30}} - arr := 
mustBuildArray(t, vals, tagOpts{ListView: true}, mem) + t.Run("null_in_listview", func(t *testing.T) { + vals := [][]int32{{1, 2, 3}, nil, {4, 5}} + arr := mustBuildArray(t, vals, tagOpts{View: true}, mem) allVals := arr.(*array.ListView).ListValues().(*array.Int32) - assert.Equal(t, 3, allVals.Len(), "expected 3 total values, got %d", allVals.Len()) + assert.Equal(t, 5, allVals.Len()) }) - t.Run("nil_pointer_listview_element", func(t *testing.T) { + t.Run("nil_pointer_view_element", func(t *testing.T) { a := []int32{1, 2} - vals := []*[]int32{&a, nil, &a} - arr := mustBuildArray(t, vals, tagOpts{ListView: true}, mem) - assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) - assertMultiLevelPtrNullPattern(t, arr) + vals := []*[]int32{&a, nil} + arr := mustBuildArray(t, vals, tagOpts{View: true}, mem) + assert.True(t, arr.IsNull(1)) }) } @@ -1044,12 +1057,10 @@ func TestBuildLargeTypes(t *testing.T) { assert.Equal(t, arrow.LARGE_BINARY, ll.Elem().ID()) }) - t.Run("listview+large→LARGE_LIST_VIEW", func(t *testing.T) { - opts := tagOpts{Large: true, ListView: true} + t.Run("view+large→LARGE_LIST_VIEW", func(t *testing.T) { + opts := tagOpts{Large: true, View: true} arr := mustBuildArray(t, [][]string{{"x"}, {"y", "z"}}, opts, mem) assert.Equal(t, arrow.LARGE_LIST_VIEW, arr.DataType().ID()) - llv := arr.DataType().(*arrow.LargeListViewType) - assert.Equal(t, arrow.LARGE_STRING, llv.Elem().ID()) }) t.Run("map with large", func(t *testing.T) { From b721749134b52b90ba40636e2566db51cc672f51 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 16:00:16 -0400 Subject: [PATCH 78/82] =?UTF-8?q?fix(arreflect):=20LARGE=5FSTRING/BINARY?= =?UTF-8?q?=E2=86=92STRING=5FVIEW/BINARY=5FVIEW=20in=20applyViewOpts;=20me?= =?UTF-8?q?rge=20double=20inferArrowType=20in=20buildArray?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arrow/array/arreflect/reflect_go_to_arrow.go | 13 +++---------- 
arrow/array/arreflect/reflect_go_to_arrow_test.go | 5 ++++- arrow/array/arreflect/reflect_infer.go | 4 ++-- arrow/array/arreflect/reflect_infer_test.go | 2 ++ 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/arrow/array/arreflect/reflect_go_to_arrow.go b/arrow/array/arreflect/reflect_go_to_arrow.go index 23d110a6..e2acfabc 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow.go +++ b/arrow/array/arreflect/reflect_go_to_arrow.go @@ -39,22 +39,15 @@ func buildArray(vals reflect.Value, opts tagOpts, mem memory.Allocator) (arrow.A elemType = elemType.Elem() } - if opts.Large { + if opts.Large || opts.View { dt, err := inferArrowType(elemType) if err != nil { return nil, err } - if !hasLargeableType(dt) { + if opts.Large && !hasLargeableType(dt) { return nil, fmt.Errorf("arreflect: large option has no effect on type %s: %w", dt, ErrUnsupportedType) } - } - - if opts.View { - dt, err := inferArrowType(elemType) - if err != nil { - return nil, err - } - if !hasViewableType(dt) { + if opts.View && !hasViewableType(dt) { return nil, fmt.Errorf("arreflect: view option has no effect on type %s: %w", dt, ErrUnsupportedType) } } diff --git a/arrow/array/arreflect/reflect_go_to_arrow_test.go b/arrow/array/arreflect/reflect_go_to_arrow_test.go index 9d58aa47..7f098874 100644 --- a/arrow/array/arreflect/reflect_go_to_arrow_test.go +++ b/arrow/array/arreflect/reflect_go_to_arrow_test.go @@ -1057,10 +1057,13 @@ func TestBuildLargeTypes(t *testing.T) { assert.Equal(t, arrow.LARGE_BINARY, ll.Elem().ID()) }) - t.Run("view+large→LARGE_LIST_VIEW", func(t *testing.T) { + t.Run("view+large→LARGE_LIST_VIEW", func(t *testing.T) { + // large→LARGE_LIST, then view→LARGE_LIST_VIEW; view wins on string elem (no LARGE_STRING_VIEW) opts := tagOpts{Large: true, View: true} arr := mustBuildArray(t, [][]string{{"x"}, {"y", "z"}}, opts, mem) assert.Equal(t, arrow.LARGE_LIST_VIEW, arr.DataType().ID()) + llv := arr.DataType().(*arrow.LargeListViewType) + assert.Equal(t, 
arrow.STRING_VIEW, llv.Elem().ID()) }) t.Run("map with large", func(t *testing.T) { diff --git a/arrow/array/arreflect/reflect_infer.go b/arrow/array/arreflect/reflect_infer.go index 52bbce4f..f01d28cb 100644 --- a/arrow/array/arreflect/reflect_infer.go +++ b/arrow/array/arreflect/reflect_infer.go @@ -250,9 +250,9 @@ func hasLargeableType(dt arrow.DataType) bool { func applyViewOpts(dt arrow.DataType) arrow.DataType { switch dt.ID() { - case arrow.STRING: + case arrow.STRING, arrow.LARGE_STRING: return arrow.BinaryTypes.StringView - case arrow.BINARY: + case arrow.BINARY, arrow.LARGE_BINARY: return arrow.BinaryTypes.BinaryView case arrow.LIST: return arrow.ListViewOf(applyViewOpts(dt.(*arrow.ListType).Elem())) diff --git a/arrow/array/arreflect/reflect_infer_test.go b/arrow/array/arreflect/reflect_infer_test.go index 2ac3b6f9..e8e58901 100644 --- a/arrow/array/arreflect/reflect_infer_test.go +++ b/arrow/array/arreflect/reflect_infer_test.go @@ -769,6 +769,8 @@ func TestApplyViewOpts(t *testing.T) { }{ {"string→string_view", arrow.BinaryTypes.String, arrow.STRING_VIEW}, {"binary→binary_view", arrow.BinaryTypes.Binary, arrow.BINARY_VIEW}, + {"large_string→string_view", arrow.BinaryTypes.LargeString, arrow.STRING_VIEW}, + {"large_binary→binary_view", arrow.BinaryTypes.LargeBinary, arrow.BINARY_VIEW}, {"list→list_view", arrow.ListOf(arrow.BinaryTypes.String), arrow.LIST_VIEW}, {"int64 unchanged", arrow.PrimitiveTypes.Int64, arrow.INT64}, {"float32 unchanged", arrow.PrimitiveTypes.Float32, arrow.FLOAT32}, From 96dee20834efb319abefcdbca6ea503f22a8529e Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 16:03:03 -0400 Subject: [PATCH 79/82] =?UTF-8?q?feat(arreflect):=20WithView=20round-trip?= =?UTF-8?q?=20tests,=20fix=20WithListView=E2=86=92WithView=20in=20public?= =?UTF-8?q?=20tests,=20add=20examples?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arrow/array/arreflect/doc.go | 2 +- 
arrow/array/arreflect/example_test.go | 34 +++++++++++ arrow/array/arreflect/reflect_arrow_to_go.go | 4 +- arrow/array/arreflect/reflect_infer.go | 4 +- .../arreflect/reflect_integration_test.go | 2 +- arrow/array/arreflect/reflect_public_test.go | 61 +++++++++++++++++-- 6 files changed, 97 insertions(+), 10 deletions(-) diff --git a/arrow/array/arreflect/doc.go b/arrow/array/arreflect/doc.go index 07690ca6..31da401e 100644 --- a/arrow/array/arreflect/doc.go +++ b/arrow/array/arreflect/doc.go @@ -42,7 +42,7 @@ // // Additional tag options: // -// arrow:"field,listview" — use ListView instead of List for slice fields +// arrow:"field,view" — use StringView/BinaryView instead of String/Binary for strings, or ListVi for slices // arrow:"field,ree" — run-end encoding at top-level only (struct fields not supported) // arrow:"field,decimal(precision,scale)" — override decimal precision and scale (e.g., arrow:",decimal(18,2)") package arreflect diff --git a/arrow/array/arreflect/example_test.go b/arrow/array/arreflect/example_test.go index 26bcfbe3..8a3a6c64 100644 --- a/arrow/array/arreflect/example_test.go +++ b/arrow/array/arreflect/example_test.go @@ -365,3 +365,37 @@ func ExampleFromSlice_largeStruct() { // Name type: large_utf8 // Code type: int32 } + +func ExampleWithView() { + mem := memory.NewGoAllocator() + + arr, err := arreflect.FromSlice([]string{"hello", "world"}, mem, arreflect.WithView()) + if err != nil { + panic(err) + } + defer arr.Release() + + fmt.Println("Type:", arr.DataType()) + fmt.Println("Len:", arr.Len()) + // Output: + // Type: string_view + // Len: 2 +} + +func ExampleFromSlice_viewStruct() { + type Event struct { + Name string `arrow:"name,view"` + Code int32 `arrow:"code"` + } + + schema, err := arreflect.InferSchema[Event]() + if err != nil { + panic(err) + } + fmt.Println("Schema:", schema) + // Output: + // Schema: schema: + // fields: 2 + // - name: type=string_view + // - code: type=int32 +} diff --git 
a/arrow/array/arreflect/reflect_arrow_to_go.go b/arrow/array/arreflect/reflect_arrow_to_go.go index 7fe39b7e..c5b1288b 100644 --- a/arrow/array/arreflect/reflect_arrow_to_go.go +++ b/arrow/array/arreflect/reflect_arrow_to_go.go @@ -75,7 +75,7 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { arrow.FLOAT32, arrow.FLOAT64: return setPrimitiveValue(v, arr, i) - case arrow.STRING, arrow.LARGE_STRING: + case arrow.STRING, arrow.LARGE_STRING, arrow.STRING_VIEW: type stringer interface{ Value(int) string } a, ok := arr.(stringer) if !ok { @@ -86,7 +86,7 @@ func setValue(v reflect.Value, arr arrow.Array, i int) error { } v.SetString(strings.Clone(a.Value(i))) - case arrow.BINARY, arrow.LARGE_BINARY: + case arrow.BINARY, arrow.LARGE_BINARY, arrow.BINARY_VIEW: type byter interface{ Value(int) []byte } a, ok := arr.(byter) if !ok { diff --git a/arrow/array/arreflect/reflect_infer.go b/arrow/array/arreflect/reflect_infer.go index f01d28cb..b3c4fa86 100644 --- a/arrow/array/arreflect/reflect_infer.go +++ b/arrow/array/arreflect/reflect_infer.go @@ -429,9 +429,9 @@ func InferGoType(dt arrow.DataType) (reflect.Type, error) { return typeOfFloat64, nil case arrow.BOOL: return typeOfBool, nil - case arrow.STRING, arrow.LARGE_STRING: + case arrow.STRING, arrow.LARGE_STRING, arrow.STRING_VIEW: return typeOfString, nil - case arrow.BINARY, arrow.LARGE_BINARY: + case arrow.BINARY, arrow.LARGE_BINARY, arrow.BINARY_VIEW: return typeOfByteSlice, nil case arrow.TIMESTAMP, arrow.DATE32, arrow.DATE64, arrow.TIME32, arrow.TIME64: return typeOfTime, nil diff --git a/arrow/array/arreflect/reflect_integration_test.go b/arrow/array/arreflect/reflect_integration_test.go index adc0ed01..e958dc25 100644 --- a/arrow/array/arreflect/reflect_integration_test.go +++ b/arrow/array/arreflect/reflect_integration_test.go @@ -358,7 +358,7 @@ func TestReflectIntegration(t *testing.T) { t.Run("listview_struct_field_roundtrip", func(t *testing.T) { type Row struct { Name string `arrow:"name"` - 
Tags []string `arrow:"tags,listview"` + Tags []string `arrow:"tags,view"` } rows := []Row{ {"alice", []string{"admin", "user"}}, diff --git a/arrow/array/arreflect/reflect_public_test.go b/arrow/array/arreflect/reflect_public_test.go index d075ea7a..ba8cc997 100644 --- a/arrow/array/arreflect/reflect_public_test.go +++ b/arrow/array/arreflect/reflect_public_test.go @@ -239,7 +239,7 @@ func TestFromGoSlice(t *testing.T) { }) t.Run("empty slice with WithListView", func(t *testing.T) { - arr, err := FromSlice([][]int32{}, mem, WithListView()) + arr, err := FromSlice([][]int32{}, mem, WithView()) require.NoError(t, err) defer arr.Release() @@ -285,9 +285,9 @@ func TestFromGoSlice(t *testing.T) { opts []Option }{ {"WithDict+WithREE", []Option{WithDict(), WithREE()}}, - {"WithDict+WithListView", []Option{WithDict(), WithListView()}}, - {"WithREE+WithListView", []Option{WithREE(), WithListView()}}, - {"all three", []Option{WithDict(), WithREE(), WithListView()}}, + {"WithDict+WithView", []Option{WithDict(), WithView()}}, + {"WithREE+WithView", []Option{WithREE(), WithView()}}, + {"all three", []Option{WithDict(), WithREE(), WithView()}}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { @@ -746,3 +746,56 @@ func TestUnknownTagOptionError(t *testing.T) { assert.ErrorIs(t, err, ErrUnsupportedType) }) } + +func TestWithViewRoundTrip(t *testing.T) { + mem := testMem() + + t.Run("[]string WithView round-trips via ToSlice", func(t *testing.T) { + input := []string{"alpha", "beta", "gamma"} + arr, err := FromSlice(input, mem, WithView()) + require.NoError(t, err) + defer arr.Release() + + assert.Equal(t, arrow.STRING_VIEW, arr.DataType().ID()) + + got, err := ToSlice[string](arr) + require.NoError(t, err) + assert.Equal(t, input, got) + }) + + t.Run("[][]string WithView produces LIST_VIEW", func(t *testing.T) { + input := [][]string{{"a", "b"}, {"c"}} + arr, err := FromSlice(input, mem, WithView()) + require.NoError(t, err) + defer arr.Release() + + 
assert.Equal(t, arrow.LIST_VIEW, arr.DataType().ID()) + lv := arr.DataType().(*arrow.ListViewType) + assert.Equal(t, arrow.STRING_VIEW, lv.Elem().ID()) + }) + + t.Run("struct with view tag round-trips", func(t *testing.T) { + type Row struct { + Label string `arrow:"label,view"` + Count int32 `arrow:"count"` + } + input := []Row{{"a", 1}, {"b", 2}} + arr, err := FromSlice(input, mem) + require.NoError(t, err) + defer arr.Release() + + sa := arr.(*array.Struct) + assert.Equal(t, arrow.STRING_VIEW, sa.Field(0).DataType().ID()) + assert.Equal(t, arrow.INT32, sa.Field(1).DataType().ID()) + + got, err := ToSlice[Row](arr) + require.NoError(t, err) + assert.Equal(t, input, got) + }) + + t.Run("WithView on int64 errors", func(t *testing.T) { + _, err := FromSlice([]int64{1}, mem, WithView()) + require.Error(t, err) + assert.ErrorIs(t, err, ErrUnsupportedType) + }) +} From e573f55fe56f2177981ad843da37895cb3a722b4 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 16:06:22 -0400 Subject: [PATCH 80/82] fix(arreflect): fix truncated doc comment; rename stale WithListView test name --- arrow/array/arreflect/doc.go | 2 +- arrow/array/arreflect/reflect_public_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow/array/arreflect/doc.go b/arrow/array/arreflect/doc.go index 31da401e..7730810b 100644 --- a/arrow/array/arreflect/doc.go +++ b/arrow/array/arreflect/doc.go @@ -42,7 +42,7 @@ // // Additional tag options: // -// arrow:"field,view" — use StringView/BinaryView instead of String/Binary for strings, or ListVi for slices +// arrow:"field,view" — use STRING_VIEW/BINARY_VIEW for string/bytes fields, or LIST_VIEW for slice fields // arrow:"field,ree" — run-end encoding at top-level only (struct fields not supported) // arrow:"field,decimal(precision,scale)" — override decimal precision and scale (e.g., arrow:",decimal(18,2)") package arreflect diff --git a/arrow/array/arreflect/reflect_public_test.go 
b/arrow/array/arreflect/reflect_public_test.go index ba8cc997..8767634f 100644 --- a/arrow/array/arreflect/reflect_public_test.go +++ b/arrow/array/arreflect/reflect_public_test.go @@ -238,7 +238,7 @@ func TestFromGoSlice(t *testing.T) { assert.Equal(t, 0, arr.Len()) }) - t.Run("empty slice with WithListView", func(t *testing.T) { + t.Run("empty slice with WithView", func(t *testing.T) { arr, err := FromSlice([][]int32{}, mem, WithView()) require.NoError(t, err) defer arr.Release() From d976f0884ac5b41769500003757f3b4de419aed3 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 16:15:50 -0400 Subject: [PATCH 81/82] fix(arreflect): hasViewableType accepts LARGE_STRING/BINARY; add large+view empty-slice regression test --- arrow/array/arreflect/reflect_infer.go | 2 +- arrow/array/arreflect/reflect_infer_test.go | 6 ++++++ arrow/array/arreflect/reflect_test.go | 8 ++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arrow/array/arreflect/reflect_infer.go b/arrow/array/arreflect/reflect_infer.go index b3c4fa86..cc7cae51 100644 --- a/arrow/array/arreflect/reflect_infer.go +++ b/arrow/array/arreflect/reflect_infer.go @@ -284,7 +284,7 @@ func applyViewOpts(dt arrow.DataType) arrow.DataType { func hasViewableType(dt arrow.DataType) bool { switch dt.ID() { - case arrow.STRING, arrow.BINARY, arrow.LIST: + case arrow.STRING, arrow.BINARY, arrow.LARGE_STRING, arrow.LARGE_BINARY, arrow.LIST: return true case arrow.STRUCT: st := dt.(*arrow.StructType) diff --git a/arrow/array/arreflect/reflect_infer_test.go b/arrow/array/arreflect/reflect_infer_test.go index e8e58901..16617acf 100644 --- a/arrow/array/arreflect/reflect_infer_test.go +++ b/arrow/array/arreflect/reflect_infer_test.go @@ -845,4 +845,10 @@ func TestHasViewableType(t *testing.T) { t.Run("map with string key is true", func(t *testing.T) { assert.True(t, hasViewableType(arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64))) }) + t.Run("large_string is true", func(t 
*testing.T) { + assert.True(t, hasViewableType(arrow.BinaryTypes.LargeString)) + }) + t.Run("large_binary is true", func(t *testing.T) { + assert.True(t, hasViewableType(arrow.BinaryTypes.LargeBinary)) + }) } diff --git a/arrow/array/arreflect/reflect_test.go b/arrow/array/arreflect/reflect_test.go index f5752e5a..b0ad487c 100644 --- a/arrow/array/arreflect/reflect_test.go +++ b/arrow/array/arreflect/reflect_test.go @@ -308,6 +308,14 @@ func TestBuildEmptyTyped(t *testing.T) { assert.Equal(t, arrow.STRING_VIEW, llv.Elem().ID()) }) + t.Run("large_view_string_empty", func(t *testing.T) { + // large applied first: STRING→LARGE_STRING; then view: LARGE_STRING→STRING_VIEW + arr, err := buildEmptyTyped(reflect.TypeOf(""), tagOpts{Large: true, View: true}, mem) + require.NoError(t, err) + defer arr.Release() + assert.Equal(t, arrow.STRING_VIEW, arr.DataType().ID()) + }) + t.Run("dict_with_unsupported_value_type_errors", func(t *testing.T) { _, err := buildEmptyTyped(reflect.TypeOf(time.Time{}), tagOpts{Dict: true}, mem) require.Error(t, err) From c8ba88ee2f9926aa5074170d9970f94456b67233 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 22 Apr 2026 16:25:43 -0400 Subject: [PATCH 82/82] fix(arreflect): hasViewableType accepts all view/large-list/view-typed variants; add idempotency regression test --- arrow/array/arreflect/reflect_infer.go | 4 +++- arrow/array/arreflect/reflect_infer_test.go | 15 +++++++++++++++ arrow/array/arreflect/reflect_public_test.go | 19 +++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/arrow/array/arreflect/reflect_infer.go b/arrow/array/arreflect/reflect_infer.go index cc7cae51..811edf11 100644 --- a/arrow/array/arreflect/reflect_infer.go +++ b/arrow/array/arreflect/reflect_infer.go @@ -284,7 +284,9 @@ func applyViewOpts(dt arrow.DataType) arrow.DataType { func hasViewableType(dt arrow.DataType) bool { switch dt.ID() { - case arrow.STRING, arrow.BINARY, arrow.LARGE_STRING, arrow.LARGE_BINARY, arrow.LIST: + case 
arrow.STRING, arrow.BINARY, arrow.LARGE_STRING, arrow.LARGE_BINARY, + arrow.STRING_VIEW, arrow.BINARY_VIEW, + arrow.LIST, arrow.LIST_VIEW, arrow.LARGE_LIST, arrow.LARGE_LIST_VIEW: return true case arrow.STRUCT: st := dt.(*arrow.StructType) diff --git a/arrow/array/arreflect/reflect_infer_test.go b/arrow/array/arreflect/reflect_infer_test.go index 16617acf..744d296f 100644 --- a/arrow/array/arreflect/reflect_infer_test.go +++ b/arrow/array/arreflect/reflect_infer_test.go @@ -851,4 +851,19 @@ func TestHasViewableType(t *testing.T) { t.Run("large_binary is true", func(t *testing.T) { assert.True(t, hasViewableType(arrow.BinaryTypes.LargeBinary)) }) + t.Run("string_view is true", func(t *testing.T) { + assert.True(t, hasViewableType(arrow.BinaryTypes.StringView)) + }) + t.Run("binary_view is true", func(t *testing.T) { + assert.True(t, hasViewableType(arrow.BinaryTypes.BinaryView)) + }) + t.Run("list_view is true", func(t *testing.T) { + assert.True(t, hasViewableType(arrow.ListViewOf(arrow.PrimitiveTypes.Int64))) + }) + t.Run("large_list is true", func(t *testing.T) { + assert.True(t, hasViewableType(arrow.LargeListOf(arrow.PrimitiveTypes.Int64))) + }) + t.Run("large_list_view is true", func(t *testing.T) { + assert.True(t, hasViewableType(arrow.LargeListViewOf(arrow.PrimitiveTypes.Int64))) + }) } diff --git a/arrow/array/arreflect/reflect_public_test.go b/arrow/array/arreflect/reflect_public_test.go index 8767634f..edb3adf6 100644 --- a/arrow/array/arreflect/reflect_public_test.go +++ b/arrow/array/arreflect/reflect_public_test.go @@ -798,4 +798,23 @@ func TestWithViewRoundTrip(t *testing.T) { require.Error(t, err) assert.ErrorIs(t, err, ErrUnsupportedType) }) + + t.Run("struct with view-tagged fields via WithView is idempotent", func(t *testing.T) { + // Fields already tagged ,view infer to STRING_VIEW; WithView() should still + // accept the struct and the top-level applyViewOpts walk is a no-op on views. 
+ type Row struct { + Name string `arrow:"name,view"` + Code int32 `arrow:"code"` + } + input := []Row{{"click", 1}, {"view", 2}} + arr, err := FromSlice(input, mem, WithView()) + require.NoError(t, err) + defer arr.Release() + sa := arr.(*array.Struct) + assert.Equal(t, arrow.STRING_VIEW, sa.Field(0).DataType().ID()) + + got, err := ToSlice[Row](arr) + require.NoError(t, err) + assert.Equal(t, input, got) + }) }