-
Notifications
You must be signed in to change notification settings - Fork 22
/
nulls.go
108 lines (94 loc) · 2.71 KB
/
nulls.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
package plugin
import (
"github.com/apache/arrow/go/v15/arrow"
"github.com/apache/arrow/go/v15/arrow/array"
"github.com/apache/arrow/go/v15/arrow/memory"
)
// stripNullsFromLists builds a new list array of the same data type as list in
// which null elements inside each list entry are dropped. List entries that
// are themselves null stay null.
//
// Non-null elements are copied through their string representation
// (ValueStr / AppendValueFromString). The function panics if the value builder
// rejects one of those strings.
func stripNullsFromLists(list array.ListLike) array.ListLike {
	// TODO: handle Arrow maps separately if required
	bldr := array.NewBuilder(memory.DefaultAllocator, list.DataType()).(array.ListLikeBuilder)
	// The array returned by NewArray owns its own reference to the built
	// buffers, so the builder can be released when this function returns.
	defer bldr.Release()

	values := list.ListValues()
	for j := 0; j < list.Len(); j++ {
		if list.IsNull(j) {
			bldr.AppendNull()
			continue
		}
		bldr.Append(true)
		vBldr := bldr.ValueBuilder()

		from, to := list.ValueOffsets(j)
		slc := array.NewSlice(values, from, to)
		for k := 0; k < slc.Len(); k++ {
			if slc.IsNull(k) {
				// Skip null elements: this is the "strip" part.
				continue
			}
			if err := vBldr.AppendValueFromString(slc.ValueStr(k)); err != nil {
				panic(err)
			}
		}
		// The slice holds its own reference to the values data; drop it once
		// this entry has been copied.
		slc.Release()
	}
	return bldr.NewArray().(array.ListLike)
}
// AllowNullFunc takes an Arrow data type and returns a bool.
// NOTE(review): presumably it reports whether null values are permitted for
// that type and is the type of WriterTestSuite.allowNull — confirm against the
// suite definition.
type AllowNullFunc func(arrow.DataType) bool
// replaceNullsByEmpty returns arr with its null entries replaced by the
// builder's empty value when s.allowNull rejects arr's data type. The result
// is then passed through replaceNullsByEmptyNestedArray so nested children get
// the same treatment.
//
// If s.allowNull is nil, arr is returned unchanged. Non-null values are copied
// through their string representation (ValueStr / AppendValueFromString); the
// function panics if the builder rejects one of those strings.
func (s *WriterTestSuite) replaceNullsByEmpty(arr arrow.Array) arrow.Array {
	if s.allowNull == nil {
		return arr
	}

	if !s.allowNull(arr.DataType()) && arr.NullN() > 0 {
		builder := array.NewBuilder(memory.DefaultAllocator, arr.DataType())
		// The array returned by NewArray owns its own reference to the built
		// buffers, so the builder can be released when this function returns.
		defer builder.Release()

		for j := 0; j < arr.Len(); j++ {
			if arr.IsNull(j) {
				builder.AppendEmptyValue()
				continue
			}
			if err := builder.AppendValueFromString(arr.ValueStr(j)); err != nil {
				panic(err)
			}
		}
		arr = builder.NewArray()
	}

	// we need to process the nested arrays, too
	return s.replaceNullsByEmptyNestedArray(arr)
}
// replaceNullsByEmptyNestedArray rebuilds list-like and struct arrays around
// children that have been run through handleNullsArray. The parent's data
// type, length, buffers, null count and offset are reused as-is. Any other
// array kind, or any array when s.allowNull is nil, is returned unchanged.
func (s *WriterTestSuite) replaceNullsByEmptyNestedArray(arr arrow.Array) arrow.Array {
	if s.allowNull == nil {
		return arr
	}

	// Work out the processed child data first; only nested kinds have any.
	var children []arrow.ArrayData
	switch nested := arr.(type) {
	case array.ListLike: // TODO: handle Arrow maps separately if required
		processed := s.handleNullsArray(nested.ListValues())
		children = []arrow.ArrayData{processed.Data()}
	case *array.Struct:
		children = make([]arrow.ArrayData, nested.NumField())
		for i := range children {
			children[i] = s.handleNullsArray(nested.Field(i)).Data()
		}
	default:
		return arr
	}

	// Reassemble the parent around the new children.
	parent := arr.Data()
	rebuilt := array.NewData(
		arr.DataType(),
		arr.Len(),
		parent.Buffers(),
		children,
		arr.NullN(),
		parent.Offset(),
	)
	return array.MakeFromData(rebuilt)
}
// handleNulls returns a new record with the same schema and row count as
// record, in which every column has been passed through handleNullsArray.
//
// The processed columns are collected in a fresh slice. Writing into the slice
// returned by record.Columns() could modify the input record itself, because
// that slice may be the record's own backing storage.
func (s *WriterTestSuite) handleNulls(record arrow.Record) arrow.Record {
	src := record.Columns()
	cols := make([]arrow.Array, len(src))
	for c, col := range src {
		cols[c] = s.handleNullsArray(col)
	}
	return array.NewRecord(record.Schema(), cols, record.NumRows())
}
// handleNullsArray applies the suite's null handling to a single array. When
// s.ignoreNullsInLists is set and arr is list-like, null elements are first
// stripped from its entries. The result is then passed to replaceNullsByEmpty.
func (s *WriterTestSuite) handleNullsArray(arr arrow.Array) arrow.Array {
	if s.ignoreNullsInLists {
		// TODO: handle Arrow maps separately if required
		if asList, isList := arr.(array.ListLike); isList {
			arr = stripNullsFromLists(asList)
		}
	}
	return s.replaceNullsByEmpty(arr)
}