Skip to content

Commit

Permalink
ARROW-14400: [Go] Equals and ApproxEquals for Tables and Chunked Arrays
Browse files Browse the repository at this point in the history
Closes #11488 from zeroshade/extra-comparisons

Authored-by: Matthew Topol <mtopol@factset.com>
Signed-off-by: Matthew Topol <mtopol@factset.com>
  • Loading branch information
Matthew Topol committed Oct 20, 2021
1 parent a8e1c81 commit 9841dc8
Show file tree
Hide file tree
Showing 2 changed files with 221 additions and 0 deletions.
140 changes: 140 additions & 0 deletions go/arrow/array/compare.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,136 @@ func RecordApproxEqual(left, right Record, opts ...EqualOption) bool {
return true
}

// chunkedBinaryApply walks two chunked arrays in lock-step even when their chunk
// layouts differ. For every maximal run of elements that lies inside a single chunk
// on both sides, fn is invoked with the two chunks and the half-open [beg, end)
// offsets of the run within each chunk. Iteration stops as soon as fn returns false
// or once left.length elements have been visited.
//
// NOTE(review): only left.length bounds the walk; callers in this file check that
// both sides have the same length before calling.
func chunkedBinaryApply(left, right *Chunked, fn func(left Interface, lbeg, lend int64, right Interface, rbeg, rend int64) bool) {
	total := int64(left.length)

	var (
		consumed     int64 // elements handed to fn so far
		lchunk, rchk int   // index of the current chunk on each side
		loff, roff   int64 // offset inside the current chunk on each side
	)

	for consumed < total {
		lcur, rcur := left.Chunk(lchunk), right.Chunk(rchk)

		// advance past any chunk that has been fully consumed (or is empty)
		// before computing the next overlapping run.
		switch {
		case loff == int64(lcur.Len()):
			loff = 0
			lchunk++
			continue
		case roff == int64(rcur.Len()):
			roff = 0
			rchk++
			continue
		}

		// the run ends at whichever chunk boundary comes first.
		run := int64(min(lcur.Len()-int(loff), rcur.Len()-int(roff)))
		consumed += run
		if ok := fn(lcur, loff, loff+run, rcur, roff, roff+run); !ok {
			return
		}

		loff += run
		roff += run
	}
}

// ChunkedEqual reports whether two chunked arrays are equal regardless of their chunkings.
//
// The same pointer always compares equal. Otherwise the total length, null count and
// data type must match, after which the element data is compared run by run with
// ArraySliceEqual. Two distinct zero-length chunked arrays that pass those checks
// compare equal.
func ChunkedEqual(left, right *Chunked) bool {
	switch {
	case left == right:
		return true
	case left.length != right.length:
		return false
	case left.nulls != right.nulls:
		return false
	case !arrow.TypeEqual(left.dtype, right.dtype):
		return false
	}

	// Start from true: for zero-length inputs chunkedBinaryApply never invokes the
	// callback, and two empty arrays of the same type must still compare equal.
	isequal := true
	chunkedBinaryApply(left, right, func(l Interface, lbeg, lend int64, r Interface, rbeg, rend int64) bool {
		// returning false stops the walk at the first mismatching run
		isequal = ArraySliceEqual(l, lbeg, lend, r, rbeg, rend)
		return isequal
	})

	return isequal
}

// ChunkedApproxEqual reports whether two chunked arrays are approximately equal
// regardless of their chunkings. opts are forwarded unchanged to
// ArraySliceApproxEqual for every compared run.
//
// The same pointer always compares equal. Otherwise the total length, null count and
// data type must match before element data is compared. Two distinct zero-length
// chunked arrays that pass those checks compare equal.
func ChunkedApproxEqual(left, right *Chunked, opts ...EqualOption) bool {
	switch {
	case left == right:
		return true
	case left.length != right.length:
		return false
	case left.nulls != right.nulls:
		return false
	case !arrow.TypeEqual(left.dtype, right.dtype):
		return false
	}

	// Start from true: for zero-length inputs chunkedBinaryApply never invokes the
	// callback, and two empty arrays of the same type must still compare equal.
	isequal := true
	chunkedBinaryApply(left, right, func(l Interface, lbeg, lend int64, r Interface, rbeg, rend int64) bool {
		// returning false stops the walk at the first mismatching run
		isequal = ArraySliceApproxEqual(l, lbeg, lend, r, rbeg, rend, opts...)
		return isequal
	})

	return isequal
}

// TableEqual reports whether two tables have the same shape, the same column
// fields, and equal column data. Columns are compared pairwise by position:
// the fields with Equal and the chunked data with ChunkedEqual.
func TableEqual(left, right Table) bool {
	if left.NumCols() != right.NumCols() || left.NumRows() != right.NumRows() {
		return false
	}

	for idx := 0; int64(idx) < left.NumCols(); idx++ {
		lcol, rcol := left.Column(idx), right.Column(idx)

		// a column matches only if both its field and its data match
		if !lcol.field.Equal(rcol.field) || !ChunkedEqual(lcol.data, rcol.data) {
			return false
		}
	}

	return true
}

// TableApproxEqual reports whether two tables have the same shape, the same
// column fields, and approximately equal column data. Columns are compared
// pairwise by position: the fields with Equal and the chunked data with
// ChunkedApproxEqual, to which opts are forwarded unchanged.
func TableApproxEqual(left, right Table, opts ...EqualOption) bool {
	if left.NumCols() != right.NumCols() || left.NumRows() != right.NumRows() {
		return false
	}

	for idx := 0; int64(idx) < left.NumCols(); idx++ {
		lcol, rcol := left.Column(idx), right.Column(idx)

		// a column matches only if both its field and its data match
		if !lcol.field.Equal(rcol.field) || !ChunkedApproxEqual(lcol.data, rcol.data, opts...) {
			return false
		}
	}

	return true
}

// ArrayEqual reports whether the two provided arrays are equal.
func ArrayEqual(left, right Interface) bool {
switch {
Expand Down Expand Up @@ -188,6 +318,16 @@ func ArraySliceEqual(left Interface, lbeg, lend int64, right Interface, rbeg, re
return ArrayEqual(l, r)
}

// ArraySliceApproxEqual reports whether left[lbeg:lend] and right[rbeg:rend] are
// approximately equal. Both ranges are materialised with NewSlice, compared with
// ArrayApproxEqual (opts forwarded unchanged), and released before returning.
func ArraySliceApproxEqual(left Interface, lbeg, lend int64, right Interface, rbeg, rend int64, opts ...EqualOption) bool {
	lhs := NewSlice(left, lbeg, lend)
	defer lhs.Release()

	rhs := NewSlice(right, rbeg, rend)
	defer rhs.Release()

	approx := ArrayApproxEqual(lhs, rhs, opts...)
	return approx
}

const defaultAbsoluteTolerance = 1e-5

type equalOption struct {
Expand Down
81 changes: 81 additions & 0 deletions go/arrow/array/compare_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@ import (
"math"
"testing"

"github.com/apache/arrow/go/arrow"
"github.com/apache/arrow/go/arrow/array"
"github.com/apache/arrow/go/arrow/float16"
"github.com/apache/arrow/go/arrow/internal/arrdata"
"github.com/apache/arrow/go/arrow/memory"
"github.com/stretchr/testify/assert"
)

func TestArrayEqual(t *testing.T) {
Expand Down Expand Up @@ -529,3 +531,82 @@ func TestRecordApproxEqual(t *testing.T) {
})
}
}

// TestChunkedEqual builds a table from every arrdata record set and checks that
// each column's chunked data compares equal to itself. The "nulls" data set is
// exempt from the check.
func TestChunkedEqual(t *testing.T) {
	for name, recs := range arrdata.Records {
		t.Run(name, func(t *testing.T) {
			schema := recs[0].Schema()
			tbl := array.NewTableFromRecords(schema, recs)
			defer tbl.Release()

			for col := 0; col < int(tbl.NumCols()); col++ {
				lhs, rhs := tbl.Column(col).Data(), tbl.Column(col).Data()
				if array.ChunkedEqual(lhs, rhs) || name == "nulls" {
					continue
				}
				t.Fatalf("identical chunked arrays should compare as equal:\narr:%v\n", tbl.Column(col).Data())
			}
		})
	}
}

// TestChunkedApproxEqual checks that two float64 chunked arrays holding the
// values 1..10 compare as equal and approximately equal even though one is
// chunked as 5/2/3 elements and the other as 3/2/4/1.
func TestChunkedApproxEqual(t *testing.T) {
	bldr := array.NewFloat64Builder(memory.DefaultAllocator)
	defer bldr.Release()

	// build appends vals to the shared builder and returns the array that
	// NewFloat64Array produces from it.
	build := func(vals ...float64) array.Interface {
		bldr.AppendValues(vals, nil)
		return bldr.NewFloat64Array()
	}

	// first layout: 5 / 2 / 3
	a1 := build(1, 2, 3, 4, 5)
	defer a1.Release()
	a2 := build(6, 7)
	defer a2.Release()
	a3 := build(8, 9, 10)
	defer a3.Release()

	first := array.NewChunked(arrow.PrimitiveTypes.Float64, []array.Interface{a1, a2, a3})
	defer first.Release()

	// second layout: 3 / 2 / 4 / 1
	b1 := build(1, 2, 3)
	defer b1.Release()
	b2 := build(4, 5)
	defer b2.Release()
	b3 := build(6, 7, 8, 9)
	defer b3.Release()
	b4 := build(10)
	defer b4.Release()

	second := array.NewChunked(arrow.PrimitiveTypes.Float64, []array.Interface{b1, b2, b3, b4})
	defer second.Release()

	assert.True(t, array.ChunkedEqual(first, second))
	assert.True(t, array.ChunkedApproxEqual(first, second))
}

// TestTableEqual builds a table from every arrdata record set and checks that
// the table compares both equal and approximately equal to itself.
func TestTableEqual(t *testing.T) {
	for name, recs := range arrdata.Records {
		t.Run(name, func(t *testing.T) {
			tbl := array.NewTableFromRecords(recs[0].Schema(), recs)
			defer tbl.Release()

			// The messages previously contained "\tbl:", which Go reads as a TAB
			// followed by "bl:"; "\ntbl:" is the intended newline + label.
			if !array.TableEqual(tbl, tbl) {
				t.Fatalf("identical tables should compare as equal:\ntbl:%v\n", tbl)
			}
			if !array.TableApproxEqual(tbl, tbl) {
				t.Fatalf("identical tables should compare as approx equal:\ntbl:%v\n", tbl)
			}
		})
	}
}

0 comments on commit 9841dc8

Please sign in to comment.