Skip to content

Commit

Permalink
GH-32950: [Go] REE Benchmarks (#34666)
Browse files Browse the repository at this point in the history
### Rationale for this change
Adding benchmarks for `run_end_encode` and `run_end_decode` to track and hopefully improve the performance of these kernels.

* Closes: #32950

Authored-by: Matt Topol <zotthewizard@gmail.com>
Signed-off-by: Matt Topol <zotthewizard@gmail.com>
  • Loading branch information
zeroshade committed Mar 22, 2023
1 parent fd5d710 commit a1153a8
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 9 deletions.
4 changes: 2 additions & 2 deletions go/arrow/compute/internal/kernels/vector_run_end_encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ func (re *runEndEncodeLoopFixedWidth[R, V]) WriteEncodedRuns(out *exec.ExecResul
readOffset++

var writeOffset int64
var value V
for readOffset < re.inputOffset+re.inputLen {
var value V
valid := re.readValue(re.inputValidity, re.inputValues, readOffset, &value)
if valid != curRunValid || value != currentRun {
// close the current run by writing it out
Expand Down Expand Up @@ -122,8 +122,8 @@ func (re *runEndEncodeLoopFixedWidth[R, V]) CountNumberOfRuns() (numValid, numOu
}
numOutput = 1

var value V
for offset < re.inputOffset+re.inputLen {
var value V
valid := re.readValue(re.inputValidity, re.inputValues, offset, &value)
offset++
// new run
Expand Down
126 changes: 126 additions & 0 deletions go/arrow/compute/vector_run_end_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,17 @@ package compute_test

import (
"context"
"fmt"
"math"
"strings"
"testing"

"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
"github.com/apache/arrow/go/v12/arrow/bitutil"
"github.com/apache/arrow/go/v12/arrow/compute"
"github.com/apache/arrow/go/v12/arrow/compute/internal/exec"
"github.com/apache/arrow/go/v12/arrow/internal/testing/gen"
"github.com/apache/arrow/go/v12/arrow/memory"
"github.com/stretchr/testify/suite"
)
Expand Down Expand Up @@ -295,3 +299,125 @@ func TestRunEndFunctions(t *testing.T) {
})
}
}

// benchRunEndEncode registers an "encode" sub-benchmark on b that times
// compute.RunEndEncode.
//
// The input is a randomly generated array of valueType with sz elements and
// the given null probability; runEndType is passed through as the requested
// run-end type. Throughput (b.SetBytes) is the summed length of the input
// array's non-nil buffers.
func benchRunEndEncode(b *testing.B, sz int, nullProb float64, runEndType, valueType arrow.DataType) {
	b.Run("encode", func(b *testing.B) {
		mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
		rng := gen.NewRandomArrayGenerator(seed, mem)

		values := rng.ArrayOf(valueType.ID(), int64(sz), nullProb)
		b.Cleanup(func() { values.Release() })

		ctx := compute.WithAllocator(context.Background(), mem)
		input := &compute.ArrayDatum{Value: values.Data()}
		opts := compute.RunEndEncodeOptions{RunEndType: runEndType}

		// Total size of the data being encoded, used for the MB/s figure.
		var totalBytes int64
		for _, buffer := range values.Data().Buffers() {
			if buffer == nil {
				continue
			}
			totalBytes += int64(buffer.Len())
		}
		b.SetBytes(totalBytes)

		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			res, err := compute.RunEndEncode(ctx, opts, input)

			// Error handling and releasing the result are bookkeeping, so
			// they are kept out of the measured time.
			b.StopTimer()
			if err != nil {
				b.Fatal(err)
			}
			res.Release()
			b.StartTimer()
		}
	})
}

// benchRunEndDecode registers a "decode" sub-benchmark on b that times
// compute.RunEndDecode.
//
// A random array of valueType with sz elements and the given null probability
// is generated and run-end encoded once (outside the timed region) using
// runEndType; the encoded datum is then decoded b.N times. Throughput
// (b.SetBytes) is the summed length of the non-nil buffers of the original,
// un-encoded array rather than of the encoded input.
func benchRunEndDecode(b *testing.B, sz int, nullProb float64, runEndType, valueType arrow.DataType) {
	b.Run("decode", func(b *testing.B) {
		var (
			mem = memory.NewCheckedAllocator(memory.DefaultAllocator)
			rng = gen.NewRandomArrayGenerator(seed, mem)
		)

		values := rng.ArrayOf(valueType.ID(), int64(sz), nullProb)
		b.Cleanup(func() {
			values.Release()
		})

		var (
			res        compute.Datum
			ctx        = compute.WithAllocator(context.Background(), mem)
			opts       = compute.RunEndEncodeOptions{RunEndType: runEndType}
			input, err = compute.RunEndEncode(ctx, opts, &compute.ArrayDatum{Value: values.Data()})
			byts       int64
		)

		if err != nil {
			b.Fatal(err)
		}
		// The encoded datum is created here and used for every iteration, so
		// release it once the sub-benchmark finishes instead of leaking it.
		b.Cleanup(func() {
			input.Release()
		})

		for _, buf := range values.Data().Buffers() {
			if buf != nil {
				byts += int64(buf.Len())
			}
		}

		b.SetBytes(byts)
		b.ResetTimer()
		for n := 0; n < b.N; n++ {
			res, err = compute.RunEndDecode(ctx, input)
			// Keep error handling and releasing the result out of the
			// measured time.
			b.StopTimer()
			if err != nil {
				b.Fatal(err)
			}
			res.Release()
			b.StartTimer()
		}
	})
}

func BenchmarkRunEndKernels(b *testing.B) {
args := []struct {
sz int
nullProb float64
}{
{CpuCacheSizes[2], 0},
{CpuCacheSizes[2], 0.5},
{CpuCacheSizes[2], 1},
}

runEnds := []struct {
dt arrow.DataType
maxLen int
}{
{arrow.PrimitiveTypes.Int16, math.MaxInt16},
{arrow.PrimitiveTypes.Int32, math.MaxInt32},
{arrow.PrimitiveTypes.Int64, math.MaxInt64},
}

for _, a := range args {
b.Run(fmt.Sprintf("nullprob=%.1f", a.nullProb), func(b *testing.B) {
for _, runEndType := range runEnds {
sz := exec.Min(a.sz, runEndType.maxLen)
b.Run("run_ends_type="+runEndType.dt.String(), func(b *testing.B) {
for _, valType := range append(numericTypes, arrow.BinaryTypes.String, arrow.FixedWidthTypes.Boolean) {
b.Run("value_type="+valType.String(), func(b *testing.B) {
benchRunEndEncode(b, sz, a.nullProb, runEndType.dt, valType)
benchRunEndDecode(b, sz, a.nullProb, runEndType.dt, valType)
})
}
})
}
})
}
}
22 changes: 15 additions & 7 deletions go/arrow/internal/testing/gen/random_array_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func (r *RandomArrayGenerator) Uint8(size int64, min, max uint8, prob float64) a
dist := rand.New(rand.NewSource(r.seed + r.extra))
out := arrow.Uint8Traits.CastFromBytes(buffers[1].Bytes())
for i := int64(0); i < size; i++ {
out[i] = uint8(dist.Intn(int(max-min+1))) + min
out[i] = uint8(dist.Intn(int(max)-int(min)+1)) + min
}

data := array.NewData(arrow.PrimitiveTypes.Uint8, int(size), buffers, nil, int(nullcount), 0)
Expand All @@ -144,7 +144,7 @@ func (r *RandomArrayGenerator) Int16(size int64, min, max int16, prob float64) a
dist := rand.New(rand.NewSource(r.seed + r.extra))
out := arrow.Int16Traits.CastFromBytes(buffers[1].Bytes())
for i := int64(0); i < size; i++ {
out[i] = int16(dist.Intn(int(max-min+1))) + min
out[i] = int16(dist.Intn(int(max)-int(min)+1)) + min
}

data := array.NewData(arrow.PrimitiveTypes.Int16, int(size), buffers, nil, int(nullcount), 0)
Expand All @@ -162,7 +162,7 @@ func (r *RandomArrayGenerator) Uint16(size int64, min, max uint16, prob float64)
dist := rand.New(rand.NewSource(r.seed + r.extra))
out := arrow.Uint16Traits.CastFromBytes(buffers[1].Bytes())
for i := int64(0); i < size; i++ {
out[i] = uint16(dist.Intn(int(max-min+1))) + min
out[i] = uint16(dist.Intn(int(max)-int(min)+1)) + min
}

data := array.NewData(arrow.PrimitiveTypes.Uint16, int(size), buffers, nil, int(nullcount), 0)
Expand All @@ -180,7 +180,7 @@ func (r *RandomArrayGenerator) Int32(size int64, min, max int32, prob float64) a
dist := rand.New(rand.NewSource(r.seed + r.extra))
out := arrow.Int32Traits.CastFromBytes(buffers[1].Bytes())
for i := int64(0); i < size; i++ {
out[i] = dist.Int31n(max-min+1) + min
out[i] = int32(dist.Intn(int(max)-int(min)+1)) + min
}

data := array.NewData(arrow.PrimitiveTypes.Int32, int(size), buffers, nil, int(nullcount), 0)
Expand All @@ -198,7 +198,7 @@ func (r *RandomArrayGenerator) Uint32(size int64, min, max uint32, prob float64)
dist := rand.New(rand.NewSource(r.seed + r.extra))
out := arrow.Uint32Traits.CastFromBytes(buffers[1].Bytes())
for i := int64(0); i < size; i++ {
out[i] = uint32(dist.Uint64n(uint64(max-min+1))) + min
out[i] = uint32(dist.Uint64n(uint64(max)-uint64(min)+1)) + min
}

data := array.NewData(arrow.PrimitiveTypes.Uint32, int(size), buffers, nil, int(nullcount), 0)
Expand Down Expand Up @@ -239,8 +239,14 @@ func (r *RandomArrayGenerator) Uint64(size int64, min, max uint64, prob float64)
r.extra++
dist := rand.New(rand.NewSource(r.seed + r.extra))
out := arrow.Uint64Traits.CastFromBytes(buffers[1].Bytes())
for i := int64(0); i < size; i++ {
out[i] = dist.Uint64n(max-min+1) + min
if max == math.MaxUint64 {
for i := int64(0); i < size; i++ {
out[i] = dist.Uint64() + min
}
} else {
for i := int64(0); i < size; i++ {
out[i] = dist.Uint64n(max-min+1) + min
}
}

data := array.NewData(arrow.PrimitiveTypes.Uint64, int(size), buffers, nil, int(nullcount), 0)
Expand Down Expand Up @@ -372,6 +378,8 @@ func (r *RandomArrayGenerator) Numeric(dt arrow.Type, size int64, min, max int64

func (r *RandomArrayGenerator) ArrayOf(dt arrow.Type, size int64, nullprob float64) arrow.Array {
switch dt {
case arrow.BOOL:
return r.Boolean(size, 0.50, nullprob)
case arrow.STRING:
return r.String(size, 0, 20, nullprob)
case arrow.LARGE_STRING:
Expand Down

0 comments on commit a1153a8

Please sign in to comment.