Skip to content

Commit

Permalink
add godoc strings and other extra comments
Browse files Browse the repository at this point in the history
  • Loading branch information
zeroshade committed Aug 25, 2022
1 parent 49317fd commit 672e4d8
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 25 deletions.
23 changes: 22 additions & 1 deletion go/arrow/compute/cast.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func newCastFunction(name string, outType arrow.Type) *castFunction {
}

func (cf *castFunction) AddTypeCast(in arrow.Type, kernel exec.ScalarKernel) error {
kernel.Init = exec.OptionsInit[CastOptions]
kernel.Init = exec.OptionsInit[kernels.CastState]
if err := cf.AddKernel(kernel); err != nil {
return err
}
Expand Down Expand Up @@ -167,10 +167,31 @@ func getBooleanCasts() []*castFunction {
return []*castFunction{fn}
}

// CastDatum casts val according to opts by invoking the function registered
// under the name "cast".
//
// It is shorthand for CallFunction(ctx, "cast", opts, val) and returns
// whatever that call returns. It is intended to work for Scalar, Array and
// ChunkedArray Datums.
func CastDatum(ctx context.Context, val Datum, opts *CastOptions) (Datum, error) {
	const castFuncName = "cast"
	return CallFunction(ctx, castFuncName, opts, val)
}

// CastArray casts a single Array according to opts.
//
// The array is wrapped in a Datum, passed through CastDatum, and the
// resulting Datum is converted back to an arrow.Array with MakeArray. Both
// the input Datum and the result Datum are released before returning; only
// the array produced by MakeArray is handed back (presumably the caller is
// responsible for releasing it — confirm against MakeArray's contract).
//
// On failure the error from CastDatum is returned unchanged with a nil array.
func CastArray(ctx context.Context, val arrow.Array, opts *CastOptions) (arrow.Array, error) {
	input := NewDatum(val)
	defer input.Release()

	result, err := CastDatum(ctx, input, opts)
	if err != nil {
		return nil, err
	}
	defer result.Release()

	// The result is asserted to be an *ArrayDatum without a check, so any
	// other Datum kind coming back from the cast would panic here.
	arrDatum := result.(*ArrayDatum)
	return arrDatum.MakeArray(), nil
}

// CanCast returns true if there is an implementation for casting an array
// or scalar value from the specified DataType to the other data type.
func CanCast(from, to arrow.DataType) bool {
fn, err := getCastFunction(to)
if err != nil {
Expand Down
17 changes: 5 additions & 12 deletions go/arrow/compute/cast_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,7 @@ func checkCast(t *testing.T, input arrow.Array, exp arrow.Array, opts compute.Ca
}

func checkCastFails(t *testing.T, input arrow.Array, opt compute.CastOptions) {
d := compute.NewDatum(input)
defer d.Release()
_, err := compute.CastDatum(context.Background(), d, &opt)
_, err := compute.CastArray(context.Background(), input, &opt)
assert.ErrorIs(t, err, arrow.ErrInvalid)

// for scalars, check that at least one of the input fails
Expand All @@ -183,19 +181,14 @@ func checkCastFails(t *testing.T, input arrow.Array, opt compute.CastOptions) {
}

func checkCastZeroCopy(t *testing.T, input arrow.Array, toType arrow.DataType, opts *compute.CastOptions) {
d := compute.NewDatum(input)
defer d.Release()
opts.ToType = toType
out, err := compute.CastDatum(context.Background(), d, opts)
out, err := compute.CastArray(context.Background(), input, opts)
assert.NoError(t, err)
defer out.Release()

outArr := out.(*compute.ArrayDatum).MakeArray()
defer outArr.Release()

assert.Len(t, outArr.Data().Buffers(), len(input.Data().Buffers()))
for i := range outArr.Data().Buffers() {
assertBufferSame(t, outArr, input, i)
assert.Len(t, out.Data().Buffers(), len(input.Data().Buffers()))
for i := range out.Data().Buffers() {
assertBufferSame(t, out, input, i)
}
}

Expand Down
15 changes: 15 additions & 0 deletions go/arrow/compute/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,14 +307,29 @@ func (s *ScalarFunction) Execute(ctx context.Context, opts FunctionOptions, args
return execInternal(ctx, s, opts, -1, args...)
}

// MetaFunctionImpl is the signature required of the implementation backing
// a MetaFunction, i.e. a function that dispatches to another function
// instead of doing the work itself.
//
// It receives a context, the function options and the argument Datums, and
// returns the resulting Datum or an error.
type MetaFunctionImpl func(context.Context, FunctionOptions, ...Datum) (Datum, error)

// MetaFunction is a function which dispatches to other functions. Its impl
// must not be nil; NewMetaFunction panics when given a nil impl.
//
// For Array, ChunkedArray and Scalar datums, the implementation may rely on
// the execution of concrete function types, but it must handle other Datum
// kinds on its own.
type MetaFunction struct {
// baseFunction is embedded and holds the state shared by function types,
// such as the function's name.
baseFunction
// impl is the callback that performs the actual dispatching.
impl MetaFunctionImpl
}

// NewMetaFunction constructs a new MetaFunction which will call the provided
// impl for dispatching with the expected arity.
//
// Will panic if impl is nil.
func NewMetaFunction(name string, arity Arity, doc FunctionDoc, impl MetaFunctionImpl) Function {
if impl == nil {
panic("arrow/compute: cannot construct MetaFunction with nil impl")
}
return &MetaFunction{
baseFunction: baseFunction{
name: name,
Expand Down
2 changes: 1 addition & 1 deletion go/arrow/compute/internal/exec/span.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ func (a *ArraySpan) MakeData() arrow.ArrayData {
)

if a.Type.ID() == arrow.NULL {
nulls = int(length)
nulls = length
} else if len(a.Buffers[0].Buf) == 0 {
nulls = 0
}
Expand Down
6 changes: 6 additions & 0 deletions go/arrow/compute/internal/exec/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ func Min[T constraints.Ordered](a, b T) T {
return b
}

// OptionsInit should be used in the case where a KernelState is simply
// represented with a specific type by value (instead of pointer).
// This will initialize the KernelState as a value-copied instance of
// the passed in function options argument to ensure separation
// and allow the kernel to manipulate the options if necessary without
// any negative consequences since it will have its own copy of the options.
func OptionsInit[T any](_ *KernelCtx, args KernelInitArgs) (KernelState, error) {
if opts, ok := args.Options.(*T); ok {
return *opts, nil
Expand Down
6 changes: 4 additions & 2 deletions go/arrow/compute/internal/kernels/boolean_cast.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ func isNonZero[T exec.FixedWidthTypes](ctx *exec.KernelCtx, in []T, out []byte)
return nil
}

// GetBooleanCastKernels returns the slice of scalar kernels for casting
// values *to* a boolean type.
func GetBooleanCastKernels() []exec.ScalarKernel {
kernels := GetCommonCastKernels(arrow.BOOL, arrow.FixedWidthTypes.Boolean)
kernels = append(kernels, GetZeroCastKernel(arrow.BOOL,
Expand Down Expand Up @@ -74,12 +76,12 @@ func GetBooleanCastKernels() []exec.ScalarKernel {
var ex exec.ArrayKernelExec
switch ty.ID() {
case arrow.BINARY, arrow.STRING:
ex = ScalarUnaryNotNullBinaryArgBoolOut[int32](func(_ *exec.KernelCtx, b []byte) (bool, error) {
ex = ScalarUnaryNotNullBinaryArgBoolOut[int32](false, func(_ *exec.KernelCtx, b []byte) (bool, error) {
v := *(*string)(unsafe.Pointer(&b))
return strconv.ParseBool(v)
})
case arrow.LARGE_BINARY, arrow.LARGE_STRING:
ex = ScalarUnaryNotNullBinaryArgBoolOut[int64](func(_ *exec.KernelCtx, b []byte) (bool, error) {
ex = ScalarUnaryNotNullBinaryArgBoolOut[int64](false, func(_ *exec.KernelCtx, b []byte) (bool, error) {
v := *(*string)(unsafe.Pointer(&b))
return strconv.ParseBool(v)
})
Expand Down
31 changes: 24 additions & 7 deletions go/arrow/compute/internal/kernels/cast.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,16 @@ type CastOptions struct {

func (CastOptions) TypeName() string { return "CastOptions" }

// CastState is the kernel state for Cast functions. It is a type alias of
// CastOptions (not a distinct type), so the two names are interchangeable
// wherever either is expected.
type CastState = CastOptions

func ZeroCopyCastExec(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
// ZeroCopyCastExec is a kernel for performing a cast which can be executed
// as a zero-copy operation. It simply forwards the buffers to the output.
//
// This can be used for casting a type to itself, or for casts between
// equivalent representations such as Int32 and Date32.
func ZeroCopyCastExec(_ *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
dt := out.Type
*out = batch.Values[0].Array
out.Type = dt
Expand All @@ -58,6 +65,9 @@ func recursiveSetSelfAlloc(arr *exec.ArraySpan) {
}
}

// CastFromNull is a simple kernel for constructing an array of null values
// for the requested data type, allowing casting of an arrow.Null typed value
// to any other arbitrary data type.
func CastFromNull(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
arr := array.MakeArrayOfNull(exec.GetAllocator(ctx.Ctx), out.Type, int(batch.Len))
defer arr.Release()
Expand All @@ -67,7 +77,9 @@ func CastFromNull(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResul
return nil
}

func OutputAllNull(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
// OutputAllNull is a simple kernel that marks the output as consisting
// entirely of nulls by setting the output's null count to the length of the
// input batch.
//
// The kernel context is unused and the function always returns nil.
func OutputAllNull(_ *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
// Every slot is null: the null count equals the batch length.
out.Nulls = batch.Len
return nil
}
Expand All @@ -76,13 +88,18 @@ func canCastFromDict(id arrow.Type) bool {
return arrow.IsPrimitive(id) || arrow.IsBaseBinary(id) || arrow.IsFixedSizeBinary(id)
}

// GetZeroCastKernel returns a scalar kernel that performs a zero-copy cast
// by using ZeroCopyCastExec as its execution function. The kernel accepts a
// single input of inType and produces the given out type.
//
// The kernel is configured with exec.NullComputedNoPrealloc null handling
// and exec.MemNoPrealloc memory allocation. inID is not used by this
// function.
func GetZeroCastKernel(inID arrow.Type, inType exec.InputType, out exec.OutputType) exec.ScalarKernel {
	inputs := []exec.InputType{inType}
	kernel := exec.NewScalarKernel(inputs, out, ZeroCopyCastExec, nil)
	kernel.MemAlloc = exec.MemNoPrealloc
	kernel.NullHandling = exec.NullComputedNoPrealloc
	return kernel
}

// GetCommonCastKernels returns the list of kernels common to all types
// such as casting from null or from Extension types of the appropriate
// underlying type.
func GetCommonCastKernels(outID arrow.Type, outType arrow.DataType) (out []exec.ScalarKernel) {
out = make([]exec.ScalarKernel, 0, 2)

Expand All @@ -92,11 +109,11 @@ func GetCommonCastKernels(outID arrow.Type, outType arrow.DataType) (out []exec.
kernel.MemAlloc = exec.MemNoPrealloc
out = append(out, kernel)

if canCastFromDict(outID) {
// dictionary unpacking not implemented for boolean or nested types
// TODO dict cast
// panic(fmt.Errorf("%w: dictionary casting", arrow.ErrNotImplemented))
}
// if canCastFromDict(outID) {
// dictionary unpacking not implemented for boolean or nested types
// TODO dict cast
// panic(fmt.Errorf("%w: dictionary casting", arrow.ErrNotImplemented))
// }

// Cast from extension
return
Expand Down
30 changes: 28 additions & 2 deletions go/arrow/compute/internal/kernels/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ import (
"github.com/apache/arrow/go/v10/internal/bitutils"
)

// ScalarUnary returns a kernel for performing a unary operation on
// FixedWidth types which is implemented using the passed in function
// which will receive a slice containing the raw input data along with
// a slice to populate for the output data.
//
// Note that bool is not included in exec.FixedWidthTypes since it is
// represented as a bitmap, not as a slice of bool.
func ScalarUnary[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, []Arg0T, []OutT) error) exec.ArrayKernelExec {
return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error {
arg0 := in.Values[0].Array
Expand All @@ -31,6 +38,9 @@ func ScalarUnary[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, []Ar
}
}

// ScalarUnaryBoolOutput is like ScalarUnary only it is for cases of boolean
// output. The function should take in a slice of the input type and a slice
// of bytes to fill with the output boolean bitmap.
func ScalarUnaryBoolOutput[Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, []Arg0T, []byte) error) exec.ArrayKernelExec {
return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error {
arg0 := in.Values[0].Array
Expand All @@ -39,7 +49,15 @@ func ScalarUnaryBoolOutput[Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx,
}
}

func ScalarUnaryNotNullBinaryArgBoolOut[OffsetT int32 | int64](op func(*exec.KernelCtx, []byte) (bool, error)) exec.ArrayKernelExec {
// ScalarUnaryNotNullBinaryArgBoolOut creates a unary kernel that accepts
// a binary type input (Binary [offset int32], String [offset int32],
// LargeBinary [offset int64], LargeString [offset int64]) and returns
// a boolean output which is never null.
//
// It implements the handling to iterate the offsets and values calling
// the provided function on each byte slice. The provided default value
// will be used as the output for elements of the input that are null.
func ScalarUnaryNotNullBinaryArgBoolOut[OffsetT int32 | int64](defVal bool, op func(*exec.KernelCtx, []byte) (bool, error)) exec.ArrayKernelExec {
return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error {
var (
arg0 = in.Values[0].Array
Expand All @@ -59,13 +77,21 @@ func ScalarUnaryNotNullBinaryArgBoolOut[OffsetT int32 | int64](op func(*exec.Ker
bitutil.SetBitTo(outData, int(out.Offset)+outPos, res)
outPos++
}, func() {
bitutil.ClearBit(outData, int(out.Offset)+outPos)
bitutil.SetBitTo(outData, int(out.Offset)+outPos, defVal)
outPos++
})
return err
}
}

// ScalarUnaryNotNullBinaryArg creates a unary kernel that accepts
// a binary type input (Binary [offset int32], String [offset int32],
// LargeBinary [offset int64], LargeString [offset int64]) and returns
// a FixedWidthType output which is never null.
//
// It implements the handling to iterate the offsets and values calling
// the provided function on each byte slice. The zero value of the OutT
// will be used as the output for elements of the input that are null.
func ScalarUnaryNotNullBinaryArg[OutT exec.FixedWidthTypes, OffsetT int32 | int64](op func(*exec.KernelCtx, []byte) (OutT, error)) exec.ArrayKernelExec {
return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error {
var (
Expand Down

0 comments on commit 672e4d8

Please sign in to comment.