Refactor, add expressions, add filter node.
cube2222 committed Aug 7, 2023
1 parent d49fbe7 commit 1bb9bbc
Showing 7 changed files with 356 additions and 14 deletions.
18 changes: 13 additions & 5 deletions arrowexec/execution/execution.go
@@ -6,10 +6,18 @@ import (
"github.com/apache/arrow/go/v13/arrow"
)

const BatchSize = 16 * 1024

type Context context.Context
type ProduceContext context.Context
// All nodes will try to create batches of approximately this size. Different sizes are allowed.
const IdealBatchSize = 16 * 1024

type Context struct {
Context context.Context
// TODO: We'll also need the variable context here.
// Maybe instead of storing a linked list of lists here, we should store references and indices of the underlying arrays?
// Basically, store a reference to each parent scope, the relevant record, and entry index of the value in that record.
}
type ProduceContext struct {
Context
}

type Node interface {
Run(ctx Context, produce ProduceFunc) error
@@ -20,7 +28,7 @@ type NodeWithMeta struct {
Schema *arrow.Schema
}

type ProduceFunc func(ctx ProduceContext, record Record) error
type ProduceFunc func(produceCtx ProduceContext, record Record) error

type Record struct {
arrow.Record
39 changes: 39 additions & 0 deletions arrowexec/execution/expression.go
@@ -0,0 +1,39 @@
package execution

import (
"github.com/apache/arrow/go/v13/arrow"
"github.com/apache/arrow/go/v13/arrow/scalar"
)

type ScalarExpression interface {
EvaluateScalar(ctx Context) (scalar.Scalar, error)
}

type Expression interface {
Evaluate(ctx Context, record Record) (arrow.Array, error)
}

type RecordVariable struct {
index int
}

func (r *RecordVariable) Evaluate(ctx Context, record Record) (arrow.Array, error) {
return record.Column(r.index), nil
}

// TODO: Add ConstArray expression for testing.

type ConstArray struct {
Array arrow.Array
}

func (c *ConstArray) Evaluate(ctx Context, record Record) (arrow.Array, error) {
if c.Array.Len() != int(record.NumRows()) {
panic("const array length doesn't match record length")
}
return c.Array, nil
}

// type ParentScopeVariable struct {
//
// }
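
The commented-out ParentScopeVariable stub above, together with the TODO on execution.Context, hints at how values from enclosing scopes might be resolved. A minimal, hypothetical sketch of one possible shape follows, written as if added to the same execution package; the Variables field on Context and all names below are invented placeholders, not part of this commit:

// Hypothetical sketch, not part of this commit. Assumes execution.Context
// gains a Variables *VariableContext field, as the TODO in execution.go suggests.
type VariableContext struct {
	Parent *VariableContext // enclosing scope, nil at the outermost level
	Record Record           // record the enclosing scope is currently processing
	Row    int              // row index of the current value within that record
}

// ParentScopeVariable reads a single value from the N-th enclosing scope.
type ParentScopeVariable struct {
	ScopeDepth  int // how many scopes up to walk
	ColumnIndex int // column within that scope's record
}

func (v *ParentScopeVariable) EvaluateScalar(ctx Context) (scalar.Scalar, error) {
	vars := ctx.Variables // assumed field, see the note above
	for i := 0; i < v.ScopeDepth; i++ {
		vars = vars.Parent
	}
	return scalar.GetScalar(vars.Record.Column(v.ColumnIndex), vars.Row)
}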
4 changes: 4 additions & 0 deletions arrowexec/materialize/materialize.go
@@ -0,0 +1,4 @@
package materialize

// TODO: Here we'll want some code to turn a physical plan into an execution plan.
// We'll need a plan transformer, but we'll also need to e.g. translate octosql schemas into arrow schemas.
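
As a rough illustration of the schema-translation part of the TODO above, here is a minimal, hypothetical sketch. The octosql-side PhysicalField type is an invented stand-in (the real plan types will differ); only the Arrow calls are real APIs:

package materialize

import (
	"fmt"

	"github.com/apache/arrow/go/v13/arrow"
)

// PhysicalField is a placeholder for whatever the physical plan's schema exposes.
type PhysicalField struct {
	Name string
	Type string // e.g. "Int", "Float", "String", "Boolean"
}

// SchemaToArrow sketches the octosql-to-arrow schema translation mentioned above.
func SchemaToArrow(fields []PhysicalField) (*arrow.Schema, error) {
	arrowFields := make([]arrow.Field, len(fields))
	for i, f := range fields {
		var dt arrow.DataType
		switch f.Type {
		case "Int":
			dt = arrow.PrimitiveTypes.Int64
		case "Float":
			dt = arrow.PrimitiveTypes.Float64
		case "String":
			dt = arrow.BinaryTypes.String
		case "Boolean":
			dt = arrow.FixedWidthTypes.Boolean
		default:
			return nil, fmt.Errorf("unsupported type: %s", f.Type)
		}
		arrowFields[i] = arrow.Field{Name: f.Name, Type: dt, Nullable: false}
	}
	return arrow.NewSchema(arrowFields, nil), nil
}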
127 changes: 127 additions & 0 deletions arrowexec/nodes/filter.go
@@ -0,0 +1,127 @@
package nodes

import (
"fmt"

"github.com/apache/arrow/go/v13/arrow"
"github.com/apache/arrow/go/v13/arrow/array"
"github.com/apache/arrow/go/v13/arrow/compute"
"github.com/apache/arrow/go/v13/arrow/memory"
"github.com/cube2222/octosql/arrowexec/execution"
"golang.org/x/sync/errgroup"
)

// There are two Filter implementations here.
// The NaiveFilter just uses the Arrow library's filter function.
// Its advantage is that it supports all data formats and is
// a bit (~1.4x) faster if most of the rows are filtered out.
// The RebatchingFilter has a custom routine for filtering records; it
// re-batches the filtered records so that they aren't too far off the
// ideal batch size.
// It actually ends up being *much* (~3x) faster if only a few
// records are being filtered out.
// The break-even point for some naive integer arrays is at ~3.5% of records
// being filtered out.
//
// It's interesting, because the original idea behind the re-batching was
// that downstream operators should be faster if batches aren't too small.
// However, with most of the records filtered out, the workload for the downstream
// operators is so small that it doesn't really matter.

// NaiveFilter uses the Arrow library's selection function.
type NaiveFilter struct {
Source execution.NodeWithMeta
Predicate execution.Expression
}

func (f *NaiveFilter) Run(ctx execution.Context, produce execution.ProduceFunc) error {
return f.Source.Node.Run(ctx, func(produceCtx execution.ProduceContext, record execution.Record) error {
selection, err := f.Predicate.Evaluate(produceCtx.Context, record)
if err != nil {
return fmt.Errorf("couldn't evaluate filter predicate: %w", err)
}

out, err := compute.FilterRecordBatch(ctx.Context, record, selection, &compute.FilterOptions{
NullSelection: compute.SelectionDropNulls,
})
if err != nil {
return fmt.Errorf("couldn't filter record batch: %w", err)
}

if err := produce(produceCtx, execution.Record{Record: out}); err != nil {
return fmt.Errorf("couldn't produce record: %w", err)
}

return nil
})
}

// RebatchingFilter has a custom routine for filtering records; it re-batches the filtered records so that they aren't too far off the ideal batch size.
type RebatchingFilter struct {
Source execution.NodeWithMeta
Predicate execution.Expression
}

func (f *RebatchingFilter) Run(ctx execution.Context, produce execution.ProduceFunc) error {
recordBuilder := array.NewRecordBuilder(memory.NewGoAllocator(), f.Source.Schema) // TODO: Get allocator as argument.
if err := f.Source.Node.Run(ctx, func(produceCtx execution.ProduceContext, record execution.Record) error {
selection, err := f.Predicate.Evaluate(produceCtx.Context, record)
if err != nil {
return fmt.Errorf("couldn't evaluate filter predicate: %w", err)
}

g, _ := errgroup.WithContext(ctx.Context)
columns := record.Columns()
for i, column := range columns {
rewriter := MakeColumnRewriter(recordBuilder.Field(i), column)
g.Go(func() error {
Rewrite(selection, rewriter)
return nil
})
}
if err := g.Wait(); err != nil {
return err
}

// TODO: What if there are no fields...? This is a case that's generally unhandled right now everywhere. Need to add a count to the record struct.
if recordBuilder.Field(0).Len() > execution.IdealBatchSize/2 {
outRecord := recordBuilder.NewRecord()
if err := produce(produceCtx, execution.Record{Record: outRecord}); err != nil {
return fmt.Errorf("couldn't produce record: %w", err)
}
}
return nil
}); err != nil {
return fmt.Errorf("couldn't run source node: %w", err)
}

if recordBuilder.Field(0).Len() > 0 {
outRecord := recordBuilder.NewRecord()
if err := produce(execution.ProduceContext{Context: ctx}, execution.Record{Record: outRecord}); err != nil {
return fmt.Errorf("couldn't produce record: %w", err)
}
}

return nil
}

func MakeColumnRewriter(builder array.Builder, arr arrow.Array) func(rowIndex int) {
// TODO: Should this operate on row ranges instead of single rows? That would make low-selectivity workloads faster, and would also help with nested types.
switch builder.Type().ID() {
case arrow.INT64:
typedBuilder := builder.(*array.Int64Builder)
typedArr := arr.(*array.Int64)
return func(rowIndex int) {
typedBuilder.Append(typedArr.Value(rowIndex))
}
default:
panic(fmt.Errorf("unsupported type for filtering: %v", builder.Type().ID()))
}
}

func Rewrite(selection arrow.Array, rewriteFunc func(rowIndex int)) {
typedSelection := selection.(*array.Boolean)
for i := 0; i < typedSelection.Len(); i++ {
if typedSelection.Value(i) {
rewriteFunc(i)
}
}
}
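
The TODO in MakeColumnRewriter asks whether rewriting should operate on row ranges instead of single rows. A hedged sketch of what a range-based variant could look like (not part of this commit), limited to int64 columns like the existing rewriter:

// RewriteRanges is a hypothetical range-based counterpart to Rewrite: it finds
// maximal runs of selected rows and hands each run to rewriteRangeFunc, so fewer
// per-row calls are made when many consecutive rows pass the predicate.
func RewriteRanges(selection arrow.Array, rewriteRangeFunc func(start, length int)) {
	typedSelection := selection.(*array.Boolean)
	i := 0
	for i < typedSelection.Len() {
		if !typedSelection.Value(i) {
			i++
			continue
		}
		start := i
		for i < typedSelection.Len() && typedSelection.Value(i) {
			i++
		}
		rewriteRangeFunc(start, i-start)
	}
}

// MakeInt64RangeRewriter is the matching int64 rewriter sketch;
// AppendValues copies a whole run of values in one call.
func MakeInt64RangeRewriter(builder *array.Int64Builder, arr *array.Int64) func(start, length int) {
	values := arr.Int64Values()
	return func(start, length int) {
		builder.AppendValues(values[start:start+length], nil)
	}
}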
156 changes: 156 additions & 0 deletions arrowexec/nodes/filter_test.go
@@ -0,0 +1,156 @@
package nodes

import (
"context"
"math/rand"
"testing"

"github.com/apache/arrow/go/v13/arrow"
"github.com/apache/arrow/go/v13/arrow/array"
"github.com/apache/arrow/go/v13/arrow/memory"
"github.com/cube2222/octosql/arrowexec/execution"
)

// selectivity in tenths of a percent (so 1000 means 100%)
const selectivity = 35
const rounds = 1024

var predicateArr = func() arrow.Array {
predicateBuilder := array.NewBooleanBuilder(memory.DefaultAllocator)
for i := 0; i < execution.IdealBatchSize; i++ {
if rand.Intn(1000) < selectivity {
predicateBuilder.Append(true)
} else {
predicateBuilder.Append(false)
}
}
return predicateBuilder.NewArray()
}()

func BenchmarkNaiveFilter(b *testing.B) {
groupBuilder := array.NewInt64Builder(memory.DefaultAllocator)
for i := 0; i < execution.IdealBatchSize; i++ {
groupBuilder.Append(1)
}
groupArr := groupBuilder.NewArray()
numbersBuilder := array.NewInt64Builder(memory.DefaultAllocator)
for i := 0; i < execution.IdealBatchSize; i++ {
numbersBuilder.Append(int64(i))
}
numbersArr := numbersBuilder.NewArray()

schema := arrow.NewSchema(
[]arrow.Field{
{Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: false},
{Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: false},
},
nil,
)

var node execution.NodeWithMeta
node = execution.NodeWithMeta{
Node: &TestNode{
Records: []execution.Record{{Record: array.NewRecord(schema, []arrow.Array{groupArr, numbersArr}, execution.IdealBatchSize)}},
Repetitions: rounds,
},
Schema: schema,
}
node = execution.NodeWithMeta{
Node: &NaiveFilter{
Source: node,
Predicate: &execution.ConstArray{
Array: predicateArr,
},
},
Schema: schema,
}
node = execution.NodeWithMeta{
Node: &GroupBy{
OutSchema: schema,
Source: node,
KeyExprs: []int{0},
AggregateConstructors: []func(dt arrow.DataType) Aggregate{MakeCount},
AggregateExprs: []int{1},
},
Schema: schema,
}

for i := 0; i < b.N; i++ {
var outRecords []execution.Record
var count int64
if err := node.Node.Run(execution.Context{Context: context.Background()}, func(produceCtx execution.ProduceContext, record execution.Record) error {
// log.Println(record)
outRecords = append(outRecords, record)
count += record.NumRows()
return nil
}); err != nil {
panic(err)
}
_ = outRecords // keep outRecords referenced without logging it
// log.Println("naive count:", count)
}
}

func BenchmarkRebatchingFilter(b *testing.B) {
groupBuilder := array.NewInt64Builder(memory.DefaultAllocator)
for i := 0; i < execution.IdealBatchSize; i++ {
groupBuilder.Append(1)
}
groupArr := groupBuilder.NewArray()
numbersBuilder := array.NewInt64Builder(memory.DefaultAllocator)
for i := 0; i < execution.IdealBatchSize; i++ {
numbersBuilder.Append(int64(i))
}
numbersArr := numbersBuilder.NewArray()

schema := arrow.NewSchema(
[]arrow.Field{
{Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: false},
{Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: false},
},
nil,
)

var node execution.NodeWithMeta
node = execution.NodeWithMeta{
Node: &TestNode{
Records: []execution.Record{{Record: array.NewRecord(schema, []arrow.Array{groupArr, numbersArr}, execution.IdealBatchSize)}},
Repetitions: rounds,
},
Schema: schema,
}
node = execution.NodeWithMeta{
Node: &RebatchingFilter{
Source: node,
Predicate: &execution.ConstArray{
Array: predicateArr,
},
},
Schema: schema,
}
node = execution.NodeWithMeta{
Node: &GroupBy{
OutSchema: schema,
Source: node,
KeyExprs: []int{0},
AggregateConstructors: []func(dt arrow.DataType) Aggregate{MakeCount},
AggregateExprs: []int{1},
},
Schema: schema,
}

for i := 0; i < b.N; i++ {
var outRecords []execution.Record
var count int64
if err := node.Node.Run(execution.Context{Context: context.Background()}, func(produceCtx execution.ProduceContext, record execution.Record) error {
// log.Println(record)
outRecords = append(outRecords, record)
count += record.NumRows()
return nil
}); err != nil {
panic(err)
}
_ = outRecords // keep outRecords referenced without logging it
// log.Println("rebatching count:", count)
}
}
10 changes: 5 additions & 5 deletions arrowexec/nodes/group_by.go
@@ -85,11 +85,11 @@ func (g *GroupBy) Run(ctx execution.Context, produce execution.ProduceFunc) erro
return err
}

for batchIndex := 0; batchIndex < (entryCount/execution.BatchSize)+1; batchIndex++ {
offset := batchIndex * execution.BatchSize
for batchIndex := 0; batchIndex < (entryCount/execution.IdealBatchSize)+1; batchIndex++ {
offset := batchIndex * execution.IdealBatchSize
length := entryCount - offset
if length > execution.BatchSize {
length = execution.BatchSize
if length > execution.IdealBatchSize {
length = execution.IdealBatchSize
}

columns := make([]arrow.Array, len(g.OutSchema.Fields()))
@@ -102,7 +102,7 @@ func (g *GroupBy) Run(ctx execution.Context, produce execution.ProduceFunc) erro

record := array.NewRecord(g.OutSchema, columns, int64(length))

if err := produce(ctx, execution.Record{Record: record}); err != nil {
if err := produce(execution.ProduceContext{Context: ctx}, execution.Record{Record: record}); err != nil {
return err
}
}