opt: Prune Delete operator input columns
Prune input columns that are not needed by a Delete operator. Needed
columns include returned columns and index key columns. All other columns
can be pruned.

Pruning Delete input columns interacts with the Delete "fast path" that
uses a range delete. When multiple column families are in use, the range
delete must cover all column families, so the "forDelete" flag is used to
construct spans that cover all columns rather than just the "needed"
columns. A new ConstructDeleteRange exec factory function sets this flag
correctly.
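
The execution-side factory implementation is not shown in this diff; as a
rough sketch only, such a method might look like the following. The
execFactory receiver, spansFromConstraint helper, and deleteRangeNode type
are illustrative assumptions rather than code from this commit.

// Hypothetical sketch: build a DeleteRange node whose spans cover every
// column family of each matching row, so one range deletion removes the
// whole row even when the table uses multiple column families.
func (ef *execFactory) ConstructDeleteRange(
	table cat.Table, needed exec.ColumnOrdinalSet, indexConstraint *constraint.Constraint,
) (exec.Node, error) {
	// forDelete=true widens each span to all column families instead of
	// only the families that contain "needed" columns.
	spans, err := spansFromConstraint(table, indexConstraint, needed, true /* forDelete */)
	if err != nil {
		return nil, err
	}
	return &deleteRangeNode{table: table, spans: spans}, nil
}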

After this change, optimizer deletes no longer need to stay behind a
feature flag, as all fast paths should now work at least as well as they
do with the heuristic planner.

Release note: None
andy-kimball committed Jan 28, 2019
1 parent 3e9a72a commit 5add467
Showing 20 changed files with 493 additions and 47 deletions.
8 changes: 8 additions & 0 deletions pkg/sql/delete.go
@@ -463,6 +463,9 @@ func canDeleteFastInterleaved(table *ImmutableTableDescriptor, fkTables row.FkTa
// canDeleteFast determines if the deletion of `rows` can be done
// without actually scanning them.
// This should be called after plan simplification for optimal results.
//
// This logic should be kept in sync with exec.Builder.canUseDeleteRange.
// TODO(andyk): Remove when the heuristic planner code is removed.
func canDeleteFast(ctx context.Context, source planNode, r *deleteRun) (*scanNode, bool) {
// Check that there are no secondary indexes, interleaving, FK
// references checks, etc., ie. there is no extra work to be done
@@ -505,6 +508,11 @@ func canDeleteFast(ctx context.Context, source planNode, r *deleteRun) (*scanNod
return nil, false
}

// Delete range does not support limits.
if scan.hardLimit != 0 {
return nil, false
}

return scan, true
}

39 changes: 37 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/delete
@@ -124,16 +124,25 @@ k v
5 6
7 8

statement count 4
DELETE FROM unindexed

# Delete of range with limit.
statement count 4
INSERT INTO unindexed VALUES (1, 2), (3, 4), (5, 6), (7, 8)

statement count 1
DELETE FROM unindexed WHERE k >= 4 ORDER BY k LIMIT 1

query II colnames,rowsort
SELECT k, v FROM unindexed
----
k v
1 2
3 4
5 6
7 8

statement count 4
statement count 3
DELETE FROM unindexed

query II colnames
@@ -241,3 +250,29 @@ query II
DELETE FROM t33361 RETURNING *; COMMIT
----
1 3

# Test that delete works with column families (no secondary indexes, so the fast path is used).
statement ok
CREATE TABLE family (
x INT PRIMARY KEY,
y INT,
FAMILY (x),
FAMILY (y)
);
INSERT INTO family VALUES (1, 1), (2, 2), (3, 3)

statement ok
BEGIN; ALTER TABLE family ADD COLUMN z INT CREATE FAMILY

# Check that delete works while the new column is being added
statement ok
DELETE FROM family WHERE x=2

statement ok
COMMIT

query III rowsort
SELECT x, y, z FROM family
----
1 1 NULL
3 3 NULL
6 changes: 0 additions & 6 deletions pkg/sql/logictest/testdata/logic_test/optimizer
@@ -123,14 +123,8 @@ SET experimental_optimizer_mutations = false
statement error pq: no data source matches prefix: t
UPDATE t SET v=(SELECT v+1 FROM t AS t2 WHERE t2.k=t.k)

statement error pq: no data source matches prefix: t
DELETE FROM t WHERE EXISTS(SELECT * FROM t AS t2 WHERE t2.k=t.k)

statement ok
SET experimental_optimizer_mutations = true

statement ok
UPDATE t SET v=(SELECT v+1 FROM t AS t2 WHERE t2.k=t.k)

statement ok
DELETE FROM t WHERE EXISTS(SELECT * FROM t AS t2 WHERE t2.k=t.k)
18 changes: 12 additions & 6 deletions pkg/sql/opt/bench/stub_factory.go
@@ -214,12 +214,6 @@ func (f *stubFactory) ConstructUpdate(
return struct{}{}, nil
}

func (f *stubFactory) ConstructCreateTable(
input exec.Node, schema cat.Schema, ct *tree.CreateTable,
) (exec.Node, error) {
return struct{}{}, nil
}

func (f *stubFactory) ConstructUpsert(
input exec.Node,
table cat.Table,
@@ -239,6 +233,18 @@ func (f *stubFactory) ConstructDelete(
return struct{}{}, nil
}

func (f *stubFactory) ConstructDeleteRange(
table cat.Table, needed exec.ColumnOrdinalSet, indexConstraint *constraint.Constraint,
) (exec.Node, error) {
return struct{}{}, nil
}

func (f *stubFactory) ConstructCreateTable(
input exec.Node, schema cat.Schema, ct *tree.CreateTable,
) (exec.Node, error) {
return struct{}{}, nil
}

func (f *stubFactory) ConstructSequenceSelect(seq cat.Sequence) (exec.Node, error) {
return struct{}{}, nil
}
8 changes: 8 additions & 0 deletions pkg/sql/opt/cat/table.go
@@ -34,6 +34,14 @@ type Table interface {
// information_schema tables.
IsVirtualTable() bool

// IsInterleaved returns true if any of this table's indexes are interleaved
// with index(es) from other table(s).
IsInterleaved() bool

// IsReferenced returns true if this table is referenced by at least one
// foreign key defined on another table (or this one if self-referential).
IsReferenced() bool

// ColumnCount returns the number of public columns in the table. Public
// columns are not currently being added or dropped from the table. This
// method should be used when mutation columns can be ignored (the common
69 changes: 65 additions & 4 deletions pkg/sql/opt/exec/execbuilder/relational_builder.go
@@ -1298,6 +1298,11 @@ func (b *Builder) buildUpsert(ups *memo.UpsertExpr) (execPlan, error) {
}

func (b *Builder) buildDelete(del *memo.DeleteExpr) (execPlan, error) {
// Check for the fast-path delete case that can use a range delete.
if b.canUseDeleteRange(del) {
return b.buildDeleteRange(del)
}

// Build the input query and ensure that the fetch columns are projected.
input, err := b.buildRelational(del.Input)
if err != nil {
@@ -1308,7 +1313,9 @@ func (b *Builder) buildDelete(del *memo.DeleteExpr) (execPlan, error) {
//
// TODO(andyk): Using ensureColumns here can result in an extra Render.
// Upgrade execution engine to not require this.
input, err = b.ensureColumns(input, del.FetchCols, nil, del.ProvidedPhysical().Ordering)
colList := make(opt.ColList, 0, len(del.FetchCols))
colList = appendColsWhenPresent(colList, del.FetchCols)
input, err = b.ensureColumns(input, colList, nil, del.ProvidedPhysical().Ordering)
if err != nil {
return execPlan{}, err
}
@@ -1330,6 +1337,58 @@ func (b *Builder) buildDelete(del *memo.DeleteExpr) (execPlan, error) {
return ep, nil
}

// canUseDeleteRange checks whether a logical Delete operator can be implemented
// by a fast delete range execution operator. This logic should be kept in sync
// with canDeleteFast.
func (b *Builder) canUseDeleteRange(del *memo.DeleteExpr) bool {
// If rows need to be returned from the Delete operator (i.e. RETURNING
// clause), no fast path is possible, because row values must be fetched.
if del.NeedResults {
return false
}

tab := b.mem.Metadata().Table(del.Table)
if tab.DeletableIndexCount() > 1 {
// Any secondary index prevents fast path, because separate delete batches
// must be formulated to delete rows from them.
return false
}
if tab.IsInterleaved() {
// There is a separate fast path for interleaved tables in sql/delete.go.
return false
}
if tab.IsReferenced() {
// If the table is referenced by other tables' foreign keys, no fast path
// is possible, because the integrity of those references must be checked.
return false
}

// Check for simple Scan input operator without a limit; anything else is not
// supported by a range delete.
if scan, ok := del.Input.(*memo.ScanExpr); !ok || scan.HardLimit != 0 {
return false
}

return true
}
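
// Illustration of canUseDeleteRange (hypothetical table t with only a primary
// key on k and no incoming foreign key references; not from this commit):
//   DELETE FROM t WHERE k > 5           -- fast path: input is a constrained Scan
//   DELETE FROM t WHERE k > 5 LIMIT 10  -- no fast path: the Scan has a hard limit
//   DELETE FROM t RETURNING k           -- no fast path: NeedResults is true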

// buildDeleteRange constructs a DeleteRange operator that deletes contiguous
// rows in the primary index. canUseDeleteRange should have already been called.
func (b *Builder) buildDeleteRange(del *memo.DeleteExpr) (execPlan, error) {
// canUseDeleteRange has already validated that input is a Scan operator.
scan := del.Input.(*memo.ScanExpr)
tab := b.mem.Metadata().Table(scan.Table)
needed, output := b.getColumns(scan.Cols, scan.Table)
res := execPlan{outputCols: output}

root, err := b.factory.ConstructDeleteRange(tab, needed, scan.Constraint)
if err != nil {
return execPlan{}, err
}
res.root = root
return res, nil
}

func (b *Builder) buildCreateTable(ct *memo.CreateTableExpr) (execPlan, error) {
var root exec.Node
if ct.Syntax.As() {
@@ -1385,9 +1444,11 @@ func (b *Builder) needProjection(
return nil, false
}
}
cols := make([]exec.ColumnOrdinal, len(colList))
for i, col := range colList {
cols[i] = input.getColumnOrdinal(col)
cols := make([]exec.ColumnOrdinal, 0, len(colList))
for _, col := range colList {
if col != 0 {
cols = append(cols, input.getColumnOrdinal(col))
}
}
return cols, true
}
35 changes: 23 additions & 12 deletions pkg/sql/opt/exec/execbuilder/testdata/delete
@@ -95,20 +95,31 @@ count · ·
· spans ALL
· filter v = 5

# TODO(andyk): Prune columns so that index-join is not necessary.
# Check DELETE with LIMIT clause that gets pushed into scan.
# The fast deleter should not be used, since it can't handle LIMIT.
query TTT
EXPLAIN DELETE FROM unindexed WHERE k > 5 LIMIT 10
----
count · ·
└── delete · ·
│ from unindexed
│ strategy deleter
└── scan · ·
· table unindexed@primary
· spans /6-
· limit 10

query TTT
EXPLAIN DELETE FROM indexed WHERE value = 5 LIMIT 10
----
count · ·
└── delete · ·
│ from indexed
│ strategy deleter
└── index-join · ·
│ table indexed@primary
└── scan · ·
· table indexed@indexed_value_idx
· spans /5-/6
· limit 10
count · ·
└── delete · ·
│ from indexed
│ strategy deleter
└── scan · ·
· table indexed@indexed_value_idx
· spans /5-/6
· limit 10

query TTT
EXPLAIN DELETE FROM indexed LIMIT 10
Expand All @@ -118,7 +129,7 @@ count · ·
│ from indexed
│ strategy deleter
└── scan · ·
· table indexed@primary
· table indexed@indexed_value_idx
· spans ALL
· limit 10

11 changes: 11 additions & 0 deletions pkg/sql/opt/exec/factory.go
@@ -319,6 +319,17 @@ type Factory interface {
input Node, table cat.Table, fetchCols ColumnOrdinalSet, rowsNeeded bool,
) (Node, error)

// ConstructDeleteRange creates a node that efficiently deletes contiguous
// rows stored in the given table's primary index. This fast path is only
// possible when certain conditions hold true (see canUseDeleteRange for more
// details). See the comment for ConstructScan for descriptions of the
// parameters, since DeleteRange combines Delete + Scan into a single operator.
ConstructDeleteRange(
table cat.Table,
needed ColumnOrdinalSet,
indexConstraint *constraint.Constraint,
) (Node, error)

// ConstructCreateTable returns a node that implements a CREATE TABLE
// statement.
ConstructCreateTable(input Node, schema cat.Schema, ct *tree.CreateTable) (Node, error)
4 changes: 3 additions & 1 deletion pkg/sql/opt/memo/expr_format.go
@@ -655,7 +655,9 @@ func (f *ExprFmtCtx) formatColList(
f.Buffer.Reset()
f.Buffer.WriteString(heading)
for _, col := range colList {
formatCol(f, "" /* label */, col, notNullCols, false /* omitType */)
if col != 0 {
formatCol(f, "" /* label */, col, notNullCols, false /* omitType */)
}
}
tp.Child(f.Buffer.String())
}
58 changes: 58 additions & 0 deletions pkg/sql/opt/norm/prune_cols.go
@@ -40,6 +40,30 @@ func (c *CustomFuncs) NeededColsExplain(private *memo.ExplainPrivate) opt.ColSet
return private.Props.ColSet()
}

// NeededColsMutation returns the columns needed by a mutation operator.
func (c *CustomFuncs) NeededColsMutation(private *memo.MutationPrivate) opt.ColSet {
var cols opt.ColSet
tabMeta := c.mem.Metadata().TableMeta(private.Table)

// If the operator returns results, then include all non-mutation columns in
// the table.
// TODO(andyk): The returned columns need to be pruned as well.
if private.NeedResults {
cols = tabMeta.Columns()
}

// Add in all strict key columns from all indexes, including mutation indexes,
// since it is necessary to delete rows even from indexes that are being
// added/dropped.
for i, n := 0, tabMeta.Table.DeletableIndexCount(); i < n; i++ {
cols.UnionWith(tabMeta.IndexKeyColumns(i))
}

// Map to mutation input columns.
cols = private.MapToInputCols(cols)
return cols
}
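
// Illustration of NeededColsMutation (hypothetical table, not from this
// commit): given
//   CREATE TABLE t (k INT PRIMARY KEY, v INT, w INT)
// a DELETE with no RETURNING clause needs only the input column mapped from
// k, the single index key column; v and w can be pruned from the input.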

// CanPruneCols returns true if the target expression has extra columns that are
// not needed at this level of the tree, and can be eliminated by one of the
// PruneCols rules. CanPruneCols uses the PruneCols property to determine the
@@ -109,6 +133,40 @@ func (c *CustomFuncs) PruneAggCols(
return aggs
}

// PruneMutationCols rewrites the given mutation private to no longer reference
// InsertCols, FetchCols, UpdateCols, or CheckCols that are not part of the
// neededCols set. The caller must have already done the analysis to prove that
// these columns are not needed.
// TODO(andyk): Add support for pruning column lists other than FetchCols.
func (c *CustomFuncs) PruneMutationCols(
private *memo.MutationPrivate, neededCols opt.ColSet,
) *memo.MutationPrivate {
newPrivate := *private
newPrivate.FetchCols = filterMutationList(newPrivate.FetchCols, neededCols)
return &newPrivate
}

// filterMutationList filters the given mutation list by setting any columns
// that are not in the neededCols set to zero. This indicates that those columns
// are not present in the input.
func filterMutationList(inList opt.ColList, neededCols opt.ColSet) opt.ColList {
var newList opt.ColList
for i, c := range inList {
if !neededCols.Contains(int(c)) {
// Copy-on-write the list for efficiency.
if newList == nil {
newList = make(opt.ColList, len(inList))
copy(newList, inList)
}
newList[i] = 0
}
}
if newList != nil {
return newList
}
return inList
}
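
// Illustration of filterMutationList (hypothetical values, not from this
// commit):
//   filterMutationList((7, 8, 9), {7, 9})    -> (7, 0, 9), backed by a new slice
//   filterMutationList((7, 8, 9), {7, 8, 9}) -> the original list, unmodified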

// pruneScanCols constructs a new Scan operator based on the given existing Scan
// operator, but projecting only the needed columns.
func (c *CustomFuncs) pruneScanCols(scan *memo.ScanExpr, neededCols opt.ColSet) memo.RelExpr {
