opt: Prune Delete operator input columns
Prune input columns that are not needed by a Delete operator. Needed
columns include returned columns and index key columns. All other columns
can be pruned.

Pruning Delete input columns interacts with the Delete "fast path" that
uses a range delete. When multiple column families are in use, the range
delete must cover all column families, so the "forDelete" flag is used to
construct spans that cover all columns rather than just the "needed"
columns. A new ConstructDeleteRange exec factory function sets this flag
correctly.
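
The execution-side factory implementation is not shown in this diff; as a
rough sketch only, such a method might look like the following. The
execFactory receiver, spansFromConstraint helper, and deleteRangeNode type
are illustrative assumptions rather than code from this commit.

// Hypothetical sketch: build a DeleteRange node whose spans cover every
// column family of each matching row, so one range deletion removes the
// whole row even when the table uses multiple column families.
func (ef *execFactory) ConstructDeleteRange(
	table cat.Table, needed exec.ColumnOrdinalSet, indexConstraint *constraint.Constraint,
) (exec.Node, error) {
	// forDelete=true widens each span to all column families instead of
	// only the families that contain "needed" columns.
	spans, err := spansFromConstraint(table, indexConstraint, needed, true /* forDelete */)
	if err != nil {
		return nil, err
	}
	return &deleteRangeNode{table: table, spans: spans}, nil
}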

After this change, optimizer deletes no longer need to stay behind a
feature flag, as all fast paths should now work at least as well as they
do with the heuristic planner.

Release note: None
andy-kimball committed Jan 28, 2019
1 parent 3e9a72a commit 5add467
Showing 20 changed files with 493 additions and 47 deletions.
8 changes: 8 additions & 0 deletions pkg/sql/delete.go
@@ -463,6 +463,9 @@ func canDeleteFastInterleaved(table *ImmutableTableDescriptor, fkTables row.FkTa
// canDeleteFast determines if the deletion of `rows` can be done
// without actually scanning them.
// This should be called after plan simplification for optimal results.
//
// This logic should be kept in sync with exec.Builder.canUseDeleteRange.
// TODO(andyk): Remove when the heuristic planner code is removed.
func canDeleteFast(ctx context.Context, source planNode, r *deleteRun) (*scanNode, bool) {
// Check that there are no secondary indexes, interleaving, FK
// references checks, etc., ie. there is no extra work to be done
@@ -505,6 +508,11 @@ func canDeleteFast(ctx context.Context, source planNode, r *deleteRun) (*scanNod
return nil, false
}

// Delete range does not support limits.
if scan.hardLimit != 0 {
return nil, false
}

return scan, true
}

39 changes: 37 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/delete
@@ -124,16 +124,25 @@ k v
5 6
7 8

statement count 4
DELETE FROM unindexed

# Delete of range with limit.
statement count 4
INSERT INTO unindexed VALUES (1, 2), (3, 4), (5, 6), (7, 8)

statement count 1
DELETE FROM unindexed WHERE k >= 4 ORDER BY k LIMIT 1

query II colnames,rowsort
SELECT k, v FROM unindexed
----
k v
1 2
3 4
5 6
7 8

statement count 4
statement count 3
DELETE FROM unindexed

query II colnames
@@ -241,3 +250,29 @@ query II
DELETE FROM t33361 RETURNING *; COMMIT
----
1 3

# Test that delete works with column families (no secondary indexes, so the fast path is used).
statement ok
CREATE TABLE family (
x INT PRIMARY KEY,
y INT,
FAMILY (x),
FAMILY (y)
);
INSERT INTO family VALUES (1, 1), (2, 2), (3, 3)

statement ok
BEGIN; ALTER TABLE family ADD COLUMN z INT CREATE FAMILY

# Check that delete works while the new column is being added
statement ok
DELETE FROM family WHERE x=2

statement ok
COMMIT

query III rowsort
SELECT x, y, z FROM family
----
1 1 NULL
3 3 NULL
6 changes: 0 additions & 6 deletions pkg/sql/logictest/testdata/logic_test/optimizer
@@ -123,14 +123,8 @@ SET experimental_optimizer_mutations = false
statement error pq: no data source matches prefix: t
UPDATE t SET v=(SELECT v+1 FROM t AS t2 WHERE t2.k=t.k)

statement error pq: no data source matches prefix: t
DELETE FROM t WHERE EXISTS(SELECT * FROM t AS t2 WHERE t2.k=t.k)

statement ok
SET experimental_optimizer_mutations = true

statement ok
UPDATE t SET v=(SELECT v+1 FROM t AS t2 WHERE t2.k=t.k)

statement ok
DELETE FROM t WHERE EXISTS(SELECT * FROM t AS t2 WHERE t2.k=t.k)
18 changes: 12 additions & 6 deletions pkg/sql/opt/bench/stub_factory.go
@@ -214,12 +214,6 @@ func (f *stubFactory) ConstructUpdate(
return struct{}{}, nil
}

func (f *stubFactory) ConstructCreateTable(
input exec.Node, schema cat.Schema, ct *tree.CreateTable,
) (exec.Node, error) {
return struct{}{}, nil
}

func (f *stubFactory) ConstructUpsert(
input exec.Node,
table cat.Table,
@@ -239,6 +233,18 @@ func (f *stubFactory) ConstructDelete(
return struct{}{}, nil
}

func (f *stubFactory) ConstructDeleteRange(
table cat.Table, needed exec.ColumnOrdinalSet, indexConstraint *constraint.Constraint,
) (exec.Node, error) {
return struct{}{}, nil
}

func (f *stubFactory) ConstructCreateTable(
input exec.Node, schema cat.Schema, ct *tree.CreateTable,
) (exec.Node, error) {
return struct{}{}, nil
}

func (f *stubFactory) ConstructSequenceSelect(seq cat.Sequence) (exec.Node, error) {
return struct{}{}, nil
}
8 changes: 8 additions & 0 deletions pkg/sql/opt/cat/table.go
@@ -34,6 +34,14 @@ type Table interface {
// information_schema tables.
IsVirtualTable() bool

// IsInterleaved returns true if any of this table's indexes are interleaved
// with index(es) from other table(s).
IsInterleaved() bool

// IsReferenced returns true if this table is referenced by at least one
// foreign key defined on another table (or this one if self-referential).
IsReferenced() bool

// ColumnCount returns the number of public columns in the table. Public
// columns are not currently being added or dropped from the table. This
// method should be used when mutation columns can be ignored (the common
69 changes: 65 additions & 4 deletions pkg/sql/opt/exec/execbuilder/relational_builder.go
@@ -1298,6 +1298,11 @@ func (b *Builder) buildUpsert(ups *memo.UpsertExpr) (execPlan, error) {
}

func (b *Builder) buildDelete(del *memo.DeleteExpr) (execPlan, error) {
// Check for the fast-path delete case that can use a range delete.
if b.canUseDeleteRange(del) {
return b.buildDeleteRange(del)
}

// Build the input query and ensure that the fetch columns are projected.
input, err := b.buildRelational(del.Input)
if err != nil {
@@ -1308,7 +1313,9 @@ func (b *Builder) buildDelete(del *memo.DeleteExpr) (execPlan, error) {
//
// TODO(andyk): Using ensureColumns here can result in an extra Render.
// Upgrade execution engine to not require this.
input, err = b.ensureColumns(input, del.FetchCols, nil, del.ProvidedPhysical().Ordering)
colList := make(opt.ColList, 0, len(del.FetchCols))
colList = appendColsWhenPresent(colList, del.FetchCols)
input, err = b.ensureColumns(input, colList, nil, del.ProvidedPhysical().Ordering)
if err != nil {
return execPlan{}, err
}
@@ -1330,6 +1337,58 @@ func (b *Builder) buildDelete(del *memo.DeleteExpr) (execPlan, error) {
return ep, nil
}

// canUseDeleteRange checks whether a logical Delete operator can be implemented
// by a fast delete range execution operator. This logic should be kept in sync
// with canDeleteFast.
func (b *Builder) canUseDeleteRange(del *memo.DeleteExpr) bool {
// If rows need to be returned from the Delete operator (i.e. RETURNING
// clause), no fast path is possible, because row values must be fetched.
if del.NeedResults {
return false
}

tab := b.mem.Metadata().Table(del.Table)
if tab.DeletableIndexCount() > 1 {
// Any secondary index prevents fast path, because separate delete batches
// must be formulated to delete rows from them.
return false
}
if tab.IsInterleaved() {
// There is a separate fast path for interleaved tables in sql/delete.go.
return false
}
if tab.IsReferenced() {
// If the table is referenced by other tables' foreign keys, no fast path
// is possible, because the integrity of those references must be checked.
return false
}

// Check for simple Scan input operator without a limit; anything else is not
// supported by a range delete.
if scan, ok := del.Input.(*memo.ScanExpr); !ok || scan.HardLimit != 0 {
return false
}

return true
}
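
// Illustration of canUseDeleteRange (hypothetical table t with only a primary
// key on k and no incoming foreign key references; not from this commit):
//   DELETE FROM t WHERE k > 5           -- fast path: input is a constrained Scan
//   DELETE FROM t WHERE k > 5 LIMIT 10  -- no fast path: the Scan has a hard limit
//   DELETE FROM t RETURNING k           -- no fast path: NeedResults is true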

// buildDeleteRange constructs a DeleteRange operator that deletes contiguous
// rows in the primary index. canUseDeleteRange should have already been called.
func (b *Builder) buildDeleteRange(del *memo.DeleteExpr) (execPlan, error) {
// canUseDeleteRange has already validated that input is a Scan operator.
scan := del.Input.(*memo.ScanExpr)
tab := b.mem.Metadata().Table(scan.Table)
needed, output := b.getColumns(scan.Cols, scan.Table)
res := execPlan{outputCols: output}

root, err := b.factory.ConstructDeleteRange(tab, needed, scan.Constraint)
if err != nil {
return execPlan{}, err
}
res.root = root
return res, nil
}

func (b *Builder) buildCreateTable(ct *memo.CreateTableExpr) (execPlan, error) {
var root exec.Node
if ct.Syntax.As() {
@@ -1385,9 +1444,11 @@ func (b *Builder) needProjection(
return nil, false
}
}
cols := make([]exec.ColumnOrdinal, len(colList))
for i, col := range colList {
cols[i] = input.getColumnOrdinal(col)
cols := make([]exec.ColumnOrdinal, 0, len(colList))
for _, col := range colList {
if col != 0 {
cols = append(cols, input.getColumnOrdinal(col))
}
}
return cols, true
}
35 changes: 23 additions & 12 deletions pkg/sql/opt/exec/execbuilder/testdata/delete
@@ -95,20 +95,31 @@ count · ·
· spans ALL
· filter v = 5

# TODO(andyk): Prune columns so that index-join is not necessary.
# Check DELETE with LIMIT clause that gets pushed into scan.
# The fast deleter should not be used, since it can't handle LIMIT.
query TTT
EXPLAIN DELETE FROM unindexed WHERE k > 5 LIMIT 10
----
count · ·
└── delete · ·
│ from unindexed
│ strategy deleter
└── scan · ·
· table unindexed@primary
· spans /6-
· limit 10

query TTT
EXPLAIN DELETE FROM indexed WHERE value = 5 LIMIT 10
----
count · ·
└── delete · ·
│ from indexed
│ strategy deleter
└── index-join · ·
│ table indexed@primary
└── scan · ·
· table indexed@indexed_value_idx
· spans /5-/6
· limit 10
count · ·
└── delete · ·
│ from indexed
│ strategy deleter
└── scan · ·
· table indexed@indexed_value_idx
· spans /5-/6
· limit 10

query TTT
EXPLAIN DELETE FROM indexed LIMIT 10
Expand All @@ -118,7 +129,7 @@ count · ·
│ from indexed
│ strategy deleter
└── scan · ·
· table indexed@primary
· table indexed@indexed_value_idx
· spans ALL
· limit 10

11 changes: 11 additions & 0 deletions pkg/sql/opt/exec/factory.go
@@ -319,6 +319,17 @@ type Factory interface {
input Node, table cat.Table, fetchCols ColumnOrdinalSet, rowsNeeded bool,
) (Node, error)

// ConstructDeleteRange creates a node that efficiently deletes contiguous
// rows stored in the given table's primary index. This fast path is only
// possible when certain conditions hold true (see canUseDeleteRange for more
// details). See the comment for ConstructScan for descriptions of the
// parameters, since DeleteRange combines Delete + Scan into a single operator.
ConstructDeleteRange(
table cat.Table,
needed ColumnOrdinalSet,
indexConstraint *constraint.Constraint,
) (Node, error)

// ConstructCreateTable returns a node that implements a CREATE TABLE
// statement.
ConstructCreateTable(input Node, schema cat.Schema, ct *tree.CreateTable) (Node, error)
4 changes: 3 additions & 1 deletion pkg/sql/opt/memo/expr_format.go
@@ -655,7 +655,9 @@ func (f *ExprFmtCtx) formatColList(
f.Buffer.Reset()
f.Buffer.WriteString(heading)
for _, col := range colList {
formatCol(f, "" /* label */, col, notNullCols, false /* omitType */)
if col != 0 {
formatCol(f, "" /* label */, col, notNullCols, false /* omitType */)
}
}
tp.Child(f.Buffer.String())
}
58 changes: 58 additions & 0 deletions pkg/sql/opt/norm/prune_cols.go
@@ -40,6 +40,30 @@ func (c *CustomFuncs) NeededColsExplain(private *memo.ExplainPrivate) opt.ColSet
return private.Props.ColSet()
}

// NeededColsMutation returns the columns needed by a mutation operator.
func (c *CustomFuncs) NeededColsMutation(private *memo.MutationPrivate) opt.ColSet {
var cols opt.ColSet
tabMeta := c.mem.Metadata().TableMeta(private.Table)

// If the operator returns results, then include all non-mutation columns in
// the table.
// TODO(andyk): The returned columns need to be pruned as well.
if private.NeedResults {
cols = tabMeta.Columns()
}

// Add in all strict key columns from all indexes, including mutation indexes,
// since it is necessary to delete rows even from indexes that are being
// added/dropped.
for i, n := 0, tabMeta.Table.DeletableIndexCount(); i < n; i++ {
cols.UnionWith(tabMeta.IndexKeyColumns(i))
}

// Map to mutation input columns.
cols = private.MapToInputCols(cols)
return cols
}
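
// Illustration of NeededColsMutation (hypothetical table, not from this
// commit): given
//   CREATE TABLE t (k INT PRIMARY KEY, v INT, w INT)
// a DELETE with no RETURNING clause needs only the input column mapped from
// k, the single index key column; v and w can be pruned from the input.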

// CanPruneCols returns true if the target expression has extra columns that are
// not needed at this level of the tree, and can be eliminated by one of the
// PruneCols rules. CanPruneCols uses the PruneCols property to determine the
@@ -109,6 +133,40 @@ func (c *CustomFuncs) PruneAggCols(
return aggs
}

// PruneMutationCols rewrites the given mutation private to no longer reference
// InsertCols, FetchCols, UpdateCols, or CheckCols that are not part of the
// neededCols set. The caller must have already done the analysis to prove that
// these columns are not needed.
// TODO(andyk): Add support for pruning column lists other than FetchCols.
func (c *CustomFuncs) PruneMutationCols(
private *memo.MutationPrivate, neededCols opt.ColSet,
) *memo.MutationPrivate {
newPrivate := *private
newPrivate.FetchCols = filterMutationList(newPrivate.FetchCols, neededCols)
return &newPrivate
}

// filterMutationList filters the given mutation list by setting any columns
// that are not in the neededCols set to zero. This indicates that those columns
// are not present in the input.
func filterMutationList(inList opt.ColList, neededCols opt.ColSet) opt.ColList {
var newList opt.ColList
for i, c := range inList {
if !neededCols.Contains(int(c)) {
// Copy-on-write the list for efficiency.
if newList == nil {
newList = make(opt.ColList, len(inList))
copy(newList, inList)
}
newList[i] = 0
}
}
if newList != nil {
return newList
}
return inList
}
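
// Illustration of filterMutationList (hypothetical values, not from this
// commit):
//   filterMutationList((7, 8, 9), {7, 9})    -> (7, 0, 9), backed by a new slice
//   filterMutationList((7, 8, 9), {7, 8, 9}) -> the original list, unmodified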

// pruneScanCols constructs a new Scan operator based on the given existing Scan
// operator, but projecting only the needed columns.
func (c *CustomFuncs) pruneScanCols(scan *memo.ScanExpr, neededCols opt.ColSet) memo.RelExpr {
