Skip to content

Commit

Permalink
opt: push limit into FK and self-joins in more cases
Browse files Browse the repository at this point in the history
Previously, a constant equality condition pushed into both sides of a
foreign key join or self-join would prevent a limit from being pushed
into the left side of the join. This was because the multiplicity
builder could not determine that the right filter would only remove
values that are also removed by the left filter. Without the join being
labelled as left-preserving, the limit could not be pushed down.

The multiplicity builder has been updated to recognize a few additional
cases where left rows are preserved in foreign key joins and self-joins,
allowing a limit to be pushed into the left side of the join. Currently,
the multiplicity builder only recognizes cases where corresponding left
and right columns are held equal to the same constant value. It is
possible to extend this to more complex inequalities and boolean
expressions, but this is left as a TODO for now.

Fixes #74419

Release note (performance improvement): A LIMIT can now be pushed below
a foreign key join or self-join in more cases, which may result in more
efficient query plans.
  • Loading branch information
mgartner committed Jan 31, 2022
1 parent b9e28cf commit 40390d6
Show file tree
Hide file tree
Showing 4 changed files with 386 additions and 18 deletions.
120 changes: 105 additions & 15 deletions pkg/sql/opt/memo/multiplicity_builder.go
Expand Up @@ -234,13 +234,18 @@ func filtersMatchLeftRowsAtMostOnce(left, right RelExpr, filters FiltersExpr) bo
// must come from the same foreign key.
//
// In both the self-join and the foreign key cases, the left columns must be
// not-null, and the right columns must be unfiltered.
// not-null, and the right columns must be either unfiltered, or the left and
// right must be Select expressions where the left side filters imply the right
// side filters and right columns are unfiltered in the right Select's input
// (see condition #3b in the comment for verifyFiltersAreValidEqualities).
//
// Why do the left columns have to be not-null and the right columns
// unfiltered? In both the self-join and the foreign-key cases, a non-null
// value in the left column guarantees a corresponding value in the right
// column. As long as no nulls have been added to the left column and no values
// have been removed from the right, this property will be valid.
// Why do the left columns have to be non-null, and the right columns unfiltered
// or filtered identically as their corresponding left column? In both the
// self-join and the foreign-key cases, a non-null value in the left column
// guarantees a corresponding value in the right column. As long as no nulls
// have been added to the left column and no values have been removed from the
// right that have not also been removed from the left, this property will be
// valid.
//
// Why do all foreign key columns in the foreign key case have to come from the
// same foreign key? Equalities on different foreign keys may each be
Expand Down Expand Up @@ -285,12 +290,16 @@ func filtersMatchAllLeftRows(left, right RelExpr, filters FiltersExpr) bool {
}

// verifyFiltersAreValidEqualities returns the set of equality columns in the
// right relation and true when all of the following conditions are satisfied:
// right relation and true when all the following conditions are satisfied:
//
// 1. All filters are equalities.
// 2. All equalities directly compare two columns.
// 3. All equalities contain one column from the left not-null columns, and
// one column from the right unfiltered columns.
// 3. All equalities x=y (or y=x) have x as a left non-null column and y as a
// right column, and either:
// a. y is an unfiltered column in the right expression, or
// b. both the left and right expressions are Selects; the left side
// filters imply the right side filters when replacing x with y; and y
// is an unfiltered column in the right Select's input.
// 4. All equality columns come from a base table.
// 5. All left columns come from a single table, and all right columns come
// from a single table.
Expand All @@ -304,10 +313,6 @@ func verifyFiltersAreValidEqualities(
var leftTab, rightTab opt.TableID
leftNotNullCols := left.Relational().NotNullCols
rightUnfilteredCols := deriveUnfilteredCols(right)
if rightUnfilteredCols.Empty() {
// There are no unfiltered columns from the right input.
return opt.ColSet{}, false
}

for i := range filters {
eq, _ := filters[i].Condition.(*EqExpr)
Expand All @@ -329,9 +334,21 @@ func verifyFiltersAreValidEqualities(
// Normalize leftColID to come from leftColIDs.
if !leftNotNullCols.Contains(leftColID) {
leftColID, rightColID = rightColID, leftColID
if !leftNotNullCols.Contains(leftColID) {
// Condition #3: Left column is not guaranteed to be non-null.
return opt.ColSet{}, false
}
}
if !leftNotNullCols.Contains(leftColID) || !rightUnfilteredCols.Contains(rightColID) {
// Condition #3: Columns don't come from both the left and right ColSets.

switch {
case rightUnfilteredCols.Contains(rightColID):
// Condition #3a: the right column is unfiltered.
case rightHasSingleFilterThatMatchesLeft(left, right, leftColID, rightColID):
// Condition #3b: The left and right are Selects where the left filters
// imply the right filters when replacing the left column with the right
// column, and the right column is unfiltered in the right Select's
// input.
default:
return opt.ColSet{}, false
}

Expand All @@ -356,6 +373,79 @@ func verifyFiltersAreValidEqualities(
return rightEqualityCols, true
}

// rightHasSingleFilterThatMatchesLeft reports whether the right input of a
// join is filtered in exactly the same way as the left input with respect to
// the given pair of join-equality columns. It returns true only when all of
// the following hold:
//
//   1. left and right are both Select expressions.
//   2. rightCol is an unfiltered column of the right Select's input.
//   3. The left Select contains a filter of the form leftCol=const.
//   4. The right Select has at most one filter, and that filter has the form
//      rightCol=const with the same const as the one found in (3).
//
// verifyFiltersAreValidEqualities relies on this function when trying to prove
// that every left row of a join finds a match on the right (condition #3b in
// that function's comment).
//
// TODO(mgartner): Generalize this to return true whenever the left filters
// imply the right filters once leftCol has been remapped to rightCol in the
// left filters. For instance, leftCol<10 implies rightCol<20 when the join
// filters hold leftCol and rightCol equal. partialidx.Implicator may be
// reusable for this. Note that naively substituting columns in a filter might
// not be valid when one of the columns has a composite type.
func rightHasSingleFilterThatMatchesLeft(left, right RelExpr, leftCol, rightCol opt.ColumnID) bool {
	// Condition #1: both join inputs must be Selects.
	leftSel, leftIsSelect := left.(*SelectExpr)
	rightSel, rightIsSelect := right.(*SelectExpr)
	if !leftIsSelect || !rightIsSelect {
		return false
	}

	// Condition #2: the right column must not already be filtered below the
	// right Select.
	if unfiltered := deriveUnfilteredCols(rightSel.Input); !unfiltered.Contains(rightCol) {
		return false
	}

	// Condition #4 (first half): any additional filter on the right side could
	// remove rows that the left side keeps, so give up if there is more than
	// one.
	if len(rightSel.Filters) > 1 {
		return false
	}

	// findConst looks through filters for a condition shaped like
	// (Eq (Var col) Const) and returns its Const operand, or nil when no such
	// condition exists. If several conditions qualify, the last one wins.
	findConst := func(filters FiltersExpr, col opt.ColumnID) *ConstExpr {
		var found *ConstExpr
		for i := range filters {
			item := &filters[i]
			if !item.ScalarProps().OuterCols.Contains(col) {
				// This filter does not reference col at all.
				continue
			}
			eq, _ := item.Condition.(*EqExpr)
			if eq == nil {
				continue
			}
			variable, _ := eq.Left.(*VariableExpr)
			constant, _ := eq.Right.(*ConstExpr)
			if variable != nil && variable.Col == col && constant != nil {
				found = constant
			}
		}
		return found
	}

	// Conditions #3 and #4 (second half): both sides must pin their column to
	// a constant. The right side is only inspected once the left side has
	// produced a constant.
	leftConst := findConst(leftSel.Filters, leftCol)
	if leftConst == nil {
		return false
	}
	rightConst := findConst(rightSel.Filters, rightCol)
	if rightConst == nil {
		return false
	}

	// The constants are compared by pointer identity.
	// NOTE(review): this presumably relies on the memo interning equal
	// constants into a single *ConstExpr -- confirm.
	return leftConst == rightConst
}

// checkSelfJoinCase returns true if all equalities in the given FiltersExpr
// are between columns from the same position in the same base table. Panics
// if verifyFilters is not checked first.
Expand Down
136 changes: 134 additions & 2 deletions pkg/sql/opt/memo/multiplicity_builder_test.go
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/opt/testutils/testcat"
"github.com/cockroachdb/cockroach/pkg/sql/parser"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -67,6 +68,7 @@ func TestGetJoinMultiplicity(t *testing.T) {

xyScan, xyCols := ob.xyScan()
xyScan2, xyCols2 := ob.xyScan()
xyScanFiltered, xyColsFiltered := ob.makeFilteredScan("xy")
uvScan, uvCols := ob.uvScan()
fkScan, fkCols := ob.fkScan()
abcScan, abcCols := ob.abcScan()
Expand Down Expand Up @@ -300,6 +302,108 @@ func TestGetJoinMultiplicity(t *testing.T) {
on: ob.makeFilters(ob.makeEquality(fkCols[0], xyCols2[0])),
expected: "left-rows(exactly-one), right-rows(zero-or-more)",
},
{ // 21
// SELECT * FROM fk_tab INNER JOIN xy ON r1 = x WHERE r1 = 5 AND x = 5;
joinOp: opt.InnerJoinOp,
left: ob.makeSelect(fkScan, ob.makeFilters(ob.makeConstEquality(fkCols[0], 5))),
right: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[0], 5))),
on: ob.makeFilters(ob.makeEquality(fkCols[0], xyCols[0])),
expected: "left-rows(exactly-one), right-rows(zero-or-more)",
},
{ // 22
// SELECT * FROM xy INNER JOIN xy AS xy2 ON xy.x = xy2.x WHERE xy.x = 5 AND xy2.x = 5;
joinOp: opt.InnerJoinOp,
left: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[0], 5))),
right: ob.makeSelect(xyScan2, ob.makeFilters(ob.makeConstEquality(xyCols2[0], 5))),
on: ob.makeFilters(ob.makeEquality(xyCols[0], xyCols2[0])),
expected: "left-rows(exactly-one), right-rows(exactly-one)",
},
{ // 23
// SELECT * FROM fk_tab INNER JOIN xy ON r1 = x WHERE r1 = 5 AND x >= 5;
joinOp: opt.InnerJoinOp,
left: ob.makeSelect(fkScan, ob.makeFilters(ob.makeConstEquality(fkCols[0], 5))),
right: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstInequality(xyCols[0], 5))),
on: ob.makeFilters(ob.makeEquality(fkCols[0], xyCols[0])),
expected: "left-rows(zero-or-one), right-rows(zero-or-more)",
},
{ // 24
// SELECT * FROM xy INNER JOIN xy AS xy2 ON xy.x = xy2.x WHERE xy.x = 5 AND xy2.x >= 5;
joinOp: opt.InnerJoinOp,
left: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[0], 5))),
right: ob.makeSelect(xyScan2, ob.makeFilters(ob.makeConstInequality(xyCols2[0], 5))),
on: ob.makeFilters(ob.makeEquality(xyCols[0], xyCols2[0])),
expected: "left-rows(zero-or-one), right-rows(zero-or-one)",
},
{ // 25
// SELECT * FROM fk_tab INNER JOIN xy ON r1 = x WHERE r1 = 5 AND y = 5;
joinOp: opt.InnerJoinOp,
left: ob.makeSelect(fkScan, ob.makeFilters(ob.makeConstEquality(fkCols[0], 5))),
right: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[1], 5))),
on: ob.makeFilters(ob.makeEquality(fkCols[0], xyCols[0])),
expected: "left-rows(zero-or-one), right-rows(zero-or-more)",
},
{ // 26
// SELECT * FROM xy INNER JOIN xy AS xy2 ON xy.x = xy2.x WHERE xy.x = 5 AND xy2.y = 5;
joinOp: opt.InnerJoinOp,
left: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[0], 5))),
right: ob.makeSelect(xyScan2, ob.makeFilters(ob.makeConstEquality(xyCols2[1], 5))),
on: ob.makeFilters(ob.makeEquality(xyCols[0], xyCols2[0])),
expected: "left-rows(zero-or-one), right-rows(zero-or-one)",
},
{ // 27
// SELECT * FROM fk_tab INNER JOIN (
// SELECT * FROM xy WHERE x = 5 LIMIT 10
// ) AS xy2 ON r1 = x
// WHERE xy.x = 5;
joinOp: opt.InnerJoinOp,
left: ob.makeSelect(fkScan, ob.makeFilters(ob.makeConstEquality(fkCols[0], 5))),
right: ob.makeSelect(xyScanFiltered, ob.makeFilters(
ob.makeConstEquality(xyColsFiltered[0], 5),
)),
on: ob.makeFilters(ob.makeEquality(fkCols[0], xyColsFiltered[0])),
expected: "left-rows(zero-or-one), right-rows(zero-or-more)",
},
{ // 28
// SELECT * FROM xy INNER JOIN (
// SELECT * FROM xy WHERE x = 5 LIMIT 10
// ) AS xy2 ON xy.x = xy2.x
// WHERE xy.x = 5;
joinOp: opt.InnerJoinOp,
left: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[0], 5))),
right: ob.makeSelect(xyScanFiltered, ob.makeFilters(
ob.makeConstEquality(xyColsFiltered[0], 5),
)),
on: ob.makeFilters(ob.makeEquality(xyCols[0], xyColsFiltered[0])),
expected: "left-rows(zero-or-one), right-rows(exactly-one)",
},
{ // 29
// SELECT * FROM fk_tab INNER JOIN (
// SELECT * FROM xy WHERE x = 5 AND y = 2
// ) AS xy2 ON r1 = x
// WHERE xy.x = 5;
joinOp: opt.InnerJoinOp,
left: ob.makeSelect(fkScan, ob.makeFilters(ob.makeConstEquality(fkCols[0], 5))),
right: ob.makeSelect(xyScan, ob.makeFilters(
ob.makeConstEquality(xyCols[0], 5),
ob.makeConstEquality(xyCols[1], 2),
)),
on: ob.makeFilters(ob.makeEquality(fkCols[0], xyCols[0])),
expected: "left-rows(zero-or-one), right-rows(zero-or-more)",
},
{ // 30
// SELECT * FROM xy INNER JOIN (
// SELECT * FROM xy WHERE x = 5 AND y = 2
// ) AS xy2 ON xy.x = xy2.x
// WHERE xy.x = 5;
joinOp: opt.InnerJoinOp,
left: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[0], 5))),
right: ob.makeSelect(xyScan2, ob.makeFilters(
ob.makeConstEquality(xyCols2[0], 5),
ob.makeConstEquality(xyCols2[1], 2),
)),
on: ob.makeFilters(ob.makeEquality(xyCols[0], xyCols2[0])),
expected: "left-rows(zero-or-one), right-rows(exactly-one)",
},
}

for i, tc := range testCases {
Expand All @@ -308,7 +412,7 @@ func TestGetJoinMultiplicity(t *testing.T) {
joinWithMult, _ := join.(joinWithMultiplicity)
multiplicity := joinWithMult.getMultiplicity()
if multiplicity.Format(tc.joinOp) != tc.expected {
t.Fatalf("\nexpected: %s\nactual: %s", tc.expected, multiplicity.Format(tc.joinOp))
t.Errorf("\nexpected: %s\nactual: %s", tc.expected, multiplicity.Format(tc.joinOp))
}
})
}
Expand Down Expand Up @@ -355,6 +459,18 @@ func (ob *testOpBuilder) createTables(stmts string) {
}

// makeScan builds a scan of the named table by delegating to makeScanImpl with
// filtered=false. It returns the scan expression together with the variable
// expressions produced by makeScanImpl.
func (ob *testOpBuilder) makeScan(tableName tree.Name) (scan RelExpr, vars []*VariableExpr) {
	const filtered = false
	scan, vars = ob.makeScanImpl(tableName, filtered)
	return scan, vars
}

// makeFilteredScan builds a scan of the named table by delegating to
// makeScanImpl with filtered=true. It returns the scan expression together
// with the variable expressions produced by makeScanImpl.
func (ob *testOpBuilder) makeFilteredScan(
	tableName tree.Name,
) (scan RelExpr, vars []*VariableExpr) {
	const filtered = true
	scan, vars = ob.makeScanImpl(tableName, filtered)
	return scan, vars
}

func (ob *testOpBuilder) makeScanImpl(
tableName tree.Name, filtered bool,
) (scan RelExpr, vars []*VariableExpr) {
tn := tree.NewUnqualifiedTableName(tableName)
tab := ob.cat.Table(tn)
tabID := ob.mem.Metadata().AddTable(tab, tn)
Expand All @@ -365,7 +481,11 @@ func (ob *testOpBuilder) makeScan(tableName tree.Name) (scan RelExpr, vars []*Va
newVar := ob.mem.MemoizeVariable(col)
vars = append(vars, newVar)
}
return ob.mem.MemoizeScan(&ScanPrivate{Table: tabID, Cols: cols}), vars
sp := &ScanPrivate{Table: tabID, Cols: cols}
if filtered {
sp.HardLimit = 10
}
return ob.mem.MemoizeScan(sp), vars
}

func (ob *testOpBuilder) xyScan() (scan RelExpr, vars []*VariableExpr) {
Expand All @@ -392,6 +512,10 @@ func (ob *testOpBuilder) oneNullMultiColFKScan() (scan RelExpr, vars []*Variable
return ob.makeScan("one_null_multi_col_fk_tab")
}

// makeSelect memoizes and returns a Select expression that applies the given
// filters on top of input.
func (ob *testOpBuilder) makeSelect(input RelExpr, filters FiltersExpr) RelExpr {
	sel := ob.mem.MemoizeSelect(input, filters)
	return sel
}

// makeInnerJoin memoizes and returns an inner join of left and right with the
// given ON filters, using EmptyJoinPrivate as the join's private data.
func (ob *testOpBuilder) makeInnerJoin(left, right RelExpr, on FiltersExpr) RelExpr {
	join := ob.mem.MemoizeInnerJoin(left, right, on, EmptyJoinPrivate)
	return join
}
Expand Down Expand Up @@ -433,6 +557,14 @@ func (ob *testOpBuilder) makeEquality(left, right *VariableExpr) opt.ScalarExpr
return ob.mem.MemoizeEq(left, right)
}

// makeConstEquality memoizes and returns the scalar expression v = c, where c
// is turned into an integer constant expression of type types.Int.
func (ob *testOpBuilder) makeConstEquality(v *VariableExpr, c int) opt.ScalarExpr {
	datum := tree.NewDInt(tree.DInt(c))
	constant := ob.mem.MemoizeConst(datum, types.Int)
	return ob.mem.MemoizeEq(v, constant)
}

// makeConstInequality memoizes and returns the scalar expression v >= c, where
// c is turned into an integer constant expression of type types.Int. Despite
// the general name, the only comparison it builds is greater-than-or-equal.
func (ob *testOpBuilder) makeConstInequality(v *VariableExpr, c int) opt.ScalarExpr {
	datum := tree.NewDInt(tree.DInt(c))
	constant := ob.mem.MemoizeConst(datum, types.Int)
	return ob.mem.MemoizeGe(v, constant)
}

func (ob *testOpBuilder) makeFilters(conditions ...opt.ScalarExpr) (filters FiltersExpr) {
for i := range conditions {
filtersItem := FiltersItem{Condition: conditions[i]}
Expand Down

0 comments on commit 40390d6

Please sign in to comment.