-
-
Notifications
You must be signed in to change notification settings - Fork 191
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #299 from dolthub/aaron/indexed-in-subquery-filter
sql/{plan,analyzer}: Add IndexedInSubqueryFilter for selecting the subquery results first and then making indexed lookups into the child.
- Loading branch information
Showing
6 changed files
with
446 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
// Copyright 2021 Dolthub, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package analyzer | ||
|
||
import ( | ||
"github.com/dolthub/go-mysql-server/sql" | ||
"github.com/dolthub/go-mysql-server/sql/expression" | ||
"github.com/dolthub/go-mysql-server/sql/plan" | ||
) | ||
|
||
// applyIndexesForSubqueryComparisons converts a `Filter(id = (SELECT ...), | ||
// Child)` or a `Filter(id in (SELECT ...), Child)` to be iterated lookups on | ||
// the Child instead. This analysis phase is currently very concrete. It only | ||
// applies when: | ||
// 1. There is a single `=` or `IN` expression in the Filter. | ||
// 2. The Subquery is on the right hand side of the expression. | ||
// 3. The left hand side is a GetField expression against the Child. | ||
// 4. The Child is a *plan.ResolvedTable. | ||
// 5. The referenced field in the Child is indexed. | ||
func applyIndexesForSubqueryComparisons(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Scope) (sql.Node, error) { | ||
aliases, err := getTableAliases(n, scope) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return plan.TransformUp(n, func(node sql.Node) (sql.Node, error) { | ||
switch node := node.(type) { | ||
case *plan.Filter: | ||
var replacement sql.Node | ||
if eq, isEqual := node.Expression.(*expression.Equals); isEqual { | ||
replacement = getIndexedInSubqueryFilter(ctx, a, eq.Left(), eq.Right(), node, true, scope, aliases) | ||
} else if is, isInSubquery := node.Expression.(*plan.InSubquery); isInSubquery { | ||
replacement = getIndexedInSubqueryFilter(ctx, a, is.Left, is.Right, node, false, scope, aliases) | ||
} | ||
if replacement != nil { | ||
return replacement, nil | ||
} | ||
} | ||
return node, nil | ||
}) | ||
} | ||
|
||
func getIndexedInSubqueryFilter(ctx *sql.Context, a *Analyzer, left, right sql.Expression, node *plan.Filter, equals bool, scope *Scope, tableAliases TableAliases) sql.Node { | ||
gf, isGetField := left.(*expression.GetField) | ||
subq, isSubquery := right.(*plan.Subquery) | ||
rt, isResolved := node.Child.(*plan.ResolvedTable) | ||
if !isGetField || !isSubquery || !isResolved { | ||
return nil | ||
} | ||
referencesChildRow := nodeHasGetFieldReferenceBetween(subq.Query, len(scope.Schema()), len(scope.Schema())+len(node.Child.Schema())) | ||
if referencesChildRow { | ||
return nil | ||
} | ||
indexes, err := getIndexesForNode(ctx, a, rt) | ||
if err != nil { | ||
return nil | ||
} | ||
defer indexes.releaseUsedIndexes() | ||
idx := indexes.IndexByExpression(ctx, ctx.GetCurrentDatabase(), normalizeExpressions(tableAliases, gf)...) | ||
if idx == nil { | ||
return nil | ||
} | ||
keyExpr := gf.WithIndex(0) | ||
ita := plan.NewIndexedTableAccess(rt, idx, []sql.Expression{keyExpr}) | ||
return plan.NewIndexedInSubqueryFilter(subq, ita, len(node.Child.Schema()), gf, equals) | ||
} | ||
|
||
// nodeHasGetFieldReferenceBetween returns `true` if the given sql.Node has a | ||
// GetField expression anywhere within the tree that references an index in the | ||
// range [low, high). | ||
func nodeHasGetFieldReferenceBetween(n sql.Node, low, high int) bool { | ||
var found bool | ||
plan.Inspect(n, func(n sql.Node) bool { | ||
if er, ok := n.(sql.Expressioner); ok { | ||
for _, e := range er.Expressions() { | ||
if expressionHasGetFieldReferenceBetween(e, low, high) { | ||
found = true | ||
return false | ||
} | ||
} | ||
} | ||
// TODO: Descend into *plan.Subquery? | ||
_, ok := n.(*plan.IndexedInSubqueryFilter) | ||
return !ok | ||
}) | ||
return found | ||
} | ||
|
||
// expressionHasGetFieldReferenceBetween returns `true` if the given sql.Expression | ||
// has a GetField expression within it that references an index in the range | ||
// [low, high). | ||
func expressionHasGetFieldReferenceBetween(e sql.Expression, low, high int) bool { | ||
var found bool | ||
sql.Inspect(e, func(e sql.Expression) bool { | ||
if gf, ok := e.(*expression.GetField); ok { | ||
if gf.Index() >= low && gf.Index() < high { | ||
found = true | ||
return false | ||
} | ||
} | ||
return true | ||
}) | ||
return found | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
// Copyright 2021 Dolthub, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package plan | ||
|
||
import ( | ||
"io" | ||
|
||
"github.com/dolthub/go-mysql-server/sql" | ||
"github.com/dolthub/go-mysql-server/sql/expression" | ||
) | ||
|
||
// NewIndexedInSubqueryFilter returns an IndexedInSubqueryFilter | ||
// sql.Node. The Node implements the semantics of `Filter(field IN | ||
// (SELECT ...), Child)`, but runs the subquery `SELECT` first, and | ||
// the makes repeated calls to `child.RowIter()` to get the matching | ||
// Child rows. Typically `child` should be an `IndexedTableAccess`, | ||
// and for this to be the right tradeoff, the results from `subquery` | ||
// should be much smaller than the unfiltered results from `child`. | ||
// `padding` is the number of `null` columns which will be appended to | ||
// an incoming `Row` in the `RowIter` call because calling `Eval` on | ||
// the `Subquery`, since the `Subquery` was originally expecting to | ||
// expect within the scope of something like `child`. `child` itself | ||
// should expect `RowIter()` calls with a single column `Row`, which | ||
// will be the results from the `Subquery`. `filterField` is a | ||
// `GetField` expression which will extract the field from `child` | ||
// results that should be matched against the `subquery` results; this | ||
// condition is still checked here because `child` is allowed to | ||
// return non-matching rows. `equals` true means this node will call | ||
// `subquery.Eval` and expect a single result, whereas `equals` false | ||
// means this node will call `subquery.EvalMultiple` and expect 0 or | ||
// more results. | ||
func NewIndexedInSubqueryFilter(subquery *Subquery, child sql.Node, padding int, filterField *expression.GetField, equals bool) sql.Node { | ||
return &IndexedInSubqueryFilter{subquery, child, padding, filterField, equals} | ||
} | ||
|
||
type IndexedInSubqueryFilter struct { | ||
subquery *Subquery | ||
child sql.Node | ||
padding int | ||
getField *expression.GetField | ||
equals bool | ||
} | ||
|
||
func (i *IndexedInSubqueryFilter) Resolved() bool { | ||
return i.subquery.Resolved() && i.child.Resolved() | ||
} | ||
|
||
func (i *IndexedInSubqueryFilter) String() string { | ||
pr := sql.NewTreePrinter() | ||
_ = pr.WriteNode("IndexedInSubqueryFilter(%s IN (%s))", i.getField, i.subquery) | ||
_ = pr.WriteChildren(i.child.String()) | ||
return pr.String() | ||
} | ||
|
||
func (i *IndexedInSubqueryFilter) DebugString() string { | ||
pr := sql.NewTreePrinter() | ||
_ = pr.WriteNode("IndexedInSubqueryFilter(%s IN (%s))", sql.DebugString(i.getField), sql.DebugString(i.subquery)) | ||
_ = pr.WriteChildren(sql.DebugString(i.child)) | ||
return pr.String() | ||
} | ||
|
||
func (i *IndexedInSubqueryFilter) Schema() sql.Schema { | ||
return i.child.Schema() | ||
} | ||
|
||
func (i *IndexedInSubqueryFilter) Children() []sql.Node { | ||
return []sql.Node{i.child} | ||
} | ||
|
||
func (i *IndexedInSubqueryFilter) WithChildren(children ...sql.Node) (sql.Node, error) { | ||
if len(children) != 1 { | ||
return nil, sql.ErrInvalidChildrenNumber.New(i, len(children), 1) | ||
} | ||
return NewIndexedInSubqueryFilter(i.subquery, children[0], i.padding, i.getField, i.equals), nil | ||
} | ||
|
||
func (i *IndexedInSubqueryFilter) RowIter(ctx *sql.Context, row sql.Row) (sql.RowIter, error) { | ||
padded := make(sql.Row, len(row)+i.padding) | ||
copy(padded[:], row[:]) | ||
var res []interface{} | ||
var err error | ||
if i.equals { | ||
resi, err := i.subquery.Eval(ctx, padded) | ||
if err != nil { | ||
return nil, err | ||
} | ||
res = append(res, resi) | ||
} else { | ||
res, err = i.subquery.EvalMultiple(ctx, padded) | ||
if err != nil { | ||
return nil, err | ||
} | ||
} | ||
tupLits := make([]sql.Expression, len(res)) | ||
for j := range res { | ||
tupLits[j] = expression.NewLiteral(res[j], i.subquery.Type()) | ||
} | ||
expr := expression.NewInTuple(i.getField, expression.NewTuple(tupLits...)) | ||
return NewFilterIter(ctx, expr, &indexedInSubqueryIter{ctx, res, i.child, nil, 0}), nil | ||
} | ||
|
||
type indexedInSubqueryIter struct { | ||
ctx *sql.Context | ||
rows []interface{} | ||
child sql.Node | ||
cur sql.RowIter | ||
i int | ||
} | ||
|
||
func (iter *indexedInSubqueryIter) Next() (sql.Row, error) { | ||
var ret sql.Row | ||
err := io.EOF | ||
for err == io.EOF { | ||
if iter.cur == nil { | ||
if iter.i >= len(iter.rows) { | ||
return nil, io.EOF | ||
} | ||
childIter, err := iter.child.RowIter(iter.ctx, sql.NewRow(iter.rows[iter.i])) | ||
if err != nil { | ||
return nil, err | ||
} | ||
iter.i += 1 | ||
iter.cur = childIter | ||
} | ||
ret, err = iter.cur.Next() | ||
if err == io.EOF { | ||
cerr := iter.cur.Close() | ||
iter.cur = nil | ||
if cerr != nil { | ||
return nil, cerr | ||
} | ||
} | ||
} | ||
return ret, err | ||
} | ||
|
||
func (iter *indexedInSubqueryIter) Close() error { | ||
if iter.cur != nil { | ||
err := iter.cur.Close() | ||
iter.cur = nil | ||
return err | ||
} | ||
return nil | ||
} |
Oops, something went wrong.