Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql/{plan,analyzer}: Add IndexedInSubqueryFilter for selecting the subquery results first and then making indexed lookups into the child. #299

Merged
merged 2 commits into from
Feb 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
20 changes: 20 additions & 0 deletions enginetest/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,26 @@ var QueryTests = []QueryTest{
{"first row"},
},
},
{
Query: "SELECT mytable.s FROM mytable WHERE mytable.i IN (SELECT othertable.i2 FROM othertable) ORDER BY mytable.i ASC",
Expected: []sql.Row{
{"first row"},
{"second row"},
{"third row"},
},
},
{
Query: "SELECT mytable.s FROM mytable WHERE mytable.i = (SELECT othertable.i2 FROM othertable WHERE othertable.s2 = 'second')",
Expected: []sql.Row{
{"second row"},
},
},
{
Query: "SELECT mytable.s FROM mytable WHERE mytable.i IN (SELECT othertable.i2 FROM othertable WHERE CONCAT(othertable.s2, ' row') = mytable.s)",
Expected: []sql.Row{
{"second row"},
},
},
{
Query: "SELECT s,i FROM MyTable ORDER BY 2",
Expected: []sql.Row{
Expand Down
38 changes: 28 additions & 10 deletions enginetest/query_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,14 @@ var PlanTests = []QueryPlanTest{
" └─ IndexedTableAccess(mytable on [mytable.i])\n" +
"",
},
{
Query: "SELECT s2, i2, i FROM othertable JOIN mytable ON i = i2",
ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" +
" └─ IndexedJoin(mytable.i = othertable.i2)\n" +
" ├─ Table(othertable)\n" +
" └─ IndexedTableAccess(mytable on [mytable.i])\n" +
"",
},
{
Query: "SELECT s2, i2, i FROM othertable JOIN mytable ON i = i2 LIMIT 1",
ExpectedPlan: "Limit(1)\n" +
Expand All @@ -196,19 +204,29 @@ var PlanTests = []QueryPlanTest{
"",
},
{
Query: "SELECT i, i2, s2 FROM othertable JOIN mytable ON i2 = i",
ExpectedPlan: "Project(mytable.i, othertable.i2, othertable.s2)\n" +
" └─ IndexedJoin(othertable.i2 = mytable.i)\n" +
" ├─ Table(othertable)\n" +
" └─ IndexedTableAccess(mytable on [mytable.i])\n" +
Query: "SELECT mytable.i, mytable.s FROM mytable WHERE mytable.i = (SELECT i2 FROM othertable LIMIT 1)",
ExpectedPlan: "IndexedInSubqueryFilter(mytable.i IN ((Limit(1)\n" +
" └─ Project(othertable.i2)\n" +
" └─ Table(othertable)\n" +
")))\n" +
" └─ IndexedTableAccess(mytable on [mytable.i])\n" +
"",
},
{
Query: "SELECT s2, i2, i FROM othertable JOIN mytable ON i2 = i",
ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" +
" └─ IndexedJoin(othertable.i2 = mytable.i)\n" +
" ├─ Table(othertable)\n" +
" └─ IndexedTableAccess(mytable on [mytable.i])\n" +
Query: "SELECT mytable.i, mytable.s FROM mytable WHERE mytable.i IN (SELECT i2 FROM othertable)",
ExpectedPlan: "IndexedInSubqueryFilter(mytable.i IN ((Project(othertable.i2)\n" +
" └─ Table(othertable)\n" +
")))\n" +
" └─ IndexedTableAccess(mytable on [mytable.i])\n" +
"",
},
{
Query: "SELECT mytable.i, mytable.s FROM mytable WHERE mytable.i IN (SELECT i2 FROM othertable WHERE mytable.i = othertable.i2)",
ExpectedPlan: "Filter(mytable.i IN (Project(othertable.i2)\n" +
" └─ Filter(mytable.i = othertable.i2)\n" +
" └─ IndexedTableAccess(othertable on [othertable.i2])\n" +
"))\n" +
" └─ Table(mytable)\n" +
"",
},
{
Expand Down
116 changes: 116 additions & 0 deletions sql/analyzer/apply_indexes_for_subquery_comparisons.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package analyzer

import (
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/expression"
"github.com/dolthub/go-mysql-server/sql/plan"
)

// applyIndexesForSubqueryComparisons converts a `Filter(id = (SELECT ...),
// Child)` or a `Filter(id in (SELECT ...), Child)` to be iterated lookups on
// the Child instead. This analysis phase is currently very concrete. It only
// applies when:
// 1. There is a single `=` or `IN` expression in the Filter.
// 2. The Subquery is on the right hand side of the expression.
// 3. The left hand side is a GetField expression against the Child.
// 4. The Child is a *plan.ResolvedTable.
// 5. The referenced field in the Child is indexed.
func applyIndexesForSubqueryComparisons(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Scope) (sql.Node, error) {
aliases, err := getTableAliases(n, scope)
if err != nil {
return nil, err
}

return plan.TransformUp(n, func(node sql.Node) (sql.Node, error) {
switch node := node.(type) {
case *plan.Filter:
var replacement sql.Node
if eq, isEqual := node.Expression.(*expression.Equals); isEqual {
replacement = getIndexedInSubqueryFilter(ctx, a, eq.Left(), eq.Right(), node, true, scope, aliases)
} else if is, isInSubquery := node.Expression.(*plan.InSubquery); isInSubquery {
replacement = getIndexedInSubqueryFilter(ctx, a, is.Left, is.Right, node, false, scope, aliases)
}
if replacement != nil {
return replacement, nil
}
}
return node, nil
})
}

func getIndexedInSubqueryFilter(ctx *sql.Context, a *Analyzer, left, right sql.Expression, node *plan.Filter, equals bool, scope *Scope, tableAliases TableAliases) sql.Node {
gf, isGetField := left.(*expression.GetField)
subq, isSubquery := right.(*plan.Subquery)
rt, isResolved := node.Child.(*plan.ResolvedTable)
if !isGetField || !isSubquery || !isResolved {
return nil
}
referencesChildRow := nodeHasGetFieldReferenceBetween(subq.Query, len(scope.Schema()), len(scope.Schema())+len(node.Child.Schema()))
if referencesChildRow {
return nil
}
indexes, err := getIndexesForNode(ctx, a, rt)
if err != nil {
return nil
}
defer indexes.releaseUsedIndexes()
idx := indexes.IndexByExpression(ctx, ctx.GetCurrentDatabase(), normalizeExpressions(tableAliases, gf)...)
if idx == nil {
return nil
}
keyExpr := gf.WithIndex(0)
ita := plan.NewIndexedTableAccess(rt, idx, []sql.Expression{keyExpr})
return plan.NewIndexedInSubqueryFilter(subq, ita, len(node.Child.Schema()), gf, equals)
}

// nodeHasGetFieldReferenceBetween returns `true` if the given sql.Node has a
// GetField expression anywhere within the tree that references an index in the
// range [low, high).
func nodeHasGetFieldReferenceBetween(n sql.Node, low, high int) bool {
var found bool
plan.Inspect(n, func(n sql.Node) bool {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can simplifiy this with plan.InspectExpressions (no need to do plan.Inspect and then sql.Inspect separately)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think InspectExpressions or InspectExpressionWithNode lets me do the shenanigans with excluding the walk into IndexedInSubqueryFilter nodes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh that's true

if er, ok := n.(sql.Expressioner); ok {
for _, e := range er.Expressions() {
if expressionHasGetFieldReferenceBetween(e, low, high) {
found = true
return false
}
}
}
// TODO: Descend into *plan.Subquery?
_, ok := n.(*plan.IndexedInSubqueryFilter)
return !ok
})
return found
}

// expressionHasGetFieldReferenceBetween returns `true` if the given sql.Expression
// has a GetField expression within it that references an index in the range
// [low, high).
func expressionHasGetFieldReferenceBetween(e sql.Expression, low, high int) bool {
var found bool
sql.Inspect(e, func(e sql.Expression) bool {
if gf, ok := e.(*expression.GetField); ok {
if gf.Index() >= low && gf.Index() < high {
found = true
return false
}
}
return true
})
return found
}
1 change: 1 addition & 0 deletions sql/analyzer/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ var OnceAfterDefault = []Rule{
{"optimize_joins", constructJoinPlan},
{"pushdown_filters", pushdownFilters},
{"subquery_indexes", applyIndexesFromOuterScope},
{"in_subquery_indexes", applyIndexesForSubqueryComparisons},
{"pushdown_projections", pushdownProjections},
{"erase_projection", eraseProjection},
// One final pass at analyzing subqueries to handle rewriting field indexes after changes to outer scope by
Expand Down
156 changes: 156 additions & 0 deletions sql/plan/indexed_in_subquery_filter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package plan

import (
"io"

"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/expression"
)

// NewIndexedInSubqueryFilter returns an IndexedInSubqueryFilter
// sql.Node. The Node implements the semantics of `Filter(field IN
// (SELECT ...), Child)`, but runs the subquery `SELECT` first, and
// the makes repeated calls to `child.RowIter()` to get the matching
// Child rows. Typically `child` should be an `IndexedTableAccess`,
// and for this to be the right tradeoff, the results from `subquery`
// should be much smaller than the unfiltered results from `child`.
// `padding` is the number of `null` columns which will be appended to
// an incoming `Row` in the `RowIter` call because calling `Eval` on
// the `Subquery`, since the `Subquery` was originally expecting to
// expect within the scope of something like `child`. `child` itself
// should expect `RowIter()` calls with a single column `Row`, which
// will be the results from the `Subquery`. `filterField` is a
// `GetField` expression which will extract the field from `child`
// results that should be matched against the `subquery` results; this
// condition is still checked here because `child` is allowed to
// return non-matching rows. `equals` true means this node will call
// `subquery.Eval` and expect a single result, whereas `equals` false
// means this node will call `subquery.EvalMultiple` and expect 0 or
// more results.
func NewIndexedInSubqueryFilter(subquery *Subquery, child sql.Node, padding int, filterField *expression.GetField, equals bool) sql.Node {
return &IndexedInSubqueryFilter{subquery, child, padding, filterField, equals}
}

type IndexedInSubqueryFilter struct {
subquery *Subquery
child sql.Node
padding int
getField *expression.GetField
equals bool
}

func (i *IndexedInSubqueryFilter) Resolved() bool {
return i.subquery.Resolved() && i.child.Resolved()
}

func (i *IndexedInSubqueryFilter) String() string {
pr := sql.NewTreePrinter()
_ = pr.WriteNode("IndexedInSubqueryFilter(%s IN (%s))", i.getField, i.subquery)
_ = pr.WriteChildren(i.child.String())
return pr.String()
}

func (i *IndexedInSubqueryFilter) DebugString() string {
pr := sql.NewTreePrinter()
_ = pr.WriteNode("IndexedInSubqueryFilter(%s IN (%s))", sql.DebugString(i.getField), sql.DebugString(i.subquery))
_ = pr.WriteChildren(sql.DebugString(i.child))
return pr.String()
}

func (i *IndexedInSubqueryFilter) Schema() sql.Schema {
return i.child.Schema()
}

func (i *IndexedInSubqueryFilter) Children() []sql.Node {
return []sql.Node{i.child}
}

func (i *IndexedInSubqueryFilter) WithChildren(children ...sql.Node) (sql.Node, error) {
if len(children) != 1 {
return nil, sql.ErrInvalidChildrenNumber.New(i, len(children), 1)
}
return NewIndexedInSubqueryFilter(i.subquery, children[0], i.padding, i.getField, i.equals), nil
}

func (i *IndexedInSubqueryFilter) RowIter(ctx *sql.Context, row sql.Row) (sql.RowIter, error) {
padded := make(sql.Row, len(row)+i.padding)
copy(padded[:], row[:])
var res []interface{}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sql.Row?

var err error
if i.equals {
resi, err := i.subquery.Eval(ctx, padded)
if err != nil {
return nil, err
}
res = append(res, resi)
} else {
res, err = i.subquery.EvalMultiple(ctx, padded)
if err != nil {
return nil, err
}
}
tupLits := make([]sql.Expression, len(res))
for j := range res {
tupLits[j] = expression.NewLiteral(res[j], i.subquery.Type())
}
expr := expression.NewInTuple(i.getField, expression.NewTuple(tupLits...))
return NewFilterIter(ctx, expr, &indexedInSubqueryIter{ctx, res, i.child, nil, 0}), nil
}

type indexedInSubqueryIter struct {
ctx *sql.Context
rows []interface{}
child sql.Node
cur sql.RowIter
i int
}

func (iter *indexedInSubqueryIter) Next() (sql.Row, error) {
var ret sql.Row
err := io.EOF
for err == io.EOF {
if iter.cur == nil {
if iter.i >= len(iter.rows) {
return nil, io.EOF
}
childIter, err := iter.child.RowIter(iter.ctx, sql.NewRow(iter.rows[iter.i]))
if err != nil {
return nil, err
}
iter.i += 1
iter.cur = childIter
}
ret, err = iter.cur.Next()
if err == io.EOF {
cerr := iter.cur.Close()
iter.cur = nil
if cerr != nil {
return nil, cerr
}
}
}
return ret, err
}

func (iter *indexedInSubqueryIter) Close() error {
if iter.cur != nil {
err := iter.cur.Close()
iter.cur = nil
return err
}
return nil
}