Skip to content

Commit

Permalink
Merge pull request #299 from dolthub/aaron/indexed-in-subquery-filter
Browse files Browse the repository at this point in the history
sql/{plan,analyzer}: Add IndexedInSubqueryFilter for selecting the subquery results first and then making indexed lookups into the child.
  • Loading branch information
reltuk committed Feb 15, 2021
2 parents 1acb0aa + 5240aea commit 620d95d
Show file tree
Hide file tree
Showing 6 changed files with 446 additions and 10 deletions.
20 changes: 20 additions & 0 deletions enginetest/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,26 @@ var QueryTests = []QueryTest{
{"first row"},
},
},
{
Query: "SELECT mytable.s FROM mytable WHERE mytable.i IN (SELECT othertable.i2 FROM othertable) ORDER BY mytable.i ASC",
Expected: []sql.Row{
{"first row"},
{"second row"},
{"third row"},
},
},
{
Query: "SELECT mytable.s FROM mytable WHERE mytable.i = (SELECT othertable.i2 FROM othertable WHERE othertable.s2 = 'second')",
Expected: []sql.Row{
{"second row"},
},
},
{
Query: "SELECT mytable.s FROM mytable WHERE mytable.i IN (SELECT othertable.i2 FROM othertable WHERE CONCAT(othertable.s2, ' row') = mytable.s)",
Expected: []sql.Row{
{"second row"},
},
},
{
Query: "SELECT s,i FROM MyTable ORDER BY 2",
Expected: []sql.Row{
Expand Down
38 changes: 28 additions & 10 deletions enginetest/query_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,14 @@ var PlanTests = []QueryPlanTest{
" └─ IndexedTableAccess(mytable on [mytable.i])\n" +
"",
},
{
Query: "SELECT s2, i2, i FROM othertable JOIN mytable ON i = i2",
ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" +
" └─ IndexedJoin(mytable.i = othertable.i2)\n" +
" ├─ Table(othertable)\n" +
" └─ IndexedTableAccess(mytable on [mytable.i])\n" +
"",
},
{
Query: "SELECT s2, i2, i FROM othertable JOIN mytable ON i = i2 LIMIT 1",
ExpectedPlan: "Limit(1)\n" +
Expand All @@ -196,19 +204,29 @@ var PlanTests = []QueryPlanTest{
"",
},
{
Query: "SELECT i, i2, s2 FROM othertable JOIN mytable ON i2 = i",
ExpectedPlan: "Project(mytable.i, othertable.i2, othertable.s2)\n" +
" └─ IndexedJoin(othertable.i2 = mytable.i)\n" +
" ├─ Table(othertable)\n" +
" └─ IndexedTableAccess(mytable on [mytable.i])\n" +
Query: "SELECT mytable.i, mytable.s FROM mytable WHERE mytable.i = (SELECT i2 FROM othertable LIMIT 1)",
ExpectedPlan: "IndexedInSubqueryFilter(mytable.i IN ((Limit(1)\n" +
" └─ Project(othertable.i2)\n" +
" └─ Table(othertable)\n" +
")))\n" +
" └─ IndexedTableAccess(mytable on [mytable.i])\n" +
"",
},
{
Query: "SELECT s2, i2, i FROM othertable JOIN mytable ON i2 = i",
ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" +
" └─ IndexedJoin(othertable.i2 = mytable.i)\n" +
" ├─ Table(othertable)\n" +
" └─ IndexedTableAccess(mytable on [mytable.i])\n" +
Query: "SELECT mytable.i, mytable.s FROM mytable WHERE mytable.i IN (SELECT i2 FROM othertable)",
ExpectedPlan: "IndexedInSubqueryFilter(mytable.i IN ((Project(othertable.i2)\n" +
" └─ Table(othertable)\n" +
")))\n" +
" └─ IndexedTableAccess(mytable on [mytable.i])\n" +
"",
},
{
Query: "SELECT mytable.i, mytable.s FROM mytable WHERE mytable.i IN (SELECT i2 FROM othertable WHERE mytable.i = othertable.i2)",
ExpectedPlan: "Filter(mytable.i IN (Project(othertable.i2)\n" +
" └─ Filter(mytable.i = othertable.i2)\n" +
" └─ IndexedTableAccess(othertable on [othertable.i2])\n" +
"))\n" +
" └─ Table(mytable)\n" +
"",
},
{
Expand Down
116 changes: 116 additions & 0 deletions sql/analyzer/apply_indexes_for_subquery_comparisons.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package analyzer

import (
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/expression"
"github.com/dolthub/go-mysql-server/sql/plan"
)

// applyIndexesForSubqueryComparisons converts a `Filter(id = (SELECT ...),
// Child)` or a `Filter(id in (SELECT ...), Child)` to be iterated lookups on
// the Child instead. This analysis phase is currently very concrete. It only
// applies when:
// 1. There is a single `=` or `IN` expression in the Filter.
// 2. The Subquery is on the right hand side of the expression.
// 3. The left hand side is a GetField expression against the Child.
// 4. The Child is a *plan.ResolvedTable.
// 5. The referenced field in the Child is indexed.
func applyIndexesForSubqueryComparisons(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Scope) (sql.Node, error) {
aliases, err := getTableAliases(n, scope)
if err != nil {
return nil, err
}

return plan.TransformUp(n, func(node sql.Node) (sql.Node, error) {
switch node := node.(type) {
case *plan.Filter:
var replacement sql.Node
if eq, isEqual := node.Expression.(*expression.Equals); isEqual {
replacement = getIndexedInSubqueryFilter(ctx, a, eq.Left(), eq.Right(), node, true, scope, aliases)
} else if is, isInSubquery := node.Expression.(*plan.InSubquery); isInSubquery {
replacement = getIndexedInSubqueryFilter(ctx, a, is.Left, is.Right, node, false, scope, aliases)
}
if replacement != nil {
return replacement, nil
}
}
return node, nil
})
}

func getIndexedInSubqueryFilter(ctx *sql.Context, a *Analyzer, left, right sql.Expression, node *plan.Filter, equals bool, scope *Scope, tableAliases TableAliases) sql.Node {
gf, isGetField := left.(*expression.GetField)
subq, isSubquery := right.(*plan.Subquery)
rt, isResolved := node.Child.(*plan.ResolvedTable)
if !isGetField || !isSubquery || !isResolved {
return nil
}
referencesChildRow := nodeHasGetFieldReferenceBetween(subq.Query, len(scope.Schema()), len(scope.Schema())+len(node.Child.Schema()))
if referencesChildRow {
return nil
}
indexes, err := getIndexesForNode(ctx, a, rt)
if err != nil {
return nil
}
defer indexes.releaseUsedIndexes()
idx := indexes.IndexByExpression(ctx, ctx.GetCurrentDatabase(), normalizeExpressions(tableAliases, gf)...)
if idx == nil {
return nil
}
keyExpr := gf.WithIndex(0)
ita := plan.NewIndexedTableAccess(rt, idx, []sql.Expression{keyExpr})
return plan.NewIndexedInSubqueryFilter(subq, ita, len(node.Child.Schema()), gf, equals)
}

// nodeHasGetFieldReferenceBetween returns `true` if the given sql.Node has a
// GetField expression anywhere within the tree that references an index in the
// range [low, high).
func nodeHasGetFieldReferenceBetween(n sql.Node, low, high int) bool {
var found bool
plan.Inspect(n, func(n sql.Node) bool {
if er, ok := n.(sql.Expressioner); ok {
for _, e := range er.Expressions() {
if expressionHasGetFieldReferenceBetween(e, low, high) {
found = true
return false
}
}
}
// TODO: Descend into *plan.Subquery?
_, ok := n.(*plan.IndexedInSubqueryFilter)
return !ok
})
return found
}

// expressionHasGetFieldReferenceBetween returns `true` if the given sql.Expression
// has a GetField expression within it that references an index in the range
// [low, high).
func expressionHasGetFieldReferenceBetween(e sql.Expression, low, high int) bool {
var found bool
sql.Inspect(e, func(e sql.Expression) bool {
if gf, ok := e.(*expression.GetField); ok {
if gf.Index() >= low && gf.Index() < high {
found = true
return false
}
}
return true
})
return found
}
1 change: 1 addition & 0 deletions sql/analyzer/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ var OnceAfterDefault = []Rule{
{"optimize_joins", constructJoinPlan},
{"pushdown_filters", pushdownFilters},
{"subquery_indexes", applyIndexesFromOuterScope},
{"in_subquery_indexes", applyIndexesForSubqueryComparisons},
{"pushdown_projections", pushdownProjections},
{"erase_projection", eraseProjection},
// One final pass at analyzing subqueries to handle rewriting field indexes after changes to outer scope by
Expand Down
156 changes: 156 additions & 0 deletions sql/plan/indexed_in_subquery_filter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package plan

import (
"io"

"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/expression"
)

// NewIndexedInSubqueryFilter returns an IndexedInSubqueryFilter
// sql.Node. The Node implements the semantics of `Filter(field IN
// (SELECT ...), Child)`, but runs the subquery `SELECT` first, and
// the makes repeated calls to `child.RowIter()` to get the matching
// Child rows. Typically `child` should be an `IndexedTableAccess`,
// and for this to be the right tradeoff, the results from `subquery`
// should be much smaller than the unfiltered results from `child`.
// `padding` is the number of `null` columns which will be appended to
// an incoming `Row` in the `RowIter` call because calling `Eval` on
// the `Subquery`, since the `Subquery` was originally expecting to
// expect within the scope of something like `child`. `child` itself
// should expect `RowIter()` calls with a single column `Row`, which
// will be the results from the `Subquery`. `filterField` is a
// `GetField` expression which will extract the field from `child`
// results that should be matched against the `subquery` results; this
// condition is still checked here because `child` is allowed to
// return non-matching rows. `equals` true means this node will call
// `subquery.Eval` and expect a single result, whereas `equals` false
// means this node will call `subquery.EvalMultiple` and expect 0 or
// more results.
func NewIndexedInSubqueryFilter(subquery *Subquery, child sql.Node, padding int, filterField *expression.GetField, equals bool) sql.Node {
return &IndexedInSubqueryFilter{subquery, child, padding, filterField, equals}
}

type IndexedInSubqueryFilter struct {
subquery *Subquery
child sql.Node
padding int
getField *expression.GetField
equals bool
}

func (i *IndexedInSubqueryFilter) Resolved() bool {
return i.subquery.Resolved() && i.child.Resolved()
}

func (i *IndexedInSubqueryFilter) String() string {
pr := sql.NewTreePrinter()
_ = pr.WriteNode("IndexedInSubqueryFilter(%s IN (%s))", i.getField, i.subquery)
_ = pr.WriteChildren(i.child.String())
return pr.String()
}

func (i *IndexedInSubqueryFilter) DebugString() string {
pr := sql.NewTreePrinter()
_ = pr.WriteNode("IndexedInSubqueryFilter(%s IN (%s))", sql.DebugString(i.getField), sql.DebugString(i.subquery))
_ = pr.WriteChildren(sql.DebugString(i.child))
return pr.String()
}

func (i *IndexedInSubqueryFilter) Schema() sql.Schema {
return i.child.Schema()
}

func (i *IndexedInSubqueryFilter) Children() []sql.Node {
return []sql.Node{i.child}
}

func (i *IndexedInSubqueryFilter) WithChildren(children ...sql.Node) (sql.Node, error) {
if len(children) != 1 {
return nil, sql.ErrInvalidChildrenNumber.New(i, len(children), 1)
}
return NewIndexedInSubqueryFilter(i.subquery, children[0], i.padding, i.getField, i.equals), nil
}

func (i *IndexedInSubqueryFilter) RowIter(ctx *sql.Context, row sql.Row) (sql.RowIter, error) {
padded := make(sql.Row, len(row)+i.padding)
copy(padded[:], row[:])
var res []interface{}
var err error
if i.equals {
resi, err := i.subquery.Eval(ctx, padded)
if err != nil {
return nil, err
}
res = append(res, resi)
} else {
res, err = i.subquery.EvalMultiple(ctx, padded)
if err != nil {
return nil, err
}
}
tupLits := make([]sql.Expression, len(res))
for j := range res {
tupLits[j] = expression.NewLiteral(res[j], i.subquery.Type())
}
expr := expression.NewInTuple(i.getField, expression.NewTuple(tupLits...))
return NewFilterIter(ctx, expr, &indexedInSubqueryIter{ctx, res, i.child, nil, 0}), nil
}

type indexedInSubqueryIter struct {
ctx *sql.Context
rows []interface{}
child sql.Node
cur sql.RowIter
i int
}

func (iter *indexedInSubqueryIter) Next() (sql.Row, error) {
var ret sql.Row
err := io.EOF
for err == io.EOF {
if iter.cur == nil {
if iter.i >= len(iter.rows) {
return nil, io.EOF
}
childIter, err := iter.child.RowIter(iter.ctx, sql.NewRow(iter.rows[iter.i]))
if err != nil {
return nil, err
}
iter.i += 1
iter.cur = childIter
}
ret, err = iter.cur.Next()
if err == io.EOF {
cerr := iter.cur.Close()
iter.cur = nil
if cerr != nil {
return nil, cerr
}
}
}
return ret, err
}

func (iter *indexedInSubqueryIter) Close() error {
if iter.cur != nil {
err := iter.cur.Close()
iter.cur = nil
return err
}
return nil
}

0 comments on commit 620d95d

Please sign in to comment.