From fe6585f6eab2f04a1fbbf2d9dae369e165f5b419 Mon Sep 17 00:00:00 2001 From: Aaron Son Date: Mon, 1 Mar 2021 12:44:50 -0800 Subject: [PATCH 1/6] sql/{plan,analyzer}: Cache subquery results when joining against a subquery. --- enginetest/queries.go | 8 ++ enginetest/query_plans.go | 38 +++++++++ sql/analyzer/resolve_subqueries.go | 29 +++++++ sql/analyzer/rules.go | 1 + sql/plan/cached_results.go | 129 +++++++++++++++++++++++++++++ 5 files changed, 205 insertions(+) create mode 100644 sql/plan/cached_results.go diff --git a/enginetest/queries.go b/enginetest/queries.go index 0c89d493a7..58c21f495e 100755 --- a/enginetest/queries.go +++ b/enginetest/queries.go @@ -204,6 +204,14 @@ var QueryTests = []QueryTest{ {"second row"}, }, }, + { + Query: "SELECT mytable.i, selfjoined.s FROM mytable LEFT JOIN (SELECT * FROM mytable) selfjoined ON mytable.i = selfjoined.i", + Expected: []sql.Row{ + {1, "first row"}, + {2, "second row"}, + {3, "third row"}, + }, + }, { Query: "SELECT s,i FROM MyTable ORDER BY 2", Expected: []sql.Row{ diff --git a/enginetest/query_plans.go b/enginetest/query_plans.go index 72a4d34f10..748691174f 100755 --- a/enginetest/query_plans.go +++ b/enginetest/query_plans.go @@ -226,6 +226,44 @@ var PlanTests = []QueryPlanTest{ " └─ IndexedTableAccess(othertable on [othertable.i2])\n" + "", }, + { + Query: "SELECT /*+ JOIN_ORDER(mytable, othertable) */ s2, i2, i FROM mytable INNER JOIN (SELECT * FROM othertable) othertable ON i2 = i", + ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" + + " └─ InnerJoin(othertable.i2 = mytable.i)\n" + + " ├─ Table(mytable)\n" + + " └─ CachedResults(SubqueryAlias(othertable)\n" + + " └─ Table(othertable)\n" + + " )\n" + + "", + }, + { + Query: "SELECT s2, i2, i FROM mytable LEFT JOIN (SELECT * FROM othertable) othertable ON i2 = i", + ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" + + " └─ LeftJoin(othertable.i2 = mytable.i)\n" + + " ├─ Table(mytable)\n" + + " └─ CachedResults(SubqueryAlias(othertable)\n" + + " └─ Table(othertable)\n" + + " )\n" + + "", + }, + { + Query: "SELECT s2, i2, i FROM mytable RIGHT JOIN (SELECT * FROM othertable) othertable ON i2 = i", + ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" + + " └─ RightIndexedJoin(othertable.i2 = mytable.i)\n" + + " ├─ SubqueryAlias(othertable)\n" + + " │ └─ Table(othertable)\n" + + " └─ IndexedTableAccess(mytable on [mytable.i])\n" + + "", + }, + { + Query: "SELECT s2, i2, i FROM mytable INNER JOIN (SELECT * FROM othertable) othertable ON i2 = i", + ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" + + " └─ IndexedJoin(othertable.i2 = mytable.i)\n" + + " ├─ SubqueryAlias(othertable)\n" + + " │ └─ Table(othertable)\n" + + " └─ IndexedTableAccess(mytable on [mytable.i])\n" + + "", + }, { Query: "SELECT mytable.i, mytable.s FROM mytable WHERE mytable.i = (SELECT i2 FROM othertable LIMIT 1)", ExpectedPlan: "IndexedInSubqueryFilter(mytable.i IN ((Limit(1)\n" + diff --git a/sql/analyzer/resolve_subqueries.go b/sql/analyzer/resolve_subqueries.go index 7cc5873f3e..28943ef3bc 100644 --- a/sql/analyzer/resolve_subqueries.go +++ b/sql/analyzer/resolve_subqueries.go @@ -122,6 +122,18 @@ func nodeIsCacheable(n sql.Node, lowestAllowedIdx int) bool { return cacheable } +func isDeterminstic(sa *plan.SubqueryAlias) bool { + isDeterminstic := true + plan.InspectExpressions(sa.Child, func(e sql.Expression) bool { + if nd, ok := e.(sql.NonDeterministicExpression); ok && nd.IsNonDeterministic() { + isDeterminstic = false + return false + } + return true + }) + return isDeterminstic +} + // cacheSubqueryResults determines whether it's safe to cache the results for any subquery expressions, and marks the // subquery as cacheable if so. Caching subquery results is safe in the case that no outer scope columns are referenced, // and if all expressions in the subquery are deterministic. @@ -142,3 +154,20 @@ func cacheSubqueryResults(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Scop return s, nil }) } + +// cacheSubqueryAlisesInJoins will look for joins against subquery aliases that +// will repeatedly execute the subquery, and will insert a *plan.CachedResults +// node on top of those nodes when it is safe to do so. +func cacheSubqueryAlisesInJoins(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Scope) (sql.Node, error) { + return plan.TransformUpWithParent(n, func(child, parent sql.Node, childNum int) (sql.Node, error) { + if join, isJoin := parent.(plan.JoinNode); isJoin { + sa, isSubqueryAlias := child.(*plan.SubqueryAlias) + if isSubqueryAlias && isDeterminstic(sa) { + if (join.JoinType() == plan.JoinTypeLeft || join.JoinType() == plan.JoinTypeInner) && childNum == 1 || (join.JoinType() == plan.JoinTypeRight && childNum == 0) { + return plan.NewCachedResults(child), nil + } + } + } + return child, nil + }) +} diff --git a/sql/analyzer/rules.go b/sql/analyzer/rules.go index bd88687812..cbbb8326d1 100644 --- a/sql/analyzer/rules.go +++ b/sql/analyzer/rules.go @@ -77,6 +77,7 @@ var OnceAfterDefault = []Rule{ // previous rules. {"resolve_subquery_exprs", resolveSubqueryExpressions}, {"cache_subquery_results", cacheSubqueryResults}, + {"cache_subquery_aliases_in_joins", cacheSubqueryAlisesInJoins}, {"resolve_insert_rows", resolveInsertRows}, {"apply_triggers", applyTriggers}, {"apply_procedures", applyProcedures}, diff --git a/sql/plan/cached_results.go b/sql/plan/cached_results.go new file mode 100644 index 0000000000..ba620ad8fa --- /dev/null +++ b/sql/plan/cached_results.go @@ -0,0 +1,129 @@ +// Copyright 2021 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package plan + +import ( + "io" + "sync" + + "github.com/dolthub/go-mysql-server/sql" +) + +// NewCachedResults returns a cached results plan Node, which will use a +// RowCache to cache results generated by Child.RowIter() and return those +// results for future calls to RowIter. This node is only safe to use if the +// Child is determinstic and is not dependent on the |row| parameter in the +// call to RowIter. +func NewCachedResults(n sql.Node) *CachedResults { + return &CachedResults{UnaryNode: UnaryNode{n}} +} + +type CachedResults struct { + UnaryNode + cache sql.RowsCache + dispose sql.DisposeFunc + mutex sync.Mutex + noCache bool +} + +func (n *CachedResults) RowIter(ctx *sql.Context, r sql.Row) (sql.RowIter, error) { + n.mutex.Lock() + defer n.mutex.Unlock() + if n.cache != nil { + return sql.RowsToRowIter(n.cache.Get()...), nil + } else if n.noCache { + return n.UnaryNode.Child.RowIter(ctx, r) + } + ci, err := n.UnaryNode.Child.RowIter(ctx, r) + if err != nil { + return nil, err + } + cache, dispose := ctx.Memory.NewRowsCache() + return &cachedResultsIter{n, ci, cache, dispose}, nil +} + +func (n *CachedResults) Dispose() { + if n.dispose != nil { + n.dispose() + } +} + +func (n *CachedResults) String() string { + return "CachedResults(" + n.UnaryNode.Child.String() + ")" +} + +func (n *CachedResults) DebugString() string { + pr := sql.NewTreePrinter() + _ = pr.WriteNode("CachedResults") + _ = pr.WriteChildren(sql.DebugString(n.UnaryNode)) + return pr.String() +} + +func (n *CachedResults) WithChildren(children ...sql.Node) (sql.Node, error) { + if len(children) != 1 { + return nil, sql.ErrInvalidChildrenNumber.New(n, len(children), 1) + } + nn := *n + nn.UnaryNode.Child = children[0] + return &nn, nil +} + + + +type cachedResultsIter struct { + parent *CachedResults + iter sql.RowIter + cache sql.RowsCache + dispose sql.DisposeFunc +} + +func (i *cachedResultsIter) Next() (sql.Row, error) { + r, err := i.iter.Next() + if i.cache != nil { + if err != nil { + if err == io.EOF { + i.parent.mutex.Lock() + defer i.parent.mutex.Unlock() + if i.parent.cache == nil { + i.parent.cache = i.cache + i.parent.dispose = i.dispose + } else { + i.dispose() + i.cache = nil + i.dispose = nil + } + } else { + i.dispose() + i.cache = nil + i.dispose = nil + } + } else { + aerr := i.cache.Add(r) + if aerr != nil { + i.dispose() + i.cache = nil + i.dispose = nil + i.parent.mutex.Lock() + defer i.parent.mutex.Unlock() + i.parent.noCache = true + } + } + } + return r, err +} + +func (i *cachedResultsIter) Close(ctx *sql.Context) error { + return i.Close(ctx) +} From e164b8a5e86d96ca2a95bb3e5c2543691bb4c18a Mon Sep 17 00:00:00 2001 From: Aaron Son Date: Mon, 1 Mar 2021 13:49:24 -0800 Subject: [PATCH 2/6] sql/analyzer/resolve_subqueries.go: Cache subqueries that are primary side of a join as well. --- enginetest/query_plans.go | 55 ++++++++++++++++-------------- sql/analyzer/resolve_subqueries.go | 27 +++++++++------ sql/plan/cached_results.go | 7 ++-- 3 files changed, 51 insertions(+), 38 deletions(-) diff --git a/enginetest/query_plans.go b/enginetest/query_plans.go index 748691174f..2a5f913dec 100755 --- a/enginetest/query_plans.go +++ b/enginetest/query_plans.go @@ -132,11 +132,12 @@ var PlanTests = []QueryPlanTest{ Query: "SELECT sub.i, sub.i2, sub.s2, ot.i2, ot.s2 FROM (SELECT i, i2, s2 FROM mytable INNER JOIN othertable ON i = i2) sub INNER JOIN othertable ot ON sub.i = ot.i2", ExpectedPlan: "Project(sub.i, sub.i2, sub.s2, ot.i2, ot.s2)\n" + " └─ IndexedJoin(sub.i = ot.i2)\n" + - " ├─ SubqueryAlias(sub)\n" + - " │ └─ Project(mytable.i, othertable.i2, othertable.s2)\n" + - " │ └─ IndexedJoin(mytable.i = othertable.i2)\n" + - " │ ├─ Table(mytable)\n" + - " │ └─ IndexedTableAccess(othertable on [othertable.i2])\n" + + " ├─ CachedResults\n" + + " │ └─ SubqueryAlias(sub)\n" + + " │ └─ Project(mytable.i, othertable.i2, othertable.s2)\n" + + " │ └─ IndexedJoin(mytable.i = othertable.i2)\n" + + " │ ├─ Table(mytable)\n" + + " │ └─ IndexedTableAccess(othertable on [othertable.i2])\n" + " └─ TableAlias(ot)\n" + " └─ IndexedTableAccess(othertable on [othertable.i2])\n" + "", @@ -145,11 +146,12 @@ var PlanTests = []QueryPlanTest{ Query: "SELECT sub.i, sub.i2, sub.s2, ot.i2, ot.s2 FROM othertable ot INNER JOIN (SELECT i, i2, s2 FROM mytable INNER JOIN othertable ON i = i2) sub ON sub.i = ot.i2", ExpectedPlan: "Project(sub.i, sub.i2, sub.s2, ot.i2, ot.s2)\n" + " └─ IndexedJoin(sub.i = ot.i2)\n" + - " ├─ SubqueryAlias(sub)\n" + - " │ └─ Project(mytable.i, othertable.i2, othertable.s2)\n" + - " │ └─ IndexedJoin(mytable.i = othertable.i2)\n" + - " │ ├─ Table(mytable)\n" + - " │ └─ IndexedTableAccess(othertable on [othertable.i2])\n" + + " ├─ CachedResults\n" + + " │ └─ SubqueryAlias(sub)\n" + + " │ └─ Project(mytable.i, othertable.i2, othertable.s2)\n" + + " │ └─ IndexedJoin(mytable.i = othertable.i2)\n" + + " │ ├─ Table(mytable)\n" + + " │ └─ IndexedTableAccess(othertable on [othertable.i2])\n" + " └─ TableAlias(ot)\n" + " └─ IndexedTableAccess(othertable on [othertable.i2])\n" + "", @@ -161,11 +163,12 @@ var PlanTests = []QueryPlanTest{ " └─ Project(i, s)\n" + " └─ Project(sub.i + 10, ot.s2)\n" + " └─ IndexedJoin(sub.i = ot.i2)\n" + - " ├─ SubqueryAlias(sub)\n" + - " │ └─ Project(mytable.i)\n" + - " │ └─ IndexedJoin(mytable.i = othertable.i2)\n" + - " │ ├─ Table(mytable)\n" + - " │ └─ IndexedTableAccess(othertable on [othertable.i2])\n" + + " ├─ CachedResults\n" + + " │ └─ SubqueryAlias(sub)\n" + + " │ └─ Project(mytable.i)\n" + + " │ └─ IndexedJoin(mytable.i = othertable.i2)\n" + + " │ ├─ Table(mytable)\n" + + " │ └─ IndexedTableAccess(othertable on [othertable.i2])\n" + " └─ TableAlias(ot)\n" + " └─ IndexedTableAccess(othertable on [othertable.i2])\n", }, @@ -231,9 +234,9 @@ var PlanTests = []QueryPlanTest{ ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" + " └─ InnerJoin(othertable.i2 = mytable.i)\n" + " ├─ Table(mytable)\n" + - " └─ CachedResults(SubqueryAlias(othertable)\n" + - " └─ Table(othertable)\n" + - " )\n" + + " └─ CachedResults\n" + + " └─ SubqueryAlias(othertable)\n" + + " └─ Table(othertable)\n" + "", }, { @@ -241,17 +244,18 @@ var PlanTests = []QueryPlanTest{ ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" + " └─ LeftJoin(othertable.i2 = mytable.i)\n" + " ├─ Table(mytable)\n" + - " └─ CachedResults(SubqueryAlias(othertable)\n" + - " └─ Table(othertable)\n" + - " )\n" + + " └─ CachedResults\n" + + " └─ SubqueryAlias(othertable)\n" + + " └─ Table(othertable)\n" + "", }, { Query: "SELECT s2, i2, i FROM mytable RIGHT JOIN (SELECT * FROM othertable) othertable ON i2 = i", ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" + " └─ RightIndexedJoin(othertable.i2 = mytable.i)\n" + - " ├─ SubqueryAlias(othertable)\n" + - " │ └─ Table(othertable)\n" + + " ├─ CachedResults\n" + + " │ └─ SubqueryAlias(othertable)\n" + + " │ └─ Table(othertable)\n" + " └─ IndexedTableAccess(mytable on [mytable.i])\n" + "", }, @@ -259,8 +263,9 @@ var PlanTests = []QueryPlanTest{ Query: "SELECT s2, i2, i FROM mytable INNER JOIN (SELECT * FROM othertable) othertable ON i2 = i", ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" + " └─ IndexedJoin(othertable.i2 = mytable.i)\n" + - " ├─ SubqueryAlias(othertable)\n" + - " │ └─ Table(othertable)\n" + + " ├─ CachedResults\n" + + " │ └─ SubqueryAlias(othertable)\n" + + " │ └─ Table(othertable)\n" + " └─ IndexedTableAccess(mytable on [mytable.i])\n" + "", }, diff --git a/sql/analyzer/resolve_subqueries.go b/sql/analyzer/resolve_subqueries.go index 28943ef3bc..ed522334f9 100644 --- a/sql/analyzer/resolve_subqueries.go +++ b/sql/analyzer/resolve_subqueries.go @@ -122,16 +122,21 @@ func nodeIsCacheable(n sql.Node, lowestAllowedIdx int) bool { return cacheable } -func isDeterminstic(sa *plan.SubqueryAlias) bool { - isDeterminstic := true - plan.InspectExpressions(sa.Child, func(e sql.Expression) bool { - if nd, ok := e.(sql.NonDeterministicExpression); ok && nd.IsNonDeterministic() { - isDeterminstic = false +func isDeterminstic(n sql.Node) bool { + res := true + plan.InspectExpressions(n, func(e sql.Expression) bool { + if s, ok := e.(*plan.Subquery); ok { + if !isDeterminstic(s.Query) { + res = false + } + return false + } else if nd, ok := e.(sql.NonDeterministicExpression); ok && nd.IsNonDeterministic() { + res = false return false } return true }) - return isDeterminstic + return res } // cacheSubqueryResults determines whether it's safe to cache the results for any subquery expressions, and marks the @@ -160,12 +165,12 @@ func cacheSubqueryResults(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Scop // node on top of those nodes when it is safe to do so. func cacheSubqueryAlisesInJoins(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Scope) (sql.Node, error) { return plan.TransformUpWithParent(n, func(child, parent sql.Node, childNum int) (sql.Node, error) { - if join, isJoin := parent.(plan.JoinNode); isJoin { + _, isJoin := parent.(plan.JoinNode) + _, isIndexedJoin := parent.(*plan.IndexedJoin) + if isJoin || isIndexedJoin { sa, isSubqueryAlias := child.(*plan.SubqueryAlias) - if isSubqueryAlias && isDeterminstic(sa) { - if (join.JoinType() == plan.JoinTypeLeft || join.JoinType() == plan.JoinTypeInner) && childNum == 1 || (join.JoinType() == plan.JoinTypeRight && childNum == 0) { - return plan.NewCachedResults(child), nil - } + if isSubqueryAlias && isDeterminstic(sa.Child) { + return plan.NewCachedResults(child), nil } } return child, nil diff --git a/sql/plan/cached_results.go b/sql/plan/cached_results.go index ba620ad8fa..5215baeafc 100644 --- a/sql/plan/cached_results.go +++ b/sql/plan/cached_results.go @@ -61,7 +61,10 @@ func (n *CachedResults) Dispose() { } func (n *CachedResults) String() string { - return "CachedResults(" + n.UnaryNode.Child.String() + ")" + pr := sql.NewTreePrinter() + _ = pr.WriteNode("CachedResults") + _ = pr.WriteChildren(n.UnaryNode.Child.String()) + return pr.String() } func (n *CachedResults) DebugString() string { @@ -125,5 +128,5 @@ func (i *cachedResultsIter) Next() (sql.Row, error) { } func (i *cachedResultsIter) Close(ctx *sql.Context) error { - return i.Close(ctx) + return i.iter.Close(ctx) } From 6a1c94340a1c21fdeb81be463af6ee3df7b647ef Mon Sep 17 00:00:00 2001 From: Aaron Son Date: Mon, 1 Mar 2021 14:08:55 -0800 Subject: [PATCH 3/6] Fix plan.CachedResults DebugString. --- sql/plan/cached_results.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/plan/cached_results.go b/sql/plan/cached_results.go index 5215baeafc..53d0528537 100644 --- a/sql/plan/cached_results.go +++ b/sql/plan/cached_results.go @@ -70,7 +70,7 @@ func (n *CachedResults) String() string { func (n *CachedResults) DebugString() string { pr := sql.NewTreePrinter() _ = pr.WriteNode("CachedResults") - _ = pr.WriteChildren(sql.DebugString(n.UnaryNode)) + _ = pr.WriteChildren(sql.DebugString(n.UnaryNode.Child)) return pr.String() } From 8253f578c0ef226341aa054e87c17e50dadfde9b Mon Sep 17 00:00:00 2001 From: Aaron Son Date: Tue, 2 Mar 2021 09:48:38 -0800 Subject: [PATCH 4/6] sql/plan/cached_results.go: Small cleanup for PR feedback. --- sql/plan/cached_results.go | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/sql/plan/cached_results.go b/sql/plan/cached_results.go index 53d0528537..e0ee5cbfa2 100644 --- a/sql/plan/cached_results.go +++ b/sql/plan/cached_results.go @@ -99,25 +99,14 @@ func (i *cachedResultsIter) Next() (sql.Row, error) { if err == io.EOF { i.parent.mutex.Lock() defer i.parent.mutex.Unlock() - if i.parent.cache == nil { - i.parent.cache = i.cache - i.parent.dispose = i.dispose - } else { - i.dispose() - i.cache = nil - i.dispose = nil - } + i.setCacheInParent() } else { - i.dispose() - i.cache = nil - i.dispose = nil + i.cleanUp() } } else { aerr := i.cache.Add(r) if aerr != nil { - i.dispose() - i.cache = nil - i.dispose = nil + i.cleanUp() i.parent.mutex.Lock() defer i.parent.mutex.Unlock() i.parent.noCache = true @@ -127,6 +116,26 @@ func (i *cachedResultsIter) Next() (sql.Row, error) { return r, err } +func (i *cachedResultsIter) setCacheInParent() { + if i.parent.cache == nil { + i.parent.cache = i.cache + i.parent.dispose = i.dispose + i.cache = nil + i.dispose = nil + } else { + i.cleanUp() + } +} + +func (i *cachedResultsIter) cleanUp() { + if i.dispose != nil { + i.dispose() + i.cache = nil + i.dispose = nil + } +} + func (i *cachedResultsIter) Close(ctx *sql.Context) error { + i.cleanUp() return i.iter.Close(ctx) } From 29dcfc94addd4929d02454009b9d346341e3a29e Mon Sep 17 00:00:00 2001 From: Aaron Son Date: Tue, 2 Mar 2021 10:09:52 -0800 Subject: [PATCH 5/6] sql/analyzer/resolve_subqueries.go: Remove unnecessary most-primary CachedResults nodes from join plans. --- enginetest/query_plans.go | 38 +++++++++++++----------------- sql/analyzer/resolve_subqueries.go | 35 ++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 22 deletions(-) diff --git a/enginetest/query_plans.go b/enginetest/query_plans.go index 2a5f913dec..5f8473e23d 100755 --- a/enginetest/query_plans.go +++ b/enginetest/query_plans.go @@ -132,12 +132,11 @@ var PlanTests = []QueryPlanTest{ Query: "SELECT sub.i, sub.i2, sub.s2, ot.i2, ot.s2 FROM (SELECT i, i2, s2 FROM mytable INNER JOIN othertable ON i = i2) sub INNER JOIN othertable ot ON sub.i = ot.i2", ExpectedPlan: "Project(sub.i, sub.i2, sub.s2, ot.i2, ot.s2)\n" + " └─ IndexedJoin(sub.i = ot.i2)\n" + - " ├─ CachedResults\n" + - " │ └─ SubqueryAlias(sub)\n" + - " │ └─ Project(mytable.i, othertable.i2, othertable.s2)\n" + - " │ └─ IndexedJoin(mytable.i = othertable.i2)\n" + - " │ ├─ Table(mytable)\n" + - " │ └─ IndexedTableAccess(othertable on [othertable.i2])\n" + + " ├─ SubqueryAlias(sub)\n" + + " │ └─ Project(mytable.i, othertable.i2, othertable.s2)\n" + + " │ └─ IndexedJoin(mytable.i = othertable.i2)\n" + + " │ ├─ Table(mytable)\n" + + " │ └─ IndexedTableAccess(othertable on [othertable.i2])\n" + " └─ TableAlias(ot)\n" + " └─ IndexedTableAccess(othertable on [othertable.i2])\n" + "", @@ -146,12 +145,11 @@ var PlanTests = []QueryPlanTest{ Query: "SELECT sub.i, sub.i2, sub.s2, ot.i2, ot.s2 FROM othertable ot INNER JOIN (SELECT i, i2, s2 FROM mytable INNER JOIN othertable ON i = i2) sub ON sub.i = ot.i2", ExpectedPlan: "Project(sub.i, sub.i2, sub.s2, ot.i2, ot.s2)\n" + " └─ IndexedJoin(sub.i = ot.i2)\n" + - " ├─ CachedResults\n" + - " │ └─ SubqueryAlias(sub)\n" + - " │ └─ Project(mytable.i, othertable.i2, othertable.s2)\n" + - " │ └─ IndexedJoin(mytable.i = othertable.i2)\n" + - " │ ├─ Table(mytable)\n" + - " │ └─ IndexedTableAccess(othertable on [othertable.i2])\n" + + " ├─ SubqueryAlias(sub)\n" + + " │ └─ Project(mytable.i, othertable.i2, othertable.s2)\n" + + " │ └─ IndexedJoin(mytable.i = othertable.i2)\n" + + " │ ├─ Table(mytable)\n" + + " │ └─ IndexedTableAccess(othertable on [othertable.i2])\n" + " └─ TableAlias(ot)\n" + " └─ IndexedTableAccess(othertable on [othertable.i2])\n" + "", @@ -163,12 +161,11 @@ var PlanTests = []QueryPlanTest{ " └─ Project(i, s)\n" + " └─ Project(sub.i + 10, ot.s2)\n" + " └─ IndexedJoin(sub.i = ot.i2)\n" + - " ├─ CachedResults\n" + - " │ └─ SubqueryAlias(sub)\n" + - " │ └─ Project(mytable.i)\n" + - " │ └─ IndexedJoin(mytable.i = othertable.i2)\n" + - " │ ├─ Table(mytable)\n" + - " │ └─ IndexedTableAccess(othertable on [othertable.i2])\n" + + " ├─ SubqueryAlias(sub)\n" + + " │ └─ Project(mytable.i)\n" + + " │ └─ IndexedJoin(mytable.i = othertable.i2)\n" + + " │ ├─ Table(mytable)\n" + + " │ └─ IndexedTableAccess(othertable on [othertable.i2])\n" + " └─ TableAlias(ot)\n" + " └─ IndexedTableAccess(othertable on [othertable.i2])\n", }, @@ -263,9 +260,8 @@ var PlanTests = []QueryPlanTest{ Query: "SELECT s2, i2, i FROM mytable INNER JOIN (SELECT * FROM othertable) othertable ON i2 = i", ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" + " └─ IndexedJoin(othertable.i2 = mytable.i)\n" + - " ├─ CachedResults\n" + - " │ └─ SubqueryAlias(othertable)\n" + - " │ └─ Table(othertable)\n" + + " ├─ SubqueryAlias(othertable)\n" + + " │ └─ Table(othertable)\n" + " └─ IndexedTableAccess(mytable on [mytable.i])\n" + "", }, diff --git a/sql/analyzer/resolve_subqueries.go b/sql/analyzer/resolve_subqueries.go index ed522334f9..89db467a2a 100644 --- a/sql/analyzer/resolve_subqueries.go +++ b/sql/analyzer/resolve_subqueries.go @@ -164,7 +164,7 @@ func cacheSubqueryResults(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Scop // will repeatedly execute the subquery, and will insert a *plan.CachedResults // node on top of those nodes when it is safe to do so. func cacheSubqueryAlisesInJoins(ctx *sql.Context, a *Analyzer, n sql.Node, scope *Scope) (sql.Node, error) { - return plan.TransformUpWithParent(n, func(child, parent sql.Node, childNum int) (sql.Node, error) { + n, err := plan.TransformUpWithParent(n, func(child, parent sql.Node, childNum int) (sql.Node, error) { _, isJoin := parent.(plan.JoinNode) _, isIndexedJoin := parent.(*plan.IndexedJoin) if isJoin || isIndexedJoin { @@ -175,4 +175,37 @@ func cacheSubqueryAlisesInJoins(ctx *sql.Context, a *Analyzer, n sql.Node, scope } return child, nil }) + if err != nil { + return n, err + } + + // If the most primary table in the top level join is a CachedResults, remove it. + // We only want to do this if we're at the top of the tree. + // TODO: Not a perfect indicator of whether we're at the top of the tree... + if scope == nil { + selector := func(parent sql.Node, child sql.Node, childNum int) bool { + if j, isJoin := parent.(plan.JoinNode); isJoin { + if j.JoinType() == plan.JoinTypeRight { + return childNum == 1 + } else { + return childNum == 0 + } + } else if j, isIndexedJoin := parent.(*plan.IndexedJoin); isIndexedJoin { + if j.JoinType() == plan.JoinTypeRight { + return childNum == 1 + } else { + return childNum == 0 + } + } + return true + } + n, err = plan.TransformUpWithSelector(n, selector, func(n sql.Node) (sql.Node, error) { + cr, isCR := n.(*plan.CachedResults) + if isCR { + return cr.UnaryNode.Child, nil + } + return n, nil + }) + } + return n, err } From b5bfb1905b11f637e6f12120456ae1641401a82c Mon Sep 17 00:00:00 2001 From: Aaron Son Date: Tue, 2 Mar 2021 11:09:38 -0800 Subject: [PATCH 6/6] sql/analyzer/resolve_subqueries.go: PR feedback for getting to correct child of indexed joins. --- enginetest/query_plans.go | 5 ++--- sql/analyzer/resolve_subqueries.go | 10 +++------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/enginetest/query_plans.go b/enginetest/query_plans.go index 5f8473e23d..ba96dbb8ef 100755 --- a/enginetest/query_plans.go +++ b/enginetest/query_plans.go @@ -250,9 +250,8 @@ var PlanTests = []QueryPlanTest{ Query: "SELECT s2, i2, i FROM mytable RIGHT JOIN (SELECT * FROM othertable) othertable ON i2 = i", ExpectedPlan: "Project(othertable.s2, othertable.i2, mytable.i)\n" + " └─ RightIndexedJoin(othertable.i2 = mytable.i)\n" + - " ├─ CachedResults\n" + - " │ └─ SubqueryAlias(othertable)\n" + - " │ └─ Table(othertable)\n" + + " ├─ SubqueryAlias(othertable)\n" + + " │ └─ Table(othertable)\n" + " └─ IndexedTableAccess(mytable on [mytable.i])\n" + "", }, diff --git a/sql/analyzer/resolve_subqueries.go b/sql/analyzer/resolve_subqueries.go index 89db467a2a..df6682ecef 100644 --- a/sql/analyzer/resolve_subqueries.go +++ b/sql/analyzer/resolve_subqueries.go @@ -184,13 +184,9 @@ func cacheSubqueryAlisesInJoins(ctx *sql.Context, a *Analyzer, n sql.Node, scope // TODO: Not a perfect indicator of whether we're at the top of the tree... if scope == nil { selector := func(parent sql.Node, child sql.Node, childNum int) bool { - if j, isJoin := parent.(plan.JoinNode); isJoin { - if j.JoinType() == plan.JoinTypeRight { - return childNum == 1 - } else { - return childNum == 0 - } - } else if j, isIndexedJoin := parent.(*plan.IndexedJoin); isIndexedJoin { + if _, isIndexedJoin := parent.(*plan.IndexedJoin); isIndexedJoin { + return childNum == 0 + } else if j, isJoin := parent.(plan.JoinNode); isJoin { if j.JoinType() == plan.JoinTypeRight { return childNum == 1 } else {