From 715641c9a6911fb9729cf21a573a3f3e9233f484 Mon Sep 17 00:00:00 2001 From: Max Hoffman Date: Mon, 4 Dec 2023 12:41:46 -0800 Subject: [PATCH 1/3] Improve IN_SUBQUERY table disambiguation --- enginetest/queries/query_plans.go | 135 +++++++++++++++++++++++ sql/analyzer/unnest_exists_subqueries.go | 20 ++++ sql/analyzer/unnest_insubqueries.go | 70 +++++++++--- 3 files changed, 212 insertions(+), 13 deletions(-) diff --git a/enginetest/queries/query_plans.go b/enginetest/queries/query_plans.go index a867818ab3..34b9b4aa99 100644 --- a/enginetest/queries/query_plans.go +++ b/enginetest/queries/query_plans.go @@ -17,6 +17,141 @@ package queries var PlanTests = []QueryPlanTest{ + { + Query: `select x from xy where y in (select x from xy where exists (select y from xy t1 where t1.y = xy.x));`, + ExpectedPlan: "Project\n" + + " ├─ columns: [xy.x:1!null]\n" + + " └─ LookupJoin\n" + + " ├─ Eq\n" + + " │ ├─ xy.y:2\n" + + " │ └─ xy_1.x:0!null\n" + + " ├─ Distinct\n" + + " │ └─ Project\n" + + " │ ├─ columns: [xy_1.x:0!null]\n" + + " │ └─ SemiLookupJoin\n" + + " │ ├─ TableAlias(xy_1)\n" + + " │ │ └─ ProcessTable\n" + + " │ │ └─ Table\n" + + " │ │ ├─ name: xy\n" + + " │ │ └─ columns: [x y]\n" + + " │ └─ TableAlias(t1)\n" + + " │ └─ IndexedTableAccess(xy)\n" + + " │ ├─ index: [xy.y]\n" + + " │ ├─ keys: [xy.x:3!null]\n" + + " │ ├─ colSet: (5,6)\n" + + " │ ├─ tableId: 3\n" + + " │ └─ Table\n" + + " │ ├─ name: xy\n" + + " │ └─ columns: [x y]\n" + + " └─ IndexedTableAccess(xy)\n" + + " ├─ index: [xy.y]\n" + + " ├─ keys: [xy_1.x:0!null]\n" + + " ├─ colSet: (1,2)\n" + + " ├─ tableId: 1\n" + + " └─ Table\n" + + " ├─ name: xy\n" + + " └─ columns: [x y]\n" + + "", + }, + { + Query: `select x from xy where y in (select xy.x from xy join (select t2.y from xy t2 where exists (select t3.y from xy t3 where t3.y = xy.x)) t1);`, + ExpectedPlan: "Project\n" + + " ├─ columns: [xy.x:0!null]\n" + + " └─ Filter\n" + + " ├─ InSubquery\n" + + " │ ├─ left: xy.y:1\n" + + " │ └─ right: Subquery\n" + + " │ ├─ cacheable: false\n" + + " │ ├─ alias-string: select xy.x from xy join (select t2.y from xy as t2 where exists (select t3.y from xy as t3 where t3.y = xy.x)) as t1\n" + + " │ └─ Project\n" + + " │ ├─ columns: [xy.x:3!null]\n" + + " │ └─ CrossHashJoin\n" + + " │ ├─ SubqueryAlias\n" + + " │ │ ├─ name: t1\n" + + " │ │ ├─ outerVisibility: true\n" + + " │ │ ├─ isLateral: false\n" + + " │ │ ├─ cacheable: false\n" + + " │ │ ├─ colSet: (9)\n" + + " │ │ ├─ tableId: 5\n" + + " │ │ └─ Project\n" + + " │ │ ├─ columns: [t2.y:3]\n" + + " │ │ └─ Filter\n" + + " │ │ ├─ EXISTS Subquery\n" + + " │ │ │ ├─ cacheable: false\n" + + " │ │ │ ├─ alias-string: select t3.y from xy as t3 where t3.y = xy.x\n" + + " │ │ │ └─ Project\n" + + " │ │ │ ├─ columns: [t3.y:5]\n" + + " │ │ │ └─ Filter\n" + + " │ │ │ ├─ Eq\n" + + " │ │ │ │ ├─ t3.y:5\n" + + " │ │ │ │ └─ xy.x:0!null\n" + + " │ │ │ └─ TableAlias(t3)\n" + + " │ │ │ └─ IndexedTableAccess(xy)\n" + + " │ │ │ ├─ index: [xy.y]\n" + + " │ │ │ ├─ keys: [xy.x:0!null]\n" + + " │ │ │ ├─ colSet: (7,8)\n" + + " │ │ │ ├─ tableId: 4\n" + + " │ │ │ └─ Table\n" + + " │ │ │ ├─ name: xy\n" + + " │ │ │ └─ columns: [x y]\n" + + " │ │ └─ TableAlias(t2)\n" + + " │ │ └─ Table\n" + + " │ │ ├─ name: xy\n" + + " │ │ ├─ columns: [x y]\n" + + " │ │ ├─ colSet: (5,6)\n" + + " │ │ └─ tableId: 3\n" + + " │ └─ HashLookup\n" + + " │ ├─ left-key: TUPLE()\n" + + " │ ├─ right-key: TUPLE()\n" + + " │ └─ Table\n" + + " │ ├─ name: xy\n" + + " │ ├─ columns: [x]\n" + + " │ ├─ colSet: (3,4)\n" + + " │ └─ tableId: 2\n" + + " └─ ProcessTable\n" + + " └─ Table\n" + + " ├─ name: xy\n" + + " └─ columns: [x y]\n" + + "", + }, + { + Query: `select x from xy where y in (select x from xy where x in (select y from xy));`, + ExpectedPlan: "Project\n" + + " ├─ columns: [xy.x:1!null]\n" + + " └─ LookupJoin\n" + + " ├─ Eq\n" + + " │ ├─ xy.y:2\n" + + " │ └─ xy_1.x:0!null\n" + + " ├─ Distinct\n" + + " │ └─ Project\n" + + " │ ├─ columns: [xy_1.x:0!null]\n" + + " │ └─ SemiLookupJoin\n" + + " │ ├─ TableAlias(xy_1)\n" + + " │ │ └─ ProcessTable\n" + + " │ │ └─ Table\n" + + " │ │ ├─ name: xy\n" + + " │ │ └─ columns: [x y]\n" + + " │ └─ Project\n" + + " │ ├─ columns: [xy_2.y:1]\n" + + " │ └─ TableAlias(xy_2)\n" + + " │ └─ IndexedTableAccess(xy)\n" + + " │ ├─ index: [xy.y]\n" + + " │ ├─ keys: [xy_1.x:0!null]\n" + + " │ ├─ colSet: (5,6)\n" + + " │ ├─ tableId: 3\n" + + " │ └─ Table\n" + + " │ ├─ name: xy\n" + + " │ └─ columns: [x y]\n" + + " └─ IndexedTableAccess(xy)\n" + + " ├─ index: [xy.y]\n" + + " ├─ keys: [xy_1.x:0!null]\n" + + " ├─ colSet: (1,2)\n" + + " ├─ tableId: 1\n" + + " └─ Table\n" + + " ├─ name: xy\n" + + " └─ columns: [x y]\n" + + "", + }, { Query: `select * from xy join uv on (x = u and u > 0) where u < 2`, ExpectedPlan: "Project\n" + diff --git a/sql/analyzer/unnest_exists_subqueries.go b/sql/analyzer/unnest_exists_subqueries.go index 19ff6e9135..21966b4997 100644 --- a/sql/analyzer/unnest_exists_subqueries.go +++ b/sql/analyzer/unnest_exists_subqueries.go @@ -201,6 +201,10 @@ func unnestExistSubqueries(ctx *sql.Context, scope *plan.Scope, a *Analyzer, fil } outerFilters := s.joinFilters + if referencesOuterScope(outerFilters, scope) { + retFilters = append(retFilters, f) + continue + } switch joinType { case plan.JoinTypeAnti: @@ -221,6 +225,22 @@ func unnestExistSubqueries(ctx *sql.Context, scope *plan.Scope, a *Analyzer, fil return ret, transform.NewTree, nil } +// referencesOuterScope returns true if a filter in the set is from an outer scope +func referencesOuterScope(filters []sql.Expression, scope *plan.Scope) bool { + if scope == nil { + return false + } + for _, e := range filters { + if transform.InspectExpr(e, func(e sql.Expression) bool { + gf, ok := e.(*expression.GetField) + return ok && scope.Correlated().Contains(gf.Id()) + }) { + return true + } + } + return false +} + type hoistSubquery struct { inner sql.Node joinFilters []sql.Expression diff --git a/sql/analyzer/unnest_insubqueries.go b/sql/analyzer/unnest_insubqueries.go index f1a2cf0224..587c4ba016 100644 --- a/sql/analyzer/unnest_insubqueries.go +++ b/sql/analyzer/unnest_insubqueries.go @@ -46,6 +46,7 @@ func unnestInSubqueries(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.S } var unnested bool + var aliases map[string]int ret := n var err error @@ -69,7 +70,6 @@ func unnestInSubqueries(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.S var matches []applyJoin var newFilters []sql.Expression - var aliases map[string]int // separate decorrelation candidates for _, e := range filters { @@ -138,7 +138,8 @@ func unnestInSubqueries(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.S } } - newSubq, err := disambiguateTables(aliases, subq.Query) + var newSubq sql.Node + newSubq, aliases, err = disambiguateTables(aliases, subq.Query) if err != nil { return ret, transform.SameTree, nil } @@ -180,24 +181,42 @@ func unnestInSubqueries(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.S return ret, transform.TreeIdentity(!unnested), nil } -func disambiguateTables(used map[string]int, n sql.Node) (sql.Node, error) { - duplicates := make(map[string]int) - n, _, err := transform.Node(n, func(n sql.Node) (sql.Node, transform.TreeIdentity, error) { - switch n := n.(type) { +func disambiguateTables(used map[string]int, n sql.Node) (sql.Node, map[string]int, error) { + rename := make(map[sql.TableId]string) + n, _, err := transform.NodeWithCtx(n, nil, func(c transform.Context) (sql.Node, transform.TreeIdentity, error) { + switch n := c.Node.(type) { case sql.RenameableNode: name := strings.ToLower(n.Name()) + if _, ok := c.Parent.(sql.RenameableNode); ok { + // skip checking when: TableAlias(ResolvedTable) + return n, transform.SameTree, nil + } if cnt, ok := used[name]; ok { - duplicates[name] = cnt + 1 - return n.WithName(fmt.Sprintf("%s_%d", name, cnt+1)), transform.NewTree, nil + used[name] = cnt + 1 + newName := name + for ok { + cnt++ + newName = fmt.Sprintf("%s_%d", name, cnt) + _, ok = used[newName] + + } + used[newName] = 0 + + tin, ok := n.(plan.TableIdNode) + if !ok { + return n, transform.SameTree, fmt.Errorf("expected sql.Renameable to implement plan.TableIdNode") + } + rename[tin.Id()] = newName + return n.WithName(newName), transform.NewTree, nil } else { - duplicates[name] = 0 + used[name] = 0 } return n, transform.NewTree, nil default: return transform.NodeExprs(n, func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) { switch e := e.(type) { case *expression.GetField: - if cnt, ok := duplicates[strings.ToLower(e.Table())]; ok && cnt > 0 { + if cnt, ok := used[strings.ToLower(e.Table())]; ok && cnt > 0 { return e.WithTable(fmt.Sprintf("%s_%d", e.Table(), cnt)), transform.NewTree, nil } default: @@ -206,10 +225,35 @@ func disambiguateTables(used map[string]int, n sql.Node) (sql.Node, error) { }) } }) - for k, v := range duplicates { - used[k] = v + if err != nil { + return nil, nil, err + } + if len(rename) > 0 { + n, _, err = renameExpressionTables(n, rename) } - return n, err + return n, used, err +} + +// renameExpressionTables renames table references recursively. We use +// table ids to avoid improperly renaming tables in lower scopes with the +// same name. +func renameExpressionTables(n sql.Node, rename map[sql.TableId]string) (sql.Node, transform.TreeIdentity, error) { + return transform.NodeExprs(n, func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) { + switch e := e.(type) { + case *expression.GetField: + if to, ok := rename[e.TableId()]; ok { + return e.WithTable(to), transform.NewTree, nil + } + case *plan.Subquery: + newQ, same, err := renameExpressionTables(e.Query, rename) + if !same || err != nil { + return e, same, err + } + return e.WithQuery(newQ), transform.NewTree, nil + default: + } + return e, transform.NewTree, nil + }) } // getHighestProjection returns a set of projection expressions responsible From 47de261d60143d8768e81037e8b20d2cec4a64dc Mon Sep 17 00:00:00 2001 From: Max Hoffman Date: Mon, 4 Dec 2023 15:26:47 -0800 Subject: [PATCH 2/3] query tests --- enginetest/queries/queries.go | 8 +++++++ enginetest/queries/query_plans.go | 36 ------------------------------- 2 files changed, 8 insertions(+), 36 deletions(-) diff --git a/enginetest/queries/queries.go b/enginetest/queries/queries.go index 9b8546f55e..caeae77438 100644 --- a/enginetest/queries/queries.go +++ b/enginetest/queries/queries.go @@ -764,6 +764,14 @@ var SpatialQueryTests = []QueryTest{ } var QueryTests = []QueryTest{ + { + Query: "select x from xy where y in (select xy.x from xy join (select t2.y from xy t2 where exists (select t3.y from xy t3 where t3.y = xy.x)) t1) order by 1;", + Expected: []sql.Row{{0}, {1}, {2}, {3}}, + }, + { + Query: "select x from xy where y in (select x from xy where x in (select y from xy)) order by 1;", + Expected: []sql.Row{{0}, {1}, {2}, {3}}, + }, { Query: "SELECT 1 WHERE ((1 IN (NULL >= 1)) IS NULL);", Expected: []sql.Row{{1}}, diff --git a/enginetest/queries/query_plans.go b/enginetest/queries/query_plans.go index 34b9b4aa99..f9b75a3bbb 100644 --- a/enginetest/queries/query_plans.go +++ b/enginetest/queries/query_plans.go @@ -17,42 +17,6 @@ package queries var PlanTests = []QueryPlanTest{ - { - Query: `select x from xy where y in (select x from xy where exists (select y from xy t1 where t1.y = xy.x));`, - ExpectedPlan: "Project\n" + - " ├─ columns: [xy.x:1!null]\n" + - " └─ LookupJoin\n" + - " ├─ Eq\n" + - " │ ├─ xy.y:2\n" + - " │ └─ xy_1.x:0!null\n" + - " ├─ Distinct\n" + - " │ └─ Project\n" + - " │ ├─ columns: [xy_1.x:0!null]\n" + - " │ └─ SemiLookupJoin\n" + - " │ ├─ TableAlias(xy_1)\n" + - " │ │ └─ ProcessTable\n" + - " │ │ └─ Table\n" + - " │ │ ├─ name: xy\n" + - " │ │ └─ columns: [x y]\n" + - " │ └─ TableAlias(t1)\n" + - " │ └─ IndexedTableAccess(xy)\n" + - " │ ├─ index: [xy.y]\n" + - " │ ├─ keys: [xy.x:3!null]\n" + - " │ ├─ colSet: (5,6)\n" + - " │ ├─ tableId: 3\n" + - " │ └─ Table\n" + - " │ ├─ name: xy\n" + - " │ └─ columns: [x y]\n" + - " └─ IndexedTableAccess(xy)\n" + - " ├─ index: [xy.y]\n" + - " ├─ keys: [xy_1.x:0!null]\n" + - " ├─ colSet: (1,2)\n" + - " ├─ tableId: 1\n" + - " └─ Table\n" + - " ├─ name: xy\n" + - " └─ columns: [x y]\n" + - "", - }, { Query: `select x from xy where y in (select xy.x from xy join (select t2.y from xy t2 where exists (select t3.y from xy t3 where t3.y = xy.x)) t1);`, ExpectedPlan: "Project\n" + From 8f271af0f039c163f2e1aad5b9934e41bd04178b Mon Sep 17 00:00:00 2001 From: Max Hoffman Date: Mon, 4 Dec 2023 15:48:50 -0800 Subject: [PATCH 3/3] docstring for disambiguateTables --- sql/analyzer/unnest_insubqueries.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/analyzer/unnest_insubqueries.go b/sql/analyzer/unnest_insubqueries.go index 587c4ba016..bbe5838ae1 100644 --- a/sql/analyzer/unnest_insubqueries.go +++ b/sql/analyzer/unnest_insubqueries.go @@ -181,6 +181,8 @@ func unnestInSubqueries(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.S return ret, transform.TreeIdentity(!unnested), nil } +// returns an updated sql.Node with aliases de-duplicated, and an +// updated alias mapping with new conflicts and tables added. func disambiguateTables(used map[string]int, n sql.Node) (sql.Node, map[string]int, error) { rename := make(map[sql.TableId]string) n, _, err := transform.NodeWithCtx(n, nil, func(c transform.Context) (sql.Node, transform.TreeIdentity, error) {