Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve IN_SUBQUERY table disambiguation #2181

Merged
merged 4 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions enginetest/queries/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -764,6 +764,14 @@ var SpatialQueryTests = []QueryTest{
}

var QueryTests = []QueryTest{
{
Query: "select x from xy where y in (select xy.x from xy join (select t2.y from xy t2 where exists (select t3.y from xy t3 where t3.y = xy.x)) t1) order by 1;",
Expected: []sql.Row{{0}, {1}, {2}, {3}},
},
{
Query: "select x from xy where y in (select x from xy where x in (select y from xy)) order by 1;",
Expected: []sql.Row{{0}, {1}, {2}, {3}},
},
{
Query: "SELECT 1 WHERE ((1 IN (NULL >= 1)) IS NULL);",
Expected: []sql.Row{{1}},
Expand Down
99 changes: 99 additions & 0 deletions enginetest/queries/query_plans.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions sql/analyzer/unnest_exists_subqueries.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,10 @@ func unnestExistSubqueries(ctx *sql.Context, scope *plan.Scope, a *Analyzer, fil
}

outerFilters := s.joinFilters
if referencesOuterScope(outerFilters, scope) {
retFilters = append(retFilters, f)
continue
}

switch joinType {
case plan.JoinTypeAnti:
Expand All @@ -221,6 +225,22 @@ func unnestExistSubqueries(ctx *sql.Context, scope *plan.Scope, a *Analyzer, fil
return ret, transform.NewTree, nil
}

// referencesOuterScope returns true if a filter in the set is from an outer scope
func referencesOuterScope(filters []sql.Expression, scope *plan.Scope) bool {
if scope == nil {
return false
}
for _, e := range filters {
if transform.InspectExpr(e, func(e sql.Expression) bool {
gf, ok := e.(*expression.GetField)
return ok && scope.Correlated().Contains(gf.Id())
}) {
return true
}
}
return false
}

type hoistSubquery struct {
inner sql.Node
joinFilters []sql.Expression
Expand Down
72 changes: 59 additions & 13 deletions sql/analyzer/unnest_insubqueries.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func unnestInSubqueries(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.S
}

var unnested bool
var aliases map[string]int

ret := n
var err error
Expand All @@ -69,7 +70,6 @@ func unnestInSubqueries(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.S

var matches []applyJoin
var newFilters []sql.Expression
var aliases map[string]int

// separate decorrelation candidates
for _, e := range filters {
Expand Down Expand Up @@ -138,7 +138,8 @@ func unnestInSubqueries(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.S
}
}

newSubq, err := disambiguateTables(aliases, subq.Query)
var newSubq sql.Node
newSubq, aliases, err = disambiguateTables(aliases, subq.Query)
if err != nil {
return ret, transform.SameTree, nil
}
Expand Down Expand Up @@ -180,24 +181,44 @@ func unnestInSubqueries(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.S
return ret, transform.TreeIdentity(!unnested), nil
}

func disambiguateTables(used map[string]int, n sql.Node) (sql.Node, error) {
duplicates := make(map[string]int)
n, _, err := transform.Node(n, func(n sql.Node) (sql.Node, transform.TreeIdentity, error) {
switch n := n.(type) {
// returns an updated sql.Node with aliases de-duplicated, and an
// updated alias mapping with new conflicts and tables added.
func disambiguateTables(used map[string]int, n sql.Node) (sql.Node, map[string]int, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Document the return params

rename := make(map[sql.TableId]string)
n, _, err := transform.NodeWithCtx(n, nil, func(c transform.Context) (sql.Node, transform.TreeIdentity, error) {
switch n := c.Node.(type) {
case sql.RenameableNode:
name := strings.ToLower(n.Name())
if _, ok := c.Parent.(sql.RenameableNode); ok {
// skip checking when: TableAlias(ResolvedTable)
return n, transform.SameTree, nil
}
if cnt, ok := used[name]; ok {
duplicates[name] = cnt + 1
return n.WithName(fmt.Sprintf("%s_%d", name, cnt+1)), transform.NewTree, nil
used[name] = cnt + 1
newName := name
for ok {
cnt++
newName = fmt.Sprintf("%s_%d", name, cnt)
_, ok = used[newName]

}
used[newName] = 0

tin, ok := n.(plan.TableIdNode)
if !ok {
return n, transform.SameTree, fmt.Errorf("expected sql.Renameable to implement plan.TableIdNode")
}
rename[tin.Id()] = newName
return n.WithName(newName), transform.NewTree, nil
} else {
duplicates[name] = 0
used[name] = 0
}
return n, transform.NewTree, nil
default:
return transform.NodeExprs(n, func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) {
switch e := e.(type) {
case *expression.GetField:
if cnt, ok := duplicates[strings.ToLower(e.Table())]; ok && cnt > 0 {
if cnt, ok := used[strings.ToLower(e.Table())]; ok && cnt > 0 {
return e.WithTable(fmt.Sprintf("%s_%d", e.Table(), cnt)), transform.NewTree, nil
}
default:
Expand All @@ -206,10 +227,35 @@ func disambiguateTables(used map[string]int, n sql.Node) (sql.Node, error) {
})
}
})
for k, v := range duplicates {
used[k] = v
if err != nil {
return nil, nil, err
}
if len(rename) > 0 {
n, _, err = renameExpressionTables(n, rename)
}
return n, err
return n, used, err
}

// renameExpressionTables renames table references recursively. We use
// table ids to avoid improperly renaming tables in lower scopes with the
// same name.
func renameExpressionTables(n sql.Node, rename map[sql.TableId]string) (sql.Node, transform.TreeIdentity, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems fine but I'm a little confused why this should be necessary. Isn't the point of table IDs so that we can uniquely identify every table without doing string matching?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assignExecIndexes still does string matching right now, but in general it would be nice to expand unique id use

return transform.NodeExprs(n, func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) {
switch e := e.(type) {
case *expression.GetField:
if to, ok := rename[e.TableId()]; ok {
return e.WithTable(to), transform.NewTree, nil
}
case *plan.Subquery:
newQ, same, err := renameExpressionTables(e.Query, rename)
if !same || err != nil {
return e, same, err
}
return e.WithQuery(newQ), transform.NewTree, nil
default:
}
return e, transform.NewTree, nil
})
}

// getHighestProjection returns a set of projection expressions responsible
Expand Down