diff --git a/pkg/sql/opt/testutils/opttester/reorder_joins.go b/pkg/sql/opt/testutils/opttester/reorder_joins.go index 261b4d2d2918..2e807e810ac8 100644 --- a/pkg/sql/opt/testutils/opttester/reorder_joins.go +++ b/pkg/sql/opt/testutils/opttester/reorder_joins.go @@ -37,6 +37,9 @@ import ( func (ot *OptTester) ReorderJoins() (string, error) { ot.builder.Reset() o := ot.makeOptimizer() + o.NotifyOnMatchedRule(func(ruleName opt.RuleName) bool { + return !ot.Flags.DisableRules.Contains(int(ruleName)) + }) jof := newJoinOrderFormatter(o) // joinsConsidered counts the number of joins which joinOrderBuilder attempts @@ -106,16 +109,15 @@ func (ot *OptTester) ReorderJoins() (string, error) { type joinOrderFormatter struct { o *xform.Optimizer - // relLabels is a map from the first ColumnID of each base relation to its - // assigned label. - relLabels map[opt.ColumnID]string + // relLabels is a map from each base relation to its assigned label. + relLabels map[memo.RelExpr]string } // newJoinOrderFormatter returns an initialized joinOrderFormatter. func newJoinOrderFormatter(o *xform.Optimizer) *joinOrderFormatter { return &joinOrderFormatter{ o: o, - relLabels: make(map[opt.ColumnID]string), + relLabels: make(map[memo.RelExpr]string), } } @@ -188,11 +190,7 @@ func (jof *joinOrderFormatter) formatRules(rules []xform.OnReorderRuleParam) str // relLabel returns the label for the given relation. Labels will follow the // pattern A, B, ..., Z, A1, B1, etc. func (jof *joinOrderFormatter) relLabel(e memo.RelExpr) string { - firstCol, ok := e.Relational().OutputCols.Next(0) - if !ok { - panic(errors.AssertionFailedf("failed to retrieve column from %v", e.Op())) - } - if label, ok := jof.relLabels[firstCol]; ok { + if label, ok := jof.relLabels[e]; ok { return label } const lenAlphabet = 26 @@ -203,7 +201,7 @@ func (jof *joinOrderFormatter) relLabel(e memo.RelExpr) string { // Names will follow the pattern: A, B, ..., Z, A1, B1, etc. label += strconv.Itoa(number) } - jof.relLabels[firstCol] = label + jof.relLabels[e] = label return label } diff --git a/pkg/sql/opt/xform/join_order_builder.go b/pkg/sql/opt/xform/join_order_builder.go index 2934fa49556c..fa12b9a69645 100644 --- a/pkg/sql/opt/xform/join_order_builder.go +++ b/pkg/sql/opt/xform/join_order_builder.go @@ -82,16 +82,16 @@ type OnAddJoinFunc func(left, right, all, refs []memo.RelExpr, op opt.Operator) // tuples, where |m| is left input cardinality and |n| is right input // cardinality. With a query like this: // -// SELECT * -// FROM (SELECT * FROM xy INNER JOIN ab ON x = a) -// INNER JOIN uv ON x = u +// SELECT * +// FROM (SELECT * FROM xy INNER JOIN ab ON x = a) +// INNER JOIN uv ON x = u // // An ordering like the following is valid but not desirable, since the cross // join will likely be very expensive compared to a join with a predicate: // -// SELECT * -// FROM (SELECT * FROM uv INNER JOIN ab ON True) -// INNER JOIN xy ON x = a AND x = u +// SELECT * +// FROM (SELECT * FROM uv INNER JOIN ab ON True) +// INNER JOIN xy ON x = a AND x = u // // Avoiding cross joins significantly decreases the search space (and therefore // planning time) without preventing the best plan from being found in most @@ -113,16 +113,16 @@ type OnAddJoinFunc func(left, right, all, refs []memo.RelExpr, op opt.Operator) // // Taking this query as an example: // -// SELECT * -// FROM (SELECT * FROM xy LEFT JOIN ab ON x = a) -// INNER JOIN uv ON x = u AND (y = b OR b IS NULL) +// SELECT * +// FROM (SELECT * FROM xy LEFT JOIN ab ON x = a) +// INNER JOIN uv ON x = u AND (y = b OR b IS NULL) // // The vertexes of the graph would represent the base relations xy, ab and uv. // The three edges would be: // -// x = a [left] -// x = u [inner] -// y = b OR b IS NULL [inner] +// x = a [left] +// x = u [inner] +// y = b OR b IS NULL [inner] // // Then, the DPSube algorithm is executed (see citations: [8]). DPSube // enumerates all disjoint pairs of subsets of base relations such as @@ -145,10 +145,10 @@ type OnAddJoinFunc func(left, right, all, refs []memo.RelExpr, op opt.Operator) // contained in the SES of a join must be present in the join's input. For // example, take the following query: // -// SELECT * -// FROM xy -// LEFT JOIN (SELECT * FROM ab INNER JOIN uv ON a = u) -// ON x = u +// SELECT * +// FROM xy +// LEFT JOIN (SELECT * FROM ab INNER JOIN uv ON a = u) +// ON x = u // // The SES for the left join will contain relations xy and uv because both are // referenced by the join's predicate. Therefore, both must be in the input of @@ -166,10 +166,10 @@ type OnAddJoinFunc func(left, right, all, refs []memo.RelExpr, op opt.Operator) // // Consider the following (invalid) reordering of the above example): // -// SELECT * -// FROM ab -// INNER JOIN (SELECT * FROM xy LEFT JOIN uv ON x = u) -// ON a = u +// SELECT * +// FROM ab +// INNER JOIN (SELECT * FROM xy LEFT JOIN uv ON x = u) +// ON a = u // // The left join's TES will include relations xy and uv because they are in the // SES. The TES will also contain ab because the right-asscom property does not @@ -195,9 +195,9 @@ type OnAddJoinFunc func(left, right, all, refs []memo.RelExpr, op opt.Operator) // their original operator, free to be combined with conjuncts from other inner // joins. For example, take this query: // -// SELECT * -// FROM (SELECT * FROM xy INNER JOIN ab ON x = a) -// INNER JOIN uv ON x = u AND a = u +// SELECT * +// FROM (SELECT * FROM xy INNER JOIN ab ON x = a) +// INNER JOIN uv ON x = u AND a = u // // Treating the ON conditions of these joins as a conglomerate (as we do with // non-inner joins), a join between base relations xy and uv would not be @@ -206,37 +206,38 @@ type OnAddJoinFunc func(left, right, all, refs []memo.RelExpr, op opt.Operator) // conjunct solves this problem, allowing a reordering like the following // (the ab and uv relations are switched, along with the filters): // -// SELECT * -// FROM (SELECT * FROM xy INNER JOIN uv ON x = u) -// INNER JOIN ab ON x = a AND a = u +// SELECT * +// FROM (SELECT * FROM xy INNER JOIN uv ON x = u) +// INNER JOIN ab ON x = a AND a = u // // In fact, this idea can be taken even further. Take this query as an example: // -// SELECT * -// FROM xy -// INNER JOIN (SELECT * FROM ab LEFT JOIN uv ON b = v) -// ON x = a AND (y = u OR u IS NULL) +// SELECT * +// FROM xy +// INNER JOIN (SELECT * FROM ab LEFT JOIN uv ON b = v) +// ON x = a AND (y = u OR u IS NULL) // // The following is a valid reformulation: // -// SELECT * -// FROM (SELECT * FROM xy INNER JOIN ab ON x = a) -// LEFT JOIN uv ON b = v -// WHERE y = u OR u IS NULL +// SELECT * +// FROM (SELECT * FROM xy INNER JOIN ab ON x = a) +// LEFT JOIN uv ON b = v +// WHERE y = u OR u IS NULL // // Notice the new Select operation that now carries the inner join conjunct that // references the right side of the left join. We can model the process that // leads to this reformulation as follows: -// 1. The inner join is rewritten as a cross join and two selects, each -// carrying a conjunct: (x = a) for one and (y = u OR u IS NULL) for the -// other. -// 2. The Select operators are pulled above the inner join. -// 3. The left join and inner join are reordered according to the associative -// property (see citations: [8] table 2). -// 4. Finally, the inner join conjuncts are pushed back down the reordered -// join tree as far as possible. The x = a conjunct can be pushed to the -// inner join, but the (y = u OR u IS NULL) conjunct must remain on the -// Select. +// 1. The inner join is rewritten as a cross join and two selects, each +// carrying a conjunct: (x = a) for one and (y = u OR u IS NULL) for the +// other. +// 2. The Select operators are pulled above the inner join. +// 3. The left join and inner join are reordered according to the associative +// property (see citations: [8] table 2). +// 4. Finally, the inner join conjuncts are pushed back down the reordered +// join tree as far as possible. The x = a conjunct can be pushed to the +// inner join, but the (y = u OR u IS NULL) conjunct must remain on the +// Select. +// // JoinOrderBuilder is able to effect this transformation (though it is not // accomplished in so many steps). // @@ -258,9 +259,9 @@ type OnAddJoinFunc func(left, right, all, refs []memo.RelExpr, op opt.Operator) // we can add new edges that are implied by the transitive closure of the inner // join edges. For example, take this query: // -// SELECT * FROM xy -// INNER JOIN ab ON x = a -// INNER JOIN uv ON a = u +// SELECT * FROM xy +// INNER JOIN ab ON x = a +// INNER JOIN uv ON a = u // // The two edges x = a and a = u are explicit in this join tree. However, there // is the additional implicit edge x = u which can be added to the join graph. @@ -306,6 +307,11 @@ type JoinOrderBuilder struct { // The group for a single base relation is simply the base relation itself. plans map[vertexSet]memo.RelExpr + // applicableEdges maps from each (sub)set of vertexes to the set of edges + // that must be used when building join trees for the set. See + // checkAppliedEdges for more information. + applicableEdges map[vertexSet]edgeSet + // joinCount counts the number of joins that have been added to the join // graph. It is used to ensure that the number of joins that are reordered at // once does not exceed the session limit. @@ -331,11 +337,12 @@ func (jb *JoinOrderBuilder) Init(f *norm.Factory, evalCtx *tree.EvalContext) { // This initialization pattern ensures that fields are not unwittingly // reused. Field reuse must be explicit. *jb = JoinOrderBuilder{ - f: f, - evalCtx: evalCtx, - plans: make(map[vertexSet]memo.RelExpr), - onReorderFunc: jb.onReorderFunc, - onAddJoinFunc: jb.onAddJoinFunc, + f: f, + evalCtx: evalCtx, + plans: make(map[vertexSet]memo.RelExpr), + applicableEdges: make(map[vertexSet]edgeSet), + onReorderFunc: jb.onReorderFunc, + onAddJoinFunc: jb.onAddJoinFunc, } } @@ -447,7 +454,7 @@ func (jb *JoinOrderBuilder) populateGraph(rel memo.RelExpr) (vertexSet, edgeSet) // reflect the transitive closure of all equality filters between columns. // As an example, take a query like the following: // -// SELECT * FROM xy INNER JOIN ab ON x = a INNER JOIN uv ON u = a +// SELECT * FROM xy INNER JOIN ab ON x = a INNER JOIN uv ON u = a // // Contains the explicit edges x = a and u = a, and the implicit edge x = u. // This implicit edge will be added by ensureClosure. @@ -553,6 +560,8 @@ func (jb *JoinOrderBuilder) dpSube() { // relation. We need at least two relations in order to create a new join. continue } + jb.setApplicableEdges(subset) + // Enumerate all possible pairwise-disjoint binary partitions of the subset, // s1 AND s2. These represent sets of relations that may be joined together. // @@ -571,6 +580,19 @@ func (jb *JoinOrderBuilder) dpSube() { } } +// setApplicableEdges initializes applicableEdges with all edges that must show +// up in any join tree that is constructed for the given set of vertexes. See +// checkAppliedEdges for how this information is used. +func (jb *JoinOrderBuilder) setApplicableEdges(s vertexSet) { + applicableEdges := edgeSet{} + for i := range jb.edges { + if jb.edges[i].tes.isSubsetOf(s) { + applicableEdges.Add(i) + } + } + jb.applicableEdges[s] = applicableEdges +} + // addJoins iterates through the edges of the join graph and checks whether any // joins can be constructed between the memo groups for the two given sets of // base relations without creating an invalid plan or introducing cross joins. @@ -580,6 +602,8 @@ func (jb *JoinOrderBuilder) addJoins(s1, s2 vertexSet) { // Both inputs must have plans. return } + // Keep track of which edges are applicable to this join. + var appliedEdges edgeSet var fds props.FuncDepSet fds.AddEquivFrom(&jb.plans[s1].Relational().FuncDeps) @@ -587,27 +611,21 @@ func (jb *JoinOrderBuilder) addJoins(s1, s2 vertexSet) { // Gather all inner edges that connect the left and right relation sets. var innerJoinFilters memo.FiltersExpr - var addInnerJoin bool - var joinIsRedundant bool for i, ok := jb.innerEdges.Next(0); ok; i, ok = jb.innerEdges.Next(i + 1) { e := &jb.edges[i] // Ensure that this edge forms a valid connection between the two sets. See // the checkNonInnerJoin and checkInnerJoin comments for more information. if e.checkInnerJoin(s1, s2) { + // Record this edge as applied even if it's redundant, since redundant + // edges are trivially applied. + appliedEdges.Add(i) if areFiltersRedundant(&fds, e.filters) { // Avoid adding redundant filters. continue } - if !joinIsRedundant { - // If this edge was originally part of a join between relation sets s1 - // and s2, any other edges that apply will also be part of that original - // join. - joinIsRedundant = jb.joinIsRedundant(e, s1, s2) - } getEquivFDs(&fds, e.filters) innerJoinFilters = append(innerJoinFilters, e.filters...) - addInnerJoin = true } } @@ -619,13 +637,17 @@ func (jb *JoinOrderBuilder) addJoins(s1, s2 vertexSet) { // Ensure that this edge forms a valid connection between the two sets. See // the checkNonInnerJoin and checkInnerJoin comments for more information. if e.checkNonInnerJoin(s1, s2) { + appliedEdges.Add(i) + // Construct a non-inner join. If any inner join filters also apply to the // pair of relationSets, construct a select on top of the join with the // inner join filters. - jb.addJoin(e.op.joinType, s1, s2, e.filters, innerJoinFilters, jb.joinIsRedundant(e, s1, s2)) + jb.addJoin(e.op.joinType, s1, s2, e.filters, innerJoinFilters, appliedEdges) return } if e.checkNonInnerJoin(s2, s1) { + appliedEdges.Add(i) + // If joining s1, s2 is not valid, try s2, s1. We only do this if the // s1, s2 join fails, because commutation is handled by the addJoin // function. This is necessary because we only iterate s1 up to subset / 2 @@ -648,17 +670,17 @@ func (jb *JoinOrderBuilder) addJoins(s1, s2 vertexSet) { // 010 on the right. 101 is larger than 111 / 2, so we will not enumerate // this plan unless we consider a join with s2 on the left and s1 on the // right. - jb.addJoin(e.op.joinType, s2, s1, e.filters, innerJoinFilters, jb.joinIsRedundant(e, s2, s1)) + jb.addJoin(e.op.joinType, s2, s1, e.filters, innerJoinFilters, appliedEdges) return } } - if addInnerJoin { + if !appliedEdges.Empty() { // Construct an inner join. Don't add in the case when a non-inner join has // already been constructed, because doing so can lead to a case where a // non-inner join operator 'disappears' because an inner join has replaced // it. - jb.addJoin(opt.InnerJoinOp, s1, s2, innerJoinFilters, nil /* selectFilters */, joinIsRedundant) + jb.addJoin(opt.InnerJoinOp, s1, s2, innerJoinFilters, nil /* selectFilters */, appliedEdges) } } @@ -766,6 +788,39 @@ func (jb *JoinOrderBuilder) hasEqEdge(leftCol, rightCol opt.ColumnID) bool { return false } +// checkAppliedEdges checks that each join plan includes every edge for which +// the TES is a subset of the relations that are joined together by the plan. +// This is necessary to recover a property which the original algorithm relies +// on - namely that if any edge cannot be applied in a given plan, that plan +// must be invalid. Consider the following three points: +// +// 1. The join reordering algorithm never includes a cross-product in an +// enumerated plan unless it was part of the original join tree. This +// means that a join between two sub-plans is only considered if there is +// an applicable edge that can be used to construct the join. +// +// 2. The original paper associates each join in the original join tree with +// exactly one edge in the join hypergraph. +// +// 3. The JoinOrderBuilder departs from the paper by associating each inner +// join conjunct with an edge. This means that each join can be associated +// with one or more edges. See the section in the JoinOrderBuilder comment +// titled "Special handling of inner joins" for details. +// +// (1) and (2) together imply that a reordered join tree is only considered if +// every edge in the hypergraph could be applied to construct a join for every +// subtree. This allows the original algorithm to prevent invalid orderings by +// making a single edge inapplicable. However, because of (3) the same is no +// longer true for the `JoinOrderBuilder`. checkAppliedEdges corrects for this +// by explicitly checking that all applicable edges have been applied when a +// join plan is considered. +func (jb *JoinOrderBuilder) checkAppliedEdges(s1, s2 vertexSet, appliedEdges edgeSet) bool { + leftApplied, rightApplied := jb.applicableEdges[s1], jb.applicableEdges[s2] + allAppliedEdges := appliedEdges.Union(leftApplied).Union(rightApplied) + expectedAppliedEdges := jb.applicableEdges[s1.union(s2)] + return allAppliedEdges.Equals(expectedAppliedEdges) +} + // addJoin adds a join between the given left and right subsets of relations on // the given set of edges. If the group containing joins between this set of // relations is already contained in the plans field, the new join is added to @@ -776,11 +831,14 @@ func (jb *JoinOrderBuilder) addJoin( op opt.Operator, s1, s2 vertexSet, joinFilters, selectFilters memo.FiltersExpr, - joinIsRedundant bool, + appliedEdges edgeSet, ) { if s1.intersects(s2) { panic(errors.AssertionFailedf("sets are not disjoint")) } + if !jb.checkAppliedEdges(s1, s2, appliedEdges) { + return + } if jb.onAddJoinFunc != nil { // Hook for testing purposes. jb.callOnAddJoinFunc(s1, s2, joinFilters, op) @@ -789,7 +847,7 @@ func (jb *JoinOrderBuilder) addJoin( left := jb.plans[s1] right := jb.plans[s2] union := s1.union(s2) - if !joinIsRedundant { + if !jb.joinIsRedundant(s1, s2, appliedEdges) { if jb.plans[union] != nil { jb.addToGroup(op, left, right, joinFilters, selectFilters, jb.plans[union]) } else { @@ -995,10 +1053,22 @@ func (jb *JoinOrderBuilder) addBaseRelation(rel memo.RelExpr) { // joinIsRedundant returns true if a join between the two sets of base relations // was already present in the original join tree. If so, enumerating this join // would be redundant, so it should be skipped. -func (jb *JoinOrderBuilder) joinIsRedundant(e *edge, s1, s2 vertexSet) bool { - // The join is never redundant when rebuildAllJoins is true, because - // rebuildAllJoins indicates we don't want to reuse the original joins. - return !jb.rebuildAllJoins && e.op.leftVertexes == s1 && e.op.rightVertexes == s2 +func (jb *JoinOrderBuilder) joinIsRedundant(s1, s2 vertexSet, appliedEdges edgeSet) bool { + if jb.rebuildAllJoins { + // The join is never redundant when rebuildAllJoins is true, because + // rebuildAllJoins indicates we don't want to reuse the original joins. + return false + } + for i, ok := appliedEdges.Next(0); ok; i, ok = appliedEdges.Next(i + 1) { + e := &jb.edges[i] + if e.op.leftVertexes == s1 && e.op.rightVertexes == s2 { + // If this edge was originally part of a join between relation sets s1 + // and s2, any other edges that apply will also be part of that original + // join. + return true + } + } + return false } // checkSize panics if the number of relations is greater than or equal to @@ -1135,9 +1205,9 @@ type operator struct { // 'to' set must be a subset of the input relations (from -> to). Take the // following query as an example: // -// SELECT * FROM xy -// INNER JOIN (SELECT * FROM ab LEFT JOIN uv ON a = u) -// ON x = a +// SELECT * FROM xy +// INNER JOIN (SELECT * FROM ab LEFT JOIN uv ON a = u) +// ON x = a // // During execution of the CD-C algorithm, the following conflict rule would // be added to inner join edge: [uv -> ab]. This means that, for any join that @@ -1168,19 +1238,19 @@ func (e *edge) calcNullRejectedRels(jb *JoinOrderBuilder) { // a join uses a predicate in its ON condition, all relations in the SES must be // part of the join's inputs. For example, in this query: // -// SELECT * -// FROM xy -// INNER JOIN (SELECT * FROM ab INNER JOIN uv ON b = (u*2)) -// ON x = a +// SELECT * +// FROM xy +// INNER JOIN (SELECT * FROM ab INNER JOIN uv ON b = (u*2)) +// ON x = a // // The SES for the x = a edge would contain relations xy and ab. The SES for the // b = u*2 edge would contain ab and uv. Therefore, this query could be // reordered like so: // -// SELECT * -// FROM (SELECT * FROM xy INNER JOIN ab ON x = a) -// INNER JOIN uv -// ON b = (u*2) +// SELECT * +// FROM (SELECT * FROM xy INNER JOIN ab ON x = a) +// INNER JOIN uv +// ON b = (u*2) // // While still satisfying the syntactic eligibility sets of the edges. func (e *edge) calcSES(jb *JoinOrderBuilder) { @@ -1389,13 +1459,13 @@ func (e *edge) checkNonInnerJoin(s1, s2 vertexSet) bool { // operators. For example, take this perfectly valid (and desirable) // transformation: // -// SELECT * FROM xy -// INNER JOIN (SELECT * FROM ab INNER JOIN uv ON a = u) -// ON x = a AND x = u -// => -// SELECT * FROM ab -// INNER JOIN (SELECT * FROM xy INNER JOIN uv ON x = u) -// ON x = a AND a = u +// SELECT * FROM xy +// INNER JOIN (SELECT * FROM ab INNER JOIN uv ON a = u) +// ON x = a AND x = u +// => +// SELECT * FROM ab +// INNER JOIN (SELECT * FROM xy INNER JOIN uv ON x = u) +// ON x = a AND a = u // // Note that, from the perspective of the x = a edge, it looks like the join has // been commuted (the xy and ab relations switched sides). From the perspective @@ -1439,21 +1509,20 @@ func commute(op opt.Operator) bool { // by the given edges are associative with each other. An example of an // application of the associative property: // -// SELECT * FROM -// ( -// SELECT * FROM xy -// INNER JOIN ab ON x = a -// ) -// INNER JOIN uv ON a = u -// => -// SELECT * FROM xy -// INNER JOIN -// ( -// SELECT * FROM ab -// INNER JOIN uv ON a = u -// ) -// ON x = a -// +// SELECT * FROM +// ( +// SELECT * FROM xy +// INNER JOIN ab ON x = a +// ) +// INNER JOIN uv ON a = u +// => +// SELECT * FROM xy +// INNER JOIN +// ( +// SELECT * FROM ab +// INNER JOIN uv ON a = u +// ) +// ON x = a func assoc(edgeA, edgeB *edge) bool { return checkProperty(assocTable, edgeA, edgeB) } @@ -1462,20 +1531,19 @@ func assoc(edgeA, edgeB *edge) bool { // described by the given edges allow the left-asscom property. An example of // an application of the left-asscom property: // -// SELECT * FROM -// ( -// SELECT * FROM xy -// INNER JOIN ab ON x = a -// ) -// INNER JOIN uv ON x = u -// => -// SELECT * FROM -// ( -// SELECT * FROM xy -// INNER JOIN uv ON x = u -// ) -// INNER JOIN ab ON x = a -// +// SELECT * FROM +// ( +// SELECT * FROM xy +// INNER JOIN ab ON x = a +// ) +// INNER JOIN uv ON x = u +// => +// SELECT * FROM +// ( +// SELECT * FROM xy +// INNER JOIN uv ON x = u +// ) +// INNER JOIN ab ON x = a func leftAsscom(edgeA, edgeB *edge) bool { return checkProperty(leftAsscomTable, edgeA, edgeB) } @@ -1484,22 +1552,21 @@ func leftAsscom(edgeA, edgeB *edge) bool { // described by the given edges allow the right-asscom property. An example of // an application of the right-asscom property: // -// SELECT * FROM uv -// INNER JOIN -// ( -// SELECT * FROM xy -// INNER JOIN ab ON x = a -// ) -// ON a = u -// => -// SELECT * FROM xy -// INNER JOIN -// ( -// SELECT * FROM uv -// INNER JOIN ab ON a = u -// ) -// ON x = a -// +// SELECT * FROM uv +// INNER JOIN +// ( +// SELECT * FROM xy +// INNER JOIN ab ON x = a +// ) +// ON a = u +// => +// SELECT * FROM xy +// INNER JOIN +// ( +// SELECT * FROM uv +// INNER JOIN ab ON a = u +// ) +// ON x = a func rightAsscom(edgeA, edgeB *edge) bool { return checkProperty(rightAsscomTable, edgeA, edgeB) } diff --git a/pkg/sql/opt/xform/testdata/rules/join_order b/pkg/sql/opt/xform/testdata/rules/join_order index a93314f8310a..9099a070c726 100644 --- a/pkg/sql/opt/xform/testdata/rules/join_order +++ b/pkg/sql/opt/xform/testdata/rules/join_order @@ -2016,14 +2016,14 @@ Join Tree #2 Vertexes A: scan abc [as=a2] - B: + C: distinct-on └── scan abc [as=a5] Edges - a2.c = a5.c [inner, ses=AB, tes=AB, rules=()] -Joining AB - A B [inner, refs=AB] - B A [inner, refs=AB] + a2.c = a5.c [inner, ses=AC, tes=AC, rules=()] +Joining AC + A C [inner, refs=AC] + C A [inner, refs=AC] Joins Considered: 2 -------------------------------------------------------------------------------- Join Tree #3 @@ -2038,7 +2038,7 @@ Join Tree #3 └── filters └── a1.a = a2.a Vertexes - C: + D: scan abc [as=a1] A: scan abc [as=a2] @@ -2046,16 +2046,16 @@ Vertexes scan abc [as=a5] Edges a2.c = a5.c [semi, ses=AB, tes=AB, rules=()] - a1.a = a2.a [inner, ses=CA, tes=CA, rules=()] -Joining CA - C A [inner, refs=CA] - A C [inner, refs=CA] + a1.a = a2.a [inner, ses=DA, tes=DA, rules=()] +Joining DA + D A [inner, refs=DA] + A D [inner, refs=DA] Joining AB A B [semi, refs=AB] -Joining CAB - C AB [inner, refs=CA] - AB C [inner, refs=CA] - CA B [semi, refs=AB] +Joining DAB + D AB [inner, refs=DA] + AB D [inner, refs=DA] + DA B [semi, refs=AB] Joins Considered: 6 -------------------------------------------------------------------------------- Join Tree #4 @@ -2074,30 +2074,30 @@ Join Tree #4 └── filters └── a2.b = a3.b Vertexes - C: + D: scan abc [as=a1] - A: + E: semi-join (hash) ├── scan abc [as=a2] ├── scan abc [as=a5] └── filters └── a2.c = a5.c - D: + F: scan abc [as=a3] Edges - a1.a = a2.a [inner, ses=CA, tes=CA, rules=()] - a2.b = a3.b [inner, ses=AD, tes=AD, rules=()] -Joining CA - C A [inner, refs=CA] - A C [inner, refs=CA] -Joining AD - A D [inner, refs=AD] - D A [inner, refs=AD] -Joining CAD - C AD [inner, refs=CA] - AD C [inner, refs=CA] - CA D [inner, refs=AD] - D CA [inner, refs=AD] + a1.a = a2.a [inner, ses=DE, tes=DE, rules=()] + a2.b = a3.b [inner, ses=EF, tes=EF, rules=()] +Joining DE + D E [inner, refs=DE] + E D [inner, refs=DE] +Joining EF + E F [inner, refs=EF] + F E [inner, refs=EF] +Joining DEF + D EF [inner, refs=DE] + EF D [inner, refs=DE] + DE F [inner, refs=EF] + F DE [inner, refs=EF] Joins Considered: 8 -------------------------------------------------------------------------------- Join Tree #5 @@ -2120,7 +2120,7 @@ Join Tree #5 └── filters └── a3.a = a4.a Vertexes - C: + G: inner-join (hash) ├── scan abc [as=a1] ├── semi-join (hash) @@ -2130,24 +2130,24 @@ Vertexes │ └── a2.c = a5.c └── filters └── a1.a = a2.a - D: + F: scan abc [as=a3] - E: + H: scan abc [as=a4] Edges - a2.b = a3.b [inner, ses=CD, tes=CD, rules=()] - a3.a = a4.a [inner, ses=DE, tes=DE, rules=()] -Joining CD - C D [inner, refs=CD] - D C [inner, refs=CD] -Joining DE - D E [inner, refs=DE] - E D [inner, refs=DE] -Joining CDE - C DE [inner, refs=CD] - DE C [inner, refs=CD] - CD E [inner, refs=DE] - E CD [inner, refs=DE] + a2.b = a3.b [inner, ses=GF, tes=GF, rules=()] + a3.a = a4.a [inner, ses=FH, tes=FH, rules=()] +Joining GF + G F [inner, refs=GF] + F G [inner, refs=GF] +Joining FH + F H [inner, refs=FH] + H F [inner, refs=FH] +Joining GFH + G FH [inner, refs=GF] + FH G [inner, refs=GF] + GF H [inner, refs=FH] + H GF [inner, refs=FH] Joins Considered: 8 ================================================================================ Final Plan @@ -2267,29 +2267,29 @@ Vertexes ├── scan bx └── filters └── x IS NOT NULL - C: + D: distinct-on └── scan cy Edges a = x [inner, ses=AB, tes=AB, rules=()] - x = y [inner, ses=BC, tes=BC, rules=()] - a = y [inner, ses=AC, tes=AC, rules=()] + x = y [inner, ses=BD, tes=BD, rules=()] + a = y [inner, ses=AD, tes=AD, rules=()] Joining AB A B [inner, refs=AB] B A [inner, refs=AB] -Joining AC - A C [inner, refs=AC] - C A [inner, refs=AC] -Joining BC - B C [inner, refs=BC] - C B [inner, refs=BC] -Joining ABC - A BC [inner, refs=AB] - BC A [inner, refs=AB] - B AC [inner, refs=AB] - AC B [inner, refs=AB] - AB C [inner, refs=BC] - C AB [inner, refs=BC] +Joining AD + A D [inner, refs=AD] + D A [inner, refs=AD] +Joining BD + B D [inner, refs=BD] + D B [inner, refs=BD] +Joining ABD + A BD [inner, refs=AB] + BD A [inner, refs=AB] + B AD [inner, refs=AB] + AD B [inner, refs=AB] + AB D [inner, refs=BD] + D AB [inner, refs=BD] Joins Considered: 12 -------------------------------------------------------------------------------- Join Tree #4 @@ -2320,13 +2320,13 @@ Vertexes └── x IS NOT NULL C: scan cy - D: + E: scan dz Edges a = x [inner, ses=AB, tes=AB, rules=()] x = y [semi, ses=BC, tes=BC, rules=()] - a = z [inner, ses=AD, tes=AD, rules=(C->B)] - x = z [inner, ses=BD, tes=BD, rules=()] + a = z [inner, ses=AE, tes=AE, rules=(C->B)] + x = z [inner, ses=BE, tes=BE, rules=()] Joining AB A B [inner, refs=AB] B A [inner, refs=AB] @@ -2336,31 +2336,31 @@ Joining ABC A BC [inner, refs=AB] BC A [inner, refs=AB] AB C [semi, refs=BC] -Joining AD - A D [inner, refs=AD] - D A [inner, refs=AD] -Joining BD - B D [inner, refs=BD] - D B [inner, refs=BD] -Joining ABD - A BD [inner, refs=AB] - BD A [inner, refs=AB] - B AD [inner, refs=AB] - AD B [inner, refs=AB] - AB D [inner, refs=AD] - D AB [inner, refs=AD] -Joining BCD - BD C [semi, refs=BC] - BC D [inner, refs=BD] - D BC [inner, refs=BD] -Joining ABCD - A BCD [inner, refs=AB] - BCD A [inner, refs=AB] - ABD C [semi, refs=BC] - BC AD [inner, refs=AB] - AD BC [inner, refs=AB] - ABC D [inner, refs=AD] - D ABC [inner, refs=AD] +Joining AE + A E [inner, refs=AE] + E A [inner, refs=AE] +Joining BE + B E [inner, refs=BE] + E B [inner, refs=BE] +Joining ABE + A BE [inner, refs=AB] + BE A [inner, refs=AB] + B AE [inner, refs=AB] + AE B [inner, refs=AB] + AB E [inner, refs=AE] + E AB [inner, refs=AE] +Joining BCE + BE C [semi, refs=BC] + BC E [inner, refs=BE] + E BC [inner, refs=BE] +Joining ABCE + A BCE [inner, refs=AB] + BCE A [inner, refs=AB] + ABE C [semi, refs=BC] + BC AE [inner, refs=AB] + AE BC [inner, refs=AB] + ABC E [inner, refs=AE] + E ABC [inner, refs=AE] Joins Considered: 26 ================================================================================ Final Plan @@ -2688,3 +2688,168 @@ inner-join (lookup t88659) │ └── filters (true) └── filters └── c:9 = c:15 [outer=(9,15), immutable, constraints=(/9: (/NULL - ]; /15: (/NULL - ]), fd=(9)==(15), (15)==(9)] + +# Regression test for #90761 - don't drop LeftJoin filter when there are enough +# InnerJoin edges to "link" all relations and the LeftJoin doesn't get +# simplified. +exec-ddl +CREATE TABLE t90761 (a INT, b INT, c INT); +---- + +# The 't2.b > t4.b' filter should not be dropped. +reorderjoins disable=RejectNullsUnderJoinRight +SELECT 1 +FROM t90761 AS t1 +JOIN t90761 AS t2 + LEFT JOIN t90761 AS t3 + JOIN t90761 AS t4 ON true + ON t2.b > t4.b +ON t1.a = t4.a AND t1.c = t2.c; +---- +-------------------------------------------------------------------------------- +Join Tree #1 +-------------------------------------------------------------------------------- + inner-join (cross) + ├── scan t90761 [as=t3] + ├── scan t90761 [as=t4] + └── filters (true) +Vertexes + A: + scan t90761 [as=t3] + B: + scan t90761 [as=t4] +Edges + cross [inner, ses=, tes=AB, rules=()] +Joining AB + A B [inner, refs=] + B A [inner, refs=] +Joins Considered: 2 +-------------------------------------------------------------------------------- +Join Tree #2 +-------------------------------------------------------------------------------- + left-join (cross) + ├── scan t90761 [as=t2] + ├── inner-join (cross) + │ ├── scan t90761 [as=t3] + │ ├── scan t90761 [as=t4] + │ └── filters (true) + └── filters + └── t2.b > t4.b +Vertexes + C: + scan t90761 [as=t2] + A: + scan t90761 [as=t3] + B: + scan t90761 [as=t4] +Edges + cross [inner, ses=, tes=AB, rules=()] + t2.b > t4.b [left, ses=CB, tes=CAB, rules=()] +Joining AB + A B [inner, refs=] + B A [inner, refs=] +Joining CAB + C AB [left, refs=CB] +Joins Considered: 3 +-------------------------------------------------------------------------------- +Join Tree #3 +-------------------------------------------------------------------------------- + inner-join (hash) + ├── scan t90761 [as=t1] + ├── left-join (cross) + │ ├── scan t90761 [as=t2] + │ ├── inner-join (cross) + │ │ ├── scan t90761 [as=t3] + │ │ ├── scan t90761 [as=t4] + │ │ └── filters (true) + │ └── filters + │ └── t2.b > t4.b + └── filters + ├── t1.a = t4.a + └── t1.c = t2.c +Vertexes + D: + scan t90761 [as=t1] + C: + scan t90761 [as=t2] + A: + scan t90761 [as=t3] + B: + scan t90761 [as=t4] +Edges + cross [inner, ses=, tes=AB, rules=()] + t2.b > t4.b [left, ses=CB, tes=CAB, rules=()] + t1.a = t4.a [inner, ses=DB, tes=DCB, rules=()] + t1.c = t2.c [inner, ses=DC, tes=DC, rules=()] +Joining DC + D C [inner, refs=DC] + C D [inner, refs=DC] +Joining DCB + DC B [inner, refs=DB] + B DC [inner, refs=DB] +Joining AB + A B [inner, refs=] + B A [inner, refs=] +Joining CAB + C AB [left, refs=CB] +Joining DCAB + D CAB [inner, refs=DCB] + CAB D [inner, refs=DCB] + DC AB [left, refs=CB] +Joins Considered: 10 +================================================================================ +Final Plan +================================================================================ +project + ├── columns: "?column?":25!null + ├── fd: ()-->(25) + ├── inner-join (hash) + │ ├── columns: t1.a:1!null t1.c:3!null t2.b:8 t2.c:9!null t4.a:19!null t4.b:20 + │ ├── fd: (1)==(19), (19)==(1), (3)==(9), (9)==(3) + │ ├── right-join (cross) + │ │ ├── columns: t2.b:8 t2.c:9 t4.a:19 t4.b:20 + │ │ ├── inner-join (cross) + │ │ │ ├── columns: t4.a:19 t4.b:20 + │ │ │ ├── scan t90761 [as=t3] + │ │ │ ├── scan t90761 [as=t4] + │ │ │ │ └── columns: t4.a:19 t4.b:20 + │ │ │ └── filters (true) + │ │ ├── scan t90761 [as=t2] + │ │ │ └── columns: t2.b:8 t2.c:9 + │ │ └── filters + │ │ └── t2.b:8 > t4.b:20 [outer=(8,20), constraints=(/8: (/NULL - ]; /20: (/NULL - ])] + │ ├── scan t90761 [as=t1] + │ │ └── columns: t1.a:1 t1.c:3 + │ └── filters + │ ├── t1.a:1 = t4.a:19 [outer=(1,19), constraints=(/1: (/NULL - ]; /19: (/NULL - ]), fd=(1)==(19), (19)==(1)] + │ └── t1.c:3 = t2.c:9 [outer=(3,9), constraints=(/3: (/NULL - ]; /9: (/NULL - ]), fd=(3)==(9), (9)==(3)] + └── projections + └── 1 [as="?column?":25] + +# The 't2.b > t4.b' filter should not be dropped. Case with 'IS NOT NULL' +# instead of a disabled rule. +opt format=hide-all +SELECT 1 +FROM t90761 AS t1 +JOIN t90761 AS t2 + LEFT JOIN t90761 AS t3 + JOIN t90761 AS t4 ON true + ON t2.b > t4.b +ON (t1.a = t4.a OR t4.a IS NULL) AND t1.c = t2.c; +---- +project + ├── inner-join (hash) + │ ├── right-join (cross) + │ │ ├── inner-join (cross) + │ │ │ ├── scan t90761 [as=t3] + │ │ │ ├── scan t90761 [as=t4] + │ │ │ └── filters (true) + │ │ ├── scan t90761 [as=t2] + │ │ └── filters + │ │ └── t2.b > t4.b + │ ├── scan t90761 [as=t1] + │ └── filters + │ ├── (t1.a = t4.a) OR (t4.a IS NULL) + │ └── t1.c = t2.c + └── projections + └── 1