Skip to content

Commit

Permalink
Merge pull request #471 from dolthub/aaron/support-join-on-or-clause-…
Browse files Browse the repository at this point in the history
…uses-indexes

sql/analyzer: Add support for using indexes in joins where ON condition includes top level OR clauses.
  • Loading branch information
reltuk committed Jun 16, 2021
2 parents 394c2ef + 373aca7 commit a25dd40
Show file tree
Hide file tree
Showing 7 changed files with 497 additions and 20 deletions.
48 changes: 48 additions & 0 deletions enginetest/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -1807,6 +1807,54 @@ var QueryTests = []QueryTest{
{3, 3},
},
},
{
Query: "SELECT i, s, i2, s2 FROM mytable INNER JOIN othertable ON i = i2 OR s = s2 order by 1",
Expected: []sql.Row{
{1, "first row", 1, "third"},
{2, "second row", 2, "second"},
{3, "third row", 3, "first"},
},
},
{
Query: "SELECT i, s, i2, s2 FROM mytable INNER JOIN othertable ON i = i2 OR SUBSTRING_INDEX(s, ' ', 1) = s2 order by 1, 3",
Expected: []sql.Row{
{1, "first row", 1, "third"},
{1, "first row", 3, "first"},
{2, "second row", 2, "second"},
{3, "third row", 1, "third"},
{3, "third row", 3, "first"},
},
},
{
Query: "SELECT i, s, i2, s2 FROM mytable INNER JOIN othertable ON i = i2 OR SUBSTRING_INDEX(s, ' ', 1) = s2 OR SUBSTRING_INDEX(s, ' ', 2) = s2 order by 1, 3",
Expected: []sql.Row{
{1, "first row", 1, "third"},
{1, "first row", 3, "first"},
{2, "second row", 2, "second"},
{3, "third row", 1, "third"},
{3, "third row", 3, "first"},
},
},
{
Query: "SELECT i, s, i2, s2 FROM mytable INNER JOIN othertable ON i = i2 OR SUBSTRING_INDEX(s, ' ', 2) = s2 OR SUBSTRING_INDEX(s, ' ', 1) = s2 order by 1, 3",
Expected: []sql.Row{
{1, "first row", 1, "third"},
{1, "first row", 3, "first"},
{2, "second row", 2, "second"},
{3, "third row", 1, "third"},
{3, "third row", 3, "first"},
},
},
{
Query: "SELECT i, s, i2, s2 FROM mytable INNER JOIN othertable ON SUBSTRING_INDEX(s, ' ', 2) = s2 OR SUBSTRING_INDEX(s, ' ', 1) = s2 OR i = i2 order by 1, 3",
Expected: []sql.Row{
{1, "first row", 1, "third"},
{1, "first row", 3, "first"},
{2, "second row", 2, "second"},
{3, "third row", 1, "third"},
{3, "third row", 3, "first"},
},
},
{
Query: `select row_number() over (order by i desc), mytable.i as i2
from mytable join othertable on i = i2 order by 1`,
Expand Down
53 changes: 53 additions & 0 deletions enginetest/query_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,49 @@ var PlanTests = []QueryPlanTest{
" └─ IndexedTableAccess(othertable on [othertable.i2])\n" +
"",
},
{
Query: `SELECT i, i2, s2 FROM mytable INNER JOIN othertable ON i = i2 OR s = s2`,
ExpectedPlan: "Project(mytable.i, othertable.i2, othertable.s2)\n" +
" └─ IndexedJoin((mytable.i = othertable.i2) OR (mytable.s = othertable.s2))\n" +
" ├─ Table(mytable)\n" +
" └─ Concat\n" +
" ├─ IndexedTableAccess(othertable on [othertable.i2])\n" +
" └─ IndexedTableAccess(othertable on [othertable.s2])\n" +
"",
},
{
Query: `SELECT i, i2, s2 FROM mytable INNER JOIN othertable ot ON i = i2 OR s = s2`,
ExpectedPlan: "Project(mytable.i, ot.i2, ot.s2)\n" +
" └─ IndexedJoin((mytable.i = ot.i2) OR (mytable.s = ot.s2))\n" +
" ├─ Table(mytable)\n" +
" └─ TableAlias(ot)\n" +
" └─ Concat\n" +
" ├─ IndexedTableAccess(othertable on [othertable.i2])\n" +
" └─ IndexedTableAccess(othertable on [othertable.s2])\n" +
"",
},
{
Query: `SELECT i, i2, s2 FROM mytable INNER JOIN othertable ON i = i2 OR SUBSTRING_INDEX(s, ' ', 1) = s2`,
ExpectedPlan: "Project(mytable.i, othertable.i2, othertable.s2)\n" +
" └─ IndexedJoin((mytable.i = othertable.i2) OR (SUBSTRING_INDEX(mytable.s, \" \", &{1 {257}}) = othertable.s2))\n" +
" ├─ Table(mytable)\n" +
" └─ Concat\n" +
" ├─ IndexedTableAccess(othertable on [othertable.i2])\n" +
" └─ IndexedTableAccess(othertable on [othertable.s2])\n" +
"",
},
{
Query: `SELECT i, i2, s2 FROM mytable INNER JOIN othertable ON i = i2 OR SUBSTRING_INDEX(s, ' ', 1) = s2 OR SUBSTRING_INDEX(s, ' ', 2) = s2`,
ExpectedPlan: "Project(mytable.i, othertable.i2, othertable.s2)\n" +
" └─ IndexedJoin(((mytable.i = othertable.i2) OR (SUBSTRING_INDEX(mytable.s, \" \", &{1 {257}}) = othertable.s2)) OR (SUBSTRING_INDEX(mytable.s, \" \", &{2 {257}}) = othertable.s2))\n" +
" ├─ Table(mytable)\n" +
" └─ Concat\n" +
" ├─ Concat\n" +
" │ ├─ IndexedTableAccess(othertable on [othertable.i2])\n" +
" │ └─ IndexedTableAccess(othertable on [othertable.s2])\n" +
" └─ IndexedTableAccess(othertable on [othertable.s2])\n" +
"",
},
{
Query: `SELECT i, i2, s2 FROM mytable INNER JOIN othertable ON i = i2 UNION SELECT i, i2, s2 FROM mytable INNER JOIN othertable ON i = i2`,
ExpectedPlan: "Distinct\n" +
Expand Down Expand Up @@ -531,6 +574,16 @@ var PlanTests = []QueryPlanTest{
" └─ IndexedTableAccess(two_pk on [two_pk.pk1,two_pk.pk2])\n" +
"",
},
{
Query: `SELECT pk,pk1,pk2 FROM one_pk JOIN two_pk ON one_pk.pk=two_pk.pk1 AND one_pk.pk=two_pk.pk2 OR one_pk.c2 = two_pk.c3`,
ExpectedPlan: "Project(one_pk.pk, two_pk.pk1, two_pk.pk2)\n" +
" └─ InnerJoin(((one_pk.pk = two_pk.pk1) AND (one_pk.pk = two_pk.pk2)) OR (one_pk.c2 = two_pk.c3))\n" +
" ├─ Projected table access on [pk c2]\n" +
" │ └─ Table(one_pk)\n" +
" └─ Projected table access on [pk1 pk2 c3]\n" +
" └─ Table(two_pk)\n" +
"",
},
{
Query: `SELECT pk,pk1,pk2 FROM one_pk opk JOIN two_pk tpk ON opk.pk=tpk.pk1 AND opk.pk=tpk.pk2`,
ExpectedPlan: "Project(opk.pk, tpk.pk1, tpk.pk2)\n" +
Expand Down
74 changes: 74 additions & 0 deletions enginetest/script_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,80 @@ var ScriptTests = []ScriptTest{
{3, 3},
},
},
{
Name: "Indexed Join On Keyless Table",
SetUpScript: []string{
"create table l (pk int primary key, c0 int, c1 int);",
"create table r (c0 int, c1 int, third int);",
"create index r_c0 on r (c0);",
"create index r_c1 on r (c1);",
"create index r_third on r (third);",
"insert into l values (0, 0, 0), (1, 0, 1), (2, 1, 0), (3, 0, 2), (4, 2, 0), (5, 1, 2), (6, 2, 1), (7, 2, 2);",
"insert into l values (256, 1024, 4096);",
"insert into r values (1, 1, -1), (2, 2, -1), (2, 2, -1);",
"insert into r values (-1, -1, 256);",
},
Assertions: []ScriptTestAssertion{
{
Query: "select pk, l.c0, l.c1 from l join r on l.c0 = r.c0 or l.c1 = r.c1 order by 1, 2, 3;",
Expected: []sql.Row{
{1, 0, 1},
{2, 1, 0},
{3, 0, 2},
{3, 0, 2},
{4, 2, 0},
{4, 2, 0},
{5, 1, 2},
{5, 1, 2},
{5, 1, 2},
{6, 2, 1},
{6, 2, 1},
{6, 2, 1},
{7, 2, 2},
{7, 2, 2},
},
},
{
Query: "select pk, l.c0, l.c1 from l join r on l.c0 = r.c0 or l.c1 = r.c1 or l.pk = r.third order by 1, 2, 3;",
Expected: []sql.Row{
{1, 0, 1},
{2, 1, 0},
{3, 0, 2},
{3, 0, 2},
{4, 2, 0},
{4, 2, 0},
{5, 1, 2},
{5, 1, 2},
{5, 1, 2},
{6, 2, 1},
{6, 2, 1},
{6, 2, 1},
{7, 2, 2},
{7, 2, 2},
{256, 1024, 4096},
},
},
{
Query: "select pk, l.c0, l.c1 from l join r on l.c0 = r.c0 or l.c1 < 4 and l.c1 = r.c1 or l.c1 >= 4 and l.c1 = r.c1 order by 1, 2, 3;",
Expected: []sql.Row{
{1, 0, 1},
{2, 1, 0},
{3, 0, 2},
{3, 0, 2},
{4, 2, 0},
{4, 2, 0},
{5, 1, 2},
{5, 1, 2},
{5, 1, 2},
{6, 2, 1},
{6, 2, 1},
{6, 2, 1},
{7, 2, 2},
{7, 2, 2},
},
},
},
},
{
Name: "Group Concat Queries",
SetUpScript: []string{
Expand Down
4 changes: 2 additions & 2 deletions sql/analyzer/aliases.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func getTableAliases(n sql.Node, scope *Scope) (TableAliases, error) {

if at, ok := node.(*plan.TableAlias); ok {
switch t := at.Child.(type) {
case *plan.ResolvedTable, *plan.SubqueryAlias, *plan.ValueDerivedTable:
case *plan.ResolvedTable, *plan.SubqueryAlias, *plan.ValueDerivedTable, *plan.TransformedNamedNode:
analysisErr = passAliases.add(at, t.(NameableNode))
case *plan.DecoratedNode:
rt := getResolvedTable(at.Child)
Expand Down Expand Up @@ -102,7 +102,7 @@ func getTableAliases(n sql.Node, scope *Scope) (TableAliases, error) {
rt := getResolvedTable(node.Destination)
analysisErr = passAliases.add(rt, rt)
return false
case *plan.ResolvedTable, *plan.SubqueryAlias, *plan.ValueDerivedTable:
case *plan.ResolvedTable, *plan.SubqueryAlias, *plan.ValueDerivedTable, *plan.TransformedNamedNode:
analysisErr = passAliases.add(node.(sql.Nameable), node.(sql.Nameable))
return false
case *plan.DecoratedNode:
Expand Down
113 changes: 95 additions & 18 deletions sql/analyzer/indexed_joins.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,35 @@ func replaceTableAccessWithIndexedAccess(
tableAliases TableAliases,
) (sql.Node, bool, error) {

var toIndexedTableAccess func(node *plan.ResolvedTable, indexToApply *joinIndex) (sql.Node, bool, error)
toIndexedTableAccess = func(node *plan.ResolvedTable, indexToApply *joinIndex) (sql.Node, bool, error) {
if _, ok := node.Table.(sql.IndexAddressableTable); !ok {
return node, false, nil
}

if indexToApply.index != nil {
keyExprs := createIndexLookupKeyExpression(ctx, indexToApply, tableAliases)
keyExprs, err := FixFieldIndexesOnExpressions(ctx, scope, a, schema, keyExprs...)
if err != nil {
return nil, false, err
}
return plan.NewIndexedTableAccess(node, indexToApply.index, keyExprs), true, nil
} else {
ln, lr, lerr := toIndexedTableAccess(node, indexToApply.disjunction[0])
if lerr != nil {
return node, false, lerr
}
rn, rr, rerr := toIndexedTableAccess(node, indexToApply.disjunction[1])
if rerr != nil {
return node, false, rerr
}
if lr && rr {
return plan.NewTransformedNamedNode(plan.NewConcat(ln, rn), node.Name()), true, nil
}
return node, false, nil
}
}

switch node := node.(type) {
case *plan.TableAlias, *plan.ResolvedTable:
// If the available schema makes an index on this table possible, use it, replacing the table with indexed access
Expand All @@ -131,18 +160,9 @@ func replaceTableAccessWithIndexedAccess(
node, err := plan.TransformUp(node, func(node sql.Node) (sql.Node, error) {
switch node := node.(type) {
case *plan.ResolvedTable:
if _, ok := node.Table.(sql.IndexAddressableTable); !ok {
return node, nil
}

keyExprs := createIndexLookupKeyExpression(ctx, indexToApply, tableAliases)
keyExprs, err := FixFieldIndexesOnExpressions(ctx, scope, a, schema, keyExprs...)
if err != nil {
return nil, err
}

replaced = true
return plan.NewIndexedTableAccess(node, indexToApply.index, keyExprs), nil
n, r, err := toIndexedTableAccess(node, indexToApply)
replaced = r
return n, err
default:
return node, nil
}
Expand Down Expand Up @@ -402,6 +422,12 @@ type joinIndex struct {
table string
// The index that can be used in this join, if any. nil otherwise
index sql.Index
// This field stores exactly two joinIndexes, representing the two
// branches of an OR expression when the top-level condition is an OR
// expression that could potentially make use of different indexes. If
// disjunction[0] != nil, disjunction[1] will also be nonnil and index
// will be nil.
disjunction [2]*joinIndex
// The join condition
joinCond sql.Expression
// The join type
Expand All @@ -418,13 +444,23 @@ type joinIndex struct {
comparandExprs []sql.Expression
}

func (ji *joinIndex) hasIndex() bool {
if ji.index != nil {
return true
}
if ji.disjunction[0] != nil {
return ji.disjunction[0].hasIndex() && ji.disjunction[1].hasIndex()
}
return false
}

type joinIndexes []*joinIndex
type joinIndexesByTable map[string]joinIndexes

// getUsableIndex returns an index that can be satisfied by the schema given, or nil if no such index exists.
func (j joinIndexes) getUsableIndex(schema sql.Schema) *joinIndex {
for _, joinIndex := range j {
if joinIndex.index == nil {
if !joinIndex.hasIndex() {
continue
}
// If every comparand for this join index is present in the schema given, we can use the corresponding index
Expand Down Expand Up @@ -577,14 +613,14 @@ func getJoinIndexes(
ctx *sql.Context,
a *Analyzer,
ia *indexAnalyzer,
joinCond joinCond,
jc joinCond,
tableAliases TableAliases,
) joinIndexesByTable {

switch joinCond.cond.(type) {
switch cond := jc.cond.(type) {
case *expression.Equals, *expression.NullSafeEquals:
result := make(joinIndexesByTable)
left, right := getEqualityIndexes(ctx, a, ia, joinCond, tableAliases)
left, right := getEqualityIndexes(ctx, a, ia, jc, tableAliases)

// If we can't identify a join index for this condition, return nothing.
if left == nil || right == nil {
Expand All @@ -595,7 +631,7 @@ func getJoinIndexes(
result[right.table] = append(result[right.table], right)
return result
case *expression.And:
exprs := splitConjunction(joinCond.cond)
exprs := splitConjunction(jc.cond)
for _, expr := range exprs {
switch e := expr.(type) {
case *expression.Equals, *expression.NullSafeEquals, *expression.IsNull:
Expand All @@ -608,7 +644,48 @@ func getJoinIndexes(
}
}

return getJoinIndex(ctx, joinCond, exprs, ia, tableAliases)
return getJoinIndex(ctx, jc, exprs, ia, tableAliases)
case *expression.Or:
leftCond := joinCond{cond.Left, jc.joinType, jc.rightHandTable}
rightCond := joinCond{cond.Right, jc.joinType, jc.rightHandTable}
leftIdxByTbl := getJoinIndexes(ctx, a, ia, leftCond, tableAliases)
rightIdxByTbl := getJoinIndexes(ctx, a, ia, rightCond, tableAliases)
result := make(joinIndexesByTable)
for table, lefts := range leftIdxByTbl {
if rights, ok := rightIdxByTbl[table]; ok {
var v joinIndexes
for _, left := range lefts {
for _, right := range rights {
cols := make([]*expression.GetField, 0, len(left.cols)+len(right.cols))
cols = append(cols, left.cols...)
cols = append(cols, right.cols...)
colExprs := make([]sql.Expression, 0, len(left.colExprs)+len(right.colExprs))
colExprs = append(colExprs, left.colExprs...)
colExprs = append(colExprs, right.colExprs...)
comparandCols := make([]*expression.GetField, 0, len(left.comparandCols)+len(right.comparandCols))
comparandCols = append(comparandCols, left.comparandCols...)
comparandCols = append(comparandCols, right.comparandCols...)
comparandExprs := make([]sql.Expression, 0, len(left.comparandExprs)+len(right.comparandExprs))
comparandExprs = append(comparandExprs, left.comparandExprs...)
comparandExprs = append(comparandExprs, right.comparandExprs...)
v = append(v, &joinIndex{
table: table,
index: nil,
disjunction: [2]*joinIndex{left, right},
joinCond: jc.cond,
joinType: jc.joinType,
joinPosition: left.joinPosition,
cols: cols,
colExprs: colExprs,
comparandCols: comparandCols,
comparandExprs: comparandExprs,
})
}
}
result[table] = v
}
}
return result
}
// TODO: handle additional kinds of expressions other than equality

Expand Down
Loading

0 comments on commit a25dd40

Please sign in to comment.