Skip to content

Commit

Permalink
Merge pull request #118360 from cockroachdb/blathers/backport-release…
Browse files Browse the repository at this point in the history
…-23.2-118256

release-23.2: inverted: fix intersection of SpanExpressions in some cases
  • Loading branch information
yuzefovich committed Jan 29, 2024
2 parents 1002748 + 778652a commit 472a9cb
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 32 deletions.
73 changes: 41 additions & 32 deletions pkg/sql/inverted/expression.go
Expand Up @@ -705,8 +705,8 @@ func unionSpanExpressions(left, right *SpanExpression) *SpanExpression {
return expr
}

// tryPruneChildren takes an expr with two child *SpanExpression and removes the empty
// children.
// tryPruneChildren takes an expr with two child *SpanExpression and removes
// children when safe to do so.
func tryPruneChildren(expr *SpanExpression, op SetOperator) {
isEmptyExpr := func(e *SpanExpression) bool {
return len(e.FactoredUnionSpans) == 0 && e.Left == nil && e.Right == nil
Expand All @@ -717,41 +717,50 @@ func tryPruneChildren(expr *SpanExpression, op SetOperator) {
if isEmptyExpr(expr.Right.(*SpanExpression)) {
expr.Right = nil
}
// Promotes the left and right sub-expressions of child to the parent expr, when
// the other child is empty.
promoteChild := func(child *SpanExpression) {
// For SetUnion, the FactoredUnionSpans for the child is already nil
// since it has been unioned into expr. For SetIntersection, the
// FactoredUnionSpans for the child may be non-empty, but is being
// intersected with the other child that is empty, so can be discarded.
// Either way, we don't need to update expr.FactoredUnionSpans.
expr.Operator = child.Operator
expr.Left = child.Left
expr.Right = child.Right

// If child.FactoredUnionSpans is non-empty, we need to recalculate
// SpansToRead since it may have contained some spans that were removed by
// discarding child.FactoredUnionSpans.
if child.FactoredUnionSpans != nil {
expr.SpansToRead = expr.FactoredUnionSpans
if expr.Left != nil {
expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Left.(*SpanExpression).SpansToRead)
}
if expr.Right != nil {
expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Right.(*SpanExpression).SpansToRead)
if expr.Operator == SetUnion {
// Promotes the left and right sub-expressions of child to the parent
// expr, when the other child is empty.
promoteChild := func(child *SpanExpression) {
// For SetUnion, the FactoredUnionSpans for the child is already nil
// since it has been unioned into expr. Therefore, we don't need to
// update expr.FactoredUnionSpans.
expr.Operator = child.Operator
expr.Left = child.Left
expr.Right = child.Right

// If child.FactoredUnionSpans is non-empty, we need to recalculate
// SpansToRead since it may have contained some spans that were
// removed by discarding child.FactoredUnionSpans.
if child.FactoredUnionSpans != nil {
expr.SpansToRead = expr.FactoredUnionSpans
if expr.Left != nil {
expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Left.(*SpanExpression).SpansToRead)
}
if expr.Right != nil {
expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Right.(*SpanExpression).SpansToRead)
}
}
}
}
promoteLeft := expr.Left != nil && expr.Right == nil
promoteRight := expr.Left == nil && expr.Right != nil
if promoteLeft {
promoteChild(expr.Left.(*SpanExpression))
}
if promoteRight {
promoteChild(expr.Right.(*SpanExpression))
promoteLeft := expr.Left != nil && expr.Right == nil
promoteRight := expr.Left == nil && expr.Right != nil
if promoteLeft {
promoteChild(expr.Left.(*SpanExpression))
}
if promoteRight {
promoteChild(expr.Right.(*SpanExpression))
}
} else if expr.Operator == SetIntersection {
// The result of intersecting with the empty set is the empty set. In
// this case, we can discard the non-empty child.
if expr.Left == nil {
expr.Right = nil
} else if expr.Right == nil {
expr.Left = nil
}
}
if expr.Left == nil && expr.Right == nil {
expr.Operator = None
expr.SpansToRead = expr.FactoredUnionSpans
}
}

Expand Down
57 changes: 57 additions & 0 deletions pkg/sql/inverted/testdata/expression
Expand Up @@ -588,3 +588,60 @@ node: <
to-proto name=none
----
<nil>

# The following set of expressions (up to 'a-and-a-or-b-and-c') is a regression
# test for incorrectly simplifying 'a AND (a OR (b AND c))' to 'a OR (b AND c)'
# (#117979).

new-span-leaf name=a tight=true unique=true span=a
----
span expression
├── tight: true, unique: true
├── to read: ["a", "a"]
└── union spans: ["a", "a"]

new-span-leaf name=c tight=true unique=true span=c
----
span expression
├── tight: true, unique: true
├── to read: ["c", "c"]
└── union spans: ["c", "c"]

and result=b-and-c left=b right=c
----
span expression
├── tight: true, unique: true
├── to read: ["b", "d")
├── union spans: empty
└── INTERSECTION
├── span expression
│ ├── tight: true, unique: true
│ ├── to read: ["b", "b"]
│ └── union spans: ["b", "b"]
└── span expression
├── tight: true, unique: true
├── to read: ["c", "c"]
└── union spans: ["c", "c"]

or result=a-or-b-and-c left=a right=b-and-c
----
span expression
├── tight: true, unique: false
├── to read: ["a", "d")
├── union spans: ["a", "a"]
└── INTERSECTION
├── span expression
│ ├── tight: true, unique: true
│ ├── to read: ["b", "b"]
│ └── union spans: ["b", "b"]
└── span expression
├── tight: true, unique: true
├── to read: ["c", "c"]
└── union spans: ["c", "c"]

and result=a-and-a-or-b-and-c left=a right=a-or-b-and-c
----
span expression
├── tight: true, unique: false
├── to read: ["a", "a"]
└── union spans: ["a", "a"]
16 changes: 16 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inverted_index
Expand Up @@ -2044,3 +2044,19 @@ SELECT primes FROM cb WHERE primes && numbers ORDER BY primes
statement ok
CREATE TABLE t84569 (name_col NAME NOT NULL, INVERTED INDEX (name_col gin_trgm_ops));
INSERT INTO t84569 (name_col) VALUES ('X'::NAME)

# Regression test for incorrectly simplifying inverted expressions (#117979).
statement ok
CREATE TABLE t117979 (id INT PRIMARY KEY, links text[], INVERTED INDEX idx_links(links));
INSERT INTO t117979 (id, links) VALUES (1, '{str2,str3}');

query IT
SELECT *
FROM t117979@{FORCE_INDEX=idx_links} as t
WHERE
ARRAY['str1'] <@ t.links
AND (
ARRAY ['str1'] && t.links
OR (ARRAY ['str2'] && t.links AND ARRAY ['str3'] && t.links)
);
----
44 changes: 44 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Expand Up @@ -3530,3 +3530,47 @@ query T kvtrace
SELECT k FROM m@i2 WHERE a IN (10, 20) AND b IN (15, 25) AND j @> '{"a": "b"}'
----
Scan /Table/112/3/10/15/"a"/"b"{-/PrefixEnd}, /Table/112/3/10/25/"a"/"b"{-/PrefixEnd}, /Table/112/3/20/15/"a"/"b"{-/PrefixEnd}, /Table/112/3/20/25/"a"/"b"{-/PrefixEnd}

# Regression test for incorrectly simplifying inverted expressions (#117979).
statement ok
CREATE TABLE t117979 (id INT PRIMARY KEY, links text[], INVERTED INDEX idx_links(links));

query T
EXPLAIN (OPT, VERBOSE)
SELECT *
FROM t117979@{FORCE_INDEX=idx_links} as t
WHERE
ARRAY['str1'] <@ t.links
AND (
ARRAY ['str1'] && t.links
OR (ARRAY ['str2'] && t.links AND ARRAY ['str3'] && t.links)
);
----
index-join t117979
├── columns: id:1 links:2
├── immutable
├── stats: [rows=111.1111]
├── cost: 813.615556
├── key: (1)
├── fd: (1)-->(2)
├── distribution: test
├── prune: (1)
└── inverted-filter
├── columns: id:1
├── inverted expression: /5
│ ├── tight: true, unique: false
│ └── union spans: ["\x12str1\x00\x01", "\x12str1\x00\x01"]
├── stats: [rows=111.1111]
├── cost: 139.151111
├── key: (1)
├── distribution: test
└── scan t117979@idx_links [as=t]
├── columns: id:1 links_inverted_key:5
├── inverted constraint: /5/1
│ └── spans: ["\x12str1\x00\x01", "\x12str1\x00\x01"]
├── flags: force-index=idx_links
├── stats: [rows=111.1111, distinct(1)=111.111, null(1)=0, distinct(5)=100, null(5)=0]
├── cost: 138.02
├── key: (1)
├── fd: (1)-->(5)
└── distribution: test

0 comments on commit 472a9cb

Please sign in to comment.