Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-23.2: inverted: fix intersection of SpanExpressions in some cases #118360

Merged
merged 1 commit into from Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
73 changes: 41 additions & 32 deletions pkg/sql/inverted/expression.go
Expand Up @@ -705,8 +705,8 @@ func unionSpanExpressions(left, right *SpanExpression) *SpanExpression {
return expr
}

// tryPruneChildren takes an expr with two child *SpanExpression and removes the empty
// children.
// tryPruneChildren takes an expr with two child *SpanExpression and removes
// children when safe to do so.
func tryPruneChildren(expr *SpanExpression, op SetOperator) {
isEmptyExpr := func(e *SpanExpression) bool {
return len(e.FactoredUnionSpans) == 0 && e.Left == nil && e.Right == nil
Expand All @@ -717,41 +717,50 @@ func tryPruneChildren(expr *SpanExpression, op SetOperator) {
if isEmptyExpr(expr.Right.(*SpanExpression)) {
expr.Right = nil
}
// Promotes the left and right sub-expressions of child to the parent expr, when
// the other child is empty.
promoteChild := func(child *SpanExpression) {
// For SetUnion, the FactoredUnionSpans for the child is already nil
// since it has been unioned into expr. For SetIntersection, the
// FactoredUnionSpans for the child may be non-empty, but is being
// intersected with the other child that is empty, so can be discarded.
// Either way, we don't need to update expr.FactoredUnionSpans.
expr.Operator = child.Operator
expr.Left = child.Left
expr.Right = child.Right

// If child.FactoredUnionSpans is non-empty, we need to recalculate
// SpansToRead since it may have contained some spans that were removed by
// discarding child.FactoredUnionSpans.
if child.FactoredUnionSpans != nil {
expr.SpansToRead = expr.FactoredUnionSpans
if expr.Left != nil {
expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Left.(*SpanExpression).SpansToRead)
}
if expr.Right != nil {
expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Right.(*SpanExpression).SpansToRead)
if expr.Operator == SetUnion {
// Promotes the left and right sub-expressions of child to the parent
// expr, when the other child is empty.
promoteChild := func(child *SpanExpression) {
// For SetUnion, the FactoredUnionSpans for the child is already nil
// since it has been unioned into expr. Therefore, we don't need to
// update expr.FactoredUnionSpans.
expr.Operator = child.Operator
expr.Left = child.Left
expr.Right = child.Right

// If child.FactoredUnionSpans is non-empty, we need to recalculate
// SpansToRead since it may have contained some spans that were
// removed by discarding child.FactoredUnionSpans.
if child.FactoredUnionSpans != nil {
expr.SpansToRead = expr.FactoredUnionSpans
if expr.Left != nil {
expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Left.(*SpanExpression).SpansToRead)
}
if expr.Right != nil {
expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Right.(*SpanExpression).SpansToRead)
}
}
}
}
promoteLeft := expr.Left != nil && expr.Right == nil
promoteRight := expr.Left == nil && expr.Right != nil
if promoteLeft {
promoteChild(expr.Left.(*SpanExpression))
}
if promoteRight {
promoteChild(expr.Right.(*SpanExpression))
promoteLeft := expr.Left != nil && expr.Right == nil
promoteRight := expr.Left == nil && expr.Right != nil
if promoteLeft {
promoteChild(expr.Left.(*SpanExpression))
}
if promoteRight {
promoteChild(expr.Right.(*SpanExpression))
}
} else if expr.Operator == SetIntersection {
// The result of intersecting with the empty set is the empty set. In
// this case, we can discard the non-empty child.
if expr.Left == nil {
expr.Right = nil
} else if expr.Right == nil {
expr.Left = nil
}
}
if expr.Left == nil && expr.Right == nil {
expr.Operator = None
expr.SpansToRead = expr.FactoredUnionSpans
}
}

Expand Down
57 changes: 57 additions & 0 deletions pkg/sql/inverted/testdata/expression
Expand Up @@ -588,3 +588,60 @@ node: <
to-proto name=none
----
<nil>

# The following set of expressions (up to 'a-and-a-or-b-and-c') is a regression
# test for incorrectly simplifying 'a AND (a OR (b AND c))' to 'a OR (b AND c)'
# (#117979).

new-span-leaf name=a tight=true unique=true span=a
----
span expression
├── tight: true, unique: true
├── to read: ["a", "a"]
└── union spans: ["a", "a"]

new-span-leaf name=c tight=true unique=true span=c
----
span expression
├── tight: true, unique: true
├── to read: ["c", "c"]
└── union spans: ["c", "c"]

and result=b-and-c left=b right=c
----
span expression
├── tight: true, unique: true
├── to read: ["b", "d")
├── union spans: empty
└── INTERSECTION
├── span expression
│ ├── tight: true, unique: true
│ ├── to read: ["b", "b"]
│ └── union spans: ["b", "b"]
└── span expression
├── tight: true, unique: true
├── to read: ["c", "c"]
└── union spans: ["c", "c"]

or result=a-or-b-and-c left=a right=b-and-c
----
span expression
├── tight: true, unique: false
├── to read: ["a", "d")
├── union spans: ["a", "a"]
└── INTERSECTION
├── span expression
│ ├── tight: true, unique: true
│ ├── to read: ["b", "b"]
│ └── union spans: ["b", "b"]
└── span expression
├── tight: true, unique: true
├── to read: ["c", "c"]
└── union spans: ["c", "c"]

and result=a-and-a-or-b-and-c left=a right=a-or-b-and-c
----
span expression
├── tight: true, unique: false
├── to read: ["a", "a"]
└── union spans: ["a", "a"]
16 changes: 16 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inverted_index
Expand Up @@ -2044,3 +2044,19 @@ SELECT primes FROM cb WHERE primes && numbers ORDER BY primes
statement ok
CREATE TABLE t84569 (name_col NAME NOT NULL, INVERTED INDEX (name_col gin_trgm_ops));
INSERT INTO t84569 (name_col) VALUES ('X'::NAME)

# Regression test for incorrectly simplifying inverted expressions (#117979).
statement ok
CREATE TABLE t117979 (id INT PRIMARY KEY, links text[], INVERTED INDEX idx_links(links));
INSERT INTO t117979 (id, links) VALUES (1, '{str2,str3}');

query IT
SELECT *
FROM t117979@{FORCE_INDEX=idx_links} as t
WHERE
ARRAY['str1'] <@ t.links
AND (
ARRAY ['str1'] && t.links
OR (ARRAY ['str2'] && t.links AND ARRAY ['str3'] && t.links)
);
----
44 changes: 44 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Expand Up @@ -3530,3 +3530,47 @@ query T kvtrace
SELECT k FROM m@i2 WHERE a IN (10, 20) AND b IN (15, 25) AND j @> '{"a": "b"}'
----
Scan /Table/112/3/10/15/"a"/"b"{-/PrefixEnd}, /Table/112/3/10/25/"a"/"b"{-/PrefixEnd}, /Table/112/3/20/15/"a"/"b"{-/PrefixEnd}, /Table/112/3/20/25/"a"/"b"{-/PrefixEnd}

# Regression test for incorrectly simplifying inverted expressions (#117979).
statement ok
CREATE TABLE t117979 (id INT PRIMARY KEY, links text[], INVERTED INDEX idx_links(links));

query T
EXPLAIN (OPT, VERBOSE)
SELECT *
FROM t117979@{FORCE_INDEX=idx_links} as t
WHERE
ARRAY['str1'] <@ t.links
AND (
ARRAY ['str1'] && t.links
OR (ARRAY ['str2'] && t.links AND ARRAY ['str3'] && t.links)
);
----
index-join t117979
├── columns: id:1 links:2
├── immutable
├── stats: [rows=111.1111]
├── cost: 813.615556
├── key: (1)
├── fd: (1)-->(2)
├── distribution: test
├── prune: (1)
└── inverted-filter
├── columns: id:1
├── inverted expression: /5
│ ├── tight: true, unique: false
│ └── union spans: ["\x12str1\x00\x01", "\x12str1\x00\x01"]
├── stats: [rows=111.1111]
├── cost: 139.151111
├── key: (1)
├── distribution: test
└── scan t117979@idx_links [as=t]
├── columns: id:1 links_inverted_key:5
├── inverted constraint: /5/1
│ └── spans: ["\x12str1\x00\x01", "\x12str1\x00\x01"]
├── flags: force-index=idx_links
├── stats: [rows=111.1111, distinct(1)=111.111, null(1)=0, distinct(5)=100, null(5)=0]
├── cost: 138.02
├── key: (1)
├── fd: (1)-->(5)
└── distribution: test