diff --git a/pkg/sql/inverted/expression.go b/pkg/sql/inverted/expression.go index 1f0f502547e2..0433b660c790 100644 --- a/pkg/sql/inverted/expression.go +++ b/pkg/sql/inverted/expression.go @@ -705,8 +705,8 @@ func unionSpanExpressions(left, right *SpanExpression) *SpanExpression { return expr } -// tryPruneChildren takes an expr with two child *SpanExpression and removes the empty -// children. +// tryPruneChildren takes an expr with two child *SpanExpression and removes +// children when safe to do so. func tryPruneChildren(expr *SpanExpression, op SetOperator) { isEmptyExpr := func(e *SpanExpression) bool { return len(e.FactoredUnionSpans) == 0 && e.Left == nil && e.Right == nil @@ -717,41 +717,50 @@ func tryPruneChildren(expr *SpanExpression, op SetOperator) { if isEmptyExpr(expr.Right.(*SpanExpression)) { expr.Right = nil } - // Promotes the left and right sub-expressions of child to the parent expr, when - // the other child is empty. - promoteChild := func(child *SpanExpression) { - // For SetUnion, the FactoredUnionSpans for the child is already nil - // since it has been unioned into expr. For SetIntersection, the - // FactoredUnionSpans for the child may be non-empty, but is being - // intersected with the other child that is empty, so can be discarded. - // Either way, we don't need to update expr.FactoredUnionSpans. - expr.Operator = child.Operator - expr.Left = child.Left - expr.Right = child.Right - - // If child.FactoredUnionSpans is non-empty, we need to recalculate - // SpansToRead since it may have contained some spans that were removed by - // discarding child.FactoredUnionSpans. - if child.FactoredUnionSpans != nil { - expr.SpansToRead = expr.FactoredUnionSpans - if expr.Left != nil { - expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Left.(*SpanExpression).SpansToRead) - } - if expr.Right != nil { - expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Right.(*SpanExpression).SpansToRead) + if expr.Operator == SetUnion { + // Promotes the left and right sub-expressions of child to the parent + // expr, when the other child is empty. + promoteChild := func(child *SpanExpression) { + // For SetUnion, the FactoredUnionSpans for the child is already nil + // since it has been unioned into expr. Therefore, we don't need to + // update expr.FactoredUnionSpans. + expr.Operator = child.Operator + expr.Left = child.Left + expr.Right = child.Right + + // If child.FactoredUnionSpans is non-empty, we need to recalculate + // SpansToRead since it may have contained some spans that were + // removed by discarding child.FactoredUnionSpans. + if child.FactoredUnionSpans != nil { + expr.SpansToRead = expr.FactoredUnionSpans + if expr.Left != nil { + expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Left.(*SpanExpression).SpansToRead) + } + if expr.Right != nil { + expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Right.(*SpanExpression).SpansToRead) + } } } - } - promoteLeft := expr.Left != nil && expr.Right == nil - promoteRight := expr.Left == nil && expr.Right != nil - if promoteLeft { - promoteChild(expr.Left.(*SpanExpression)) - } - if promoteRight { - promoteChild(expr.Right.(*SpanExpression)) + promoteLeft := expr.Left != nil && expr.Right == nil + promoteRight := expr.Left == nil && expr.Right != nil + if promoteLeft { + promoteChild(expr.Left.(*SpanExpression)) + } + if promoteRight { + promoteChild(expr.Right.(*SpanExpression)) + } + } else if expr.Operator == SetIntersection { + // The result of intersecting with the empty set is the empty set. In + // this case, we can discard the non-empty child. + if expr.Left == nil { + expr.Right = nil + } else if expr.Right == nil { + expr.Left = nil + } } if expr.Left == nil && expr.Right == nil { expr.Operator = None + expr.SpansToRead = expr.FactoredUnionSpans } } diff --git a/pkg/sql/inverted/testdata/expression b/pkg/sql/inverted/testdata/expression index 15b9f0ab4acb..90bf405e0d52 100644 --- a/pkg/sql/inverted/testdata/expression +++ b/pkg/sql/inverted/testdata/expression @@ -588,3 +588,60 @@ node: < to-proto name=none ---- + +# The following set of expressions (up to 'a-and-a-or-b-and-c') is a regression +# test for incorrectly simplifying 'a AND (a OR (b AND c))' to 'a OR (b AND c)' +# (#117979). + +new-span-leaf name=a tight=true unique=true span=a +---- +span expression + ├── tight: true, unique: true + ├── to read: ["a", "a"] + └── union spans: ["a", "a"] + +new-span-leaf name=c tight=true unique=true span=c +---- +span expression + ├── tight: true, unique: true + ├── to read: ["c", "c"] + └── union spans: ["c", "c"] + +and result=b-and-c left=b right=c +---- +span expression + ├── tight: true, unique: true + ├── to read: ["b", "d") + ├── union spans: empty + └── INTERSECTION + ├── span expression + │ ├── tight: true, unique: true + │ ├── to read: ["b", "b"] + │ └── union spans: ["b", "b"] + └── span expression + ├── tight: true, unique: true + ├── to read: ["c", "c"] + └── union spans: ["c", "c"] + +or result=a-or-b-and-c left=a right=b-and-c +---- +span expression + ├── tight: true, unique: false + ├── to read: ["a", "d") + ├── union spans: ["a", "a"] + └── INTERSECTION + ├── span expression + │ ├── tight: true, unique: true + │ ├── to read: ["b", "b"] + │ └── union spans: ["b", "b"] + └── span expression + ├── tight: true, unique: true + ├── to read: ["c", "c"] + └── union spans: ["c", "c"] + +and result=a-and-a-or-b-and-c left=a right=a-or-b-and-c +---- +span expression + ├── tight: true, unique: false + ├── to read: ["a", "a"] + └── union spans: ["a", "a"] diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_index b/pkg/sql/logictest/testdata/logic_test/inverted_index index 7beefaf75ca1..e848fcada96b 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_index +++ b/pkg/sql/logictest/testdata/logic_test/inverted_index @@ -2044,3 +2044,19 @@ SELECT primes FROM cb WHERE primes && numbers ORDER BY primes statement ok CREATE TABLE t84569 (name_col NAME NOT NULL, INVERTED INDEX (name_col gin_trgm_ops)); INSERT INTO t84569 (name_col) VALUES ('X'::NAME) + +# Regression test for incorrectly simplifying inverted expressions (#117979). +statement ok +CREATE TABLE t117979 (id INT PRIMARY KEY, links text[], INVERTED INDEX idx_links(links)); +INSERT INTO t117979 (id, links) VALUES (1, '{str2,str3}'); + +query IT +SELECT * +FROM t117979@{FORCE_INDEX=idx_links} as t +WHERE + ARRAY['str1'] <@ t.links + AND ( + ARRAY ['str1'] && t.links + OR (ARRAY ['str2'] && t.links AND ARRAY ['str3'] && t.links) + ); +---- diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index index 319477be8164..4a2667daf225 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index @@ -3530,3 +3530,47 @@ query T kvtrace SELECT k FROM m@i2 WHERE a IN (10, 20) AND b IN (15, 25) AND j @> '{"a": "b"}' ---- Scan /Table/112/3/10/15/"a"/"b"{-/PrefixEnd}, /Table/112/3/10/25/"a"/"b"{-/PrefixEnd}, /Table/112/3/20/15/"a"/"b"{-/PrefixEnd}, /Table/112/3/20/25/"a"/"b"{-/PrefixEnd} + +# Regression test for incorrectly simplifying inverted expressions (#117979). +statement ok +CREATE TABLE t117979 (id INT PRIMARY KEY, links text[], INVERTED INDEX idx_links(links)); + +query T +EXPLAIN (OPT, VERBOSE) +SELECT * +FROM t117979@{FORCE_INDEX=idx_links} as t +WHERE + ARRAY['str1'] <@ t.links + AND ( + ARRAY ['str1'] && t.links + OR (ARRAY ['str2'] && t.links AND ARRAY ['str3'] && t.links) + ); +---- +index-join t117979 + ├── columns: id:1 links:2 + ├── immutable + ├── stats: [rows=111.1111] + ├── cost: 813.615556 + ├── key: (1) + ├── fd: (1)-->(2) + ├── distribution: test + ├── prune: (1) + └── inverted-filter + ├── columns: id:1 + ├── inverted expression: /5 + │ ├── tight: true, unique: false + │ └── union spans: ["\x12str1\x00\x01", "\x12str1\x00\x01"] + ├── stats: [rows=111.1111] + ├── cost: 139.151111 + ├── key: (1) + ├── distribution: test + └── scan t117979@idx_links [as=t] + ├── columns: id:1 links_inverted_key:5 + ├── inverted constraint: /5/1 + │ └── spans: ["\x12str1\x00\x01", "\x12str1\x00\x01"] + ├── flags: force-index=idx_links + ├── stats: [rows=111.1111, distinct(1)=111.111, null(1)=0, distinct(5)=100, null(5)=0] + ├── cost: 138.02 + ├── key: (1) + ├── fd: (1)-->(5) + └── distribution: test