Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt: index accelerate chained fetch value operators #59494

Merged
merged 2 commits into from
Jan 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 21 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/inverted_index
Expand Up @@ -744,7 +744,15 @@ INSERT INTO f VALUES
(2, '{"b": 2}'),
(3, '{"b": 2, "a": 1}'),
(4, '{"a": 1, "c": 3}'),
(5, '{"a": [1, 2]}')
(5, '{"a": [1, 2]}'),
(6, '{"a": {"b": 1}}'),
(7, '{"a": {"b": 1, "d": 2}}'),
(8, '{"a": {"d": 2}}'),
(9, '{"a": {"b": [1, 2]}}'),
(10, '{"a": {"b": {"c": 1}}}'),
(11, '{"a": {"b": {"c": 1, "d": 2}}}}'),
(12, '{"a": {"b": {"d": 2}}}}'),
(13, '{"a": {"b": {"c": [1, 2]}}}')

query T
SELECT j FROM f@i WHERE j->'a' = '1' ORDER BY k
Expand All @@ -769,6 +777,18 @@ SELECT j FROM f@i WHERE j->'a' = '1' OR j @> '{"b": 2}' ORDER BY k
{"a": 1, "b": 2}
{"a": 1, "c": 3}

query T
SELECT j FROM f@i WHERE j->'a'->'b' = '1' ORDER BY k
----
{"a": {"b": 1}}
{"a": {"b": 1, "d": 2}}

query T
SELECT j FROM f@i WHERE j->'a'->'b'->'c' = '1' ORDER BY k
----
{"a": {"b": {"c": 1}}}
{"a": {"b": {"c": 1, "d": 2}}}

subtest arrays

statement ok
Expand Down
17 changes: 8 additions & 9 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Expand Up @@ -350,24 +350,23 @@ vectorized: true
table: d@foo_inv
spans: /"a"/"b"-/"a"/"b"/PrefixEnd

# TODO(mgartner): Add support for building inverted index constraints for chained JSON
# fetch operators.
query T
EXPLAIN (VERBOSE) SELECT * from d where b->'a'->'c' = '"b"'
EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a'->'c' = '"b"'
----
distribution: local
vectorized: true
·
filter
index join
│ columns: (a, b)
│ estimated row count: 111 (missing stats)
│ filter: ((b->'a')->'c') = '"b"'
│ table: d@primary
│ key columns: a
└── • scan
columns: (a, b)
estimated row count: 1,000 (missing stats)
table: d@primary
spans: FULL SCAN
columns: (a)
estimated row count: 111 (missing stats)
table: d@foo_inv
spans: /"a"/"c"/"b"-/"a"/"c"/"b"/PrefixEnd

query T
EXPLAIN (VERBOSE) SELECT * from d where b->(NULL::STRING) = '"b"'
Expand Down
111 changes: 80 additions & 31 deletions pkg/sql/opt/invertedidx/json_array.go
Expand Up @@ -334,35 +334,18 @@ func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition(

// extractJSONFetchValEqCondition extracts an InvertedExpression representing an
// inverted filter over the planner's inverted index, based on equality between
// a fetch val expression and a right scalar expression. If the following criteria
// are not met, an empty InvertedExpression is returned.
// a chain of fetch val expressions and a right scalar expression. If an
// InvertedExpression cannot be generated from the expression, an
// inverted.NonInvertedColExpression is returned.
//
// 1. The fetch value operator's left expression must be a variable
// referencing the inverted column in the index.
// 2. The fetch value operator's right expression must be a constant string.
// 3. The right expression in the equality expression must be a constant JSON
// value that is not an object or an array.
//
// TODO(mgartner): Support chained fetch val operators, e.g., j->'a'->'b' = '1'.
// In order to generate an InvertedExpression, left must be a fetch val
// expression in the form [col]->[index0]->[index1]->...->[indexN] where col is
// a variable or expression referencing the inverted column in the inverted
// index and each index is a constant string. The right expression must be a
// constant JSON value that is not an object or an array.
func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition(
evalCtx *tree.EvalContext, fetch *memo.FetchValExpr, right opt.ScalarExpr,
evalCtx *tree.EvalContext, left *memo.FetchValExpr, right opt.ScalarExpr,
) inverted.Expression {
// The left side of the fetch val expression, the Json field, should be a
// variable corresponding to the index column.
if !isIndexColumn(j.tabID, j.index, fetch.Json, j.computedColumns) {
return inverted.NonInvertedColExpression{}
}

// The right side of the fetch val expression, the Index field, should be a
// constant string.
if !memo.CanExtractConstDatum(fetch.Index) {
return inverted.NonInvertedColExpression{}
}
key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString)
if !ok {
return inverted.NonInvertedColExpression{}
}

// The right side of the equals expression should be a constant JSON value
// that is not an object or array.
if !memo.CanExtractConstDatum(right) {
Expand All @@ -377,10 +360,76 @@ func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition(
return inverted.NonInvertedColExpression{}
}

// Build a new JSON object of the form: {<key>: <right>}.
b := json.NewObjectBuilder(1)
b.Add(string(*key), val.JSON)
obj := tree.NewDJSON(b.Build())
// Recursively traverse fetch val expressions and collect keys with which to
// build the InvertedExpression. If it is not possible to build an inverted
// expression from the tree of fetch val expressions, collectKeys returns
// early and foundKeys remains false. If successful, foundKeys is set to
// true and JSON fetch value indexes are collected in keys. The keys are
// ordered by the outer-most fetch val index first. The outer-most fetch val
// index is the right-most in the -> chain, for example (j->'a'->'b') is
// equivalent to ((j->'a')->'b') and 'b' is the outer-most fetch val index.
//
// Later on, we iterate forward through these keys to build a JSON object
// from the inside-out with the inner-most value being the JSON scalar
// extracted above from the right ScalarExpr function argument. In the
// resulting JSON object, the outer-most JSON fetch value indexes are the
// inner most JSON object keys.
//
// As an example, when left is (j->'a'->'b') and right is ('1'), the keys
// {"b", "a"} are collected and the JSON object {"a": {"b": 1}} is built.
foundKeys := false
var keys []string
var collectKeys func(fetch *memo.FetchValExpr)
collectKeys = func(fetch *memo.FetchValExpr) {
// The right side of the fetch val expression, the Index field, must be
// a constant string. If not, then we cannot build an inverted
// expression.
if !memo.CanExtractConstDatum(fetch.Index) {
return
}
key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString)
if !ok {
return
}

// Append the key to the list of keys.
keys = append(keys, string(*key))

// If the left side of the fetch val expression, the Json field, is a
// variable or expression corresponding to the index column, then we
// have found a valid list of keys to build an inverted expression.
if isIndexColumn(j.tabID, j.index, fetch.Json, j.computedColumns) {
foundKeys = true
return
}

// If the left side of the fetch val expression is another fetch val
// expression, recursively collect its keys.
if innerFetch, ok := fetch.Json.(*memo.FetchValExpr); ok {
collectKeys(innerFetch)
}

// Otherwise, we cannot build an inverted expression.
}
collectKeys(left)
if !foundKeys {
return inverted.NonInvertedColExpression{}
}

// Build a new JSON object of the form:
// {<keyN>: ... {<key1>: {key0: <val>}}}
// Note that key0 is the outer-most fetch val index, so the expression
// j->'a'->'b' = 1 results in {"a": {"b": 1}}.
var obj json.JSON
for i := 0; i < len(keys); i++ {
b := json.NewObjectBuilder(1)
if i == 0 {
b.Add(keys[i], val.JSON)
} else {
b.Add(keys[i], obj)
}
obj = b.Build()
}

return getInvertedExprForJSONOrArrayIndex(evalCtx, obj)
return getInvertedExprForJSONOrArrayIndex(evalCtx, tree.NewDJSON(obj))
}
27 changes: 27 additions & 0 deletions pkg/sql/opt/invertedidx/json_array_test.go
Expand Up @@ -424,6 +424,33 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) {
indexOrd: arrayOrd,
ok: false,
},
{
filters: "j->'a'->'b' = '1'",
indexOrd: jsonOrd,
ok: true,
tight: true,
unique: true,
},
{
filters: "j->'a'->'b'->'c' = '1'",
indexOrd: jsonOrd,
ok: true,
tight: true,
unique: true,
},
{
// Integer indexes are not yet supported.
filters: "j->0->'b' = '1'",
indexOrd: jsonOrd,
ok: false,
},
{
// The inner most expression is not a fetch val expression with an
// indexed column on the left.
filters: "(j-'c')->'a'->'b' = '1'",
indexOrd: jsonOrd,
ok: false,
},
{
filters: "j->'a' = '1' AND j->'b' = '2'",
indexOrd: jsonOrd,
Expand Down
38 changes: 38 additions & 0 deletions pkg/sql/opt/xform/testdata/rules/select
Expand Up @@ -1951,6 +1951,44 @@ project
│ └── spans: ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"]
└── key: (1)

# Chained fetch val operators.
opt expect=GenerateInvertedIndexScans
SELECT k FROM b WHERE j->'a'->'b' = '"c"'
----
project
├── columns: k:1!null
├── immutable
├── key: (1)
└── scan b@j_inv_idx
├── columns: k:1!null
├── inverted constraint: /6/1
│ └── spans: ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"]
└── key: (1)

# Do not generate an inverted scan when the fetch val and equality operators are
# wrapped in a NOT operator. The -> operator returns NULL if the key does not
# exist in the JSON object, so (NOT j->'a' = '"b"') is not equivalent to the
# inverse of the existence of the key/value pair {"a": "b"} in the inverted
# index. See #49143 and #55316.
opt expect-not=GenerateInvertedIndexScans
SELECT k FROM b WHERE NOT j->'a' = '"b"'
----
project
├── columns: k:1!null
├── immutable
├── key: (1)
└── select
├── columns: k:1!null j:4
├── immutable
├── key: (1)
├── fd: (1)-->(4)
├── scan b
│ ├── columns: k:1!null j:4
│ ├── key: (1)
│ └── fd: (1)-->(4)
└── filters
└── (j:4->'a') != '"b"' [outer=(4), immutable]

# Do not generate an inverted scan when the index of the fetch val operator is
# not a string.
opt expect-not=GenerateInvertedIndexScans
Expand Down