diff --git a/pkg/sql/opt/invertedidx/json_array.go b/pkg/sql/opt/invertedidx/json_array.go index c7d4b39912e1..79b73662222d 100644 --- a/pkg/sql/opt/invertedidx/json_array.go +++ b/pkg/sql/opt/invertedidx/json_array.go @@ -568,7 +568,13 @@ func (j *jsonOrArrayFilterPlanner) extractJSONExistsCondition( func (j *jsonOrArrayFilterPlanner) extractJSONEqCondition( ctx context.Context, evalCtx *eval.Context, left *memo.VariableExpr, right opt.ScalarExpr, ) inverted.Expression { - // The right side of the expression should be a constant JSON value. + // The left side of the expression must be a variable expression of the + // indexed column. + if !isIndexColumn(j.tabID, j.index, left, j.computedColumns) { + return inverted.NonInvertedColExpression{} + } + + // The right side of the expression must be a constant JSON value. if !memo.CanExtractConstDatum(right) { return inverted.NonInvertedColExpression{} } @@ -577,8 +583,7 @@ func (j *jsonOrArrayFilterPlanner) extractJSONEqCondition( return inverted.NonInvertedColExpression{} } - // For Equals expressions, we will generate the inverted expression for the - // single object built from the keys and val. + // For Equals expressions, we will generate the inverted expression for val. invertedExpr := getInvertedExprForJSONOrArrayIndexForContaining(ctx, evalCtx, val) // Generated inverted expression won't be tight as we are searching for rows diff --git a/pkg/sql/opt/invertedidx/json_array_test.go b/pkg/sql/opt/invertedidx/json_array_test.go index 689934de715d..87eab23594ea 100644 --- a/pkg/sql/opt/invertedidx/json_array_test.go +++ b/pkg/sql/opt/invertedidx/json_array_test.go @@ -224,8 +224,16 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) { evalCtx := eval.NewTestingEvalContext(st) tc := testcat.New() - if _, err := tc.ExecuteDDL( - "CREATE TABLE t (j JSON, a INT[], str STRING[], INVERTED INDEX (j), INVERTED INDEX (a), INVERTED INDEX (str))", + if _, err := tc.ExecuteDDL(` + CREATE TABLE t ( + j JSON, + j2 JSON, + a INT[], + str STRING[], + INVERTED INDEX (j), + INVERTED INDEX (a), + INVERTED INDEX (str) + )`, ); err != nil { t.Fatal(err) } @@ -309,6 +317,12 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) { indexOrd: arrayOrd, ok: false, }, + { + // Filtering a non-indexed column. + filters: "j2 @> '1'", + indexOrd: jsonOrd, + ok: false, + }, { // When operations affecting two different variables are OR-ed, we cannot // constrain either index. @@ -468,6 +482,12 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) { unique: true, remainingFilters: "j->0 = '1'", }, + { + // Filtering a non-indexed column. + filters: "j2->0 = '1'", + indexOrd: jsonOrd, + ok: false, + }, { // Arrays on the right side of the equality are supported. filters: "j->'a' = '[1]'", @@ -594,6 +614,12 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) { unique: true, remainingFilters: `j->0 @> '{"b": "c"}'`, }, + { + // Filtering a non-indexed column. + filters: `j2->0 @> '{"b": "c"}'`, + indexOrd: jsonOrd, + ok: false, + }, { // The inner most expression is not a fetch val expression with an // indexed column on the left. @@ -791,6 +817,12 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) { unique: false, remainingFilters: "", }, + { + // Filtering a non-indexed column. + filters: `'1' <@ j2->'a'`, + indexOrd: jsonOrd, + ok: false, + }, { // JSONExists is supported. Unique is false for all Exists predicates // because they check containment within arrays as well. @@ -1080,6 +1112,12 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) { unique: false, remainingFilters: `j = '{"a": "b"}' OR j = '[1, 2, 3]'`, }, + { + // Filtering a non-indexed column. + filters: `j2 = '"a"'`, + indexOrd: jsonOrd, + ok: false, + }, { // Testing the IN operator without the fetch value operator. filters: `j IN ('1', '2', '3')`, @@ -1129,6 +1167,12 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) { unique: false, remainingFilters: `j IN ('[1, 2, 3]', '{"a": "b"}', '1', '"a"')`, }, + { + // Filtering a non-indexed column. + filters: `j2 IN ('1', '2', '3')`, + indexOrd: jsonOrd, + ok: false, + }, } for _, tc := range testCases { diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index 0d5aaf128d3e..9f6276be0ce1 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -8198,6 +8198,77 @@ select └── filters └── val:3 > st_maxdistance(geom:1, '010100000000000000000000000000000000000000') [outer=(1,3), immutable, constraints=(/3: (/NULL - ])] +# Regression test for #111963. Do not plan inverted index scans on columns that +# are not filtered in the query. +exec-ddl +CREATE TABLE t111963 ( + j1 JSON, + j2 JSON +) +---- + +exec-ddl +CREATE INVERTED INDEX idx111963 ON t111963 (j1) +---- + +opt expect-not=GenerateInvertedIndexScans +SELECT * FROM t111963 WHERE j2 = '1' +---- +select + ├── columns: j1:1 j2:2!null + ├── immutable + ├── fd: ()-->(2) + ├── scan t111963 + │ └── columns: j1:1 j2:2 + └── filters + └── j2:2 = '1' [outer=(2), immutable, constraints=(/2: [/'1' - /'1']; tight), fd=()-->(2)] + +opt expect-not=GenerateInvertedIndexScans +SELECT * FROM t111963 WHERE j2 IN ('1', '10', '100') +---- +select + ├── columns: j1:1 j2:2!null + ├── scan t111963 + │ └── columns: j1:1 j2:2 + └── filters + └── j2:2 IN ('1', '10', '100') [outer=(2), constraints=(/2: [/'1' - /'1'] [/'10' - /'10'] [/'100' - /'100']; tight)] + +exec-ddl +DROP INDEX idx111963 +---- + +exec-ddl +CREATE INVERTED INDEX idx111963 ON t111963 ((j1->'foo')) +---- + +opt expect-not=GenerateInvertedIndexScans +SELECT * FROM t111963 WHERE j1 = '1' +---- +select + ├── columns: j1:1!null j2:2 + ├── immutable + ├── fd: ()-->(1) + ├── scan t111963 + │ ├── columns: j1:1 j2:2 + │ └── computed column expressions + │ └── crdb_internal_idx_expr:7 + │ └── j1:1->'foo' + └── filters + └── j1:1 = '1' [outer=(1), immutable, constraints=(/1: [/'1' - /'1']; tight), fd=()-->(1)] + +opt expect-not=GenerateInvertedIndexScans +SELECT * FROM t111963 WHERE j1 IN ('1', '10', '100') +---- +select + ├── columns: j1:1!null j2:2 + ├── scan t111963 + │ ├── columns: j1:1 j2:2 + │ └── computed column expressions + │ └── crdb_internal_idx_expr:7 + │ └── j1:1->'foo' + └── filters + └── j1:1 IN ('1', '10', '100') [outer=(1), constraints=(/1: [/'1' - /'1'] [/'10' - /'10'] [/'100' - /'100']; tight)] + # -------------------------------------------------- # GenerateZigzagJoins # -------------------------------------------------- diff --git a/pkg/sql/randgen/schema.go b/pkg/sql/randgen/schema.go index 234c6518b418..d47f13365684 100644 --- a/pkg/sql/randgen/schema.go +++ b/pkg/sql/randgen/schema.go @@ -517,7 +517,28 @@ func randIndexTableDefFromCols( cpy := make([]*tree.ColumnTableDef, len(columnTableDefs)) copy(cpy, columnTableDefs) rng.Shuffle(len(cpy), func(i, j int) { cpy[i], cpy[j] = cpy[j], cpy[i] }) - nCols := rng.Intn(len(cpy)) + 1 + + // Determine the number of indexed columns. + var nCols int + r := rng.Intn(100) + switch { + case r < 50: + // Create a single-column index 40% of the time. Single-column indexes + // are more likely then multi-column indexes to be used in query plans + // for randomly generated queries, so there is some benefit to + // guaranteeing that they are generated often. + nCols = 1 + case r < 75: + nCols = 2 + case r < 90: + nCols = 3 + default: + nCols = rng.Intn(len(cpy)) + 1 + } + if nCols > len(cpy) { + // nCols cannot be greater than the length of columnTableDefs. + nCols = len(cpy) + } cols := cpy[:nCols]