cockroachdb · mgartner · Oct 20, 2023 · Oct 18, 2023 · Oct 18, 2023
@@ -568,7 +568,13 @@ func (j *jsonOrArrayFilterPlanner) extractJSONExistsCondition(
 func (j *jsonOrArrayFilterPlanner) extractJSONEqCondition(
 	ctx context.Context, evalCtx *eval.Context, left *memo.VariableExpr, right opt.ScalarExpr,
 ) inverted.Expression {
-	// The right side of the expression should be a constant JSON value.
+	// The left side of the expression must be a variable expression of the
+	// indexed column.
+	if !isIndexColumn(j.tabID, j.index, left, j.computedColumns) {
+		return inverted.NonInvertedColExpression{}
+	}
+
+	// The right side of the expression must be a constant JSON value.
 	if !memo.CanExtractConstDatum(right) {
 		return inverted.NonInvertedColExpression{}
 	}
@@ -577,8 +583,7 @@ func (j *jsonOrArrayFilterPlanner) extractJSONEqCondition(
 		return inverted.NonInvertedColExpression{}
 	}
 
-	// For Equals expressions, we will generate the inverted expression for the
-	// single object built from the keys and val.
+	// For Equals expressions, we will generate the inverted expression for val.
 	invertedExpr := getInvertedExprForJSONOrArrayIndexForContaining(ctx, evalCtx, val)
 
 	// Generated inverted expression won't be tight as we are searching for rows

@@ -224,8 +224,16 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) {
 	evalCtx := eval.NewTestingEvalContext(st)
 
 	tc := testcat.New()
-	if _, err := tc.ExecuteDDL(
-		"CREATE TABLE t (j JSON, a INT[], str STRING[], INVERTED INDEX (j), INVERTED INDEX (a), INVERTED INDEX (str))",
+	if _, err := tc.ExecuteDDL(`
+		CREATE TABLE t (
+			j JSON,
+			j2 JSON,
+			a INT[],
+			str STRING[],
+			INVERTED INDEX (j),
+			INVERTED INDEX (a),
+			INVERTED INDEX (str)
+		)`,
 	); err != nil {
 		t.Fatal(err)
 	}
@@ -309,6 +317,12 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) {
 			indexOrd: arrayOrd,
 			ok:       false,
 		},
+		{
+			// Filtering a non-indexed column.
+			filters:  "j2 @> '1'",
+			indexOrd: jsonOrd,
+			ok:       false,
+		},
 		{
 			// When operations affecting two different variables are OR-ed, we cannot
 			// constrain either index.
@@ -468,6 +482,12 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) {
 			unique:           true,
 			remainingFilters: "j->0 = '1'",
 		},
+		{
+			// Filtering a non-indexed column.
+			filters:  "j2->0 = '1'",
+			indexOrd: jsonOrd,
+			ok:       false,
+		},
 		{
 			// Arrays on the right side of the equality are supported.
 			filters:          "j->'a' = '[1]'",
@@ -594,6 +614,12 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) {
 			unique:           true,
 			remainingFilters: `j->0 @> '{"b": "c"}'`,
 		},
+		{
+			// Filtering a non-indexed column.
+			filters:  `j2->0 @> '{"b": "c"}'`,
+			indexOrd: jsonOrd,
+			ok:       false,
+		},
 		{
 			// The inner most expression is not a fetch val expression with an
 			// indexed column on the left.
@@ -791,6 +817,12 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) {
 			unique:           false,
 			remainingFilters: "",
 		},
+		{
+			// Filtering a non-indexed column.
+			filters:  `'1' <@ j2->'a'`,
+			indexOrd: jsonOrd,
+			ok:       false,
+		},
 		{
 			// JSONExists is supported. Unique is false for all Exists predicates
 			// because they check containment within arrays as well.
@@ -1080,6 +1112,12 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) {
 			unique:           false,
 			remainingFilters: `j = '{"a": "b"}' OR j = '[1, 2, 3]'`,
 		},
+		{
+			// Filtering a non-indexed column.
+			filters:  `j2 = '"a"'`,
+			indexOrd: jsonOrd,
+			ok:       false,
+		},
 		{
 			// Testing the IN operator without the fetch value operator.
 			filters:          `j IN ('1', '2', '3')`,
@@ -1129,6 +1167,12 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) {
 			unique:           false,
 			remainingFilters: `j IN ('[1, 2, 3]', '{"a": "b"}', '1', '"a"')`,
 		},
+		{
+			// Filtering a non-indexed column.
+			filters:  `j2 IN ('1', '2', '3')`,
+			indexOrd: jsonOrd,
+			ok:       false,
+		},
 	}
 
 	for _, tc := range testCases {

@@ -8198,6 +8198,77 @@ select
  └── filters
       └── val:3 > st_maxdistance(geom:1, '010100000000000000000000000000000000000000') [outer=(1,3), immutable, constraints=(/3: (/NULL - ])]
 
+# Regression test for #111963. Do not plan inverted index scans on columns that
+# are not filtered in the query.
+exec-ddl
+CREATE TABLE t111963 (
+  j1 JSON,
+  j2 JSON
+)
+----
+
+exec-ddl
+CREATE INVERTED INDEX idx111963 ON t111963 (j1)
+----
+
+opt expect-not=GenerateInvertedIndexScans
+SELECT * FROM t111963 WHERE j2 = '1'
+----
+select
+ ├── columns: j1:1 j2:2!null
+ ├── immutable
+ ├── fd: ()-->(2)
+ ├── scan t111963
+ │    └── columns: j1:1 j2:2
+ └── filters
+      └── j2:2 = '1' [outer=(2), immutable, constraints=(/2: [/'1' - /'1']; tight), fd=()-->(2)]
+
+opt expect-not=GenerateInvertedIndexScans
+SELECT * FROM t111963 WHERE j2 IN ('1', '10', '100')
+----
+select
+ ├── columns: j1:1 j2:2!null
+ ├── scan t111963
+ │    └── columns: j1:1 j2:2
+ └── filters
+      └── j2:2 IN ('1', '10', '100') [outer=(2), constraints=(/2: [/'1' - /'1'] [/'10' - /'10'] [/'100' - /'100']; tight)]
+
+exec-ddl
+DROP INDEX idx111963
+----
+
+exec-ddl
+CREATE INVERTED INDEX idx111963 ON t111963 ((j1->'foo'))
+----
+
+opt expect-not=GenerateInvertedIndexScans
+SELECT * FROM t111963 WHERE j1 = '1'
+----
+select
+ ├── columns: j1:1!null j2:2
+ ├── immutable
+ ├── fd: ()-->(1)
+ ├── scan t111963
+ │    ├── columns: j1:1 j2:2
+ │    └── computed column expressions
+ │         └── crdb_internal_idx_expr:7
+ │              └── j1:1->'foo'
+ └── filters
+      └── j1:1 = '1' [outer=(1), immutable, constraints=(/1: [/'1' - /'1']; tight), fd=()-->(1)]
+
+opt expect-not=GenerateInvertedIndexScans
+SELECT * FROM t111963 WHERE j1 IN ('1', '10', '100')
+----
+select
+ ├── columns: j1:1!null j2:2
+ ├── scan t111963
+ │    ├── columns: j1:1 j2:2
+ │    └── computed column expressions
+ │         └── crdb_internal_idx_expr:7
+ │              └── j1:1->'foo'
+ └── filters
+      └── j1:1 IN ('1', '10', '100') [outer=(1), constraints=(/1: [/'1' - /'1'] [/'10' - /'10'] [/'100' - /'100']; tight)]
+
 # --------------------------------------------------
 # GenerateZigzagJoins
 # --------------------------------------------------

diff --git a/pkg/sql/randgen/schema.go b/pkg/sql/randgen/schema.go
@@ -517,7 +517,28 @@ func randIndexTableDefFromCols(
 	cpy := make([]*tree.ColumnTableDef, len(columnTableDefs))
 	copy(cpy, columnTableDefs)
 	rng.Shuffle(len(cpy), func(i, j int) { cpy[i], cpy[j] = cpy[j], cpy[i] })
-	nCols := rng.Intn(len(cpy)) + 1
+
+	// Determine the number of indexed columns.
+	var nCols int
+	r := rng.Intn(100)
+	switch {
+	case r < 50:
+		// Create a single-column index 40% of the time. Single-column indexes
+		// are more likely then multi-column indexes to be used in query plans
+		// for randomly generated queries, so there is some benefit to
+		// guaranteeing that they are generated often.
+		nCols = 1
+	case r < 75:
+		nCols = 2
+	case r < 90:
+		nCols = 3
+	default:
+		nCols = rng.Intn(len(cpy)) + 1
+	}
+	if nCols > len(cpy) {
+		// nCols cannot be greater than the length of columnTableDefs.
+		nCols = len(cpy)
+	}
 
 	cols := cpy[:nCols]