opt: fix remaining filters when using partitioned constraints
The index constraint code can use check expressions or partitioning
information to better refine the spans. However, the code that handles
the remaining filters is very fragile. For check constraints, the
remaining filters they give rise to need to be removed (to avoid
unnecessary overhead). For the partitioning code, this is necessary for
correctness - we can't have any "in-between" filters as part of the
final remaining filters (see the big comment in
`GenerateConstrainedScans`).

The code assumes that a remaining filter is identical to one of the
input filters (and thus only keeps the remaining filters that are the
same as an input filter); unfortunately, this is not necessarily the
case - the library tries to simplify the filters w.r.t. the spans to
make them cheaper to evaluate. For example, in the regression test
below, `c1 OR (c0 > false AND c0 < false)` is simplified against the
span [/true - /true] to `c1 OR (c0 < false)`, which no longer matches
any input filter and was therefore dropped.

This change cleans this up by extending the index constraints library
a little bit: we can now specify the "required" and "optional" filters
separately, with the only difference being that optional filters don't
generate remaining filters. This way we don't have to "guess" what
filters we need to keep.
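
As a rough sketch (the Init signature matches the diff below; the
caller-side names here are hypothetical), a caller that has check
constraint filters at hand would now do:

    var ic idxconstraint.Instance
    ic.Init(
        whereFilters,           // required: may survive as remaining filters
        checkConstraintFilters, // optional: refine spans only, never remain
        indexCols, notNullCols,
        false /* isInverted */, &evalCtx, &factory,
    )
    spans := ic.Constraint()
    remaining := ic.RemainingFilters() // computed from the required filters alone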

Fixes #44154.

Release note (bug fix): fixed invalid query results in some corner
cases where part of a WHERE clause was incorrectly discarded.
RaduBerinde committed Feb 5, 2020
1 parent 4654024 commit 789146a
Showing 7 changed files with 185 additions and 131 deletions.
@@ -238,3 +238,25 @@ query T
 SELECT feature_name FROM crdb_internal.feature_usage WHERE feature_name='sql.partitioning.partition-constrained-scan' AND usage_count > 0
 ----
 sql.partitioning.partition-constrained-scan
+
+# Regression test for #44154: a remaining filter that is not identical to an
+# input filter should not be dropped.
+statement ok
+CREATE TABLE t0(c0 BOOL UNIQUE, c1 BOOL CHECK (true))
+
+statement ok
+INSERT INTO t0(c0) VALUES (true)
+
+query T
+EXPLAIN (OPT) SELECT * FROM t0 WHERE t0.c0 AND (c1 OR (c0 > false AND c0 < false))
+----
+select
+ ├── index-join t0
+ │    └── scan t0@t0_c0_key
+ │         └── constraint: /1: [/true - /true]
+ └── filters
+      └── c1 OR (c0 < false)
+
+query BB
+SELECT * FROM t0 WHERE t0.c0 AND (c1 OR (c0 > false AND c0 < false))
+----
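
(For the inserted row c0 is true and c1 is NULL, so c0 > false AND
c0 < false is false, c1 OR false is NULL, and true AND NULL excludes
the row - the expected output above is empty. Before this fix, the
simplified remaining filter was dropped and the row was incorrectly
returned.)
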
45 changes: 31 additions & 14 deletions pkg/sql/opt/idxconstraint/index_constraints.go
@@ -1061,35 +1061,53 @@ func (c *indexConstraintCtx) simplifyFilter(
 type Instance struct {
     indexConstraintCtx
 
-    filters memo.FiltersExpr
+    requiredFilters memo.FiltersExpr
+    // allFilters includes requiredFilters along with optional filters that don't
+    // need to generate remaining filters (see Instance.Init()).
+    allFilters memo.FiltersExpr
 
     constraint   constraint.Constraint
     consolidated constraint.Constraint
     tight        bool
     initialized  bool
 }
 
-// Init processes the filter and calculates the spans.
+// Init processes the filters and calculates the spans.
+//
+// Optional filters are filters that can be used for determining spans, but
+// they need not generate remaining filters. This is e.g. used for check
+// constraints that can help generate better spans but don't actually need to be
+// enforced.
 func (ic *Instance) Init(
-    filters memo.FiltersExpr,
+    requiredFilters memo.FiltersExpr,
+    optionalFilters memo.FiltersExpr,
     columns []opt.OrderingColumn,
     notNullCols opt.ColSet,
     isInverted bool,
     evalCtx *tree.EvalContext,
     factory *norm.Factory,
 ) {
-    ic.filters = filters
+    ic.requiredFilters = requiredFilters
+    if len(optionalFilters) == 0 {
+        ic.allFilters = requiredFilters
+    } else {
+        // Force allocation of a bigger slice.
+        // TODO(radu): we should keep the required and optional filters separate and
+        // add a helper for iterating through all of them.
+        ic.allFilters = requiredFilters[:len(requiredFilters):len(requiredFilters)]
+        ic.allFilters = append(ic.allFilters, optionalFilters...)
+    }
     ic.indexConstraintCtx.init(columns, notNullCols, isInverted, evalCtx, factory)
-    constraints := make([]*constraint.Constraint, 0, 1)
     if isInverted {
-        ic.tight, constraints = ic.makeInvertedIndexSpansForExpr(
-            &ic.filters, constraints, false,
+        tight, constraints := ic.makeInvertedIndexSpansForExpr(
+            &ic.allFilters, nil /* constraints */, false,
         )
         // makeInvertedIndexSpansForExpr is guaranteed to add at least one
         // constraint. It may be an "unconstrained" constraint.
+        ic.tight = tight
         ic.constraint = *constraints[0]
     } else {
-        ic.tight = ic.makeSpansForExpr(0 /* offset */, &ic.filters, &ic.constraint)
+        ic.tight = ic.makeSpansForExpr(0 /* offset */, &ic.allFilters, &ic.constraint)
     }
     // Note: we only consolidate spans at the end; consolidating partial results
     // can lead to worse spans, for example:
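
An aside on the requiredFilters[:len(requiredFilters):len(requiredFilters)]
line above: the three-index slice expression caps the result's capacity at
its length, so the append that follows is forced to allocate a fresh backing
array instead of writing into spare capacity of the slice the caller still
owns. A standalone illustration (not part of the patch):

    package main

    import "fmt"

    func main() {
        required := make([]int, 2, 4) // len 2, with spare capacity
        required[0], required[1] = 1, 2

        // A plain append may reuse required's backing array...
        shared := append(required, 3)
        // ...while a full slice expression (cap == len) forces a copy.
        capped := append(required[:len(required):len(required)], 3)

        fmt.Println(&required[0] == &shared[0]) // true: same backing array
        fmt.Println(&required[0] == &capped[0]) // false: fresh allocation
    }
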
@@ -1125,8 +1143,7 @@ func (ic *Instance) Constraint() *constraint.Constraint {
 // AllInvertedIndexConstraints returns all constraints that can be created on
 // the specified inverted index. Only works for inverted indexes.
 func (ic *Instance) AllInvertedIndexConstraints() ([]*constraint.Constraint, error) {
-    allConstraints := make([]*constraint.Constraint, 0)
-    _, allConstraints = ic.makeInvertedIndexSpansForExpr(&ic.filters, allConstraints, true)
+    _, allConstraints := ic.makeInvertedIndexSpansForExpr(&ic.allFilters, nil /* constraints */, true /* allPaths */)
 
     return allConstraints, nil
 }
@@ -1139,15 +1156,15 @@ func (ic *Instance) RemainingFilters() memo.FiltersExpr {
         return memo.TrueFilter
     }
     if ic.constraint.IsUnconstrained() {
-        return ic.filters
+        return ic.requiredFilters
     }
     var newFilters memo.FiltersExpr
-    for i := range ic.filters {
+    for i := range ic.requiredFilters {
         prefix := ic.getMaxSimplifyPrefix(&ic.constraint)
-        condition := ic.simplifyFilter(ic.filters[i].Condition, &ic.constraint, prefix)
+        condition := ic.simplifyFilter(ic.requiredFilters[i].Condition, &ic.constraint, prefix)
         if condition.Op() != opt.TrueOp {
             if newFilters == nil {
-                newFilters = make(memo.FiltersExpr, 0, len(ic.filters))
+                newFilters = make(memo.FiltersExpr, 0, len(ic.requiredFilters))
             }
             newFilters = append(newFilters, memo.FiltersItem{Condition: condition})
         }
85 changes: 51 additions & 34 deletions pkg/sql/opt/idxconstraint/index_constraints_test.go
@@ -67,7 +67,6 @@ func TestIndexConstraints(t *testing.T) {
     defer leaktest.AfterTest(t)()
 
     datadriven.Walk(t, "testdata", func(t *testing.T, path string) {
-        ctx := context.Background()
         semaCtx := tree.MakeSemaContext()
         evalCtx := tree.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
 
@@ -124,36 +123,30 @@ func TestIndexConstraints(t *testing.T) {
 
             switch d.Cmd {
             case "index-constraints":
-                typedExpr, err := testutils.ParseScalarExpr(d.Input, iVarHelper.Container())
-                if err != nil {
-                    d.Fatalf(t, "%v", err)
-                }
-
-                if normalizeTypedExpr {
-                    typedExpr, err = evalCtx.NormalizeExpr(typedExpr)
+                // Allow specifying optional filters using the "optional:" delimiter.
+                var filters, optionalFilters memo.FiltersExpr
+                if idx := strings.Index(d.Input, "optional:"); idx >= 0 {
+                    optional := d.Input[idx+len("optional:"):]
+                    optionalFilters, err = buildFilters(
+                        optional, &semaCtx, &evalCtx, &f, &iVarHelper, normalizeTypedExpr,
+                    )
                     if err != nil {
                         d.Fatalf(t, "%v", err)
                     }
+                    d.Input = d.Input[:idx]
                 }
+                if filters, err = buildFilters(
+                    d.Input, &semaCtx, &evalCtx, &f, &iVarHelper, normalizeTypedExpr,
+                ); err != nil {
+                    d.Fatalf(t, "%v", err)
+                }
 
                 varNames := make([]string, len(varTypes))
                 for i := range varNames {
                     varNames[i] = fmt.Sprintf("@%d", i+1)
                 }
-                b := optbuilder.NewScalar(ctx, &semaCtx, &evalCtx, &f)
-                b.AllowUnsupportedExpr = true
-                err = b.Build(typedExpr)
-                if err != nil {
-                    return fmt.Sprintf("error: %v\n", err)
-                }
-                root := f.Memo().RootExpr().(opt.ScalarExpr)
-                filters := memo.FiltersExpr{{Condition: root}}
-                if _, ok := root.(*memo.TrueExpr); ok {
-                    filters = memo.TrueFilter
-                }
 
                 var ic idxconstraint.Instance
-                ic.Init(filters, indexCols, notNullCols, invertedIndex, &evalCtx, &f)
+                ic.Init(filters, optionalFilters, indexCols, notNullCols, invertedIndex, &evalCtx, &f)
                 result := ic.Constraint()
                 var buf bytes.Buffer
                 for i := 0; i < result.Spans.Count(); i++ {
@@ -232,6 +225,7 @@ func BenchmarkIndexConstraints(b *testing.B) {
         testCases = append(testCases, tc)
     }
 
+    semaCtx := tree.MakeSemaContext()
     evalCtx := tree.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
 
     for _, tc := range testCases {
@@ -249,26 +243,15 @@ func BenchmarkIndexConstraints(b *testing.B) {
         indexCols, notNullCols := parseIndexColumns(b, md, strings.Split(tc.indexInfo, ", "))
 
         iVarHelper := tree.MakeTypesOnlyIndexedVarHelper(varTypes)
-        typedExpr, err := testutils.ParseScalarExpr(tc.expr, iVarHelper.Container())
+        filters, err := buildFilters(tc.expr, &semaCtx, &evalCtx, &f, &iVarHelper, false /* normalizeTypedExpr */)
         if err != nil {
             b.Fatal(err)
         }
 
-        semaCtx := tree.MakeSemaContext()
-        evalCtx := tree.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
-        bld := optbuilder.NewScalar(context.Background(), &semaCtx, &evalCtx, &f)
-
-        err = bld.Build(typedExpr)
-        if err != nil {
-            b.Fatal(err)
-        }
-        nd := f.Memo().RootExpr()
-        filters := memo.FiltersExpr{{Condition: nd.(opt.ScalarExpr)}}
-
         b.ResetTimer()
         for i := 0; i < b.N; i++ {
             var ic idxconstraint.Instance
-            ic.Init(filters, indexCols, notNullCols, false /* isInverted */, &evalCtx, &f)
+            ic.Init(filters, nil /* optionalFilters */, indexCols, notNullCols, false /* isInverted */, &evalCtx, &f)
             _ = ic.Constraint()
             _ = ic.RemainingFilters()
         }
@@ -316,3 +299,37 @@ func parseIndexColumns(
     }
     return columns, notNullCols
 }
+
+func buildFilters(
+    input string,
+    semaCtx *tree.SemaContext,
+    evalCtx *tree.EvalContext,
+    f *norm.Factory,
+    iVarHelper *tree.IndexedVarHelper,
+    normalizeTypedExpr bool,
+) (memo.FiltersExpr, error) {
+    if input == "" {
+        return memo.TrueFilter, nil
+    }
+    typedExpr, err := testutils.ParseScalarExpr(input, iVarHelper.Container())
+    if err != nil {
+        return memo.FiltersExpr{}, err
+    }
+    if normalizeTypedExpr {
+        typedExpr, err = evalCtx.NormalizeExpr(typedExpr)
+        if err != nil {
+            return memo.FiltersExpr{}, err
+        }
+    }
+
+    b := optbuilder.NewScalar(context.Background(), semaCtx, evalCtx, f)
+    b.AllowUnsupportedExpr = true
+    if err := b.Build(typedExpr); err != nil {
+        return memo.FiltersExpr{}, err
+    }
+    root := f.Memo().RootExpr().(opt.ScalarExpr)
+    if _, ok := root.(*memo.TrueExpr); ok {
+        return memo.TrueFilter, nil
+    }
+    return memo.FiltersExpr{{Condition: root}}, nil
+}
16 changes: 16 additions & 0 deletions pkg/sql/opt/idxconstraint/testdata/optional
@@ -0,0 +1,16 @@
+index-constraints vars=(int, int) index=(@1)
+@1 > 2 AND @1 < 4 AND @2 = 2
+----
+[/3 - /3]
+Remaining filter: @2 = 2
+
+index-constraints vars=(int, int) index=(@1)
+@1 > 2
+optional: @1 < 4 AND @2 = 2
+----
+[/3 - /3]
+
+index-constraints vars=(int, int) index=(@1 desc, @2 desc)
+optional: @1 >= 2 AND @1 <= 4 AND @2 IN (1, 2, 3)
+----
+[/4/3 - /2/1]
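
(The first and second cases above constrain the index with the same
conjuncts and produce the same span [/3 - /3]; the difference is that in
the second case @1 < 4 AND @2 = 2 are passed as optional filters, so no
remaining filter is emitted.)
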
15 changes: 0 additions & 15 deletions pkg/sql/opt/memo/expr.go
@@ -234,21 +234,6 @@ func (n *FiltersExpr) Deduplicate() {
     *n = dedup
 }
 
-// RetainCommonFilters retains only the filters found in n and other.
-func (n *FiltersExpr) RetainCommonFilters(other FiltersExpr) {
-    // TODO(ridwanmsharif): Faster intersection using a map
-    common := (*n)[:0]
-    for _, filter := range *n {
-        for _, otherFilter := range other {
-            if filter.Condition == otherFilter.Condition {
-                common = append(common, filter)
-                break
-            }
-        }
-    }
-    *n = common
-}
-
 // RemoveCommonFilters removes the filters found in other from n.
 func (n *FiltersExpr) RemoveCommonFilters(other FiltersExpr) {
     // TODO(ridwanmsharif): Faster intersection using a map
4 changes: 1 addition & 3 deletions pkg/sql/opt/memo/memo.go
@@ -223,10 +223,8 @@ func (m *Memo) SetRoot(e RelExpr, phys *physical.Required) {
 }
 
 // SetScalarRoot stores the root memo expression when it is a scalar expression.
+// Used only for testing.
 func (m *Memo) SetScalarRoot(scalar opt.ScalarExpr) {
-    if m.rootExpr != nil {
-        panic(errors.AssertionFailedf("cannot set scalar root multiple times"))
-    }
     m.rootExpr = scalar
 }

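(The removed assertion is presumably in the way of the new buildFilters
test helper above, which builds more than one scalar expression with the
same factory - once for the optional filters and once for the required
ones - and therefore sets the scalar root more than once.)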