opt: fix selectivity estimates for index constraints #31937

Merged 1 commit on Oct 30, 2018
22 changes: 22 additions & 0 deletions pkg/sql/opt/constraint/constraint.go
@@ -448,6 +448,28 @@ func (c *Constraint) ExactPrefix(evalCtx *tree.EvalContext) int {
}
}

// ConstrainedColumns returns the number of columns which are constrained by
// the Constraint. For example:
// /a/b/c: [/1/1 - /1] [/3 - /3]
// has 2 constrained columns. This may be less than the total number of columns
// in the constraint, especially if it represents an index constraint.
func (c *Constraint) ConstrainedColumns(evalCtx *tree.EvalContext) int {
count := 0
for i := 0; i < c.Spans.Count(); i++ {
sp := c.Spans.Get(i)
start := sp.StartKey()
end := sp.EndKey()
if start.Length() > count {
count = start.Length()
}
if end.Length() > count {
count = end.Length()
}
}

return count
}

// Prefix returns the length of the longest prefix of columns for which all the
// spans have the same start and end values. For example:
// /a/b/c: [/1/1/1 - /1/1/2] [/3/3/3 - /3/3/4]
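The new ConstrainedColumns helper simply takes the longest start or end key over all spans. As a rough illustration of that counting (using stand-in types, not the real opt/constraint API, which needs an EvalContext and proper key machinery), the following standalone sketch models spans by their key lengths and reproduces the 2-column answer from the doc comment's /a/b/c example:

package main

import "fmt"

// span models only the key lengths of a constraint span. The real
// constraint.Span carries full keys, but the lengths are all that the
// ConstrainedColumns counting needs; this is an illustrative stand-in,
// not the actual opt/constraint types.
type span struct {
	startLen, endLen int
}

// constrainedColumns mirrors the loop added above: the result is the
// maximum start- or end-key length over all spans.
func constrainedColumns(spans []span) int {
	count := 0
	for _, sp := range spans {
		if sp.startLen > count {
			count = sp.startLen
		}
		if sp.endLen > count {
			count = sp.endLen
		}
	}
	return count
}

func main() {
	// /a/b/c: [/1/1 - /1] [/3 - /3] from the doc comment: the longest key
	// has two values, so only 2 of the 3 index columns are constrained.
	spans := []span{
		{startLen: 2, endLen: 1}, // [/1/1 - /1]
		{startLen: 1, endLen: 1}, // [/3 - /3]
	}
	fmt.Println(constrainedColumns(spans)) // 2
}

Because no span key in that example is longer than two values, only the first two index columns are actually constrained, even though the constraint itself spans three columns.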
69 changes: 34 additions & 35 deletions pkg/sql/opt/memo/statistics_builder.go
@@ -454,22 +454,18 @@ func (sb *statisticsBuilder) buildScan(scan *ScanExpr, relProps *props.Relationa
if scan.Constraint != nil {
// Calculate distinct counts for constrained columns
// -------------------------------------------------
applied := sb.applyConstraint(scan.Constraint, scan, relProps)
numUnappliedConjuncts := sb.applyIndexConstraint(scan.Constraint, scan, relProps)

var cols opt.ColSet
for i := 0; i < scan.Constraint.Columns.Count(); i++ {
for i, n := 0, scan.Constraint.ConstrainedColumns(sb.evalCtx); i < n; i++ {
cols.Add(int(scan.Constraint.Columns.Get(i).ID()))
}

// Calculate row count and selectivity
// -----------------------------------
inputRowCount := s.RowCount
if applied {
s.ApplySelectivity(sb.selectivityFromDistinctCounts(cols, scan, s))
} else {
numUnappliedConjuncts := sb.numConjunctsInConstraint(scan.Constraint)
s.ApplySelectivity(sb.selectivityFromUnappliedConjuncts(numUnappliedConjuncts))
}
s.ApplySelectivity(sb.selectivityFromDistinctCounts(cols, scan, s))
s.ApplySelectivity(sb.selectivityFromUnappliedConjuncts(numUnappliedConjuncts))

// Set null counts to 0 for non-nullable columns
// -------------------------------------------------
@@ -1846,18 +1842,26 @@ func (sb *statisticsBuilder) applyFilter(
return numUnappliedConjuncts, constrainedCols
}

func (sb *statisticsBuilder) applyConstraint(
func (sb *statisticsBuilder) applyIndexConstraint(
c *constraint.Constraint, e RelExpr, relProps *props.Relational,
) (applied bool) {
) (numUnappliedConjuncts float64) {
// If unconstrained, then no constraint could be derived from the expression,
// so fall back to estimate.
// If a contradiction, then optimizations must not be enabled (say for
// testing), or else this would have been reduced.
if c.IsUnconstrained() || c.IsContradiction() {
return false /* applied */
return 0 /* numUnappliedConjuncts */
}

return sb.updateDistinctCountsFromConstraint(c, e, relProps)
applied := sb.updateDistinctCountsFromConstraint(c, e, relProps)
for i, n := applied, c.ConstrainedColumns(sb.evalCtx); i < n; i++ {
// Unlike the constraints found in Select and Join filters, an index
// constraint may represent multiple conjuncts. Therefore, we need to
// calculate the number of unapplied conjuncts for each constrained column.
numUnappliedConjuncts += sb.numConjunctsInConstraint(c, i)
}

return numUnappliedConjuncts
}

func (sb *statisticsBuilder) applyConstraintSet(
@@ -1874,13 +1878,13 @@ func (sb *statisticsBuilder) applyConstraintSet(
numUnappliedConjuncts = 0
for i := 0; i < cs.Length(); i++ {
applied := sb.updateDistinctCountsFromConstraint(cs.Constraint(i), e, relProps)
if !applied {
if applied == 0 {
// If a constraint cannot be applied, it may represent an
// inequality like x < 1. As a result, distinctCounts does not fully
// represent the selectivity of the constraint set.
// We return an estimate of the number of unapplied conjuncts to the
// caller function to be used for selectivity calculation.
numUnappliedConjuncts += sb.numConjunctsInConstraint(cs.Constraint(i))
numUnappliedConjuncts += sb.numConjunctsInConstraint(cs.Constraint(i), 0 /* nth */)
}
}

@@ -1923,10 +1927,10 @@ func (sb *statisticsBuilder) updateNullCountsFromProps(

// updateDistinctCountsFromConstraint updates the distinct count for each
// column in a constraint that can be determined to have a finite number of
// possible values. It returns a boolean indicating if the constraint was
// applied (i.e., the distinct count for at least one column could be inferred
// from the constraint). If the same column appears in multiple constraints,
// the distinct count is the minimum for that column across all constraints.
// possible values. It returns the number of columns for which the distinct
// count could be inferred from the constraint. If the same column appears
// in multiple constraints, the distinct count is the minimum for that column
// across all constraints.
//
// For example, consider the following constraint set:
//
@@ -1952,7 +1956,7 @@ func (sb *statisticsBuilder) updateNullCountsFromProps(
// discrepancy must be resolved by the calling function.
func (sb *statisticsBuilder) updateDistinctCountsFromConstraint(
c *constraint.Constraint, e RelExpr, relProps *props.Relational,
) (applied bool) {
) (applied int) {
// All of the columns that are part of the prefix have a finite number of
// distinct values.
prefix := c.Prefix(sb.evalCtx)
@@ -2024,7 +2028,7 @@ func (sb *statisticsBuilder) updateDistinctCountsFromConstraint(

colID := c.Columns.Get(col).ID()
sb.ensureColStat(util.MakeFastIntSet(int(colID)), distinctCount, e, relProps)
applied = true
applied = col + 1
}

return applied
@@ -2072,8 +2076,8 @@ func (sb *statisticsBuilder) updateDistinctNullCountsFromEquivalency(
}

// selectivityFromDistinctCounts calculates the selectivity of a filter by
// taking the product of selectivities of each constrained column. In the general case,
// this can be represented by the formula:
// taking the product of selectivities of each constrained column. In the
// general case, this can be represented by the formula:
//
// ┬-┬ ⎛ new distinct(i) ⎞
// selectivity = │ │ ⎜ --------------- ⎟
@@ -2313,9 +2317,9 @@ func isEqualityWithTwoVars(cond opt.ScalarExpr) bool {
}

// numConjunctsInConstraint returns a rough estimate of the number of conjuncts
// used to build the given constraint.
// used to build the given constraint for the column at position nth.
func (sb *statisticsBuilder) numConjunctsInConstraint(
c *constraint.Constraint,
c *constraint.Constraint, nth int,
) (numConjuncts float64) {
if c.Spans.Count() == 0 {
return 0 /* numConjuncts */
@@ -2325,25 +2329,20 @@
for i := 0; i < c.Spans.Count(); i++ {
span := c.Spans.Get(i)
numSpanConjuncts := float64(0)
// The first start and end keys in each span are the only ones that matter
// for determining selectivity when we have no knowledge of the data
// distribution. Technically, /a/b: [/5 - ] is more selective than
// /a/b: [/4/5 - ], which is more selective than /a/b: [/4 - ]. But we
// treat them all the same, with selectivity=1/3.
if span.StartKey().Length() > 0 {
if span.StartKey().Length() > nth {
// Cases of NULL in a constraint should be ignored. For example,
// without knowledge of the data distribution, /a: (/NULL - /10] should
// have the same estimated selectivity as /a: [/10 - ]. Selectivity
// of NULL constraints is handled in selectivityFromNullCounts.
if c.Columns.Get(0).Descending() ||
span.StartKey().Value(0) != tree.DNull {
if c.Columns.Get(nth).Descending() ||
span.StartKey().Value(nth) != tree.DNull {
numSpanConjuncts++
}
}
if span.EndKey().Length() > 0 {
if span.EndKey().Length() > nth {
// Ignore cases of NULL in constraints. (see above comment).
if !c.Columns.Get(0).Descending() ||
span.EndKey().Value(0) != tree.DNull {
if !c.Columns.Get(nth).Descending() ||
span.EndKey().Value(nth) != tree.DNull {
numSpanConjuncts++
}
}
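numConjunctsInConstraint now takes the column position nth instead of always looking at position 0, so applyIndexConstraint can charge one conjunct per bounded side of each constrained-but-unapplied column. Below is a self-contained sketch of that per-column count under hypothetical stand-in types; it handles a single span only, whereas the real function also aggregates over all spans of the constraint:

package main

import "fmt"

// A span here is just a pair of string-encoded keys; "NULL" stands in for a
// NULL datum. These are hypothetical stand-ins for the constraint.Span and
// key types used by the real function.
type span struct {
	start, end []string
}

// conjunctsForColumn mirrors the per-span counting in numConjunctsInConstraint
// for the column at position nth: each side of the span that actually bounds
// that column counts as one conjunct, except for NULL bounds, which are
// ignored (selectivity of NULL constraints is handled via null counts).
func conjunctsForColumn(sp span, nth int, descending bool) float64 {
	n := float64(0)
	if len(sp.start) > nth && (descending || sp.start[nth] != "NULL") {
		n++
	}
	if len(sp.end) > nth && (!descending || sp.end[nth] != "NULL") {
		n++
	}
	return n
}

func main() {
	// The good-index constraint from the regression test below:
	//   /2/3/4/1: [/1/'+1-1000' - /1/'+1-1001')
	sp := span{
		start: []string{"1", "+1-1000"},
		end:   []string{"1", "+1-1001"},
	}
	fmt.Println(conjunctsForColumn(sp, 1, false)) // 2: c is bounded on both ends
}

For the good-index constraint in the regression test below, the LIKE-derived range on c bounds that column on both ends and therefore counts as two conjuncts.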
53 changes: 49 additions & 4 deletions pkg/sql/opt/memo/testdata/stats/scan
@@ -246,13 +246,13 @@ index-join a
├── fd: (1)-->(2-5), (3,4)-->(1,2,5)
└── select
├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
├── stats: [rows=111.111111, distinct(1)=110.489355, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=97.0976681, null(4)=0]
├── stats: [rows=37.037037, distinct(1)=36.9747958, null(1)=0, distinct(3)=1.99999999, null(3)=0, distinct(4)=35.7721483, null(4)=0]
├── key: (1)
├── fd: (1)-->(3,4), (3,4)-->(1)
├── scan a@secondary
│ ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
│ ├── constraint: /-3/4: [ - /'foobar'/5.0] [/'foo' - /'bar'/5.0]
│ ├── stats: [rows=1000, distinct(1)=911.337892, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=294.797541, null(4)=0]
│ ├── stats: [rows=333.333333, distinct(1)=323.895037, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=207.616156, null(4)=0]
│ ├── key: (1)
│ └── fd: (1)-->(3,4), (3,4)-->(1)
└── filters
@@ -407,15 +407,60 @@ index-join a
├── fd: (1)-->(2-5), (3,4)-->(1,2,5)
└── select
├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
├── stats: [rows=74.0740741, distinct(1)=74.0740741, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=68.9343053, null(4)=0]
├── stats: [rows=24.691358, distinct(1)=24.691358, null(1)=0, distinct(3)=1.99999586, null(3)=0, distinct(4)=24.5913408, null(4)=0]
├── key: (1)
├── fd: (1)-->(3,4), (3,4)-->(1)
├── scan a@secondary
│ ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
│ ├── constraint: /-3/4: [ - /'foobar'/5.0] [/'foo' - /'bar'/5.0]
│ ├── stats: [rows=666.666667, distinct(1)=666.666667, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=294.797541, null(4)=0]
│ ├── stats: [rows=222.222222, distinct(1)=222.222222, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=207.616156, null(4)=0]
│ ├── key: (1)
│ └── fd: (1)-->(3,4), (3,4)-->(1)
└── filters
├── (s <= 'foo') OR (s >= 'foobar') [type=bool, outer=(3)]
└── d <= 5.0 [type=bool, outer=(4), constraints=(/4: (/NULL - /5.0]; tight)]

exec-ddl
CREATE TABLE abcde (
a INT PRIMARY KEY,
b INT,
c STRING,
d INT,
e INT,
INDEX bad(b, d),
INDEX good(b, c, d)
)
----
TABLE abcde
├── a int not null
├── b int
├── c string
├── d int
├── e int
├── INDEX primary
│ └── a int not null
├── INDEX bad
│ ├── b int
│ ├── d int
│ └── a int not null
└── INDEX good
├── b int
├── c string
├── d int
└── a int not null

# Regression test for #31929. Ensure that the good index is chosen.
opt
SELECT * FROM abcde WHERE b = 1 AND c LIKE '+1-1000%'
----
index-join abcde
├── columns: a:1(int!null) b:2(int!null) c:3(string!null) d:4(int) e:5(int)
├── stats: [rows=1.089, distinct(1)=1.089, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1.089, null(3)=0]
├── key: (1)
├── fd: ()-->(2), (1)-->(3-5)
└── scan abcde@good
├── columns: a:1(int!null) b:2(int!null) c:3(string!null) d:4(int)
├── constraint: /2/3/4/1: [/1/'+1-1000' - /1/'+1-1001')
├── stats: [rows=1.089, distinct(1)=1.089, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1.089, null(3)=0]
├── key: (1)
└── fd: ()-->(2), (1)-->(3,4)
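To see roughly how the new buildScan path arrives at the small row estimate above, the sketch below multiplies the distinct-count selectivity of the applied prefix (b = 1) by the 1/3-per-conjunct factor for the unapplied LIKE range on c. The 1000-row and 100-distinct-value inputs are assumed defaults rather than values read from the testdata, so the result only approximates the 1.089 rows shown (the real builder also applies null-count adjustments):

package main

import (
	"fmt"
	"math"
)

func main() {
	// Assumed inputs: a default 1000-row table and a default 100 distinct
	// values for b. The 1/3 per-conjunct figure is the one referenced in the
	// statistics_builder comments above.
	const unappliedConjunctSelectivity = 1.0 / 3.0
	inputRows := 1000.0
	distinctB := 100.0

	selFromB := 1.0 / distinctB  // applied prefix: b = 1
	numUnappliedConjuncts := 2.0 // the LIKE-derived range bounds c on both ends

	rows := inputRows * selFromB *
		math.Pow(unappliedConjunctSelectivity, numUnappliedConjuncts)
	fmt.Printf("estimated rows ~ %.2f\n", rows) // ~1.11; the testdata shows 1.089
}

Under the old code path the constraint counted as fully applied and the (1/3)^2 factor was never charged, so the good index's scan looked no more selective than the bad index's, which presumably is what let the bad index win in #31929.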
6 changes: 3 additions & 3 deletions pkg/sql/opt/xform/testdata/rules/select
@@ -601,7 +601,7 @@ memo (optimized, ~4KB, required=[presentation: k:1,u:2,v:3,j:4])
├── G1: (select G2 G3) (select G4 G3)
│ └── [presentation: k:1,u:2,v:3,j:4]
│ ├── best: (select G4 G3)
│ └── cost: 407.09
│ └── cost: 45.23
├── G2: (scan b)
│ └── []
│ ├── best: (scan b)
@@ -610,13 +610,13 @@ memo (optimized, ~4KB, required=[presentation: k:1,u:2,v:3,j:4])
├── G4: (index-join G7 b,cols=(1-4))
│ └── []
│ ├── best: (index-join G7 b,cols=(1-4))
│ └── cost: 406.30
│ └── cost: 45.14
├── G5: (gt G8 G9)
├── G6: (lt G8 G10)
├── G7: (scan b@u,cols=(1,2),constrained)
│ └── []
│ ├── best: (scan b@u,cols=(1,2),constrained)
│ └── cost: 82.37
│ └── cost: 9.15
├── G8: (tuple G11)
├── G9: (tuple G12)
├── G10: (tuple G13)