Skip to content

Commit

Permalink
Merge #115304
Browse files Browse the repository at this point in the history
115304: opt: make floating-point stats calculations precise r=DrewKimball a=DrewKimball

This patch makes two changes to the floating-point operations in the `adjustNullCountsForOuterJoins` statistics method:
1. The results of floating-point subtractions are adjusted to be at least `epsilon`; this avoids precision errors.
2. Calculation is avoided entirely when the result is known to be zero. This ensures an exact result in the common case when no adjustment is made to the null count estimate.

This commit also reverts #115287, since the failing tests are now fixed.

Fixes #115278

Release note: None

Co-authored-by: Drew Kimball <drewk@cockroachlabs.com>
  • Loading branch information
craig[bot] and DrewKimball committed Nov 30, 2023
2 parents ccc3d54 + a02173e commit 830d4f2
Showing 1 changed file with 12 additions and 20 deletions.
32 changes: 12 additions & 20 deletions pkg/sql/opt/memo/statistics_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1652,24 +1652,25 @@ func (sb *statisticsBuilder) adjustNullCountsForOuterJoins(
// extension - such as right columns for non-matching rows in left joins.
switch joinType {
case opt.LeftJoinOp, opt.LeftJoinApplyOp:
if !rightColsAreEmpty {
colStat.NullCount += (rowCount - innerJoinRowCount) * leftNullCount / leftRowCount
if !rightColsAreEmpty && leftNullCount > 0 && rowCount > innerJoinRowCount {
addedRows := max(rowCount-innerJoinRowCount, epsilon)
colStat.NullCount += addedRows * leftNullCount / leftRowCount
}

case opt.RightJoinOp:
if !leftColsAreEmpty {
colStat.NullCount += (rowCount - innerJoinRowCount) * rightNullCount / rightRowCount
if !leftColsAreEmpty && rightNullCount > 0 && rowCount > innerJoinRowCount {
addedRows := max(rowCount-innerJoinRowCount, epsilon)
colStat.NullCount += addedRows * rightNullCount / rightRowCount
}

case opt.FullJoinOp:
leftJoinRowCount := max(innerJoinRowCount, leftRowCount)
rightJoinRowCount := max(innerJoinRowCount, rightRowCount)

if !leftColsAreEmpty {
colStat.NullCount += (rightJoinRowCount - innerJoinRowCount) * rightNullCount / rightRowCount
if !leftColsAreEmpty && rightNullCount > 0 && rightRowCount > innerJoinRowCount {
addedRows := max(rightRowCount-innerJoinRowCount, epsilon)
colStat.NullCount += addedRows * rightNullCount / rightRowCount
}
if !rightColsAreEmpty {
colStat.NullCount += (leftJoinRowCount - innerJoinRowCount) * leftNullCount / leftRowCount
if !rightColsAreEmpty && leftNullCount > 0 && leftRowCount > innerJoinRowCount {
addedRows := max(leftRowCount-innerJoinRowCount, epsilon)
colStat.NullCount += addedRows * leftNullCount / leftRowCount
}
}
}
Expand Down Expand Up @@ -3008,15 +3009,6 @@ func (sb *statisticsBuilder) rowsProcessed(e RelExpr) float64 {
}
}

// max returns a when a is strictly greater than b, and b in every other case
// (that includes equal operands and any comparison involving NaN, for which
// a > b evaluates to false).
//
// TODO(#115278): We should be able to replace this with Go's built-in max
// function, but doing so breaks some optimizer tests on ARM64.
func max(a, b float64) float64 {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}

//////////////////////////////////////////////////
// Helper functions for selectivity calculation //
//////////////////////////////////////////////////
Expand Down

0 comments on commit 830d4f2

Please sign in to comment.