Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
cloud-fan committed Dec 13, 2017
1 parent ebcd6d1 commit 8fe0c49
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -163,50 +163,48 @@ object EstimationUtils {
* Returns the number of histogram bins holding values within the given range
* [lowerBound, upperBound].
*
* Note that the return value is double type, because the range boundaries usually occupy a
* Note that the returned value is double type, because the range boundaries usually occupy a
* portion of a bin. An extreme case is [value, value], which is generated by the equality predicate
* `col = value`, we can get more accuracy by allowing returning portion of histogram bins.
* `col = value`, we can get higher accuracy by allowing returning portion of histogram bins.
*
* @param upperBound the highest value of the given range
* @param upperBoundInclusive whether the upperBound is included in the range
* @param lowerBound the lowest value of the given range
* @param lowerBoundInclusive whether the lowerBound is included in the range
* @param histogram a numeric equi-height histogram
* @param bins an array of bins for a given numeric equi-height histogram
*/
def numBinsHoldingRange(
upperBound: Double,
upperBoundInclusive: Boolean,
lowerBound: Double,
lowerBoundInclusive: Boolean,
histogram: Histogram): Double = {
assert(histogram.bins.head.lo <= lowerBound &&
lowerBound <= upperBound &&
upperBound <= histogram.bins.last.hi,
bins: Array[HistogramBin]): Double = {
assert(bins.head.lo <= lowerBound && lowerBound <= upperBound && upperBound <= bins.last.hi,
"Given range does not fit in the given histogram.")
assert(upperBound != lowerBound || upperBoundInclusive || lowerBoundInclusive,
s"'$lowerBound < value < $upperBound' is an invalid range.")

val upperBinIndex = if (upperBoundInclusive) {
findLastBinForValue(upperBound, histogram.bins)
findLastBinForValue(upperBound, bins)
} else {
findFirstBinForValue(upperBound, histogram.bins)
findFirstBinForValue(upperBound, bins)
}
val lowerBinIndex = if (lowerBoundInclusive) {
findFirstBinForValue(lowerBound, histogram.bins)
findFirstBinForValue(lowerBound, bins)
} else {
findLastBinForValue(lowerBound, histogram.bins)
findLastBinForValue(lowerBound, bins)
}
assert(lowerBinIndex <= upperBinIndex, "Invalid histogram data.")


if (lowerBinIndex == upperBinIndex) {
binHoldingRangePossibility(upperBound, lowerBound, histogram.bins(lowerBinIndex))
binHoldingRangePossibility(upperBound, lowerBound, bins(lowerBinIndex))
} else {
// Computes the occupied portion of bins of the upperBound and lowerBound.
val lowerBin = histogram.bins(lowerBinIndex)
val lowerBin = bins(lowerBinIndex)
val lowerPart = binHoldingRangePossibility(lowerBin.hi, lowerBound, lowerBin)

val higherBin = histogram.bins(upperBinIndex)
val higherBin = bins(upperBinIndex)
val higherPart = binHoldingRangePossibility(upperBound, higherBin.lo, higherBin)

// The total number of bins is lowerPart + higherPart + bins between them
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ case class FilterEstimation(plan: Filter) extends Logging {
}

/**
* Computes the possibility of a equal predicate using histogram.
* Computes the possibility of an equality predicate using histogram.
*/
private def computeEqualityPossibilityByHistogram(
literal: Literal, colStat: ColumnStat): Double = {
Expand All @@ -557,14 +557,14 @@ case class FilterEstimation(plan: Filter) extends Logging {
upperBoundInclusive = true,
lowerBound = min,
lowerBoundInclusive = true,
histogram)
histogram.bins)

val numBinsHoldingDatum = EstimationUtils.numBinsHoldingRange(
upperBound = datum,
upperBoundInclusive = true,
lowerBound = datum,
lowerBoundInclusive = true,
histogram)
histogram.bins)

numBinsHoldingDatum / numBinsHoldingEntireRange
}
Expand All @@ -584,9 +584,9 @@ case class FilterEstimation(plan: Filter) extends Logging {

// compute how many bins the column's current valid range [min, max] occupies.
val numBinsHoldingEntireRange = EstimationUtils.numBinsHoldingRange(
max, upperBoundInclusive = true, min, lowerBoundInclusive = true, histogram)
max, upperBoundInclusive = true, min, lowerBoundInclusive = true, histogram.bins)

val numBinsHoldingDatum = op match {
val numBinsHoldingRange = op match {
// LessThan and LessThanOrEqual share the same logic, the only difference is whether to
// include the upperBound in the range.
case _: LessThan =>
Expand All @@ -595,14 +595,14 @@ case class FilterEstimation(plan: Filter) extends Logging {
upperBoundInclusive = false,
lowerBound = min,
lowerBoundInclusive = true,
histogram)
histogram.bins)
case _: LessThanOrEqual =>
EstimationUtils.numBinsHoldingRange(
upperBound = datum,
upperBoundInclusive = true,
lowerBound = min,
lowerBoundInclusive = true,
histogram)
histogram.bins)

// GreaterThan and GreaterThanOrEqual share the same logic, the only difference is whether to
// include the lowerBound in the range.
Expand All @@ -612,17 +612,17 @@ case class FilterEstimation(plan: Filter) extends Logging {
upperBoundInclusive = true,
lowerBound = datum,
lowerBoundInclusive = false,
histogram)
histogram.bins)
case _: GreaterThanOrEqual =>
EstimationUtils.numBinsHoldingRange(
upperBound = max,
upperBoundInclusive = true,
lowerBound = datum,
lowerBoundInclusive = true,
histogram)
histogram.bins)
}

numBinsHoldingDatum / numBinsHoldingEntireRange
numBinsHoldingRange / numBinsHoldingEntireRange
}

/**
Expand Down

0 comments on commit 8fe0c49

Please sign in to comment.