Skip to content

Commit

Permalink
Start using mean instead of median
Browse files Browse the repository at this point in the history
  • Loading branch information
ehmicky committed Sep 5, 2021
1 parent 41e6a25 commit 5f21fb8
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 56 deletions.
37 changes: 16 additions & 21 deletions src/stats/compute.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,18 @@ import { getMean } from './sum.js'
// - This happens when the task is faster than the minimum time resolution
// - This is bad because:
// - It is confusing to users
// - It prevents computing `stdev` and related stats if the `median` is `0`
// - It prevents computing `stdev` and related stats if the `mean` is `0`
// - It decreases `stdev` if many measures are exactly `0`. This leads to
// reaching the target `precision` even though stats are imprecise.
// - This is mostly prevented by:
// - Including the minimum time resolution in `minLoopDuration`
// - Multiplying `repeat` when the `sampleMedian` is `0`, including during
// calibration
// We do not compute the mode because:
// - Reporting it together with the median might make it look like it is as
// important. However, the median is a far more useful statistic.
// - Reporting it together with the mean might make it look like it is as
// important. However, the mean is a far more useful statistic.
// - This would create too many statistics for the average, together with the
// median and the mean.
// mean and the median.
export const computeStats = function (measures) {
const { minIndex, maxIndex, length, min, max } = getOutliersStats(measures)

Expand All @@ -45,20 +45,20 @@ export const computeStats = function (measures) {
bucketCount: HISTOGRAM_SIZE,
})

const { stdev, rstdev, moe, rmoe, medianMin, medianMax } = getPrecisionStats({
const { stdev, rstdev, moe, rmoe, meanMin, meanMax } = getPrecisionStats({
measures,
minIndex,
maxIndex,
length,
min,
max,
median,
mean,
})

return {
median,
medianMin,
medianMax,
medianMin: meanMin,
medianMax: meanMax,
mean,
min,
max,
Expand All @@ -76,7 +76,7 @@ const HISTOGRAM_SIZE = 1e2

// Retrieve stats related to `stdev`.
// Those might be absent if the number of loops is low.
// `median: 0` is very unlikely (but possible) after calibration, which would
// `mean: 0` is very unlikely (but possible) after calibration, which would
// make those stats not compute correctly.
const getPrecisionStats = function ({
measures,
Expand All @@ -85,23 +85,18 @@ const getPrecisionStats = function ({
length,
min,
max,
median,
mean,
}) {
if (length < MIN_STDEV_LOOPS || median === 0) {
if (length < MIN_STDEV_LOOPS || mean === 0) {
return {}
}

const stdev = getStdev(measures, { minIndex, maxIndex, median })
const rstdev = getRstdev(stdev, median)
const stdev = getStdev(measures, { minIndex, maxIndex, mean })
const rstdev = getRstdev(stdev, mean)
const moe = getMoe(stdev, length)
const rmoe = getRmoe(moe, median)
const { medianMin, medianMax } = getConfidenceInterval({
median,
moe,
min,
max,
})
return { stdev, rstdev, moe, rmoe, medianMin, medianMax }
const rmoe = getRmoe(moe, mean)
const { meanMin, meanMax } = getConfidenceInterval({ mean, moe, min, max })
return { stdev, rstdev, moe, rmoe, meanMin, meanMax }
}

// `stdev` might be very imprecise when there are not enough values to compute
Expand Down
20 changes: 10 additions & 10 deletions src/stats/confidence.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
// Retrieve the confidence interval.
// This is represented as `stats.medianMin` and `stats.medianMax`.
// When `showPrecision` is `true`, this is reported instead of `stats.median`.
// This allows reporting both the approximate median and its precision at once.
// - When this happens, `stats.median` is not reported
// - This is easier to understand than reporting `stats.median` and
// This is represented as `stats.meanMin` and `stats.meanMax`.
// When `showPrecision` is `true`, this is reported instead of `stats.mean`.
// This allows reporting both the approximate mean and its precision at once.
// - When this happens, `stats.mean` is not reported
// - This is easier to understand than reporting `stats.mean` and
// `stats.moe|rmoe` because it is easier to visualize and to compare with
// other combinations.
// We do not allow `stats.medianMin|medianMax` to go beyond `stats.min|max`:
// We do not allow `stats.meanMin|meanMax` to go beyond `stats.min|max`:
// - This is very unlikely to happen, although technically possible providing
// both:
// - The number of loops is very low
// - `stats.rstdev` is very high, i.e. the distribution is very skewed
// - This allows `stats.min|max` to be used in reporting as extreme boundaries
// Arguments: `mean` is the center of the interval, `moe` is its half-width,
// and `min|max` are the hard boundaries the interval cannot go beyond.
// Returns `{ meanMin, meanMax }`.
export const getConfidenceInterval = function ({ mean, moe, min, max }) {
  // The lower bound is clamped to `min` and never goes below `0`
  const meanMin = Math.max(mean - moe, min, 0)
  // The upper bound is clamped to `max`
  const meanMax = Math.min(mean + moe, max)
  return { meanMin, meanMax }
}
26 changes: 13 additions & 13 deletions src/stats/moe.js
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
import { getTvalue } from './tvalue.js'

// Retrieve margin of error, relative to the median
// Retrieve margin of error, relative to the mean
// The standard error:
// - Is the standard deviation that would be obtained by repeating the same
// benchmark
// - This is providing the underlying distribution remained the same. In
// practice, there is always a variation due to the environment. So it is
// more accurately described as the maximum difference between the current
// median and the future median if we kept measuring forever.
// - In other terms, it measures the precision of the current median, but not
// the potential variation with the next median
// - I.e. this does not measure the possible range of median in future
// mean and the future mean if we kept measuring forever.
// - In other terms, it measures the precision of the current mean, but not
// the potential variation with the next mean
// - I.e. this does not measure the possible range of mean in future
// measures
// - For example, variation might come from the environment (such as machine
// load)
// The margin of error:
// - Computes a range around the median where there is 95% of probability that
// the real median (if we kept measuring forever) would fall.
// - Computes a range around the mean where there is 95% of probability that
// the real mean (if we kept measuring forever) would fall.
// - It uses time duration, which is easier when considering a single
// combination precision
// The moe is meant to be reported:
// - by median-focused reporters (not distribution-focused)
// - by mean-focused reporters (not distribution-focused)
// - either graphically (stats.moe) or as a duration (stats.moePretty)
// The moe is useful:
// - both for:
Expand All @@ -30,7 +30,7 @@ import { getTvalue } from './tvalue.js'
// - this is statistically imperfect, i.e. just an approximation
// - the proper way would be to use a welch's t-test
// - for those reasons, using the moe as an absolute duration is more useful in
// reporting than using the moe relative to the median (percentage)
// reporting than using the moe relative to the mean (percentage)
// This all relies on measures following a normal distribution
// - In practice, this is rarely the case:
// - Several distributions are usually summed, i.e. producing several modes.
Expand All @@ -50,7 +50,7 @@ import { getTvalue } from './tvalue.js'
// - one combination might be equivalent with two other combinations, but
// those two other combinations might not be equivalent between each
// other, making ordering complicated
// - reporting it on medians does not work either because whether combinations
// - reporting it on means does not work either because whether combinations
// are comparable must be determined for each pair of combinations, not only
// the next slower combination
// - this can still be added in the future with a reporter showing a list of
Expand All @@ -63,11 +63,11 @@ export const getMoe = function (stdev, length) {
return marginOfError
}

// Retrieve margin of error relative to the median.
// Retrieve margin of error relative to the mean.
// This is more useful than moe when comparing different combinations, or when
// targeting a specific precision threshold.
// `moe` is the absolute margin of error and `mean` is the value it is divided
// by. No check is performed here: a `mean` of `0` results in `Infinity` or
// `NaN`.
export const getRmoe = function (moe, mean) {
  return moe / mean
}

// Find the `length` that gets a specific `moe` with a given `stdev`.
Expand Down
2 changes: 1 addition & 1 deletion src/stats/outliers.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@ export const getLoopsFromLength = function (length) {
// A lower value makes it more likely for outliers to overtake the histogram,
// concentrating most of the values into far fewer buckets.
const MIN_OUTLIERS = 0.05
// Having the same percentage for slow/fast outliers ensures the `median`
// Having the same percentage for slow/fast outliers ensures the `mean`
// (providing the distribution is symmetric, otherwise only the `median`)
// remains the same.
const MAX_OUTLIERS = MIN_OUTLIERS
20 changes: 9 additions & 11 deletions src/stats/stdev.js
Original file line number Diff line number Diff line change
@@ -1,39 +1,37 @@
// Retrieve standard deviation of an array of floats (cannot be NaN/Infinity).
// Array must not be empty.
// We use the median, not the mean, because it is more stable and is privileged
// in reporting.
// We use the absolute standard deviation, as opposed to making it relative to
// the median (as a percentage)
// the mean (as a percentage)
// - It makes it easier to understand:
// - the spread of a given combination
// - its relation to moe and distribution-related stats such as percentiles
// (that are also not percentages)
// - On the flipside, it makes it harder to compare combinations (since they
// most likely have different medians)
// Only the elements of `array` from `minIndex` to `maxIndex` (both inclusive)
// are used. `mean` is the value that deviations are computed from.
// The sum of squared deviations is divided by `length - 1` (not `length`), so
// that range must include at least two elements, otherwise the result is `NaN`
// or `Infinity`.
export const getStdev = function (array, { minIndex, maxIndex, mean }) {
  const length = maxIndex - minIndex + 1
  const variance =
    getSumDeviation({ array, minIndex, maxIndex, mean }) / (length - 1)
  return Math.sqrt(variance)
}

// Sum the squared difference between `mean` and each element of `array`, from
// `minIndex` to `maxIndex` (both inclusive).
// We use a separate function from `getSum()` because it is much more performant
const getSumDeviation = function ({ array, minIndex, maxIndex, mean }) {
  // eslint-disable-next-line fp/no-let
  let sum = 0

  // eslint-disable-next-line fp/no-loops, fp/no-let, fp/no-mutation
  for (let index = minIndex; index <= maxIndex; index += 1) {
    // eslint-disable-next-line fp/no-mutation
    sum += (array[index] - mean) ** 2
  }

  return sum
}

// Retrieve stdev relative to the median.
// Retrieve stdev relative to the mean.
// This is more useful than stdev when comparing different combinations, or when
// targeting a specific precision threshold.
// `stdev` is the absolute standard deviation and `mean` is the value it is
// divided by. No check is performed here: a `mean` of `0` results in
// `Infinity` or `NaN`.
export const getRstdev = function (stdev, mean) {
  return stdev / mean
}

0 comments on commit 5f21fb8

Please sign in to comment.