From 9554c6e7e9116351fa330623ac28e3242dacb99c Mon Sep 17 00:00:00 2001 From: Nedyalko Andreev Date: Fri, 1 Nov 2019 10:33:17 +0200 Subject: [PATCH] Document the aggregation code some more --- stats/cloud/config.go | 8 ++++++-- stats/cloud/data.go | 16 +++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/stats/cloud/config.go b/stats/cloud/config.go index bcdeb5cbfe2..61259805f6a 100644 --- a/stats/cloud/config.go +++ b/stats/cloud/config.go @@ -161,8 +161,12 @@ func NewConfig() Config { AggregationMinSamples: null.NewInt(25, false), AggregationOutlierAlgoThreshold: null.NewInt(75, false), AggregationOutlierIqrRadius: null.NewFloat(0.25, false), - AggregationOutlierIqrCoefLower: null.NewFloat(1.5, false), - AggregationOutlierIqrCoefUpper: null.NewFloat(1.3, false), + + // We're measuring durations, so the upper coefficient is slightly + // lower: outliers on the high side are more interesting than ones + // close to zero. + AggregationOutlierIqrCoefLower: null.NewFloat(1.5, false), + AggregationOutlierIqrCoefUpper: null.NewFloat(1.3, false), } } diff --git a/stats/cloud/data.go b/stats/cloud/data.go index d104da31be1..2e5dcc1264f 100644 --- a/stats/cloud/data.go +++ b/stats/cloud/data.go @@ -213,7 +213,7 @@ func (am *AggregatedMetric) Add(t time.Duration) { am.sumD += t } -// Calc populates the float fields for min and max and calulates the average value +// Calc populates the float fields for min and max and calculates the average value func (am *AggregatedMetric) Calc(count float64) { am.Min = stats.D(am.minD) am.Max = stats.D(am.maxD) @@ -254,12 +254,14 @@ func (d durations) SortGetNormalBounds(radius, iqrLowerCoef, iqrUpperCoef float6 return floor + time.Duration(float64(ceil-floor)*posDiff) } - radius = math.Min(0.5, radius) - q1 := getValue(0.5 - radius) - q3 := getValue(0.5 + radius) - iqr := float64(q3 - q1) - min = q1 - time.Duration(iqrLowerCoef*iqr) - max = q3 + time.Duration(iqrUpperCoef*iqr) + // See 
https://en.wikipedia.org/wiki/Quartile#Outliers for details + radius = math.Min(0.5, radius) // guard against a radius greater than 50%, see AggregationOutlierIqrRadius + q1 := getValue(0.5 - radius) // get Q1, the (interpolated) value at a `radius` distance before the median + q3 := getValue(0.5 + radius) // get Q3, the (interpolated) value at a `radius` distance after the median + iqr := float64(q3 - q1) // calculate the interquartile range (IQR) + + min = q1 - time.Duration(iqrLowerCoef*iqr) // lower fence, anything below this is an outlier + max = q3 + time.Duration(iqrUpperCoef*iqr) // upper fence, anything above this is an outlier return }