Skip to content

Commit

Permalink
cloudv2: Trend conversion as HDR Histogram
Browse files Browse the repository at this point in the history
  • Loading branch information
codebien committed Apr 24, 2023
1 parent 34d36f7 commit fb719c9
Show file tree
Hide file tree
Showing 5 changed files with 515 additions and 11 deletions.
226 changes: 226 additions & 0 deletions output/cloud/expv2/hdr.go
@@ -0,0 +1,226 @@
package expv2

import (
"math"
"time"

"go.k6.io/k6/output/cloud/expv2/pbcloud"
"google.golang.org/protobuf/types/known/timestamppb"
)

const (
// lowestTrackable represents the minimum value that the histogram tracks.
// Essentially, it excludes negative numbers.
// Most of metrics tracked by histograms are durations
// where we don't expect negative numbers.
//
// In the future, we may expand and include them,
// probably after https://github.com/grafana/k6/issues/763.
lowestTrackable = 0

// highestTrackable represents the maximum
// value that the histogram is able to track with high accuracy (0.1% of error).
// It should be a high enough
// and rationale value for the k6 context; 2^30 = 1_073_741_824
highestTrackable = 1 << 30
)

// histogram represents a distribution
// of metrics samples' values as histogram.
//
// The histogram is the representation of base-2 exponential Histogram with two layers.
// The first layer has primary buckets in the form of a power of two, and a second layer of buckets
// for each primary bucket with an equally distributed amount of buckets inside.
//
// The histogram has a series of (N * 2^m) buckets, where:
// N = a power of 2 that defines the number of primary buckets
// m = a power of 2 that defines the number of the secondary buckets
// The current version is: f(N = 25, m = 7) = 3200.
type histogram struct {
// Buckets stores the counters for each bin of the histogram.
// It does not include the first and the last absolute bucket,
// because they contain exception cases
// and they requires to be tracked in a dedicated way.
//
// It is expected to start and end with a non-zero bucket,
// in this way we can avoid extra allocation for not significant buckets.
// All the zero buckets in between are preserved.
Buckets []uint32

// ExtraLowBucket counts occurrences of observed values smaller
// than the minimum trackable value.
ExtraLowBucket uint32

// ExtraLowBucket counts occurrences of observed values bigger
// than the maximum trackable value.
ExtraHighBucket uint32

// FirstNotZeroBucket represents the index of the first bucket
// with a significant counter in the Buckets slice (a not zero value).
// In this way, all the buckets before can be omitted.
FirstNotZeroBucket uint32

// LastNotZeroBucket represents the index of the last bucket
// with a significant counter in the Buckets slice (a not zero value).
// In this way, all the buckets after can be omitted.
LastNotZeroBucket uint32

// Max is the absolute maximum observed value.
Max float64

// Min is the absolute minimum observed value.
Min float64

// Sum is the sum of all observed values.
Sum float64

// Count is counts the amount of observed values.
Count uint32
}

// newHistogram creates an histogram of the provided values.
func newHistogram(values []float64) histogram {
h := histogram{}
if len(values) < 1 {
return h
}

for i := 0; i < len(values); i++ {
h.addToBucket(values[i])
}

h.trimzeros()
return h
}

// addToBucket increments the counter of the bucket
// releated to the provided value.
// If the value is lower or higher than the trackable limits
// then it is counted into specific buckets.
// All the stats are also updated accordingly.
func (h *histogram) addToBucket(v float64) {
if h.Count == 0 {
h.Max, h.Min = v, v
} else {
if v > h.Max {
h.Max = v
}
if v < h.Min {
h.Min = v
}
}

h.Count++
h.Sum += v

if v > highestTrackable {
h.ExtraHighBucket++
return
}
if v < lowestTrackable {
h.ExtraLowBucket++
return
}

index := resolveBucketIndex(v)
blen := uint32(len(h.Buckets))
if blen == 0 {
h.FirstNotZeroBucket = index
h.LastNotZeroBucket = index
} else {
if index < h.FirstNotZeroBucket {
h.FirstNotZeroBucket = index
}
if index > h.LastNotZeroBucket {
h.LastNotZeroBucket = index
}
}

if index >= blen {
// TODO: evaluate if a pool can improve

// expand with zeros up to the required index
h.Buckets = append(h.Buckets, make([]uint32, index-blen+1)...)
}
h.Buckets[index]++
}

// trimzeros removes all buckets that have a zero value
// from the begin and from the end until
// the first not zero bucket.
func (h *histogram) trimzeros() {
if h.Count < 1 || len(h.Buckets) < 1 {
return
}

// all the counters are set to zero, we can remove all
if h.FirstNotZeroBucket == 0 && h.LastNotZeroBucket == 0 {
h.Buckets = []uint32{}
return
}

h.Buckets = h.Buckets[h.FirstNotZeroBucket : h.LastNotZeroBucket+1]
}

// histogramAsProto converts the histogram into the equivalent Protobuf version.
func histogramAsProto(h histogram, time time.Time) *pbcloud.TrendHdrValue {
hval := &pbcloud.TrendHdrValue{
Time: timestamppb.New(time),
MinResolution: 1.0,
SignificantDigits: 2,
LowerCounterIndex: h.FirstNotZeroBucket,
MinValue: h.Min,
MaxValue: h.Max,
Sum: h.Sum,
Count: h.Count,
Counters: h.Buckets,
}
if h.ExtraLowBucket > 0 {
hval.ExtraLowValuesCounter = &h.ExtraLowBucket
}
if h.ExtraHighBucket > 0 {
hval.ExtraHighValuesCounter = &h.ExtraHighBucket
}
return hval
}

// resolveBucketIndex returns the index
// of the bucket in the histogram for the provided value.
func resolveBucketIndex(val float64) uint32 {
// the lowest trackable value is zero
// negative number are not expected
if val < 0 {
return 0
}

upscaled := int32(math.Ceil(val))
bucketIdx := upscaled

// k is a power of 2 closest to 10^precision_points
// At the moment the precision_points is a fixed value set to 2.
//
// i.e 2^7 = 128 ~ 100 = 10^2
// 2^10 = 1024 ~ 1000 = 10^3
// f(x) = 3*x + 1 - empiric formula that works for us
// since f(2)=7 and f(3)=10
const k = 7

// 256 = 1 << (k+1)
if upscaled >= 256 {
//
// Here we use some math to get simple formula
// derivation:
// let n = msb(u) - most significant digit position
// i.e. n = floor(log(u, 2))
// major_bucket_index = n - k + 1
// sub_bucket_index = u>>(n - k) - (1<<k)
// bucket = major_bucket_index << k + sub_bucket_index =
// = (n-k+1)<<k + u>>(n-k) - (1<<k) =
// = (n-k)<<k + u>>(n-k)
//
nkdiff := int32(math.Floor(math.Log2(float64(upscaled >> k))))
bucketIdx = (nkdiff << k) + (upscaled >> nkdiff)
}

return uint32(bucketIdx)
}

0 comments on commit fb719c9

Please sign in to comment.