forked from grafana/k6
/
hdr.go
262 lines (227 loc) · 8.05 KB
/
hdr.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
package expv2
import (
"math"
"math/bits"
"go.k6.io/k6/output/cloud/expv2/pbcloud"
)
const (
// lowestTrackable represents the minimum value that the histogram tracks.
// Essentially, it excludes negative numbers.
// Most of metrics tracked by histograms are durations
// where we don't expect negative numbers.
//
// In the future, we may expand and include them,
// probably after https://github.com/grafana/k6/issues/763.
lowestTrackable = 0
// highestTrackable represents the maximum
// value that the histogram is able to track with high accuracy (0.1% of error).
// It should be a high enough
// and rationale value for the k6 context; 2^30 = 1_073_741_824
highestTrackable = 1 << 30
)
// histogram represents a distribution
// of metrics samples' values as histogram.
//
// The histogram is the representation of base-2 exponential Histogram with two layers.
// The first layer has primary buckets in the form of a power of two, and a second layer of buckets
// for each primary bucket with an equally distributed amount of buckets inside.
//
// The histogram has a series of (N * 2^m) buckets, where:
// N = a power of 2 that defines the number of primary buckets
// m = a power of 2 that defines the number of the secondary buckets
// The current version is: f(N = 25, m = 7) = 3200.
type histogram struct {
// Buckets stores the counters for each bin of the histogram.
// It does not include the first and the last absolute bucket,
// because they contain exception cases
// and they requires to be tracked in a dedicated way.
//
// It is expected to start and end with a non-zero bucket,
// in this way we can avoid extra allocation for not significant buckets.
// All the zero buckets in between are preserved.
Buckets []uint32
// ExtraLowBucket counts occurrences of observed values smaller
// than the minimum trackable value.
ExtraLowBucket uint32
// ExtraLowBucket counts occurrences of observed values bigger
// than the maximum trackable value.
ExtraHighBucket uint32
// FirstNotZeroBucket represents the index of the first bucket
// with a significant counter in the Buckets slice (a not zero value).
// In this way, all the buckets before can be omitted.
FirstNotZeroBucket uint32
// LastNotZeroBucket represents the index of the last bucket
// with a significant counter in the Buckets slice (a not zero value).
// In this way, all the buckets after can be omitted.
LastNotZeroBucket uint32
// Max is the absolute maximum observed value.
Max float64
// Min is the absolute minimum observed value.
Min float64
// Sum is the sum of all observed values.
Sum float64
// Count is counts the amount of observed values.
Count uint32
}
// addToBucket increments the counter of the bucket of the provided value.
// If the value is lower or higher than the trackable limits
// then it is counted into specific buckets. All the stats are also updated accordingly.
func (h *histogram) addToBucket(v float64) {
if h.Count == 0 {
h.Max, h.Min = v, v
} else {
if v > h.Max {
h.Max = v
}
if v < h.Min {
h.Min = v
}
}
h.Count++
h.Sum += v
if v > highestTrackable {
h.ExtraHighBucket++
return
}
if v < lowestTrackable {
h.ExtraLowBucket++
return
}
index := resolveBucketIndex(v)
// they grow the current Buckets slice if there isn't enough capacity.
//
// An example with growRight:
// With Buckets [4, 1] and index equals to 5
// then we expect a slice like [4,1,0,0,0,0]
// then the counter at 5th position will be incremented
// generating the final slice [4,1,0,0,0,1]
switch {
case len(h.Buckets) == 0:
h.init(index)
case index < h.FirstNotZeroBucket:
h.prependBuckets(index)
case index > h.LastNotZeroBucket:
h.appendBuckets(index)
default:
h.Buckets[index-h.FirstNotZeroBucket]++
}
}
func (h *histogram) init(index uint32) {
h.FirstNotZeroBucket = index
h.LastNotZeroBucket = index
h.Buckets = make([]uint32, 1, 32)
h.Buckets[0] = 1
}
// prependBuckets expands the buckets slice with zeros up to the required index,
// then it increments the required bucket.
func (h *histogram) prependBuckets(index uint32) {
if h.FirstNotZeroBucket <= index {
panic("buckets is already contains the requested index")
}
newLen := (h.FirstNotZeroBucket - index) + uint32(len(h.Buckets))
// TODO: we may consider to swap by sub-groups
// e.g [4, 1] => [4, 1, 0, 0] => [0, 0, 4, 1]
// It requires a benchmark if it is better than just copy it.
newBuckets := make([]uint32, newLen)
copy(newBuckets[h.FirstNotZeroBucket-index:], h.Buckets)
h.Buckets = newBuckets
// Update the stats
h.Buckets[0] = 1
h.FirstNotZeroBucket = index
}
// appendBuckets expands the buckets slice with zeros buckets till the required index,
// then it increments the required bucket.
// If the slice has enough capacity then it reuses it without allocate.
func (h *histogram) appendBuckets(index uint32) {
if h.LastNotZeroBucket >= index {
panic("buckets is already bigger than requested index")
}
newLen := index - h.FirstNotZeroBucket + 1
if uint32(cap(h.Buckets)) > newLen {
// See https://go.dev/ref/spec#Slice_expressions
// "For slices, the upper index bound is
// the slice capacity cap(a) rather than the length"
h.Buckets = h.Buckets[:newLen]
} else {
newBuckets := make([]uint32, newLen)
copy(newBuckets, h.Buckets)
h.Buckets = newBuckets
}
// Update the stats
h.Buckets[len(h.Buckets)-1] = 1
h.LastNotZeroBucket = index
}
// histogramAsProto converts the histogram into the equivalent Protobuf version.
func histogramAsProto(h *histogram, time int64) *pbcloud.TrendHdrValue {
hval := &pbcloud.TrendHdrValue{
Time: timestampAsProto(time),
MinResolution: 1.0,
SignificantDigits: 2,
LowerCounterIndex: h.FirstNotZeroBucket,
MinValue: h.Min,
MaxValue: h.Max,
Sum: h.Sum,
Count: h.Count,
Counters: h.Buckets,
}
if h.ExtraLowBucket > 0 {
hval.ExtraLowValuesCounter = &h.ExtraLowBucket
}
if h.ExtraHighBucket > 0 {
hval.ExtraHighValuesCounter = &h.ExtraHighBucket
}
return hval
}
// resolveBucketIndex returns the index
// of the bucket in the histogram for the provided value.
func resolveBucketIndex(val float64) uint32 {
if val < lowestTrackable {
return 0
}
// We upscale to the next integer to ensure that each sample falls
// within a specific bucket, even when the value is fractional.
// This avoids under-representing the distribution in the histogram.
upscaled := uint32(math.Ceil(val))
// In histograms, bucket boundaries are usually defined as multiples of powers of 2,
// allowing for efficient computation of bucket indexes.
//
// We define k=7 in our case, because it allows for sufficient granularity in the
// distribution (2^7=128 primary buckets of which each can be further
// subdivided if needed).
//
// k is the constant balancing factor between granularity and
// computational efficiency.
//
// In our case:
// i.e 2^7 = 128 ~ 100 = 10^2
// 2^10 = 1024 ~ 1000 = 10^3
// f(x) = 3*x + 1 - empiric formula that works for us
// since f(2)=7 and f(3)=10
const k = uint32(7)
// 256 = 1 << (k+1)
if upscaled < 256 {
return upscaled
}
// `nkdiff` helps us find the right bucket for `upscaled`. It does so by determining the
// index for the "major" bucket (a set of values within a power of two range) and then
// the "sub" bucket within that major bucket. This system provides us with a fine level
// of granularity within a computationally efficient bucketing system. The result is a
// histogram that provides a detailed representation of the distribution of values.
//
// Here we use some math to get simple formula
// derivation:
// let u = upscaled
// let n = msb(u) - most significant digit position
// i.e. n = floor(log(u, 2))
// major_bucket_index = n - k + 1
// sub_bucket_index = u>>(n - k) - (1<<k)
// bucket = major_bucket_index << k + sub_bucket_index =
// = (n-k+1)<<k + u>>(n-k) - (1<<k) =
// = (n-k)<<k + u>>(n-k)
//
nkdiff := uint32(bits.Len32(upscaled>>k) - 1) // msb index
return (nkdiff << k) + (upscaled >> nkdiff)
}
func (h *histogram) Add(v float64) {
h.addToBucket(v)
}