/
hdr.go
215 lines (186 loc) · 6.4 KB
/
hdr.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
package expv2
import (
"math"
"math/bits"
"sort"
"github.com/gesemaya/k6/output/cloud/expv2/pbcloud"
)
const (
// lowestTrackable represents the minimum value that the histogram tracks.
// Essentially, it excludes negative numbers.
// Most of metrics tracked by histograms are durations
// where we don't expect negative numbers.
//
// In the future, we may expand and include them,
// probably after https://github.com/grafana/k6/issues/763.
lowestTrackable = 0
// highestTrackable represents the maximum
// value that the histogram is able to track with high accuracy (0.1% of error).
// It should be a high enough
// and rationale value for the k6 context; 2^30 = 1_073_741_824
highestTrackable = 1 << 30
)
// histogram represents a distribution
// of metrics samples' values as histogram.
//
// The histogram is the representation of base-2 exponential Histogram with two layers.
// The first layer has primary buckets in the form of a power of two, and a second layer of buckets
// for each primary bucket with an equally distributed amount of buckets inside.
//
// The histogram has a series of (N * 2^m) buckets, where:
// N = a power of 2 that defines the number of primary buckets
// m = a power of 2 that defines the number of the secondary buckets
// The current version is: f(N = 25, m = 7) = 3200.
type histogram struct {
// Buckets stores the counters for each bin of the histogram.
// It does not include counters for the untrackable values,
// because they contain exception cases and require to be tracked in a dedicated way.
Buckets map[uint32]uint32
// ExtraLowBucket counts occurrences of observed values smaller
// than the minimum trackable value.
ExtraLowBucket uint32
// ExtraLowBucket counts occurrences of observed values bigger
// than the maximum trackable value.
ExtraHighBucket uint32
// Max is the absolute maximum observed value.
Max float64
// Min is the absolute minimum observed value.
Min float64
// Sum is the sum of all observed values.
Sum float64
// Count is counts the amount of observed values.
Count uint32
}
func newHistogram() *histogram {
return &histogram{
Buckets: make(map[uint32]uint32),
Max: -math.MaxFloat64,
Min: math.MaxFloat64,
}
}
// addToBucket increments the counter of the bucket of the provided value.
// If the value is lower or higher than the trackable limits
// then it is counted into specific buckets. All the stats are also updated accordingly.
func (h *histogram) addToBucket(v float64) {
if v > h.Max {
h.Max = v
}
if v < h.Min {
h.Min = v
}
h.Count++
h.Sum += v
if v > highestTrackable {
h.ExtraHighBucket++
return
}
if v < lowestTrackable {
h.ExtraLowBucket++
return
}
h.Buckets[resolveBucketIndex(v)]++
}
// histogramAsProto converts the histogram into the equivalent Protobuf version.
func histogramAsProto(h *histogram, time int64) *pbcloud.TrendHdrValue {
var (
indexes []uint32
counters []uint32
spans []*pbcloud.BucketSpan
)
// allocate only if at least one item is available, in the case of only
// untrackable values, then Indexes and Buckets are expected to be empty.
if len(h.Buckets) > 0 {
indexes = make([]uint32, 0, len(h.Buckets))
for index := range h.Buckets {
indexes = append(indexes, index)
}
sort.Slice(indexes, func(i, j int) bool {
return indexes[i] < indexes[j]
})
// init the counters
counters = make([]uint32, 1, len(h.Buckets))
counters[0] = h.Buckets[indexes[0]]
// open the first span
spans = append(spans, &pbcloud.BucketSpan{Offset: indexes[0], Length: 1})
}
for i := 1; i < len(indexes); i++ {
counters = append(counters, h.Buckets[indexes[i]])
// if the current and the previous indexes are not consecutive
// consider as closed the current on-going span and start a new one.
if zerosBetween := indexes[i] - indexes[i-1] - 1; zerosBetween > 0 {
spans = append(spans, &pbcloud.BucketSpan{Offset: zerosBetween, Length: 1})
continue
}
spans[len(spans)-1].Length++
}
hval := &pbcloud.TrendHdrValue{
Time: timestampAsProto(time),
MinValue: h.Min,
MaxValue: h.Max,
Sum: h.Sum,
Count: h.Count,
Counters: counters,
Spans: spans,
}
if h.ExtraLowBucket > 0 {
hval.ExtraLowValuesCounter = &h.ExtraLowBucket
}
if h.ExtraHighBucket > 0 {
hval.ExtraHighValuesCounter = &h.ExtraHighBucket
}
return hval
}
// resolveBucketIndex returns the index
// of the bucket in the histogram for the provided value.
func resolveBucketIndex(val float64) uint32 {
if val < lowestTrackable {
return 0
}
// We upscale to the next integer to ensure that each sample falls
// within a specific bucket, even when the value is fractional.
// This avoids under-representing the distribution in the histogram.
upscaled := uint32(math.Ceil(val))
// In histograms, bucket boundaries are usually defined as multiples of powers of 2,
// allowing for efficient computation of bucket indexes.
//
// We define k=7 in our case, because it allows for sufficient granularity in the
// distribution (2^7=128 primary buckets of which each can be further
// subdivided if needed).
//
// k is the constant balancing factor between granularity and
// computational efficiency.
//
// In our case:
// i.e 2^7 = 128 ~ 100 = 10^2
// 2^10 = 1024 ~ 1000 = 10^3
// f(x) = 3*x + 1 - empiric formula that works for us
// since f(2)=7 and f(3)=10
const k = uint32(7)
// 256 = 1 << (k+1)
if upscaled < 256 {
return upscaled
}
// `nkdiff` helps us find the right bucket for `upscaled`. It does so by determining the
// index for the "major" bucket (a set of values within a power of two range) and then
// the "sub" bucket within that major bucket. This system provides us with a fine level
// of granularity within a computationally efficient bucketing system. The result is a
// histogram that provides a detailed representation of the distribution of values.
//
// Here we use some math to get simple formula
// derivation:
// let u = upscaled
// let n = msb(u) - most significant digit position
// i.e. n = floor(log(u, 2))
// major_bucket_index = n - k + 1
// sub_bucket_index = u>>(n - k) - (1<<k)
// bucket = major_bucket_index << k + sub_bucket_index =
// = (n-k+1)<<k + u>>(n-k) - (1<<k) =
// = (n-k)<<k + u>>(n-k)
//
nkdiff := uint32(bits.Len32(upscaled>>k) - 1) // msb index
return (nkdiff << k) + (upscaled >> nkdiff)
}
// Add implements the metricValue interface.
func (h *histogram) Add(v float64) {
h.addToBucket(v)
}