Skip to content
Newer
Older
100644 270 lines (241 sloc) 8.67 KB
27494a2 @jdelong Pull from FB rev 63ce89e2f2301e6bba44a111cc7d4218022156f6
jdelong authored Jun 2, 2012
1 /*
2 * Copyright 2012 Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef FOLLY_HISTOGRAM_INL_H_
18 #define FOLLY_HISTOGRAM_INL_H_
19
20 #include "folly/Conv.h"
21
22 #include <glog/logging.h>
23
24 namespace folly {
25
26 namespace detail {
27
28 template <typename T, typename BucketT>
29 HistogramBuckets<T, BucketT>::HistogramBuckets(ValueType bucketSize,
30 ValueType min,
31 ValueType max,
32 const BucketType& defaultBucket)
33 : bucketSize_(bucketSize),
34 min_(min),
35 max_(max) {
36 CHECK_GT(bucketSize_, ValueType(0));
37 CHECK_LT(min_, max_);
38
39 unsigned int numBuckets = (max - min) / bucketSize;
40 // Round up if the bucket size does not fit evenly
41 if (numBuckets * bucketSize < max - min) {
42 ++numBuckets;
43 }
44 // Add 2 for the extra 'below min' and 'above max' buckets
45 numBuckets += 2;
46 buckets_.assign(numBuckets, defaultBucket);
47 }
48
49 template <typename T, typename BucketType>
50 unsigned int HistogramBuckets<T, BucketType>::getBucketIdx(
51 ValueType value) const {
52 if (value < min_) {
53 return 0;
54 } else if (value >= max_) {
55 return buckets_.size() - 1;
56 } else {
57 // the 1 is the below_min bucket
58 return ((value - min_) / bucketSize_) + 1;
59 }
60 }
61
62 template <typename T, typename BucketType>
63 template <typename CountFn>
64 unsigned int HistogramBuckets<T, BucketType>::getPercentileBucketIdx(
65 double pct,
66 CountFn countFromBucket,
67 double* lowPct, double* highPct) const {
68 CHECK_GE(pct, 0.0);
69 CHECK_LE(pct, 1.0);
70
71 unsigned int numBuckets = buckets_.size();
72
73 // Compute the counts in each bucket
74 std::vector<uint64_t> counts(numBuckets);
75 uint64_t totalCount = 0;
76 for (unsigned int n = 0; n < numBuckets; ++n) {
77 uint64_t bucketCount =
78 countFromBucket(const_cast<const BucketType&>(buckets_[n]));
79 counts[n] = bucketCount;
80 totalCount += bucketCount;
81 }
82
83 // If there are no elements, just return the lowest bucket.
84 // Note that we return bucket 1, which is the first bucket in the
85 // histogram range; bucket 0 is for all values below min_.
86 if (totalCount == 0) {
87 // Set lowPct and highPct both to 0.
88 // getPercentileEstimate() will recognize this to mean that the histogram
89 // is empty.
90 if (lowPct) {
91 *lowPct = 0.0;
92 }
93 if (highPct) {
94 *highPct = 0.0;
95 }
96 return 1;
97 }
98
99 // Loop through all the buckets, keeping track of each bucket's
100 // percentile range: [0,10], [10,17], [17,45], etc. When we find a range
101 // that includes our desired percentile, we return that bucket index.
102 double prevPct = 0.0;
103 double curPct = 0.0;
104 uint64_t curCount = 0;
105 unsigned int idx;
106 for (idx = 0; idx < numBuckets; ++idx) {
107 if (counts[idx] == 0) {
108 // skip empty buckets
109 continue;
110 }
111
112 prevPct = curPct;
113 curCount += counts[idx];
114 curPct = static_cast<double>(curCount) / totalCount;
115 if (pct <= curPct) {
116 // This is the desired bucket
117 break;
118 }
119 }
120
121 if (lowPct) {
122 *lowPct = prevPct;
123 }
124 if (highPct) {
125 *highPct = curPct;
126 }
127 return idx;
128 }
129
130 template <typename T, typename BucketType>
131 template <typename CountFn, typename AvgFn>
132 T HistogramBuckets<T, BucketType>::getPercentileEstimate(
133 double pct,
134 CountFn countFromBucket,
135 AvgFn avgFromBucket) const {
136
137 // Find the bucket where this percentile falls
138 double lowPct;
139 double highPct;
140 unsigned int bucketIdx = getPercentileBucketIdx(pct, countFromBucket,
141 &lowPct, &highPct);
142 if (lowPct == 0.0 && highPct == 0.0) {
143 // Invalid range -- the buckets must all be empty
144 // Return the default value for ValueType.
145 return ValueType();
146 }
147 if (lowPct == highPct) {
148 // Unlikely to have exact equality,
149 // but just return the bucket average in this case.
150 // We handle this here to avoid division by 0 below.
151 return avgFromBucket(buckets_[bucketIdx]);
152 }
153
154 CHECK_GE(pct, lowPct);
155 CHECK_LE(pct, highPct);
156 CHECK_LT(lowPct, highPct);
157
158 // Compute information about this bucket
159 ValueType avg = avgFromBucket(buckets_[bucketIdx]);
160 ValueType low;
161 ValueType high;
162 if (bucketIdx == 0) {
163 if (avg > min_) {
164 // This normally shouldn't happen. This bucket is only supposed to track
165 // values less than min_. Most likely this means that integer overflow
166 // occurred, and the code in avgFromBucket() returned a huge value
167 // instead of a small one. Just return the minimum possible value for
168 // now.
169 //
170 // (Note that if the counter keeps being decremented, eventually it will
171 // wrap and become small enough that we won't detect this any more, and
172 // we will return bogus information.)
173 LOG(ERROR) << "invalid average value in histogram minimum bucket: " <<
174 avg << " > " << min_ << ": possible integer overflow?";
175 return getBucketMin(bucketIdx);
176 }
177 // For the below-min bucket, just assume the lowest value ever seen is
178 // twice as far away from min_ as avg.
179 high = min_;
180 low = high - (2 * (high - avg));
181 // Adjust low in case it wrapped
182 if (low > avg) {
183 low = std::numeric_limits<ValueType>::min();
184 }
185 } else if (bucketIdx == buckets_.size() - 1) {
186 if (avg < max_) {
187 // Most likely this means integer overflow occurred. See the comments
188 // above in the minimum case.
189 LOG(ERROR) << "invalid average value in histogram maximum bucket: " <<
190 avg << " < " << max_ << ": possible integer overflow?";
191 return getBucketMax(bucketIdx);
192 }
193 // Similarly for the above-max bucket, assume the highest value ever seen
194 // is twice as far away from max_ as avg.
195 low = max_;
196 high = low + (2 * (avg - low));
197 // Adjust high in case it wrapped
198 if (high < avg) {
199 high = std::numeric_limits<ValueType>::max();
200 }
201 } else {
202 low = getBucketMin(bucketIdx);
203 high = getBucketMax(bucketIdx);
204 if (avg < low || avg > high) {
205 // Most likely this means an integer overflow occurred.
206 // See the comments above. Return the midpoint between low and high
207 // as a best guess, since avg is meaningless.
208 LOG(ERROR) << "invalid average value in histogram bucket: " <<
209 avg << " not in range [" << low << ", " << high <<
210 "]: possible integer overflow?";
211 return (low + high) / 2;
212 }
213 }
214
215 // Since we know the average value in this bucket, we can do slightly better
216 // than just assuming the data points in this bucket are uniformly
217 // distributed between low and high.
218 //
219 // Assume that the median value in this bucket is the same as the average
220 // value.
221 double medianPct = (lowPct + highPct) / 2.0;
222 if (pct < medianPct) {
223 // Assume that the data points lower than the median of this bucket
224 // are uniformly distributed between low and avg
225 double pctThroughSection = (pct - lowPct) / (medianPct - lowPct);
226 return low + ((avg - low) * pctThroughSection);
227 } else {
228 // Assume that the data points greater than the median of this bucket
229 // are uniformly distributed between avg and high
230 double pctThroughSection = (pct - medianPct) / (highPct - medianPct);
231 return avg + ((high - avg) * pctThroughSection);
232 }
233 }
234
235 } // detail
236
237
238 template <typename T>
239 std::string Histogram<T>::debugString() const {
240 std::string ret = folly::to<std::string>(
241 "num buckets: ", buckets_.getNumBuckets(),
242 ", bucketSize: ", buckets_.getBucketSize(),
243 ", min: ", buckets_.getMin(), ", max: ", buckets_.getMax(), "\n");
244
ad8e104 @philippv Histogram function to write tab-separated values.
philippv authored Sep 1, 2012
245 for (unsigned int i = 0; i < buckets_.getNumBuckets(); ++i) {
246 folly::toAppend(" ", buckets_.getBucketMin(i), ": ",
247 buckets_.getByIndex(i).count, "\n",
27494a2 @jdelong Pull from FB rev 63ce89e2f2301e6bba44a111cc7d4218022156f6
jdelong authored Jun 2, 2012
248 &ret);
249 }
250
251 return ret;
252 }
253
ad8e104 @philippv Histogram function to write tab-separated values.
philippv authored Sep 1, 2012
254 template <typename T>
255 void Histogram<T>::toTSV(std::ostream& out, bool skipEmptyBuckets) const {
256 for (unsigned int i = 0; i < buckets_.getNumBuckets(); ++i) {
257 // Do not output empty buckets in order to reduce data file size.
258 if (skipEmptyBuckets && getBucketByIndex(i).count == 0) {
259 continue;
260 }
261 const auto& bucket = getBucketByIndex(i);
262 out << getBucketMin(i) << '\t' << getBucketMax(i) << '\t'
263 << bucket.count << '\t' << bucket.sum << '\n';
264 }
265 }
266
27494a2 @jdelong Pull from FB rev 63ce89e2f2301e6bba44a111cc7d4218022156f6
jdelong authored Jun 2, 2012
267 } // folly
268
269 #endif // FOLLY_HISTOGRAM_INL_H_
Something went wrong with that request. Please try again.