-
Notifications
You must be signed in to change notification settings - Fork 208
/
KllDirectFloatsSketch.java
249 lines (228 loc) · 9.66 KB
/
KllDirectFloatsSketch.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.kll;
import static java.lang.Math.max;
import static java.lang.Math.min;
import org.apache.datasketches.memory.MemoryRequestServer;
import org.apache.datasketches.memory.WritableMemory;
/**
* This class implements an off-heap floats KllSketch via a WritableMemory instance of the sketch.
*
* <p>Please refer to the documentation in the package-info:<br>
* {@link org.apache.datasketches.kll}</p>
*
* @author Lee Rhodes, Kevin Lang
*/
public class KllDirectFloatsSketch extends KllDirectSketch {
/**
*
* @param wmem the current WritableMemory
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
*/
public KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr) {
super(SketchType.FLOATS_SKETCH, wmem, memReqSvr);
}
/**
* Returns an approximation to the Cumulative Distribution Function (CDF), which is the
* cumulative analog of the PMF, of the input stream given a set of splitPoint (values).
*
* <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
* getNormalizedRankError(false) function.
*
* <p>If the sketch is empty this returns null.</p>
*
* @param splitPoints an array of <i>m</i> unique, monotonically increasing float values
* that divide the real number line into <i>m+1</i> consecutive disjoint intervals.
* The definition of an "interval" is inclusive of the left splitPoint (or minimum value) and
* exclusive of the right splitPoint, with the exception that the last interval will include
* the maximum value.
* It is not necessary to include either the min or max values in these split points.
*
* @return an array of m+1 double values, which are a consecutive approximation to the CDF
* of the input stream given the splitPoints. The value at array position j of the returned
* CDF array is the sum of the returned values in positions 0 through j of the returned PMF
* array.
*/
public double[] getCDF(final float[] splitPoints) {
return getFloatsPmfOrCdf(splitPoints, true);
}
/**
* Returns the max value of the stream.
* If the sketch is empty this returns NaN.
*
* @return the max value of the stream
*/
public float getMaxValue() {
return getMaxFloatValue();
}
/**
* Returns the min value of the stream.
* If the sketch is empty this returns NaN.
*
* @return the min value of the stream
*/
public float getMinValue() {
return getMinFloatValue();
}
/**
* Returns an approximation to the Probability Mass Function (PMF) of the input stream
* given a set of splitPoints (values).
*
* <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
* getNormalizedRankError(true) function.
*
* <p>If the sketch is empty this returns null.</p>
*
* @param splitPoints an array of <i>m</i> unique, monotonically increasing float values
* that divide the real number line into <i>m+1</i> consecutive disjoint intervals.
* The definition of an "interval" is inclusive of the left splitPoint (or minimum value) and
* exclusive of the right splitPoint, with the exception that the last interval will include
* the maximum value.
* It is not necessary to include either the min or max values in these split points.
*
* @return an array of m+1 doubles each of which is an approximation
* to the fraction of the input stream values (the mass) that fall into one of those intervals.
* The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right
* splitPoint, with the exception that the last interval will include maximum value.
*/
public double[] getPMF(final float[] splitPoints) {
return getFloatsPmfOrCdf(splitPoints, false);
}
/**
* Returns an approximation to the value of the data item
* that would be preceded by the given fraction of a hypothetical sorted
* version of the input stream so far.
*
* <p>We note that this method has a fairly large overhead (microseconds instead of nanoseconds)
* so it should not be called multiple times to get different quantiles from the same
* sketch. Instead use getQuantiles(), which pays the overhead only once.
*
* <p>If the sketch is empty this returns NaN.
*
* @param fraction the specified fractional position in the hypothetical sorted stream.
* These are also called normalized ranks or fractional ranks.
* If fraction = 0.0, the true minimum value of the stream is returned.
* If fraction = 1.0, the true maximum value of the stream is returned.
*
* @return the approximation to the value at the given fraction
*/
public float getQuantile(final double fraction) {
return getFloatsQuantile(fraction);
}
/**
* Gets the lower bound of the value interval in which the true quantile of the given rank
* exists with a confidence of at least 99%.
* @param fraction the given normalized rank as a fraction
* @return the lower bound of the value interval in which the true quantile of the given rank
* exists with a confidence of at least 99%. Returns NaN if the sketch is empty.
*/
public float getQuantileLowerBound(final double fraction) {
return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDyMinK(), false)));
}
/**
* This is a more efficient multiple-query version of getQuantile().
*
* <p>This returns an array that could have been generated by using getQuantile() with many
* different fractional ranks, but would be very inefficient.
* This method incurs the internal set-up overhead once and obtains multiple quantile values in
* a single query. It is strongly recommend that this method be used instead of multiple calls
* to getQuantile().
*
* <p>If the sketch is empty this returns null.
*
* @param fractions given array of fractional positions in the hypothetical sorted stream.
* These are also called normalized ranks or fractional ranks.
* These fractions must be in the interval [0.0, 1.0], inclusive.
*
* @return array of approximations to the given fractions in the same order as given fractions
* array.
*/
public float[] getQuantiles(final double[] fractions) {
return getFloatsQuantiles(fractions);
}
/**
* This is also a more efficient multiple-query version of getQuantile() and allows the caller to
* specify the number of evenly spaced fractional ranks.
*
* <p>If the sketch is empty this returns null.
*
* @param numEvenlySpaced an integer that specifies the number of evenly spaced fractional ranks.
* This must be a positive integer greater than 0. A value of 1 will return the min value.
* A value of 2 will return the min and the max value. A value of 3 will return the min,
* the median and the max value, etc.
*
* @return array of approximations to the given fractions in the same order as given fractions
* array.
*/
public float[] getQuantiles(final int numEvenlySpaced) {
if (isEmpty()) { return null; }
return getQuantiles(org.apache.datasketches.Util.evenlySpaced(0.0, 1.0, numEvenlySpaced));
}
/**
* Gets the upper bound of the value interval in which the true quantile of the given rank
* exists with a confidence of at least 99%.
* @param fraction the given normalized rank as a fraction
* @return the upper bound of the value interval in which the true quantile of the given rank
* exists with a confidence of at least 99%. Returns NaN if the sketch is empty.
*/
public float getQuantileUpperBound(final double fraction) {
return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false)));
}
/**
* Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1,
* inclusive.
*
* <p>The resulting approximation has a probabilistic guarantee that can be obtained from the
* getNormalizedRankError(false) function.
*
* <p>If the sketch is empty this returns NaN.</p>
*
* @param value to be ranked
* @return an approximate rank of the given value
*/
public double getRank(final float value) {
return getFloatRank(value);
}
/**
* @return the iterator for this class
*/
public KllFloatsSketchIterator iterator() {
return new KllFloatsSketchIterator(getFloatItemsArray(), getLevelsArray(), getNumLevels());
}
/**
* Merges another sketch into this one.
* @param other sketch to merge into this one
*/
public void merge(final KllSketch other) {
if (!other.isDirect()) { kllSketchThrow(32); }
if (!other.isFloatsSketch()) { kllSketchThrow(34); }
mergeFloatImpl(other);
}
@Override
public byte[] toByteArray() {
return toCompactByteArrayImpl();
}
@Override
public String toString(final boolean withLevels, final boolean withData) {
return toStringImpl(withLevels, withData);
}
public void update(final float value) {
updateFloat(value);
}
}