/
GenericInequalitySearch.java
244 lines (229 loc) · 9.62 KB
/
GenericInequalitySearch.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches;
import java.util.Comparator;
import java.util.Objects;
/**
* This provides efficient, unique and unambiguous binary searching for inequality comparison criteria
* for ordered arrays of values that may include duplicate values. The inequality criteria include
* <, ≤, ==, ≥, >. All the inequality criteria use the same search algorithm.
* (Although == is not an inequality, it is included for convenience.)
*
* <p>In order to make the searching unique and unambiguous, we modified the traditional binary
* search algorithm to search for adjacent pairs of values <i>{A, B}</i> in the values array
* instead of just a single value, where <i>a</i> and <i>b</i> are the array indices of two
* adjacent values in the array. For all the search criteria, when the algorithm has narrowed the
* search down to a single value or adjacent pair of values, the <i>resolve()</i> method provides the
* final result of the search. If there is no valid value in the array that satisfies the search
* criterion, the algorithm will return -1 to the caller.</p>
*
* <p>Given a sorted array of values <i>arr[]</i> and a search key value <i>v</i>, the algorithms for
* the searching criteria are given with each enum criterion.</p>
*
* @see <a href="https://datasketches.apache.org/docs/Quantiles/SketchingQuantilesAndRanksTutorial.html">
* Sketching Quantiles and Ranks Tutorial</a>
* @author Lee Rhodes
*/
public class GenericInequalitySearch {
/**
* The enumerator of inequalities
*/
public enum Inequality {
/**
* Given a sorted array of increasing values <i>arr[]</i> and a key value <i>v</i>,
* this criterion instructs the binary search algorithm to find the highest adjacent pair of
* values <i>{A,B}</i> such that <i>A < v ≤ B</i>.<br>
* Let <i>low</i> = index of the lowest value in the range.<br>
* Let <i>high</i> = index of the highest value in the range.
*
* <p>If <i>v</i> > arr[high], return arr[high].<br>
* If <i>v</i> ≤ arr[low], return -1.<br>
* Else return index of A.</p>
*/
LT,
/**
* Given a sorted array of increasing values <i>arr[]</i> and a key value <i>V</i>,
* this criterion instructs the binary search algorithm to find the highest adjacent pair of
* values <i>{A,B}</i> such that <i>A ≤ V < B</i>.<br>
* Let <i>low</i> = index of the lowest value in the range.<br>
* Let <i>high</i> = index of the highest value in the range.
*
* <p>If <i>v</i> ≥ arr[high], return arr[high].<br>
* If <i>v</i> < arr[low], return -1.<br>
* Else return index of A.</p>
*/
LE,
/**
* Given a sorted array of increasing values <i>arr[]</i> and a key value <i>V</i>,
* this criterion instructs the binary search algorithm to find the adjacent pair of
* values <i>{A,B}</i> such that <i>A ≤ V ≤ B</i>.
* The returned value from the binary search algorithm will be the index of <i>A</i> or <i>B</i>,
* if one of them is equal to <i>V</i>, or -1 if V is not equal to either one.
*/
EQ,
/**
* Given a sorted array of increasing values <i>arr[]</i> and a key value <i>V</i>,
* this criterion instructs the binary search algorithm to find the lowest adjacent pair of
* values <i>{A,B}</i> such that <i>A < V ≤ B</i>.<br>
* Let <i>low</i> = index of the lowest value in the range.<br>
* Let <i>high</i> = index of the highest value in the range.
*
* <p>If <i>v</i> ≤ arr[low], return arr[low].<br>
* If <i>v</i> > arr[high], return -1.<br>
* Else return index of B.</p>
*/
GE,
/**
* Given a sorted array of increasing values <i>arr[]</i> and a key value <i>V</i>,
* this criterion instructs the binary search algorithm to find the lowest adjacent pair of
* values <i>{A,B}</i> such that <i>A ≤ V < B</i>.<br>
* Let <i>low</i> = index of the lowest value in the range.<br>
* Let <i>high</i> = index of the highest value in the range.
*
* <p>If <i>v</i> < arr[low], return arr[low].<br>
* If <i>v</i> ≥ arr[high], return -1.<br>
* Else return index of B.</p>
*/
GT
}
/**
* Binary Search for the index of the generic value in the given search range that satisfies
* the given Inequality criterion.
* If -1 is returned there are no values in the search range that satisfy the inequality.
*
* @param arr the given array of comparable values that must be sorted.
* The array must not be null or empty and the values of the array must not be null (or NaN)
* in the range [low, high].
* @param low the lowest index of the lowest value in the search range, inclusive.
* @param high the highest index of the highest value in the search range, inclusive.
* @param v the value to search for. It must not be null (or NaN).
* @param crit one of the Inequality criteria: LT, LE, EQ, GE, GT. It must not be null.
* @param comparator for the type T.
* It must not be null. It must return: -1 if A < B, 0 if A == B, and +1 if A > B.
* @param <T> The generic type of value to be used in the search process.
* @return the index of the value in the given search range that satisfies the Inequality criterion.
*/
public static <T> int find(final T[] arr, final int low, final int high, final T v,
final Inequality crit, final Comparator<T> comparator) {
Objects.requireNonNull(arr, "Input arr must not be null");
Objects.requireNonNull(v,"Input v must not be null");
Objects.requireNonNull(crit, "Input inequality must not be null");
Objects.requireNonNull(comparator,"Input comparator must not be null");
if (arr.length == 0) { throw new SketchesArgumentException("Input array must not be empty."); }
int lo = low;
int hi = high;
while (lo <= hi) {
if (hi - lo <= 1) {
return resolve(arr, lo, hi, v, crit, comparator);
}
final int mid = lo + (hi - lo) / 2;
final int ret = compare(arr, mid, mid + 1, v, crit, comparator);
if (ret == -1 ) { hi = mid; }
else if (ret == 1) { lo = mid + 1; }
else { return getIndex(arr, mid, mid + 1, v, crit, comparator); }
}
return -1; //should never return here
}
private static <T> int compare(final T[] arr, final int a, final int b, final T v,
final Inequality crit, final Comparator<T> comparator) {
int result = 0;
switch (crit) {
case GE:
case LT: {
result = comparator.compare(v, arr[a]) <= 0 ? -1 : comparator.compare(arr[b], v) < 0 ? 1 : 0;
break;
}
case GT:
case LE: {
result = comparator.compare(v, arr[a]) < 0 ? -1 : comparator.compare(arr[b], v) <= 0 ? 1 : 0;
break;
}
case EQ: {
result = comparator.compare(v, arr[a]) < 0 ? -1 : comparator.compare(arr[b], v) < 0 ? 1 : 0;
break;
}
}
return result;
}
private static <T> int getIndex(final T[] arr, final int a, final int b, final T v,
final Inequality crit, final Comparator<T> comparator) {
int result = 0;
switch (crit) {
case LT:
case LE: {
result = a; break;
}
case GE:
case GT: {
result = b; break;
}
case EQ: {
result = comparator.compare(v, arr[a]) == 0 ? a : comparator.compare(v, arr[b]) == 0 ? b : -1;
}
}
return result;
}
private static <T> int resolve(final T[] arr, final int lo, final int hi, final T v,
final Inequality crit, final Comparator<T> comparator) {
int result = 0;
switch (crit) {
case LT: {
result = (lo == hi)
? (comparator.compare(v, arr[lo]) > 0 ? lo : -1)
: (comparator.compare(v, arr[hi]) > 0
? hi
: (comparator.compare(v, arr[lo]) > 0 ? lo : -1));
break;
}
case LE: {
result = (lo == hi)
? (comparator.compare(v, arr[lo]) >= 0 ? lo : -1)
: (comparator.compare(v, arr[hi]) >= 0
? hi
: (comparator.compare(v, arr[lo]) >= 0 ? lo : -1));
break;
}
case EQ: {
result = (lo == hi)
? (comparator.compare(v, arr[lo]) == 0 ? lo : -1)
: (comparator.compare(v, arr[hi]) == 0
? hi
: (comparator.compare(v, arr[lo]) == 0 ? lo : -1));
break;
}
case GE: {
result = (lo == hi)
? (comparator.compare(v, arr[lo]) <= 0 ? lo : -1)
: (comparator.compare(v, arr[lo]) <= 0
? lo
: (comparator.compare(v, arr[hi]) <= 0 ? hi : -1));
break;
}
case GT: {
result = (lo == hi)
? (comparator.compare(v, arr[lo]) < 0 ? lo : -1)
: (comparator.compare(v, arr[lo]) < 0
? lo
: (comparator.compare(v, arr[hi]) < 0 ? hi : -1));
break;
}
}
return result;
}
}