Skip to content

Commit

Permalink
Addressed more revision comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Marc D'Mello committed May 22, 2022
1 parent bb36b79 commit dc4b1aa
Show file tree
Hide file tree
Showing 7 changed files with 281 additions and 79 deletions.
Expand Up @@ -14,18 +14,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet.hyperrectangle;
package org.apache.lucene.document;

import java.util.Arrays;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.util.NumericUtils;

/**
* Takes an array of doubles and converts them to sortable longs, then stores as a {@link
* BinaryDocValuesField}
*
* @lucene.experimental
*/
public class DoublePointFacetField extends BinaryDocValuesField {
public class DoublePointDocValuesField extends BinaryDocValuesField {

/**
* Creates a new DoublePointDocValuesField, indexing the provided N-dimensional double point.
Expand All @@ -34,11 +34,14 @@ public class DoublePointFacetField extends BinaryDocValuesField {
* @param point double[] value
* @throws IllegalArgumentException if the field name is null, or if the point is null or empty.
*/
public DoublePointFacetField(String name, double... point) {
public DoublePointDocValuesField(String name, double... point) {
super(name, LongPoint.pack(convertToSortableLongPoint(point)));
}

/**
 * Validates the point and maps every coordinate to its sortable-long encoding via {@link
 * NumericUtils#doubleToSortableLong(double)}.
 *
 * @param point coordinates of the point, one double per dimension
 * @return a new array of the same length holding the encoded coordinates, in the same order
 * @throws IllegalArgumentException if {@code point} is null or has no elements
 */
private static long[] convertToSortableLongPoint(double[] point) {
  if (point == null || point.length == 0) {
    throw new IllegalArgumentException("Point value cannot be null or empty");
  }
  final long[] sortable = new long[point.length];
  for (int dim = 0; dim < point.length; dim++) {
    sortable[dim] = NumericUtils.doubleToSortableLong(point[dim]);
  }
  return sortable;
}
}
Expand Up @@ -14,13 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet.hyperrectangle;
package org.apache.lucene.document;

import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.LongPoint;

/** Packs an array of longs into a {@link BinaryDocValuesField} */
public class LongPointFacetField extends BinaryDocValuesField {
/**
* Packs an array of longs into a {@link BinaryDocValuesField}
*
* @lucene.experimental
*/
public class LongPointDocValuesField extends BinaryDocValuesField {

/**
* Creates a new LongPointDocValuesField, indexing the provided N-dimensional long point.
Expand All @@ -29,7 +30,14 @@ public class LongPointFacetField extends BinaryDocValuesField {
* @param point long[] value
* @throws IllegalArgumentException if the field name is null, or if the point is null or empty.
*/
public LongPointFacetField(String name, long... point) {
super(name, LongPoint.pack(point));
public LongPointDocValuesField(String name, long... point) {
super(name, LongPoint.pack(validatePoint(point)));
}

/**
 * Checks that a point has at least one dimension before it is packed.
 *
 * @param point coordinates of the point, one long per dimension
 * @return the same array instance that was passed in, unmodified
 * @throws IllegalArgumentException if {@code point} is null or has no elements
 */
private static long[] validatePoint(long[] point) {
  final boolean hasDimensions = point != null && point.length > 0;
  if (hasDimensions == false) {
    throw new IllegalArgumentException("Point value cannot be null or empty");
  }
  return point;
}
}
Expand Up @@ -19,7 +19,11 @@
import java.util.Arrays;
import org.apache.lucene.util.NumericUtils;

/** Stores a hyper rectangle as an array of DoubleRangePairs */
/**
* Stores a hyper rectangle as an array of DoubleRangePairs
*
* @lucene.experimental
*/
public class DoubleHyperRectangle extends HyperRectangle {

/** Creates DoubleHyperRectangle */
Expand Down Expand Up @@ -79,7 +83,7 @@ public DoubleRangePair(double minIn, boolean minInclusive, double maxIn, boolean
*
* @return A LongRangePair equivalent of this object
*/
public LongRangePair toLongRangePair() {
private LongRangePair toLongRangePair() {
long longMin = NumericUtils.doubleToSortableLong(min);
long longMax = NumericUtils.doubleToSortableLong(max);
return new LongRangePair(longMin, true, longMax, true);
Expand Down
Expand Up @@ -16,16 +16,27 @@
*/
package org.apache.lucene.facet.hyperrectangle;

/** Holds the name and the number of dims for a HyperRectangle */
import java.util.Arrays;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.util.ArrayUtil;

/**
* Holds the label, the number of dims, and the point pairs for a HyperRectangle
*
* @lucene.experimental
*/
public abstract class HyperRectangle {
/** Label that identifies this range. */
public final String label;

/** How many dimensions this hyper rectangle has (e.g., a regular rectangle would have dims=2) */
public final int dims;

/** All subclasses should store pairs as comparable longs */
protected final LongRangePair[] pairs;
private final ArrayUtil.ByteArrayComparator byteComparator =
ArrayUtil.getUnsignedComparator(Long.BYTES);

private final byte[] lowerPoints;
private final byte[] upperPoints;

/** Sole constructor. */
protected HyperRectangle(String label, LongRangePair... pairs) {
Expand All @@ -37,17 +48,41 @@ protected HyperRectangle(String label, LongRangePair... pairs) {
}
this.label = label;
this.dims = pairs.length;
this.pairs = pairs;

this.lowerPoints =
LongPoint.pack(Arrays.stream(pairs).mapToLong(pair -> pair.min).toArray()).bytes;
this.upperPoints =
LongPoint.pack(Arrays.stream(pairs).mapToLong(pair -> pair.max).toArray()).bytes;
}

/**
* Returns comparable long range for a provided dim
* Checks a packed long point against this HyperRectangle. If you indexed a field with {@link
* org.apache.lucene.document.LongPointDocValuesField} or {@link
* org.apache.lucene.document.DoublePointDocValuesField}, those field values will be able to be
* passed directly into this method.
*
* @param dim dimension of the request range
* @return The comparable long version of the requested range
* @param packedValue a byte array holding a packed long point, one long per dimension
* @return whether the packed long point intersects with this HyperRectangle
*/
public LongRangePair getComparableDimRange(int dim) {
return pairs[dim];
public final boolean matches(byte[] packedValue) {
  // Sanity check (assertions only): the array must hold one long per dimension.
  assert packedValue.length / Long.BYTES == dims
      : "Point dimension (dim="
          + packedValue.length / Long.BYTES
          + ") is incompatible with hyper rectangle dimension (dim="
          + dims
          + ")";
  // Walk the packed point one long-sized slot at a time; the same byte offset addresses the
  // matching dimension in lowerPoints and upperPoints.
  final int end = dims * Long.BYTES;
  for (int offset = 0; offset < end; offset += Long.BYTES) {
    // Doc's value is too low in this dimension
    final boolean belowLower =
        byteComparator.compare(packedValue, offset, lowerPoints, offset) < 0;
    // ... or too high in this dimension (only evaluated when the lower bound passed)
    if (belowLower || byteComparator.compare(packedValue, offset, upperPoints, offset) > 0) {
      return false;
    }
  }
  // Every dimension lies within [lower, upper], bounds included.
  return true;
}

/** Defines a single range in a HyperRectangle */
Expand Down Expand Up @@ -92,10 +127,5 @@ public LongRangePair(long minIn, boolean minInclusive, long maxIn, boolean maxIn
this.min = minIn;
this.max = maxIn;
}

/** True if this range accepts the provided value. */
public boolean accept(long value) {
return value >= min && value <= max;
}
}
}
Expand Up @@ -20,31 +20,38 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.search.ConjunctionUtils;
import org.apache.lucene.search.DocIdSetIterator;

/** Get counts given a list of HyperRectangles (which must be of the same type) */
/**
* Get counts given a list of HyperRectangles
*
* @lucene.experimental
*/
public class HyperRectangleFacetCounts extends Facets {
/** Hypper rectangles passed to constructor. */
protected final HyperRectangle[] hyperRectangles;
/** Hyper rectangles passed to constructor. */
private final HyperRectangle[] hyperRectangles;

/** Counts, initialized in subclass. */
protected final int[] counts;
/**
* Holds the number of matching documents (contains intersecting point in field) for each {@link
* HyperRectangle}
*/
private final int[] counts;

/** Our field name. */
protected final String field;
private final String field;

/** Number of dimensions for field */
protected final int dims;
private final int dims;

/** Total number of hits. */
protected int totCount;
private int totCount;

/**
* Create HyperRectangleFacetCounts using this
Expand All @@ -56,9 +63,12 @@ public class HyperRectangleFacetCounts extends Facets {
*/
public HyperRectangleFacetCounts(
String field, FacetsCollector hits, HyperRectangle... hyperRectangles) throws IOException {
assert hyperRectangles.length > 0 : "Hyper rectangle ranges cannot be empty";
assert areHyperRectangleDimsConsistent(hyperRectangles)
: "All hyper rectangles must be the same dimensionality";
if (hyperRectangles == null || hyperRectangles.length == 0) {
throw new IllegalArgumentException("Hyper rectangle ranges cannot be empty");
}
if (areHyperRectangleDimsConsistent(hyperRectangles) == false) {
throw new IllegalArgumentException("All hyper rectangles must be the same dimensionality");
}
this.field = field;
this.hyperRectangles = hyperRectangles;
this.dims = hyperRectangles[0].dims;
Expand All @@ -75,50 +85,34 @@ private boolean areHyperRectangleDimsConsistent(HyperRectangle[] hyperRectangles
private void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs)
throws IOException {

for (int i = 0; i < matchingDocs.size(); i++) {

FacetsCollector.MatchingDocs hits = matchingDocs.get(i);
for (FacetsCollector.MatchingDocs hits : matchingDocs) {

BinaryDocValues binaryDocValues = DocValues.getBinary(hits.context.reader(), field);

final DocIdSetIterator it = hits.bits.iterator();
final DocIdSetIterator it =
ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), binaryDocValues));
if (it == null) {
continue;
}

for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
if (binaryDocValues.advanceExact(doc)) {
long[] point = LongPoint.unpack(binaryDocValues.binaryValue());
assert point.length == dims
: "Point dimension (dim="
+ point.length
+ ") is incompatible with hyper rectangle dimension (dim="
+ dims
+ ")";
// linear scan, change this to use R trees
boolean docIsValid = false;
for (int j = 0; j < hyperRectangles.length; j++) {
boolean validPoint = true;
for (int dim = 0; dim < dims; dim++) {
HyperRectangle.LongRangePair range = hyperRectangles[j].getComparableDimRange(dim);
if (!range.accept(point[dim])) {
validPoint = false;
break;
}
}
if (validPoint) {
counts[j]++;
docIsValid = true;
}
}
if (docIsValid) {
totCount++;
boolean shouldCountDoc = false;
// linear scan, change this to use R trees
for (int j = 0; j < hyperRectangles.length; j++) {
if (hyperRectangles[j].matches(binaryDocValues.binaryValue().bytes)) {
counts[j]++;
shouldCountDoc = true;
}
}
if (shouldCountDoc) {
totCount++;
}
}
}
}

// TODO: This does not really provide "top children" functionality yet but provides "all
// children". This is being worked on in LUCENE-10550
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
validateTopN(topN);
Expand Down
Expand Up @@ -16,7 +16,11 @@
*/
package org.apache.lucene.facet.hyperrectangle;

/** Stores a hyper rectangle as an array of LongRangePairs */
/**
* Stores a hyper rectangle as an array of LongRangePairs
*
* @lucene.experimental
*/
public class LongHyperRectangle extends HyperRectangle {

/** Creates LongHyperRectangle */
Expand Down

0 comments on commit dc4b1aa

Please sign in to comment.