Skip to content

Commit

Permalink
Guarantee sorted order for [Double|Long|Bytes]Values
Browse files Browse the repository at this point in the history
Values returned by [Double|Long|Bytes]Values are sorted today which
is guaranteed by the underlying Lucene index. Several implementations can
make use of this property but the interfaces don't guarantee this behavior.
This commit adds the guarantees and makes use of them in several places.

Note: This change might require sorting for 3rd party implemenations of these
interaces.
  • Loading branch information
s1monw committed Nov 7, 2013
1 parent f5f6259 commit a9239f8
Show file tree
Hide file tree
Showing 14 changed files with 313 additions and 69 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ public BytesRef nextValue() {
scratch.copyChars(Double.toString(values.nextValue()));
return scratch;
}

@Override
public Order getOrder() {
return values.getOrder();
}

};
} else {
final LongValues values = getLongValues();
Expand All @@ -74,6 +80,11 @@ public BytesRef nextValue() {
scratch.copyChars(Long.toString(values.nextValue()));
return scratch;
}

@Override
public Order getOrder() {
return values.getOrder();
}
};
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,36 @@ interface WithOrdinals<Script extends ScriptDocValues> extends AtomicFieldData<S
*/
BytesValues.WithOrdinals getBytesValues(boolean needsHashes);
}

/**
* This enum provides information about the order of the values for
* a given document. For instance {@link BytesValues} by default
* return values in {@link #BYTES} order but if the interface
* wraps a numeric variant the sort order might change to {@link #NUMERIC}.
* In that case the values might not be returned in byte sort order but in numeric
* order instead while maintaining the property of <tt>N < N+1</tt> during the
* value iterations.
*
* @see org.elasticsearch.index.fielddata.BytesValues#getOrder()
* @see org.elasticsearch.index.fielddata.DoubleValues#getOrder()
* @see org.elasticsearch.index.fielddata.LongValues#getOrder()
*/
public enum Order {
/**
* Donates Byte sort order
*/
BYTES,
/**
* Donates Numeric sort order
*/
NUMERIC,
/**
* Donates custom sort order
*/
CUSTOM,
/**
* Donates no sort order
*/
NONE
}
}
16 changes: 15 additions & 1 deletion src/main/java/org/elasticsearch/index/fielddata/BytesValues.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,13 @@ public BytesRef copyShared() {
* Returns the next value for the current docID set to {@link #setDocument(int)}.
* This method should only be called <tt>N</tt> times where <tt>N</tt> is the number
* returned from {@link #setDocument(int)}. If called more than <tt>N</tt> times the behavior
* is undefined.
* is undefined. This interface guarantees that the values are returned in order.
* <p>
* If this instance returns ordered values the <tt>Nth</tt> value is strictly less than the <tt>N+1</tt> value with
* respect to the {@link AtomicFieldData.Order} returned from {@link #getOrder()}. If this instance returns
* <i>unordered</i> values {@link #getOrder()} must return {@link AtomicFieldData.Order#NONE}
* Note: the values returned are de-duplicated, only unique values are returned.
* </p>
*
* Note: the returned {@link BytesRef} might be shared across invocations.
*
Expand All @@ -104,6 +110,14 @@ public int currentValueHash() {
return scratch.hashCode();
}

/**
* Returns the order the values are returned from {@link #nextValue()}.
* <p> Note: {@link BytesValues} have {@link AtomicFieldData.Order#BYTES} by default.</p>
*/
public AtomicFieldData.Order getOrder() {
return AtomicFieldData.Order.BYTES;
}

/**
* Ordinal based {@link BytesValues}.
*/
Expand Down
14 changes: 14 additions & 0 deletions src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,25 @@ public final boolean isMultiValued() {
* This method should only be called <tt>N</tt> times where <tt>N</tt> is the number
* returned from {@link #setDocument(int)}. If called more than <tt>N</tt> times the behavior
* is undefined.
* <p>
* If this instance returns ordered values the <tt>Nth</tt> value is strictly less than the <tt>N+1</tt> value with
* respect to the {@link AtomicFieldData.Order} returned from {@link #getOrder()}. If this instance returns
* <i>unordered</i> values {@link #getOrder()} must return {@link AtomicFieldData.Order#NONE}
* Note: the values returned are de-duplicated, only unique values are returned.
* </p>
*
* @return the next value for the current docID set to {@link #setDocument(int)}.
*/
public abstract double nextValue();

/**
* Returns the order the values are returned from {@link #nextValue()}.
* <p> Note: {@link DoubleValues} have {@link AtomicFieldData.Order#NUMERIC} by default.</p>
*/
public AtomicFieldData.Order getOrder() {
return AtomicFieldData.Order.NUMERIC;
}

/**
* Ordinal based {@link DoubleValues}.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,9 @@ public BytesRef nextValue() {
public int currentValueHash() {
return delegate.currentValueHash();
}

@Override
public AtomicFieldData.Order getOrder() {
return delegate.getOrder();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,11 @@ public int setDocument(int docId) {
public double nextValue() {
return delegate.nextValue();
}

@Override
public AtomicFieldData.Order getOrder() {
return delegate.getOrder();
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,9 @@ public long nextValue() {
return delegate.nextValue();
}

@Override
public AtomicFieldData.Order getOrder() {
return delegate.getOrder();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,27 @@ public final boolean isMultiValued() {
* This method should only be called <tt>N</tt> times where <tt>N</tt> is the number
* returned from {@link #setDocument(int)}. If called more than <tt>N</tt> times the behavior
* is undefined.
* <p>
* If this instance returns ordered values the <tt>Nth</tt> value is strictly less than the <tt>N+1</tt> value with
* respect to the {@link AtomicFieldData.Order} returned from {@link #getOrder()}. If this instance returns
* <i>unordered</i> values {@link #getOrder()} must return {@link AtomicFieldData.Order#NONE}
* Note: the values returned are de-duplicated, only unique values are returned.
* </p>
*
* Note: the returned {@link GeoPoint} might be shared across invocations.
*
* @return the next value for the current docID set to {@link #setDocument(int)}.
*/
public abstract GeoPoint nextValue();

/**
* Returns the order the values are returned from {@link #nextValue()}.
* <p> Note: {@link GeoPointValues} have {@link AtomicFieldData.Order#NONE} by default.</p>
*/
public AtomicFieldData.Order getOrder() {
return AtomicFieldData.Order.NONE;
}

/**
* An empty {@link GeoPointValues} implementation
*/
Expand Down
14 changes: 14 additions & 0 deletions src/main/java/org/elasticsearch/index/fielddata/LongValues.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,25 @@ public final boolean isMultiValued() {
* This method should only be called <tt>N</tt> times where <tt>N</tt> is the number
* returned from {@link #setDocument(int)}. If called more than <tt>N</tt> times the behavior
* is undefined.
* <p>
* If this instance returns ordered values the <tt>Nth</tt> value is strictly less than the <tt>N+1</tt> value with
* respect to the {@link AtomicFieldData.Order} returned from {@link #getOrder()}. If this instance returns
* <i>unordered</i> values {@link #getOrder()} must return {@link AtomicFieldData.Order#NONE}
* Note: the values returned are de-duplicated, only unique values are returned.
* </p>
*
* @return the next value for the current docID set to {@link #setDocument(int)}.
*/
public abstract long nextValue();

/**
* Returns the order the values are returned from {@link #nextValue()}.
* <p> Note: {@link LongValues} have {@link AtomicFieldData.Order#NUMERIC} by default.</p>
*/
public AtomicFieldData.Order getOrder() {
return AtomicFieldData.Order.NUMERIC;
}

/**
* Ordinal based {@link LongValues}.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,8 @@

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.FilterBytesValues;
import org.elasticsearch.index.fielddata.IndexFieldData;

import java.io.IOException;
Expand Down Expand Up @@ -62,30 +59,26 @@ public int compare(int slot1, int slot2) {

@Override
public int compareBottom(int doc) throws IOException {
int length = docTerms.setDocument(doc); // safes one hasValue lookup
BytesRef val2 = length == 0 ? missingValue : docTerms.nextValue();
BytesRef val2 = sortMode.getRelevantValue(docTerms, doc, missingValue);
return compareValues(bottom, val2);
}

@Override
public void copy(int slot, int doc) throws IOException {
int length = docTerms.setDocument(doc); // safes one hasValue lookup
if (length == 0) {
BytesRef relevantValue = sortMode.getRelevantValue(docTerms, doc, missingValue);
if (relevantValue == missingValue) {
values[slot] = missingValue;
} else {
if (values[slot] == null || values[slot] == missingValue) {
values[slot] = new BytesRef();
}
values[slot].copyBytes(docTerms.nextValue());
values[slot].copyBytes(relevantValue);
}
}

@Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
docTerms = indexFieldData.load(context).getBytesValues(false);
if (docTerms.isMultiValued()) {
docTerms = new MultiValuedBytesWrapper(docTerms, sortMode);
}
return this;
}

Expand Down Expand Up @@ -114,53 +107,7 @@ public int compareValues(BytesRef val1, BytesRef val2) {

@Override
public int compareDocToValue(int doc, BytesRef value) {
final int length = docTerms.setDocument(doc); // safes one hasValue lookup
return (length == 0 ? missingValue : docTerms.nextValue()).compareTo(value);
}

private static final class MultiValuedBytesWrapper extends FilterBytesValues {

private final SortMode sortMode;
private int numValues;

public MultiValuedBytesWrapper(BytesValues delegate, SortMode sortMode) {
super(delegate);
this.sortMode = sortMode;
}

public int setDocument(int docId) {
// either 0 or 1
return Math.min(1, (numValues = delegate.setDocument(docId)));
}

public BytesRef nextValue() {
BytesRef currentVal = delegate.nextValue();
// We MUST allocate a new byte[] since relevantVal might have been filled by reference by a PagedBytes instance
// meaning that the BytesRef.bytes are shared and shouldn't be overwritten. We can't use the bytes of the iterator
// either because they will be overwritten by subsequent calls in the current thread
scratch.bytes = new byte[ArrayUtil.oversize(currentVal.length, RamUsageEstimator.NUM_BYTES_BYTE)];
scratch.offset = 0;
scratch.length = currentVal.length;
System.arraycopy(currentVal.bytes, currentVal.offset, scratch.bytes, 0, currentVal.length);
for (int i = 1; i < numValues; i++) {
currentVal = delegate.nextValue();
if (sortMode == SortMode.MAX) {
if (currentVal.compareTo(scratch) > 0) {
scratch.grow(currentVal.length);
scratch.length = currentVal.length;
System.arraycopy(currentVal.bytes, currentVal.offset, scratch.bytes, 0, currentVal.length);
}
} else {
if (currentVal.compareTo(scratch) < 0) {
scratch.grow(currentVal.length);
scratch.length = currentVal.length;
System.arraycopy(currentVal.bytes, currentVal.offset, scratch.bytes, 0, currentVal.length);
}
}
}
return scratch;
}

return sortMode.getRelevantValue(docTerms, doc, missingValue).compareTo(value);
}

@Override
Expand Down
Loading

0 comments on commit a9239f8

Please sign in to comment.