Skip to content

Commit

Permalink
Fielddata: Switch to Lucene DV APIs.
Browse files Browse the repository at this point in the history
This commits removes BytesValues/LongValues/DoubleValues/... and tries to use
Lucene's APIs such as NumericDocValues or RandomAccessOrds instead whenever
possible.

The next step would be to take advantage of the fact that APIs are the same in
Lucene and Elasticsearch in order to remove our custom comparators and use
Lucene's.

There are a few side-effects to this change:
 - GeoDistanceComparator has been removed, DoubleValuesComparator is used instead
   on top of dynamically computed values (was easier than migrating
   GeoDistanceComparator).
 - SortedNumericDocValues doesn't guarantee uniqueness so long/double terms
   aggregators have been updated to make sure a document cannot fall twice in
   the same bucket.
 - Sorting by maximum value of a field or running a `max` aggregation is
   potentially significantly faster thanks to the random-access API.

Our aggs and p/c aggregations benchmarks don't report differences with this
change on uninverted field data. However the fact that doc values don't need
to be wrapped anymore seems to help a lot. For example
TermsAggregationSearchBenchmark reports ~30% faster terms aggregations on doc
values on string fields with this change, which are now only ~18% slower than
uninverted field data although stored on disk.

Close #6908
  • Loading branch information
jpountz committed Jul 22, 2014
1 parent 0de30e1 commit 3c142e5
Show file tree
Hide file tree
Showing 202 changed files with 4,964 additions and 6,294 deletions.
40 changes: 40 additions & 0 deletions src/main/java/org/elasticsearch/common/geo/GeoDistance.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@

package org.elasticsearch.common.geo;

import org.apache.lucene.util.Bits;
import org.apache.lucene.util.SloppyMath;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.unit.DistanceUnit;
import org.elasticsearch.index.fielddata.*;

import java.util.Locale;

Expand Down Expand Up @@ -368,4 +370,42 @@ public double calculate(double targetLatitude, double targetLongitude) {
return SLOPPY_ARC.calculate(sourceLatitude, sourceLongitude, targetLatitude, targetLongitude, unit);
}
}

/**
* Return a {@link SortedNumericDoubleValues} instance that returns the distance to a given geo-point for each document.
*/
public static SortedNumericDoubleValues distanceValues(final FixedSourceDistance distance, final MultiGeoPointValues geoPointValues) {
final GeoPointValues singleValues = FieldData.unwrapSingleton(geoPointValues);
if (singleValues != null) {
final Bits docsWithField = FieldData.unwrapSingletonBits(geoPointValues);
return FieldData.singleton(new NumericDoubleValues() {

@Override
public double get(int docID) {
if (docsWithField != null && !docsWithField.get(docID)) {
return 0d;
}
final GeoPoint point = singleValues.get(docID);
return distance.calculate(point.lat(), point.lon());
}

}, docsWithField);
} else {
return new SortingNumericDoubleValues() {

@Override
public void setDocument(int doc) {
geoPointValues.setDocument(doc);
count = geoPointValues.count();
grow();
for (int i = 0; i < count; ++i) {
final GeoPoint point = geoPointValues.valueAt(i);
values[i] = distance.calculate(point.lat(), point.lon());
}
sort();
}

};
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Explanation;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.index.fielddata.DoubleValues;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;

import java.util.Locale;

Expand All @@ -37,7 +37,7 @@ public class FieldValueFactorFunction extends ScoreFunction {
private final float boostFactor;
private final Modifier modifier;
private final IndexNumericFieldData indexFieldData;
private DoubleValues values;
private SortedNumericDoubleValues values;

public FieldValueFactorFunction(String field, float boostFactor, Modifier modifierType, IndexNumericFieldData indexFieldData) {
super(CombineFunction.MULT);
Expand All @@ -54,9 +54,10 @@ public void setNextReader(AtomicReaderContext context) {

@Override
public double score(int docId, float subQueryScore) {
final int numValues = this.values.setDocument(docId);
this.values.setDocument(docId);
final int numValues = this.values.count();
if (numValues > 0) {
double val = this.values.nextValue() * boostFactor;
double val = this.values.valueAt(0) * boostFactor;
double result = modifier.apply(val);
if (Double.isNaN(result) || Double.isInfinite(result)) {
throw new ElasticsearchException("Result of field modification [" + modifier.toString() +
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,29 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;

import org.elasticsearch.index.fielddata.DoubleValues;
package org.elasticsearch.index.fielddata;

import org.apache.lucene.index.RandomAccessOrds;

/**
* Package private base class for dense double values.
* Base implementation of a {@link RandomAccessOrds} instance.
*/
abstract class DenseDoubleValues extends DoubleValues {
public abstract class AbstractRandomAccessOrds extends RandomAccessOrds {

int i = 0;

protected abstract void doSetDocument(int docID);

protected DenseDoubleValues(boolean multiValued) {
super(multiValued);
@Override
public final void setDocument(int docID) {
doSetDocument(docID);
i = 0;
}

@Override
public final int setDocument(int docId) {
this.docId = docId;
return 1;
public long nextOrd() {
return ordAt(i++);
}
}

}
106 changes: 5 additions & 101 deletions src/main/java/org/elasticsearch/index/fielddata/AtomicFieldData.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,117 +20,21 @@
package org.elasticsearch.index.fielddata;

import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fielddata.ScriptDocValues.Strings;
import org.elasticsearch.common.lease.Releasable;

/**
* The thread safe {@link org.apache.lucene.index.AtomicReader} level cache of the data.
*/
public interface AtomicFieldData<Script extends ScriptDocValues> extends Accountable {

/**
* Use a non thread safe (lightweight) view of the values as bytes.
*/
BytesValues getBytesValues();
public interface AtomicFieldData extends Accountable, Releasable {

/**
* Returns a "scripting" based values.
*/
Script getScriptValues();
ScriptDocValues getScriptValues();

/**
* Close the field data.
* Return a String representation of the values.
*/
void close();

interface WithOrdinals<Script extends ScriptDocValues> extends AtomicFieldData<Script> {

public static final WithOrdinals<ScriptDocValues.Strings> EMPTY = new WithOrdinals<ScriptDocValues.Strings>() {

@Override
public Strings getScriptValues() {
return new ScriptDocValues.Strings(getBytesValues());
}

@Override
public void close() {
}

@Override
public long ramBytesUsed() {
return 0;
}

@Override
public BytesValues.WithOrdinals getBytesValues() {
return new BytesValues.WithOrdinals(false) {

@Override
public int setDocument(int docId) {
return 0;
}
SortedBinaryDocValues getBytesValues();

@Override
public long nextOrd() {
return MISSING_ORDINAL;
}

@Override
public BytesRef getValueByOrd(long ord) {
throw new UnsupportedOperationException();
}

@Override
public long getOrd(int docId) {
return MISSING_ORDINAL;
}

@Override
public long getMaxOrd() {
return 0;
}
};
}

};

/**
* Use a non thread safe (lightweight) view of the values as bytes.
* @param needsHashes
*/
BytesValues.WithOrdinals getBytesValues();

}

/**
* This enum provides information about the order of the values for
* a given document. For instance {@link BytesValues} by default
* return values in {@link #BYTES} order but if the interface
* wraps a numeric variant the sort order might change to {@link #NUMERIC}.
* In that case the values might not be returned in byte sort order but in numeric
* order instead while maintaining the property of <tt>N < N+1</tt> during the
* value iterations.
*
* @see org.elasticsearch.index.fielddata.BytesValues#getOrder()
* @see org.elasticsearch.index.fielddata.DoubleValues#getOrder()
* @see org.elasticsearch.index.fielddata.LongValues#getOrder()
*/
public enum Order {
/**
* Donates Byte sort order
*/
BYTES,
/**
* Donates Numeric sort order
*/
NUMERIC,
/**
* Donates custom sort order
*/
CUSTOM,
/**
* Donates no sort order
*/
NONE
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,35 +18,15 @@
*/
package org.elasticsearch.index.fielddata;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.geo.GeoHashUtils;
import org.elasticsearch.common.geo.GeoPoint;

/**
* {@link AtomicFieldData} specialization for geo points.
*/
public abstract class AtomicGeoPointFieldData<Script extends ScriptDocValues> implements AtomicFieldData<Script> {
public interface AtomicGeoPointFieldData extends AtomicFieldData {

public abstract GeoPointValues getGeoPointValues();

@Override
public BytesValues getBytesValues() {
final GeoPointValues values = getGeoPointValues();
return new BytesValues(values.isMultiValued()) {
private final BytesRef scratch = new BytesRef();

@Override
public int setDocument(int docId) {
return values.setDocument(docId);
}

@Override
public BytesRef nextValue() {
GeoPoint value = values.nextValue();
scratch.copyChars(GeoHashUtils.encode(value.lat(), value.lon()));
return scratch;
}

};
}
/**
* Return geo-point values.
*/
MultiGeoPointValues getGeoPointValues();

}
Loading

0 comments on commit 3c142e5

Please sign in to comment.