From d16067b890096f2ce2ab742e6fbffb2471cf7ce0 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 22 Jul 2014 14:42:15 -0400 Subject: [PATCH 1/3] Change numeric data types to use SORTED_NUMERIC docvalues type instead of a custom encoding in BINARY. In low-level benchmarks this is 2x to 5x faster: it's also optimized for the common case where fields actually only contain at most one value for each document. Additionally SORTED_NUMERIC doesn't lose values if they appear more than once, so mathematical computations such as averages are correct. --- .../plain/DocValuesIndexFieldData.java | 10 +- .../plain/SortedNumericDVIndexFieldData.java | 291 ++++++++++++++++++ .../index/mapper/core/BinaryFieldMapper.java | 47 ++- .../index/mapper/core/ByteFieldMapper.java | 2 +- .../index/mapper/core/DateFieldMapper.java | 3 +- .../index/mapper/core/DoubleFieldMapper.java | 45 +-- .../index/mapper/core/FloatFieldMapper.java | 45 +-- .../index/mapper/core/IntegerFieldMapper.java | 2 +- .../index/mapper/core/LongFieldMapper.java | 2 +- .../index/mapper/core/NumberFieldMapper.java | 110 +------ .../index/mapper/core/ShortFieldMapper.java | 2 +- .../index/mapper/geo/GeoPointFieldMapper.java | 62 +++- .../index/mapper/ip/IpFieldMapper.java | 2 +- .../fielddata/AbstractFieldDataTests.java | 3 + .../index/fielddata/DuelFieldDataTests.java | 53 ++-- .../fielddata/IndexFieldDataServiceTests.java | 6 +- .../mapper/numeric/SimpleNumericTests.java | 8 +- 17 files changed, 451 insertions(+), 242 deletions(-) create mode 100644 src/main/java/org/elasticsearch/index/fielddata/plain/SortedNumericDVIndexFieldData.java diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/DocValuesIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/DocValuesIndexFieldData.java index 17b8e7ca3a79a..3b39a98113c4d 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/DocValuesIndexFieldData.java +++ 
b/src/main/java/org/elasticsearch/index/fielddata/plain/DocValuesIndexFieldData.java @@ -22,6 +22,8 @@ import com.google.common.collect.ImmutableSet; import org.apache.lucene.index.IndexReader; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.Settings; @@ -107,7 +109,13 @@ public IndexFieldData build(Index index, Settings indexSettings, FieldMapper< assert !numericType.isFloatingPoint(); return new NumericDVIndexFieldData(index, fieldNames, mapper.fieldDataType()); } else if (numericType != null) { - return new BinaryDVNumericIndexFieldData(index, fieldNames, numericType, mapper.fieldDataType()); + Version version = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, org.elasticsearch.Version.CURRENT); + if (version.onOrAfter(Version.V_1_4_0)) { + return new SortedNumericDVIndexFieldData(index, fieldNames, numericType, mapper.fieldDataType()); + } else { + // prior to ES 1.4: multi-valued numerics were boxed inside a byte[] as BINARY + return new BinaryDVNumericIndexFieldData(index, fieldNames, numericType, mapper.fieldDataType()); + } } else { return new SortedSetDVOrdinalsIndexFieldData(index, cache, indexSettings, fieldNames, breakerService, mapper.fieldDataType()); } diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/SortedNumericDVIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/SortedNumericDVIndexFieldData.java new file mode 100644 index 0000000000000..631f69ad25e71 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/SortedNumericDVIndexFieldData.java @@ -0,0 +1,291 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.fielddata.plain; + +import com.google.common.base.Preconditions; +import org.apache.lucene.index.*; +import org.apache.lucene.util.NumericUtils; +import org.elasticsearch.ElasticsearchIllegalStateException; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.fielddata.*; +import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource; +import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource; +import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource; +import org.elasticsearch.index.mapper.FieldMapper.Names; +import org.elasticsearch.search.MultiValueMode; + +import java.io.IOException; + +/** + * FieldData backed by {@link AtomicReader#getSortedNumericDocValues(String)} + * @see FieldInfo.DocValuesType#SORTED_NUMERIC + */ +public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData implements IndexNumericFieldData { + private final NumericType numericType; + + public SortedNumericDVIndexFieldData(Index index, Names fieldNames, NumericType numericType, FieldDataType fieldDataType) { + super(index, fieldNames, fieldDataType); + Preconditions.checkArgument(numericType != null, "numericType must be non-null"); + 
this.numericType = numericType; + } + + @Override + public org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue, MultiValueMode sortMode) { + switch (numericType) { + case FLOAT: + return new FloatValuesComparatorSource(this, missingValue, sortMode); + case DOUBLE: + return new DoubleValuesComparatorSource(this, missingValue, sortMode); + default: + assert !numericType.isFloatingPoint(); + return new LongValuesComparatorSource(this, missingValue, sortMode); + } + } + + @Override + public NumericType getNumericType() { + return numericType; + } + + @Override + public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception { + return load(context); + } + + @Override + public AtomicNumericFieldData load(AtomicReaderContext context) { + final AtomicReader reader = context.reader(); + final String field = fieldNames.indexName(); + + switch (numericType) { + case FLOAT: + return new SortedNumericFloatFieldData(reader, field); + case DOUBLE: + return new SortedNumericDoubleFieldData(reader, field); + default: + return new SortedNumericLongFieldData(reader, field); + } + } + + /** + * FieldData implementation for integral types. + *

+ * Order of values within a document is consistent with + * {@link Long#compareTo(Long)}. + *

+ * Although the API is multi-valued, most codecs in Lucene specialize + * for the case where documents have at most one value. In this case + * {@link DocValues#unwrapSingleton(SortedNumericDocValues)} will return + * the underlying single-valued NumericDocValues representation, and + * {@link DocValues#unwrapSingletonBits(SortedNumericDocValues)} will return + * a Bits matching documents that have a real value (as opposed to missing). + */ + static final class SortedNumericLongFieldData extends AtomicLongFieldData { + final AtomicReader reader; + final String field; + + SortedNumericLongFieldData(AtomicReader reader, String field) { + super(-1L); + this.reader = reader; + this.field = field; + } + + @Override + public SortedNumericDocValues getLongValues() { + try { + return DocValues.getSortedNumeric(reader, field); + } catch (IOException e) { + throw new ElasticsearchIllegalStateException("Cannot load doc values", e); + } + } + } + + /** + * FieldData implementation for 32-bit float values. + *

+ * Order of values within a document is consistent with + * {@link Float#compareTo(Float)}, hence the following reversible + * transformation is applied at both index and search: + * <pre>{@code + * bits ^ (bits >> 31) & 0x7fffffff + * }</pre> + *

+ * Although the API is multi-valued, most codecs in Lucene specialize + * for the case where documents have at most one value. In this case + * {@link FieldData#unwrapSingleton(SortedNumericDoubleValues)} will return + * the underlying single-valued NumericDoubleValues representation, and + * {@link FieldData#unwrapSingletonBits(SortedNumericDoubleValues)} will return + * a Bits matching documents that have a real value (as opposed to missing). + */ + static final class SortedNumericFloatFieldData extends AtomicDoubleFieldData { + final AtomicReader reader; + final String field; + + SortedNumericFloatFieldData(AtomicReader reader, String field) { + super(-1L); + this.reader = reader; + this.field = field; + } + + @Override + public SortedNumericDoubleValues getDoubleValues() { + try { + SortedNumericDocValues raw = DocValues.getSortedNumeric(reader, field); + + NumericDocValues single = DocValues.unwrapSingleton(raw); + if (single != null) { + return FieldData.singleton(new SingleFloatValues(single), DocValues.unwrapSingletonBits(raw)); + } else { + return new MultiFloatValues(raw); + } + } catch (IOException e) { + throw new ElasticsearchIllegalStateException("Cannot load doc values", e); + } + } + } + + /** + * Wraps a NumericDocValues and exposes a single 32-bit float per document. + */ + static final class SingleFloatValues extends NumericDoubleValues { + final NumericDocValues in; + + SingleFloatValues(NumericDocValues in) { + this.in = in; + } + + @Override + public double get(int docID) { + return NumericUtils.sortableIntToFloat((int) in.get(docID)); + } + } + + /** + * Wraps a SortedNumericDocValues and exposes multiple 32-bit floats per document. 
+ */ + static final class MultiFloatValues extends SortedNumericDoubleValues { + final SortedNumericDocValues in; + + MultiFloatValues(SortedNumericDocValues in) { + this.in = in; + } + + @Override + public void setDocument(int doc) { + in.setDocument(doc); + } + + @Override + public double valueAt(int index) { + return NumericUtils.sortableIntToFloat((int) in.valueAt(index)); + } + + @Override + public int count() { + return in.count(); + } + } + + /** + * FieldData implementation for 64-bit double values. + *

+ * Order of values within a document is consistent with + * {@link Double#compareTo(Double)}, hence the following reversible + * transformation is applied at both index and search: + * <pre>{@code + * bits ^ (bits >> 63) & 0x7fffffffffffffffL + * }</pre> + *

+ * Although the API is multi-valued, most codecs in Lucene specialize + * for the case where documents have at most one value. In this case + * {@link FieldData#unwrapSingleton(SortedNumericDoubleValues)} will return + * the underlying single-valued NumericDoubleValues representation, and + * {@link FieldData#unwrapSingletonBits(SortedNumericDoubleValues)} will return + * a Bits matching documents that have a real value (as opposed to missing). + */ + static final class SortedNumericDoubleFieldData extends AtomicDoubleFieldData { + final AtomicReader reader; + final String field; + + SortedNumericDoubleFieldData(AtomicReader reader, String field) { + super(-1L); + this.reader = reader; + this.field = field; + } + + @Override + public SortedNumericDoubleValues getDoubleValues() { + try { + SortedNumericDocValues raw = DocValues.getSortedNumeric(reader, field); + + NumericDocValues single = DocValues.unwrapSingleton(raw); + if (single != null) { + return FieldData.singleton(new SingleDoubleValues(single), DocValues.unwrapSingletonBits(raw)); + } else { + return new MultiDoubleValues(raw); + } + } catch (IOException e) { + throw new ElasticsearchIllegalStateException("Cannot load doc values", e); + } + } + } + + /** + * Wraps a NumericDocValues and exposes a single 64-bit double per document. + */ + static final class SingleDoubleValues extends NumericDoubleValues { + final NumericDocValues in; + + SingleDoubleValues(NumericDocValues in) { + this.in = in; + } + + @Override + public double get(int docID) { + return NumericUtils.sortableLongToDouble(in.get(docID)); + } + } + + /** + * Wraps a SortedNumericDocValues and exposes multiple 64-bit doubles per document. 
+ */ + static final class MultiDoubleValues extends SortedNumericDoubleValues { + final SortedNumericDocValues in; + + MultiDoubleValues(SortedNumericDocValues in) { + this.in = in; + } + + @Override + public void setDocument(int doc) { + in.setDocument(doc); + } + + @Override + public double valueAt(int index) { + return NumericUtils.sortableLongToDouble(in.valueAt(index)); + } + + @Override + public int count() { + return in.count(); + } + } +} diff --git a/src/main/java/org/elasticsearch/index/mapper/core/BinaryFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/BinaryFieldMapper.java index f2cf9ce3998e0..8c50219d932f8 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/BinaryFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/BinaryFieldMapper.java @@ -20,9 +20,13 @@ package org.elasticsearch.index.mapper.core; import com.carrotsearch.hppc.ObjectArrayList; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchException; @@ -46,6 +50,7 @@ import org.elasticsearch.index.mapper.*; import java.io.IOException; +import java.io.Reader; import java.util.List; import java.util.Map; @@ -255,7 +260,7 @@ public void merge(Mapper mergeWith, MergeContext mergeContext) throws MergeMappi } } - public static class CustomBinaryDocValuesField extends NumberFieldMapper.CustomNumericDocValuesField { + public static class CustomBinaryDocValuesField implements IndexableField { public static final FieldType TYPE = new FieldType(); static { @@ -263,12 +268,14 @@ public static class CustomBinaryDocValuesField extends NumberFieldMapper.CustomN 
TYPE.freeze(); } + private final String name; + private final ObjectArrayList bytesList; private int totalSize = 0; - public CustomBinaryDocValuesField(String name, byte[] bytes) { - super(name); + public CustomBinaryDocValuesField(String name, byte[] bytes) { + this.name = name; bytesList = new ObjectArrayList<>(); add(bytes); } @@ -296,7 +303,41 @@ public BytesRef binaryValue() { } catch (IOException e) { throw new ElasticsearchException("Failed to get binary value", e); } + } + + @Override + public float boost() { + return 1f; + } + + @Override + public IndexableFieldType fieldType() { + return TYPE; + } + + @Override + public String name() { + return name; + } + + @Override + public Number numericValue() { + return null; + } + + @Override + public Reader readerValue() { + return null; + } + @Override + public String stringValue() { + return null; + } + + @Override + public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException { + return null; } } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java index b6a43dcb7e1f7..cf3bd5cee8930 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java @@ -320,7 +320,7 @@ protected void innerParseCreateField(ParseContext context, List fields) t fields.add(field); } if (hasDocValues()) { - addDocValue(context, value); + addDocValue(context, fields, value); } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java index 85cb4489030a6..b7e6694513a78 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java @@ -41,7 +41,6 @@ import org.elasticsearch.common.util.LocaleUtils; import 
org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.NumericDateAnalyzer; import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider; import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider; @@ -514,7 +513,7 @@ protected void innerParseCreateField(ParseContext context, List fields) t fields.add(field); } if (hasDocValues()) { - addDocValue(context, value); + addDocValue(context, fields, value); } } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java index d5ed372934e04..e96cf65d55036 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java @@ -19,12 +19,10 @@ package org.elasticsearch.index.mapper.core; -import com.carrotsearch.hppc.DoubleArrayList; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; -import org.apache.lucene.index.FieldInfo; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; @@ -37,8 +35,6 @@ import org.elasticsearch.common.Numbers; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.Fuzziness; -import org.elasticsearch.common.util.ByteUtils; -import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NumericDoubleAnalyzer; @@ -316,13 +312,7 @@ protected void innerParseCreateField(ParseContext context, List fields) t fields.add(field); } if 
(hasDocValues()) { - CustomDoubleNumericDocValuesField field = (CustomDoubleNumericDocValuesField) context.doc().getByKey(names().indexName()); - if (field != null) { - field.add(value); - } else { - field = new CustomDoubleNumericDocValuesField(names().indexName(), value); - context.doc().addWithKey(names().indexName(), field); - } + addDocValue(context, fields, NumericUtils.doubleToSortableLong(value)); } } @@ -386,37 +376,4 @@ public String numericAsString() { return Double.toString(number); } } - - public static class CustomDoubleNumericDocValuesField extends CustomNumericDocValuesField { - - public static final FieldType TYPE = new FieldType(); - static { - TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY); - TYPE.freeze(); - } - - private final DoubleArrayList values; - - public CustomDoubleNumericDocValuesField(String name, double value) { - super(name); - values = new DoubleArrayList(); - add(value); - } - - public void add(double value) { - values.add(value); - } - - @Override - public BytesRef binaryValue() { - CollectionUtils.sortAndDedup(values); - - final byte[] bytes = new byte[values.size() * 8]; - for (int i = 0; i < values.size(); ++i) { - ByteUtils.writeDoubleLE(values.get(i), bytes, i * 8); - } - return new BytesRef(bytes); - } - - } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java index ed4c7a907e452..54bab7ae49250 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java @@ -19,12 +19,10 @@ package org.elasticsearch.index.mapper.core; -import com.carrotsearch.hppc.FloatArrayList; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; -import org.apache.lucene.index.FieldInfo; import 
org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; @@ -38,8 +36,6 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.Fuzziness; -import org.elasticsearch.common.util.ByteUtils; -import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NumericFloatAnalyzer; @@ -321,13 +317,7 @@ protected void innerParseCreateField(ParseContext context, List fields) t fields.add(field); } if (hasDocValues()) { - CustomFloatNumericDocValuesField field = (CustomFloatNumericDocValuesField) context.doc().getByKey(names().indexName()); - if (field != null) { - field.add(value); - } else { - field = new CustomFloatNumericDocValuesField(names().indexName(), value); - context.doc().addWithKey(names().indexName(), field); - } + addDocValue(context, fields, NumericUtils.floatToSortableInt(value)); } } @@ -392,37 +382,4 @@ public String numericAsString() { return Float.toString(number); } } - - public static class CustomFloatNumericDocValuesField extends CustomNumericDocValuesField { - - public static final FieldType TYPE = new FieldType(); - static { - TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY); - TYPE.freeze(); - } - - private final FloatArrayList values; - - public CustomFloatNumericDocValuesField(String name, float value) { - super(name); - values = new FloatArrayList(); - add(value); - } - - public void add(float value) { - values.add(value); - } - - @Override - public BytesRef binaryValue() { - CollectionUtils.sortAndDedup(values); - - final byte[] bytes = new byte[values.size() * 4]; - for (int i = 0; i < values.size(); ++i) { - ByteUtils.writeFloatLE(values.get(i), bytes, i * 4); - } - return new BytesRef(bytes); - } - - } } diff --git 
a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java index 7dbf091d3424d..b6dd5d3fb4215 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java @@ -319,7 +319,7 @@ protected void addIntegerFields(ParseContext context, List fields, int va fields.add(field); } if (hasDocValues()) { - addDocValue(context, value); + addDocValue(context, fields, value); } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java index ddef3ce20431b..8a57bb58c1168 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java @@ -305,7 +305,7 @@ protected void innerParseCreateField(ParseContext context, List fields) t fields.add(field); } if (hasDocValues()) { - addDocValue(context, value); + addDocValue(context, fields, value); } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java index c4bcc19dbc639..3cd3a2e5f4c22 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java @@ -20,27 +20,19 @@ package org.elasticsearch.index.mapper.core; import com.carrotsearch.hppc.DoubleOpenHashSet; -import com.carrotsearch.hppc.LongArrayList; import com.carrotsearch.hppc.LongOpenHashSet; -import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.NumericTokenStream; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; -import org.apache.lucene.index.FieldInfo; +import 
org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.FieldInfo.IndexOptions; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Query; -import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.Fuzziness; -import org.elasticsearch.common.util.ByteUtils; -import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider; @@ -234,14 +226,8 @@ protected void parseCreateField(ParseContext context, List fields) throws protected abstract void innerParseCreateField(ParseContext context, List fields) throws IOException; - protected final void addDocValue(ParseContext context, long value) { - CustomLongNumericDocValuesField field = (CustomLongNumericDocValuesField) context.doc().getByKey(names().indexName()); - if (field != null) { - field.add(value); - } else { - field = new CustomLongNumericDocValuesField(names().indexName(), value); - context.doc().addWithKey(names().indexName(), field); - } + protected final void addDocValue(ParseContext context, List fields, long value) { + fields.add(new SortedNumericDocValuesField(names().indexName(), value)); } /** @@ -413,96 +399,6 @@ public Reader readerValue() { public abstract String numericAsString(); } - public static abstract class CustomNumericDocValuesField implements IndexableField { - - public static final FieldType TYPE = new FieldType(); - static { - TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY); - TYPE.freeze(); - } - - private final String name; - - public 
CustomNumericDocValuesField(String name) { - this.name = name; - } - - @Override - public String name() { - return name; - } - - @Override - public IndexableFieldType fieldType() { - return TYPE; - } - - @Override - public float boost() { - return 1f; - } - - @Override - public String stringValue() { - return null; - } - - @Override - public Reader readerValue() { - return null; - } - - @Override - public Number numericValue() { - return null; - } - - @Override - public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException { - return null; - } - - } - - public static class CustomLongNumericDocValuesField extends CustomNumericDocValuesField { - - public static final FieldType TYPE = new FieldType(); - static { - TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY); - TYPE.freeze(); - } - - private final LongArrayList values; - - public CustomLongNumericDocValuesField(String name, long value) { - super(name); - values = new LongArrayList(); - add(value); - } - - public void add(long value) { - values.add(value); - } - - @Override - public BytesRef binaryValue() { - CollectionUtils.sortAndDedup(values); - - // here is the trick: - // - the first value is zig-zag encoded so that eg. 
-5 would become positive and would be better compressed by vLong - // - for other values, we only encode deltas using vLong - final byte[] bytes = new byte[values.size() * ByteUtils.MAX_BYTES_VLONG]; - final ByteArrayDataOutput out = new ByteArrayDataOutput(bytes); - ByteUtils.writeVLong(out, ByteUtils.zigZagEncode(values.get(0))); - for (int i = 1; i < values.size(); ++i) { - final long delta = values.get(i) - values.get(i - 1); - ByteUtils.writeVLong(out, delta); - } - return new BytesRef(bytes, 0, out.getPosition()); - } - - } - @Override protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { super.doXContentBody(builder, includeDefaults, params); diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java index 14e99efd1fd74..6e86197701e5e 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java @@ -321,7 +321,7 @@ protected void innerParseCreateField(ParseContext context, List fields) t fields.add(field); } if (hasDocValues()) { - addDocValue(context, value); + addDocValue(context, fields, value); } } diff --git a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java index a0c4f0c0b225b..45b25ffc563e5 100644 --- a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java @@ -22,10 +22,14 @@ import com.carrotsearch.hppc.ObjectOpenHashSet; import com.carrotsearch.hppc.cursors.ObjectCursor; import com.google.common.base.Objects; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import 
org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.ElasticsearchIllegalStateException; @@ -46,12 +50,15 @@ import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider; import org.elasticsearch.index.fielddata.FieldDataType; import org.elasticsearch.index.mapper.*; -import org.elasticsearch.index.mapper.core.*; -import org.elasticsearch.index.mapper.core.NumberFieldMapper.CustomNumericDocValuesField; +import org.elasticsearch.index.mapper.core.AbstractFieldMapper; +import org.elasticsearch.index.mapper.core.DoubleFieldMapper; +import org.elasticsearch.index.mapper.core.NumberFieldMapper; +import org.elasticsearch.index.mapper.core.StringFieldMapper; import org.elasticsearch.index.mapper.object.ArrayValueMapperParser; import org.elasticsearch.index.similarity.SimilarityProvider; import java.io.IOException; +import java.io.Reader; import java.util.Iterator; import java.util.List; import java.util.Locale; @@ -714,6 +721,57 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, } } } + + private static abstract class CustomNumericDocValuesField implements IndexableField { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY); + TYPE.freeze(); + } + + private final String name; + + public CustomNumericDocValuesField(String name) { + this.name = name; + } + + @Override + public String name() { + return name; + } + + @Override + public IndexableFieldType fieldType() { + return TYPE; + } + + @Override + public float boost() { + return 1f; + } + + @Override + public String stringValue() { + return null; + } + + @Override + public Reader readerValue() { + return null; + } + + @Override + public Number 
numericValue() { + return null; + } + + @Override + public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException { + return null; + } + + } public static class CustomGeoPointDocValuesField extends CustomNumericDocValuesField { diff --git a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java index 543f10fa57915..8b273d907a63b 100644 --- a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java @@ -298,7 +298,7 @@ protected void innerParseCreateField(ParseContext context, List fields) t fields.add(field); } if (hasDocValues()) { - addDocValue(context, value); + addDocValue(context, fields, value); } } diff --git a/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataTests.java b/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataTests.java index e4deb370d9c7f..9afc30c100d9e 100644 --- a/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataTests.java +++ b/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataTests.java @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.*; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; @@ -34,6 +35,8 @@ import org.junit.Before; // we might wanna cut this over to LuceneTestCase +@SuppressCodecs({"Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Lucene45", "Lucene46"}) +// avoid codecs that do not support SortedNumerics, SortedSet, etc public abstract class AbstractFieldDataTests extends ElasticsearchSingleNodeTest { protected IndexService indexService; diff --git 
a/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java b/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java index 264f8b7d1ad5f..f5a9146a93fbb 100644 --- a/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java +++ b/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java @@ -101,9 +101,7 @@ public void testDuelAllTypesSingleValue() throws Exception { typeMap.put(new FieldDataType("long", ImmutableSettings.builder().put("format", "doc_values")), Type.Long); typeMap.put(new FieldDataType("double", ImmutableSettings.builder().put("format", "doc_values")), Type.Double); typeMap.put(new FieldDataType("float", ImmutableSettings.builder().put("format", "doc_values")), Type.Float); - if (LuceneTestCase.defaultCodecSupportsSortedSet()) { - typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes); - } + typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes); ArrayList> list = new ArrayList<>(typeMap.entrySet()); Preprocessor pre = new ToDoublePreprocessor(); while (!list.isEmpty()) { @@ -149,13 +147,17 @@ public void testDuelIntegers() throws Exception { final int maxNumValues = randomBoolean() ? 
1 : randomIntBetween(2, 40); byte[] values = new byte[maxNumValues]; for (int i = 0; i < atLeast; i++) { - final int numValues = randomInt(maxNumValues); + int numValues = randomInt(maxNumValues); + // FD loses values if they are duplicated, so we must deduplicate for this test + Set vals = new HashSet(); for (int j = 0; j < numValues; ++j) { - if (randomBoolean()) { - values[j] = 1; // test deduplication - } else { - values[j] = randomByte(); - } + vals.add(randomByte()); + } + + numValues = vals.size(); + int upto = 0; + for (Byte bb : vals) { + values[upto++] = bb.byteValue(); } XContentBuilder doc = XContentFactory.jsonBuilder().startObject(); @@ -227,15 +229,22 @@ public void testDuelDoubles() throws Exception { final int maxNumValues = randomBoolean() ? 1 : randomIntBetween(2, 40); float[] values = new float[maxNumValues]; for (int i = 0; i < atLeast; i++) { - final int numValues = randomInt(maxNumValues); + int numValues = randomInt(maxNumValues); float def = randomBoolean() ? randomFloat() : Float.NaN; + // FD loses values if they are duplicated, so we must deduplicate for this test + Set vals = new HashSet(); for (int j = 0; j < numValues; ++j) { if (randomBoolean()) { - values[j] = def; + vals.add(def); } else { - values[j] = randomFloat(); + vals.add(randomFloat()); } } + numValues = vals.size(); + int upto = 0; + for (Float f : vals) { + values[upto++] = f.floatValue(); + } XContentBuilder doc = XContentFactory.jsonBuilder().startObject().startArray("float"); for (int j = 0; j < numValues; ++j) { @@ -302,15 +311,11 @@ public void testDuelStrings() throws Exception { for (int j : numbers) { final String s = English.longToEnglish(j); d.add(new StringField("bytes", s, Field.Store.NO)); - if (LuceneTestCase.defaultCodecSupportsSortedSet()) { - d.add(new SortedSetDocValuesField("bytes", new BytesRef(s))); - } + d.add(new SortedSetDocValuesField("bytes", new BytesRef(s))); } if (random.nextInt(10) == 0) { d.add(new StringField("bytes", "", Field.Store.NO)); - 
if (LuceneTestCase.defaultCodecSupportsSortedSet()) { - d.add(new SortedSetDocValuesField("bytes", new BytesRef())); - } + d.add(new SortedSetDocValuesField("bytes", new BytesRef())); } } writer.addDocument(d); @@ -322,9 +327,7 @@ public void testDuelStrings() throws Exception { Map typeMap = new HashMap<>(); typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "fst")), Type.Bytes); typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "paged_bytes")), Type.Bytes); - if (LuceneTestCase.defaultCodecSupportsSortedSet()) { - typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes); - } + typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes); // TODO add filters ArrayList> list = new ArrayList<>(typeMap.entrySet()); Preprocessor pre = new Preprocessor(); @@ -371,9 +374,7 @@ public void testDuelGlobalOrdinals() throws Exception { for (int j = 0; j < numVals; ++j) { final String value = RandomPicks.randomFrom(random, Arrays.asList(values)); d.add(new StringField("string", value, Field.Store.NO)); - if (LuceneTestCase.defaultCodecSupportsSortedSet()) { - d.add(new SortedSetDocValuesField("bytes", new BytesRef(value))); - } + d.add(new SortedSetDocValuesField("bytes", new BytesRef(value))); } writer.addDocument(d); if (randomInt(10) == 0) { @@ -385,9 +386,7 @@ public void testDuelGlobalOrdinals() throws Exception { Map typeMap = new HashMap(); typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "fst")), Type.Bytes); typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "paged_bytes")), Type.Bytes); - if (LuceneTestCase.defaultCodecSupportsSortedSet()) { - typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes); - } + typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", 
"doc_values")), Type.Bytes); for (Map.Entry entry : typeMap.entrySet()) { ifdService.clear(); diff --git a/src/test/java/org/elasticsearch/index/fielddata/IndexFieldDataServiceTests.java b/src/test/java/org/elasticsearch/index/fielddata/IndexFieldDataServiceTests.java index bfadf17118b41..a9cd7c98ae6c6 100644 --- a/src/test/java/org/elasticsearch/index/fielddata/IndexFieldDataServiceTests.java +++ b/src/test/java/org/elasticsearch/index/fielddata/IndexFieldDataServiceTests.java @@ -68,7 +68,7 @@ public void testGetForFieldDefaults() { ifdService.clear(); fd = ifdService.getForField(mapper); if (docValues) { - assertTrue(fd instanceof BinaryDVNumericIndexFieldData); + assertTrue(fd instanceof SortedNumericDVIndexFieldData); } else { assertTrue(fd instanceof PackedArrayIndexFieldData); } @@ -78,7 +78,7 @@ public void testGetForFieldDefaults() { ifdService.clear(); fd = ifdService.getForField(floatMapper); if (docValues) { - assertTrue(fd instanceof BinaryDVNumericIndexFieldData); + assertTrue(fd instanceof SortedNumericDVIndexFieldData); } else { assertTrue(fd instanceof FloatArrayIndexFieldData); } @@ -87,7 +87,7 @@ public void testGetForFieldDefaults() { ifdService.clear(); fd = ifdService.getForField(doubleMapper); if (docValues) { - assertTrue(fd instanceof BinaryDVNumericIndexFieldData); + assertTrue(fd instanceof SortedNumericDVIndexFieldData); } else { assertTrue(fd instanceof DoubleArrayIndexFieldData); } diff --git a/src/test/java/org/elasticsearch/index/mapper/numeric/SimpleNumericTests.java b/src/test/java/org/elasticsearch/index/mapper/numeric/SimpleNumericTests.java index 5de1e6974e47e..472c1405806f0 100644 --- a/src/test/java/org/elasticsearch/index/mapper/numeric/SimpleNumericTests.java +++ b/src/test/java/org/elasticsearch/index/mapper/numeric/SimpleNumericTests.java @@ -279,8 +279,8 @@ public void testDocValues() throws Exception { .endObject() .bytes()); final Document doc = parsedDoc.rootDoc(); - assertEquals(DocValuesType.BINARY, 
SimpleStringMappingTests.docValuesType(doc, "int")); - assertEquals(DocValuesType.BINARY, SimpleStringMappingTests.docValuesType(doc, "double")); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "int")); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "double")); } public void testDocValuesOnNested() throws Exception { @@ -326,8 +326,8 @@ public void testDocValuesOnNested() throws Exception { if (doc == parsedDoc.rootDoc()) { continue; } - assertEquals(DocValuesType.BINARY, SimpleStringMappingTests.docValuesType(doc, "nested.int")); - assertEquals(DocValuesType.BINARY, SimpleStringMappingTests.docValuesType(doc, "nested.double")); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "nested.int")); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "nested.double")); } } From 231be2da7ab11bc7734945cc43252d3a75818850 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 23 Jul 2014 12:21:57 -0400 Subject: [PATCH 2/3] use indexCreated macro and add correct back compat --- .../plain/DocValuesIndexFieldData.java | 4 +- .../index/mapper/core/BinaryFieldMapper.java | 47 +------ .../index/mapper/core/DoubleFieldMapper.java | 49 ++++++- .../index/mapper/core/FloatFieldMapper.java | 49 ++++++- .../index/mapper/core/NumberFieldMapper.java | 122 +++++++++++++++++- .../index/mapper/geo/GeoPointFieldMapper.java | 62 +-------- 6 files changed, 223 insertions(+), 110 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/DocValuesIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/DocValuesIndexFieldData.java index 3b39a98113c4d..0605b0e909e36 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/DocValuesIndexFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/DocValuesIndexFieldData.java @@ -23,7 +23,6 @@ import 
org.apache.lucene.index.IndexReader; import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.Version; -import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.Settings; @@ -109,8 +108,7 @@ public IndexFieldData build(Index index, Settings indexSettings, FieldMapper< assert !numericType.isFloatingPoint(); return new NumericDVIndexFieldData(index, fieldNames, mapper.fieldDataType()); } else if (numericType != null) { - Version version = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, org.elasticsearch.Version.CURRENT); - if (version.onOrAfter(Version.V_1_4_0)) { + if (Version.indexCreated(indexSettings).onOrAfter(Version.V_1_4_0)) { return new SortedNumericDVIndexFieldData(index, fieldNames, numericType, mapper.fieldDataType()); } else { // prior to ES 1.4: multi-valued numerics were boxed inside a byte[] as BINARY diff --git a/src/main/java/org/elasticsearch/index/mapper/core/BinaryFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/BinaryFieldMapper.java index 8c50219d932f8..f2cf9ce3998e0 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/BinaryFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/BinaryFieldMapper.java @@ -20,13 +20,9 @@ package org.elasticsearch.index.mapper.core; import com.carrotsearch.hppc.ObjectArrayList; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchException; @@ -50,7 +46,6 @@ import 
org.elasticsearch.index.mapper.*; import java.io.IOException; -import java.io.Reader; import java.util.List; import java.util.Map; @@ -260,7 +255,7 @@ public void merge(Mapper mergeWith, MergeContext mergeContext) throws MergeMappi } } - public static class CustomBinaryDocValuesField implements IndexableField { + public static class CustomBinaryDocValuesField extends NumberFieldMapper.CustomNumericDocValuesField { public static final FieldType TYPE = new FieldType(); static { @@ -268,14 +263,12 @@ public static class CustomBinaryDocValuesField implements IndexableField { TYPE.freeze(); } - private final String name; - private final ObjectArrayList bytesList; private int totalSize = 0; - public CustomBinaryDocValuesField(String name, byte[] bytes) { - this.name = name; + public CustomBinaryDocValuesField(String name, byte[] bytes) { + super(name); bytesList = new ObjectArrayList<>(); add(bytes); } @@ -303,41 +296,7 @@ public BytesRef binaryValue() { } catch (IOException e) { throw new ElasticsearchException("Failed to get binary value", e); } - } - - @Override - public float boost() { - return 1f; - } - - @Override - public IndexableFieldType fieldType() { - return TYPE; - } - - @Override - public String name() { - return name; - } - - @Override - public Number numericValue() { - return null; - } - - @Override - public Reader readerValue() { - return null; - } - @Override - public String stringValue() { - return null; - } - - @Override - public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException { - return null; } } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java index e96cf65d55036..2af91bcc67351 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java @@ -19,10 +19,12 @@ package 
org.elasticsearch.index.mapper.core; +import com.carrotsearch.hppc.DoubleArrayList; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; @@ -35,6 +37,8 @@ import org.elasticsearch.common.Numbers; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.common.util.ByteUtils; +import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NumericDoubleAnalyzer; @@ -312,7 +316,17 @@ protected void innerParseCreateField(ParseContext context, List fields) t fields.add(field); } if (hasDocValues()) { - addDocValue(context, fields, NumericUtils.doubleToSortableLong(value)); + if (useSortedNumericDocValues) { + addDocValue(context, fields, NumericUtils.doubleToSortableLong(value)); + } else { + CustomDoubleNumericDocValuesField field = (CustomDoubleNumericDocValuesField) context.doc().getByKey(names().indexName()); + if (field != null) { + field.add(value); + } else { + field = new CustomDoubleNumericDocValuesField(names().indexName(), value); + context.doc().addWithKey(names().indexName(), field); + } + } } } @@ -376,4 +390,37 @@ public String numericAsString() { return Double.toString(number); } } + + public static class CustomDoubleNumericDocValuesField extends CustomNumericDocValuesField { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY); + TYPE.freeze(); + } + + private final DoubleArrayList values; + + public CustomDoubleNumericDocValuesField(String name, double value) { + 
super(name); + values = new DoubleArrayList(); + add(value); + } + + public void add(double value) { + values.add(value); + } + + @Override + public BytesRef binaryValue() { + CollectionUtils.sortAndDedup(values); + + final byte[] bytes = new byte[values.size() * 8]; + for (int i = 0; i < values.size(); ++i) { + ByteUtils.writeDoubleLE(values.get(i), bytes, i * 8); + } + return new BytesRef(bytes); + } + + } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java index 54bab7ae49250..8c71b21c79a73 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java @@ -19,10 +19,12 @@ package org.elasticsearch.index.mapper.core; +import com.carrotsearch.hppc.FloatArrayList; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; @@ -36,6 +38,8 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.common.util.ByteUtils; +import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NumericFloatAnalyzer; @@ -317,7 +321,17 @@ protected void innerParseCreateField(ParseContext context, List fields) t fields.add(field); } if (hasDocValues()) { - addDocValue(context, fields, NumericUtils.floatToSortableInt(value)); + if (useSortedNumericDocValues) { + addDocValue(context, fields, 
NumericUtils.floatToSortableInt(value)); + } else { + CustomFloatNumericDocValuesField field = (CustomFloatNumericDocValuesField) context.doc().getByKey(names().indexName()); + if (field != null) { + field.add(value); + } else { + field = new CustomFloatNumericDocValuesField(names().indexName(), value); + context.doc().addWithKey(names().indexName(), field); + } + } } } @@ -382,4 +396,37 @@ public String numericAsString() { return Float.toString(number); } } + + public static class CustomFloatNumericDocValuesField extends CustomNumericDocValuesField { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY); + TYPE.freeze(); + } + + private final FloatArrayList values; + + public CustomFloatNumericDocValuesField(String name, float value) { + super(name); + values = new FloatArrayList(); + add(value); + } + + public void add(float value) { + values.add(value); + } + + @Override + public BytesRef binaryValue() { + CollectionUtils.sortAndDedup(values); + + final byte[] bytes = new byte[values.size() * 4]; + for (int i = 0; i < values.size(); ++i) { + ByteUtils.writeFloatLE(values.get(i), bytes, i * 4); + } + return new BytesRef(bytes); + } + + } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java index 3cd3a2e5f4c22..7e1dc8e0a5962 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java @@ -20,19 +20,29 @@ package org.elasticsearch.index.mapper.core; import com.carrotsearch.hppc.DoubleOpenHashSet; +import com.carrotsearch.hppc.LongArrayList; import com.carrotsearch.hppc.LongOpenHashSet; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.NumericTokenStream; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import 
org.apache.lucene.document.FieldType; import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Query; +import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.Version; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.common.util.ByteUtils; +import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider; @@ -130,6 +140,14 @@ protected Explicit coerce(BuilderContext context) { protected Explicit coerce; + /** + * True if index version is 1.4+ + *

+ * In this case numerics are encoded with SORTED_NUMERIC docvalues, + * otherwise for older indexes we must continue to write BINARY (for now) + */ + protected final boolean useSortedNumericDocValues; + private ThreadLocal tokenStream = new ThreadLocal() { @Override protected NumericTokenStream initialValue() { @@ -181,6 +199,8 @@ protected NumberFieldMapper(Names names, int precisionStep, float boost, FieldTy } this.ignoreMalformed = ignoreMalformed; this.coerce = coerce; + Version v = indexSettings == null ? Version.CURRENT : Version.indexCreated(indexSettings); + this.useSortedNumericDocValues = v.onOrAfter(Version.V_1_4_0); } @Override @@ -227,7 +247,17 @@ protected void parseCreateField(ParseContext context, List fields) throws protected abstract void innerParseCreateField(ParseContext context, List fields) throws IOException; protected final void addDocValue(ParseContext context, List fields, long value) { - fields.add(new SortedNumericDocValuesField(names().indexName(), value)); + if (useSortedNumericDocValues) { + fields.add(new SortedNumericDocValuesField(names().indexName(), value)); + } else { + CustomLongNumericDocValuesField field = (CustomLongNumericDocValuesField) context.doc().getByKey(names().indexName()); + if (field != null) { + field.add(value); + } else { + field = new CustomLongNumericDocValuesField(names().indexName(), value); + context.doc().addWithKey(names().indexName(), field); + } + } } /** @@ -399,6 +429,96 @@ public Reader readerValue() { public abstract String numericAsString(); } + public static abstract class CustomNumericDocValuesField implements IndexableField { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY); + TYPE.freeze(); + } + + private final String name; + + public CustomNumericDocValuesField(String name) { + this.name = name; + } + + @Override + public String name() { + return name; + } + + @Override + public IndexableFieldType fieldType() { + 
return TYPE; + } + + @Override + public float boost() { + return 1f; + } + + @Override + public String stringValue() { + return null; + } + + @Override + public Reader readerValue() { + return null; + } + + @Override + public Number numericValue() { + return null; + } + + @Override + public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException { + return null; + } + + } + + public static class CustomLongNumericDocValuesField extends CustomNumericDocValuesField { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY); + TYPE.freeze(); + } + + private final LongArrayList values; + + public CustomLongNumericDocValuesField(String name, long value) { + super(name); + values = new LongArrayList(); + add(value); + } + + public void add(long value) { + values.add(value); + } + + @Override + public BytesRef binaryValue() { + CollectionUtils.sortAndDedup(values); + + // here is the trick: + // - the first value is zig-zag encoded so that eg. 
-5 would become positive and would be better compressed by vLong + // - for other values, we only encode deltas using vLong + final byte[] bytes = new byte[values.size() * ByteUtils.MAX_BYTES_VLONG]; + final ByteArrayDataOutput out = new ByteArrayDataOutput(bytes); + ByteUtils.writeVLong(out, ByteUtils.zigZagEncode(values.get(0))); + for (int i = 1; i < values.size(); ++i) { + final long delta = values.get(i) - values.get(i - 1); + ByteUtils.writeVLong(out, delta); + } + return new BytesRef(bytes, 0, out.getPosition()); + } + + } + @Override protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { super.doXContentBody(builder, includeDefaults, params); diff --git a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java index 45b25ffc563e5..a0c4f0c0b225b 100644 --- a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java @@ -22,14 +22,10 @@ import com.carrotsearch.hppc.ObjectOpenHashSet; import com.carrotsearch.hppc.cursors.ObjectCursor; import com.google.common.base.Objects; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.ElasticsearchIllegalStateException; @@ -50,15 +46,12 @@ import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider; import org.elasticsearch.index.fielddata.FieldDataType; import org.elasticsearch.index.mapper.*; -import 
org.elasticsearch.index.mapper.core.AbstractFieldMapper; -import org.elasticsearch.index.mapper.core.DoubleFieldMapper; -import org.elasticsearch.index.mapper.core.NumberFieldMapper; -import org.elasticsearch.index.mapper.core.StringFieldMapper; +import org.elasticsearch.index.mapper.core.*; +import org.elasticsearch.index.mapper.core.NumberFieldMapper.CustomNumericDocValuesField; import org.elasticsearch.index.mapper.object.ArrayValueMapperParser; import org.elasticsearch.index.similarity.SimilarityProvider; import java.io.IOException; -import java.io.Reader; import java.util.Iterator; import java.util.List; import java.util.Locale; @@ -721,57 +714,6 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, } } } - - private static abstract class CustomNumericDocValuesField implements IndexableField { - - public static final FieldType TYPE = new FieldType(); - static { - TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY); - TYPE.freeze(); - } - - private final String name; - - public CustomNumericDocValuesField(String name) { - this.name = name; - } - - @Override - public String name() { - return name; - } - - @Override - public IndexableFieldType fieldType() { - return TYPE; - } - - @Override - public float boost() { - return 1f; - } - - @Override - public String stringValue() { - return null; - } - - @Override - public Reader readerValue() { - return null; - } - - @Override - public Number numericValue() { - return null; - } - - @Override - public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException { - return null; - } - - } public static class CustomGeoPointDocValuesField extends CustomNumericDocValuesField { From e58f9d5173940e06971d2c2a4d50445c78992193 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 23 Jul 2014 14:54:07 -0400 Subject: [PATCH 3/3] syncup to comparator api --- .../fielddata/plain/SortedNumericDVIndexFieldData.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff 
--git a/src/main/java/org/elasticsearch/index/fielddata/plain/SortedNumericDVIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/SortedNumericDVIndexFieldData.java index 631f69ad25e71..12230c2cbcbf0 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/SortedNumericDVIndexFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/SortedNumericDVIndexFieldData.java @@ -25,6 +25,7 @@ import org.elasticsearch.ElasticsearchIllegalStateException; import org.elasticsearch.index.Index; import org.elasticsearch.index.fielddata.*; +import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource; import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource; import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource; @@ -47,15 +48,15 @@ public SortedNumericDVIndexFieldData(Index index, Names fieldNames, NumericType } @Override - public org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue, MultiValueMode sortMode) { + public org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue, MultiValueMode sortMode, Nested nested) { switch (numericType) { case FLOAT: - return new FloatValuesComparatorSource(this, missingValue, sortMode); + return new FloatValuesComparatorSource(this, missingValue, sortMode, nested); case DOUBLE: - return new DoubleValuesComparatorSource(this, missingValue, sortMode); + return new DoubleValuesComparatorSource(this, missingValue, sortMode, nested); default: assert !numericType.isFloatingPoint(); - return new LongValuesComparatorSource(this, missingValue, sortMode); + return new LongValuesComparatorSource(this, missingValue, sortMode, nested); } }