Make a builder for _tsid (#83799)
`_tsid` is built by getting a sorted list of encoded dimensions. This
creates a `TimeSeriesIdBuilder` that abstracts that behind sensibly
named methods.
nik9000 committed Feb 14, 2022
1 parent 8906af2 commit da6a1e4
Showing 15 changed files with 223 additions and 179 deletions.
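
As a rough illustration of the idea in the commit message (hypothetical names and encoding; the real builder lives in `TimeSeriesIdFieldMapper` and its encoding is not shown in full in this diff), the change replaces ad-hoc sorted-map bookkeeping with a builder that exposes one method per dimension type:

```java
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.TreeMap;

// Toy sketch only: like the real TimeSeriesIdBuilder, it collects dimension
// values behind named methods and emits them in sorted field-name order, but
// the string-based "encoding" here is made up for illustration.
class ToyTimeSeriesIdBuilder {
    // TreeMap keeps dimensions sorted by field name so the id is deterministic.
    private final Map<String, String> encodedDimensions = new TreeMap<>();

    void addString(String fieldName, String value) {
        addEncoded(fieldName, "s:" + value);
    }

    void addLong(String fieldName, long value) {
        addEncoded(fieldName, "l:" + value);
    }

    private void addEncoded(String fieldName, String encoded) {
        if (encodedDimensions.put(fieldName, encoded) != null) {
            throw new IllegalArgumentException(
                "Dimension field [" + fieldName + "] cannot be a multi-valued field."
            );
        }
    }

    byte[] build() {
        StringBuilder sb = new StringBuilder();
        encodedDimensions.forEach((name, value) -> sb.append(name).append('=').append(value).append(';'));
        return sb.toString().getBytes(StandardCharsets.UTF_8);
    }

    public static void main(String[] args) {
        ToyTimeSeriesIdBuilder builder = new ToyTimeSeriesIdBuilder();
        builder.addString("pod", "cat");
        builder.addLong("node_id", 7);
        // Prints "node_id=l:7;pod=s:cat;" - sorted by field name regardless of insertion order.
        System.out.println(new String(builder.build(), StandardCharsets.UTF_8));
    }
}
```
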
13 changes: 13 additions & 0 deletions server/src/main/java/org/elasticsearch/index/IndexMode.java
@@ -17,6 +17,7 @@
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.DocumentDimensions;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MappingLookup;
import org.elasticsearch.index.mapper.MetadataFieldMapper;
@@ -84,6 +85,11 @@ public MetadataFieldMapper buildTimeSeriesIdFieldMapper() {
// non time-series indices must not have a TimeSeriesIdFieldMapper
return null;
}

@Override
public DocumentDimensions buildDocumentDimensions() {
return new DocumentDimensions.OnlySingleValueAllowed();
}
},
TIME_SERIES("time_series") {
@Override
@@ -149,6 +155,11 @@ private String routingRequiredBad() {
public MetadataFieldMapper buildTimeSeriesIdFieldMapper() {
return TimeSeriesIdFieldMapper.INSTANCE;
}

@Override
public DocumentDimensions buildDocumentDimensions() {
return new TimeSeriesIdFieldMapper.TimeSeriesIdBuilder();
}
};

protected String tsdbMode() {
@@ -241,6 +252,8 @@ public String getName() {
*/
public abstract MetadataFieldMapper buildTimeSeriesIdFieldMapper();

public abstract DocumentDimensions buildDocumentDimensions();

public static IndexMode fromString(String value) {
return switch (value) {
case "standard" -> IndexMode.STANDARD;
60 changes: 60 additions & 0 deletions server/src/main/java/org/elasticsearch/index/mapper/DocumentDimensions.java
@@ -0,0 +1,60 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.index.mapper;

import java.net.InetAddress;
import java.util.HashSet;
import java.util.Set;

/**
* Collects dimensions from documents.
*/
public interface DocumentDimensions {
void addString(String fieldName, String value);

void addIp(String fieldName, InetAddress value);

void addLong(String fieldName, long value);

void addUnsignedLong(String fieldName, long value);

/**
* Makes sure that each dimension only appears once.
*/
class OnlySingleValueAllowed implements DocumentDimensions {
private final Set<String> names = new HashSet<>();

@Override
public void addString(String fieldName, String value) {
add(fieldName);
}

@Override
public void addIp(String fieldName, InetAddress value) {
add(fieldName);
}

@Override
public void addLong(String fieldName, long value) {
add(fieldName);
}

@Override
public void addUnsignedLong(String fieldName, long value) {
add(fieldName);
}

private void add(String fieldName) {
boolean isNew = names.add(fieldName);
if (false == isNew) {
throw new IllegalArgumentException("Dimension field [" + fieldName + "] cannot be a multi-valued field.");
}
}
};
}
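
For context, a quick standalone usage sketch (not part of this commit, and assuming the `DocumentDimensions` interface above is on the classpath): reporting the same dimension field twice trips the single-value check.

```java
import java.net.InetAddress;

import org.elasticsearch.index.mapper.DocumentDimensions;

// Hypothetical usage sketch of OnlySingleValueAllowed, not code from the commit.
class OnlySingleValueAllowedExample {
    public static void main(String[] args) throws Exception {
        DocumentDimensions dims = new DocumentDimensions.OnlySingleValueAllowed();
        dims.addString("pod", "cat");                              // first value: accepted
        dims.addIp("host_ip", InetAddress.getByName("10.0.0.1"));  // different field: accepted
        dims.addLong("pod", 7);                                    // same field again:
        // throws IllegalArgumentException: Dimension field [pod] cannot be a multi-valued field.
    }
}
```
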
server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java
@@ -89,6 +89,7 @@ protected void addDoc(LuceneDocument doc) {
private final Set<String> newFieldsSeen;
private final Map<String, ObjectMapper> dynamicObjectMappers;
private final List<RuntimeField> dynamicRuntimeFields;
private final DocumentDimensions dimensions;
private Field version;
private SeqNoFieldMapper.SequenceIDFields seqID;

@@ -105,6 +106,7 @@ private DocumentParserContext(DocumentParserContext in) {
this.dynamicRuntimeFields = in.dynamicRuntimeFields;
this.version = in.version;
this.seqID = in.seqID;
this.dimensions = in.dimensions;
}

protected DocumentParserContext(
@@ -124,6 +126,7 @@ protected DocumentParserContext(
this.newFieldsSeen = new HashSet<>();
this.dynamicObjectMappers = new HashMap<>();
this.dynamicRuntimeFields = new ArrayList<>();
this.dimensions = indexSettings.getMode().buildDocumentDimensions();
}

public final IndexSettings indexSettings() {
@@ -334,6 +337,13 @@ public XContentParser parser() {
};
}

/**
* The collection of dimensions for this document.
*/
public DocumentDimensions getDimensions() {
return dimensions;
}

public abstract ContentPath path();

public abstract XContentParser parser();
server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java
@@ -18,11 +18,9 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.common.network.NetworkAddress;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.fielddata.IndexFieldData;
@@ -492,12 +490,7 @@ private static InetAddress value(XContentParser parser, InetAddress nullValue) t

private void indexValue(DocumentParserContext context, InetAddress address) {
if (dimension) {
// Encode the tsid part of the dimension field if the _tsid field is enabled.
// If the _tsid field is not enabled, we can skip the encoding part.
BytesReference bytes = context.getMetadataMapper(TimeSeriesIdFieldMapper.NAME) != null
? TimeSeriesIdFieldMapper.encodeTsidValue(NetworkAddress.format(address))
: null;
context.doc().addDimensionBytes(fieldType().name(), bytes);
context.getDimensions().addIp(fieldType().name(), address);
}
if (indexed) {
Field field = new InetAddressPoint(fieldType().name(), address);
server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -32,7 +32,6 @@
import org.apache.lucene.util.automaton.CompiledAutomaton.AUTOMATON_TYPE;
import org.apache.lucene.util.automaton.MinimizationOperations;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
@@ -901,12 +900,7 @@ private void indexValue(DocumentParserContext context, String value) {

value = normalizeValue(fieldType().normalizer(), name(), value);
if (dimension) {
// Encode the tsid part of the dimension field. Although, it would seem reasonable
// to skip the encode part if we don't generate a _tsid field (as we do with number
// and ip fields), we keep this test because we must ensure that the value of this
// dimension field is not larger than TimeSeriesIdFieldMapper.DIMENSION_VALUE_LIMIT
BytesReference bytes = TimeSeriesIdFieldMapper.encodeTsidValue(value);
context.doc().addDimensionBytes(fieldType().name(), bytes);
context.getDimensions().addString(fieldType().name(), value);
}

// convert to utf8 only once before feeding postings/dv/stored fields
server/src/main/java/org/elasticsearch/index/mapper/LuceneDocument.java
@@ -10,16 +10,12 @@

import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.BytesReference;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

/**
* Fork of {@link org.apache.lucene.document.Document} with additional functionality.
@@ -31,12 +27,6 @@ public class LuceneDocument implements Iterable<IndexableField> {
private final String prefix;
private final List<IndexableField> fields;
private Map<Object, IndexableField> keyedFields;
/**
* A sorted map of the serialized values of dimension fields that will be used
* for generating the _tsid field. The map will be used by {@link TimeSeriesIdFieldMapper}
* to build the _tsid field for the document.
*/
private SortedMap<BytesRef, BytesReference> dimensionBytes;

LuceneDocument(String path, LuceneDocument parent) {
fields = new ArrayList<>();
@@ -109,28 +99,6 @@ public IndexableField getByKey(Object key) {
return keyedFields == null ? null : keyedFields.get(key);
}

/**
* Add the serialized byte reference for a dimension field. This will be used by {@link TimeSeriesIdFieldMapper}
* to build the _tsid field for the document.
*/
public void addDimensionBytes(String fieldName, BytesReference tsidBytes) {
BytesRef fieldNameBytes = new BytesRef(fieldName);
if (dimensionBytes == null) {
// It is a {@link TreeMap} so that it is order by field name.
dimensionBytes = new TreeMap<>();
} else if (dimensionBytes.containsKey(fieldNameBytes)) {
throw new IllegalArgumentException("Dimension field [" + fieldName + "] cannot be a multi-valued field.");
}
dimensionBytes.put(fieldNameBytes, tsidBytes);
}

public SortedMap<BytesRef, BytesReference> getDimensionBytes() {
if (dimensionBytes == null) {
return Collections.emptySortedMap();
}
return dimensionBytes;
}

public IndexableField[] getFields(String name) {
List<IndexableField> f = new ArrayList<>();
for (IndexableField field : fields) {
server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java
@@ -28,7 +28,6 @@
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.Numbers;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
@@ -1476,14 +1475,7 @@ private static Number value(XContentParser parser, NumberType numberType, Number

private void indexValue(DocumentParserContext context, Number numericValue) {
if (dimension && numericValue != null) {
// Dimension can only be one of byte, short, int, long. So, we encode the tsid
// part of the dimension field by using the long value.
// Also, there is no point in encoding the tsid value if we do not generate
// the _tsid field.
BytesReference bytes = context.getMetadataMapper(TimeSeriesIdFieldMapper.NAME) != null
? TimeSeriesIdFieldMapper.encodeTsidValue(numericValue.longValue())
: null;
context.doc().addDimensionBytes(fieldType().name(), bytes);
context.getDimensions().addLong(fieldType().name(), numericValue.longValue());
}
List<Field> fields = fieldType().type.createFields(fieldType().name(), numericValue, indexed, hasDocValues, stored);
context.doc().addAll(fields);
