Make a builder for _tsid (#83799)
`_tsid` is built by getting a sorted list of encoded dimensions. This
creates a `TimeSeriesIdBuilder` that abstracts that behind sensibly
named methods.
nik9000 committed Feb 14, 2022
1 parent 8906af2 commit da6a1e4
Showing 15 changed files with 223 additions and 179 deletions.
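
As a rough illustration of the idea in the commit message (hypothetical names and encoding; the real builder lives in `TimeSeriesIdFieldMapper` and its encoding is not shown in full in this diff), the change replaces ad-hoc sorted-map bookkeeping with a builder that exposes one method per dimension type:

```java
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.TreeMap;

// Toy sketch only: like the real TimeSeriesIdBuilder, it collects dimension
// values behind named methods and emits them in sorted field-name order, but
// the string-based "encoding" here is made up for illustration.
class ToyTimeSeriesIdBuilder {
    // TreeMap keeps dimensions sorted by field name so the id is deterministic.
    private final Map<String, String> encodedDimensions = new TreeMap<>();

    void addString(String fieldName, String value) {
        addEncoded(fieldName, "s:" + value);
    }

    void addLong(String fieldName, long value) {
        addEncoded(fieldName, "l:" + value);
    }

    private void addEncoded(String fieldName, String encoded) {
        if (encodedDimensions.put(fieldName, encoded) != null) {
            throw new IllegalArgumentException(
                "Dimension field [" + fieldName + "] cannot be a multi-valued field."
            );
        }
    }

    byte[] build() {
        StringBuilder sb = new StringBuilder();
        encodedDimensions.forEach((name, value) -> sb.append(name).append('=').append(value).append(';'));
        return sb.toString().getBytes(StandardCharsets.UTF_8);
    }

    public static void main(String[] args) {
        ToyTimeSeriesIdBuilder builder = new ToyTimeSeriesIdBuilder();
        builder.addString("pod", "cat");
        builder.addLong("node_id", 7);
        // Prints "node_id=l:7;pod=s:cat;" - sorted by field name regardless of insertion order.
        System.out.println(new String(builder.build(), StandardCharsets.UTF_8));
    }
}
```
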
13 changes: 13 additions & 0 deletions server/src/main/java/org/elasticsearch/index/IndexMode.java
@@ -17,6 +17,7 @@
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.DocumentDimensions;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MappingLookup;
import org.elasticsearch.index.mapper.MetadataFieldMapper;
@@ -84,6 +85,11 @@ public MetadataFieldMapper buildTimeSeriesIdFieldMapper() {
// non time-series indices must not have a TimeSeriesIdFieldMapper
return null;
}

@Override
public DocumentDimensions buildDocumentDimensions() {
return new DocumentDimensions.OnlySingleValueAllowed();
}
},
TIME_SERIES("time_series") {
@Override
@@ -149,6 +155,11 @@ private String routingRequiredBad() {
public MetadataFieldMapper buildTimeSeriesIdFieldMapper() {
return TimeSeriesIdFieldMapper.INSTANCE;
}

@Override
public DocumentDimensions buildDocumentDimensions() {
return new TimeSeriesIdFieldMapper.TimeSeriesIdBuilder();
}
};

protected String tsdbMode() {
@@ -241,6 +252,8 @@ public String getName() {
*/
public abstract MetadataFieldMapper buildTimeSeriesIdFieldMapper();

public abstract DocumentDimensions buildDocumentDimensions();

public static IndexMode fromString(String value) {
return switch (value) {
case "standard" -> IndexMode.STANDARD;
60 changes: 60 additions & 0 deletions server/src/main/java/org/elasticsearch/index/mapper/DocumentDimensions.java
@@ -0,0 +1,60 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.index.mapper;

import java.net.InetAddress;
import java.util.HashSet;
import java.util.Set;

/**
* Collects dimensions from documents.
*/
public interface DocumentDimensions {
void addString(String fieldName, String value);

void addIp(String fieldName, InetAddress value);

void addLong(String fieldName, long value);

void addUnsignedLong(String fieldName, long value);

/**
* Makes sure that each dimension only appears once.
*/
class OnlySingleValueAllowed implements DocumentDimensions {
private final Set<String> names = new HashSet<>();

@Override
public void addString(String fieldName, String value) {
add(fieldName);
}

@Override
public void addIp(String fieldName, InetAddress value) {
add(fieldName);
}

@Override
public void addLong(String fieldName, long value) {
add(fieldName);
}

@Override
public void addUnsignedLong(String fieldName, long value) {
add(fieldName);
}

private void add(String fieldName) {
boolean isNew = names.add(fieldName);
if (false == isNew) {
throw new IllegalArgumentException("Dimension field [" + fieldName + "] cannot be a multi-valued field.");
}
}
};
}
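
For context, a quick standalone usage sketch (not part of this commit, and assuming the `DocumentDimensions` interface above is on the classpath): reporting the same dimension field twice trips the single-value check.

```java
import java.net.InetAddress;

import org.elasticsearch.index.mapper.DocumentDimensions;

// Hypothetical usage sketch of OnlySingleValueAllowed, not code from the commit.
class OnlySingleValueAllowedExample {
    public static void main(String[] args) throws Exception {
        DocumentDimensions dims = new DocumentDimensions.OnlySingleValueAllowed();
        dims.addString("pod", "cat");                              // first value: accepted
        dims.addIp("host_ip", InetAddress.getByName("10.0.0.1"));  // different field: accepted
        dims.addLong("pod", 7);                                    // same field again:
        // throws IllegalArgumentException: Dimension field [pod] cannot be a multi-valued field.
    }
}
```
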
server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java
@@ -89,6 +89,7 @@ protected void addDoc(LuceneDocument doc) {
private final Set<String> newFieldsSeen;
private final Map<String, ObjectMapper> dynamicObjectMappers;
private final List<RuntimeField> dynamicRuntimeFields;
private final DocumentDimensions dimensions;
private Field version;
private SeqNoFieldMapper.SequenceIDFields seqID;

@@ -105,6 +106,7 @@ private DocumentParserContext(DocumentParserContext in) {
this.dynamicRuntimeFields = in.dynamicRuntimeFields;
this.version = in.version;
this.seqID = in.seqID;
this.dimensions = in.dimensions;
}

protected DocumentParserContext(
@@ -124,6 +126,7 @@ protected DocumentParserContext(
this.newFieldsSeen = new HashSet<>();
this.dynamicObjectMappers = new HashMap<>();
this.dynamicRuntimeFields = new ArrayList<>();
this.dimensions = indexSettings.getMode().buildDocumentDimensions();
}

public final IndexSettings indexSettings() {
@@ -334,6 +337,13 @@ public XContentParser parser() {
};
}

/**
* The collection of dimensions for this document.
*/
public DocumentDimensions getDimensions() {
return dimensions;
}

public abstract ContentPath path();

public abstract XContentParser parser();
server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java
@@ -18,11 +18,9 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.common.network.NetworkAddress;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.fielddata.IndexFieldData;
@@ -492,12 +490,7 @@ private static InetAddress value(XContentParser parser, InetAddress nullValue) t

private void indexValue(DocumentParserContext context, InetAddress address) {
if (dimension) {
// Encode the tsid part of the dimension field if the _tsid field is enabled.
// If the _tsid field is not enabled, we can skip the encoding part.
BytesReference bytes = context.getMetadataMapper(TimeSeriesIdFieldMapper.NAME) != null
? TimeSeriesIdFieldMapper.encodeTsidValue(NetworkAddress.format(address))
: null;
context.doc().addDimensionBytes(fieldType().name(), bytes);
context.getDimensions().addIp(fieldType().name(), address);
}
if (indexed) {
Field field = new InetAddressPoint(fieldType().name(), address);
server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -32,7 +32,6 @@
import org.apache.lucene.util.automaton.CompiledAutomaton.AUTOMATON_TYPE;
import org.apache.lucene.util.automaton.MinimizationOperations;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
@@ -901,12 +900,7 @@ private void indexValue(DocumentParserContext context, String value) {

value = normalizeValue(fieldType().normalizer(), name(), value);
if (dimension) {
// Encode the tsid part of the dimension field. Although, it would seem reasonable
// to skip the encode part if we don't generate a _tsid field (as we do with number
// and ip fields), we keep this test because we must ensure that the value of this
// dimension field is not larger than TimeSeriesIdFieldMapper.DIMENSION_VALUE_LIMIT
BytesReference bytes = TimeSeriesIdFieldMapper.encodeTsidValue(value);
context.doc().addDimensionBytes(fieldType().name(), bytes);
context.getDimensions().addString(fieldType().name(), value);
}

// convert to utf8 only once before feeding postings/dv/stored fields
server/src/main/java/org/elasticsearch/index/mapper/LuceneDocument.java
@@ -10,16 +10,12 @@

import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.BytesReference;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

/**
* Fork of {@link org.apache.lucene.document.Document} with additional functionality.
@@ -31,12 +27,6 @@ public class LuceneDocument implements Iterable<IndexableField> {
private final String prefix;
private final List<IndexableField> fields;
private Map<Object, IndexableField> keyedFields;
/**
* A sorted map of the serialized values of dimension fields that will be used
* for generating the _tsid field. The map will be used by {@link TimeSeriesIdFieldMapper}
* to build the _tsid field for the document.
*/
private SortedMap<BytesRef, BytesReference> dimensionBytes;

LuceneDocument(String path, LuceneDocument parent) {
fields = new ArrayList<>();
@@ -109,28 +99,6 @@ public IndexableField getByKey(Object key) {
return keyedFields == null ? null : keyedFields.get(key);
}

/**
* Add the serialized byte reference for a dimension field. This will be used by {@link TimeSeriesIdFieldMapper}
* to build the _tsid field for the document.
*/
public void addDimensionBytes(String fieldName, BytesReference tsidBytes) {
BytesRef fieldNameBytes = new BytesRef(fieldName);
if (dimensionBytes == null) {
// It is a {@link TreeMap} so that it is order by field name.
dimensionBytes = new TreeMap<>();
} else if (dimensionBytes.containsKey(fieldNameBytes)) {
throw new IllegalArgumentException("Dimension field [" + fieldName + "] cannot be a multi-valued field.");
}
dimensionBytes.put(fieldNameBytes, tsidBytes);
}

public SortedMap<BytesRef, BytesReference> getDimensionBytes() {
if (dimensionBytes == null) {
return Collections.emptySortedMap();
}
return dimensionBytes;
}

public IndexableField[] getFields(String name) {
List<IndexableField> f = new ArrayList<>();
for (IndexableField field : fields) {
server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java
@@ -28,7 +28,6 @@
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.Numbers;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
@@ -1476,14 +1475,7 @@ private static Number value(XContentParser parser, NumberType numberType, Number

private void indexValue(DocumentParserContext context, Number numericValue) {
if (dimension && numericValue != null) {
// Dimension can only be one of byte, short, int, long. So, we encode the tsid
// part of the dimension field by using the long value.
// Also, there is no point in encoding the tsid value if we do not generate
// the _tsid field.
BytesReference bytes = context.getMetadataMapper(TimeSeriesIdFieldMapper.NAME) != null
? TimeSeriesIdFieldMapper.encodeTsidValue(numericValue.longValue())
: null;
context.doc().addDimensionBytes(fieldType().name(), bytes);
context.getDimensions().addLong(fieldType().name(), numericValue.longValue());
}
List<Field> fields = fieldType().type.createFields(fieldType().name(), numericValue, indexed, hasDocValues, stored);
context.doc().addAll(fields);
