Add XContentFieldFilter (#83152)

This commit introduces XContentFieldFilter, which applies field includes/excludes to XContent without having to materialise the XContent as a Java map. SourceFieldMapper and ShardGetService are cut over to use this class.
elastic · Feb 2, 2022 · 919ed3a · 919ed3a
1 parent 63ce3af
commit 919ed3a
Show file tree

Hide file tree

Showing 5 changed files with 685 additions and 34 deletions.
diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentFieldFilter.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentFieldFilter.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.common.xcontent;
+
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+import org.elasticsearch.common.util.CollectionUtils;
+import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.core.CheckedFunction;
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentFactory;
+import org.elasticsearch.xcontent.XContentParser;
+import org.elasticsearch.xcontent.XContentParserConfiguration;
+import org.elasticsearch.xcontent.XContentType;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+
+/**
+ * A filter that removes fields from a document source according to include and exclude patterns.
+ */
+public interface XContentFieldFilter {
+    /**
+     * Filters a source given as a {@link BytesReference}.
+     *
+     * @param sourceBytes  the source to filter
+     * @param xContentType the content type of {@code sourceBytes}; may be {@code null} in some cases, in which case the
+     *                     content type should be guessed from {@code sourceBytes}
+     * @return the filtered source
+     * @throws IOException if reading the source or writing the filtered result fails
+     */
+    BytesReference apply(BytesReference sourceBytes, @Nullable XContentType xContentType) throws IOException;
+
+    /**
+     * Constructs an {@link XContentFieldFilter} from the given includes and excludes.
+     * <p>
+     * A {@code null} array is treated like an empty one, and duplicate entries are tolerated.
+     * The filters returned here throw {@link IllegalStateException} when they are applied to a {@code null} or
+     * empty source without a content type, because there is then nothing to derive the output format from.
+     *
+     * @param includes fields to keep, wildcard supported
+     * @param excludes fields to remove, wildcard supported
+     * @return a filter that filters {@link org.elasticsearch.xcontent.XContent} with the given includes and excludes
+     */
+    static XContentFieldFilter newFieldFilter(String[] includes, String[] excludes) {
+        // Serialises an empty map in the requested content type; used whenever there is nothing to filter.
+        final CheckedFunction<XContentType, BytesReference, IOException> emptyValueSupplier = xContentType -> {
+            BytesStreamOutput bStream = new BytesStreamOutput();
+            try (XContentBuilder builder = XContentFactory.contentBuilder(xContentType, bStream)) {
+                builder.map(Collections.emptyMap());
+            }
+            return bStream.bytes();
+        };
+        // Use the old map-based filtering mechanism if there are wildcards in the excludes.
+        // TODO: Remove this if block once: https://github.com/elastic/elasticsearch/pull/80160 is merged
+        final boolean hasWildcardExcludes = CollectionUtils.isEmpty(excludes) == false
+            && Arrays.stream(excludes).anyMatch(field -> field.contains("*"));
+        if (hasWildcardExcludes) {
+            // The map filter depends only on includes/excludes, so build it once rather than on every apply() call.
+            final Function<Map<String, ?>, Map<String, Object>> mapFilter = XContentMapValues.filter(includes, excludes);
+            return (originalSource, contentType) -> {
+                if (originalSource == null || originalSource.length() <= 0) {
+                    if (contentType == null) {
+                        throw new IllegalStateException("originalSource and contentType can not be null at the same time");
+                    }
+                    return emptyValueSupplier.apply(contentType);
+                }
+                Tuple<XContentType, Map<String, Object>> mapTuple = XContentHelper.convertToMap(originalSource, true, contentType);
+                Map<String, Object> filteredSource = mapFilter.apply(mapTuple.v2());
+                BytesStreamOutput bStream = new BytesStreamOutput();
+                // Write the result back in the content type that convertToMap reported for the source.
+                try (XContentBuilder builder = XContentFactory.contentBuilder(mapTuple.v1(), bStream)) {
+                    builder.map(filteredSource);
+                }
+                return bStream.bytes();
+            };
+        } else {
+            // Set.of(array) rejects duplicate entries and null arrays; Set.copyOf de-duplicates instead, and a null
+            // array is mapped to "no filter" so that both branches of this factory accept the same inputs.
+            final Set<String> includeSet = includes == null ? Set.of() : Set.copyOf(Arrays.asList(includes));
+            final Set<String> excludeSet = excludes == null ? Set.of() : Set.copyOf(Arrays.asList(excludes));
+            final XContentParserConfiguration parserConfig = XContentParserConfiguration.EMPTY.withFiltering(
+                includeSet,
+                excludeSet,
+                true
+            );
+            return (originalSource, contentType) -> {
+                if (originalSource == null || originalSource.length() <= 0) {
+                    if (contentType == null) {
+                        throw new IllegalStateException("originalSource and contentType can not be null at the same time");
+                    }
+                    return emptyValueSupplier.apply(contentType);
+                }
+                // NOTE(review): the content type is guessed with a helper that accepts compressed bytes, but the parser
+                // below reads originalSource as-is - confirm that a compressed source cannot reach this branch.
+                final XContentType actualContentType = contentType != null
+                    ? contentType
+                    : XContentHelper.xContentTypeMayCompressed(originalSource);
+                // Close the parser on every exit path, including the early return for an empty token stream.
+                try (XContentParser parser = actualContentType.xContent().createParser(parserConfig, originalSource.streamInput())) {
+                    if ((parser.currentToken() == null) && (parser.nextToken() == null)) {
+                        return emptyValueSupplier.apply(actualContentType);
+                    }
+                    BytesStreamOutput streamOutput = new BytesStreamOutput(Math.min(1024, originalSource.length()));
+                    try (XContentBuilder builder = new XContentBuilder(actualContentType.xContent(), streamOutput)) {
+                        builder.copyCurrentStructure(parser);
+                        return BytesReference.bytes(builder);
+                    }
+                }
+            };
+        }
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java
@@ -524,6 +524,32 @@ public static BytesReference toXContent(ToXContent toXContent, XContentType xCon
         }
     }
 
+    /**
+     * Guesses the content type based on the provided bytes which may be compressed.
+     * <p>
+     * When a compressor is detected for the bytes, the content type is guessed from the decompressed stream;
+     * otherwise the bytes are inspected directly.
+     *
+     * @param bytes the bytes to inspect, compressed or not
+     * @return the guessed content type
+     * @deprecated the content type should not be guessed except for few cases where we effectively don't know the content type.
+     * The REST layer should move to reading the Content-Type header instead. There are other places where auto-detection may be needed.
+     * This method is deprecated to prevent usages of it from spreading further without specific reasons.
+     */
+    @Deprecated
+    public static XContentType xContentTypeMayCompressed(BytesReference bytes) {
+        Compressor compressor = CompressorFactory.compressor(bytes);
+        if (compressor == null) {
+            return XContentHelper.xContentType(bytes);
+        }
+        // Close the decompressing stream once the type has been guessed so that it does not leak.
+        try (InputStream decompressed = compressor.threadLocalInputStream(bytes.streamInput())) {
+            // Wrap the stream in a BufferedInputStream when it does not support mark/reset itself.
+            InputStream markable = decompressed.markSupported() ? decompressed : new BufferedInputStream(decompressed);
+            return XContentFactory.xContentType(markable);
+        } catch (IOException e) {
+            assert false : "Should not happen, we're just reading bytes from memory";
+            throw new UncheckedIOException(e);
+        }
+    }
+
     /**
      * Guesses the content type based on the provided bytes.
      *

diff --git a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java
@@ -16,10 +16,8 @@
 import org.elasticsearch.common.metrics.CounterMetric;
 import org.elasticsearch.common.metrics.MeanMetric;
 import org.elasticsearch.common.util.set.Sets;
-import org.elasticsearch.common.xcontent.XContentHelper;
-import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.common.xcontent.XContentFieldFilter;
 import org.elasticsearch.core.Nullable;
-import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.VersionType;
 import org.elasticsearch.index.engine.Engine;
@@ -33,8 +31,6 @@
 import org.elasticsearch.index.shard.AbstractIndexShardComponent;
 import org.elasticsearch.index.shard.IndexShard;
 import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
-import org.elasticsearch.xcontent.XContentFactory;
-import org.elasticsearch.xcontent.XContentType;
 
 import java.io.IOException;
 import java.util.HashMap;
@@ -253,15 +249,11 @@ private GetResult innerGetLoadFromStoredFields(
             if (fetchSourceContext.fetchSource() == false) {
                 source = null;
             } else if (fetchSourceContext.includes().length > 0 || fetchSourceContext.excludes().length > 0) {
-                Map<String, Object> sourceAsMap;
                 // TODO: The source might be parsed and available in the sourceLookup but that one uses unordered maps so different.
                 // Do we care?
-                Tuple<XContentType, Map<String, Object>> typeMapTuple = XContentHelper.convertToMap(source, true);
-                XContentType sourceContentType = typeMapTuple.v1();
-                sourceAsMap = typeMapTuple.v2();
-                sourceAsMap = XContentMapValues.filter(sourceAsMap, fetchSourceContext.includes(), fetchSourceContext.excludes());
                 try {
-                    source = BytesReference.bytes(XContentFactory.contentBuilder(sourceContentType).map(sourceAsMap));
+                    source = XContentFieldFilter.newFieldFilter(fetchSourceContext.includes(), fetchSourceContext.excludes())
+                        .apply(source, null);
                 } catch (IOException e) {
                     throw new ElasticsearchException("Failed to get id [" + id + "] with includes/excludes set", e);
                 }

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java
@@ -16,32 +16,24 @@
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesReference;
-import org.elasticsearch.common.io.stream.BytesStreamOutput;
 import org.elasticsearch.common.util.CollectionUtils;
-import org.elasticsearch.common.xcontent.XContentHelper;
-import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.common.xcontent.XContentFieldFilter;
 import org.elasticsearch.core.Nullable;
-import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.query.QueryShardException;
 import org.elasticsearch.index.query.SearchExecutionContext;
-import org.elasticsearch.xcontent.XContentBuilder;
-import org.elasticsearch.xcontent.XContentFactory;
 import org.elasticsearch.xcontent.XContentType;
 
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
-import java.util.Map;
-import java.util.function.Function;
 
 public class SourceFieldMapper extends MetadataFieldMapper {
-
     public static final String NAME = "_source";
     public static final String RECOVERY_SOURCE_NAME = "_recovery_source";
 
     public static final String CONTENT_TYPE = "_source";
-    private final Function<Map<String, ?>, Map<String, Object>> filter;
+    private final XContentFieldFilter filter;
 
     private static final SourceFieldMapper DEFAULT = new SourceFieldMapper(Defaults.ENABLED, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY);
 
@@ -145,7 +137,9 @@ private SourceFieldMapper(boolean enabled, String[] includes, String[] excludes)
         this.includes = includes;
         this.excludes = excludes;
         final boolean filtered = CollectionUtils.isEmpty(includes) == false || CollectionUtils.isEmpty(excludes) == false;
-        this.filter = enabled && filtered ? XContentMapValues.filter(includes, excludes) : null;
+        this.filter = enabled && filtered
+            ? XContentFieldFilter.newFieldFilter(includes, excludes)
+            : (sourceBytes, contentType) -> sourceBytes;
         this.complete = enabled && CollectionUtils.isEmpty(includes) && CollectionUtils.isEmpty(excludes);
     }
 
@@ -180,18 +174,7 @@ public void preParse(DocumentParserContext context) throws IOException {
     public BytesReference applyFilters(@Nullable BytesReference originalSource, @Nullable XContentType contentType) throws IOException {
         if (enabled && originalSource != null) {
             // Percolate and tv APIs may not set the source and that is ok, because these APIs will not index any data
-            if (filter != null) {
-                // we don't update the context source if we filter, we want to keep it as is...
-                Tuple<XContentType, Map<String, Object>> mapTuple = XContentHelper.convertToMap(originalSource, true, contentType);
-                Map<String, Object> filteredSource = filter.apply(mapTuple.v2());
-                BytesStreamOutput bStream = new BytesStreamOutput();
-                XContentType actualContentType = mapTuple.v1();
-                XContentBuilder builder = XContentFactory.contentBuilder(actualContentType, bStream).map(filteredSource);
-                builder.close();
-                return bStream.bytes();
-            } else {
-                return originalSource;
-            }
+            return filter.apply(originalSource, contentType);
         } else {
             return null;
         }