diff --git a/docs/changelog/138299.yaml b/docs/changelog/138299.yaml new file mode 100644 index 0000000000000..8b2dad06c8eca --- /dev/null +++ b/docs/changelog/138299.yaml @@ -0,0 +1,5 @@ +pr: 138299 +summary: Add `PerFieldStoredFieldsFormat` to allow multiple stored field formats +area: Codec +type: enhancement +issues: [] diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 9c5d11e1cf9e1..33d512b046a99 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -245,6 +245,7 @@ exports org.elasticsearch.index.codec; exports org.elasticsearch.index.codec.tsdb; exports org.elasticsearch.index.codec.bloomfilter; + exports org.elasticsearch.index.codec.storedfields; exports org.elasticsearch.index.codec.zstd; exports org.elasticsearch.index.engine; exports org.elasticsearch.index.fielddata; @@ -468,6 +469,13 @@ org.elasticsearch.index.codec.vectors.es93.ES93BinaryQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.es93.ES93HnswVectorsFormat, org.elasticsearch.index.codec.vectors.es93.ES93HnswBinaryQuantizedVectorsFormat; + provides org.elasticsearch.index.codec.storedfields.ESStoredFieldsFormat + with + org.elasticsearch.index.codec.storedfields.ESZstd814StoredFieldsFormat, + org.elasticsearch.index.codec.storedfields.ESLucene90StoredFieldsFormat, + org.elasticsearch.index.codec.bloomfilter.ES93BloomFilterStoredFieldsFormat; + + uses org.elasticsearch.index.codec.storedfields.ESStoredFieldsFormat; provides org.apache.lucene.codecs.Codec with diff --git a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java index 9cbbb9879523a..f547d6cd4b7c1 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java +++ 
b/server/src/main/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormat.java @@ -11,7 +11,6 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FieldsProducer; -import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.StoredFieldsWriter; import org.apache.lucene.index.CorruptIndexException; @@ -40,12 +39,14 @@ import org.elasticsearch.common.util.ByteArray; import org.elasticsearch.core.IOUtils; import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.codec.storedfields.ESStoredFieldsFormat; import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Objects; +import java.util.Set; import java.util.function.IntSupplier; import static org.elasticsearch.index.codec.bloomfilter.BloomFilterHashFunctions.MurmurHash3.hash64; @@ -72,10 +73,14 @@ * be found in {@link BloomFilterMetadata}. * */ -public class ES93BloomFilterStoredFieldsFormat extends StoredFieldsFormat { +public class ES93BloomFilterStoredFieldsFormat extends ESStoredFieldsFormat { public static final String STORED_FIELDS_BLOOM_FILTER_FORMAT_NAME = "ES93BloomFilterStoredFieldsFormat"; public static final String STORED_FIELDS_BLOOM_FILTER_EXTENSION = "sfbf"; public static final String STORED_FIELDS_METADATA_BLOOM_FILTER_EXTENSION = "sfbfm"; + private static final Set FILE_EXTENSIONS = Set.of( + STORED_FIELDS_METADATA_BLOOM_FILTER_EXTENSION, + STORED_FIELDS_BLOOM_FILTER_EXTENSION + ); private static final int VERSION_START = 0; private static final int VERSION_CURRENT = VERSION_START; @@ -85,24 +90,26 @@ public class ES93BloomFilterStoredFieldsFormat extends StoredFieldsFormat { private static final byte BLOOM_FILTER_STORED = 1; private static final byte BLOOM_FILTER_NOT_STORED = 0; private static final ByteSizeValue MAX_BLOOM_FILTER_SIZE = ByteSizeValue.ofMb(8); + private static final String
DEFAULT_SEGMENT_SUFFIX = ""; + public static final ByteSizeValue DEFAULT_BLOOM_FILTER_SIZE = ByteSizeValue.ofKb(2); private final BigArrays bigArrays; - private final String segmentSuffix; - private final StoredFieldsFormat delegate; private final String bloomFilterFieldName; private final int numHashFunctions; private final int bloomFilterSizeInBits; - public ES93BloomFilterStoredFieldsFormat( - BigArrays bigArrays, - String segmentSuffix, - StoredFieldsFormat delegate, - ByteSizeValue bloomFilterSize, - String bloomFilterFieldName - ) { + // Public constructor SPI use for reads only + public ES93BloomFilterStoredFieldsFormat() { + super(STORED_FIELDS_BLOOM_FILTER_FORMAT_NAME); + bigArrays = null; + bloomFilterFieldName = null; + numHashFunctions = 0; + bloomFilterSizeInBits = 0; + } + + public ES93BloomFilterStoredFieldsFormat(BigArrays bigArrays, ByteSizeValue bloomFilterSize, String bloomFilterFieldName) { + super(STORED_FIELDS_BLOOM_FILTER_FORMAT_NAME); this.bigArrays = bigArrays; - this.segmentSuffix = segmentSuffix; - this.delegate = delegate; this.bloomFilterFieldName = bloomFilterFieldName; this.numHashFunctions = DEFAULT_NUM_HASH_FUNCTIONS; @@ -115,29 +122,28 @@ public ES93BloomFilterStoredFieldsFormat( @Override public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { - return new Reader(directory, si, fn, context, segmentSuffix, delegate.fieldsReader(directory, si, fn, context)); + return new Reader(directory, si, fn, context); } @Override public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException { + assert bigArrays != null; + assert bloomFilterFieldName != null; + assert numHashFunctions > 0; + assert bloomFilterSizeInBits > 0; // TODO: compute the bloom filter size based on heuristics and oversize factor - return new Writer( - directory, - si, - context, - segmentSuffix, - bigArrays, - numHashFunctions, - 
this::getBloomFilterSizeInBits, - bloomFilterFieldName, - delegate.fieldsWriter(directory, si, context) - ); + return new Writer(directory, si, context, bigArrays, numHashFunctions, this::getBloomFilterSizeInBits, bloomFilterFieldName); } int getBloomFilterSizeInBits() { return bloomFilterSizeInBits; } + @Override + protected Set getFileExtensions() { + return FILE_EXTENSIONS; + } + static int closestPowerOfTwoBloomFilterSizeInBits(ByteSizeValue bloomFilterSize) { var closestPowerOfTwoBloomFilterSizeInBytes = Long.highestOneBit(bloomFilterSize.getBytes()); if (closestPowerOfTwoBloomFilterSizeInBytes > MAX_BLOOM_FILTER_SIZE.getBytes()) { @@ -157,12 +163,10 @@ static class Writer extends StoredFieldsWriter { private final Directory directory; private final SegmentInfo segmentInfo; private final IOContext context; - private final String segmentSuffix; private final BigArrays bigArrays; private final IntSupplier defaultBloomFilterSizeInBitsSupplier; private final int numHashFunctions; private final String bloomFilterFieldName; - private final StoredFieldsWriter delegateWriter; private final List toClose = new ArrayList<>(); private final IndexOutput metadataOut; @@ -172,17 +176,14 @@ static class Writer extends StoredFieldsWriter { Directory directory, SegmentInfo segmentInfo, IOContext context, - String segmentSuffix, BigArrays bigArrays, int numHashFunctions, IntSupplier defaultBloomFilterSizeInBitsSupplier, - String bloomFilterFieldName, - StoredFieldsWriter delegateWriter + String bloomFilterFieldName ) throws IOException { this.directory = directory; this.segmentInfo = segmentInfo; this.context = context; - this.segmentSuffix = segmentSuffix; this.bigArrays = bigArrays; this.defaultBloomFilterSizeInBitsSupplier = defaultBloomFilterSizeInBitsSupplier; assert numHashFunctions <= PRIMES.length @@ -191,19 +192,16 @@ static class Writer extends StoredFieldsWriter { this.numHashFunctions = numHashFunctions; this.bloomFilterFieldName = bloomFilterFieldName; - 
this.delegateWriter = delegateWriter; - toClose.add(delegateWriter); - boolean success = false; try { - metadataOut = directory.createOutput(bloomFilterMetadataFileName(segmentInfo, segmentSuffix), context); + metadataOut = directory.createOutput(bloomFilterMetadataFileName(segmentInfo), context); toClose.add(metadataOut); CodecUtil.writeIndexHeader( metadataOut, STORED_FIELDS_BLOOM_FILTER_FORMAT_NAME, VERSION_CURRENT, segmentInfo.getId(), - segmentSuffix + DEFAULT_SEGMENT_SUFFIX ); success = true; @@ -215,55 +213,43 @@ static class Writer extends StoredFieldsWriter { } @Override - public void startDocument() throws IOException { - delegateWriter.startDocument(); + public void startDocument() { + } @Override - public void finishDocument() throws IOException { - delegateWriter.finishDocument(); + public void finishDocument() { + } @Override public void writeField(FieldInfo info, int value) throws IOException { - if (isBloomFilterField(info) == false) { - delegateWriter.writeField(info, value); - } + throwUnsupported(info, "int"); } @Override public void writeField(FieldInfo info, long value) throws IOException { - if (isBloomFilterField(info) == false) { - delegateWriter.writeField(info, value); - } + throwUnsupported(info, "long"); } @Override public void writeField(FieldInfo info, float value) throws IOException { - if (isBloomFilterField(info) == false) { - delegateWriter.writeField(info, value); - } + throwUnsupported(info, "float"); } @Override public void writeField(FieldInfo info, double value) throws IOException { - if (isBloomFilterField(info) == false) { - delegateWriter.writeField(info, value); - } + throwUnsupported(info, "double"); } @Override public void writeField(FieldInfo info, StoredFieldDataInput value) throws IOException { - if (isBloomFilterField(info) == false) { - delegateWriter.writeField(info, value); - } + throwUnsupported(info, "StoredFieldDataInput"); } @Override public void writeField(FieldInfo info, String value) throws IOException { - 
if (isBloomFilterField(info) == false) { - delegateWriter.writeField(info, value); - } + throwUnsupported(info, "String"); } @Override @@ -271,10 +257,16 @@ public void writeField(FieldInfo info, BytesRef value) throws IOException { if (isBloomFilterField(info)) { addToBloomFilter(info, value); } else { - delegateWriter.writeField(info, value); + throw new IllegalArgumentException("Bloom filter field [" + info.name + "] is not supported"); } } + private void throwUnsupported(FieldInfo info, String dataType) { + throw new UnsupportedOperationException( + "writeField operation not supported for field '" + info.name + "' with type " + dataType + ); + } + private boolean isBloomFilterField(FieldInfo info) { return (bloomFilterWriter != null && bloomFilterWriter.fieldInfo.getFieldNumber() == info.getFieldNumber()) || info.getName().equals(bloomFilterFieldName); @@ -295,7 +287,6 @@ private void addToBloomFilter(FieldInfo info, BytesRef value) throws IOException @Override public void finish(int numDocs) throws IOException { finishBloomFilterStoredFormat(); - delegateWriter.finish(numDocs); } private void finishBloomFilterStoredFormat() throws IOException { @@ -318,7 +309,7 @@ public int merge(MergeState mergeState) throws IOException { rebuildBloomFilterFromSegments(mergeState); } finishBloomFilterStoredFormat(); - return delegateWriter.merge(mergeState); + return 0; } private void mergeOptimized(MergeState mergeState) throws IOException { @@ -432,7 +423,7 @@ public void close() throws IOException { @Override public long ramBytesUsed() { - return bloomFilterWriter == null ? 0 : bloomFilterWriter.buffer.ramBytesUsed() + delegateWriter.ramBytesUsed(); + return bloomFilterWriter == null ? 
0 : bloomFilterWriter.buffer.ramBytesUsed(); } private void maybeInitializeBloomFilterWriter(FieldInfo fieldInfo, int bitSetSizeInBits) throws IOException { @@ -466,7 +457,7 @@ class BloomFilterWriter implements Closeable { this.bitSetSizeInBytes = bitsetSizeInBits / Byte.SIZE; this.buffer = bigArrays.newByteArray(bitSetSizeInBytes, false); this.hashes = new int[numHashFunctions]; - this.bloomFilterDataOut = directory.createOutput(bloomFilterFileName(segmentInfo, segmentSuffix), context); + this.bloomFilterDataOut = directory.createOutput(bloomFilterFileName(segmentInfo), context); boolean success = false; try { @@ -475,7 +466,7 @@ class BloomFilterWriter implements Closeable { STORED_FIELDS_BLOOM_FILTER_FORMAT_NAME, VERSION_CURRENT, segmentInfo.getId(), - segmentSuffix + DEFAULT_SEGMENT_SUFFIX ); success = true; } finally { @@ -565,26 +556,9 @@ public void close() throws IOException { private static class Reader extends StoredFieldsReader implements BloomFilterProvider { @Nullable private final BloomFilterFieldReader bloomFilterFieldReader; - private final StoredFieldsReader delegateReader; - Reader( - Directory directory, - SegmentInfo si, - FieldInfos fn, - IOContext context, - String segmentSuffix, - StoredFieldsReader delegateReader - ) throws IOException { - this.delegateReader = delegateReader; - var success = false; - try { - bloomFilterFieldReader = BloomFilterFieldReader.open(directory, si, fn, context, segmentSuffix); - success = true; - } finally { - if (success == false) { - delegateReader.close(); - } - } + Reader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { + bloomFilterFieldReader = BloomFilterFieldReader.open(directory, si, fn, context); } @Override @@ -597,17 +571,16 @@ public void checkIntegrity() throws IOException { if (bloomFilterFieldReader != null) { bloomFilterFieldReader.checkIntegrity(); } - delegateReader.checkIntegrity(); } @Override public void close() throws IOException { - 
IOUtils.close(bloomFilterFieldReader, delegateReader); + IOUtils.close(bloomFilterFieldReader); } @Override public void document(int docID, StoredFieldVisitor visitor) throws IOException { - delegateReader.document(docID, visitor); + // TODO: read synthetic _id from doc values } @Override @@ -650,18 +623,17 @@ static class BloomFilterFieldReader implements BloomFilter { private final int[] hashes; @Nullable - static BloomFilterFieldReader open(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context, String segmentSuffix) - throws IOException { + static BloomFilterFieldReader open(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { List toClose = new ArrayList<>(); boolean success = false; - try (var metaInput = directory.openChecksumInput(bloomFilterMetadataFileName(si, segmentSuffix))) { + try (var metaInput = directory.openChecksumInput(bloomFilterMetadataFileName(si))) { var metadataVersion = CodecUtil.checkIndexHeader( metaInput, STORED_FIELDS_BLOOM_FILTER_FORMAT_NAME, VERSION_START, VERSION_CURRENT, si.getId(), - segmentSuffix + DEFAULT_SEGMENT_SUFFIX ); var hasBloomFilter = metaInput.readByte() == BLOOM_FILTER_STORED; if (hasBloomFilter == false) { @@ -670,7 +642,7 @@ static BloomFilterFieldReader open(Directory directory, SegmentInfo si, FieldInf BloomFilterMetadata bloomFilterMetadata = BloomFilterMetadata.readFrom(metaInput, fn); CodecUtil.checkFooter(metaInput); - IndexInput bloomFilterData = directory.openInput(bloomFilterFileName(si, segmentSuffix), context); + IndexInput bloomFilterData = directory.openInput(bloomFilterFileName(si), context); toClose.add(bloomFilterData); var bloomFilterDataVersion = CodecUtil.checkIndexHeader( bloomFilterData, @@ -678,7 +650,7 @@ static BloomFilterFieldReader open(Directory directory, SegmentInfo si, FieldInf VERSION_START, VERSION_CURRENT, si.getId(), - segmentSuffix + DEFAULT_SEGMENT_SUFFIX ); if (metadataVersion != bloomFilterDataVersion) { @@ -767,12 +739,12 @@ 
private static boolean isPowerOfTwo(int value) { return (value & (value - 1)) == 0; } - private static String bloomFilterMetadataFileName(SegmentInfo segmentInfo, String segmentSuffix) { - return IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, STORED_FIELDS_METADATA_BLOOM_FILTER_EXTENSION); + private static String bloomFilterMetadataFileName(SegmentInfo segmentInfo) { + return IndexFileNames.segmentFileName(segmentInfo.name, DEFAULT_SEGMENT_SUFFIX, STORED_FIELDS_METADATA_BLOOM_FILTER_EXTENSION); } - private static String bloomFilterFileName(SegmentInfo segmentInfo, String segmentSuffix) { - return IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, STORED_FIELDS_BLOOM_FILTER_EXTENSION); + private static String bloomFilterFileName(SegmentInfo segmentInfo) { + return IndexFileNames.segmentFileName(segmentInfo.name, DEFAULT_SEGMENT_SUFFIX, STORED_FIELDS_BLOOM_FILTER_EXTENSION); } public interface BloomFilter extends Closeable { diff --git a/server/src/main/java/org/elasticsearch/index/codec/storedfields/ESLucene90StoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/storedfields/ESLucene90StoredFieldsFormat.java new file mode 100644 index 0000000000000..2b639da2627ff --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/storedfields/ESLucene90StoredFieldsFormat.java @@ -0,0 +1,47 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.codec.storedfields; + +import org.apache.lucene.codecs.lucene103.Lucene103Codec; +import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat; +import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsWriter; + +import java.util.Set; + +/** + * Simple wrapper for Lucene90StoredFieldsFormat that allows it to be loaded through SPI + */ +public class ESLucene90StoredFieldsFormat extends FilterESStoredFieldsFormat { + public static final Set FILE_EXTENSIONS = Set.of( + Lucene90CompressingStoredFieldsWriter.FIELDS_EXTENSION, + Lucene90CompressingStoredFieldsWriter.INDEX_EXTENSION, + Lucene90CompressingStoredFieldsWriter.META_EXTENSION + ); + + public ESLucene90StoredFieldsFormat() { + this(Lucene103Codec.Mode.BEST_SPEED); + } + + public ESLucene90StoredFieldsFormat(Lucene103Codec.Mode mode) { + super( + "ESLucene90StoredFieldsFormat", + new Lucene90StoredFieldsFormat( + mode == Lucene103Codec.Mode.BEST_COMPRESSION + ? Lucene90StoredFieldsFormat.Mode.BEST_COMPRESSION + : Lucene90StoredFieldsFormat.Mode.BEST_SPEED + ) + ); + } + + @Override + protected Set getFileExtensions() { + return FILE_EXTENSIONS; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/storedfields/ESStoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/storedfields/ESStoredFieldsFormat.java new file mode 100644 index 0000000000000..369f6a9bb029e --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/storedfields/ESStoredFieldsFormat.java @@ -0,0 +1,61 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.storedfields; + +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.util.NamedSPILoader; + +import java.util.Set; + +/** + * A {@link StoredFieldsFormat} that can be loaded via SPI and provides a name for identification. + * This is required because {@link PerFieldStoredFieldsFormat} uses SPI to load stored field formats + * when reading fields. + */ +public abstract class ESStoredFieldsFormat extends StoredFieldsFormat implements NamedSPILoader.NamedSPI { + private static final class Holder { + public static final NamedSPILoader LOADER = new NamedSPILoader<>(ESStoredFieldsFormat.class); + + private Holder() {} + + static NamedSPILoader getLoader() { + if (LOADER == null) { + throw new IllegalStateException( + "You tried to lookup a ESStoredFieldsFormat by name before all formats could be initialized." + ); + } + return LOADER; + } + } + + public static ESStoredFieldsFormat forName(String name) { + return Holder.getLoader().lookup(name); + } + + /** + * Unique name that's used to retrieve this format when reading the index. + */ + private final String name; + + protected ESStoredFieldsFormat(String name) { + NamedSPILoader.checkServiceName(name); + this.name = name; + } + + @Override + public String getName() { + return name; + } + + /** + * Returns the set of file fileExtensions that this stored fields format would write to disk. 
+ */ + protected abstract Set getFileExtensions(); +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/storedfields/ESZstd814StoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/storedfields/ESZstd814StoredFieldsFormat.java new file mode 100644 index 0000000000000..084fc2d2c8f8b --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/storedfields/ESZstd814StoredFieldsFormat.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.storedfields; + +import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsWriter; +import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat; + +import java.util.Set; + +/** + * Simple wrapper for Lucene90StoredFieldsFormat that uses zstd for compression. Allowing to be loaded through SPI. 
+ */ +public class ESZstd814StoredFieldsFormat extends FilterESStoredFieldsFormat { + public static final Set FILE_EXTENSIONS = Set.of( + Lucene90CompressingStoredFieldsWriter.FIELDS_EXTENSION, + Lucene90CompressingStoredFieldsWriter.INDEX_EXTENSION, + Lucene90CompressingStoredFieldsWriter.META_EXTENSION + ); + + public ESZstd814StoredFieldsFormat() { + this(Zstd814StoredFieldsFormat.Mode.BEST_SPEED); + } + + public ESZstd814StoredFieldsFormat(Zstd814StoredFieldsFormat.Mode mode) { + super("ESZstd814StoredFieldsFormat", mode.getFormat()); + } + + @Override + protected Set getFileExtensions() { + return FILE_EXTENSIONS; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/storedfields/FilterESStoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/storedfields/FilterESStoredFieldsFormat.java new file mode 100644 index 0000000000000..a3a154095e1ad --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/storedfields/FilterESStoredFieldsFormat.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.codec.storedfields; + +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.StoredFieldsReader; +import org.apache.lucene.codecs.StoredFieldsWriter; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; + +import java.io.IOException; + +abstract class FilterESStoredFieldsFormat extends ESStoredFieldsFormat { + private final StoredFieldsFormat delegate; + + FilterESStoredFieldsFormat(String name, StoredFieldsFormat delegate) { + super(name); + this.delegate = delegate; + } + + @Override + public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { + return delegate.fieldsReader(directory, si, fn, context); + } + + @Override + public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException { + return delegate.fieldsWriter(directory, si, context); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/storedfields/PerFieldStoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/storedfields/PerFieldStoredFieldsFormat.java new file mode 100644 index 0000000000000..cf3abf9ef52a8 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/storedfields/PerFieldStoredFieldsFormat.java @@ -0,0 +1,342 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.codec.storedfields; + +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.StoredFieldsReader; +import org.apache.lucene.codecs.StoredFieldsWriter; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.internal.hppc.IntObjectHashMap; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.util.Maps; +import org.elasticsearch.common.util.set.Sets; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Nullable; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * Enables per field stored fields format support. + * + *

This class uses SPI to resolve format names.

+ * + *

 Files written by each stored fields format should use different file extensions; this is enforced during the writer creation.

+ */ +public abstract class PerFieldStoredFieldsFormat extends StoredFieldsFormat { + public static final String STORED_FIELD_FORMAT_ATTRIBUTE_KEY = "stored_field_format"; + + @Override + public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { + return new PerFieldStoredFieldsReader(directory, si, fn, context); + } + + @Override + public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException { + return new PerFieldStoredFieldsWriter(directory, si, context); + } + + protected abstract ESStoredFieldsFormat getStoredFieldsFormatForField(String field); + + class PerFieldStoredFieldsWriter extends StoredFieldsWriter { + private final IntObjectHashMap fields = new IntObjectHashMap<>(); + private final Map formatWriters = new HashMap<>(); + + private final Directory directory; + private final SegmentInfo si; + private final IOContext context; + + private int numStartedDocs = 0; + private int numFinishedDocs = 0; + + PerFieldStoredFieldsWriter(Directory directory, SegmentInfo si, IOContext context) { + this.directory = directory; + this.si = si; + this.context = context; + } + + @Override + public void startDocument() throws IOException { + for (var writerAndExtensions : formatWriters.values()) { + writerAndExtensions.writer().startDocument(); + } + numStartedDocs++; + } + + @Override + public void finishDocument() throws IOException { + for (var writerAndExtensions : formatWriters.values()) { + writerAndExtensions.writer().finishDocument(); + } + numFinishedDocs++; + } + + @Override + public void writeField(FieldInfo info, int value) throws IOException { + getWriterForField(info).writeField(info, value); + } + + @Override + public void writeField(FieldInfo info, long value) throws IOException { + getWriterForField(info).writeField(info, value); + } + + @Override + public void writeField(FieldInfo info, float value) throws IOException { + 
getWriterForField(info).writeField(info, value); + } + + @Override + public void writeField(FieldInfo info, double value) throws IOException { + getWriterForField(info).writeField(info, value); + } + + @Override + public void writeField(FieldInfo info, BytesRef value) throws IOException { + getWriterForField(info).writeField(info, value); + } + + @Override + public void writeField(FieldInfo info, String value) throws IOException { + getWriterForField(info).writeField(info, value); + } + + @Override + public void finish(int numDocs) throws IOException { + for (var writerAndExtensions : formatWriters.values()) { + writerAndExtensions.writer().finish(numDocs); + } + } + + @Override + public int merge(MergeState mergeState) throws IOException { + Map formatWriters = new HashMap<>(); + for (FieldInfo mergeFieldInfo : mergeState.mergeFieldInfos) { + var writerAndMetadata = getWriterAndMetadataForField(mergeFieldInfo); + formatWriters.put(writerAndMetadata.formatName(), writerAndMetadata.writer()); + } + + var totalDocs = 0; + for (Map.Entry formatNameAndWriter : formatWriters.entrySet()) { + final String writerFormatName = formatNameAndWriter.getKey(); + final StoredFieldsWriter formatWriter = formatNameAndWriter.getValue(); + StoredFieldsReader[] updatedReaders = new StoredFieldsReader[mergeState.storedFieldsReaders.length]; + for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) { + final StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[i]; + + // We need to unwrap the stored field readers belonging to a PerFieldStoredFieldsFormat, + // otherwise, downstream formats won't be able to perform certain optimizations when + // they try to merge segments as they expect an instance of the actual Reader in their checks + // (i.e. 
Lucene90CompressingStoredFieldsReader would do chunk merging for instances of the same class) + if (storedFieldsReader instanceof PerFieldStoredFieldsReader reader) { + final var formatStoredFieldsReader = reader.getFormatToStoredFieldReaders().get(writerFormatName); + // In case that we're dealing with a previous format, we just fall back to the slow path + updatedReaders[i] = Objects.requireNonNullElse(formatStoredFieldsReader, storedFieldsReader); + } else { + updatedReaders[i] = storedFieldsReader; + } + } + + var updatedMergeState = new MergeState( + mergeState.docMaps, + mergeState.segmentInfo, + mergeState.mergeFieldInfos, + updatedReaders, + mergeState.termVectorsReaders, + mergeState.normsProducers, + mergeState.docValuesProducers, + mergeState.fieldInfos, + mergeState.liveDocs, + mergeState.fieldsProducers, + mergeState.pointsReaders, + mergeState.knnVectorsReaders, + mergeState.maxDocs, + mergeState.infoStream, + mergeState.intraMergeTaskExecutor, + mergeState.needsIndexSort + ); + + totalDocs += formatWriter.merge(updatedMergeState); + } + return totalDocs; + } + + @Override + public void close() throws IOException { + IOUtils.close(formatWriters.values()); + } + + @Override + public long ramBytesUsed() { + long ramBytesUsed = 0; + for (var writer : formatWriters.values()) { + ramBytesUsed += writer.writer().ramBytesUsed(); + } + return ramBytesUsed; + } + + private StoredFieldsWriter getWriterForField(FieldInfo field) throws IOException { + return getWriterAndMetadataForField(field).writer; + } + + private StoredFieldsWriterAndMetadata getWriterAndMetadataForField(FieldInfo field) throws IOException { + var writer = fields.get(field.number); + if (writer != null) { + return writer; + } + + var format = getStoredFieldsFormatForField(field.name); + + if (format == null) { + throw new IllegalStateException("invalid null StoredFieldsFormat for field=\"" + field.name + "\""); + } + + var formatWriter = formatWriters.get(format); + if (formatWriter == null) 
{ + for (StoredFieldsWriterAndMetadata value : formatWriters.values()) { + if (Sets.intersection(value.fileExtensions(), format.getFileExtensions()).isEmpty() == false) { + throw new IllegalStateException( + "File extension conflict for field '" + + field.name + + "': format " + + format.getName() + + " has overlapping fileExtensions with existing format" + ); + } + } + formatWriter = new StoredFieldsWriterAndMetadata( + format.getName(), + format.getFileExtensions(), + format.fieldsWriter(directory, si, context) + ); + + // Ensure that the doc count is consistent so when #finish is called + // all formats have a consistent doc count + for (int i = 0; i < numStartedDocs; i++) { + formatWriter.writer().startDocument(); + } + for (int i = 0; i < numFinishedDocs; i++) { + formatWriter.writer().finishDocument(); + } + + var previous = formatWriters.put(format, formatWriter); + assert previous == null; + } + fields.put(field.number, formatWriter); + field.putAttribute(STORED_FIELD_FORMAT_ATTRIBUTE_KEY, format.getName()); + + return formatWriter; + } + } + + record StoredFieldsWriterAndMetadata(String formatName, Set fileExtensions, StoredFieldsWriter writer) implements Closeable { + @Override + public void close() throws IOException { + writer.close(); + } + } + + public static class PerFieldStoredFieldsReader extends StoredFieldsReader { + private final Map formatToStoredFieldReaders; + private final Map fieldToFormat; + + PerFieldStoredFieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { + HashMap formatStoredFieldReaders = new HashMap<>(); + HashMap fieldToFormat = new HashMap<>(); + boolean success = false; + try { + for (FieldInfo fi : fn) { + final String formatName = fi.getAttribute(STORED_FIELD_FORMAT_ATTRIBUTE_KEY); + // Can the format name be null if we're reading a segment written by this codec? 
+ if (formatName != null) { + var storedFieldsReader = formatStoredFieldReaders.get(formatName); + if (storedFieldsReader == null) { + ESStoredFieldsFormat format = ESStoredFieldsFormat.forName(formatName); + storedFieldsReader = format.fieldsReader(directory, si, fn, context); + var previous = formatStoredFieldReaders.put(formatName, storedFieldsReader); + assert previous == null; + } + fieldToFormat.put(fi.name, formatName); + } + } + success = true; + } finally { + if (success == false) { + IOUtils.close(formatStoredFieldReaders.values()); + } + } + this.formatToStoredFieldReaders = Collections.unmodifiableMap(formatStoredFieldReaders); + this.fieldToFormat = Collections.unmodifiableMap(fieldToFormat); + } + + PerFieldStoredFieldsReader(Map formatToStoredFieldReaders, Map fieldToFormat) { + this.formatToStoredFieldReaders = Collections.unmodifiableMap(formatToStoredFieldReaders); + this.fieldToFormat = Collections.unmodifiableMap(fieldToFormat); + } + + @Override + public StoredFieldsReader clone() { + Map clonedFormats = Maps.newMapWithExpectedSize(formatToStoredFieldReaders.size()); + for (Map.Entry entry : formatToStoredFieldReaders.entrySet()) { + clonedFormats.put(entry.getKey(), entry.getValue().clone()); + } + return new PerFieldStoredFieldsReader(clonedFormats, fieldToFormat); + } + + @Override + public StoredFieldsReader getMergeInstance() { + Map mergeFormats = Maps.newMapWithExpectedSize(formatToStoredFieldReaders.size()); + for (Map.Entry entry : formatToStoredFieldReaders.entrySet()) { + mergeFormats.put(entry.getKey(), entry.getValue().getMergeInstance()); + } + return new PerFieldStoredFieldsReader(mergeFormats, fieldToFormat); + } + + @Override + public void checkIntegrity() throws IOException { + for (StoredFieldsReader storedFieldsReader : formatToStoredFieldReaders.values()) { + storedFieldsReader.checkIntegrity(); + } + } + + @Override + public void close() throws IOException { + IOUtils.close(formatToStoredFieldReaders.values()); + } + + 
@Override + public void document(int docID, StoredFieldVisitor visitor) throws IOException { + for (StoredFieldsReader storedFieldsReader : formatToStoredFieldReaders.values()) { + storedFieldsReader.document(docID, visitor); + } + } + + @Nullable + public StoredFieldsReader getReaderForField(String fieldName) { + String formatName = fieldToFormat.get(fieldName); + return formatName != null ? formatToStoredFieldReaders.get(formatName) : null; + } + + private Map getFormatToStoredFieldReaders() { + return formatToStoredFieldReaders; + } + } +} diff --git a/server/src/main/resources/META-INF/services/org.elasticsearch.index.codec.storedfields.ESStoredFieldsFormat b/server/src/main/resources/META-INF/services/org.elasticsearch.index.codec.storedfields.ESStoredFieldsFormat new file mode 100644 index 0000000000000..8687fd881249e --- /dev/null +++ b/server/src/main/resources/META-INF/services/org.elasticsearch.index.codec.storedfields.ESStoredFieldsFormat @@ -0,0 +1,3 @@ +org.elasticsearch.index.codec.storedfields.ESZstd814StoredFieldsFormat +org.elasticsearch.index.codec.storedfields.ESLucene90StoredFieldsFormat +org.elasticsearch.index.codec.bloomfilter.ES93BloomFilterStoredFieldsFormat diff --git a/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java index 17bd1c9c13675..d8fdd14ab38eb 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/bloomfilter/ES93BloomFilterStoredFieldsFormatTests.java @@ -10,12 +10,13 @@ package org.elasticsearch.index.codec.bloomfilter; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.StoredFieldsReader; import 
org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.LongField; import org.apache.lucene.document.StringField; +import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FilterMergePolicy; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -29,14 +30,15 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.tests.codecs.asserting.AssertingCodec; -import org.apache.lucene.tests.index.BaseStoredFieldsFormatTestCase; -import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.UUIDs; -import org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.index.codec.storedfields.ESLucene90StoredFieldsFormat; +import org.elasticsearch.index.codec.storedfields.ESStoredFieldsFormat; +import org.elasticsearch.index.codec.storedfields.PerFieldStoredFieldsFormat; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.test.ESTestCase; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -49,40 +51,22 @@ import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.nullValue; -public class ES93BloomFilterStoredFieldsFormatTests extends BaseStoredFieldsFormatTestCase { - - static { - LogConfigurator.loadLog4jPlugins(); - LogConfigurator.configureESLogging(); // native access requires logging to be initialized - } - - @Override - protected Codec getCodec() { - return new AssertingCodec() { - @Override - public StoredFieldsFormat storedFieldsFormat() { - var bloomFilterSizeInKb = atLeast(2); - return new ES93BloomFilterStoredFieldsFormat( - BigArrays.NON_RECYCLING_INSTANCE, - "", - TestUtil.getDefaultCodec().storedFieldsFormat(), - 
ByteSizeValue.ofKb(bloomFilterSizeInKb), - IdFieldMapper.NAME - ); - } - }; - } - - @Override - protected void addRandomFields(Document doc) { - - } - +public class ES93BloomFilterStoredFieldsFormatTests extends ESTestCase { public void testBloomFilterFieldIsNotStoredAndBloomFilterCanBeChecked() throws IOException { try (var directory = newDirectory()) { Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig conf = newIndexWriterConfig(analyzer); - conf.setCodec(getCodec()); + var bloomFilterSizeInKb = atLeast(2); + conf.setCodec( + new TestCodec( + IdFieldMapper.NAME, + new ES93BloomFilterStoredFieldsFormat( + BigArrays.NON_RECYCLING_INSTANCE, + ByteSizeValue.ofKb(bloomFilterSizeInKb), + IdFieldMapper.NAME + ) + ) + ); conf.setMergePolicy(newLogMergePolicy()); // We want to have at most 1 segment conf.setMaxBufferedDocs(200); @@ -112,8 +96,6 @@ public StoredFieldsFormat storedFieldsFormat() { var bloomFilterSizeInKb = atLeast(2); return new ES93BloomFilterStoredFieldsFormat( BigArrays.NON_RECYCLING_INSTANCE, - "", - TestUtil.getDefaultCodec().storedFieldsFormat(), ByteSizeValue.ofKb(bloomFilterSizeInKb), IdFieldMapper.NAME ) { @@ -132,6 +114,31 @@ int getBloomFilterSizeInBits() { }; } }); + var bloomFilterSizeInKb = atLeast(2); + conf.setCodec( + new TestCodec( + IdFieldMapper.NAME, + new ES93BloomFilterStoredFieldsFormat( + BigArrays.NON_RECYCLING_INSTANCE, + ByteSizeValue.ofKb(bloomFilterSizeInKb), + IdFieldMapper.NAME + ) { + @Override + int getBloomFilterSizeInBits() { + if (randomBloomFilterSizes) { + // Use different power of 2 values so we rebuild the bloom filter from the _id terms + var bloomFilterSizeInBytes = ByteSizeValue.ofKb(1).getBytes() << atLeast(5); + + return ES93BloomFilterStoredFieldsFormat.closestPowerOfTwoBloomFilterSizeInBits( + ByteSizeValue.ofBytes(bloomFilterSizeInBytes) + ); + } + return super.getBloomFilterSizeInBits(); + } + } + + ) + ); conf.setMergePolicy(new FilterMergePolicy(newLogMergePolicy()) { @Override public 
boolean useCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo, MergeContext mergeContext) { @@ -174,8 +181,7 @@ private static List indexDocs(IndexWriter writer) throws IOException { private void assertBloomFilterTestsPositiveForExistingDocs(IndexWriter writer, List indexedIds) throws IOException { try (var directoryReader = StandardDirectoryReader.open(writer)) { for (LeafReaderContext leaf : directoryReader.leaves()) { - try (ES93BloomFilterStoredFieldsFormat.BloomFilterProvider fieldReader = getBloomFilterProvider(leaf)) { - var bloomFilter = fieldReader.getBloomFilter(); + try (ES93BloomFilterStoredFieldsFormat.BloomFilter bloomFilter = getBloomFilterProvider(leaf)) { // the bloom filter reader is null only if the _id field is not stored during indexing assertThat(bloomFilter, is(not(nullValue()))); @@ -201,20 +207,65 @@ private void assertBloomFilterTestsPositiveForExistingDocs(IndexWriter writer, L } } - private static BytesRef getBytesRefFromString(String random) { - return new BytesRef(random.getBytes(StandardCharsets.UTF_8)); + private static BytesRef getBytesRefFromString(String string) { + return new BytesRef(string.getBytes(StandardCharsets.UTF_8)); } - private ES93BloomFilterStoredFieldsFormat.BloomFilterProvider getBloomFilterProvider(LeafReaderContext leafReaderContext) - throws IOException { + private ES93BloomFilterStoredFieldsFormat.BloomFilter getBloomFilterProvider(LeafReaderContext leafReaderContext) throws IOException { LeafReader reader = leafReaderContext.reader(); - var fieldInfos = reader.getFieldInfos(); + FieldInfos fieldInfos = reader.getFieldInfos(); assertThat(reader, is(instanceOf(SegmentReader.class))); SegmentReader segmentReader = (SegmentReader) reader; SegmentInfo si = segmentReader.getSegmentInfo().info; - var storedFieldsReader = si.getCodec().storedFieldsFormat().fieldsReader(si.dir, si, fieldInfos, IOContext.DEFAULT); - assertThat(storedFieldsReader, 
is(instanceOf(ES93BloomFilterStoredFieldsFormat.BloomFilterProvider.class))); - return ((ES93BloomFilterStoredFieldsFormat.BloomFilterProvider) storedFieldsReader); + StoredFieldsReader storedFieldsReader = si.getCodec().storedFieldsFormat().fieldsReader(si.dir, si, fieldInfos, IOContext.DEFAULT); + + assertThat(storedFieldsReader, is(instanceOf(PerFieldStoredFieldsFormat.PerFieldStoredFieldsReader.class))); + + PerFieldStoredFieldsFormat.PerFieldStoredFieldsReader perFieldStoredFieldsReader = + (PerFieldStoredFieldsFormat.PerFieldStoredFieldsReader) storedFieldsReader; + + StoredFieldsReader bloomFilterReader = perFieldStoredFieldsReader.getReaderForField(IdFieldMapper.NAME); + + assertThat(bloomFilterReader, is(instanceOf(ES93BloomFilterStoredFieldsFormat.BloomFilterProvider.class))); + ES93BloomFilterStoredFieldsFormat.BloomFilterProvider bloomFilterProvider = + (ES93BloomFilterStoredFieldsFormat.BloomFilterProvider) bloomFilterReader; + var bloomFilter = bloomFilterProvider.getBloomFilter(); + // Wrap the reader in a bloom filter so we can close it after we're done with it + return new ES93BloomFilterStoredFieldsFormat.BloomFilter() { + @Override + public boolean mayContainTerm(String field, BytesRef term) throws IOException { + return bloomFilter.mayContainTerm(field, term); + } + + @Override + public void close() throws IOException { + storedFieldsReader.close(); + } + }; + } + + static class TestCodec extends AssertingCodec { + private final String bloomFilterField; + private final ES93BloomFilterStoredFieldsFormat bloomFilterStoredFieldsFormat; + private final ESStoredFieldsFormat defaultStoredFieldsFormat = new ESLucene90StoredFieldsFormat(); + + TestCodec(String bloomFilterField, ES93BloomFilterStoredFieldsFormat bloomFilterStoredFieldsFormat) { + this.bloomFilterField = bloomFilterField; + this.bloomFilterStoredFieldsFormat = bloomFilterStoredFieldsFormat; + } + + @Override + public StoredFieldsFormat storedFieldsFormat() { + return new 
PerFieldStoredFieldsFormat() { + @Override + protected ESStoredFieldsFormat getStoredFieldsFormatForField(String field) { + if (field.equals(bloomFilterField)) { + return bloomFilterStoredFieldsFormat; + } + return defaultStoredFieldsFormat; + } + }; + } } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/storedfields/PerFieldStoredFieldsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/storedfields/PerFieldStoredFieldsFormatTests.java new file mode 100644 index 0000000000000..3a208295edbf4 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/codec/storedfields/PerFieldStoredFieldsFormatTests.java @@ -0,0 +1,119 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.codec.storedfields; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.LongField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.tests.analysis.MockAnalyzer; +import org.apache.lucene.tests.codecs.asserting.AssertingCodec; +import org.apache.lucene.tests.index.BaseStoredFieldsFormatTestCase; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.UUIDs; +import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.index.codec.bloomfilter.ES93BloomFilterStoredFieldsFormat; +import org.elasticsearch.index.mapper.IdFieldMapper; + +import java.nio.charset.StandardCharsets; +import java.util.Set; + +import static org.hamcrest.Matchers.equalTo; + +public class PerFieldStoredFieldsFormatTests extends BaseStoredFieldsFormatTestCase { + + static { + LogConfigurator.loadLog4jPlugins(); + LogConfigurator.configureESLogging(); // native access requires logging to be initialized + } + + @Override + protected Codec getCodec() { + var bloomFilterSizeInKb = atLeast(1); + var bloomFilterStoredFieldsFormat = new ES93BloomFilterStoredFieldsFormat( + BigArrays.NON_RECYCLING_INSTANCE, + ByteSizeValue.ofKb(bloomFilterSizeInKb), + IdFieldMapper.NAME + ); + var defaultStoredFields = new ESLucene90StoredFieldsFormat(); + + return new AssertingCodec() { + @Override + public StoredFieldsFormat storedFieldsFormat() { + return new PerFieldStoredFieldsFormat() { + @Override + protected ESStoredFieldsFormat getStoredFieldsFormatForField(String field) { + if 
(field.equals(IdFieldMapper.NAME)) { + return bloomFilterStoredFieldsFormat; + } + return defaultStoredFields; + } + }; + } + }; + } + + public void testConflictingFileExtensionsThrowAnException() throws Exception { + try (var directory = newDirectory()) { + Analyzer analyzer = new MockAnalyzer(random()); + IndexWriterConfig conf = newIndexWriterConfig(analyzer); + var bloomFilterStoredFieldsFormat = new ES93BloomFilterStoredFieldsFormat( + BigArrays.NON_RECYCLING_INSTANCE, + ByteSizeValue.ofKb(1), + IdFieldMapper.NAME + ); + + var defaultStoredFields = new ESLucene90StoredFieldsFormat() { + @Override + protected Set getFileExtensions() { + return Set.of(ES93BloomFilterStoredFieldsFormat.STORED_FIELDS_METADATA_BLOOM_FILTER_EXTENSION); + } + }; + + conf.setCodec(new AssertingCodec() { + @Override + public StoredFieldsFormat storedFieldsFormat() { + return new PerFieldStoredFieldsFormat() { + @Override + protected ESStoredFieldsFormat getStoredFieldsFormatForField(String field) { + if (field.equals(IdFieldMapper.NAME)) { + return bloomFilterStoredFieldsFormat; + } + return defaultStoredFields; + } + }; + } + }); + conf.setMergePolicy(newLogMergePolicy()); + try (IndexWriter writer = new IndexWriter(directory, conf)) { + Document doc = new Document(); + var id = UUIDs.randomBase64UUID(); + doc.add(new StringField(IdFieldMapper.NAME, new BytesRef(id.getBytes(StandardCharsets.UTF_8)), Field.Store.YES)); + doc.add(new StringField("host", "host", Field.Store.YES)); + doc.add(new LongField("counter", 1, Field.Store.YES)); + var exception = expectThrows(IllegalStateException.class, () -> writer.addDocument(doc)); + assertThat( + exception.getMessage(), + equalTo( + "File extension conflict for field 'host': format ESLucene90StoredFieldsFormat " + + "has overlapping fileExtensions with existing format" + ) + ); + } + } + } +}