Add best_compression option for indices #8863

Closed · wants to merge 3 commits
Changes from all commits
5 changes: 5 additions & 0 deletions docs/reference/index-modules.asciidoc
@@ -41,6 +41,11 @@ otherwise it is written in non-compound format.
refresh operation will be executed. Defaults to `1s`. Can be set to `-1`
in order to disable it.

`index.codec`::
Contributor:
can we randomize this setting in the index template in ElasticsearchIntegrationTest?

Contributor Author:
Yeah, I can look into it. Note that we sneak it in via regular Codec randomization in LuceneTestCase already.
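(For reference, a rough sketch of what that randomization could look like. It is not part of this PR; the indexSettings() override point is an assumption, and randomBoolean()/randomFrom(...) come from the randomized-testing base classes the integration tests inherit.)

import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.ElasticsearchIntegrationTest;

public class RandomCodecIT extends ElasticsearchIntegrationTest {
    @Override
    public Settings indexSettings() {
        // Hypothetical hook: mix best_compression into some test runs.
        ImmutableSettings.Builder builder = ImmutableSettings.builder().put(super.indexSettings());
        if (randomBoolean()) {
            builder.put("index.codec", randomFrom("default", "best_compression"));
        }
        return builder.build();
    }
}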

The `default` value compresses stored data with LZ4 compression, but
this can be set to `best_compression` for a higher compression ratio,
at the expense of slower stored fields performance.

`index.shard.check_on_startup`::
Should shard consistency be checked upon opening.
When `true`, the shard will be checked, preventing it from being open in
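(Usage note, not part of the diff: once this lands, the codec can be chosen per index at creation time. A minimal sketch via the Java API, assuming an already-connected Client; the index name is made up.)

import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;

public class BestCompressionExample {
    // Create an index whose stored fields trade some speed for a higher compression ratio.
    static void createCompressedIndex(Client client) {
        client.admin().indices().prepareCreate("logs")
                .setSettings(ImmutableSettings.builder()
                        .put("index.codec", "best_compression")
                        .build())
                .get();
    }
}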
4 changes: 2 additions & 2 deletions pom.xml
@@ -32,7 +32,7 @@

<properties>
<lucene.version>5.0.0</lucene.version>
- <lucene.maven.version>5.0.0-snapshot-1642891</lucene.maven.version>
+ <lucene.maven.version>5.0.0-snapshot-1644303</lucene.maven.version>
<tests.jvms>auto</tests.jvms>
<tests.shuffle>true</tests.shuffle>
<tests.output>onerror</tests.output>
@@ -54,7 +54,7 @@
</repository>
<repository>
<id>Lucene snapshots</id>
- <url>https://download.elasticsearch.org/lucenesnapshots/1642891</url>
+ <url>https://download.elasticsearch.org/lucenesnapshots/1644303</url>
</repository>
</repositories>

17 changes: 15 additions & 2 deletions src/main/java/org/elasticsearch/index/codec/CodecService.java
@@ -20,7 +20,11 @@
package org.elasticsearch.index.codec;

import com.google.common.collect.ImmutableMap;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene50.Lucene50Codec;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.common.inject.Inject;
@@ -50,6 +50,7 @@ public class CodecService extends AbstractIndexComponent {
private final ImmutableMap<String, Codec> codecs;

public final static String DEFAULT_CODEC = "default";
public final static String BEST_COMPRESSION_CODEC = "best_compression";

public CodecService(Index index) {
this(index, ImmutableSettings.Builder.EMPTY_SETTINGS);
@@ -68,9 +73,17 @@ public CodecService(Index index, @IndexSettings Settings indexSettings, Postings
this.mapperService = mapperService;
MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
if (mapperService == null) {
- codecs.put(DEFAULT_CODEC, Codec.getDefault());
+ codecs.put(DEFAULT_CODEC, new Lucene50Codec());
+ codecs.put(BEST_COMPRESSION_CODEC, new Lucene50Codec(Mode.BEST_COMPRESSION));
} else {
- codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(mapperService,
+ codecs.put(DEFAULT_CODEC,
+ new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED,
+ mapperService,
postingsFormatService.get(PostingsFormatService.DEFAULT_FORMAT).get(),
docValuesFormatService.get(DocValuesFormatService.DEFAULT_FORMAT).get(), logger));
+ codecs.put(BEST_COMPRESSION_CODEC,
+ new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION,
+ mapperService,
+ postingsFormatService.get(PostingsFormatService.DEFAULT_FORMAT).get(),
+ docValuesFormatService.get(DocValuesFormatService.DEFAULT_FORMAT).get(), logger));
}
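(For context: the two registered codecs differ only in their stored-fields format. In Lucene 5.0, Mode.BEST_SPEED selects the LZ4-backed stored fields implementation and Mode.BEST_COMPRESSION a DEFLATE-backed one. A standalone sketch of the name-to-codec mapping built above; simplified, since the real lookup goes through the codecs map and CodecService#codec(String).)

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene50.Lucene50Codec;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;

class CodecChoice {
    // Same shape as the mapping CodecService registers above.
    static Codec lookup(String name) {
        switch (name) {
            case "default":          return new Lucene50Codec(Mode.BEST_SPEED);
            case "best_compression": return new Lucene50Codec(Mode.BEST_COMPRESSION);
            default: throw new IllegalArgumentException("unknown codec [" + name + "]");
        }
    }
}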
src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java
@@ -23,6 +23,7 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50Codec;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
@@ -49,7 +50,8 @@ public class PerFieldMappingPostingFormatCodec extends Lucene50Codec {
assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMappingPostingFormatCodec.class) : "PerFieldMappingPostingFormatCodec must subclass the latest lucene codec: " + Lucene.LATEST_CODEC;
}

- public PerFieldMappingPostingFormatCodec(MapperService mapperService, PostingsFormat defaultPostingFormat, DocValuesFormat defaultDocValuesFormat, ESLogger logger) {
+ public PerFieldMappingPostingFormatCodec(Lucene50StoredFieldsFormat.Mode compressionMode, MapperService mapperService, PostingsFormat defaultPostingFormat, DocValuesFormat defaultDocValuesFormat, ESLogger logger) {
+ super(compressionMode);
this.mapperService = mapperService;
this.logger = logger;
this.defaultPostingFormat = defaultPostingFormat;
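(Worth noting: the mode handed to super(...) only selects the stored-fields format; the per-field postings and doc-values selection this codec exists for is unchanged.)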
src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java
@@ -20,6 +20,7 @@
package org.elasticsearch.index.engine.internal;

import com.google.common.collect.Lists;

import org.apache.lucene.index.*;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.search.*;
@@ -69,6 +70,7 @@

import java.io.Closeable;
import java.io.IOException;
import java.lang.reflect.Method;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -1054,10 +1056,13 @@ public void maybeMerge() throws EngineException {
}
}

+ // TODO: can we please remove this method?!
private void waitForMerges(boolean flushAfter) {
try {
- currentIndexWriter().waitForMerges();
- } catch (IOException e) {
+ Method method = IndexWriter.class.getDeclaredMethod("waitForMerges");
+ method.setAccessible(true);
+ method.invoke(currentIndexWriter());
+ } catch (ReflectiveOperationException e) {
throw new OptimizeFailedEngineException(shardId, e);
}
if (flushAfter) {
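(Context for the TODO above: the bumped Lucene snapshot evidently no longer exposes IndexWriter#waitForMerges publicly, so the engine calls it reflectively for now. The pattern, reduced to a self-contained toy example; the class and method names here are illustrative only.)

import java.lang.reflect.Method;

class ReflectiveCall {
    static class Target {
        private void waitForMerges() { /* pretend to wait */ }
    }

    public static void main(String[] args) throws ReflectiveOperationException {
        // getDeclaredMethod sees private methods; setAccessible lifts the access check.
        Method m = Target.class.getDeclaredMethod("waitForMerges");
        m.setAccessible(true);
        m.invoke(new Target());
    }
}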
36 changes: 36 additions & 0 deletions src/test/java/org/elasticsearch/index/codec/CodecTests.java
@@ -33,7 +33,15 @@
import org.apache.lucene.codecs.lucene49.Lucene49Codec;
import org.apache.lucene.codecs.lucene50.Lucene50Codec;
import org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
@@ -173,6 +181,34 @@ public void testChangeVersionFormat() throws Exception {
assertThat(documentMapper.rootMapper(VersionFieldMapper.class).docValuesFormatProvider(), instanceOf(PreBuiltDocValuesFormatProvider.class));
assertThat(documentMapper.rootMapper(VersionFieldMapper.class).docValuesFormatProvider().get(), instanceOf(Lucene410DocValuesFormat.class));
}

public void testDefault() throws Exception {
Codec codec = createCodecService().codec("default");
assertCompressionEquals(Mode.BEST_SPEED, codec);
}

public void testBestCompression() throws Exception {
Codec codec = createCodecService().codec("best_compression");
assertCompressionEquals(Mode.BEST_COMPRESSION, codec);
}

// write some docs with it, then inspect the .si file to see which compression was used
private void assertCompressionEquals(Mode expected, Codec actual) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(null);
iwc.setCodec(actual);
IndexWriter iw = new IndexWriter(dir, iwc);
iw.addDocument(new Document());
iw.commit();
iw.close();
DirectoryReader ir = DirectoryReader.open(dir);
SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader();
String v = sr.getSegmentInfo().info.getAttribute(Lucene50StoredFieldsFormat.MODE_KEY);
assertNotNull(v);
assertEquals(expected, Mode.valueOf(v));
ir.close();
dir.close();
}

private static CodecService createCodecService() {
return createCodecService(ImmutableSettings.Builder.EMPTY_SETTINGS);
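(How the assertion above works: when a segment is written, Lucene50StoredFieldsFormat records the mode it used in the segment's attributes under Lucene50StoredFieldsFormat.MODE_KEY, so reading that attribute back from the SegmentInfo shows which compression was actually applied. That is also why the OldSIMockingCodec in StoreTest below now has to write si.getAttributes(): dropping the attributes map would lose the recorded mode.)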
3 changes: 3 additions & 0 deletions src/test/java/org/elasticsearch/index/store/StoreTest.java
@@ -187,6 +187,7 @@ public void testVerifyingIndexOutputWithBogusInput() throws IOException {
IOUtils.close(verifyingOutput, dir);
}

// TODO: remove this, it's too fragile. Just use a static old index instead.
private static final class OldSIMockingCodec extends FilterCodec {

protected OldSIMockingCodec() {
@@ -232,6 +233,7 @@ public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOE
}
}
output.writeStringSet(files);
output.writeStringStringMap(si.getAttributes());
CodecUtil.writeFooter(output);
success = true;
} finally {
@@ -245,6 +247,7 @@ public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOE
}
}

// IF THIS TEST FAILS ON UPGRADE GO LOOK AT THE OldSIMockingCodec!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Contributor:
I am sorry :)

@Test
public void testWriteLegacyChecksums() throws IOException {
final ShardId shardId = new ShardId(new Index("index"), 1);