From ed7233634933c624ec130d2636ebd9bb4e2edcea Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 9 Dec 2014 21:36:07 -0500 Subject: [PATCH 1/3] upgrade to lucene 5 r1644303 --- pom.xml | 4 ++-- .../index/engine/internal/InternalEngine.java | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index d59bd2ca133c2..e3be19ef86a3d 100644 --- a/pom.xml +++ b/pom.xml @@ -32,7 +32,7 @@ 5.0.0 - 5.0.0-snapshot-1642891 + 5.0.0-snapshot-1644303 auto true onerror @@ -54,7 +54,7 @@ Lucene snapshots - https://download.elasticsearch.org/lucenesnapshots/1642891 + https://download.elasticsearch.org/lucenesnapshots/1644303 diff --git a/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java b/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java index df40a63237b3e..838e250b75189 100644 --- a/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java +++ b/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.engine.internal; import com.google.common.collect.Lists; + import org.apache.lucene.index.*; import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; import org.apache.lucene.search.*; @@ -69,6 +70,7 @@ import java.io.Closeable; import java.io.IOException; +import java.lang.reflect.Method; import java.util.*; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @@ -1054,10 +1056,13 @@ public void maybeMerge() throws EngineException { } } + // TODO: can we please remove this method?! 
private void waitForMerges(boolean flushAfter) { try { - currentIndexWriter().waitForMerges(); - } catch (IOException e) { + Method method = IndexWriter.class.getDeclaredMethod("waitForMerges"); + method.setAccessible(true); + method.invoke(currentIndexWriter()); + } catch (ReflectiveOperationException e) { throw new OptimizeFailedEngineException(shardId, e); } if (flushAfter) { From b4630be9e2727ea97d2b9967d6b3cc777ec74317 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 9 Dec 2014 21:48:00 -0500 Subject: [PATCH 2/3] fix test bug --- src/test/java/org/elasticsearch/index/store/StoreTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/java/org/elasticsearch/index/store/StoreTest.java b/src/test/java/org/elasticsearch/index/store/StoreTest.java index 215a6a6ea7887..b63c2eb2d79d2 100644 --- a/src/test/java/org/elasticsearch/index/store/StoreTest.java +++ b/src/test/java/org/elasticsearch/index/store/StoreTest.java @@ -187,6 +187,7 @@ public void testVerifyingIndexOutputWithBogusInput() throws IOException { IOUtils.close(verifyingOutput, dir); } + // TODO: remove this, it's too fragile. just use a static old index instead. private static final class OldSIMockingCodec extends FilterCodec { protected OldSIMockingCodec() { @@ -232,6 +233,7 @@ public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOE } } output.writeStringSet(files); + output.writeStringStringMap(si.getAttributes()); CodecUtil.writeFooter(output); success = true; } finally { @@ -245,6 +247,7 @@ public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOE } } + // IF THIS TEST FAILS ON UPGRADE GO LOOK AT THE OldSIMockingCodec!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
@Test public void testWriteLegacyChecksums() throws IOException { final ShardId shardId = new ShardId(new Index("index"), 1); From 66d956a5f4286b22c2e1d8837cd4e0758859b555 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 9 Dec 2014 22:37:58 -0500 Subject: [PATCH 3/3] add best_compression option for lucene 5.0 --- docs/reference/index-modules.asciidoc | 5 +++ .../index/codec/CodecService.java | 17 +++++++-- .../PerFieldMappingPostingFormatCodec.java | 4 ++- .../elasticsearch/index/codec/CodecTests.java | 36 +++++++++++++++++++ 4 files changed, 59 insertions(+), 3 deletions(-) diff --git a/docs/reference/index-modules.asciidoc b/docs/reference/index-modules.asciidoc index f9e922b3e15a7..707e712f89a4b 100644 --- a/docs/reference/index-modules.asciidoc +++ b/docs/reference/index-modules.asciidoc @@ -41,6 +41,11 @@ otherwise it is written in non-compound format. refresh operation will be executed. Defaults to `1s`. Can be set to `-1` in order to disable it. +`index.codec`:: + The `default` value compresses stored data with LZ4 compression, but + this can be set to `best_compression` for a higher compression ratio, + at the expense of slower stored fields performance. + `index.shard.check_on_startup`:: Should shard consistency be checked upon opening. 
When `true`, the shard will be checked, preventing it from being open in diff --git a/src/main/java/org/elasticsearch/index/codec/CodecService.java b/src/main/java/org/elasticsearch/index/codec/CodecService.java index 907c1d0e2fd2e..933c7d18399fd 100644 --- a/src/main/java/org/elasticsearch/index/codec/CodecService.java +++ b/src/main/java/org/elasticsearch/index/codec/CodecService.java @@ -20,7 +20,11 @@ package org.elasticsearch.index.codec; import com.google.common.collect.ImmutableMap; + import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene50.Lucene50Codec; +import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; +import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.common.collect.MapBuilder; import org.elasticsearch.common.inject.Inject; @@ -50,6 +54,7 @@ public class CodecService extends AbstractIndexComponent { private final ImmutableMap codecs; public final static String DEFAULT_CODEC = "default"; + public final static String BEST_COMPRESSION_CODEC = "best_compression"; public CodecService(Index index) { this(index, ImmutableSettings.Builder.EMPTY_SETTINGS); @@ -68,9 +73,17 @@ public CodecService(Index index, @IndexSettings Settings indexSettings, Postings this.mapperService = mapperService; MapBuilder codecs = MapBuilder.newMapBuilder(); if (mapperService == null) { - codecs.put(DEFAULT_CODEC, Codec.getDefault()); + codecs.put(DEFAULT_CODEC, new Lucene50Codec()); + codecs.put(BEST_COMPRESSION_CODEC, new Lucene50Codec(Mode.BEST_COMPRESSION)); } else { - codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(mapperService, + codecs.put(DEFAULT_CODEC, + new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, + mapperService, + postingsFormatService.get(PostingsFormatService.DEFAULT_FORMAT).get(), + docValuesFormatService.get(DocValuesFormatService.DEFAULT_FORMAT).get(), logger)); + 
codecs.put(BEST_COMPRESSION_CODEC, + new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, + mapperService, postingsFormatService.get(PostingsFormatService.DEFAULT_FORMAT).get(), docValuesFormatService.get(DocValuesFormatService.DEFAULT_FORMAT).get(), logger)); } diff --git a/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java b/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java index f23e82a6c4ee1..9bf33865d9e1e 100644 --- a/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java +++ b/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java @@ -23,6 +23,7 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene50.Lucene50Codec; +import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider; @@ -49,7 +50,8 @@ public class PerFieldMappingPostingFormatCodec extends Lucene50Codec { assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMappingPostingFormatCodec.class) : "PerFieldMappingPostingFormatCodec must subclass the latest lucene codec: " + Lucene.LATEST_CODEC; } - public PerFieldMappingPostingFormatCodec(MapperService mapperService, PostingsFormat defaultPostingFormat, DocValuesFormat defaultDocValuesFormat, ESLogger logger) { + public PerFieldMappingPostingFormatCodec(Lucene50StoredFieldsFormat.Mode compressionMode, MapperService mapperService, PostingsFormat defaultPostingFormat, DocValuesFormat defaultDocValuesFormat, ESLogger logger) { + super(compressionMode); this.mapperService = mapperService; this.logger = logger; this.defaultPostingFormat = defaultPostingFormat; diff --git a/src/test/java/org/elasticsearch/index/codec/CodecTests.java 
b/src/test/java/org/elasticsearch/index/codec/CodecTests.java index b15c73d284321..fd984fe8df642 100644 --- a/src/test/java/org/elasticsearch/index/codec/CodecTests.java +++ b/src/test/java/org/elasticsearch/index/codec/CodecTests.java @@ -33,7 +33,15 @@ import org.apache.lucene.codecs.lucene49.Lucene49Codec; import org.apache.lucene.codecs.lucene50.Lucene50Codec; import org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat; +import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; +import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.store.Directory; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; @@ -173,6 +181,34 @@ public void testChangeVersionFormat() throws Exception { assertThat(documentMapper.rootMapper(VersionFieldMapper.class).docValuesFormatProvider(), instanceOf(PreBuiltDocValuesFormatProvider.class)); assertThat(documentMapper.rootMapper(VersionFieldMapper.class).docValuesFormatProvider().get(), instanceOf(Lucene410DocValuesFormat.class)); } + + public void testDefault() throws Exception { + Codec codec = createCodecService().codec("default"); + assertCompressionEquals(Mode.BEST_SPEED, codec); + } + + public void testBestCompression() throws Exception { + Codec codec = createCodecService().codec("best_compression"); + assertCompressionEquals(Mode.BEST_COMPRESSION, codec); + } + + // write some docs with it, inspect .si to see this was the used compression + private void assertCompressionEquals(Mode expected, Codec actual) throws Exception { + Directory dir = newDirectory(); + 
IndexWriterConfig iwc = newIndexWriterConfig(null); + iwc.setCodec(actual); + IndexWriter iw = new IndexWriter(dir, iwc); + iw.addDocument(new Document()); + iw.commit(); + iw.close(); + DirectoryReader ir = DirectoryReader.open(dir); + SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader(); + String v = sr.getSegmentInfo().info.getAttribute(Lucene50StoredFieldsFormat.MODE_KEY); + assertNotNull(v); + assertEquals(expected, Mode.valueOf(v)); + ir.close(); + dir.close(); + } private static CodecService createCodecService() { return createCodecService(ImmutableSettings.Builder.EMPTY_SETTINGS);