From 5466daecdd603c7ebab3e061f0b44bd47be7dc17 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 22 Aug 2013 11:54:36 +0200 Subject: [PATCH] Prevent NPE if all docs for the field are pruned during merge During segment merges FieldConsumers for all fields from the source segments are created. Yet, if all documents that have a value in a certain field are deleted / pruned during the merge the FieldConsumer will not be called at all, causing the internally used FST Builder to return `null` from its `build()` method. This means we can't store it and potentially run into an NPE. This commit adds handling for this corner case both during merge and during the suggest phase, since we also don't have a Lookup instance for this segment's field. Closes #3555 --- .../AnalyzingCompletionLookupProvider.java | 37 +++++++++++------- .../completion/CompletionSuggester.java | 13 +++++-- .../suggest/CompletionPostingsFormatTest.java | 21 ++++++++++ .../suggest/CompletionSuggestSearchTests.java | 38 +++++++++++++++++++ 4 files changed, 92 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java b/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java index d3e766a46fd27..8d966e55c0b3b 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java @@ -133,17 +133,26 @@ public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws * buid the FST and write it to disk. 
*/ FST> build = builder.build(); - fieldOffsets.put(field, output.getFilePointer()); - build.save(output); - /* write some more meta-info */ - output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput()); - output.writeVInt(maxSurfaceFormsPerAnalyzedForm); - output.writeInt(maxGraphExpansions); // can be negative - int options = 0; - options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0; - options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0; - options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0; - output.writeVInt(options); + assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount + "]"; + /* + * it's possible that the FST is null if we have 2 segments that get merged + * and all docs that have a value in this field are deleted. This will cause + * a consumer to be created but it doesn't consume any values causing the FSTBuilder + * to return null. + */ + if (build != null) { + fieldOffsets.put(field, output.getFilePointer()); + build.save(output); + /* write some more meta-info */ + output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput()); + output.writeVInt(maxSurfaceFormsPerAnalyzedForm); + output.writeInt(maxGraphExpansions); // can be negative + int options = 0; + options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0; + options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0; + options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0; + output.writeVInt(options); + } } }; } @@ -251,9 +260,9 @@ public CompletionStats stats(String ... 
fields) { for (Map.Entry entry: lookupMap.entrySet()) { sizeInBytes += entry.getValue().fst.sizeInBytes(); - - if (fields == null || fields.length == 0) continue; - + if (fields == null || fields.length == 0) { + continue; + } for (String field : fields) { // support for getting fields by regex as in fielddata if (Regex.simpleMatch(field, entry.getKey())) { diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java index 4502dd26cd8d5..e9fc40a9ee3e4 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java @@ -31,7 +31,9 @@ import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.text.StringText; import org.elasticsearch.index.mapper.core.CompletionFieldMapper; -import org.elasticsearch.search.suggest.*; +import org.elasticsearch.search.suggest.Suggest; +import org.elasticsearch.search.suggest.SuggestContextParser; +import org.elasticsearch.search.suggest.Suggester; import org.elasticsearch.search.suggest.completion.CompletionSuggestion.Entry.Option; import java.io.IOException; @@ -64,8 +66,13 @@ protected Suggest.Suggestion lookupResults = lookup.lookup(spare, false, suggestionContext.getSize()); for (Lookup.LookupResult res : lookupResults) { diff --git a/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionPostingsFormatTest.java b/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionPostingsFormatTest.java index be089468542b4..d6f4771f2da38 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionPostingsFormatTest.java +++ b/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionPostingsFormatTest.java @@ -247,6 +247,27 @@ public PostingsFormat postingsFormat() { dir.close(); return lookup; } + + @Test 
+ public void testNoDocs() throws IOException { + AnalyzingCompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, false, true, true); + RAMDirectory dir = new RAMDirectory(); + IndexOutput output = dir.createOutput("foo.txt", IOContext.DEFAULT); + FieldsConsumer consumer = provider.consumer(output); + FieldInfo fieldInfo = new FieldInfo("foo", true, 1, false, true, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, + DocValuesType.SORTED, DocValuesType.BINARY, new HashMap()); + TermsConsumer addField = consumer.addField(fieldInfo); + addField.finish(0, 0, 0); + consumer.close(); + output.close(); + + IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT); + LookupFactory load = provider.load(input); + PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new ElasticSearch090PostingsFormat()); + NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT)); + assertNull(load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true), new CompletionSuggestionContext(null))); + dir.close(); + } // TODO ADD more unittests } diff --git a/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionSuggestSearchTests.java b/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionSuggestSearchTests.java index d0281ad7a9757..282e0143da92d 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionSuggestSearchTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionSuggestSearchTests.java @@ -22,6 +22,9 @@ import com.google.common.collect.Lists; import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus; import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse; +import org.elasticsearch.action.admin.indices.optimize.OptimizeResponse; +import org.elasticsearch.action.admin.indices.segments.IndexShardSegments; +import 
org.elasticsearch.action.admin.indices.segments.ShardSegments; import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.suggest.SuggestResponse; @@ -679,4 +682,39 @@ private void createData(boolean optimize) throws IOException, InterruptedExcepti client().admin().indices().prepareOptimize(INDEX).execute().actionGet(); } } + + @Test // see #3555 + public void testPrunedSegments() throws IOException { + createIndexAndMappingAndSettings(settingsBuilder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0), "standard", "standard", false, false, false); + + client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() + .startObject().startObject(FIELD) + .startArray("input").value("The Beatles").endArray() + .endObject().endObject() + ).get(); + client().prepareIndex(INDEX, TYPE, "2").setSource(jsonBuilder() + .startObject() + .field("somefield", "somevalue") + .endObject() + ).get(); // we have 2 docs in a segment... + OptimizeResponse actionGet = client().admin().indices().prepareOptimize().setFlush(true).setMaxNumSegments(1).setRefresh(true).execute().actionGet(); + assertNoFailures(actionGet); + // update the first one and then merge.. 
the target segment will have no value in FIELD + client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() + .startObject() + .field("somefield", "somevalue") + .endObject() + ).get(); + actionGet = client().admin().indices().prepareOptimize().setFlush(true).setMaxNumSegments(1).setRefresh(true).execute().actionGet(); + assertNoFailures(actionGet); + + assertSuggestions("b"); + assertThat(2l, equalTo(client().prepareCount(INDEX).get().getCount())); + for(IndexShardSegments seg : client().admin().indices().prepareSegments().get().getIndices().get(INDEX)) { + ShardSegments[] shards = seg.getShards(); + for (ShardSegments shardSegments : shards) { + assertThat(1, equalTo(shardSegments.getSegments().size())); + } + } + } }