Prevent NPE if all docs for the field are pruned during merge
During a segment merge, FieldsConsumers are created for all fields from
the source segments. Yet, if every document that has a value in a
certain field is deleted / pruned during the merge, the consumer for
that field is never called, causing the internally used FST Builder to
return `null` from its `build()` method. This means there is no FST to
store, and we can potentially run into an NPE. This commit adds handling
for this corner case both during the merge and during the suggest phase,
since we also have no Lookup instance for the field in such a segment.

Closes #3555
s1monw committed Aug 22, 2013
1 parent 7618bfd commit 5466dae
Showing 4 changed files with 92 additions and 17 deletions.
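
In essence, the writer-side fix reduces to the guard sketched below (a simplified reading of the first hunk, not the verbatim patch; `builder`, `field`, `fieldOffsets`, `output`, and `docCount` are the names used in the diff):

    FST<Pair<Long, BytesRef>> fst = builder.build();
    if (fst == null) {
        // Every doc carrying this field was pruned in the merge: the consumer
        // received no input, so there is nothing to save and no offset to record.
        assert docCount == 0;
        return;
    }
    fieldOffsets.put(field, output.getFilePointer());
    fst.save(output);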
@@ -133,17 +133,26 @@ public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws
* build the FST and write it to disk.
*/
FST<Pair<Long, BytesRef>> build = builder.build();
fieldOffsets.put(field, output.getFilePointer());
build.save(output);
/* write some more meta-info */
output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput());
output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
output.writeInt(maxGraphExpansions); // can be negative
int options = 0;
options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0;
options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
output.writeVInt(options);
assert build != null || docCount == 0 : "the FST is null but docCount != 0; actual value: [" + docCount + "]";
/*
* It's possible that the FST is null if two segments get merged and all
* docs that have a value in this field are deleted. A consumer is still
* created for the field, but it never consumes any values, causing the
* FST builder to return null.
*/
if (build != null) {
fieldOffsets.put(field, output.getFilePointer());
build.save(output);
/* write some more meta-info */
output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput());
output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
output.writeInt(maxGraphExpansions); // can be negative
int options = 0;
options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0;
options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
output.writeVInt(options);
}
}
};
}
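
For reference, the three flags packed into the `options` VInt above would be unpacked on the load side roughly as follows (a sketch assuming `input` is the IndexInput positioned right after the saved FST; the actual reader lives in the provider's load path and may differ in detail):

    int options = input.readVInt();
    boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPERATORS) != 0;
    boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
    boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;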
@@ -251,9 +260,9 @@ public CompletionStats stats(String ... fields) {

for (Map.Entry<String, AnalyzingSuggestHolder> entry: lookupMap.entrySet()) {
sizeInBytes += entry.getValue().fst.sizeInBytes();

if (fields == null || fields.length == 0) continue;

if (fields == null || fields.length == 0) {
continue;
}
for (String field : fields) {
// support for getting fields by regex as in fielddata
if (Regex.simpleMatch(field, entry.getKey())) {
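
As an aside, `Regex.simpleMatch` applies the same lightweight `*` wildcard patterns used for fielddata field selection; for example:

    Regex.simpleMatch("name_*", "name_suggest"); // true
    Regex.simpleMatch("title", "name_suggest");  // false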
@@ -31,7 +31,9 @@
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
import org.elasticsearch.search.suggest.*;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestContextParser;
import org.elasticsearch.search.suggest.Suggester;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion.Entry.Option;

import java.io.IOException;
@@ -64,8 +66,13 @@ protected Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Sugges
AtomicReader atomicReader = atomicReaderContext.reader();
Terms terms = atomicReader.fields().terms(fieldName);
if (terms instanceof Completion090PostingsFormat.CompletionTerms) {
Completion090PostingsFormat.CompletionTerms lookupTerms = (Completion090PostingsFormat.CompletionTerms) terms;
Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), suggestionContext);
final Completion090PostingsFormat.CompletionTerms lookupTerms = (Completion090PostingsFormat.CompletionTerms) terms;
final Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), suggestionContext);
if (lookup == null) {
// we don't have a lookup for this segment; this can happen if a merge dropped
// all docs from the segment that had a value in this field.
continue;
}
List<Lookup.LookupResult> lookupResults = lookup.lookup(spare, false, suggestionContext.getSize());
for (Lookup.LookupResult res : lookupResults) {

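The guard above has a mirror on the load side: because finish() records a fieldOffsets entry only when an FST was actually built, load() ends up with no holder for the pruned field, and getLookup() yields null. A hypothetical sketch of that shape (names such as `lookupMap` and `AnalyzingSuggestHolder` are taken from the stats hunk above; the real method also resolves the concrete suggester):

    AnalyzingSuggestHolder holder = lookupMap.get(fieldName);
    if (holder == null) {
        return null; // no FST was written for this field in this segment
    }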
@@ -247,6 +247,27 @@ public PostingsFormat postingsFormat() {
dir.close();
return lookup;
}

@Test
public void testNoDocs() throws IOException {
AnalyzingCompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, false, true, true);
RAMDirectory dir = new RAMDirectory();
IndexOutput output = dir.createOutput("foo.txt", IOContext.DEFAULT);
FieldsConsumer consumer = provider.consumer(output);
FieldInfo fieldInfo = new FieldInfo("foo", true, 1, false, true, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
DocValuesType.SORTED, DocValuesType.BINARY, new HashMap<String, String>());
TermsConsumer addField = consumer.addField(fieldInfo);
addField.finish(0, 0, 0);
consumer.close();
output.close();

IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT);
LookupFactory load = provider.load(input);
PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new ElasticSearch090PostingsFormat());
NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT));
assertNull(load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true), new CompletionSuggestionContext(null)));
dir.close();
}

// TODO ADD more unittests
}
@@ -22,6 +22,9 @@
import com.google.common.collect.Lists;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
import org.elasticsearch.action.admin.indices.optimize.OptimizeResponse;
import org.elasticsearch.action.admin.indices.segments.IndexShardSegments;
import org.elasticsearch.action.admin.indices.segments.ShardSegments;
import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.suggest.SuggestResponse;
@@ -679,4 +682,39 @@ private void createData(boolean optimize) throws IOException, InterruptedException
client().admin().indices().prepareOptimize(INDEX).execute().actionGet();
}
}

@Test // see #3555
public void testPrunedSegments() throws IOException {
createIndexAndMappingAndSettings(settingsBuilder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0), "standard", "standard", false, false, false);

client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
.startObject().startObject(FIELD)
.startArray("input").value("The Beatles").endArray()
.endObject().endObject()
).get();
client().prepareIndex(INDEX, TYPE, "2").setSource(jsonBuilder()
.startObject()
.field("somefield", "somevalue")
.endObject()
).get(); // we have 2 docs in a segment...
OptimizeResponse actionGet = client().admin().indices().prepareOptimize().setFlush(true).setMaxNumSegments(1).setRefresh(true).execute().actionGet();
assertNoFailures(actionGet);
// update the first doc and then merge; the target segment will have no value in FIELD
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
.startObject()
.field("somefield", "somevalue")
.endObject()
).get();
actionGet = client().admin().indices().prepareOptimize().setFlush(true).setMaxNumSegments(1).setRefresh(true).execute().actionGet();
assertNoFailures(actionGet);

assertSuggestions("b");
assertThat(client().prepareCount(INDEX).get().getCount(), equalTo(2L));
for (IndexShardSegments seg : client().admin().indices().prepareSegments().get().getIndices().get(INDEX)) {
ShardSegments[] shards = seg.getShards();
for (ShardSegments shardSegments : shards) {
assertThat(shardSegments.getSegments().size(), equalTo(1));
}
}
}
}
