New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Default position_offset_gap to 100 #12544
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ | |
import org.apache.lucene.index.IndexOptions; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.util.BytesRef; | ||
import org.elasticsearch.Version; | ||
import org.elasticsearch.common.Strings; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.common.xcontent.XContentBuilder; | ||
|
@@ -52,6 +53,7 @@ | |
public class StringFieldMapper extends FieldMapper implements AllFieldMapper.IncludeInAll { | ||
|
||
public static final String CONTENT_TYPE = "string"; | ||
private static final int POSITION_OFFSET_GAP_USE_ANALYZER = -1; | ||
|
||
public static class Defaults { | ||
public static final MappedFieldType FIELD_TYPE = new StringFieldType(); | ||
|
@@ -62,15 +64,36 @@ public static class Defaults { | |
|
||
// NOTE, when adding defaults here, make sure you add them in the builder | ||
public static final String NULL_VALUE = null; | ||
public static final int POSITION_OFFSET_GAP = 0; | ||
/** | ||
* Post 2.1 default for position_offset_gap. Set to 100 so that | ||
* phrase queries of reasonably high slop will not match across field | ||
* values. | ||
*/ | ||
public static final int POSITION_OFFSET_GAP = 100; | ||
public static final int POSITION_OFFSET_GAP_PRE_2_1 = 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change is going to 2.0 branch right? Do we need to change this to 2_0? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sadly, yup. I had originally targeted 2.0, then gave up on making it in, then @s1monw said we should jam it in there anyway. So I got what I had reviewed merged as quickly as I could. It's looking like it'll be reasonably quick to merge to 2.0 so I'll do that and then go fix master so it makes sense. |
||
public static final int IGNORE_ABOVE = -1; | ||
|
||
/** | ||
* The default position_offset_gap for a particular version of Elasticsearch. | ||
*/ | ||
public static int positionOffsetGap(Version version) { | ||
if (version.before(Version.V_2_1_0)) { | ||
return POSITION_OFFSET_GAP_PRE_2_1; | ||
} | ||
return POSITION_OFFSET_GAP; | ||
} | ||
} | ||
|
||
public static class Builder extends FieldMapper.Builder<Builder, StringFieldMapper> { | ||
|
||
protected String nullValue = Defaults.NULL_VALUE; | ||
|
||
protected int positionOffsetGap = Defaults.POSITION_OFFSET_GAP; | ||
/** | ||
* The distance between tokens from different values in the same field. | ||
* POSITION_OFFSET_GAP_USE_ANALYZER means default to the analyzer's | ||
* setting which in turn defaults to Defaults.POSITION_OFFSET_GAP. | ||
*/ | ||
protected int positionOffsetGap = POSITION_OFFSET_GAP_USE_ANALYZER; | ||
|
||
protected int ignoreAbove = Defaults.IGNORE_ABOVE; | ||
|
||
|
@@ -102,7 +125,7 @@ public Builder ignoreAbove(int ignoreAbove) { | |
|
||
@Override | ||
public StringFieldMapper build(BuilderContext context) { | ||
if (positionOffsetGap > 0) { | ||
if (positionOffsetGap != POSITION_OFFSET_GAP_USE_ANALYZER) { | ||
fieldType.setIndexAnalyzer(new NamedAnalyzer(fieldType.indexAnalyzer(), positionOffsetGap)); | ||
fieldType.setSearchAnalyzer(new NamedAnalyzer(fieldType.searchAnalyzer(), positionOffsetGap)); | ||
fieldType.setSearchQuoteAnalyzer(new NamedAnalyzer(fieldType.searchQuoteAnalyzer(), positionOffsetGap)); | ||
|
@@ -154,7 +177,11 @@ public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext | |
builder.searchQuotedAnalyzer(analyzer); | ||
iterator.remove(); | ||
} else if (propName.equals("position_offset_gap")) { | ||
builder.positionOffsetGap(XContentMapValues.nodeIntegerValue(propNode, -1)); | ||
int newPositionOffsetGap = XContentMapValues.nodeIntegerValue(propNode, -1); | ||
if (newPositionOffsetGap < 0) { | ||
throw new MapperParsingException("positions_offset_gap less than 0 aren't allowed."); | ||
} | ||
builder.positionOffsetGap(newPositionOffsetGap); | ||
// we need to update to actual analyzers if they are not set in this case... | ||
// so we can inject the position offset gap... | ||
if (builder.fieldType().indexAnalyzer() == null) { | ||
|
@@ -354,7 +381,7 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, | |
builder.field("include_in_all", false); | ||
} | ||
|
||
if (includeDefaults || positionOffsetGap != Defaults.POSITION_OFFSET_GAP) { | ||
if (includeDefaults || positionOffsetGap != POSITION_OFFSET_GAP_USE_ANALYZER) { | ||
builder.field("position_offset_gap", positionOffsetGap); | ||
} | ||
NamedAnalyzer searchQuoteAnalyzer = fieldType().searchQuoteAnalyzer(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.index.mapper.string; | ||
|
||
import com.google.common.collect.ImmutableList; | ||
|
||
import org.elasticsearch.ExceptionsHelper; | ||
import org.elasticsearch.client.Client; | ||
import org.elasticsearch.common.xcontent.XContentBuilder; | ||
import org.elasticsearch.common.xcontent.XContentFactory; | ||
import org.elasticsearch.index.mapper.MapperParsingException; | ||
import org.elasticsearch.test.ESSingleNodeTestCase; | ||
|
||
import java.io.IOException; | ||
|
||
import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery; | ||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; | ||
import static org.hamcrest.Matchers.containsString; | ||
|
||
/** | ||
* Tests that position_offset_gap is read from the mapper and applies as | ||
* expected in queries. | ||
*/ | ||
public class StringFieldMapperPositionOffsetGapTests extends ESSingleNodeTestCase { | ||
/** | ||
* The default position_offset_gap should be large enough that most | ||
* "sensible" queries phrase slops won't match across values. | ||
*/ | ||
public void testDefault() throws IOException { | ||
assertGapIsOneHundred(client(), "test", "test"); | ||
} | ||
|
||
/** | ||
* Asserts that the post-2.0 default is being applied. | ||
*/ | ||
public static void assertGapIsOneHundred(Client client, String indexName, String type) throws IOException { | ||
testGap(client(), indexName, type, 100); | ||
|
||
// No match across gap using default slop with default positionOffsetGap | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two")).get(), 0); | ||
|
||
// Nor with small-ish values | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(5)).get(), 0); | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(50)).get(), 0); | ||
|
||
// But huge-ish values still match | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(500)).get(), 1); | ||
} | ||
|
||
public void testZero() throws IOException { | ||
setupGapInMapping(0); | ||
assertGapIsZero(client(), "test", "test"); | ||
} | ||
|
||
/** | ||
* Asserts that the pre-2.0 default has been applied or explicitly | ||
* configured. | ||
*/ | ||
public static void assertGapIsZero(Client client, String indexName, String type) throws IOException { | ||
testGap(client, indexName, type, 0); | ||
/* | ||
* Phrases match across different values using default slop with pre-2.0 default | ||
* position_offset_gap. | ||
*/ | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two")).get(), 1); | ||
} | ||
|
||
public void testLargerThanDefault() throws IOException { | ||
setupGapInMapping(10000); | ||
testGap(client(), "test", "test", 10000); | ||
} | ||
|
||
public void testSmallerThanDefault() throws IOException { | ||
setupGapInMapping(2); | ||
testGap(client(), "test", "test", 2); | ||
} | ||
|
||
public void testNegativeIsError() throws IOException { | ||
try { | ||
setupGapInMapping(-1); | ||
fail("Expected an error"); | ||
} catch (MapperParsingException e) { | ||
assertThat(ExceptionsHelper.detailedMessage(e), containsString("positions_offset_gap less than 0 aren't allowed")); | ||
} | ||
} | ||
|
||
/** | ||
* Tests that the default actually defaults to the position_offset_gap | ||
* configured in the analyzer. This behavior is very old and a little | ||
* strange but not worth breaking some thought. | ||
*/ | ||
public void testDefaultDefaultsToAnalyzer() throws IOException { | ||
XContentBuilder settings = XContentFactory.jsonBuilder().startObject().startObject("analysis").startObject("analyzer") | ||
.startObject("gappy"); | ||
settings.field("type", "custom"); | ||
settings.field("tokenizer", "standard"); | ||
settings.field("position_offset_gap", 2); | ||
setupAnalyzer(settings, "gappy"); | ||
testGap(client(), "test", "test", 2); | ||
} | ||
|
||
/** | ||
* Build an index named "test" with a field named "string" with the provided | ||
* positionOffsetGap that uses the standard analyzer. | ||
*/ | ||
private void setupGapInMapping(int positionOffsetGap) throws IOException { | ||
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("properties").startObject("string"); | ||
mapping.field("type", "string"); | ||
mapping.field("position_offset_gap", positionOffsetGap); | ||
client().admin().indices().prepareCreate("test").addMapping("test", mapping).get(); | ||
} | ||
|
||
/** | ||
* Build an index named "test" with the provided settings and and a field | ||
* named "string" that uses the specified analyzer and default | ||
* position_offset_gap. | ||
*/ | ||
private void setupAnalyzer(XContentBuilder settings, String analyzer) throws IOException { | ||
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("properties").startObject("string"); | ||
mapping.field("type", "string"); | ||
mapping.field("analyzer", analyzer); | ||
client().admin().indices().prepareCreate("test").addMapping("test", mapping).setSettings(settings).get(); | ||
} | ||
|
||
private static void testGap(Client client, String indexName, String type, int positionOffsetGap) throws IOException { | ||
client.prepareIndex(indexName, type, "position_gap_test").setSource("string", ImmutableList.of("one", "two three")).setRefresh(true).get(); | ||
|
||
// Baseline - phrase query finds matches in the same field value | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "two three")).get(), 1); | ||
|
||
if (positionOffsetGap > 0) { | ||
// No match across gaps when slop < position gap | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionOffsetGap - 1)).get(), | ||
0); | ||
} | ||
|
||
// Match across gaps when slop >= position gap | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionOffsetGap)).get(), 1); | ||
assertHitCount(client.prepareSearch(indexName).setQuery(matchPhraseQuery("string", "one two").slop(positionOffsetGap + 1)).get(), 1); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe add some comments here explaining? I'm confused...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah - this was confusing when I wrote it. Its funky and deserves more comments....
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok - added some comments above.