New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for char filters in the analyze API #5148
Changes from 5 commits
554a08a
e236b33
187846c
d0dc1e3
36235ac
1fe04bf
8eb4170
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,9 +18,12 @@ | |
*/ | ||
package org.elasticsearch.action.admin.indices.analyze; | ||
|
||
import org.elasticsearch.ElasticsearchIllegalArgumentException; | ||
import org.elasticsearch.Version; | ||
import org.elasticsearch.action.ActionRequestValidationException; | ||
import org.elasticsearch.action.support.single.custom.SingleCustomOperationRequest; | ||
import org.elasticsearch.common.Nullable; | ||
import org.elasticsearch.common.Strings; | ||
import org.elasticsearch.common.io.stream.StreamInput; | ||
import org.elasticsearch.common.io.stream.StreamOutput; | ||
|
||
|
@@ -42,7 +45,9 @@ public class AnalyzeRequest extends SingleCustomOperationRequest<AnalyzeRequest> | |
|
||
private String tokenizer; | ||
|
||
private String[] tokenFilters; | ||
private String[] tokenFilters = Strings.EMPTY_ARRAY; | ||
|
||
private String[] charFilters = Strings.EMPTY_ARRAY; | ||
|
||
private String field; | ||
|
||
|
@@ -102,6 +107,7 @@ public String tokenizer() { | |
} | ||
|
||
/** | ||
 * Sets the names of the token filters stored on this request. | ||
 * | ||
 * @param tokenFilters token filter names; must not be {@code null} | ||
 * @return this request, so calls can be chained | ||
 * @throws ElasticsearchIllegalArgumentException if {@code tokenFilters} is {@code null} | ||
 */ | ||
public AnalyzeRequest tokenFilters(String... tokenFilters) { | ||
    // Reject null up front; the field defaults to an empty array and is expected to stay non-null. | ||
    if (tokenFilters == null) { | ||
        throw new ElasticsearchIllegalArgumentException("token filters must not be null"); | ||
    } | ||
    this.tokenFilters = tokenFilters; | ||
    return this; | ||
} | ||
|
@@ -110,6 +116,16 @@ public String[] tokenFilters() { | |
return this.tokenFilters; | ||
} | ||
|
||
public AnalyzeRequest charFilters(String... charFilters) { | ||
if (charFilters == null) throw new ElasticsearchIllegalArgumentException("char filters must not be null"); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as above (presumably the request to add braces to this statement and put the `throw` on its own line). |
||
this.charFilters = charFilters; | ||
return this; | ||
} | ||
|
||
/** | ||
 * Returns the char filter names currently stored on this request. | ||
 * | ||
 * @return the stored array itself; no defensive copy is made | ||
 */ | ||
public String[] charFilters() { | ||
    return charFilters; | ||
} | ||
|
||
public AnalyzeRequest field(String field) { | ||
this.field = field; | ||
return this; | ||
|
@@ -125,6 +141,12 @@ public ActionRequestValidationException validate() { | |
if (text == null) { | ||
validationException = addValidationError("text is missing", validationException); | ||
} | ||
if (tokenFilters == null) { | ||
validationException = addValidationError("tokenFilters is null", validationException); | ||
} | ||
if (charFilters == null) { | ||
validationException = addValidationError("charFilters is null", validationException); | ||
} | ||
return validationException; | ||
} | ||
|
||
|
@@ -135,12 +157,9 @@ public void readFrom(StreamInput in) throws IOException { | |
text = in.readString(); | ||
analyzer = in.readOptionalString(); | ||
tokenizer = in.readOptionalString(); | ||
int size = in.readVInt(); | ||
if (size > 0) { | ||
tokenFilters = new String[size]; | ||
for (int i = 0; i < size; i++) { | ||
tokenFilters[i] = in.readString(); | ||
} | ||
tokenFilters = in.readStringArray(); | ||
if (in.getVersion().onOrAfter(Version.V_1_1_0)) { | ||
charFilters = in.readStringArray(); | ||
} | ||
field = in.readOptionalString(); | ||
} | ||
|
@@ -152,13 +171,9 @@ public void writeTo(StreamOutput out) throws IOException { | |
out.writeString(text); | ||
out.writeOptionalString(analyzer); | ||
out.writeOptionalString(tokenizer); | ||
if (tokenFilters == null) { | ||
out.writeVInt(0); | ||
} else { | ||
out.writeVInt(tokenFilters.length); | ||
for (String tokenFilter : tokenFilters) { | ||
out.writeString(tokenFilter); | ||
} | ||
out.writeStringArrayNullable(tokenFilters); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Review comment text missing from this capture.]
|
||
if (out.getVersion().onOrAfter(Version.V_1_1_0)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. See above, we have a [rest of the review comment missing from this capture] |
||
out.writeStringArrayNullable(charFilters); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [Review comment text missing from this capture.]
|
||
} | ||
out.writeOptionalString(field); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,12 +23,27 @@ | |
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder; | ||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; | ||
import org.elasticsearch.common.Priority; | ||
import org.elasticsearch.common.inject.Injector; | ||
import org.elasticsearch.common.inject.ModulesBuilder; | ||
import org.elasticsearch.common.settings.ImmutableSettings; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.common.settings.SettingsModule; | ||
import org.elasticsearch.common.xcontent.XContentFactory; | ||
import org.elasticsearch.env.Environment; | ||
import org.elasticsearch.env.EnvironmentModule; | ||
import org.elasticsearch.index.Index; | ||
import org.elasticsearch.index.IndexNameModule; | ||
import org.elasticsearch.index.analysis.AnalysisModule; | ||
import org.elasticsearch.index.analysis.AnalysisService; | ||
import org.elasticsearch.index.settings.IndexSettingsModule; | ||
import org.elasticsearch.indices.analysis.IndicesAnalysisModule; | ||
import org.elasticsearch.indices.analysis.IndicesAnalysisService; | ||
import org.elasticsearch.test.ElasticsearchIntegrationTest; | ||
import org.junit.Test; | ||
|
||
import java.io.IOException; | ||
|
||
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; | ||
import static org.hamcrest.Matchers.equalTo; | ||
|
||
/** | ||
|
@@ -106,6 +121,50 @@ public void analyzeWithNoIndex() throws Exception { | |
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("keyword").setTokenFilters("lowercase").execute().actionGet(); | ||
assertThat(analyzeResponse.getTokens().size(), equalTo(1)); | ||
assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test")); | ||
|
||
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").setTokenFilters("lowercase", "reverse").execute().actionGet(); | ||
assertThat(analyzeResponse.getTokens().size(), equalTo(4)); | ||
AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0); | ||
assertThat(token.getTerm(), equalTo("siht")); | ||
token = analyzeResponse.getTokens().get(1); | ||
assertThat(token.getTerm(), equalTo("si")); | ||
token = analyzeResponse.getTokens().get(2); | ||
assertThat(token.getTerm(), equalTo("a")); | ||
token = analyzeResponse.getTokens().get(3); | ||
assertThat(token.getTerm(), equalTo("tset")); | ||
} | ||
|
||
// Exercises the analyze API with char filters: "html_strip" (not defined in the index settings below) | ||
// and "custom_mapping", a mapping char filter configured on the index "test". | ||
@Test | ||
public void analyzeWithCharFilters() throws Exception { | ||
|
||
// Index settings: a "mapping" char filter named custom_mapping (ph=>f, qu=>q) and an analyzer referencing it. | ||
ImmutableSettings.Builder settings = settingsBuilder() | ||
.put("index.analysis.char_filter.custom_mapping.type", "mapping") | ||
.putArray("index.analysis.char_filter.custom_mapping.mappings", "ph=>f", "qu=>q") | ||
.put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard") | ||
.putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "custom_mapping"); | ||
|
||
// Create index "test" with these settings and wait for green status before analyzing. | ||
prepareCreate("test", 1, settings).execute().actionGet(); | ||
client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().execute().actionGet(); | ||
|
||
// No index given: html_strip with the standard tokenizer is expected to yield four tokens. | ||
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("<h2><b>THIS</b> IS A</h2> <a href=\"#\">TEST</a>").setTokenizer("standard").setCharFilters("html_strip").execute().actionGet(); | ||
assertThat(analyzeResponse.getTokens().size(), equalTo(4)); | ||
|
||
// keyword tokenizer + lowercase token filter: the text is expected back as one lowercased token without markup. | ||
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A <b>TEST</b>").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("html_strip").execute().actionGet(); | ||
assertThat(analyzeResponse.getTokens().size(), equalTo(1)); | ||
assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test")); | ||
|
||
// Analyze against index "test" (presumably so the index-level custom_mapping filter can be resolved); expects "jeff qit fish". | ||
analyzeResponse = client().admin().indices().prepareAnalyze("test", "jeff quit phish").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("custom_mapping").execute().actionGet(); | ||
assertThat(analyzeResponse.getTokens().size(), equalTo(1)); | ||
assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("jeff qit fish")); | ||
|
||
// Both char filters in one request; expects the three tokens jeff, qit, fish in that order. | ||
analyzeResponse = client().admin().indices().prepareAnalyze("test", "<a href=\"#\">jeff quit fish</a>").setTokenizer("standard").setCharFilters("html_strip", "custom_mapping").execute().actionGet(); | ||
assertThat(analyzeResponse.getTokens().size(), equalTo(3)); | ||
AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0); | ||
assertThat(token.getTerm(), equalTo("jeff")); | ||
token = analyzeResponse.getTokens().get(1); | ||
assertThat(token.getTerm(), equalTo("qit")); | ||
token = analyzeResponse.getTokens().get(2); | ||
assertThat(token.getTerm(), equalTo("fish")); | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe we could also test the case where we have a custom char filter registered under a specific index? |
||
@Test | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please add brackets (braces) to this statement and move the `throw` onto a new line?