From b3275b7023517612eb29ad7fde2de788e4ebd5ab Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 8 Jun 2018 13:12:38 +0100 Subject: [PATCH 01/15] Multiplexing filter --- .../index/analysis/AnalysisRegistry.java | 16 +- .../MultiplexingTokenFilterFactory.java | 145 ++++++++++++++++++ .../index/analysis/AnalysisRegistryTests.java | 92 +++++++++++ 3 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java diff --git a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index 61b5cb9171244..1a39d861626c4 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -166,7 +166,21 @@ public Map buildTokenFilterFactories(IndexSettings i */ tokenFilters.put("synonym", requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings))); tokenFilters.put("synonym_graph", requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings))); - return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.preConfiguredTokenFilters); + + // Add multiplexer as a special case + tokenFilters.put("multiplexer", requiresAnalysisSettings(MultiplexingTokenFilterFactory::new)); + + Map mappings + = buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.preConfiguredTokenFilters); + + // Multiplexers refer to other tokenfilters, so they need all filters to be built before they + // can finalise construction + for (TokenFilterFactory tff : mappings.values()) { + if (tff instanceof MultiplexingTokenFilterFactory) { + 
((MultiplexingTokenFilterFactory)tff).buildFilters(mappings); + } + } + return mappings; } public Map buildTokenizerFactories(IndexSettings indexSettings) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java b/server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java new file mode 100644 index 0000000000000..9508f5cd7f589 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java @@ -0,0 +1,145 @@ +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.indices.analysis.AnalysisModule; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +public class MultiplexingTokenFilterFactory extends AbstractTokenFilterFactory { + + private List filters; + private List filterNames; + + private static final TokenFilterFactory IDENTITY_FACTORY = new TokenFilterFactory() { + @Override + public String name() { + return "identity"; + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return tokenStream; + } + }; + + public MultiplexingTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) throws IOException { + super(indexSettings, name, settings); + this.filterNames = settings.getAsList("filters"); + } + + @Override + public TokenStream create(TokenStream tokenStream) 
{ + List> functions = new ArrayList<>(); + for (TokenFilterFactory tff : filters) { + functions.add(tff::create); + } + return new MultiplexTokenFilter(tokenStream, functions); + } + + public void buildFilters(Map factories) { + filters = new ArrayList<>(); + for (String filter : filterNames) { + if ("identity".equals(filter)) { + filters.add(IDENTITY_FACTORY); + } + else if (factories.containsKey(filter) == false) { + throw new IllegalArgumentException("Multiplexing filter [" + name() + "] refers to undefined tokenfilter [" + filter + "]"); + } + else { + filters.add(factories.get(filter)); + } + } + } + + private final class MultiplexTokenFilter extends TokenFilter { + + private final TokenStream source; + private final int filterCount; + + private int selector; + + /** + * Creates a MultiplexTokenFilter on the given input with a set of filters + */ + public MultiplexTokenFilter(TokenStream input, List> filters) { + super(input); + TokenStream source = new MultiplexerFilter(input); + for (int i = 0; i < filters.size(); i++) { + final int slot = i; + source = new ConditionalTokenFilter(source, filters.get(i)) { + @Override + protected boolean shouldFilter() { + return slot == selector; + } + }; + } + this.source = source; + this.filterCount = filters.size(); + } + + @Override + public boolean incrementToken() throws IOException { + return source.incrementToken(); + } + + @Override + public void end() throws IOException { + source.end(); + } + + @Override + public void reset() throws IOException { + source.reset(); + } + + private final class MultiplexerFilter extends TokenFilter { + + State state; + PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); + + private MultiplexerFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (selector >= filterCount - 1) { + state = null; + selector = 0; + } + if (state == null) { + if (input.incrementToken() == false) { + 
return false; + } + state = captureState(); + return true; + } + restoreState(state); + posIncAtt.setPositionIncrement(0); + selector++; + return true; + } + + @Override + public void reset() throws IOException { + super.reset(); + selector = 0; + this.state = null; + } + } + + } +} diff --git a/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java b/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java index 36da9761b978d..12777a80a7171 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java @@ -22,10 +22,12 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks; import org.apache.lucene.analysis.MockTokenFilter; +import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; @@ -239,4 +241,94 @@ public void testEnsureCloseInvocationProperlyDelegated() throws IOException { registry.close(); verify(mock).close(); } + + private final class TruncateTokenFilter extends TokenFilter { + + CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + + final int length; + + public TruncateTokenFilter(TokenStream input, int length) { + super(input); + this.length = length; + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken() == false) + return false; + termAtt.setLength(length); + return true; + } + } + + public void testMultiplexingFilter() throws IOException { + Settings settings = 
Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .build(); + Settings indexSettings = Settings.builder() + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put("index.analysis.filter.myNgramFilter.type", "truncate") + .put("index.analysis.filter.myNgramFilter.length", "2") + .put("index.analysis.filter.multiplexFilter.type", "multiplexer") + .putList("index.analysis.filter.multiplexFilter.filters", "identity", "lowercase", "myNgramFilter") + .put("index.analysis.analyzer.myAnalyzer.type", "custom") + .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard") + .putList("index.analysis.analyzer.myAnalyzer.filter", "multiplexFilter") + .build(); + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); + + AnalysisPlugin plugin = new AnalysisPlugin() { + + class TruncateFactory extends AbstractTokenFilterFactory { + + final int length; + + public TruncateFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + super(indexSettings, name, settings); + this.length = settings.getAsInt("length", 4); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new TruncateTokenFilter(tokenStream, length); + } + } + + @Override + public Map> getTokenFilters() { + return singletonMap("truncate", TruncateFactory::new); + } + }; + + IndexAnalyzers indexAnalyzers = new AnalysisModule(TestEnvironment.newEnvironment(settings), + Collections.singletonList(plugin)).getAnalysisRegistry().build(idxSettings); + + try (NamedAnalyzer analyzer = indexAnalyzers.get("myAnalyzer")) { + assertNotNull(analyzer); + TokenStream tokenStream = analyzer.tokenStream("foo", "ONe tHree"); + tokenStream.reset(); + CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); + PositionIncrementAttribute posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class); + assertTrue(tokenStream.incrementToken()); + 
assertEquals("ONe", charTermAttribute.toString()); + assertEquals(1, posIncAtt.getPositionIncrement()); + assertTrue(tokenStream.incrementToken()); + assertEquals("one", charTermAttribute.toString()); + assertEquals(0, posIncAtt.getPositionIncrement()); + assertTrue(tokenStream.incrementToken()); + assertEquals("ON", charTermAttribute.toString()); + assertEquals(0, posIncAtt.getPositionIncrement()); + assertTrue(tokenStream.incrementToken()); + assertEquals("tHree", charTermAttribute.toString()); + assertEquals(1, posIncAtt.getPositionIncrement()); + assertTrue(tokenStream.incrementToken()); + assertEquals("three", charTermAttribute.toString()); + assertEquals(0, posIncAtt.getPositionIncrement()); + assertTrue(tokenStream.incrementToken()); + assertEquals("tH", charTermAttribute.toString()); + assertEquals(0, posIncAtt.getPositionIncrement()); + assertFalse(tokenStream.incrementToken()); + } + } } From 0f6598b9f3d9b36fff125467d03c4e46c7a6dd97 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 8 Jun 2018 16:32:56 +0100 Subject: [PATCH 02/15] Allow chaining; docs --- docs/reference/analysis/tokenfilters.asciidoc | 2 + .../multiplexer-tokenfilter.asciidoc | 110 ++++++++++++++++++ .../MultiplexingTokenFilterFactory.java | 44 ++++++- .../index/analysis/AnalysisRegistryTests.java | 50 ++++++-- 4 files changed, 192 insertions(+), 14 deletions(-) create mode 100644 docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc diff --git a/docs/reference/analysis/tokenfilters.asciidoc b/docs/reference/analysis/tokenfilters.asciidoc index 6e77b4498650d..15a18004a2525 100644 --- a/docs/reference/analysis/tokenfilters.asciidoc +++ b/docs/reference/analysis/tokenfilters.asciidoc @@ -35,6 +35,8 @@ include::tokenfilters/word-delimiter-tokenfilter.asciidoc[] include::tokenfilters/word-delimiter-graph-tokenfilter.asciidoc[] +include::tokenfilters/multiplexer-tokenfilter.asciidoc[] + include::tokenfilters/stemmer-tokenfilter.asciidoc[] 
include::tokenfilters/stemmer-override-tokenfilter.asciidoc[] diff --git a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc new file mode 100644 index 0000000000000..abe5fac0acef0 --- /dev/null +++ b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc @@ -0,0 +1,110 @@ +[[analysis-multiplexer-tokenfilter]] +=== Multiplexer Token Filter + +A token filter of type `multiplexer` will emit multiple tokens at the same position, +each version of the token having been run through a different filter. + + +[float] +=== Options +[horizontal] +filters:: a list of token filters to apply to incoming tokens. These can be any + token filters defined elsewhere in the index mappings, or the special `identity` + filter that will emit the token unchanged. Filters can be chained + using a comma-delimited string, so for example `"lowercase, porter_stem"` would + apply the `lowercase` filter and then the `porter_stem` filter to a single token + +[float] +=== Settings example + +You can set it up like: + +[source,js] +-------------------------------------------------- +PUT /multiplexer_example +{ + "settings" : { + "analysis" : { + "analyzer" : { + "my_analyzer" : { + "tokenizer" : "standard", + "filter" : [ "my_multiplexer" ] + } + }, + "filter" : { + "my_multiplexer" : { + "type" : "multiplexer", + "filters" : [ "identity", "lowercase", "lowercase, porter_stem" ] + } + } + } + } +} +-------------------------------------------------- +// CONSOLE + +And test it like: + +[source,js] +-------------------------------------------------- +POST /multiplexer_example/_analyze +{ + "analyzer" : "my_analyzer", + "text" : "Going HOME" +} +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +And it'd respond: + +[source,js] +-------------------------------------------------- +{ + "tokens": [ + { + "token": "Going", + "start_offset": 0, + "end_offset": 5, + 
"type": "<ALPHANUM>", + "position": 1 + }, + { + "token": "going", + "start_offset": 0, + "end_offset": 5, + "type": "<ALPHANUM>", + "position": 1 + }, + { + "token": "go", + "start_offset": 0, + "end_offset": 5, + "type": "<ALPHANUM>", + "position": 1 + }, + { + "token": "HOME", + "start_offset": 6, + "end_offset": 10, + "type": "<ALPHANUM>", + "position": 2 + }, + { + "token": "home", + "start_offset": 6, + "end_offset": 10, + "type": "<ALPHANUM>", + "position": 2 + }, + { + "token": "home", + "start_offset": 6, + "end_offset": 10, + "type": "<ALPHANUM>", + "position": 2 + } + ] +} +-------------------------------------------------- +// TESTRESPONSE \ No newline at end of file diff --git a/server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java b/server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java index 9508f5cd7f589..0fea3ebb4ed25 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java @@ -4,6 +4,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; @@ -52,15 +53,46 @@ public TokenStream create(TokenStream tokenStream) { public void buildFilters(Map factories) { filters = new ArrayList<>(); for (String filter : filterNames) { - if ("identity".equals(filter)) { - filters.add(IDENTITY_FACTORY); - } - else if (factories.containsKey(filter) == false) { - throw new IllegalArgumentException("Multiplexing filter [" + name() + "] refers to undefined tokenfilter [" + filter + "]"); + String[] parts = Strings.tokenizeToStringArray(filter, ","); + if (parts.length == 1) { + 
filters.add(resolveFilterFactory(factories, parts[0])); } else { - filters.add(factories.get(filter)); + List chain = new ArrayList<>(); + for (String subfilter : parts) { + chain.add(resolveFilterFactory(factories, subfilter)); + } + filters.add(chainFilters(filter, chain)); + } + } + } + + private TokenFilterFactory chainFilters(String name, List filters) { + return new TokenFilterFactory() { + @Override + public String name() { + return name; } + + @Override + public TokenStream create(TokenStream tokenStream) { + for (TokenFilterFactory tff : filters) { + tokenStream = tff.create(tokenStream); + } + return tokenStream; + } + }; + } + + private TokenFilterFactory resolveFilterFactory(Map factories, String name) { + if ("identity".equals(name)) { + return IDENTITY_FACTORY; + } + else if (factories.containsKey(name) == false) { + throw new IllegalArgumentException("Multiplexing filter [" + name() + "] refers to undefined tokenfilter [" + name + "]"); + } + else { + return factories.get(name); } } diff --git a/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java b/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java index 12777a80a7171..25b0816118cb8 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java @@ -21,6 +21,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks; +import org.apache.lucene.analysis.CharacterUtils; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; @@ -44,6 +45,7 @@ import java.io.IOException; import java.util.Collections; +import java.util.HashMap; import java.util.Map; import static java.util.Collections.emptyMap; @@ -262,16 +264,33 @@ public boolean incrementToken() throws IOException { } } + private final class UppercaseTokenFilter extends TokenFilter 
{ + + CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + + protected UppercaseTokenFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken() == false) + return false; + CharacterUtils.toUpperCase(termAtt.buffer(), 0, termAtt.length()); + return true; + } + } + public void testMultiplexingFilter() throws IOException { Settings settings = Settings.builder() .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); Settings indexSettings = Settings.builder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .put("index.analysis.filter.myNgramFilter.type", "truncate") - .put("index.analysis.filter.myNgramFilter.length", "2") + .put("index.analysis.filter.t.type", "truncate") + .put("index.analysis.filter.t.length", "2") .put("index.analysis.filter.multiplexFilter.type", "multiplexer") - .putList("index.analysis.filter.multiplexFilter.filters", "identity", "lowercase", "myNgramFilter") + .putList("index.analysis.filter.multiplexFilter.filters", "identity", "lowercase, t", "uppercase") .put("index.analysis.analyzer.myAnalyzer.type", "custom") .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard") .putList("index.analysis.analyzer.myAnalyzer.filter", "multiplexFilter") @@ -295,9 +314,24 @@ public TokenStream create(TokenStream tokenStream) { } } + class UppercaseFactory extends AbstractTokenFilterFactory { + + public UppercaseFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + super(indexSettings, name, settings); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new UppercaseTokenFilter(tokenStream); + } + } + @Override public Map> getTokenFilters() { - return singletonMap("truncate", TruncateFactory::new); + Map> filters = new HashMap<>(); + filters.put("truncate", TruncateFactory::new); + filters.put("uppercase", UppercaseFactory::new); + return 
filters; } }; @@ -314,19 +348,19 @@ public Map> getTokenFilters() { assertEquals("ONe", charTermAttribute.toString()); assertEquals(1, posIncAtt.getPositionIncrement()); assertTrue(tokenStream.incrementToken()); - assertEquals("one", charTermAttribute.toString()); + assertEquals("on", charTermAttribute.toString()); assertEquals(0, posIncAtt.getPositionIncrement()); assertTrue(tokenStream.incrementToken()); - assertEquals("ON", charTermAttribute.toString()); + assertEquals("ONE", charTermAttribute.toString()); assertEquals(0, posIncAtt.getPositionIncrement()); assertTrue(tokenStream.incrementToken()); assertEquals("tHree", charTermAttribute.toString()); assertEquals(1, posIncAtt.getPositionIncrement()); assertTrue(tokenStream.incrementToken()); - assertEquals("three", charTermAttribute.toString()); + assertEquals("th", charTermAttribute.toString()); assertEquals(0, posIncAtt.getPositionIncrement()); assertTrue(tokenStream.incrementToken()); - assertEquals("tH", charTermAttribute.toString()); + assertEquals("THREE", charTermAttribute.toString()); assertEquals(0, posIncAtt.getPositionIncrement()); assertFalse(tokenStream.incrementToken()); } From 721de2c6e918cd319cb16dcf89bfc585cea0d5ba Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 8 Jun 2018 16:51:46 +0100 Subject: [PATCH 03/15] checkstyle --- .../index/analysis/MultiplexingTokenFilterFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java b/server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java index 0fea3ebb4ed25..67fcc17b22dfc 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java @@ -106,7 +106,7 @@ private final class MultiplexTokenFilter extends TokenFilter { /** * Creates a MultiplexTokenFilter on the given input 
with a set of filters */ - public MultiplexTokenFilter(TokenStream input, List> filters) { + MultiplexTokenFilter(TokenStream input, List> filters) { super(input); TokenStream source = new MultiplexerFilter(input); for (int i = 0; i < filters.size(); i++) { From ac86ce3298a8aef6bda4e8fb1fdd64f0211265f3 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 11 Jun 2018 13:25:22 +0100 Subject: [PATCH 04/15] Move multiplexer to common analysis plugin --- .../analysis/common/CommonAnalysisPlugin.java | 1 + .../MultiplexingTokenFilterFactory.java | 29 ++++- .../common/MultiplexerTokenFilterTests.java | 94 ++++++++++++++ .../index/analysis/AnalysisRegistry.java | 11 +- .../analysis/ReferringFilterFactory.java | 37 ++++++ .../index/analysis/AnalysisRegistryTests.java | 121 ------------------ 6 files changed, 162 insertions(+), 131 deletions(-) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/MultiplexingTokenFilterFactory.java (83%) create mode 100644 modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java create mode 100644 server/src/main/java/org/elasticsearch/index/analysis/ReferringFilterFactory.java diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 24dce7abcf370..7333f56cfa7e2 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -188,6 +188,7 @@ public Map> getTokenFilters() { filters.put("limit", LimitTokenCountFilterFactory::new); filters.put("lowercase", LowerCaseTokenFilterFactory::new); filters.put("min_hash", MinHashTokenFilterFactory::new); + filters.put("multiplexer", 
MultiplexingTokenFilterFactory::new); filters.put("ngram", NGramTokenFilterFactory::new); filters.put("nGram", NGramTokenFilterFactory::new); filters.put("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new)); diff --git a/server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java similarity index 83% rename from server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java index 67fcc17b22dfc..2cc7362e7acbf 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/MultiplexingTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java @@ -1,4 +1,23 @@ -package org.elasticsearch.index.analysis; +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; @@ -8,6 +27,9 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; +import org.elasticsearch.index.analysis.ReferringFilterFactory; +import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.indices.analysis.AnalysisModule; import java.io.IOException; @@ -19,7 +41,7 @@ import java.util.function.Supplier; import java.util.stream.Collectors; -public class MultiplexingTokenFilterFactory extends AbstractTokenFilterFactory { +public class MultiplexingTokenFilterFactory extends AbstractTokenFilterFactory implements ReferringFilterFactory { private List filters; private List filterNames; @@ -50,7 +72,8 @@ public TokenStream create(TokenStream tokenStream) { return new MultiplexTokenFilter(tokenStream, functions); } - public void buildFilters(Map factories) { + @Override + public void addReferences(Map factories) { filters = new ArrayList<>(); for (String filter : filterNames) { String[] parts = Strings.tokenizeToStringArray(filter, ","); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java new file mode 100644 index 0000000000000..574beb0f8058d --- /dev/null +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java @@ -0,0 +1,94 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.analysis.common; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.TestEnvironment; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; +import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.indices.analysis.AnalysisModule; +import org.elasticsearch.plugins.AnalysisPlugin; +import org.elasticsearch.test.ESTokenStreamTestCase; +import org.elasticsearch.test.IndexSettingsModule; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; + +public class MultiplexerTokenFilterTests extends ESTokenStreamTestCase { + + public void testMultiplexingFilter() throws IOException { + Settings settings = Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) 
+ .build(); + Settings indexSettings = Settings.builder() + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put("index.analysis.filter.t.type", "truncate") + .put("index.analysis.filter.t.length", "2") + .put("index.analysis.filter.multiplexFilter.type", "multiplexer") + .putList("index.analysis.filter.multiplexFilter.filters", "identity", "lowercase, t", "uppercase") + .put("index.analysis.analyzer.myAnalyzer.type", "custom") + .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard") + .putList("index.analysis.analyzer.myAnalyzer.filter", "multiplexFilter") + .build(); + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); + + IndexAnalyzers indexAnalyzers = new AnalysisModule(TestEnvironment.newEnvironment(settings), + Collections.singletonList(new CommonAnalysisPlugin())).getAnalysisRegistry().build(idxSettings); + + try (NamedAnalyzer analyzer = indexAnalyzers.get("myAnalyzer")) { + assertNotNull(analyzer); + TokenStream tokenStream = analyzer.tokenStream("foo", "ONe tHree"); + tokenStream.reset(); + CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); + PositionIncrementAttribute posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class); + assertTrue(tokenStream.incrementToken()); + assertEquals("ONe", charTermAttribute.toString()); + assertEquals(1, posIncAtt.getPositionIncrement()); + assertTrue(tokenStream.incrementToken()); + assertEquals("on", charTermAttribute.toString()); + assertEquals(0, posIncAtt.getPositionIncrement()); + assertTrue(tokenStream.incrementToken()); + assertEquals("ONE", charTermAttribute.toString()); + assertEquals(0, posIncAtt.getPositionIncrement()); + assertTrue(tokenStream.incrementToken()); + assertEquals("tHree", charTermAttribute.toString()); + assertEquals(1, posIncAtt.getPositionIncrement()); + assertTrue(tokenStream.incrementToken()); + assertEquals("th", charTermAttribute.toString()); + assertEquals(0, 
posIncAtt.getPositionIncrement()); + assertTrue(tokenStream.incrementToken()); + assertEquals("THREE", charTermAttribute.toString()); + assertEquals(0, posIncAtt.getPositionIncrement()); + assertFalse(tokenStream.incrementToken()); + } + } + +} diff --git a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index 1a39d861626c4..dbe71f566be3d 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -167,17 +167,14 @@ public Map buildTokenFilterFactories(IndexSettings i tokenFilters.put("synonym", requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings))); tokenFilters.put("synonym_graph", requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings))); - // Add multiplexer as a special case - tokenFilters.put("multiplexer", requiresAnalysisSettings(MultiplexingTokenFilterFactory::new)); - Map mappings = buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.preConfiguredTokenFilters); - // Multiplexers refer to other tokenfilters, so they need all filters to be built before they - // can finalise construction + // ReferringTokenFilters require references to other tokenfilters, so we pass these in + // after all factories have been registered for (TokenFilterFactory tff : mappings.values()) { - if (tff instanceof MultiplexingTokenFilterFactory) { - ((MultiplexingTokenFilterFactory)tff).buildFilters(mappings); + if (tff instanceof ReferringFilterFactory) { + ((ReferringFilterFactory)tff).addReferences(mappings); } } return mappings; diff --git a/server/src/main/java/org/elasticsearch/index/analysis/ReferringFilterFactory.java 
b/server/src/main/java/org/elasticsearch/index/analysis/ReferringFilterFactory.java new file mode 100644 index 0000000000000..3b37523ceef4c --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/analysis/ReferringFilterFactory.java @@ -0,0 +1,37 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import java.util.Map; + +/** + * Marks a {@link TokenFilterFactory} that refers to other filter factories. 
+ * + * The analysis registry will call {@link #addReferences(Map)} with a map of all + * available TokenFilterFactories after all factories have been registered + */ +public interface ReferringFilterFactory { + + /** + * Called with a map of all registered filter factories + */ + void addReferences(Map factories); + +} diff --git a/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java b/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java index 25b0816118cb8..72ec035948384 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java @@ -244,125 +244,4 @@ public void testEnsureCloseInvocationProperlyDelegated() throws IOException { verify(mock).close(); } - private final class TruncateTokenFilter extends TokenFilter { - - CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - - final int length; - - public TruncateTokenFilter(TokenStream input, int length) { - super(input); - this.length = length; - } - - @Override - public boolean incrementToken() throws IOException { - if (input.incrementToken() == false) - return false; - termAtt.setLength(length); - return true; - } - } - - private final class UppercaseTokenFilter extends TokenFilter { - - CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - - protected UppercaseTokenFilter(TokenStream input) { - super(input); - } - - @Override - public boolean incrementToken() throws IOException { - if (input.incrementToken() == false) - return false; - CharacterUtils.toUpperCase(termAtt.buffer(), 0, termAtt.length()); - return true; - } - } - - public void testMultiplexingFilter() throws IOException { - Settings settings = Settings.builder() - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) - .build(); - Settings indexSettings = Settings.builder() - .put(IndexMetaData.SETTING_VERSION_CREATED, 
Version.CURRENT) - .put("index.analysis.filter.t.type", "truncate") - .put("index.analysis.filter.t.length", "2") - .put("index.analysis.filter.multiplexFilter.type", "multiplexer") - .putList("index.analysis.filter.multiplexFilter.filters", "identity", "lowercase, t", "uppercase") - .put("index.analysis.analyzer.myAnalyzer.type", "custom") - .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard") - .putList("index.analysis.analyzer.myAnalyzer.filter", "multiplexFilter") - .build(); - IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); - - AnalysisPlugin plugin = new AnalysisPlugin() { - - class TruncateFactory extends AbstractTokenFilterFactory { - - final int length; - - public TruncateFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { - super(indexSettings, name, settings); - this.length = settings.getAsInt("length", 4); - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new TruncateTokenFilter(tokenStream, length); - } - } - - class UppercaseFactory extends AbstractTokenFilterFactory { - - public UppercaseFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { - super(indexSettings, name, settings); - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new UppercaseTokenFilter(tokenStream); - } - } - - @Override - public Map> getTokenFilters() { - Map> filters = new HashMap<>(); - filters.put("truncate", TruncateFactory::new); - filters.put("uppercase", UppercaseFactory::new); - return filters; - } - }; - - IndexAnalyzers indexAnalyzers = new AnalysisModule(TestEnvironment.newEnvironment(settings), - Collections.singletonList(plugin)).getAnalysisRegistry().build(idxSettings); - - try (NamedAnalyzer analyzer = indexAnalyzers.get("myAnalyzer")) { - assertNotNull(analyzer); - TokenStream tokenStream = analyzer.tokenStream("foo", "ONe tHree"); - tokenStream.reset(); - CharTermAttribute 
charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); - PositionIncrementAttribute posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class); - assertTrue(tokenStream.incrementToken()); - assertEquals("ONe", charTermAttribute.toString()); - assertEquals(1, posIncAtt.getPositionIncrement()); - assertTrue(tokenStream.incrementToken()); - assertEquals("on", charTermAttribute.toString()); - assertEquals(0, posIncAtt.getPositionIncrement()); - assertTrue(tokenStream.incrementToken()); - assertEquals("ONE", charTermAttribute.toString()); - assertEquals(0, posIncAtt.getPositionIncrement()); - assertTrue(tokenStream.incrementToken()); - assertEquals("tHree", charTermAttribute.toString()); - assertEquals(1, posIncAtt.getPositionIncrement()); - assertTrue(tokenStream.incrementToken()); - assertEquals("th", charTermAttribute.toString()); - assertEquals(0, posIncAtt.getPositionIncrement()); - assertTrue(tokenStream.incrementToken()); - assertEquals("THREE", charTermAttribute.toString()); - assertEquals(0, posIncAtt.getPositionIncrement()); - assertFalse(tokenStream.incrementToken()); - } - } } From 7ad7d9d3b2fc862a209ed360350d649799bbe2a8 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 11 Jun 2018 13:26:53 +0100 Subject: [PATCH 05/15] tidy up --- .../elasticsearch/index/analysis/AnalysisRegistryTests.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java b/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java index 72ec035948384..26a5b87866c21 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java @@ -20,15 +20,11 @@ package org.elasticsearch.index.analysis; import com.carrotsearch.randomizedtesting.generators.RandomPicks; - -import org.apache.lucene.analysis.CharacterUtils; import 
org.apache.lucene.analysis.MockTokenFilter; -import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; @@ -45,7 +41,6 @@ import java.io.IOException; import java.util.Collections; -import java.util.HashMap; import java.util.Map; import static java.util.Collections.emptyMap; @@ -243,5 +238,4 @@ public void testEnsureCloseInvocationProperlyDelegated() throws IOException { registry.close(); verify(mock).close(); } - } From 3cc89b2550e3f19f595d4753a0bf74003a9ca686 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 11 Jun 2018 16:21:46 +0100 Subject: [PATCH 06/15] addRef -> setRefs --- .../analysis/common/MultiplexingTokenFilterFactory.java | 2 +- .../org/elasticsearch/index/analysis/AnalysisRegistry.java | 2 +- .../elasticsearch/index/analysis/ReferringFilterFactory.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java index 2cc7362e7acbf..e607908b23422 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java @@ -73,7 +73,7 @@ public TokenStream create(TokenStream tokenStream) { } @Override - public void addReferences(Map factories) { + public void setReferences(Map factories) { filters = new ArrayList<>(); for (String 
filter : filterNames) { String[] parts = Strings.tokenizeToStringArray(filter, ","); diff --git a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index dbe71f566be3d..c61a7cf070680 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -174,7 +174,7 @@ public Map buildTokenFilterFactories(IndexSettings i // after all factories have been registered for (TokenFilterFactory tff : mappings.values()) { if (tff instanceof ReferringFilterFactory) { - ((ReferringFilterFactory)tff).addReferences(mappings); + ((ReferringFilterFactory)tff).setReferences(mappings); } } return mappings; diff --git a/server/src/main/java/org/elasticsearch/index/analysis/ReferringFilterFactory.java b/server/src/main/java/org/elasticsearch/index/analysis/ReferringFilterFactory.java index 3b37523ceef4c..9eb9bc2dbd653 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/ReferringFilterFactory.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/ReferringFilterFactory.java @@ -24,7 +24,7 @@ /** * Marks a {@link TokenFilterFactory} that refers to other filter factories. 
* - * The analysis registry will call {@link #addReferences(Map)} with a map of all + * The analysis registry will call {@link #setReferences(Map)} with a map of all * available TokenFilterFactories after all factories have been registered */ public interface ReferringFilterFactory { @@ -32,6 +32,6 @@ public interface ReferringFilterFactory { /** * Called with a map of all registered filter factories */ - void addReferences(Map factories); + void setReferences(Map factories); } From f367fefb3adc43ca3af641620c0187aab5d8c833 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 11 Jun 2018 16:25:03 +0100 Subject: [PATCH 07/15] simplify --- .../analysis/common/MultiplexingTokenFilterFactory.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java index e607908b23422..7d296dce34676 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java @@ -172,10 +172,7 @@ private MultiplexerFilter(TokenStream input) { @Override public boolean incrementToken() throws IOException { if (selector >= filterCount - 1) { - state = null; selector = 0; - } - if (state == null) { if (input.incrementToken() == false) { return false; } @@ -191,7 +188,7 @@ public boolean incrementToken() throws IOException { @Override public void reset() throws IOException { super.reset(); - selector = 0; + selector = filterCount - 1; this.state = null; } } From 692542c203726f8b86a914b8fae70005778d9ed6 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 11 Jun 2018 16:34:34 +0100 Subject: [PATCH 08/15] Add preserve_original settings --- .../MultiplexingTokenFilterFactory.java | 11 +-- 
.../common/MultiplexerTokenFilterTests.java | 70 ++++++++++--------- 2 files changed, 45 insertions(+), 36 deletions(-) diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java index 7d296dce34676..74c948fa105c7 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java @@ -45,6 +45,7 @@ public class MultiplexingTokenFilterFactory extends AbstractTokenFilterFactory i private List filters; private List filterNames; + private final boolean preserveOriginal; private static final TokenFilterFactory IDENTITY_FACTORY = new TokenFilterFactory() { @Override @@ -61,6 +62,7 @@ public TokenStream create(TokenStream tokenStream) { public MultiplexingTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) throws IOException { super(indexSettings, name, settings); this.filterNames = settings.getAsList("filters"); + this.preserveOriginal = settings.getAsBoolean("preserveOriginal", true); } @Override @@ -75,6 +77,9 @@ public TokenStream create(TokenStream tokenStream) { @Override public void setReferences(Map factories) { filters = new ArrayList<>(); + if (preserveOriginal) { + filters.add(IDENTITY_FACTORY); + } for (String filter : filterNames) { String[] parts = Strings.tokenizeToStringArray(filter, ","); if (parts.length == 1) { @@ -108,10 +113,7 @@ public TokenStream create(TokenStream tokenStream) { } private TokenFilterFactory resolveFilterFactory(Map factories, String name) { - if ("identity".equals(name)) { - return IDENTITY_FACTORY; - } - else if (factories.containsKey(name) == false) { + if (factories.containsKey(name) == false) { throw new IllegalArgumentException("Multiplexing filter 
[" + name() + "] refers to undefined tokenfilter [" + name + "]"); } else { @@ -143,6 +145,7 @@ protected boolean shouldFilter() { } this.source = source; this.filterCount = filters.size(); + this.selector = filterCount - 1; } @Override diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java index 574beb0f8058d..55f5359d7927c 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java @@ -19,28 +19,20 @@ package org.elasticsearch.analysis.common; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.env.TestEnvironment; import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; -import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.indices.analysis.AnalysisModule; -import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.test.ESTokenStreamTestCase; import org.elasticsearch.test.IndexSettingsModule; import java.io.IOException; import java.util.Collections; -import java.util.HashMap; public class MultiplexerTokenFilterTests extends ESTokenStreamTestCase { @@ -53,7 +45,7 @@ public void testMultiplexingFilter() 
throws IOException { .put("index.analysis.filter.t.type", "truncate") .put("index.analysis.filter.t.length", "2") .put("index.analysis.filter.multiplexFilter.type", "multiplexer") - .putList("index.analysis.filter.multiplexFilter.filters", "identity", "lowercase, t", "uppercase") + .putList("index.analysis.filter.multiplexFilter.filters", "lowercase, t", "uppercase") .put("index.analysis.analyzer.myAnalyzer.type", "custom") .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard") .putList("index.analysis.analyzer.myAnalyzer.filter", "multiplexFilter") @@ -65,30 +57,44 @@ public void testMultiplexingFilter() throws IOException { try (NamedAnalyzer analyzer = indexAnalyzers.get("myAnalyzer")) { assertNotNull(analyzer); - TokenStream tokenStream = analyzer.tokenStream("foo", "ONe tHree"); - tokenStream.reset(); - CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); - PositionIncrementAttribute posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class); - assertTrue(tokenStream.incrementToken()); - assertEquals("ONe", charTermAttribute.toString()); - assertEquals(1, posIncAtt.getPositionIncrement()); - assertTrue(tokenStream.incrementToken()); - assertEquals("on", charTermAttribute.toString()); - assertEquals(0, posIncAtt.getPositionIncrement()); - assertTrue(tokenStream.incrementToken()); - assertEquals("ONE", charTermAttribute.toString()); - assertEquals(0, posIncAtt.getPositionIncrement()); - assertTrue(tokenStream.incrementToken()); - assertEquals("tHree", charTermAttribute.toString()); - assertEquals(1, posIncAtt.getPositionIncrement()); - assertTrue(tokenStream.incrementToken()); - assertEquals("th", charTermAttribute.toString()); - assertEquals(0, posIncAtt.getPositionIncrement()); - assertTrue(tokenStream.incrementToken()); - assertEquals("THREE", charTermAttribute.toString()); - assertEquals(0, posIncAtt.getPositionIncrement()); - assertFalse(tokenStream.incrementToken()); + assertAnalyzesTo(analyzer, 
"ONe tHree", new String[]{ + "ONe", "on", "ONE", "tHree", "th", "THREE" + }, new int[]{ + 1, 0, 0, 1, 0, 0 + }); } } + public void testMultiplexingNoOriginal() throws IOException { + + Settings settings = Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .build(); + Settings indexSettings = Settings.builder() + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put("index.analysis.filter.t.type", "truncate") + .put("index.analysis.filter.t.length", "2") + .put("index.analysis.filter.multiplexFilter.type", "multiplexer") + .put("index.analysis.filter.multiplexFilter.preserveOriginal", "false") + .putList("index.analysis.filter.multiplexFilter.filters", "lowercase, t", "uppercase") + .put("index.analysis.analyzer.myAnalyzer.type", "custom") + .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard") + .putList("index.analysis.analyzer.myAnalyzer.filter", "multiplexFilter") + .build(); + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); + + IndexAnalyzers indexAnalyzers = new AnalysisModule(TestEnvironment.newEnvironment(settings), + Collections.singletonList(new CommonAnalysisPlugin())).getAnalysisRegistry().build(idxSettings); + + try (NamedAnalyzer analyzer = indexAnalyzers.get("myAnalyzer")) { + assertNotNull(analyzer); + assertAnalyzesTo(analyzer, "ONe tHree", new String[]{ + "on", "ONE", "th", "THREE" + }, new int[]{ + 1, 0, 1, 0, + }); + } + + } + } From 24de7ad8009a889be00413968684f8e57f10cee9 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 11 Jun 2018 16:38:15 +0100 Subject: [PATCH 09/15] docs --- .../tokenfilters/multiplexer-tokenfilter.asciidoc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc index abe5fac0acef0..0b20b8870d6f7 100644 --- 
a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc @@ -4,15 +4,19 @@ A token filter of type `multiplexer` will emit multiple tokens at the same position, each version of the token having been run through a different filter. +Note that the child filters will in effect be passed a mock tokenstream consisting +of a single token, so filters that read ahead in the tokenstream like shingles or +multi-word synonyms will not work properly [float] === Options [horizontal] filters:: a list of token filters to apply to incoming tokens. These can be any - token filters defined elsewhere in the index mappings, or the special `identity` - filter that will emit the token unchanged. Filters can be chained + token filters defined elsewhere in the index mappings. Filters can be chained using a comma-delimited string, so for example `"lowercase, porter_stem"` would apply the `lowercase` filter and then the `porter_stem` filter to a single token +preserve_original:: if `true` (the default) then emit the original token in + addition to the filtered tokens [float] === Settings example @@ -34,7 +38,7 @@ PUT /multiplexer_example "filter" : { "my_multiplexer" : { "type" : "multiplexer", - "filters" : [ "identity", "lowercase", "lowercase, porter_stem" ] + "filters" : [ "lowercase", "lowercase, porter_stem" ] } } } From 65064d934a1100ce7bb72f4d0bfb91d516d0aa6b Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 12 Jun 2018 09:55:44 +0100 Subject: [PATCH 10/15] docs --- .../tokenfilters/multiplexer-tokenfilter.asciidoc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc index 0b20b8870d6f7..b1b7bf3665a7a 100644 --- a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc +++ 
b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc @@ -4,17 +4,16 @@ A token filter of type `multiplexer` will emit multiple tokens at the same position, each version of the token having been run through a different filter. -Note that the child filters will in effect be passed a mock tokenstream consisting -of a single token, so filters that read ahead in the tokenstream like shingles or -multi-word synonyms will not work properly - [float] === Options [horizontal] filters:: a list of token filters to apply to incoming tokens. These can be any token filters defined elsewhere in the index mappings. Filters can be chained using a comma-delimited string, so for example `"lowercase, porter_stem"` would - apply the `lowercase` filter and then the `porter_stem` filter to a single token + apply the `lowercase` filter and then the `porter_stem` filter to a single token. + WARNING: Shingle or multi-word synonym token filters will not function normally + when they are declared in the filters array because they read ahead internally + which is unsupported by the multiplexer preserve_original:: if `true` (the default) then emit the original token in addition to the filtered tokens @@ -111,4 +110,4 @@ And it'd respond: ] } -------------------------------------------------- -// TESTRESPONSE \ No newline at end of file +// TESTRESPONSE From 0aa4bc197e0cdd3862e7cf4e8fd85f799bbdcd3a Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 18 Jun 2018 10:07:56 +0100 Subject: [PATCH 11/15] Add deduplication to the multiplexer example docs --- .../tokenfilters/multiplexer-tokenfilter.asciidoc | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc index b1b7bf3665a7a..770112ababd2c 100644 --- a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc +++ 
b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc @@ -31,7 +31,7 @@ PUT /multiplexer_example "analyzer" : { "my_analyzer" : { "tokenizer" : "standard", - "filter" : [ "my_multiplexer" ] + "filter" : [ "my_multiplexer", "remove_duplicates" ] <1> } }, "filter" : { @@ -46,6 +46,12 @@ PUT /multiplexer_example -------------------------------------------------- // CONSOLE +<1> The `remove_duplicates` filter here will prevent identical tokens being emitted +at the same position. For example, the token `home` in the `_analyze` script below +would be emitted from both the `"lowercase"` and the `"lowercase, porter_stem"` +branches of the multiplexer without this extra filter. Deduplicating will help +preserve term statistics that might otherwise produce odd effects in scoring. + And test it like: [source,js] @@ -93,13 +99,6 @@ And it'd respond: "type": "", "position": 2 }, - { - "token": "home", - "start_offset": 6, - "end_offset": 10, - "type": "", - "position": 2 - }, { "token": "home", "start_offset": 6, From f936171f32192dfa71c1dd6b80431be8928e0950 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 18 Jun 2018 10:28:58 +0100 Subject: [PATCH 12/15] snake_case; make class names consistent --- .../elasticsearch/analysis/common/CommonAnalysisPlugin.java | 2 +- ...ilterFactory.java => MultiplexerTokenFilterFactory.java} | 6 +++--- .../analysis/common/MultiplexerTokenFilterTests.java | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) rename modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/{MultiplexingTokenFilterFactory.java => MultiplexerTokenFilterFactory.java} (95%) diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 76a0b0aa02d34..46606631586fa 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ 
b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -188,7 +188,7 @@ public Map> getTokenFilters() { filters.put("limit", LimitTokenCountFilterFactory::new); filters.put("lowercase", LowerCaseTokenFilterFactory::new); filters.put("min_hash", MinHashTokenFilterFactory::new); - filters.put("multiplexer", MultiplexingTokenFilterFactory::new); + filters.put("multiplexer", MultiplexerTokenFilterFactory::new); filters.put("ngram", NGramTokenFilterFactory::new); filters.put("nGram", NGramTokenFilterFactory::new); filters.put("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new)); diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java similarity index 95% rename from modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java index 74c948fa105c7..5774db6f8c544 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexingTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java @@ -41,7 +41,7 @@ import java.util.function.Supplier; import java.util.stream.Collectors; -public class MultiplexingTokenFilterFactory extends AbstractTokenFilterFactory implements ReferringFilterFactory { +public class MultiplexerTokenFilterFactory extends AbstractTokenFilterFactory implements ReferringFilterFactory { private List filters; private List filterNames; @@ -59,10 +59,10 @@ public TokenStream create(TokenStream tokenStream) { } }; - public MultiplexingTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) throws 
IOException { + public MultiplexerTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) throws IOException { super(indexSettings, name, settings); this.filterNames = settings.getAsList("filters"); - this.preserveOriginal = settings.getAsBoolean("preserveOriginal", true); + this.preserveOriginal = settings.getAsBoolean("preserve_original", true); } @Override diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java index 55f5359d7927c..86b64fd73c5e9 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java @@ -75,7 +75,7 @@ public void testMultiplexingNoOriginal() throws IOException { .put("index.analysis.filter.t.type", "truncate") .put("index.analysis.filter.t.length", "2") .put("index.analysis.filter.multiplexFilter.type", "multiplexer") - .put("index.analysis.filter.multiplexFilter.preserveOriginal", "false") + .put("index.analysis.filter.multiplexFilter.preserve_original", "false") .putList("index.analysis.filter.multiplexFilter.filters", "lowercase, t", "uppercase") .put("index.analysis.analyzer.myAnalyzer.type", "custom") .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard") From de07870ba7ed4b4e41979518eb14d4da1349582d Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 18 Jun 2018 14:20:02 +0100 Subject: [PATCH 13/15] docs --- .../multiplexer-tokenfilter.asciidoc | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc index 770112ababd2c..21d0cff1c0acb 100644 --- 
a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc @@ -75,36 +75,36 @@ And it'd respond: "token": "Going", "start_offset": 0, "end_offset": 5, - "type": "", - "position": 1 + "type": "", + "position": 0 }, { "token": "going", "start_offset": 0, "end_offset": 5, - "type": "", - "position": 1 + "type": "", + "position": 0 }, { "token": "go", "start_offset": 0, "end_offset": 5, - "type": "", - "position": 1 + "type": "", + "position": 0 }, { "token": "HOME", "start_offset": 6, "end_offset": 10, - "type": "", - "position": 2 + "type": "", + "position": 1 }, { "token": "home", "start_offset": 6, "end_offset": 10, - "type": "", - "position": 2 + "type": "", + "position": 1 } ] } From b497fe2e1708a023bedf94c22b1e0c006e881ff0 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 18 Jun 2018 16:58:52 +0100 Subject: [PATCH 14/15] Always remove duplicate tokens --- .../multiplexer-tokenfilter.asciidoc | 24 +++++++++++-------- .../common/MultiplexerTokenFilterFactory.java | 3 ++- .../common/MultiplexerTokenFilterTests.java | 6 +++++ 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc index 21d0cff1c0acb..51937084e3984 100644 --- a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc @@ -2,7 +2,11 @@ === Multiplexer Token Filter A token filter of type `multiplexer` will emit multiple tokens at the same position, -each version of the token having been run through a different filter. +each version of the token having been run through a different filter. Identical +output tokens at the same position will be removed. 
+ +WARNING: If the incoming token stream has duplicate tokens, then these will also be +removed by the multiplexer. [float] === Options @@ -11,12 +15,15 @@ filters:: a list of token filters to apply to incoming tokens. These can be any token filters defined elsewhere in the index mappings. Filters can be chained using a comma-delimited string, so for example `"lowercase, porter_stem"` would apply the `lowercase` filter and then the `porter_stem` filter to a single token. - WARNING: Shingle or multi-word synonym token filters will not function normally + +WARNING: Shingle or multi-word synonym token filters will not function normally when they are declared in the filters array because they read ahead internally which is unsupported by the multiplexer + preserve_original:: if `true` (the default) then emit the original token in addition to the filtered tokens + [float] === Settings example @@ -31,7 +38,7 @@ PUT /multiplexer_example "analyzer" : { "my_analyzer" : { "tokenizer" : "standard", - "filter" : [ "my_multiplexer", "remove_duplicates" ] <1> + "filter" : [ "my_multiplexer" ] } }, "filter" : { @@ -46,12 +53,6 @@ PUT /multiplexer_example -------------------------------------------------- // CONSOLE -<1> The `remove_duplicates` filter here will prevent identical tokens being emitted -at the same position. For example, the token `home` in the `_analyze` script below -would be emitted from both the `"lowercase"` and the `"lowercase, porter_stem"` -branches of the multiplexer without this extra filter. Deduplicating will help -preserve term statistics that might otherwise produce odd effects in scoring. 
- And test it like: [source,js] @@ -100,7 +101,7 @@ And it'd respond: "position": 1 }, { - "token": "home", + "token": "home", <1> "start_offset": 6, "end_offset": 10, "type": "", @@ -110,3 +111,6 @@ And it'd respond: } -------------------------------------------------- // TESTRESPONSE + +<1> The stemmer has also emitted a token `home` at position 1, but because it is a +duplicate of this token it has been removed from the token stream. \ No newline at end of file diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java index 5774db6f8c544..91a853df832ea 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter; +import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; @@ -71,7 +72,7 @@ public TokenStream create(TokenStream tokenStream) { for (TokenFilterFactory tff : filters) { functions.add(tff::create); } - return new MultiplexTokenFilter(tokenStream, functions); + return new RemoveDuplicatesTokenFilter(new MultiplexTokenFilter(tokenStream, functions)); } @Override diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java index 86b64fd73c5e9..c39fa05c26f72 100644 --- 
a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterTests.java @@ -62,6 +62,12 @@ public void testMultiplexingFilter() throws IOException { }, new int[]{ 1, 0, 0, 1, 0, 0 }); + // Duplicates are removed + assertAnalyzesTo(analyzer, "ONe THREE", new String[]{ + "ONe", "on", "ONE", "THREE", "th" + }, new int[]{ + 1, 0, 0, 1, 0, 0 + }); } } From 024d67a1b887d014d0d6064d8124a4db39271e7b Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 19 Jun 2018 10:13:51 +0100 Subject: [PATCH 15/15] checkstyle --- .../analysis/common/MultiplexerTokenFilterFactory.java | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java index 91a853df832ea..1cf5303a77209 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/MultiplexerTokenFilterFactory.java @@ -31,16 +31,12 @@ import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; import org.elasticsearch.index.analysis.ReferringFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.indices.analysis.AnalysisModule; import java.io.IOException; -import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.function.Function; -import java.util.function.Supplier; -import java.util.stream.Collectors; public class MultiplexerTokenFilterFactory extends AbstractTokenFilterFactory implements ReferringFilterFactory { @@ -85,8 +81,7 @@ public void setReferences(Map factories) { String[] parts = 
Strings.tokenizeToStringArray(filter, ","); if (parts.length == 1) { filters.add(resolveFilterFactory(factories, parts[0])); - } - else { + } else { List chain = new ArrayList<>(); for (String subfilter : parts) { chain.add(resolveFilterFactory(factories, subfilter)); @@ -116,8 +111,7 @@ public TokenStream create(TokenStream tokenStream) { private TokenFilterFactory resolveFilterFactory(Map factories, String name) { if (factories.containsKey(name) == false) { throw new IllegalArgumentException("Multiplexing filter [" + name() + "] refers to undefined tokenfilter [" + name + "]"); - } - else { + } else { return factories.get(name); } }