Skip to content

Commit

Permalink
Check stemmer language setting early
Browse files Browse the repository at this point in the history
Currently, the StemmerTokenFilterFactory checks the validity of the `language`
setting only when the first TokenStream is processed. Instead, we should throw an
error earlier, at mapping creation time. This change makes the
StemmerTokenFilterFactory constructor validate the `language` setting
by trying to create a new TokenStream from an empty input stream, so that
errors about wrong language settings are thrown early on.

Closes elastic#34170
  • Loading branch information
Christoph Büscher committed Oct 18, 2018
1 parent 6c07d10 commit f679c90
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
Expand Up @@ -44,6 +44,7 @@
import org.apache.lucene.analysis.id.IndonesianStemFilter;
import org.apache.lucene.analysis.it.ItalianLightStemFilter;
import org.apache.lucene.analysis.lv.LatvianStemFilter;
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
import org.apache.lucene.analysis.no.NorwegianLightStemFilter;
import org.apache.lucene.analysis.no.NorwegianLightStemmer;
import org.apache.lucene.analysis.no.NorwegianMinimalStemFilter;
Expand Down Expand Up @@ -82,13 +83,19 @@
import org.tartarus.snowball.ext.SwedishStemmer;
import org.tartarus.snowball.ext.TurkishStemmer;

import java.io.IOException;

public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {

private final static TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream();

private String language;

StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
super(indexSettings, name, settings);
this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
// check that we have a valid language by trying to create a TokenStream
create(EMPTY_TOKEN_STREAM).close();
}

@Override
Expand Down
Expand Up @@ -69,7 +69,6 @@ public void testEnglishFilterFactory() throws IOException {
assertThat(create, instanceOf(PorterStemFilter.class));
assertAnalyzesTo(analyzer, "consolingly", new String[]{"consolingli"});
}

}

public void testPorter2FilterFactory() throws IOException {
Expand Down Expand Up @@ -97,7 +96,16 @@ public void testPorter2FilterFactory() throws IOException {
assertThat(create, instanceOf(SnowballFilter.class));
assertAnalyzesTo(analyzer, "possibly", new String[]{"possibl"});
}

}

/**
 * A stemmer filter configured with a list of languages instead of a single value
 * must be rejected while the analysis components are being created.
 */
public void testMultipleLanguagesThrowsException() throws IOException {
    final Version createdVersion = VersionUtils.randomVersion(random());

    // Assemble the index settings step by step, in the same order as a fluent chain would.
    final Settings.Builder builder = Settings.builder();
    builder.put("index.analysis.filter.my_english.type", "stemmer");
    builder.putList("index.analysis.filter.my_english.language", "english", "light_english");
    builder.put(SETTING_VERSION_CREATED, createdVersion);
    builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString());
    final Settings settings = builder.build();

    // Creating the test analysis is expected to fail on the two-valued language setting.
    final IllegalArgumentException failure = expectThrows(
        IllegalArgumentException.class,
        () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN)
    );
    assertEquals("Invalid stemmer class specified: [english, light_english]", failure.getMessage());
}
}

0 comments on commit f679c90

Please sign in to comment.