Commit fe1803c

Remove AnalysisService and reduce it to a simple name to analyzer mapping (#20627)

Today we hold on to all possible tokenizers, token filters, etc. when we create
an index service on a node. This was mainly done to allow the `_analyze` API to
directly access all these primitives. We fixed this in #19827 and can now get rid of
the AnalysisService entirely and replace it with a simple map-like class. This
ensures we don't create a gazillion long-lived objects that are entirely useless, since
they are never used in most of the indices. Those objects can also consume a considerable
amount of memory, since they may load stopwords, synonyms, etc.

Closes #19828
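
To make the shape of the change concrete, here is a minimal sketch of such a
map-like class, in the spirit of the IndexAnalyzers class introduced below. The
get/getDefaultIndexAnalyzer/close names mirror the diff; everything else is
simplified (the real class also carries the default search and search-quote
analyzers plus the IndexSettings):

import java.io.Closeable;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;

// Simplified sketch, not the committed class: a name-to-analyzer mapping
// that owns only the analyzers this index actually configured.
final class SimpleIndexAnalyzers implements Closeable {
    private final Analyzer defaultIndexAnalyzer;
    private final Map<String, Analyzer> analyzers;

    SimpleIndexAnalyzers(Analyzer defaultIndexAnalyzer, Map<String, Analyzer> analyzers) {
        this.defaultIndexAnalyzer = defaultIndexAnalyzer;
        this.analyzers = analyzers;
    }

    Analyzer get(String name) {
        return analyzers.get(name); // null if this index doesn't define the name
    }

    Analyzer getDefaultIndexAnalyzer() {
        return defaultIndexAnalyzer;
    }

    @Override
    public void close() {
        // Closing iterates a small per-index map instead of every analyzer
        // known to the node, which is the point of the change.
        for (Analyzer analyzer : analyzers.values()) {
            analyzer.close();
        }
    }
}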
s1monw committed Sep 23, 2016
1 parent e3b7b4f commit fe1803c
Showing 78 changed files with 823 additions and 779 deletions.
1 change: 0 additions & 1 deletion buildSrc/src/main/resources/checkstyle_suppressions.xml
@@ -342,7 +342,6 @@
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergePolicyConfig.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLog.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]AnalysisRegistry.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]AnalysisService.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]CommonGramsTokenFilterFactory.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]CustomAnalyzerProvider.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]NumericDoubleAnalyzer.java" checks="LineLength" />
core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java
@@ -45,9 +45,9 @@
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalysisRegistry;
-import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
+import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
@@ -145,45 +145,46 @@ protected AnalyzeResponse shardOperation(AnalyzeRequest request, ShardId shardId
}
}
final AnalysisRegistry analysisRegistry = indicesService.getAnalysis();
-return analyze(request, field, analyzer, indexService != null ? indexService.analysisService() : null, analysisRegistry, environment);
+return analyze(request, field, analyzer, indexService != null ? indexService.getIndexAnalyzers() : null, analysisRegistry, environment);
} catch (IOException e) {
throw new ElasticsearchException("analysis failed", e);
}

}

-public static AnalyzeResponse analyze(AnalyzeRequest request, String field, Analyzer analyzer, AnalysisService analysisService, AnalysisRegistry analysisRegistry, Environment environment) throws IOException {
+public static AnalyzeResponse analyze(AnalyzeRequest request, String field, Analyzer analyzer, IndexAnalyzers indexAnalyzers, AnalysisRegistry analysisRegistry, Environment environment) throws IOException {

boolean closeAnalyzer = false;
if (analyzer == null && request.analyzer() != null) {
-if (analysisService == null) {
+if (indexAnalyzers == null) {
analyzer = analysisRegistry.getAnalyzer(request.analyzer());
if (analyzer == null) {
throw new IllegalArgumentException("failed to find global analyzer [" + request.analyzer() + "]");
}
} else {
-analyzer = analysisService.analyzer(request.analyzer());
+analyzer = indexAnalyzers.get(request.analyzer());
if (analyzer == null) {
throw new IllegalArgumentException("failed to find analyzer [" + request.analyzer() + "]");
}
}

} else if (request.tokenizer() != null) {
-TokenizerFactory tokenizerFactory = parseTokenizerFactory(request, analysisService, analysisRegistry, environment);
+final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings();
+TokenizerFactory tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers, analysisRegistry, environment);

TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
-tokenFilterFactories = getTokenFilterFactories(request, analysisService, analysisRegistry, environment, tokenFilterFactories);
+tokenFilterFactories = getTokenFilterFactories(request, indexSettings, analysisRegistry, environment, tokenFilterFactories);

CharFilterFactory[] charFilterFactories = new CharFilterFactory[0];
-charFilterFactories = getCharFilterFactories(request, analysisService, analysisRegistry, environment, charFilterFactories);
+charFilterFactories = getCharFilterFactories(request, indexSettings, analysisRegistry, environment, charFilterFactories);

analyzer = new CustomAnalyzer(tokenizerFactory, charFilterFactories, tokenFilterFactories);
closeAnalyzer = true;
} else if (analyzer == null) {
-if (analysisService == null) {
+if (indexAnalyzers == null) {
analyzer = analysisRegistry.getAnalyzer("standard");
} else {
-analyzer = analysisService.defaultIndexAnalyzer();
+analyzer = indexAnalyzers.getDefaultIndexAnalyzer();
}
}
if (analyzer == null) {
@@ -446,7 +447,7 @@ private static Map<String, Object> extractExtendedAttributes(TokenStream stream,
return extendedAttributes;
}

-private static CharFilterFactory[] getCharFilterFactories(AnalyzeRequest request, AnalysisService analysisService, AnalysisRegistry analysisRegistry,
+private static CharFilterFactory[] getCharFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry,
Environment environment, CharFilterFactory[] charFilterFactories) throws IOException {
if (request.charFilters() != null && request.charFilters().size() > 0) {
charFilterFactories = new CharFilterFactory[request.charFilters().size()];
@@ -468,19 +469,19 @@ private static CharFilterFactory[] getCharFilterFactories(AnalyzeRequest request
charFilterFactories[i] = charFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_charfilter_[" + i + "]", settings);
} else {
AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory;
-if (analysisService == null) {
+if (indexSettings == null) {
charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name);
if (charFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global char filter under [" + charFilter.name + "]");
}
charFilterFactories[i] = charFilterFactoryFactory.get(environment, charFilter.name);
} else {
-charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name, analysisService.getIndexSettings());
+charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name, indexSettings);
if (charFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
}
-charFilterFactories[i] = charFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, charFilter.name,
-    AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
+charFilterFactories[i] = charFilterFactoryFactory.get(indexSettings, environment, charFilter.name,
+    AnalysisRegistry.getSettingsFromIndexSettings(indexSettings,
AnalysisRegistry.INDEX_ANALYSIS_CHAR_FILTER + "." + charFilter.name));
}
}
@@ -492,7 +493,7 @@ private static CharFilterFactory[] getCharFilterFactories(AnalyzeRequest request
return charFilterFactories;
}

-private static TokenFilterFactory[] getTokenFilterFactories(AnalyzeRequest request, AnalysisService analysisService, AnalysisRegistry analysisRegistry,
+private static TokenFilterFactory[] getTokenFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry,
Environment environment, TokenFilterFactory[] tokenFilterFactories) throws IOException {
if (request.tokenFilters() != null && request.tokenFilters().size() > 0) {
tokenFilterFactories = new TokenFilterFactory[request.tokenFilters().size()];
@@ -514,19 +515,19 @@ private static TokenFilterFactory[] getTokenFilterFactories(AnalyzeRequest reque
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter_[" + i + "]", settings);
} else {
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory;
-if (analysisService == null) {
+if (indexSettings == null) {
tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
if (tokenFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilter.name + "]");
}
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilter.name);
} else {
-tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name, analysisService.getIndexSettings());
+tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name, indexSettings);
if (tokenFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilter.name + "]");
}
-tokenFilterFactories[i] = tokenFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenFilter.name,
-    AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
+tokenFilterFactories[i] = tokenFilterFactoryFactory.get(indexSettings, environment, tokenFilter.name,
+    AnalysisRegistry.getSettingsFromIndexSettings(indexSettings,
AnalysisRegistry.INDEX_ANALYSIS_FILTER + "." + tokenFilter.name));
}
}
@@ -538,7 +539,7 @@ private static TokenFilterFactory[] getTokenFilterFactories(AnalyzeRequest reque
return tokenFilterFactories;
}

-private static TokenizerFactory parseTokenizerFactory(AnalyzeRequest request, AnalysisService analysisService,
+private static TokenizerFactory parseTokenizerFactory(AnalyzeRequest request, IndexAnalyzers indexAnalyzers,
AnalysisRegistry analysisRegistry, Environment environment) throws IOException {
TokenizerFactory tokenizerFactory;
final AnalyzeRequest.NameOrDefinition tokenizer = request.tokenizer();
@@ -558,19 +559,19 @@ private static TokenizerFactory parseTokenizerFactory(AnalyzeRequest request, An
tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings);
} else {
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
-if (analysisService == null) {
+if (indexAnalyzers == null) {
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
}
tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
} else {
-tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, analysisService.getIndexSettings());
+tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, indexAnalyzers.getIndexSettings());
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find tokenizer under [" + tokenizer.name + "]");
}
-tokenizerFactory = tokenizerFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenizer.name,
-    AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
+tokenizerFactory = tokenizerFactoryFactory.get(indexAnalyzers.getIndexSettings(), environment, tokenizer.name,
+    AnalysisRegistry.getSettingsFromIndexSettings(indexAnalyzers.getIndexSettings(),
AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizer.name));
}
}
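The recurring shape in the hunks above is a null check on the index context:
an `_analyze` request against a concrete index resolves names through that
index's settings, while a request without an index falls back to the
node-global registry. A stripped-down, hypothetical helper showing just that
dispatch (NameResolver and its fields are invented for illustration and are
not part of this commit):

import java.util.Map;
import java.util.function.Function;

// Hypothetical sketch of the lookup pattern: global components when no
// index context exists, index-scoped lookup otherwise.
final class NameResolver<T> {
    private final Map<String, T> global;           // node-wide, prebuilt components
    private final Function<String, T> indexScoped; // per-index lookup; null without an index

    NameResolver(Map<String, T> global, Function<String, T> indexScoped) {
        this.global = global;
        this.indexScoped = indexScoped;
    }

    T resolve(String name) {
        if (indexScoped == null) {
            T found = global.get(name);
            if (found == null) {
                throw new IllegalArgumentException("failed to find global component under [" + name + "]");
            }
            return found;
        }
        T found = indexScoped.apply(name);
        if (found == null) {
            throw new IllegalArgumentException("failed to find component under [" + name + "]");
        }
        return found;
    }
}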
core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java
@@ -26,13 +26,16 @@
import org.elasticsearch.common.settings.IndexScopedSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.index.analysis.AnalysisService;
+import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.indices.mapper.MapperRegistry;

+import java.util.AbstractMap;
import java.util.Collections;
+import java.util.Map;
+import java.util.Set;

/**
* This service is responsible for upgrading legacy index metadata to the current version
@@ -112,9 +115,30 @@ private void checkMappingsCompatibility(IndexMetaData indexMetaData) {
// been started yet. However, we don't really need real analyzers at this stage - so we can fake it
IndexSettings indexSettings = new IndexSettings(indexMetaData, this.settings);
SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap());
-try (AnalysisService analysisService = new FakeAnalysisService(indexSettings)) {
-    MapperService mapperService = new MapperService(indexSettings, analysisService, similarityService, mapperRegistry, () -> null);
+final NamedAnalyzer fakeDefault = new NamedAnalyzer("fake_default", new Analyzer() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName) {
+        throw new UnsupportedOperationException("shouldn't be here");
+    }
+});
+// this is just a fake map that always returns the same value for any possible string key
+// also the entrySet impl isn't fully correct but we implement it since internally
+// IndexAnalyzers will iterate over all analyzers to close them.
+final Map<String, NamedAnalyzer> analyzerMap = new AbstractMap<String, NamedAnalyzer>() {
+    @Override
+    public NamedAnalyzer get(Object key) {
+        assert key instanceof String : "key must be a string but was: " + key.getClass();
+        return new NamedAnalyzer((String) key, fakeDefault.analyzer());
+    }
+
+    @Override
+    public Set<Entry<String, NamedAnalyzer>> entrySet() {
+        // just to ensure we can iterate over this single analyzer
+        return Collections.singletonMap(fakeDefault.name(), fakeDefault).entrySet();
+    }
+};
+try (IndexAnalyzers fakeIndexAnalyzers = new IndexAnalyzers(indexSettings, fakeDefault, fakeDefault, fakeDefault, analyzerMap)) {
+    MapperService mapperService = new MapperService(indexSettings, fakeIndexAnalyzers, similarityService, mapperRegistry, () -> null);
for (ObjectCursor<MappingMetaData> cursor : indexMetaData.getMappings().values()) {
MappingMetaData mappingMetaData = cursor.value;
mapperService.merge(mappingMetaData.type(), mappingMetaData.source(), MapperService.MergeReason.MAPPING_RECOVERY, false);
@@ -134,34 +158,6 @@ private IndexMetaData markAsUpgraded(IndexMetaData indexMetaData) {
return IndexMetaData.builder(indexMetaData).settings(settings).build();
}

-/**
- * A fake analysis server that returns the same keyword analyzer for all requests
- */
-private static class FakeAnalysisService extends AnalysisService {
-
-    private Analyzer fakeAnalyzer = new Analyzer() {
-        @Override
-        protected TokenStreamComponents createComponents(String fieldName) {
-            throw new UnsupportedOperationException("shouldn't be here");
-        }
-    };
-
-    public FakeAnalysisService(IndexSettings indexSettings) {
-        super(indexSettings, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap());
-    }
-
-    @Override
-    public NamedAnalyzer analyzer(String name) {
-        return new NamedAnalyzer(name, fakeAnalyzer);
-    }
-
-    @Override
-    public void close() {
-        fakeAnalyzer.close();
-        super.close();
-    }
-}

IndexMetaData archiveBrokenIndexSettings(IndexMetaData indexMetaData) {
final Settings settings = indexMetaData.getSettings();
final Settings upgrade = indexScopedSettings.archiveUnknownOrBrokenSettings(settings);
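The fake map above leans on java.util.AbstractMap, whose only abstract method
is entrySet(); the upgrade path exercises exactly two operations, get(Object)
during mapping recovery and iteration when IndexAnalyzers closes its
analyzers. A self-contained illustration of the same constant-map idiom, using
plain strings instead of analyzers:

import java.util.AbstractMap;
import java.util.Collections;
import java.util.Map;
import java.util.Set;

// Demonstrates a map that answers every lookup with a synthesized value
// while still exposing one representative entry for iteration.
public class ConstantMapDemo {
    public static void main(String[] args) {
        Map<String, String> constant = new AbstractMap<String, String>() {
            @Override
            public String get(Object key) {
                return "fake value for " + key; // same shape of answer for any key
            }

            @Override
            public Set<Entry<String, String>> entrySet() {
                // a single entry is enough for callers that only iterate
                return Collections.singletonMap("fake_default", "fake value").entrySet();
            }
        };
        System.out.println(constant.get("whitespace"));  // fake value for whitespace
        System.out.println(constant.get("my_synonyms")); // fake value for my_synonyms
        System.out.println(constant.entrySet());         // [fake_default=fake value]
    }
}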