Skip to content
Permalink
Browse files
OAK-9772 Elastic and Lucene tokenizer difference
  • Loading branch information
thomasmueller committed May 18, 2022
1 parent 4f99576 commit 14278534fcd70517c82ec435192ecac176241b27
Showing 2 changed files with 17 additions and 2 deletions.
@@ -83,6 +83,9 @@ public class ElasticIndexDefinition extends IndexDefinition {
*/
private static final String INDEX_ORIGINAL_TERM = "indexOriginalTerm";

/**
 * Name of the flag read from the analyzers node; its value is passed to the
 * {@code split_on_case_change} field of the index settings
 */
private static final String SPLIT_ON_CASE_CHANGE = "splitOnCaseChange";
/**
 * Name of the flag read from the analyzers node; its value is passed to the
 * {@code split_on_numerics} field of the index settings
 */
private static final String SPLIT_ON_NUMERICS = "splitOnNumerics";

private static final String SIMILARITY_TAGS_ENABLED = "similarityTagsEnabled";
private static final boolean SIMILARITY_TAGS_ENABLED_DEFAULT = true;

@@ -228,11 +231,21 @@ protected String getDefaultFunctionName() {
/**
* Returns {@code true} if original terms need to be preserved at indexing analysis phase
*/
public boolean indexOriginalTerms() {
public boolean analyzerConfigIndexOriginalTerms() {
    // Query the flag straight off the analyzers child node; false is handed in as the fallback value.
    return getOptionalValue(definition.getChildNode(ANALYZERS), INDEX_ORIGINAL_TERM, false);
}

/**
 * Looks up the {@code splitOnCaseChange} flag on the analyzers child node of this index
 * definition, with {@code false} supplied as the default.
 */
public boolean analyzerConfigSplitOnCaseChange() {
    return getOptionalValue(definition.getChildNode(ANALYZERS), SPLIT_ON_CASE_CHANGE, false);
}

/**
 * Looks up the {@code splitOnNumerics} flag on the analyzers child node of this index
 * definition, with {@code false} supplied as the default.
 */
public boolean analyzerConfigSplitOnNumerics() {
    return getOptionalValue(definition.getChildNode(ANALYZERS), SPLIT_ON_NUMERICS, false);
}

@Override
protected PropertyDefinition createPropertyDefinition(IndexDefinition.IndexingRule rule, String name, NodeState nodeState) {
return new ElasticPropertyDefinition(rule, name, nodeState);
@@ -115,7 +115,9 @@ private static XContentBuilder loadSettings(ElasticIndexDefinition indexDefiniti
settingsBuilder.field("generate_word_parts", true);
settingsBuilder.field("stem_english_possessive", true);
settingsBuilder.field("generate_number_parts", true);
settingsBuilder.field("preserve_original", indexDefinition.indexOriginalTerms());
settingsBuilder.field("split_on_numerics", indexDefinition.analyzerConfigSplitOnNumerics());
settingsBuilder.field("split_on_case_change", indexDefinition.analyzerConfigSplitOnCaseChange());
settingsBuilder.field("preserve_original", indexDefinition.analyzerConfigIndexOriginalTerms());
}
settingsBuilder.endObject();

0 comments on commit 1427853

Please sign in to comment.