This repository has been archived by the owner on Mar 31, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement analysis configuration #24
- Loading branch information
1 parent
92a1578
commit 924e3b4
Showing
44 changed files
with
2,261 additions
and
7 deletions.
There are no files selected for viewing
65 changes: 65 additions & 0 deletions
65
search/src/main/java/io/jmix/search/index/mapping/analysis/AnalysisConfigurationContext.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
/* | ||
* Copyright 2021 Haulmont. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.jmix.search.index.mapping.analysis; | ||
|
||
import static io.jmix.search.index.mapping.analysis.AnalysisConfigurationStages.*; | ||
|
||
/** | ||
* Allows to configure Elasticsearch analysis elements. | ||
*/ | ||
public interface AnalysisConfigurationContext { | ||
|
||
/** | ||
* Init definition of new analyzer. | ||
* | ||
* @param name name of the new analyzer | ||
* @return Initial stage of analyzer configuration | ||
*/ | ||
DefineAnalyzer defineAnalyzer(String name); | ||
|
||
/** | ||
* Init definition of new normalizer. | ||
* | ||
* @param name name of the new normalizer | ||
* @return Initial stage of normalizer configuration | ||
*/ | ||
DefineNormalizer defineNormalizer(String name); | ||
|
||
/** | ||
* Init definition of new tokenizer. | ||
* | ||
* @param name name of the new tokenizer | ||
* @return Initial stage of tokenizer configuration | ||
*/ | ||
DefineTokenizer defineTokenizer(String name); | ||
|
||
/** | ||
* Init definition of new character filter. | ||
* | ||
* @param name name of the new character filter | ||
* @return Initial stage of character filter configuration | ||
*/ | ||
DefineCharacterFilter defineCharacterFilter(String name); | ||
|
||
/** | ||
* Init definition of new token filter | ||
* | ||
* @param name name of the new token filter | ||
* @return Initial stage of token filter configuration | ||
*/ | ||
DefineTokenFilter defineTokenFilter(String name); | ||
} |
67 changes: 67 additions & 0 deletions
67
search/src/main/java/io/jmix/search/index/mapping/analysis/AnalysisConfigurationStages.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
/* | ||
* Copyright 2021 Haulmont. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.jmix.search.index.mapping.analysis; | ||
|
||
/**
 * Stages of the analysis configuration fluent API.
 * <p>
 * Each nested interface represents one step of a configuration chain; the return type of
 * a step determines which calls may follow it.
 */
public interface AnalysisConfigurationStages {

    // ----- Stages shared by every kind of analysis element -----

    /**
     * Stage that accepts the whole element configuration as a single string.
     * The method returns nothing, so it always ends the chain.
     */
    interface SetupNativeConfiguration {
        /**
         * @param nativeConfiguration configuration of the element; the expected format is
         *                            not defined by this interface
         */
        void withNativeConfiguration(String nativeConfiguration);
    }

    /**
     * Stage that selects a built-in element type and continues with its parameters.
     */
    interface ConfigureBuiltIn {
        /**
         * @param builtInTypeName name of the built-in type to configure
         * @return stage for supplying parameters of the selected type
         */
        SetupParameters configureBuiltIn(String builtInTypeName);
    }

    /**
     * Stage that collects key/value parameters. It returns the same stage type,
     * so calls can be chained.
     */
    interface SetupParameters {
        /**
         * @param key   parameter name
         * @param value parameter value
         * @return stage for supplying further parameters
         */
        SetupParameters withParameter(String key, Object value);
    }

    // ----- Initial stages, one per kind of analysis element -----

    /**
     * Initial stage of analyzer configuration: native configuration, built-in type,
     * or a custom analyzer that starts with a tokenizer.
     */
    interface DefineAnalyzer extends SetupNativeConfiguration, ConfigureBuiltIn {
        /**
         * @return stage for selecting the tokenizer of the custom analyzer
         */
        SetupTokenizer createCustom();
    }

    /**
     * Initial stage of normalizer configuration: native configuration, built-in type,
     * or a custom normalizer that goes straight to the filter stage.
     */
    interface DefineNormalizer extends SetupNativeConfiguration, ConfigureBuiltIn {
        /**
         * @return stage for selecting filters of the custom normalizer
         */
        SetupFilters createCustom();
    }

    /**
     * Initial stage of tokenizer configuration: native configuration or built-in type.
     */
    interface DefineTokenizer extends SetupNativeConfiguration, ConfigureBuiltIn {
    }

    /**
     * Initial stage of character filter configuration: native configuration or built-in type.
     */
    interface DefineCharacterFilter extends SetupNativeConfiguration, ConfigureBuiltIn {
    }

    /**
     * Initial stage of token filter configuration: native configuration or built-in type.
     */
    interface DefineTokenFilter extends SetupNativeConfiguration, ConfigureBuiltIn {
    }

    // ----- Stages of custom element configuration -----

    /**
     * Stage that selects the tokenizer and then moves on to the filter stage.
     */
    interface SetupTokenizer {
        /**
         * @param tokenizerName name of the tokenizer to use
         * @return stage for selecting filters
         */
        SetupFilters withTokenizer(String tokenizerName);
    }

    /**
     * Stage that selects character filters by name.
     */
    interface SetupCharacterFilters {
        /**
         * @param charFilterNames names of the character filters to use
         * @return stage for selecting filters
         */
        SetupFilters withCharacterFilters(String... charFilterNames);
    }

    /**
     * Stage that selects token filters by name.
     */
    interface SetupTokenFilters {
        /**
         * @param tokenFilterNames names of the token filters to use
         * @return stage for selecting filters
         */
        SetupFilters withTokenFilters(String... tokenFilterNames);
    }

    /**
     * Combined filter stage offering both character filter and token filter selection.
     */
    interface SetupFilters extends SetupCharacterFilters, SetupTokenFilters {
    }
}
118 changes: 118 additions & 0 deletions
118
search/src/main/java/io/jmix/search/index/mapping/analysis/IndexAnalysisConfigurer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
/* | ||
* Copyright 2021 Haulmont. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.jmix.search.index.mapping.analysis; | ||
|
||
/** | ||
* Base interface for configurers of Elasticsearch analysis elements (analyzers, normalizers, etc). | ||
* <p> | ||
* Create Spring Bean that implements this interface. | ||
* <p> | ||
* Analysis elements can be configured inside {@link IndexAnalysisConfigurer#configure(AnalysisConfigurationContext)} | ||
* <p> | ||
* Example:<pre> | ||
* @Component | ||
* public class CustomIndexAnalysisConfigurer implements IndexAnalysisConfigurer { | ||
* @Override | ||
* public void configure(AnalysisConfigurationContext context) { | ||
* // Analyzer | ||
* context.defineAnalyzer("configured_builtin_analyzer") | ||
* .configureBuiltIn("standard") | ||
* .withParameter("max_token_length", 100) | ||
* .withParameter("stopwords", "_english_"); | ||
* | ||
* context.defineAnalyzer("custom_analyzer") | ||
* .createCustom() | ||
* .withTokenizer("whitespace") | ||
* .withCharacterFilters("html_strip") | ||
* .withTokenFilters("stop"); | ||
* | ||
* context.defineAnalyzer("analyzer_with_native_config") | ||
* .withNativeConfiguration( | ||
* "{" + | ||
* "\"type\": \"standard\"," + | ||
* " \"max_token_length\": 100," + | ||
* " \"stopwords\": \"_english_\"" + | ||
* "}" | ||
* ); | ||
* | ||
* // Normalizer | ||
* context.defineNormalizer("custom_normalizer") | ||
* .createCustom() | ||
* .withCharacterFilters("html_strip") | ||
* .withTokenFilters("lowercase"); | ||
* | ||
* context.defineNormalizer("normalizer_with_native_config") | ||
* .withNativeConfiguration( | ||
* "{" + | ||
* " \"type\": \"custom\"," + | ||
* " \"filter\": [" + | ||
* " \"lowercase\"," + | ||
* " \"asciifolding\"" + | ||
* " ]" + | ||
* "}" | ||
* ); | ||
* | ||
* // Tokenizer | ||
* context.defineTokenizer("configured_tokenizer") | ||
* .configureBuiltIn("whitespace") | ||
* .withParameter("max_token_length", 100); | ||
* | ||
* context.defineTokenizer("tokenizer_with_native_config") | ||
* .withNativeConfiguration( | ||
* "{" + | ||
* " \"type\": \"standard\"," + | ||
* " \"max_token_length\": 100" + | ||
* "}" | ||
* ); | ||
* | ||
* // Character Filters | ||
* context.defineCharacterFilter("configured_char_filter") | ||
* .configureBuiltIn("html_strip") | ||
* .withParameter("escaped_tags", Arrays.asList("b", "i")); | ||
* | ||
* context.defineCharacterFilter("char_filter_with_native_config") | ||
* .withNativeConfiguration( | ||
* "{" + | ||
* " \"type\": \"html_strip\"," + | ||
* " \"escaped_tags\": [" + | ||
* " \"b\"" + | ||
* " ]" + | ||
* "}" | ||
* ); | ||
* | ||
* // Token Filter | ||
* context.defineTokenFilter("configured_token_filter") | ||
* .configureBuiltIn("stop") | ||
* .withParameter("stopwords", "_russian_") | ||
* .withParameter("ignore_case", "true"); | ||
* | ||
* context.defineTokenFilter("filter_with_native_config") | ||
* .withNativeConfiguration( | ||
* "{" + | ||
* " \"type\": \"ngram\"," + | ||
* " \"min_gram\": 3," + | ||
* " \"max_gram\": 5" + | ||
* "}" | ||
* ); | ||
* } | ||
* } | ||
* </pre> | ||
*/ | ||
public interface IndexAnalysisConfigurer { | ||
|
||
void configure(AnalysisConfigurationContext context); | ||
} |
101 changes: 101 additions & 0 deletions
101
...ain/java/io/jmix/search/index/mapping/analysis/impl/AnalysisConfigurationContextImpl.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
/* | ||
* Copyright 2021 Haulmont. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.jmix.search.index.mapping.analysis.impl; | ||
|
||
import io.jmix.core.common.util.Preconditions; | ||
import io.jmix.search.index.mapping.analysis.AnalysisConfigurationContext; | ||
import io.jmix.search.index.mapping.analysis.AnalysisConfigurationStages; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
public class AnalysisConfigurationContextImpl implements AnalysisConfigurationContext { | ||
|
||
protected List<AnalyzerConfigurer> analyzerConfigurers; | ||
protected List<NormalizerConfigurer> normalizerConfigurers; | ||
protected List<TokenizerConfigurer> tokenizerConfigurers; | ||
protected List<CharacterFilterConfigurer> characterFilterConfigurers; | ||
protected List<TokenFilterConfigurer> tokenFilterConfigurers; | ||
|
||
protected AnalysisConfigurationContextImpl() { | ||
analyzerConfigurers = new ArrayList<>(); | ||
normalizerConfigurers = new ArrayList<>(); | ||
tokenizerConfigurers = new ArrayList<>(); | ||
characterFilterConfigurers = new ArrayList<>(); | ||
tokenFilterConfigurers = new ArrayList<>(); | ||
} | ||
|
||
@Override | ||
public AnalysisConfigurationStages.DefineAnalyzer defineAnalyzer(String name) { | ||
Preconditions.checkNotEmptyString(name, "Analyzer name is not specified"); | ||
AnalyzerConfigurer configurer = new AnalyzerConfigurer(name); | ||
analyzerConfigurers.add(configurer); | ||
return configurer; | ||
} | ||
|
||
@Override | ||
public AnalysisConfigurationStages.DefineNormalizer defineNormalizer(String name) { | ||
Preconditions.checkNotEmptyString(name, "Normalizer name is not specified"); | ||
NormalizerConfigurer configurer = new NormalizerConfigurer(name); | ||
normalizerConfigurers.add(configurer); | ||
return configurer; | ||
} | ||
|
||
@Override | ||
public AnalysisConfigurationStages.DefineTokenizer defineTokenizer(String name) { | ||
Preconditions.checkNotEmptyString(name, "Tokenizer name is not specified"); | ||
TokenizerConfigurer configurer = new TokenizerConfigurer(name); | ||
tokenizerConfigurers.add(configurer); | ||
return configurer; | ||
} | ||
|
||
@Override | ||
public AnalysisConfigurationStages.DefineCharacterFilter defineCharacterFilter(String name) { | ||
Preconditions.checkNotEmptyString(name, "Character Filter name is not specified"); | ||
CharacterFilterConfigurer configurer = new CharacterFilterConfigurer(name); | ||
characterFilterConfigurers.add(configurer); | ||
return configurer; | ||
} | ||
|
||
@Override | ||
public AnalysisConfigurationStages.DefineTokenFilter defineTokenFilter(String name) { | ||
Preconditions.checkNotEmptyString(name, "Token Filter name is not specified"); | ||
TokenFilterConfigurer configurer = new TokenFilterConfigurer(name); | ||
tokenFilterConfigurers.add(configurer); | ||
return configurer; | ||
} | ||
|
||
public List<AnalyzerConfigurer> getAnalyzerConfigurers() { | ||
return analyzerConfigurers; | ||
} | ||
|
||
public List<NormalizerConfigurer> getNormalizerConfigurers() { | ||
return normalizerConfigurers; | ||
} | ||
|
||
public List<TokenizerConfigurer> getTokenizerConfigurers() { | ||
return tokenizerConfigurers; | ||
} | ||
|
||
public List<CharacterFilterConfigurer> getCharacterFilterConfigurers() { | ||
return characterFilterConfigurers; | ||
} | ||
|
||
public List<TokenFilterConfigurer> getTokenFilterConfigurers() { | ||
return tokenFilterConfigurers; | ||
} | ||
} |
Oops, something went wrong.