Skip to content
This repository has been archived by the owner on Mar 31, 2022. It is now read-only.

Commit

Permalink
Implement analysis configuration #24
Browse files Browse the repository at this point in the history
  • Loading branch information
Gavrilov-Ivan committed Aug 26, 2021
1 parent 92a1578 commit 924e3b4
Show file tree
Hide file tree
Showing 44 changed files with 2,261 additions and 7 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright 2021 Haulmont.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.jmix.search.index.mapping.analysis;

import static io.jmix.search.index.mapping.analysis.AnalysisConfigurationStages.*;

/**
* Entry point for configuring Elasticsearch analysis elements.
* <p>
* Every {@code define*} method starts the definition of one named element and returns the initial stage of
* the corresponding fluent chain declared in {@link AnalysisConfigurationStages}.
*/
public interface AnalysisConfigurationContext {

/**
* Starts the definition of a new analyzer.
*
* @param name name of the new analyzer
* @return initial stage of analyzer configuration
*/
DefineAnalyzer defineAnalyzer(String name);

/**
* Starts the definition of a new normalizer.
*
* @param name name of the new normalizer
* @return initial stage of normalizer configuration
*/
DefineNormalizer defineNormalizer(String name);

/**
* Starts the definition of a new tokenizer.
*
* @param name name of the new tokenizer
* @return initial stage of tokenizer configuration
*/
DefineTokenizer defineTokenizer(String name);

/**
* Starts the definition of a new character filter.
*
* @param name name of the new character filter
* @return initial stage of character filter configuration
*/
DefineCharacterFilter defineCharacterFilter(String name);

/**
* Starts the definition of a new token filter.
*
* @param name name of the new token filter
* @return initial stage of token filter configuration
*/
DefineTokenFilter defineTokenFilter(String name);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Copyright 2021 Haulmont.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.jmix.search.index.mapping.analysis;

/**
* Stages of the analysis configuration fluent API.
* <p>
* Each nested interface is one step of a configuration chain started from
* {@link AnalysisConfigurationContext}. The return type of a stage method determines which calls are available next.
*/
public interface AnalysisConfigurationStages {

/**
* Initial stage of analyzer configuration.
* An analyzer can be described by a native configuration, by configuring a built-in type
* or by creating a custom one.
*/
interface DefineAnalyzer extends SetupNativeConfiguration, ConfigureBuiltIn {
/**
* Starts the definition of a custom analyzer.
*
* @return stage for selecting the tokenizer
*/
SetupTokenizer createCustom();
}

/**
* Initial stage of normalizer configuration.
* A normalizer can be described by a native configuration, by configuring a built-in type
* or by creating a custom one.
*/
interface DefineNormalizer extends SetupNativeConfiguration, ConfigureBuiltIn {
/**
* Starts the definition of a custom normalizer.
* Unlike the analyzer chain, there is no tokenizer stage: the chain proceeds directly to filters.
*
* @return stage for selecting character filters and token filters
*/
SetupFilters createCustom();
}

/**
* Initial stage of tokenizer configuration.
* A tokenizer can be described by a native configuration or by configuring a built-in type.
*/
interface DefineTokenizer extends SetupNativeConfiguration, ConfigureBuiltIn {
}

/**
* Initial stage of character filter configuration.
* A character filter can be described by a native configuration or by configuring a built-in type.
*/
interface DefineCharacterFilter extends SetupNativeConfiguration, ConfigureBuiltIn {
}

/**
* Initial stage of token filter configuration.
* A token filter can be described by a native configuration or by configuring a built-in type.
*/
interface DefineTokenFilter extends SetupNativeConfiguration, ConfigureBuiltIn {
}

/**
* Stage that accepts the whole element definition as a single string.
*/
interface SetupNativeConfiguration {
/**
* Describes the element with a native configuration. This call ends the chain because it returns nothing.
*
* @param nativeConfiguration element definition as a string
*                            (the usage example in {@link IndexAnalysisConfigurer} passes a JSON object)
*/
void withNativeConfiguration(String nativeConfiguration);
}

/**
* Stage that bases the element on a built-in type.
*/
interface ConfigureBuiltIn {
/**
* Selects the built-in type the element is based on.
*
* @param builtInTypeName name of the built-in type, e.g. "standard" for an analyzer
* @return stage for setting parameters of the selected type
*/
SetupParameters configureBuiltIn(String builtInTypeName);
}

/**
* Stage for setting parameters of a built-in type. Calls can be chained.
*/
interface SetupParameters {
/**
* Sets a parameter of the configured element.
*
* @param key   parameter name
* @param value parameter value
* @return the same kind of stage, so further parameters can be added
*/
SetupParameters withParameter(String key, Object value);
}

/**
* Stage for selecting the tokenizer of a custom analyzer.
*/
interface SetupTokenizer {
/**
* Selects the tokenizer.
*
* @param tokenizerName name of the tokenizer
* @return stage for selecting character filters and token filters
*/
SetupFilters withTokenizer(String tokenizerName);
}

/**
* Stage for selecting character filters of a custom element.
*/
interface SetupCharacterFilters {
/**
* Selects character filters.
*
* @param charFilterNames names of the character filters
* @return stage for selecting character filters and token filters
*/
SetupFilters withCharacterFilters(String... charFilterNames);
}

/**
* Stage for selecting token filters of a custom element.
*/
interface SetupTokenFilters {
/**
* Selects token filters.
*
* @param tokenFilterNames names of the token filters
* @return stage for selecting character filters and token filters
*/
SetupFilters withTokenFilters(String... tokenFilterNames);
}

/**
* Combined stage that allows selecting both character filters and token filters.
*/
interface SetupFilters extends SetupCharacterFilters, SetupTokenFilters {
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Copyright 2021 Haulmont.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.jmix.search.index.mapping.analysis;

/**
* Base interface for configurers of Elasticsearch analysis elements (analyzers, normalizers, etc).
* <p>
* Create a Spring bean that implements this interface.
* <p>
* Analysis elements can be configured inside {@link IndexAnalysisConfigurer#configure(AnalysisConfigurationContext)}.
* <p>
* Example:<pre>
* &#64;Component
* public class CustomIndexAnalysisConfigurer implements IndexAnalysisConfigurer {
* &#64;Override
* public void configure(AnalysisConfigurationContext context) {
* // Analyzer
* context.defineAnalyzer("configured_builtin_analyzer")
* .configureBuiltIn("standard")
* .withParameter("max_token_length", 100)
* .withParameter("stopwords", "_english_");
*
* context.defineAnalyzer("custom_analyzer")
* .createCustom()
* .withTokenizer("whitespace")
* .withCharacterFilters("html_strip")
* .withTokenFilters("stop");
*
* context.defineAnalyzer("analyzer_with_native_config")
* .withNativeConfiguration(
* "{" +
* "\"type\": \"standard\"," +
* " \"max_token_length\": 100," +
* " \"stopwords\": \"_english_\"" +
* "}"
* );
*
* // Normalizer
* context.defineNormalizer("custom_normalizer")
* .createCustom()
* .withCharacterFilters("html_strip")
* .withTokenFilters("lowercase");
*
* context.defineNormalizer("normalizer_with_native_config")
* .withNativeConfiguration(
* "{" +
* " \"type\": \"custom\"," +
* " \"filter\": [" +
* " \"lowercase\"," +
* " \"asciifolding\"" +
* " ]" +
* "}"
* );
*
* // Tokenizer
* context.defineTokenizer("configured_tokenizer")
* .configureBuiltIn("whitespace")
* .withParameter("max_token_length", 100);
*
* context.defineTokenizer("tokenizer_with_native_config")
* .withNativeConfiguration(
* "{" +
* " \"type\": \"standard\"," +
* " \"max_token_length\": 100" +
* "}"
* );
*
* // Character Filters
* context.defineCharacterFilter("configured_char_filter")
* .configureBuiltIn("html_strip")
* .withParameter("escaped_tags", Arrays.asList("b", "i"));
*
* context.defineCharacterFilter("char_filter_with_native_config")
* .withNativeConfiguration(
* "{" +
* " \"type\": \"html_strip\"," +
* " \"escaped_tags\": [" +
* " \"b\"" +
* " ]" +
* "}"
* );
*
* // Token Filter
* context.defineTokenFilter("configured_token_filter")
* .configureBuiltIn("stop")
* .withParameter("stopwords", "_russian_")
* .withParameter("ignore_case", "true");
*
* context.defineTokenFilter("filter_with_native_config")
* .withNativeConfiguration(
* "{" +
* " \"type\": \"ngram\"," +
* " \"min_gram\": 3," +
* " \"max_gram\": 5" +
* "}"
* );
* }
* }
* </pre>
*/
public interface IndexAnalysisConfigurer {

/**
* Defines analysis elements (analyzers, normalizers, tokenizers, character filters, token filters)
* using the provided context.
*
* @param context entry point of the fluent API for defining analysis elements
*/
void configure(AnalysisConfigurationContext context);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Copyright 2021 Haulmont.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.jmix.search.index.mapping.analysis.impl;

import io.jmix.core.common.util.Preconditions;
import io.jmix.search.index.mapping.analysis.AnalysisConfigurationContext;
import io.jmix.search.index.mapping.analysis.AnalysisConfigurationStages;

import java.util.ArrayList;
import java.util.List;

/**
 * Default {@link AnalysisConfigurationContext} implementation.
 * <p>
 * Every {@code define*} call validates the name, creates a configurer for the requested element kind,
 * remembers it in the matching list and returns it as the initial stage of the fluent chain.
 * The remembered configurers are available through the getters, which return the internal lists themselves.
 */
public class AnalysisConfigurationContextImpl implements AnalysisConfigurationContext {

    protected List<AnalyzerConfigurer> analyzerConfigurers = new ArrayList<>();
    protected List<NormalizerConfigurer> normalizerConfigurers = new ArrayList<>();
    protected List<TokenizerConfigurer> tokenizerConfigurers = new ArrayList<>();
    protected List<CharacterFilterConfigurer> characterFilterConfigurers = new ArrayList<>();
    protected List<TokenFilterConfigurer> tokenFilterConfigurers = new ArrayList<>();

    /**
     * Creates a context with no registered configurers.
     */
    protected AnalysisConfigurationContextImpl() {
    }

    /**
     * Appends the configurer to the given registry and hands it back to the caller.
     */
    private static <T> T register(List<T> registry, T created) {
        registry.add(created);
        return created;
    }

    /**
     * Registers a new analyzer configurer.
     *
     * @param name name of the new analyzer
     * @return the created configurer as the initial stage of analyzer configuration
     */
    @Override
    public AnalysisConfigurationStages.DefineAnalyzer defineAnalyzer(String name) {
        Preconditions.checkNotEmptyString(name, "Analyzer name is not specified");
        return register(analyzerConfigurers, new AnalyzerConfigurer(name));
    }

    /**
     * Registers a new normalizer configurer.
     *
     * @param name name of the new normalizer
     * @return the created configurer as the initial stage of normalizer configuration
     */
    @Override
    public AnalysisConfigurationStages.DefineNormalizer defineNormalizer(String name) {
        Preconditions.checkNotEmptyString(name, "Normalizer name is not specified");
        return register(normalizerConfigurers, new NormalizerConfigurer(name));
    }

    /**
     * Registers a new tokenizer configurer.
     *
     * @param name name of the new tokenizer
     * @return the created configurer as the initial stage of tokenizer configuration
     */
    @Override
    public AnalysisConfigurationStages.DefineTokenizer defineTokenizer(String name) {
        Preconditions.checkNotEmptyString(name, "Tokenizer name is not specified");
        return register(tokenizerConfigurers, new TokenizerConfigurer(name));
    }

    /**
     * Registers a new character filter configurer.
     *
     * @param name name of the new character filter
     * @return the created configurer as the initial stage of character filter configuration
     */
    @Override
    public AnalysisConfigurationStages.DefineCharacterFilter defineCharacterFilter(String name) {
        Preconditions.checkNotEmptyString(name, "Character Filter name is not specified");
        return register(characterFilterConfigurers, new CharacterFilterConfigurer(name));
    }

    /**
     * Registers a new token filter configurer.
     *
     * @param name name of the new token filter
     * @return the created configurer as the initial stage of token filter configuration
     */
    @Override
    public AnalysisConfigurationStages.DefineTokenFilter defineTokenFilter(String name) {
        Preconditions.checkNotEmptyString(name, "Token Filter name is not specified");
        return register(tokenFilterConfigurers, new TokenFilterConfigurer(name));
    }

    /**
     * @return analyzer configurers in the order they were defined
     */
    public List<AnalyzerConfigurer> getAnalyzerConfigurers() {
        return this.analyzerConfigurers;
    }

    /**
     * @return normalizer configurers in the order they were defined
     */
    public List<NormalizerConfigurer> getNormalizerConfigurers() {
        return this.normalizerConfigurers;
    }

    /**
     * @return tokenizer configurers in the order they were defined
     */
    public List<TokenizerConfigurer> getTokenizerConfigurers() {
        return this.tokenizerConfigurers;
    }

    /**
     * @return character filter configurers in the order they were defined
     */
    public List<CharacterFilterConfigurer> getCharacterFilterConfigurers() {
        return this.characterFilterConfigurers;
    }

    /**
     * @return token filter configurers in the order they were defined
     */
    public List<TokenFilterConfigurer> getTokenFilterConfigurers() {
        return this.tokenFilterConfigurers;
    }
}

0 comments on commit 924e3b4

Please sign in to comment.