Skip to content

Commit

Permalink
HSEARCH-2219 Translate Lucene analyzer definitions to their Elasticsearch equivalent automatically
Browse files Browse the repository at this point in the history

There are a few gotchas, most notably:

 * parameters expecting a list of file paths only accept *one* file
   in Elasticsearch
 * the files targeted by file path parameters must be in the config
   directory of Elasticsearch, instead of being in the classpath
 * TypeTokenFilterFactory is not supported, because the "keep_types"
   parameter is a file path in Lucene, but an array of elements in
   Elasticsearch
 * HunspellStemFilterFactory is not supported, because the "dictionary"
   parameter is a list of files in Lucene, but does not exist in
   Elasticsearch (a "locale" parameter is expected instead)
 * A few parameters that are supported in Lucene but not in
   Elasticsearch will trigger an exception when used
 * For now, extra parameters that are not even supported by Lucene will
   not trigger any exception. This may change in the future.
  • Loading branch information
yrodiere authored and Sanne committed Dec 19, 2016
1 parent 9a7e8e5 commit f8e35cb
Show file tree
Hide file tree
Showing 36 changed files with 2,073 additions and 90 deletions.
@@ -0,0 +1,53 @@
/*
* Hibernate Search, full-text search for your domain model
*
* License: GNU Lesser General Public License (LGPL), version 2.1 or later
* See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
*/
package org.hibernate.search.elasticsearch.analyzer.impl;

import org.hibernate.search.annotations.AnalyzerDef;

/**
 * Describes an Elasticsearch analyzer that is backed by an analyzer definition
 * from the Hibernate Search mapping.
 * <p>
 * This implementation is used whenever {@code @Analyzer(definition = "foo")} is encountered
 * and an {@code @AnalyzerDef} exists with the given name ("foo" in this example).
 * <p>
 * Both the name and the definition are the same for every field: the
 * {@code fieldName} arguments are ignored.
 *
 * @author Guillaume Smet
 * @author Yoann Rodiere
 */
public class CustomElasticsearchAnalyzerImpl implements ElasticsearchAnalyzer {

	private final AnalyzerDef definition;

	/**
	 * @param definition the analyzer definition wrapped by this analyzer;
	 * its {@code name()} is what {@link #getName(String)} returns
	 */
	public CustomElasticsearchAnalyzerImpl(AnalyzerDef definition) {
		this.definition = definition;
	}

	@Override
	public void close() {
		// no resources are held, so there is nothing to release
	}

	/**
	 * @param fieldName ignored
	 * @return the definition passed to the constructor
	 */
	@Override
	public AnalyzerDef getDefinition(String fieldName) {
		return this.definition;
	}

	/**
	 * @param fieldName ignored
	 * @return the name declared by the wrapped definition
	 */
	@Override
	public String getName(String fieldName) {
		return this.definition.name();
	}

	/**
	 * @return the simple class name followed by the wrapped definition between angle brackets
	 */
	@Override
	public String toString() {
		return getClass().getSimpleName() + "<" + definition + ">";
	}

}
Expand Up @@ -7,6 +7,7 @@
package org.hibernate.search.elasticsearch.analyzer.impl;

import org.hibernate.search.analyzer.impl.RemoteAnalyzer;
import org.hibernate.search.annotations.AnalyzerDef;

/**
* A description of an Elasticsearch analyzer.
Expand All @@ -15,4 +16,6 @@
*/
public interface ElasticsearchAnalyzer extends RemoteAnalyzer {

/**
 * Gives access to the analyzer definition that applies to a given field, if any.
 *
 * @param fieldName the name of the field whose analyzer definition is requested
 * @return the analyzer definition for that field; may be {@code null} when the
 * analyzer has no definition in the mapping (callers must be prepared for that)
 */
AnalyzerDef getDefinition(String fieldName);

}

This file was deleted.

Expand Up @@ -21,12 +21,12 @@ public class ElasticsearchAnalyzerStrategy implements AnalyzerStrategy<Elasticse

@Override
public ElasticsearchAnalyzerReference createDefaultAnalyzerReference() {
return new ElasticsearchAnalyzerReference( new ElasticsearchAnalyzerImpl( "default" ) );
return new ElasticsearchAnalyzerReference( new UndefinedElasticsearchAnalyzerImpl( "default" ) );
}

@Override
public ElasticsearchAnalyzerReference createPassThroughAnalyzerReference() {
return new ElasticsearchAnalyzerReference( new ElasticsearchAnalyzerImpl( "keyword" ) );
return new ElasticsearchAnalyzerReference( new UndefinedElasticsearchAnalyzerImpl( "keyword" ) );
}

@Override
Expand Down Expand Up @@ -57,18 +57,19 @@ private void initializeReference(Map<String, ElasticsearchAnalyzer> initializedA
ElasticsearchAnalyzer analyzer = initializedAnalyzers.get( name );

if ( analyzer == null ) {
// TODO HSEARCH-2219 Actually use the definition
analyzer = buildAnalyzer( name );
AnalyzerDef analyzerDefinition = analyzerDefinitions.get( name );
if ( analyzerDefinition == null ) {
analyzer = new UndefinedElasticsearchAnalyzerImpl( name );
}
else {
analyzer = new CustomElasticsearchAnalyzerImpl( analyzerDefinition );
}
initializedAnalyzers.put( name, analyzer );
}

analyzerReference.initialize( analyzer );
}

private ElasticsearchAnalyzer buildAnalyzer(String name) {
return new ElasticsearchAnalyzerImpl( name );
}

@Override
public ScopedElasticsearchAnalyzer.Builder buildScopedAnalyzer(ElasticsearchAnalyzerReference initialGlobalAnalyzerReference) {
return new ScopedElasticsearchAnalyzer.Builder(
Expand Down
Expand Up @@ -11,6 +11,7 @@

import org.hibernate.search.analyzer.spi.AnalyzerReference;
import org.hibernate.search.analyzer.spi.ScopedAnalyzer;
import org.hibernate.search.annotations.AnalyzerDef;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;

Expand Down Expand Up @@ -38,13 +39,22 @@ public ScopedElasticsearchAnalyzer(Builder builder) {
this.scopedAnalyzers.putAll( builder.scopedAnalyzers );
}

@Override
public String getName(String fieldName) {
private ElasticsearchAnalyzerReference getDelegate(String fieldName) {
ElasticsearchAnalyzerReference analyzerReference = scopedAnalyzers.get( fieldName );
if ( analyzerReference == null ) {
analyzerReference = globalAnalyzerReference;
}
return analyzerReference.getAnalyzer().getName( fieldName );
return analyzerReference;
}

@Override
public String getName(String fieldName) {
return getDelegate( fieldName ).getAnalyzer().getName( fieldName );
}

@Override
public AnalyzerDef getDefinition(String fieldName) {
return getDelegate( fieldName ).getAnalyzer().getDefinition( fieldName );
}

@Override
Expand Down
@@ -0,0 +1,46 @@
/*
* Hibernate Search, full-text search for your domain model
*
* License: GNU Lesser General Public License (LGPL), version 2.1 or later
* See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
*/
package org.hibernate.search.elasticsearch.analyzer.impl;

import org.hibernate.search.annotations.AnalyzerDef;

/**
 * An Elasticsearch analyzer for which no definition was found in the Hibernate Search mapping.
 * <p>
 * Such an analyzer is expected to be defined separately on Elasticsearch.
 * <p>
 * This implementation is used whenever {@code @Analyzer(definition = "foo")} is encountered
 * and <strong>no</strong> {@code @AnalyzerDef} exists with the given name
 * ("foo" in this example).
 * <p>
 * The name is the same for every field: the {@code fieldName} arguments are ignored.
 *
 * @author Yoann Rodiere
 */
public class UndefinedElasticsearchAnalyzerImpl implements ElasticsearchAnalyzer {

	private final String remoteName;

	/**
	 * @param remoteName the name under which the analyzer is referenced;
	 * returned as-is by {@link #getName(String)}
	 */
	public UndefinedElasticsearchAnalyzerImpl(String remoteName) {
		this.remoteName = remoteName;
	}

	@Override
	public void close() {
		// nothing to do
	}

	/**
	 * @param fieldName ignored
	 * @return the remote name passed to the constructor
	 */
	@Override
	public String getName(String fieldName) {
		return remoteName;
	}

	/**
	 * @param fieldName ignored
	 * @return always {@code null}: this analyzer has no definition in the mapping
	 */
	@Override
	public AnalyzerDef getDefinition(String fieldName) {
		// No definition
		return null;
	}

	/**
	 * @return the simple class name followed by the remote name between angle brackets,
	 * so that this analyzer is identifiable in logs and debug output
	 */
	@Override
	public String toString() {
		return getClass().getSimpleName() + "<" + remoteName + ">";
	}

}
Expand Up @@ -330,14 +330,12 @@ private void reinitializeIndex(Set<Class<?>> entityTypesToInitialize) {
}

private IndexMetadata createIndexMetadata(Collection<Class<?>> classes) {
IndexMetadata index = new IndexMetadata();
index.setName( actualIndexName );
List<EntityIndexBinding> descriptors = new ArrayList<>();
for ( Class<?> entityType : classes ) {
String entityName = entityType.getName();
EntityIndexBinding descriptor = searchIntegrator.getIndexBinding( entityType );
index.putMapping( entityName, schemaTranslator.translate( descriptor, schemaManagementExecutionOptions ) );
descriptors.add( descriptor );
}
return index;
return schemaTranslator.translate( actualIndexName, descriptors, schemaManagementExecutionOptions );
}

@Override
Expand Down
Expand Up @@ -306,4 +306,47 @@ BulkRequestFailedException elasticsearchBulkRequestFailed(String request, String
)
SearchException indexedEmbeddedPrefixBypass(Class<?> entityType, String fieldPath, String expectedParent);

/**
 * Creates the exception describing a tokenizer name that is used more than once.
 *
 * @param remoteName the conflicting tokenizer name, inserted into the message
 * @return the exception carrying the formatted message
 */
@Message(id = ES_BACKEND_MESSAGES_START_ID + 55,
value = "The same tokenizer name '%1$s' is used multiple times. The tokenizer names must be unique."
+ " If this name was automatically generated, you may override it by using @TokenizerDef.name." )
SearchException tokenizerNamingConflict(String remoteName);

/**
 * Creates the exception describing a char filter name that is used more than once.
 *
 * @param remoteName the conflicting char filter name, inserted into the message
 * @return the exception carrying the formatted message
 */
@Message(id = ES_BACKEND_MESSAGES_START_ID + 56,
value = "The same char filter name '%1$s' is used multiple times. The char filter names must be unique."
+ " If this name was automatically generated, you may override it by using @CharFilterDef.name." )
SearchException charFilterNamingConflict(String remoteName);

/**
 * Creates the exception describing a token filter name that is used more than once.
 *
 * @param remoteName the conflicting token filter name, inserted into the message
 * @return the exception carrying the formatted message
 */
@Message(id = ES_BACKEND_MESSAGES_START_ID + 57,
value = "The same token filter name '%1$s' is used multiple times. The token filter names must be unique."
+ " If this name was automatically generated, you may override it by using @TokenFilterDef.name." )
SearchException tokenFilterNamingConflict(String remoteName);

/**
 * Creates the exception describing a char filter factory that is not supported with Elasticsearch.
 *
 * @param factoryType the unsupported factory class, rendered in the message through {@code ClassFormatter}
 * @return the exception carrying the formatted message
 */
@Message(id = ES_BACKEND_MESSAGES_START_ID + 58,
value = "The char filter factory '%1$s' is not supported with Elasticsearch."
+ " Please only use builtin Lucene factories that have a builtin equivalent in Elasticsearch." )
SearchException unsupportedCharFilterFactory(@FormatWith(ClassFormatter.class) Class<?> factoryType);

/**
 * Creates the exception describing a tokenizer factory that is not supported with Elasticsearch.
 *
 * @param factoryType the unsupported factory class, rendered in the message through {@code ClassFormatter}
 * @return the exception carrying the formatted message
 */
@Message(id = ES_BACKEND_MESSAGES_START_ID + 59,
value = "The tokenizer factory '%1$s' is not supported with Elasticsearch."
+ " Please only use builtin Lucene factories that have a builtin equivalent in Elasticsearch." )
SearchException unsupportedTokenizerFactory(@FormatWith(ClassFormatter.class) Class<?> factoryType);

/**
 * Creates the exception describing a token filter factory that is not supported with Elasticsearch.
 *
 * @param factoryType the unsupported factory class, rendered in the message through {@code ClassFormatter}
 * @return the exception carrying the formatted message
 */
@Message(id = ES_BACKEND_MESSAGES_START_ID + 60,
value = "The token filter factory '%1$s' is not supported with Elasticsearch."
+ " Please only use builtin Lucene factories that have a builtin equivalent in Elasticsearch." )
SearchException unsupportedTokenFilterFactory(@FormatWith(ClassFormatter.class) Class<?> factoryType);

/**
 * Creates the exception describing a factory parameter that is not supported with Elasticsearch.
 * <p>
 * Note that the message prints the parameter first and the factory second,
 * i.e. in the reverse order of the method arguments.
 *
 * @param factoryType the factory class the parameter belongs to, rendered through {@code ClassFormatter}
 * @param parameter the name of the unsupported parameter
 * @return the exception carrying the formatted message
 */
@Message(id = ES_BACKEND_MESSAGES_START_ID + 61,
value = "The parameter '%2$s' is not supported for the factory '%1$s' with Elasticsearch." )
SearchException unsupportedAnalysisFactoryParameter(@FormatWith(ClassFormatter.class) Class<?> factoryType, String parameter);

/**
 * Creates the exception describing a factory parameter that refers to a class
 * which cannot be converted to a builtin Elasticsearch tokenizer type.
 *
 * @param factoryClass the factory class the parameter belongs to, rendered through {@code ClassFormatter}
 * @param parameterName the name of the offending parameter
 * @param tokenizerClass the class referred to by the parameter, as given in the message
 * @return the exception carrying the formatted message
 */
@Message(id = ES_BACKEND_MESSAGES_START_ID + 62,
value = "The parameter '%2$s' for the factory '%1$s' refers to the class '%3$s',"
+ " which cannot be converted to a builtin Elasticsearch tokenizer type." )
SearchException unsupportedAnalysisFactoryTokenizerClassNameParameter(@FormatWith(ClassFormatter.class) Class<?> factoryClass, String parameterName, String tokenizerClass);

/**
 * Creates the exception describing a factory parameter whose value is not supported with Elasticsearch.
 *
 * @param factoryClass the factory class the parameter belongs to, rendered through {@code ClassFormatter}
 * @param parameterName the name of the parameter holding the unsupported value
 * @param parameterValue the unsupported value
 * @return the exception carrying the formatted message
 */
@Message(id = ES_BACKEND_MESSAGES_START_ID + 63,
value = "The parameter '%2$s' for the factory '%1$s' has an unsupported value: '%3$s' is unsupported with Elasticsearch." )
SearchException unsupportedAnalysisDefinitionParameterValue(@FormatWith(ClassFormatter.class) Class<?> factoryClass, String parameterName, String parameterValue);

}

0 comments on commit f8e35cb

Please sign in to comment.