Skip to content

Commit

Permalink
HSEARCH-3777 Move configuration of the Similarity to LuceneAnalysisCo…
Browse files Browse the repository at this point in the history
…nfigurer

Because that makes sense, but also because that's probably where we will
have to allow definition of named similarities when we add support for
per-field Similarity, like Elasticsearch offers:

https://www.elastic.co/guide/en/elasticsearch/reference/5.6/similarity.html
  • Loading branch information
yrodiere committed Apr 20, 2020
1 parent 150f4ca commit 577c2c3
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 47 deletions.
Expand Up @@ -9,6 +9,8 @@
import org.hibernate.search.backend.lucene.analysis.model.dsl.LuceneAnalyzerTypeStep;
import org.hibernate.search.backend.lucene.analysis.model.dsl.LuceneNormalizerTypeStep;

import org.apache.lucene.search.similarities.Similarity;

/**
* A context allowing the definition of named analyzers and normalizers in a Lucene backend.
*/
Expand All @@ -30,4 +32,13 @@ public interface LuceneAnalysisConfigurationContext {
*/
LuceneNormalizerTypeStep normalizer(String name);

/**
* Set the {@link Similarity}.
* <p>
* Defaults to {@link org.apache.lucene.search.similarities.BM25Similarity}.
*
* @param similarity The {@link Similarity} to use when indexing and when searching.
*/
void similarity(Similarity similarity);

}
Expand Up @@ -9,6 +9,7 @@
import java.lang.invoke.MethodHandles;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Optional;

import org.hibernate.search.backend.lucene.analysis.impl.LuceneAnalysisComponentFactory;
import org.hibernate.search.backend.lucene.analysis.LuceneAnalysisConfigurationContext;
Expand All @@ -22,7 +23,7 @@
import org.hibernate.search.util.common.logging.impl.LoggerFactory;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.search.similarities.Similarity;


public class LuceneAnalysisConfigurationContextImpl
Expand All @@ -32,6 +33,8 @@ public class LuceneAnalysisConfigurationContextImpl

private final LuceneAnalysisComponentFactory factory;

private Similarity similarity;

private final Map<String, LuceneAnalyzerBuilder> analyzers = new LinkedHashMap<>();

private final Map<String, LuceneAnalyzerBuilder> normalizers = new LinkedHashMap<>();
Expand Down Expand Up @@ -78,6 +81,11 @@ public LuceneAnalysisConfigurationContext instance(Analyzer instance) {
};
}

@Override
public void similarity(Similarity similarity) {
this.similarity = similarity;
}

@Override
public void contribute(LuceneAnalysisDefinitionCollector collector) {
for ( Map.Entry<String, LuceneAnalyzerBuilder> entry : analyzers.entrySet() ) {
Expand All @@ -88,6 +96,11 @@ public void contribute(LuceneAnalysisDefinitionCollector collector) {
}
}

@Override
public Optional<Similarity> getSimilarity() {
return Optional.ofNullable( similarity );
}

private void addAnalyzer(String name, LuceneAnalyzerBuilder definition) {
LuceneAnalysisComponentBuilder<Analyzer> existing = analyzers.putIfAbsent( name, definition );
if ( existing != null ) {
Expand Down
Expand Up @@ -7,8 +7,14 @@
package org.hibernate.search.backend.lucene.analysis.model.impl;


import java.util.Optional;

import org.apache.lucene.search.similarities.Similarity;

public interface LuceneAnalysisDefinitionContributor {

void contribute(LuceneAnalysisDefinitionCollector collector);

Optional<Similarity> getSimilarity();

}
Expand Up @@ -15,6 +15,8 @@
import org.hibernate.search.util.common.logging.impl.LoggerFactory;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;

/**
* A registry of analysis-related definitions for Lucene.
Expand All @@ -24,17 +26,21 @@ public final class LuceneAnalysisDefinitionRegistry {

private static final Log log = LoggerFactory.make( Log.class, MethodHandles.lookup() );

private final Similarity similarity;

private final Map<String, Analyzer> analyzerDefinitions;

private final Map<String, Analyzer> normalizerDefinitions;

public LuceneAnalysisDefinitionRegistry() {
// Nothing to do: we're creating an empty registry
similarity = createDefaultSimilarity();
analyzerDefinitions = Collections.emptyMap();
normalizerDefinitions = Collections.emptyMap();
}

public LuceneAnalysisDefinitionRegistry(LuceneAnalysisDefinitionContributor contributor) {
similarity = contributor.getSimilarity().orElseGet( LuceneAnalysisDefinitionRegistry::createDefaultSimilarity );
analyzerDefinitions = new TreeMap<>();
normalizerDefinitions = new TreeMap<>();
contributor.contribute( new LuceneAnalysisDefinitionCollector() {
Expand All @@ -56,6 +62,10 @@ public void collectNormalizer(String name, Analyzer normalizer) {
} );
}

public Similarity getSimilarity() {
return similarity;
}

/**
* @param name An analyzer name
* @return The analyzer definition associated with the given name,
Expand All @@ -74,4 +84,7 @@ public Analyzer getNormalizerDefinition(String name) {
return normalizerDefinitions.get( name );
}

private static Similarity createDefaultSimilarity() {
return new BM25Similarity();
}
}
Expand Up @@ -10,10 +10,7 @@
import org.hibernate.search.backend.lucene.lowlevel.directory.FileSystemAccessStrategyName;
import org.hibernate.search.backend.lucene.lowlevel.directory.LockingStrategyName;
import org.hibernate.search.backend.lucene.multitenancy.MultiTenancyStrategyName;
import org.hibernate.search.engine.environment.bean.BeanReference;

import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Version;

/**
Expand Down Expand Up @@ -96,19 +93,6 @@ private LuceneBackendSettings() {
public static final String DIRECTORY_FILESYSTEM_ACCESS_STRATEGY =
DIRECTORY_PREFIX + DirectoryRadicals.FILESYSTEM_ACCESS_STRATEGY;

/**
* The {@link org.apache.lucene.search.similarities.Similarity} to be used when indexing and querying.
* <p>
* Expects the fully qualified name of a concrete implementation of {@link org.apache.lucene.search.similarities.Similarity},
* or a reference to a bean of that type.
* <p>
* Defaults to {@link BM25Similarity}.
*
* @see org.hibernate.search.engine.cfg The core documentation of configuration properties,
* which includes a description of the "bean reference" properties and accepted values.
*/
public static final String SIMILARITY = "similarity";

/**
* The multi-tenancy strategy to use.
* <p>
Expand Down Expand Up @@ -171,8 +155,6 @@ private Defaults() {

public static final String DIRECTORY_ROOT = ".";

public static final BeanReference<? extends Similarity> SIMILARITY = BeanReference.of( BM25Similarity.class );

public static final FileSystemAccessStrategyName DIRECTORY_FILESYSTEM_ACCESS_STRATEGY =
FileSystemAccessStrategyName.AUTO;

Expand Down
Expand Up @@ -41,7 +41,6 @@
import org.hibernate.search.util.common.AssertionFailure;
import org.hibernate.search.util.common.logging.impl.LoggerFactory;

import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Version;


Expand All @@ -55,12 +54,6 @@ public class LuceneBackendFactory implements BackendFactory {
.as( Version.class, LuceneBackendFactory::parseLuceneVersion )
.build();

private static final ConfigurationProperty<BeanReference<? extends Similarity>> SIMILARITY =
ConfigurationProperty.forKey( LuceneBackendSettings.SIMILARITY )
.asBeanReference( Similarity.class )
.withDefault( LuceneBackendSettings.Defaults.SIMILARITY )
.build();

private static final ConfigurationProperty<MultiTenancyStrategyName> MULTI_TENANCY_STRATEGY =
ConfigurationProperty.forKey( LuceneBackendSettings.MULTI_TENANCY_STRATEGY )
.as( MultiTenancyStrategyName.class, MultiTenancyStrategyName::of )
Expand All @@ -79,7 +72,6 @@ public BackendImplementor create(String name, BackendBuildContext buildContext,

BackendThreads backendThreads = null;
BeanHolder<? extends DirectoryProvider> directoryProviderHolder = null;
BeanHolder<? extends Similarity> similarityHolder = null;

try {
backendThreads = new BackendThreads( "Backend " + name );
Expand All @@ -88,8 +80,6 @@ public BackendImplementor create(String name, BackendBuildContext buildContext,

directoryProviderHolder = getDirectoryProvider( backendContext, buildContext, propertySource );

similarityHolder = getSimilarity( buildContext, propertySource );

MultiTenancyStrategy multiTenancyStrategy = getMultiTenancyStrategy( propertySource );

LuceneAnalysisDefinitionRegistry analysisDefinitionRegistry = getAnalysisDefinitionRegistry(
Expand All @@ -99,7 +89,7 @@ public BackendImplementor create(String name, BackendBuildContext buildContext,
return new LuceneBackendImpl(
name,
backendThreads,
directoryProviderHolder, similarityHolder,
directoryProviderHolder,
new LuceneWorkFactoryImpl( multiTenancyStrategy ),
analysisDefinitionRegistry,
multiTenancyStrategy,
Expand All @@ -109,7 +99,6 @@ public BackendImplementor create(String name, BackendBuildContext buildContext,
}
catch (RuntimeException e) {
new SuppressingCloser( e )
.push( BeanHolder::close, similarityHolder )
.push( holder -> holder.get().close(), directoryProviderHolder )
.push( BeanHolder::close, directoryProviderHolder )
.push( BackendThreads::onStop, backendThreads );
Expand Down Expand Up @@ -148,12 +137,6 @@ private BeanHolder<? extends DirectoryProvider> getDirectoryProvider(EventContex
return initializationContext.createDirectoryProvider();
}

private BeanHolder<? extends Similarity> getSimilarity(BackendBuildContext buildContext,
ConfigurationPropertySource propertySource) {
BeanResolver beanResolver = buildContext.getBeanResolver();
return SIMILARITY.getAndTransform( propertySource, beanResolver::resolve );
}

private MultiTenancyStrategy getMultiTenancyStrategy(ConfigurationPropertySource propertySource) {
MultiTenancyStrategyName multiTenancyStrategyName = MULTI_TENANCY_STRATEGY.get( propertySource );

Expand Down
Expand Up @@ -45,7 +45,6 @@ public class LuceneBackendImpl implements BackendImplementor, LuceneBackend {

private final BackendThreads threads;
private final BeanHolder<? extends DirectoryProvider> directoryProviderHolder;
private final BeanHolder<? extends Similarity> similarityHolder;

private final LuceneAnalysisDefinitionRegistry analysisDefinitionRegistry;

Expand All @@ -59,7 +58,6 @@ public class LuceneBackendImpl implements BackendImplementor, LuceneBackend {
LuceneBackendImpl(String name,
BackendThreads threads,
BeanHolder<? extends DirectoryProvider> directoryProviderHolder,
BeanHolder<? extends Similarity> similarityHolder,
LuceneWorkFactory workFactory,
LuceneAnalysisDefinitionRegistry analysisDefinitionRegistry,
MultiTenancyStrategy multiTenancyStrategy,
Expand All @@ -68,19 +66,19 @@ public class LuceneBackendImpl implements BackendImplementor, LuceneBackend {
this.name = name;
this.threads = threads;
this.directoryProviderHolder = directoryProviderHolder;
this.similarityHolder = similarityHolder;

this.analysisDefinitionRegistry = analysisDefinitionRegistry;
Similarity similarity = analysisDefinitionRegistry.getSimilarity();

this.readOrchestrator = new LuceneSyncWorkOrchestratorImpl(
"Lucene read work orchestrator for backend " + name, similarityHolder.get()
"Lucene read work orchestrator for backend " + name, similarity
);
this.multiTenancyStrategy = multiTenancyStrategy;
this.timingSource = timingSource;

this.eventContext = EventContexts.fromBackendName( name );
this.indexManagerBackendContext = new IndexManagerBackendContext(
eventContext, threads, directoryProviderHolder.get(), similarityHolder.get(),
eventContext, threads, directoryProviderHolder.get(), similarity,
workFactory, multiTenancyStrategy,
timingSource, analysisDefinitionRegistry,
failureHandler,
Expand Down Expand Up @@ -113,7 +111,6 @@ public CompletableFuture<?> preStop() {
public void stop() {
try ( Closer<RuntimeException> closer = new Closer<>() ) {
closer.push( LuceneSyncWorkOrchestratorImpl::stop, readOrchestrator );
closer.push( BeanHolder::close, similarityHolder );
closer.push( holder -> holder.get().close(), directoryProviderHolder );
closer.push( BeanHolder::close, directoryProviderHolder );
closer.push( TimingSource::stop, timingSource );
Expand Down
Expand Up @@ -4,11 +4,12 @@
* License: GNU Lesser General Public License (LGPL), version 2.1 or later
* See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
*/
package org.hibernate.search.integrationtest.backend.lucene.lowlevel.similarity;
package org.hibernate.search.integrationtest.backend.lucene.analysis;

import static org.assertj.core.api.Assertions.assertThat;
import static org.hibernate.search.util.impl.integrationtest.mapper.stub.StubMapperUtils.referenceProvider;

import org.hibernate.search.backend.lucene.analysis.LuceneAnalysisConfigurer;
import org.hibernate.search.backend.lucene.cfg.LuceneBackendSettings;
import org.hibernate.search.backend.lucene.index.impl.LuceneIndexManagerImpl;
import org.hibernate.search.backend.lucene.index.impl.Shard;
Expand All @@ -27,6 +28,7 @@
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;

public class LuceneSimilarityIT {

Expand Down Expand Up @@ -64,7 +66,7 @@ public void defaults() {
@TestForIssue(jiraKey = "HSEARCH-3777")
@PortedFromSearch5(original = "org.hibernate.search.test.similarity.SimilarityTest")
public void custom() {
setup( ClassicSimilarity.class.getName() );
setup( new ClassicSimilarity() );

LuceneIndexManagerImpl luceneIndexManager = index.unwrapForTests( LuceneIndexManagerImpl.class );
assertThat( luceneIndexManager.getShardsForTests() )
Expand All @@ -86,10 +88,14 @@ public void custom() {
.hasTotalHitCount( 1L );
}

private SearchIntegration setup(String similarity) {
private SearchIntegration setup(Similarity similarity) {
return setupHelper.start()
.withIndex( index )
.withBackendProperty( LuceneBackendSettings.SIMILARITY, similarity )
.withBackendProperty(
LuceneBackendSettings.ANALYSIS_CONFIGURER,
similarity == null ? null
: (LuceneAnalysisConfigurer) context -> context.similarity( similarity )
)
.setup();
}
}

0 comments on commit 577c2c3

Please sign in to comment.