Skip to content

Commit

Permalink
HSEARCH-3197 Allow to set the minimum number of should clauses requir…
Browse files Browse the repository at this point in the history
…ed to match for boolean predicates in the DSL
  • Loading branch information
yrodiere committed Jun 19, 2018
1 parent e31b8a0 commit 1d83686
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 0 deletions.
Expand Up @@ -344,6 +344,11 @@ else if ( filters.size() == 1 ) {

clauses.append( boostAppender( booleanQuery ) );

int minimumShouldMatchNumber = booleanQuery.getMinimumNumberShouldMatch();
if ( minimumShouldMatchNumber != 0 ) {
clauses.addProperty( "minimum_should_match", minimumShouldMatchNumber );
}

JsonObject bool = new JsonObject();
bool.add( "bool", clauses.build() );
return bool;
Expand Down
Expand Up @@ -11,6 +11,61 @@

/**
* Represents a boolean query that can contain one or more elements to join.
* <p>
* <h3 id="minimumshouldmatch">"minimumShouldMatch" constraints</h3>
* <p>
* "minimumShouldMatch" constraints define a minimum number of "should" clauses that have to match
* in order for the boolean junction to match.
* <p>
* The feature is similar, and will work identically, to
* <a href="https://lucene.apache.org/solr/7_3_0/solr-core/org/apache/solr/util/doc-files/min-should-match.html">"Min Number Should Match"</a>
* in Solr or
* <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-minimum-should-match.html">{@code minimum_should_match}</a>
* in Elasticsearch.
* <h4 id="minimumshouldmatch-minimum">Definition of the minimum</h4>
* <p>
* The minimum may be defined either directly as a positive number, or indirectly as a negative number
* or positive or negative percentage representing a ratio of the total number of "should" clauses in this boolean junction.
* <p>
* Here is how each type of input is interpreted:
* <dl>
* <dt>Positive number</dt>
* <dd>
* The value is interpreted directly as the minimum number of "should" clauses that have to match.
* </dd>
* <dt>Negative number</dt>
* <dd>
* The absolute value is interpreted as the maximum number of "should" clauses that may not match:
* the absolute value is subtracted from the total number of "should" clauses.
* </dd>
* <dt>Positive percentage</dt>
* <dd>
* The value is interpreted as the minimum percentage of the total number of "should" clauses that have to match:
* the percentage is applied to the total number of "should" clauses, then rounded down.
* </dd>
* <dt>Negative percentage</dt>
* <dd>
* The absolute value is interpreted as the maximum percentage of the total number of "should" clauses that may not match:
* the absolute value of the percentage is applied to the total number of "should" clauses, then rounded down,
* then subtracted from the total number of "should" clauses.
* </dd>
* </dl>
* <p>
* In any case, if the computed minimum is 0 or less, or higher than the total number of "should" clauses,
* behavior is backend-specific (it may throw an exception, or produce unpredictable results,
* or fall back to some default behavior).
* <p>
* Examples:
* <pre><code>
* // Example 1: at least 3 "should" clauses have to match
* booleanContext1.minimumShouldMatchNumber( 3 );
* // Example 2: at most 2 "should" clauses may not match
* booleanContext2.minimumShouldMatchNumber( -2 );
* // Example 3: at least 75% of "should" clauses have to match (rounded down)
* booleanContext3.minimumShouldMatchPercent( 75 );
* // Example 4: at most 25% of "should" clauses may not match (rounded down)
* booleanContext4.minimumShouldMatchPercent( -25 );
* </code></pre>
*
* @author Emmanuel Bernard
*/
Expand All @@ -35,4 +90,26 @@ public interface BooleanJunction<T extends BooleanJunction> extends QueryCustomi
* @return true if no restrictions have been applied
*/
boolean isEmpty();

/**
 * Sets the <a href="#minimumshouldmatch">"minimumShouldMatch" constraint</a>,
 * expressed as a number of "should" clauses.
 * <p>
 * For example {@code 3} means at least 3 "should" clauses have to match,
 * while {@code -2} means at most 2 "should" clauses may not match.
 *
 * @param matchingClausesNumber A definition of the number of "should" clauses that have to match.
 * If positive, it is the number of clauses that have to match.
 * See <a href="BooleanJunction.html#minimumshouldmatch-minimum">Definition of the minimum</a>
 * for details and possible values, in particular negative values.
 * @return {@code this}, for method chaining.
 * @see #minimumShouldMatchPercent(int)
 */
BooleanJunction minimumShouldMatchNumber(int matchingClausesNumber);

/**
 * Sets the <a href="#minimumshouldmatch">"minimumShouldMatch" constraint</a>,
 * expressed as a percentage of the total number of "should" clauses.
 * <p>
 * For example {@code 75} means at least 75% of "should" clauses have to match (rounded down),
 * while {@code -25} means at most 25% of "should" clauses may not match (rounded down).
 *
 * @param matchingClausesPercent A definition of the number of "should" clauses that have to match, as a percentage.
 * If positive, it is the percentage of the total number of "should" clauses that have to match.
 * See <a href="BooleanJunction.html#minimumshouldmatch-minimum">Definition of the minimum</a>
 * for details and possible values, in particular negative values.
 * @return {@code this}, for method chaining.
 * @see #minimumShouldMatchNumber(int)
 */
BooleanJunction minimumShouldMatchPercent(int matchingClausesPercent);
}
Expand Up @@ -33,6 +33,9 @@ class BooleanQueryBuilder implements MustJunction {
private final List<BooleanClause> clauses;
private BooleanClause lastClause;
private final QueryCustomizer queryCustomizer;
private MinimumShouldMatchContextImpl minimumShouldMatchContext;

private int shouldClauseCount = 0;

BooleanQueryBuilder() {
clauses = new ArrayList<BooleanClause>( 5 );
Expand Down Expand Up @@ -70,6 +73,7 @@ public BooleanJunction should(Query query) {
else {
lastClause = new BooleanClause( query, BooleanClause.Occur.SHOULD );
clauses.add( lastClause );
++shouldClauseCount;
}
return this;
}
Expand Down Expand Up @@ -124,6 +128,11 @@ public Query createQuery() {
//in this case we need to add a positive clause to match everything else.
builder.add( new MatchAllDocsQuery(), Occur.FILTER );
}

if ( minimumShouldMatchContext != null ) {
minimumShouldMatchContext.applyMinimum( builder, shouldClauseCount );
}

return queryCustomizer.setWrappedQuery( builder.build() ).createQuery();
}

Expand All @@ -132,4 +141,23 @@ public boolean isEmpty() {
return clauses.isEmpty();
}

/**
 * Records a "minimumShouldMatch" constraint expressed as a number of clauses.
 *
 * @param matchingClausesNumber the number-based definition of the minimum, passed on unchanged
 * to the minimum-should-match context.
 * @return {@code this}, for method chaining.
 */
@Override
public BooleanJunction minimumShouldMatchNumber(int matchingClausesNumber) {
	final MinimumShouldMatchContextImpl context = getMinimumShouldMatchContext();
	context.requireNumber( matchingClausesNumber );
	return this;
}

/**
 * Records a "minimumShouldMatch" constraint expressed as a percentage of clauses.
 *
 * @param matchingClausesPercent the percentage-based definition of the minimum, passed on unchanged
 * to the minimum-should-match context.
 * @return {@code this}, for method chaining.
 */
@Override
public BooleanJunction minimumShouldMatchPercent(int matchingClausesPercent) {
	final MinimumShouldMatchContextImpl context = getMinimumShouldMatchContext();
	context.requirePercent( matchingClausesPercent );
	return this;
}

/**
 * Gives access to the minimum-should-match context of this builder,
 * creating it the first time it is requested.
 *
 * @return the context instance stored in this builder, never {@code null}.
 */
private MinimumShouldMatchContextImpl getMinimumShouldMatchContext() {
	if ( minimumShouldMatchContext != null ) {
		return minimumShouldMatchContext;
	}
	// First access: create the context lazily and remember it for later calls.
	minimumShouldMatchContext = new MinimumShouldMatchContextImpl();
	return minimumShouldMatchContext;
}

}
@@ -0,0 +1,83 @@
/*
* Hibernate Search, full-text search for your domain model
*
* License: GNU Lesser General Public License (LGPL), version 2.1 or later
* See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
*/
package org.hibernate.search.query.dsl.impl;

import java.lang.invoke.MethodHandles;

import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;

import org.apache.lucene.search.BooleanQuery;

/**
 * Holds the single "minimumShouldMatch" constraint defined on a boolean junction
 * and applies it to a Lucene {@link BooleanQuery.Builder}.
 * <p>
 * At most one constraint may be registered: registering a second one is rejected
 * with the "conflicting constraints" error.
 */
final class MinimumShouldMatchContextImpl {

	private static final Log log = LoggerFactory.make( MethodHandles.lookup() );

	/** The registered constraint, or {@code null} if none was registered yet. */
	private MinimumShouldMatchConstraint minimumShouldMatchConstraint;

	/**
	 * Registers a constraint expressed as a number of "should" clauses.
	 *
	 * @param matchingClausesNumber if zero or positive, the minimum number of clauses that have to match;
	 * if negative, its absolute value is the number of clauses that may not match.
	 */
	public void requireNumber(int matchingClausesNumber) {
		addMinimumShouldMatchConstraint(
				new MinimumShouldMatchConstraint( matchingClausesNumber, null )
		);
	}

	/**
	 * Registers a constraint expressed as a percentage of the total number of "should" clauses.
	 *
	 * @param matchingClausesPercent if zero or positive, the percentage of clauses that have to match;
	 * if negative, its absolute value is the percentage of clauses that may not match.
	 */
	public void requirePercent(int matchingClausesPercent) {
		addMinimumShouldMatchConstraint(
				new MinimumShouldMatchConstraint( null, matchingClausesPercent )
		);
	}

	/**
	 * Resolves the registered constraint (if any) against the actual number of "should" clauses
	 * and sets the result on the given builder. Does nothing when no constraint was registered.
	 *
	 * @param builder the builder to set the minimum on.
	 * @param shouldClauseCount the total number of "should" clauses in the query being built.
	 */
	void applyMinimum(BooleanQuery.Builder builder, int shouldClauseCount) {
		if ( minimumShouldMatchConstraint == null ) {
			return;
		}
		int minimumShouldMatch = minimumShouldMatchConstraint.toMinimum( shouldClauseCount );
		builder.setMinimumNumberShouldMatch( minimumShouldMatch );
	}

	/**
	 * Stores the given constraint, failing if one is already stored.
	 */
	private void addMinimumShouldMatchConstraint(MinimumShouldMatchConstraint constraint) {
		if ( this.minimumShouldMatchConstraint != null ) {
			throw log.minimumShouldMatchConflictingConstraints();
		}
		this.minimumShouldMatchConstraint = constraint;
	}

	/**
	 * A constraint defined either as a number or as a percentage;
	 * the definition that is not used is {@code null}.
	 */
	private static final class MinimumShouldMatchConstraint {
		private final Integer matchingClausesNumber;
		private final Integer matchingClausesPercent;

		MinimumShouldMatchConstraint(Integer matchingClausesNumber, Integer matchingClausesPercent) {
			this.matchingClausesNumber = matchingClausesNumber;
			this.matchingClausesPercent = matchingClausesPercent;
		}

		/**
		 * Computes the effective minimum number of "should" clauses that have to match.
		 * <p>
		 * The computation is done with {@code long} arithmetic: with {@code int} arithmetic,
		 * a very large percentage multiplied by the clause count could overflow and wrap around
		 * to a value that looks valid, silently bypassing the bounds check below.
		 *
		 * @param totalShouldClauseNumber the total number of "should" clauses.
		 * @return the computed minimum, between 1 and {@code totalShouldClauseNumber} inclusive.
		 * An out-of-bounds result is reported through {@code log.minimumShouldMatchMinimumOutOfBounds}.
		 */
		int toMinimum(int totalShouldClauseNumber) {
			long minimum;
			if ( matchingClausesNumber != null ) {
				long number = matchingClausesNumber;
				// A negative number counts the clauses that are allowed NOT to match.
				minimum = number >= 0 ? number : totalShouldClauseNumber + number;
			}
			else {
				long percent = matchingClausesPercent;
				// Integer division truncates toward zero, so for a negative percentage
				// this is the rounded-down share of the absolute percentage, negated.
				long share = percent * totalShouldClauseNumber / 100;
				minimum = percent >= 0 ? share : totalShouldClauseNumber + share;
			}

			if ( minimum < 1 || minimum > totalShouldClauseNumber ) {
				throw log.minimumShouldMatchMinimumOutOfBounds( clampToInt( minimum ), totalShouldClauseNumber );
			}

			// Safe: the bounds check guarantees 1 <= minimum <= totalShouldClauseNumber.
			return (int) minimum;
		}

		/**
		 * Narrows a {@code long} to the {@code int} range, for error reporting only.
		 */
		private static int clampToInt(long value) {
			return (int) Math.max( Integer.MIN_VALUE, Math.min( Integer.MAX_VALUE, value ) );
		}
	}
}
Expand Up @@ -1011,4 +1011,14 @@ public interface Log extends BaseHibernateSearchLogger {

@Message(id = 349, value = "Some of the specified entity types ('%s') are not indexed, nor is any of their subclasses." )
IllegalArgumentException someTargetedEntityTypesNotIndexed(String targetedEntities);

/**
 * Builds the exception stating that something must be positive or zero.
 *
 * @param objectDescription the description inserted at the start of the message.
 * @return the exception to throw.
 */
@Message(id = 350, value = "'%1$s' must be positive or zero.")
IllegalArgumentException mustBePositiveOrZero(String objectDescription);

/**
 * Builds the exception reported when the minimum computed for a minimumShouldMatch constraint
 * is lower than 1 or higher than the total number of "should" clauses.
 * <p>
 * The format indices are deliberately not in declaration order: the upper bound shown
 * in the message is the second parameter, and the offending value is the first one.
 *
 * @param minimum the computed, out-of-bounds minimum (shown after "got").
 * @param totalShouldClauseNumber the total number of "should" clauses (shown as the upper bound).
 * @return the exception to throw.
 */
@Message(id = 351, value = "Computed minimum for minimumShouldMatch constraint is out of bounds:"
		+ " expected a number between 1 and '%2$s', got '%1$s'.")
SearchException minimumShouldMatchMinimumOutOfBounds(int minimum, int totalShouldClauseNumber);

/**
 * Builds the exception reported when more than one minimumShouldMatch constraint
 * is defined on the same boolean junction.
 *
 * @return the exception to throw.
 */
@Message(id = 352, value = "Multiple conflicting minimumShouldMatch constraints")
SearchException minimumShouldMatchConflictingConstraints();
}

0 comments on commit 1d83686

Please sign in to comment.