Skip to content

Commit

Permalink
HSEARCH-1492 Add support to exclude the entity compared from the results
Browse files Browse the repository at this point in the history
  • Loading branch information
emmanuelbernard authored and Sanne committed Mar 4, 2014
1 parent 1dbf45b commit f25e002
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 66 deletions.
Expand Up @@ -29,8 +29,11 @@
*/
public interface MoreLikeThisContext extends QueryCustomization<MoreLikeThisContext> {

//TODO add returnEntityComparedWith?
//TODO today we return the matching entity we could exclude it with a Boolean MUST NOT
/**
 * Exclude the entity used for comparison from the results.
 *
 * @return a {@code MoreLikeThisContext} on which further options can be chained
 */
MoreLikeThisContext excludeEntityUsedForComparison();

/**
* Boost significant terms relative to their scores.
* Amplified by the boost factor (1 is the recommended default to start with).
Expand Down
Expand Up @@ -46,6 +46,12 @@ public ConnectedMoreLikeThisContext(QueryBuildingContext context) {
this.moreLikeThisContext = new MoreLikeThisQueryContext();
}

/**
 * Records on the underlying query context that the entity used for comparison
 * must be excluded from the results.
 *
 * @return this context, to keep chaining options
 */
@Override
public MoreLikeThisContext excludeEntityUsedForComparison() {
// Only the flag is stored here; nothing in this method builds or alters a query.
moreLikeThisContext.setExcludeEntityUsedForComparison( true );
return this;
}

@Override
public MoreLikeThisContext favorSignificantTermsWithFactor(float factor) {
moreLikeThisContext.setBoostTerms( true );
Expand Down
Expand Up @@ -30,18 +30,13 @@

import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

import org.hibernate.search.engine.metadata.impl.DocumentFieldMetadata;
import org.hibernate.search.engine.spi.DocumentBuilderIndexedEntity;
import org.hibernate.search.engine.spi.SearchFactoryImplementor;
import org.hibernate.search.exception.AssertionFailure;
import org.hibernate.search.query.dsl.MoreLikeThisTermination;
import org.hibernate.search.query.dsl.MoreLikeThisToEntityContentAndTermination;
import org.hibernate.search.query.engine.spi.EntityInfo;
import org.hibernate.search.query.engine.spi.HSQuery;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;

Expand Down Expand Up @@ -97,13 +92,12 @@ public Query createQuery() {
// Use all compatible fields when comparingAllFields is used
fieldsContext.addAll( fieldNames );
}
Integer docId = getLuceneDocumentIdFromInputOrNull( documentBuilder );
query = new MoreLikeThisBuilder( documentBuilder, searchFactory )
.compatibleFieldNames( fieldNames )
.fieldsContext( fieldsContext )
.queryContext( queryContext )
.indexReader( indexReader )
.documentNumber( docId )
.inputType( inputType )
.input( input )
.otherMoreLikeThisContext( moreLikeThisContext )
.createQuery();
Expand Down Expand Up @@ -133,54 +127,6 @@ private String[] getAllCompatibleFieldNames(DocumentBuilderIndexedEntity<?> docu
return fieldNames.toArray( new String[fieldNames.size()] );
}

/**
 * Resolves the Lucene document id matching the input of this query.
 * <p>
 * The input is turned into its string form through the id bridge (directly when the input
 * is an id, after extracting the id when the input is an entity) and a one-result query on
 * the id keyword field is executed, projecting {@code HSQuery.DOCUMENT_ID}.
 *
 * @param documentBuilder the document builder providing the id bridge, the id extraction
 * and the id keyword field name
 * @return the projected document id, or {@code null} when the id is not available or when
 * an entity input matches no indexed document
 * @throws AssertionFailure when the input type is neither {@code ID} nor {@code ENTITY}
 */
private Integer getLuceneDocumentIdFromInputOrNull(DocumentBuilderIndexedEntity<?> documentBuilder) {
	final boolean inputIsId = inputType == INPUT_TYPE.ID;
	if ( !inputIsId && inputType != INPUT_TYPE.ENTITY ) {
		throw new AssertionFailure( "We don't support no string and reader for MoreLikeThis" );
	}

	String idAsString;
	if ( inputIsId ) {
		idAsString = documentBuilder.getIdBridge().objectToString( input );
	}
	else {
		try {
			// NOTE(review): relies on the two way bridge returning null for a null id - to be confirmed
			idAsString = documentBuilder.getIdBridge().objectToString( documentBuilder.getId( input ) );
		}
		catch (IllegalStateException e) {
			// the id could not be extracted from the entity: the caller falls back to another approach
			return null;
		}
	}
	if ( idAsString == null ) {
		return null;
	}

	TermQuery idLookup = new TermQuery( new Term( documentBuilder.getIdKeywordName(), idAsString ) );
	HSQuery lookupQuery = queryContext.getFactory().createHSQuery();
	// a plain list is built by hand: Arrays.asList does not play well with the wildcard capture here
	List<Class<?>> targetedTypes = new ArrayList<Class<?>>( 1 );
	targetedTypes.add( queryContext.getEntityType() );
	List<EntityInfo> matches = lookupQuery
			.luceneQuery( idLookup )
			.maxResults( 1 )
			.projection( HSQuery.DOCUMENT_ID )
			.targetedEntities( targetedTypes )
			.queryEntityInfos();

	if ( matches.isEmpty() ) {
		if ( inputIsId ) {
			// an explicit id that matches nothing is reported as an error
			throw log.entityWithIdNotFound( queryContext.getEntityType(), idAsString );
		}
		// an entity that is not indexed is tolerated
		return null;
	}
	return (Integer) matches.get( 0 ).getProjection()[0];
}

public static final class MoreLikeThisTerminationImpl extends ConnectedMoreLikeThisQueryBuilder implements MoreLikeThisTermination {

public MoreLikeThisTerminationImpl(Object id, INPUT_TYPE inputType, FieldsContext fieldsContext, MoreLikeThisQueryContext moreLikeThisContext, QueryCustomizer queryCustomizer, QueryBuildingContext queryContext) {
Expand Down
Expand Up @@ -47,6 +47,7 @@
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
Expand All @@ -62,10 +63,15 @@
import org.hibernate.search.engine.spi.DocumentBuilderIndexedEntity;
import org.hibernate.search.engine.spi.SearchFactoryImplementor;
import org.hibernate.search.exception.AssertionFailure;
import org.hibernate.search.query.engine.spi.EntityInfo;
import org.hibernate.search.query.engine.spi.HSQuery;
import org.hibernate.search.util.impl.PassThroughAnalyzer;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;

import static org.hibernate.search.query.dsl.impl.ConnectedMoreLikeThisQueryBuilder.INPUT_TYPE.ID;
import static org.hibernate.search.query.dsl.impl.ConnectedMoreLikeThisQueryBuilder.INPUT_TYPE.ENTITY;

/**
* Class inspired and code copied from Apache Lucene MoreLikeThis class.
* Apache Lucene code copyright the Apache Software Foundation released under the
Expand Down Expand Up @@ -97,6 +103,9 @@ public class MoreLikeThisBuilder<T> {
private FieldsContext fieldsContext;
private Object input;
private QueryBuildingContext queryContext;
private boolean excludeEntityCompared;
private ConnectedMoreLikeThisQueryBuilder.INPUT_TYPE inputType;
private TermQuery findById;

public MoreLikeThisBuilder( DocumentBuilderIndexedEntity<T> documentBuilder, SearchFactoryImplementor searchFactory ) {
log.requireTFIDFSimilarity( documentBuilder.getBeanClass() );
Expand All @@ -114,14 +123,10 @@ public MoreLikeThisBuilder compatibleFieldNames(String... compatibleFieldNames)
return this;
}

/**
 * Sets the document number stored by this builder.
 *
 * @param docNum the document number; stored as is in the {@code documentNumber} field
 * @return this builder, to keep chaining calls
 */
public MoreLikeThisBuilder documentNumber(Integer docNum) {
this.documentNumber = docNum;
return this;
}

/**
 * Copies the options held by the given context into this builder: whether terms are
 * boosted, the term boost factor, and whether the entity used for comparison is excluded.
 *
 * @param moreLikeThisContext the context the options are read from
 * @return this builder, to keep chaining calls
 */
public MoreLikeThisBuilder otherMoreLikeThisContext(MoreLikeThisQueryContext moreLikeThisContext) {
	final boolean boostTerms = moreLikeThisContext.isBoostTerms();
	final float termBoostFactor = moreLikeThisContext.getTermBoostFactor();
	final boolean excludeCompared = moreLikeThisContext.isExcludeEntityUsedForComparison();

	this.boost = boostTerms;
	this.boostFactor = termBoostFactor;
	this.excludeEntityCompared = excludeCompared;
	return this;
}

Expand All @@ -130,13 +135,83 @@ public MoreLikeThisBuilder otherMoreLikeThisContext(MoreLikeThisQueryContext mor
*/
public Query createQuery() {
	try {
		// The document number has to be resolved before the exclusion step, which only
		// adds its MUST_NOT clause when a document number is known.
		documentNumber = getLuceneDocumentIdFromIdAsTermOrNull( documentBuilder );
		// A single return: an earlier "return createQuery( retrieveTerms() )" would make
		// the lookup and the exclusion unreachable.
		return maybeExcludeComparedEntity( createQuery( retrieveTerms() ) );
	}
	catch (IOException e) {
		// surfaced as the project's dedicated exception, keeping the IOException as cause
		throw log.ioExceptionOnIndexOfEntity( e, documentBuilder.getBeanClass() );
	}
}

/**
 * Resolves the Lucene document id matching the input of this builder.
 * <p>
 * The input is turned into its string form through the id bridge (directly when the input
 * is an id, after extracting the id when the input is an entity). The term query on the id
 * keyword field is kept in {@code findById} and executed as a one-result query projecting
 * {@code HSQuery.DOCUMENT_ID}.
 *
 * @param documentBuilder the document builder providing the id bridge, the id extraction
 * and the id keyword field name
 * @return the projected document id, or {@code null} when the id is not available or when
 * an entity input matches no indexed document
 * @throws AssertionFailure when the input type is neither {@code ID} nor {@code ENTITY}
 */
private Integer getLuceneDocumentIdFromIdAsTermOrNull(DocumentBuilderIndexedEntity<?> documentBuilder) {
	final boolean inputIsId = inputType == ID;
	if ( !inputIsId && inputType != ENTITY ) {
		throw new AssertionFailure( "We don't support no string and reader for MoreLikeThis" );
	}

	String idAsString;
	if ( inputIsId ) {
		idAsString = documentBuilder.getIdBridge().objectToString( input );
	}
	else {
		try {
			// NOTE(review): relies on the two way bridge returning null for a null id - to be confirmed
			idAsString = documentBuilder.getIdBridge().objectToString( documentBuilder.getId( input ) );
		}
		catch (IllegalStateException e) {
			// the id could not be extracted from the entity: findById is left untouched
			return null;
		}
	}
	if ( idAsString == null ) {
		return null;
	}

	// kept in a field: the same term query is reused to exclude the compared entity
	findById = new TermQuery( new Term( documentBuilder.getIdKeywordName(), idAsString ) );
	HSQuery lookupQuery = queryContext.getFactory().createHSQuery();
	// a plain list is built by hand: Arrays.asList does not play well with the wildcard capture here
	List<Class<?>> targetedTypes = new ArrayList<Class<?>>( 1 );
	targetedTypes.add( queryContext.getEntityType() );
	List<EntityInfo> matches = lookupQuery
			.luceneQuery( findById )
			.maxResults( 1 )
			.projection( HSQuery.DOCUMENT_ID )
			.targetedEntities( targetedTypes )
			.queryEntityInfos();

	if ( matches.isEmpty() ) {
		if ( inputIsId ) {
			// an explicit id that matches nothing is reported as an error
			throw log.entityWithIdNotFound( queryContext.getEntityType(), idAsString );
		}
		// an entity that is not indexed is tolerated
		return null;
	}
	return (Integer) matches.get( 0 ).getProjection()[0];
}

/**
 * Adds, when requested, a clause removing the compared entity from the results.
 * <p>
 * Nothing is changed unless exclusion was asked for and a document number was resolved.
 * Otherwise a {@code MUST_NOT} clause on the id term query is added: directly to the given
 * query when it already is a {@code BooleanQuery} (that instance is modified and returned),
 * or to a new {@code BooleanQuery} wrapping the given query as a {@code MUST} clause.
 *
 * @param query the more like this query built so far
 * @return the query to execute, possibly the very instance received
 */
private Query maybeExcludeComparedEntity(Query query) {
	if ( !excludeEntityCompared || documentNumber == null ) {
		return query;
	}

	// It would be better to attach a collector to exclude a document by its id
	// but at this stage we could have documents reordered and thus with a different id
	// Maybe a Filter would be more efficient?
	final BooleanQuery excludingQuery;
	if ( query instanceof BooleanQuery ) {
		excludingQuery = (BooleanQuery) query;
	}
	else {
		excludingQuery = new BooleanQuery();
		excludingQuery.add( query, BooleanClause.Occur.MUST );
	}
	excludingQuery.add( new ConstantScoreQuery( findById ), BooleanClause.Occur.MUST_NOT );
	return excludingQuery;
}

/**
* Create the More Like This query from a PriorityQueue
*/
Expand Down Expand Up @@ -460,6 +535,15 @@ public MoreLikeThisBuilder queryContext(QueryBuildingContext queryContext) {
return this;
}

/**
 * Currently a no-op: the given value is not stored nor used.
 *
 * @param idAsTerm ignored by this implementation
 * @return this builder, to keep chaining calls
 */
// NOTE(review): the argument is silently dropped - either wire it to the id lookup or remove the method; confirm against callers
public MoreLikeThisBuilder idAsTerm(String idAsTerm) {
return this;
}

/**
 * Sets the kind of input this builder compares against.
 *
 * @param inputType the input type; stored as is in the {@code inputType} field
 * @return this builder, to keep chaining calls
 */
public MoreLikeThisBuilder inputType(ConnectedMoreLikeThisQueryBuilder.INPUT_TYPE inputType) {
this.inputType = inputType;
return this;
}

/**
* PriorityQueue that orders words by score.
*/
Expand Down
Expand Up @@ -30,6 +30,7 @@
public class MoreLikeThisQueryContext {
private boolean boostTerms = false;
private float termBoostFactor = 1f;
private boolean excludeEntityUsedForComparison;

public boolean isBoostTerms() {
return boostTerms;
Expand All @@ -46,4 +47,12 @@ public float getTermBoostFactor() {
/**
 * Sets the term boost factor.
 *
 * @param termBoostFactor the new factor; the field is initialized to {@code 1f}
 */
public void setTermBoostFactor(float termBoostFactor) {
this.termBoostFactor = termBoostFactor;
}

/**
 * Sets whether the entity used for comparison must be excluded from the results.
 *
 * @param excludeEntityUsedForComparison {@code true} to request the exclusion
 */
public void setExcludeEntityUsedForComparison(boolean excludeEntityUsedForComparison) {
this.excludeEntityUsedForComparison = excludeEntityUsedForComparison;
}

/**
 * Tells whether the entity used for comparison must be excluded from the results.
 *
 * @return the stored flag; {@code false} unless it was set, as the field has no explicit initializer
 */
public boolean isExcludeEntityUsedForComparison() {
return excludeEntityUsedForComparison;
}
}
37 changes: 34 additions & 3 deletions orm/src/test/java/org/hibernate/search/test/query/dsl/DSLTest.java
Expand Up @@ -892,7 +892,7 @@ public boolean matches(Collection<?> value) {
assertThat( ( (Coffee) real[0] ).getId() ).isEqualTo( ( (Coffee) expected[0] ).getId() );
}

outputQueryAndResults( outputLogs, decaffInstance, mltQuery, results );
outputQueryAndResults( outputLogs, decaffInstance, mltQuery, entityResults );

// pass entity itself with a matching id but different values
// the id should take precedence
Expand Down Expand Up @@ -925,7 +925,7 @@ public boolean matches(Collection<?> value) {
assertThat( ( (Coffee) real[0] ).getId() ).isEqualTo( ( (Coffee) expected[0] ).getId() );
}

outputQueryAndResults( outputLogs, decaffInstance, mltQuery, results );
outputQueryAndResults( outputLogs, decaffInstance, mltQuery, entityResults );

// pass entity itself with the right values but no id
copyOfDecaffInstance = new Coffee();
Expand Down Expand Up @@ -955,7 +955,38 @@ public boolean matches(Collection<?> value) {
assertThat( ( (Coffee) real[0] ).getId() ).isEqualTo( ( (Coffee) expected[0] ).getId() );
}

outputQueryAndResults( outputLogs, decaffInstance, mltQuery, results );
outputQueryAndResults( outputLogs, decaffInstance, mltQuery, entityResults );

// exclude comparing entity
mltQuery = qb
.moreLikeThis()
.comparingField( "summary" ).boostedTo( 10f )
.andField( "description" )
.toEntityWithId( decaffInstance.getId() )
.createQuery();
results = (List<Object[]>) fullTextSession
.createFullTextQuery( mltQuery, Coffee.class )
.setProjection( ProjectionConstants.THIS, ProjectionConstants.SCORE )
.list();
mltQuery = qb
.moreLikeThis()
.excludeEntityUsedForComparison()
.comparingField( "summary" ).boostedTo( 10f )
.andField( "description" )
.toEntityWithId( decaffInstance.getId() )
.createQuery();
List<Object[]> resultsWoComparingEntity = (List<Object[]>) fullTextSession
.createFullTextQuery( mltQuery, Coffee.class )
.setProjection( ProjectionConstants.THIS, ProjectionConstants.SCORE )
.list();
assertThat( resultsWoComparingEntity ).hasSize( results.size() - 1 );
for ( int index = 0 ; index < resultsWoComparingEntity.size() ; index++ ) {
Object[] real = resultsWoComparingEntity.get( index );
Object[] expected = results.get( index + 1 );
assertThat( real[1] ).isEqualTo( expected[1] );
assertThat( ( (Coffee) real[0] ).getId() ).isEqualTo( ( (Coffee) expected[0] ).getId() );
}
outputQueryAndResults( outputLogs, decaffInstance, mltQuery, resultsWoComparingEntity );
}
finally {
transaction.commit();
Expand Down

0 comments on commit f25e002

Please sign in to comment.