From 991b9e7244fd848ab1806d42fd511e5e27d4ea29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?= Date: Tue, 2 May 2017 18:28:34 +0200 Subject: [PATCH] HSEARCH-2208 Use Query objects instead of Filters in spatial search --- .../impl/ElasticsearchHSQueryImpl.java | 2 +- .../elasticsearch/impl/ToElasticsearch.java | 59 ++-- .../search/query/dsl/QueryCustomization.java | 1 - .../spatial/impl/ConstantScoreScorer.java | 84 ++++++ .../spatial/impl/ConstantScoreWeight.java | 87 ++++++ .../search/spatial/impl/DistanceFilter.java | 216 -------------- .../search/spatial/impl/DistanceQuery.java | 272 ++++++++++++++++++ ...lHashFilter.java => SpatialHashQuery.java} | 49 ++-- .../SpatialQueryBuilderFromCoordinates.java | 115 +++----- .../test/spatial/BenchWithGeonames.java | 2 +- 10 files changed, 546 insertions(+), 341 deletions(-) create mode 100644 engine/src/main/java/org/hibernate/search/spatial/impl/ConstantScoreScorer.java create mode 100644 engine/src/main/java/org/hibernate/search/spatial/impl/ConstantScoreWeight.java delete mode 100644 engine/src/main/java/org/hibernate/search/spatial/impl/DistanceFilter.java create mode 100644 engine/src/main/java/org/hibernate/search/spatial/impl/DistanceQuery.java rename engine/src/main/java/org/hibernate/search/spatial/impl/{SpatialHashFilter.java => SpatialHashQuery.java} (67%) diff --git a/elasticsearch/src/main/java/org/hibernate/search/elasticsearch/impl/ElasticsearchHSQueryImpl.java b/elasticsearch/src/main/java/org/hibernate/search/elasticsearch/impl/ElasticsearchHSQueryImpl.java index e676435e8fd..77a0df20dce 100644 --- a/elasticsearch/src/main/java/org/hibernate/search/elasticsearch/impl/ElasticsearchHSQueryImpl.java +++ b/elasticsearch/src/main/java/org/hibernate/search/elasticsearch/impl/ElasticsearchHSQueryImpl.java @@ -420,7 +420,7 @@ private JsonObject getFilteredQuery(JsonElement originalQuery, JsonArray typeFil // user filter if ( userFilter != null ) { - filters.add( ToElasticsearch.fromLuceneFilter( 
userFilter ) ); + filters.add( ToElasticsearch.fromLuceneQuery( userFilter ) ); } if ( !filterDefinitions.isEmpty() ) { diff --git a/elasticsearch/src/main/java/org/hibernate/search/elasticsearch/impl/ToElasticsearch.java b/elasticsearch/src/main/java/org/hibernate/search/elasticsearch/impl/ToElasticsearch.java index 28b66a9e6ee..73bb09c54a4 100644 --- a/elasticsearch/src/main/java/org/hibernate/search/elasticsearch/impl/ToElasticsearch.java +++ b/elasticsearch/src/main/java/org/hibernate/search/elasticsearch/impl/ToElasticsearch.java @@ -17,7 +17,6 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.Filter; import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MatchAllDocsQuery; @@ -51,8 +50,8 @@ import org.hibernate.search.query.facet.FacetingRequest; import org.hibernate.search.spatial.Coordinates; import org.hibernate.search.spatial.DistanceSortField; -import org.hibernate.search.spatial.impl.DistanceFilter; -import org.hibernate.search.spatial.impl.SpatialHashFilter; +import org.hibernate.search.spatial.impl.DistanceQuery; +import org.hibernate.search.spatial.impl.SpatialHashQuery; import org.hibernate.search.util.StringHelper; import org.hibernate.search.util.logging.impl.LoggerFactory; @@ -232,8 +231,15 @@ else if ( query instanceof ConstantScoreQuery ) { else if ( query instanceof FilteredQuery ) { return convertFilteredQuery( (FilteredQuery) query ); } - else if ( query instanceof Filter ) { - return fromLuceneFilter( (Filter) query ); + else if ( query instanceof QueryWrapperFilter ) { + JsonObject result = fromLuceneQuery( ( (QueryWrapperFilter) query ).getQuery() ); + return wrapBoostIfNecessary( result, query.getBoost() ); + } + else if ( query instanceof DistanceQuery ) { + return convertDistanceQuery( (DistanceQuery) query ); + } + else if ( query instanceof 
SpatialHashQuery ) { + return convertSpatialHashFilter( (SpatialHashQuery) query ); } else if ( query instanceof PhraseQuery ) { return convertPhraseQuery( (PhraseQuery) query ); @@ -250,6 +256,10 @@ else if ( query instanceof org.apache.lucene.search.CachingWrapperQuery ) { JsonObject result = fromLuceneQuery( ( (org.apache.lucene.search.CachingWrapperQuery) query ).getQuery() ); return wrapBoostIfNecessary( result, query.getBoost() ); } + else if ( query instanceof org.apache.lucene.search.CachingWrapperFilter ) { + JsonObject result = fromLuceneQuery( ( (org.apache.lucene.search.CachingWrapperFilter) query ).getFilter() ); + return wrapBoostIfNecessary( result, query.getBoost() ); + } throw LOG.cannotTransformLuceneQueryIntoEsQuery( query ); } @@ -587,36 +597,36 @@ private static JsonObject convertFilteredQuery(FilteredQuery query) { return filteredQuery; } - private static JsonObject convertDistanceFilter(DistanceFilter filter) { + private static JsonObject convertDistanceQuery(DistanceQuery query) { JsonObject distanceQuery = JsonBuilder.object() .add( "geo_distance", JsonBuilder.object() - .addProperty( "distance", filter.getRadius() + "km" ) - .add( filter.getCoordinatesField(), + .addProperty( "distance", query.getRadius() + "km" ) + .add( query.getCoordinatesField(), JsonBuilder.object() - .addProperty( "lat", filter.getCenter().getLatitude() ) - .addProperty( "lon", filter.getCenter().getLongitude() ) + .addProperty( "lat", query.getCenter().getLatitude() ) + .addProperty( "lon", query.getCenter().getLongitude() ) ) ).build(); - distanceQuery = wrapQueryForNestedIfRequired( filter.getCoordinatesField(), distanceQuery ); + distanceQuery = wrapQueryForNestedIfRequired( query.getCoordinatesField(), distanceQuery ); - // we only implement the previous filter optimization when we use the hash method as Elasticsearch + // we only implement the approximation optimization when we use the hash method as Elasticsearch // automatically optimize the geo_distance 
query with a bounding box filter so we don't need to do it // ourselves when we use the range method. - Filter previousFilter = filter.getPreviousFilter(); - if ( previousFilter instanceof SpatialHashFilter ) { + Query approximationQuery = query.getApproximationQuery(); + if ( approximationQuery instanceof SpatialHashQuery ) { distanceQuery = JsonBuilder.object() .add( "bool", JsonBuilder.object() .add( "must", distanceQuery ) - .add( "filter", convertSpatialHashFilter( (SpatialHashFilter) previousFilter ) ) + .add( "filter", convertSpatialHashFilter( (SpatialHashQuery) approximationQuery ) ) ).build(); } return distanceQuery; } - private static JsonObject convertSpatialHashFilter(SpatialHashFilter filter) { + private static JsonObject convertSpatialHashFilter(SpatialHashQuery filter) { JsonArray cellsIdsJsonArray = new JsonArray(); for ( String cellId : filter.getSpatialHashCellsIds() ) { cellsIdsJsonArray.add( cellId ); @@ -709,23 +719,6 @@ private static boolean isNested(String field) { return false; } - public static JsonObject fromLuceneFilter(Filter luceneFilter) { - if ( luceneFilter instanceof QueryWrapperFilter ) { - Query query = ( (QueryWrapperFilter) luceneFilter ).getQuery(); - return fromLuceneQuery( query ); - } - else if ( luceneFilter instanceof DistanceFilter ) { - return convertDistanceFilter( (DistanceFilter) luceneFilter ); - } - else if ( luceneFilter instanceof SpatialHashFilter ) { - return convertSpatialHashFilter( (SpatialHashFilter) luceneFilter ); - } - else if ( luceneFilter instanceof org.apache.lucene.search.CachingWrapperFilter ) { - return fromLuceneFilter( ( (org.apache.lucene.search.CachingWrapperFilter) luceneFilter ).getFilter() ); - } - throw LOG.cannotTransformLuceneQueryIntoEsQuery( luceneFilter ); - } - /** * Convert a Lucene {@link Sort} to an Elasticsearch sort, trying to preserve * the exact same meaning as the Sort would have in Lucene. 
diff --git a/engine/src/main/java/org/hibernate/search/query/dsl/QueryCustomization.java b/engine/src/main/java/org/hibernate/search/query/dsl/QueryCustomization.java index ee3b24cb93a..38df8fdde10 100644 --- a/engine/src/main/java/org/hibernate/search/query/dsl/QueryCustomization.java +++ b/engine/src/main/java/org/hibernate/search/query/dsl/QueryCustomization.java @@ -7,7 +7,6 @@ package org.hibernate.search.query.dsl; -import org.apache.lucene.search.Filter; import org.apache.lucene.search.Query; /** diff --git a/engine/src/main/java/org/hibernate/search/spatial/impl/ConstantScoreScorer.java b/engine/src/main/java/org/hibernate/search/spatial/impl/ConstantScoreScorer.java new file mode 100644 index 00000000000..a52c4af9e44 --- /dev/null +++ b/engine/src/main/java/org/hibernate/search/spatial/impl/ConstantScoreScorer.java @@ -0,0 +1,84 @@ +/* + * Hibernate Search, full-text search for your domain model + * + * License: GNU Lesser General Public License (LGPL), version 2.1 or later + * See the lgpl.txt file in the root directory or . + */ +package org.hibernate.search.spatial.impl; + +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; + + +/** + * A copy of Lucene's ConstantScoreScorer implementation, + * necessary because the one in Lucene is marked as "internal". + * + * @author Yoann Rodiere + */ +final class ConstantScoreScorer extends Scorer { + + private final float score; + private final TwoPhaseIterator twoPhaseIterator; + private final DocIdSetIterator disi; + + /** + * Constructor based on a {@link DocIdSetIterator} which will be used to drive iteration. Two phase iteration will + * not be supported. 
+ * + * @param weight the parent weight + * @param score the score to return on each document + * @param disi the iterator that defines matching documents + */ + public ConstantScoreScorer(Weight weight, float score, DocIdSetIterator disi) { + super( weight ); + this.score = score; + this.twoPhaseIterator = null; + this.disi = disi; + } + + /** + * Constructor based on a {@link TwoPhaseIterator}. In that case the {@link Scorer} will support two-phase + * iteration. + * + * @param weight the parent weight + * @param score the score to return on each document + * @param twoPhaseIterator the iterator that defines matching documents + */ + public ConstantScoreScorer(Weight weight, float score, TwoPhaseIterator twoPhaseIterator) { + super( weight ); + this.score = score; + this.twoPhaseIterator = twoPhaseIterator; + this.disi = TwoPhaseIterator.asDocIdSetIterator( twoPhaseIterator ); + } + + @Override + public DocIdSetIterator iterator() { + return disi; + } + + @Override + public TwoPhaseIterator twoPhaseIterator() { + return twoPhaseIterator; + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public float score() throws IOException { + return score; + } + + @Override + public int freq() throws IOException { + return 1; + } + +} diff --git a/engine/src/main/java/org/hibernate/search/spatial/impl/ConstantScoreWeight.java b/engine/src/main/java/org/hibernate/search/spatial/impl/ConstantScoreWeight.java new file mode 100644 index 00000000000..effd2f27be0 --- /dev/null +++ b/engine/src/main/java/org/hibernate/search/spatial/impl/ConstantScoreWeight.java @@ -0,0 +1,87 @@ +/* + * Hibernate Search, full-text search for your domain model + * + * License: GNU Lesser General Public License (LGPL), version 2.1 or later + * See the lgpl.txt file in the root directory or . 
+ */ +package org.hibernate.search.spatial.impl; + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; + + +/** + * A copy of Lucene's ConstantScoreWeight implementation, + * necessary because the one in Lucene is marked as "internal". + * + * @author Yoann Rodiere + */ +abstract class ConstantScoreWeight extends Weight { + + private float boost; + private float queryNorm; + private float queryWeight; + + protected ConstantScoreWeight(Query query) { + super( query ); + normalize( 1f, 1f ); + } + + @Override + public void extractTerms(Set terms) { + // most constant-score queries don't wrap index terms + // eg. geo filters, doc values queries, ... + // override if your constant-score query does wrap terms + } + + @Override + public final float getValueForNormalization() throws IOException { + return queryWeight * queryWeight; + } + + @Override + public void normalize(float norm, float boost) { + this.boost = boost; + queryNorm = norm; + queryWeight = queryNorm * boost; + } + + protected final float score() { + return queryWeight; + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + final Scorer s = scorer( context ); + final boolean exists; + if ( s == null ) { + exists = false; + } + else { + final TwoPhaseIterator twoPhase = s.twoPhaseIterator(); + if ( twoPhase == null ) { + exists = s.iterator().advance( doc ) == doc; + } + else { + exists = twoPhase.approximation().advance( doc ) == doc && twoPhase.matches(); + } + } + + if ( exists ) { + return Explanation.match( + queryWeight, getQuery().toString() + ", product of:", + Explanation.match( boost, "boost" ), Explanation.match( queryNorm, "queryNorm" ) ); + 
} + else { + return Explanation.noMatch( getQuery().toString() + " doesn't match id " + doc ); + } + } +} diff --git a/engine/src/main/java/org/hibernate/search/spatial/impl/DistanceFilter.java b/engine/src/main/java/org/hibernate/search/spatial/impl/DistanceFilter.java deleted file mode 100644 index c2d0418ec57..00000000000 --- a/engine/src/main/java/org/hibernate/search/spatial/impl/DistanceFilter.java +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Hibernate Search, full-text search for your domain model - * - * License: GNU Lesser General Public License (LGPL), version 2.1 or later - * See the lgpl.txt file in the root directory or . - */ -package org.hibernate.search.spatial.impl; - -import static org.hibernate.search.spatial.impl.CoordinateHelper.coordinate; - -import java.io.IOException; -import java.util.Objects; - -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.Filter; -import org.apache.lucene.search.FilteredDocIdSet; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.QueryWrapperFilter; -import org.apache.lucene.util.Bits; -import org.hibernate.search.spatial.Coordinates; - -/** - * Lucene Filter for filtering documents which have been indexed with Hibernate Search spatial Field bridge - * Use double lat,long field in the index from a Coordinates field declaration - * - * @author Nicolas Helleringer - * @see org.hibernate.search.spatial.SpatialFieldBridgeByHash - * @see org.hibernate.search.spatial.SpatialFieldBridgeByRange - * @see org.hibernate.search.spatial.Coordinates - */ -public final class DistanceFilter extends Filter { - - private Filter previousFilter; - private Point center; - private double radius; - private String coordinatesField; - private String latitudeField; - private String longitudeField; - - /** 
- * Construct a Distance Filter to match document distant at most of radius from center Point - * - * @param previousFilter previous Filter in the chain. As Distance is costly by retrieving the lat and long field - * it is better to use it last - * @param centerCoordinates center of the search perimeter - * @param radius radius of the search perimeter - * @param coordinatesField name of the field implementing Coordinates - * @see org.hibernate.search.spatial.Coordinates - */ - public DistanceFilter(Filter previousFilter, Coordinates centerCoordinates, double radius, String coordinatesField) { - if ( previousFilter != null ) { - this.previousFilter = previousFilter; - } - else { - this.previousFilter = new QueryWrapperFilter( new MatchAllDocsQuery() ); - } - this.center = Point.fromCoordinates( centerCoordinates ); - this.radius = radius; - this.coordinatesField = coordinatesField; - } - - /** - * Construct a Distance Filter to match document distant at most of radius from center Point - * - * @param previousFilter previous Filter in the chain. 
As Distance is costly by retrieving the lat and long field - * it is better to use it last - * @param centerCoordinates center of the search perimeter - * @param radius radius of the search perimeter - * @param latitudeField name of the field hosting latitude - * @param longitudeField name of the field hosting longitude - * @see org.hibernate.search.spatial.Coordinates - */ - public DistanceFilter(Filter previousFilter, Coordinates centerCoordinates, double radius, String latitudeField, String longitudeField) { - if ( previousFilter != null ) { - this.previousFilter = previousFilter; - } - else { - this.previousFilter = new QueryWrapperFilter( new MatchAllDocsQuery() ); - } - this.center = Point.fromCoordinates( centerCoordinates ); - this.radius = radius; - this.coordinatesField = null; - this.latitudeField = latitudeField; - this.longitudeField = longitudeField; - } - - /** - * Returns Doc Ids by retrieving their latitude,longitude and checking if within distance(radius) of the center of the search - * - * @param context the {@link LeafReaderContext} for which to return the {LeafReaderContext}. 
- * @param acceptDocs Bits that represent the allowable docs to match (typically deleted docs but possibly filtering - * other documents) - * - * @return a {@link DocIdSet} with the matching document ids - */ - @Override - public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException { - final LeafReader atomicReader = context.reader(); - - DocIdSet docs = previousFilter.getDocIdSet( context, acceptDocs ); - - if ( ( docs == null ) || ( docs.iterator() == null ) ) { - return null; - } - - final Bits docsWithLatitude = DocValues.getDocsWithField( atomicReader, getLatitudeField() ); - final Bits docsWithLongitude = DocValues.getDocsWithField( atomicReader, getLongitudeField() ); - final NumericDocValues latitudeValues = DocValues.getNumeric( atomicReader, getLatitudeField() ); - final NumericDocValues longitudeValues = DocValues.getNumeric( atomicReader, getLongitudeField() ); - - return new FilteredDocIdSet( docs ) { - @Override - protected boolean match(int documentIndex) { - if ( docsWithLatitude.get( documentIndex ) && docsWithLongitude.get( documentIndex ) ) { - double lat = coordinate( latitudeValues, documentIndex ); - double lon = coordinate( longitudeValues, documentIndex ); - if ( center.getDistanceTo( lat, lon ) <= radius ) { - return true; - } - } - - return false; - } - }; - } - - public String getCoordinatesField() { - if ( coordinatesField != null ) { - return coordinatesField; - } - else { - return SpatialHelper.stripSpatialFieldSuffix( latitudeField ); - } - } - - public double getRadius() { - return radius; - } - - public Point getCenter() { - return center; - } - - public Filter getPreviousFilter() { - return previousFilter; - } - - private String getLatitudeField() { - if ( latitudeField != null ) { - return latitudeField; - } - else { - return SpatialHelper.formatLatitude( coordinatesField ); - } - } - - private String getLongitudeField() { - if ( longitudeField != null ) { - return longitudeField; - } - else { - 
return SpatialHelper.formatLongitude( coordinatesField ); - } - } - - @Override - public int hashCode() { - int hashCode = 31 * super.hashCode() + previousFilter.hashCode(); - hashCode = 31 * hashCode + center.hashCode(); - hashCode = 31 * hashCode + Double.hashCode( radius ); - hashCode = 31 * hashCode + Objects.hashCode( coordinatesField ); - hashCode = 31 * hashCode + Objects.hashCode( latitudeField ); - hashCode = 31 * hashCode + Objects.hashCode( longitudeField ); - return hashCode; - } - - @Override - public boolean equals(Object obj) { - if ( obj == this ) { - return true; - } - if ( obj instanceof DistanceFilter ) { - DistanceFilter other = (DistanceFilter) obj; - return Float.floatToIntBits( getBoost() ) == Float.floatToIntBits( other.getBoost() ) - && previousFilter.equals( other.previousFilter ) - && center.equals( other.center ) - && radius == other.radius - && Objects.equals( coordinatesField, other.coordinatesField ) - && Objects.equals( latitudeField, other.latitudeField ) - && Objects.equals( longitudeField, other.longitudeField ); - } - return false; - } - - @Override - public String toString(String field) { - final StringBuilder sb = new StringBuilder(); - sb.append( "DistanceFilter" ); - sb.append( "{previousFilter=" ).append( previousFilter ); - sb.append( ", center=" ).append( center ); - sb.append( ", radius=" ).append( radius ); - if ( coordinatesField != null ) { - sb.append( ", coordinatesField='" ).append( coordinatesField ).append( '\'' ); - } - else { - sb.append( ", latitudeField=" ).append( latitudeField ); - sb.append( ", longitudeField=" ).append( longitudeField ).append( '\'' ); - } - sb.append( '}' ); - return sb.toString(); - } -} diff --git a/engine/src/main/java/org/hibernate/search/spatial/impl/DistanceQuery.java b/engine/src/main/java/org/hibernate/search/spatial/impl/DistanceQuery.java new file mode 100644 index 00000000000..dfc94a69417 --- /dev/null +++ 
b/engine/src/main/java/org/hibernate/search/spatial/impl/DistanceQuery.java @@ -0,0 +1,272 @@ +/* + * Hibernate Search, full-text search for your domain model + * + * License: GNU Lesser General Public License (LGPL), version 2.1 or later + * See the lgpl.txt file in the root directory or . + */ +package org.hibernate.search.spatial.impl; + +import static org.hibernate.search.spatial.impl.CoordinateHelper.coordinate; + +import java.io.IOException; +import java.util.Objects; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.Bits; +import org.hibernate.search.spatial.Coordinates; +import org.hibernate.search.spatial.SpatialFieldBridgeByRange; + +/** + * Lucene distance Query for documents which have been indexed with {@link SpatialFieldBridgeByRange} + * Use double lat,long field in the index from a Coordinates field declaration + * + * @author Nicolas Helleringer + * @see org.hibernate.search.spatial.SpatialFieldBridgeByHash + * @see org.hibernate.search.spatial.SpatialFieldBridgeByRange + * @see org.hibernate.search.spatial.Coordinates + */ +public final class DistanceQuery extends Query { + + private Query approximationQuery; + private Point center; + private double radius; + private String coordinatesField; + private String latitudeField; + private String longitudeField; + + /** + * Construct a distance query to match document distant at most of radius from center Point + * + * @param approximationQuery an approximation for this 
distance query + * (i.e. a query that produces no false-negatives, but may produce false-positives), or {@code null}. + * If non-null, only documents returned by the approximation query will be considered, + * which will enhance performance. + * @param centerCoordinates center of the search perimeter + * @param radius radius of the search perimeter + * @param coordinatesField name of the field implementing Coordinates + * @see org.hibernate.search.spatial.Coordinates + */ + public DistanceQuery(Query approximationQuery, Coordinates centerCoordinates, double radius, String coordinatesField) { + if ( approximationQuery == null ) { + this.approximationQuery = new MatchAllDocsQuery(); + } + else { + this.approximationQuery = approximationQuery; + } + this.center = Point.fromCoordinates( centerCoordinates ); + this.radius = radius; + this.coordinatesField = coordinatesField; + } + + /** + * Construct a distance query to match document distant at most of radius from center Point + * + * @param approximationQuery an approximation for this distance query + * (i.e. a query that produces no false-negatives, but may produce false-positives), or {@code null}. + * If non-null, only documents returned by the approximation query will be considered, + * which will enhance performance. 
+ * @param centerCoordinates center of the search perimeter + * @param radius radius of the search perimeter + * @param latitudeField name of the field hosting latitude + * @param longitudeField name of the field hosting longitude + * @see org.hibernate.search.spatial.Coordinates + */ + public DistanceQuery(Query approximationQuery, Coordinates centerCoordinates, double radius, String latitudeField, String longitudeField) { + if ( approximationQuery == null ) { + this.approximationQuery = new MatchAllDocsQuery(); + } + else { + this.approximationQuery = approximationQuery; + } + this.center = Point.fromCoordinates( centerCoordinates ); + this.radius = radius; + this.coordinatesField = null; + this.latitudeField = latitudeField; + this.longitudeField = longitudeField; + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Query superRewritten = super.rewrite( reader ); + if ( superRewritten != this ) { + return superRewritten; + } + Query rewrittenApproximationQuery = approximationQuery.rewrite( reader ); + if ( rewrittenApproximationQuery != approximationQuery ) { + DistanceQuery clone = (DistanceQuery) clone(); + clone.approximationQuery = rewrittenApproximationQuery; + return clone; + } + return this; + } + + @Override + public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + Weight approximationWeight = approximationQuery.createWeight( searcher, needsScores ); + return new ConstantScoreWeight( this ) { + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + Scorer approximationScorer = approximationWeight.scorer( context ); + if ( approximationScorer == null ) { + // No result + return null; + } + DocIdSetIterator approximation = approximationScorer.iterator(); + TwoPhaseIterator iterator = createDocIdSetIterator( approximation, context ); + return new ConstantScoreScorer( this, score(), iterator ); + } + }; + } + + /** + * Returns a {@link TwoPhaseIterator} that 
will first check the {@link #approximationQuery} (if any), + * and will only match documents whose coordinates are within distance(radius) of the center of the search. + * + * @param approximation an approximation of matching documents. + * @param context the {@link LeafReaderContext} for which to build the {@link TwoPhaseIterator}. + * + * @return a {@link TwoPhaseIterator} with the matching document ids + */ + private TwoPhaseIterator createDocIdSetIterator(DocIdSetIterator approximation, LeafReaderContext context) throws IOException { + return new TwoPhaseIterator( approximation ) { + + private Bits docsWithLatitude; + private Bits docsWithLongitude; + private NumericDocValues latitudeValues; + private NumericDocValues longitudeValues; + + private void lazyInit() throws IOException { + if ( docsWithLatitude != null ) { + return; + } + LeafReader atomicReader = context.reader(); + this.docsWithLatitude = DocValues.getDocsWithField( atomicReader, getLatitudeField() ); + this.docsWithLongitude = DocValues.getDocsWithField( atomicReader, getLongitudeField() ); + this.latitudeValues = DocValues.getNumeric( atomicReader, getLatitudeField() ); + this.longitudeValues = DocValues.getNumeric( atomicReader, getLongitudeField() ); + } + + @Override + public boolean matches() throws IOException { + lazyInit(); + int docID = approximation().docID(); + if ( docsWithLatitude.get( docID ) && docsWithLongitude.get( docID ) ) { + double lat = coordinate( latitudeValues, docID ); + double lon = coordinate( longitudeValues, docID ); + if ( center.getDistanceTo( lat, lon ) <= radius ) { + return true; + } + } + return false; + } + + @Override + public float matchCost() { + /* + * I honestly have no idea how many "simple operations" we're performing here. + * I suppose sines and cosines are very low-level, probably assembly instructions + * on most architectures. + * Some Lucene implementations seem to use 100 as a default, so let's do the same. 
+ */ + return 100; + } + }; + } + + public String getCoordinatesField() { + if ( coordinatesField != null ) { + return coordinatesField; + } + else { + return SpatialHelper.stripSpatialFieldSuffix( latitudeField ); + } + } + + public double getRadius() { + return radius; + } + + public Point getCenter() { + return center; + } + + public Query getApproximationQuery() { + return approximationQuery; + } + + private String getLatitudeField() { + if ( latitudeField != null ) { + return latitudeField; + } + else { + return SpatialHelper.formatLatitude( coordinatesField ); + } + } + + private String getLongitudeField() { + if ( longitudeField != null ) { + return longitudeField; + } + else { + return SpatialHelper.formatLongitude( coordinatesField ); + } + } + + @Override + public int hashCode() { + int hashCode = 31 * super.hashCode() + approximationQuery.hashCode(); + hashCode = 31 * hashCode + center.hashCode(); + hashCode = 31 * hashCode + Double.hashCode( radius ); + hashCode = 31 * hashCode + Objects.hashCode( coordinatesField ); + hashCode = 31 * hashCode + Objects.hashCode( latitudeField ); + hashCode = 31 * hashCode + Objects.hashCode( longitudeField ); + return hashCode; + } + + @Override + public boolean equals(Object obj) { + if ( obj == this ) { + return true; + } + if ( obj instanceof DistanceQuery ) { + DistanceQuery other = (DistanceQuery) obj; + return Float.floatToIntBits( getBoost() ) == Float.floatToIntBits( other.getBoost() ) + && approximationQuery.equals( other.approximationQuery ) + && center.equals( other.center ) + && radius == other.radius + && Objects.equals( coordinatesField, other.coordinatesField ) + && Objects.equals( latitudeField, other.latitudeField ) + && Objects.equals( longitudeField, other.longitudeField ); + } + return false; + } + + @Override + public String toString(String field) { + final StringBuilder sb = new StringBuilder(); + sb.append( "DistanceQuery" ); + sb.append( "{approximationQuery=" ).append( approximationQuery ); + 
sb.append( ", center=" ).append( center ); + sb.append( ", radius=" ).append( radius ); + if ( coordinatesField != null ) { + sb.append( ", coordinatesField='" ).append( coordinatesField ).append( '\'' ); + } + else { + sb.append( ", latitudeField=" ).append( latitudeField ); + sb.append( ", longitudeField=" ).append( longitudeField ).append( '\'' ); + } + sb.append( '}' ); + return sb.toString(); + } +} diff --git a/engine/src/main/java/org/hibernate/search/spatial/impl/SpatialHashFilter.java b/engine/src/main/java/org/hibernate/search/spatial/impl/SpatialHashQuery.java similarity index 67% rename from engine/src/main/java/org/hibernate/search/spatial/impl/SpatialHashFilter.java rename to engine/src/main/java/org/hibernate/search/spatial/impl/SpatialHashQuery.java index 5514dc5ce92..210cf5232fe 100644 --- a/engine/src/main/java/org/hibernate/search/spatial/impl/SpatialHashFilter.java +++ b/engine/src/main/java/org/hibernate/search/spatial/impl/SpatialHashQuery.java @@ -10,45 +10,56 @@ import java.util.Collections; import java.util.List; -import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.Filter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; import org.apache.lucene.util.BitDocIdSet; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; +import org.hibernate.search.spatial.SpatialFieldBridgeByHash; /** - * Lucene Filter for filtering documents which have been indexed with Hibernate Search Spatial SpatialFieldBridge + * Lucene distance Query for documents which have been indexed with Hibernate Search {@link SpatialFieldBridgeByHash} 
* Use denormalized spatial hash cell ids to return a sub set of documents near the center * * @author Nicolas Helleringer * @see org.hibernate.search.spatial.SpatialFieldBridgeByHash * @see org.hibernate.search.spatial.Coordinates */ -public final class SpatialHashFilter extends Filter { +public final class SpatialHashQuery extends Query { private final List spatialHashCellsIds; private final String fieldName; - public SpatialHashFilter(List spatialHashCellsIds, String fieldName) { + public SpatialHashQuery(List spatialHashCellsIds, String fieldName) { this.spatialHashCellsIds = spatialHashCellsIds; this.fieldName = fieldName; } + @Override + public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + return new ConstantScoreWeight( this ) { + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + DocIdSetIterator iterator = createDocIdSetIterator( context ); + return new ConstantScoreScorer( this, score(), iterator ); + } + }; + } + /** * Search the index for document having the correct spatial hash cell id at given grid level. * * @param context the {@link LeafReaderContext} for which to return the {@link DocIdSet}. 
- * @param acceptDocs Bits that represent the allowable docs to match (typically deleted docs but possibly filtering - * other documents) - * @return a {@link DocIdSet} with the document ids matching + * @return a {@link DocIdSetIterator} with the matching document ids */ - @Override - public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException { + private DocIdSetIterator createDocIdSetIterator(LeafReaderContext context) throws IOException { if ( spatialHashCellsIds.size() == 0 ) { return null; } @@ -59,7 +70,7 @@ public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws I boolean found = false; for ( int i = 0; i < spatialHashCellsIds.size(); i++ ) { Term spatialHashCellTerm = new Term( fieldName, spatialHashCellsIds.get( i ) ); - DocsEnum spatialHashCellsDocs = atomicReader.termDocsEnum( spatialHashCellTerm ); + PostingsEnum spatialHashCellsDocs = atomicReader.postings( spatialHashCellTerm ); if ( spatialHashCellsDocs != null ) { while ( true ) { final int docId = spatialHashCellsDocs.nextDoc(); @@ -67,20 +78,18 @@ public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws I break; } else { - if ( acceptDocs == null || acceptDocs.get( docId ) ) { - matchedDocumentsIds.bits().set( docId ); - found = true; - } + matchedDocumentsIds.bits().set( docId ); + found = true; } } } } if ( found ) { - return matchedDocumentsIds; + return matchedDocumentsIds.iterator(); } else { - return null; + return DocIdSetIterator.empty(); } } @@ -104,8 +113,8 @@ public boolean equals(Object obj) { if ( obj == this ) { return true; } - if ( obj instanceof SpatialHashFilter ) { - SpatialHashFilter other = (SpatialHashFilter) obj; + if ( obj instanceof SpatialHashQuery ) { + SpatialHashQuery other = (SpatialHashQuery) obj; return spatialHashCellsIds.equals( other.spatialHashCellsIds ) && fieldName.equals( other.fieldName ); } diff --git 
a/engine/src/main/java/org/hibernate/search/spatial/impl/SpatialQueryBuilderFromCoordinates.java b/engine/src/main/java/org/hibernate/search/spatial/impl/SpatialQueryBuilderFromCoordinates.java index 139535b3b9b..94999ebfcc0 100644 --- a/engine/src/main/java/org/hibernate/search/spatial/impl/SpatialQueryBuilderFromCoordinates.java +++ b/engine/src/main/java/org/hibernate/search/spatial/impl/SpatialQueryBuilderFromCoordinates.java @@ -6,102 +6,82 @@ */ package org.hibernate.search.spatial.impl; +import java.util.List; + import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.Filter; -import org.apache.lucene.search.FilteredQuery; -import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.QueryWrapperFilter; - import org.hibernate.search.spatial.Coordinates; import org.hibernate.search.spatial.SpatialFieldBridgeByHash; -import java.util.List; - /** - * The SpatialQueryBuilder holds builder methods for Hash, Distance and Spatial (Hash+Distance) filters - * and queries + * The SpatialQueryBuilder holds builder methods for Hash, Distance and Spatial (Hash+Distance) queries * * @author Nicolas Helleringer */ public abstract class SpatialQueryBuilderFromCoordinates { /** - * Returns a Lucene filter which rely on Hibernate Search Spatial - * spatial hash indexation to filter document at radius + * Returns a Lucene query to match documents by distance to a center, + * relying only on spatial hashes. 
* * @param center center of the search discus * @param radius distance max to center in km * @param fieldName name of the Lucene Field implementing Coordinates - * @return Lucene filter to be used in a Query + * @return Lucene Query to be used in a search + * * @see org.apache.lucene.search.Query * @see org.hibernate.search.spatial.Coordinates - * @see org.apache.lucene.search.Filter */ - public static Filter buildSpatialHashFilter(Coordinates center, double radius, String fieldName) { + public static Query buildSpatialHashQuery(Coordinates center, double radius, String fieldName) { int bestSpatialHashLevel = SpatialHelper.findBestSpatialHashLevelForSearchRange( 2.0d * radius ); if ( bestSpatialHashLevel > SpatialFieldBridgeByHash.DEFAULT_BOTTOM_SPATIAL_HASH_LEVEL ) { bestSpatialHashLevel = SpatialFieldBridgeByHash.DEFAULT_BOTTOM_SPATIAL_HASH_LEVEL; } List spatialHashCellsIds = SpatialHelper.getSpatialHashCellsIds( center, radius, bestSpatialHashLevel ); - return new SpatialHashFilter( spatialHashCellsIds, SpatialHelper.formatFieldName( bestSpatialHashLevel, fieldName ) ); + return new SpatialHashQuery( spatialHashCellsIds, SpatialHelper.formatFieldName( bestSpatialHashLevel, fieldName ) ); } /** - * Returns a Lucene filter to fine filter document by distance + * Returns a Lucene query to match documents by distance to a center. * * @param center center of the search discus * @param radius distance max to center in km * @param coordinatesField name of the Lucene Field implementing Coordinates - * @return Lucene filter to be used in a Query - * @param previousFilter preceding filter in filter chain - * Warning if passed null DistanceFilter constructor use a - * filter wrapped match all query (time/resource consuming !) + * @param approximationQuery an approximation of the distance query + * (i.e. a query returning all the results returned by the distance query, + * but also some false positives). 
+ * WARNING: when passing {@code null}, every single document will be scanned + * (time/resource consuming!) + * @return Lucene Query to be used in a search + * * @see org.apache.lucene.search.Query * @see org.hibernate.search.spatial.Coordinates - * @see DistanceFilter - * @see Filter */ - public static Filter buildDistanceFilter(Filter previousFilter, Coordinates center, double radius, String coordinatesField) { - return new DistanceFilter( previousFilter, center, radius, coordinatesField ); + public static Query buildDistanceQuery(Query approximationQuery, Coordinates center, double radius, String coordinatesField) { + return new DistanceQuery( approximationQuery, center, radius, coordinatesField ); } /** - * Returns a Lucene filter to fine filter document by distance + * Returns a Lucene query to match documents by distance to a center. * * @param center center of the search discus * @param radius distance max to center in km * @param latitudeField name of the Lucene Field hosting latitude * @param longitudeField name of the Lucene Field hosting longitude - * @return Lucene filter to be used in a Query - * @param previousFilter preceding filter in filter chain - * Warning if passed null DistanceFilter constructor use a - * filter wrapped match all query (time/ressource consuming !) 
- * @see org.apache.lucene.search.Query - * @see org.hibernate.search.spatial.Coordinates - * @see DistanceFilter - * @see Filter - */ - public static Filter buildDistanceFilter(Filter previousFilter, Coordinates center, double radius, String latitudeField, String longitudeField) { - return new DistanceFilter( previousFilter, center, radius, latitudeField, longitudeField ); - } - - /** - * Returns a Lucene Query which rely on Hibernate Search Spatial - * spatial hash indexation to filter document at radius by wrapping a - * SpatialHashFilter - * - * @param center center of the search discus - * @param radius distance max to center in km - * @param fieldName name of the Lucene Field implementing Coordinates + * @param approximationQuery an approximation of the distance query + * (i.e. a query returning all the results returned by the distance query, + * but also some false positives). + * WARNING: when passing {@code null}, every single document will be scanned + * (time/resource consuming!) 
* @return Lucene Query to be used in a search + * * @see org.apache.lucene.search.Query * @see org.hibernate.search.spatial.Coordinates */ - public static Query buildSpatialHashQuery(Coordinates center, double radius, String fieldName) { - return new FilteredQuery( new MatchAllDocsQuery(), buildSpatialHashFilter( center, radius, fieldName ) ); + public static Query buildDistanceQuery(Query approximationQuery, Coordinates center, double radius, String latitudeField, String longitudeField) { + return new DistanceQuery( approximationQuery, center, radius, latitudeField, longitudeField ); } /** @@ -112,34 +92,33 @@ public static Query buildSpatialHashQuery(Coordinates center, double radius, Str * @param radius distance max to center in km * @param fieldName name of the Lucene Field implementing Coordinates * @return Lucene Query to be used in a search + * * @see Query * @see org.hibernate.search.spatial.Coordinates */ public static Query buildDistanceQuery(Coordinates center, double radius, String fieldName) { - Filter allFilter = new QueryWrapperFilter( new MatchAllDocsQuery() ); - return new FilteredQuery( new MatchAllDocsQuery(), buildDistanceFilter( allFilter, center, radius, fieldName ) ); + return buildDistanceQuery( null, center, radius, fieldName ); } /** - * Returns a Lucene Query which relies on Hibernate Search Spatial - * spatial hash indexation to filter documents at radius and filter its results - * by a fine DistanceFilter + * Returns a Lucene query to match documents by distance to a center, + * relying first on spatial hash to approximate the result, and then on a more + * precise (but more costly) {@link DistanceQuery}. 
* * @param center center of the search discus * @param radius distance max to center in km * @param fieldName name of the Lucene Field implementing Coordinates * @return Lucene Query to be used in a search + * * @see Query * @see org.hibernate.search.spatial.Coordinates */ public static Query buildSpatialQueryByHash(Coordinates center, double radius, String fieldName) { - return new FilteredQuery( new MatchAllDocsQuery(), - buildDistanceFilter( - buildSpatialHashFilter( center, radius, fieldName ), - center, - radius, - fieldName - ) + return buildDistanceQuery( + buildSpatialHashQuery( center, radius, fieldName ), + center, + radius, + fieldName ); } @@ -151,6 +130,7 @@ public static Query buildSpatialQueryByHash(Coordinates center, double radius, S * @param radius distance max to center in km * @param fieldName name of the Lucene Field implementing Coordinates * @return Lucene Query to be used in a search + * * @see Query * @see org.hibernate.search.spatial.Coordinates */ @@ -185,15 +165,12 @@ public static Query buildSpatialQueryByRange(Coordinates centerCoordinates, doub .add( longQuery, BooleanClause.Occur.FILTER ) .build(); - return new FilteredQuery( - new MatchAllDocsQuery(), - buildDistanceFilter( - new QueryWrapperFilter( boxQuery ), - center, - radius, - latitudeFieldName, - longitudeFieldName - ) + return buildDistanceQuery( + boxQuery, + center, + radius, + latitudeFieldName, + longitudeFieldName ); } } diff --git a/orm/src/test/java/org/hibernate/search/test/spatial/BenchWithGeonames.java b/orm/src/test/java/org/hibernate/search/test/spatial/BenchWithGeonames.java index 7bcea952b23..52568540ebb 100644 --- a/orm/src/test/java/org/hibernate/search/test/spatial/BenchWithGeonames.java +++ b/orm/src/test/java/org/hibernate/search/test/spatial/BenchWithGeonames.java @@ -219,7 +219,7 @@ public static void Bench() { ) .createQuery(); org.apache.lucene.search.Query filteredQuery = new ConstantScoreQuery( - 
SpatialQueryBuilderFromCoordinates.buildDistanceFilter( + SpatialQueryBuilderFromCoordinates.buildDistanceQuery( new QueryWrapperFilter( query ), center, radius,