Skip to content
Permalink
Browse files

HSEARCH-1188 Some progress on faceting

  • Loading branch information...
gsmet committed Feb 4, 2016
1 parent 0b5f107 commit 657ecfe684e6b4e952d11274523a4c5683e0c7eb
Showing with 778 additions and 282 deletions.
  1. +3 −10 elasticsearch/pom.xml
  2. +16 −14 elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/ElasticSearchQueries.java
  3. +6 −6 elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/client/impl/JestClient.java
  4. +137 −32 ...earch/src/main/java/org/hibernate/search/backend/elasticsearch/impl/ElasticSearchHSQueryImpl.java
  5. +27 −5 .../src/main/java/org/hibernate/search/backend/elasticsearch/impl/ElasticSearchIndexWorkVisitor.java
  6. +3 −182 ...main/java/org/hibernate/search/backend/elasticsearch/impl/ElasticSearchLuceneQueryTranslator.java
  7. +162 −0 elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/json/JsonBuilder.java
  8. +349 −0 elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/json/ToElasticSearch.java
  9. +5 −0 elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/logging/impl/Log.java
  10. +15 −9 engine/src/main/java/org/hibernate/search/engine/spi/DocumentBuilderIndexedEntity.java
  11. +14 −0 engine/src/main/java/org/hibernate/search/query/dsl/impl/FacetRange.java
  12. +2 −0 engine/src/main/java/org/hibernate/search/query/engine/impl/AbstractHSQuery.java
  13. +7 −10 engine/src/main/java/org/hibernate/search/query/engine/impl/FacetManagerImpl.java
  14. +6 −0 engine/src/main/java/org/hibernate/search/query/engine/impl/LuceneHSQuery.java
  15. +5 −5 engine/src/main/java/org/hibernate/search/query/engine/impl/QueryHits.java
  16. +7 −0 engine/src/main/java/org/hibernate/search/query/facet/FacetSelection.java
  17. +5 −0 engine/src/test/java/org/hibernate/search/testsupport/backend/LeakingBackendQueueProcessor.java
  18. +5 −5 orm/src/test/java/org/hibernate/search/test/embedded/depth/RecursiveGraphIncludePathsTest.java
  19. +4 −4 orm/src/test/java/org/hibernate/search/test/shards/DynamicShardingTest.java
@@ -239,21 +239,14 @@
<exclude>**/ToStringTest.java</exclude>
<exclude>**/DynamicBoostingTest.java</exclude>
<exclude>**/DSLTest.java</exclude>
<exclude>**/DSLEmbeddedSearchTest.java</exclude>
<exclude>**/MoreLikeThisTest.java</exclude>
<exclude>**/NumericEncodingQueriesTest.java</exclude>
<exclude>**/ExplanationTest.java</exclude>
<exclude>**/EmbeddedSortableIdFieldTest.java</exclude>

<!-- Faceting -->
<exclude>**/CollectionFacetingTest.java</exclude>
<exclude>**/EdgeCaseFacetTest.java</exclude>
<exclude>**/FacetFilteringTest.java</exclude>
<exclude>**/ManyToOneFacetingTest.java</exclude>
<exclude>**/NoQueryResultsFacetingTest.java</exclude>
<exclude>**/NumberFacetingTest.java</exclude>
<exclude>**/RangeFacetingTest.java</exclude>
<exclude>**/SimpleFacetingTest.java</exclude>
<exclude>**/StringFacetingTest.java</exclude>
<exclude>**/WebShopTest.java</exclude>
<exclude>**/FacetIndexingFailureTest.java</exclude>

<exclude>**/IndexAndQueryNullTest.java</exclude>
<exclude>**/ProgrammaticIndexAndQueryNullTest.java</exclude>
@@ -7,6 +7,7 @@
package org.hibernate.search.backend.elasticsearch;

import org.hibernate.search.backend.elasticsearch.impl.ElasticSearchHSQueryImpl;
import org.hibernate.search.backend.elasticsearch.json.JsonBuilder;
import org.hibernate.search.engine.integration.impl.ExtendedSearchIntegrator;
import org.hibernate.search.query.engine.spi.HSQuery;
import org.hibernate.search.query.engine.spi.QueryDescriptor;
@@ -30,9 +31,15 @@ private ElasticSearchQueries() {
* documentation</a> for the complete query syntax.
*/
public static QueryDescriptor fromJson(String jsonQuery) {
    // Gson's parser is what provides the single quote support, so a single parse is enough. The former
    // parse -> toString() -> parse round trip only normalized the String form of the query, which is no
    // longer needed now that the descriptor keeps the parsed JsonObject.
    JsonObject parsedQuery = new JsonParser().parse( jsonQuery ).getAsJsonObject();
    return new ElasticSearchJsonQuery( parsedQuery );
}

/**
 * Builds a query descriptor around an already parsed ElasticSearch JSON query; the given object is handed
 * to the descriptor as-is. The complete query syntax is described in the <a
 * href="https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html">official
 * documentation</a>.
 *
 * @param jsonQuery the JSON representation of the query
 * @return a descriptor wrapping the given query
 */
public static QueryDescriptor fromJson(JsonObject jsonQuery) {
    QueryDescriptor descriptor = new ElasticSearchJsonQuery( jsonQuery );
    return descriptor;
}

@@ -46,23 +53,18 @@ public static QueryDescriptor fromQueryString(String queryStringQuery) {
// Payload looks like so:
// { "query" : { "query_string" : { "query" : "abstract:Hibernate" } } }
//
// The intermediate key has to be "query_string", as in the payload above; "queryString" would not
// produce the documented payload.
JsonBuilder.Object query = JsonBuilder.object().add( "query",
        JsonBuilder.object().add( "query_string",
                JsonBuilder.object().addProperty( "query", queryStringQuery ) ) );

return new ElasticSearchJsonQuery( query.build() );
}

private static class ElasticSearchJsonQuery implements QueryDescriptor {

/** The ElasticSearch query, kept in its parsed JSON form. */
private final JsonObject jsonQuery;

/**
 * @param jsonQuery the parsed JSON query this descriptor wraps
 */
public ElasticSearchJsonQuery(JsonObject jsonQuery) {
    this.jsonQuery = jsonQuery;
}

@@ -73,7 +75,7 @@ public HSQuery createHSQuery(ExtendedSearchIntegrator extendedIntegrator) {

@Override
public String toString() {
    // The query is held as a JsonObject, so render it to its JSON text.
    return jsonQuery.toString();
}
}
}
@@ -35,6 +35,11 @@

private io.searchbox.client.JestClient client;

/**
 * Gson instance configured with {@code AbstractJestClient.ELASTIC_SEARCH_DATE_FORMAT} as date format and
 * with serialization of {@code null} values enabled. This is the instance passed to the Jest HTTP client
 * configuration in {@code start()}.
 */
public static final Gson GSON = new GsonBuilder()
.setDateFormat( AbstractJestClient.ELASTIC_SEARCH_DATE_FORMAT )
.serializeNulls()
.create();

@Override
public void start(Properties properties, BuildContext context) {
JestClientFactory factory = new JestClientFactory();
@@ -43,17 +48,12 @@ public void start(Properties properties, BuildContext context) {
properties, ElasticSearchEnvironment.SERVER_URI, "http://localhost:9200"
);

Gson gson = new GsonBuilder()
.setDateFormat( AbstractJestClient.ELASTIC_SEARCH_DATE_FORMAT )
.serializeNulls()
.create();

factory.setHttpClientConfig(
new HttpClientConfig.Builder( serverUri )
.multiThreaded( true )
.readTimeout( 2000 )
.connTimeout( 2000 )
.gson( gson )
.gson( GSON )
.build()
);

@@ -10,10 +10,13 @@
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Pattern;

@@ -31,6 +34,8 @@
import org.apache.lucene.search.TopDocs;
import org.hibernate.search.backend.elasticsearch.ProjectionConstants;
import org.hibernate.search.backend.elasticsearch.client.impl.JestClient;
import org.hibernate.search.backend.elasticsearch.json.JsonBuilder;
import org.hibernate.search.backend.elasticsearch.json.ToElasticSearch;
import org.hibernate.search.backend.elasticsearch.logging.impl.Log;
import org.hibernate.search.bridge.FieldBridge;
import org.hibernate.search.bridge.TwoWayFieldBridge;
@@ -43,14 +48,22 @@
import org.hibernate.search.filter.FullTextFilter;
import org.hibernate.search.indexes.spi.IndexManager;
import org.hibernate.search.metadata.NumericFieldSettingsDescriptor.NumericEncodingType;
import org.hibernate.search.query.dsl.impl.DiscreteFacetRequest;
import org.hibernate.search.query.dsl.impl.FacetRange;
import org.hibernate.search.query.dsl.impl.RangeFacetRequest;
import org.hibernate.search.query.engine.impl.AbstractHSQuery;
import org.hibernate.search.query.engine.impl.EntityInfoImpl;
import org.hibernate.search.query.engine.impl.FacetManagerImpl;
import org.hibernate.search.query.engine.impl.QueryHits.FacetComparator;
import org.hibernate.search.query.engine.impl.TimeoutManagerImpl;
import org.hibernate.search.query.engine.spi.DocumentExtractor;
import org.hibernate.search.query.engine.spi.EntityInfo;
import org.hibernate.search.query.engine.spi.FacetManager;
import org.hibernate.search.query.engine.spi.HSQuery;
import org.hibernate.search.query.facet.Facet;
import org.hibernate.search.query.facet.FacetSortOrder;
import org.hibernate.search.query.facet.FacetingRequest;
import org.hibernate.search.spatial.Coordinates;
import org.hibernate.search.util.impl.ReflectionHelper;
import org.hibernate.search.util.logging.impl.LoggerFactory;

import com.google.gson.JsonArray;
@@ -72,6 +85,15 @@
*/
public class ElasticSearchHSQueryImpl extends AbstractHSQuery {

// TODO GSM: Copied from QueryHits: should probably be in one place only.
/**
 * Comparators keyed by facet sort order. Only {@code COUNT_ASC}, {@code COUNT_DESC} and
 * {@code FIELD_VALUE} are registered.
 */
private static final EnumMap<FacetSortOrder, FacetComparator> facetComparators = new EnumMap<>( FacetSortOrder.class );

static {
    FacetSortOrder[] registeredOrders = {
            FacetSortOrder.COUNT_ASC,
            FacetSortOrder.COUNT_DESC,
            FacetSortOrder.FIELD_VALUE
    };
    for ( FacetSortOrder order : registeredOrders ) {
        facetComparators.put( order, new FacetComparator( order ) );
    }
}

private static final Log LOG = LoggerFactory.make( Log.class );

private static final Pattern DOT = Pattern.compile( "\\." );
@@ -81,13 +103,15 @@
*/
private static final int MAX_RESULT_WINDOW_SIZE = 10000;

/** The user-provided query in its parsed JSON form. */
private final JsonObject jsonQuery;

/** Cached result size; reset by {@code clearCachedResults()}. */
private Integer resultSize;
private IndexSearcher searcher;
/** Cached search result; reset by {@code clearCachedResults()}. */
private SearchResult searchResult;

/** Created on first call to {@code getFacetManager()}. */
private transient FacetManagerImpl facetManager;

/**
 * @param jsonQuery the parsed JSON query to execute
 * @param extendedIntegrator the integrator handed to the parent query implementation
 */
public ElasticSearchHSQueryImpl(JsonObject jsonQuery, ExtendedSearchIntegrator extendedIntegrator) {
    super( extendedIntegrator );
    this.jsonQuery = jsonQuery;
}
@@ -98,9 +122,11 @@ public HSQuery luceneQuery(Query query) {
}

@Override
public FacetManager getFacetManager() {
// TODO implement
throw new UnsupportedOperationException( "Not yet implemented" );
public FacetManagerImpl getFacetManager() {
if ( facetManager == null ) {
facetManager = new FacetManagerImpl( this );
}
return facetManager;
}

@Override
@@ -113,12 +139,18 @@ public DocumentExtractor queryDocumentExtractor() {
return new ElasticSearchDocumentExtractor();
}

/**
 * Returns the cached search result, calling {@code execute()} first when no result is cached yet.
 */
SearchResult getSearchResult() {
    if ( searchResult != null ) {
        return searchResult;
    }
    execute();
    return searchResult;
}

@Override
public int queryResultSize() {
    // The presence of a cached search result is used as the "already executed" marker.
    final boolean alreadyExecuted = searchResult != null;
    if ( !alreadyExecuted ) {
        execute();
    }
    return resultSize;
}

@@ -200,6 +232,7 @@ public void disableFullTextFilter(String name) {

/**
 * Drops the cached result size and search result, so that {@code queryResultSize()} and
 * {@code getSearchResult()} call {@code execute()} again on their next invocation.
 */
@Override
protected void clearCachedResults() {
    resultSize = null;
    searchResult = null;
}

@@ -271,7 +304,7 @@ private IndexSearcher() {
if ( !( indexManager instanceof ElasticSearchIndexManager ) ) {
throw LOG.cannotRunEsQueryTargetingEntityIndexedWithNonEsIndexManager(
queriedEntityType,
jsonQuery
jsonQuery.toString()
);
}

@@ -289,8 +322,23 @@ private IndexSearcher() {
// TODO feed in user-provided filters
JsonObject effectiveFilter = getEffectiveFilter( typeFilters );

// TODO can we avoid the forth and back between GSON and String?
executedQuery = "{ \"query\" : { \"filtered\" : { " + jsonQuery.substring( 1, jsonQuery.length() - 1 ) + ", \"filter\" : " + effectiveFilter.toString() + " } } }";
JsonBuilder.Object completeQuery = JsonBuilder.object();

completeQuery.add( "query",
JsonBuilder.object()
.add( "filtered", JsonBuilder.object( jsonQuery ).add( "filter", effectiveFilter ) ) );

if ( !getFacetManager().getFacetRequests().isEmpty() ) {
JsonBuilder.Object facets = JsonBuilder.object();

for ( Entry<String, FacetingRequest> facetRequestEntry : getFacetManager().getFacetRequests().entrySet() ) {
ToElasticSearch.addFacetingRequest( facets, facetRequestEntry.getValue() );
}

completeQuery.add( "aggregations", facets );
}

executedQuery = completeQuery.build().toString();

Search.Builder search = new Search.Builder( executedQuery );
search.addIndex( indexNames );
@@ -319,31 +367,16 @@ private JsonObject getEffectiveFilter(JsonArray typeFilters) {
}

// wrap type filters into should if there is more than one
JsonObject effectiveTypeFilter = new JsonObject();
if ( typeFilters.size() == 1 ) {
effectiveTypeFilter = typeFilters.get( 0 ).getAsJsonObject();
}
else {
JsonObject should = new JsonObject();
should.add( "should", typeFilters );
effectiveTypeFilter = new JsonObject();
effectiveTypeFilter.add( "bool", should );
}
filters.add( effectiveTypeFilter );
filters.add( ToElasticSearch.condition( "should", typeFilters ) );

// wrap filters into must if there is more than one
JsonObject effectiveFilter = new JsonObject();
if ( filters.size() == 1 ) {
effectiveFilter = filters.get( 0 ).getAsJsonObject();
}
else {
JsonObject must = new JsonObject();
must.add( "must", filters );
effectiveFilter = new JsonObject();
effectiveFilter.add( "bool", must );
// facet filters
Filter facetFilter = getFacetManager().getFacetFilter();
if ( facetFilter != null ) {
filters.add( ToElasticSearch.fromLuceneFilter( getFacetManager().getFacetFilter() ) );
}

return effectiveFilter;
// wrap filters into must if there is more than one
return ToElasticSearch.condition( "must", filters );
}

private JsonObject getEntityTypeFilter(Class<?> queriedEntityType) {
@@ -553,6 +586,78 @@ private JsonElement getFieldValue(JsonObject parent, String projectedField) {
}
}

/**
 * Reads the "aggregations" element of the search result and converts it into one list of facets per
 * registered faceting request, then hands the lists to the facet manager. Does nothing when the result
 * has no "aggregations" element.
 */
@Override
protected void extractFacetResults() {
    JsonElement rawAggregations = getSearchResult().getJsonObject().get( "aggregations" );
    if ( rawAggregations == null ) {
        return;
    }
    JsonObject aggregations = rawAggregations.getAsJsonObject();

    Map<String, List<Facet>> facetsByName = new HashMap<>();
    for ( FacetingRequest request : getFacetManager().getFacetRequests().values() ) {
        List<Facet> facets;
        if ( !( request instanceof DiscreteFacetRequest ) ) {
            facets = updateRangeFacets( aggregations, (RangeFacetRequest<?>) request );
            // Range facets come back in definition order; any other order is applied here.
            if ( FacetSortOrder.RANGE_DEFINITION_ORDER != request.getSort() ) {
                Collections.sort( facets, facetComparators.get( request.getSort() ) );
            }
        }
        else {
            // Discrete facets are sorted by ElasticSearch
            facets = updateStringFacets( aggregations, (DiscreteFacetRequest) request );
        }
        facetsByName.put( request.getFacetingName(), facets );
    }

    getFacetManager().setFacetResults( facetsByName );
}

/**
 * Builds the facets of a range faceting request from the aggregations of the search result. Each range is
 * looked up under the key {@code <faceting name>-<hash code of the range>}; ranges without a matching
 * aggregation are skipped, as are empty ranges unless the request includes zero counts.
 *
 * @param aggregations the "aggregations" object of the search result
 * @param facetRequest the range faceting request to build facets for
 * @return the facets, in the order of the request's range list
 */
private List<Facet> updateRangeFacets(JsonObject aggregations, RangeFacetRequest<?> facetRequest) {
    Class<?> valueType = facetRequest.getFacetValueType();
    boolean supportedType = ReflectionHelper.isIntegerType( valueType )
            || Date.class.isAssignableFrom( valueType )
            || ReflectionHelper.isFloatingPointType( valueType );
    if ( !supportedType ) {
        throw LOG.unsupportedFacetRangeParameter( valueType.getName() );
    }

    String keyPrefix = facetRequest.getFacetingName() + "-";
    List<Facet> facets = new ArrayList<>();
    for ( FacetRange<?> range : facetRequest.getFacetRangeList() ) {
        JsonElement aggregation = aggregations.get( keyPrefix + range.hashCode() );
        if ( aggregation != null ) {
            int docCount = aggregation.getAsJsonObject().get( "doc_count" ).getAsInt();
            if ( docCount != 0 || facetRequest.hasZeroCountsIncluded() ) {
                facets.add( facetRequest.createFacet( range.getRangeString(), docCount ) );
            }
        }
    }
    return facets;
}

/**
 * Builds the facets of a discrete faceting request from the "buckets" of its aggregation, one facet per
 * bucket, in the order of the buckets.
 *
 * @param aggregations the "aggregations" object of the search result
 * @param facetRequest the discrete faceting request to build facets for
 * @return the facets, or an empty list when the aggregation is not present in the result
 */
private List<Facet> updateStringFacets(JsonObject aggregations, DiscreteFacetRequest facetRequest) {
    String facetingName = facetRequest.getFacetingName();
    JsonElement aggregation = aggregations.get( facetingName );

    // deal with nested aggregation for nested documents: the actual aggregation is one level deeper,
    // under the same name
    if ( aggregation != null && facetRequest.getFieldName().contains( "." ) ) {
        aggregation = aggregation.getAsJsonObject().get( facetingName );
    }
    if ( aggregation == null ) {
        return Collections.emptyList();
    }

    List<Facet> facets = new ArrayList<>();
    JsonArray buckets = aggregation.getAsJsonObject().get( "buckets" ).getAsJsonArray();
    for ( JsonElement element : buckets ) {
        JsonObject bucket = element.getAsJsonObject();
        String key = bucket.get( "key" ).getAsString();
        int docCount = bucket.get( "doc_count" ).getAsInt();
        facets.add( facetRequest.createFacet( key, docCount ) );
    }
    return facets;
}

// TODO: Investigate scrolling API:
// https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html
private class ElasticSearchDocumentExtractor implements DocumentExtractor {

0 comments on commit 657ecfe

Please sign in to comment.
You can’t perform that action at this time.