From e253a606c08c8b9001b4cd1c394e26ae021f0c34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?= Date: Tue, 14 Apr 2020 18:38:59 +0200 Subject: [PATCH] HSEARCH-3892 Move away from FacetsConfig/SortedSetDocValuesFacetCounts for text doc values used in aggregations 1. SortedSetDocValuesFacetCounts does not support nested documents 2. SortedSetDocValuesFacetCounts depends heavily on how FacetsConfig organizes doc values, with some dependencies on internal code. 3. FacetsConfig is able to do much more than what we need, and is very complicated. So we don't want to copy that. Thus I copied SortedSetDocValuesFacetCounts to our project as TextMultiValueFacetCounts, and removed the FacetsConfig parts. I'll adapt the code to support nested documents in the next few commits. --- .../impl/LuceneIndexEntryFactory.java | 9 +- .../impl/LuceneRootDocumentBuilder.java | 25 +- .../LuceneIndexSchemaRootNodeBuilder.java | 10 +- .../document/model/impl/LuceneIndexModel.java | 12 +- .../impl/LuceneIndexSchemaNodeCollector.java | 2 - .../impl/IndexManagerBackendContext.java | 6 +- .../index/impl/LuceneIndexManagerBuilder.java | 4 +- .../backend/lucene/logging/impl/Log.java | 4 - .../facet/impl/TextMultiValueFacetCounts.java | 228 ++++++++++++++++++ ...ractLuceneFacetsBasedTermsAggregation.java | 19 +- .../impl/LuceneTextTermsAggregation.java | 60 ++--- .../codec/impl/LuceneStringFieldCodec.java | 10 +- ...uceneNumericIndexFieldTypeOptionsStep.java | 3 +- ...ceneGeoPointIndexFieldTypeOptionsStep.java | 3 +- ...LuceneNativeIndexFieldTypeOptionsStep.java | 3 +- ...LuceneStringIndexFieldTypeOptionsStep.java | 1 - .../types/impl/LuceneIndexFieldType.java | 12 +- 17 files changed, 268 insertions(+), 143 deletions(-) create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/impl/LuceneIndexEntryFactory.java 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/impl/LuceneIndexEntryFactory.java index 722bce622f7..f6b5ff792e9 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/impl/LuceneIndexEntryFactory.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/impl/LuceneIndexEntryFactory.java @@ -9,25 +9,20 @@ import org.hibernate.search.backend.lucene.multitenancy.impl.MultiTenancyStrategy; import org.hibernate.search.engine.backend.work.execution.spi.DocumentContributor; -import org.apache.lucene.facet.FacetsConfig; - public class LuceneIndexEntryFactory { private final MultiTenancyStrategy multiTenancyStrategy; private final String indexName; - private final FacetsConfig facetsConfig; - public LuceneIndexEntryFactory(MultiTenancyStrategy multiTenancyStrategy, String indexName, - FacetsConfig facetsConfig) { + public LuceneIndexEntryFactory(MultiTenancyStrategy multiTenancyStrategy, String indexName) { this.indexName = indexName; this.multiTenancyStrategy = multiTenancyStrategy; - this.facetsConfig = facetsConfig; } public LuceneIndexEntry create(String tenantId, String id, String routingKey, DocumentContributor documentContributor) { LuceneRootDocumentBuilder builder = new LuceneRootDocumentBuilder( - multiTenancyStrategy, indexName, facetsConfig + multiTenancyStrategy, indexName ); documentContributor.contribute( builder ); return builder.build( tenantId, id, routingKey ); diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/impl/LuceneRootDocumentBuilder.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/impl/LuceneRootDocumentBuilder.java index bd83381cffc..983ce98265a 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/impl/LuceneRootDocumentBuilder.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/impl/LuceneRootDocumentBuilder.java @@ -6,35 
+6,25 @@ */ package org.hibernate.search.backend.lucene.document.impl; -import java.io.IOException; -import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.List; import org.apache.lucene.document.Document; -import org.apache.lucene.facet.FacetsConfig; -import org.hibernate.search.backend.lucene.logging.impl.Log; import org.hibernate.search.backend.lucene.lowlevel.common.impl.MetadataFields; import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexSchemaObjectNode; import org.hibernate.search.backend.lucene.multitenancy.impl.MultiTenancyStrategy; -import org.hibernate.search.util.common.logging.impl.LoggerFactory; public class LuceneRootDocumentBuilder extends AbstractLuceneNonFlattenedDocumentBuilder { - private static final Log log = LoggerFactory.make( Log.class, MethodHandles.lookup() ); - private final MultiTenancyStrategy multiTenancyStrategy; private final String indexName; - private final FacetsConfig facetsConfig; - LuceneRootDocumentBuilder(MultiTenancyStrategy multiTenancyStrategy, String indexName, - FacetsConfig facetsConfig) { + LuceneRootDocumentBuilder(MultiTenancyStrategy multiTenancyStrategy, String indexName) { super( LuceneIndexSchemaObjectNode.root() ); this.multiTenancyStrategy = multiTenancyStrategy; this.indexName = indexName; - this.facetsConfig = facetsConfig; } public LuceneIndexEntry build(String tenantId, String id, String routingKey) { @@ -55,19 +45,6 @@ private List assembleDocuments(MultiTenancyStrategy multiTenancyStrate documents.add( document ); - if ( facetsConfig != null ) { - for ( int i = 0; i < documents.size(); i++ ) { - Document document = documents.get( i ); - try { - Document facetedDocument = facetsConfig.build( document ); - documents.set( i, facetedDocument ); - } - catch (IOException | RuntimeException e) { - throw log.errorDuringFacetingIndexing( e ); - } - } - } - return documents; } } diff --git 
a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexSchemaRootNodeBuilder.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexSchemaRootNodeBuilder.java index 66ba1d8d8ac..f2f7692eb80 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexSchemaRootNodeBuilder.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexSchemaRootNodeBuilder.java @@ -10,7 +10,6 @@ import java.util.Map; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.facet.FacetsConfig; import org.hibernate.search.backend.lucene.analysis.model.impl.LuceneAnalysisDefinitionRegistry; import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexModel; @@ -73,7 +72,6 @@ public LuceneIndexModel build(String indexName) { Map objectNodesBuilder = new HashMap<>(); Map> fieldNodesBuilder = new HashMap<>(); ScopedAnalyzer.Builder scopedAnalyzerBuilder = new ScopedAnalyzer.Builder(); - FacetsConfig facetsConfig = new FacetsConfig(); LuceneIndexSchemaNodeCollector collector = new LuceneIndexSchemaNodeCollector() { @Override @@ -90,11 +88,6 @@ public void collectFieldNode(String absoluteFieldPath, LuceneIndexSchemaFieldNod public void collectObjectNode(String absolutePath, LuceneIndexSchemaObjectNode node) { objectNodesBuilder.put( absolutePath, node ); } - - @Override - public void collectFacetConfig(String absoluteFieldPath, boolean multiValued) { - facetsConfig.setMultiValued( absoluteFieldPath, multiValued ); - } }; LuceneIndexSchemaObjectNode rootNode = LuceneIndexSchemaObjectNode.root(); @@ -106,8 +99,7 @@ public void collectFacetConfig(String absoluteFieldPath, boolean multiValued) { idDslConverter == null ? 
new StringToDocumentIdentifierValueConverter() : idDslConverter, objectNodesBuilder, fieldNodesBuilder, - scopedAnalyzerBuilder.build(), - facetsConfig.getDimConfigs().isEmpty() ? null : facetsConfig + scopedAnalyzerBuilder.build() ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/impl/LuceneIndexModel.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/impl/LuceneIndexModel.java index 234a5e15f43..2b3de5847d0 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/impl/LuceneIndexModel.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/impl/LuceneIndexModel.java @@ -14,8 +14,6 @@ import org.hibernate.search.engine.reporting.spi.EventContexts; import org.hibernate.search.util.common.impl.CollectionHelper; -import org.apache.lucene.facet.FacetsConfig; - public class LuceneIndexModel implements AutoCloseable { @@ -31,22 +29,18 @@ public class LuceneIndexModel implements AutoCloseable { private final ScopedAnalyzer scopedAnalyzer; - private final FacetsConfig facetsConfig; - public LuceneIndexModel(String indexName, String mappedTypeName, ToDocumentIdentifierValueConverter idDslConverter, Map objectNodesBuilder, Map> fieldNodesBuilder, - ScopedAnalyzer scopedAnalyzer, - FacetsConfig facetsConfig) { + ScopedAnalyzer scopedAnalyzer) { this.indexName = indexName; this.mappedTypeName = mappedTypeName; this.idDslConverter = idDslConverter; this.fieldNodes = CollectionHelper.toImmutableMap( fieldNodesBuilder ); this.objectNodes = CollectionHelper.toImmutableMap( objectNodesBuilder ); this.scopedAnalyzer = scopedAnalyzer; - this.facetsConfig = facetsConfig; } @Override @@ -82,10 +76,6 @@ public ScopedAnalyzer getScopedAnalyzer() { return scopedAnalyzer; } - public FacetsConfig getFacetsConfig() { - return facetsConfig; - } - @Override public String toString() { return new StringBuilder( getClass().getSimpleName() ) 
diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/impl/LuceneIndexSchemaNodeCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/impl/LuceneIndexSchemaNodeCollector.java index 9023fb7613b..d8f0e6e0013 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/impl/LuceneIndexSchemaNodeCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/impl/LuceneIndexSchemaNodeCollector.java @@ -15,7 +15,5 @@ public interface LuceneIndexSchemaNodeCollector { void collectFieldNode(String absoluteFieldPath, LuceneIndexSchemaFieldNode schemaFieldNode); - void collectFacetConfig(String absoluteFieldPath, boolean multiValued); - void collectAnalyzer(String absoluteFieldPath, Analyzer analyzer); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/index/impl/IndexManagerBackendContext.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/index/impl/IndexManagerBackendContext.java index 4e5ba22ff8a..abbc802cf55 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/index/impl/IndexManagerBackendContext.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/index/impl/IndexManagerBackendContext.java @@ -55,8 +55,6 @@ import org.hibernate.search.util.common.impl.SuppressingCloser; import org.hibernate.search.util.common.reporting.EventContext; -import org.apache.lucene.facet.FacetsConfig; - public class IndexManagerBackendContext implements WorkExecutionBackendContext, SearchBackendContext { private static final ConfigurationProperty IO_STRATEGY = @@ -175,8 +173,8 @@ EventContext getEventContext() { return eventContext; } - LuceneIndexEntryFactory createLuceneIndexEntryFactory(String indexName, FacetsConfig facetsConfig) { - return new LuceneIndexEntryFactory( multiTenancyStrategy, indexName, facetsConfig ); + LuceneIndexEntryFactory 
createLuceneIndexEntryFactory(String indexName) { + return new LuceneIndexEntryFactory( multiTenancyStrategy, indexName ); } IOStrategy createIOStrategy(ConfigurationPropertySource propertySource) { diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/index/impl/LuceneIndexManagerBuilder.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/index/impl/LuceneIndexManagerBuilder.java index b9a954a511e..382d7b985f9 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/index/impl/LuceneIndexManagerBuilder.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/index/impl/LuceneIndexManagerBuilder.java @@ -44,9 +44,7 @@ public LuceneIndexManagerImpl build() { LuceneIndexModel model = null; try { model = schemaRootNodeBuilder.build( indexName ); - LuceneIndexEntryFactory indexEntryFactory = backendContext.createLuceneIndexEntryFactory( - indexName, model.getFacetsConfig() - ); + LuceneIndexEntryFactory indexEntryFactory = backendContext.createLuceneIndexEntryFactory( indexName ); return new LuceneIndexManagerImpl( backendContext, indexName, model, indexEntryFactory ); diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/logging/impl/Log.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/logging/impl/Log.java index 10be5947d78..6f850d951f1 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/logging/impl/Log.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/logging/impl/Log.java @@ -128,10 +128,6 @@ public interface Log extends BasicLogger { value = "Value '%1$ss' is not in a valid format to express a Lucene version: %2$s" ) SearchException illegalLuceneVersionFormat(String property, String luceneErrorMessage, @Cause Exception e); - @Message(id = ID_OFFSET_1 + 265, - value = "Unable to build Lucene Document due to facet indexing error") - SearchException errorDuringFacetingIndexing(@Cause 
Exception e ); - @LogMessage(level = Level.DEBUG) @Message(id = ID_OFFSET_1 + 274, value = "Executing Lucene query '%s'" ) void executingLuceneQuery(Query luceneQuery); diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java new file mode 100644 index 00000000000..9202b6aca2c --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java @@ -0,0 +1,228 @@ +/* + * Hibernate Search, full-text search for your domain model + * + * License: GNU Lesser General Public License (LGPL), version 2.1 or later + * See the lgpl.txt file in the root directory or . + */ +package org.hibernate.search.backend.lucene.lowlevel.facet.impl; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.facet.TopOrdAndIntQueue; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; +import org.apache.lucene.index.OrdinalMap; +import org.apache.lucene.index.ReaderUtil; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.ConjunctionDISI; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LongValues; + +/** + * Copied with some changes from {@code org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts} + * of Apache Lucene project. 
+ */ +public class TextMultiValueFacetCounts extends Facets { + + final SortedSetDocValues dv; + final String field; + final int ordCount; + final int[] counts; + + public TextMultiValueFacetCounts(IndexReader reader, String field, FacetsCollector hits) + throws IOException { + this.field = field; + dv = MultiDocValues.getSortedSetValues( reader, field ); + if ( dv != null && dv.getValueCount() > Integer.MAX_VALUE ) { + // We may want to remove this limitation? + // Note that DefaultSortedSetDocValuesReaderState has the same limitation, + // so this is no worse than the "legacy" facets from Search 5. + throw new IllegalStateException( + "Cannot aggregate when more than " + Integer.MAX_VALUE + " terms are indexed" ); + } + ordCount = dv == null ? 0 : (int) dv.getValueCount(); + counts = new int[ordCount]; + count( reader, hits.getMatchingDocs() ); + } + + @Override + public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { + if ( topN <= 0 ) { + throw new IllegalArgumentException( "topN must be > 0 (got: " + topN + ")" ); + } + if ( !dim.equals( field ) ) { + throw new IllegalArgumentException( "invalid dim \"" + dim + "\"; should be \"" + field + "\"" ); + } + if ( path.length != 0 ) { + throw new IllegalArgumentException( "path.length should be 0" ); + } + return getTopChildrenSortByCount( topN ); + } + + private FacetResult getTopChildrenSortByCount(int topN) throws IOException { + TopOrdAndIntQueue q = null; + + int bottomCount = 0; + + int totCount = 0; + int childCount = 0; + + TopOrdAndIntQueue.OrdAndValue reuse = null; + + for ( int ord = 0; ord < ordCount; ord++ ) { + if ( counts[ord] > 0 ) { + totCount += counts[ord]; + childCount++; + if ( counts[ord] > bottomCount ) { + if ( reuse == null ) { + reuse = new TopOrdAndIntQueue.OrdAndValue(); + } + reuse.ord = ord; + reuse.value = counts[ord]; + if ( q == null ) { + // Lazy init, so we don't create this for the + // sparse case unnecessarily + q = new TopOrdAndIntQueue( 
topN ); + } + reuse = q.insertWithOverflow( reuse ); + if ( q.size() == topN ) { + bottomCount = q.top().value; + } + } + } + } + + if ( q == null ) { + return null; + } + + LabelAndValue[] labelValues = new LabelAndValue[q.size()]; + for ( int i = labelValues.length - 1; i >= 0; i-- ) { + TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop(); + final BytesRef term = dv.lookupOrd( ordAndValue.ord ); + labelValues[i] = new LabelAndValue( term.utf8ToString(), ordAndValue.value ); + } + + return new FacetResult( field, new String[0], totCount, labelValues, childCount ); + } + + private void countOneSegment(OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits) + throws IOException { + SortedSetDocValues segValues = reader.getSortedSetDocValues( field ); + if ( segValues == null ) { + // nothing to count + return; + } + + DocIdSetIterator it = ConjunctionDISI.intersectIterators( Arrays.asList( hits.bits.iterator(), segValues ) ); + + // TODO: yet another option is to count all segs + // first, only in seg-ord space, and then do a + // merge-sort-PQ in the end to only "resolve to + // global" those seg ords that can compete, if we know + // we just want top K? ie, this is the same algo + // that'd be used for merging facets across shards + // (distributed faceting). 
but this has much higher + // temp ram req'ts (sum of number of ords across all + // segs) + if ( ordinalMap != null ) { + final LongValues ordMap = ordinalMap.getGlobalOrds( segOrd ); + + int numSegOrds = (int) segValues.getValueCount(); + + if ( hits.totalHits < numSegOrds / 10 ) { + // Remap every ord to global ord as we iterate: + for ( int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc() ) { + int term = (int) segValues.nextOrd(); + while ( term != SortedSetDocValues.NO_MORE_ORDS ) { + counts[(int) ordMap.get( term )]++; + term = (int) segValues.nextOrd(); + } + } + } + else { + // First count in seg-ord space: + final int[] segCounts = new int[numSegOrds]; + for ( int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc() ) { + int term = (int) segValues.nextOrd(); + while ( term != SortedSetDocValues.NO_MORE_ORDS ) { + segCounts[term]++; + term = (int) segValues.nextOrd(); + } + } + + // Then, migrate to global ords: + for ( int ord = 0; ord < numSegOrds; ord++ ) { + int count = segCounts[ord]; + if ( count != 0 ) { + counts[(int) ordMap.get( ord )] += count; + } + } + } + } + else { + // No ord mapping (e.g., single segment index): + // just aggregate directly into counts: + for ( int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc() ) { + int term = (int) segValues.nextOrd(); + while ( term != SortedSetDocValues.NO_MORE_ORDS ) { + counts[term]++; + term = (int) segValues.nextOrd(); + } + } + } + } + + /** + * Does all the "real work" of tallying up the counts. + */ + private void count(IndexReader reader, List matchingDocs) throws IOException { + OrdinalMap ordinalMap; + + // TODO: is this right? really, we need a way to + // verify that this ordinalMap "matches" the leaves in + // matchingDocs... 
+ if ( dv instanceof MultiSortedSetDocValues && matchingDocs.size() > 1 ) { + ordinalMap = ( (MultiSortedSetDocValues) dv ).mapping; + } + else { + ordinalMap = null; + } + + for ( MatchingDocs hits : matchingDocs ) { + + // LUCENE-5090: make sure the provided reader context "matches" + // the top-level reader passed to the + // SortedSetDocValuesReaderState, else cryptic + // AIOOBE can happen: + if ( ReaderUtil.getTopLevelContext( hits.context ).reader() != reader ) { + throw new IllegalStateException( + "the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader" ); + } + + countOneSegment( ordinalMap, hits.context.reader(), hits.context.ord, hits ); + } + } + + @Override + public Number getSpecificValue(String dim, String... path) { + throw new UnsupportedOperationException(); + } + + @Override + public List getAllDims(int topN) throws IOException { + return Collections.singletonList( getTopChildren( topN, field ) ); + } + +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java index 3cd3bc188ad..c64dd59ec7b 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java @@ -8,7 +8,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.Comparator; import java.util.LinkedHashMap; import java.util.List; @@ -65,23 +64,7 @@ public void request(AggregationRequestContext context) { public final Map extract(AggregationExtractContext 
context) throws IOException { FromDocumentFieldValueConvertContext convertContext = context.getConvertContext(); - List> buckets; - try { - buckets = getTopBuckets( context ); - } - catch (IllegalArgumentException e) { - /* - * Happens in two cases: - * 1. There are no facets at all stored in the matching documents. - * 2. There are facets stored in the matching documents in general, - * but not for this specific field. - * In both cases, we know the target field is correctly configured to generate facets, - * because we managed to create this aggregation. - * So we can safely return an empty list: the matching documents simply do not have - * any value for this field. - */ - return toMap( convertContext, Collections.emptyList() ); - } + List> buckets = getTopBuckets( context ); if ( BucketOrder.COUNT_DESC.equals( order ) && (minDocCount > 0 || buckets.size() >= maxTermCount ) ) { /* diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java index 649f0ed3e6b..629c28b68e7 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java @@ -8,21 +8,19 @@ import java.io.IOException; import java.util.Comparator; -import java.util.LinkedHashSet; import java.util.Set; +import java.util.TreeSet; +import org.hibernate.search.backend.lucene.lowlevel.facet.impl.TextMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.impl.LuceneSearchContext; import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; import org.apache.lucene.facet.FacetResult; import 
org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.FacetsConfig; -import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState; -import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts; -import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; -import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedSetDocValues; /** @@ -41,50 +39,40 @@ private LuceneTextTermsAggregation(Builder builder) { @Override FacetResult getTopChildren(IndexReader reader, FacetsCollector facetsCollector, NestedDocsProvider nestedDocsProvider, int limit) throws IOException { - // May throw IllegalArgumentException - SortedSetDocValuesReaderState docValuesReaderState = new DefaultSortedSetDocValuesReaderState( reader ); - - SortedSetDocValuesFacetCounts facetCounts = new SortedSetDocValuesFacetCounts( - docValuesReaderState, facetsCollector + TextMultiValueFacetCounts facetCounts = new TextMultiValueFacetCounts( + reader, absoluteFieldPath, facetsCollector ); - // May throw IllegalArgumentException return facetCounts.getTopChildren( limit, absoluteFieldPath ); } @Override Set collectFirstTerms(IndexReader reader, boolean descending, int limit) throws IOException { - Set collectedTerms = new LinkedHashSet<>(); - - SortedSetDocValuesReaderState docValuesReaderState = new DefaultSortedSetDocValuesReaderState( reader ); - OrdRange ordRange = docValuesReaderState.getOrdRange( absoluteFieldPath ); - SortedSetDocValues docValues = docValuesReaderState.getDocValues(); - - // Note ordRange.end is inclusive, hence the weird index operations. 
- if ( descending ) { - int start = Math.max( ordRange.start, ordRange.end - limit + 1 ); - for ( int i = ordRange.end; i >= start ; --i ) { - collectedTerms.add( lookupOrd( docValues, i ) ); + TreeSet collectedTerms = new TreeSet<>( descending ? STRING_COMPARATOR.reversed() : STRING_COMPARATOR ); + for ( LeafReaderContext leaf : reader.leaves() ) { + final LeafReader atomicReader = leaf.reader(); + SortedSetDocValues docValues = atomicReader.getSortedSetDocValues( absoluteFieldPath ); + if ( docValues == null ) { + continue; } - } - else { - int end = Math.min( ordRange.start + limit - 1, ordRange.end ); - for ( int i = ordRange.start; i <= end; ++i ) { - collectedTerms.add( lookupOrd( docValues, i ) ); + int valueCount = (int) docValues.getValueCount(); + if ( descending ) { + int start = Math.max( 0, valueCount - limit ); + for ( int i = start; i < valueCount; ++i ) { + collectedTerms.add( docValues.lookupOrd( i ).utf8ToString() ); + } + } + else { + int end = Math.min( limit, valueCount ); + for ( int i = 0; i < end; ++i ) { + collectedTerms.add( docValues.lookupOrd( i ).utf8ToString() ); + } } } - return collectedTerms; } - private String lookupOrd(SortedSetDocValues docValues, int ord) throws IOException { - String pathAsString = docValues.lookupOrd( ord ).utf8ToString(); - // FacetsConfig does not store the term directly: it prepends the field name - String[] pathAsComponents = FacetsConfig.stringToPath( pathAsString ); - return pathAsComponents[1]; - } - @Override Comparator getAscendingTermComparator() { return STRING_COMPARATOR; diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/codec/impl/LuceneStringFieldCodec.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/codec/impl/LuceneStringFieldCodec.java index 97a79383c59..b570326e9f3 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/codec/impl/LuceneStringFieldCodec.java +++ 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/codec/impl/LuceneStringFieldCodec.java @@ -17,7 +17,6 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.SortedSetDocValuesField; -import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; import org.apache.lucene.index.Term; import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.NormsFieldExistsQuery; @@ -64,12 +63,7 @@ public void encode(LuceneDocumentBuilder documentBuilder, String absoluteFieldPa if ( sortable || aggregable ) { BytesRef normalized = normalize( absoluteFieldPath, value ); - if ( sortable ) { - documentBuilder.addField( new SortedSetDocValuesField( absoluteFieldPath, normalized ) ); - } - if ( aggregable ) { - documentBuilder.addField( new SortedSetDocValuesFacetField( absoluteFieldPath, normalized.utf8ToString() ) ); - } + documentBuilder.addField( new SortedSetDocValuesField( absoluteFieldPath, normalized ) ); } if ( !sortable && fieldType.omitNorms() ) { @@ -103,7 +97,7 @@ public Query createExistsQuery(String absoluteFieldPath) { if ( !fieldType.omitNorms() ) { return new NormsFieldExistsQuery( absoluteFieldPath ); } - else if ( sortable ) { + else if ( sortable || aggregable ) { return new DocValuesFieldExistsQuery( absoluteFieldPath ); } else { diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java index 6b096fed2ab..557b5f363b0 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java @@ -66,8 +66,7 @@ public 
LuceneIndexFieldType toIndexFieldType() { dslToIndexConverter, rawDslToIndexConverter, indexToProjectionConverter, rawIndexToProjectionConverter, codec - ), - resolvedAggregable + ) ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/LuceneGeoPointIndexFieldTypeOptionsStep.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/LuceneGeoPointIndexFieldTypeOptionsStep.java index db430c4faf0..f4e38e86357 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/LuceneGeoPointIndexFieldTypeOptionsStep.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/LuceneGeoPointIndexFieldTypeOptionsStep.java @@ -58,8 +58,7 @@ public LuceneIndexFieldType toIndexFieldType() { ), new LuceneGeoPointFieldAggregationBuilderFactory( resolvedAggregable, dslConverter, projectionConverter, codec - ), - resolvedAggregable + ) ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/LuceneNativeIndexFieldTypeOptionsStep.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/LuceneNativeIndexFieldTypeOptionsStep.java index 9657917ee1b..e4d40cbd452 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/LuceneNativeIndexFieldTypeOptionsStep.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/LuceneNativeIndexFieldTypeOptionsStep.java @@ -41,8 +41,7 @@ public IndexFieldType toIndexFieldType() { new LuceneStandardFieldProjectionBuilderFactory<>( fieldValueExtractor != null, projectionConverter, rawProjectionConverter, codec ), - null, - false + null ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/LuceneStringIndexFieldTypeOptionsStep.java 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/LuceneStringIndexFieldTypeOptionsStep.java index f617414513f..49c85f06c09 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/LuceneStringIndexFieldTypeOptionsStep.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/LuceneStringIndexFieldTypeOptionsStep.java @@ -167,7 +167,6 @@ else if ( searchAnalyzer != null ) { codec, analyzer != null ), - resolvedAggregable, analyzerOrNormalizer ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/impl/LuceneIndexFieldType.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/impl/LuceneIndexFieldType.java index d306526d04f..eccf78dee10 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/impl/LuceneIndexFieldType.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/impl/LuceneIndexFieldType.java @@ -24,7 +24,6 @@ public class LuceneIndexFieldType implements IndexFieldType { private final LuceneFieldSortBuilderFactory sortBuilderFactory; private final LuceneFieldProjectionBuilderFactory projectionBuilderFactory; private final LuceneFieldAggregationBuilderFactory aggregationBuilderFactory; - private final boolean aggregable; private final Analyzer analyzerOrNormalizer; public LuceneIndexFieldType( @@ -32,10 +31,9 @@ public LuceneIndexFieldType( LuceneFieldPredicateBuilderFactory predicateBuilderFactory, LuceneFieldSortBuilderFactory sortBuilderFactory, LuceneFieldProjectionBuilderFactory projectionBuilderFactory, - LuceneFieldAggregationBuilderFactory aggregationBuilderFactory, - boolean aggregable) { + LuceneFieldAggregationBuilderFactory aggregationBuilderFactory) { this( codec, predicateBuilderFactory, sortBuilderFactory, projectionBuilderFactory, - aggregationBuilderFactory, aggregable, null ); + aggregationBuilderFactory, null ); } public 
LuceneIndexFieldType(LuceneFieldCodec codec, @@ -43,14 +41,12 @@ public LuceneIndexFieldType(LuceneFieldCodec codec, LuceneFieldSortBuilderFactory sortBuilderFactory, LuceneFieldProjectionBuilderFactory projectionBuilderFactory, LuceneFieldAggregationBuilderFactory aggregationBuilderFactory, - boolean aggregable, Analyzer analyzerOrNormalizer) { this.codec = codec; this.predicateBuilderFactory = predicateBuilderFactory; this.sortBuilderFactory = sortBuilderFactory; this.projectionBuilderFactory = projectionBuilderFactory; this.aggregationBuilderFactory = aggregationBuilderFactory; - this.aggregable = aggregable; this.analyzerOrNormalizer = analyzerOrNormalizer; } @@ -69,10 +65,6 @@ public LuceneIndexSchemaFieldNode addField(LuceneIndexSchemaNodeCollector col collector.collectFieldNode( schemaNode.getAbsoluteFieldPath(), schemaNode ); - if ( aggregable ) { - collector.collectFacetConfig( schemaNode.getAbsoluteFieldPath(), multiValued ); - } - collector.collectAnalyzer( schemaNode.getAbsoluteFieldPath(), analyzerOrNormalizer ); return schemaNode;