Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
HSEARCH-2192 Make highlighters use nesting aware accumulating values
- Loading branch information
1 parent
ce8cc8d
commit 68e846b
Showing
13 changed files
with
930 additions
and
613 deletions.
There are no files selected for viewing
43 changes: 43 additions & 0 deletions
43
...main/java/org/hibernate/search/backend/lucene/analysis/impl/LimitTokenOffsetAnalyzer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/* | ||
* Hibernate Search, full-text search for your domain model | ||
* | ||
* License: GNU Lesser General Public License (LGPL), version 2.1 or later | ||
* See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>. | ||
*/ | ||
package org.hibernate.search.backend.lucene.analysis.impl; | ||
|
||
import org.apache.lucene.analysis.Analyzer; | ||
import org.apache.lucene.analysis.AnalyzerWrapper; | ||
import org.apache.lucene.analysis.miscellaneous.LimitTokenOffsetFilter; | ||
|
||
public class LimitTokenOffsetAnalyzer extends AnalyzerWrapper { | ||
|
||
public static Analyzer analyzer(Analyzer delegate, Integer maxOffset) { | ||
if ( maxOffset != null ) { | ||
return new LimitTokenOffsetAnalyzer( delegate, maxOffset ); | ||
} | ||
return delegate; | ||
} | ||
|
||
private final Analyzer delegate; | ||
private final int maxOffset; | ||
|
||
private LimitTokenOffsetAnalyzer(Analyzer delegate, int maxOffset) { | ||
super( delegate.getReuseStrategy() ); | ||
this.delegate = delegate; | ||
this.maxOffset = maxOffset; | ||
} | ||
|
||
@Override | ||
protected Analyzer getWrappedAnalyzer(String fieldName) { | ||
return delegate; | ||
} | ||
|
||
@Override | ||
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { | ||
return new TokenStreamComponents( | ||
components.getSource(), | ||
new LimitTokenOffsetFilter( components.getTokenStream(), maxOffset, false ) | ||
); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
230 changes: 230 additions & 0 deletions
230
...ernate/search/backend/lucene/search/highlighter/impl/LuceneAbstractSearchHighlighter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,230 @@ | ||
/* | ||
* Hibernate Search, full-text search for your domain model | ||
* | ||
* License: GNU Lesser General Public License (LGPL), version 2.1 or later | ||
* See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>. | ||
*/ | ||
package org.hibernate.search.backend.lucene.search.highlighter.impl; | ||
|
||
import static org.hibernate.search.util.common.impl.CollectionHelper.asImmutableList; | ||
|
||
import java.lang.invoke.MethodHandles; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Locale; | ||
import java.util.Set; | ||
|
||
import org.hibernate.search.backend.lucene.logging.impl.Log; | ||
import org.hibernate.search.backend.lucene.lowlevel.collector.impl.Values; | ||
import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; | ||
import org.hibernate.search.backend.lucene.search.projection.impl.ProjectionExtractContext; | ||
import org.hibernate.search.backend.lucene.search.projection.impl.ProjectionRequestContext; | ||
import org.hibernate.search.engine.search.highlighter.SearchHighlighter; | ||
import org.hibernate.search.engine.search.highlighter.dsl.HighlighterEncoder; | ||
import org.hibernate.search.engine.search.highlighter.dsl.HighlighterFragmenter; | ||
import org.hibernate.search.engine.search.highlighter.dsl.HighlighterTagSchema; | ||
import org.hibernate.search.engine.search.highlighter.spi.BoundaryScannerType; | ||
import org.hibernate.search.engine.search.highlighter.spi.SearchHighlighterBuilder; | ||
import org.hibernate.search.engine.search.projection.spi.ProjectionAccumulator; | ||
import org.hibernate.search.util.common.impl.Contracts; | ||
import org.hibernate.search.util.common.logging.impl.LoggerFactory; | ||
|
||
import org.apache.lucene.analysis.Analyzer; | ||
import org.apache.lucene.search.highlight.DefaultEncoder; | ||
import org.apache.lucene.search.highlight.Encoder; | ||
import org.apache.lucene.search.highlight.SimpleHTMLEncoder; | ||
import org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner; | ||
|
||
public abstract class LuceneAbstractSearchHighlighter implements SearchHighlighter { | ||
|
||
protected static final Log log = LoggerFactory.make( Log.class, MethodHandles.lookup() ); | ||
private static final Encoder DEFAULT_ENCODER = new DefaultEncoder(); | ||
private static final Encoder HTML_ENCODER = new SimpleHTMLEncoder(); | ||
|
||
private static final List<String> DEFAULT_PRE_TAGS = Collections.singletonList( "<em>" ); | ||
private static final List<String> DEFAULT_POST_TAGS = Collections.singletonList( "</em>" ); | ||
private static final List<String> STYLED_SCHEMA_PRE_TAG = asImmutableList( | ||
"<em class=\"hlt1\">", | ||
"<em class=\"hlt2\">", | ||
"<em class=\"hlt3\">", | ||
"<em class=\"hlt4\">", | ||
"<em class=\"hlt5\">", | ||
"<em class=\"hlt6\">", | ||
"<em class=\"hlt7\">", | ||
"<em class=\"hlt8\">", | ||
"<em class=\"hlt9\">", | ||
"<em class=\"hlt10\">" | ||
); | ||
private static final List<String> STYLED_SCHEMA_POST_TAGS = DEFAULT_POST_TAGS; | ||
|
||
protected final Set<String> indexNames; | ||
protected final Character[] boundaryChars; | ||
protected final Integer boundaryMaxScan; | ||
protected final Integer fragmentSize; | ||
protected final Integer noMatchSize; | ||
protected final Integer numberOfFragments; | ||
protected final Boolean orderByScore; | ||
protected final Integer maxAnalyzedOffset; | ||
protected final List<String> preTags; | ||
protected final List<String> postTags; | ||
protected final BoundaryScannerType boundaryScannerType; | ||
protected final Locale boundaryScannerLocale; | ||
protected final HighlighterFragmenter fragmenterType; | ||
protected final Integer phraseLimit; | ||
protected final Encoder encoder; | ||
|
||
protected LuceneAbstractSearchHighlighter(Builder builder) { | ||
this( builder.scope.hibernateSearchIndexNames(), | ||
builder.boundaryChars(), builder.boundaryMaxScan(), | ||
builder.fragmentSize(), builder.noMatchSize(), builder.numberOfFragments(), builder.orderByScore(), | ||
builder.maxAnalyzedOffset(), | ||
HighlighterTagSchema.STYLED.equals( builder.tagSchema() ) ? STYLED_SCHEMA_PRE_TAG : builder.preTags(), | ||
HighlighterTagSchema.STYLED.equals( builder.tagSchema() ) ? STYLED_SCHEMA_POST_TAGS : | ||
builder.postTags(), | ||
BoundaryScannerType.DEFAULT.equals( builder.boundaryScannerType() ) ? null : | ||
builder.boundaryScannerType(), | ||
builder.boundaryScannerLocale(), builder.fragmenterType(), | ||
builder.phraseLimit(), | ||
builder.encoder() != null ? | ||
( HighlighterEncoder.HTML.equals( builder.encoder() ) ? HTML_ENCODER : DEFAULT_ENCODER ) : null | ||
); | ||
} | ||
|
||
protected LuceneAbstractSearchHighlighter(BoundaryScannerType scannerType) { | ||
this( | ||
Collections.emptySet(), | ||
SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS, | ||
SimpleBoundaryScanner.DEFAULT_MAX_SCAN, | ||
100, | ||
0, | ||
5, | ||
false, | ||
null, | ||
DEFAULT_PRE_TAGS, | ||
DEFAULT_POST_TAGS, | ||
scannerType, | ||
Locale.ROOT, | ||
HighlighterFragmenter.SPAN, | ||
256, | ||
DEFAULT_ENCODER | ||
); | ||
} | ||
|
||
protected LuceneAbstractSearchHighlighter(Set<String> indexNames, | ||
Character[] boundaryChars, | ||
Integer boundaryMaxScan, | ||
Integer fragmentSize, Integer noMatchSize, Integer numberOfFragments, Boolean orderByScore, | ||
Integer maxAnalyzedOffset, | ||
List<String> preTags, List<String> postTags, BoundaryScannerType boundaryScannerType, | ||
Locale boundaryScannerLocale, HighlighterFragmenter fragmenterType, | ||
Integer phraseLimit, | ||
Encoder encoder | ||
) { | ||
this.indexNames = indexNames; | ||
this.boundaryChars = boundaryChars; | ||
this.boundaryMaxScan = boundaryMaxScan; | ||
this.fragmentSize = fragmentSize; | ||
this.noMatchSize = noMatchSize; | ||
this.numberOfFragments = numberOfFragments; | ||
this.orderByScore = orderByScore; | ||
this.maxAnalyzedOffset = maxAnalyzedOffset; | ||
this.preTags = preTags; | ||
this.postTags = postTags; | ||
this.boundaryScannerType = boundaryScannerType; | ||
this.boundaryScannerLocale = boundaryScannerLocale; | ||
this.fragmenterType = fragmenterType; | ||
this.phraseLimit = phraseLimit; | ||
this.encoder = encoder; | ||
} | ||
|
||
public static LuceneAbstractSearchHighlighter from(LuceneSearchIndexScope<?> scope, SearchHighlighter highlighter) { | ||
if ( !( highlighter instanceof LuceneAbstractSearchHighlighter ) ) { | ||
throw log.cannotMixLuceneSearchQueryWithOtherQueryHighlighters( highlighter ); | ||
} | ||
LuceneAbstractSearchHighlighter casted = (LuceneAbstractSearchHighlighter) highlighter; | ||
if ( !scope.hibernateSearchIndexNames().equals( casted.indexNames() ) ) { | ||
throw log.queryHighlighterDefinedOnDifferentIndexes( highlighter, casted.indexNames(), | ||
scope.hibernateSearchIndexNames() | ||
); | ||
} | ||
return casted; | ||
} | ||
|
||
public static LuceneAbstractSearchHighlighter defaultHighlighter() { | ||
return LuceneUnifiedSearchHighlighter.DEFAULTS; | ||
} | ||
|
||
public LuceneAbstractSearchHighlighter withFallback(LuceneAbstractSearchHighlighter fallback) { | ||
Contracts.assertNotNull( fallback, "fallback highlighter" ); | ||
|
||
if ( !this.getClass().equals( fallback.getClass() ) ) { | ||
throw log.cannotMixDifferentHighlighterTypesAtOverrideLevel( | ||
this.getClass().getSimpleName(), fallback.getClass().getSimpleName() ); | ||
} | ||
return createHighlighterSameType( | ||
indexNames, | ||
boundaryChars != null && boundaryChars.length != 0 ? boundaryChars : fallback.boundaryChars, | ||
boundaryMaxScan != null ? boundaryMaxScan : fallback.boundaryMaxScan, | ||
fragmentSize != null ? fragmentSize : fallback.fragmentSize, | ||
noMatchSize != null ? noMatchSize : fallback.noMatchSize, | ||
numberOfFragments != null ? numberOfFragments : fallback.numberOfFragments, | ||
orderByScore != null ? orderByScore : fallback.orderByScore, | ||
maxAnalyzedOffset != null ? maxAnalyzedOffset : fallback.maxAnalyzedOffset, | ||
preTags != null && !preTags.isEmpty() ? preTags : fallback.preTags, | ||
postTags != null && !postTags.isEmpty() ? postTags : fallback.postTags, | ||
boundaryScannerType != null ? boundaryScannerType : fallback.boundaryScannerType, | ||
boundaryScannerLocale != null ? boundaryScannerLocale : fallback.boundaryScannerLocale, | ||
fragmenterType != null ? fragmenterType : fallback.fragmenterType, | ||
phraseLimit != null ? phraseLimit : fallback.phraseLimit, | ||
encoder != null ? encoder : fallback.encoder | ||
); | ||
} | ||
|
||
protected abstract LuceneAbstractSearchHighlighter createHighlighterSameType(Set<String> indexNames, | ||
Character[] boundaryChars, | ||
Integer boundaryMaxScan, | ||
Integer fragmentSize, Integer noMatchSize, Integer numberOfFragments, Boolean orderByScore, | ||
Integer maxAnalyzedOffset, | ||
List<String> preTags, List<String> postTags, BoundaryScannerType boundaryScannerType, | ||
Locale boundaryScannerLocale, HighlighterFragmenter fragmenterType, | ||
Integer phraseLimit, | ||
Encoder encoder); | ||
|
||
|
||
public abstract LuceneAbstractSearchHighlighter withFallbackDefaults(); | ||
|
||
public Set<String> indexNames() { | ||
return indexNames; | ||
} | ||
|
||
public abstract <A> Values<A> createValues(String parentDocumentPath, String nestedDocumentPath, | ||
String absoluteFieldPath, Analyzer analyzer, ProjectionExtractContext context, | ||
ProjectionAccumulator<String, ?, A, List<String>> accumulator); | ||
|
||
public static class Builder extends SearchHighlighterBuilder { | ||
|
||
private final LuceneSearchIndexScope<?> scope; | ||
|
||
public Builder(LuceneSearchIndexScope<?> scope) { | ||
this.scope = scope; | ||
} | ||
|
||
public LuceneAbstractSearchHighlighter build() { | ||
switch ( this.type ) { | ||
case UNIFIED: | ||
return new LuceneUnifiedSearchHighlighter( this ); | ||
case PLAIN: | ||
return new LucenePlainSearchHighlighter( this ); | ||
case FAST_VECTOR: | ||
return new LuceneFastVectorSearchHighlighter( this ); | ||
default: | ||
throw new IllegalStateException( "Unknown highlighter type." ); | ||
} | ||
} | ||
} | ||
|
||
public void request(ProjectionRequestContext context, String absoluteFieldPath) { | ||
// do nothing | ||
} | ||
|
||
} |
Oops, something went wrong.