HSEARCH-2192 Remove the max analyzed offset option

hibernate · May 2, 2023 · 0b1eedd
1 parent 713cc13
commit 0b1eedd
Show file tree

Hide file tree

Showing 19 changed files with 20 additions and 318 deletions.
diff --git a/...rch/backend/elasticsearch/search/highlighter/impl/ElasticsearchSearchHighlighterImpl.java b/...rch/backend/elasticsearch/search/highlighter/impl/ElasticsearchSearchHighlighterImpl.java
@@ -30,7 +30,7 @@
 public class ElasticsearchSearchHighlighterImpl implements ElasticsearchSearchHighlighter {
 
 	public static final ElasticsearchSearchHighlighter NO_OPTIONS_CONFIGURATION = new ElasticsearchSearchHighlighterImpl(
-			Collections.emptySet(), null, null, null, null, null, null, null, null, null, null, null, null, null,
+			Collections.emptySet(), null, null, null, null, null, null, null, null, null, null, null, null,
 			null, null, null
 	);
 
@@ -43,7 +43,6 @@ public class ElasticsearchSearchHighlighterImpl implements ElasticsearchSearchHi
 	private static final JsonAccessor<Integer> NO_MATCH_SIZE = JsonAccessor.root().property( "no_match_size" ).asInteger();
 	private static final JsonAccessor<Integer> NUMBER_OF_FRAGMENTS = JsonAccessor.root().property( "number_of_fragments" ).asInteger();
 	private static final JsonAccessor<String> ORDER = JsonAccessor.root().property( "order" ).asString();
-	private static final JsonAccessor<Integer> MAX_ANALYZED_OFFSET = JsonAccessor.root().property( "max_analyzed_offset" ).asInteger();
 	private static final JsonAccessor<String> TAGS_SCHEMA = JsonAccessor.root().property( "tags_schema" ).asString();
 	private static final JsonArrayAccessor PRE_TAGS = JsonAccessor.root().property( "pre_tags" ).asArray();
 	private static final JsonArrayAccessor POST_TAGS = JsonAccessor.root().property( "post_tags" ).asArray();
@@ -60,7 +59,6 @@ public class ElasticsearchSearchHighlighterImpl implements ElasticsearchSearchHi
 	private final Integer noMatchSize;
 	private final Integer numberOfFragments;
 	private final String orderByScore;
-	private final Integer maxAnalyzedOffset;
 	private final List<String> preTags;
 	private final List<String> postTags;
 	private final String boundaryScannerType;
@@ -76,7 +74,7 @@ private ElasticsearchSearchHighlighterImpl(Builder builder) {
 				builder.type(),
 				builder.boundaryCharsAsString(),
 				builder.boundaryMaxScan(), builder.fragmentSize(), builder.noMatchSize(), builder.numberOfFragments(),
-				Boolean.TRUE.equals( builder.orderByScore() ) ? "score" : null, builder.maxAnalyzedOffset(), builder.preTags(),
+				Boolean.TRUE.equals( builder.orderByScore() ) ? "score" : null, builder.preTags(),
 				builder.postTags(),
 				convertBoundaryScannerType( builder.boundaryScannerType() ),
 				Objects.toString( builder.boundaryScannerLocale(), null ),
@@ -88,7 +86,7 @@ private ElasticsearchSearchHighlighterImpl(Builder builder) {
 
 	private ElasticsearchSearchHighlighterImpl(Set<String> indexNames, SearchHighlighterType type, String boundaryChars,
 			Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize, Integer numberOfFragments,
-			String orderByScore, Integer maxAnalyzedOffset, List<String> preTags, List<String> postTags,
+			String orderByScore, List<String> preTags, List<String> postTags,
 			String boundaryScannerType, String boundaryScannerLocale, String fragmenterType,
 			Integer phraseLimit, String encoder, String tagSchema) {
 		this.indexNames = indexNames;
@@ -99,7 +97,6 @@ private ElasticsearchSearchHighlighterImpl(Set<String> indexNames, SearchHighlig
 		this.noMatchSize = noMatchSize;
 		this.numberOfFragments = numberOfFragments;
 		this.orderByScore = orderByScore;
-		this.maxAnalyzedOffset = maxAnalyzedOffset;
 		this.preTags = preTags;
 		this.postTags = postTags;
 		this.boundaryScannerType = boundaryScannerType;
@@ -151,7 +148,6 @@ private JsonObject toJson(JsonObject result) {
 		setIfNotNull( NO_MATCH_SIZE, this.noMatchSize, result );
 		setIfNotNull( NUMBER_OF_FRAGMENTS, this.numberOfFragments, result );
 		setIfNotNull( ORDER, this.orderByScore, result );
-		setIfNotNull( MAX_ANALYZED_OFFSET, this.maxAnalyzedOffset, result );
 		setIfNotNull( BOUNDARY_SCANNER, this.boundaryScannerType, result );
 		setIfNotNull( BOUNDARY_SCANNER_LOCALE, this.boundaryScannerLocale, result );
 		setIfNotNull( FRAGMENTER, this.fragmenterType, result );

diff --git a/...main/java/org/hibernate/search/backend/lucene/analysis/impl/LimitTokenOffsetAnalyzer.java b/...main/java/org/hibernate/search/backend/lucene/analysis/impl/LimitTokenOffsetAnalyzer.java
diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/logging/impl/Log.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/logging/impl/Log.java
@@ -695,22 +695,15 @@ SearchException invalidSingleValuedProjectionOnValueFieldInMultiValuedObjectFiel
 	SearchException unifiedHighlighterFragmentSizeNotSupported();
 
 	@Message(id = ID_OFFSET + 171,
-			value = "Lucene's unified highlighter does not support the max analyzed offset setting " +
-					"on fields that have non default term vector storage strategy configured. " +
-					"The strategy was either configured explicitly, or implicitly because the fast vector highlighter type was requested to be supported. " +
-					"Either use a plain or fast vector highlighters, or do not set this setting.")
-	SearchException unifiedHighlighterMaxAnalyzedOffsetNotSupported();
-
-	@Message(id = ID_OFFSET + 172,
 			value = "Highlight projection cannot be applied within nested context of '%1$s'.")
 	SearchException cannotHighlightInNestedContext(String currentNestingField,
 			@Param EventContext eventContext);
 
-	@Message(id = ID_OFFSET + 173,
+	@Message(id = ID_OFFSET + 172,
 			value = "The highlight projection cannot be applied to a field from an object using `ObjectStructure.NESTED` structure.")
 	SearchException cannotHighlightFieldFromNestedObjectStructure(@Param EventContext eventContext);
 
-	@Message(id = ID_OFFSET + 174, value = "'%1$s' cannot be nested in an object projection. "
+	@Message(id = ID_OFFSET + 173, value = "'%1$s' cannot be nested in an object projection. "
 			+ "%2$s")
 	SearchException cannotUseProjectionInNestedContext(String projection, String hint, @Param EventContext eventContext);
 }
diff --git a/...ernate/search/backend/lucene/search/highlighter/impl/LuceneAbstractSearchHighlighter.java b/...ernate/search/backend/lucene/search/highlighter/impl/LuceneAbstractSearchHighlighter.java
@@ -17,7 +17,6 @@
 import org.hibernate.search.backend.lucene.logging.impl.Log;
 import org.hibernate.search.backend.lucene.lowlevel.collector.impl.Values;
 import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope;
-import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldTypeContext;
 import org.hibernate.search.backend.lucene.search.projection.impl.ProjectionExtractContext;
 import org.hibernate.search.backend.lucene.search.projection.impl.ProjectionRequestContext;
 import org.hibernate.search.engine.search.highlighter.SearchHighlighter;
@@ -66,7 +65,6 @@ public abstract class LuceneAbstractSearchHighlighter implements SearchHighlight
 	protected final Integer noMatchSize;
 	protected final Integer numberOfFragments;
 	protected final Boolean orderByScore;
-	protected final Integer maxAnalyzedOffset;
 	protected final List<String> preTags;
 	protected final List<String> postTags;
 	protected final BoundaryScannerType boundaryScannerType;
@@ -79,7 +77,6 @@ protected LuceneAbstractSearchHighlighter(Builder builder) {
 		this( builder.scope.hibernateSearchIndexNames(),
 				builder.boundaryChars(), builder.boundaryMaxScan(),
 				builder.fragmentSize(), builder.noMatchSize(), builder.numberOfFragments(), builder.orderByScore(),
-				builder.maxAnalyzedOffset(),
 				HighlighterTagSchema.STYLED.equals( builder.tagSchema() ) ? STYLED_SCHEMA_PRE_TAG : builder.preTags(),
 				HighlighterTagSchema.STYLED.equals( builder.tagSchema() ) ? STYLED_SCHEMA_POST_TAGS :
 						builder.postTags(),
@@ -101,7 +98,6 @@ protected LuceneAbstractSearchHighlighter(BoundaryScannerType scannerType) {
 				0,
 				5,
 				false,
-				null,
 				DEFAULT_PRE_TAGS,
 				DEFAULT_POST_TAGS,
 				scannerType,
@@ -116,7 +112,6 @@ protected LuceneAbstractSearchHighlighter(Set<String> indexNames,
 			Character[] boundaryChars,
 			Integer boundaryMaxScan,
 			Integer fragmentSize, Integer noMatchSize, Integer numberOfFragments, Boolean orderByScore,
-			Integer maxAnalyzedOffset,
 			List<String> preTags, List<String> postTags, BoundaryScannerType boundaryScannerType,
 			Locale boundaryScannerLocale, HighlighterFragmenter fragmenterType,
 			Integer phraseLimit,
@@ -129,7 +124,6 @@ protected LuceneAbstractSearchHighlighter(Set<String> indexNames,
 		this.noMatchSize = noMatchSize;
 		this.numberOfFragments = numberOfFragments;
 		this.orderByScore = orderByScore;
-		this.maxAnalyzedOffset = maxAnalyzedOffset;
 		this.preTags = preTags;
 		this.postTags = postTags;
 		this.boundaryScannerType = boundaryScannerType;
@@ -170,7 +164,6 @@ public LuceneAbstractSearchHighlighter withFallback(LuceneAbstractSearchHighligh
 				noMatchSize != null ? noMatchSize : fallback.noMatchSize,
 				numberOfFragments != null ? numberOfFragments : fallback.numberOfFragments,
 				orderByScore != null ? orderByScore : fallback.orderByScore,
-				maxAnalyzedOffset != null ? maxAnalyzedOffset : fallback.maxAnalyzedOffset,
 				preTags != null && !preTags.isEmpty() ? preTags : fallback.preTags,
 				postTags != null && !postTags.isEmpty() ? postTags : fallback.postTags,
 				boundaryScannerType != null ? boundaryScannerType : fallback.boundaryScannerType,
@@ -185,7 +178,6 @@ protected abstract LuceneAbstractSearchHighlighter createHighlighterSameType(Set
 			Character[] boundaryChars,
 			Integer boundaryMaxScan,
 			Integer fragmentSize, Integer noMatchSize, Integer numberOfFragments, Boolean orderByScore,
-			Integer maxAnalyzedOffset,
 			List<String> preTags, List<String> postTags, BoundaryScannerType boundaryScannerType,
 			Locale boundaryScannerLocale, HighlighterFragmenter fragmenterType,
 			Integer phraseLimit,
@@ -204,10 +196,6 @@ public abstract <A> Values<A> createValues(String parentDocumentPath, String nes
 
 	public abstract SearchHighlighterType type();
 
-	public void checkApplicability(LuceneSearchIndexValueFieldTypeContext<?> typeContext) {
-		// do nothing
-	}
-
 	public static class Builder extends SearchHighlighterBuilder {
 
 		private final LuceneSearchIndexScope<?> scope;

diff --git a/...nate/search/backend/lucene/search/highlighter/impl/LuceneFastVectorSearchHighlighter.java b/...nate/search/backend/lucene/search/highlighter/impl/LuceneFastVectorSearchHighlighter.java
@@ -57,26 +57,26 @@ protected LuceneFastVectorSearchHighlighter(Builder builder) {
 
 	private LuceneFastVectorSearchHighlighter(Set<String> indexNames,
 			Character[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
-			Integer numberOfFragments, Boolean orderByScore, Integer maxAnalyzedOffset, List<String> preTags,
+			Integer numberOfFragments, Boolean orderByScore, List<String> preTags,
 			List<String> postTags, BoundaryScannerType boundaryScannerType, Locale boundaryScannerLocale,
 			HighlighterFragmenter fragmenterType,
 			Integer phraseLimit, Encoder encoder) {
 		super( indexNames, boundaryChars, boundaryMaxScan, fragmentSize, noMatchSize, numberOfFragments,
 				orderByScore,
-				maxAnalyzedOffset, preTags, postTags, boundaryScannerType, boundaryScannerLocale, fragmenterType,
+				preTags, postTags, boundaryScannerType, boundaryScannerLocale, fragmenterType,
 				phraseLimit, encoder
 		);
 	}
 
 	@Override
 	protected LuceneAbstractSearchHighlighter createHighlighterSameType(Set<String> indexNames,
 			Character[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
-			Integer numberOfFragments, Boolean orderByScore, Integer maxAnalyzedOffset, List<String> preTags,
+			Integer numberOfFragments, Boolean orderByScore, List<String> preTags,
 			List<String> postTags, BoundaryScannerType boundaryScannerType, Locale boundaryScannerLocale,
 			HighlighterFragmenter fragmenterType, Integer phraseLimit, Encoder encoder) {
 		return new LuceneFastVectorSearchHighlighter(
 				indexNames, boundaryChars, boundaryMaxScan, fragmentSize, noMatchSize, numberOfFragments,
-				orderByScore, maxAnalyzedOffset, preTags, postTags, boundaryScannerType, boundaryScannerLocale,
+				orderByScore, preTags, postTags, boundaryScannerType, boundaryScannerLocale,
 				fragmenterType, phraseLimit, encoder
 		);
 	}

diff --git a/...hibernate/search/backend/lucene/search/highlighter/impl/LucenePlainSearchHighlighter.java b/...hibernate/search/backend/lucene/search/highlighter/impl/LucenePlainSearchHighlighter.java
@@ -16,7 +16,6 @@
 import java.util.Locale;
 import java.util.Set;
 
-import org.hibernate.search.backend.lucene.analysis.impl.LimitTokenOffsetAnalyzer;
 import org.hibernate.search.backend.lucene.lowlevel.collector.impl.StoredFieldsValuesDelegate;
 import org.hibernate.search.backend.lucene.lowlevel.collector.impl.Values;
 import org.hibernate.search.backend.lucene.search.projection.impl.ProjectionExtractContext;
@@ -56,26 +55,26 @@ protected LucenePlainSearchHighlighter(Builder builder) {
 
 	private LucenePlainSearchHighlighter(Set<String> indexNames,
 			Character[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
-			Integer numberOfFragments, Boolean orderByScore, Integer maxAnalyzedOffset, List<String> preTags,
+			Integer numberOfFragments, Boolean orderByScore, List<String> preTags,
 			List<String> postTags, BoundaryScannerType boundaryScannerType, Locale boundaryScannerLocale,
 			HighlighterFragmenter fragmenterType, Integer phraseLimit,
 			Encoder encoder) {
 		super( indexNames, boundaryChars, boundaryMaxScan, fragmentSize, noMatchSize, numberOfFragments,
 				orderByScore,
-				maxAnalyzedOffset, preTags, postTags, boundaryScannerType, boundaryScannerLocale, fragmenterType,
+				preTags, postTags, boundaryScannerType, boundaryScannerLocale, fragmenterType,
 				phraseLimit, encoder
 		);
 	}
 
 	@Override
 	protected LuceneAbstractSearchHighlighter createHighlighterSameType(Set<String> indexNames,
 			Character[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
-			Integer numberOfFragments, Boolean orderByScore, Integer maxAnalyzedOffset, List<String> preTags,
+			Integer numberOfFragments, Boolean orderByScore, List<String> preTags,
 			List<String> postTags, BoundaryScannerType boundaryScannerType, Locale boundaryScannerLocale,
 			HighlighterFragmenter fragmenterType, Integer phraseLimit, Encoder encoder) {
 		return new LucenePlainSearchHighlighter(
 				indexNames, boundaryChars, boundaryMaxScan, fragmentSize, noMatchSize, numberOfFragments,
-				orderByScore, maxAnalyzedOffset, preTags, postTags, boundaryScannerType, boundaryScannerLocale,
+				orderByScore, preTags, postTags, boundaryScannerType, boundaryScannerLocale,
 				fragmenterType, phraseLimit, encoder
 		);
 	}
@@ -114,8 +113,7 @@ private final class PlainHighlighterValues<A> extends HighlighterValues<A> {
 			super( parentDocumentPath, nestedDocumentPath, context.collectorExecutionContext(), accumulator );
 			this.storedFieldsValuesDelegate = context.collectorExecutionContext().storedFieldsValuesDelegate();
 			this.field = field;
-			this.analyzer = LimitTokenOffsetAnalyzer.analyzer(
-					analyzer, LucenePlainSearchHighlighter.this.maxAnalyzedOffset );
+			this.analyzer = analyzer;
 
 			QueryScorer queryScorer = new QueryScorer( context.collectorExecutionContext().originalQuery(), field );
 			queryScorer.setExpandMultiTermQuery( true );

diff --git a/...bernate/search/backend/lucene/search/highlighter/impl/LuceneUnifiedSearchHighlighter.java b/...bernate/search/backend/lucene/search/highlighter/impl/LuceneUnifiedSearchHighlighter.java
@@ -16,9 +16,7 @@
 import java.util.Map;
 import java.util.Set;
 
-import org.hibernate.search.backend.lucene.analysis.impl.LimitTokenOffsetAnalyzer;
 import org.hibernate.search.backend.lucene.lowlevel.collector.impl.Values;
-import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldTypeContext;
 import org.hibernate.search.backend.lucene.search.projection.impl.ProjectionExtractContext;
 import org.hibernate.search.engine.search.highlighter.dsl.HighlighterFragmenter;
 import org.hibernate.search.engine.search.highlighter.spi.BoundaryScannerType;
@@ -47,13 +45,13 @@ protected LuceneUnifiedSearchHighlighter(Builder builder) {
 
 	private LuceneUnifiedSearchHighlighter(Set<String> indexNames,
 			Character[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
-			Integer numberOfFragments, Boolean orderByScore, Integer maxAnalyzedOffset, List<String> preTags,
+			Integer numberOfFragments, Boolean orderByScore, List<String> preTags,
 			List<String> postTags, BoundaryScannerType boundaryScannerType, Locale boundaryScannerLocale,
 			HighlighterFragmenter fragmenterType,
 			Integer phraseLimit, Encoder encoder) {
 		super( indexNames, boundaryChars, boundaryMaxScan, fragmentSize, noMatchSize, numberOfFragments,
 				orderByScore,
-				maxAnalyzedOffset, preTags, postTags, boundaryScannerType, boundaryScannerLocale, fragmenterType,
+				preTags, postTags, boundaryScannerType, boundaryScannerLocale, fragmenterType,
 				phraseLimit, encoder
 		);
 
@@ -70,12 +68,12 @@ private LuceneUnifiedSearchHighlighter(Set<String> indexNames,
 	@Override
 	protected LuceneAbstractSearchHighlighter createHighlighterSameType(Set<String> indexNames,
 			Character[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
-			Integer numberOfFragments, Boolean orderByScore, Integer maxAnalyzedOffset, List<String> preTags,
+			Integer numberOfFragments, Boolean orderByScore, List<String> preTags,
 			List<String> postTags, BoundaryScannerType boundaryScannerType, Locale boundaryScannerLocale,
 			HighlighterFragmenter fragmenterType, Integer phraseLimit, Encoder encoder) {
 		return new LuceneUnifiedSearchHighlighter(
 				indexNames, boundaryChars, boundaryMaxScan, fragmentSize, noMatchSize, numberOfFragments,
-				orderByScore, maxAnalyzedOffset, preTags, postTags, boundaryScannerType, boundaryScannerLocale,
+				orderByScore, preTags, postTags, boundaryScannerType, boundaryScannerLocale,
 				fragmenterType, phraseLimit, encoder
 		);
 	}
@@ -98,12 +96,6 @@ public SearchHighlighterType type() {
 		return SearchHighlighterType.UNIFIED;
 	}
 
-	@Override
-	public void checkApplicability(LuceneSearchIndexValueFieldTypeContext<?> typeContext) {
-		if ( typeContext.hasTermVectorsConfigured() && this.maxAnalyzedOffset != null && this.maxAnalyzedOffset > 0 ) {
-			throw log.unifiedHighlighterMaxAnalyzedOffsetNotSupported();
-		}
-	}
 
 	private final class UnifiedHighlighterValues<A> extends HighlighterValues<A> {
 
@@ -124,10 +116,7 @@ private final class UnifiedHighlighterValues<A> extends HighlighterValues<A> {
 					LuceneUnifiedSearchHighlighter.this.encoder
 			);
 
-			this.highlighter = new UnifiedHighlighter(
-					context.collectorExecutionContext().getIndexSearcher(),
-					LimitTokenOffsetAnalyzer.analyzer( analyzer, LuceneUnifiedSearchHighlighter.this.maxAnalyzedOffset )
-			);
+			this.highlighter = new UnifiedHighlighter( context.collectorExecutionContext().getIndexSearcher(), analyzer );
 			highlighter.setFormatter( formatter );
 			highlighter.setBreakIterator( this::breakIterator );
 			highlighter.setMaxNoHighlightPassages( LuceneUnifiedSearchHighlighter.this.noMatchSize > 0 ? 1 : 0 );

diff --git a/...ibernate/search/backend/lucene/search/projection/impl/LuceneFieldHighlightProjection.java b/...ibernate/search/backend/lucene/search/projection/impl/LuceneFieldHighlightProjection.java
@@ -75,7 +75,6 @@ public FieldHighlightExtractor<?> request(ProjectionRequestContext context) {
 		if ( !typeContext.highlighterTypeSupported( highlighter.type() ) ) {
 			throw log.highlighterTypeNotSupported( highlighter.type(), absoluteFieldPath );
 		}
-		highlighter.checkApplicability( typeContext );
 		highlighter.request( context, absoluteFieldPath );
 
 		return new FieldHighlightExtractor<>( context.absoluteCurrentNestedFieldPath(), highlighter,