Skip to content

Commit

Permalink
HSEARCH-3844 Make sure the analyzer/normalizer is always non-null in …
Browse files Browse the repository at this point in the history
…the Lucene backend

So as to avoid NullPointerExceptions (NPEs).
  • Loading branch information
yrodiere authored and fax4ever committed Feb 28, 2020
1 parent ac17ca5 commit 66b47c7
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ private Analyzer buildAnalyzer() {
return overrideAnalyzer;
}
if ( fields.size() == 1 ) {
return fields.values().iterator().next().getAnalyzer();
return fields.values().iterator().next().getAnalyzerOrNormalizer();
}

/*
Expand All @@ -134,7 +134,7 @@ private Analyzer buildAnalyzer() {
*/
ScopedAnalyzer.Builder builder = new ScopedAnalyzer.Builder();
for ( Map.Entry<String, LuceneSimpleQueryStringPredicateBuilderFieldState> entry : fields.entrySet() ) {
builder.setAnalyzer( entry.getKey(), entry.getValue().getAnalyzer() );
builder.setAnalyzer( entry.getKey(), entry.getValue().getAnalyzerOrNormalizer() );
}
return builder.build();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.Objects;

import org.hibernate.search.backend.lucene.document.impl.LuceneDocumentBuilder;
import org.hibernate.search.backend.lucene.lowlevel.common.impl.AnalyzerConstants;
import org.hibernate.search.backend.lucene.lowlevel.common.impl.MetadataFields;

import org.apache.lucene.analysis.Analyzer;
Expand Down Expand Up @@ -120,7 +121,8 @@ public BytesRef normalize(String absoluteFieldPath, String value) {
if ( value == null ) {
return null;
}
if ( analyzerOrNormalizer == null ) {
if ( analyzerOrNormalizer == AnalyzerConstants.KEYWORD_ANALYZER ) {
// Optimization when analysis is disabled
return new BytesRef( value );
}
return analyzerOrNormalizer.normalize( absoluteFieldPath, value );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.apache.lucene.index.IndexOptions;
import org.hibernate.search.backend.lucene.analysis.model.impl.LuceneAnalysisDefinitionRegistry;
import org.hibernate.search.backend.lucene.logging.impl.Log;
import org.hibernate.search.backend.lucene.lowlevel.common.impl.AnalyzerConstants;
import org.hibernate.search.backend.lucene.types.aggregation.impl.LuceneTextFieldAggregationBuilderFactory;
import org.hibernate.search.backend.lucene.types.codec.impl.LuceneStringFieldCodec;
import org.hibernate.search.backend.lucene.types.impl.LuceneIndexFieldType;
Expand Down Expand Up @@ -132,14 +133,18 @@ else if ( searchAnalyzer != null ) {
}

Analyzer analyzerOrNormalizer = analyzer != null ? analyzer : normalizer;
if ( analyzerOrNormalizer == null ) {
analyzerOrNormalizer = AnalyzerConstants.KEYWORD_ANALYZER;
}

DslConverter<?, ? extends String> dslConverter = createDslConverter();
DslConverter<String, ? extends String> rawDslConverter = createRawDslConverter();
ProjectionConverter<? super String, ?> projectionConverter = createProjectionConverter();
ProjectionConverter<? super String, String> rawProjectionConverter = createRawProjectionConverter();
LuceneStringFieldCodec codec = new LuceneStringFieldCodec(
resolvedSearchable, resolvedSortable, resolvedAggregable,
getFieldType( resolvedProjectable, resolvedSearchable, analyzer != null, resolvedNorms, resolvedTermVector ), indexNullAsValue,
getFieldType( resolvedProjectable, resolvedSearchable, analyzer != null, resolvedNorms, resolvedTermVector ),
indexNullAsValue,
analyzerOrNormalizer
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,7 @@ public LuceneIndexSchemaFieldNode<F> addField(LuceneIndexSchemaNodeCollector col
collector.collectFacetConfig( schemaNode.getAbsoluteFieldPath(), multiValued );
}

if ( analyzerOrNormalizer != null ) {
collector.collectAnalyzer( schemaNode.getAbsoluteFieldPath(), analyzerOrNormalizer );
}
collector.collectAnalyzer( schemaNode.getAbsoluteFieldPath(), analyzerOrNormalizer );

return schemaNode;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
public final class LuceneSimpleQueryStringPredicateBuilderFieldState
implements SimpleQueryStringPredicateBuilder.FieldState, FieldContextSimpleQueryParser.FieldContext {

private final Analyzer analyzer;
private final Analyzer analyzerOrNormalizer;
private Float boost;

LuceneSimpleQueryStringPredicateBuilderFieldState(Analyzer analyzer) {
this.analyzer = analyzer;
LuceneSimpleQueryStringPredicateBuilderFieldState(Analyzer analyzerOrNormalizer) {
this.analyzerOrNormalizer = analyzerOrNormalizer;
}

@Override
Expand All @@ -38,7 +38,7 @@ public Query wrap(Query query) {
}
}

public Analyzer getAnalyzer() {
return analyzer;
public Analyzer getAnalyzerOrNormalizer() {
return analyzerOrNormalizer;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class LuceneTextMatchPredicateBuilder<F>
private Integer maxEditDistance;
private Integer prefixLength;

private Analyzer analyzer;
private Analyzer analyzerOrNormalizer;
private boolean analyzerOverridden = false;

LuceneTextMatchPredicateBuilder(
Expand All @@ -50,7 +50,7 @@ class LuceneTextMatchPredicateBuilder<F>
LuceneCompatibilityChecker converterChecker, LuceneTextFieldCodec<F> codec,
Analyzer analyzerOrNormalizer, LuceneCompatibilityChecker analyzerChecker) {
super( searchContext, absoluteFieldPath, converter, rawConverter, converterChecker, codec );
this.analyzer = analyzerOrNormalizer;
this.analyzerOrNormalizer = analyzerOrNormalizer;
this.analyzerChecker = analyzerChecker;
this.analysisDefinitionRegistry = searchContext.getAnalysisDefinitionRegistry();
}
Expand All @@ -63,16 +63,16 @@ public void fuzzy(int maxEditDistance, int exactPrefixLength) {

@Override
public void analyzer(String analyzerName) {
this.analyzer = analysisDefinitionRegistry.getAnalyzerDefinition( analyzerName );
if ( analyzer == null ) {
this.analyzerOrNormalizer = analysisDefinitionRegistry.getAnalyzerDefinition( analyzerName );
if ( analyzerOrNormalizer == null ) {
throw log.unknownAnalyzer( analyzerName, EventContexts.fromIndexFieldAbsolutePath( absoluteFieldPath ) );
}
this.analyzerOverridden = true;
}

@Override
public void skipAnalysis() {
this.analyzer = AnalyzerConstants.KEYWORD_ANALYZER;
this.analyzerOrNormalizer = AnalyzerConstants.KEYWORD_ANALYZER;
this.analyzerOverridden = true;
}

Expand All @@ -84,35 +84,33 @@ protected Query doBuild(LuceneSearchPredicateContext context) {
analyzerChecker.failIfNotCompatible();
}

if ( analyzer != null ) {
QueryBuilder effectiveQueryBuilder;
if ( analyzerOrNormalizer == AnalyzerConstants.KEYWORD_ANALYZER ) {
// Optimization when analysis is disabled
Term term = new Term( absoluteFieldPath, value );

if ( maxEditDistance != null ) {
effectiveQueryBuilder = new FuzzyQueryBuilder( analyzer, maxEditDistance, prefixLength );
return new FuzzyQuery( term, maxEditDistance, prefixLength );
}
else {
effectiveQueryBuilder = new QueryBuilder( analyzer );
return new TermQuery( term );
}
}

Query analyzed = effectiveQueryBuilder.createBooleanQuery( absoluteFieldPath, value );
if ( analyzed == null ) {
// Either the value was an empty string
// or the analysis removed all tokens (that can happen if the value contained only stopwords, for example)
// In any case, use the same behavior as Elasticsearch: don't match anything
analyzed = new MatchNoDocsQuery( "No tokens after analysis of the value to match" );
}
return analyzed;
QueryBuilder effectiveQueryBuilder;
if ( maxEditDistance != null ) {
effectiveQueryBuilder = new FuzzyQueryBuilder( analyzerOrNormalizer, maxEditDistance, prefixLength );
}
else {
// we are in the case where we have a normalizer here, as the analyzer case has already been treated by
// the queryBuilder case above
Term term = new Term( absoluteFieldPath, codec.normalize( absoluteFieldPath, value ) );
effectiveQueryBuilder = new QueryBuilder( analyzerOrNormalizer );
}

if ( maxEditDistance != null ) {
return new FuzzyQuery( term, maxEditDistance, prefixLength );
}
else {
return new TermQuery( term );
}
Query analyzed = effectiveQueryBuilder.createBooleanQuery( absoluteFieldPath, value );
if ( analyzed == null ) {
// Either the value was an empty string
// or the analysis removed all tokens (that can happen if the value contained only stopwords, for example)
// In any case, use the same behavior as Elasticsearch: don't match anything
analyzed = new MatchNoDocsQuery( "No tokens after analysis of the value to match" );
}
return analyzed;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,21 +89,18 @@ protected Query doBuild(LuceneSearchPredicateContext context) {
analyzerChecker.failIfNotCompatible();
}

if ( analyzer != null ) {
Query analyzed = new QueryBuilder( analyzer ).createPhraseQuery( absoluteFieldPath, phrase, slop );
if ( analyzed == null ) {
// Either the value was an empty string
// or the analysis removed all tokens (that can happen if the value contained only stopwords, for example)
// In any case, use the same behavior as Elasticsearch: don't match anything
analyzed = new MatchNoDocsQuery( "No tokens after analysis of the phrase to match" );
}
return analyzed;
if ( analyzer == AnalyzerConstants.KEYWORD_ANALYZER ) {
// Optimization when analysis is disabled
return new TermQuery( new Term( absoluteFieldPath, phrase ) );
}
else {
// we are in the case where we have a normalizer here, as the analyzer case has already been treated by
// the queryBuilder case above

return new TermQuery( new Term( absoluteFieldPath, codec.normalize( absoluteFieldPath, phrase ) ) );
Query analyzed = new QueryBuilder( analyzer ).createPhraseQuery( absoluteFieldPath, phrase, slop );
if ( analyzed == null ) {
// Either the value was an empty string
// or the analysis removed all tokens (that can happen if the value contained only stopwords, for example)
// In any case, use the same behavior as Elasticsearch: don't match anything
analyzed = new MatchNoDocsQuery( "No tokens after analysis of the phrase to match" );
}
return analyzed;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ class LuceneTextWildcardPredicateBuilder extends AbstractLuceneSearchPredicateBu

protected final String absoluteFieldPath;

private final Analyzer analyzer;
private final Analyzer analyzerOrNormalizer;

private String pattern;

LuceneTextWildcardPredicateBuilder(String absoluteFieldPath, Analyzer analyzer) {
LuceneTextWildcardPredicateBuilder(String absoluteFieldPath, Analyzer analyzerOrNormalizer) {
this.absoluteFieldPath = absoluteFieldPath;
this.analyzer = analyzer;
this.analyzerOrNormalizer = analyzerOrNormalizer;
}

@Override
Expand All @@ -39,7 +39,7 @@ public void pattern(String wildcardPattern) {

@Override
protected Query doBuild(LuceneSearchPredicateContext context) {
BytesRef analyzedWildcard = LuceneWildcardExpressionHelper.analyzeWildcard( analyzer, absoluteFieldPath, pattern );
BytesRef analyzedWildcard = LuceneWildcardExpressionHelper.analyzeWildcard( analyzerOrNormalizer, absoluteFieldPath, pattern );
return new WildcardQuery( new Term( absoluteFieldPath, analyzedWildcard ) );
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.hibernate.search.backend.lucene.lowlevel.common.impl.AnalyzerConstants;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
Expand All @@ -27,6 +29,11 @@ private LuceneWildcardExpressionHelper() {
}

public static BytesRef analyzeWildcard(Analyzer analyzer, String field, String termStr) {
if ( analyzer == AnalyzerConstants.KEYWORD_ANALYZER ) {
// Optimization when analysis is disabled
return new BytesRef( termStr );
}

// best effort to not pass the wildcard characters and escaped characters through #normalize
Matcher wildcardMatcher = WILDCARD_PATTERN.matcher( termStr );
BytesRefBuilder sb = new BytesRefBuilder();
Expand Down

0 comments on commit 66b47c7

Please sign in to comment.