Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LUCENE-10115: Add an extension point for custom query parsers to determine the similarity distance for fuzzy queries #314

Merged
merged 2 commits into from Sep 21, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -810,23 +810,38 @@ Query handleBareTokenQuery(
return q;
}

Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage) throws ParseException {
Query q;
float fms = fuzzyMinSim;
/**
* Determines the similarity distance for the given fuzzy token and term string.
*
* <p>The default implementation uses the string image of the {@code fuzzyToken} in an attempt to
* parse it to a primitive float value. Otherwise, the {@linkplain #getFuzzyMinSim() minimal
* similarity} distance is returned. Subclasses can override this method to return a similarity
* distance, say based on the {@code termStr}, if the {@code fuzzyToken} does not specify a
* distance.
*
* @param fuzzyToken The Fuzzy token
* @param termStr The Term string
* @return The similarity distance
*/
protected float getFuzzyDistance(Token fuzzyToken, String termStr) {
try {
fms = Float.parseFloat(fuzzySlop.image.substring(1));
return Float.parseFloat(fuzzyToken.image.substring(1));
} catch (
@SuppressWarnings("unused")
Exception ignored) {
}
return fuzzyMinSim;
}

/**
 * Builds the query for a bare term that carries a fuzzy token.
 *
 * <p>The similarity distance is obtained from {@link #getFuzzyDistance(Token, String)}, checked,
 * and then handed to {@code getFuzzyQuery} together with the field and the term.
 *
 * @param qfield the field passed on to {@code getFuzzyQuery}
 * @param fuzzySlop the fuzzy token from which the distance is determined
 * @param termImage the term string
 * @return the query returned by {@code getFuzzyQuery}
 * @throws ParseException if the distance is negative, or is at least 1 but not a whole number
 */
Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage) throws ParseException {
  float fms = getFuzzyDistance(fuzzySlop, termImage);
  if (fms < 0.0f) {
    throw new ParseException(
        "Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
  } else if (fms >= 1.0f && fms != (int) fms) {
    // Values of 1 or more are only accepted when they have no fractional part.
    throw new ParseException("Fractional edit distances are not allowed!");
  }
  return getFuzzyQuery(qfield, termImage, fms);
}

// extracted from the .jj grammar
Expand Down
Expand Up @@ -196,6 +196,34 @@ Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage)
assertEquals(qp.parse("a:[11.95 TO 12.95]"), qp.parse("12.45~1€"));
}

/**
 * Checks that a {@code QueryParser} subclass can supply its own fuzzy distance by overriding
 * {@code getFuzzyDistance}.
 */
public void testFuzzyDistanceExtendability() throws ParseException {
  // Parser whose fallback distance is 1 whenever the fuzzy token carries no parsable number.
  QueryParser fallbackToOneParser =
      new QueryParser("a", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
        @Override
        protected float getFuzzyDistance(Token fuzzyToken, String term) {
          try {
            return Float.parseFloat(fuzzyToken.image.substring(1));
          } catch (
              @SuppressWarnings("unused")
              Exception ignored) {
            return 1f; // alternative value to the default min similarity
          }
        }
      };
  assertEquals(fallbackToOneParser.parse("term~"), fallbackToOneParser.parse("term~1"));
  assertEquals(fallbackToOneParser.parse("term~XXX"), fallbackToOneParser.parse("term~1"));

  // Parser that ignores the fuzzy token and derives the distance from the term length.
  QueryParser termLengthParser =
      new QueryParser("a", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
        @Override
        protected float getFuzzyDistance(Token fuzzyToken, String term) {
          return term.length();
        }
      };
  assertEquals(termLengthParser.parse("a~"), termLengthParser.parse("a~1"));
  assertEquals(termLengthParser.parse("ab~"), termLengthParser.parse("ab~2"));
}

@Override
public void testStarParsing() throws Exception {
final int[] type = new int[1];
Expand Down