From e83e8ee1a42388606fffd10330ed1aeec9518098 Mon Sep 17 00:00:00 2001 From: Alessandro Benedetti Date: Fri, 1 Jun 2018 12:52:41 +0100 Subject: [PATCH 1/4] [LUCENE-8343] introduced weight 0 check and positional coefficient scaling + tests --- .../analyzing/BlendedInfixSuggester.java | 7 +- .../analyzing/BlendedInfixSuggesterTest.java | 79 +++++++++++++------ 2 files changed, 58 insertions(+), 28 deletions(-) diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java index 413d401b6a5b..dc65f7a8d1dc 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java @@ -224,8 +224,11 @@ protected List createResults(IndexSearcher searcher, TopFie } else { coefficient = createCoefficient(searcher, fd.doc, matchedTokens, prefixToken); } - - long score = (long) (weight * coefficient); + if (weight == 0) { + weight = 1; + } + long scaledCoefficient = (long) (coefficient * 10); + long score = weight * scaledCoefficient; LookupResult result; if (doHighlight) { diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java index ace44678957f..1e5a5da350f2 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java @@ -44,22 +44,44 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase { * of the matching term. */ public void testBlendedSort() throws IOException { - BytesRef payload = new BytesRef("star"); - Input keys[] = new Input[]{ new Input("star wars: episode v - the empire strikes back", 8, payload) }; + BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys); - Path tempDir = createTempDir("BlendedInfixSuggesterTest"); + assertSuggestionsRanking(payload, suggester); + } - Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET); - BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a, - AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, - BlendedInfixSuggester.BlenderType.POSITION_LINEAR, - BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false); - suggester.build(new InputArrayIterator(keys)); + /** + * Test to validate the suggestions ranking according to the position coefficient, + * even if the weight associated to the suggestion is unitary. + */ + public void testBlendedSort_fieldWeightUnitary_shouldRankSuggestionsByPositionMatch() throws IOException { + BytesRef payload = new BytesRef("star"); + Input keys[] = new Input[]{ + new Input("star wars: episode v - the empire strikes back", 1, payload) + }; + BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys); + + assertSuggestionsRanking(payload, suggester); + } + /** + * Test to validate the suggestions ranking according to the position coefficient, + * even if the weight associated to the suggestion is zero. + */ + public void testBlendedSort_fieldWeightZero_shouldRankSuggestionsByPositionMatch() throws IOException { + BytesRef payload = new BytesRef("star"); + Input keys[] = new Input[]{ + new Input("star wars: episode v - the empire strikes back", 0, payload) + }; + BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys); + + assertSuggestionsRanking(payload, suggester); + } + + private void assertSuggestionsRanking(BytesRef payload, BlendedInfixSuggester suggester) throws IOException { // we query for star wars and check that the weight // is smaller when we search for tokens that are far from the beginning @@ -78,6 +100,18 @@ public void testBlendedSort() throws IOException { suggester.close(); } + private BlendedInfixSuggester getBlendedInfixSuggester(Input[] keys) throws IOException { + Path tempDir = createTempDir("BlendedInfixSuggesterTest"); + + Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET); + BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a, + AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, + BlendedInfixSuggester.BlenderType.POSITION_LINEAR, + BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false); + suggester.build(new InputArrayIterator(keys)); + return suggester; + } + /** * Verify the different flavours of the blender types */ @@ -97,9 +131,9 @@ public void testBlendingType() throws IOException { BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a); suggester.build(new InputArrayIterator(keys)); - assertEquals(w, getInResults(suggester, "top", pl, 1)); - assertEquals((int) (w * (1 - 0.10 * 2)), getInResults(suggester, "the", pl, 1)); - assertEquals((int) (w * (1 - 0.10 * 3)), getInResults(suggester, "lake", pl, 1)); + assertEquals(10 * w, getInResults(suggester, "top", pl, 1)); + assertEquals(w * (long) (10 * (1 - 0.10 * 2)), getInResults(suggester, "the", pl, 1)); + assertEquals(w * (long) (10 * (1 - 0.10 * 3)), getInResults(suggester, "lake", pl, 1)); suggester.close(); @@ -109,9 +143,9 @@ public void testBlendingType() throws IOException { BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1, false); suggester.build(new InputArrayIterator(keys)); - assertEquals(w, getInResults(suggester, "top", pl, 1)); - assertEquals((int) (w * 1 / (1 + 2)), getInResults(suggester, "the", pl, 1)); - assertEquals((int) (w * 1 / (1 + 3)), getInResults(suggester, "lake", pl, 1)); + assertEquals(10 * w, getInResults(suggester, "top", pl, 1)); + assertEquals(w * (long) (10 * 1 / (1 + 2)), getInResults(suggester, "the", pl, 1)); + assertEquals(w * (long) (10 * 1 / (1 + 3)), getInResults(suggester, "lake", pl, 1)); suggester.close(); // BlenderType.EXPONENTIAL_RECIPROCAL is using 1/(pow(1+p, exponent)) * w where w is weight and p the position of the word @@ -121,9 +155,9 @@ public void testBlendingType() throws IOException { suggester.build(new InputArrayIterator(keys)); - assertEquals(w, getInResults(suggester, "top", pl, 1)); - assertEquals((int) (w * 1 / (Math.pow(1 + 2, 4.0))), getInResults(suggester, "the", pl, 1)); - assertEquals((int) (w * 1 / (Math.pow(1 + 3, 4.0))), getInResults(suggester, "lake", pl, 1)); + assertEquals(10 * w, getInResults(suggester, "top", pl, 1)); + assertEquals(w * (long) (10 * 1 / (Math.pow(1 + 2, 4.0))), getInResults(suggester, "the", pl, 1)); + assertEquals(w * (long) (10 * 1 / (Math.pow(1 + 3, 4.0))), getInResults(suggester, "lake", pl, 1)); suggester.close(); } @@ -195,14 +229,7 @@ public void testNullPrefixToken() throws IOException { new Input("top of the lake", 8, payload) }; - Path tempDir = createTempDir("BlendedInfixSuggesterTest"); - - Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET); - BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a, - AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, - BlendedInfixSuggester.BlenderType.POSITION_LINEAR, - BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false); - suggester.build(new InputArrayIterator(keys)); + BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys); getInResults(suggester, "of ", payload, 1); getInResults(suggester, "the ", payload, 1); From 17cfa634798f96539c2535dca2e9a8f2cc0bff45 Mon Sep 17 00:00:00 2001 From: Alessandro Benedetti Date: Wed, 6 Jun 2018 19:42:08 +0100 Subject: [PATCH 2/4] [LUCENE-8343] documentation fix --- solr/solr-ref-guide/src/suggester.adoc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/solr/solr-ref-guide/src/suggester.adoc b/solr/solr-ref-guide/src/suggester.adoc index 0f9e12e040a2..7156deab2fa8 100644 --- a/solr/solr-ref-guide/src/suggester.adoc +++ b/solr/solr-ref-guide/src/suggester.adoc @@ -189,7 +189,7 @@ This implementation supports <>. ==== BlendedInfixLookupFactory -An extension of the `AnalyzingInfixSuggester` which provides additional functionality to weight prefix matches across the matched documents. You can tell it to score higher if a hit is closer to the start of the suggestion or vice versa. +An extension of the `AnalyzingInfixSuggester` which provides additional functionality to weight prefix matches across the matched documents. It scores higher if a hit is closer to the start of the suggestion. This implementation uses the following additional properties: @@ -198,9 +198,11 @@ Used to calculate weight coefficient using the position of the first matching wo `position_linear`::: `weightFieldValue * (1 - 0.10*position)`: Matches to the start will be given a higher score. This is the default. `position_reciprocal`::: -`weightFieldValue / (1 + position)`: Matches to the end will be given a higher score. +`weightFieldValue / (1 + position)`: Matches to the start will be given a score which decay faster than linear. +`position_exponential_reciprocal`::: +`weightFieldValue / pow(1 + position,exponent)`: Matches to the start will be given a score which decay faster than reciprocal. `exponent`:::: -An optional configuration variable for `position_reciprocal` to control how fast the score will increase or decrease. Default `2.0`. +An optional configuration variable for `position_reciprocal` to control how fast the score will decrease. Default `2.0`. `numFactor`:: The factor to multiply the number of searched elements from which results will be pruned. Default is `10`. From 2b636e8c3adb879f0cd2cff45824e226d747b5f0 Mon Sep 17 00:00:00 2001 From: Alessandro Benedetti Date: Thu, 7 Jun 2018 16:51:38 +0100 Subject: [PATCH 3/4] [LUCENE-8343] minor documentation fixes --- solr/solr-ref-guide/src/suggester.adoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/solr/solr-ref-guide/src/suggester.adoc b/solr/solr-ref-guide/src/suggester.adoc index 7156deab2fa8..caf36a9fc1ba 100644 --- a/solr/solr-ref-guide/src/suggester.adoc +++ b/solr/solr-ref-guide/src/suggester.adoc @@ -198,11 +198,11 @@ Used to calculate weight coefficient using the position of the first matching wo `position_linear`::: `weightFieldValue * (1 - 0.10*position)`: Matches to the start will be given a higher score. This is the default. `position_reciprocal`::: -`weightFieldValue / (1 + position)`: Matches to the start will be given a score which decay faster than linear. +`weightFieldValue / (1 + position)`: Matches to the start will be given a higher score. The score of matches positioned far from the start of the suggestion decays faster than linear. `position_exponential_reciprocal`::: -`weightFieldValue / pow(1 + position,exponent)`: Matches to the start will be given a score which decay faster than reciprocal. +`weightFieldValue / pow(1 + position,exponent)`: Matches to the start will be given a higher score. The score of matches positioned far from the start of the suggestion decays faster than reciprocal. `exponent`:::: -An optional configuration variable for `position_reciprocal` to control how fast the score will decrease. Default `2.0`. +An optional configuration variable for `position_exponential_reciprocal` to control how fast the score will decrease. Default `2.0`. `numFactor`:: The factor to multiply the number of searched elements from which results will be pruned. Default is `10`. From e0232f104509f28126d9ce060663f87508366338 Mon Sep 17 00:00:00 2001 From: Alessandro Benedetti Date: Thu, 7 Jun 2018 18:57:30 +0100 Subject: [PATCH 4/4] [LUCENE-8343] weight long overflow fix + test --- .../analyzing/BlendedInfixSuggester.java | 6 ++-- .../analyzing/BlendedInfixSuggesterTest.java | 32 +++++++++++++------ 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java index dc65f7a8d1dc..63f432fb76a0 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java @@ -227,8 +227,10 @@ protected List createResults(IndexSearcher searcher, TopFie if (weight == 0) { weight = 1; } - long scaledCoefficient = (long) (coefficient * 10); - long score = weight * scaledCoefficient; + if (weight < 1 / LINEAR_COEF && weight > -1 / LINEAR_COEF) { + weight *= 1 / LINEAR_COEF; + } + long score = (long) (weight * coefficient); LookupResult result; if (doHighlight) { diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java index 1e5a5da350f2..296e40452d20 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java @@ -81,6 +81,20 @@ public void testBlendedSort_fieldWeightZero_shouldRankSuggestionsByPositionMatch assertSuggestionsRanking(payload, suggester); } + /** + * Test to validate the suggestions ranking according to the position coefficient, + * even if the weight associated to the suggestion is very big, no overflow should happen. + */ + public void testBlendedSort_fieldWeightLongMax_shouldRankSuggestionsByPositionMatchWithNoOverflow() throws IOException { + BytesRef payload = new BytesRef("star"); + Input keys[] = new Input[]{ + new Input("star wars: episode v - the empire strikes back", Long.MAX_VALUE, payload) + }; + BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys); + + assertSuggestionsRanking(payload, suggester); + } + private void assertSuggestionsRanking(BytesRef payload, BlendedInfixSuggester suggester) throws IOException { // we query for star wars and check that the weight // is smaller when we search for tokens that are far from the beginning @@ -131,9 +145,9 @@ public void testBlendingType() throws IOException { BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a); suggester.build(new InputArrayIterator(keys)); - assertEquals(10 * w, getInResults(suggester, "top", pl, 1)); - assertEquals(w * (long) (10 * (1 - 0.10 * 2)), getInResults(suggester, "the", pl, 1)); - assertEquals(w * (long) (10 * (1 - 0.10 * 3)), getInResults(suggester, "lake", pl, 1)); + assertEquals(w, getInResults(suggester, "top", pl, 1)); + assertEquals((int) (w * (1 - 0.10 * 2)), getInResults(suggester, "the", pl, 1)); + assertEquals((int) (w * (1 - 0.10 * 3)), getInResults(suggester, "lake", pl, 1)); suggester.close(); @@ -143,9 +157,9 @@ public void testBlendingType() throws IOException { BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1, false); suggester.build(new InputArrayIterator(keys)); - assertEquals(10 * w, getInResults(suggester, "top", pl, 1)); - assertEquals(w * (long) (10 * 1 / (1 + 2)), getInResults(suggester, "the", pl, 1)); - assertEquals(w * (long) (10 * 1 / (1 + 3)), getInResults(suggester, "lake", pl, 1)); + assertEquals(w, getInResults(suggester, "top", pl, 1)); + assertEquals((int) (w * 1 / (1 + 2)), getInResults(suggester, "the", pl, 1)); + assertEquals((int) (w * 1 / (1 + 3)), getInResults(suggester, "lake", pl, 1)); suggester.close(); // BlenderType.EXPONENTIAL_RECIPROCAL is using 1/(pow(1+p, exponent)) * w where w is weight and p the position of the word @@ -155,9 +169,9 @@ public void testBlendingType() throws IOException { suggester.build(new InputArrayIterator(keys)); - assertEquals(10 * w, getInResults(suggester, "top", pl, 1)); - assertEquals(w * (long) (10 * 1 / (Math.pow(1 + 2, 4.0))), getInResults(suggester, "the", pl, 1)); - assertEquals(w * (long) (10 * 1 / (Math.pow(1 + 3, 4.0))), getInResults(suggester, "lake", pl, 1)); + assertEquals(w, getInResults(suggester, "top", pl, 1)); + assertEquals((int) (w * 1 / (Math.pow(1 + 2, 4.0))), getInResults(suggester, "the", pl, 1)); + assertEquals((int) (w * 1 / (Math.pow(1 + 3, 4.0))), getInResults(suggester, "lake", pl, 1)); suggester.close(); }