From c665ed04b94f330fe27deec781cc1c59d45fddb5 Mon Sep 17 00:00:00 2001 From: javanna Date: Wed, 14 Nov 2018 10:51:48 +0100 Subject: [PATCH 01/17] Remove k1+1 constant factor from BM25 formula numerator --- .../org/apache/lucene/search/similarities/BM25Similarity.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java index e1d42421d3c4..626e7d0bf68a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java @@ -216,7 +216,7 @@ private static class BM25Scorer extends SimScorer { this.k1 = k1; this.b = b; this.cache = cache; - this.weight = (k1 + 1) * boost * idf.getValue().floatValue(); + this.weight = boost * idf.getValue().floatValue(); } @Override @@ -254,8 +254,6 @@ private Explanation explainTF(Explanation freq, long norm) { private List explainConstantFactors() { List subs = new ArrayList<>(); - // scale factor - subs.add(Explanation.match(k1 + 1, "scaling factor, k1 + 1")); // query boost if (boost != 1.0f) { subs.add(Explanation.match(boost, "boost")); From 1b3714410771b53d30f29c78f27c37c617dea85c Mon Sep 17 00:00:00 2001 From: javanna Date: Wed, 14 Nov 2018 12:15:39 +0100 Subject: [PATCH 02/17] adapt TestFunctionQuery --- .../apache/solr/search/function/TestFunctionQuery.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java b/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java index cc448b3c4e3c..edce3bb301c7 100644 --- a/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java +++ b/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java @@ -389,16 +389,16 @@ public void testGeneral() throws Exception { // superman has a higher df (thus lower idf) in one segment, but reversed in the complete index String q ="{!func}query($qq)"; String fq="id:120"; - assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'1.0'"); - assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'1.0'"); + assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'0.6'"); + assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'0.6'"); // test weighting through a function range query - assertQ(req("fl","*,score", "fq",fq, "q", "{!frange l=1 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']"); + assertQ(req("fl","*,score", "fq",fq, "q", "{!frange l=0.6 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']"); // test weighting through a complex function q ="{!func}sub(div(sum(0.0,product(1,query($qq))),1),0)"; - assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'1.0'"); - assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'1.0'"); + assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'0.6'"); + assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'0.6'"); // test full param dereferencing From 7f56226906ededdc285877c429a00dae4f5a6d10 Mon Sep 17 00:00:00 2001 From: javanna Date: Wed, 14 Nov 2018 15:08:38 +0100 Subject: [PATCH 03/17] adapt TestPayloadScoreQParserPlugin --- .../org/apache/solr/search/TestPayloadScoreQParserPlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/search/TestPayloadScoreQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestPayloadScoreQParserPlugin.java index 9c9c50e0d43a..faccb4bdcb76 100644 --- a/solr/core/src/test/org/apache/solr/search/TestPayloadScoreQParserPlugin.java +++ b/solr/core/src/test/org/apache/solr/search/TestPayloadScoreQParserPlugin.java @@ -57,6 +57,6 @@ public void test() { // TODO: fix this includeSpanScore test to be less brittle - score result is score of "A" (via BM25) multipled by 1.0 (payload value) assertQ(req("fl","*,score", "q", "{!payload_score f=vals_dpf v=A func=min}"), "//float[@name='score']='1.0'"); - assertQ(req("fl","*,score", "q", "{!payload_score f=vals_dpf v=A func=min includeSpanScore=true}"), "//float[@name='score']='0.2876821'"); + assertQ(req("fl","*,score", "q", "{!payload_score f=vals_dpf v=A func=min includeSpanScore=true}"), "//float[@name='score']='0.13076457'"); } } From 870d1d79afef7800ec348fabbe7b14b1a11ae1c0 Mon Sep 17 00:00:00 2001 From: javanna Date: Wed, 14 Nov 2018 17:32:53 +0100 Subject: [PATCH 04/17] add migrate note --- lucene/MIGRATE.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index 53187aa2d872..d2157296f252 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -146,7 +146,15 @@ use a TokenFilter chain as you would with any other Tokenizer. Both Highlighter and FastVectorHighlighter need a custom WeightedSpanTermExtractor or FieldQuery respectively in order to support ToParent/ToChildBlockJoinQuery. + ## MultiTermAwareComponent replaced by CharFilterFactory#normalize() and TokenFilterFactory#normalize() ## Normalization is now type-safe, with CharFilterFactory#normalize() returning a Reader and TokenFilterFactory#normalize() returning a TokenFilter. + +## k1+1 constant factor removed from BM25 similarity numerator + +Scores computed by the BM25 similarity are lower than previously as the k1+1 +constant factor was removed from the numerator of the scoring formula. +Ordering of results is preserved unless scores are computed from multiple +fields using different similarities. From 5f0a0b0fb0ec7ebad8094f7fa97a09c877d3a1cb Mon Sep 17 00:00:00 2001 From: javanna Date: Tue, 27 Nov 2018 22:15:12 +0100 Subject: [PATCH 05/17] add LegacyBM25Similarity --- lucene/MIGRATE.txt | 4 +- .../similarity/LegacyBM25Similarity.java | 86 ++++++++++++ .../similarity/TestLegacyBM25Similarity.java | 122 ++++++++++++++++++ 3 files changed, 210 insertions(+), 2 deletions(-) create mode 100644 lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java create mode 100644 lucene/misc/src/test/org/apache/lucene/search/similarity/TestLegacyBM25Similarity.java diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index d2157296f252..79337db97bc8 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -146,7 +146,6 @@ use a TokenFilter chain as you would with any other Tokenizer. Both Highlighter and FastVectorHighlighter need a custom WeightedSpanTermExtractor or FieldQuery respectively in order to support ToParent/ToChildBlockJoinQuery. - ## MultiTermAwareComponent replaced by CharFilterFactory#normalize() and TokenFilterFactory#normalize() ## Normalization is now type-safe, with CharFilterFactory#normalize() returning a Reader and @@ -157,4 +156,5 @@ TokenFilterFactory#normalize() returning a TokenFilter. Scores computed by the BM25 similarity are lower than previously as the k1+1 constant factor was removed from the numerator of the scoring formula. Ordering of results is preserved unless scores are computed from multiple -fields using different similarities. +fields using different similarities. The previous behaviour is now exposed +through the LegacyBM25Similarity class. diff --git a/lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java b/lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java new file mode 100644 index 000000000000..6f83fef6c125 --- /dev/null +++ b/lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.similarity; + +import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.search.CollectionStatistics; +import org.apache.lucene.search.TermStatistics; +import org.apache.lucene.search.similarities.BM25Similarity; +import org.apache.lucene.search.similarities.Similarity; + +/** + * Similarity that behaves like {@link BM25Similarity} while also applying + * the k1+1 factor to the numerator of the scoring formula + * + * @see BM25Similarity + */ +public final class LegacyBM25Similarity extends Similarity { + + private final BM25Similarity bm25Similarity; + + /** BM25 with these default values: + *
    + *
  • {@code k1 = 1.2}
  • + *
  • {@code b = 0.75}
  • + *
+ */ + public LegacyBM25Similarity() { + this.bm25Similarity = new BM25Similarity(); + } + + /** + * BM25 with the supplied parameter values. + * @param k1 Controls non-linear term frequency normalization (saturation). + * @param b Controls to what degree document length normalizes tf values. + * @throws IllegalArgumentException if {@code k1} is infinite or negative, or if {@code b} is + * not within the range {@code [0..1]} + */ + public LegacyBM25Similarity(float k1, float b) { + this.bm25Similarity = new BM25Similarity(k1, b); + } + + @Override + public long computeNorm(FieldInvertState state) { + return bm25Similarity.computeNorm(state); + } + + @Override + public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { + return bm25Similarity.scorer(boost * (1 + bm25Similarity.getK1()), collectionStats, termStats); + } + + /** + * Returns the k1 parameter + * @see #LegacyBM25Similarity(float, float) + */ + public final float getK1() { + return bm25Similarity.getK1(); + } + + /** + * Returns the b parameter + * @see #LegacyBM25Similarity(float, float) + */ + public final float getB() { + return bm25Similarity.getB(); + } + + @Override + public String toString() { + return bm25Similarity.toString(); + } +} diff --git a/lucene/misc/src/test/org/apache/lucene/search/similarity/TestLegacyBM25Similarity.java b/lucene/misc/src/test/org/apache/lucene/search/similarity/TestLegacyBM25Similarity.java new file mode 100644 index 000000000000..b3a0cd24ca13 --- /dev/null +++ b/lucene/misc/src/test/org/apache/lucene/search/similarity/TestLegacyBM25Similarity.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search.similarity; + +import java.util.Random; + +import org.apache.lucene.search.similarities.BM25Similarity; +import org.apache.lucene.search.similarities.BaseSimilarityTestCase; +import org.apache.lucene.search.similarities.Similarity; + +public class TestLegacyBM25Similarity extends BaseSimilarityTestCase { + + public void testIllegalK1() { + IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { + new LegacyBM25Similarity(Float.POSITIVE_INFINITY, 0.75f); + }); + assertTrue(expected.getMessage().contains("illegal k1 value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new LegacyBM25Similarity(-1, 0.75f); + }); + assertTrue(expected.getMessage().contains("illegal k1 value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new LegacyBM25Similarity(Float.NaN, 0.75f); + }); + assertTrue(expected.getMessage().contains("illegal k1 value")); + } + + public void testIllegalB() { + IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { + new LegacyBM25Similarity(1.2f, 2f); + }); + assertTrue(expected.getMessage().contains("illegal b value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new LegacyBM25Similarity(1.2f, -1f); + }); + assertTrue(expected.getMessage().contains("illegal b value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new LegacyBM25Similarity(1.2f, Float.POSITIVE_INFINITY); + }); + assertTrue(expected.getMessage().contains("illegal b value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new LegacyBM25Similarity(1.2f, Float.NaN); + }); + assertTrue(expected.getMessage().contains("illegal b value")); + } + + public void testDefaults() { + LegacyBM25Similarity legacyBM25Similarity = new LegacyBM25Similarity(); + BM25Similarity bm25Similarity = new BM25Similarity(); + assertEquals(bm25Similarity.getB(), legacyBM25Similarity.getB(), 0f); + assertEquals(bm25Similarity.getK1(), legacyBM25Similarity.getK1(), 0f); + } + + public void testToString() { + LegacyBM25Similarity legacyBM25Similarity = new LegacyBM25Similarity(); + BM25Similarity bm25Similarity = new BM25Similarity(); + assertEquals(bm25Similarity.toString(), legacyBM25Similarity.toString()); + } + + @Override + protected Similarity getSimilarity(Random random) { + return new LegacyBM25Similarity(randomK1(random), randomB(random)); + } + + private static float randomK1(Random random) { + // term frequency normalization parameter k1 + switch (random.nextInt(4)) { + case 0: + // minimum value + return 0; + case 1: + // tiny value + return Float.MIN_VALUE; + case 2: + // maximum value + // upper bounds on individual term's score is 43.262806 * (k1 + 1) * boost + // we just limit the test to "reasonable" k1 values but don't enforce this anywhere. + return Integer.MAX_VALUE; + default: + // random value + return Integer.MAX_VALUE * random.nextFloat(); + } + } + + private static float randomB(Random random) { + // length normalization parameter b [0 .. 1] + switch (random.nextInt(4)) { + case 0: + // minimum value + return 0; + case 1: + // tiny value + return Float.MIN_VALUE; + case 2: + // maximum value + return 1; + default: + // random value + return random.nextFloat(); + } + } +} From ef4bdaa60a4461541e7f88e7af5463a034bdaccf Mon Sep 17 00:00:00 2001 From: javanna Date: Wed, 28 Nov 2018 19:03:03 +0100 Subject: [PATCH 06/17] add package.html --- .../lucene/search/similarity/package.html | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 lucene/misc/src/java/org/apache/lucene/search/similarity/package.html diff --git a/lucene/misc/src/java/org/apache/lucene/search/similarity/package.html b/lucene/misc/src/java/org/apache/lucene/search/similarity/package.html new file mode 100644 index 000000000000..7f624d41d54a --- /dev/null +++ b/lucene/misc/src/java/org/apache/lucene/search/similarity/package.html @@ -0,0 +1,22 @@ + + + + +Misc similarity implementations. + + \ No newline at end of file From 2bc343ec48db7bb29528e9bd6612abbc6bbef274 Mon Sep 17 00:00:00 2001 From: javanna Date: Thu, 29 Nov 2018 09:25:01 +0100 Subject: [PATCH 07/17] update score assertion --- .../org/apache/solr/ltr/feature/TestExternalFeatures.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java index 45e856a08ca3..0c97f0f7330a 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java @@ -70,7 +70,7 @@ public void testEfiInTransformerShouldNotChangeOrderOfRerankedResults() throws E query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3 efi.userTitlePhrase1=w4 efi.userTitlePhrase2=w5}"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.7693934"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.34972426"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0"); @@ -80,7 +80,7 @@ public void testEfiInTransformerShouldNotChangeOrderOfRerankedResults() throws E query.add("fl", "*,score,[fv efi.user_query=w2 efi.userTitlePhrase1=w4 efi.userTitlePhrase2=w5]"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.7693934"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.34972426"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0"); } From 0f4db3f1261f02e3560aed568de285525538cb17 Mon Sep 17 00:00:00 2001 From: javanna Date: Thu, 29 Nov 2018 10:32:46 +0100 Subject: [PATCH 08/17] update migrate note --- lucene/MIGRATE.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index 79337db97bc8..9648915aee95 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -151,7 +151,7 @@ in order to support ToParent/ToChildBlockJoinQuery. Normalization is now type-safe, with CharFilterFactory#normalize() returning a Reader and TokenFilterFactory#normalize() returning a TokenFilter. -## k1+1 constant factor removed from BM25 similarity numerator +## k1+1 constant factor removed from BM25 similarity numerator (LUCENE-8563) ## Scores computed by the BM25 similarity are lower than previously as the k1+1 constant factor was removed from the numerator of the scoring formula. From e7c5f48d9b12f8caef5d8dd76e976396317e236a Mon Sep 17 00:00:00 2001 From: javanna Date: Thu, 29 Nov 2018 10:33:35 +0100 Subject: [PATCH 09/17] update migrate note --- lucene/MIGRATE.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index 9648915aee95..0b78d3c345a3 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -157,4 +157,4 @@ Scores computed by the BM25 similarity are lower than previously as the k1+1 constant factor was removed from the numerator of the scoring formula. Ordering of results is preserved unless scores are computed from multiple fields using different similarities. The previous behaviour is now exposed -through the LegacyBM25Similarity class. +by the LegacyBM25Similarity class which can be found in the lucene-misc jar. From 760d010bc0604e806f76e0131fb3c7256d493eef Mon Sep 17 00:00:00 2001 From: javanna Date: Thu, 29 Nov 2018 12:25:41 +0100 Subject: [PATCH 10/17] make solr use LegacyBM25Similarity --- .../lucene/search/similarity/LegacyBM25Similarity.java | 7 +++++++ .../solr/search/similarities/BM25SimilarityFactory.java | 7 ++++--- .../solr/search/similarities/SchemaSimilarityFactory.java | 6 +++--- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java b/lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java index 6f83fef6c125..6581a304c4ca 100644 --- a/lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java +++ b/lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java @@ -79,6 +79,13 @@ public final float getB() { return bm25Similarity.getB(); } + /** Sets whether overlap tokens (Tokens with 0 position increment) are + * ignored when computing norm. By default this is true, meaning overlap + * tokens do not count when computing norms. */ + public void setDiscountOverlaps(boolean v) { + bm25Similarity.setDiscountOverlaps(v); + } + @Override public String toString() { return bm25Similarity.toString(); diff --git a/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java b/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java index ef8ffbd26546..fa67d7968750 100644 --- a/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java +++ b/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java @@ -18,11 +18,12 @@ import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.apache.solr.common.params.SolrParams; import org.apache.solr.schema.SimilarityFactory; /** - * Factory for {@link BM25Similarity} + * Factory for {@link LegacyBM25Similarity} *

* Parameters: *

    @@ -35,7 +36,7 @@ * Optional settings: *
      *
    • discountOverlaps (bool): Sets - * {@link BM25Similarity#setDiscountOverlaps(boolean)}
    • + * {@link LegacyBM25Similarity#setDiscountOverlaps(boolean)} *
    * @lucene.experimental */ @@ -54,7 +55,7 @@ public void init(SolrParams params) { @Override public Similarity getSimilarity() { - BM25Similarity sim = new BM25Similarity(k1, b); + LegacyBM25Similarity sim = new LegacyBM25Similarity(k1, b); sim.setDiscountOverlaps(discountOverlaps); return sim; } diff --git a/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java b/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java index 378197c8e4a2..6c3dedfb398c 100644 --- a/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java +++ b/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java @@ -16,10 +16,10 @@ */ package org.apache.solr.search.similarities; -import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.ClassicSimilarity; import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.apache.lucene.util.Version; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; @@ -40,7 +40,7 @@ *

    *
      *
    • luceneMatchVersion < 6.0 = {@link ClassicSimilarity}
    • - *
    • luceneMatchVersion >= 6.0 = {@link BM25Similarity}
    • + *
    • luceneMatchVersion >= 6.0 = {@link LegacyBM25Similarity}
    • *
    *

    * The defaultSimFromFieldType option accepts the name of any fieldtype, and uses @@ -109,7 +109,7 @@ public Similarity getSimilarity() { Similarity defaultSim = null; if (null == defaultSimFromFieldType) { // nothing configured, choose a sensible implicit default... - defaultSim = new BM25Similarity(); + defaultSim = new LegacyBM25Similarity(); } else { FieldType defSimFT = core.getLatestSchema().getFieldTypeByName(defaultSimFromFieldType); if (null == defSimFT) { From 8bbda1806b5512c1c1056be7d171f822417a0d05 Mon Sep 17 00:00:00 2001 From: javanna Date: Thu, 29 Nov 2018 12:26:13 +0100 Subject: [PATCH 11/17] Revert "adapt TestFunctionQuery" This reverts commit 1b3714410771b53d30f29c78f27c37c617dea85c. --- .../apache/solr/search/function/TestFunctionQuery.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java b/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java index edce3bb301c7..cc448b3c4e3c 100644 --- a/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java +++ b/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java @@ -389,16 +389,16 @@ public void testGeneral() throws Exception { // superman has a higher df (thus lower idf) in one segment, but reversed in the complete index String q ="{!func}query($qq)"; String fq="id:120"; - assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'0.6'"); - assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'0.6'"); + assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'1.0'"); + assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'1.0'"); // test weighting through a function range query - assertQ(req("fl","*,score", "fq",fq, "q", "{!frange l=0.6 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']"); + assertQ(req("fl","*,score", "fq",fq, "q", "{!frange l=1 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']"); // test weighting through a complex function q ="{!func}sub(div(sum(0.0,product(1,query($qq))),1),0)"; - assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'0.6'"); - assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'0.6'"); + assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'1.0'"); + assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'1.0'"); // test full param dereferencing From 3284e1f2deadd9064aa0753994768f44144ee484 Mon Sep 17 00:00:00 2001 From: javanna Date: Thu, 29 Nov 2018 12:26:27 +0100 Subject: [PATCH 12/17] Revert "adapt TestPayloadScoreQParserPlugin" This reverts commit 7f56226906ededdc285877c429a00dae4f5a6d10. --- .../org/apache/solr/search/TestPayloadScoreQParserPlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/search/TestPayloadScoreQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestPayloadScoreQParserPlugin.java index faccb4bdcb76..9c9c50e0d43a 100644 --- a/solr/core/src/test/org/apache/solr/search/TestPayloadScoreQParserPlugin.java +++ b/solr/core/src/test/org/apache/solr/search/TestPayloadScoreQParserPlugin.java @@ -57,6 +57,6 @@ public void test() { // TODO: fix this includeSpanScore test to be less brittle - score result is score of "A" (via BM25) multipled by 1.0 (payload value) assertQ(req("fl","*,score", "q", "{!payload_score f=vals_dpf v=A func=min}"), "//float[@name='score']='1.0'"); - assertQ(req("fl","*,score", "q", "{!payload_score f=vals_dpf v=A func=min includeSpanScore=true}"), "//float[@name='score']='0.13076457'"); + assertQ(req("fl","*,score", "q", "{!payload_score f=vals_dpf v=A func=min includeSpanScore=true}"), "//float[@name='score']='0.2876821'"); } } From 670b4e417c592b56d143aa47ef417f53d7aed93c Mon Sep 17 00:00:00 2001 From: javanna Date: Thu, 29 Nov 2018 12:26:43 +0100 Subject: [PATCH 13/17] Revert "update score assertion" This reverts commit 2bc343ec48db7bb29528e9bd6612abbc6bbef274. --- .../org/apache/solr/ltr/feature/TestExternalFeatures.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java index 0c97f0f7330a..45e856a08ca3 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java @@ -70,7 +70,7 @@ public void testEfiInTransformerShouldNotChangeOrderOfRerankedResults() throws E query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3 efi.userTitlePhrase1=w4 efi.userTitlePhrase2=w5}"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.34972426"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.7693934"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0"); @@ -80,7 +80,7 @@ public void testEfiInTransformerShouldNotChangeOrderOfRerankedResults() throws E query.add("fl", "*,score,[fv efi.user_query=w2 efi.userTitlePhrase1=w4 efi.userTitlePhrase2=w5]"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.34972426"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.7693934"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0"); } From 2e5fc9f4f96d5e976c49610bcc5e420620e2e967 Mon Sep 17 00:00:00 2001 From: javanna Date: Thu, 29 Nov 2018 12:30:25 +0100 Subject: [PATCH 14/17] deprecate LegacyBM25Similarity --- .../apache/lucene/search/similarity/LegacyBM25Similarity.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java b/lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java index 6581a304c4ca..58091a7c9727 100644 --- a/lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java +++ b/lucene/misc/src/java/org/apache/lucene/search/similarity/LegacyBM25Similarity.java @@ -27,7 +27,10 @@ * the k1+1 factor to the numerator of the scoring formula * * @see BM25Similarity + * + * @deprecated {@link BM25Similarity} should be used instead */ +@Deprecated public final class LegacyBM25Similarity extends Similarity { private final BM25Similarity bm25Similarity; From d400a65df2fc44f8a647c4f5d837cf2b4583e126 Mon Sep 17 00:00:00 2001 From: javanna Date: Thu, 29 Nov 2018 13:10:35 +0100 Subject: [PATCH 15/17] remove unused import --- .../apache/solr/search/similarities/BM25SimilarityFactory.java | 1 - 1 file changed, 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java b/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java index fa67d7968750..fd8a48cb7d6c 100644 --- a/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java +++ b/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java @@ -16,7 +16,6 @@ */ package org.apache.solr.search.similarities; -import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.apache.solr.common.params.SolrParams; From acd8c38353d9dcfc7483f411a8d87f028589b302 Mon Sep 17 00:00:00 2001 From: javanna Date: Thu, 29 Nov 2018 14:21:12 +0100 Subject: [PATCH 16/17] adapt solr tests --- .../org/apache/solr/rest/schema/TestBulkSchemaAPI.java | 5 +++-- .../search/similarities/TestBM25SimilarityFactory.java | 7 ++++--- .../similarities/TestNonDefinedSimilarityFactory.java | 3 ++- .../solr/search/similarities/TestPerFieldSimilarity.java | 7 ++++--- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java b/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java index 5d1dab1982b3..1ef730516704 100644 --- a/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java +++ b/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java @@ -24,6 +24,7 @@ import org.apache.lucene.misc.SweetSpotSimilarity; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.core.SolrCore; import org.apache.solr.core.CoreContainer; @@ -591,7 +592,7 @@ public void testMultipleCommands() throws Exception{ assertNotNull("field a5 not created", m); assertEquals("myWhitespaceTxtField", m.get("type")); assertNull(m.get("uninvertible")); // inherited, but API shouldn't return w/o explicit showDefaults - assertFieldSimilarity("a5", BM25Similarity.class); // unspecified, expect default + assertFieldSimilarity("a5", LegacyBM25Similarity.class); // unspecified, expect default m = getObj(harness, "wdf_nocase", "fields"); assertNull("field 'wdf_nocase' not deleted", m); @@ -933,7 +934,7 @@ public void testSimilarityParser() throws Exception { Map fields = getObj(harness, fieldName, "fields"); assertNotNull("field " + fieldName + " not created", fields); - assertFieldSimilarity(fieldName, BM25Similarity.class, + assertFieldSimilarity(fieldName, LegacyBM25Similarity.class, sim -> assertEquals("Unexpected k1", k1, sim.getK1(), .001), sim -> assertEquals("Unexpected b", b, sim.getB(), .001)); diff --git a/solr/core/src/test/org/apache/solr/search/similarities/TestBM25SimilarityFactory.java b/solr/core/src/test/org/apache/solr/search/similarities/TestBM25SimilarityFactory.java index 3f6deacec426..3aad54e3a939 100644 --- a/solr/core/src/test/org/apache/solr/search/similarities/TestBM25SimilarityFactory.java +++ b/solr/core/src/test/org/apache/solr/search/similarities/TestBM25SimilarityFactory.java @@ -18,6 +18,7 @@ import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.junit.BeforeClass; /** @@ -31,14 +32,14 @@ public static void beforeClass() throws Exception { /** bm25 with default parameters */ public void test() throws Exception { - assertEquals(BM25Similarity.class, getSimilarity("text").getClass()); + assertEquals(LegacyBM25Similarity.class, getSimilarity("text").getClass()); } /** bm25 with parameters */ public void testParameters() throws Exception { Similarity sim = getSimilarity("text_params"); - assertEquals(BM25Similarity.class, sim.getClass()); - BM25Similarity bm25 = (BM25Similarity) sim; + assertEquals(LegacyBM25Similarity.class, sim.getClass()); + LegacyBM25Similarity bm25 = (LegacyBM25Similarity) sim; assertEquals(1.2f, bm25.getK1(), 0.01f); assertEquals(0.76f, bm25.getB(), 0.01f); } diff --git a/solr/core/src/test/org/apache/solr/search/similarities/TestNonDefinedSimilarityFactory.java b/solr/core/src/test/org/apache/solr/search/similarities/TestNonDefinedSimilarityFactory.java index 7460652366b1..5d840e215b75 100644 --- a/solr/core/src/test/org/apache/solr/search/similarities/TestNonDefinedSimilarityFactory.java +++ b/solr/core/src/test/org/apache/solr/search/similarities/TestNonDefinedSimilarityFactory.java @@ -17,6 +17,7 @@ package org.apache.solr.search.similarities; import org.apache.lucene.search.similarities.BM25Similarity; +import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.junit.After; /** @@ -36,7 +37,7 @@ public void cleanup() throws Exception { public void testCurrentBM25() throws Exception { // no sys prop set, rely on LATEST initCore("solrconfig-basic.xml","schema-tiny.xml"); - BM25Similarity sim = getSimilarity("text", BM25Similarity.class); + LegacyBM25Similarity sim = getSimilarity("text", LegacyBM25Similarity.class); assertEquals(0.75F, sim.getB(), 0.0F); } } diff --git a/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarity.java b/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarity.java index 58fe6eff2f0a..df7a154a5e96 100644 --- a/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarity.java +++ b/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarity.java @@ -19,6 +19,7 @@ import org.apache.lucene.misc.SweetSpotSimilarity; import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.junit.BeforeClass; /** @@ -58,18 +59,18 @@ public void testFactoryDynamic() throws Exception { /** test a field where no similarity is specified */ public void testDefaults() throws Exception { Similarity sim = getSimilarity("sim3text"); - assertEquals(BM25Similarity.class, sim.getClass());; + assertEquals(LegacyBM25Similarity.class, sim.getClass());; } /** ... and for a dynamic field */ public void testDefaultsDynamic() throws Exception { Similarity sim = getSimilarity("text_sim3"); - assertEquals(BM25Similarity.class, sim.getClass()); + assertEquals(LegacyBM25Similarity.class, sim.getClass()); } /** test a field that does not exist */ public void testNonexistent() throws Exception { Similarity sim = getSimilarity("sdfdsfdsfdswr5fsdfdsfdsfs"); - assertEquals(BM25Similarity.class, sim.getClass()); + assertEquals(LegacyBM25Similarity.class, sim.getClass()); } } From 1e62a7352d6b6b87866f3176c8f8971a24fc04fc Mon Sep 17 00:00:00 2001 From: javanna Date: Thu, 29 Nov 2018 14:46:01 +0100 Subject: [PATCH 17/17] remove unused imports --- .../solr/rest/schema/TestBulkSchemaAPI.java | 30 ++++++++----------- .../TestBM25SimilarityFactory.java | 1 - .../TestNonDefinedSimilarityFactory.java | 1 - .../similarities/TestPerFieldSimilarity.java | 1 - 4 files changed, 13 insertions(+), 20 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java b/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java index 1ef730516704..9a72043913eb 100644 --- a/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java +++ b/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java @@ -16,24 +16,31 @@ */ package org.apache.solr.rest.schema; -import org.apache.commons.io.FileUtils; +import java.io.File; +import java.io.StringReader; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Consumer; +import org.apache.commons.io.FileUtils; +import org.apache.lucene.misc.SweetSpotSimilarity; import org.apache.lucene.search.similarities.DFISimilarity; import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; -import org.apache.lucene.search.similarities.BM25Similarity; -import org.apache.lucene.misc.SweetSpotSimilarity; import org.apache.lucene.search.similarities.Similarity; - import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.apache.solr.common.SolrDocumentList; -import org.apache.solr.core.SolrCore; import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.SolrCore; import org.apache.solr.schema.SimilarityFactory; import org.apache.solr.search.similarities.SchemaSimilarityFactory; import org.apache.solr.util.RESTfulServerProvider; import org.apache.solr.util.RestTestBase; import org.apache.solr.util.RestTestHarness; - import org.junit.After; import org.junit.Before; import org.noggit.JSONParser; @@ -41,17 +48,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; -import java.io.StringReader; -import java.lang.invoke.MethodHandles; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.function.Consumer; - public class TestBulkSchemaAPI extends RestTestBase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); diff --git a/solr/core/src/test/org/apache/solr/search/similarities/TestBM25SimilarityFactory.java b/solr/core/src/test/org/apache/solr/search/similarities/TestBM25SimilarityFactory.java index 3aad54e3a939..6445b3469f0d 100644 --- a/solr/core/src/test/org/apache/solr/search/similarities/TestBM25SimilarityFactory.java +++ b/solr/core/src/test/org/apache/solr/search/similarities/TestBM25SimilarityFactory.java @@ -16,7 +16,6 @@ */ package org.apache.solr.search.similarities; -import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.junit.BeforeClass; diff --git a/solr/core/src/test/org/apache/solr/search/similarities/TestNonDefinedSimilarityFactory.java b/solr/core/src/test/org/apache/solr/search/similarities/TestNonDefinedSimilarityFactory.java index 5d840e215b75..9fe33b7638ea 100644 --- a/solr/core/src/test/org/apache/solr/search/similarities/TestNonDefinedSimilarityFactory.java +++ b/solr/core/src/test/org/apache/solr/search/similarities/TestNonDefinedSimilarityFactory.java @@ -16,7 +16,6 @@ */ package org.apache.solr.search.similarities; -import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.junit.After; diff --git a/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarity.java b/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarity.java index df7a154a5e96..a27837bb8a35 100644 --- a/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarity.java +++ b/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarity.java @@ -17,7 +17,6 @@ package org.apache.solr.search.similarities; import org.apache.lucene.misc.SweetSpotSimilarity; -import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.junit.BeforeClass;