-
Notifications
You must be signed in to change notification settings - Fork 624
/
Weight.cs
213 lines (198 loc) · 10.2 KB
/
Weight.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
using System;
using System.IO;
namespace Lucene.Net.Search
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
using IBits = Lucene.Net.Util.IBits;
/// <summary>
/// Expert: Calculate query weights and build query scorers.
/// <para/>
/// The purpose of <see cref="Weight"/> is to ensure searching does not modify a
/// <see cref="Search.Query"/>, so that a <see cref="Search.Query"/> instance can be reused.
/// <para/>
/// <see cref="IndexSearcher"/> dependent state of the query should reside in the
/// <see cref="Weight"/>.
/// <para/>
/// <see cref="Index.AtomicReader"/> dependent state should reside in the <see cref="Scorer"/>.
/// <para/>
/// Since <see cref="Weight"/> creates <see cref="Scorer"/> instances for a given
/// <see cref="AtomicReaderContext"/> (<see cref="GetScorer(AtomicReaderContext, IBits)"/>)
/// callers must maintain the relationship between the searcher's top-level
/// <see cref="Index.IndexReaderContext"/> and the context used to create a <see cref="Scorer"/>.
/// <para/>
/// A <see cref="Weight"/> is used in the following way:
/// <list type="number">
/// <item><description>A <see cref="Weight"/> is constructed by a top-level query, given a
/// <see cref="IndexSearcher"/> (<see cref="Query.CreateWeight(IndexSearcher)"/>).</description></item>
/// <item><description>The <see cref="GetValueForNormalization()"/> method is called on the
/// <see cref="Weight"/> to compute the query normalization factor
/// <see cref="Similarities.Similarity.QueryNorm(float)"/> of the query clauses contained in the
/// query.</description></item>
/// <item><description>The query normalization factor is passed to <see cref="Normalize(float, float)"/>. At
/// this point the weighting is complete.</description></item>
/// <item><description>A <see cref="Scorer"/> is constructed by
/// <see cref="GetScorer(AtomicReaderContext, IBits)"/>.</description></item>
/// </list>
/// <para/>
/// @since 2.9
/// </summary>
public abstract class Weight
{
/// <summary>
/// An explanation of the score computation for the named document.
/// </summary>
/// <param name="context"> The readers context to create the <see cref="Explanation"/> for. </param>
/// <param name="doc"> The document's id relative to the given context's reader </param>
/// <returns> An <see cref="Explanation"/> for the score </returns>
/// <exception cref="IOException"> if an <see cref="IOException"/> occurs </exception>
public abstract Explanation Explain(AtomicReaderContext context, int doc);
/// <summary>
/// The query that this concerns. </summary>
public abstract Query Query { get; }
/// <summary>
/// The value for normalization of contained query clauses (e.g. sum of squared weights). </summary>
public abstract float GetValueForNormalization();
/// <summary>
/// Assigns the query normalization factor and boost from parent queries to this. </summary>
public abstract void Normalize(float norm, float topLevelBoost);
/// <summary>
/// Returns a <see cref="Scorer"/> which scores documents in/out-of order according
/// to <c>scoreDocsInOrder</c>.
/// <para/>
/// <b>NOTE:</b> even if <c>scoreDocsInOrder</c> is <c>false</c>, it is
/// recommended to check whether the returned <see cref="Scorer"/> indeed scores
/// documents out of order (i.e., call <see cref="ScoresDocsOutOfOrder"/>), as
/// some <see cref="Scorer"/> implementations will always return documents
/// in-order.
/// <para/>
/// <b>NOTE:</b> <c>null</c> can be returned if no documents will be scored by this
/// query.
/// </summary>
/// <param name="context">
/// The <see cref="AtomicReaderContext"/> for which to return the <see cref="Scorer"/>. </param>
/// <param name="acceptDocs">
/// <see cref="IBits"/> that represent the allowable docs to match (typically deleted docs
/// but possibly filtering other documents)
/// </param>
/// <returns> A <see cref="Scorer"/> which scores documents in/out-of order. </returns>
/// <exception cref="IOException"> if there is a low-level I/O error </exception>
public abstract Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs);
/// <summary>
/// Optional method, to return a <see cref="BulkScorer"/> to
/// score the query and send hits to a <see cref="ICollector"/>.
/// Only queries that have a different top-level approach
/// need to override this; the default implementation
/// pulls a normal <see cref="Scorer"/> and iterates and
/// collects the resulting hits.
/// </summary>
/// <param name="context">
/// The <see cref="AtomicReaderContext"/> for which to return the <see cref="Scorer"/>. </param>
/// <param name="scoreDocsInOrder">
/// Specifies whether in-order scoring of documents is required. Note
/// that if set to <c>false</c> (i.e., out-of-order scoring is required),
/// this method can return whatever scoring mode it supports, as every
/// in-order scorer is also an out-of-order one. However, an
/// out-of-order scorer may not support <see cref="DocIdSetIterator.NextDoc()"/>
/// and/or <see cref="DocIdSetIterator.Advance(int)"/>, therefore it is recommended to
/// request an in-order scorer if use of these
/// methods is required. </param>
/// <param name="acceptDocs">
/// <see cref="IBits"/> that represent the allowable docs to match (typically deleted docs
/// but possibly filtering other documents)
/// </param>
/// <returns> A <see cref="BulkScorer"/> which scores documents and
/// passes them to a collector. </returns>
/// <exception cref="IOException"> if there is a low-level I/O error </exception>
public virtual BulkScorer GetBulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, IBits acceptDocs)
{
Scorer scorer = GetScorer(context, acceptDocs);
if (scorer == null)
{
// No docs match
return null;
}
// this impl always scores docs in order, so we can
// ignore scoreDocsInOrder:
return new DefaultBulkScorer(scorer);
}
/// <summary>
/// Just wraps a <see cref="Scorer"/> and performs top scoring using it. </summary>
internal class DefaultBulkScorer : BulkScorer
{
internal readonly Scorer scorer;
public DefaultBulkScorer(Scorer scorer)
{
this.scorer = scorer ?? throw new NullReferenceException();
}
public override bool Score(ICollector collector, int max)
{
// TODO: this may be sort of weird, when we are
// embedded in a BooleanScorer, because we are
// called for every chunk of 2048 documents. But,
// then, scorer is a FakeScorer in that case, so any
// Collector doing something "interesting" in
// setScorer will be forced to use BS2 anyways:
collector.SetScorer(scorer);
if (max == DocIdSetIterator.NO_MORE_DOCS)
{
ScoreAll(collector, scorer);
return false;
}
else
{
int doc = scorer.DocID;
if (doc < 0)
{
doc = scorer.NextDoc();
}
return ScoreRange(collector, scorer, doc, max);
}
}
internal static bool ScoreRange(ICollector collector, Scorer scorer, int currentDoc, int end)
{
while (currentDoc < end)
{
collector.Collect(currentDoc);
currentDoc = scorer.NextDoc();
}
return currentDoc != DocIdSetIterator.NO_MORE_DOCS;
}
internal static void ScoreAll(ICollector collector, Scorer scorer)
{
int doc;
while ((doc = scorer.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
{
collector.Collect(doc);
}
}
}
/// <summary>
/// Returns true if this implementation scores docs only out of order. This
/// method is used in conjunction with <see cref="ICollector"/>'s
/// <see cref="ICollector.AcceptsDocsOutOfOrder"/> and
/// <see cref="GetBulkScorer(AtomicReaderContext, bool, IBits)"/> to
/// create a matching <see cref="Scorer"/> instance for a given <see cref="ICollector"/>, or
/// vice versa.
/// <para/>
/// <b>NOTE:</b> the default implementation returns <c>false</c>, i.e.
/// the <see cref="Scorer"/> scores documents in-order.
/// </summary>
public virtual bool ScoresDocsOutOfOrder => false;
}
}