/
FieldLengthFeature.java
156 lines (129 loc) · 4.76 KB
/
FieldLengthFeature.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.ltr.feature;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.SmallFloat;
import org.apache.solr.request.SolrQueryRequest;
/**
* This feature returns the length of a field (in terms) for the current document.
* Example configuration:
* <pre>{
"name": "titleLength",
"class": "org.apache.solr.ltr.feature.FieldLengthFeature",
"params": {
"field": "title"
}
}</pre>
* Note: since this feature relies on norm values that are stored in a single byte,
* the value of the feature may differ slightly from the actual number of terms.
* (see also {@link org.apache.lucene.search.similarities.ClassicSimilarity})
**/
public class FieldLengthFeature extends Feature {

  /** Name of the field whose length is reported; set via the "field" param. */
  private String field;

  /**
   * @return the name of the field whose length this feature reports
   */
  public String getField() {
    return field;
  }

  /**
   * @param field the name of the field whose length this feature reports
   */
  public void setField(String field) {
    this.field = field;
  }

  /**
   * Serializes this feature's parameters: the defaults plus the "field" entry.
   *
   * @return the parameter map, in insertion order
   */
  @Override
  public LinkedHashMap<String,Object> paramsToMap() {
    final LinkedHashMap<String,Object> params = defaultParamsToMap();
    params.put("field", field);
    return params;
  }

  /**
   * Checks that a non-empty field name has been configured.
   *
   * @throws FeatureException if {@code field} is null or empty
   */
  @Override
  protected void validate() throws FeatureException {
    if (field == null || field.isEmpty()) {
      throw new FeatureException(getClass().getSimpleName()+
          ": field must be provided");
    }
  }

  /** Cache of decoded bytes. */
  private static final float[] NORM_TABLE = new float[256];

  static {
    // Pre-compute the decoded value of every possible single-byte norm so
    // that scoring is a plain array lookup.
    NORM_TABLE[0] = 0;
    for (int i = 1; i < 256; i++) {
      NORM_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
    }
  }

  /**
   * Decodes the norm value, assuming it is a single byte.
   *
   * @param norm the raw norm value; only the lowest 8 bits are used
   * @return the decoded value looked up in {@link #NORM_TABLE}
   */
  private static float decodeNorm(long norm) {
    // & 0xFF maps negative bytes to positive above 127
    return NORM_TABLE[(int) (norm & 0xFF)];
  }

  /**
   * @param name the feature name
   * @param params the feature parameters, passed through to the superclass
   */
  public FieldLengthFeature(String name, Map<String,Object> params) {
    super(name, params);
  }

  /**
   * Creates the weight for this feature. {@code needsScores} is not used here.
   */
  @Override
  public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores,
      SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi)
      throws IOException {
    return new FieldLengthFeatureWeight(searcher, request, originalQuery, efi);
  }

  /** Weight that produces per-segment scorers reading the field's norms. */
  public class FieldLengthFeatureWeight extends FeatureWeight {

    public FieldLengthFeatureWeight(IndexSearcher searcher,
        SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) {
      super(FieldLengthFeature.this, searcher, request, originalQuery, efi);
    }

    /**
     * Returns a scorer backed by the segment's norms for the configured field,
     * or a constant scorer returning {@code 0f} when the segment has no norms
     * for that field.
     */
    @Override
    public FeatureScorer scorer(LeafReaderContext context) throws IOException {
      final NumericDocValues norms = context.reader().getNormValues(field);
      if (norms == null) {
        return new ValueFeatureScorer(this, 0f,
            DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
      }
      return new FieldLengthFeatureScorer(this, norms);
    }

    /** Scorer whose score is the decoded norm of the current document. */
    public class FieldLengthFeatureScorer extends FeatureScorer {

      final NumericDocValues norms;

      /**
       * @param weight the weight this scorer belongs to
       * @param norms the norm values for the configured field
       * @throws IOException if the stored field of document 0 reports that
       *         norms are omitted, or if reading the document fails
       */
      public FieldLengthFeatureScorer(FeatureWeight weight,
          NumericDocValues norms) throws IOException {
        super(weight, norms);
        this.norms = norms;

        // In the constructor, docId is -1, so using 0 as default lookup
        final IndexableField idxF = searcher.doc(0).getField(field);
        // getField returns null when document 0 has no stored value for the
        // field (e.g. the field is not stored, or that document lacks it).
        // Dereferencing it unconditionally would raise a NullPointerException,
        // so the omitNorms check only runs when the stored field is present.
        if (idxF != null && idxF.fieldType().omitNorms()) {
          throw new IOException(
              "FieldLengthFeatures can't be used if omitNorms is enabled (field="
                  + field + ")");
        }
      }

      /**
       * @return the decoded norm value at the norms' current position
       */
      @Override
      public float score() throws IOException {
        return decodeNorm(norms.longValue());
      }

      /** No upper bound is computed for this feature. */
      @Override
      public float getMaxScore(int upTo) throws IOException {
        return Float.POSITIVE_INFINITY;
      }
    }
  }
}