forked from elastic/elasticsearch
-
Notifications
You must be signed in to change notification settings - Fork 3
/
PercolatorFieldMapper.java
603 lines (542 loc) · 31.7 KB
/
PercolatorFieldMapper.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.percolator;
import org.apache.lucene.document.BinaryRange;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CoveringQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.Version;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.hash.MurmurHash3;
import org.elasticsearch.common.io.stream.OutputStreamStreamOutput;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentLocation;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.mapper.BinaryFieldMapper;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.RangeFieldMapper;
import org.elasticsearch.index.mapper.RangeFieldMapper.RangeType;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.BoostingQueryBuilder;
import org.elasticsearch.index.query.ConstantScoreQueryBuilder;
import org.elasticsearch.index.query.DisMaxQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.index.query.Rewriteable;
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
import static org.elasticsearch.index.query.AbstractQueryBuilder.parseInnerQueryBuilder;
public class PercolatorFieldMapper extends FieldMapper {
static final XContentType QUERY_BUILDER_CONTENT_TYPE = XContentType.SMILE;
@Deprecated
static final Setting<Boolean> INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING = Setting.boolSetting(
"index.percolator.map_unmapped_fields_as_string", false, Setting.Property.IndexScope, Setting.Property.Deprecated);
static final Setting<Boolean> INDEX_MAP_UNMAPPED_FIELDS_AS_TEXT_SETTING = Setting.boolSetting(
"index.percolator.map_unmapped_fields_as_text", false, Setting.Property.IndexScope);
static final String CONTENT_TYPE = "percolator";
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(PercolatorFieldMapper.class));
private static final FieldType FIELD_TYPE = new FieldType();
static final byte FIELD_VALUE_SEPARATOR = 0; // nul code point
static final String EXTRACTION_COMPLETE = "complete";
static final String EXTRACTION_PARTIAL = "partial";
static final String EXTRACTION_FAILED = "failed";
static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
static final String EXTRACTION_RESULT_FIELD_NAME = "extraction_result";
static final String QUERY_BUILDER_FIELD_NAME = "query_builder_field";
static final String RANGE_FIELD_NAME = "range_field";
static final String MINIMUM_SHOULD_MATCH_FIELD_NAME = "minimum_should_match_field";
static class Builder extends FieldMapper.Builder<Builder, PercolatorFieldMapper> {
private final Supplier<QueryShardContext> queryShardContext;
Builder(String fieldName, Supplier<QueryShardContext> queryShardContext) {
super(fieldName, FIELD_TYPE, FIELD_TYPE);
this.queryShardContext = queryShardContext;
}
@Override
public PercolatorFieldMapper build(BuilderContext context) {
context.path().add(name());
FieldType fieldType = (FieldType) this.fieldType;
KeywordFieldMapper extractedTermsField = createExtractQueryFieldBuilder(EXTRACTED_TERMS_FIELD_NAME, context);
fieldType.queryTermsField = extractedTermsField.fieldType();
KeywordFieldMapper extractionResultField = createExtractQueryFieldBuilder(EXTRACTION_RESULT_FIELD_NAME, context);
fieldType.extractionResultField = extractionResultField.fieldType();
BinaryFieldMapper queryBuilderField = createQueryBuilderFieldBuilder(context);
fieldType.queryBuilderField = queryBuilderField.fieldType();
// Range field is of type ip, because that matches closest with BinaryRange field. Otherwise we would
// have to introduce a new field type...
RangeFieldMapper rangeFieldMapper = createExtractedRangeFieldBuilder(RANGE_FIELD_NAME, RangeType.IP, context);
fieldType.rangeField = rangeFieldMapper.fieldType();
NumberFieldMapper minimumShouldMatchFieldMapper = createMinimumShouldMatchField(context);
fieldType.minimumShouldMatchField = minimumShouldMatchFieldMapper.fieldType();
fieldType.mapUnmappedFieldsAsText = getMapUnmappedFieldAsText(context.indexSettings());
context.path().remove();
setupFieldType(context);
return new PercolatorFieldMapper(name(), fieldType, defaultFieldType, context.indexSettings(),
multiFieldsBuilder.build(this, context), copyTo, queryShardContext, extractedTermsField,
extractionResultField, queryBuilderField, rangeFieldMapper, minimumShouldMatchFieldMapper);
}
private static boolean getMapUnmappedFieldAsText(Settings indexSettings) {
if (INDEX_MAP_UNMAPPED_FIELDS_AS_TEXT_SETTING.exists(indexSettings) &&
INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.exists(indexSettings)) {
throw new IllegalArgumentException("Either specify [" + INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.getKey() +
"] or [" + INDEX_MAP_UNMAPPED_FIELDS_AS_TEXT_SETTING.getKey() + "] setting, not both");
}
if (INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.exists(indexSettings)) {
DEPRECATION_LOGGER.deprecatedAndMaybeLog(INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.getKey(),
"The [" + INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.getKey() +
"] setting is deprecated in favour for the [" + INDEX_MAP_UNMAPPED_FIELDS_AS_TEXT_SETTING.getKey() + "] setting");
return INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.get(indexSettings);
} else {
return INDEX_MAP_UNMAPPED_FIELDS_AS_TEXT_SETTING.get(indexSettings);
}
}
static KeywordFieldMapper createExtractQueryFieldBuilder(String name, BuilderContext context) {
KeywordFieldMapper.Builder queryMetaDataFieldBuilder = new KeywordFieldMapper.Builder(name);
queryMetaDataFieldBuilder.docValues(false);
queryMetaDataFieldBuilder.store(false);
queryMetaDataFieldBuilder.indexOptions(IndexOptions.DOCS);
return queryMetaDataFieldBuilder.build(context);
}
static BinaryFieldMapper createQueryBuilderFieldBuilder(BuilderContext context) {
BinaryFieldMapper.Builder builder = new BinaryFieldMapper.Builder(QUERY_BUILDER_FIELD_NAME);
builder.docValues(true);
builder.indexOptions(IndexOptions.NONE);
builder.store(false);
builder.fieldType().setDocValuesType(DocValuesType.BINARY);
return builder.build(context);
}
static RangeFieldMapper createExtractedRangeFieldBuilder(String name, RangeType rangeType, BuilderContext context) {
RangeFieldMapper.Builder builder = new RangeFieldMapper.Builder(name, rangeType, context.indexCreatedVersion());
// For now no doc values, because in processQuery(...) only the Lucene range fields get added:
builder.docValues(false);
return builder.build(context);
}
static NumberFieldMapper createMinimumShouldMatchField(BuilderContext context) {
NumberFieldMapper.Builder builder =
new NumberFieldMapper.Builder(MINIMUM_SHOULD_MATCH_FIELD_NAME, NumberFieldMapper.NumberType.INTEGER);
builder.index(false);
builder.store(false);
builder.docValues(true);
builder.fieldType().setDocValuesType(DocValuesType.NUMERIC);
return builder.build(context);
}
}
static class TypeParser implements FieldMapper.TypeParser {
@Override
public Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
return new Builder(name, parserContext.queryShardContextSupplier());
}
}
static class FieldType extends MappedFieldType {
MappedFieldType queryTermsField;
MappedFieldType extractionResultField;
MappedFieldType queryBuilderField;
MappedFieldType minimumShouldMatchField;
RangeFieldMapper.RangeFieldType rangeField;
boolean mapUnmappedFieldsAsText;
FieldType() {
setIndexOptions(IndexOptions.NONE);
setDocValuesType(DocValuesType.NONE);
setStored(false);
}
FieldType(FieldType ref) {
super(ref);
queryTermsField = ref.queryTermsField;
extractionResultField = ref.extractionResultField;
queryBuilderField = ref.queryBuilderField;
rangeField = ref.rangeField;
minimumShouldMatchField = ref.minimumShouldMatchField;
mapUnmappedFieldsAsText = ref.mapUnmappedFieldsAsText;
}
@Override
public MappedFieldType clone() {
return new FieldType(this);
}
@Override
public String typeName() {
return CONTENT_TYPE;
}
@Override
public Query existsQuery(QueryShardContext context) {
if (hasDocValues()) {
return new DocValuesFieldExistsQuery(name());
} else {
return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
}
}
@Override
public Query termQuery(Object value, QueryShardContext context) {
throw new QueryShardException(context, "Percolator fields are not searchable directly, use a percolate query instead");
}
Query percolateQuery(String name, PercolateQuery.QueryStore queryStore, List<BytesReference> documents,
IndexSearcher searcher, Version indexVersion) throws IOException {
IndexReader indexReader = searcher.getIndexReader();
Tuple<BooleanQuery, Boolean> t = createCandidateQuery(indexReader, indexVersion);
Query candidateQuery = t.v1();
boolean canUseMinimumShouldMatchField = t.v2();
Query verifiedMatchesQuery;
// We can only skip the MemoryIndex verification when percolating a single non nested document. We cannot
// skip MemoryIndex verification when percolating multiple documents, because when terms and
// ranges are extracted from IndexReader backed by a RamDirectory holding multiple documents we do
// not know to which document the terms belong too and for certain queries we incorrectly emit candidate
// matches as actual match.
if (canUseMinimumShouldMatchField && indexReader.maxDoc() == 1) {
verifiedMatchesQuery = new TermQuery(new Term(extractionResultField.name(), EXTRACTION_COMPLETE));
} else {
verifiedMatchesQuery = new MatchNoDocsQuery("multiple or nested docs or CoveringQuery could not be used");
}
return new PercolateQuery(name, queryStore, documents, candidateQuery, searcher, verifiedMatchesQuery);
}
Tuple<BooleanQuery, Boolean> createCandidateQuery(IndexReader indexReader, Version indexVersion) throws IOException {
Tuple<List<BytesRef>, Map<String, List<byte[]>>> t = extractTermsAndRanges(indexReader);
List<BytesRef> extractedTerms = t.v1();
Map<String, List<byte[]>> encodedPointValuesByField = t.v2();
// `1 + ` is needed to take into account the EXTRACTION_FAILED should clause
boolean canUseMinimumShouldMatchField = 1 + extractedTerms.size() + encodedPointValuesByField.size() <=
BooleanQuery.getMaxClauseCount();
List<Query> subQueries = new ArrayList<>();
for (Map.Entry<String, List<byte[]>> entry : encodedPointValuesByField.entrySet()) {
String rangeFieldName = entry.getKey();
List<byte[]> encodedPointValues = entry.getValue();
byte[] min = encodedPointValues.get(0);
byte[] max = encodedPointValues.get(1);
Query query = BinaryRange.newIntersectsQuery(rangeField.name(), encodeRange(rangeFieldName, min, max));
subQueries.add(query);
}
BooleanQuery.Builder candidateQuery = new BooleanQuery.Builder();
if (canUseMinimumShouldMatchField && indexVersion.onOrAfter(Version.V_6_1_0)) {
LongValuesSource valuesSource = LongValuesSource.fromIntField(minimumShouldMatchField.name());
for (BytesRef extractedTerm : extractedTerms) {
subQueries.add(new TermQuery(new Term(queryTermsField.name(), extractedTerm)));
}
candidateQuery.add(new CoveringQuery(subQueries, valuesSource), BooleanClause.Occur.SHOULD);
} else {
candidateQuery.add(new TermInSetQuery(queryTermsField.name(), extractedTerms), BooleanClause.Occur.SHOULD);
for (Query subQuery : subQueries) {
candidateQuery.add(subQuery, BooleanClause.Occur.SHOULD);
}
}
// include extractionResultField:failed, because docs with this term have no extractedTermsField
// and otherwise we would fail to return these docs. Docs that failed query term extraction
// always need to be verified by MemoryIndex:
candidateQuery.add(new TermQuery(new Term(extractionResultField.name(), EXTRACTION_FAILED)), BooleanClause.Occur.SHOULD);
return new Tuple<>(candidateQuery.build(), canUseMinimumShouldMatchField);
}
// This was extracted the method above, because otherwise it is difficult to test what terms are included in
// the query in case a CoveringQuery is used (it does not have a getter to retrieve the clauses)
Tuple<List<BytesRef>, Map<String, List<byte[]>>> extractTermsAndRanges(IndexReader indexReader) throws IOException {
List<BytesRef> extractedTerms = new ArrayList<>();
Map<String, List<byte[]>> encodedPointValuesByField = new HashMap<>();
LeafReader reader = indexReader.leaves().get(0).reader();
for (FieldInfo info : reader.getFieldInfos()) {
Terms terms = reader.terms(info.name);
if (terms != null) {
BytesRef fieldBr = new BytesRef(info.name);
TermsEnum tenum = terms.iterator();
for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(fieldBr);
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term);
extractedTerms.add(builder.toBytesRef());
}
}
if (info.getPointIndexDimensionCount() == 1) { // not != 0 because range fields are not supported
PointValues values = reader.getPointValues(info.name);
List<byte[]> encodedPointValues = new ArrayList<>();
encodedPointValues.add(values.getMinPackedValue().clone());
encodedPointValues.add(values.getMaxPackedValue().clone());
encodedPointValuesByField.put(info.name, encodedPointValues);
}
}
return new Tuple<>(extractedTerms, encodedPointValuesByField);
}
}
private final Supplier<QueryShardContext> queryShardContext;
private KeywordFieldMapper queryTermsField;
private KeywordFieldMapper extractionResultField;
private BinaryFieldMapper queryBuilderField;
private NumberFieldMapper minimumShouldMatchFieldMapper;
private RangeFieldMapper rangeFieldMapper;
PercolatorFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
Settings indexSettings, MultiFields multiFields, CopyTo copyTo,
Supplier<QueryShardContext> queryShardContext,
KeywordFieldMapper queryTermsField, KeywordFieldMapper extractionResultField,
BinaryFieldMapper queryBuilderField, RangeFieldMapper rangeFieldMapper,
NumberFieldMapper minimumShouldMatchFieldMapper) {
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
this.queryShardContext = queryShardContext;
this.queryTermsField = queryTermsField;
this.extractionResultField = extractionResultField;
this.queryBuilderField = queryBuilderField;
this.minimumShouldMatchFieldMapper = minimumShouldMatchFieldMapper;
this.rangeFieldMapper = rangeFieldMapper;
}
@Override
public FieldMapper updateFieldType(Map<String, MappedFieldType> fullNameToFieldType) {
PercolatorFieldMapper updated = (PercolatorFieldMapper) super.updateFieldType(fullNameToFieldType);
KeywordFieldMapper queryTermsUpdated = (KeywordFieldMapper) queryTermsField.updateFieldType(fullNameToFieldType);
KeywordFieldMapper extractionResultUpdated = (KeywordFieldMapper) extractionResultField.updateFieldType(fullNameToFieldType);
BinaryFieldMapper queryBuilderUpdated = (BinaryFieldMapper) queryBuilderField.updateFieldType(fullNameToFieldType);
RangeFieldMapper rangeFieldMapperUpdated = (RangeFieldMapper) rangeFieldMapper.updateFieldType(fullNameToFieldType);
NumberFieldMapper msmFieldMapperUpdated = (NumberFieldMapper) minimumShouldMatchFieldMapper.updateFieldType(fullNameToFieldType);
if (updated == this && queryTermsUpdated == queryTermsField && extractionResultUpdated == extractionResultField
&& queryBuilderUpdated == queryBuilderField && rangeFieldMapperUpdated == rangeFieldMapper) {
return this;
}
if (updated == this) {
updated = (PercolatorFieldMapper) updated.clone();
}
updated.queryTermsField = queryTermsUpdated;
updated.extractionResultField = extractionResultUpdated;
updated.queryBuilderField = queryBuilderUpdated;
updated.rangeFieldMapper = rangeFieldMapperUpdated;
updated.minimumShouldMatchFieldMapper = msmFieldMapperUpdated;
return updated;
}
@Override
public void parse(ParseContext context) throws IOException {
QueryShardContext queryShardContext = this.queryShardContext.get();
if (context.doc().getField(queryBuilderField.name()) != null) {
// If a percolator query has been defined in an array object then multiple percolator queries
// could be provided. In order to prevent this we fail if we try to parse more than one query
// for the current document.
throw new IllegalArgumentException("a document can only contain one percolator query");
}
XContentParser parser = context.parser();
QueryBuilder queryBuilder = parseQueryBuilder(
parser, parser.getTokenLocation()
);
verifyQuery(queryBuilder);
// Fetching of terms, shapes and indexed scripts happen during this rewrite:
PlainActionFuture<QueryBuilder> future = new PlainActionFuture<>();
Rewriteable.rewriteAndFetch(queryBuilder, queryShardContext, future);
queryBuilder = future.actionGet();
Version indexVersion = context.mapperService().getIndexSettings().getIndexVersionCreated();
createQueryBuilderField(indexVersion, queryBuilderField, queryBuilder, context);
Query query = toQuery(queryShardContext, isMapUnmappedFieldAsText(), queryBuilder);
processQuery(query, context);
}
static void createQueryBuilderField(Version indexVersion, BinaryFieldMapper qbField,
QueryBuilder queryBuilder, ParseContext context) throws IOException {
if (indexVersion.onOrAfter(Version.V_6_0_0_beta2)) {
try (ByteArrayOutputStream stream = new ByteArrayOutputStream()) {
try (OutputStreamStreamOutput out = new OutputStreamStreamOutput(stream)) {
out.setVersion(indexVersion);
out.writeNamedWriteable(queryBuilder);
byte[] queryBuilderAsBytes = stream.toByteArray();
qbField.parse(context.createExternalValueContext(queryBuilderAsBytes));
}
}
} else {
try (XContentBuilder builder = XContentFactory.contentBuilder(QUERY_BUILDER_CONTENT_TYPE)) {
queryBuilder.toXContent(builder, new MapParams(Collections.emptyMap()));
builder.flush();
byte[] queryBuilderAsBytes = BytesReference.toBytes(BytesReference.bytes(builder));
context.doc().add(new Field(qbField.name(), queryBuilderAsBytes, qbField.fieldType()));
}
}
}
void processQuery(Query query, ParseContext context) {
ParseContext.Document doc = context.doc();
FieldType pft = (FieldType) this.fieldType();
QueryAnalyzer.Result result;
try {
Version indexVersion = context.mapperService().getIndexSettings().getIndexVersionCreated();
result = QueryAnalyzer.analyze(query, indexVersion);
} catch (QueryAnalyzer.UnsupportedQueryException e) {
doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
return;
}
for (QueryAnalyzer.QueryExtraction extraction : result.extractions) {
if (extraction.term != null) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(new BytesRef(extraction.field()));
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(extraction.bytes());
doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType()));
} else if (extraction.range != null) {
byte[] min = extraction.range.lowerPoint;
byte[] max = extraction.range.upperPoint;
doc.add(new BinaryRange(rangeFieldMapper.name(), encodeRange(extraction.range.fieldName, min, max)));
}
}
Version indexVersionCreated = context.mapperService().getIndexSettings().getIndexVersionCreated();
if (result.matchAllDocs) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
if (result.verified) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
}
} else if (result.verified) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
} else {
doc.add(new Field(extractionResultField.name(), EXTRACTION_PARTIAL, extractionResultField.fieldType()));
}
List<IndexableField> fields = new ArrayList<>(1);
createFieldNamesField(context, fields);
for (IndexableField field : fields) {
context.doc().add(field);
}
if (indexVersionCreated.onOrAfter(Version.V_6_1_0)) {
doc.add(new NumericDocValuesField(minimumShouldMatchFieldMapper.name(), result.minimumShouldMatch));
}
}
static Query parseQuery(QueryShardContext context, boolean mapUnmappedFieldsAsString, XContentParser parser) throws IOException {
return toQuery(context, mapUnmappedFieldsAsString, parseQueryBuilder(parser, parser.getTokenLocation()));
}
static Query toQuery(QueryShardContext context, boolean mapUnmappedFieldsAsString, QueryBuilder queryBuilder) throws IOException {
// This means that fields in the query need to exist in the mapping prior to registering this query
// The reason that this is required, is that if a field doesn't exist then the query assumes defaults, which may be undesired.
//
// Even worse when fields mentioned in percolator queries do go added to map after the queries have been registered
// then the percolator queries don't work as expected any more.
//
// Query parsing can't introduce new fields in mappings (which happens when registering a percolator query),
// because field type can't be inferred from queries (like document do) so the best option here is to disallow
// the usage of unmapped fields in percolator queries to avoid unexpected behaviour
//
// if index.percolator.map_unmapped_fields_as_string is set to true, query can contain unmapped fields which will be mapped
// as an analyzed string.
context.setAllowUnmappedFields(false);
context.setMapUnmappedFieldAsString(mapUnmappedFieldsAsString);
return queryBuilder.toQuery(context);
}
private static QueryBuilder parseQueryBuilder(XContentParser parser, XContentLocation location) {
try {
return parseInnerQueryBuilder(parser);
} catch (IOException e) {
throw new ParsingException(location, "Failed to parse", e);
}
}
@Override
public Iterator<Mapper> iterator() {
return Arrays.<Mapper>asList(
queryTermsField, extractionResultField, queryBuilderField, minimumShouldMatchFieldMapper, rangeFieldMapper
).iterator();
}
@Override
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
throw new UnsupportedOperationException("should not be invoked");
}
@Override
protected String contentType() {
return CONTENT_TYPE;
}
boolean isMapUnmappedFieldAsText() {
return ((FieldType) fieldType).mapUnmappedFieldsAsText;
}
/**
* Fails if a percolator contains an unsupported query. The following queries are not supported:
* 1) a has_child query
* 2) a has_parent query
*/
static void verifyQuery(QueryBuilder queryBuilder) {
if (queryBuilder.getName().equals("has_child")) {
throw new IllegalArgumentException("the [has_child] query is unsupported inside a percolator query");
} else if (queryBuilder.getName().equals("has_parent")) {
throw new IllegalArgumentException("the [has_parent] query is unsupported inside a percolator query");
} else if (queryBuilder instanceof BoolQueryBuilder) {
BoolQueryBuilder boolQueryBuilder = (BoolQueryBuilder) queryBuilder;
List<QueryBuilder> clauses = new ArrayList<>();
clauses.addAll(boolQueryBuilder.filter());
clauses.addAll(boolQueryBuilder.must());
clauses.addAll(boolQueryBuilder.mustNot());
clauses.addAll(boolQueryBuilder.should());
for (QueryBuilder clause : clauses) {
verifyQuery(clause);
}
} else if (queryBuilder instanceof ConstantScoreQueryBuilder) {
verifyQuery(((ConstantScoreQueryBuilder) queryBuilder).innerQuery());
} else if (queryBuilder instanceof FunctionScoreQueryBuilder) {
verifyQuery(((FunctionScoreQueryBuilder) queryBuilder).query());
} else if (queryBuilder instanceof BoostingQueryBuilder) {
verifyQuery(((BoostingQueryBuilder) queryBuilder).negativeQuery());
verifyQuery(((BoostingQueryBuilder) queryBuilder).positiveQuery());
} else if (queryBuilder instanceof DisMaxQueryBuilder) {
DisMaxQueryBuilder disMaxQueryBuilder = (DisMaxQueryBuilder) queryBuilder;
for (QueryBuilder innerQueryBuilder : disMaxQueryBuilder.innerQueries()) {
verifyQuery(innerQueryBuilder);
}
}
}
static byte[] encodeRange(String rangeFieldName, byte[] minEncoded, byte[] maxEncoded) {
assert minEncoded.length == maxEncoded.length;
byte[] bytes = new byte[BinaryRange.BYTES * 2];
// First compute hash for field name and write the full hash into the byte array
BytesRef fieldAsBytesRef = new BytesRef(rangeFieldName);
MurmurHash3.Hash128 hash = new MurmurHash3.Hash128();
MurmurHash3.hash128(fieldAsBytesRef.bytes, fieldAsBytesRef.offset, fieldAsBytesRef.length, 0, hash);
ByteBuffer bb = ByteBuffer.wrap(bytes);
bb.putLong(hash.h1).putLong(hash.h2).putLong(hash.h1).putLong(hash.h2);
assert bb.position() == bb.limit();
// Secondly, overwrite the min and max encoded values in the byte array
// This way we are able to reuse as much as possible from the hash for any range type.
int offset = BinaryRange.BYTES - minEncoded.length;
System.arraycopy(minEncoded, 0, bytes, offset, minEncoded.length);
System.arraycopy(maxEncoded, 0, bytes, BinaryRange.BYTES + offset, maxEncoded.length);
return bytes;
}
}