Skip to content

Commit

Permalink
Add index mapping parameter for counted_keyword (#103646)
Browse files Browse the repository at this point in the history
With this commit we add a new mapping parameter `index` to the
`counted_keyword` mapping type. Setting it to `false` reduces disk usage for
use cases where indexed fields are not required.

Relates #101826
  • Loading branch information
danielmitterdorfer committed Dec 22, 2023
1 parent de502db commit 53296e2
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 12 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/103646.yaml
@@ -0,0 +1,5 @@
pr: 103646
summary: Add index mapping parameter for `counted_keyword`
area: Aggregations
type: enhancement
issues: []
Expand Up @@ -78,16 +78,24 @@ public class CountedKeywordFieldMapper extends FieldMapper {
public static final String CONTENT_TYPE = "counted_keyword";
public static final String COUNT_FIELD_NAME_SUFFIX = "_count";

public static final FieldType FIELD_TYPE;
private static final FieldType FIELD_TYPE_INDEXED;
private static final FieldType FIELD_TYPE_NOT_INDEXED;

// Static initializer that builds the Lucene FieldType constants used by this mapper.
// NOTE(review): this text is a flattened diff view; judging by the hunk header (16 old / 24 new lines),
// the single-`ft` setup is the removed code and the `indexed` / `notIndexed` setups are the added code.
static {
// Older setup: SORTED_SET doc values, untokenized, norms omitted, postings limited to DOCS.
FieldType ft = new FieldType();
ft.setDocValuesType(DocValuesType.SORTED_SET);
ft.setTokenized(false);
ft.setOmitNorms(true);
ft.setIndexOptions(IndexOptions.DOCS);
ft.freeze();
FIELD_TYPE = freezeAndDeduplicateFieldType(ft);
// Indexed variant: same settings as above, IndexOptions.DOCS.
FieldType indexed = new FieldType();
indexed.setDocValuesType(DocValuesType.SORTED_SET);
indexed.setTokenized(false);
indexed.setOmitNorms(true);
indexed.setIndexOptions(IndexOptions.DOCS);
// freezeAndDeduplicateFieldType is defined outside this view; presumably it freezes the type itself,
// since no explicit freeze() call precedes it here - TODO confirm.
FIELD_TYPE_INDEXED = freezeAndDeduplicateFieldType(indexed);

// Not-indexed variant: identical except for IndexOptions.NONE; SORTED_SET doc values are still set.
FieldType notIndexed = new FieldType();
notIndexed.setDocValuesType(DocValuesType.SORTED_SET);
notIndexed.setTokenized(false);
notIndexed.setOmitNorms(true);
notIndexed.setIndexOptions(IndexOptions.NONE);
FIELD_TYPE_NOT_INDEXED = freezeAndDeduplicateFieldType(notIndexed);

}

private static class CountedKeywordFieldType extends StringFieldType {
Expand Down Expand Up @@ -261,7 +269,12 @@ public TermsEnum termsEnum() throws IOException {
}
}

/**
 * Narrows a generic {@link FieldMapper} to a {@link CountedKeywordFieldMapper}.
 *
 * @param in the mapper to narrow
 * @return the same instance, typed as {@link CountedKeywordFieldMapper}
 * @throws ClassCastException if {@code in} is not a {@link CountedKeywordFieldMapper}
 */
private static CountedKeywordFieldMapper toType(FieldMapper in) {
    final CountedKeywordFieldMapper narrowed = (CountedKeywordFieldMapper) in;
    return narrowed;
}

public static class Builder extends FieldMapper.Builder {
private final Parameter<Boolean> indexed = Parameter.indexParam(m -> toType(m).mappedFieldType.isIndexed(), true);
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

protected Builder(String name) {
Expand All @@ -270,22 +283,24 @@ protected Builder(String name) {

/**
 * Returns the array of {@link Parameter} objects declared by this builder.
 *
 * NOTE(review): this text is a flattened diff view. Judging by the hunk header, the first return
 * statement (meta only) is the removed line and the second (meta and indexed) is its replacement.
 */
@Override
protected Parameter<?>[] getParameters() {
// Older form: only the meta parameter.
return new Parameter<?>[] { meta };
// Newer form: also lists the indexed parameter.
return new Parameter<?>[] { meta, indexed };
}

@Override
public FieldMapper build(MapperBuilderContext context) {

BinaryFieldMapper countFieldMapper = new BinaryFieldMapper.Builder(name + COUNT_FIELD_NAME_SUFFIX, true).build(context);
boolean isIndexed = indexed.getValue();
FieldType ft = isIndexed ? FIELD_TYPE_INDEXED : FIELD_TYPE_NOT_INDEXED;
return new CountedKeywordFieldMapper(
name,
FIELD_TYPE,
ft,
new CountedKeywordFieldType(
context.buildFullName(name),
true,
isIndexed,
false,
true,
new TextSearchInfo(FIELD_TYPE, null, KEYWORD_ANALYZER, KEYWORD_ANALYZER),
new TextSearchInfo(ft, null, KEYWORD_ANALYZER, KEYWORD_ANALYZER),
meta.getValue(),
countFieldMapper.fieldType()
),
Expand Down
Expand Up @@ -7,6 +7,8 @@

package org.elasticsearch.xpack.countedkeyword;

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
Expand Down Expand Up @@ -82,4 +84,15 @@ public void testDottedFieldNames() throws IOException {
List<IndexableField> fields = doc.rootDoc().getFields("dotted.field");
assertEquals(1, fields.size());
}

/**
 * Maps the field with {@code index: false}, parses a one-value document, and checks that the single
 * resulting Lucene field has {@link IndexOptions#NONE} while keeping {@link DocValuesType#SORTED_SET}.
 */
public void testDisableIndex() throws IOException {
    DocumentMapper documentMapper = createDocumentMapper(
        fieldMapping(mapping -> mapping.field("type", CountedKeywordFieldMapper.CONTENT_TYPE).field("index", false))
    );
    ParsedDocument parsed = documentMapper.parse(source(src -> src.field("field", "1234")));

    List<IndexableField> luceneFields = parsed.rootDoc().getFields("field");
    assertEquals(1, luceneFields.size());

    // Inspect the only field produced for the document.
    IndexableField onlyField = luceneFields.get(0);
    assertEquals(IndexOptions.NONE, onlyField.fieldType().indexOptions());
    assertEquals(DocValuesType.SORTED_SET, onlyField.fieldType().docValuesType());
}
}
Expand Up @@ -29,6 +29,16 @@ setup:
- do:
indices.refresh: { }

---
# Without an explicit `index` setting, field_caps must report the field as searchable and aggregatable.
"Counted keyword is searchable by default":
  - do:
      field_caps:
        index: test-events
        fields: [ events ]

  - match: { fields.events.counted_keyword.searchable: true }
  - match: { fields.events.counted_keyword.aggregatable: true }

---
"Counted Terms agg":

Expand Down
@@ -0,0 +1,54 @@
# Shared setup: creates an index whose `events` field is a counted_keyword with `index: false`,
# indexes one document and refreshes.
setup:

  - skip:
      version: " - 8.12.99"
      reason: "index option on counted_keyword was added in 8.13"

  - do:
      indices.create:
        index: test-events-no-index
        body:
          mappings:
            properties:
              events:
                type: counted_keyword
                index: false

  - do:
      index:
        index: test-events-no-index
        id: "1"
        body: { "events": [ "a", "a", "b" ] }


  - do:
      indices.refresh: { }

---
# With `index: false`, field_caps must report the field as not searchable but still aggregatable.
"Counted keyword with index false is not searchable":
  - do:
      field_caps:
        index: test-events-no-index
        fields: [ events ]

  - match: { fields.events.counted_keyword.searchable: false }
  - match: { fields.events.counted_keyword.aggregatable: true }

---
"Counted Terms agg only relies on doc values":
  # although the field is not indexed, the counted_terms agg should still work
  - do:
      search:
        index: test-events-no-index
        body:
          size: 0
          aggs:
            event_terms:
              counted_terms:
                field: events

  # The single document holds [ "a", "a", "b" ], hence counts of 2 for "a" and 1 for "b".
  - match: { aggregations.event_terms.buckets.0.key: "a" }
  - match: { aggregations.event_terms.buckets.0.doc_count: 2 }
  - match: { aggregations.event_terms.buckets.1.key: "b" }
  - match: { aggregations.event_terms.buckets.1.doc_count: 1 }
  - length: { aggregations.event_terms.buckets: 2 }

0 comments on commit 53296e2

Please sign in to comment.