From 53296e218bceb555290dadd502ecfc497bb5506f Mon Sep 17 00:00:00 2001 From: Daniel Mitterdorfer Date: Fri, 22 Dec 2023 11:25:50 +0100 Subject: [PATCH] Add index mapping parameter for counted_keyword (#103646) With this commit we add a new mapping parameter `index` to the `counted_keyword` mapping type. This allows to reduce disk usage for use cases where indexed fields are not required. Relates #101826 --- docs/changelog/103646.yaml | 5 ++ .../CountedKeywordFieldMapper.java | 39 +++++++++----- .../CountedKeywordFieldMapperTests.java | 13 +++++ .../test/counted_keyword/10_basic.yml | 10 ++++ .../test/counted_keyword/20_no_index.yml | 54 +++++++++++++++++++ 5 files changed, 109 insertions(+), 12 deletions(-) create mode 100644 docs/changelog/103646.yaml create mode 100644 x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/counted_keyword/20_no_index.yml diff --git a/docs/changelog/103646.yaml b/docs/changelog/103646.yaml new file mode 100644 index 0000000000000..b7a6fae025771 --- /dev/null +++ b/docs/changelog/103646.yaml @@ -0,0 +1,5 @@ +pr: 103646 +summary: Add index mapping parameter for `counted_keyword` +area: Aggregations +type: enhancement +issues: [] diff --git a/x-pack/plugin/mapper-counted-keyword/src/main/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapper.java b/x-pack/plugin/mapper-counted-keyword/src/main/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapper.java index d04bb88325cc7..ad5e224efd5db 100644 --- a/x-pack/plugin/mapper-counted-keyword/src/main/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapper.java +++ b/x-pack/plugin/mapper-counted-keyword/src/main/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapper.java @@ -78,16 +78,24 @@ public class CountedKeywordFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "counted_keyword"; public static final String COUNT_FIELD_NAME_SUFFIX = "_count"; - public static final FieldType FIELD_TYPE; + private static final FieldType FIELD_TYPE_INDEXED; + private static final FieldType FIELD_TYPE_NOT_INDEXED; static { - FieldType ft = new FieldType(); - ft.setDocValuesType(DocValuesType.SORTED_SET); - ft.setTokenized(false); - ft.setOmitNorms(true); - ft.setIndexOptions(IndexOptions.DOCS); - ft.freeze(); - FIELD_TYPE = freezeAndDeduplicateFieldType(ft); + FieldType indexed = new FieldType(); + indexed.setDocValuesType(DocValuesType.SORTED_SET); + indexed.setTokenized(false); + indexed.setOmitNorms(true); + indexed.setIndexOptions(IndexOptions.DOCS); + FIELD_TYPE_INDEXED = freezeAndDeduplicateFieldType(indexed); + + FieldType notIndexed = new FieldType(); + notIndexed.setDocValuesType(DocValuesType.SORTED_SET); + notIndexed.setTokenized(false); + notIndexed.setOmitNorms(true); + notIndexed.setIndexOptions(IndexOptions.NONE); + FIELD_TYPE_NOT_INDEXED = freezeAndDeduplicateFieldType(notIndexed); + } private static class CountedKeywordFieldType extends StringFieldType { @@ -261,7 +269,12 @@ public TermsEnum termsEnum() throws IOException { } } + private static CountedKeywordFieldMapper toType(FieldMapper in) { + return (CountedKeywordFieldMapper) in; + } + public static class Builder extends FieldMapper.Builder { + private final Parameter indexed = Parameter.indexParam(m -> toType(m).mappedFieldType.isIndexed(), true); private final Parameter> meta = Parameter.metaParam(); protected Builder(String name) { @@ -270,22 +283,24 @@ protected Builder(String name) { @Override protected Parameter[] getParameters() { - return new Parameter[] { meta }; + return new Parameter[] { meta, indexed }; } @Override public FieldMapper build(MapperBuilderContext context) { BinaryFieldMapper countFieldMapper = new BinaryFieldMapper.Builder(name + COUNT_FIELD_NAME_SUFFIX, true).build(context); + boolean isIndexed = indexed.getValue(); + FieldType ft = isIndexed ? FIELD_TYPE_INDEXED : FIELD_TYPE_NOT_INDEXED; return new CountedKeywordFieldMapper( name, - FIELD_TYPE, + ft, new CountedKeywordFieldType( context.buildFullName(name), - true, + isIndexed, false, true, - new TextSearchInfo(FIELD_TYPE, null, KEYWORD_ANALYZER, KEYWORD_ANALYZER), + new TextSearchInfo(ft, null, KEYWORD_ANALYZER, KEYWORD_ANALYZER), meta.getValue(), countFieldMapper.fieldType() ), diff --git a/x-pack/plugin/mapper-counted-keyword/src/test/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapperTests.java b/x-pack/plugin/mapper-counted-keyword/src/test/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapperTests.java index 1468ed456b132..2ffd4468c814a 100644 --- a/x-pack/plugin/mapper-counted-keyword/src/test/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapperTests.java +++ b/x-pack/plugin/mapper-counted-keyword/src/test/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapperTests.java @@ -7,6 +7,8 @@ package org.elasticsearch.xpack.countedkeyword; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.MappedFieldType; @@ -82,4 +84,15 @@ public void testDottedFieldNames() throws IOException { List fields = doc.rootDoc().getFields("dotted.field"); assertEquals(1, fields.size()); } + + public void testDisableIndex() throws IOException { + DocumentMapper mapper = createDocumentMapper( + fieldMapping(b -> b.field("type", CountedKeywordFieldMapper.CONTENT_TYPE).field("index", false)) + ); + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); + List fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.size()); + assertEquals(IndexOptions.NONE, fields.get(0).fieldType().indexOptions()); + assertEquals(DocValuesType.SORTED_SET, fields.get(0).fieldType().docValuesType()); + } } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/counted_keyword/10_basic.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/counted_keyword/10_basic.yml index d0f7c7636582f..4a0d6387683ac 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/counted_keyword/10_basic.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/counted_keyword/10_basic.yml @@ -29,6 +29,16 @@ setup: - do: indices.refresh: { } +--- +"Counted keyword is searchable by default": + - do: + field_caps: + index: test-events + fields: [ events ] + + - match: { fields.events.counted_keyword.searchable: true } + - match: { fields.events.counted_keyword.aggregatable: true } + --- "Counted Terms agg": diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/counted_keyword/20_no_index.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/counted_keyword/20_no_index.yml new file mode 100644 index 0000000000000..1fe48207b5586 --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/counted_keyword/20_no_index.yml @@ -0,0 +1,54 @@ +setup: + + - skip: + version: " - 8.12.99" + reason: "index option on counted_keyword was added in 8.13" + + - do: + indices.create: + index: test-events-no-index + body: + mappings: + properties: + events: + type: counted_keyword + index: false + + - do: + index: + index: test-events-no-index + id: "1" + body: { "events": [ "a", "a", "b" ] } + + + - do: + indices.refresh: { } + +--- +"Counted keyword with index false is not searchable": + - do: + field_caps: + index: test-events-no-index + fields: [ events ] + + - match: { fields.events.counted_keyword.searchable: false } + - match: { fields.events.counted_keyword.aggregatable: true } + +--- +"Counted Terms agg only relies on doc values": +# although the field is not indexed, the counted_terms agg should still work + - do: + search: + index: test-events-no-index + body: + size: 0 + aggs: + event_terms: + counted_terms: + field: events + + - match: { aggregations.event_terms.buckets.0.key: "a" } + - match: { aggregations.event_terms.buckets.0.doc_count: 2 } + - match: { aggregations.event_terms.buckets.1.key: "b" } + - match: { aggregations.event_terms.buckets.1.doc_count: 1 } + - length: { aggregations.event_terms.buckets: 2 }