From 8c41366a3bf34b233fd19d7dea399131870dd7c6 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Thu, 20 Nov 2025 09:51:05 -0500 Subject: [PATCH 01/36] wiring up CSV tests (in progress) --- .../xpack/esql/core/plugin/EsqlCorePlugin.java | 4 ++++ .../xpack/esql/core/type/DataType.java | 10 ++++++++++ .../xpack/esql/CsvTestsDataLoader.java | 4 +++- .../main/resources/data/tdigest_standard_index.csv | 2 ++ .../resources/mapping-tdigest_standard_index.json | 13 +++++++++++++ .../src/main/resources/tdigest.csv-spec | 9 +++++++++ .../xpack/esql/action/EsqlCapabilities.java | 2 ++ 7 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-tdigest_standard_index.json create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/tdigest.csv-spec diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java index da808b0083d22..5bcff64bc7149 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java @@ -14,4 +14,8 @@ public class EsqlCorePlugin extends Plugin implements ExtensiblePlugin { public static final FeatureFlag EXPONENTIAL_HISTOGRAM_FEATURE_FLAG = new FeatureFlag("esql_exponential_histogram"); + + // Note, there is also a feature flag for the field type in the analytics plugin, but for visibility reasons we need + // another one here. 
+ public static final FeatureFlag T_DIGEST_ESQL_SUPPORT = new FeatureFlag("esql_t_digest_support"); } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java index 8270605b97ca6..6e1659415eb88 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java @@ -348,6 +348,16 @@ public enum DataType implements Writeable { .underConstruction() ), + /* + TDIGEST( + builder().esType("exponential_histogram") + .estimatedSize(16 * 160)// guess 160 buckets (OTEL default for positive values only histograms) with 16 bytes per bucket + .docValues() + .underConstruction() + ), + + */ + /** * Fields with this type are dense vectors, represented as an array of float values. */ diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index bd42ee08ed384..b3320be613d20 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -173,6 +173,7 @@ public class CsvTestsDataLoader { private static final TestDataset COLORS = new TestDataset("colors"); private static final TestDataset COLORS_CMYK_LOOKUP = new TestDataset("colors_cmyk").withSetting("lookup-settings.json"); private static final TestDataset EXP_HISTO_SAMPLE = new TestDataset("exp_histo_sample"); + private static final TestDataset TDIGEST_STANDARD_INDEX = new TestDataset("tdigest_standard_index"); public static final Map CSV_DATASET_MAP = Map.ofEntries( Map.entry(EMPLOYEES.indexName, EMPLOYEES), @@ -241,7 +242,8 @@ public class 
CsvTestsDataLoader { Map.entry(COLORS_CMYK_LOOKUP.indexName, COLORS_CMYK_LOOKUP), Map.entry(MULTI_COLUMN_JOINABLE.indexName, MULTI_COLUMN_JOINABLE), Map.entry(MULTI_COLUMN_JOINABLE_LOOKUP.indexName, MULTI_COLUMN_JOINABLE_LOOKUP), - Map.entry(EXP_HISTO_SAMPLE.indexName, EXP_HISTO_SAMPLE) + Map.entry(EXP_HISTO_SAMPLE.indexName, EXP_HISTO_SAMPLE), + Map.entry(TDIGEST_STANDARD_INDEX.indexName, TDIGEST_STANDARD_INDEX) ); private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json"); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv new file mode 100644 index 0000000000000..c02d5f54f904e --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv @@ -0,0 +1,2 @@ +@timestamp:date,instance:keyword,responseTime:tdigest +2025-01-01T00:00:00Z,hand-rolled,{"centroids": [2.1, 3.2]\,"counts": [1, 1]} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-tdigest_standard_index.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-tdigest_standard_index.json new file mode 100644 index 0000000000000..9ed14cd36e622 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-tdigest_standard_index.json @@ -0,0 +1,13 @@ +{ + "properties": { + "@timestamp": { + "type": "date" + }, + "instance": { + "type": "keyword" + }, + "responseTime": { + "type": "tdigest" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tdigest.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tdigest.csv-spec new file mode 100644 index 0000000000000..ab444227bb9a5 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tdigest.csv-spec @@ -0,0 +1,9 @@ +Basic retrieval of tdigest values +required_capability: tdigest_field_type_basic_functionality + +FROM 
tdigest_standard_index | KEEP *; + +@timestamp:date | instance:keyword | responseTime:tdigest +2025-01-01T00:00:00Z | hand-rolled | {"centroids": [2.1, 3.2]\,"counts": [1, 1]} + +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index d7683fe379d06..14b9cafa5e5f8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -20,6 +20,7 @@ import java.util.Set; import static org.elasticsearch.xpack.esql.core.plugin.EsqlCorePlugin.EXPONENTIAL_HISTOGRAM_FEATURE_FLAG; +import static org.elasticsearch.xpack.esql.core.plugin.EsqlCorePlugin.T_DIGEST_ESQL_SUPPORT; /** * A {@link Set} of "capabilities" supported by the {@link RestEsqlQueryAction} @@ -1573,6 +1574,7 @@ public enum Cap { */ EXPONENTIAL_HISTOGRAM_MINMAX_SUPPORT(EXPONENTIAL_HISTOGRAM_FEATURE_FLAG), + TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY(T_DIGEST_ESQL_SUPPORT), /** * Support for exponential_histogram type in SUM and AVG aggregation. 
*/ From b63668b396ee2541c1e2e45a055af8085b897c0e Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Thu, 20 Nov 2025 17:10:55 -0500 Subject: [PATCH 02/36] I'm not proud of this, but it works and it only took an hour --- libs/tdigest/build.gradle | 1 + libs/tdigest/src/main/java/module-info.java | 2 + .../tdigest/parsing}/TDigestParser.java | 98 +++++++++++++------ .../tdigest/parsing/package-info.java | 15 +++ .../common/xcontent/XContentParserUtils.java | 2 +- .../analytics/mapper/TDigestFieldMapper.java | 9 +- 6 files changed, 94 insertions(+), 33 deletions(-) rename {x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper => libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing}/TDigestParser.java (69%) create mode 100644 libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/package-info.java diff --git a/libs/tdigest/build.gradle b/libs/tdigest/build.gradle index 47fc0dbc239cf..6cdd3bb12ba30 100644 --- a/libs/tdigest/build.gradle +++ b/libs/tdigest/build.gradle @@ -23,6 +23,7 @@ apply plugin: 'elasticsearch.publish' dependencies { api project(':libs:core') + api project(':libs:x-content') api "org.apache.lucene:lucene-core:${versions.lucene}" testImplementation(project(":test:framework")) { diff --git a/libs/tdigest/src/main/java/module-info.java b/libs/tdigest/src/main/java/module-info.java index 79ddbe88ab3d3..beae047e0d777 100644 --- a/libs/tdigest/src/main/java/module-info.java +++ b/libs/tdigest/src/main/java/module-info.java @@ -20,7 +20,9 @@ module org.elasticsearch.tdigest { requires org.elasticsearch.base; requires org.apache.lucene.core; + requires org.elasticsearch.xcontent; exports org.elasticsearch.tdigest; exports org.elasticsearch.tdigest.arrays; + exports org.elasticsearch.tdigest.parsing; } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/TDigestParser.java similarity index 69% rename 
from x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java rename to libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/TDigestParser.java index 2cd536fa74035..d67045aef8e84 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java +++ b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/TDigestParser.java @@ -1,28 +1,29 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
*/ -package org.elasticsearch.xpack.analytics.mapper; +package org.elasticsearch.tdigest.parsing; -import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.XContentLocation; import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; import java.util.ArrayList; import java.util.List; - -import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; -import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.CENTROIDS_NAME; -import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.COUNTS_NAME; -import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.MAX_FIELD_NAME; -import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.MIN_FIELD_NAME; -import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.SUM_FIELD_NAME; +import java.util.function.BiFunction; public class TDigestParser { + public static final String CENTROIDS_NAME = "centroids"; + public static final String COUNTS_NAME = "counts"; + public static final String SUM_FIELD_NAME = "sum"; + public static final String MIN_FIELD_NAME = "min"; + public static final String MAX_FIELD_NAME = "max"; private static final ParseField COUNTS_FIELD = new ParseField(COUNTS_NAME); private static final ParseField CENTROIDS_FIELD = new ParseField(CENTROIDS_NAME); @@ -91,9 +92,15 @@ public Long count() { * * @param mappedFieldName the name of the field being parsed, used for error messages * @param parser the parser to use + * @param documentParsingExceptionProvider factory function for generating document parsing exceptions. Required for visibility. 
* @return the parsed histogram */ - public static ParsedTDigest parse(String mappedFieldName, XContentParser parser) throws IOException { + public static ParsedTDigest parse( + String mappedFieldName, + XContentParser parser, + BiFunction documentParsingExceptionProvider, + ParsingExceptionProvider parsingExceptionProvider + ) throws IOException { ArrayList centroids = null; ArrayList counts = null; Double sum = null; @@ -102,26 +109,26 @@ public static ParsedTDigest parse(String mappedFieldName, XContentParser parser) XContentParser.Token token = parser.currentToken(); while (token != XContentParser.Token.END_OBJECT) { // should be a field - ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, parser); + ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, parser, parsingExceptionProvider); String fieldName = parser.currentName(); if (fieldName.equals(CENTROIDS_FIELD.getPreferredName())) { - centroids = getCentroids(mappedFieldName, parser); + centroids = getCentroids(mappedFieldName, parser, documentParsingExceptionProvider, parsingExceptionProvider); } else if (fieldName.equals(COUNTS_FIELD.getPreferredName())) { - counts = getCounts(mappedFieldName, parser); + counts = getCounts(mappedFieldName, parser, documentParsingExceptionProvider, parsingExceptionProvider); } else if (fieldName.equals(SUM_FIELD.getPreferredName())) { token = parser.nextToken(); - ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser, parsingExceptionProvider); sum = parser.doubleValue(); } else if (fieldName.equals(MIN_FIELD.getPreferredName())) { token = parser.nextToken(); - ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser, parsingExceptionProvider); min = parser.doubleValue(); } else if (fieldName.equals(MAX_FIELD.getPreferredName())) { token = parser.nextToken(); - 
ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser, parsingExceptionProvider); max = parser.doubleValue(); } else { - throw new DocumentParsingException( + throw documentParsingExceptionProvider.apply( parser.getTokenLocation(), "error parsing field [" + mappedFieldName + "], with unknown parameter [" + fieldName + "]" ); @@ -129,19 +136,19 @@ public static ParsedTDigest parse(String mappedFieldName, XContentParser parser) token = parser.nextToken(); } if (centroids == null) { - throw new DocumentParsingException( + throw documentParsingExceptionProvider.apply( parser.getTokenLocation(), "error parsing field [" + mappedFieldName + "], expected field called [" + CENTROIDS_FIELD.getPreferredName() + "]" ); } if (counts == null) { - throw new DocumentParsingException( + throw documentParsingExceptionProvider.apply( parser.getTokenLocation(), "error parsing field [" + mappedFieldName + "], expected field called [" + COUNTS_FIELD.getPreferredName() + "]" ); } if (centroids.size() != counts.size()) { - throw new DocumentParsingException( + throw documentParsingExceptionProvider.apply( parser.getTokenLocation(), "error parsing field [" + mappedFieldName @@ -165,20 +172,25 @@ public static ParsedTDigest parse(String mappedFieldName, XContentParser parser) return new ParsedTDigest(centroids, counts, sum, min, max); } - private static ArrayList getCounts(String mappedFieldName, XContentParser parser) throws IOException { + private static ArrayList getCounts( + String mappedFieldName, + XContentParser parser, + BiFunction documentParsingExceptionProvider, + ParsingExceptionProvider parsingExceptionProvider + ) throws IOException { ArrayList counts; XContentParser.Token token; token = parser.nextToken(); // should be an array - ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser); + ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser, 
parsingExceptionProvider); counts = new ArrayList<>(); token = parser.nextToken(); while (token != XContentParser.Token.END_ARRAY) { // should be a number - ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser, parsingExceptionProvider); long count = parser.longValue(); if (count < 0) { - throw new DocumentParsingException( + throw documentParsingExceptionProvider.apply( parser.getTokenLocation(), "error parsing field [" + mappedFieldName + "], [" + COUNTS_FIELD + "] elements must be >= 0 but got " + count ); @@ -189,22 +201,27 @@ private static ArrayList getCounts(String mappedFieldName, XContentParser return counts; } - private static ArrayList getCentroids(String mappedFieldName, XContentParser parser) throws IOException { + private static ArrayList getCentroids( + String mappedFieldName, + XContentParser parser, + BiFunction documentParsingExceptionProvider, + ParsingExceptionProvider parsingExceptionProvider + ) throws IOException { XContentParser.Token token; ArrayList centroids; token = parser.nextToken(); // should be an array - ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser); + ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser, parsingExceptionProvider); centroids = new ArrayList<>(); token = parser.nextToken(); double previousVal = -Double.MAX_VALUE; while (token != XContentParser.Token.END_ARRAY) { // should be a number - ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser); + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser, parsingExceptionProvider); double val = parser.doubleValue(); if (val < previousVal) { // centroids must be in increasing order - throw new DocumentParsingException( + throw documentParsingExceptionProvider.apply( parser.getTokenLocation(), "error parsing field [" + mappedFieldName @@ -224,4 +241,23 @@ private static ArrayList getCentroids(String mappedFieldName, 
XContentPa return centroids; } + /** + * Interface for throwing a parsing exception, needed for visibility + */ + @FunctionalInterface + public interface ParsingExceptionProvider { + RuntimeException apply(XContentParser parser, XContentParser.Token expected, XContentParser.Token actual) throws IOException; + } + + public static void ensureExpectedToken( + XContentParser.Token expected, + XContentParser.Token actual, + XContentParser parser, + ParsingExceptionProvider parsingExceptionProvider + ) throws IOException { + if (actual != expected) { + throw parsingExceptionProvider.apply(parser, expected, actual); + } + } + } diff --git a/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/package-info.java b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/package-info.java new file mode 100644 index 0000000000000..3ddf75a66f584 --- /dev/null +++ b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/package-info.java @@ -0,0 +1,15 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +/** + * Parsing package contains Elasticsearch specific classes for serializing and deserializing + * t-digests from various formats via Elasticsearch's XContent abstraction layer. 
+ */ + +package org.elasticsearch.tdigest.parsing; diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java index 6390e62f9758f..48ef941c60f4d 100644 --- a/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java +++ b/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java @@ -86,7 +86,7 @@ public static void expectValueToken(Token actual, XContentParser parser) { } } - private static ParsingException parsingException(XContentParser parser, Token expected, Token actual) { + public static ParsingException parsingException(XContentParser parser, Token expected, Token actual) { return new ParsingException( parser.getTokenLocation(), String.format(Locale.ROOT, "Failed to parse object: expecting token of type [%s] but found [%s]", expected, actual) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java index 2f331ad93d3cf..f411f4f3f33a8 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java @@ -23,6 +23,7 @@ import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.FeatureFlag; +import org.elasticsearch.common.xcontent.XContentParserUtils; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.FormattedDocValues; @@ -54,6 +55,7 @@ import org.elasticsearch.search.aggregations.metrics.TDigestState; import org.elasticsearch.search.sort.BucketedSort; import 
org.elasticsearch.search.sort.SortOrder; +import org.elasticsearch.tdigest.parsing.TDigestParser; import org.elasticsearch.xcontent.CopyingXContentParser; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; @@ -368,7 +370,12 @@ public void parse(DocumentParserContext context) throws IOException { } subParser.nextToken(); // TODO: Here we should build a t-digest out of the input, based on the settings on the field - TDigestParser.ParsedTDigest parsedTDigest = TDigestParser.parse(fullPath(), subParser); + TDigestParser.ParsedTDigest parsedTDigest = TDigestParser.parse( + fullPath(), + subParser, + DocumentParsingException::new, + XContentParserUtils::parsingException + ); BytesStreamOutput streamOutput = new BytesStreamOutput(); From c205ddb95a0ba23d3617f1a101d5f8872d543539 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Thu, 20 Nov 2025 17:24:09 -0500 Subject: [PATCH 03/36] wire up the parser, now that it's visible --- .../xpack/esql/CsvTestUtils.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java index 349cc8c3e6a6f..0e69e7c544c69 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java @@ -15,6 +15,7 @@ import org.elasticsearch.common.time.DateFormatters; import org.elasticsearch.common.time.DateUtils; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.xcontent.XContentParserUtils; import org.elasticsearch.compute.data.AggregateMetricDoubleBlockBuilder; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; @@ -32,8 +33,10 @@ import 
org.elasticsearch.exponentialhistogram.ExponentialHistogramXContent; import org.elasticsearch.geometry.utils.Geohash; import org.elasticsearch.h3.H3; +import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.logging.Logger; import org.elasticsearch.search.aggregations.bucket.geogrid.GeoTileUtils; +import org.elasticsearch.tdigest.parsing.TDigestParser; import org.elasticsearch.test.VersionUtils; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParserConfiguration; @@ -505,6 +508,7 @@ public enum Type { ), DENSE_VECTOR(Float::parseFloat, Float.class, false), EXPONENTIAL_HISTOGRAM(CsvTestUtils::parseExponentialHistogram, ExponentialHistogram.class), + TDIGEST(CsvTestUtils::parseTDigest, TDigestParser.ParsedTDigest.class), UNSUPPORTED(Type::convertUnsupported, Void.class); private static Void convertUnsupported(String s) { @@ -717,4 +721,20 @@ private static ExponentialHistogram parseExponentialHistogram(@Nullable String j throw new IllegalArgumentException(e); } } + + private static TDigestParser.ParsedTDigest parseTDigest(@Nullable String json) { + if (json == null) { + return null; + } + try (XContentParser parser = JsonXContent.jsonXContent.createParser(XContentParserConfiguration.EMPTY, json)) { + return TDigestParser.parse( + "field from test data", + parser, + DocumentParsingException::new, + XContentParserUtils::parsingException + ); + } catch (IOException e) { + throw new IllegalArgumentException(e); + } + } } From 0b3b329b9d0476ff8ac1ad6003c63fb862c693b8 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 20 Nov 2025 22:36:59 +0000 Subject: [PATCH 04/36] [CI] Auto commit changes from spotless --- .../java/org/elasticsearch/xpack/esql/core/type/DataType.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java 
b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java index 6e1659415eb88..1c02630a4ece4 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java @@ -355,7 +355,7 @@ public enum DataType implements Writeable { .docValues() .underConstruction() ), - + */ /** From f6022dc69f02594788837285bdcd3903caa90bc0 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 25 Nov 2025 12:04:37 -0500 Subject: [PATCH 05/36] checkpoint, doesn't compile --- .../compute/data/BlockFactory.java | 4 + .../compute/data/BlockUtils.java | 5 + .../compute/data/ElementType.java | 6 + .../compute/data/TDigestArrayBlock.java | 40 ++++++ .../compute/data/TDigestBlock.java | 26 +++- .../compute/data/TDigestBlockBuilder.java | 136 ++++++++++++++++++ .../compute/data/TDigestHolder.java | 33 +++++ 7 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java index 39b06dcb684cf..5db38b62d234c 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java @@ -496,6 +496,10 @@ public ExponentialHistogramBlockBuilder newExponentialHistogramBlockBuilder(int return new ExponentialHistogramBlockBuilder(estimatedSize, this); } + public TDigestBlockBuilder newTDigestBlockBuilder(int estimatedSize) { + return new TDigestBlockBuilder(estimatedSize, this); + } + public final ExponentialHistogramBlock 
newConstantExponentialHistogramBlock(ExponentialHistogram value, int positionCount) { try (ExponentialHistogramBlockBuilder builder = newExponentialHistogramBlockBuilder(positionCount)) { for (int i = 0; i < positionCount; i++) { diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java index 7dfe664364e51..550912e0ac019 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java @@ -316,6 +316,11 @@ yield new AggregateMetricDoubleLiteral( // return a copy so that the returned value is not bound to the lifetime of the block yield ExponentialHistogram.builder(histogram, ExponentialHistogramCircuitBreaker.noop()).build(); } + case TDIGEST -> { + TDigestBlock tDigestBlock = (TDigestBlock) block; + yield new TDigestHolder() + + } case UNKNOWN -> throw new IllegalArgumentException("can't read values from [" + block + "]"); }; } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java index 52a7853e56182..fcdc2769f4525 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java @@ -75,6 +75,12 @@ public enum ElementType { "ExponentialHistogram", BlockFactory::newExponentialHistogramBlockBuilder, ExponentialHistogramArrayBlock::readFrom + ), + TDIGEST( + 12, + "TDigest", + BlockFactory::newTDigestBlockBuilder, + TDigestArrayBlock::readFrom ); private static final TransportVersion ESQL_SERIALIZE_BLOCK_TYPE_CODE = TransportVersion.fromName("esql_serialize_block_type_code"); diff --git 
a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java index d98929b727091..c8ae2720d5456 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java @@ -200,6 +200,22 @@ public Block deepCopy(BlockFactory blockFactory) { return new TDigestArrayBlock(copiedEncodedDigests, copiedMinima, copiedMaxima, copiedSums, copiedValueCounts); } + void copyInto( + BytesRefBlock.Builder encodedDigestsBuilder, + DoubleBlock.Builder minimaBuilder, + DoubleBlock.Builder maximaBuilder, + DoubleBlock.Builder sumsBuilder, + LongBlock.Builder valueCountsBuilder, + int beginInclusive, + int endExclusive + ) { + encodedDigestsBuilder.copyFrom(encodedDigests, beginInclusive, endExclusive); + minimaBuilder.copyFrom(minima, beginInclusive, endExclusive); + maximaBuilder.copyFrom(maxima, beginInclusive, endExclusive); + sumsBuilder.copyFrom(sums, beginInclusive, endExclusive); + valueCountsBuilder.copyFrom(valueCounts, beginInclusive, endExclusive); + } + @Override public void writeTo(StreamOutput out) throws IOException { Block.writeTypedBlock(encodedDigests, out); @@ -209,6 +225,30 @@ public void writeTo(StreamOutput out) throws IOException { Block.writeTypedBlock(valueCounts, out); } + public static TDigestArrayBlock readFrom(BlockStreamInput in) throws IOException { + BytesRefBlock encodedDigests = null; + DoubleBlock minima = null; + DoubleBlock maxima = null; + DoubleBlock sums = null; + LongBlock valueCounts = null; + + boolean success = false; + try { + encodedDigests = (BytesRefBlock) Block.readTypedBlock(in); + minima = (DoubleBlock) Block.readTypedBlock(in); + maxima = (DoubleBlock) Block.readTypedBlock(in); + sums = (DoubleBlock) Block.readTypedBlock(in); + valueCounts = (LongBlock) 
Block.readTypedBlock(in); + success = true; + } finally { + if (success == false) { + Releasables.close(minima, maxima, sums, valueCounts, encodedDigests); + } + } + return new TDigestArrayBlock(encodedDigests, minima, maxima, sums, valueCounts); + } + + @Override public long ramBytesUsed() { long bytes = 0; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java index 315dbbb7b52f8..5596392c00b01 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java @@ -7,4 +7,28 @@ package org.elasticsearch.compute.data; -public sealed interface TDigestBlock extends Block permits ConstantNullBlock, TDigestArrayBlock {} +import org.elasticsearch.index.mapper.BlockLoader; + +public sealed interface TDigestBlock extends Block permits ConstantNullBlock, TDigestArrayBlock { + + /** + * Builder for {@link TDigestBlock} + */ + sealed interface Builder extends Block.Builder, BlockLoader.TDigestBuilder permits TDigestBlockBuilder { + + /** + * Copy the values in {@code block} from the given position into this builder. 
+ */ + TDigestBlock.Builder copyFrom(TDigestBlock block, int position); + + @Override + TDigestBlock build(); + + DoubleBlock minimaBlock(); + DoubleBlock maximaBlock(); + DoubleBlock sumsBlock(); + LongBlock valueCountsBlock(); + BytesRefBlock encodedHistogramsBlock(); + } + +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java new file mode 100644 index 0000000000000..ef80afcb866fd --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java @@ -0,0 +1,136 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.data; + +import org.elasticsearch.core.Releasables; +import org.elasticsearch.index.mapper.BlockLoader; + +public final class TDigestBlockBuilder implements TDigestBlock.Builder { + + private final BytesRefBlock.Builder encodedDigestsBuilder; + private final DoubleBlock.Builder minimaBuilder; + private final DoubleBlock.Builder maximaBuilder; + private final DoubleBlock.Builder sumsBuilder; + private final LongBlock.Builder valueCountsBuilder; + + public TDigestBlockBuilder(int size, BlockFactory blockFactory) { + BytesRefBlock.Builder encodedDigestsBuilder = null; + DoubleBlock.Builder minimaBuilder = null; + DoubleBlock.Builder maximaBuilder = null; + DoubleBlock.Builder sumsBuilder = null; + LongBlock.Builder valueCountsBuilder = null; + boolean success = false; + try { + encodedDigestsBuilder = blockFactory.newBytesRefBlockBuilder(size); + minimaBuilder = blockFactory.newDoubleBlockBuilder(size); + maximaBuilder = blockFactory.newDoubleBlockBuilder(size); + sumsBuilder = 
blockFactory.newDoubleBlockBuilder(size); + valueCountsBuilder = blockFactory.newLongBlockBuilder(size); + this.encodedDigestsBuilder = encodedDigestsBuilder; + this.minimaBuilder = minimaBuilder; + this.maximaBuilder = maximaBuilder; + this.sumsBuilder = sumsBuilder; + this.valueCountsBuilder = valueCountsBuilder; + success = true; + } finally { + if (success == false) { + Releasables.close(encodedDigestsBuilder, minimaBuilder, maximaBuilder, sumsBuilder, valueCountsBuilder); + } + } + } + + @Override + public TDigestBlockBuilder copyFrom(Block block, int beginInclusive, int endExclusive) { + if (block.areAllValuesNull()) { + for (int i = beginInclusive; i < endExclusive; i++) { + appendNull(); + } + } else { + TDigestArrayBlock digestBlock = (TDigestArrayBlock) block; + digestBlock.copyInto( + encodedDigestsBuilder, + minimaBuilder, + maximaBuilder, + sumsBuilder, + valueCountsBuilder, + beginInclusive, + endExclusive + ); + } + return this; + } + + @Override + public TDigestBlock.Builder copyFrom(TDigestBlock block, int position) { + copyFrom(block, position, position + 1); + return this; + } + + + @Override + public Block.Builder appendNull() { + throw new UnsupportedOperationException(); + } + + @Override + public Block.Builder beginPositionEntry() { + throw new UnsupportedOperationException(); + } + + @Override + public Block.Builder endPositionEntry() { + throw new UnsupportedOperationException(); + } + + @Override + public Block.Builder mvOrdering(Block.MvOrdering mvOrdering) { + assert mvOrdering == Block.MvOrdering.UNORDERED + : "TDigests don't have a natural order, so it doesn't make sense to call this"; + return this; + } + + @Override + public long estimatedBytes() { + return 0; + } + + @Override + public TDigestBlock build() { + return null; + } + + @Override + public BlockLoader.DoubleBuilder minima() { + return null; + } + + @Override + public BlockLoader.DoubleBuilder maxima() { + return null; + } + + @Override + public BlockLoader.DoubleBuilder 
sums() { + return null; + } + + @Override + public BlockLoader.LongBuilder valueCounts() { + return null; + } + + @Override + public BlockLoader.BytesRefBuilder encodedDigests() { + return null; + } + + @Override + public void close() { + + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java new file mode 100644 index 0000000000000..fb7c9a350ec35 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java @@ -0,0 +1,33 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.data; + +import org.apache.lucene.util.BytesRef; + +/** + * This exists to hold the values from a {@link TDigestBlock}. It is roughly parallel to + * {@link org.elasticsearch.search.aggregations.metrics.TDigestState} in classic aggregations, which we are not using directly because + * the serialization format is pretty bad for ESQL's use case (specifically, encoding the near-constant compression and merge strategy + * data inline as opposed to in a dedicated column isn't great). 
+ */ +public class TDigestHolder { + + private final double min; + private final double max; + private final double sum; + private final long valueCount; + private final BytesRef encodedDigest; + + public TDigestHolder(BytesRef encodedDigest, double min, double max, double sum, long valueCount) { + this.encodedDigest = encodedDigest; + this.min = min; + this.max = max; + this.sum = sum; + this.valueCount = valueCount; + } +} From fc5e4970b14a50de7919a5577c52e3144650e40e Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 25 Nov 2025 13:44:49 -0500 Subject: [PATCH 06/36] checkpoint, doesn't compile --- .../analytics/mapper/TDigestFieldMapper.java | 32 ++++++++------ .../compute/data/BlockUtils.java | 4 +- .../compute/data/ConstantNullBlock.java | 6 +++ .../compute/data/TDigestArrayBlock.java | 12 ++++++ .../compute/data/TDigestBlock.java | 8 +--- .../compute/data/TDigestHolder.java | 27 ++++++++++++ .../compute/operator/lookup/QueryList.java | 1 + .../compute/test/BlockTestUtils.java | 43 +++++++++++++++++++ .../xpack/esql/planner/AggregateMapper.java | 3 +- 9 files changed, 115 insertions(+), 21 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java index f411f4f3f33a8..f2dd3f32e7445 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java @@ -64,6 +64,7 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.util.List; import java.util.Map; import java.util.Objects; @@ -377,19 +378,7 @@ public void parse(DocumentParserContext context) throws IOException { XContentParserUtils::parsingException ); - BytesStreamOutput streamOutput = new BytesStreamOutput(); - - for (int i = 0; i < 
parsedTDigest.centroids().size(); i++) { - long count = parsedTDigest.counts().get(i); - assert count >= 0; - // we do not add elements with count == 0 - if (count > 0) { - streamOutput.writeVLong(count); - streamOutput.writeDouble(parsedTDigest.centroids().get(i)); - } - } - - BytesRef docValue = streamOutput.bytes().toBytesRef(); + BytesRef docValue = encodeCentroidsAndCounts(parsedTDigest.centroids(), parsedTDigest.counts()); Field digestField = new BinaryDocValuesField(fullPath(), docValue); // Add numeric doc values fields for the summary data @@ -460,6 +449,23 @@ public void parse(DocumentParserContext context) throws IOException { context.path().remove(); } + private static BytesRef encodeCentroidsAndCounts(List centroids, List counts) throws IOException { + BytesStreamOutput streamOutput = new BytesStreamOutput(); + + for (int i = 0; i < centroids.size(); i++) { + long count = counts.get(i); + assert count >= 0; + // we do not add elements with count == 0 + if (count > 0) { + streamOutput.writeVLong(count); + streamOutput.writeDouble(centroids.get(i)); + } + } + + BytesRef docValue = streamOutput.bytes().toBytesRef(); + return docValue; + } + private static String valuesCountSubFieldName(String fullPath) { return fullPath + "._values_count"; } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java index 550912e0ac019..c6348b9cd12d5 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java @@ -318,7 +318,9 @@ yield new AggregateMetricDoubleLiteral( } case TDIGEST -> { TDigestBlock tDigestBlock = (TDigestBlock) block; - yield new TDigestHolder() + // NOCOMMIT - probably something more sensible here. 
We presumably need to account for this memory in some way + BytesRef scratch = new BytesRef(); + yield tDigestBlock.getTDigestHolder(offset, scratch); } case UNKNOWN -> throw new IllegalArgumentException("can't read values from [" + block + "]"); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java index 92b6843ed253e..706b5e48b8f0f 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java @@ -301,6 +301,12 @@ public ExponentialHistogram getExponentialHistogram(int valueIndex, ExponentialH throw new UnsupportedOperationException("null block"); } + @Override + public TDigestHolder getTDigestHolder(int valueIndex, BytesRef scratch) { + assert false : "null block"; + throw new UnsupportedOperationException("null block"); + } + @Override public Block buildExponentialHistogramComponentBlock(Component component) { // if all histograms are null, the component block is also a constant null block with the same position count diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java index c8ae2720d5456..963b1b3d3de6b 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java @@ -7,6 +7,7 @@ package org.elasticsearch.compute.data; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.core.ReleasableIterator; @@ -257,4 +258,15 @@ public long ramBytesUsed() { } return 
bytes; } + + @Override + public TDigestHolder getTDigestHolder(int offset, BytesRef scratch) { + return new TDigestHolder( + encodedDigests.getBytesRef(offset, scratch), + minima.getDouble(offset), + maxima.getDouble(offset), + sums.getDouble(offset), + valueCounts.getLong(offset) + ); + } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java index 5596392c00b01..695a742e4a1df 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java @@ -7,6 +7,7 @@ package org.elasticsearch.compute.data; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.mapper.BlockLoader; public sealed interface TDigestBlock extends Block permits ConstantNullBlock, TDigestArrayBlock { @@ -23,12 +24,7 @@ sealed interface Builder extends Block.Builder, BlockLoader.TDigestBuilder permi @Override TDigestBlock build(); - - DoubleBlock minimaBlock(); - DoubleBlock maximaBlock(); - DoubleBlock sumsBlock(); - LongBlock valueCountsBlock(); - BytesRefBlock encodedHistogramsBlock(); } + TDigestHolder getTDigestHolder(int offset, BytesRef scratch); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java index fb7c9a350ec35..e7cd02f849ed0 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java @@ -8,6 +8,10 @@ package org.elasticsearch.compute.data; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.BytesStreamOutput; + +import java.io.IOException; +import java.util.List; /** * This exists to 
hold the values from a {@link TDigestBlock}. It is roughly parallel to @@ -30,4 +34,27 @@ public TDigestHolder(BytesRef encodedDigest, double min, double max, double sum, this.sum = sum; this.valueCount = valueCount; } + + public TDigestHolder(List centroids, List counts, double min, double max, double sum, long valueCount) + throws IOException { + this(encodeCentroidsAndCounts(centroids, counts), min, max, sum, valueCount); + } + + private static BytesRef encodeCentroidsAndCounts(List centroids, List counts) throws IOException { + // TODO: This is copied from the method of the same name in TDigestFieldMapper. It would be nice to find a way to reuse that code + BytesStreamOutput streamOutput = new BytesStreamOutput(); + + for (int i = 0; i < centroids.size(); i++) { + long count = counts.get(i); + assert count >= 0; + // we do not add elements with count == 0 + if (count > 0) { + streamOutput.writeVLong(count); + streamOutput.writeDouble(centroids.get(i)); + } + } + + BytesRef docValue = streamOutput.bytes().toBytesRef(); + return docValue; + } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java index c42946ed71777..975d85c55e6af 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java @@ -189,6 +189,7 @@ public static IntFunction createBlockValueReader(Block block) { case COMPOSITE -> throw new IllegalArgumentException("can't read values from [composite] block"); case AGGREGATE_METRIC_DOUBLE -> throw new IllegalArgumentException("can't read values from [aggregate metric double] block"); case EXPONENTIAL_HISTOGRAM -> throw new IllegalArgumentException("can't read values from [exponential histogram] block"); + case TDIGEST -> throw new 
IllegalArgumentException("can't read values from [tdigest] block"); case UNKNOWN -> throw new IllegalArgumentException("can't read values from [" + block + "]"); }; } diff --git a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java index 9f0a0be60d5f7..1063912782ed7 100644 --- a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java +++ b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java @@ -27,17 +27,22 @@ import org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.compute.data.OrdinalBytesRefBlock; import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.data.TDigestHolder; import org.elasticsearch.core.Releasables; import org.elasticsearch.exponentialhistogram.ExponentialHistogram; import org.elasticsearch.exponentialhistogram.ExponentialHistogramBuilder; import org.elasticsearch.exponentialhistogram.ExponentialHistogramCircuitBreaker; import org.elasticsearch.exponentialhistogram.ReleasableExponentialHistogram; import org.elasticsearch.exponentialhistogram.ZeroBucket; +import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.tdigest.Centroid; import org.hamcrest.Matcher; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.stream.DoubleStream; @@ -45,12 +50,15 @@ import static org.elasticsearch.compute.data.BlockUtils.toJavaObject; import static org.elasticsearch.test.ESTestCase.between; +import static org.elasticsearch.test.ESTestCase.fail; import static org.elasticsearch.test.ESTestCase.randomBoolean; import static org.elasticsearch.test.ESTestCase.randomDouble; import static 
org.elasticsearch.test.ESTestCase.randomFloat; +import static org.elasticsearch.test.ESTestCase.randomGaussianDouble; import static org.elasticsearch.test.ESTestCase.randomInt; import static org.elasticsearch.test.ESTestCase.randomIntBetween; import static org.elasticsearch.test.ESTestCase.randomLong; +import static org.elasticsearch.test.ESTestCase.randomLongBetween; import static org.elasticsearch.test.ESTestCase.randomNonNegativeInt; import static org.elasticsearch.test.ESTestCase.randomRealisticUnicodeOfCodepointLengthBetween; import static org.hamcrest.Matchers.equalTo; @@ -80,6 +88,7 @@ public static Object randomValue(ElementType e) { between(0, Integer.MAX_VALUE) ); case EXPONENTIAL_HISTOGRAM -> randomExponentialHistogram(); + case TDIGEST -> randomTDigest(); case NULL -> null; case COMPOSITE -> throw new IllegalArgumentException("can't make random values for composite"); case UNKNOWN -> throw new IllegalArgumentException("can't make random values for [" + e + "]"); @@ -414,6 +423,40 @@ public static ExponentialHistogram randomExponentialHistogram() { return histo; } + public static TDigestHolder randomTDigest() { + // TODO: This is mostly copied from TDigestFieldMapperTests; refactor it. 
+ Map value = new LinkedHashMap<>(); + int size = between(1, 100); + // Note - we use TDigestState to build an actual t-digest for realistic values here + TDigestState digest = TDigestState.createWithoutCircuitBreaking(100); + for (int i = 0; i < size; i++) { + double sample = randomGaussianDouble(); + int count = randomIntBetween(1, Integer.MAX_VALUE); + digest.add(sample, count); + } + List centroids = new ArrayList<>(); + List counts = new ArrayList<>(); + double sum = 0.0; + long valueCount = 0L; + for (Centroid c : digest.centroids()) { + centroids.add(c.mean()); + counts.add(c.count()); + sum += c.mean() * c.count(); + valueCount += c.count(); + } + double min = digest.getMin(); + double max = digest.getMax(); + + TDigestHolder returnValue = null; + try { + returnValue = new TDigestHolder(centroids, counts, min, max, sum, valueCount); + } catch (IOException e) { + // This is a test util, so we're just going to fail the test here + fail(e); + } + return returnValue; + } + private static int dedupe(Map dedupe, BytesRefVector.Builder bytes, BytesRef v) { Integer current = dedupe.get(v); if (current != null) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java index 3981b71f316b0..6bcbbc86564ee 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java @@ -113,7 +113,8 @@ private static DataType toDataType(ElementType elementType) { case DOUBLE -> DataType.DOUBLE; case DOC -> DataType.DOC_DATA_TYPE; case EXPONENTIAL_HISTOGRAM -> DataType.EXPONENTIAL_HISTOGRAM; - case FLOAT, NULL, COMPOSITE, AGGREGATE_METRIC_DOUBLE, UNKNOWN -> throw new EsqlIllegalArgumentException( + // NOCOMMIT - add the data type here once it's wired up + case FLOAT, NULL, COMPOSITE, AGGREGATE_METRIC_DOUBLE, 
TDIGEST, UNKNOWN -> throw new EsqlIllegalArgumentException( "unsupported agg type: " + elementType ); }; From 7108827ec7fb28d3d4e695a3598bdb4205d07e75 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 25 Nov 2025 13:54:13 -0500 Subject: [PATCH 07/36] element type wired up enough to compile the tests --- .../java/org/elasticsearch/compute/data/ElementType.java | 2 ++ .../java/org/elasticsearch/xpack/esql/CsvTestUtils.java | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java index fcdc2769f4525..c3d334830f53d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java @@ -132,6 +132,8 @@ public static ElementType fromJava(Class type) { elementType = AGGREGATE_METRIC_DOUBLE; } else if (type != null && ExponentialHistogram.class.isAssignableFrom(type)) { elementType = EXPONENTIAL_HISTOGRAM; + } else if (type != null && TDigestHolder.class.isAssignableFrom(type)) { + elementType = TDIGEST; } else if (type == null || type == Void.class) { elementType = NULL; } else { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java index 0e69e7c544c69..4bd137b93774e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java @@ -23,6 +23,7 @@ import org.elasticsearch.compute.data.BlockUtils.BuilderWrapper; import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.compute.data.Page; +import 
org.elasticsearch.compute.data.TDigestHolder; import org.elasticsearch.core.Booleans; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Releasable; @@ -508,7 +509,7 @@ public enum Type { ), DENSE_VECTOR(Float::parseFloat, Float.class, false), EXPONENTIAL_HISTOGRAM(CsvTestUtils::parseExponentialHistogram, ExponentialHistogram.class), - TDIGEST(CsvTestUtils::parseTDigest, TDigestParser.ParsedTDigest.class), + TDIGEST(CsvTestUtils::parseTDigest, TDigestHolder.class), UNSUPPORTED(Type::convertUnsupported, Void.class); private static Void convertUnsupported(String s) { @@ -605,6 +606,7 @@ public static Type asType(ElementType elementType, Type actualType) { case COMPOSITE -> throw new IllegalArgumentException("can't assert on composite blocks"); case AGGREGATE_METRIC_DOUBLE -> AGGREGATE_METRIC_DOUBLE; case EXPONENTIAL_HISTOGRAM -> EXPONENTIAL_HISTOGRAM; + case TDIGEST -> TDIGEST; case UNKNOWN -> throw new IllegalArgumentException("Unknown block types cannot be handled"); }; } @@ -722,17 +724,18 @@ private static ExponentialHistogram parseExponentialHistogram(@Nullable String j } } - private static TDigestParser.ParsedTDigest parseTDigest(@Nullable String json) { + private static TDigestHolder parseTDigest(@Nullable String json) { if (json == null) { return null; } try (XContentParser parser = JsonXContent.jsonXContent.createParser(XContentParserConfiguration.EMPTY, json)) { - return TDigestParser.parse( + TDigestParser.ParsedTDigest parsed = TDigestParser.parse( "field from test data", parser, DocumentParsingException::new, XContentParserUtils::parsingException ); + return new TDigestHolder(parsed.centroids(), parsed.counts(), parsed.min(), parsed.max(), parsed.sum(), parsed.count()); } catch (IOException e) { throw new IllegalArgumentException(e); } From bc252a57427bcc678bdbb7863227c8629a534f3f Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 25 Nov 2025 14:26:00 -0500 Subject: [PATCH 08/36] escape commas in the CSV --- 
.../src/main/resources/data/tdigest_standard_index.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv index c02d5f54f904e..adbcc987760b4 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv @@ -1,2 +1,2 @@ @timestamp:date,instance:keyword,responseTime:tdigest -2025-01-01T00:00:00Z,hand-rolled,{"centroids": [2.1, 3.2]\,"counts": [1, 1]} +2025-01-01T00:00:00Z,hand-rolled,{"centroids": [2.1\, 3.2]\,"counts": [1\, 1]} From 40e6415d27a64c01652ed03a4a63752609621336 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 25 Nov 2025 19:34:32 +0000 Subject: [PATCH 09/36] [CI] Auto commit changes from spotless --- .../java/org/elasticsearch/compute/data/BlockUtils.java | 2 +- .../java/org/elasticsearch/compute/data/ElementType.java | 7 +------ .../org/elasticsearch/compute/data/TDigestArrayBlock.java | 1 - .../elasticsearch/compute/data/TDigestBlockBuilder.java | 4 +--- .../org/elasticsearch/compute/test/BlockTestUtils.java | 1 - 5 files changed, 3 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java index c6348b9cd12d5..154d4b8d304e4 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java @@ -318,7 +318,7 @@ yield new AggregateMetricDoubleLiteral( } case TDIGEST -> { TDigestBlock tDigestBlock = (TDigestBlock) block; - // NOCOMMIT - probably something more sensible here. 
We presumably need to account for this memory in some way + // NOCOMMIT - probably something more sensible here. We presumably need to account for this memory in some way BytesRef scratch = new BytesRef(); yield tDigestBlock.getTDigestHolder(offset, scratch); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java index c3d334830f53d..c8890542cf867 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java @@ -76,12 +76,7 @@ public enum ElementType { BlockFactory::newExponentialHistogramBlockBuilder, ExponentialHistogramArrayBlock::readFrom ), - TDIGEST( - 12, - "TDigest", - BlockFactory::newTDigestBlockBuilder, - TDigestArrayBlock::readFrom - ); + TDIGEST(12, "TDigest", BlockFactory::newTDigestBlockBuilder, TDigestArrayBlock::readFrom); private static final TransportVersion ESQL_SERIALIZE_BLOCK_TYPE_CODE = TransportVersion.fromName("esql_serialize_block_type_code"); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java index 963b1b3d3de6b..3d47b0fa1cf34 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java @@ -249,7 +249,6 @@ public static TDigestArrayBlock readFrom(BlockStreamInput in) throws IOException return new TDigestArrayBlock(encodedDigests, minima, maxima, sums, valueCounts); } - @Override public long ramBytesUsed() { long bytes = 0; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java 
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java index ef80afcb866fd..71ac9ee91f8a9 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java @@ -71,7 +71,6 @@ public TDigestBlock.Builder copyFrom(TDigestBlock block, int position) { return this; } - @Override public Block.Builder appendNull() { throw new UnsupportedOperationException(); @@ -89,8 +88,7 @@ public Block.Builder endPositionEntry() { @Override public Block.Builder mvOrdering(Block.MvOrdering mvOrdering) { - assert mvOrdering == Block.MvOrdering.UNORDERED - : "TDigests don't have a natural order, so it doesn't make sense to call this"; + assert mvOrdering == Block.MvOrdering.UNORDERED : "TDigests don't have a natural order, so it doesn't make sense to call this"; return this; } diff --git a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java index 1063912782ed7..a3ed5bb38551b 100644 --- a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java +++ b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java @@ -58,7 +58,6 @@ import static org.elasticsearch.test.ESTestCase.randomInt; import static org.elasticsearch.test.ESTestCase.randomIntBetween; import static org.elasticsearch.test.ESTestCase.randomLong; -import static org.elasticsearch.test.ESTestCase.randomLongBetween; import static org.elasticsearch.test.ESTestCase.randomNonNegativeInt; import static org.elasticsearch.test.ESTestCase.randomRealisticUnicodeOfCodepointLengthBetween; import static org.hamcrest.Matchers.equalTo; From bb7174b0b41e115893672194c7e1e298b17b5bba Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 26 Nov 
2025 13:24:51 -0500 Subject: [PATCH 10/36] minor test tweaks that don't fix anything --- .../src/main/resources/data/tdigest_standard_index.csv | 2 +- .../qa/testFixtures/src/main/resources/tdigest.csv-spec | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv index adbcc987760b4..34e7cbdb61e63 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv @@ -1,2 +1,2 @@ @timestamp:date,instance:keyword,responseTime:tdigest -2025-01-01T00:00:00Z,hand-rolled,{"centroids": [2.1\, 3.2]\,"counts": [1\, 1]} +2025-01-01T00:00:00Z,hand-rolled,{"centroids":[0.1\,0.2\,0.3\,0.4\,0.5]\,"counts":[3\,7\,23\,12\,6]} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tdigest.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tdigest.csv-spec index ab444227bb9a5..1ca152a4d76aa 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tdigest.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tdigest.csv-spec @@ -1,9 +1,9 @@ -Basic retrieval of tdigest values +Make sure we can even load tdigest data required_capability: tdigest_field_type_basic_functionality -FROM tdigest_standard_index | KEEP *; +FROM tdigest_standard_index | KEEP @timestamp,instance; -@timestamp:date | instance:keyword | responseTime:tdigest -2025-01-01T00:00:00Z | hand-rolled | {"centroids": [2.1, 3.2]\,"counts": [1, 1]} +@timestamp:date | instance:keyword +2025-01-01T00:00:00Z | hand-rolled ; From 840cc554568719535b0a2196ce80cd3d66510ce6 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 26 Nov 2025 14:42:31 -0500 Subject: [PATCH 11/36] parsing works. 
It's awful, but it works --- .../compute/data/BlockUtils.java | 5 +- .../compute/data/TDigestBlockBuilder.java | 8 +++ .../compute/data/TDigestHolder.java | 23 ++++++ .../xpack/esql/CsvTestUtils.java | 72 ++++++++++++++++--- 4 files changed, 98 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java index c6348b9cd12d5..04dda89fa9e7c 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java @@ -221,13 +221,16 @@ public static void appendValue(Block.Builder builder, Object val, ElementType ty switch (type) { case LONG -> ((LongBlock.Builder) builder).appendLong((Long) val); case INT -> ((IntBlock.Builder) builder).appendInt((Integer) val); + case NULL -> { + } case BYTES_REF -> ((BytesRefBlock.Builder) builder).appendBytesRef(toBytesRef(val)); case FLOAT -> ((FloatBlock.Builder) builder).appendFloat((Float) val); case DOUBLE -> ((DoubleBlock.Builder) builder).appendDouble((Double) val); case BOOLEAN -> ((BooleanBlock.Builder) builder).appendBoolean((Boolean) val); + case TDIGEST -> ((TDigestBlockBuilder) builder).append((TDigestHolder) val); case AGGREGATE_METRIC_DOUBLE -> ((AggregateMetricDoubleBlockBuilder) builder).appendLiteral((AggregateMetricDoubleLiteral) val); case EXPONENTIAL_HISTOGRAM -> ((ExponentialHistogramBlockBuilder) builder).append((ExponentialHistogram) val); - default -> throw new UnsupportedOperationException("unsupported element type [" + type + "]"); + case DOC, COMPOSITE, UNKNOWN -> throw new UnsupportedOperationException("unsupported element type [" + type + "]"); } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java 
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java index ef80afcb866fd..8fea5b2c1f5f3 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java @@ -133,4 +133,12 @@ public BlockLoader.BytesRefBuilder encodedDigests() { public void close() { } + + public void append(TDigestHolder val) { + encodedDigestsBuilder.appendBytesRef(val.getEncodedDigest()); + minimaBuilder.appendDouble(val.getMin()); + maximaBuilder.appendDouble(val.getMax()); + sumsBuilder.appendDouble(val.getSum()); + valueCountsBuilder.appendLong(val.getValueCount()); + } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java index e7cd02f849ed0..cf9a1993d874c 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java @@ -27,6 +27,7 @@ public class TDigestHolder { private final long valueCount; private final BytesRef encodedDigest; + // NOCOMMIT - Deal with the empty array case better public TDigestHolder(BytesRef encodedDigest, double min, double max, double sum, long valueCount) { this.encodedDigest = encodedDigest; this.min = min; @@ -57,4 +58,26 @@ private static BytesRef encodeCentroidsAndCounts(List centroids, List centroids = new ArrayList<>(); + List counts = new ArrayList<>(); + + s = s.replace("\\,", ","); + String[] values = s.substring(1, s.length() - 1).split(","); + for (int i = 0; i < values.length; i++) { + String v = values[i]; + var pair = v.split(":"); + String type = pair[0]; + String number = pair[1]; + switch (type) { + case "min", "\"min\"": + min = Double.parseDouble(number); + break; + case "max", 
"\"max\"": + max = Double.parseDouble(number); + break; + case "sum", "\"sum\"": + sum = Double.parseDouble(number); + break; + case "value_count", "\"value_count\"": + count = Integer.parseInt(number); + break; + case "centroids", "\"centroids\"": + if (number.startsWith("[") == false) { + throw new IllegalArgumentException("Expected a list of numbers, got [" + number + "]"); + } + number = number.substring(1, number.length()); + while (number.endsWith("]") == false) { + centroids.add(Double.parseDouble(number)); + number = values[++i]; + } + number = number.substring(0, number.length() - 1); + centroids.add(Double.parseDouble(number)); + break; + case "counts", "\"counts\"": + if (number.startsWith("[") == false) { + throw new IllegalArgumentException("Expected a list of numbers, got [" + number + "]"); + } + number = number.substring(1, number.length()); + while (number.endsWith("]") == false) { + counts.add(Long.parseLong(number)); + number = values[++i]; + } + number = number.substring(0, number.length() - 1); + counts.add(Long.parseLong(number)); + break; + default: + throw new IllegalArgumentException("Received unexpected subfield: [" + type + "] with value: [" + number + "]"); + } + } + + TDigestHolder returnValue = null; + try { + returnValue = new TDigestHolder(centroids, counts, min, max, sum, count); } catch (IOException e) { - throw new IllegalArgumentException(e); + ESTestCase.fail(e); } + return returnValue; } } From 7ea10737e772f3f5ff4cbc26f72a42689dd0bc04 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 26 Nov 2025 15:36:10 -0500 Subject: [PATCH 12/36] better parsing solution, thanks Dima! 
--- .../compute/data/TDigestArrayBlock.java | 2 +- .../xpack/esql/CsvTestUtils.java | 73 +++---------------- 2 files changed, 13 insertions(+), 62 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java index 963b1b3d3de6b..e2499ff42ae2f 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java @@ -73,7 +73,7 @@ public int getValueCount(int position) { @Override public ElementType elementType() { - throw new UnsupportedOperationException("Need to implement this later"); + return ElementType.TDIGEST; } @Override diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java index 6607f4b726f57..cb45ce438cb6b 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java @@ -729,69 +729,20 @@ private static TDigestHolder parseTDigest(@Nullable String json) { if (json == null) { return null; } - String s = json; - Double min = 0.0; - Double max = 0.0; - Double sum = 0.0; - Integer count = 0; - List centroids = new ArrayList<>(); - List counts = new ArrayList<>(); - - s = s.replace("\\,", ","); - String[] values = s.substring(1, s.length() - 1).split(","); - for (int i = 0; i < values.length; i++) { - String v = values[i]; - var pair = v.split(":"); - String type = pair[0]; - String number = pair[1]; - switch (type) { - case "min", "\"min\"": - min = Double.parseDouble(number); - break; - case "max", "\"max\"": - max = Double.parseDouble(number); - break; - case "sum", 
"\"sum\"": - sum = Double.parseDouble(number); - break; - case "value_count", "\"value_count\"": - count = Integer.parseInt(number); - break; - case "centroids", "\"centroids\"": - if (number.startsWith("[") == false) { - throw new IllegalArgumentException("Expected a list of numbers, got [" + number + "]"); - } - number = number.substring(1, number.length()); - while (number.endsWith("]") == false) { - centroids.add(Double.parseDouble(number)); - number = values[++i]; - } - number = number.substring(0, number.length() - 1); - centroids.add(Double.parseDouble(number)); - break; - case "counts", "\"counts\"": - if (number.startsWith("[") == false) { - throw new IllegalArgumentException("Expected a list of numbers, got [" + number + "]"); - } - number = number.substring(1, number.length()); - while (number.endsWith("]") == false) { - counts.add(Long.parseLong(number)); - number = values[++i]; - } - number = number.substring(0, number.length() - 1); - counts.add(Long.parseLong(number)); - break; - default: - throw new IllegalArgumentException("Received unexpected subfield: [" + type + "] with value: [" + number + "]"); + try (XContentParser parser = JsonXContent.jsonXContent.createParser(XContentParserConfiguration.EMPTY, json)) { + if (parser.nextToken() != XContentParser.Token.START_OBJECT) { + throw new IllegalArgumentException("Expected START_OBJECT but found: " + parser.currentToken()); } - } - - TDigestHolder returnValue = null; - try { - returnValue = new TDigestHolder(centroids, counts, min, max, sum, count); + parser.nextToken(); + TDigestParser.ParsedTDigest parsed = TDigestParser.parse( + "field from test data", + parser, + DocumentParsingException::new, + XContentParserUtils::parsingException + ); + return new TDigestHolder(parsed.centroids(), parsed.counts(), parsed.min(), parsed.max(), parsed.sum(), parsed.count()); } catch (IOException e) { - ESTestCase.fail(e); + throw new IllegalArgumentException(e); } - return returnValue; } } From 
bd72b1a3198cf0d7d2132c1d42d8900165737002 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 26 Nov 2025 16:08:05 -0500 Subject: [PATCH 13/36] actually implement building the block --- .../compute/data/TDigestBlockBuilder.java | 32 +++++++++++++++---- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java index 8fea5b2c1f5f3..d4ffbb6bb5a26 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java @@ -101,37 +101,55 @@ public long estimatedBytes() { @Override public TDigestBlock build() { - return null; + DoubleBlock minima = null; + DoubleBlock maxima = null; + DoubleBlock sums = null; + LongBlock valueCounts = null; + BytesRefBlock encodedDigests = null; + boolean success = false; + try { + minima = minimaBuilder.build(); + maxima = maximaBuilder.build(); + sums = sumsBuilder.build(); + valueCounts = valueCountsBuilder.build(); + encodedDigests = encodedDigestsBuilder.build(); + success = true; + return new TDigestArrayBlock(encodedDigests, minima, maxima, sums, valueCounts); + } finally { + if (success == false) { + Releasables.close(minima, maxima, sums, valueCounts, encodedDigests); + } + } } @Override public BlockLoader.DoubleBuilder minima() { - return null; + throw new UnsupportedOperationException(); } @Override public BlockLoader.DoubleBuilder maxima() { - return null; + throw new UnsupportedOperationException(); } @Override public BlockLoader.DoubleBuilder sums() { - return null; + throw new UnsupportedOperationException(); } @Override public BlockLoader.LongBuilder valueCounts() { - return null; + throw new UnsupportedOperationException(); } @Override public BlockLoader.BytesRefBuilder encodedDigests() 
{ - return null; + throw new UnsupportedOperationException(); } @Override public void close() { - + Releasables.close(encodedDigestsBuilder, minimaBuilder, maximaBuilder, sumsBuilder, valueCountsBuilder); } public void append(TDigestHolder val) { From f0a629e0365c21527925c6a352f58d23eaf6c681 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 26 Nov 2025 21:16:09 +0000 Subject: [PATCH 14/36] [CI] Auto commit changes from spotless --- .../src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java index cb45ce438cb6b..7e58a5742d7e2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java @@ -38,7 +38,6 @@ import org.elasticsearch.logging.Logger; import org.elasticsearch.search.aggregations.bucket.geogrid.GeoTileUtils; import org.elasticsearch.tdigest.parsing.TDigestParser; -import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.VersionUtils; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParserConfiguration; From b89bcef5c213b60d007a64d0bc55556bdcc7486b Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 26 Nov 2025 16:19:14 -0500 Subject: [PATCH 15/36] turn no commits into todos --- .../main/java/org/elasticsearch/compute/data/BlockUtils.java | 2 +- .../java/org/elasticsearch/compute/data/TDigestHolder.java | 4 ++-- .../org/elasticsearch/xpack/esql/planner/AggregateMapper.java | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java 
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java index 4384750f0e0a3..0bfdc3910eced 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java @@ -321,7 +321,7 @@ yield new AggregateMetricDoubleLiteral( } case TDIGEST -> { TDigestBlock tDigestBlock = (TDigestBlock) block; - // NOCOMMIT - probably something more sensible here. We presumably need to account for this memory in some way + // TODO memory tracking? Or do we not care here because this is only called for literals? BytesRef scratch = new BytesRef(); yield tDigestBlock.getTDigestHolder(offset, scratch); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java index cf9a1993d874c..63ac22aaa5c66 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java @@ -27,7 +27,7 @@ public class TDigestHolder { private final long valueCount; private final BytesRef encodedDigest; - // NOCOMMIT - Deal with the empty array case better + // TODO - Deal with the empty array case better public TDigestHolder(BytesRef encodedDigest, double min, double max, double sum, long valueCount) { this.encodedDigest = encodedDigest; this.min = min; @@ -63,7 +63,7 @@ public BytesRef getEncodedDigest() { return encodedDigest; } - // NOCOMMIT - compute these if they're not given? or do that at object creation time, maybe. + // TODO - compute these if they're not given? or do that at object creation time, maybe. 
public double getMax() { return max; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java index 6bcbbc86564ee..7c35cbc04aba1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java @@ -113,7 +113,6 @@ private static DataType toDataType(ElementType elementType) { case DOUBLE -> DataType.DOUBLE; case DOC -> DataType.DOC_DATA_TYPE; case EXPONENTIAL_HISTOGRAM -> DataType.EXPONENTIAL_HISTOGRAM; - // NOCOMMIT - add the data type here once it's wired up case FLOAT, NULL, COMPOSITE, AGGREGATE_METRIC_DOUBLE, TDIGEST, UNKNOWN -> throw new EsqlIllegalArgumentException( "unsupported agg type: " + elementType ); From 4e42f39ab0c83a028dbc4f7306419821d8076bac Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 1 Dec 2025 09:51:28 -0500 Subject: [PATCH 16/36] fix element type usages in tests --- .../org/elasticsearch/compute/data/VectorBuilderTests.java | 5 +++-- .../elasticsearch/compute/data/VectorFixedBuilderTests.java | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorBuilderTests.java index 91ec105f20091..7c63bcf8b8ce9 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorBuilderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorBuilderTests.java @@ -35,6 +35,7 @@ public static List params() { || e == ElementType.DOC || e == ElementType.COMPOSITE || e == ElementType.AGGREGATE_METRIC_DOUBLE + || e == ElementType.TDIGEST || e == ElementType.EXPONENTIAL_HISTOGRAM) { continue; } @@ -119,7 +120,7 @@ public void 
testCranky() { private Vector.Builder vectorBuilder(int estimatedSize, BlockFactory blockFactory) { return switch (elementType) { - case NULL, DOC, COMPOSITE, AGGREGATE_METRIC_DOUBLE, EXPONENTIAL_HISTOGRAM, UNKNOWN -> throw new UnsupportedOperationException(); + case NULL, DOC, COMPOSITE, AGGREGATE_METRIC_DOUBLE, EXPONENTIAL_HISTOGRAM, TDIGEST, UNKNOWN -> throw new UnsupportedOperationException(); case BOOLEAN -> blockFactory.newBooleanVectorBuilder(estimatedSize); case BYTES_REF -> blockFactory.newBytesRefVectorBuilder(estimatedSize); case FLOAT -> blockFactory.newFloatVectorBuilder(estimatedSize); @@ -131,7 +132,7 @@ private Vector.Builder vectorBuilder(int estimatedSize, BlockFactory blockFactor private void fill(Vector.Builder builder, Vector from) { switch (elementType) { - case NULL, DOC, COMPOSITE, AGGREGATE_METRIC_DOUBLE, UNKNOWN -> throw new UnsupportedOperationException(); + case NULL, DOC, COMPOSITE, AGGREGATE_METRIC_DOUBLE, TDIGEST, UNKNOWN -> throw new UnsupportedOperationException(); case BOOLEAN -> { for (int p = 0; p < from.getPositionCount(); p++) { ((BooleanVector.Builder) builder).appendBoolean(((BooleanVector) from).getBoolean(p)); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorFixedBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorFixedBuilderTests.java index b72ee46c4b828..e06074c0d2979 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorFixedBuilderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorFixedBuilderTests.java @@ -35,6 +35,7 @@ public static List params() { || elementType == ElementType.DOC || elementType == ElementType.BYTES_REF || elementType == ElementType.AGGREGATE_METRIC_DOUBLE + || elementType == ElementType.TDIGEST || elementType == ElementType.EXPONENTIAL_HISTOGRAM) { continue; } @@ -119,7 +120,7 @@ public void testCranky() { private Vector.Builder 
vectorBuilder(int size, BlockFactory blockFactory) { return switch (elementType) { - case NULL, BYTES_REF, DOC, COMPOSITE, AGGREGATE_METRIC_DOUBLE, EXPONENTIAL_HISTOGRAM, UNKNOWN -> + case NULL, BYTES_REF, DOC, COMPOSITE, AGGREGATE_METRIC_DOUBLE, EXPONENTIAL_HISTOGRAM, TDIGEST, UNKNOWN -> throw new UnsupportedOperationException(); case BOOLEAN -> blockFactory.newBooleanVectorFixedBuilder(size); case DOUBLE -> blockFactory.newDoubleVectorFixedBuilder(size); @@ -131,7 +132,7 @@ private Vector.Builder vectorBuilder(int size, BlockFactory blockFactory) { private void fill(Vector.Builder builder, Vector from) { switch (elementType) { - case NULL, DOC, COMPOSITE, AGGREGATE_METRIC_DOUBLE, EXPONENTIAL_HISTOGRAM, UNKNOWN -> throw new UnsupportedOperationException(); + case NULL, DOC, COMPOSITE, AGGREGATE_METRIC_DOUBLE, EXPONENTIAL_HISTOGRAM, TDIGEST, UNKNOWN -> throw new UnsupportedOperationException(); case BOOLEAN -> { for (int p = 0; p < from.getPositionCount(); p++) { ((BooleanVector.FixedBuilder) builder).appendBoolean(((BooleanVector) from).getBoolean(p)); From b76505f3f1d81704ceb005559b5fe042a3bcceb6 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 1 Dec 2025 11:23:50 -0500 Subject: [PATCH 17/36] Don't load tdigest fields in CSV tests where it's not supported --- .../rest/generative/GenerativeRestTest.java | 13 +++++-- .../xpack/esql/CsvTestsDataLoader.java | 38 ++++++++++++++++--- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java index c5e81a42e0616..060bd99f7d940 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java @@ -12,6 
+12,7 @@ import org.elasticsearch.test.rest.ESRestTestCase; import org.elasticsearch.xpack.esql.AssertWarnings; import org.elasticsearch.xpack.esql.CsvTestsDataLoader; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.generator.Column; import org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator; import org.elasticsearch.xpack.esql.generator.LookupIdx; @@ -267,10 +268,14 @@ private static List originalTypes(Map x) { } private List availableIndices() throws IOException { - return availableDatasetsForEs(true, supportsSourceFieldMapping(), false, requiresTimeSeries(), false).stream() - .filter(x -> x.requiresInferenceEndpoint() == false) - .map(x -> x.indexName()) - .toList(); + return availableDatasetsForEs( + true, + supportsSourceFieldMapping(), + false, + requiresTimeSeries(), + false, + EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.isEnabled() + ).stream().filter(x -> x.requiresInferenceEndpoint() == false).map(x -> x.indexName()).toList(); } private List lookupIndices() { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 82e42f308c58b..1fc89ca3ce35f 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -32,6 +32,7 @@ import org.elasticsearch.logging.Logger; import org.elasticsearch.test.rest.ESRestTestCase; import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import java.io.BufferedReader; import java.io.IOException; @@ -341,7 +342,7 @@ public static void main(String[] args) throws IOException { } try (RestClient client = builder.build()) { - loadDataSetIntoEs(client, true, true, false, false, true, 
(restClient, indexName, indexMapping, indexSettings) -> { + loadDataSetIntoEs(client, true, true, false, false, true, EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.isEnabled(), (restClient, indexName, indexMapping, indexSettings) -> { // don't use ESRestTestCase methods here or, if you do, test running the main method before making the change StringBuilder jsonBody = new StringBuilder("{"); if (indexSettings != null && indexSettings.isEmpty() == false) { @@ -365,7 +366,8 @@ public static Set availableDatasetsForEs( boolean supportsSourceFieldMapping, boolean inferenceEnabled, boolean requiresTimeSeries, - boolean exponentialHistogramFieldSupported + boolean exponentialHistogramFieldSupported, + boolean tDigestFieldSupported ) throws IOException { Set testDataSets = new HashSet<>(); @@ -374,7 +376,8 @@ public static Set availableDatasetsForEs( && (supportsIndexModeLookup || isLookupDataset(dataset) == false) && (supportsSourceFieldMapping || isSourceMappingDataset(dataset) == false) && (requiresTimeSeries == false || isTimeSeries(dataset)) - && (exponentialHistogramFieldSupported || containsExponentialHistogramFields(dataset) == false)) { + && (exponentialHistogramFieldSupported || containsExponentialHistogramFields(dataset) == false) + && (tDigestFieldSupported || containsTDigestFields(dataset) == false)) { testDataSets.add(dataset); } } @@ -419,6 +422,28 @@ private static boolean containsExponentialHistogramFields(TestDataset dataset) t return false; } + private static boolean containsTDigestFields(TestDataset dataset) throws IOException { + if (dataset.mappingFileName() == null) { + return false; + } + String mappingJsonText = readTextFile(getResource("/" + dataset.mappingFileName())); + JsonNode mappingNode = new ObjectMapper().readTree(mappingJsonText); + JsonNode properties = mappingNode.get("properties"); + if (properties != null) { + for (var fieldWithValue : properties.properties()) { + JsonNode fieldProperties = fieldWithValue.getValue(); + 
if (fieldProperties != null) { + JsonNode typeNode = fieldProperties.get("type"); + if (typeNode != null && typeNode.asText().equals("tdigest")) { + return true; + } + } + } + } + return false; + } + + private static boolean isTimeSeries(TestDataset dataset) throws IOException { Settings settings = dataset.readSettingsFile(); String mode = settings.get("index.mode"); @@ -449,7 +474,7 @@ public static void loadDataSetIntoEs( inferenceEnabled, timeSeriesOnly, exponentialHistogramFieldSupported, - (restClient, indexName, indexMapping, indexSettings) -> { + EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.isEnabled(), (restClient, indexName, indexMapping, indexSettings) -> { ESRestTestCase.createIndex(restClient, indexName, indexSettings, indexMapping, null); } ); @@ -462,7 +487,7 @@ private static void loadDataSetIntoEs( boolean inferenceEnabled, boolean timeSeriesOnly, boolean exponentialHistogramFieldSupported, - IndexCreator indexCreator + boolean tDigestFieldSupported, IndexCreator indexCreator ) throws IOException { Logger logger = LogManager.getLogger(CsvTestsDataLoader.class); @@ -473,7 +498,8 @@ private static void loadDataSetIntoEs( supportsSourceFieldMapping, inferenceEnabled, timeSeriesOnly, - exponentialHistogramFieldSupported + exponentialHistogramFieldSupported, + tDigestFieldSupported )) { load(client, dataset, logger, indexCreator); loadedDatasets.add(dataset.indexName); From 3175b411e0baefe57231225ec5b0f691f1edf360 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 1 Dec 2025 11:31:08 -0500 Subject: [PATCH 18/36] fix license headers --- .../tdigest/parsing/TDigestParser.java | 24 ++++++++++++++----- .../tdigest/parsing/package-info.java | 24 ++++++++++++++----- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/TDigestParser.java b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/TDigestParser.java index ccdee289cf523..b884539373d30 100644 --- 
a/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/TDigestParser.java +++ b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/TDigestParser.java @@ -1,10 +1,22 @@ /* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * This project is based on a modification of https://github.com/tdunning/t-digest which is licensed under the Apache 2.0 License. 
*/ package org.elasticsearch.tdigest.parsing; diff --git a/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/package-info.java b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/package-info.java index 3ddf75a66f584..83e31f73e66f4 100644 --- a/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/package-info.java +++ b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/package-info.java @@ -1,10 +1,22 @@ /* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * This project is based on a modification of https://github.com/tdunning/t-digest which is licensed under the Apache 2.0 License. 
*/ /** From 85c341754e2e9c4ec6cf267635e9b778a70ce2df Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 1 Dec 2025 11:55:34 -0500 Subject: [PATCH 19/36] minor cleanup --- .../main/java/org/elasticsearch/compute/data/BlockUtils.java | 4 +--- .../java/org/elasticsearch/compute/test/BlockTestUtils.java | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java index 0bfdc3910eced..4a76bcecc913f 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java @@ -221,8 +221,6 @@ public static void appendValue(Block.Builder builder, Object val, ElementType ty switch (type) { case LONG -> ((LongBlock.Builder) builder).appendLong((Long) val); case INT -> ((IntBlock.Builder) builder).appendInt((Integer) val); - case NULL -> { - } case BYTES_REF -> ((BytesRefBlock.Builder) builder).appendBytesRef(toBytesRef(val)); case FLOAT -> ((FloatBlock.Builder) builder).appendFloat((Float) val); case DOUBLE -> ((DoubleBlock.Builder) builder).appendDouble((Double) val); @@ -230,7 +228,7 @@ public static void appendValue(Block.Builder builder, Object val, ElementType ty case TDIGEST -> ((TDigestBlockBuilder) builder).append((TDigestHolder) val); case AGGREGATE_METRIC_DOUBLE -> ((AggregateMetricDoubleBlockBuilder) builder).appendLiteral((AggregateMetricDoubleLiteral) val); case EXPONENTIAL_HISTOGRAM -> ((ExponentialHistogramBlockBuilder) builder).append((ExponentialHistogram) val); - case DOC, COMPOSITE, UNKNOWN -> throw new UnsupportedOperationException("unsupported element type [" + type + "]"); + case DOC, COMPOSITE, NULL, UNKNOWN -> throw new UnsupportedOperationException("unsupported element type [" + type + "]"); } } diff --git 
a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java index a3ed5bb38551b..08d79b31b48b8 100644 --- a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java +++ b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java @@ -424,7 +424,6 @@ public static ExponentialHistogram randomExponentialHistogram() { public static TDigestHolder randomTDigest() { // TODO: This is mostly copied from TDigestFieldMapperTests; refactor it. - Map value = new LinkedHashMap<>(); int size = between(1, 100); // Note - we use TDigestState to build an actual t-digest for realistic values here TDigestState digest = TDigestState.createWithoutCircuitBreaking(100); From 3f972eab3c54a38c9590eda1b3ef98a26bdf2e3b Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 1 Dec 2025 12:58:29 -0500 Subject: [PATCH 20/36] PR feedback --- .../main/java/org/elasticsearch/compute/data/BlockUtils.java | 4 +--- .../org/elasticsearch/compute/data/ConstantNullBlock.java | 2 +- .../org/elasticsearch/compute/data/TDigestArrayBlock.java | 5 +++-- .../java/org/elasticsearch/compute/data/TDigestBlock.java | 3 +-- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java index 4a76bcecc913f..b3f38bbdb00aa 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java @@ -319,9 +319,7 @@ yield new AggregateMetricDoubleLiteral( } case TDIGEST -> { TDigestBlock tDigestBlock = (TDigestBlock) block; - // TODO memory tracking? Or do we not care here because this is only called for literals? 
- BytesRef scratch = new BytesRef(); - yield tDigestBlock.getTDigestHolder(offset, scratch); + yield tDigestBlock.getTDigestHolder(offset); } case UNKNOWN -> throw new IllegalArgumentException("can't read values from [" + block + "]"); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java index 706b5e48b8f0f..0f8b19312375b 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java @@ -302,7 +302,7 @@ public ExponentialHistogram getExponentialHistogram(int valueIndex, ExponentialH } @Override - public TDigestHolder getTDigestHolder(int valueIndex, BytesRef scratch) { + public TDigestHolder getTDigestHolder(int valueIndex) { assert false : "null block"; throw new UnsupportedOperationException("null block"); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java index a35b831233174..e649310fe12b3 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java @@ -259,9 +259,10 @@ public long ramBytesUsed() { } @Override - public TDigestHolder getTDigestHolder(int offset, BytesRef scratch) { + public TDigestHolder getTDigestHolder(int offset) { return new TDigestHolder( - encodedDigests.getBytesRef(offset, scratch), + // TODO: Memory tracking? 
creating a new bytes ref here doesn't seem great + encodedDigests.getBytesRef(offset, new BytesRef()), minima.getDouble(offset), maxima.getDouble(offset), sums.getDouble(offset), diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java index 695a742e4a1df..1ab231a9ca3ea 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java @@ -7,7 +7,6 @@ package org.elasticsearch.compute.data; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.mapper.BlockLoader; public sealed interface TDigestBlock extends Block permits ConstantNullBlock, TDigestArrayBlock { @@ -26,5 +25,5 @@ sealed interface Builder extends Block.Builder, BlockLoader.TDigestBuilder permi TDigestBlock build(); } - TDigestHolder getTDigestHolder(int offset, BytesRef scratch); + TDigestHolder getTDigestHolder(int offset); } From adb31060b904bc6d5cf796462960101a5f56fbaf Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 1 Dec 2025 13:09:49 -0500 Subject: [PATCH 21/36] test invariants in the array block --- .../compute/data/TDigestArrayBlock.java | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java index e649310fe12b3..be7bc627d33fe 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java @@ -35,6 +35,38 @@ public TDigestArrayBlock( this.maxima = maxima; this.sums = sums; this.valueCounts = valueCounts; + assertInvariants(); + } + + + private void 
assertInvariants() { + for (Block b : getSubBlocks()) { + assert b.isReleased() == false; + assert b.doesHaveMultivaluedFields() == false + : "TDigestArrayBlock sub-blocks can't have multi-values but [" + b + "] does"; + assert b.getPositionCount() == getPositionCount() + : "TDigestArrayBlock sub-blocks must have the same position count but [" + + b + + "] has " + + b.getPositionCount() + + " instead of " + + getPositionCount(); + for (int i = 0; i < b.getPositionCount(); i++) { + if (isNull(i)) { + assert b.isNull(i) + : "TDigestArrayBlock sub-block [" + b + "] should be null at position " + i + ", but was not"; + } else { + if (b == sums || b == minima || b == maxima) { + // sums / minima / maxima should be null exactly when value count is 0 or the histogram is null + assert b.isNull(i) == (valueCounts.getLong(valueCounts.getFirstValueIndex(i)) == 0) + : "TDigestArrayBlock sums/minima/maxima sub-block [" + b + "] has wrong nullity at position " + i; + } else { + assert b.isNull(i) == false + : "TDigestArrayBlock sub-block [" + b + "] should be non-null at position " + i + ", but was not"; + } + } + } + } } private List getSubBlocks() { From de3eabf5ee89a5313fe6d7ed38163811ee20275b Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 1 Dec 2025 18:18:19 +0000 Subject: [PATCH 22/36] [CI] Auto commit changes from spotless --- .../compute/data/TDigestArrayBlock.java | 17 +++---- .../compute/data/VectorBuilderTests.java | 3 +- .../compute/data/VectorFixedBuilderTests.java | 3 +- .../compute/test/BlockTestUtils.java | 1 - .../xpack/esql/CsvTestsDataLoader.java | 46 +++++++++++-------- 5 files changed, 39 insertions(+), 31 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java index be7bc627d33fe..4dec4b3a6a8c9 100644 --- 
a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java @@ -38,23 +38,20 @@ public TDigestArrayBlock( assertInvariants(); } - private void assertInvariants() { for (Block b : getSubBlocks()) { assert b.isReleased() == false; - assert b.doesHaveMultivaluedFields() == false - : "TDigestArrayBlock sub-blocks can't have multi-values but [" + b + "] does"; + assert b.doesHaveMultivaluedFields() == false : "TDigestArrayBlock sub-blocks can't have multi-values but [" + b + "] does"; assert b.getPositionCount() == getPositionCount() : "TDigestArrayBlock sub-blocks must have the same position count but [" - + b - + "] has " - + b.getPositionCount() - + " instead of " - + getPositionCount(); + + b + + "] has " + + b.getPositionCount() + + " instead of " + + getPositionCount(); for (int i = 0; i < b.getPositionCount(); i++) { if (isNull(i)) { - assert b.isNull(i) - : "TDigestArrayBlock sub-block [" + b + "] should be null at position " + i + ", but was not"; + assert b.isNull(i) : "TDigestArrayBlock sub-block [" + b + "] should be null at position " + i + ", but was not"; } else { if (b == sums || b == minima || b == maxima) { // sums / minima / maxima should be null exactly when value count is 0 or the histogram is null diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorBuilderTests.java index 7c63bcf8b8ce9..5df8330034ed2 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorBuilderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorBuilderTests.java @@ -120,7 +120,8 @@ public void testCranky() { private Vector.Builder vectorBuilder(int estimatedSize, BlockFactory blockFactory) { return switch (elementType) { - case NULL, 
DOC, COMPOSITE, AGGREGATE_METRIC_DOUBLE, EXPONENTIAL_HISTOGRAM, TDIGEST, UNKNOWN -> throw new UnsupportedOperationException(); + case NULL, DOC, COMPOSITE, AGGREGATE_METRIC_DOUBLE, EXPONENTIAL_HISTOGRAM, TDIGEST, UNKNOWN -> + throw new UnsupportedOperationException(); case BOOLEAN -> blockFactory.newBooleanVectorBuilder(estimatedSize); case BYTES_REF -> blockFactory.newBytesRefVectorBuilder(estimatedSize); case FLOAT -> blockFactory.newFloatVectorBuilder(estimatedSize); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorFixedBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorFixedBuilderTests.java index e06074c0d2979..5c49f9e1913fa 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorFixedBuilderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/VectorFixedBuilderTests.java @@ -132,7 +132,8 @@ private Vector.Builder vectorBuilder(int size, BlockFactory blockFactory) { private void fill(Vector.Builder builder, Vector from) { switch (elementType) { - case NULL, DOC, COMPOSITE, AGGREGATE_METRIC_DOUBLE, EXPONENTIAL_HISTOGRAM, TDIGEST, UNKNOWN -> throw new UnsupportedOperationException(); + case NULL, DOC, COMPOSITE, AGGREGATE_METRIC_DOUBLE, EXPONENTIAL_HISTOGRAM, TDIGEST, UNKNOWN -> + throw new UnsupportedOperationException(); case BOOLEAN -> { for (int p = 0; p < from.getPositionCount(); p++) { ((BooleanVector.FixedBuilder) builder).appendBoolean(((BooleanVector) from).getBoolean(p)); diff --git a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java index 08d79b31b48b8..016455f9c9d56 100644 --- a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java +++ 
b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java @@ -42,7 +42,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.stream.DoubleStream; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 1fc89ca3ce35f..a03b7c7bca61d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -342,22 +342,31 @@ public static void main(String[] args) throws IOException { } try (RestClient client = builder.build()) { - loadDataSetIntoEs(client, true, true, false, false, true, EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.isEnabled(), (restClient, indexName, indexMapping, indexSettings) -> { - // don't use ESRestTestCase methods here or, if you do, test running the main method before making the change - StringBuilder jsonBody = new StringBuilder("{"); - if (indexSettings != null && indexSettings.isEmpty() == false) { - jsonBody.append("\"settings\":"); - jsonBody.append(Strings.toString(indexSettings)); - jsonBody.append(","); + loadDataSetIntoEs( + client, + true, + true, + false, + false, + true, + EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.isEnabled(), + (restClient, indexName, indexMapping, indexSettings) -> { + // don't use ESRestTestCase methods here or, if you do, test running the main method before making the change + StringBuilder jsonBody = new StringBuilder("{"); + if (indexSettings != null && indexSettings.isEmpty() == false) { + jsonBody.append("\"settings\":"); + jsonBody.append(Strings.toString(indexSettings)); + jsonBody.append(","); 
+ } + jsonBody.append("\"mappings\":"); + jsonBody.append(indexMapping); + jsonBody.append("}"); + + Request request = new Request("PUT", "/" + indexName); + request.setJsonEntity(jsonBody.toString()); + restClient.performRequest(request); } - jsonBody.append("\"mappings\":"); - jsonBody.append(indexMapping); - jsonBody.append("}"); - - Request request = new Request("PUT", "/" + indexName); - request.setJsonEntity(jsonBody.toString()); - restClient.performRequest(request); - }); + ); } } @@ -443,7 +452,6 @@ private static boolean containsTDigestFields(TestDataset dataset) throws IOExcep return false; } - private static boolean isTimeSeries(TestDataset dataset) throws IOException { Settings settings = dataset.readSettingsFile(); String mode = settings.get("index.mode"); @@ -474,7 +482,8 @@ public static void loadDataSetIntoEs( inferenceEnabled, timeSeriesOnly, exponentialHistogramFieldSupported, - EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.isEnabled(), (restClient, indexName, indexMapping, indexSettings) -> { + EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.isEnabled(), + (restClient, indexName, indexMapping, indexSettings) -> { ESRestTestCase.createIndex(restClient, indexName, indexSettings, indexMapping, null); } ); @@ -487,7 +496,8 @@ private static void loadDataSetIntoEs( boolean inferenceEnabled, boolean timeSeriesOnly, boolean exponentialHistogramFieldSupported, - boolean tDigestFieldSupported, IndexCreator indexCreator + boolean tDigestFieldSupported, + IndexCreator indexCreator ) throws IOException { Logger logger = LogManager.getLogger(CsvTestsDataLoader.class); From ac22a037a87abeeca35ccc7d5a1f7bf4c6072da5 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 1 Dec 2025 13:49:08 -0500 Subject: [PATCH 23/36] just do what ExponentialHistogram does --- .../xpack/esql/qa/rest/generative/GenerativeRestTest.java | 2 +- .../java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java index 060bd99f7d940..4efbd20e08c80 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java @@ -274,7 +274,7 @@ private List availableIndices() throws IOException { false, requiresTimeSeries(), false, - EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.isEnabled() + false ).stream().filter(x -> x.requiresInferenceEndpoint() == false).map(x -> x.indexName()).toList(); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index a03b7c7bca61d..0060daaca1eeb 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -349,7 +349,7 @@ public static void main(String[] args) throws IOException { false, false, true, - EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.isEnabled(), + true, (restClient, indexName, indexMapping, indexSettings) -> { // don't use ESRestTestCase methods here or, if you do, test running the main method before making the change StringBuilder jsonBody = new StringBuilder("{"); From 37fc1fcd62acfb8b792e0652e36ee779a8e5fc64 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 1 Dec 2025 18:57:43 +0000 Subject: [PATCH 24/36] [CI] Auto commit changes from spotless --- .../rest/generative/GenerativeRestTest.java | 13 ++----- 
.../xpack/esql/CsvTestsDataLoader.java | 39 +++++++------------ 2 files changed, 19 insertions(+), 33 deletions(-) diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java index 4efbd20e08c80..24a9c88a026fe 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java @@ -12,7 +12,6 @@ import org.elasticsearch.test.rest.ESRestTestCase; import org.elasticsearch.xpack.esql.AssertWarnings; import org.elasticsearch.xpack.esql.CsvTestsDataLoader; -import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.generator.Column; import org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator; import org.elasticsearch.xpack.esql.generator.LookupIdx; @@ -268,14 +267,10 @@ private static List originalTypes(Map x) { } private List availableIndices() throws IOException { - return availableDatasetsForEs( - true, - supportsSourceFieldMapping(), - false, - requiresTimeSeries(), - false, - false - ).stream().filter(x -> x.requiresInferenceEndpoint() == false).map(x -> x.indexName()).toList(); + return availableDatasetsForEs(true, supportsSourceFieldMapping(), false, requiresTimeSeries(), false, false).stream() + .filter(x -> x.requiresInferenceEndpoint() == false) + .map(x -> x.indexName()) + .toList(); } private List lookupIndices() { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 0060daaca1eeb..021dcc923d467 100644 --- 
a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -342,31 +342,22 @@ public static void main(String[] args) throws IOException { } try (RestClient client = builder.build()) { - loadDataSetIntoEs( - client, - true, - true, - false, - false, - true, - true, - (restClient, indexName, indexMapping, indexSettings) -> { - // don't use ESRestTestCase methods here or, if you do, test running the main method before making the change - StringBuilder jsonBody = new StringBuilder("{"); - if (indexSettings != null && indexSettings.isEmpty() == false) { - jsonBody.append("\"settings\":"); - jsonBody.append(Strings.toString(indexSettings)); - jsonBody.append(","); - } - jsonBody.append("\"mappings\":"); - jsonBody.append(indexMapping); - jsonBody.append("}"); - - Request request = new Request("PUT", "/" + indexName); - request.setJsonEntity(jsonBody.toString()); - restClient.performRequest(request); + loadDataSetIntoEs(client, true, true, false, false, true, true, (restClient, indexName, indexMapping, indexSettings) -> { + // don't use ESRestTestCase methods here or, if you do, test running the main method before making the change + StringBuilder jsonBody = new StringBuilder("{"); + if (indexSettings != null && indexSettings.isEmpty() == false) { + jsonBody.append("\"settings\":"); + jsonBody.append(Strings.toString(indexSettings)); + jsonBody.append(","); } - ); + jsonBody.append("\"mappings\":"); + jsonBody.append(indexMapping); + jsonBody.append("}"); + + Request request = new Request("PUT", "/" + indexName); + request.setJsonEntity(jsonBody.toString()); + restClient.performRequest(request); + }); } } From d2c7fc138015f8b72a8cd9997e9959b752ad1f8c Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 1 Dec 2025 14:54:23 -0500 Subject: [PATCH 25/36] gate other versions of CSV tests on the capabilities --- 
.../xpack/esql/ccq/MultiClusterSpecIT.java | 16 ++++++++++++++++ .../xpack/esql/qa/single_node/EsqlSpecIT.java | 8 ++++++++ .../xpack/esql/qa/rest/EsqlSpecTestCase.java | 10 +++++++++- .../xpack/esql/CsvTestsDataLoader.java | 7 ++++--- 4 files changed, 37 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java index 8bcc45c3ff406..083f52341124d 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java @@ -372,4 +372,20 @@ protected boolean supportsExponentialHistograms() { throw new RuntimeException(e); } } + + @Override + protected boolean supportsTDigestField() { + try { + return RestEsqlTestCase.hasCapabilities( + client(), + List.of(EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.capabilityName()) + ) + && RestEsqlTestCase.hasCapabilities( + remoteClusterClient(), + List.of(EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.capabilityName()) + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + } } diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/EsqlSpecIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/EsqlSpecIT.java index 0469741b935b0..5e303e7382547 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/EsqlSpecIT.java +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/EsqlSpecIT.java @@ -62,6 +62,14 @@ protected boolean 
supportsExponentialHistograms() { ); } + @Override + protected boolean supportsTDigestField() { + return RestEsqlTestCase.hasCapabilities( + client(), + List.of(EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.capabilityName()) + ); + } + @Before public void configureChunks() throws IOException { assumeTrue("test clusters were broken", testClustersOk); diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java index 40cca58a481f8..b18880968aa72 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java @@ -184,7 +184,8 @@ public void setup() { supportsSourceFieldMapping(), supportsInferenceTestService(), false, - supportsExponentialHistograms() + supportsExponentialHistograms(), + supportsTDigestField() ); return null; }); @@ -293,6 +294,13 @@ protected boolean supportsExponentialHistograms() { ); } + protected boolean supportsTDigestField() { + return RestEsqlTestCase.hasCapabilities( + client(), + List.of(EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.capabilityName()) + ); + } + protected void doTest() throws Throwable { doTest(testCase.query); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 0060daaca1eeb..a6acb37150285 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -464,7 +464,7 @@ public static void loadDataSetIntoEs( boolean supportsSourceFieldMapping, boolean 
inferenceEnabled ) throws IOException { - loadDataSetIntoEs(client, supportsIndexModeLookup, supportsSourceFieldMapping, inferenceEnabled, false, false); + loadDataSetIntoEs(client, supportsIndexModeLookup, supportsSourceFieldMapping, inferenceEnabled, false, false, false); } public static void loadDataSetIntoEs( @@ -473,7 +473,8 @@ public static void loadDataSetIntoEs( boolean supportsSourceFieldMapping, boolean inferenceEnabled, boolean timeSeriesOnly, - boolean exponentialHistogramFieldSupported + boolean exponentialHistogramFieldSupported, + boolean tDigestFieldSupported ) throws IOException { loadDataSetIntoEs( client, @@ -482,7 +483,7 @@ public static void loadDataSetIntoEs( inferenceEnabled, timeSeriesOnly, exponentialHistogramFieldSupported, - EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.isEnabled(), + tDigestFieldSupported, (restClient, indexName, indexMapping, indexSettings) -> { ESRestTestCase.createIndex(restClient, indexName, indexSettings, indexMapping, null); } From dd93cd54090eb043f698ce8aefd093153c014009 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 1 Dec 2025 15:39:48 -0500 Subject: [PATCH 26/36] skip TDigest in BlockBuilderCopyFrom tests --- .../compute/data/BlockBuilderCopyFromTests.java | 3 ++- .../org/elasticsearch/compute/test/RandomBlock.java | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockBuilderCopyFromTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockBuilderCopyFromTests.java index 752250bce5643..f02b3c028aa58 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockBuilderCopyFromTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockBuilderCopyFromTests.java @@ -31,7 +31,8 @@ public static List params() { || e == ElementType.NULL || e == ElementType.DOC || e == ElementType.COMPOSITE - || e == 
ElementType.EXPONENTIAL_HISTOGRAM) { + || e == ElementType.EXPONENTIAL_HISTOGRAM + || e == ElementType.TDIGEST) { continue; } for (boolean nullAllowed : new boolean[] { false, true }) { diff --git a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/RandomBlock.java b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/RandomBlock.java index 8281a07aa4df2..eff939b4554fa 100644 --- a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/RandomBlock.java +++ b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/RandomBlock.java @@ -19,6 +19,8 @@ import org.elasticsearch.compute.data.FloatBlock; import org.elasticsearch.compute.data.IntBlock; import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.TDigestBlockBuilder; +import org.elasticsearch.compute.data.TDigestHolder; import org.elasticsearch.exponentialhistogram.ExponentialHistogram; import org.elasticsearch.geo.GeometryTestUtils; import org.elasticsearch.geo.ShapeTestUtils; @@ -89,7 +91,7 @@ public static RandomBlock randomBlock( ) { List> values = new ArrayList<>(); Block.MvOrdering mvOrdering = Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING; - if (elementType == ElementType.EXPONENTIAL_HISTOGRAM) { + if (elementType == ElementType.EXPONENTIAL_HISTOGRAM || elementType == ElementType.TDIGEST) { // histograms do not support multi-values // TODO(b/133393) remove this when we support multi-values in exponential histogram blocks minValuesPerPosition = Math.min(1, minValuesPerPosition); @@ -172,6 +174,12 @@ public static RandomBlock randomBlock( b.append(histogram); valuesAtPosition.add(histogram); } + case TDIGEST -> { + TDigestBlockBuilder b = (TDigestBlockBuilder) builder; + TDigestHolder digest = BlockTestUtils.randomTDigest(); + b.append(digest); + valuesAtPosition.add(digest); + } default -> throw new IllegalArgumentException("unsupported element type [" + elementType + "]"); } } 
From ebe9dcfe38697530a76636d645d8bafbfdcf501f Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 1 Dec 2025 20:49:52 +0000 Subject: [PATCH 27/36] [CI] Auto commit changes from spotless --- .../elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java | 6 +++--- .../org/elasticsearch/xpack/esql/CsvTestsDataLoader.java | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java index 083f52341124d..31413ea06d5cc 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java @@ -381,9 +381,9 @@ protected boolean supportsTDigestField() { List.of(EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.capabilityName()) ) && RestEsqlTestCase.hasCapabilities( - remoteClusterClient(), - List.of(EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.capabilityName()) - ); + remoteClusterClient(), + List.of(EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY.capabilityName()) + ); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index f235e141e99f8..d88b015a9e3d4 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -32,7 +32,6 @@ import org.elasticsearch.logging.Logger; import 
org.elasticsearch.test.rest.ESRestTestCase; import org.elasticsearch.xcontent.XContentType; -import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import java.io.BufferedReader; import java.io.IOException; From 67471533928fe026e6bb7e5f691b290e955b145b Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 1 Dec 2025 16:49:36 -0500 Subject: [PATCH 28/36] fill in some other methods on the block builder --- .../compute/data/TDigestBlockBuilder.java | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java index a6f7eaca72c41..3837d6179dd26 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java @@ -73,17 +73,22 @@ public TDigestBlock.Builder copyFrom(TDigestBlock block, int position) { @Override public Block.Builder appendNull() { - throw new UnsupportedOperationException(); + encodedDigestsBuilder.appendNull(); + minimaBuilder.appendNull(); + maximaBuilder.appendNull(); + sumsBuilder.appendNull(); + valueCountsBuilder.appendNull(); + return this; } @Override public Block.Builder beginPositionEntry() { - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException("TDigest Blocks do not support multi-values"); } @Override public Block.Builder endPositionEntry() { - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException("TDigest Blocks do not support multi-values"); } @Override @@ -122,27 +127,27 @@ public TDigestBlock build() { @Override public BlockLoader.DoubleBuilder minima() { - throw new UnsupportedOperationException(); + return minimaBuilder; } @Override public BlockLoader.DoubleBuilder maxima() { - throw new 
UnsupportedOperationException(); + return maximaBuilder; } @Override public BlockLoader.DoubleBuilder sums() { - throw new UnsupportedOperationException(); + return sumsBuilder; } @Override public BlockLoader.LongBuilder valueCounts() { - throw new UnsupportedOperationException(); + return valueCountsBuilder; } @Override public BlockLoader.BytesRefBuilder encodedDigests() { - throw new UnsupportedOperationException(); + return encodedDigestsBuilder; } @Override From cef90ad3c8db8fed370b7d597e1d28af73849dc0 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 2 Dec 2025 10:08:25 -0500 Subject: [PATCH 29/36] skip TDigest in tests for features it doesn't support --- .../org/elasticsearch/compute/data/BlockBuilderTests.java | 2 +- .../compute/operator/mvdedupe/MultivalueDedupeTests.java | 3 ++- .../compute/operator/topn/ExtractorTests.java | 2 +- .../compute/operator/topn/TopNOperatorTests.java | 8 +++++--- .../org/elasticsearch/compute/test/BlockTestUtils.java | 2 ++ 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockBuilderTests.java index 45f452d7ca188..d5806991d7fa3 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockBuilderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockBuilderTests.java @@ -43,7 +43,7 @@ public static List params() { private static boolean supportsVectors(ElementType type) { return switch (type) { - case AGGREGATE_METRIC_DOUBLE, EXPONENTIAL_HISTOGRAM -> false; + case AGGREGATE_METRIC_DOUBLE, EXPONENTIAL_HISTOGRAM, TDIGEST -> false; default -> true; }; } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/mvdedupe/MultivalueDedupeTests.java 
b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/mvdedupe/MultivalueDedupeTests.java index 73efcec997eef..8dddbec11bb7e 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/mvdedupe/MultivalueDedupeTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/mvdedupe/MultivalueDedupeTests.java @@ -65,7 +65,8 @@ public static List supportedTypes() { ElementType.COMPOSITE, ElementType.FLOAT, ElementType.AGGREGATE_METRIC_DOUBLE, - ElementType.EXPONENTIAL_HISTOGRAM + ElementType.EXPONENTIAL_HISTOGRAM, + ElementType.TDIGEST )) { continue; } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/ExtractorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/ExtractorTests.java index 60b11e5a290e8..073bbcd53bdb5 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/ExtractorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/ExtractorTests.java @@ -130,7 +130,7 @@ public static Iterable parameters() { ) } ); } - case EXPONENTIAL_HISTOGRAM -> + case TDIGEST, EXPONENTIAL_HISTOGRAM -> // multi values are not supported cases.add(valueTestCase("single " + e, e, TopNEncoder.DEFAULT_UNSORTABLE, () -> BlockTestUtils.randomValue(e))); case NULL -> { diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorTests.java index 72020925d0faa..94b94cfdda64d 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorTests.java @@ -82,6 +82,7 @@ import static org.elasticsearch.compute.data.ElementType.FLOAT; import static 
org.elasticsearch.compute.data.ElementType.INT; import static org.elasticsearch.compute.data.ElementType.LONG; +import static org.elasticsearch.compute.data.ElementType.TDIGEST; import static org.elasticsearch.compute.operator.topn.TopNEncoder.DEFAULT_SORTABLE; import static org.elasticsearch.compute.operator.topn.TopNEncoder.DEFAULT_UNSORTABLE; import static org.elasticsearch.compute.operator.topn.TopNEncoder.UTF8; @@ -535,7 +536,7 @@ public void testCollectAllValues() { encoders.add(DEFAULT_SORTABLE); for (ElementType e : ElementType.values()) { - if (e == ElementType.UNKNOWN || e == COMPOSITE || e == EXPONENTIAL_HISTOGRAM) { + if (e == ElementType.UNKNOWN || e == COMPOSITE || e == EXPONENTIAL_HISTOGRAM || e == TDIGEST) { continue; } elementTypes.add(e); @@ -606,7 +607,7 @@ public void testCollectAllValues_RandomMultiValues() { for (int type = 0; type < blocksCount; type++) { ElementType e = randomFrom(ElementType.values()); - if (e == ElementType.UNKNOWN || e == COMPOSITE || e == AGGREGATE_METRIC_DOUBLE || e == EXPONENTIAL_HISTOGRAM) { + if (e == ElementType.UNKNOWN || e == COMPOSITE || e == AGGREGATE_METRIC_DOUBLE || e == EXPONENTIAL_HISTOGRAM || e == TDIGEST) { continue; } elementTypes.add(e); @@ -1042,7 +1043,8 @@ public void testRandomMultiValuesTopN() { || t == ElementType.DOC || t == COMPOSITE || t == AGGREGATE_METRIC_DOUBLE - || t == EXPONENTIAL_HISTOGRAM, + || t == EXPONENTIAL_HISTOGRAM + || t == TDIGEST, () -> randomFrom(ElementType.values()) ); elementTypes.add(e); diff --git a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java index 016455f9c9d56..a1a1688dd4e64 100644 --- a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java +++ b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java @@ -27,6 +27,7 @@ import 
org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.compute.data.OrdinalBytesRefBlock; import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.data.TDigestBlock; import org.elasticsearch.compute.data.TDigestHolder; import org.elasticsearch.core.Releasables; import org.elasticsearch.exponentialhistogram.ExponentialHistogram; @@ -327,6 +328,7 @@ public static List> valuesAtPositions(Block block, int from, int to i++, new ExponentialHistogramScratch() ); + case TDIGEST -> ((TDigestBlock) block).getTDigestHolder(i++); default -> throw new IllegalArgumentException("unsupported element type [" + block.elementType() + "]"); }); } From d4be9561a4d495108a603c1bdfdf13b2d88fcd81 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 2 Dec 2025 11:29:47 -0500 Subject: [PATCH 30/36] ResultBuilder for TDigest --- .../compute/data/BlockFactory.java | 3 + .../compute/data/BlockUtils.java | 1 + .../compute/data/TDigestArrayBlock.java | 61 +++++++++++++++ .../compute/data/TDigestBlock.java | 33 +++++++++ .../compute/data/TDigestBlockBuilder.java | 19 +++++ .../compute/operator/topn/ResultBuilder.java | 1 + .../topn/ResultBuilderForTDigest.java | 74 +++++++++++++++++++ .../compute/operator/topn/ExtractorTests.java | 3 +- 8 files changed, 194 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilderForTDigest.java diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java index a6f37f74bc4f1..42812019fd977 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java @@ -504,6 +504,9 @@ public final ExponentialHistogramBlock newConstantExponentialHistogramBlock(Expo return 
ExponentialHistogramArrayBlock.createConstant(value, positionCount, this); } + public final TDigestBlock newConstantTDigestBlock(TDigestHolder value, int positions) { + return TDigestArrayBlock.createConstant(value, positions, this); + } public BlockLoader.Block newExponentialHistogramBlockFromDocValues( DoubleBlock minima, DoubleBlock maxima, diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java index b3f38bbdb00aa..da7f31913f18c 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java @@ -258,6 +258,7 @@ private static Block constantBlock(BlockFactory blockFactory, ElementType type, case AGGREGATE_METRIC_DOUBLE -> blockFactory.newConstantAggregateMetricDoubleBlock((AggregateMetricDoubleLiteral) val, size); case FLOAT -> blockFactory.newConstantFloatBlockWith((float) val, size); case EXPONENTIAL_HISTOGRAM -> blockFactory.newConstantExponentialHistogramBlock((ExponentialHistogram) val, size); + case TDIGEST -> blockFactory.newConstantTDigestBlock((TDigestHolder) val, size); default -> throw new UnsupportedOperationException("unsupported element type [" + type + "]"); }; } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java index 4dec4b3a6a8c9..633325e4c97ae 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java @@ -12,6 +12,7 @@ import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.core.ReleasableIterator; import org.elasticsearch.core.Releasables; +import 
org.elasticsearch.exponentialhistogram.ExponentialHistogram; import java.io.IOException; import java.util.List; @@ -298,4 +299,64 @@ public TDigestHolder getTDigestHolder(int offset) { valueCounts.getLong(offset) ); } + + public static TDigestBlock createConstant(TDigestHolder histogram, int positionCount, BlockFactory blockFactory) { + // ExponentialHistogramArrayBlock.EncodedHistogramData data = encode(histogram); + DoubleBlock minBlock = null; + DoubleBlock maxBlock = null; + DoubleBlock sumBlock = null; + LongBlock countBlock = null; + BytesRefBlock encodedDigestsBlock = null; + boolean success = false; + try { + countBlock = blockFactory.newConstantLongBlockWith(histogram.getValueCount(), positionCount); + if (Double.isNaN(histogram.getMin())) { + minBlock = (DoubleBlock) blockFactory.newConstantNullBlock(positionCount); + } else { + minBlock = blockFactory.newConstantDoubleBlockWith(histogram.getMin(), positionCount); + } + if (Double.isNaN(histogram.getMax())) { + maxBlock = (DoubleBlock) blockFactory.newConstantNullBlock(positionCount); + } else { + maxBlock = blockFactory.newConstantDoubleBlockWith(histogram.getMax(), positionCount); + } + if (Double.isNaN(histogram.getSum())) { + sumBlock = (DoubleBlock) blockFactory.newConstantNullBlock(positionCount); + } else { + sumBlock = blockFactory.newConstantDoubleBlockWith(histogram.getSum(), positionCount); + } + encodedDigestsBlock = blockFactory.newConstantBytesRefBlockWith(histogram.getEncodedDigest(), positionCount); + success = true; + return new TDigestArrayBlock(encodedDigestsBlock, minBlock, maxBlock, sumBlock, countBlock); + } finally { + if (success == false) { + Releasables.close(minBlock, maxBlock, sumBlock, countBlock, encodedDigestsBlock); + } + } + } + @Override + public boolean equals(Object o) { + if (o instanceof TDigestBlock block) { + return TDigestBlock.equals(this, block); + } + return false; + } + + boolean equalsAfterTypeCheck(TDigestArrayBlock that) { + return minima.equals(that.minima) 
+ && maxima.equals(that.maxima) + && sums.equals(that.sums) + && valueCounts.equals(that.valueCounts) + && encodedDigests.equals(that.encodedDigests); + } + + @Override + public int hashCode() { + /* + for now we use just the hash of encodedDigests + this ensures proper equality with null blocks and should be unique enough for practical purposes. + This mirrors the behavior in Exponential Histogram + */ + return encodedDigests.hashCode(); + } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java index 1ab231a9ca3ea..cdc11f082f9b9 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java @@ -7,10 +7,26 @@ package org.elasticsearch.compute.data; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.mapper.BlockLoader; public sealed interface TDigestBlock extends Block permits ConstantNullBlock, TDigestArrayBlock { + static boolean equals(TDigestBlock blockA, TDigestBlock blockB) { + if (blockA == blockB) { + return true; + } + return switch (blockA) { + case null -> false; + case ConstantNullBlock a -> a.equals(blockB); + case TDigestArrayBlock a -> switch (blockB) { + case null -> false; + case ConstantNullBlock b -> b.equals(a); + case TDigestArrayBlock b -> a.equalsAfterTypeCheck(b); + }; + }; + } + /** * Builder for {@link TDigestBlock} */ @@ -26,4 +42,21 @@ sealed interface Builder extends Block.Builder, BlockLoader.TDigestBuilder permi } TDigestHolder getTDigestHolder(int offset); + + interface SerializedTDigestOutput { + void appendDouble(double value); + + void appendLong(long value); + + void appendBytesRef(BytesRef bytesRef); + } + + interface SerializedTDigestInput { + double readDouble(); + + long readLong(); + + BytesRef readBytesRef(BytesRef scratch); + } + 
} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java index 3837d6179dd26..af7c467a30fde 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java @@ -7,6 +7,8 @@ package org.elasticsearch.compute.data; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.operator.topn.ResultBuilderForTDigest; import org.elasticsearch.core.Releasables; import org.elasticsearch.index.mapper.BlockLoader; @@ -18,6 +20,8 @@ public final class TDigestBlockBuilder implements TDigestBlock.Builder { private final DoubleBlock.Builder sumsBuilder; private final LongBlock.Builder valueCountsBuilder; + private final BytesRef scratch = new BytesRef(); + public TDigestBlockBuilder(int size, BlockFactory blockFactory) { BytesRefBlock.Builder encodedDigestsBuilder = null; DoubleBlock.Builder minimaBuilder = null; @@ -162,4 +166,19 @@ public void append(TDigestHolder val) { sumsBuilder.appendDouble(val.getSum()); valueCountsBuilder.appendLong(val.getValueCount()); } + + public void deserializeAndAppend(TDigestBlock.SerializedTDigestInput input) { + long valueCount = input.readLong(); + valueCountsBuilder.appendLong(valueCount); + if (valueCount > 0) { + sumsBuilder.appendDouble(input.readDouble()); + minimaBuilder.appendDouble(input.readDouble()); + maximaBuilder.appendDouble(input.readDouble()); + } else { + sumsBuilder.appendNull(); + minimaBuilder.appendNull(); + maximaBuilder.appendNull(); + } + encodedDigestsBuilder.appendBytesRef(input.readBytesRef(scratch)); + } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilder.java 
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilder.java index cff31535f82e0..f518734121a2a 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilder.java @@ -56,6 +56,7 @@ static ResultBuilder resultBuilderFor( case DOC -> new ResultBuilderForDoc(blockFactory, (DocVectorEncoder) encoder, positions); case AGGREGATE_METRIC_DOUBLE -> new ResultBuilderForAggregateMetricDouble(blockFactory, positions); case EXPONENTIAL_HISTOGRAM -> new ResultBuilderForExponentialHistogram(blockFactory, positions); + case TDIGEST -> new ResultBuilderForTDigest(blockFactory, positions); default -> { assert false : "Result builder for [" + elementType + "]"; throw new UnsupportedOperationException("Result builder for [" + elementType + "]"); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilderForTDigest.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilderForTDigest.java new file mode 100644 index 0000000000000..e4e2cc0d6472c --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilderForTDigest.java @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.TDigestBlock; +import org.elasticsearch.compute.data.TDigestBlockBuilder; + +public class ResultBuilderForTDigest implements ResultBuilder { + private final TDigestBlockBuilder builder; + private final ResultBuilderForTDigest.ReusableTopNEncoderInput reusableInput = new ReusableTopNEncoderInput(); + + ResultBuilderForTDigest(BlockFactory blockFactory, int positions) { + this.builder = blockFactory.newTDigestBlockBuilder(positions); + } + + @Override + public void decodeKey(BytesRef keys) { + throw new AssertionError("TDigest can't be a key"); + } + + @Override + public void decodeValue(BytesRef values) { + int count = TopNEncoder.DEFAULT_UNSORTABLE.decodeVInt(values); + if (count == 0) { + builder.appendNull(); + return; + } + assert count == 1 : "TDigest does not support multi values"; + reusableInput.inputValues = values; + builder.deserializeAndAppend(reusableInput); + } + + @Override + public Block build() { + return builder.build(); + } + + @Override + public String toString() { + return "ResultBuilderForTDigest"; + } + + @Override + public void close() { + builder.close(); + } + + private static final class ReusableTopNEncoderInput implements TDigestBlock.SerializedTDigestInput { + BytesRef inputValues; + + @Override + public double readDouble() { + return TopNEncoder.DEFAULT_UNSORTABLE.decodeDouble(inputValues); + } + + @Override + public long readLong() { + return TopNEncoder.DEFAULT_UNSORTABLE.decodeLong(inputValues); + } + + @Override + public BytesRef readBytesRef(BytesRef scratch) { + return TopNEncoder.DEFAULT_UNSORTABLE.decodeBytesRef(inputValues, scratch); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/ExtractorTests.java 
b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/ExtractorTests.java index 073bbcd53bdb5..9a4abb824fc86 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/ExtractorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/ExtractorTests.java @@ -211,7 +211,8 @@ public void testNotInKey() { result.decodeValue(values); assertThat(values.length, equalTo(0)); - assertThat(result.build(), equalTo(value)); + Block resultBlock = result.build(); + assertThat(resultBlock, equalTo(value)); } public void testInKey() { From f80aeb0d9a68b62110fd5955990df7d4102f480e Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 2 Dec 2025 13:12:23 -0500 Subject: [PATCH 31/36] ValueLoader for Tdigest --- .../compute/data/ConstantNullBlock.java | 6 ++ .../compute/data/TDigestArrayBlock.java | 16 ++++- .../compute/data/TDigestBlock.java | 2 + .../compute/operator/topn/ValueExtractor.java | 2 + .../topn/ValueExtractorForTDigest.java | 62 +++++++++++++++++++ 5 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractorForTDigest.java diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java index 0f8b19312375b..8eabb7ba171ca 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java @@ -301,6 +301,12 @@ public ExponentialHistogram getExponentialHistogram(int valueIndex, ExponentialH throw new UnsupportedOperationException("null block"); } + @Override + public void serializeTDigest(int valueIndex, SerializedTDigestOutput out, BytesRef scratch) { + assert false : "null block"; + throw new 
UnsupportedOperationException("null block"); + } + + @Override public TDigestHolder getTDigestHolder(int valueIndex) { assert false : "null block"; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java index 633325e4c97ae..05346d635033e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java @@ -12,7 +12,6 @@ import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.core.ReleasableIterator; import org.elasticsearch.core.Releasables; -import org.elasticsearch.exponentialhistogram.ExponentialHistogram; import java.io.IOException; import java.util.List; @@ -288,6 +287,21 @@ public long ramBytesUsed() { return bytes; } + @Override + public void serializeTDigest(int valueIndex, SerializedTDigestOutput out, BytesRef scratch) { + // note that this value count is different from getValueCount(position)! 
+ // this value count represents the number of individual samples the histogram was computed for + long valueCount = valueCounts.getLong(valueCounts.getFirstValueIndex(valueIndex)); + out.appendLong(valueCount); + if (valueCount > 0) { + // sum / min / max are only non-null for non-empty histograms + out.appendDouble(sums.getDouble(sums.getFirstValueIndex(valueIndex))); + out.appendDouble(minima.getDouble(minima.getFirstValueIndex(valueIndex))); + out.appendDouble(maxima.getDouble(maxima.getFirstValueIndex(valueIndex))); + } + out.appendBytesRef(encodedDigests.getBytesRef(encodedDigests.getFirstValueIndex(valueIndex), scratch)); + } + @Override public TDigestHolder getTDigestHolder(int offset) { return new TDigestHolder( diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java index cdc11f082f9b9..8bbc59cad1f0e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java @@ -27,6 +27,8 @@ static boolean equals(TDigestBlock blockA, TDigestBlock blockB) { }; } + void serializeTDigest(int valueIndex, SerializedTDigestOutput out, BytesRef scratch); + /** * Builder for {@link TDigestBlock} */ diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractor.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractor.java index d5120cddcb761..48b8346ece62c 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractor.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractor.java @@ -18,6 +18,7 @@ import org.elasticsearch.compute.data.FloatBlock; import org.elasticsearch.compute.data.IntBlock; import 
org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.TDigestBlock; import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.RefCounted; @@ -55,6 +56,7 @@ static ValueExtractor extractorFor(ElementType elementType, TopNEncoder encoder, case DOC -> new ValueExtractorForDoc(encoder, ((DocBlock) block).asVector()); case AGGREGATE_METRIC_DOUBLE -> new ValueExtractorForAggregateMetricDouble(encoder, (AggregateMetricDoubleBlock) block); case EXPONENTIAL_HISTOGRAM -> new ValueExtractorForExponentialHistogram(encoder, (ExponentialHistogramBlock) block); + case TDIGEST -> new ValueExtractorForTDigest(encoder, (TDigestBlock) block); default -> { assert false : "No value extractor for [" + block.elementType() + "]"; throw new UnsupportedOperationException("No value extractor for [" + block.elementType() + "]"); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractorForTDigest.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractorForTDigest.java new file mode 100644 index 0000000000000..876c34f2ce1fc --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractorForTDigest.java @@ -0,0 +1,62 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator.topn; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.TDigestBlock; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; + +public class ValueExtractorForTDigest implements ValueExtractor { + private final TDigestBlock block; + + private final BytesRef scratch = new BytesRef(); + private final ReusableTopNEncoderOutput reusableOutput = new ReusableTopNEncoderOutput(); + + ValueExtractorForTDigest(TopNEncoder encoder, TDigestBlock block) { + assert encoder == TopNEncoder.DEFAULT_UNSORTABLE; + this.block = block; + } + + @Override + public void writeValue(BreakingBytesRefBuilder values, int position) { + // number of multi-values first for compatibility with ValueExtractorForNull + if (block.isNull(position)) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeVInt(0, values); + } else { + assert block.getValueCount(position) == 1 : "Multi-valued TDigest blocks are not supported in TopN"; + TopNEncoder.DEFAULT_UNSORTABLE.encodeVInt(1, values); + int valueIndex = block.getFirstValueIndex(position); + reusableOutput.target = values; + block.serializeTDigest(valueIndex, reusableOutput, scratch); + } + } + + @Override + public String toString() { + return "ValueExtractorForTDigest"; + } + + private static final class ReusableTopNEncoderOutput implements TDigestBlock.SerializedTDigestOutput { + BreakingBytesRefBuilder target; + + @Override + public void appendDouble(double value) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeDouble(value, target); + } + + @Override + public void appendLong(long value) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeLong(value, target); + } + + @Override + public void appendBytesRef(BytesRef value) { + TopNEncoder.DEFAULT_UNSORTABLE.encodeBytesRef(value, target); + } + } +} From 38a8b3a770e2813ff7f3f3272b88ec5573d1b227 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 2 Dec 2025 14:29:33 -0500 Subject: [PATCH 32/36] implement estimate 
bytes --- .../org/elasticsearch/compute/data/TDigestBlockBuilder.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java index af7c467a30fde..a01c9af1c2804 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java @@ -103,7 +103,8 @@ public Block.Builder mvOrdering(Block.MvOrdering mvOrdering) { @Override public long estimatedBytes() { - return 0; + return encodedDigestsBuilder.estimatedBytes() + minimaBuilder.estimatedBytes() + maximaBuilder.estimatedBytes() + sumsBuilder + .estimatedBytes() + valueCountsBuilder.estimatedBytes(); } @Override From 19803192fb9c915091558d7533df09be2e9894e6 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 2 Dec 2025 15:43:51 -0500 Subject: [PATCH 33/36] skip RightChunkedLeftJoinTests for now --- .../compute/data/TDigestHolder.java | 18 ++++++++++++++++++ .../lookup/RightChunkedLeftJoinTests.java | 4 ++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java index 63ac22aaa5c66..0ce6b6fcffa6a 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java @@ -12,6 +12,7 @@ import java.io.IOException; import java.util.List; +import java.util.Objects; /** * This exists to hold the values from a {@link TDigestBlock}. 
It is roughly parallel to @@ -41,6 +42,23 @@ public TDigestHolder(List centroids, List counts, double min, doub this(encodeCentroidsAndCounts(centroids, counts), min, max, sum, valueCount); } + @Override + public boolean equals(Object o) { + if ((o instanceof TDigestHolder that)) { + return Double.compare(min, that.min) == 0 + && Double.compare(max, that.max) == 0 + && Double.compare(sum, that.sum) == 0 + && valueCount == that.valueCount + && Objects.equals(encodedDigest, that.encodedDigest); + } + return false; + } + + @Override + public int hashCode() { + return Objects.hash(min, max, sum, valueCount, encodedDigest); + } + private static BytesRef encodeCentroidsAndCounts(List centroids, List counts) throws IOException { // TODO: This is copied from the method of the same name in TDigestFieldMapper. It would be nice to find a way to reuse that code BytesStreamOutput streamOutput = new BytesStreamOutput(); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/lookup/RightChunkedLeftJoinTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/lookup/RightChunkedLeftJoinTests.java index 6ad3cab9f1881..1a1c7748cde3e 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/lookup/RightChunkedLeftJoinTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/lookup/RightChunkedLeftJoinTests.java @@ -230,13 +230,13 @@ private void testRandom(BlockFactory factory) { 1, 10, ElementType[]::new, - () -> RandomBlock.randomElementExcluding(List.of(ElementType.AGGREGATE_METRIC_DOUBLE)) + () -> RandomBlock.randomElementExcluding(List.of(ElementType.AGGREGATE_METRIC_DOUBLE, ElementType.TDIGEST)) ); ElementType[] rightColumns = randomArray( 1, 10, ElementType[]::new, - () -> RandomBlock.randomElementExcluding(List.of(ElementType.AGGREGATE_METRIC_DOUBLE)) + () -> RandomBlock.randomElementExcluding(List.of(ElementType.AGGREGATE_METRIC_DOUBLE, 
ElementType.TDIGEST)) ); RandomPage left = randomPage(factory, leftColumns, leftSize); From bb26fe6f401bc1c331930da0bffb918424e79bed Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 2 Dec 2025 15:51:41 -0500 Subject: [PATCH 34/36] skip multivalued tests for Tdigest --- .../org/elasticsearch/compute/data/BlockMultiValuedTests.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java index a01ce9664110f..a62c334916189 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java @@ -50,6 +50,7 @@ public static List params() { || e == ElementType.DOC || e == ElementType.COMPOSITE || e == ElementType.EXPONENTIAL_HISTOGRAM // TODO(b/133393): Enable tests once the block supports lookup + || e == ElementType.TDIGEST || e == ElementType.AGGREGATE_METRIC_DOUBLE) { continue; } From 578e01e1fd64e534ed6bd1d7194d4f56676d7fb7 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 2 Dec 2025 16:04:44 -0500 Subject: [PATCH 35/36] delete unused code --- .../java/org/elasticsearch/compute/data/TDigestArrayBlock.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java index 05346d635033e..e19f698015b80 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java @@ -315,7 +315,6 @@ public TDigestHolder getTDigestHolder(int offset) { } public static TDigestBlock createConstant(TDigestHolder histogram, int 
positionCount, BlockFactory blockFactory) { - // ExponentialHistogramArrayBlock.EncodedHistogramData data = encode(histogram); DoubleBlock minBlock = null; DoubleBlock maxBlock = null; DoubleBlock sumBlock = null; From fc8d0b165270f1729538e3955bbaba21c608df8e Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 2 Dec 2025 21:12:59 +0000 Subject: [PATCH 36/36] [CI] Auto commit changes from spotless --- .../main/java/org/elasticsearch/compute/data/BlockFactory.java | 1 + .../java/org/elasticsearch/compute/data/TDigestArrayBlock.java | 1 + .../org/elasticsearch/compute/data/TDigestBlockBuilder.java | 1 - .../org/elasticsearch/compute/operator/topn/ResultBuilder.java | 2 +- .../org/elasticsearch/compute/operator/topn/ValueExtractor.java | 2 +- .../elasticsearch/compute/operator/topn/TopNOperatorTests.java | 2 +- 6 files changed, 5 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java index 42812019fd977..b3144d6060219 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java @@ -507,6 +507,7 @@ public final ExponentialHistogramBlock newConstantExponentialHistogramBlock(Expo public final TDigestBlock newConstantTDigestBlock(TDigestHolder value, int positions) { return TDigestArrayBlock.createConstant(value, positions, this); } + public BlockLoader.Block newExponentialHistogramBlockFromDocValues( DoubleBlock minima, DoubleBlock maxima, diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java index e19f698015b80..b276bf0c2f895 100644 --- 
a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java @@ -347,6 +347,7 @@ public static TDigestBlock createConstant(TDigestHolder histogram, int positionC } } } + @Override public boolean equals(Object o) { if (o instanceof TDigestBlock block) { diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java index a01c9af1c2804..b097e9f070b5a 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java @@ -8,7 +8,6 @@ package org.elasticsearch.compute.data; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.compute.operator.topn.ResultBuilderForTDigest; import org.elasticsearch.core.Releasables; import org.elasticsearch.index.mapper.BlockLoader; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilder.java index f518734121a2a..b51578baff5d9 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ResultBuilder.java @@ -56,7 +56,7 @@ static ResultBuilder resultBuilderFor( case DOC -> new ResultBuilderForDoc(blockFactory, (DocVectorEncoder) encoder, positions); case AGGREGATE_METRIC_DOUBLE -> new ResultBuilderForAggregateMetricDouble(blockFactory, positions); case EXPONENTIAL_HISTOGRAM -> new ResultBuilderForExponentialHistogram(blockFactory, positions); - case TDIGEST -> new ResultBuilderForTDigest(blockFactory, positions); + 
case TDIGEST -> new ResultBuilderForTDigest(blockFactory, positions); default -> { assert false : "Result builder for [" + elementType + "]"; throw new UnsupportedOperationException("Result builder for [" + elementType + "]"); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractor.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractor.java index 48b8346ece62c..708c218b2aaac 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractor.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/ValueExtractor.java @@ -56,7 +56,7 @@ static ValueExtractor extractorFor(ElementType elementType, TopNEncoder encoder, case DOC -> new ValueExtractorForDoc(encoder, ((DocBlock) block).asVector()); case AGGREGATE_METRIC_DOUBLE -> new ValueExtractorForAggregateMetricDouble(encoder, (AggregateMetricDoubleBlock) block); case EXPONENTIAL_HISTOGRAM -> new ValueExtractorForExponentialHistogram(encoder, (ExponentialHistogramBlock) block); - case TDIGEST -> new ValueExtractorForTDigest(encoder, (TDigestBlock) block); + case TDIGEST -> new ValueExtractorForTDigest(encoder, (TDigestBlock) block); default -> { assert false : "No value extractor for [" + block.elementType() + "]"; throw new UnsupportedOperationException("No value extractor for [" + block.elementType() + "]"); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorTests.java index 94b94cfdda64d..28c44113f15a2 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/topn/TopNOperatorTests.java @@ -607,7 +607,7 @@ public void 
testCollectAllValues_RandomMultiValues() { for (int type = 0; type < blocksCount; type++) { ElementType e = randomFrom(ElementType.values()); - if (e == ElementType.UNKNOWN || e == COMPOSITE || e == AGGREGATE_METRIC_DOUBLE || e == EXPONENTIAL_HISTOGRAM || e == TDIGEST) { + if (e == ElementType.UNKNOWN || e == COMPOSITE || e == AGGREGATE_METRIC_DOUBLE || e == EXPONENTIAL_HISTOGRAM || e == TDIGEST) { continue; } elementTypes.add(e);