Skip to content

Commit

Permalink
Support downsampling of histogram as labels (#93445)
Browse files Browse the repository at this point in the history
The first GA version of downsampling supports numeric metrics and
does not include histograms. Support for downsampling of histogram
fields as metrics will come in the future. In the meantime, we need to make
sure that histograms are correctly handled as labels, considering that we
can't use histogram fields as dimensions or as metrics. This means that
we treat histograms like any other label, that is, we propagate the latest
(most recent timestamp-wise) value. Note that we need this even after
supporting histograms as metrics, in case the time_series_metric is
not provided for a histogram field.

Histogram fields do not have FormattedDocValues. Here we override
getFormattedValues for the histogram field data so that we can
extract the doc values and process them.
  • Loading branch information
salvatore-campagna committed Feb 3, 2023
1 parent 925a6dc commit 6f7eeb0
Show file tree
Hide file tree
Showing 5 changed files with 230 additions and 1 deletion.
6 changes: 6 additions & 0 deletions docs/changelog/93445.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 93445
summary: Support downsampling of histogram as labels
area: Rollup
type: bug
issues:
- 93263
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.FormattedDocValues;
import org.elasticsearch.index.fielddata.HistogramValue;
import org.elasticsearch.index.fielddata.HistogramValues;
import org.elasticsearch.index.fielddata.IndexFieldData;
Expand Down Expand Up @@ -49,6 +50,7 @@
import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Map;
import java.util.stream.Stream;
Expand Down Expand Up @@ -196,6 +198,33 @@ public DocValuesScriptFieldFactory getScriptFieldFactory(String name) {
throw new UnsupportedOperationException("The [" + CONTENT_TYPE + "] field does not " + "support scripts");
}

/**
 * Exposes the binary doc values of this histogram field as {@link FormattedDocValues}.
 *
 * <p>The returned view reports exactly one value per document and decodes it lazily:
 * {@code nextValue()} resets a single {@code InternalHistogramValue} holder with the
 * current document's bytes and returns that same holder every time.
 * NOTE(review): because the holder is reused, a caller that keeps the returned object
 * across documents presumably needs to consume or copy it first -- confirm against callers.
 *
 * @param format not used by this implementation; the histogram is returned as an object
 * @return a per-document view over the histogram doc values of {@code fieldName}
 * @throws UncheckedIOException if the binary doc values cannot be obtained from the reader
 */
@Override
public FormattedDocValues getFormattedValues(DocValueFormat format) {
    try {
        final BinaryDocValues values = DocValues.getBinary(context.reader(), fieldName);
        // One holder shared by every nextValue() call; it is re-pointed at the current bytes.
        final InternalHistogramValue value = new InternalHistogramValue();
        return new FormattedDocValues() {
            @Override
            public boolean advanceExact(int docId) throws IOException {
                return values.advanceExact(docId);
            }

            @Override
            public int docValueCount() {
                // This view always reports a single histogram per document.
                return 1;
            }

            @Override
            public Object nextValue() throws IOException {
                value.reset(values.binaryValue());
                return value;
            }
        };
    } catch (IOException e) {
        // Message typo fixed ("loead" -> "load"); the cause is preserved.
        throw new UncheckedIOException("Unable to load histogram doc values", e);
    }
}

@Override
public SortedBinaryDocValues getBytesValues() {
throw new UnsupportedOperationException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,73 @@ setup:
body:
index.blocks.write: true

- do:
indices.create:
index: test-histogram
body:
settings:
number_of_shards: 1
number_of_replicas: 0
index:
mode: time_series
routing_path: [ metricset, k8s.pod.uid ]
time_series:
start_time: 2021-04-28T00:00:00Z
end_time: 2021-04-29T00:00:00Z
mappings:
properties:
"@timestamp":
type: date
metricset:
type: keyword
time_series_dimension: true
k8s:
properties:
pod:
properties:
uid:
type: keyword
time_series_dimension: true
name:
type: keyword
latency:
type: histogram
empty-histogram:
type: histogram
network:
properties:
tx:
type: long
time_series_metric: gauge
rx:
type: long
time_series_metric: gauge
- do:
bulk:
refresh: true
index: test-histogram
body:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:04.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "network": {"tx": 2001818691, "rx": 802133794}, "latency": {"counts": [10, 12, 20, 5], "values": [1.0, 10.0, 100.0, 1000.0]}}}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:55:04.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "network": {"tx": 2001819988, "rx": 802133911}, "latency": {"counts": [8, 7, 10, 12], "values": [1.0, 2.0, 5.0, 10.0]}}}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T19:00:04.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "network": {"tx": 1001818691, "rx": 502133794}, "latency": {"counts": [1, 5, 5, 22], "values": [1.0, 10.0, 100.0, 1000.0]}}}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T19:05:04.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "network": {"tx": 1001822087, "rx": 502134222}, "latency": {"counts": [7, 15, 10, 10], "values": [1.0, 2.0, 5.0, 10.0]}}}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:51:20.467Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "network": {"tx": 1781818691, "rx": 533135238}, "latency": {"counts": [2, 4, 16, 4], "values": [1.0, 2.0, 5.0, 10.0]}}}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:56:20.467Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "network": {"tx": 1781818691, "rx": 533135567}, "latency": {"counts": [2, 2, 8, 8], "values": [1.0, 10.0, 100.0, 1000.0]}}}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T19:01:04.467Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "network": {"tx": 1651818691, "rx": 487133866}, "latency": {"counts": [4, 5, 4, 13], "values": [1.0, 10.0, 100.0, 1000.0]}}}}'

- do:
indices.put_settings:
index: test-histogram
body:
index.blocks.write: true

---
"Downsample index":
- skip:
Expand Down Expand Up @@ -480,3 +547,104 @@ setup:
{
"fixed_interval": "90m"
}
---
# Downsamples the test-histogram index with a 1h fixed interval and checks that the
# histogram field k8s.pod.latency, which has no time_series_metric, is carried over as a
# label: every downsampled document is expected to hold the histogram of the most recent
# source document in its bucket.
"Downsample histogram as label":
- skip:
version: " - 8.4.99"
reason: "rollup renamed to downsample in 8.5.0"

# Run the downsample operation into rollup-test-histogram.
- do:
indices.downsample:
index: test-histogram
target_index: rollup-test-histogram
body: >
{
"fixed_interval": "1h"
}
- is_true: acknowledged

# Mapping checks: both histogram fields keep type histogram with no time_series_metric,
# the uid dimension is preserved, and the tx gauge becomes an aggregate_metric_double.
- do:
indices.get_mapping:
index: rollup-test-histogram

- match: { rollup-test-histogram.mappings.properties.@timestamp.type: date }
- match: { rollup-test-histogram.mappings.properties.@timestamp.meta.fixed_interval: 1h }
- match: { rollup-test-histogram.mappings.properties.@timestamp.meta.time_zone: UTC }
- match: { rollup-test-histogram.mappings.properties.k8s.properties.pod.properties.latency.type: histogram }
- match: { rollup-test-histogram.mappings.properties.k8s.properties.pod.properties.latency.time_series_metric: null }
- match: { rollup-test-histogram.mappings.properties.k8s.properties.pod.properties.empty-histogram.type: histogram }
- match: { rollup-test-histogram.mappings.properties.k8s.properties.pod.properties.empty-histogram.time_series_metric: null }
- match: { rollup-test-histogram.mappings.properties.k8s.properties.pod.properties.uid.type: keyword }
- match: { rollup-test-histogram.mappings.properties.k8s.properties.pod.properties.uid.time_series_dimension: true }
- match: { rollup-test-histogram.mappings.properties.k8s.properties.pod.properties.network.properties.tx.type: aggregate_metric_double }
- match: { rollup-test-histogram.mappings.properties.k8s.properties.pod.properties.network.properties.tx.metrics: [ "min", "max", "sum", "value_count" ] }
- match: { rollup-test-histogram.mappings.properties.k8s.properties.pod.properties.network.properties.tx.default_metric: max }
- match: { rollup-test-histogram.mappings.properties.k8s.properties.pod.properties.network.properties.tx.time_series_metric: gauge }

# Document checks: sorted by _tsid then @timestamp, two hourly buckets per pod uid.
- do:
search:
index: rollup-test-histogram
body:
sort: [ "_tsid", "@timestamp" ]

# Hit 0: uid 947e4ced..., 18:00 bucket (source docs at 18:50 and 18:55).
# The histogram must be the one indexed at 18:55.
- length: { hits.hits: 4 }
- match: { hits.hits.0._source._doc_count: 2 }
- match: { hits.hits.0._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 }
- match: { hits.hits.0._source.metricset: pod }
- match: { hits.hits.0._source.@timestamp: 2021-04-28T18:00:00.000Z }
- length: { hits.hits.0._source.k8s.pod.latency.counts: 4 }
- match: { hits.hits.0._source.k8s.pod.latency.counts.0: 8 }
- match: { hits.hits.0._source.k8s.pod.latency.counts.1: 7 }
- match: { hits.hits.0._source.k8s.pod.latency.counts.2: 10 }
- match: { hits.hits.0._source.k8s.pod.latency.counts.3: 12 }
- length: { hits.hits.0._source.k8s.pod.latency.values: 4 }
- match: { hits.hits.0._source.k8s.pod.latency.values.0: 1.0 }
- match: { hits.hits.0._source.k8s.pod.latency.values.1: 2.0 }
- match: { hits.hits.0._source.k8s.pod.latency.values.2: 5.0 }
- match: { hits.hits.0._source.k8s.pod.latency.values.3: 10.0 }

# Hit 1: uid 947e4ced..., 19:00 bucket (source docs at 19:00 and 19:05).
# The histogram must be the one indexed at 19:05.
- match: { hits.hits.1._source._doc_count: 2 }
- match: { hits.hits.1._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 }
- match: { hits.hits.1._source.metricset: pod }
- match: { hits.hits.1._source.@timestamp: 2021-04-28T19:00:00.000Z }
- length: { hits.hits.1._source.k8s.pod.latency.counts: 4 }
- match: { hits.hits.1._source.k8s.pod.latency.counts.0: 7 }
- match: { hits.hits.1._source.k8s.pod.latency.counts.1: 15 }
- match: { hits.hits.1._source.k8s.pod.latency.counts.2: 10 }
- match: { hits.hits.1._source.k8s.pod.latency.counts.3: 10 }
- length: { hits.hits.1._source.k8s.pod.latency.values: 4 }
- match: { hits.hits.1._source.k8s.pod.latency.values.0: 1.0 }
- match: { hits.hits.1._source.k8s.pod.latency.values.1: 2.0 }
- match: { hits.hits.1._source.k8s.pod.latency.values.2: 5.0 }
- match: { hits.hits.1._source.k8s.pod.latency.values.3: 10.0 }

# Hit 2: uid df3145b3..., 18:00 bucket (source docs at 18:51 and 18:56).
# The histogram must be the one indexed at 18:56.
- match: { hits.hits.2._source._doc_count: 2 }
- match: { hits.hits.2._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 }
- match: { hits.hits.2._source.metricset: pod }
- match: { hits.hits.2._source.@timestamp: 2021-04-28T18:00:00.000Z }
- length: { hits.hits.2._source.k8s.pod.latency.counts: 4 }
- match: { hits.hits.2._source.k8s.pod.latency.counts.0: 2 }
- match: { hits.hits.2._source.k8s.pod.latency.counts.1: 2 }
- match: { hits.hits.2._source.k8s.pod.latency.counts.2: 8 }
- match: { hits.hits.2._source.k8s.pod.latency.counts.3: 8 }
- length: { hits.hits.2._source.k8s.pod.latency.values: 4 }
- match: { hits.hits.2._source.k8s.pod.latency.values.0: 1.0 }
- match: { hits.hits.2._source.k8s.pod.latency.values.1: 10.0 }
- match: { hits.hits.2._source.k8s.pod.latency.values.2: 100.0 }
- match: { hits.hits.2._source.k8s.pod.latency.values.3: 1000.0 }

# Hit 3: uid df3145b3..., 19:00 bucket (single source doc at 19:01).
- match: { hits.hits.3._source._doc_count: 1 }
- match: { hits.hits.3._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 }
- match: { hits.hits.3._source.metricset: pod }
- match: { hits.hits.3._source.@timestamp: 2021-04-28T19:00:00.000Z }
- length: { hits.hits.3._source.k8s.pod.latency.counts: 4 }
- match: { hits.hits.3._source.k8s.pod.latency.counts.0: 4 }
- match: { hits.hits.3._source.k8s.pod.latency.counts.1: 5 }
- match: { hits.hits.3._source.k8s.pod.latency.counts.2: 4 }
- match: { hits.hits.3._source.k8s.pod.latency.counts.3: 13 }
- length: { hits.hits.3._source.k8s.pod.latency.values: 4 }
- match: { hits.hits.3._source.k8s.pod.latency.values.0: 1.0 }
- match: { hits.hits.3._source.k8s.pod.latency.values.1: 10.0 }
- match: { hits.hits.3._source.k8s.pod.latency.values.2: 100.0 }
- match: { hits.hits.3._source.k8s.pod.latency.values.3: 1000.0 }
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ private AbstractRollupFieldProducer createRollupFieldProducer() {
};
} else {
// If field is not a metric, we downsample it as a label
if ("histogram".equals(fieldType.typeName())) {
return new LabelFieldProducer.HistogramLastLabelFieldProducer(name());
}
return new LabelFieldProducer.LabelLastValueFieldProducer(name());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@
package org.elasticsearch.xpack.downsample;

import org.elasticsearch.index.fielddata.FormattedDocValues;
import org.elasticsearch.index.fielddata.HistogramValue;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xpack.aggregatemetric.mapper.AggregateDoubleMetricFieldMapper.Metric;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
* Class that produces values for a label field.
Expand Down Expand Up @@ -85,7 +88,7 @@ void collect(Object value) {
* {@link LabelFieldProducer} implementation for a last value label
*/
static class LabelLastValueFieldProducer extends LabelFieldProducer {
private final LastValueLabel label;
protected final LastValueLabel label;

LabelLastValueFieldProducer(String name, LastValueLabel label) {
super(name);
Expand Down Expand Up @@ -144,4 +147,24 @@ static class AggregateMetricFieldProducer extends LabelLastValueFieldProducer {
super(name, new LastValueLabel(metric.name()));
}
}

/**
 * {@link LabelLastValueFieldProducer} variant for histogram fields: the collected label is
 * cast to {@link HistogramValue} and written out as an object with a {@code counts} array
 * and a {@code values} array.
 */
public static class HistogramLastLabelFieldProducer extends LabelLastValueFieldProducer {
    HistogramLastLabelFieldProducer(String name) {
        super(name);
    }

    /**
     * Writes the collected histogram under this producer's field name, or writes nothing
     * when no value has been collected.
     *
     * @param builder the builder that receives the histogram object
     * @throws IOException if iterating the histogram or writing to the builder fails
     */
    @Override
    public void write(XContentBuilder builder) throws IOException {
        if (isEmpty()) {
            return;
        }
        final HistogramValue histogram = (HistogramValue) label.get();
        final List<Double> bucketValues = new ArrayList<>();
        final List<Integer> bucketCounts = new ArrayList<>();
        // Walk the histogram bucket by bucket, keeping both lists in lockstep.
        while (histogram.next()) {
            bucketValues.add(histogram.value());
            bucketCounts.add(histogram.count());
        }
        builder.startObject(name());
        builder.field("counts", bucketCounts);
        builder.field("values", bucketValues);
        builder.endObject();
    }
}
}

0 comments on commit 6f7eeb0

Please sign in to comment.