-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Closed
Description
Inaccurate aggregation results when using doubleSum aggregator when doing a topN query
Affected Version
31.0.0 and 31.0.1
Description
-
when using rollup metric aggregations on the data(longSum/doubleSum) during ingestion, topN queries with granularities smaller than 15 mins provide inflated results when using
doubleSumaggregator alone. -
when the metric aggregators have a
longSumorfloatSumalong with thedoubleSum, it provides correct results. -
Cluster size
- mid-size cluster with 3 master services, 6 historicals, 2 brokers
-
Steps to reproduce the problem
- input data
2025-02-19T00:00:00.000Z,a, east_sc, 1
2025-02-19T00:00:00.000Z,b, east_sc, 1
2025-02-19T00:00:00.000Z,c, east_sc, 1
2025-02-19T00:01:00.000Z,d, east_sc, 2
2025-02-19T00:01:00.000Z,e, east_sc, 2
2025-02-19T00:01:00.000Z,f, east_sc, 2
2025-02-19T00:02:00.000Z,g, east_sc, 3
2025-02-19T00:02:00.000Z,h, east_sc, 3
2025-02-19T00:02:00.000Z,i, east_sc, 3- ingestion payload for the data
{
"type": "index_parallel",
"id": "index_parallel_topN-bug-check-4_djegegba_2025-02-20T15:54:55.042Z",
"groupId": "index_parallel_topN-bug-check-4_djegegba_2025-02-20T15:54:55.042Z",
"resource": {
"availabilityGroup": "index_parallel_topN-bug-check-4_djegegba_2025-02-20T15:54:55.042Z",
"requiredCapacity": 1
},
"spec": {
"dataSchema": {
"dataSource": "topN-bug-check-4",
"timestampSpec": {
"column": "time",
"format": "iso",
"missingValue": null
},
"dimensionsSpec": {
"dimensions": [],
"dimensionExclusions": [
"val",
"__time",
"count",
"time",
"sum_val"
],
"includeAllDimensions": false,
"useSchemaDiscovery": false
},
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "longSum",
"name": "sum_val",
"fieldName": "val"
}
],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "HOUR",
"queryGranularity": "MINUTE",
"rollup": true,
"intervals": []
},
"transformSpec": {
"filter": null,
"transforms": []
}
},
"ioConfig": {
"type": "index_parallel",
"inputSource": {
"type": "inline",
"data": "2025-02-19T00:00:00.000Z,a, east_sc, 1\n2025-02-19T00:00:00.000Z,b, east_sc, 1\n2025-02-19T00:00:00.000Z,c, east_sc, 1\n2025-02-19T00:01:00.000Z,d, east_sc, 2\n2025-02-19T00:01:00.000Z,e, east_sc, 2\n2025-02-19T00:01:00.000Z,f, east_sc, 2\n2025-02-19T00:02:00.000Z,g, east_sc, 3\n2025-02-19T00:02:00.000Z,h, east_sc, 3\n2025-02-19T00:02:00.000Z,i, east_sc, 3\n"
},
"inputFormat": {
"type": "csv",
"columns": [
"time",
"tmp",
"dc",
"val"
]
},
"appendToExisting": false,
"dropExisting": false
},
"tuningConfig": {
"type": "index_parallel",
"maxRowsPerSegment": 5000000,
"appendableIndexSpec": {
"type": "onheap",
"preserveExistingMetrics": false
},
"maxRowsInMemory": 1000000,
"maxBytesInMemory": 0,
"skipBytesInMemoryOverheadCheck": false,
"maxTotalRows": null,
"numShards": null,
"splitHintSpec": null,
"partitionsSpec": {
"type": "hashed",
"numShards": null,
"partitionDimensions": [],
"partitionFunction": "murmur3_32_abs",
"maxRowsPerSegment": 5000000
},
"indexSpec": {
"bitmap": {
"type": "roaring"
},
"dimensionCompression": "lz4",
"stringDictionaryEncoding": {
"type": "utf8"
},
"metricCompression": "lz4",
"longEncoding": "longs"
},
"indexSpecForIntermediatePersists": {
"bitmap": {
"type": "roaring"
},
"dimensionCompression": "lz4",
"stringDictionaryEncoding": {
"type": "utf8"
},
"metricCompression": "lz4",
"longEncoding": "longs"
},
"maxPendingPersists": 0,
"forceGuaranteedRollup": true,
"reportParseExceptions": false,
"pushTimeout": 0,
"segmentWriteOutMediumFactory": null,
"maxNumConcurrentSubTasks": 1,
"maxRetry": 3,
"taskStatusCheckPeriodMs": 1000,
"chatHandlerTimeout": "PT10S",
"chatHandlerNumRetries": 5,
"maxNumSegmentsToMerge": 100,
"totalNumMergeTasks": 10,
"logParseExceptions": false,
"maxParseExceptions": 2147483647,
"maxSavedParseExceptions": 0,
"maxColumnsToMerge": -1,
"awaitSegmentAvailabilityTimeoutMillis": 0,
"maxAllowedLockCount": -1,
"numPersistThreads": 1,
"partitionDimensions": []
}
},
"context": {
"forceTimeChunkLock": true,
"useLineageBasedSegmentAllocation": true
},
"dataSource": "topN-bug-check-4"
}- query payload
{
"dimension": {
"type": "default",
"dimension": "dc",
"outputName": "Data center"
},
"metric": "val",
"threshold": 5,
"dataSource": "topN-bug-check-4",
"granularity": "minute",
"intervals": [
"2025-02-19T00:00Z/2025-02-19T00:03Z"
],
"aggregations": [
{
"fieldName": "sum_val",
"type": "doubleSum",
"name": "val"
}
],
"postAggregations": [],
"context": {
"queryId": "tbc",
"vectorize": true
},
"queryType": "topN"
}Reactions are currently unavailable

