From dfbd2c6417019fd7ed40e46c35c9a8107c5df8f7 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Mon, 6 May 2024 17:50:14 +1000 Subject: [PATCH 01/43] Add some test cases --- .../testdata/ours/binary_operators.test | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 pkg/streamingpromql/testdata/ours/binary_operators.test diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test new file mode 100644 index 00000000000..fe9431f1af2 --- /dev/null +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -0,0 +1,120 @@ +# SPDX-License-Identifier: AGPL-3.0-only + +# Most cases for aggregation operators are covered already in the upstream test cases. +# These test cases cover scenarios not covered by the upstream test cases, such as range queries, or edge cases that are uniquely likely to cause issues in the streaming engine. + +# Throughout this file, we use a 6m step to avoid the default 5m lookback window. 
+ +# Basic arithmetic operations +load 6m + left_side 11 21 32 42 + right_side 1 2 3 4 + +eval range from 0 to 24m step 6m left_side + right_side + {} 12 23 35 46 + +eval range from 0 to 24m step 6m left_side - right_side + {} 10 19 29 38 + +eval range from 0 to 24m step 6m left_side * right_side + {} 11 42 96 168 + +eval range from 0 to 24m step 6m left_side / right_side + {} 11 10.5 10.66666 10.5 + +eval range from 0 to 24m step 6m left_side % right_side + {} 0 1 2 2 + +eval range from 0 to 24m step 6m left_side ^ right_side + {} 11 441 32768 3111696 + +clear + +# One-to-one matching with all labels +load 6m + left_side{env="prod", pod="pod-abc123"} 1 2 3 4 + left_side{env="dev", pod="pod-abc123"} 10 20 30 40 + left_side{env="dev", pod="pod-xyz456"} 9 9 9 9 + right_side{env="prod", pod="pod-abc123"} 100 200 300 400 + right_side{env="dev", pod="pod-abc123"} 1000 2000 3000 4000 + right_side{env="dev", pod="pod-mno789"} 5 5 5 5 + +# Matches on both sides: returns results for matching series, ignores non-matching series +eval range from 0 to 24m step 6m left_side + right_side + {env="prod", pod="pod-abc123"} 101 202 303 404 + {env="dev", pod="pod-abc123"} 1010 2020 3030 4040 + +# No series on either side: returns no results +eval range from 0 to 24m step 6m left_side_that_doesnt_exist + right_side_that_doesnt_exist + +# No series on left side: returns no results +eval range from 0 to 24m step 6m left_side_that_doesnt_exist + right_side + +# No series on right side: returns no results +eval range from 0 to 24m step 6m left_side + right_side_that_doesnt_exist + +clear + +# Series match on both sides, but points don't align +load 6m + partial_left_side 1 2 _ _ + partial_right_side _ _ 3 4 + +eval range from 0 to 24m step 6m partial_left_side + partial_right_side + +clear + +# One-to-one matching with "on" and "ignoring" +load 6m + left_side{env="test", pod="a"} 1 2 3 + left_side{env="prod", pod="b"} 4 5 6 + right_side{env="prod", pod="a"} 10 20 30 + 
right_side{env="test", pod="b"} 40 50 60 + +eval range from 0 to 24m step 6m left_side + on(env) right_side + {env="prod"} 14 25 36 + {env="test"} 41 52 63 + +eval range from 0 to 24m step 6m left_side + ignoring(pod) right_side + {env="prod"} 14 25 36 + {env="test"} 41 52 63 + +clear + +# One-to-one matching, but different series match at different time steps, or not at all +load 6m + left_side{env="test", bar="a"} 1 _ 3 _ _ 6 _ + left_side{env="test", bar="b"} _ 2 _ 4 _ _ _ + right_side{env="test", foo="0"} 2 2 _ _ _ _ 2 + right_side{env="test", foo="1"} _ _ 3 3 _ _ _ + +eval range from 0 to 42m step 6m left_side * on (env) right_side + {env="test"} 2 4 9 12 _ _ _ + +clear + +# One-to-one matching with multiple matches on left side +load 6m + left_side{env="test", pod="a"} 1 2 3 + left_side{env="test", pod="b"} 4 5 6 + left_side{env="test", pod="c"} 7 8 9 + left_side{env="test", pod="d"} _ 10 11 + right_side{env="test"} 100 200 300 + +eval_fail range from 0 to 42m step 6m left_side * on (env) right_side + # TODO: expected_message multiple matches for labels: many-to-one matching must be explicit (group_left/group_right) + +clear + +# One-to-one matching with multiple matches on right side +load 6m + left_side{env="test"} 100 200 300 + right_side{env="test", pod="a"} 1 2 3 + right_side{env="test", pod="b"} 4 5 6 + right_side{env="test", pod="c"} 7 8 9 + right_side{env="test", pod="d"} _ 10 11 + +eval_fail range from 0 to 42m step 6m left_side * on (env) right_side + # TODO: expected_message found duplicate series for the match group {env="test"} on the right hand-side of the operation: [{__name__="right_side", env="test", pod="b"}, {__name__="right_side", env="test", pod="a"}];many-to-many matching not allowed: matching labels must be unique on one side + +clear From 74f17a928e1f5fbb1936cd47d6988456f9c4de5f Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Mon, 6 May 2024 19:49:52 +1000 Subject: [PATCH 02/43] Enable upstream test cases for binary operators with 
one-to-one matching and arithmetic operations --- .../testdata/upstream/operators.test | 580 ++++++++++++++++++ .../testdata/upstream/operators.test.disabled | 494 --------------- 2 files changed, 580 insertions(+), 494 deletions(-) create mode 100644 pkg/streamingpromql/testdata/upstream/operators.test delete mode 100644 pkg/streamingpromql/testdata/upstream/operators.test.disabled diff --git a/pkg/streamingpromql/testdata/upstream/operators.test b/pkg/streamingpromql/testdata/upstream/operators.test new file mode 100644 index 00000000000..1b1b899145c --- /dev/null +++ b/pkg/streamingpromql/testdata/upstream/operators.test @@ -0,0 +1,580 @@ +# SPDX-License-Identifier: AGPL-3.0-only +# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/operators.test +# Provenance-includes-license: Apache-2.0 +# Provenance-includes-copyright: The Prometheus Authors + +load 5m + http_requests{job="api-server", instance="0", group="production"} 0+10x10 + http_requests{job="api-server", instance="1", group="production"} 0+20x10 + http_requests{job="api-server", instance="0", group="canary"} 0+30x10 + http_requests{job="api-server", instance="1", group="canary"} 0+40x10 + http_requests{job="app-server", instance="0", group="production"} 0+50x10 + http_requests{job="app-server", instance="1", group="production"} 0+60x10 + http_requests{job="app-server", instance="0", group="canary"} 0+70x10 + http_requests{job="app-server", instance="1", group="canary"} 0+80x10 + +load 5m + vector_matching_a{l="x"} 0+1x100 + vector_matching_a{l="y"} 0+2x50 + vector_matching_b{l="x"} 0+4x25 + + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) - COUNT(http_requests) BY (job) +# {job="api-server"} 996 +# {job="app-server"} 2596 + +# Unsupported by streaming engine. +# eval instant at 50m 2 - SUM(http_requests) BY (job) +# {job="api-server"} -998 +# {job="app-server"} -2598 + +# Unsupported by streaming engine. 
+# eval instant at 50m -http_requests{job="api-server",instance="0",group="production"} +# {job="api-server",instance="0",group="production"} -100 + +# Unsupported by streaming engine. +# eval instant at 50m +http_requests{job="api-server",instance="0",group="production"} +# http_requests{job="api-server",instance="0",group="production"} 100 + +# Unsupported by streaming engine. +# eval instant at 50m - - - SUM(http_requests) BY (job) +# {job="api-server"} -1000 +# {job="app-server"} -2600 + +# Unsupported by streaming engine. +# eval instant at 50m - - - 1 +# -1 + +# Unsupported by streaming engine. +# eval instant at 50m -2^---1*3 +# -1.5 + +# Unsupported by streaming engine. +# eval instant at 50m 2/-2^---1*3+2 +# -10 + +# Unsupported by streaming engine. +# eval instant at 50m -10^3 * - SUM(http_requests) BY (job) ^ -1 +# {job="api-server"} 1 +# {job="app-server"} 0.38461538461538464 + +# Unsupported by streaming engine. +# eval instant at 50m 1000 / SUM(http_requests) BY (job) +# {job="api-server"} 1 +# {job="app-server"} 0.38461538461538464 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) - 2 +# {job="api-server"} 998 +# {job="app-server"} 2598 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) % 3 +# {job="api-server"} 1 +# {job="app-server"} 2 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) % 0.3 +# {job="api-server"} 0.1 +# {job="app-server"} 0.2 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) ^ 2 +# {job="api-server"} 1000000 +# {job="app-server"} 6760000 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) % 3 ^ 2 +# {job="api-server"} 1 +# {job="app-server"} 8 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) % 2 ^ (3 ^ 2) +# {job="api-server"} 488 +# {job="app-server"} 40 + +# Unsupported by streaming engine. 
+# eval instant at 50m SUM(http_requests) BY (job) % 2 ^ 3 ^ 2 +# {job="api-server"} 488 +# {job="app-server"} 40 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) % 2 ^ 3 ^ 2 ^ 2 +# {job="api-server"} 1000 +# {job="app-server"} 2600 + +# Unsupported by streaming engine. +# eval instant at 50m COUNT(http_requests) BY (job) ^ COUNT(http_requests) BY (job) +# {job="api-server"} 256 +# {job="app-server"} 256 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) / 0 +# {job="api-server"} +Inf +# {job="app-server"} +Inf + +# Unsupported by streaming engine. +# eval instant at 50m http_requests{group="canary", instance="0", job="api-server"} / 0 +# {group="canary", instance="0", job="api-server"} +Inf + +# Unsupported by streaming engine. +# eval instant at 50m -1 * http_requests{group="canary", instance="0", job="api-server"} / 0 +# {group="canary", instance="0", job="api-server"} -Inf + +# Unsupported by streaming engine. +# eval instant at 50m 0 * http_requests{group="canary", instance="0", job="api-server"} / 0 +# {group="canary", instance="0", job="api-server"} NaN + +# Unsupported by streaming engine. +# eval instant at 50m 0 * http_requests{group="canary", instance="0", job="api-server"} % 0 +# {group="canary", instance="0", job="api-server"} NaN + +eval instant at 50m SUM(http_requests) BY (job) + SUM(http_requests) BY (job) + {job="api-server"} 2000 + {job="app-server"} 5200 + +eval instant at 50m (SUM((http_requests)) BY (job)) + SUM(http_requests) BY (job) + {job="api-server"} 2000 + {job="app-server"} 5200 + +eval instant at 50m http_requests{job="api-server", group="canary"} + http_requests{group="canary", instance="0", job="api-server"} 300 + http_requests{group="canary", instance="1", job="api-server"} 400 + +# Unsupported by streaming engine. 
+# eval instant at 50m http_requests{job="api-server", group="canary"} + rate(http_requests{job="api-server"}[5m]) * 5 * 60 +# {group="canary", instance="0", job="api-server"} 330 +# {group="canary", instance="1", job="api-server"} 440 + +# Unsupported by streaming engine. +# eval instant at 50m rate(http_requests[25m]) * 25 * 60 +# {group="canary", instance="0", job="api-server"} 150 +# {group="canary", instance="0", job="app-server"} 350 +# {group="canary", instance="1", job="api-server"} 200 +# {group="canary", instance="1", job="app-server"} 400 +# {group="production", instance="0", job="api-server"} 50 +# {group="production", instance="0", job="app-server"} 249.99999999999997 +# {group="production", instance="1", job="api-server"} 100 +# {group="production", instance="1", job="app-server"} 300 + +# Unsupported by streaming engine. +# eval instant at 50m (rate((http_requests[25m])) * 25) * 60 +# {group="canary", instance="0", job="api-server"} 150 +# {group="canary", instance="0", job="app-server"} 350 +# {group="canary", instance="1", job="api-server"} 200 +# {group="canary", instance="1", job="app-server"} 400 +# {group="production", instance="0", job="api-server"} 50 +# {group="production", instance="0", job="app-server"} 249.99999999999997 +# {group="production", instance="1", job="api-server"} 100 +# {group="production", instance="1", job="app-server"} 300 + + +# Unsupported by streaming engine. +# eval instant at 50m http_requests{group="canary"} and http_requests{instance="0"} +# http_requests{group="canary", instance="0", job="api-server"} 300 +# http_requests{group="canary", instance="0", job="app-server"} 700 + +# Unsupported by streaming engine. +# eval instant at 50m (http_requests{group="canary"} + 1) and http_requests{instance="0"} +# {group="canary", instance="0", job="api-server"} 301 +# {group="canary", instance="0", job="app-server"} 701 + +# Unsupported by streaming engine. 
+# eval instant at 50m (http_requests{group="canary"} + 1) and on(instance, job) http_requests{instance="0", group="production"} +# {group="canary", instance="0", job="api-server"} 301 +# {group="canary", instance="0", job="app-server"} 701 + +# Unsupported by streaming engine. +# eval instant at 50m (http_requests{group="canary"} + 1) and on(instance) http_requests{instance="0", group="production"} +# {group="canary", instance="0", job="api-server"} 301 +# {group="canary", instance="0", job="app-server"} 701 + +# Unsupported by streaming engine. +# eval instant at 50m (http_requests{group="canary"} + 1) and ignoring(group) http_requests{instance="0", group="production"} +# {group="canary", instance="0", job="api-server"} 301 +# {group="canary", instance="0", job="app-server"} 701 + +# Unsupported by streaming engine. +# eval instant at 50m (http_requests{group="canary"} + 1) and ignoring(group, job) http_requests{instance="0", group="production"} +# {group="canary", instance="0", job="api-server"} 301 +# {group="canary", instance="0", job="app-server"} 701 + +# Unsupported by streaming engine. +# eval instant at 50m http_requests{group="canary"} or http_requests{group="production"} +# http_requests{group="canary", instance="0", job="api-server"} 300 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="production", instance="0", job="app-server"} 500 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="production", instance="1", job="app-server"} 600 + +# On overlap the rhs samples must be dropped. +# Unsupported by streaming engine. 
+# eval instant at 50m (http_requests{group="canary"} + 1) or http_requests{instance="1"} +# {group="canary", instance="0", job="api-server"} 301 +# {group="canary", instance="0", job="app-server"} 701 +# {group="canary", instance="1", job="api-server"} 401 +# {group="canary", instance="1", job="app-server"} 801 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="production", instance="1", job="app-server"} 600 + + +# Matching only on instance excludes everything that has instance=0/1 but includes +# entries without the instance label. +# Unsupported by streaming engine. +# eval instant at 50m (http_requests{group="canary"} + 1) or on(instance) (http_requests or cpu_count or vector_matching_a) +# {group="canary", instance="0", job="api-server"} 301 +# {group="canary", instance="0", job="app-server"} 701 +# {group="canary", instance="1", job="api-server"} 401 +# {group="canary", instance="1", job="app-server"} 801 +# vector_matching_a{l="x"} 10 +# vector_matching_a{l="y"} 20 + +# Unsupported by streaming engine. +# eval instant at 50m (http_requests{group="canary"} + 1) or ignoring(l, group, job) (http_requests or cpu_count or vector_matching_a) +# {group="canary", instance="0", job="api-server"} 301 +# {group="canary", instance="0", job="app-server"} 701 +# {group="canary", instance="1", job="api-server"} 401 +# {group="canary", instance="1", job="app-server"} 801 +# vector_matching_a{l="x"} 10 +# vector_matching_a{l="y"} 20 + +# Unsupported by streaming engine. +# eval instant at 50m http_requests{group="canary"} unless http_requests{instance="0"} +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="canary", instance="1", job="app-server"} 800 + +# Unsupported by streaming engine. +# eval instant at 50m http_requests{group="canary"} unless on(job) http_requests{instance="0"} + +# Unsupported by streaming engine. 
+# eval instant at 50m http_requests{group="canary"} unless on(job, instance) http_requests{instance="0"} +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="canary", instance="1", job="app-server"} 800 + +eval instant at 50m http_requests{group="canary"} / on(instance,job) http_requests{group="production"} + {instance="0", job="api-server"} 3 + {instance="0", job="app-server"} 1.4 + {instance="1", job="api-server"} 2 + {instance="1", job="app-server"} 1.3333333333333333 + +# Unsupported by streaming engine. +# eval instant at 50m http_requests{group="canary"} unless ignoring(group, instance) http_requests{instance="0"} + +# Unsupported by streaming engine. +# eval instant at 50m http_requests{group="canary"} unless ignoring(group) http_requests{instance="0"} +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="canary", instance="1", job="app-server"} 800 + +eval instant at 50m http_requests{group="canary"} / ignoring(group) http_requests{group="production"} + {instance="0", job="api-server"} 3 + {instance="0", job="app-server"} 1.4 + {instance="1", job="api-server"} 2 + {instance="1", job="app-server"} 1.3333333333333333 + +# https://github.com/prometheus/prometheus/issues/1489 +# Unsupported by streaming engine. +# eval instant at 50m http_requests AND ON (dummy) vector(1) +# http_requests{group="canary", instance="0", job="api-server"} 300 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="production", instance="0", job="app-server"} 500 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="production", instance="1", job="app-server"} 600 + +# Unsupported by streaming engine. 
+# eval instant at 50m http_requests AND IGNORING (group, instance, job) vector(1) +# http_requests{group="canary", instance="0", job="api-server"} 300 +# http_requests{group="canary", instance="0", job="app-server"} 700 +# http_requests{group="canary", instance="1", job="api-server"} 400 +# http_requests{group="canary", instance="1", job="app-server"} 800 +# http_requests{group="production", instance="0", job="api-server"} 100 +# http_requests{group="production", instance="0", job="app-server"} 500 +# http_requests{group="production", instance="1", job="api-server"} 200 +# http_requests{group="production", instance="1", job="app-server"} 600 + + +# Comparisons. +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) > 1000 +# {job="app-server"} 2600 + +# Unsupported by streaming engine. +# eval instant at 50m 1000 < SUM(http_requests) BY (job) +# {job="app-server"} 2600 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) <= 1000 +# {job="api-server"} 1000 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) != 1000 +# {job="app-server"} 2600 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) == 1000 +# {job="api-server"} 1000 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) == bool 1000 +# {job="api-server"} 1 +# {job="app-server"} 0 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) == bool SUM(http_requests) BY (job) +# {job="api-server"} 1 +# {job="app-server"} 1 + +# Unsupported by streaming engine. +# eval instant at 50m SUM(http_requests) BY (job) != bool SUM(http_requests) BY (job) +# {job="api-server"} 0 +# {job="app-server"} 0 + +# Unsupported by streaming engine. +# eval instant at 50m 0 == bool 1 +# 0 + +# Unsupported by streaming engine. +# eval instant at 50m 1 == bool 1 +# 1 + +# Unsupported by streaming engine. 
+# eval instant at 50m http_requests{job="api-server", instance="0", group="production"} == bool 100 +# {job="api-server", instance="0", group="production"} 1 + +# group_left/group_right. + +clear + +load 5m + node_var{instance="abc",job="node"} 2 + node_role{instance="abc",job="node",role="prometheus"} 1 + +load 5m + node_cpu{instance="abc",job="node",mode="idle"} 3 + node_cpu{instance="abc",job="node",mode="user"} 1 + node_cpu{instance="def",job="node",mode="idle"} 8 + node_cpu{instance="def",job="node",mode="user"} 2 + +load 5m + random{foo="bar"} 1 + +load 5m + threshold{instance="abc",job="node",target="a@b.com"} 0 + +# Copy machine role to node variable. +# Unsupported by streaming engine. +# eval instant at 5m node_role * on (instance) group_right (role) node_var +# {instance="abc",job="node",role="prometheus"} 2 + +# Unsupported by streaming engine. +# eval instant at 5m node_var * on (instance) group_left (role) node_role +# {instance="abc",job="node",role="prometheus"} 2 + +# Unsupported by streaming engine. +# eval instant at 5m node_var * ignoring (role) group_left (role) node_role +# {instance="abc",job="node",role="prometheus"} 2 + +# Unsupported by streaming engine. +# eval instant at 5m node_role * ignoring (role) group_right (role) node_var +# {instance="abc",job="node",role="prometheus"} 2 + +# Copy machine role to node variable with instrumentation labels. +# Unsupported by streaming engine. +# eval instant at 5m node_cpu * ignoring (role, mode) group_left (role) node_role +# {instance="abc",job="node",mode="idle",role="prometheus"} 3 +# {instance="abc",job="node",mode="user",role="prometheus"} 1 + +# Unsupported by streaming engine. +# eval instant at 5m node_cpu * on (instance) group_left (role) node_role +# {instance="abc",job="node",mode="idle",role="prometheus"} 3 +# {instance="abc",job="node",mode="user",role="prometheus"} 1 + + +# Ratio of total. +# Unsupported by streaming engine. 
+# eval instant at 5m node_cpu / on (instance) group_left sum by (instance,job)(node_cpu) +# {instance="abc",job="node",mode="idle"} .75 +# {instance="abc",job="node",mode="user"} .25 +# {instance="def",job="node",mode="idle"} .80 +# {instance="def",job="node",mode="user"} .20 + +# Unsupported by streaming engine. +# eval instant at 5m sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu) +# {job="node",mode="idle"} 0.7857142857142857 +# {job="node",mode="user"} 0.21428571428571427 + +# Unsupported by streaming engine. +# eval instant at 5m sum(sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu)) +# {} 1.0 + + +# Unsupported by streaming engine. +# eval instant at 5m node_cpu / ignoring (mode) group_left sum without (mode)(node_cpu) +# {instance="abc",job="node",mode="idle"} .75 +# {instance="abc",job="node",mode="user"} .25 +# {instance="def",job="node",mode="idle"} .80 +# {instance="def",job="node",mode="user"} .20 + +# Unsupported by streaming engine. +# eval instant at 5m node_cpu / ignoring (mode) group_left(dummy) sum without (mode)(node_cpu) +# {instance="abc",job="node",mode="idle"} .75 +# {instance="abc",job="node",mode="user"} .25 +# {instance="def",job="node",mode="idle"} .80 +# {instance="def",job="node",mode="user"} .20 + +# Unsupported by streaming engine. +# eval instant at 5m sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu) +# {job="node",mode="idle"} 0.7857142857142857 +# {job="node",mode="user"} 0.21428571428571427 + +# Unsupported by streaming engine. +# eval instant at 5m sum(sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu)) +# {} 1.0 + + +# Copy over label from metric with no matching labels, without having to list cross-job target labels ('job' here). +# Unsupported by streaming engine. 
+# eval instant at 5m node_cpu + on(dummy) group_left(foo) random*0 +# {instance="abc",job="node",mode="idle",foo="bar"} 3 +# {instance="abc",job="node",mode="user",foo="bar"} 1 +# {instance="def",job="node",mode="idle",foo="bar"} 8 +# {instance="def",job="node",mode="user",foo="bar"} 2 + + +# Use threshold from metric, and copy over target. +# Unsupported by streaming engine. +# eval instant at 5m node_cpu > on(job, instance) group_left(target) threshold +# node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3 +# node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1 + +# Use threshold from metric, and a default (1) if it's not present. +# Unsupported by streaming engine. +# eval instant at 5m node_cpu > on(job, instance) group_left(target) (threshold or on (job, instance) (sum by (job, instance)(node_cpu) * 0 + 1)) +# node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3 +# node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1 +# node_cpu{instance="def",job="node",mode="idle"} 8 +# node_cpu{instance="def",job="node",mode="user"} 2 + + +# Check that binops drop the metric name. +# Unsupported by streaming engine. +# eval instant at 5m node_cpu + 2 +# {instance="abc",job="node",mode="idle"} 5 +# {instance="abc",job="node",mode="user"} 3 +# {instance="def",job="node",mode="idle"} 10 +# {instance="def",job="node",mode="user"} 4 + +# Unsupported by streaming engine. +# eval instant at 5m node_cpu - 2 +# {instance="abc",job="node",mode="idle"} 1 +# {instance="abc",job="node",mode="user"} -1 +# {instance="def",job="node",mode="idle"} 6 +# {instance="def",job="node",mode="user"} 0 + +# Unsupported by streaming engine. +# eval instant at 5m node_cpu / 2 +# {instance="abc",job="node",mode="idle"} 1.5 +# {instance="abc",job="node",mode="user"} 0.5 +# {instance="def",job="node",mode="idle"} 4 +# {instance="def",job="node",mode="user"} 1 + +# Unsupported by streaming engine. 
+# eval instant at 5m node_cpu * 2 +# {instance="abc",job="node",mode="idle"} 6 +# {instance="abc",job="node",mode="user"} 2 +# {instance="def",job="node",mode="idle"} 16 +# {instance="def",job="node",mode="user"} 4 + +# Unsupported by streaming engine. +# eval instant at 5m node_cpu ^ 2 +# {instance="abc",job="node",mode="idle"} 9 +# {instance="abc",job="node",mode="user"} 1 +# {instance="def",job="node",mode="idle"} 64 +# {instance="def",job="node",mode="user"} 4 + +# Unsupported by streaming engine. +# eval instant at 5m node_cpu % 2 +# {instance="abc",job="node",mode="idle"} 1 +# {instance="abc",job="node",mode="user"} 1 +# {instance="def",job="node",mode="idle"} 0 +# {instance="def",job="node",mode="user"} 0 + + +clear + +load 5m + random{foo="bar"} 2 + metricA{baz="meh"} 3 + metricB{baz="meh"} 4 + +# On with no labels, for metrics with no common labels. +eval instant at 5m random + on() metricA + {} 5 + +# Ignoring with no labels is the same as no ignoring. +eval instant at 5m metricA + ignoring() metricB + {baz="meh"} 7 + +eval instant at 5m metricA + metricB + {baz="meh"} 7 + +clear + +# Test duplicate labelset in promql output. +load 5m + testmetric1{src="a",dst="b"} 0 + testmetric2{src="a",dst="b"} 1 + +# Unsupported by streaming engine. +# eval_fail instant at 0m -{__name__=~'testmetric1|testmetric2'} + +clear + +load 5m + test_total{instance="localhost"} 50 + test_smaller{instance="localhost"} 10 + +# Unsupported by streaming engine. +# eval instant at 5m test_total > bool test_smaller +# {instance="localhost"} 1 + +# Unsupported by streaming engine. +# eval instant at 5m test_total > test_smaller +# test_total{instance="localhost"} 50 + +# Unsupported by streaming engine. +# eval instant at 5m test_total < bool test_smaller +# {instance="localhost"} 0 + +# Unsupported by streaming engine. +# eval instant at 5m test_total < test_smaller + +clear + +# Testing atan2. +load 5m + trigy{} 10 + trigx{} 20 + trigNaN{} NaN + +# Unsupported by streaming engine. 
+# eval instant at 5m trigy atan2 trigx +# {} 0.4636476090008061 + +# Unsupported by streaming engine. +# eval instant at 5m trigy atan2 trigNaN +# {} NaN + +# Unsupported by streaming engine. +# eval instant at 5m 10 atan2 20 +# 0.4636476090008061 + +# Unsupported by streaming engine. +# eval instant at 5m 10 atan2 NaN +# NaN diff --git a/pkg/streamingpromql/testdata/upstream/operators.test.disabled b/pkg/streamingpromql/testdata/upstream/operators.test.disabled deleted file mode 100644 index 14bf0b103dd..00000000000 --- a/pkg/streamingpromql/testdata/upstream/operators.test.disabled +++ /dev/null @@ -1,494 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-only -# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/operators.test -# Provenance-includes-license: Apache-2.0 -# Provenance-includes-copyright: The Prometheus Authors - -load 5m - http_requests{job="api-server", instance="0", group="production"} 0+10x10 - http_requests{job="api-server", instance="1", group="production"} 0+20x10 - http_requests{job="api-server", instance="0", group="canary"} 0+30x10 - http_requests{job="api-server", instance="1", group="canary"} 0+40x10 - http_requests{job="app-server", instance="0", group="production"} 0+50x10 - http_requests{job="app-server", instance="1", group="production"} 0+60x10 - http_requests{job="app-server", instance="0", group="canary"} 0+70x10 - http_requests{job="app-server", instance="1", group="canary"} 0+80x10 - -load 5m - vector_matching_a{l="x"} 0+1x100 - vector_matching_a{l="y"} 0+2x50 - vector_matching_b{l="x"} 0+4x25 - - -eval instant at 50m SUM(http_requests) BY (job) - COUNT(http_requests) BY (job) - {job="api-server"} 996 - {job="app-server"} 2596 - -eval instant at 50m 2 - SUM(http_requests) BY (job) - {job="api-server"} -998 - {job="app-server"} -2598 - -eval instant at 50m -http_requests{job="api-server",instance="0",group="production"} - {job="api-server",instance="0",group="production"} -100 - -eval 
instant at 50m +http_requests{job="api-server",instance="0",group="production"} - http_requests{job="api-server",instance="0",group="production"} 100 - -eval instant at 50m - - - SUM(http_requests) BY (job) - {job="api-server"} -1000 - {job="app-server"} -2600 - -eval instant at 50m - - - 1 - -1 - -eval instant at 50m -2^---1*3 - -1.5 - -eval instant at 50m 2/-2^---1*3+2 - -10 - -eval instant at 50m -10^3 * - SUM(http_requests) BY (job) ^ -1 - {job="api-server"} 1 - {job="app-server"} 0.38461538461538464 - -eval instant at 50m 1000 / SUM(http_requests) BY (job) - {job="api-server"} 1 - {job="app-server"} 0.38461538461538464 - -eval instant at 50m SUM(http_requests) BY (job) - 2 - {job="api-server"} 998 - {job="app-server"} 2598 - -eval instant at 50m SUM(http_requests) BY (job) % 3 - {job="api-server"} 1 - {job="app-server"} 2 - -eval instant at 50m SUM(http_requests) BY (job) % 0.3 - {job="api-server"} 0.1 - {job="app-server"} 0.2 - -eval instant at 50m SUM(http_requests) BY (job) ^ 2 - {job="api-server"} 1000000 - {job="app-server"} 6760000 - -eval instant at 50m SUM(http_requests) BY (job) % 3 ^ 2 - {job="api-server"} 1 - {job="app-server"} 8 - -eval instant at 50m SUM(http_requests) BY (job) % 2 ^ (3 ^ 2) - {job="api-server"} 488 - {job="app-server"} 40 - -eval instant at 50m SUM(http_requests) BY (job) % 2 ^ 3 ^ 2 - {job="api-server"} 488 - {job="app-server"} 40 - -eval instant at 50m SUM(http_requests) BY (job) % 2 ^ 3 ^ 2 ^ 2 - {job="api-server"} 1000 - {job="app-server"} 2600 - -eval instant at 50m COUNT(http_requests) BY (job) ^ COUNT(http_requests) BY (job) - {job="api-server"} 256 - {job="app-server"} 256 - -eval instant at 50m SUM(http_requests) BY (job) / 0 - {job="api-server"} +Inf - {job="app-server"} +Inf - -eval instant at 50m http_requests{group="canary", instance="0", job="api-server"} / 0 - {group="canary", instance="0", job="api-server"} +Inf - -eval instant at 50m -1 * http_requests{group="canary", instance="0", job="api-server"} / 0 - 
{group="canary", instance="0", job="api-server"} -Inf - -eval instant at 50m 0 * http_requests{group="canary", instance="0", job="api-server"} / 0 - {group="canary", instance="0", job="api-server"} NaN - -eval instant at 50m 0 * http_requests{group="canary", instance="0", job="api-server"} % 0 - {group="canary", instance="0", job="api-server"} NaN - -eval instant at 50m SUM(http_requests) BY (job) + SUM(http_requests) BY (job) - {job="api-server"} 2000 - {job="app-server"} 5200 - -eval instant at 50m (SUM((http_requests)) BY (job)) + SUM(http_requests) BY (job) - {job="api-server"} 2000 - {job="app-server"} 5200 - -eval instant at 50m http_requests{job="api-server", group="canary"} - http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="canary", instance="1", job="api-server"} 400 - -eval instant at 50m http_requests{job="api-server", group="canary"} + rate(http_requests{job="api-server"}[5m]) * 5 * 60 - {group="canary", instance="0", job="api-server"} 330 - {group="canary", instance="1", job="api-server"} 440 - -eval instant at 50m rate(http_requests[25m]) * 25 * 60 - {group="canary", instance="0", job="api-server"} 150 - {group="canary", instance="0", job="app-server"} 350 - {group="canary", instance="1", job="api-server"} 200 - {group="canary", instance="1", job="app-server"} 400 - {group="production", instance="0", job="api-server"} 50 - {group="production", instance="0", job="app-server"} 249.99999999999997 - {group="production", instance="1", job="api-server"} 100 - {group="production", instance="1", job="app-server"} 300 - -eval instant at 50m (rate((http_requests[25m])) * 25) * 60 - {group="canary", instance="0", job="api-server"} 150 - {group="canary", instance="0", job="app-server"} 350 - {group="canary", instance="1", job="api-server"} 200 - {group="canary", instance="1", job="app-server"} 400 - {group="production", instance="0", job="api-server"} 50 - {group="production", instance="0", job="app-server"} 
249.99999999999997 - {group="production", instance="1", job="api-server"} 100 - {group="production", instance="1", job="app-server"} 300 - - -eval instant at 50m http_requests{group="canary"} and http_requests{instance="0"} - http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="canary", instance="0", job="app-server"} 700 - -eval instant at 50m (http_requests{group="canary"} + 1) and http_requests{instance="0"} - {group="canary", instance="0", job="api-server"} 301 - {group="canary", instance="0", job="app-server"} 701 - -eval instant at 50m (http_requests{group="canary"} + 1) and on(instance, job) http_requests{instance="0", group="production"} - {group="canary", instance="0", job="api-server"} 301 - {group="canary", instance="0", job="app-server"} 701 - -eval instant at 50m (http_requests{group="canary"} + 1) and on(instance) http_requests{instance="0", group="production"} - {group="canary", instance="0", job="api-server"} 301 - {group="canary", instance="0", job="app-server"} 701 - -eval instant at 50m (http_requests{group="canary"} + 1) and ignoring(group) http_requests{instance="0", group="production"} - {group="canary", instance="0", job="api-server"} 301 - {group="canary", instance="0", job="app-server"} 701 - -eval instant at 50m (http_requests{group="canary"} + 1) and ignoring(group, job) http_requests{instance="0", group="production"} - {group="canary", instance="0", job="api-server"} 301 - {group="canary", instance="0", job="app-server"} 701 - -eval instant at 50m http_requests{group="canary"} or http_requests{group="production"} - http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="canary", instance="0", job="app-server"} 700 - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="1", job="app-server"} 800 - http_requests{group="production", instance="0", job="api-server"} 100 - http_requests{group="production", instance="0", 
job="app-server"} 500 - http_requests{group="production", instance="1", job="api-server"} 200 - http_requests{group="production", instance="1", job="app-server"} 600 - -# On overlap the rhs samples must be dropped. -eval instant at 50m (http_requests{group="canary"} + 1) or http_requests{instance="1"} - {group="canary", instance="0", job="api-server"} 301 - {group="canary", instance="0", job="app-server"} 701 - {group="canary", instance="1", job="api-server"} 401 - {group="canary", instance="1", job="app-server"} 801 - http_requests{group="production", instance="1", job="api-server"} 200 - http_requests{group="production", instance="1", job="app-server"} 600 - - -# Matching only on instance excludes everything that has instance=0/1 but includes -# entries without the instance label. -eval instant at 50m (http_requests{group="canary"} + 1) or on(instance) (http_requests or cpu_count or vector_matching_a) - {group="canary", instance="0", job="api-server"} 301 - {group="canary", instance="0", job="app-server"} 701 - {group="canary", instance="1", job="api-server"} 401 - {group="canary", instance="1", job="app-server"} 801 - vector_matching_a{l="x"} 10 - vector_matching_a{l="y"} 20 - -eval instant at 50m (http_requests{group="canary"} + 1) or ignoring(l, group, job) (http_requests or cpu_count or vector_matching_a) - {group="canary", instance="0", job="api-server"} 301 - {group="canary", instance="0", job="app-server"} 701 - {group="canary", instance="1", job="api-server"} 401 - {group="canary", instance="1", job="app-server"} 801 - vector_matching_a{l="x"} 10 - vector_matching_a{l="y"} 20 - -eval instant at 50m http_requests{group="canary"} unless http_requests{instance="0"} - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="1", job="app-server"} 800 - -eval instant at 50m http_requests{group="canary"} unless on(job) http_requests{instance="0"} - -eval instant at 50m http_requests{group="canary"} unless 
on(job, instance) http_requests{instance="0"} - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="1", job="app-server"} 800 - -eval instant at 50m http_requests{group="canary"} / on(instance,job) http_requests{group="production"} - {instance="0", job="api-server"} 3 - {instance="0", job="app-server"} 1.4 - {instance="1", job="api-server"} 2 - {instance="1", job="app-server"} 1.3333333333333333 - -eval instant at 50m http_requests{group="canary"} unless ignoring(group, instance) http_requests{instance="0"} - -eval instant at 50m http_requests{group="canary"} unless ignoring(group) http_requests{instance="0"} - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="1", job="app-server"} 800 - -eval instant at 50m http_requests{group="canary"} / ignoring(group) http_requests{group="production"} - {instance="0", job="api-server"} 3 - {instance="0", job="app-server"} 1.4 - {instance="1", job="api-server"} 2 - {instance="1", job="app-server"} 1.3333333333333333 - -# https://github.com/prometheus/prometheus/issues/1489 -eval instant at 50m http_requests AND ON (dummy) vector(1) - http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="canary", instance="0", job="app-server"} 700 - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="1", job="app-server"} 800 - http_requests{group="production", instance="0", job="api-server"} 100 - http_requests{group="production", instance="0", job="app-server"} 500 - http_requests{group="production", instance="1", job="api-server"} 200 - http_requests{group="production", instance="1", job="app-server"} 600 - -eval instant at 50m http_requests AND IGNORING (group, instance, job) vector(1) - http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="canary", instance="0", job="app-server"} 700 - 
http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="1", job="app-server"} 800 - http_requests{group="production", instance="0", job="api-server"} 100 - http_requests{group="production", instance="0", job="app-server"} 500 - http_requests{group="production", instance="1", job="api-server"} 200 - http_requests{group="production", instance="1", job="app-server"} 600 - - -# Comparisons. -eval instant at 50m SUM(http_requests) BY (job) > 1000 - {job="app-server"} 2600 - -eval instant at 50m 1000 < SUM(http_requests) BY (job) - {job="app-server"} 2600 - -eval instant at 50m SUM(http_requests) BY (job) <= 1000 - {job="api-server"} 1000 - -eval instant at 50m SUM(http_requests) BY (job) != 1000 - {job="app-server"} 2600 - -eval instant at 50m SUM(http_requests) BY (job) == 1000 - {job="api-server"} 1000 - -eval instant at 50m SUM(http_requests) BY (job) == bool 1000 - {job="api-server"} 1 - {job="app-server"} 0 - -eval instant at 50m SUM(http_requests) BY (job) == bool SUM(http_requests) BY (job) - {job="api-server"} 1 - {job="app-server"} 1 - -eval instant at 50m SUM(http_requests) BY (job) != bool SUM(http_requests) BY (job) - {job="api-server"} 0 - {job="app-server"} 0 - -eval instant at 50m 0 == bool 1 - 0 - -eval instant at 50m 1 == bool 1 - 1 - -eval instant at 50m http_requests{job="api-server", instance="0", group="production"} == bool 100 - {job="api-server", instance="0", group="production"} 1 - -# group_left/group_right. - -clear - -load 5m - node_var{instance="abc",job="node"} 2 - node_role{instance="abc",job="node",role="prometheus"} 1 - -load 5m - node_cpu{instance="abc",job="node",mode="idle"} 3 - node_cpu{instance="abc",job="node",mode="user"} 1 - node_cpu{instance="def",job="node",mode="idle"} 8 - node_cpu{instance="def",job="node",mode="user"} 2 - -load 5m - random{foo="bar"} 1 - -load 5m - threshold{instance="abc",job="node",target="a@b.com"} 0 - -# Copy machine role to node variable. 
-eval instant at 5m node_role * on (instance) group_right (role) node_var - {instance="abc",job="node",role="prometheus"} 2 - -eval instant at 5m node_var * on (instance) group_left (role) node_role - {instance="abc",job="node",role="prometheus"} 2 - -eval instant at 5m node_var * ignoring (role) group_left (role) node_role - {instance="abc",job="node",role="prometheus"} 2 - -eval instant at 5m node_role * ignoring (role) group_right (role) node_var - {instance="abc",job="node",role="prometheus"} 2 - -# Copy machine role to node variable with instrumentation labels. -eval instant at 5m node_cpu * ignoring (role, mode) group_left (role) node_role - {instance="abc",job="node",mode="idle",role="prometheus"} 3 - {instance="abc",job="node",mode="user",role="prometheus"} 1 - -eval instant at 5m node_cpu * on (instance) group_left (role) node_role - {instance="abc",job="node",mode="idle",role="prometheus"} 3 - {instance="abc",job="node",mode="user",role="prometheus"} 1 - - -# Ratio of total. -eval instant at 5m node_cpu / on (instance) group_left sum by (instance,job)(node_cpu) - {instance="abc",job="node",mode="idle"} .75 - {instance="abc",job="node",mode="user"} .25 - {instance="def",job="node",mode="idle"} .80 - {instance="def",job="node",mode="user"} .20 - -eval instant at 5m sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu) - {job="node",mode="idle"} 0.7857142857142857 - {job="node",mode="user"} 0.21428571428571427 - -eval instant at 5m sum(sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu)) - {} 1.0 - - -eval instant at 5m node_cpu / ignoring (mode) group_left sum without (mode)(node_cpu) - {instance="abc",job="node",mode="idle"} .75 - {instance="abc",job="node",mode="user"} .25 - {instance="def",job="node",mode="idle"} .80 - {instance="def",job="node",mode="user"} .20 - -eval instant at 5m node_cpu / ignoring (mode) group_left(dummy) sum without (mode)(node_cpu) - {instance="abc",job="node",mode="idle"} .75 - 
{instance="abc",job="node",mode="user"} .25 - {instance="def",job="node",mode="idle"} .80 - {instance="def",job="node",mode="user"} .20 - -eval instant at 5m sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu) - {job="node",mode="idle"} 0.7857142857142857 - {job="node",mode="user"} 0.21428571428571427 - -eval instant at 5m sum(sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu)) - {} 1.0 - - -# Copy over label from metric with no matching labels, without having to list cross-job target labels ('job' here). -eval instant at 5m node_cpu + on(dummy) group_left(foo) random*0 - {instance="abc",job="node",mode="idle",foo="bar"} 3 - {instance="abc",job="node",mode="user",foo="bar"} 1 - {instance="def",job="node",mode="idle",foo="bar"} 8 - {instance="def",job="node",mode="user",foo="bar"} 2 - - -# Use threshold from metric, and copy over target. -eval instant at 5m node_cpu > on(job, instance) group_left(target) threshold - node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3 - node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1 - -# Use threshold from metric, and a default (1) if it's not present. -eval instant at 5m node_cpu > on(job, instance) group_left(target) (threshold or on (job, instance) (sum by (job, instance)(node_cpu) * 0 + 1)) - node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3 - node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1 - node_cpu{instance="def",job="node",mode="idle"} 8 - node_cpu{instance="def",job="node",mode="user"} 2 - - -# Check that binops drop the metric name. 
-eval instant at 5m node_cpu + 2 - {instance="abc",job="node",mode="idle"} 5 - {instance="abc",job="node",mode="user"} 3 - {instance="def",job="node",mode="idle"} 10 - {instance="def",job="node",mode="user"} 4 - -eval instant at 5m node_cpu - 2 - {instance="abc",job="node",mode="idle"} 1 - {instance="abc",job="node",mode="user"} -1 - {instance="def",job="node",mode="idle"} 6 - {instance="def",job="node",mode="user"} 0 - -eval instant at 5m node_cpu / 2 - {instance="abc",job="node",mode="idle"} 1.5 - {instance="abc",job="node",mode="user"} 0.5 - {instance="def",job="node",mode="idle"} 4 - {instance="def",job="node",mode="user"} 1 - -eval instant at 5m node_cpu * 2 - {instance="abc",job="node",mode="idle"} 6 - {instance="abc",job="node",mode="user"} 2 - {instance="def",job="node",mode="idle"} 16 - {instance="def",job="node",mode="user"} 4 - -eval instant at 5m node_cpu ^ 2 - {instance="abc",job="node",mode="idle"} 9 - {instance="abc",job="node",mode="user"} 1 - {instance="def",job="node",mode="idle"} 64 - {instance="def",job="node",mode="user"} 4 - -eval instant at 5m node_cpu % 2 - {instance="abc",job="node",mode="idle"} 1 - {instance="abc",job="node",mode="user"} 1 - {instance="def",job="node",mode="idle"} 0 - {instance="def",job="node",mode="user"} 0 - - -clear - -load 5m - random{foo="bar"} 2 - metricA{baz="meh"} 3 - metricB{baz="meh"} 4 - -# On with no labels, for metrics with no common labels. -eval instant at 5m random + on() metricA - {} 5 - -# Ignoring with no labels is the same as no ignoring. -eval instant at 5m metricA + ignoring() metricB - {baz="meh"} 7 - -eval instant at 5m metricA + metricB - {baz="meh"} 7 - -clear - -# Test duplicate labelset in promql output. 
-load 5m - testmetric1{src="a",dst="b"} 0 - testmetric2{src="a",dst="b"} 1 - -eval_fail instant at 0m -{__name__=~'testmetric1|testmetric2'} - -clear - -load 5m - test_total{instance="localhost"} 50 - test_smaller{instance="localhost"} 10 - -eval instant at 5m test_total > bool test_smaller - {instance="localhost"} 1 - -eval instant at 5m test_total > test_smaller - test_total{instance="localhost"} 50 - -eval instant at 5m test_total < bool test_smaller - {instance="localhost"} 0 - -eval instant at 5m test_total < test_smaller - -clear - -# Testing atan2. -load 5m - trigy{} 10 - trigx{} 20 - trigNaN{} NaN - -eval instant at 5m trigy atan2 trigx - {} 0.4636476090008061 - -eval instant at 5m trigy atan2 trigNaN - {} NaN - -eval instant at 5m 10 atan2 20 - 0.4636476090008061 - -eval instant at 5m 10 atan2 NaN - NaN From f909d555f375af050d9bdbe1402706e15ada1eb6 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 8 May 2024 13:26:28 +1000 Subject: [PATCH 03/43] Fix invalid test case --- pkg/streamingpromql/testdata/ours/binary_operators.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index fe9431f1af2..ba46d4cecec 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -41,8 +41,8 @@ load 6m # Matches on both sides: returns results for matching series, ignores non-matching series eval range from 0 to 24m step 6m left_side + right_side - {env="prod", pod="pod-abc123"} 101 202 303 404 {env="dev", pod="pod-abc123"} 1010 2020 3030 4040 + {env="prod", pod="pod-abc123"} 101 202 303 404 # No series on either side: returns no results eval range from 0 to 24m step 6m left_side_that_doesnt_exist + right_side_that_doesnt_exist From 3cba572cb54dfcc6fd7095a8045ba248fcee142a Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 8 May 2024 13:27:02 +1000 Subject: [PATCH 
04/43] Make it clear that points are expected to be in timestamp order with no duplicate timestamps. --- pkg/streamingpromql/operator/operator.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pkg/streamingpromql/operator/operator.go b/pkg/streamingpromql/operator/operator.go index a778cf20341..c1f77a4707b 100644 --- a/pkg/streamingpromql/operator/operator.go +++ b/pkg/streamingpromql/operator/operator.go @@ -36,6 +36,13 @@ type SeriesMetadata struct { } type InstantVectorSeriesData struct { - Floats []promql.FPoint + // Floats contains floating point samples for this series. + // Samples must be sorted in timestamp order, earliest timestamps first. + // Samples must not have duplicate timestamps. + Floats []promql.FPoint + + // Histograms contains histogram samples for this series. + // Samples must be sorted in timestamp order, earliest timestamps first. + // Samples must not have duplicate timestamps. Histograms []promql.HPoint } From 9ed79617e1bce36b13dc1e18c6c55e2318a3b84c Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 8 May 2024 13:27:42 +1000 Subject: [PATCH 05/43] Enable relevant benchmarks --- pkg/streamingpromql/benchmarks/benchmarks.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/streamingpromql/benchmarks/benchmarks.go b/pkg/streamingpromql/benchmarks/benchmarks.go index 3dd72db58ee..88a22d23cc0 100644 --- a/pkg/streamingpromql/benchmarks/benchmarks.go +++ b/pkg/streamingpromql/benchmarks/benchmarks.go @@ -101,13 +101,13 @@ func TestCases(metricSizes []int) []BenchCase { // Expr: "-a_X", //}, //// Binary operators. - //{ - // Expr: "a_X - b_X", - //}, - //{ - // Expr: "a_X - b_X", - // Steps: 10000, - //}, + { + Expr: "a_X - b_X", + }, + { + Expr: "a_X - b_X", + Steps: 10000, + }, //{ // Expr: "a_X and b_X{l=~'.*[0-4]$'}", //}, @@ -157,9 +157,9 @@ func TestCases(metricSizes []int) []BenchCase { // Expr: "topk(5, a_X)", //}, //// Combinations. 
- //{ - // Expr: "rate(a_X[1m]) + rate(b_X[1m])", - //}, + { + Expr: "rate(a_X[1m]) + rate(b_X[1m])", + }, { Expr: "sum by (le)(rate(h_X[1m]))", }, From 3ef9607530185c6d707bc46fc081cdb81c8dfadf Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 8 May 2024 13:52:28 +1000 Subject: [PATCH 06/43] Enable test cases for atan2 --- .../testdata/ours/binary_operators.test | 5 ++++- pkg/streamingpromql/testdata/upstream/operators.test | 12 ++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index ba46d4cecec..ee6f56b2129 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -5,7 +5,7 @@ # Throughout this file, we use a 6m step to avoid the default 5m lookback window. -# Basic arithmetic operations +# Basic arithmetic operations, and atan2 load 6m left_side 11 21 32 42 right_side 1 2 3 4 @@ -28,6 +28,9 @@ eval range from 0 to 24m step 6m left_side % right_side eval range from 0 to 24m step 6m left_side ^ right_side {} 11 441 32768 3111696 +eval range from 0 to 24m step 6m left_side atan2 right_side + {} 1.4801364395941514 1.4758446204521403 1.477319545636307 1.4758446204521403 + clear # One-to-one matching with all labels diff --git a/pkg/streamingpromql/testdata/upstream/operators.test b/pkg/streamingpromql/testdata/upstream/operators.test index 1b1b899145c..b4f461eb937 100644 --- a/pkg/streamingpromql/testdata/upstream/operators.test +++ b/pkg/streamingpromql/testdata/upstream/operators.test @@ -563,18 +563,14 @@ load 5m trigx{} 20 trigNaN{} NaN -# Unsupported by streaming engine. -# eval instant at 5m trigy atan2 trigx -# {} 0.4636476090008061 +eval instant at 5m trigy atan2 trigx + {} 0.4636476090008061 -# Unsupported by streaming engine. 
-# eval instant at 5m trigy atan2 trigNaN -# {} NaN +eval instant at 5m trigy atan2 trigNaN + {} NaN -# Unsupported by streaming engine. # eval instant at 5m 10 atan2 20 # 0.4636476090008061 -# Unsupported by streaming engine. # eval instant at 5m 10 atan2 NaN # NaN From 67414c54e6ae1dd8356c7101e7f661fd06254508 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 8 May 2024 13:52:49 +1000 Subject: [PATCH 07/43] Initial (WIP) version of one-to-one matching --- pkg/streamingpromql/engine_test.go | 10 +- .../operator/binary_operator.go | 375 ++++++++++++++++++ pkg/streamingpromql/query.go | 28 ++ 3 files changed, 410 insertions(+), 3 deletions(-) create mode 100644 pkg/streamingpromql/operator/binary_operator.go diff --git a/pkg/streamingpromql/engine_test.go b/pkg/streamingpromql/engine_test.go index 05bc841b8cd..51de17be392 100644 --- a/pkg/streamingpromql/engine_test.go +++ b/pkg/streamingpromql/engine_test.go @@ -23,9 +23,13 @@ func TestUnsupportedPromQLFeatures(t *testing.T) { // The goal of this is not to list every conceivable expression that is unsupported, but to cover all the // different cases and make sure we produce a reasonable error message when these cases are encountered. 
unsupportedExpressions := map[string]string{ - "a + b": "PromQL expression type *parser.BinaryExpr", - "1 + 2": "PromQL expression type *parser.BinaryExpr", - "metric{} + other_metric{}": "PromQL expression type *parser.BinaryExpr", + "1 + 2": "binary expression with scalars", + "1 + metric{}": "binary expression with scalars", + "metric{} + 1": "binary expression with scalars", + "metric{} < other_metric{}": "binary expression with '<'", + "metric{} or other_metric{}": "binary expression with many-to-many matching", + "metric{} + on() group_left() other_metric{}": "binary expression with many-to-one matching", + "metric{} + on() group_right() other_metric{}": "binary expression with one-to-many matching", "1": "PromQL expression type *parser.NumberLiteral", "metric{} offset 2h": "instant vector selector with 'offset'", "avg(metric{})": "'avg' aggregation", diff --git a/pkg/streamingpromql/operator/binary_operator.go b/pkg/streamingpromql/operator/binary_operator.go new file mode 100644 index 00000000000..3b17b6a0e12 --- /dev/null +++ b/pkg/streamingpromql/operator/binary_operator.go @@ -0,0 +1,375 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/main/promql/engine.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Prometheus Authors + +package operator + +import ( + "context" + "fmt" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/parser" + "math" + "slices" +) + +type binaryOperatorFunc func(left, right float64) float64 + +type BinaryOperator struct { + Left InstantVectorOperator + Right InstantVectorOperator + Op parser.ItemType + + VectorMatching parser.VectorMatching + + // We need to retain these so that NextSeries() can return an error message with the series labels when + // multiple points match on a single side. 
+ leftMetadata []SeriesMetadata + rightMetadata []SeriesMetadata + + remainingSeries []*binaryOperatorSeriesPair + leftBuffer *binaryOperatorSeriesBuffer + rightBuffer *binaryOperatorSeriesBuffer + operatorFunc binaryOperatorFunc +} + +var _ InstantVectorOperator = &BinaryOperator{} + +type binaryOperatorSeriesPair struct { + leftSeriesIndices []int + rightSeriesIndices []int +} + +func (b *BinaryOperator) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, error) { + b.operatorFunc = arithmeticOperatorFuncs[b.Op] + if b.operatorFunc == nil { + // This should never happen, this should be caught by Query.convertToOperator + // FIXME: move NotSupportedError to a separate package so we can use it in a constructor function for BinaryOperator and remove the check in Query.convertToOperator + return nil, fmt.Errorf("unsupported binary operator '%s'", b.Op) + } + + // TODO: break this into smaller functions, it's enormous + + leftMetadata, err := b.Left.SeriesMetadata(ctx) + if err != nil { + return nil, err + } + + if len(leftMetadata) == 0 { + // FIXME: this is incorrect for 'or' + // No series on left-hand side, we'll never have any output series. + return nil, nil + } + + rightMetadata, err := b.Right.SeriesMetadata(ctx) + if err != nil { + return nil, err + } + + // Keep series labels for later so we can use them to generate error messages. + b.leftMetadata = leftMetadata + b.rightMetadata = rightMetadata + + if len(rightMetadata) == 0 { + // FIXME: this is incorrect for 'or' and 'unless' + // No series on right-hand side, we'll never have any output series. + return nil, nil + } + + // TODO: Prometheus' engine uses strings for the key here, which would avoid issues with hash collisions, but seems much slower. + hashFunc := b.hashFunc() + + // TODO: pool binaryOperatorSeriesPair? Pool internal slices? + // TODO: guess initial size of map? 
+ allPairs := map[uint64]*binaryOperatorSeriesPair{} + + // TODO: is it better to use whichever side has fewer series for this first loop? Should result in a smaller map and therefore less work later on + // Would need to be careful about 'or' and 'unless' cases + for idx, s := range leftMetadata { + hash := hashFunc(s.Labels) + series, exists := allPairs[hash] + + if !exists { + series = &binaryOperatorSeriesPair{} + allPairs[hash] = series + } + + series.leftSeriesIndices = append(series.leftSeriesIndices, idx) + } + + for idx, s := range rightMetadata { + hash := hashFunc(s.Labels) + + if series, exists := allPairs[hash]; exists { + series.rightSeriesIndices = append(series.rightSeriesIndices, idx) + } + + // FIXME: if this is an 'or' operation, then we need to create the right side even if the left doesn't exist + } + + // Remove pairs that cannot produce series. + for hash, pair := range allPairs { + if len(pair.leftSeriesIndices) == 0 || len(pair.rightSeriesIndices) == 0 { + // FIXME: this is incorrect for 'or' and 'unless' + // No matching series on at least one side for this output series, so output series will have no samples. Remove it. 
+ delete(allPairs, hash) + } + } + + allMetadata := make([]SeriesMetadata, 0, len(allPairs)) + b.remainingSeries = make([]*binaryOperatorSeriesPair, 0, len(allPairs)) + labelsFunc := b.labelsFunc() + + for _, pair := range allPairs { + firstSeriesLabels := leftMetadata[pair.leftSeriesIndices[0]].Labels + allMetadata = append(allMetadata, SeriesMetadata{Labels: labelsFunc(firstSeriesLabels)}) + b.remainingSeries = append(b.remainingSeries, pair) + } + + // TODO: sort output series + // Sort output series: either to favour left side or right side + // - Add comment emphasising this is critical for managing peak memory consumption, could make this decision more sophisticated in the future + // - TODO: think this through with some examples, especially for pathological cases where a single output series has multiple series on each side for different timesteps + // - One-to-one matching: + // - assume one series on each side of output series + // - therefore best option to keep peak memory utilisation low is to order series so we don't hold higher cardinality side in memory + // eg. 
if LHS is 20 series, and RHS is 2 series, best option is to go through LHS series in order and potentially have to hold some RHS series in memory, + // as then in worst case we'll hold 2 series in memory at once + // - Many-to-one / one-to-many matching: + // - assume "one" side is higher cardinality, and series from "many" side are used multiple times + // - therefore best option to keep peak memory utilisation low is to order series so we don't hold higher cardinality side in memory, especially as we'll + // likely have to hold some "many" side series in memory anyway (doesn't make sense to have to hold both "one" and "many" side series) + + b.leftBuffer = newBinaryOperatorSeriesBuffer(b.Left) + b.rightBuffer = newBinaryOperatorSeriesBuffer(b.Right) + + return allMetadata, nil +} + +func (b *BinaryOperator) hashFunc() func(labels.Labels) uint64 { + buf := make([]byte, 0, 1024) + names := b.VectorMatching.MatchingLabels + + if b.VectorMatching.On { + slices.Sort(names) + + return func(l labels.Labels) uint64 { + var hash uint64 + hash, buf = l.HashForLabels(buf, names...) + return hash + } + } + + names = append([]string{labels.MetricName}, names...) + slices.Sort(names) + + return func(l labels.Labels) uint64 { + var hash uint64 + hash, buf = l.HashWithoutLabels(buf, names...) + return hash + } +} + +func (b *BinaryOperator) labelsFunc() func(labels.Labels) labels.Labels { + lb := labels.NewBuilder(labels.EmptyLabels()) + + if b.VectorMatching.On { + return func(l labels.Labels) labels.Labels { + lb.Reset(l) + lb.Keep(b.VectorMatching.MatchingLabels...) + return lb.Labels() + } + } + + return func(l labels.Labels) labels.Labels { + lb.Reset(l) + lb.Del(b.VectorMatching.MatchingLabels...) 
+ lb.Del(labels.MetricName) + return lb.Labels() + } +} + +func (b *BinaryOperator) Next(ctx context.Context) (InstantVectorSeriesData, error) { + if len(b.remainingSeries) == 0 { + return InstantVectorSeriesData{}, EOS + } + + thisSeries := b.remainingSeries[0] + b.remainingSeries = b.remainingSeries[1:] + + // TODO: need to store which series are actually used on each side in SeriesMetadata() above + // TODO: need to return original slices from Left.Next() and Right.Next() to the pool + // Solution: return a type like { data InstantVectorSeriesData, refCount int } from getSeries() and merge(), + // use this to track when it is safe to return slices here or in merge() - will work for one-to-one, many-to-one and many-to-many cases + // Populate refCount from slice that tracks the number of times each series is used in an output series + + allLeftSeries, err := b.leftBuffer.getSeries(ctx, thisSeries.leftSeriesIndices) + if err != nil { + return InstantVectorSeriesData{}, err + } + + allRightSeries, err := b.rightBuffer.getSeries(ctx, thisSeries.rightSeriesIndices) + if err != nil { + return InstantVectorSeriesData{}, err + } + + // TODO: merge left side into single slice? Or have some kind of iterator over slices? + // TODO: merge right side into single slice? Or have some kind of iterator over slices? + // Merging: + // - responsible for merging multiple series on a side into one series + // - if only one series on side, just return original slice as-is + // - if multiple series: + // - sort series by first point + // - for each remaining series: + // - if points don't overlap with previous series, just copy data + // - if points do overlap with previous series, need to zip series together, checking for conflicts + // - would this be easier to do if we were working with []float64 rather than []FPoint? + // - would also mean that some arithmetic operations become faster, as we can use vectorisation (eg. 
leftPoints + rightPoints, rather than output[i] = left[i] + right[i] etc.) + // - should we just change the InstantVectorOperator interface to use ([]float64, presence)? Would make some aggregation operations faster as well (eg. sum) + + // Compute result for each output series + // - Can we reuse either the left or right side slice for the output slice? + return b.computeResult(allLeftSeries[0], allRightSeries[0]), nil + + // TODO: return series slices to the pool +} + +func (b *BinaryOperator) computeResult(left InstantVectorSeriesData, right InstantVectorSeriesData) InstantVectorSeriesData { + outputLength := min(len(left.Floats), len(right.Floats)) // We can't produce more output points than input points for arithmetic operators. + output := GetFPointSlice(outputLength) // Reuse one side for the output slice? + + nextRightIndex := 0 + + for _, leftPoint := range left.Floats { + for nextRightIndex < len(right.Floats) && right.Floats[nextRightIndex].T < leftPoint.T { + nextRightIndex++ + } + + if nextRightIndex == len(right.Floats) { + // No more points on right side. We are done. + break + } + + if leftPoint.T == right.Floats[nextRightIndex].T { + // We have matching points on both sides, compute the result. + output = append(output, promql.FPoint{ + F: b.operatorFunc(leftPoint.F, right.Floats[nextRightIndex].F), + T: leftPoint.T, + }) + } + } + + return InstantVectorSeriesData{ + Floats: output, + } +} + +func (b *BinaryOperator) Close() { + if b.Left != nil { + b.Left.Close() + } + + if b.Right != nil { + b.Right.Close() + } + + if b.leftMetadata != nil { + PutSeriesMetadataSlice(b.leftMetadata) + } + + if b.rightMetadata != nil { + PutSeriesMetadataSlice(b.rightMetadata) + } +} + +// binaryOperatorSeriesBuffer buffers series data until it is needed by BinaryOperator. 
+// +// For example, if the source operator produces series in order A, B, C, but their corresponding output series from the +// binary operator are in order B, A, C, binaryOperatorSeriesBuffer will buffer the data for series A while series B is +// produced, then return series A when needed. +type binaryOperatorSeriesBuffer struct { + source InstantVectorOperator + nextIndexToRead int + + // TODO: what is the best way to store buffered data? + buffer map[int]InstantVectorSeriesData + + // TODO: need a way to know if a series will never be used and therefore skip buffering it + + output []InstantVectorSeriesData +} + +func newBinaryOperatorSeriesBuffer(source InstantVectorOperator) *binaryOperatorSeriesBuffer { + return &binaryOperatorSeriesBuffer{ + source: source, + buffer: map[int]InstantVectorSeriesData{}, + } +} + +// getSeries returns the data for the series in seriesIndices. +// The returned slice is only safe to use until getSeries is called again. +func (b *binaryOperatorSeriesBuffer) getSeries(ctx context.Context, seriesIndices []int) ([]InstantVectorSeriesData, error) { + if cap(b.output) < len(seriesIndices) { + // TODO: pool? + b.output = make([]InstantVectorSeriesData, len(seriesIndices)) + } + + b.output = b.output[:len(seriesIndices)] + + for i, seriesIndex := range seriesIndices { + d, err := b.getSingleSeries(ctx, seriesIndex) + + if err != nil { + return nil, err + } + + b.output[i] = d + } + + return b.output, nil +} + +func (b *binaryOperatorSeriesBuffer) getSingleSeries(ctx context.Context, seriesIndex int) (InstantVectorSeriesData, error) { + for seriesIndex > b.nextIndexToRead { + d, err := b.source.Next(ctx) + if err != nil { + return InstantVectorSeriesData{}, err + } + + // TODO: don't bother storing data we won't need, immediately return slice to pool + b.buffer[b.nextIndexToRead] = d + + b.nextIndexToRead++ + } + + if seriesIndex == b.nextIndexToRead { + // Don't bother buffering data if we can return it directly. 
+ b.nextIndexToRead++ + return b.source.Next(ctx) + } + + d := b.buffer[seriesIndex] + delete(b.buffer, seriesIndex) + + return d, nil +} + +var arithmeticOperatorFuncs = map[parser.ItemType]binaryOperatorFunc{ + parser.ADD: func(left, right float64) float64 { + return left + right + }, + parser.SUB: func(left, right float64) float64 { + return left - right + }, + parser.MUL: func(left, right float64) float64 { + return left * right + }, + parser.DIV: func(left, right float64) float64 { + return left / right + }, + parser.MOD: math.Mod, + parser.POW: math.Pow, + parser.ATAN2: math.Atan2, +} diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go index ed65dd736a9..82072b5a202 100644 --- a/pkg/streamingpromql/query.go +++ b/pkg/streamingpromql/query.go @@ -163,6 +163,34 @@ func (q *Query) convertToOperator(expr parser.Expr) (operator.InstantVectorOpera Matchers: vectorSelector.LabelMatchers, }, }, nil + case *parser.BinaryExpr: + if e.LHS.Type() != parser.ValueTypeVector || e.RHS.Type() != parser.ValueTypeVector { + return nil, NewNotSupportedError("binary expression with scalars") + } + + if e.VectorMatching.Card != parser.CardOneToOne { + return nil, NewNotSupportedError(fmt.Sprintf("binary expression with %v matching", e.VectorMatching.Card)) + } + + if e.Op.IsComparisonOperator() || e.Op.IsSetOperator() { + return nil, NewNotSupportedError(fmt.Sprintf("binary expression with '%s'", e.Op)) + } + + lhs, err := q.convertToOperator(e.LHS) + if err != nil { + return nil, err + } + + rhs, err := q.convertToOperator(e.RHS) + if err != nil { + return nil, err + } + + return &operator.BinaryOperator{ + Left: lhs, + Right: rhs, + Op: e.Op, + }, nil case *parser.StepInvariantExpr: // One day, we'll do something smarter here. 
return q.convertToOperator(e.Expr) From 81fd3e921e39f8845e6ba26f4134fd607e7d1bff Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 8 May 2024 14:02:12 +1000 Subject: [PATCH 08/43] Update comment --- pkg/streamingpromql/operator/binary_operator.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/streamingpromql/operator/binary_operator.go b/pkg/streamingpromql/operator/binary_operator.go index 3b17b6a0e12..c4936bf7d1d 100644 --- a/pkg/streamingpromql/operator/binary_operator.go +++ b/pkg/streamingpromql/operator/binary_operator.go @@ -229,8 +229,6 @@ func (b *BinaryOperator) Next(ctx context.Context) (InstantVectorSeriesData, err // - would also mean that some arithmetic operations become faster, as we can use vectorisation (eg. leftPoints + rightPoints, rather than output[i] = left[i] + right[i] etc.) // - should we just change the InstantVectorOperator interface to use ([]float64, presence)? Would make some aggregation operations faster as well (eg. sum) - // Compute result for each output series - // - Can we reuse either the left or right side slice for the output slice? return b.computeResult(allLeftSeries[0], allRightSeries[0]), nil // TODO: return series slices to the pool @@ -238,7 +236,7 @@ func (b *BinaryOperator) Next(ctx context.Context) (InstantVectorSeriesData, err func (b *BinaryOperator) computeResult(left InstantVectorSeriesData, right InstantVectorSeriesData) InstantVectorSeriesData { outputLength := min(len(left.Floats), len(right.Floats)) // We can't produce more output points than input points for arithmetic operators. - output := GetFPointSlice(outputLength) // Reuse one side for the output slice? + output := GetFPointSlice(outputLength) // FIXME: Reuse one side for the output slice? 
nextRightIndex := 0 From a24f63211a983d396d649f91e1032caac141b3a5 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 8 May 2024 14:02:34 +1000 Subject: [PATCH 09/43] Fix issue where `on` and `ignoring` are ignored --- pkg/streamingpromql/query.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go index 82072b5a202..c69a9375282 100644 --- a/pkg/streamingpromql/query.go +++ b/pkg/streamingpromql/query.go @@ -187,9 +187,10 @@ func (q *Query) convertToOperator(expr parser.Expr) (operator.InstantVectorOpera } return &operator.BinaryOperator{ - Left: lhs, - Right: rhs, - Op: e.Op, + Left: lhs, + Right: rhs, + VectorMatching: *e.VectorMatching, + Op: e.Op, }, nil case *parser.StepInvariantExpr: // One day, we'll do something smarter here. From a0c966e9f05d6afc5eceab8c4cd107397b55de61 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 8 May 2024 14:16:30 +1000 Subject: [PATCH 10/43] Use correct terminology --- ...binary_operator.go => binary_operation.go} | 70 +++++++++---------- pkg/streamingpromql/query.go | 2 +- 2 files changed, 36 insertions(+), 36 deletions(-) rename pkg/streamingpromql/operator/{binary_operator.go => binary_operation.go} (82%) diff --git a/pkg/streamingpromql/operator/binary_operator.go b/pkg/streamingpromql/operator/binary_operation.go similarity index 82% rename from pkg/streamingpromql/operator/binary_operator.go rename to pkg/streamingpromql/operator/binary_operation.go index c4936bf7d1d..c7e7d0339cb 100644 --- a/pkg/streamingpromql/operator/binary_operator.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -15,9 +15,7 @@ import ( "slices" ) -type binaryOperatorFunc func(left, right float64) float64 - -type BinaryOperator struct { +type BinaryOperation struct { Left InstantVectorOperator Right InstantVectorOperator Op parser.ItemType @@ -29,25 +27,25 @@ type BinaryOperator struct { leftMetadata []SeriesMetadata rightMetadata []SeriesMetadata - 
remainingSeries []*binaryOperatorSeriesPair - leftBuffer *binaryOperatorSeriesBuffer - rightBuffer *binaryOperatorSeriesBuffer - operatorFunc binaryOperatorFunc + remainingSeries []*binaryOperationSeriesPair + leftBuffer *binaryOperationSeriesBuffer + rightBuffer *binaryOperationSeriesBuffer + op binaryOperationFunc } -var _ InstantVectorOperator = &BinaryOperator{} +var _ InstantVectorOperator = &BinaryOperation{} -type binaryOperatorSeriesPair struct { +type binaryOperationSeriesPair struct { leftSeriesIndices []int rightSeriesIndices []int } -func (b *BinaryOperator) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, error) { - b.operatorFunc = arithmeticOperatorFuncs[b.Op] - if b.operatorFunc == nil { +func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, error) { + b.op = arithmeticOperationFuncs[b.Op] + if b.op == nil { // This should never happen, this should be caught by Query.convertToOperator - // FIXME: move NotSupportedError to a separate package so we can use it in a constructor function for BinaryOperator and remove the check in Query.convertToOperator - return nil, fmt.Errorf("unsupported binary operator '%s'", b.Op) + // FIXME: move NotSupportedError to a separate package so we can use it in a constructor function for BinaryOperation and remove the check in Query.convertToOperator + return nil, fmt.Errorf("unsupported binary operation '%s'", b.Op) } // TODO: break this into smaller functions, it's enormous @@ -81,9 +79,9 @@ func (b *BinaryOperator) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, // TODO: Prometheus' engine uses strings for the key here, which would avoid issues with hash collisions, but seems much slower. hashFunc := b.hashFunc() - // TODO: pool binaryOperatorSeriesPair? Pool internal slices? + // TODO: pool binaryOperationSeriesPair? Pool internal slices? // TODO: guess initial size of map? 
- allPairs := map[uint64]*binaryOperatorSeriesPair{} + allPairs := map[uint64]*binaryOperationSeriesPair{} // TODO: is it better to use whichever side has fewer series for this first loop? Should result in a smaller map and therefore less work later on // Would need to be careful about 'or' and 'unless' cases @@ -92,7 +90,7 @@ func (b *BinaryOperator) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, series, exists := allPairs[hash] if !exists { - series = &binaryOperatorSeriesPair{} + series = &binaryOperationSeriesPair{} allPairs[hash] = series } @@ -119,7 +117,7 @@ func (b *BinaryOperator) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, } allMetadata := make([]SeriesMetadata, 0, len(allPairs)) - b.remainingSeries = make([]*binaryOperatorSeriesPair, 0, len(allPairs)) + b.remainingSeries = make([]*binaryOperationSeriesPair, 0, len(allPairs)) labelsFunc := b.labelsFunc() for _, pair := range allPairs { @@ -142,13 +140,13 @@ func (b *BinaryOperator) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, // - therefore best option to keep peak memory utilisation low is to order series so we don't hold higher cardinality side in memory, especially as we'll // likely have to hold some "many" side series in memory anyway (doesn't make sense to have to hold both "one" and "many" side series) - b.leftBuffer = newBinaryOperatorSeriesBuffer(b.Left) - b.rightBuffer = newBinaryOperatorSeriesBuffer(b.Right) + b.leftBuffer = newBinaryOperationSeriesBuffer(b.Left) + b.rightBuffer = newBinaryOperationSeriesBuffer(b.Right) return allMetadata, nil } -func (b *BinaryOperator) hashFunc() func(labels.Labels) uint64 { +func (b *BinaryOperation) hashFunc() func(labels.Labels) uint64 { buf := make([]byte, 0, 1024) names := b.VectorMatching.MatchingLabels @@ -172,7 +170,7 @@ func (b *BinaryOperator) hashFunc() func(labels.Labels) uint64 { } } -func (b *BinaryOperator) labelsFunc() func(labels.Labels) labels.Labels { +func (b *BinaryOperation) labelsFunc() func(labels.Labels) 
labels.Labels { lb := labels.NewBuilder(labels.EmptyLabels()) if b.VectorMatching.On { @@ -191,7 +189,7 @@ func (b *BinaryOperator) labelsFunc() func(labels.Labels) labels.Labels { } } -func (b *BinaryOperator) Next(ctx context.Context) (InstantVectorSeriesData, error) { +func (b *BinaryOperation) Next(ctx context.Context) (InstantVectorSeriesData, error) { if len(b.remainingSeries) == 0 { return InstantVectorSeriesData{}, EOS } @@ -234,8 +232,8 @@ func (b *BinaryOperator) Next(ctx context.Context) (InstantVectorSeriesData, err // TODO: return series slices to the pool } -func (b *BinaryOperator) computeResult(left InstantVectorSeriesData, right InstantVectorSeriesData) InstantVectorSeriesData { - outputLength := min(len(left.Floats), len(right.Floats)) // We can't produce more output points than input points for arithmetic operators. +func (b *BinaryOperation) computeResult(left InstantVectorSeriesData, right InstantVectorSeriesData) InstantVectorSeriesData { + outputLength := min(len(left.Floats), len(right.Floats)) // We can't produce more output points than input points for arithmetic operations. output := GetFPointSlice(outputLength) // FIXME: Reuse one side for the output slice? nextRightIndex := 0 @@ -253,7 +251,7 @@ func (b *BinaryOperator) computeResult(left InstantVectorSeriesData, right Insta if leftPoint.T == right.Floats[nextRightIndex].T { // We have matching points on both sides, compute the result. output = append(output, promql.FPoint{ - F: b.operatorFunc(leftPoint.F, right.Floats[nextRightIndex].F), + F: b.op(leftPoint.F, right.Floats[nextRightIndex].F), T: leftPoint.T, }) } @@ -264,7 +262,7 @@ func (b *BinaryOperator) computeResult(left InstantVectorSeriesData, right Insta } } -func (b *BinaryOperator) Close() { +func (b *BinaryOperation) Close() { if b.Left != nil { b.Left.Close() } @@ -282,12 +280,12 @@ func (b *BinaryOperator) Close() { } } -// binaryOperatorSeriesBuffer buffers series data until it is needed by BinaryOperator. 
+// binaryOperationSeriesBuffer buffers series data until it is needed by BinaryOperation. // // For example, if the source operator produces series in order A, B, C, but their corresponding output series from the -// binary operator are in order B, A, C, binaryOperatorSeriesBuffer will buffer the data for series A while series B is +// binary operation are in order B, A, C, binaryOperationSeriesBuffer will buffer the data for series A while series B is // produced, then return series A when needed. -type binaryOperatorSeriesBuffer struct { +type binaryOperationSeriesBuffer struct { source InstantVectorOperator nextIndexToRead int @@ -299,8 +297,8 @@ type binaryOperatorSeriesBuffer struct { output []InstantVectorSeriesData } -func newBinaryOperatorSeriesBuffer(source InstantVectorOperator) *binaryOperatorSeriesBuffer { - return &binaryOperatorSeriesBuffer{ +func newBinaryOperationSeriesBuffer(source InstantVectorOperator) *binaryOperationSeriesBuffer { + return &binaryOperationSeriesBuffer{ source: source, buffer: map[int]InstantVectorSeriesData{}, } @@ -308,7 +306,7 @@ func newBinaryOperatorSeriesBuffer(source InstantVectorOperator) *binaryOperator // getSeries returns the data for the series in seriesIndices. // The returned slice is only safe to use until getSeries is called again. -func (b *binaryOperatorSeriesBuffer) getSeries(ctx context.Context, seriesIndices []int) ([]InstantVectorSeriesData, error) { +func (b *binaryOperationSeriesBuffer) getSeries(ctx context.Context, seriesIndices []int) ([]InstantVectorSeriesData, error) { if cap(b.output) < len(seriesIndices) { // TODO: pool? 
b.output = make([]InstantVectorSeriesData, len(seriesIndices)) @@ -329,7 +327,7 @@ func (b *binaryOperatorSeriesBuffer) getSeries(ctx context.Context, seriesIndice return b.output, nil } -func (b *binaryOperatorSeriesBuffer) getSingleSeries(ctx context.Context, seriesIndex int) (InstantVectorSeriesData, error) { +func (b *binaryOperationSeriesBuffer) getSingleSeries(ctx context.Context, seriesIndex int) (InstantVectorSeriesData, error) { for seriesIndex > b.nextIndexToRead { d, err := b.source.Next(ctx) if err != nil { @@ -354,7 +352,9 @@ func (b *binaryOperatorSeriesBuffer) getSingleSeries(ctx context.Context, series return d, nil } -var arithmeticOperatorFuncs = map[parser.ItemType]binaryOperatorFunc{ +type binaryOperationFunc func(left, right float64) float64 + +var arithmeticOperationFuncs = map[parser.ItemType]binaryOperationFunc{ parser.ADD: func(left, right float64) float64 { return left + right }, diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go index c69a9375282..95ebdec41f9 100644 --- a/pkg/streamingpromql/query.go +++ b/pkg/streamingpromql/query.go @@ -186,7 +186,7 @@ func (q *Query) convertToOperator(expr parser.Expr) (operator.InstantVectorOpera return nil, err } - return &operator.BinaryOperator{ + return &operator.BinaryOperation{ Left: lhs, Right: rhs, VectorMatching: *e.VectorMatching, From 81832534fe0206bf2628b2e4326111a22852c14f Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 8 May 2024 16:55:05 +1000 Subject: [PATCH 11/43] Fix issue where errors are lost if result is a matrix --- pkg/streamingpromql/query.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go index 95ebdec41f9..1880679be67 100644 --- a/pkg/streamingpromql/query.go +++ b/pkg/streamingpromql/query.go @@ -225,7 +225,7 @@ func (q *Query) Exec(ctx context.Context) *promql.Result { } else { m, err := q.populateMatrix(ctx, series) if err != nil { - return &promql.Result{Value: m} + 
return &promql.Result{Err: err} } q.result = &promql.Result{Value: m} From e2430bc3d78a838365ba1389f1d681de9f9bee03 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 8 May 2024 16:57:11 +1000 Subject: [PATCH 12/43] Add support for case where multiple different series contribute to one side of a binary operation --- .../operator/binary_operation.go | 159 +++++++++-- .../operator/binary_operation_test.go | 262 ++++++++++++++++++ 2 files changed, 398 insertions(+), 23 deletions(-) create mode 100644 pkg/streamingpromql/operator/binary_operation_test.go diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index c7e7d0339cb..19e9c7b8411 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -8,11 +8,14 @@ package operator import ( "context" "fmt" + "math" + "slices" + "time" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" - "math" - "slices" ) type BinaryOperation struct { @@ -24,6 +27,8 @@ type BinaryOperation struct { // We need to retain these so that NextSeries() can return an error message with the series labels when // multiple points match on a single side. + // Note that we don't retain the output series metadata: if we need to return an error message, we can compute + // the output series labels from these again. leftMetadata []SeriesMetadata rightMetadata []SeriesMetadata @@ -146,6 +151,7 @@ func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, return allMetadata, nil } +// hashFunc returns a function that computes the hash of the output group this series belongs to. 
func (b *BinaryOperation) hashFunc() func(labels.Labels) uint64 { buf := make([]byte, 0, 1024) names := b.VectorMatching.MatchingLabels @@ -170,6 +176,7 @@ func (b *BinaryOperation) hashFunc() func(labels.Labels) uint64 { } } +// labelsFunc returns a function that computes the labels of the output group this series belongs to. func (b *BinaryOperation) labelsFunc() func(labels.Labels) labels.Labels { lb := labels.NewBuilder(labels.EmptyLabels()) @@ -197,42 +204,148 @@ func (b *BinaryOperation) Next(ctx context.Context) (InstantVectorSeriesData, er thisSeries := b.remainingSeries[0] b.remainingSeries = b.remainingSeries[1:] - // TODO: need to store which series are actually used on each side in SeriesMetadata() above - // TODO: need to return original slices from Left.Next() and Right.Next() to the pool - // Solution: return a type like { data InstantVectorSeriesData, refCount int } from getSeries() and merge(), - // use this to track when it is safe to return slices here or in merge() - will work for one-to-one, many-to-one and many-to-many cases - // Populate refCount from slice that tracks the number of times each series is used in an output series - allLeftSeries, err := b.leftBuffer.getSeries(ctx, thisSeries.leftSeriesIndices) if err != nil { return InstantVectorSeriesData{}, err } + mergedLeftSide, err := b.mergeOneSide(allLeftSeries, thisSeries.leftSeriesIndices, b.leftMetadata, "left") + if err != nil { + return InstantVectorSeriesData{}, err + } + allRightSeries, err := b.rightBuffer.getSeries(ctx, thisSeries.rightSeriesIndices) if err != nil { return InstantVectorSeriesData{}, err } - // TODO: merge left side into single slice? Or have some kind of iterator over slices? - // TODO: merge right side into single slice? Or have some kind of iterator over slices? 
- // Merging: - // - responsible for merging multiple series on a side into one series - // - if only one series on side, just return original slice as-is - // - if multiple series: - // - sort series by first point - // - for each remaining series: - // - if points don't overlap with previous series, just copy data - // - if points do overlap with previous series, need to zip series together, checking for conflicts - // - would this be easier to do if we were working with []float64 rather than []FPoint? - // - would also mean that some arithmetic operations become faster, as we can use vectorisation (eg. leftPoints + rightPoints, rather than output[i] = left[i] + right[i] etc.) - // - should we just change the InstantVectorOperator interface to use ([]float64, presence)? Would make some aggregation operations faster as well (eg. sum) + mergedRightSide, err := b.mergeOneSide(allRightSeries, thisSeries.rightSeriesIndices, b.rightMetadata, "right") + if err != nil { + return InstantVectorSeriesData{}, err + } + + return b.computeResult(mergedLeftSide, mergedRightSide), nil +} + +// mergeOneSide exists to handle the case where one side of an output series has different source series at different time steps. +// +// For example, consider the query "left_side + on (env) right_side" with the following source data: +// +// left_side{env="test", pod="a"} 1 2 _ +// left_side{env="test", pod="b"} _ _ 3 +// right_side{env="test"} 100 200 300 +// +// mergeOneSide will take in both series for left_side and return a single series with the points [1, 2, 3]. +// +// mergeOneSide is optimised for the case where there is only one source series, or the source series do not overlap, as in the example above. +// +// TODO: for many-to-one / one-to-many matching, we could avoid re-merging each time for the side used multiple times +// TODO: would this be easier to do if we were working with []float64 rather than []FPoint? 
+// - would also mean that some arithmetic operations become faster, as we can use vectorisation (eg. leftPoints + rightPoints, rather than output[i] = left[i] + right[i] etc.) +// - should we just change the InstantVectorOperator interface to use ([]float64, presence)? Would make some aggregation operations faster as well (eg. sum) +func (b *BinaryOperation) mergeOneSide(data []InstantVectorSeriesData, sourceSeriesIndices []int, sourceSeriesMetadata []SeriesMetadata, side string) (InstantVectorSeriesData, error) { + if len(data) == 1 { + // If there's only one series on this side, there's no merging required. + return data[0], nil + } + + if len(data) == 0 { + return InstantVectorSeriesData{}, nil + } + + slices.SortFunc(data, func(a, b InstantVectorSeriesData) int { + return int(a.Floats[0].T - b.Floats[0].T) + }) + + mergedSize := len(data[0].Floats) + haveOverlaps := false + + // We're going to create a new slice, so return this one to the pool. + // We'll return the other slices in the for loop below. + // We must defer here, rather than at the end, as the merge loop below reslices Floats. + // TODO: this isn't correct for many-to-one / one-to-many matching - we'll need the series again (unless we store the result of the merge) + defer PutFPointSlice(data[0].Floats) + + for i := 0; i < len(data)-1; i++ { + first := data[i] + second := data[i+1] + mergedSize += len(second.Floats) + + // We're going to create a new slice, so return this one to the pool. + // We must defer here, rather than at the end, as the merge loop below reslices Floats. + // TODO: this isn't correct for many-to-one / one-to-many matching - we'll need the series again (unless we store the result of the merge) + defer PutFPointSlice(second.Floats) + + // Check if first overlaps with second. + // InstantVectorSeriesData.Floats is required to be sorted in timestamp order, so if the last point + // of the first series is before the first point of the second series, it cannot overlap. 
+ if first.Floats[len(first.Floats)-1].T >= second.Floats[0].T { + haveOverlaps = true + } + } - return b.computeResult(allLeftSeries[0], allRightSeries[0]), nil + output := GetFPointSlice(mergedSize) - // TODO: return series slices to the pool + if !haveOverlaps { + // Fast path: no overlaps, so we can just concatenate the slices together, and there's no + // need to check for conflicts either. + for _, d := range data { + output = append(output, d.Floats...) + } + + return InstantVectorSeriesData{Floats: output}, nil + } + + // Slow path: there are overlaps, so we need to merge slices together and check for conflicts as we go. + // We don't expect to have many series here, so something like a loser tree is likely unnecessary. + remainingSeries := len(data) + + for { + if remainingSeries == 1 { + // Only one series left, just copy remaining points. + for _, d := range data { + if len(d.Floats) > 0 { + output = append(output, d.Floats...) + return InstantVectorSeriesData{Floats: output}, nil + } + } + } + + nextT := int64(math.MaxInt64) + sourceSeriesIndexInData := -1 + + for seriesIndexInData, d := range data { + if len(d.Floats) == 0 { + continue + } + + nextPointInSeries := d.Floats[0] + if nextPointInSeries.T == nextT { + // Another series has a point with the same timestamp. We have a conflict. 
+ firstConflictingSeriesLabels := sourceSeriesMetadata[sourceSeriesIndices[sourceSeriesIndexInData]].Labels + secondConflictingSeriesLabels := sourceSeriesMetadata[sourceSeriesIndices[seriesIndexInData]].Labels + groupLabels := b.labelsFunc()(firstConflictingSeriesLabels) + + return InstantVectorSeriesData{}, fmt.Errorf("found duplicate series for the match group %s on the %s side of the operation at timestamp %s: %s and %s", groupLabels, side, timestamp.Time(nextT).Format(time.RFC3339Nano), firstConflictingSeriesLabels, secondConflictingSeriesLabels) + } + + if d.Floats[0].T < nextT { + nextT = d.Floats[0].T + sourceSeriesIndexInData = seriesIndexInData + } + } + + output = append(output, data[sourceSeriesIndexInData].Floats[0]) + data[sourceSeriesIndexInData].Floats = data[sourceSeriesIndexInData].Floats[1:] + + if len(data[sourceSeriesIndexInData].Floats) == 0 { + remainingSeries-- + } + } } func (b *BinaryOperation) computeResult(left InstantVectorSeriesData, right InstantVectorSeriesData) InstantVectorSeriesData { + // TODO: return slices to pool if they're not reused outputLength := min(len(left.Floats), len(right.Floats)) // We can't produce more output points than input points for arithmetic operations. output := GetFPointSlice(outputLength) // FIXME: Reuse one side for the output slice? diff --git a/pkg/streamingpromql/operator/binary_operation_test.go b/pkg/streamingpromql/operator/binary_operation_test.go new file mode 100644 index 00000000000..a4c6f048f13 --- /dev/null +++ b/pkg/streamingpromql/operator/binary_operation_test.go @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package operator + +import ( + "testing" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/parser" + "github.com/stretchr/testify/require" +) + +// Most of the functionality of the binary operation operator is tested through the test scripts in +// pkg/streamingpromql/testdata. 
+// +// The merging behaviour has many edge cases, so it's easier to test it here. +func TestBinaryOperation_SeriesMerging(t *testing.T) { + testCases := map[string]struct { + input []InstantVectorSeriesData + sourceSeriesIndices []int + sourceSeriesMetadata []SeriesMetadata + + expectedOutput InstantVectorSeriesData + expectedError string + }{ + "no input series": { + input: []InstantVectorSeriesData{}, + expectedOutput: InstantVectorSeriesData{}, + }, + "single input series": { + input: []InstantVectorSeriesData{ + { + Floats: []promql.FPoint{ + {T: 1, F: 10}, + {T: 2, F: 20}, + {T: 3, F: 30}, + }, + }, + }, + expectedOutput: InstantVectorSeriesData{ + Floats: []promql.FPoint{ + {T: 1, F: 10}, + {T: 2, F: 20}, + {T: 3, F: 30}, + }, + }, + }, + "two input series with no overlap, series in time order": { + input: []InstantVectorSeriesData{ + { + Floats: []promql.FPoint{ + {T: 1, F: 10}, + {T: 2, F: 20}, + {T: 3, F: 30}, + }, + }, + { + Floats: []promql.FPoint{ + {T: 4, F: 40}, + {T: 5, F: 50}, + {T: 6, F: 60}, + }, + }, + }, + expectedOutput: InstantVectorSeriesData{ + Floats: []promql.FPoint{ + {T: 1, F: 10}, + {T: 2, F: 20}, + {T: 3, F: 30}, + {T: 4, F: 40}, + {T: 5, F: 50}, + {T: 6, F: 60}, + }, + }, + }, + "two input series with no overlap, series not in time order": { + input: []InstantVectorSeriesData{ + { + Floats: []promql.FPoint{ + {T: 4, F: 40}, + {T: 5, F: 50}, + {T: 6, F: 60}, + }, + }, + { + Floats: []promql.FPoint{ + {T: 1, F: 10}, + {T: 2, F: 20}, + {T: 3, F: 30}, + }, + }, + }, + expectedOutput: InstantVectorSeriesData{ + Floats: []promql.FPoint{ + {T: 1, F: 10}, + {T: 2, F: 20}, + {T: 3, F: 30}, + {T: 4, F: 40}, + {T: 5, F: 50}, + {T: 6, F: 60}, + }, + }, + }, + "three input series with no overlap": { + input: []InstantVectorSeriesData{ + { + Floats: []promql.FPoint{ + {T: 1, F: 10}, + {T: 2, F: 20}, + {T: 3, F: 30}, + }, + }, + { + Floats: []promql.FPoint{ + {T: 4, F: 40}, + {T: 5, F: 50}, + {T: 6, F: 60}, + }, + }, + { + Floats: []promql.FPoint{ 
+ {T: 7, F: 70}, + {T: 8, F: 80}, + {T: 9, F: 90}, + }, + }, + }, + expectedOutput: InstantVectorSeriesData{ + Floats: []promql.FPoint{ + {T: 1, F: 10}, + {T: 2, F: 20}, + {T: 3, F: 30}, + {T: 4, F: 40}, + {T: 5, F: 50}, + {T: 6, F: 60}, + {T: 7, F: 70}, + {T: 8, F: 80}, + {T: 9, F: 90}, + }, + }, + }, + "two input series with overlap": { + input: []InstantVectorSeriesData{ + { + Floats: []promql.FPoint{ + {T: 1, F: 10}, + {T: 3, F: 30}, + {T: 5, F: 50}, + }, + }, + { + Floats: []promql.FPoint{ + {T: 2, F: 20}, + {T: 4, F: 40}, + {T: 6, F: 60}, + }, + }, + }, + expectedOutput: InstantVectorSeriesData{ + Floats: []promql.FPoint{ + {T: 1, F: 10}, + {T: 2, F: 20}, + {T: 3, F: 30}, + {T: 4, F: 40}, + {T: 5, F: 50}, + {T: 6, F: 60}, + }, + }, + }, + "three input series with overlap": { + input: []InstantVectorSeriesData{ + { + Floats: []promql.FPoint{ + {T: 1, F: 10}, + {T: 4, F: 40}, + }, + }, + { + Floats: []promql.FPoint{ + {T: 2, F: 20}, + {T: 5, F: 50}, + }, + }, + { + Floats: []promql.FPoint{ + {T: 3, F: 30}, + {T: 6, F: 60}, + }, + }, + }, + expectedOutput: InstantVectorSeriesData{ + Floats: []promql.FPoint{ + {T: 1, F: 10}, + {T: 2, F: 20}, + {T: 3, F: 30}, + {T: 4, F: 40}, + {T: 5, F: 50}, + {T: 6, F: 60}, + }, + }, + }, + "input series with conflict": { + input: []InstantVectorSeriesData{ + { + Floats: []promql.FPoint{ + {T: 1, F: 10}, + {T: 2, F: 20}, + {T: 5, F: 50}, + }, + }, + { + Floats: []promql.FPoint{ + {T: 6, F: 60}, + }, + }, + { + Floats: []promql.FPoint{ + {T: 2, F: 20}, + {T: 4, F: 40}, + }, + }, + }, + sourceSeriesIndices: []int{6, 9, 4}, + sourceSeriesMetadata: []SeriesMetadata{ + {labels.FromStrings("__name__", "right_side", "env", "test", "pod", "a")}, + {labels.FromStrings("__name__", "right_side", "env", "test", "pod", "b")}, + {labels.FromStrings("__name__", "right_side", "env", "test", "pod", "c")}, + {labels.FromStrings("__name__", "right_side", "env", "test", "pod", "d")}, + {labels.FromStrings("__name__", "right_side", "env", "test", 
"pod", "e")}, + {labels.FromStrings("__name__", "right_side", "env", "test", "pod", "f")}, + {labels.FromStrings("__name__", "right_side", "env", "test", "pod", "g")}, + {labels.FromStrings("__name__", "right_side", "env", "test", "pod", "h")}, + {labels.FromStrings("__name__", "right_side", "env", "test", "pod", "i")}, + {labels.FromStrings("__name__", "right_side", "env", "test", "pod", "j")}, + }, + expectedError: `found duplicate series for the match group {env="test"} on the right side of the operation at timestamp 1970-01-01T00:00:00.002Z: {__name__="right_side", env="test", pod="g"} and {__name__="right_side", env="test", pod="j"}`, + }, + } + + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + o := &BinaryOperation{ + // Simulate an expression with "on (env)". + // This is used to generate error messages. + VectorMatching: parser.VectorMatching{ + On: true, + MatchingLabels: []string{"env"}, + }, + } + + result, err := o.mergeOneSide(testCase.input, testCase.sourceSeriesIndices, testCase.sourceSeriesMetadata, "right") + + if testCase.expectedError == "" { + require.NoError(t, err) + require.Equal(t, testCase.expectedOutput, result) + } else { + require.EqualError(t, err, testCase.expectedError) + } + }) + } +} From c8438af7f60e627ddb26ad09d461526232829fb3 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 8 May 2024 17:02:00 +1000 Subject: [PATCH 13/43] Add comment explaining when slices are returned to the pool --- pkg/streamingpromql/operator/binary_operation.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index 19e9c7b8411..fdd80702ae8 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -72,6 +72,7 @@ func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, } // Keep series labels for later so we can use them to generate error 
messages. + // We'll return them to the pool in Close(). b.leftMetadata = leftMetadata b.rightMetadata = rightMetadata From 1152fb77e332a7d62abfaccd8ef22f0e6ab53d3f Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 8 May 2024 17:07:36 +1000 Subject: [PATCH 14/43] Add further comment --- pkg/streamingpromql/operator/binary_operation.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index fdd80702ae8..2353e457333 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -83,6 +83,7 @@ func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, } // TODO: Prometheus' engine uses strings for the key here, which would avoid issues with hash collisions, but seems much slower. + // Either we should use strings, or we'll need to deal with hash collisions. hashFunc := b.hashFunc() // TODO: pool binaryOperationSeriesPair? Pool internal slices? 
From b9c6770d22ae606413a06cfdc93d23f3fc389ba2 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 14:32:48 +1000 Subject: [PATCH 15/43] Remove redundant checks --- pkg/streamingpromql/operator/binary_operation.go | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index 2353e457333..c825e56a51f 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -378,13 +378,8 @@ func (b *BinaryOperation) computeResult(left InstantVectorSeriesData, right Inst } func (b *BinaryOperation) Close() { - if b.Left != nil { - b.Left.Close() - } - - if b.Right != nil { - b.Right.Close() - } + b.Left.Close() + b.Right.Close() if b.leftMetadata != nil { PutSeriesMetadataSlice(b.leftMetadata) From 29698cd2ae2c9b8b90d6e7f227a6ca38feb5f499 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 14:53:07 +1000 Subject: [PATCH 16/43] Log output from benchmark binary if it fails --- tools/benchmark-query-engine/main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/benchmark-query-engine/main.go b/tools/benchmark-query-engine/main.go index d6d05ddb60d..b0bd34f3c5a 100644 --- a/tools/benchmark-query-engine/main.go +++ b/tools/benchmark-query-engine/main.go @@ -248,6 +248,7 @@ func (a *app) runTestCase(name string, printBenchmarkHeader bool) error { cmd.Env = append(cmd.Env, "STREAMING_PROMQL_ENGINE_BENCHMARK_SKIP_COMPARE_RESULTS=true") if err := cmd.Run(); err != nil { + slog.Warn("output from failed command", "output", buf.String()) return fmt.Errorf("executing command failed: %w", err) } From dedf3e4d2df7def5f600329126f52f63d325a5b5 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 14:53:25 +1000 Subject: [PATCH 17/43] Don't limit the number of samples loaded in a query --- pkg/streamingpromql/testing.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) 
diff --git a/pkg/streamingpromql/testing.go b/pkg/streamingpromql/testing.go index 36fd14826c0..35c51dec88e 100644 --- a/pkg/streamingpromql/testing.go +++ b/pkg/streamingpromql/testing.go @@ -3,6 +3,7 @@ package streamingpromql import ( + "math" "time" "github.com/prometheus/prometheus/promql" @@ -12,7 +13,7 @@ func NewTestEngineOpts() promql.EngineOpts { return promql.EngineOpts{ Logger: nil, Reg: nil, - MaxSamples: 50000000, + MaxSamples: math.MaxInt, Timeout: 100 * time.Second, EnableAtModifier: true, EnableNegativeOffset: true, From 84b3a8efb8831faf5fa71c1bc93de8bf2e7eab51 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 14:53:45 +1000 Subject: [PATCH 18/43] Return point slices to the pool once we're done with them --- pkg/streamingpromql/operator/binary_operation.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index c825e56a51f..9a1739d19a7 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -347,9 +347,12 @@ func (b *BinaryOperation) mergeOneSide(data []InstantVectorSeriesData, sourceSer } func (b *BinaryOperation) computeResult(left InstantVectorSeriesData, right InstantVectorSeriesData) InstantVectorSeriesData { - // TODO: return slices to pool if they're not reused + // FIXME: it is only safe to unconditionally put these slices back in the pool for one-to-one matching (otherwise we'll need some of these slices for future groups) + defer PutFPointSlice(left.Floats) + defer PutFPointSlice(right.Floats) + outputLength := min(len(left.Floats), len(right.Floats)) // We can't produce more output points than input points for arithmetic operations. - output := GetFPointSlice(outputLength) // FIXME: Reuse one side for the output slice? + output := GetFPointSlice(outputLength) // TODO: Reuse one side for the output slice. 
If we do this, need to make sure not to return it to the pool nextRightIndex := 0 From 7465322fb230e84f90739de4a320ffc0f3ad0b64 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 15:30:19 +1000 Subject: [PATCH 19/43] Split `SeriesMetadata` into smaller methods --- .../operator/binary_operation.go | 105 ++++++++++-------- 1 file changed, 61 insertions(+), 44 deletions(-) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index 9a1739d19a7..84c6baca944 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -53,35 +53,79 @@ func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, return nil, fmt.Errorf("unsupported binary operation '%s'", b.Op) } - // TODO: break this into smaller functions, it's enormous + if canProduceAnySeries, err := b.loadSeriesMetadata(ctx); err != nil { + return nil, err + } else if !canProduceAnySeries { + return nil, nil + } + + allPairs := b.computeSeriesPairs() + + // TODO: move this into computeSeriesPairs? 
+ allMetadata := make([]SeriesMetadata, 0, len(allPairs)) + b.remainingSeries = make([]*binaryOperationSeriesPair, 0, len(allPairs)) + labelsFunc := b.labelsFunc() + + for _, pair := range allPairs { + firstSeriesLabels := b.leftMetadata[pair.leftSeriesIndices[0]].Labels + allMetadata = append(allMetadata, SeriesMetadata{Labels: labelsFunc(firstSeriesLabels)}) + b.remainingSeries = append(b.remainingSeries, pair) + } + + // TODO: sort output series + // Sort output series: either to favour left side or right side + // - Add comment emphasising this is critical for managing peak memory consumption, could make this decision more sophisticated in the future + // - TODO: think this through with some examples, especially for pathological cases where a single output series has multiple series on each side for different timesteps + // - One-to-one matching: + // - assume one series on each side of output series + // - therefore best option to keep peak memory utilisation low is to order series so we don't hold higher cardinality side in memory + // eg. if LHS is 20 series, and RHS is 2 series, best option is to go through LHS series in order and potentially have to hold some RHS series in memory, + // as then in worst case we'll hold 2 series in memory at once + // - Many-to-one / one-to-many matching: + // - assume "one" side is higher cardinality, and series from "many" side are used multiple times + // - therefore best option to keep peak memory utilisation low is to order series so we don't hold higher cardinality side in memory, especially as we'll + // likely have to hold some "many" side series in memory anyway (doesn't make sense to have to hold both "one" and "many" side series) + + b.leftBuffer = newBinaryOperationSeriesBuffer(b.Left) + b.rightBuffer = newBinaryOperationSeriesBuffer(b.Right) + + return allMetadata, nil +} + +// loadSeriesMetadata loads series metadata from both sides of this operation. 
+// It returns false if one side returned no series and that means there is no way for this operation to return any series. +// (eg. if doing A + B and either A or B have no series, then there is no way for this operation to produce any series) +func (b *BinaryOperation) loadSeriesMetadata(ctx context.Context) (bool, error) { + // We retain the series labels for later so we can use them to generate error messages. + // We'll return them to the pool in Close(). - leftMetadata, err := b.Left.SeriesMetadata(ctx) + var err error + b.leftMetadata, err = b.Left.SeriesMetadata(ctx) if err != nil { - return nil, err + return false, err } - if len(leftMetadata) == 0 { + if len(b.leftMetadata) == 0 { // FIXME: this is incorrect for 'or' // No series on left-hand side, we'll never have any output series. - return nil, nil + return false, nil } - rightMetadata, err := b.Right.SeriesMetadata(ctx) + b.rightMetadata, err = b.Right.SeriesMetadata(ctx) if err != nil { - return nil, err + return false, err } - // Keep series labels for later so we can use them to generate error messages. - // We'll return them to the pool in Close(). - b.leftMetadata = leftMetadata - b.rightMetadata = rightMetadata - - if len(rightMetadata) == 0 { + if len(b.rightMetadata) == 0 { // FIXME: this is incorrect for 'or' and 'unless' // No series on right-hand side, we'll never have any output series. - return nil, nil + return false, nil } + return true, nil +} + +func (b *BinaryOperation) computeSeriesPairs() map[uint64]*binaryOperationSeriesPair { // TODO: Prometheus' engine uses strings for the key here, which would avoid issues with hash collisions, but seems much slower. // Either we should use strings, or we'll need to deal with hash collisions. hashFunc := b.hashFunc() @@ -92,7 +136,7 @@ func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, // TODO: is it better to use whichever side has fewer series for this first loop? 
Should result in a smaller map and therefore less work later on // Would need to be careful about 'or' and 'unless' cases - for idx, s := range leftMetadata { + for idx, s := range b.leftMetadata { hash := hashFunc(s.Labels) series, exists := allPairs[hash] @@ -104,7 +148,7 @@ func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, series.leftSeriesIndices = append(series.leftSeriesIndices, idx) } - for idx, s := range rightMetadata { + for idx, s := range b.rightMetadata { hash := hashFunc(s.Labels) if series, exists := allPairs[hash]; exists { @@ -123,34 +167,7 @@ func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, } } - allMetadata := make([]SeriesMetadata, 0, len(allPairs)) - b.remainingSeries = make([]*binaryOperationSeriesPair, 0, len(allPairs)) - labelsFunc := b.labelsFunc() - - for _, pair := range allPairs { - firstSeriesLabels := leftMetadata[pair.leftSeriesIndices[0]].Labels - allMetadata = append(allMetadata, SeriesMetadata{Labels: labelsFunc(firstSeriesLabels)}) - b.remainingSeries = append(b.remainingSeries, pair) - } - - // TODO: sort output series - // Sort output series: either to favour left side or right side - // - Add comment emphasising this is critical for managing peak memory consumption, could make this decision more sophisticated in the future - // - TODO: think this through with some examples, especially for pathological cases where a single output series has multiple series on each side for different timesteps - // - One-to-one matching: - // - assume one series on each side of output series - // - therefore best option to keep peak memory utilisation low is to order series so we don't hold higher cardinality side in memory - // eg. 
if LHS is 20 series, and RHS is 2 series, best option is to go through LHS series in order and potentially have to hold some RHS series in memory, - // as then in worst case we'll hold 2 series in memory at once - // - Many-to-one / one-to-many matching: - // - assume "one" side is higher cardinality, and series from "many" side are used multiple times - // - therefore best option to keep peak memory utilisation low is to order series so we don't hold higher cardinality side in memory, especially as we'll - // likely have to hold some "many" side series in memory anyway (doesn't make sense to have to hold both "one" and "many" side series) - - b.leftBuffer = newBinaryOperationSeriesBuffer(b.Left) - b.rightBuffer = newBinaryOperationSeriesBuffer(b.Right) - - return allMetadata, nil + return allPairs } // hashFunc returns a function that computes the hash of the output group this series belongs to. From 55fd65374e5d7d29089991aa30002cbd9f188f39 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 15:31:33 +1000 Subject: [PATCH 20/43] Clarify comment --- pkg/streamingpromql/operator/binary_operation.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index 84c6baca944..3635f087733 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -264,7 +264,7 @@ func (b *BinaryOperation) Next(ctx context.Context) (InstantVectorSeriesData, er // - should we just change the InstantVectorOperator interface to use ([]float64, presence)? Would make some aggregation operations faster as well (eg. sum) func (b *BinaryOperation) mergeOneSide(data []InstantVectorSeriesData, sourceSeriesIndices []int, sourceSeriesMetadata []SeriesMetadata, side string) (InstantVectorSeriesData, error) { if len(data) == 1 { - // If there's only one series on this side, there's no merging required. 
+ // Fast path: if there's only one series on this side, there's no merging required. return data[0], nil } From c2d4946e233869077192ae2c827165d18767597c Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 15:32:07 +1000 Subject: [PATCH 21/43] Add extra benchmark --- pkg/streamingpromql/benchmarks/benchmarks.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/streamingpromql/benchmarks/benchmarks.go b/pkg/streamingpromql/benchmarks/benchmarks.go index 88a22d23cc0..2b13a884c8b 100644 --- a/pkg/streamingpromql/benchmarks/benchmarks.go +++ b/pkg/streamingpromql/benchmarks/benchmarks.go @@ -160,6 +160,9 @@ func TestCases(metricSizes []int) []BenchCase { { Expr: "rate(a_X[1m]) + rate(b_X[1m])", }, + { + Expr: "sum(a_X + b_X)", + }, { Expr: "sum by (le)(rate(h_X[1m]))", }, From ead55571c1f3cd2debd2c53389deb1e0badb4b46 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 15:39:00 +1000 Subject: [PATCH 22/43] Add comment explaining tradeoff --- pkg/streamingpromql/operator/binary_operation.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index 3635f087733..64becf14f25 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -18,6 +18,7 @@ import ( "github.com/prometheus/prometheus/promql/parser" ) +// BinaryOperation represents a binary operation between instant vectors such as " + " or " - ". type BinaryOperation struct { Left InstantVectorOperator Right InstantVectorOperator @@ -45,6 +46,21 @@ type binaryOperationSeriesPair struct { rightSeriesIndices []int } +// SeriesMetadata returns the series expected to be produced by this operator. +// +// Note that it is possible that this method returns a series which will not have any points, as the +// list of possible output series is generated based solely on the series labels, not their data. 
+// +// For example, if this operator is for a range query with the expression "left_metric + right_metric", but +// left_metric has points at T=0 and T=1 in the query range, and right_metric has points at T=2 and T=3 in the +// query range, then SeriesMetadata will return a series, but NextSeries will return no points for that series. +// +// If this affects many series in the query, this may cause consuming operators to be less efficient, but in +// practice this rarely happens. +// +// (The alternative would be to compute the entire result here in SeriesMetadata and only return the series that +// contain points, but that would mean we'd need to hold the entire result in memory at once, which we want to +// avoid.) func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, error) { b.op = arithmeticOperationFuncs[b.Op] if b.op == nil { From 5162a1df13166d468eb03444c4eaefafa710c522 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 16:50:06 +1000 Subject: [PATCH 23/43] Rename `binaryOperationSeriesPair` to `binaryOperationOutputSeries` --- .../operator/binary_operation.go | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index 64becf14f25..b160b13754d 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -33,7 +33,7 @@ type BinaryOperation struct { leftMetadata []SeriesMetadata rightMetadata []SeriesMetadata - remainingSeries []*binaryOperationSeriesPair + remainingSeries []*binaryOperationOutputSeries leftBuffer *binaryOperationSeriesBuffer rightBuffer *binaryOperationSeriesBuffer op binaryOperationFunc @@ -41,7 +41,7 @@ type BinaryOperation struct { var _ InstantVectorOperator = &BinaryOperation{} -type binaryOperationSeriesPair struct { +type binaryOperationOutputSeries struct { leftSeriesIndices []int rightSeriesIndices 
[]int } @@ -75,17 +75,17 @@ func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, return nil, nil } - allPairs := b.computeSeriesPairs() + allOutputSeries := b.computeOutputSeries() - // TODO: move this into computeSeriesPairs? - allMetadata := make([]SeriesMetadata, 0, len(allPairs)) - b.remainingSeries = make([]*binaryOperationSeriesPair, 0, len(allPairs)) + // TODO: move this into computeOutputSeries? + allMetadata := make([]SeriesMetadata, 0, len(allOutputSeries)) + b.remainingSeries = make([]*binaryOperationOutputSeries, 0, len(allOutputSeries)) labelsFunc := b.labelsFunc() - for _, pair := range allPairs { - firstSeriesLabels := b.leftMetadata[pair.leftSeriesIndices[0]].Labels + for _, outputSeries := range allOutputSeries { + firstSeriesLabels := b.leftMetadata[outputSeries.leftSeriesIndices[0]].Labels allMetadata = append(allMetadata, SeriesMetadata{Labels: labelsFunc(firstSeriesLabels)}) - b.remainingSeries = append(b.remainingSeries, pair) + b.remainingSeries = append(b.remainingSeries, outputSeries) } // TODO: sort output series @@ -141,24 +141,24 @@ func (b *BinaryOperation) loadSeriesMetadata(ctx context.Context) (bool, error) return true, nil } -func (b *BinaryOperation) computeSeriesPairs() map[uint64]*binaryOperationSeriesPair { +func (b *BinaryOperation) computeOutputSeries() map[uint64]*binaryOperationOutputSeries { // TODO: Prometheus' engine uses strings for the key here, which would avoid issues with hash collisions, but seems much slower. // Either we should use strings, or we'll need to deal with hash collisions. hashFunc := b.hashFunc() - // TODO: pool binaryOperationSeriesPair? Pool internal slices? + // TODO: pool binaryOperationOutputSeries? Pool internal slices? // TODO: guess initial size of map? - allPairs := map[uint64]*binaryOperationSeriesPair{} + allOutputSeries := map[uint64]*binaryOperationOutputSeries{} // TODO: is it better to use whichever side has fewer series for this first loop? 
Should result in a smaller map and therefore less work later on // Would need to be careful about 'or' and 'unless' cases for idx, s := range b.leftMetadata { hash := hashFunc(s.Labels) - series, exists := allPairs[hash] + series, exists := allOutputSeries[hash] if !exists { - series = &binaryOperationSeriesPair{} - allPairs[hash] = series + series = &binaryOperationOutputSeries{} + allOutputSeries[hash] = series } series.leftSeriesIndices = append(series.leftSeriesIndices, idx) @@ -167,23 +167,23 @@ func (b *BinaryOperation) computeSeriesPairs() map[uint64]*binaryOperationSeries for idx, s := range b.rightMetadata { hash := hashFunc(s.Labels) - if series, exists := allPairs[hash]; exists { + if series, exists := allOutputSeries[hash]; exists { series.rightSeriesIndices = append(series.rightSeriesIndices, idx) } // FIXME: if this is an 'or' operation, then we need to create the right side even if the left doesn't exist } - // Remove pairs that cannot produce series. - for hash, pair := range allPairs { - if len(pair.leftSeriesIndices) == 0 || len(pair.rightSeriesIndices) == 0 { + // Remove series that cannot produce samples. + for hash, outputSeries := range allOutputSeries { + if len(outputSeries.leftSeriesIndices) == 0 || len(outputSeries.rightSeriesIndices) == 0 { // FIXME: this is incorrect for 'or' and 'unless' // No matching series on at least one side for this output series, so output series will have no samples. Remove it. - delete(allPairs, hash) + delete(allOutputSeries, hash) } } - return allPairs + return allOutputSeries } // hashFunc returns a function that computes the hash of the output group this series belongs to. 
From 236f161d83e6ed37714cddbe02f357719649cedd Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 16:51:28 +1000 Subject: [PATCH 24/43] Add benchmark for the case where one side of a binary operation has many more series than the other --- pkg/streamingpromql/benchmarks/benchmarks.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/streamingpromql/benchmarks/benchmarks.go b/pkg/streamingpromql/benchmarks/benchmarks.go index 2b13a884c8b..79637d517f3 100644 --- a/pkg/streamingpromql/benchmarks/benchmarks.go +++ b/pkg/streamingpromql/benchmarks/benchmarks.go @@ -108,6 +108,13 @@ func TestCases(metricSizes []int) []BenchCase { Expr: "a_X - b_X", Steps: 10000, }, + // Test the case where one side of a binary operation has many more series than the other. + { + Expr: `a_100{l=~"[13579]."} - b_100`, + }, + { + Expr: `a_2000{l=~"1..."} - b_2000`, + }, //{ // Expr: "a_X and b_X{l=~'.*[0-4]$'}", //}, From 7834b9efbbec0fbc6483f47e8a8c72a39a362602 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 16:54:07 +1000 Subject: [PATCH 25/43] Further simplify `SeriesMetadata` by moving more logic into `computeOutputSeries`. --- .../operator/binary_operation.go | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index b160b13754d..e2608a4551f 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -75,18 +75,8 @@ func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, return nil, nil } - allOutputSeries := b.computeOutputSeries() - - // TODO: move this into computeOutputSeries? 
- allMetadata := make([]SeriesMetadata, 0, len(allOutputSeries)) - b.remainingSeries = make([]*binaryOperationOutputSeries, 0, len(allOutputSeries)) - labelsFunc := b.labelsFunc() - - for _, outputSeries := range allOutputSeries { - firstSeriesLabels := b.leftMetadata[outputSeries.leftSeriesIndices[0]].Labels - allMetadata = append(allMetadata, SeriesMetadata{Labels: labelsFunc(firstSeriesLabels)}) - b.remainingSeries = append(b.remainingSeries, outputSeries) - } + allMetadata, allSeries := b.computeOutputSeries() + b.remainingSeries = allSeries // TODO: sort output series // Sort output series: either to favour left side or right side @@ -141,24 +131,24 @@ func (b *BinaryOperation) loadSeriesMetadata(ctx context.Context) (bool, error) return true, nil } -func (b *BinaryOperation) computeOutputSeries() map[uint64]*binaryOperationOutputSeries { +func (b *BinaryOperation) computeOutputSeries() ([]SeriesMetadata, []*binaryOperationOutputSeries) { // TODO: Prometheus' engine uses strings for the key here, which would avoid issues with hash collisions, but seems much slower. // Either we should use strings, or we'll need to deal with hash collisions. hashFunc := b.hashFunc() // TODO: pool binaryOperationOutputSeries? Pool internal slices? // TODO: guess initial size of map? - allOutputSeries := map[uint64]*binaryOperationOutputSeries{} + outputSeriesMap := map[uint64]*binaryOperationOutputSeries{} // TODO: is it better to use whichever side has fewer series for this first loop? 
Should result in a smaller map and therefore less work later on // Would need to be careful about 'or' and 'unless' cases for idx, s := range b.leftMetadata { hash := hashFunc(s.Labels) - series, exists := allOutputSeries[hash] + series, exists := outputSeriesMap[hash] if !exists { series = &binaryOperationOutputSeries{} - allOutputSeries[hash] = series + outputSeriesMap[hash] = series } series.leftSeriesIndices = append(series.leftSeriesIndices, idx) @@ -167,7 +157,7 @@ func (b *BinaryOperation) computeOutputSeries() map[uint64]*binaryOperationOutpu for idx, s := range b.rightMetadata { hash := hashFunc(s.Labels) - if series, exists := allOutputSeries[hash]; exists { + if series, exists := outputSeriesMap[hash]; exists { series.rightSeriesIndices = append(series.rightSeriesIndices, idx) } @@ -175,15 +165,25 @@ func (b *BinaryOperation) computeOutputSeries() map[uint64]*binaryOperationOutpu } // Remove series that cannot produce samples. - for hash, outputSeries := range allOutputSeries { + for hash, outputSeries := range outputSeriesMap { if len(outputSeries.leftSeriesIndices) == 0 || len(outputSeries.rightSeriesIndices) == 0 { // FIXME: this is incorrect for 'or' and 'unless' // No matching series on at least one side for this output series, so output series will have no samples. Remove it. - delete(allOutputSeries, hash) + delete(outputSeriesMap, hash) } } - return allOutputSeries + allMetadata := make([]SeriesMetadata, 0, len(outputSeriesMap)) + allSeries := make([]*binaryOperationOutputSeries, 0, len(outputSeriesMap)) + labelsFunc := b.labelsFunc() + + for _, outputSeries := range outputSeriesMap { + firstSeriesLabels := b.leftMetadata[outputSeries.leftSeriesIndices[0]].Labels + allMetadata = append(allMetadata, SeriesMetadata{Labels: labelsFunc(firstSeriesLabels)}) + allSeries = append(allSeries, outputSeries) + } + + return allMetadata, allSeries } // hashFunc returns a function that computes the hash of the output group this series belongs to. 
From 5555bb7dc1e92a441a069f990c2c2033488e6359 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 17:36:30 +1000 Subject: [PATCH 26/43] Sort output series to minimise the number of input series we need to buffer in memory --- .../operator/binary_operation.go | 107 +++++++++++++++--- 1 file changed, 93 insertions(+), 14 deletions(-) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index e2608a4551f..525f5fc1f31 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -10,6 +10,7 @@ import ( "fmt" "math" "slices" + "sort" "time" "github.com/prometheus/prometheus/model/labels" @@ -46,6 +47,20 @@ type binaryOperationOutputSeries struct { rightSeriesIndices []int } +// latestLeftSeries returns the index of the last series from the left source needed for this output series. +// +// It assumes that leftSeriesIndices is sorted in ascending order. +func (s binaryOperationOutputSeries) latestLeftSeries() int { + return s.leftSeriesIndices[len(s.leftSeriesIndices)-1] +} + +// latestRightSeries returns the index of the last series from the right source needed for this output series. +// +// It assumes that rightSeriesIndices is sorted in ascending order. +func (s binaryOperationOutputSeries) latestRightSeries() int { + return s.rightSeriesIndices[len(s.rightSeriesIndices)-1] +} + // SeriesMetadata returns the series expected to be produced by this operator. 
// // Note that it is possible that this method returns a series which will not have any points, as the @@ -76,22 +91,9 @@ func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, } allMetadata, allSeries := b.computeOutputSeries() + b.sortSeries(allMetadata, allSeries) b.remainingSeries = allSeries - // TODO: sort output series - // Sort output series: either to favour left side or right side - // - Add comment emphasising this is critical for managing peak memory consumption, could make this decision more sophisticated in the future - // - TODO: think this through with some examples, especially for pathological cases where a single output series has multiple series on each side for different timesteps - // - One-to-one matching: - // - assume one series on each side of output series - // - therefore best option to keep peak memory utilisation low is to order series so we don't hold higher cardinality side in memory - // eg. if LHS is 20 series, and RHS is 2 series, best option is to go through LHS series in order and potentially have to hold some RHS series in memory, - // as then in worst case we'll hold 2 series in memory at once - // - Many-to-one / one-to-many matching: - // - assume "one" side is higher cardinality, and series from "many" side are used multiple times - // - therefore best option to keep peak memory utilisation low is to order series so we don't hold higher cardinality side in memory, especially as we'll - // likely have to hold some "many" side series in memory anyway (doesn't make sense to have to hold both "one" and "many" side series) - b.leftBuffer = newBinaryOperationSeriesBuffer(b.Left) b.rightBuffer = newBinaryOperationSeriesBuffer(b.Right) @@ -186,6 +188,83 @@ func (b *BinaryOperation) computeOutputSeries() ([]SeriesMetadata, []*binaryOper return allMetadata, allSeries } +// sortSeries sorts metadata and series in place to try to minimise the number of input series we'll need to buffer in memory. 
+// +// This is critical for minimising the memory consumption of this operator: if we choose a poor ordering of series, +// we'll need to buffer many input series in memory. +// +// At present, sortSeries uses a very basic heuristic to guess the best way to sort the output series, but we could make +// this more sophisticated in the future. +func (b *BinaryOperation) sortSeries(metadata []SeriesMetadata, series []*binaryOperationOutputSeries) { + // For one-to-one matching, we assume that each output series takes one series from each side of the operator. + // If this is true, then the best order is the one in which we read from the highest cardinality side in order. + // If we do this, then in the worst case, we'll have to buffer the whole of the lower cardinality side. + // (Compare this with sorting so that we read the lowest cardinality side in order: in the worst case, we'll have + // to buffer the whole of the higher cardinality side.) + // + // FIXME: this is reasonable for one-to-one matching, but likely not for one-to-many / many-to-one. + // For one-to-many / many-to-one, it would likely be best to buffer the side used for multiple output series (the "one" side), + // as we'll need to retain these series for multiple output series anyway. 
+ + var sortInterface sort.Interface + + if len(b.leftMetadata) < len(b.rightMetadata) { + sortInterface = favourRightSideSorter{metadata, series} + } else { + sortInterface = favourLeftSideSorter{metadata, series} + } + + sort.Sort(sortInterface) +} + +type favourRightSideSorter struct { + metadata []SeriesMetadata + series []*binaryOperationOutputSeries +} + +type favourLeftSideSorter struct { + metadata []SeriesMetadata + series []*binaryOperationOutputSeries +} + +func (g favourRightSideSorter) Len() int { + return len(g.metadata) +} + +func (g favourLeftSideSorter) Len() int { + return len(g.metadata) +} + +func (g favourRightSideSorter) Less(i, j int) bool { + iRight := g.series[i].latestRightSeries() + jRight := g.series[j].latestRightSeries() + if iRight != jRight { + return iRight < jRight + } + + return g.series[i].latestLeftSeries() < g.series[j].latestLeftSeries() +} + +func (g favourLeftSideSorter) Less(i, j int) bool { + iLeft := g.series[i].latestLeftSeries() + jLeft := g.series[j].latestLeftSeries() + if iLeft != jLeft { + return iLeft < jLeft + } + + return g.series[i].latestRightSeries() < g.series[j].latestRightSeries() +} + +func (g favourRightSideSorter) Swap(i, j int) { + g.metadata[i], g.metadata[j] = g.metadata[j], g.metadata[i] + g.series[i], g.series[j] = g.series[j], g.series[i] +} + +func (g favourLeftSideSorter) Swap(i, j int) { + g.metadata[i], g.metadata[j] = g.metadata[j], g.metadata[i] + g.series[i], g.series[j] = g.series[j], g.series[i] +} + // hashFunc returns a function that computes the hash of the output group this series belongs to. 
func (b *BinaryOperation) hashFunc() func(labels.Labels) uint64 { buf := make([]byte, 0, 1024) From dc1457e1661ef1fd4d5e300e863384ede2cce1cf Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 20:14:50 +1000 Subject: [PATCH 27/43] Add support for capturing CPU and memory profiles while running benchmark --- tools/benchmark-query-engine/main.go | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tools/benchmark-query-engine/main.go b/tools/benchmark-query-engine/main.go index b0bd34f3c5a..628882cb888 100644 --- a/tools/benchmark-query-engine/main.go +++ b/tools/benchmark-query-engine/main.go @@ -35,6 +35,8 @@ type app struct { tempDir string dataDir string binaryPath string + cpuProfilePath string + memProfilePath string ingesterAddress string cleanup func() @@ -57,6 +59,16 @@ func (a *app) run() error { return nil } + if a.cpuProfilePath != "" || a.memProfilePath != "" { + if a.count != 1 { + return fmt.Errorf("must run exactly one iteration when emitting profile, but have -count=%d", a.count) + } + + if len(filteredNames) != 1 { + return fmt.Errorf("must select exactly one benchmark with -bench when emitting profile, but have %v benchmarks selected", len(filteredNames)) + } + } + if err := a.findBenchmarkPackageDir(); err != nil { return fmt.Errorf("could not find engine package directory: %w", err) } @@ -101,6 +113,8 @@ func (a *app) parseArgs() { flag.UintVar(&a.count, "count", 1, "run each benchmark n times") flag.StringVar(&a.testFilter, "bench", ".", "only run benchmarks matching regexp") flag.BoolVar(&a.listTests, "list", false, "list known benchmarks and exit") + flag.StringVar(&a.cpuProfilePath, "cpuprofile", "", "write CPU profile to file, only supported when running a single iteration of one benchmark") + flag.StringVar(&a.memProfilePath, "memprofile", "", "write memory profile to file, only supported when running a single iteration of one benchmark") if err := 
flagext.ParseFlagsWithoutArguments(flag.CommandLine); err != nil { fmt.Printf("%v\n", err) @@ -240,7 +254,19 @@ func (a *app) filteredTestCaseNames() ([]string, error) { } func (a *app) runTestCase(name string, printBenchmarkHeader bool) error { - cmd := exec.Command(a.binaryPath, "-test.bench="+regexp.QuoteMeta(name), "-test.run=NoTestsWillMatchThisPattern", "-test.benchmem") + args := []string{ + "-test.bench=" + regexp.QuoteMeta(name), "-test.run=NoTestsWillMatchThisPattern", "-test.benchmem", + } + + if a.cpuProfilePath != "" { + args = append(args, "-test.cpuprofile="+a.cpuProfilePath) + } + + if a.memProfilePath != "" { + args = append(args, "-test.memprofile="+a.memProfilePath) + } + + cmd := exec.Command(a.binaryPath, args...) buf := &bytes.Buffer{} cmd.Stdout = buf cmd.Stderr = os.Stderr From 9d4bc14c223b92c3d7f2598cc937a65f7a2afca4 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 9 May 2024 20:40:17 +1000 Subject: [PATCH 28/43] Reuse slices when computing result. --- .../operator/binary_operation.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index 525f5fc1f31..e1cf416905a 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -459,12 +459,19 @@ func (b *BinaryOperation) mergeOneSide(data []InstantVectorSeriesData, sourceSer } func (b *BinaryOperation) computeResult(left InstantVectorSeriesData, right InstantVectorSeriesData) InstantVectorSeriesData { - // FIXME: it is only safe to unconditionally put these slices back in the pool for one-to-one matching (otherwise we'll need some of these slices for future groups) - defer PutFPointSlice(left.Floats) - defer PutFPointSlice(right.Floats) + var output []promql.FPoint - outputLength := min(len(left.Floats), len(right.Floats)) // We can't produce more output points than input points for arithmetic 
operations. - output := GetFPointSlice(outputLength) // TODO: Reuse one side for the output slice. If we do this, need to make sure not to return it to the pool + // For one-to-one matching for arithmetic operators, reuse one of the input slices to avoid allocating another slice. + // We'll never produce more points than the smaller input side, so use that as our output slice. + // + // FIXME: this is not safe to do for one-to-many, many-to-one or many-to-many matching, as we may need the input series for later output series. + if len(left.Floats) < len(right.Floats) { + output = left.Floats[:0] + defer PutFPointSlice(right.Floats) + } else { + output = right.Floats[:0] + defer PutFPointSlice(left.Floats) + } nextRightIndex := 0 From 825b4dcba678221a8525f1262d144bf7230d70cd Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 10 May 2024 13:29:39 +1000 Subject: [PATCH 29/43] Use bucketed pools with factor 2 rather than factor 10. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This dramatically reduces peak memory consumption, as we'll no longer potentially over-allocate by a factor of 10. For example, previously, if we needed a slice of 1001 points, we'd end up using a slice with capacity 10000. Now it will only be 1024. This fixes the issue where queries that return many series result in peak memory utilisation far greater than Prometheus' engine. 
Full benchmark results: goos: darwin goarch: arm64 pkg: github.com/grafana/mimir/pkg/streamingpromql/benchmarks │ standard │ streaming │ │ sec/op │ sec/op vs base │ Query/a_1,_instant_query 127.9µ ± 5% 120.6µ ± 4% -5.72% (p=0.004 n=6) Query/a_1,_range_query_with_100_steps 141.8µ ± 15% 131.8µ ± 8% ~ (p=0.132 n=6) Query/a_1,_range_query_with_1000_steps 181.0µ ± 2% 171.4µ ± 1% -5.33% (p=0.002 n=6) Query/a_100,_instant_query 577.7µ ± 7% 558.4µ ± 3% -3.35% (p=0.002 n=6) Query/a_100,_range_query_with_100_steps 1.060m ± 5% 1.039m ± 5% ~ (p=0.240 n=6) Query/a_100,_range_query_with_1000_steps 5.276m ± 1% 5.191m ± 2% -1.61% (p=0.015 n=6) Query/a_2000,_instant_query 7.207m ± 0% 7.015m ± 1% -2.65% (p=0.002 n=6) Query/a_2000,_range_query_with_100_steps 15.88m ± 1% 15.39m ± 1% -3.08% (p=0.002 n=6) Query/a_2000,_range_query_with_1000_steps 94.69m ± 1% 90.68m ± 0% -4.24% (p=0.002 n=6) Query/rate(a_1[1m]),_instant_query 123.1µ ± 7% 112.7µ ± 3% -8.43% (p=0.002 n=6) Query/rate(a_1[1m]),_range_query_with_100_steps 134.4µ ± 4% 120.3µ ± 4% -10.48% (p=0.002 n=6) Query/rate(a_1[1m]),_range_query_with_1000_steps 236.3µ ± 1% 178.0µ ± 1% -24.69% (p=0.002 n=6) Query/rate(a_100[1m]),_instant_query 570.8µ ± 2% 557.4µ ± 4% -2.34% (p=0.004 n=6) Query/rate(a_100[1m]),_range_query_with_100_steps 1.606m ± 2% 1.153m ± 0% -28.20% (p=0.002 n=6) Query/rate(a_100[1m]),_range_query_with_1000_steps 10.467m ± 2% 5.811m ± 1% -44.48% (p=0.002 n=6) Query/rate(a_2000[1m]),_instant_query 7.007m ± 0% 6.811m ± 1% -2.80% (p=0.002 n=6) Query/rate(a_2000[1m]),_range_query_with_100_steps 26.64m ± 1% 17.95m ± 2% -32.62% (p=0.002 n=6) Query/rate(a_2000[1m]),_range_query_with_1000_steps 195.2m ± 2% 104.3m ± 0% -46.57% (p=0.002 n=6) Query/rate(a_1[1m]),_range_query_with_10000_steps 1174.8µ ± 2% 744.3µ ± 1% -36.64% (p=0.002 n=6) Query/rate(a_100[1m]),_range_query_with_10000_steps 97.00m ± 1% 52.99m ± 6% -45.38% (p=0.002 n=6) Query/rate(a_2000[1m]),_range_query_with_10000_steps 2.004 ± 2% 1.047 ± 5% -47.75% (p=0.002 n=6) 
Query/rate(a_1[1d]),_instant_query 745.6µ ± 2% 553.4µ ± 7% -25.77% (p=0.002 n=6) Query/rate(a_1[1d]),_range_query_with_100_steps 1302.3µ ± 1% 988.9µ ± 1% -24.07% (p=0.002 n=6) Query/rate(a_1[1d]),_range_query_with_1000_steps 6.227m ± 1% 4.860m ± 1% -21.94% (p=0.002 n=6) Query/rate(a_100[1d]),_instant_query 43.68m ± 0% 33.71m ± 0% -22.83% (p=0.002 n=6) Query/rate(a_100[1d]),_range_query_with_100_steps 98.27m ± 1% 76.75m ± 1% -21.90% (p=0.002 n=6) Query/rate(a_100[1d]),_range_query_with_1000_steps 587.2m ± 1% 461.3m ± 0% -21.44% (p=0.002 n=6) Query/rate(a_2000[1d]),_instant_query 813.1m ± 1% 618.5m ± 0% -23.93% (p=0.002 n=6) Query/rate(a_2000[1d]),_range_query_with_100_steps 1.914 ± 1% 1.482 ± 8% -22.58% (p=0.002 n=6) Query/rate(a_2000[1d]),_range_query_with_1000_steps 11.671 ± 1% 9.149 ± 0% -21.61% (p=0.002 n=6) Query/a_1_-_b_1,_instant_query 225.9µ ± 1% 211.4µ ± 2% -6.42% (p=0.002 n=6) Query/a_1_-_b_1,_range_query_with_100_steps 260.3µ ± 0% 225.9µ ± 2% -13.23% (p=0.002 n=6) Query/a_1_-_b_1,_range_query_with_1000_steps 618.3µ ± 2% 319.6µ ± 1% -48.31% (p=0.002 n=6) Query/a_100_-_b_100,_instant_query 1.182m ± 1% 1.125m ± 0% -4.82% (p=0.002 n=6) Query/a_100_-_b_100,_range_query_with_100_steps 4.437m ± 1% 2.042m ± 1% -53.98% (p=0.002 n=6) Query/a_100_-_b_100,_range_query_with_1000_steps 33.51m ± 0% 10.19m ± 1% -69.59% (p=0.002 n=6) Query/a_2000_-_b_2000,_instant_query 15.92m ± 1% 15.38m ± 1% -3.42% (p=0.002 n=6) Query/a_2000_-_b_2000,_range_query_with_100_steps 82.01m ± 1% 32.76m ± 1% -60.06% (p=0.002 n=6) Query/a_2000_-_b_2000,_range_query_with_1000_steps 735.9m ± 1% 187.5m ± 1% -74.53% (p=0.002 n=6) Query/a_1_-_b_1,_range_query_with_10000_steps 3.298m ± 1% 1.273m ± 1% -61.40% (p=0.002 n=6) Query/a_100_-_b_100,_range_query_with_10000_steps 322.21m ± 0% 87.20m ± 0% -72.94% (p=0.002 n=6) Query/a_2000_-_b_2000,_range_query_with_10000_steps 7.571 ± 0% 1.733 ± 3% -77.11% (p=0.002 n=6) Query/a_100{l=~"[13579]."}_-_b_100,_instant_query 1.114m ± 1% 1.086m ± 1% -2.52% (p=0.002 
n=6) Query/a_100{l=~"[13579]."}_-_b_100,_range_query_with_100_steps 3.182m ± 1% 1.803m ± 1% -43.32% (p=0.002 n=6) Query/a_100{l=~"[13579]."}_-_b_100,_range_query_with_1000_steps 21.739m ± 0% 8.109m ± 1% -62.70% (p=0.002 n=6) Query/a_2000{l=~"1..."}_-_b_2000,_instant_query 12.093m ± 1% 9.135m ± 2% -24.46% (p=0.002 n=6) Query/a_2000{l=~"1..."}_-_b_2000,_range_query_with_100_steps 52.50m ± 1% 18.63m ± 2% -64.51% (p=0.002 n=6) Query/a_2000{l=~"1..."}_-_b_2000,_range_query_with_1000_steps 470.5m ± 1% 103.8m ± 0% -77.94% (p=0.002 n=6) Query/sum(a_1),_instant_query 118.5µ ± 2% 110.8µ ± 4% -6.42% (p=0.002 n=6) Query/sum(a_1),_range_query_with_100_steps 129.1µ ± 1% 117.5µ ± 2% -9.00% (p=0.002 n=6) Query/sum(a_1),_range_query_with_1000_steps 207.4µ ± 1% 172.5µ ± 1% -16.83% (p=0.002 n=6) Query/sum(a_100),_instant_query 584.5µ ± 1% 561.9µ ± 3% -3.87% (p=0.002 n=6) Query/sum(a_100),_range_query_with_100_steps 1.092m ± 1% 1.020m ± 1% -6.59% (p=0.002 n=6) Query/sum(a_100),_range_query_with_1000_steps 5.683m ± 1% 5.199m ± 0% -8.53% (p=0.002 n=6) Query/sum(a_2000),_instant_query 7.251m ± 1% 7.010m ± 0% -3.32% (p=0.002 n=6) Query/sum(a_2000),_range_query_with_100_steps 16.84m ± 1% 15.74m ± 1% -6.53% (p=0.002 n=6) Query/sum(a_2000),_range_query_with_1000_steps 111.55m ± 2% 93.75m ± 0% -15.96% (p=0.002 n=6) Query/sum_by_(l)(h_1),_instant_query 148.6µ ± 2% 139.7µ ± 2% -5.98% (p=0.002 n=6) Query/sum_by_(l)(h_1),_range_query_with_100_steps 183.7µ ± 1% 170.9µ ± 2% -6.98% (p=0.002 n=6) Query/sum_by_(l)(h_1),_range_query_with_1000_steps 533.7µ ± 1% 480.8µ ± 2% -9.92% (p=0.002 n=6) Query/sum_by_(l)(h_100),_instant_query 2.576m ± 1% 2.458m ± 1% -4.55% (p=0.002 n=6) Query/sum_by_(l)(h_100),_range_query_with_100_steps 5.505m ± 1% 5.193m ± 1% -5.65% (p=0.002 n=6) Query/sum_by_(l)(h_100),_range_query_with_1000_steps 33.01m ± 1% 29.73m ± 1% -9.94% (p=0.002 n=6) Query/sum_by_(l)(h_2000),_instant_query 46.80m ± 1% 45.32m ± 1% -3.16% (p=0.002 n=6) Query/sum_by_(l)(h_2000),_range_query_with_100_steps 
107.41m ± 1% 98.70m ± 1% -8.11% (p=0.002 n=6) Query/sum_by_(l)(h_2000),_range_query_with_1000_steps 705.7m ± 0% 563.3m ± 1% -20.17% (p=0.002 n=6) Query/sum_by_(le)(h_1),_instant_query 150.1µ ± 1% 141.2µ ± 3% -5.95% (p=0.002 n=6) Query/sum_by_(le)(h_1),_range_query_with_100_steps 187.2µ ± 1% 174.5µ ± 2% -6.80% (p=0.002 n=6) Query/sum_by_(le)(h_1),_range_query_with_1000_steps 562.1µ ± 0% 500.4µ ± 2% -10.98% (p=0.002 n=6) Query/sum_by_(le)(h_100),_instant_query 2.525m ± 2% 2.412m ± 1% -4.46% (p=0.002 n=6) Query/sum_by_(le)(h_100),_range_query_with_100_steps 5.403m ± 1% 5.072m ± 1% -6.13% (p=0.002 n=6) Query/sum_by_(le)(h_100),_range_query_with_1000_steps 32.40m ± 3% 29.59m ± 1% -8.67% (p=0.002 n=6) Query/sum_by_(le)(h_2000),_instant_query 45.69m ± 1% 43.78m ± 1% -4.18% (p=0.002 n=6) Query/sum_by_(le)(h_2000),_range_query_with_100_steps 104.06m ± 0% 96.44m ± 1% -7.32% (p=0.002 n=6) Query/sum_by_(le)(h_2000),_range_query_with_1000_steps 683.8m ± 0% 560.5m ± 1% -18.03% (p=0.002 n=6) Query/rate(a_1[1m])_+_rate(b_1[1m]),_instant_query 232.9µ ± 1% 216.2µ ± 2% -7.17% (p=0.002 n=6) Query/rate(a_1[1m])_+_rate(b_1[1m]),_range_query_with_100_steps 276.6µ ± 2% 231.2µ ± 2% -16.42% (p=0.002 n=6) Query/rate(a_1[1m])_+_rate(b_1[1m]),_range_query_with_1000_steps 710.4µ ± 2% 340.7µ ± 3% -52.04% (p=0.002 n=6) Query/rate(a_100[1m])_+_rate(b_100[1m]),_instant_query 1.150m ± 1% 1.102m ± 0% -4.15% (p=0.002 n=6) Query/rate(a_100[1m])_+_rate(b_100[1m]),_range_query_with_100_steps 5.453m ± 1% 2.268m ± 1% -58.41% (p=0.002 n=6) Query/rate(a_100[1m])_+_rate(b_100[1m]),_range_query_with_1000_steps 43.07m ± 1% 11.55m ± 1% -73.18% (p=0.002 n=6) Query/rate(a_2000[1m])_+_rate(b_2000[1m]),_instant_query 15.28m ± 1% 14.82m ± 1% -3.04% (p=0.002 n=6) Query/rate(a_2000[1m])_+_rate(b_2000[1m]),_range_query_with_100_steps 102.03m ± 1% 37.52m ± 1% -63.22% (p=0.002 n=6) Query/rate(a_2000[1m])_+_rate(b_2000[1m]),_range_query_with_1000_steps 928.4m ± 1% 214.7m ± 0% -76.87% (p=0.002 n=6) 
Query/sum(a_1_+_b_1),_instant_query 225.9µ ± 1% 214.6µ ± 2% -5.00% (p=0.002 n=6) Query/sum(a_1_+_b_1),_range_query_with_100_steps 266.8µ ± 2% 228.0µ ± 2% -14.55% (p=0.002 n=6) Query/sum(a_1_+_b_1),_range_query_with_1000_steps 650.4µ ± 2% 329.4µ ± 2% -49.36% (p=0.002 n=6) Query/sum(a_100_+_b_100),_instant_query 1.189m ± 1% 1.133m ± 1% -4.72% (p=0.002 n=6) Query/sum(a_100_+_b_100),_range_query_with_100_steps 4.504m ± 1% 2.043m ± 1% -54.64% (p=0.002 n=6) Query/sum(a_100_+_b_100),_range_query_with_1000_steps 34.07m ± 1% 10.34m ± 0% -69.64% (p=0.002 n=6) Query/sum(a_2000_+_b_2000),_instant_query 15.94m ± 1% 15.41m ± 1% -3.34% (p=0.002 n=6) Query/sum(a_2000_+_b_2000),_range_query_with_100_steps 83.55m ± 2% 33.20m ± 1% -60.26% (p=0.002 n=6) Query/sum(a_2000_+_b_2000),_range_query_with_1000_steps 764.1m ± 0% 192.3m ± 0% -74.84% (p=0.002 n=6) Query/sum_by_(le)(rate(h_1[1m])),_instant_query 153.3µ ± 1% 142.2µ ± 1% -7.26% (p=0.002 n=6) Query/sum_by_(le)(rate(h_1[1m])),_range_query_with_100_steps 241.2µ ± 4% 184.2µ ± 3% -23.64% (p=0.002 n=6) Query/sum_by_(le)(rate(h_1[1m])),_range_query_with_1000_steps 932.3µ ± 6% 540.8µ ± 1% -41.99% (p=0.002 n=6) Query/sum_by_(le)(rate(h_100[1m])),_instant_query 2.455m ± 1% 2.341m ± 2% -4.62% (p=0.002 n=6) Query/sum_by_(le)(rate(h_100[1m])),_range_query_with_100_steps 9.185m ± 8% 5.771m ± 0% -37.16% (p=0.002 n=6) Query/sum_by_(le)(rate(h_100[1m])),_range_query_with_1000_steps 73.48m ± 0% 33.33m ± 1% -54.64% (p=0.002 n=6) Query/sum_by_(le)(rate(h_2000[1m])),_instant_query 43.97m ± 2% 42.68m ± 0% -2.95% (p=0.002 n=6) Query/sum_by_(le)(rate(h_2000[1m])),_range_query_with_100_steps 165.6m ± 15% 111.2m ± 1% -32.84% (p=0.002 n=6) Query/sum_by_(le)(rate(h_2000[1m])),_range_query_with_1000_steps 1327.0m ± 0% 641.8m ± 1% -51.63% (p=0.002 n=6) geomean 7.430m 5.174m -30.37% │ standard │ streaming │ │ B/op │ B/op vs base │ Query/a_1,_instant_query 20.20Ki ± 0% 17.24Ki ± 0% -14.65% (p=0.002 n=6) Query/a_1,_range_query_with_100_steps 21.16Ki ± 0% 17.84Ki ± 
0% -15.69% (p=0.002 n=6) Query/a_1,_range_query_with_1000_steps 27.14Ki ± 1% 23.91Ki ± 0% -11.88% (p=0.002 n=6) Query/a_100,_instant_query 140.1Ki ± 0% 122.0Ki ± 0% -12.95% (p=0.002 n=6) Query/a_100,_range_query_with_100_steps 196.0Ki ± 0% 182.1Ki ± 0% -7.09% (p=0.002 n=6) Query/a_100,_range_query_with_1000_steps 770.4Ki ± 0% 758.2Ki ± 0% -1.58% (p=0.002 n=6) Query/a_2000,_instant_query 2.407Mi ± 0% 2.086Mi ± 0% -13.36% (p=0.002 n=6) Query/a_2000,_range_query_with_100_steps 3.428Mi ± 0% 3.265Mi ± 0% -4.77% (p=0.002 n=6) Query/a_2000,_range_query_with_1000_steps 14.50Mi ± 0% 14.37Mi ± 0% -0.88% (p=0.002 n=6) Query/rate(a_1[1m]),_instant_query 22.44Ki ± 0% 17.67Ki ± 0% -21.27% (p=0.002 n=6) Query/rate(a_1[1m]),_range_query_with_100_steps 23.29Ki ± 0% 18.24Ki ± 0% -21.69% (p=0.002 n=6) Query/rate(a_1[1m]),_range_query_with_1000_steps 29.13Ki ± 1% 23.98Ki ± 0% -17.69% (p=0.002 n=6) Query/rate(a_100[1m]),_instant_query 164.3Ki ± 0% 124.9Ki ± 0% -23.97% (p=0.002 n=6) Query/rate(a_100[1m]),_range_query_with_100_steps 220.1Ki ± 0% 184.9Ki ± 0% -16.01% (p=0.002 n=6) Query/rate(a_100[1m]),_range_query_with_1000_steps 758.7Ki ± 0% 724.8Ki ± 0% -4.47% (p=0.002 n=6) Query/rate(a_2000[1m]),_instant_query 2.857Mi ± 0% 2.137Mi ± 0% -25.18% (p=0.002 n=6) Query/rate(a_2000[1m]),_range_query_with_100_steps 3.879Mi ± 0% 3.326Mi ± 1% -14.25% (p=0.002 n=6) Query/rate(a_2000[1m]),_range_query_with_1000_steps 14.32Mi ± 0% 13.84Mi ± 0% -3.40% (p=0.002 n=6) Query/rate(a_1[1m]),_range_query_with_10000_steps 79.90Ki ± 6% 66.90Ki ± 2% -16.27% (p=0.002 n=6) Query/rate(a_100[1m]),_range_query_with_10000_steps 4.993Mi ± 4% 4.986Mi ± 0% ~ (p=1.000 n=6) Query/rate(a_2000[1m]),_range_query_with_10000_steps 1068.8Mi ± 0% 599.7Mi ± 0% -43.89% (p=0.002 n=6) Query/rate(a_1[1d]),_instant_query 632.50Ki ± 3% 67.00Ki ± 5% -89.41% (p=0.002 n=6) Query/rate(a_1[1d]),_range_query_with_100_steps 651.78Ki ± 1% 66.88Ki ± 5% -89.74% (p=0.002 n=6) Query/rate(a_1[1d]),_range_query_with_1000_steps 649.71Ki ± 3% 
76.55Ki ± 16% -88.22% (p=0.002 n=6) Query/rate(a_100[1d]),_instant_query 5.181Mi ± 1% 4.681Mi ± 1% -9.66% (p=0.002 n=6) Query/rate(a_100[1d]),_range_query_with_100_steps 5.197Mi ± 3% 4.685Mi ± 2% -9.84% (p=0.002 n=6) Query/rate(a_100[1d]),_range_query_with_1000_steps 5.498Mi ± 7% 5.169Mi ± 2% ~ (p=0.180 n=6) Query/rate(a_2000[1d]),_instant_query 86.90Mi ± 1% 85.94Mi ± 1% -1.11% (p=0.009 n=6) Query/rate(a_2000[1d]),_range_query_with_100_steps 92.64Mi ± 1% 92.15Mi ± 1% ~ (p=0.240 n=6) Query/rate(a_2000[1d]),_range_query_with_1000_steps 126.4Mi ± 1% 125.8Mi ± 0% -0.47% (p=0.015 n=6) Query/a_1_-_b_1,_instant_query 41.54Ki ± 0% 36.42Ki ± 0% -12.33% (p=0.002 n=6) Query/a_1_-_b_1,_range_query_with_100_steps 47.81Ki ± 0% 37.61Ki ± 0% -21.32% (p=0.002 n=6) Query/a_1_-_b_1,_range_query_with_1000_steps 102.49Ki ± 0% 49.73Ki ± 0% -51.48% (p=0.002 n=6) Query/a_100_-_b_100,_instant_query 372.1Ki ± 0% 262.7Ki ± 0% -29.40% (p=0.002 n=6) Query/a_100_-_b_100,_range_query_with_100_steps 586.3Ki ± 0% 383.7Ki ± 0% -34.55% (p=0.002 n=6) Query/a_100_-_b_100,_range_query_with_1000_steps 2.541Mi ± 0% 1.503Mi ± 0% -40.85% (p=0.002 n=6) Query/a_2000_-_b_2000,_instant_query 6.968Mi ± 0% 4.516Mi ± 0% -35.19% (p=0.002 n=6) Query/a_2000_-_b_2000,_range_query_with_100_steps 9.187Mi ± 0% 6.820Mi ± 0% -25.76% (p=0.002 n=6) Query/a_2000_-_b_2000,_range_query_with_1000_steps 32.16Mi ± 0% 29.13Mi ± 1% -9.42% (p=0.002 n=6) Query/a_1_-_b_1,_range_query_with_10000_steps 688.7Ki ± 3% 139.7Ki ± 2% -79.72% (p=0.002 n=6) Query/a_100_-_b_100,_range_query_with_10000_steps 19.575Mi ± 1% 9.913Mi ± 0% -49.36% (p=0.002 n=6) Query/a_2000_-_b_2000,_range_query_with_10000_steps 3107.2Mi ± 0% 698.1Mi ± 0% -77.53% (p=0.002 n=6) Query/a_100{l=~"[13579]."}_-_b_100,_instant_query 291.1Ki ± 0% 211.4Ki ± 0% -27.38% (p=0.002 n=6) Query/a_100{l=~"[13579]."}_-_b_100,_range_query_with_100_steps 467.5Ki ± 0% 401.0Ki ± 0% -14.23% (p=0.002 n=6) Query/a_100{l=~"[13579]."}_-_b_100,_range_query_with_1000_steps 2.092Mi ± 0% 1.929Mi ± 
0% -7.79% (p=0.002 n=6) Query/a_2000{l=~"1..."}_-_b_2000,_instant_query 5.258Mi ± 0% 3.011Mi ± 0% -42.73% (p=0.002 n=6) Query/a_2000{l=~"1..."}_-_b_2000,_range_query_with_100_steps 6.911Mi ± 0% 4.723Mi ± 1% -31.66% (p=0.002 n=6) Query/a_2000{l=~"1..."}_-_b_2000,_range_query_with_1000_steps 23.54Mi ± 1% 20.19Mi ± 1% -14.24% (p=0.002 n=6) Query/sum(a_1),_instant_query 21.31Ki ± 0% 17.83Ki ± 0% -16.34% (p=0.002 n=6) Query/sum(a_1),_range_query_with_100_steps 22.26Ki ± 0% 19.51Ki ± 0% -12.35% (p=0.002 n=6) Query/sum(a_1),_range_query_with_1000_steps 28.32Ki ± 1% 33.65Ki ± 0% +18.83% (p=0.002 n=6) Query/sum(a_100),_instant_query 144.4Ki ± 0% 123.5Ki ± 0% -14.50% (p=0.002 n=6) Query/sum(a_100),_range_query_with_100_steps 204.2Ki ± 0% 183.7Ki ± 0% -10.03% (p=0.002 n=6) Query/sum(a_100),_range_query_with_1000_steps 779.0Ki ± 0% 769.8Ki ± 0% -1.18% (p=0.002 n=6) Query/sum(a_2000),_instant_query 2.471Mi ± 0% 2.072Mi ± 0% -16.13% (p=0.002 n=6) Query/sum(a_2000),_range_query_with_100_steps 3.561Mi ± 0% 3.179Mi ± 0% -10.72% (p=0.002 n=6) Query/sum(a_2000),_range_query_with_1000_steps 14.68Mi ± 0% 14.39Mi ± 0% -1.98% (p=0.002 n=6) Query/sum_by_(l)(h_1),_instant_query 27.61Ki ± 0% 23.24Ki ± 0% -15.85% (p=0.002 n=6) Query/sum_by_(l)(h_1),_range_query_with_100_steps 31.28Ki ± 0% 27.60Ki ± 0% -11.77% (p=0.002 n=6) Query/sum_by_(l)(h_1),_range_query_with_1000_steps 67.41Ki ± 0% 71.68Ki ± 0% +6.34% (p=0.002 n=6) Query/sum_by_(l)(h_100),_instant_query 906.3Ki ± 0% 748.2Ki ± 0% -17.44% (p=0.002 n=6) Query/sum_by_(l)(h_100),_range_query_with_100_steps 1.212Mi ± 0% 1.174Mi ± 0% -3.15% (p=0.002 n=6) Query/sum_by_(l)(h_100),_range_query_with_1000_steps 4.570Mi ± 1% 5.346Mi ± 1% +16.99% (p=0.002 n=6) Query/sum_by_(l)(h_2000),_instant_query 18.97Mi ± 0% 15.99Mi ± 1% -15.71% (p=0.002 n=6) Query/sum_by_(l)(h_2000),_range_query_with_100_steps 25.21Mi ± 0% 25.76Mi ± 1% +2.19% (p=0.002 n=6) Query/sum_by_(l)(h_2000),_range_query_with_1000_steps 91.63Mi ± 1% 124.01Mi ± 3% +35.34% (p=0.002 n=6) 
Query/sum_by_(le)(h_1),_instant_query 29.12Ki ± 0% 23.37Ki ± 0% -19.77% (p=0.002 n=6) Query/sum_by_(le)(h_1),_range_query_with_100_steps 32.55Ki ± 0% 33.19Ki ± 0% +1.98% (p=0.002 n=6) Query/sum_by_(le)(h_1),_range_query_with_1000_steps 68.69Ki ± 0% 117.21Ki ± 0% +70.64% (p=0.002 n=6) Query/sum_by_(le)(h_100),_instant_query 868.9Ki ± 0% 729.8Ki ± 0% -16.01% (p=0.002 n=6) Query/sum_by_(le)(h_100),_range_query_with_100_steps 1.181Mi ± 0% 1.052Mi ± 0% -10.87% (p=0.002 n=6) Query/sum_by_(le)(h_100),_range_query_with_1000_steps 4.542Mi ± 0% 4.491Mi ± 1% -1.11% (p=0.002 n=6) Query/sum_by_(le)(h_2000),_instant_query 18.12Mi ± 0% 15.45Mi ± 0% -14.73% (p=0.002 n=6) Query/sum_by_(le)(h_2000),_range_query_with_100_steps 24.50Mi ± 0% 22.18Mi ± 0% -9.46% (p=0.002 n=6) Query/sum_by_(le)(h_2000),_range_query_with_1000_steps 91.00Mi ± 0% 89.33Mi ± 1% -1.84% (p=0.002 n=6) Query/rate(a_1[1m])_+_rate(b_1[1m]),_instant_query 46.02Ki ± 0% 37.26Ki ± 0% -19.05% (p=0.002 n=6) Query/rate(a_1[1m])_+_rate(b_1[1m]),_range_query_with_100_steps 52.31Ki ± 0% 38.45Ki ± 0% -26.49% (p=0.002 n=6) Query/rate(a_1[1m])_+_rate(b_1[1m]),_range_query_with_1000_steps 106.31Ki ± 0% 49.80Ki ± 0% -53.15% (p=0.002 n=6) Query/rate(a_100[1m])_+_rate(b_100[1m]),_instant_query 415.2Ki ± 0% 267.0Ki ± 0% -35.69% (p=0.002 n=6) Query/rate(a_100[1m])_+_rate(b_100[1m]),_range_query_with_100_steps 629.6Ki ± 0% 387.8Ki ± 0% -38.42% (p=0.002 n=6) Query/rate(a_100[1m])_+_rate(b_100[1m]),_range_query_with_1000_steps 2.512Mi ± 0% 1.435Mi ± 0% -42.90% (p=0.002 n=6) Query/rate(a_2000[1m])_+_rate(b_2000[1m]),_instant_query 7.780Mi ± 0% 4.586Mi ± 0% -41.05% (p=0.002 n=6) Query/rate(a_2000[1m])_+_rate(b_2000[1m]),_range_query_with_100_steps 10.024Mi ± 0% 6.886Mi ± 1% -31.31% (p=0.002 n=6) Query/rate(a_2000[1m])_+_rate(b_2000[1m]),_range_query_with_1000_steps 31.72Mi ± 1% 27.82Mi ± 1% -12.27% (p=0.002 n=6) Query/sum(a_1_+_b_1),_instant_query 42.80Ki ± 0% 37.06Ki ± 0% -13.42% (p=0.002 n=6) 
Query/sum(a_1_+_b_1),_range_query_with_100_steps 49.12Ki ± 0% 39.33Ki ± 0% -19.93% (p=0.002 n=6) Query/sum(a_1_+_b_1),_range_query_with_1000_steps 103.73Ki ± 0% 59.51Ki ± 0% -42.63% (p=0.002 n=6) Query/sum(a_100_+_b_100),_instant_query 376.7Ki ± 0% 264.2Ki ± 0% -29.86% (p=0.002 n=6) Query/sum(a_100_+_b_100),_range_query_with_100_steps 595.1Ki ± 0% 384.4Ki ± 0% -35.40% (p=0.002 n=6) Query/sum(a_100_+_b_100),_range_query_with_1000_steps 2.549Mi ± 0% 1.514Mi ± 0% -40.61% (p=0.002 n=6) Query/sum(a_2000_+_b_2000),_instant_query 7.032Mi ± 0% 4.487Mi ± 0% -36.20% (p=0.002 n=6) Query/sum(a_2000_+_b_2000),_range_query_with_100_steps 9.335Mi ± 6% 6.728Mi ± 0% -27.94% (p=0.002 n=6) Query/sum(a_2000_+_b_2000),_range_query_with_1000_steps 32.44Mi ± 1% 29.36Mi ± 2% -9.48% (p=0.002 n=6) Query/sum_by_(le)(rate(h_1[1m])),_instant_query 32.88Ki ± 0% 23.80Ki ± 0% -27.61% (p=0.002 n=6) Query/sum_by_(le)(rate(h_1[1m])),_range_query_with_100_steps 36.00Ki ± 0% 33.58Ki ± 0% -6.71% (p=0.002 n=6) Query/sum_by_(le)(rate(h_1[1m])),_range_query_with_1000_steps 69.37Ki ± 0% 114.78Ki ± 0% +65.46% (p=0.002 n=6) Query/sum_by_(le)(rate(h_100[1m])),_instant_query 1007.7Ki ± 0% 750.0Ki ± 0% -25.57% (p=0.002 n=6) Query/sum_by_(le)(rate(h_100[1m])),_range_query_with_100_steps 1.315Mi ± 0% 1.073Mi ± 0% -18.42% (p=0.002 n=6) Query/sum_by_(le)(rate(h_100[1m])),_range_query_with_1000_steps 4.488Mi ± 0% 4.308Mi ± 0% -4.01% (p=0.002 n=6) Query/sum_by_(le)(rate(h_2000[1m])),_instant_query 20.78Mi ± 0% 15.81Mi ± 0% -23.90% (p=0.002 n=6) Query/sum_by_(le)(rate(h_2000[1m])),_range_query_with_100_steps 27.08Mi ± 1% 22.56Mi ± 0% -16.72% (p=0.002 n=6) Query/sum_by_(le)(rate(h_2000[1m])),_range_query_with_1000_steps 278.70Mi ± 0% 85.93Mi ± 0% -69.17% (p=0.002 n=6) geomean 1.147Mi 884.4Ki -24.72% │ standard │ streaming │ │ allocs/op │ allocs/op vs base │ Query/a_1,_instant_query 360.0 ± 0% 303.0 ± 0% -15.83% (p=0.002 n=6) Query/a_1,_range_query_with_100_steps 371.0 ± 0% 308.0 ± 0% -16.98% (p=0.002 n=6) 
Query/a_1,_range_query_with_1000_steps 401.0 ± 0% 338.0 ± 0% -15.71% (p=0.002 n=6) Query/a_100,_instant_query 1.975k ± 0% 1.910k ± 0% -3.29% (p=0.002 n=6) Query/a_100,_range_query_with_100_steps 2.486k ± 0% 2.417k ± 0% -2.78% (p=0.002 n=6) Query/a_100,_range_query_with_1000_steps 5.499k ± 0% 5.432k ± 0% -1.22% (p=0.002 n=6) Query/a_2000,_instant_query 32.79k ± 0% 32.70k ± 0% -0.27% (p=0.002 n=6) Query/a_2000,_range_query_with_100_steps 42.84k ± 0% 42.76k ± 0% -0.18% (p=0.002 n=6) Query/a_2000,_range_query_with_1000_steps 102.9k ± 0% 102.9k ± 0% -0.06% (p=0.002 n=6) Query/rate(a_1[1m]),_instant_query 403.0 ± 0% 313.0 ± 0% -22.33% (p=0.002 n=6) Query/rate(a_1[1m]),_range_query_with_100_steps 414.0 ± 0% 318.0 ± 0% -23.19% (p=0.002 n=6) Query/rate(a_1[1m]),_range_query_with_1000_steps 441.0 ± 0% 345.0 ± 0% -21.77% (p=0.002 n=6) Query/rate(a_100[1m]),_instant_query 2.422k ± 0% 2.019k ± 0% -16.64% (p=0.002 n=6) Query/rate(a_100[1m]),_range_query_with_100_steps 2.934k ± 0% 2.527k ± 0% -13.87% (p=0.002 n=6) Query/rate(a_100[1m]),_range_query_with_1000_steps 5.649k ± 0% 5.242k ± 0% -7.21% (p=0.002 n=6) Query/rate(a_2000[1m]),_instant_query 40.85k ± 0% 34.71k ± 0% -15.02% (p=0.002 n=6) Query/rate(a_2000[1m]),_range_query_with_100_steps 50.90k ± 0% 44.78k ± 0% -12.02% (p=0.002 n=6) Query/rate(a_2000[1m]),_range_query_with_1000_steps 105.00k ± 0% 98.90k ± 0% -5.81% (p=0.002 n=6) Query/rate(a_1[1m]),_range_query_with_10000_steps 670.0 ± 0% 574.0 ± 0% -14.33% (p=0.002 n=6) Query/rate(a_100[1m]),_range_query_with_10000_steps 28.63k ± 0% 28.23k ± 0% -1.39% (p=0.002 n=6) Query/rate(a_2000[1m]),_range_query_with_10000_steps 576.2k ± 0% 564.0k ± 0% -2.13% (p=0.002 n=6) Query/rate(a_1[1d]),_instant_query 646.5 ± 0% 542.0 ± 0% -16.16% (p=0.002 n=6) Query/rate(a_1[1d]),_range_query_with_100_steps 655.5 ± 0% 545.0 ± 0% -16.86% (p=0.002 n=6) Query/rate(a_1[1d]),_range_query_with_1000_steps 676.0 ± 0% 567.0 ± 0% -16.12% (p=0.002 n=6) Query/rate(a_100[1d]),_instant_query 25.37k ± 0% 24.94k 
± 0% -1.71% (p=0.002 n=6) Query/rate(a_100[1d]),_range_query_with_100_steps 25.67k ± 0% 25.26k ± 0% -1.59% (p=0.002 n=6) Query/rate(a_100[1d]),_range_query_with_1000_steps 27.78k ± 0% 27.40k ± 0% -1.36% (p=0.002 n=6) Query/rate(a_2000[1d]),_instant_query 501.6k ± 0% 491.8k ± 0% -1.95% (p=0.002 n=6) Query/rate(a_2000[1d]),_range_query_with_100_steps 509.9k ± 0% 503.8k ± 0% -1.20% (p=0.002 n=6) Query/rate(a_2000[1d]),_range_query_with_1000_steps 552.1k ± 0% 546.1k ± 0% -1.08% (p=0.002 n=6) Query/a_1_-_b_1,_instant_query 720.0 ± 0% 626.0 ± 0% -13.06% (p=0.002 n=6) Query/a_1_-_b_1,_range_query_with_100_steps 934.0 ± 0% 636.0 ± 0% -31.91% (p=0.002 n=6) Query/a_1_-_b_1,_range_query_with_1000_steps 2795.0 ± 0% 697.0 ± 0% -75.06% (p=0.002 n=6) Query/a_100_-_b_100,_instant_query 4.579k ± 0% 4.246k ± 0% -7.27% (p=0.002 n=6) Query/a_100_-_b_100,_range_query_with_100_steps 5.800k ± 0% 5.260k ± 0% -9.31% (p=0.002 n=6) Query/a_100_-_b_100,_range_query_with_1000_steps 15.51k ± 0% 11.29k ± 0% -27.20% (p=0.002 n=6) Query/a_2000_-_b_2000,_instant_query 77.69k ± 0% 73.50k ± 0% -5.40% (p=0.002 n=6) Query/a_2000_-_b_2000,_range_query_with_100_steps 93.95k ± 0% 93.56k ± 0% -0.42% (p=0.002 n=6) Query/a_2000_-_b_2000,_range_query_with_1000_steps 216.1k ± 0% 213.8k ± 0% -1.04% (p=0.002 n=6) Query/a_1_-_b_1,_range_query_with_10000_steps 21.256k ± 0% 1.154k ± 0% -94.57% (p=0.002 n=6) Query/a_100_-_b_100,_range_query_with_10000_steps 97.56k ± 0% 56.64k ± 0% -41.95% (p=0.002 n=6) Query/a_2000_-_b_2000,_range_query_with_10000_steps 1.172M ± 0% 1.126M ± 0% -3.95% (p=0.002 n=6) Query/a_100{l=~"[13579]."}_-_b_100,_instant_query 3.577k ± 0% 3.302k ± 0% -7.69% (p=0.002 n=6) Query/a_100{l=~"[13579]."}_-_b_100,_range_query_with_100_steps 4.565k ± 0% 4.061k ± 0% -11.04% (p=0.002 n=6) Query/a_100{l=~"[13579]."}_-_b_100,_range_query_with_1000_steps 12.508k ± 0% 8.592k ± 0% -31.31% (p=0.002 n=6) Query/a_2000{l=~"1..."}_-_b_2000,_instant_query 57.48k ± 0% 45.14k ± 0% -21.46% (p=0.002 n=6) 
Query/a_2000{l=~"1..."}_-_b_2000,_range_query_with_100_steps 69.74k ± 0% 56.46k ± 0% -19.04% (p=0.002 n=6) Query/a_2000{l=~"1..."}_-_b_2000,_range_query_with_1000_steps 161.7k ± 0% 122.1k ± 0% -24.50% (p=0.002 n=6) Query/sum(a_1),_instant_query 384.0 ± 0% 312.0 ± 0% -18.75% (p=0.002 n=6) Query/sum(a_1),_range_query_with_100_steps 395.0 ± 0% 321.0 ± 0% -18.73% (p=0.002 n=6) Query/sum(a_1),_range_query_with_1000_steps 425.0 ± 0% 351.0 ± 0% -17.41% (p=0.002 n=6) Query/sum(a_100),_instant_query 1.999k ± 0% 1.920k ± 0% -3.95% (p=0.002 n=6) Query/sum(a_100),_range_query_with_100_steps 2.511k ± 0% 2.429k ± 0% -3.25% (p=0.002 n=6) Query/sum(a_100),_range_query_with_1000_steps 5.524k ± 0% 5.444k ± 0% -1.46% (p=0.002 n=6) Query/sum(a_2000),_instant_query 32.81k ± 0% 32.71k ± 0% -0.30% (p=0.002 n=6) Query/sum(a_2000),_range_query_with_100_steps 42.86k ± 0% 42.76k ± 0% -0.23% (p=0.002 n=6) Query/sum(a_2000),_range_query_with_1000_steps 103.0k ± 0% 102.9k ± 0% -0.06% (p=0.002 n=6) Query/sum_by_(l)(h_1),_instant_query 482.0 ± 0% 404.0 ± 0% -16.18% (p=0.002 n=6) Query/sum_by_(l)(h_1),_range_query_with_100_steps 518.0 ± 0% 438.0 ± 0% -15.44% (p=0.002 n=6) Query/sum_by_(l)(h_1),_range_query_with_1000_steps 698.0 ± 0% 619.0 ± 0% -11.32% (p=0.002 n=6) Query/sum_by_(l)(h_100),_instant_query 10.86k ± 0% 10.76k ± 0% -0.94% (p=0.002 n=6) Query/sum_by_(l)(h_100),_range_query_with_100_steps 13.87k ± 0% 14.19k ± 0% +2.29% (p=0.002 n=6) Query/sum_by_(l)(h_100),_range_query_with_1000_steps 31.93k ± 0% 32.27k ± 0% +1.07% (p=0.002 n=6) Query/sum_by_(l)(h_2000),_instant_query 223.0k ± 1% 210.1k ± 0% -5.78% (p=0.002 n=6) Query/sum_by_(l)(h_2000),_range_query_with_100_steps 269.2k ± 0% 279.3k ± 0% +3.74% (p=0.002 n=6) Query/sum_by_(l)(h_2000),_range_query_with_1000_steps 629.6k ± 0% 643.9k ± 0% +2.27% (p=0.002 n=6) Query/sum_by_(le)(h_1),_instant_query 490.0 ± 0% 409.0 ± 0% -16.53% (p=0.002 n=6) Query/sum_by_(le)(h_1),_range_query_with_100_steps 526.0 ± 0% 463.5 ± 0% -11.88% (p=0.002 n=6) 
Query/sum_by_(le)(h_1),_range_query_with_1000_steps 706.0 ± 0% 644.0 ± 0% -8.78% (p=0.002 n=6) Query/sum_by_(le)(h_100),_instant_query 10.74k ± 0% 10.65k ± 0% -0.87% (p=0.002 n=6) Query/sum_by_(le)(h_100),_range_query_with_100_steps 13.76k ± 0% 13.68k ± 0% -0.54% (p=0.002 n=6) Query/sum_by_(le)(h_100),_range_query_with_1000_steps 31.81k ± 0% 31.75k ± 0% -0.19% (p=0.002 n=6) Query/sum_by_(le)(h_2000),_instant_query 218.8k ± 1% 206.9k ± 0% -5.48% (p=0.002 n=6) Query/sum_by_(le)(h_2000),_range_query_with_100_steps 267.0k ± 0% 267.2k ± 0% ~ (p=0.240 n=6) Query/sum_by_(le)(h_2000),_range_query_with_1000_steps 627.4k ± 0% 627.7k ± 0% +0.04% (p=0.002 n=6) Query/rate(a_1[1m])_+_rate(b_1[1m]),_instant_query 798.0 ± 0% 645.0 ± 0% -19.17% (p=0.002 n=6) Query/rate(a_1[1m])_+_rate(b_1[1m]),_range_query_with_100_steps 1012.0 ± 0% 656.0 ± 0% -35.18% (p=0.002 n=6) Query/rate(a_1[1m])_+_rate(b_1[1m]),_range_query_with_1000_steps 2867.0 ± 0% 710.0 ± 0% -75.24% (p=0.002 n=6) Query/rate(a_100[1m])_+_rate(b_100[1m]),_instant_query 5.468k ± 0% 4.465k ± 0% -18.35% (p=0.002 n=6) Query/rate(a_100[1m])_+_rate(b_100[1m]),_range_query_with_100_steps 6.690k ± 0% 5.480k ± 0% -18.09% (p=0.002 n=6) Query/rate(a_100[1m])_+_rate(b_100[1m]),_range_query_with_1000_steps 15.80k ± 0% 10.92k ± 0% -30.90% (p=0.002 n=6) Query/rate(a_2000[1m])_+_rate(b_2000[1m]),_instant_query 93.80k ± 0% 77.52k ± 0% -17.35% (p=0.002 n=6) Query/rate(a_2000[1m])_+_rate(b_2000[1m]),_range_query_with_100_steps 110.06k ± 0% 97.59k ± 0% -11.33% (p=0.002 n=6) Query/rate(a_2000[1m])_+_rate(b_2000[1m]),_range_query_with_1000_steps 220.1k ± 0% 205.8k ± 0% -6.52% (p=0.002 n=6) Query/sum(a_1_+_b_1),_instant_query 744.0 ± 0% 635.0 ± 0% -14.65% (p=0.002 n=6) Query/sum(a_1_+_b_1),_range_query_with_100_steps 958.0 ± 0% 650.0 ± 0% -32.15% (p=0.002 n=6) Query/sum(a_1_+_b_1),_range_query_with_1000_steps 2819.0 ± 0% 710.0 ± 0% -74.81% (p=0.002 n=6) Query/sum(a_100_+_b_100),_instant_query 4.603k ± 0% 4.256k ± 0% -7.54% (p=0.002 n=6) 
Query/sum(a_100_+_b_100),_range_query_with_100_steps 5.824k ± 0% 5.271k ± 0% -9.49% (p=0.002 n=6) Query/sum(a_100_+_b_100),_range_query_with_1000_steps 15.53k ± 0% 11.30k ± 0% -27.22% (p=0.002 n=6) Query/sum(a_2000_+_b_2000),_instant_query 77.72k ± 0% 73.51k ± 0% -5.42% (p=0.002 n=6) Query/sum(a_2000_+_b_2000),_range_query_with_100_steps 93.99k ± 1% 93.57k ± 0% -0.45% (p=0.002 n=6) Query/sum(a_2000_+_b_2000),_range_query_with_1000_steps 216.1k ± 0% 214.0k ± 0% -0.99% (p=0.002 n=6) Query/sum_by_(le)(rate(h_1[1m])),_instant_query 554.0 ± 0% 418.0 ± 0% -24.55% (p=0.002 n=6) Query/sum_by_(le)(rate(h_1[1m])),_range_query_with_100_steps 582.0 ± 0% 473.0 ± 0% -18.73% (p=0.002 n=6) Query/sum_by_(le)(rate(h_1[1m])),_range_query_with_1000_steps 745.0 ± 0% 635.0 ± 0% -14.77% (p=0.002 n=6) Query/sum_by_(le)(rate(h_100[1m])),_instant_query 13.20k ± 0% 11.26k ± 0% -14.68% (p=0.002 n=6) Query/sum_by_(le)(rate(h_100[1m])),_range_query_with_100_steps 16.20k ± 0% 14.30k ± 0% -11.77% (p=0.002 n=6) Query/sum_by_(le)(rate(h_100[1m])),_range_query_with_1000_steps 32.46k ± 0% 30.56k ± 0% -5.86% (p=0.002 n=6) Query/sum_by_(le)(rate(h_2000[1m])),_instant_query 276.4k ± 1% 218.9k ± 0% -20.81% (p=0.002 n=6) Query/sum_by_(le)(rate(h_2000[1m])),_range_query_with_100_steps 315.3k ± 0% 279.2k ± 0% -11.45% (p=0.002 n=6) Query/sum_by_(le)(rate(h_2000[1m])),_range_query_with_1000_steps 665.7k ± 0% 603.7k ± 0% -9.31% (p=0.002 n=6) geomean 11.10k 9.219k -16.97% │ standard │ streaming │ │ B │ B vs base │ Query/a_1,_instant_query 72.84Mi ± 1% 72.25Mi ± 1% -0.82% (p=0.015 n=6) Query/a_1,_range_query_with_100_steps 72.70Mi ± 1% 71.51Mi ± 1% -1.64% (p=0.004 n=6) Query/a_1,_range_query_with_1000_steps 72.05Mi ± 2% 70.04Mi ± 1% -2.80% (p=0.002 n=6) Query/a_100,_instant_query 66.33Mi ± 2% 65.80Mi ± 1% ~ (p=0.368 n=6) Query/a_100,_range_query_with_100_steps 67.19Mi ± 1% 65.77Mi ± 1% -2.12% (p=0.004 n=6) Query/a_100,_range_query_with_1000_steps 68.54Mi ± 1% 68.41Mi ± 1% ~ (p=0.394 n=6) 
Query/a_2000,_instant_query 68.42Mi ± 3% 68.45Mi ± 0% ~ (p=1.000 n=6) Query/a_2000,_range_query_with_100_steps 74.73Mi ± 1% 76.11Mi ± 1% +1.84% (p=0.002 n=6) Query/a_2000,_range_query_with_1000_steps 134.5Mi ± 1% 136.3Mi ± 9% ~ (p=0.509 n=6) Query/rate(a_1[1m]),_instant_query 71.85Mi ± 1% 71.84Mi ± 1% ~ (p=0.818 n=6) Query/rate(a_1[1m]),_range_query_with_100_steps 72.88Mi ± 2% 71.96Mi ± 1% ~ (p=0.065 n=6) Query/rate(a_1[1m]),_range_query_with_1000_steps 70.88Mi ± 2% 69.79Mi ± 1% ~ (p=0.067 n=6) Query/rate(a_100[1m]),_instant_query 66.68Mi ± 1% 65.83Mi ± 0% -1.28% (p=0.002 n=6) Query/rate(a_100[1m]),_range_query_with_100_steps 66.08Mi ± 1% 65.91Mi ± 2% ~ (p=0.699 n=6) Query/rate(a_100[1m]),_range_query_with_1000_steps 67.52Mi ± 1% 68.53Mi ± 1% +1.49% (p=0.002 n=6) Query/rate(a_2000[1m]),_instant_query 69.01Mi ± 1% 68.42Mi ± 2% -0.85% (p=0.015 n=6) Query/rate(a_2000[1m]),_range_query_with_100_steps 73.89Mi ± 2% 75.41Mi ± 3% +2.06% (p=0.006 n=6) Query/rate(a_2000[1m]),_range_query_with_1000_steps 131.7Mi ± 1% 132.8Mi ± 2% ~ (p=0.292 n=6) Query/rate(a_1[1m]),_range_query_with_10000_steps 67.65Mi ± 1% 68.72Mi ± 1% +1.58% (p=0.004 n=6) Query/rate(a_100[1m]),_range_query_with_10000_steps 103.6Mi ± 1% 112.3Mi ± 4% +8.41% (p=0.002 n=6) Query/rate(a_2000[1m]),_range_query_with_10000_steps 543.8Mi ± 5% 421.5Mi ± 3% -22.49% (p=0.002 n=6) Query/rate(a_1[1d]),_instant_query 67.98Mi ± 1% 73.12Mi ± 2% +7.56% (p=0.002 n=6) Query/rate(a_1[1d]),_range_query_with_100_steps 67.20Mi ± 0% 71.86Mi ± 3% +6.94% (p=0.002 n=6) Query/rate(a_1[1d]),_range_query_with_1000_steps 65.39Mi ± 1% 64.44Mi ± 2% ~ (p=0.258 n=6) Query/rate(a_100[1d]),_instant_query 70.02Mi ± 2% 70.53Mi ± 4% ~ (p=0.329 n=6) Query/rate(a_100[1d]),_range_query_with_100_steps 68.98Mi ± 4% 68.83Mi ± 3% ~ (p=0.589 n=6) Query/rate(a_100[1d]),_range_query_with_1000_steps 63.19Mi ± 2% 65.43Mi ± 5% ~ (p=0.240 n=6) Query/rate(a_2000[1d]),_instant_query 78.38Mi ± 1% 76.97Mi ± 1% -1.80% (p=0.004 n=6) 
Query/rate(a_2000[1d]),_range_query_with_100_steps 78.30Mi ± 4% 78.70Mi ± 2% ~ (p=0.331 n=6) Query/rate(a_2000[1d]),_range_query_with_1000_steps 111.8Mi ± 4% 110.5Mi ± 4% ~ (p=0.937 n=6) Query/a_1_-_b_1,_instant_query 72.75Mi ± 1% 72.88Mi ± 1% ~ (p=0.290 n=6) Query/a_1_-_b_1,_range_query_with_100_steps 71.61Mi ± 1% 71.97Mi ± 1% ~ (p=0.420 n=6) Query/a_1_-_b_1,_range_query_with_1000_steps 68.48Mi ± 1% 70.09Mi ± 1% +2.35% (p=0.002 n=6) Query/a_100_-_b_100,_instant_query 66.76Mi ± 1% 65.92Mi ± 1% -1.25% (p=0.011 n=6) Query/a_100_-_b_100,_range_query_with_100_steps 67.09Mi ± 1% 66.21Mi ± 1% -1.30% (p=0.002 n=6) Query/a_100_-_b_100,_range_query_with_1000_steps 73.14Mi ± 3% 68.69Mi ± 1% -6.09% (p=0.002 n=6) Query/a_2000_-_b_2000,_instant_query 69.03Mi ± 1% 68.98Mi ± 1% ~ (p=0.699 n=6) Query/a_2000_-_b_2000,_range_query_with_100_steps 89.50Mi ± 3% 78.59Mi ± 2% -12.19% (p=0.002 n=6) Query/a_2000_-_b_2000,_range_query_with_1000_steps 213.2Mi ± 0% 139.4Mi ± 1% -34.62% (p=0.002 n=6) Query/a_1_-_b_1,_range_query_with_10000_steps 68.86Mi ± 1% 70.09Mi ± 1% +1.78% (p=0.004 n=6) Query/a_100_-_b_100,_range_query_with_10000_steps 158.6Mi ± 5% 113.7Mi ± 3% -28.34% (p=0.002 n=6) Query/a_2000_-_b_2000,_range_query_with_10000_steps 1600.8Mi ± 11% 458.4Mi ± 1% -71.36% (p=0.002 n=6) Query/a_100{l=~"[13579]."}_-_b_100,_instant_query 67.02Mi ± 1% 66.34Mi ± 1% ~ (p=0.065 n=6) Query/a_100{l=~"[13579]."}_-_b_100,_range_query_with_100_steps 66.80Mi ± 1% 67.16Mi ± 1% ~ (p=0.065 n=6) Query/a_100{l=~"[13579]."}_-_b_100,_range_query_with_1000_steps 70.56Mi ± 1% 71.12Mi ± 1% ~ (p=0.193 n=6) Query/a_2000{l=~"1..."}_-_b_2000,_instant_query 68.22Mi ± 3% 141.39Mi ± 3% +107.26% (p=0.002 n=6) Query/a_2000{l=~"1..."}_-_b_2000,_range_query_with_100_steps 81.85Mi ± 1% 113.49Mi ± 37% +38.66% (p=0.002 n=6) Query/a_2000{l=~"1..."}_-_b_2000,_range_query_with_1000_steps 189.6Mi ± 6% 177.4Mi ± 6% -6.46% (p=0.041 n=6) Query/sum(a_1),_instant_query 72.89Mi ± 1% 72.74Mi ± 1% ~ (p=0.331 n=6) 
Query/sum(a_1),_range_query_with_100_steps 72.87Mi ± 1% 73.45Mi ± 1% ~ (p=0.126 n=6) Query/sum(a_1),_range_query_with_1000_steps 71.77Mi ± 2% 79.17Mi ± 2% +10.31% (p=0.002 n=6) Query/sum(a_100),_instant_query 66.07Mi ± 1% 65.70Mi ± 1% ~ (p=0.132 n=6) Query/sum(a_100),_range_query_with_100_steps 66.69Mi ± 1% 65.60Mi ± 1% -1.63% (p=0.002 n=6) Query/sum(a_100),_range_query_with_1000_steps 68.49Mi ± 1% 65.65Mi ± 1% -4.15% (p=0.002 n=6) Query/sum(a_2000),_instant_query 68.56Mi ± 1% 67.64Mi ± 1% -1.34% (p=0.009 n=6) Query/sum(a_2000),_range_query_with_100_steps 75.28Mi ± 2% 66.77Mi ± 1% -11.31% (p=0.002 n=6) Query/sum(a_2000),_range_query_with_1000_steps 133.92Mi ± 1% 70.73Mi ± 2% -47.19% (p=0.002 n=6) Query/sum_by_(l)(h_1),_instant_query 70.35Mi ± 2% 69.88Mi ± 1% ~ (p=0.061 n=6) Query/sum_by_(l)(h_1),_range_query_with_100_steps 70.41Mi ± 2% 69.94Mi ± 2% ~ (p=0.485 n=6) Query/sum_by_(l)(h_1),_range_query_with_1000_steps 67.91Mi ± 2% 69.67Mi ± 1% +2.59% (p=0.002 n=6) Query/sum_by_(l)(h_100),_instant_query 66.62Mi ± 3% 66.23Mi ± 1% -0.57% (p=0.041 n=6) Query/sum_by_(l)(h_100),_range_query_with_100_steps 69.16Mi ± 1% 68.66Mi ± 1% ~ (p=0.132 n=6) Query/sum_by_(l)(h_100),_range_query_with_1000_steps 88.68Mi ± 2% 76.27Mi ± 1% -13.99% (p=0.002 n=6) Query/sum_by_(l)(h_2000),_instant_query 72.65Mi ± 2% 70.82Mi ± 2% -2.52% (p=0.009 n=6) Query/sum_by_(l)(h_2000),_range_query_with_100_steps 119.36Mi ± 1% 82.17Mi ± 2% -31.16% (p=0.002 n=6) Query/sum_by_(l)(h_2000),_range_query_with_1000_steps 464.1Mi ± 0% 150.4Mi ± 3% -67.60% (p=0.002 n=6) Query/sum_by_(le)(h_1),_instant_query 70.67Mi ± 1% 70.34Mi ± 1% ~ (p=0.071 n=6) Query/sum_by_(le)(h_1),_range_query_with_100_steps 70.23Mi ± 1% 74.91Mi ± 2% +6.66% (p=0.002 n=6) Query/sum_by_(le)(h_1),_range_query_with_1000_steps 68.41Mi ± 1% 86.61Mi ± 1% +26.61% (p=0.002 n=6) Query/sum_by_(le)(h_100),_instant_query 67.10Mi ± 1% 66.30Mi ± 1% -1.20% (p=0.030 n=6) Query/sum_by_(le)(h_100),_range_query_with_100_steps 69.29Mi ± 1% 66.02Mi ± 1% -4.72% 
(p=0.002 n=6) Query/sum_by_(le)(h_100),_range_query_with_1000_steps 87.10Mi ± 2% 69.05Mi ± 1% -20.73% (p=0.002 n=6) Query/sum_by_(le)(h_2000),_instant_query 72.02Mi ± 2% 71.38Mi ± 3% ~ (p=0.093 n=6) Query/sum_by_(le)(h_2000),_range_query_with_100_steps 113.55Mi ± 1% 69.91Mi ± 1% -38.43% (p=0.002 n=6) Query/sum_by_(le)(h_2000),_range_query_with_1000_steps 431.33Mi ± 0% 73.33Mi ± 1% -83.00% (p=0.002 n=6) Query/rate(a_1[1m])_+_rate(b_1[1m]),_instant_query 72.02Mi ± 1% 72.16Mi ± 1% ~ (p=0.589 n=6) Query/rate(a_1[1m])_+_rate(b_1[1m]),_range_query_with_100_steps 71.26Mi ± 1% 71.87Mi ± 1% +0.86% (p=0.015 n=6) Query/rate(a_1[1m])_+_rate(b_1[1m]),_range_query_with_1000_steps 67.88Mi ± 1% 70.08Mi ± 1% +3.23% (p=0.002 n=6) Query/rate(a_100[1m])_+_rate(b_100[1m]),_instant_query 66.96Mi ± 1% 65.84Mi ± 1% -1.68% (p=0.002 n=6) Query/rate(a_100[1m])_+_rate(b_100[1m]),_range_query_with_100_steps 67.11Mi ± 1% 66.54Mi ± 1% ~ (p=0.093 n=6) Query/rate(a_100[1m])_+_rate(b_100[1m]),_range_query_with_1000_steps 72.39Mi ± 3% 67.70Mi ± 2% -6.49% (p=0.002 n=6) Query/rate(a_2000[1m])_+_rate(b_2000[1m]),_instant_query 69.16Mi ± 2% 69.09Mi ± 2% ~ (p=0.563 n=6) Query/rate(a_2000[1m])_+_rate(b_2000[1m]),_range_query_with_100_steps 86.86Mi ± 4% 78.24Mi ± 2% -9.92% (p=0.002 n=6) Query/rate(a_2000[1m])_+_rate(b_2000[1m]),_range_query_with_1000_steps 211.6Mi ± 0% 140.8Mi ± 3% -33.47% (p=0.002 n=6) Query/sum(a_1_+_b_1),_instant_query 72.75Mi ± 1% 72.20Mi ± 1% ~ (p=0.065 n=6) Query/sum(a_1_+_b_1),_range_query_with_100_steps 71.85Mi ± 1% 72.59Mi ± 1% +1.03% (p=0.002 n=6) Query/sum(a_1_+_b_1),_range_query_with_1000_steps 68.41Mi ± 1% 75.12Mi ± 1% +9.80% (p=0.002 n=6) Query/sum(a_100_+_b_100),_instant_query 66.73Mi ± 1% 66.24Mi ± 1% ~ (p=0.065 n=6) Query/sum(a_100_+_b_100),_range_query_with_100_steps 66.50Mi ± 1% 65.92Mi ± 1% -0.87% (p=0.041 n=6) Query/sum(a_100_+_b_100),_range_query_with_1000_steps 73.99Mi ± 1% 65.73Mi ± 2% -11.16% (p=0.002 n=6) Query/sum(a_2000_+_b_2000),_instant_query 69.02Mi ± 1% 
68.65Mi ± 3% ~ (p=0.589 n=6) Query/sum(a_2000_+_b_2000),_range_query_with_100_steps 89.06Mi ± 2% 68.27Mi ± 2% -23.35% (p=0.002 n=6) Query/sum(a_2000_+_b_2000),_range_query_with_1000_steps 213.24Mi ± 0% 77.19Mi ± 1% -63.80% (p=0.002 n=6) Query/sum_by_(le)(rate(h_1[1m])),_instant_query 70.17Mi ± 1% 69.88Mi ± 1% ~ (p=0.699 n=6) Query/sum_by_(le)(rate(h_1[1m])),_range_query_with_100_steps 69.22Mi ± 2% 74.95Mi ± 1% +8.27% (p=0.002 n=6) Query/sum_by_(le)(rate(h_1[1m])),_range_query_with_1000_steps 67.19Mi ± 1% 87.00Mi ± 2% +29.49% (p=0.002 n=6) Query/sum_by_(le)(rate(h_100[1m])),_instant_query 66.81Mi ± 1% 66.08Mi ± 1% -1.10% (p=0.002 n=6) Query/sum_by_(le)(rate(h_100[1m])),_range_query_with_100_steps 68.78Mi ± 2% 66.34Mi ± 1% -3.56% (p=0.002 n=6) Query/sum_by_(le)(rate(h_100[1m])),_range_query_with_1000_steps 84.90Mi ± 3% 68.48Mi ± 2% -19.33% (p=0.002 n=6) Query/sum_by_(le)(rate(h_2000[1m])),_instant_query 70.59Mi ± 3% 70.88Mi ± 2% ~ (p=0.589 n=6) Query/sum_by_(le)(rate(h_2000[1m])),_range_query_with_100_steps 113.20Mi ± 2% 69.80Mi ± 1% -38.33% (p=0.002 n=6) Query/sum_by_(le)(rate(h_2000[1m])),_range_query_with_1000_steps 270.64Mi ± 3% 71.90Mi ± 2% -73.43% (p=0.002 n=6) geomean 85.81Mi 78.13Mi -8.95% --- pkg/streamingpromql/operator/pool.go | 18 +++--- .../operator/ring_buffer_test.go | 59 ++++++------------- 2 files changed, 29 insertions(+), 48 deletions(-) diff --git a/pkg/streamingpromql/operator/pool.go b/pkg/streamingpromql/operator/pool.go index 76c65cf9045..7cdf95f7ef3 100644 --- a/pkg/streamingpromql/operator/pool.go +++ b/pkg/streamingpromql/operator/pool.go @@ -9,33 +9,35 @@ import ( ) const ( - maxExpectedPointsPerSeries = 100_000 // There's not too much science behind this number: 100000 points allows for a point per minute for just under 70 days. + maxExpectedPointsPerSeries = 100_000 // There's not too much science behind this number: 100000 points allows for a point per minute for just under 70 days. 
+ pointsPerSeriesBucketFactor = 2.0 - maxExpectedSeriesPerResult = 10_000_000 // Likewise, there's not too much science behind this number: this is the based on examining the largest queries seen at Grafana Labs. + maxExpectedSeriesPerResult = 10_000_000 // Likewise, there's not too much science behind this number: this is based on examining the largest queries seen at Grafana Labs. + seriesPerResultBucketFactor = 2.0 ) var ( - fPointSlicePool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, 10, func(size int) []promql.FPoint { + fPointSlicePool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, pointsPerSeriesBucketFactor, func(size int) []promql.FPoint { return make([]promql.FPoint, 0, size) }) - matrixPool = pool.NewBucketedPool(1, maxExpectedSeriesPerResult, 10, func(size int) promql.Matrix { + matrixPool = pool.NewBucketedPool(1, maxExpectedSeriesPerResult, seriesPerResultBucketFactor, func(size int) promql.Matrix { return make(promql.Matrix, 0, size) }) - vectorPool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, 10, func(size int) promql.Vector { + vectorPool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, pointsPerSeriesBucketFactor, func(size int) promql.Vector { return make(promql.Vector, 0, size) }) - seriesMetadataSlicePool = pool.NewBucketedPool(1, maxExpectedSeriesPerResult, 10, func(size int) []SeriesMetadata { + seriesMetadataSlicePool = pool.NewBucketedPool(1, maxExpectedSeriesPerResult, seriesPerResultBucketFactor, func(size int) []SeriesMetadata { return make([]SeriesMetadata, 0, size) }) - floatSlicePool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, 10, func(_ int) []float64 { + floatSlicePool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, pointsPerSeriesBucketFactor, func(_ int) []float64 { // Don't allocate a new slice now - we'll allocate one in GetFloatSlice if we need it, so we can differentiate between reused and new slices. 
return nil }) - boolSlicePool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, 10, func(_ int) []bool { + boolSlicePool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, pointsPerSeriesBucketFactor, func(_ int) []bool { // Don't allocate a new slice now - we'll allocate one in GetBoolSlice if we need it, so we can differentiate between reused and new slices. return nil }) diff --git a/pkg/streamingpromql/operator/ring_buffer_test.go b/pkg/streamingpromql/operator/ring_buffer_test.go index 361db494299..abd136f78ce 100644 --- a/pkg/streamingpromql/operator/ring_buffer_test.go +++ b/pkg/streamingpromql/operator/ring_buffer_test.go @@ -38,19 +38,13 @@ func TestRingBuffer(t *testing.T) { buf.Append(promql.FPoint{T: 5, F: 500}) shouldHavePoints(t, buf, promql.FPoint{T: 4, F: 400}, promql.FPoint{T: 5, F: 500}) - // Trigger expansion of buffer (we resize in powers of two, but the underlying slice comes from a pool that uses a factor of 10). + // Trigger expansion of buffer (we resize in powers of two, and the underlying slice comes from a pool that uses a factor of 2 as well). // Ideally we wouldn't reach into the internals here, but this helps ensure the test is testing the correct scenario. 
- require.Len(t, buf.points, 10, "expected underlying slice to have length 10, if this assertion fails, the test setup is not as expected") + require.Len(t, buf.points, 2, "expected underlying slice to have length 2, if this assertion fails, the test setup is not as expected") + require.Equal(t, 2, cap(buf.points), "expected underlying slice to have capacity 2, if this assertion fails, the test setup is not as expected") buf.Append(promql.FPoint{T: 6, F: 600}) buf.Append(promql.FPoint{T: 7, F: 700}) - buf.Append(promql.FPoint{T: 8, F: 800}) - buf.Append(promql.FPoint{T: 9, F: 900}) - buf.Append(promql.FPoint{T: 10, F: 1000}) - buf.Append(promql.FPoint{T: 11, F: 1100}) - buf.Append(promql.FPoint{T: 12, F: 1200}) - buf.Append(promql.FPoint{T: 13, F: 1300}) - buf.Append(promql.FPoint{T: 14, F: 1400}) - require.Greater(t, len(buf.points), 10, "expected underlying slice to be expanded, if this assertion fails, the test setup is not as expected") + require.Greater(t, cap(buf.points), 2, "expected underlying slice to be expanded, if this assertion fails, the test setup is not as expected") shouldHavePoints(t, buf, @@ -58,13 +52,6 @@ func TestRingBuffer(t *testing.T) { promql.FPoint{T: 5, F: 500}, promql.FPoint{T: 6, F: 600}, promql.FPoint{T: 7, F: 700}, - promql.FPoint{T: 8, F: 800}, - promql.FPoint{T: 9, F: 900}, - promql.FPoint{T: 10, F: 1000}, - promql.FPoint{T: 11, F: 1100}, - promql.FPoint{T: 12, F: 1200}, - promql.FPoint{T: 13, F: 1300}, - promql.FPoint{T: 14, F: 1400}, ) buf.Reset() @@ -76,48 +63,40 @@ func TestRingBuffer(t *testing.T) { func TestRingBuffer_DiscardPointsBefore_ThroughWrapAround(t *testing.T) { // Set up the buffer so that the first point is part-way through the underlying slice. - // We resize in powers of two, but the underlying slice comes from a pool that uses a factor of 10. + // We resize in powers of two, and the underlying slice comes from a pool that uses a factor of 2 as well. 
buf := &RingBuffer{} buf.Append(promql.FPoint{T: 1, F: 100}) buf.Append(promql.FPoint{T: 2, F: 200}) buf.Append(promql.FPoint{T: 3, F: 300}) buf.Append(promql.FPoint{T: 4, F: 400}) - buf.Append(promql.FPoint{T: 5, F: 500}) - buf.Append(promql.FPoint{T: 6, F: 600}) - buf.Append(promql.FPoint{T: 7, F: 700}) - buf.Append(promql.FPoint{T: 8, F: 800}) - buf.Append(promql.FPoint{T: 9, F: 900}) - buf.Append(promql.FPoint{T: 10, F: 1000}) // Ideally we wouldn't reach into the internals here, but this helps ensure the test is testing the correct scenario. - require.Len(t, buf.points, 10, "expected underlying slice to have length 10, if this assertion fails, the test setup is not as expected") - buf.DiscardPointsBefore(8) - buf.Append(promql.FPoint{T: 11, F: 1100}) - buf.Append(promql.FPoint{T: 12, F: 1200}) - buf.Append(promql.FPoint{T: 13, F: 1300}) + require.Len(t, buf.points, 4, "expected underlying slice to have length 4, if this assertion fails, the test setup is not as expected") + require.Equal(t, 4, cap(buf.points), "expected underlying slice to have capacity 4, if this assertion fails, the test setup is not as expected") + buf.DiscardPointsBefore(3) + buf.Append(promql.FPoint{T: 5, F: 500}) + buf.Append(promql.FPoint{T: 6, F: 600}) // Should not have expanded slice. - require.Len(t, buf.points, 10, "expected underlying slice to have length 10, if this assertion fails, the test setup is not as expected") + require.Len(t, buf.points, 4, "expected underlying slice to have length 4") + require.Equal(t, 4, cap(buf.points), "expected underlying slice to have capacity 4") // Discard before end of underlying slice. 
- buf.DiscardPointsBefore(9) + buf.DiscardPointsBefore(4) shouldHavePoints(t, buf, - promql.FPoint{T: 9, F: 900}, - promql.FPoint{T: 10, F: 1000}, - promql.FPoint{T: 11, F: 1100}, - promql.FPoint{T: 12, F: 1200}, - promql.FPoint{T: 13, F: 1300}, + promql.FPoint{T: 4, F: 400}, + promql.FPoint{T: 5, F: 500}, + promql.FPoint{T: 6, F: 600}, ) - require.Equal(t, 8, buf.firstIndex, "expected first point to be in middle of underlying slice, if this assertion fails, the test setup is not as expected") + require.Equal(t, 3, buf.firstIndex, "expected first point to be in middle of underlying slice, if this assertion fails, the test setup is not as expected") // Discard after wraparound. - buf.DiscardPointsBefore(12) + buf.DiscardPointsBefore(6) shouldHavePoints(t, buf, - promql.FPoint{T: 12, F: 1200}, - promql.FPoint{T: 13, F: 1300}, + promql.FPoint{T: 6, F: 600}, ) } From e07463cbf3c83e39979cb731d8d215d8572be63c Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 10 May 2024 13:49:30 +1000 Subject: [PATCH 30/43] Add changelog entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52174ad2ae6..abd7fa0d246 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ * [FEATURE] Continuous-test: now runable as a module with `mimir -target=continuous-test`. #7747 * [FEATURE] Store-gateway: Allow specific tenants to be enabled or disabled via `-store-gateway.enabled-tenants` or `-store-gateway.disabled-tenants` CLI flags or their corresponding YAML settings. #7653 * [FEATURE] New `-.s3.bucket-lookup-type` flag configures lookup style type, used to access bucket in s3 compatible providers. #7684 -* [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.promql-engine=streaming`. #7693 #7898 #7899 #8023 #8058 +* [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.promql-engine=streaming`. 
#7693 #7898 #7899 #8023 #8058 #8096 * [FEATURE] New `/ingester/unregister-on-shutdown` HTTP endpoint allows dynamic access to ingesters' `-ingester.ring.unregister-on-shutdown` configuration. #7739 * [FEATURE] Server: added experimental [PROXY protocol support](https://www.haproxy.org/download/2.3/doc/proxy-protocol.txt). The PROXY protocol support can be enabled via `-server.proxy-protocol-enabled=true`. When enabled, the support is added both to HTTP and gRPC listening ports. #7698 * [ENHANCEMENT] Reduced memory allocations in functions used to propagate contextual information between gRPC calls. #7529 From bc557db94840836a6e2fdbf8023320fc73af4091 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 10 May 2024 14:02:27 +1000 Subject: [PATCH 31/43] Fix flaky test. --- pkg/streamingpromql/operator/ring_buffer.go | 16 ++++++---- .../operator/ring_buffer_test.go | 29 +++++++++++++++++++ 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/pkg/streamingpromql/operator/ring_buffer.go b/pkg/streamingpromql/operator/ring_buffer.go index 8e6869ba756..f48db09aaca 100644 --- a/pkg/streamingpromql/operator/ring_buffer.go +++ b/pkg/streamingpromql/operator/ring_buffer.go @@ -10,6 +10,12 @@ type RingBuffer struct { size int // Number of points in this buffer. } +var ( + // Overrides used only during tests. + getFPointSliceForRingBuffer = GetFPointSlice + putFPointSliceForRingBuffer = PutFPointSlice +) + // DiscardPointsBefore discards all points in this buffer with timestamp less than t. func (b *RingBuffer) DiscardPointsBefore(t int64) { for b.size > 0 && b.points[b.firstIndex].T < t { @@ -31,7 +37,7 @@ func (b *RingBuffer) DiscardPointsBefore(t int64) { // Callers must not modify the values in the returned slices or return them to a pool. // Calling UnsafePoints is more efficient than calling CopyPoints, as CopyPoints will create a new slice and copy all // points into the slice, whereas UnsafePoints returns a view into the internal state of this buffer. 
-// The returned slices are no longer valid if this buffer is modified (eg. a point is added, or the buffer is reset). +// The returned slices are no longer valid if this buffer is modified (eg. a point is added, or the buffer is reset or closed). // // FIXME: the fact we have to expose this is a bit gross, but the overhead of calling a function with ForEach is terrible. // Perhaps we can use range-over function iterators (https://go.dev/wiki/RangefuncExperiment) once this is not experimental? @@ -65,7 +71,7 @@ func (b *RingBuffer) CopyPoints(maxT int64) []promql.FPoint { } head, tail := b.UnsafePoints(maxT) - combined := GetFPointSlice(len(head) + len(tail)) + combined := getFPointSliceForRingBuffer(len(head) + len(tail)) combined = append(combined, head...) combined = append(combined, tail...) @@ -109,13 +115,13 @@ func (b *RingBuffer) Append(p promql.FPoint) { newSize = 2 } - newSlice := GetFPointSlice(newSize) + newSlice := getFPointSliceForRingBuffer(newSize) newSlice = newSlice[:cap(newSlice)] pointsAtEnd := b.size - b.firstIndex copy(newSlice, b.points[b.firstIndex:]) copy(newSlice[pointsAtEnd:], b.points[:b.firstIndex]) - PutFPointSlice(b.points) + putFPointSliceForRingBuffer(b.points) b.points = newSlice b.firstIndex = 0 } @@ -134,7 +140,7 @@ func (b *RingBuffer) Reset() { // Close releases any resources associated with this buffer. 
func (b *RingBuffer) Close() { b.Reset() - PutFPointSlice(b.points) + putFPointSliceForRingBuffer(b.points) b.points = nil } diff --git a/pkg/streamingpromql/operator/ring_buffer_test.go b/pkg/streamingpromql/operator/ring_buffer_test.go index 1f689b42fe3..c22abd58046 100644 --- a/pkg/streamingpromql/operator/ring_buffer_test.go +++ b/pkg/streamingpromql/operator/ring_buffer_test.go @@ -11,6 +11,8 @@ import ( ) func TestRingBuffer(t *testing.T) { + setupRingBufferPoolFunctionsForTesting(t) + buf := &RingBuffer{} shouldHaveNoPoints(t, buf) @@ -63,6 +65,8 @@ func TestRingBuffer(t *testing.T) { } func TestRingBuffer_DiscardPointsBefore_ThroughWrapAround(t *testing.T) { + setupRingBufferPoolFunctionsForTesting(t) + // Set up the buffer so that the first point is part-way through the underlying slice. // We resize in powers of two, and the underlying slice comes from a pool that uses a factor of 2 as well. buf := &RingBuffer{} @@ -156,3 +160,28 @@ func shouldHavePointsAtOrBeforeTime(t *testing.T, buf *RingBuffer, ts int64, exp require.Equal(t, expected[len(expected)-1], end) } } + +// setupRingBufferPoolFunctionsForTesting replaces the global FPoint slice pool used by RingBuffer +// with a fake for testing. +// +// This helps ensure that the tests behave as expected: the default global pool does not guarantee that +// slices returned have exactly the capacity requested. Instead, it only guarantees that slices have +// capacity at least as large as requested. This makes it difficult to consistently test scenarios like +// wraparound. +func setupRingBufferPoolFunctionsForTesting(t *testing.T) { + originalGet := getFPointSliceForRingBuffer + originalPut := putFPointSliceForRingBuffer + + getFPointSliceForRingBuffer = func(size int) []promql.FPoint { + return make([]promql.FPoint, 0, size) + } + + putFPointSliceForRingBuffer = func(_ []promql.FPoint) { + // Drop slice on the floor - we don't need it. 
+ } + + t.Cleanup(func() { + getFPointSliceForRingBuffer = originalGet + putFPointSliceForRingBuffer = originalPut + }) +} From a23be64e9facb8cbfefbd6b43abca115312e368e Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 10 May 2024 14:06:24 +1000 Subject: [PATCH 32/43] Remove outdated comments --- pkg/streamingpromql/operator/binary_operation.go | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index 41316dc8fdb..63df02cb1a5 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -137,9 +137,6 @@ func (b *BinaryOperation) computeOutputSeries() ([]SeriesMetadata, []*binaryOper // TODO: Prometheus' engine uses strings for the key here, which would avoid issues with hash collisions, but seems much slower. // Either we should use strings, or we'll need to deal with hash collisions. hashFunc := b.hashFunc() - - // TODO: pool binaryOperationOutputSeries? Pool internal slices? - // TODO: guess initial size of map? outputSeriesMap := map[uint64]*binaryOperationOutputSeries{} // TODO: is it better to use whichever side has fewer series for this first loop? Should result in a smaller map and therefore less work later on @@ -353,10 +350,7 @@ func (b *BinaryOperation) NextSeries(ctx context.Context) (InstantVectorSeriesDa // // mergeOneSide is optimised for the case where there is only one source series, or the source series do not overlap, as in the example above. // -// TODO: for many-to-one / one-to-many matching, we could avoid re-merging each time for the side used multiple times -// TODO: would this be easier to do if we were working with []float64 rather than []FPoint? -// - would also mean that some arithmetic operations become faster, as we can use vectorisation (eg. leftPoints + rightPoints, rather than output[i] = left[i] + right[i] etc.) 
-// - should we just change the InstantVectorOperator interface to use ([]float64, presence)? Would make some aggregation operations faster as well (eg. sum) +// FIXME: for many-to-one / one-to-many matching, we could avoid re-merging each time for the side used multiple times func (b *BinaryOperation) mergeOneSide(data []InstantVectorSeriesData, sourceSeriesIndices []int, sourceSeriesMetadata []SeriesMetadata, side string) (InstantVectorSeriesData, error) { if len(data) == 1 { // Fast path: if there's only one series on this side, there's no merging required. @@ -377,7 +371,7 @@ func (b *BinaryOperation) mergeOneSide(data []InstantVectorSeriesData, sourceSer // We're going to create a new slice, so return this one to the pool. // We'll return the other slices in the for loop below. // We must defer here, rather than at the end, as the merge loop below reslices Floats. - // TODO: this isn't correct for many-to-one / one-to-many matching - we'll need the series again (unless we store the result of the merge) + // FIXME: this isn't correct for many-to-one / one-to-many matching - we'll need the series again (unless we store the result of the merge) defer PutFPointSlice(data[0].Floats) for i := 0; i < len(data)-1; i++ { @@ -387,7 +381,7 @@ func (b *BinaryOperation) mergeOneSide(data []InstantVectorSeriesData, sourceSer // We're going to create a new slice, so return this one to the pool. // We must defer here, rather than at the end, as the merge loop below reslices Floats. - // TODO: this isn't correct for many-to-one / one-to-many matching - we'll need the series again (unless we store the result of the merge) + // FIXME: this isn't correct for many-to-one / one-to-many matching - we'll need the series again (unless we store the result of the merge) defer PutFPointSlice(second.Floats) // Check if first overlaps with second. 
@@ -540,7 +534,6 @@ func newBinaryOperationSeriesBuffer(source InstantVectorOperator) *binaryOperati // The returned slice is only safe to use until getSeries is called again. func (b *binaryOperationSeriesBuffer) getSeries(ctx context.Context, seriesIndices []int) ([]InstantVectorSeriesData, error) { if cap(b.output) < len(seriesIndices) { - // TODO: pool? b.output = make([]InstantVectorSeriesData, len(seriesIndices)) } From 7241678a7c39a04eacd02ad517c2198df589212f Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 10 May 2024 14:58:42 +1000 Subject: [PATCH 33/43] Don't bother buffering series that won't be used. --- .../operator/binary_operation.go | 68 +++++++++++++++---- 1 file changed, 55 insertions(+), 13 deletions(-) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index 63df02cb1a5..5d4d587d62d 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -90,12 +90,12 @@ func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, return nil, nil } - allMetadata, allSeries := b.computeOutputSeries() + allMetadata, allSeries, leftSeriesUsed, rightSeriesUsed := b.computeOutputSeries() b.sortSeries(allMetadata, allSeries) b.remainingSeries = allSeries - b.leftBuffer = newBinaryOperationSeriesBuffer(b.Left) - b.rightBuffer = newBinaryOperationSeriesBuffer(b.Right) + b.leftBuffer = newBinaryOperationSeriesBuffer(b.Left, leftSeriesUsed) + b.rightBuffer = newBinaryOperationSeriesBuffer(b.Right, rightSeriesUsed) return allMetadata, nil } @@ -133,7 +133,15 @@ func (b *BinaryOperation) loadSeriesMetadata(ctx context.Context) (bool, error) return true, nil } -func (b *BinaryOperation) computeOutputSeries() ([]SeriesMetadata, []*binaryOperationOutputSeries) { +// computeOutputSeries determines the possible output series from this operator. +// It assumes leftMetadata and rightMetadata have already been populated. 
+// +// It returns: +// - a list of all possible series this operator could return +// - a corresponding list of the source series for each output series +// - a list indicating which series from the left side are needed to compute the output +// - a list indicating which series from the right side are needed to compute the output +func (b *BinaryOperation) computeOutputSeries() ([]SeriesMetadata, []*binaryOperationOutputSeries, []bool, []bool) { // TODO: Prometheus' engine uses strings for the key here, which would avoid issues with hash collisions, but seems much slower. // Either we should use strings, or we'll need to deal with hash collisions. hashFunc := b.hashFunc() @@ -175,14 +183,24 @@ func (b *BinaryOperation) computeOutputSeries() ([]SeriesMetadata, []*binaryOper allMetadata := make([]SeriesMetadata, 0, len(outputSeriesMap)) allSeries := make([]*binaryOperationOutputSeries, 0, len(outputSeriesMap)) labelsFunc := b.labelsFunc() + leftSeriesUsed := GetBoolSlice(len(b.leftMetadata))[:len(b.leftMetadata)] + rightSeriesUsed := GetBoolSlice(len(b.rightMetadata))[:len(b.rightMetadata)] for _, outputSeries := range outputSeriesMap { firstSeriesLabels := b.leftMetadata[outputSeries.leftSeriesIndices[0]].Labels allMetadata = append(allMetadata, SeriesMetadata{Labels: labelsFunc(firstSeriesLabels)}) allSeries = append(allSeries, outputSeries) + + for _, leftSeriesIndex := range outputSeries.leftSeriesIndices { + leftSeriesUsed[leftSeriesIndex] = true + } + + for _, rightSeriesIndex := range outputSeries.rightSeriesIndices { + rightSeriesUsed[rightSeriesIndex] = true + } } - return allMetadata, allSeries + return allMetadata, allSeries, leftSeriesUsed, rightSeriesUsed } // sortSeries sorts metadata and series in place to try to minimise the number of input series we'll need to buffer in memory. 
@@ -504,6 +522,14 @@ func (b *BinaryOperation) Close() { if b.rightMetadata != nil { PutSeriesMetadataSlice(b.rightMetadata) } + + if b.leftBuffer != nil { + b.leftBuffer.close() + } + + if b.rightBuffer != nil { + b.rightBuffer.close() + } } // binaryOperationSeriesBuffer buffers series data until it is needed by BinaryOperation. @@ -515,18 +541,23 @@ type binaryOperationSeriesBuffer struct { source InstantVectorOperator nextIndexToRead int - // TODO: what is the best way to store buffered data? - buffer map[int]InstantVectorSeriesData + // If seriesUsed[i] == true, then the series at index i is needed for this operation and should be buffered if not used immediately. + // If seriesUsed[i] == false, then the series at index i is never used and can be immediately discarded. + // FIXME: could use a bitmap here to save some memory + seriesUsed []bool - // TODO: need a way to know if a series will never be used and therefore skip buffering it + // Stores series read but required for later series. + buffer map[int]InstantVectorSeriesData + // Reused to avoid allocating on every call to getSeries. output []InstantVectorSeriesData } -func newBinaryOperationSeriesBuffer(source InstantVectorOperator) *binaryOperationSeriesBuffer { +func newBinaryOperationSeriesBuffer(source InstantVectorOperator, seriesUsed []bool) *binaryOperationSeriesBuffer { return &binaryOperationSeriesBuffer{ - source: source, - buffer: map[int]InstantVectorSeriesData{}, + source: source, + seriesUsed: seriesUsed, + buffer: map[int]InstantVectorSeriesData{}, } } @@ -559,8 +590,13 @@ func (b *binaryOperationSeriesBuffer) getSingleSeries(ctx context.Context, serie return InstantVectorSeriesData{}, err } - // TODO: don't bother storing data we won't need, immediately return slice to pool - b.buffer[b.nextIndexToRead] = d + if b.seriesUsed[b.nextIndexToRead] { + // We need this series later, but not right now. Store it for later. 
+ b.buffer[b.nextIndexToRead] = d + } else { + // We don't need this series at all, return the slice to the pool now. + PutFPointSlice(d.Floats) + } b.nextIndexToRead++ } @@ -577,6 +613,12 @@ func (b *binaryOperationSeriesBuffer) getSingleSeries(ctx context.Context, serie return d, nil } +func (b *binaryOperationSeriesBuffer) close() { + if b.seriesUsed != nil { + PutBoolSlice(b.seriesUsed) + } +} + type binaryOperationFunc func(left, right float64) float64 var arithmeticOperationFuncs = map[parser.ItemType]binaryOperationFunc{ From 3c971f437743473b1e0a2195fdc7fe3764aec61b Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 10 May 2024 14:58:53 +1000 Subject: [PATCH 34/43] Clarify expected behaviour of Close --- pkg/streamingpromql/operator/operator.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/streamingpromql/operator/operator.go b/pkg/streamingpromql/operator/operator.go index 868c078ef53..443df8b8e1b 100644 --- a/pkg/streamingpromql/operator/operator.go +++ b/pkg/streamingpromql/operator/operator.go @@ -21,6 +21,7 @@ type Operator interface { // Close frees all resources associated with this operator. // Calling SeriesMetadata or NextSeries after calling Close may result in unpredictable behaviour, corruption or crashes. + // It must be safe to call Close at any time, including if SeriesMetadata or NextSeries have returned an error. Close() } From 053f3d33d2b98ce95dab55c9cee4febfce7f257e Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 10 May 2024 16:08:57 +1000 Subject: [PATCH 35/43] Move `FallbackEngine` and `NotSupportedError` to their own package. 
--- pkg/querier/querier.go | 3 ++- pkg/streamingpromql/{ => compat}/errors.go | 2 +- .../{ => compat}/fallback_engine.go | 2 +- .../{ => compat}/fallback_engine_test.go | 2 +- pkg/streamingpromql/engine_test.go | 8 +++--- pkg/streamingpromql/query.go | 25 ++++++++++--------- 6 files changed, 23 insertions(+), 19 deletions(-) rename pkg/streamingpromql/{ => compat}/errors.go (94%) rename pkg/streamingpromql/{ => compat}/fallback_engine.go (99%) rename pkg/streamingpromql/{ => compat}/fallback_engine_test.go (99%) diff --git a/pkg/querier/querier.go b/pkg/querier/querier.go index c1a5d6f3a7f..762296b6e18 100644 --- a/pkg/querier/querier.go +++ b/pkg/querier/querier.go @@ -30,6 +30,7 @@ import ( "github.com/grafana/mimir/pkg/storage/chunk" "github.com/grafana/mimir/pkg/storage/lazyquery" "github.com/grafana/mimir/pkg/streamingpromql" + "github.com/grafana/mimir/pkg/streamingpromql/compat" "github.com/grafana/mimir/pkg/util" "github.com/grafana/mimir/pkg/util/activitytracker" "github.com/grafana/mimir/pkg/util/limiter" @@ -170,7 +171,7 @@ func New(cfg Config, limits *validation.Overrides, distributor Distributor, stor if cfg.EnablePromQLEngineFallback { prometheusEngine := promql.NewEngine(opts) - eng = streamingpromql.NewEngineWithFallback(streamingEngine, prometheusEngine, reg, logger) + eng = compat.NewEngineWithFallback(streamingEngine, prometheusEngine, reg, logger) } else { eng = streamingEngine } diff --git a/pkg/streamingpromql/errors.go b/pkg/streamingpromql/compat/errors.go similarity index 94% rename from pkg/streamingpromql/errors.go rename to pkg/streamingpromql/compat/errors.go index 9cebc167048..14d1000d14b 100644 --- a/pkg/streamingpromql/errors.go +++ b/pkg/streamingpromql/compat/errors.go @@ -1,6 +1,6 @@ // SPDX-License-Identifier: AGPL-3.0-only -package streamingpromql +package compat import ( "errors" diff --git a/pkg/streamingpromql/fallback_engine.go b/pkg/streamingpromql/compat/fallback_engine.go similarity index 99% rename from 
pkg/streamingpromql/fallback_engine.go rename to pkg/streamingpromql/compat/fallback_engine.go index 28b9957a47a..237714ed8b8 100644 --- a/pkg/streamingpromql/fallback_engine.go +++ b/pkg/streamingpromql/compat/fallback_engine.go @@ -1,6 +1,6 @@ // SPDX-License-Identifier: AGPL-3.0-only -package streamingpromql +package compat import ( "context" diff --git a/pkg/streamingpromql/fallback_engine_test.go b/pkg/streamingpromql/compat/fallback_engine_test.go similarity index 99% rename from pkg/streamingpromql/fallback_engine_test.go rename to pkg/streamingpromql/compat/fallback_engine_test.go index ad49747b98a..0d18f866349 100644 --- a/pkg/streamingpromql/fallback_engine_test.go +++ b/pkg/streamingpromql/compat/fallback_engine_test.go @@ -1,6 +1,6 @@ // SPDX-License-Identifier: AGPL-3.0-only -package streamingpromql +package compat import ( "context" diff --git a/pkg/streamingpromql/engine_test.go b/pkg/streamingpromql/engine_test.go index 7596db73406..bb553c03081 100644 --- a/pkg/streamingpromql/engine_test.go +++ b/pkg/streamingpromql/engine_test.go @@ -14,6 +14,8 @@ import ( "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql" "github.com/stretchr/testify/require" + + "github.com/grafana/mimir/pkg/streamingpromql/compat" ) func TestUnsupportedPromQLFeatures(t *testing.T) { @@ -46,13 +48,13 @@ func TestUnsupportedPromQLFeatures(t *testing.T) { t.Run(expression, func(t *testing.T) { qry, err := engine.NewRangeQuery(ctx, nil, nil, expression, time.Now().Add(-time.Hour), time.Now(), time.Minute) require.Error(t, err) - require.ErrorIs(t, err, NotSupportedError{}) + require.ErrorIs(t, err, compat.NotSupportedError{}) require.EqualError(t, err, "not supported by streaming engine: "+expectedError) require.Nil(t, qry) qry, err = engine.NewInstantQuery(ctx, nil, nil, expression, time.Now()) require.Error(t, err) - require.ErrorIs(t, err, NotSupportedError{}) + require.ErrorIs(t, err, compat.NotSupportedError{}) require.EqualError(t, 
err, "not supported by streaming engine: "+expectedError) require.Nil(t, qry) }) @@ -69,7 +71,7 @@ func TestUnsupportedPromQLFeatures(t *testing.T) { t.Run(expression, func(t *testing.T) { qry, err := engine.NewInstantQuery(ctx, nil, nil, expression, time.Now()) require.Error(t, err) - require.ErrorIs(t, err, NotSupportedError{}) + require.ErrorIs(t, err, compat.NotSupportedError{}) require.EqualError(t, err, "not supported by streaming engine: "+expectedError) require.Nil(t, qry) }) diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go index 9aed53f55dc..08387caa019 100644 --- a/pkg/streamingpromql/query.go +++ b/pkg/streamingpromql/query.go @@ -19,6 +19,7 @@ import ( "github.com/prometheus/prometheus/util/stats" "golang.org/x/exp/slices" + "github.com/grafana/mimir/pkg/streamingpromql/compat" "github.com/grafana/mimir/pkg/streamingpromql/operator" ) @@ -77,7 +78,7 @@ func newQuery(queryable storage.Queryable, opts promql.QueryOpts, qs string, sta return nil, err } default: - return nil, NewNotSupportedError(fmt.Sprintf("%s value as top-level expression", parser.DocumentedType(expr.Type()))) + return nil, compat.NewNotSupportedError(fmt.Sprintf("%s value as top-level expression", parser.DocumentedType(expr.Type()))) } return q, nil @@ -102,7 +103,7 @@ func (q *Query) convertToInstantVectorOperator(expr parser.Expr) (operator.Insta } if e.OriginalOffset != 0 || e.Offset != 0 { - return nil, NewNotSupportedError("instant vector selector with 'offset'") + return nil, compat.NewNotSupportedError("instant vector selector with 'offset'") } return &operator.InstantVectorSelector{ @@ -118,7 +119,7 @@ func (q *Query) convertToInstantVectorOperator(expr parser.Expr) (operator.Insta }, nil case *parser.AggregateExpr: if e.Op != parser.SUM { - return nil, NewNotSupportedError(fmt.Sprintf("'%s' aggregation", e.Op)) + return nil, compat.NewNotSupportedError(fmt.Sprintf("'%s' aggregation", e.Op)) } if e.Param != nil { @@ -127,7 +128,7 @@ func (q *Query) 
convertToInstantVectorOperator(expr parser.Expr) (operator.Insta } if e.Without { - return nil, NewNotSupportedError("grouping with 'without'") + return nil, compat.NewNotSupportedError("grouping with 'without'") } slices.Sort(e.Grouping) @@ -146,7 +147,7 @@ func (q *Query) convertToInstantVectorOperator(expr parser.Expr) (operator.Insta }, nil case *parser.Call: if e.Func.Name != "rate" { - return nil, NewNotSupportedError(fmt.Sprintf("'%s' function", e.Func.Name)) + return nil, compat.NewNotSupportedError(fmt.Sprintf("'%s' function", e.Func.Name)) } if len(e.Args) != 1 { @@ -164,15 +165,15 @@ func (q *Query) convertToInstantVectorOperator(expr parser.Expr) (operator.Insta }, nil case *parser.BinaryExpr: if e.LHS.Type() != parser.ValueTypeVector || e.RHS.Type() != parser.ValueTypeVector { - return nil, NewNotSupportedError("binary expression with scalars") + return nil, compat.NewNotSupportedError("binary expression with scalars") } if e.VectorMatching.Card != parser.CardOneToOne { - return nil, NewNotSupportedError(fmt.Sprintf("binary expression with %v matching", e.VectorMatching.Card)) + return nil, compat.NewNotSupportedError(fmt.Sprintf("binary expression with %v matching", e.VectorMatching.Card)) } if e.Op.IsComparisonOperator() || e.Op.IsSetOperator() { - return nil, NewNotSupportedError(fmt.Sprintf("binary expression with '%s'", e.Op)) + return nil, compat.NewNotSupportedError(fmt.Sprintf("binary expression with '%s'", e.Op)) } lhs, err := q.convertToInstantVectorOperator(e.LHS) @@ -197,7 +198,7 @@ func (q *Query) convertToInstantVectorOperator(expr parser.Expr) (operator.Insta case *parser.ParenExpr: return q.convertToInstantVectorOperator(e.Expr) default: - return nil, NewNotSupportedError(fmt.Sprintf("PromQL expression type %T", e)) + return nil, compat.NewNotSupportedError(fmt.Sprintf("PromQL expression type %T", e)) } } @@ -211,7 +212,7 @@ func (q *Query) convertToRangeVectorOperator(expr parser.Expr) (operator.RangeVe vectorSelector := 
e.VectorSelector.(*parser.VectorSelector) if vectorSelector.OriginalOffset != 0 || vectorSelector.Offset != 0 { - return nil, NewNotSupportedError("range vector selector with 'offset'") + return nil, compat.NewNotSupportedError("range vector selector with 'offset'") } interval := q.statement.Interval @@ -237,7 +238,7 @@ func (q *Query) convertToRangeVectorOperator(expr parser.Expr) (operator.RangeVe case *parser.ParenExpr: return q.convertToRangeVectorOperator(e.Expr) default: - return nil, NewNotSupportedError(fmt.Sprintf("PromQL expression type %T", e)) + return nil, compat.NewNotSupportedError(fmt.Sprintf("PromQL expression type %T", e)) } } @@ -280,7 +281,7 @@ func (q *Query) Exec(ctx context.Context) *promql.Result { } default: // This should be caught in newQuery above. - return &promql.Result{Err: NewNotSupportedError(fmt.Sprintf("unsupported result type %s", parser.DocumentedType(q.statement.Expr.Type())))} + return &promql.Result{Err: compat.NewNotSupportedError(fmt.Sprintf("unsupported result type %s", parser.DocumentedType(q.statement.Expr.Type())))} } return q.result From 3f1aa3a1ab471fad7ffccd1c148b3a9897290f11 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 10 May 2024 16:14:26 +1000 Subject: [PATCH 36/43] Consolidate check for supported operator into one place --- .../operator/binary_operation.go | 29 +++++++++++++------ pkg/streamingpromql/query.go | 11 +------ 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index 5d4d587d62d..17fa641d78a 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -17,6 +17,8 @@ import ( "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + + "github.com/grafana/mimir/pkg/streamingpromql/compat" ) // BinaryOperation represents a binary operation 
between instant vectors such as " + " or " - ". @@ -37,7 +39,7 @@ type BinaryOperation struct { remainingSeries []*binaryOperationOutputSeries leftBuffer *binaryOperationSeriesBuffer rightBuffer *binaryOperationSeriesBuffer - op binaryOperationFunc + opFunc binaryOperationFunc } var _ InstantVectorOperator = &BinaryOperation{} @@ -61,6 +63,22 @@ func (s binaryOperationOutputSeries) latestRightSeries() int { return s.rightSeriesIndices[len(s.rightSeriesIndices)-1] } +func NewBinaryOperation(left InstantVectorOperator, right InstantVectorOperator, vectorMatching parser.VectorMatching, op parser.ItemType) (*BinaryOperation, error) { + opFunc := arithmeticOperationFuncs[op] + if opFunc == nil { + return nil, compat.NewNotSupportedError(fmt.Sprintf("binary expression with '%s'", op)) + } + + return &BinaryOperation{ + Left: left, + Right: right, + VectorMatching: vectorMatching, + Op: op, + + opFunc: opFunc, + }, nil +} + // SeriesMetadata returns the series expected to be produced by this operator. // // Note that it is possible that this method returns a series which will not have any points, as the @@ -77,13 +95,6 @@ func (s binaryOperationOutputSeries) latestRightSeries() int { // contain points, but that would mean we'd need to hold the entire result in memory at once, which we want to // avoid.) 
func (b *BinaryOperation) SeriesMetadata(ctx context.Context) ([]SeriesMetadata, error) { - b.op = arithmeticOperationFuncs[b.Op] - if b.op == nil { - // This should never happen, this should be caught by Query.convertToOperator - // FIXME: move NotSupportedError to a separate package so we can use it in a constructor function for BinaryOperation and remove the check in Query.convertToOperator - return nil, fmt.Errorf("unsupported binary operation '%s'", b.Op) - } - if canProduceAnySeries, err := b.loadSeriesMetadata(ctx); err != nil { return nil, err } else if !canProduceAnySeries { @@ -500,7 +511,7 @@ func (b *BinaryOperation) computeResult(left InstantVectorSeriesData, right Inst if leftPoint.T == right.Floats[nextRightIndex].T { // We have matching points on both sides, compute the result. output = append(output, promql.FPoint{ - F: b.op(leftPoint.F, right.Floats[nextRightIndex].F), + F: b.opFunc(leftPoint.F, right.Floats[nextRightIndex].F), T: leftPoint.T, }) } diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go index 08387caa019..d74cae7efd9 100644 --- a/pkg/streamingpromql/query.go +++ b/pkg/streamingpromql/query.go @@ -172,10 +172,6 @@ func (q *Query) convertToInstantVectorOperator(expr parser.Expr) (operator.Insta return nil, compat.NewNotSupportedError(fmt.Sprintf("binary expression with %v matching", e.VectorMatching.Card)) } - if e.Op.IsComparisonOperator() || e.Op.IsSetOperator() { - return nil, compat.NewNotSupportedError(fmt.Sprintf("binary expression with '%s'", e.Op)) - } - lhs, err := q.convertToInstantVectorOperator(e.LHS) if err != nil { return nil, err @@ -186,12 +182,7 @@ func (q *Query) convertToInstantVectorOperator(expr parser.Expr) (operator.Insta return nil, err } - return &operator.BinaryOperation{ - Left: lhs, - Right: rhs, - VectorMatching: *e.VectorMatching, - Op: e.Op, - }, nil + return operator.NewBinaryOperation(lhs, rhs, *e.VectorMatching, e.Op) case *parser.StepInvariantExpr: // One day, we'll do something 
smarter here. return q.convertToInstantVectorOperator(e.Expr) From 20344f511d74b675ce33d1a9aa74c78d133f873b Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 14 May 2024 13:45:13 +1000 Subject: [PATCH 37/43] Use string of group labels as map key when constructing output series This solves the issue of hash conflicts, and it doesn't introduce a significant performance or memory utilisation penalty. --- .../operator/binary_operation.go | 46 ++++--------------- 1 file changed, 9 insertions(+), 37 deletions(-) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index 17fa641d78a..4f0a51200e5 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -153,29 +153,27 @@ func (b *BinaryOperation) loadSeriesMetadata(ctx context.Context) (bool, error) // - a list indicating which series from the left side are needed to compute the output // - a list indicating which series from the right side are needed to compute the output func (b *BinaryOperation) computeOutputSeries() ([]SeriesMetadata, []*binaryOperationOutputSeries, []bool, []bool) { - // TODO: Prometheus' engine uses strings for the key here, which would avoid issues with hash collisions, but seems much slower. - // Either we should use strings, or we'll need to deal with hash collisions. - hashFunc := b.hashFunc() - outputSeriesMap := map[uint64]*binaryOperationOutputSeries{} + labelsFunc := b.labelsFunc() + outputSeriesMap := map[string]*binaryOperationOutputSeries{} // TODO: is it better to use whichever side has fewer series for this first loop? 
Should result in a smaller map and therefore less work later on // Would need to be careful about 'or' and 'unless' cases for idx, s := range b.leftMetadata { - hash := hashFunc(s.Labels) - series, exists := outputSeriesMap[hash] + groupLabels := labelsFunc(s.Labels).String() + series, exists := outputSeriesMap[groupLabels] if !exists { series = &binaryOperationOutputSeries{} - outputSeriesMap[hash] = series + outputSeriesMap[groupLabels] = series } series.leftSeriesIndices = append(series.leftSeriesIndices, idx) } for idx, s := range b.rightMetadata { - hash := hashFunc(s.Labels) + groupLabels := labelsFunc(s.Labels).String() - if series, exists := outputSeriesMap[hash]; exists { + if series, exists := outputSeriesMap[groupLabels]; exists { series.rightSeriesIndices = append(series.rightSeriesIndices, idx) } @@ -183,17 +181,16 @@ func (b *BinaryOperation) computeOutputSeries() ([]SeriesMetadata, []*binaryOper } // Remove series that cannot produce samples. - for hash, outputSeries := range outputSeriesMap { + for seriesLabels, outputSeries := range outputSeriesMap { if len(outputSeries.leftSeriesIndices) == 0 || len(outputSeries.rightSeriesIndices) == 0 { // FIXME: this is incorrect for 'or' and 'unless' // No matching series on at least one side for this output series, so output series will have no samples. Remove it. - delete(outputSeriesMap, hash) + delete(outputSeriesMap, seriesLabels) } } allMetadata := make([]SeriesMetadata, 0, len(outputSeriesMap)) allSeries := make([]*binaryOperationOutputSeries, 0, len(outputSeriesMap)) - labelsFunc := b.labelsFunc() leftSeriesUsed := GetBoolSlice(len(b.leftMetadata))[:len(b.leftMetadata)] rightSeriesUsed := GetBoolSlice(len(b.rightMetadata))[:len(b.rightMetadata)] @@ -291,31 +288,6 @@ func (g favourLeftSideSorter) Swap(i, j int) { g.series[i], g.series[j] = g.series[j], g.series[i] } -// hashFunc returns a function that computes the hash of the output group this series belongs to. 
-func (b *BinaryOperation) hashFunc() func(labels.Labels) uint64 { - buf := make([]byte, 0, 1024) - names := b.VectorMatching.MatchingLabels - - if b.VectorMatching.On { - slices.Sort(names) - - return func(l labels.Labels) uint64 { - var hash uint64 - hash, buf = l.HashForLabels(buf, names...) - return hash - } - } - - names = append([]string{labels.MetricName}, names...) - slices.Sort(names) - - return func(l labels.Labels) uint64 { - var hash uint64 - hash, buf = l.HashWithoutLabels(buf, names...) - return hash - } -} - // labelsFunc returns a function that computes the labels of the output group this series belongs to. func (b *BinaryOperation) labelsFunc() func(labels.Labels) labels.Labels { lb := labels.NewBuilder(labels.EmptyLabels()) From dac881564372083461fc7d97857d5ee9265d1701 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 14 May 2024 14:12:47 +1000 Subject: [PATCH 38/43] Fix indentation --- tools/benchmark-query-engine/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/benchmark-query-engine/main.go b/tools/benchmark-query-engine/main.go index 7ab4f0dd6e8..f9ab6b7a6b1 100644 --- a/tools/benchmark-query-engine/main.go +++ b/tools/benchmark-query-engine/main.go @@ -44,7 +44,7 @@ type app struct { testFilter string listTests bool justRunIngester bool - cpuProfilePath string + cpuProfilePath string memProfilePath string } From a737a98ad051e72f5f1ec1bf56938b3247de153c Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 14 May 2024 14:24:41 +1000 Subject: [PATCH 39/43] Add tests for sorting behaviour --- .../operator/binary_operation_test.go | 230 ++++++++++++++++++ 1 file changed, 230 insertions(+) diff --git a/pkg/streamingpromql/operator/binary_operation_test.go b/pkg/streamingpromql/operator/binary_operation_test.go index a4c6f048f13..3ed609a7b3b 100644 --- a/pkg/streamingpromql/operator/binary_operation_test.go +++ b/pkg/streamingpromql/operator/binary_operation_test.go @@ -3,6 +3,9 @@ package operator import ( + 
"slices" + "sort" + "strconv" "testing" "github.com/prometheus/prometheus/model/labels" @@ -260,3 +263,230 @@ func TestBinaryOperation_SeriesMerging(t *testing.T) { }) } } + +func TestBinaryOperation_Sorting(t *testing.T) { + testCases := map[string]struct { + series []*binaryOperationOutputSeries + + expectedOrderFavouringLeftSide []int + expectedOrderFavouringRightSide []int + }{ + "no output series": { + series: []*binaryOperationOutputSeries{}, + + expectedOrderFavouringLeftSide: []int{}, + expectedOrderFavouringRightSide: []int{}, + }, + "single output series": { + series: []*binaryOperationOutputSeries{ + { + leftSeriesIndices: []int{4}, + rightSeriesIndices: []int{1}, + }, + }, + + expectedOrderFavouringLeftSide: []int{0}, + expectedOrderFavouringRightSide: []int{0}, + }, + "two output series, both with one input series, read from both sides in same order and already sorted correctly": { + series: []*binaryOperationOutputSeries{ + { + leftSeriesIndices: []int{1}, + rightSeriesIndices: []int{1}, + }, + { + leftSeriesIndices: []int{2}, + rightSeriesIndices: []int{2}, + }, + }, + + expectedOrderFavouringLeftSide: []int{0, 1}, + expectedOrderFavouringRightSide: []int{0, 1}, + }, + "two output series, both with one input series, read from both sides in same order but sorted incorrectly": { + series: []*binaryOperationOutputSeries{ + { + leftSeriesIndices: []int{2}, + rightSeriesIndices: []int{2}, + }, + { + leftSeriesIndices: []int{1}, + rightSeriesIndices: []int{1}, + }, + }, + + expectedOrderFavouringLeftSide: []int{1, 0}, + expectedOrderFavouringRightSide: []int{1, 0}, + }, + "two output series, both with one input series, read from both sides in different order": { + series: []*binaryOperationOutputSeries{ + { + leftSeriesIndices: []int{1}, + rightSeriesIndices: []int{2}, + }, + { + leftSeriesIndices: []int{2}, + rightSeriesIndices: []int{1}, + }, + }, + + expectedOrderFavouringLeftSide: []int{0, 1}, + expectedOrderFavouringRightSide: []int{1, 0}, + }, + "two 
output series, both with multiple input series": { + series: []*binaryOperationOutputSeries{ + { + leftSeriesIndices: []int{1, 2}, + rightSeriesIndices: []int{0, 3}, + }, + { + leftSeriesIndices: []int{0, 3}, + rightSeriesIndices: []int{1, 2}, + }, + }, + + expectedOrderFavouringLeftSide: []int{0, 1}, + expectedOrderFavouringRightSide: []int{1, 0}, + }, + "multiple output series, both with one input series, read from both sides in same order and already sorted correctly": { + series: []*binaryOperationOutputSeries{ + { + leftSeriesIndices: []int{1}, + rightSeriesIndices: []int{1}, + }, + { + leftSeriesIndices: []int{2}, + rightSeriesIndices: []int{2}, + }, + { + leftSeriesIndices: []int{3}, + rightSeriesIndices: []int{3}, + }, + }, + + expectedOrderFavouringLeftSide: []int{0, 1, 2}, + expectedOrderFavouringRightSide: []int{0, 1, 2}, + }, + "multiple output series, both with one input series, read from both sides in same order but sorted incorrectly": { + series: []*binaryOperationOutputSeries{ + { + leftSeriesIndices: []int{2}, + rightSeriesIndices: []int{2}, + }, + { + leftSeriesIndices: []int{3}, + rightSeriesIndices: []int{3}, + }, + { + leftSeriesIndices: []int{1}, + rightSeriesIndices: []int{1}, + }, + }, + + expectedOrderFavouringLeftSide: []int{2, 0, 1}, + expectedOrderFavouringRightSide: []int{2, 0, 1}, + }, + "multiple output series, both with one input series, read from both sides in different order": { + series: []*binaryOperationOutputSeries{ + { + leftSeriesIndices: []int{1}, + rightSeriesIndices: []int{2}, + }, + { + leftSeriesIndices: []int{3}, + rightSeriesIndices: []int{3}, + }, + { + leftSeriesIndices: []int{2}, + rightSeriesIndices: []int{1}, + }, + }, + + expectedOrderFavouringLeftSide: []int{0, 2, 1}, + expectedOrderFavouringRightSide: []int{2, 0, 1}, + }, + "multiple output series, with multiple input series each": { + series: []*binaryOperationOutputSeries{ + { + leftSeriesIndices: []int{4, 5, 10}, + rightSeriesIndices: []int{2, 20}, + }, + { 
+ leftSeriesIndices: []int{2, 4, 15}, + rightSeriesIndices: []int{3, 5, 50}, + }, + { + leftSeriesIndices: []int{3, 1}, + rightSeriesIndices: []int{1, 40}, + }, + }, + + expectedOrderFavouringLeftSide: []int{2, 0, 1}, + expectedOrderFavouringRightSide: []int{0, 2, 1}, + }, + "multiple output series which depend on the same input series": { + series: []*binaryOperationOutputSeries{ + { + leftSeriesIndices: []int{1}, + rightSeriesIndices: []int{2}, + }, + { + leftSeriesIndices: []int{1}, + rightSeriesIndices: []int{1}, + }, + { + leftSeriesIndices: []int{2}, + rightSeriesIndices: []int{2}, + }, + { + leftSeriesIndices: []int{2}, + rightSeriesIndices: []int{1}, + }, + }, + + expectedOrderFavouringLeftSide: []int{1, 0, 3, 2}, + expectedOrderFavouringRightSide: []int{1, 3, 0, 2}, + }, + } + + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + require.Len(t, testCase.expectedOrderFavouringLeftSide, len(testCase.series), "invalid test case: should have same number of input and output series for order favouring left side") + require.Len(t, testCase.expectedOrderFavouringRightSide, len(testCase.series), "invalid test case: should have same number of input and output series for order favouring right side") + + metadata := make([]SeriesMetadata, len(testCase.series)) + for i, _ := range testCase.series { + metadata[i] = SeriesMetadata{labels.FromStrings("series", strconv.Itoa(i))} + } + + test := func(t *testing.T, series []*binaryOperationOutputSeries, metadata []SeriesMetadata, sorter sort.Interface, expectedOrder []int) { + expectedSeriesOrder := make([]*binaryOperationOutputSeries, len(series)) + expectedMetadataOrder := make([]SeriesMetadata, len(metadata)) + + for outputIndex, inputIndex := range expectedOrder { + expectedSeriesOrder[outputIndex] = series[inputIndex] + expectedMetadataOrder[outputIndex] = metadata[inputIndex] + } + + sort.Sort(sorter) + + require.Equal(t, expectedSeriesOrder, series) + require.Equal(t, expectedMetadataOrder, 
metadata)
+ }
+
+ t.Run("sorting favouring left side", func(t *testing.T) {
+ series := slices.Clone(testCase.series)
+ metadata := slices.Clone(metadata)
+ sorter := favourLeftSideSorter{metadata, series}
+ test(t, series, metadata, sorter, testCase.expectedOrderFavouringLeftSide)
+ })
+
+ t.Run("sorting favouring right side", func(t *testing.T) {
+ series := slices.Clone(testCase.series)
+ metadata := slices.Clone(metadata)
+ sorter := favourRightSideSorter{metadata, series}
+ test(t, series, metadata, sorter, testCase.expectedOrderFavouringRightSide)
+ })
+ })
+ }
+}
From 108ce4ea18adb1f7875fd777c742fb5269562edd Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 14 May 2024 14:27:52 +1000 Subject: [PATCH 40/43] Reduce duplication in `favourLeftSideSorter` and `favourRightSideSorter` --- .../operator/binary_operation.go | 48 ++++++++++--------- .../operator/binary_operation_test.go | 4 +- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index 4f0a51200e5..dcd753a9f9e 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -232,40 +232,42 @@ func (b *BinaryOperation) sortSeries(metadata []SeriesMetadata, series []*binary var sortInterface sort.Interface if len(b.leftMetadata) < len(b.rightMetadata) { - sortInterface = favourRightSideSorter{metadata, series} + sortInterface = newFavourRightSideSorter(metadata, series) } else { - sortInterface = favourLeftSideSorter{metadata, series} + sortInterface = newFavourLeftSideSorter(metadata, series) } sort.Sort(sortInterface) } +type binaryOperationOutputSorter struct { metadata []SeriesMetadata series []*binaryOperationOutputSeries } type favourLeftSideSorter struct { - metadata []SeriesMetadata - series []*binaryOperationOutputSeries + binaryOperationOutputSorter } -func (g favourRightSideSorter) Len() 
int { - return len(g.metadata) +func newFavourLeftSideSorter(metadata []SeriesMetadata, series []*binaryOperationOutputSeries) favourLeftSideSorter { + return favourLeftSideSorter{binaryOperationOutputSorter{metadata, series}} } -func (g favourLeftSideSorter) Len() int { - return len(g.metadata) +type favourRightSideSorter struct { + binaryOperationOutputSorter } -func (g favourRightSideSorter) Less(i, j int) bool { - iRight := g.series[i].latestRightSeries() - jRight := g.series[j].latestRightSeries() - if iRight != jRight { - return iRight < jRight - } +func newFavourRightSideSorter(metadata []SeriesMetadata, series []*binaryOperationOutputSeries) favourRightSideSorter { + return favourRightSideSorter{binaryOperationOutputSorter{metadata, series}} +} - return g.series[i].latestLeftSeries() < g.series[j].latestLeftSeries() +func (g binaryOperationOutputSorter) Len() int { + return len(g.metadata) +} + +func (g binaryOperationOutputSorter) Swap(i, j int) { + g.metadata[i], g.metadata[j] = g.metadata[j], g.metadata[i] + g.series[i], g.series[j] = g.series[j], g.series[i] } func (g favourLeftSideSorter) Less(i, j int) bool { @@ -278,14 +280,14 @@ func (g favourLeftSideSorter) Less(i, j int) bool { return g.series[i].latestRightSeries() < g.series[j].latestRightSeries() } -func (g favourRightSideSorter) Swap(i, j int) { - g.metadata[i], g.metadata[j] = g.metadata[j], g.metadata[i] - g.series[i], g.series[j] = g.series[j], g.series[i] -} +func (g favourRightSideSorter) Less(i, j int) bool { + iRight := g.series[i].latestRightSeries() + jRight := g.series[j].latestRightSeries() + if iRight != jRight { + return iRight < jRight + } -func (g favourLeftSideSorter) Swap(i, j int) { - g.metadata[i], g.metadata[j] = g.metadata[j], g.metadata[i] - g.series[i], g.series[j] = g.series[j], g.series[i] + return g.series[i].latestLeftSeries() < g.series[j].latestLeftSeries() } // labelsFunc returns a function that computes the labels of the output group this series belongs to. 
diff --git a/pkg/streamingpromql/operator/binary_operation_test.go b/pkg/streamingpromql/operator/binary_operation_test.go index 3ed609a7b3b..e13bedcbf08 100644 --- a/pkg/streamingpromql/operator/binary_operation_test.go +++ b/pkg/streamingpromql/operator/binary_operation_test.go @@ -477,14 +477,14 @@ func TestBinaryOperation_Sorting(t *testing.T) { t.Run("sorting favouring left side", func(t *testing.T) { series := slices.Clone(testCase.series) metadata := slices.Clone(metadata) - sorter := favourLeftSideSorter{metadata, series} + sorter := newFavourLeftSideSorter(metadata, series) test(t, series, metadata, sorter, testCase.expectedOrderFavouringLeftSide) }) t.Run("sorting favouring right side", func(t *testing.T) { series := slices.Clone(testCase.series) metadata := slices.Clone(metadata) - sorter := favourRightSideSorter{metadata, series} + sorter := newFavourRightSideSorter(metadata, series) test(t, series, metadata, sorter, testCase.expectedOrderFavouringRightSide) }) }) From 98c8f60013ee1023358f567bda350fb429c253b8 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 14 May 2024 14:35:51 +1000 Subject: [PATCH 41/43] Fix linting --- pkg/streamingpromql/operator/binary_operation_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/streamingpromql/operator/binary_operation_test.go b/pkg/streamingpromql/operator/binary_operation_test.go index e13bedcbf08..9e54010057e 100644 --- a/pkg/streamingpromql/operator/binary_operation_test.go +++ b/pkg/streamingpromql/operator/binary_operation_test.go @@ -455,7 +455,7 @@ func TestBinaryOperation_Sorting(t *testing.T) { require.Len(t, testCase.expectedOrderFavouringRightSide, len(testCase.series), "invalid test case: should have same number of input and output series for order favouring right side") metadata := make([]SeriesMetadata, len(testCase.series)) - for i, _ := range testCase.series { + for i := range testCase.series { metadata[i] = SeriesMetadata{labels.FromStrings("series", 
strconv.Itoa(i))} } From 5818cd1be9b14971efc77d2929c4f0630b3df924 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 14 May 2024 14:55:07 +1000 Subject: [PATCH 42/43] Add tests for `binaryOperationSeriesBuffer`. --- .../operator/aggregation_test.go | 16 ----- .../operator/binary_operation.go | 1 + .../operator/binary_operation_test.go | 64 +++++++++++++++++++ pkg/streamingpromql/operator/operator_test.go | 34 ++++++++++ 4 files changed, 99 insertions(+), 16 deletions(-) create mode 100644 pkg/streamingpromql/operator/operator_test.go diff --git a/pkg/streamingpromql/operator/aggregation_test.go b/pkg/streamingpromql/operator/aggregation_test.go index 331fb7e672c..44ad8c96831 100644 --- a/pkg/streamingpromql/operator/aggregation_test.go +++ b/pkg/streamingpromql/operator/aggregation_test.go @@ -97,19 +97,3 @@ func labelsToSeriesMetadata(lbls []labels.Labels) []SeriesMetadata { return m } - -type testOperator struct { - series []labels.Labels -} - -func (t *testOperator) SeriesMetadata(_ context.Context) ([]SeriesMetadata, error) { - return labelsToSeriesMetadata(t.series), nil -} - -func (t *testOperator) NextSeries(_ context.Context) (InstantVectorSeriesData, error) { - panic("NextSeries() not supported") -} - -func (t *testOperator) Close() { - panic("Close() not supported") -} diff --git a/pkg/streamingpromql/operator/binary_operation.go b/pkg/streamingpromql/operator/binary_operation.go index dcd753a9f9e..bf3a0272956 100644 --- a/pkg/streamingpromql/operator/binary_operation.go +++ b/pkg/streamingpromql/operator/binary_operation.go @@ -548,6 +548,7 @@ func newBinaryOperationSeriesBuffer(source InstantVectorOperator, seriesUsed []b // getSeries returns the data for the series in seriesIndices. // The returned slice is only safe to use until getSeries is called again. +// seriesIndices should be sorted in ascending order to avoid unnecessary buffering. 
func (b *binaryOperationSeriesBuffer) getSeries(ctx context.Context, seriesIndices []int) ([]InstantVectorSeriesData, error) { if cap(b.output) < len(seriesIndices) { b.output = make([]InstantVectorSeriesData, len(seriesIndices)) diff --git a/pkg/streamingpromql/operator/binary_operation_test.go b/pkg/streamingpromql/operator/binary_operation_test.go index 9e54010057e..c329554ea47 100644 --- a/pkg/streamingpromql/operator/binary_operation_test.go +++ b/pkg/streamingpromql/operator/binary_operation_test.go @@ -3,6 +3,7 @@ package operator import ( + "context" "slices" "sort" "strconv" @@ -490,3 +491,66 @@ func TestBinaryOperation_Sorting(t *testing.T) { }) } } + +func TestBinaryOperationSeriesBuffer(t *testing.T) { + series0Data := InstantVectorSeriesData{Floats: []promql.FPoint{{T: 0, F: 0}}} + series2Data := InstantVectorSeriesData{Floats: []promql.FPoint{{T: 0, F: 2}}} + series3Data := InstantVectorSeriesData{Floats: []promql.FPoint{{T: 0, F: 3}}} + series4Data := InstantVectorSeriesData{Floats: []promql.FPoint{{T: 0, F: 4}}} + series5Data := InstantVectorSeriesData{Floats: []promql.FPoint{{T: 0, F: 5}}} + series6Data := InstantVectorSeriesData{Floats: []promql.FPoint{{T: 0, F: 6}}} + + inner := &testOperator{ + series: []labels.Labels{ + labels.FromStrings("series", "0"), + labels.FromStrings("series", "1"), + labels.FromStrings("series", "2"), + labels.FromStrings("series", "3"), + labels.FromStrings("series", "4"), + labels.FromStrings("series", "5"), + labels.FromStrings("series", "6"), + }, + data: []InstantVectorSeriesData{ + series0Data, + {Floats: []promql.FPoint{{T: 0, F: 1}}}, + series2Data, + series3Data, + series4Data, + series5Data, + series6Data, + }, + } + + seriesUsed := []bool{true, false, true, true, true} + buffer := newBinaryOperationSeriesBuffer(inner, seriesUsed) + ctx := context.Background() + + // Read first series. 
+ series, err := buffer.getSeries(ctx, []int{0}) + require.NoError(t, err) + require.Equal(t, []InstantVectorSeriesData{series0Data}, series) + require.Empty(t, buffer.buffer) // Should not buffer series that was immediately returned. + + // Read next desired series, skipping over series that won't be used. + series, err = buffer.getSeries(ctx, []int{2}) + require.NoError(t, err) + require.Equal(t, []InstantVectorSeriesData{series2Data}, series) + require.Empty(t, buffer.buffer) // Should not buffer series at index 1 that won't be used. + + // Read another desired series, skipping over a series that will be used later. + series, err = buffer.getSeries(ctx, []int{4}) + require.NoError(t, err) + require.Equal(t, []InstantVectorSeriesData{series4Data}, series) + require.Len(t, buffer.buffer, 1) // Should only have buffered a single series (index 3). + + // Read the series we just read past from the buffer. + series, err = buffer.getSeries(ctx, []int{3}) + require.NoError(t, err) + require.Equal(t, []InstantVectorSeriesData{series3Data}, series) + require.Empty(t, buffer.buffer) // Series that has been returned should be removed from buffer once it's returned. + + // Read multiple series. + series, err = buffer.getSeries(ctx, []int{5, 6}) + require.NoError(t, err) + require.Equal(t, []InstantVectorSeriesData{series5Data, series6Data}, series) +} diff --git a/pkg/streamingpromql/operator/operator_test.go b/pkg/streamingpromql/operator/operator_test.go new file mode 100644 index 00000000000..bf071af3e7f --- /dev/null +++ b/pkg/streamingpromql/operator/operator_test.go @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package operator + +import ( + "context" + + "github.com/prometheus/prometheus/model/labels" +) + +// Operator used only in tests. 
+type testOperator struct { + series []labels.Labels + data []InstantVectorSeriesData +} + +func (t *testOperator) SeriesMetadata(_ context.Context) ([]SeriesMetadata, error) { + return labelsToSeriesMetadata(t.series), nil +} + +func (t *testOperator) NextSeries(_ context.Context) (InstantVectorSeriesData, error) { + if len(t.data) == 0 { + return InstantVectorSeriesData{}, EOS + } + + d := t.data[0] + t.data = t.data[1:] + + return d, nil +} + +func (t *testOperator) Close() { + panic("Close() not supported") +} From 61b59a158ca40acf7f1f69bff22e11b0085e9774 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 14 May 2024 15:03:11 +1000 Subject: [PATCH 43/43] Add test cases for `on` and `ignoring` with multiple labels --- .../testdata/ours/binary_operators.test | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index ee6f56b2129..08de3d1ee39 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -84,6 +84,27 @@ eval range from 0 to 24m step 6m left_side + ignoring(pod) right_side clear +# One-to-one matching with "on" and "ignoring" with multiple labels. 
+load 6m + left_side{env="test", pod="a", group="foo"} 1 2 3 + left_side{env="test", pod="b", group="bar"} 4 5 6 + left_side{env="prod", pod="a", group="baz"} 7 8 9 + right_side{env="test", pod="a", group="bar"} 10 20 30 + right_side{env="test", pod="b", group="baz"} 40 50 60 + right_side{env="prod", pod="a", group="foo"} 70 80 90 + +eval range from 0 to 24m step 6m left_side + on(env, pod) right_side + {env="prod", pod="a"} 77 88 99 + {env="test", pod="a"} 11 22 33 + {env="test", pod="b"} 44 55 66 + +eval range from 0 to 24m step 6m left_side + ignoring(env, pod) right_side + {group="baz"} 47 58 69 + {group="bar"} 14 25 36 + {group="foo"} 71 82 93 + +clear + # One-to-one matching, but different series match at different time steps, or not at all load 6m left_side{env="test", bar="a"} 1 _ 3 _ _ 6 _