Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement histogram linear interpolation quantile function #253

Merged
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/bright-dancers-float.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
'@hyperdx/api': patch
'@hyperdx/app': patch
---

feat: implement histogram linear interpolation quantile function
195 changes: 194 additions & 1 deletion packages/api/src/clickhouse/__tests__/clickhouse.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import {
mockLogsPropertyTypeMappingsModel,
mockSpyMetricPropertyTypeMappingsModel,
} from '@/fixtures';
import { LogPlatform, LogType } from '@/utils/logParser';
import { LogPlatform, LogType, MetricModel } from '@/utils/logParser';

import * as clickhouse from '..';

Expand Down Expand Up @@ -966,6 +966,77 @@ Array [
}),
);

const buildMetricSeriesHistogram = ({
tags,
name,
points,
data_type,
is_delta,
is_monotonic,
unit,
}: {
tags: Record<string, string>;
name: string;
points: { value: number; timestamp: string | number; le: string }[];
data_type: clickhouse.MetricsDataType;
is_monotonic: boolean;
is_delta: boolean;
unit: string;
}): MetricModel[] => {
return points.map(({ value, timestamp, le }) => ({
_string_attributes: { ...tags, le: le.toString() },
name,
value,
timestamp: parseInt(timestamp.toString(), 10) * 1000000,
data_type,
is_monotonic,
is_delta,
unit,
}));
};

await clickhouse.bulkInsertTeamMetricStream(
buildMetricSeriesHistogram({
name: 'test.response_time',
tags: { host: 'test2', runId },
data_type: clickhouse.MetricsDataType.Histogram,
is_monotonic: false,
is_delta: false,
unit: '',
points: [
{ value: 100, timestamp: now, le: '10' },
{ value: 105, timestamp: now, le: '30' },
{ value: 110, timestamp: now, le: '50' },
{ value: 120, timestamp: now, le: '100' },
{ value: 120, timestamp: now, le: '200' },
{ value: 120, timestamp: now, le: '300' },
{ value: 120, timestamp: now, le: '500' },
{ value: 140, timestamp: now, le: '1000' },
{ value: 150, timestamp: now, le: '+Inf' },

{ value: 100, timestamp: now + ms('7m'), le: '10' },
{ value: 105, timestamp: now + ms('7m'), le: '30' },
{ value: 110, timestamp: now + ms('7m'), le: '50' },
{ value: 120, timestamp: now + ms('7m'), le: '100' },
{ value: 120, timestamp: now + ms('7m'), le: '200' },
{ value: 120, timestamp: now + ms('7m'), le: '300' },
{ value: 120, timestamp: now + ms('7m'), le: '500' },
{ value: 145, timestamp: now + ms('7m'), le: '1000' },
{ value: 155, timestamp: now + ms('7m'), le: '+Inf' },

{ value: 100, timestamp: now + ms('8m'), le: '10' },
{ value: 105, timestamp: now + ms('8m'), le: '30' },
{ value: 110, timestamp: now + ms('8m'), le: '50' },
{ value: 120, timestamp: now + ms('8m'), le: '100' },
{ value: 120, timestamp: now + ms('8m'), le: '200' },
{ value: 120, timestamp: now + ms('8m'), le: '300' },
{ value: 120, timestamp: now + ms('8m'), le: '500' },
{ value: 200, timestamp: now + ms('8m'), le: '1000' },
{ value: 210, timestamp: now + ms('8m'), le: '+Inf' },
],
}),
);

mockSpyMetricPropertyTypeMappingsModel({
runId: 'string',
host: 'string',
Expand Down Expand Up @@ -1118,6 +1189,128 @@ Array [
`);
});

it('response_time histogram (p50 + p90 + p99)', async () => {
const p50Data = (
await clickhouse.getMultiSeriesChart({
series: [
{
type: 'time',
table: 'metrics',
aggFn: clickhouse.AggFn.P50,
field: 'test.response_time',
where: `runId:${runId}`,
groupBy: [],
metricDataType: clickhouse.MetricsDataType.Histogram,
},
],
tableVersion: undefined,
teamId,
startTime: now,
endTime: now + ms('10m'),
granularity: '5 minute',
maxNumGroups: 20,
seriesReturnType: clickhouse.SeriesReturnType.Column,
})
).data.map(d => {
return _.pick(d, ['group', 'series_0.data', 'ts_bucket']);
});

expect(p50Data).toMatchInlineSnapshot(`
Array [
Object {
"group": Array [],
"series_0.data": 10,
"ts_bucket": 1641340800,
},
Object {
"group": Array [],
"series_0.data": 750,
"ts_bucket": 1641341100,
},
]
`);

const p90Data = (
await clickhouse.getMultiSeriesChart({
series: [
{
type: 'time',
table: 'metrics',
aggFn: clickhouse.AggFn.P90,
field: 'test.response_time',
where: `runId:${runId}`,
groupBy: [],
metricDataType: clickhouse.MetricsDataType.Histogram,
},
],
tableVersion: undefined,
teamId,
startTime: now,
endTime: now + ms('10m'),
granularity: '5 minute',
maxNumGroups: 20,
seriesReturnType: clickhouse.SeriesReturnType.Column,
})
).data.map(d => {
return _.pick(d, ['group', 'series_0.data', 'ts_bucket']);
});

expect(p90Data).toMatchInlineSnapshot(`
Array [
Object {
"group": Array [],
"series_0.data": 875,
"ts_bucket": 1641340800,
},
Object {
"group": Array [],
"series_0.data": 950,
"ts_bucket": 1641341100,
},
]
`);

const p99Data = (
await clickhouse.getMultiSeriesChart({
series: [
{
type: 'time',
table: 'metrics',
aggFn: clickhouse.AggFn.P99,
field: 'test.response_time',
where: `runId:${runId}`,
groupBy: [],
metricDataType: clickhouse.MetricsDataType.Histogram,
},
],
tableVersion: undefined,
teamId,
startTime: now,
endTime: now + ms('10m'),
granularity: '5 minute',
maxNumGroups: 20,
seriesReturnType: clickhouse.SeriesReturnType.Column,
})
).data.map(d => {
return _.pick(d, ['group', 'series_0.data', 'ts_bucket']);
});

expect(p99Data).toMatchInlineSnapshot(`
Array [
Object {
"group": Array [],
"series_0.data": 1000,
"ts_bucket": 1641340800,
},
Object {
"group": Array [],
"series_0.data": 995,
"ts_bucket": 1641341100,
},
]
`);
});

it('filters using postGroupWhere properly', async () => {
const queryConfig: Parameters<typeof clickhouse.getMultiSeriesChart>[0] =
{
Expand Down
74 changes: 72 additions & 2 deletions packages/api/src/clickhouse/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -956,8 +956,13 @@ export const buildMetricSeriesQuery = async ({
: '0.99'
})(${isRate ? 'rate' : 'value'}) as data`,
);
} else if (dataType === MetricsDataType.Histogram) {
if (!['p50', 'p90', 'p95', 'p99'].includes(aggFn)) {
throw new Error(`Unsupported aggFn for Histogram: ${aggFn}`);
}
selectClause.push(`max(value) as data`);
} else {
logger.error(`Unsupported data type: ${dataType}`);
throw new Error(`Unsupported data type: ${dataType}`);
}

const startTimeUnixTs = Math.floor(startTime / 1000);
Expand Down Expand Up @@ -1019,6 +1024,71 @@ export const buildMetricSeriesQuery = async ({
],
);

const quantile =
aggFn === AggFn.P50
? '0.5'
: aggFn === AggFn.P90
? '0.90'
: aggFn === AggFn.P95
? '0.95'
: '0.99';

const histogramMetricSource = SqlString.format(
`
WITH points AS (
wrn14897 marked this conversation as resolved.
Show resolved Hide resolved
SELECT
timestamp,
name,
arraySort((x) -> x[2],
groupArray([
toFloat64(value),
toFloat64OrDefault(_string_attributes['le'], inf)
])
) AS _point,
neighbor(_point, -1) AS _prev_point,
length(_point) AS n,
if (
n = length(_prev_point),
arrayMap((x, y) -> [x[1] - y[1], x[2]], _point, _prev_point),
_point
wrn14897 marked this conversation as resolved.
Show resolved Hide resolved
) AS point,
mapFilter((k, v) -> (k != 'le'), _string_attributes) AS filtered_string_attributes
FROM (?)
WHERE mapContains(_string_attributes, 'le')
GROUP BY timestamp, name, filtered_string_attributes
)
SELECT
timestamp,
name,
filtered_string_attributes AS _string_attributes,
point[n][1] AS total,
toFloat64(?) * total AS rank,
arrayFirstIndex(x -> x[1] > rank, point) AS upper_idx,
if (
upper_idx = n,
wrn14897 marked this conversation as resolved.
Show resolved Hide resolved
point[upper_idx - 1][2],
if (
upper_idx = 1,
wrn14897 marked this conversation as resolved.
Show resolved Hide resolved
point[1][2],
point[upper_idx - 1][2] + (point[upper_idx][2] - point[upper_idx - 1][2]) * (
(rank - point[upper_idx - 1][1]) / (point[upper_idx][1] - point[upper_idx - 1][1])
)
)
) AS value
FROM points
WHERE total > 0
AND length(point) > 1
ORDER BY timestamp ASC`.trim(),
[SqlString.raw(gaugeMetricSource), quantile],
);

const source =
dataType === MetricsDataType.Histogram
? histogramMetricSource
: isRate
? rateMetricSource
: gaugeMetricSource;

const query = SqlString.format(
`
WITH metrics AS (?)
Expand All @@ -1036,7 +1106,7 @@ export const buildMetricSeriesQuery = async ({
}
`,
[
SqlString.raw(isRate ? rateMetricSource : gaugeMetricSource),
SqlString.raw(source),
SqlString.raw(selectClause.join(',')),
...(granularity != null
? [
Expand Down
Loading