From e6b12465d5936e1014e36edf1efb27fa4b098f29 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Thu, 13 Nov 2025 16:11:33 -0600 Subject: [PATCH 1/5] feat(tasks): emit duration and success/failure counts for tasks Emits success/failure counter values as well as execution duration as a gauge for task execution. This allows monitoring the background task health using HyperDX alerts. --- .../__tests__/checkAlertsTask.test.ts | 10 +-- packages/api/src/tasks/index.ts | 39 ++++++---- packages/api/src/tasks/metrics.ts | 71 +++++++++++++++++++ packages/api/src/tasks/types.ts | 9 ++- 4 files changed, 108 insertions(+), 21 deletions(-) create mode 100644 packages/api/src/tasks/metrics.ts diff --git a/packages/api/src/tasks/checkAlerts/__tests__/checkAlertsTask.test.ts b/packages/api/src/tasks/checkAlerts/__tests__/checkAlertsTask.test.ts index 60a20c9a8..71f08143c 100644 --- a/packages/api/src/tasks/checkAlerts/__tests__/checkAlertsTask.test.ts +++ b/packages/api/src/tasks/checkAlerts/__tests__/checkAlertsTask.test.ts @@ -14,7 +14,7 @@ import { AlertTaskType, loadProvider, } from '@/tasks/checkAlerts/providers'; -import { CheckAlertsTaskArgs } from '@/tasks/types'; +import { CheckAlertsTaskArgs, TaskName } from '@/tasks/types'; jest.mock('@/tasks/checkAlerts/providers', () => { return { @@ -65,7 +65,7 @@ describe('CheckAlertTask', () => { }); it('should execute successfully with no alert tasks', async () => { - const args: CheckAlertsTaskArgs = { taskName: 'check-alerts' }; + const args: CheckAlertsTaskArgs = { taskName: TaskName.CHECK_ALERTS }; const task = new CheckAlertTask(args); mockAlertProvider.getAlertTasks.mockResolvedValue([]); @@ -83,7 +83,7 @@ describe('CheckAlertTask', () => { it('should execute successfully with custom provider', async () => { const args: CheckAlertsTaskArgs = { - taskName: 'check-alerts', + taskName: TaskName.CHECK_ALERTS, provider: 'custom-provider', }; const task = new CheckAlertTask(args); @@ -99,7 +99,7 @@ describe('CheckAlertTask', () => { }); it('should process alert tasks', async () => { - const args: CheckAlertsTaskArgs = { taskName: 'check-alerts' }; + const args: CheckAlertsTaskArgs = { taskName: TaskName.CHECK_ALERTS }; const task = new CheckAlertTask(args); const mockAlert = { @@ -171,7 +171,7 @@ describe('CheckAlertTask', () => { }); it("should ensure that the correct team's webhooks are passed to processAlert", async () => { - const args: CheckAlertsTaskArgs = { taskName: 'check-alerts' }; + const args: CheckAlertsTaskArgs = { taskName: TaskName.CHECK_ALERTS }; const task = new CheckAlertTask(args); // Create two teams diff --git a/packages/api/src/tasks/index.ts b/packages/api/src/tasks/index.ts index 8e4675796..71bf1d53e 100644 --- a/packages/api/src/tasks/index.ts +++ b/packages/api/src/tasks/index.ts @@ -1,12 +1,17 @@ import { CronJob } from 'cron'; import minimist from 'minimist'; -import { performance } from 'perf_hooks'; import { serializeError } from 'serialize-error'; import { RUN_SCHEDULED_TASKS_EXTERNALLY } from '@/config'; import CheckAlertTask from '@/tasks/checkAlerts'; +import { + taskExecutionDurationGauge, + taskExecutionFailureCounter, + taskExecutionSuccessCounter, + timeExec, +} from '@/tasks/metrics'; import PingPongTask from '@/tasks/pingPongTask'; -import { asTaskArgs, HdxTask, TaskArgs } from '@/tasks/types'; +import { asTaskArgs, HdxTask, TaskArgs, TaskName } from '@/tasks/types'; import logger from '@/utils/logger'; import { tasksTracer } from './tracer'; @@ -14,39 +19,45 @@ import { tasksTracer } from './tracer'; function createTask(argv: TaskArgs): HdxTask { const taskName = argv.taskName; switch (taskName) { - case 'check-alerts': + case TaskName.CHECK_ALERTS: return new CheckAlertTask(argv); - case 'ping-pong': + case TaskName.PING_PONG: return new PingPongTask(argv); default: throw new Error(`Unknown task name ${taskName}`); } } -const main = async (argv: TaskArgs) => { +async function main(argv: TaskArgs): Promise { await tasksTracer.startActiveSpan(argv.taskName || 'task', async span => { const task: HdxTask = createTask(argv); try { - const t0 = performance.now(); - logger.info(`Task [${task.name()}] started at ${new Date()}`); + logger.info(`${task.name()} started at ${new Date()}`); await task.execute(); - logger.info( - `Task [${task.name()}] finished in ${(performance.now() - t0).toFixed(2)} ms`, - ); + taskExecutionSuccessCounter.get(argv.taskName)?.add(1); } catch (e: unknown) { logger.error( { cause: e, task, }, - `Task [${task.name()}] failed: ${serializeError(e)}`, + `${task.name()} failed: ${serializeError(e)}`, ); + taskExecutionFailureCounter.get(argv.taskName)?.add(1); } finally { await task.asyncDispose(); span.end(); } }); -}; +} + +const instrumentedMain = timeExec(main, duration => { + const gauge = taskExecutionDurationGauge.get(argv.taskName); + if (gauge) { + gauge.record(duration, { useCron: true }); + } + logger.info(`${argv.taskName} finished in ${duration.toFixed(2)} ms`); +}); // Entry point const argv = asTaskArgs(minimist(process.argv.slice(2))); @@ -57,7 +68,7 @@ if (!RUN_SCHEDULED_TASKS_EXTERNALLY) { const job = CronJob.from({ cronTime: '0 * * * * *', waitForCompletion: true, - onTick: async () => main(argv), + onTick: async () => instrumentedMain(argv), errorHandler: async err => { console.error(err); }, @@ -66,7 +77,7 @@ if (!RUN_SCHEDULED_TASKS_EXTERNALLY) { }); } else { logger.warn('In-app cron job is disabled'); - main(argv) + instrumentedMain(argv) .then(() => { process.exit(0); }) diff --git a/packages/api/src/tasks/metrics.ts b/packages/api/src/tasks/metrics.ts new file mode 100644 index 000000000..32b9daedb --- /dev/null +++ b/packages/api/src/tasks/metrics.ts @@ -0,0 +1,71 @@ +import { + Attributes, + Counter, + Gauge, + metrics, + ValueType, +} from '@opentelemetry/api'; +import { performance } from 'perf_hooks'; + +import { TaskName } from '@/tasks/types'; + +const meter = metrics.getMeter('hyperdx-tasks'); + +export const taskExecutionSuccessCounter: Map< + TaskName, + Counter +> = new Map(); + +export const taskExecutionFailureCounter: Map< + TaskName, + Counter +> = new Map(); + +export const taskExecutionDurationGauge: Map< + TaskName, + Gauge +> = new Map(); + +for (const name of Object.values(TaskName)) { + taskExecutionSuccessCounter.set( + name, + meter.createCounter(`hyperdx.tasks.${name}.success`, { + description: + 'Count of the number of times the task finished without exceptions.', + }), + ); + + taskExecutionFailureCounter.set( + name, + meter.createCounter(`hyperdx.tasks.${name}.failure`, { + description: + 'Count of the number of times the task failed to finish because of an exception', + }), + ); + + taskExecutionDurationGauge.set( + name, + meter.createGauge(`hyperdx.tasks.${name}.duration`, { + description: `The wall time required for the ${name} task to complete execution.`, + unit: 'ms', + valueType: ValueType.DOUBLE, + }), + ); +} + +export function timeExec( + fn: (...args: T) => R, + recordFn?: (duration: number) => void, +) { + return async (...args: T) => { + const start = performance.now(); + try { + return await fn(...args); + } finally { + if (recordFn) { + const end = performance.now(); + recordFn(end - start); + } + } + }; +} diff --git a/packages/api/src/tasks/types.ts b/packages/api/src/tasks/types.ts index d5d705cfa..b3700558b 100644 --- a/packages/api/src/tasks/types.ts +++ b/packages/api/src/tasks/types.ts @@ -1,15 +1,20 @@ import { z } from 'zod'; +export enum TaskName { + PING_PONG = 'ping-pong', + CHECK_ALERTS = 'check-alerts', +} + /** * Command line arguments structure for tasks. * Contains task name and optional provider configuration. */ const pingTaskArgsSchema = z.object({ - taskName: z.literal('ping-pong'), + taskName: z.literal(TaskName.PING_PONG), }); const checkAlertsTaskArgsSchema = z.object({ - taskName: z.literal('check-alerts'), + taskName: z.literal(TaskName.CHECK_ALERTS), provider: z.string().optional(), concurrency: z .number() From 0c343692e46655c8726f537d8767879c38ea0acc Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Thu, 13 Nov 2025 16:27:50 -0600 Subject: [PATCH 2/5] chore: changeset --- .changeset/stale-horses-punch.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/stale-horses-punch.md diff --git a/.changeset/stale-horses-punch.md b/.changeset/stale-horses-punch.md new file mode 100644 index 000000000..f46101334 --- /dev/null +++ b/.changeset/stale-horses-punch.md @@ -0,0 +1,5 @@ +--- +"@hyperdx/api": minor +--- + +Add metrics to task execution From 55f505f13cf7289260de64d8705503ea9fe89a64 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Thu, 13 Nov 2025 16:29:04 -0600 Subject: [PATCH 3/5] review feedback --- packages/api/src/tasks/index.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/api/src/tasks/index.ts b/packages/api/src/tasks/index.ts index 71bf1d53e..4943985a0 100644 --- a/packages/api/src/tasks/index.ts +++ b/packages/api/src/tasks/index.ts @@ -51,6 +51,9 @@ async function main(argv: TaskArgs): Promise { }); } +// Entry point +const argv = asTaskArgs(minimist(process.argv.slice(2))); + const instrumentedMain = timeExec(main, duration => { const gauge = taskExecutionDurationGauge.get(argv.taskName); if (gauge) { @@ -59,8 +62,6 @@ const instrumentedMain = timeExec(main, duration => { logger.info(`${argv.taskName} finished in ${duration.toFixed(2)} ms`); }); -// Entry point -const argv = asTaskArgs(minimist(process.argv.slice(2))); // WARNING: the cron job will be enabled only in development mode if (!RUN_SCHEDULED_TASKS_EXTERNALLY) { logger.info('In-app cron job is enabled'); From 5f3e5e7bd773aa6732e2c3c1ade56a47b894d309 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Thu, 13 Nov 2025 16:30:07 -0600 Subject: [PATCH 4/5] review: replace any with unknown --- packages/api/src/tasks/metrics.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/api/src/tasks/metrics.ts b/packages/api/src/tasks/metrics.ts index 32b9daedb..e806a9d17 100644 --- a/packages/api/src/tasks/metrics.ts +++ b/packages/api/src/tasks/metrics.ts @@ -53,7 +53,7 @@ for (const name of Object.values(TaskName)) { ); } -export function timeExec( +export function timeExec( fn: (...args: T) => R, recordFn?: (duration: number) => void, ) { From 03b705a000bc1405edc3ecf8891abeeb8d745229 Mon Sep 17 00:00:00 2001 From: Dan Hable Date: Thu, 13 Nov 2025 16:34:45 -0600 Subject: [PATCH 5/5] additional review items --- packages/api/src/tasks/index.ts | 2 +- packages/api/src/tasks/metrics.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/api/src/tasks/index.ts b/packages/api/src/tasks/index.ts index 4943985a0..18cf51300 100644 --- a/packages/api/src/tasks/index.ts +++ b/packages/api/src/tasks/index.ts @@ -57,7 +57,7 @@ const argv = asTaskArgs(minimist(process.argv.slice(2))); const instrumentedMain = timeExec(main, duration => { const gauge = taskExecutionDurationGauge.get(argv.taskName); if (gauge) { - gauge.record(duration, { useCron: true }); + gauge.record(duration, { useCron: !RUN_SCHEDULED_TASKS_EXTERNALLY }); } logger.info(`${argv.taskName} finished in ${duration.toFixed(2)} ms`); }); diff --git a/packages/api/src/tasks/metrics.ts b/packages/api/src/tasks/metrics.ts index e806a9d17..919174134 100644 --- a/packages/api/src/tasks/metrics.ts +++ b/packages/api/src/tasks/metrics.ts @@ -54,7 +54,7 @@ for (const name of Object.values(TaskName)) { } export function timeExec( - fn: (...args: T) => R, + fn: (...args: T) => Promise, recordFn?: (duration: number) => void, ) { return async (...args: T) => {