diff --git a/packages/aws-lambda/src/sdk/chunkRuntime.ts b/packages/aws-lambda/src/sdk/chunkRuntime.ts new file mode 100644 index 000000000..c60a982e9 --- /dev/null +++ b/packages/aws-lambda/src/sdk/chunkRuntime.ts @@ -0,0 +1,11 @@ +/** + * Per-chunk runtime constants for `RenderChunk` Lambda invocations. + * Threshold drives the warning surfaced by the CLI when the slowest + * observed chunk approaches Lambda's hard cap. + */ + +/** Lambda's hard per-invocation cap. */ +export const LAMBDA_TIMEOUT_MS = 900_000; + +/** Pre-computed 80% of {@link LAMBDA_TIMEOUT_MS} — the warning threshold. */ +export const CHUNK_RUNTIME_WARN_MS = LAMBDA_TIMEOUT_MS * 0.8; diff --git a/packages/aws-lambda/src/sdk/getRenderProgress.test.ts b/packages/aws-lambda/src/sdk/getRenderProgress.test.ts index fd572c88f..81731803c 100644 --- a/packages/aws-lambda/src/sdk/getRenderProgress.test.ts +++ b/packages/aws-lambda/src/sdk/getRenderProgress.test.ts @@ -344,6 +344,83 @@ describe("getRenderProgress", () => { ]); }); + it("tracks the slowest RenderChunk billed duration for the cap warning", async () => { + const sfn = new FakeSFN(); + const renderChunk = (ms: number) => [ + stateEntered("RenderChunk"), + taskScheduled(), + taskSucceeded({ Action: "renderChunk", FramesEncoded: 10, DurationMs: ms }), + ]; + sfn.historyPages = [ + [ + stateEntered("Plan"), + taskScheduled(), + taskSucceeded({ Action: "plan", TotalFrames: 30, DurationMs: 1_000 }), + ...renderChunk(100_000), + ...renderChunk(820_000), + ...renderChunk(50_000), + stateEntered("Assemble"), + taskScheduled(), + taskSucceeded({ + Action: "assemble", + FileSize: 1_000, + OutputS3Uri: "s3://b/k.mp4", + DurationMs: 5_000, + }), + ], + ]; + sfn.describe.status = "SUCCEEDED"; + const progress = await getRenderProgress({ + executionArn: "arn", + sfn: sfn as unknown as SFNClient, + }); + expect(progress.maxChunkDurationMs).toBe(820_000); + }); + + it("ignores Plan/Assemble billed durations in maxChunkDurationMs", async () => { + const sfn = 
new FakeSFN(); + sfn.historyPages = [ + [ + stateEntered("Plan"), + taskScheduled(), + taskSucceeded({ Action: "plan", TotalFrames: 30, DurationMs: 60_000 }), + stateEntered("RenderChunk"), + taskScheduled(), + taskSucceeded({ Action: "renderChunk", FramesEncoded: 10, DurationMs: 12_000 }), + stateEntered("Assemble"), + taskScheduled(), + taskSucceeded({ + Action: "assemble", + FileSize: 1_000, + OutputS3Uri: "s3://b/k.mp4", + DurationMs: 90_000, + }), + ], + ]; + sfn.describe.status = "SUCCEEDED"; + const progress = await getRenderProgress({ + executionArn: "arn", + sfn: sfn as unknown as SFNClient, + }); + expect(progress.maxChunkDurationMs).toBe(12_000); + }); + + it("maxChunkDurationMs is null before any RenderChunk completes", async () => { + const sfn = new FakeSFN(); + sfn.historyPages = [ + [ + stateEntered("Plan"), + taskScheduled(), + taskSucceeded({ Action: "plan", TotalFrames: 30, DurationMs: 1_000 }), + ], + ]; + const progress = await getRenderProgress({ + executionArn: "arn", + sfn: sfn as unknown as SFNClient, + }); + expect(progress.maxChunkDurationMs).toBeNull(); + }); + it("sums billed seconds across plan + chunks + assemble", async () => { const sfn = new FakeSFN(); const renderChunkSucceeded = (frames: number, ms: number) => [ diff --git a/packages/aws-lambda/src/sdk/getRenderProgress.ts b/packages/aws-lambda/src/sdk/getRenderProgress.ts index 9442fc495..f743814f2 100644 --- a/packages/aws-lambda/src/sdk/getRenderProgress.ts +++ b/packages/aws-lambda/src/sdk/getRenderProgress.ts @@ -76,6 +76,8 @@ export interface RenderProgress { /** Total Lambda invocations scheduled so far (both optimized + raw task integrations). */ lambdasInvoked: number; costs: RenderCost; + /** Billed duration of the slowest `RenderChunk` invocation; null before any chunk completes. */ + maxChunkDurationMs: number | null; /** Final output object if Assemble succeeded; `null` otherwise. 
*/ outputFile: { s3Uri: string; bytes: number | null } | null; errors: RenderError[]; @@ -120,6 +122,7 @@ export async function getRenderProgress(opts: GetRenderProgressOptions): Promise totalFrames: summary.totalFrames, lambdasInvoked: summary.lambdasInvoked, costs, + maxChunkDurationMs: summary.maxChunkDurationMs, outputFile: summary.outputFile, errors: summary.errors, fatalErrorEncountered: isTerminalFailure(status), @@ -154,6 +157,8 @@ interface HistorySummary { totalFrames: number | null; lambdasInvoked: number; assembleComplete: boolean; + /** Slowest billed-duration across `RenderChunk` invocations; null if none observed. */ + maxChunkDurationMs: number | null; outputFile: { s3Uri: string; bytes: number | null } | null; errors: RenderError[]; } @@ -172,6 +177,7 @@ function summarizeHistory(events: HistoryEvent[], memoryMb: number): HistorySumm let assembleComplete = false; let outputFile: HistorySummary["outputFile"] = null; let stateTransitions = 0; + let maxChunkDurationMs: number | null = null; const errors: RenderError[] = []; const lambdaInvocations: BilledLambdaInvocation[] = []; @@ -221,6 +227,11 @@ function summarizeHistory(events: HistoryEvent[], memoryMb: number): HistorySumm applyPayloadFrameCounts(payload, currentLambdaState, (delta) => { framesRendered += delta; }); + maxChunkDurationMs = bumpMaxChunkDuration( + maxChunkDurationMs, + currentLambdaState, + billedDurationMs, + ); if (payload && typeof payload === "object") { const obj = payload as Record; if (typeof obj.TotalFrames === "number") totalFrames = obj.TotalFrames; @@ -238,6 +249,11 @@ function summarizeHistory(events: HistoryEvent[], memoryMb: number): HistorySumm applyPayloadFrameCounts(payload, currentLambdaState, (delta) => { framesRendered += delta; }); + maxChunkDurationMs = bumpMaxChunkDuration( + maxChunkDurationMs, + currentLambdaState, + billedDurationMs, + ); if (payload && typeof payload === "object") { const obj = payload as Record; if (typeof obj.TotalFrames === "number") 
/**
 * Fold one Lambda invocation into the running "slowest `RenderChunk`"
 * maximum that feeds the near-timeout warning.
 *
 * @param current - Slowest chunk duration recorded so far, or `null` when no
 *   chunk has been counted yet.
 * @param currentLambdaState - State the invocation was observed under; only
 *   `"RenderChunk"` contributes, every other state leaves `current` untouched.
 * @param billedDurationMs - Billed duration of the invocation, in milliseconds.
 * @returns The updated maximum, or `current` unchanged when the sample is ignored.
 */
function bumpMaxChunkDuration(
  current: number | null,
  currentLambdaState: string | null,
  billedDurationMs: number,
): number | null {
  if (currentLambdaState !== "RenderChunk") return current;
  // `Math.max` propagates NaN, and NaN then slips past a `max < threshold`
  // guard downstream, so non-finite samples are dropped along with the
  // non-positive ones instead of being folded in.
  if (!Number.isFinite(billedDurationMs) || billedDurationMs <= 0) return current;
  return current === null ? billedDurationMs : Math.max(current, billedDurationMs);
}
// They're plain numbers tied to Lambda's hard cap; the duplication is cheap.
/** Lambda's hard per-invocation cap, in milliseconds. */
const LAMBDA_TIMEOUT_MS = 900_000;
/** 80% of {@link LAMBDA_TIMEOUT_MS}; a slowest chunk at or above this triggers the warning. */
const CHUNK_RUNTIME_WARN_MS = LAMBDA_TIMEOUT_MS * 0.8;

/**
 * Warn when the slowest chunk used at least {@link CHUNK_RUNTIME_WARN_MS} of
 * the Lambda cap, and suggest a larger `--max-parallel-chunks` when one exists.
 *
 * @param progress - Only `maxChunkDurationMs` is read; `null` or a non-finite
 *   value means there is nothing to report.
 * @param currentMaxParallelChunks - Fan-out the render was started with;
 *   `undefined` falls back to {@link DEFAULT_MAX_PARALLEL_CHUNKS}.
 */
function warnIfChunkRuntimeIsCloseToCap(
  progress: { maxChunkDurationMs: number | null },
  currentMaxParallelChunks: number | undefined,
): void {
  const max = progress.maxChunkDurationMs;
  // NaN compares false against the threshold, so it has to be rejected explicitly.
  if (max === null || !Number.isFinite(max) || max < CHUNK_RUNTIME_WARN_MS) return;
  const slowestSec = Math.round(max / 1000);
  const capSec = LAMBDA_TIMEOUT_MS / 1000;
  const current = currentMaxParallelChunks ?? DEFAULT_MAX_PARALLEL_CHUNKS;
  const suggested = suggestFanOut(current, max);
  // When the fan-out cannot be raised, repeating the current value as a
  // "mitigation" would be a no-op, so point at the composition instead.
  const mitigation =
    suggested <= current
      ? ` --max-parallel-chunks is already ${current} (suggestion ceiling ${MAX_PARALLEL_CHUNKS_CEILING}); ` +
        `reduce fps, duration, or complexity instead.`
      : ` Mitigate with: --max-parallel-chunks ${suggested} (shrinks per-chunk work).`;
  console.log();
  console.log(
    c.warn(
      `Heads up: slowest chunk ran ${slowestSec}s of the ${capSec}s Lambda cap. ` +
        `Adding fps, duration, or complexity to this composition will likely trip ` +
        `Sandbox.Timedout on the next render.\n` +
        mitigation,
    ),
  );
}

/**
 * Pick a fan-out whose projected slowest-chunk runtime falls strictly below
 * {@link CHUNK_RUNTIME_WARN_MS}.
 *
 * The projection assumes per-chunk runtime scales inversely with the fan-out:
 * `current` is multiplied by the smallest integer that pushes
 * `slowestMs / multiplier` under the threshold, then rounded up to a power of
 * two and capped at {@link MAX_PARALLEL_CHUNKS_CEILING}. The result is never
 * lower than `current`, because a smaller fan-out would add per-chunk work.
 *
 * @param current - Fan-out used for the render; values below 1 or non-finite
 *   values are treated as 1.
 * @param slowestMs - Duration of the slowest chunk, in milliseconds.
 * @returns The suggested fan-out; equal to `current` when there is no headroom.
 */
function suggestFanOut(current: number, slowestMs: number): number {
  const base = Number.isFinite(current) && current >= 1 ? Math.ceil(current) : 1;
  const ratio = slowestMs / CHUNK_RUNTIME_WARN_MS;
  // floor + 1 (not ceil) so a chunk sitting exactly on the threshold, which
  // does trigger the warning, still gets a strictly larger fan-out.
  const multiplier = Number.isFinite(ratio) && ratio > 0 ? Math.floor(ratio) + 1 : 1;
  const target = base * multiplier;
  // Integer doubling avoids any rounding concern with log2 on exact powers of two.
  let nextPow2 = 1;
  while (nextPow2 < target) nextPow2 *= 2;
  return Math.max(base, Math.min(nextPow2, MAX_PARALLEL_CHUNKS_CEILING));
}

/** Fan-out assumed when the caller did not pass one. NOTE(review): keep in sync with the SDK default — not visible from here. */
const DEFAULT_MAX_PARALLEL_CHUNKS = 16;
/** Largest fan-out this helper will suggest. */
const MAX_PARALLEL_CHUNKS_CEILING = 256;

/** Resolve after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((res) => setTimeout(res, ms));
}