Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions packages/aws-lambda/src/sdk/chunkRuntime.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/**
* Per-chunk runtime constants + warning threshold for `RenderChunk` Lambda
* invocations.
*
* Lambda's hard per-invocation cap is 900 seconds; a chunk that runs to
* 720s+ is one bad cold-start away from `Sandbox.Timedout` on a slightly
* heavier next render (more fps, more duration, more compositing). The
* cost-analysis sweep that produced this module hit the cap twice — both
* with default 16-way fan-out on heavy WebGL — and the
* `getRenderProgress` SDK now surfaces the slowest observed chunk so
* callers can warn ahead of that failure.
*/

/** Longest a single Lambda invocation may run, in milliseconds (900 s). */
export const LAMBDA_TIMEOUT_MS = 900_000;

/**
 * Share of {@link LAMBDA_TIMEOUT_MS} a chunk's runtime is compared against
 * when deciding whether to surface the headroom warning.
 */
export const CHUNK_RUNTIME_WARN_RATIO = 0.8;

/**
 * Warning threshold in milliseconds: {@link CHUNK_RUNTIME_WARN_RATIO}
 * applied to {@link LAMBDA_TIMEOUT_MS}, computed once at module load.
 */
export const CHUNK_RUNTIME_WARN_MS = CHUNK_RUNTIME_WARN_RATIO * LAMBDA_TIMEOUT_MS;
79 changes: 79 additions & 0 deletions packages/aws-lambda/src/sdk/getRenderProgress.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,85 @@ describe("getRenderProgress", () => {
]);
});

// Three chunks finish with different durations; the snapshot must report the
// largest of them, regardless of the order in which they completed.
it("tracks the slowest RenderChunk billed duration for the CLI warning", async () => {
  const fakeSfn = new FakeSFN();
  // 820_000 ms is 91% of the 900s cap.
  const chunkDurationsMs = [100_000, 820_000, 50_000];
  fakeSfn.historyPages = [
    [
      stateEntered("Plan"),
      taskScheduled(),
      taskSucceeded({ Action: "plan", TotalFrames: 30, DurationMs: 1_000 }),
      // Expanded in place so the event factories run in history order.
      ...chunkDurationsMs.flatMap((durationMs) => [
        stateEntered("RenderChunk"),
        taskScheduled(),
        taskSucceeded({ Action: "renderChunk", FramesEncoded: 10, DurationMs: durationMs }),
      ]),
      stateEntered("Assemble"),
      taskScheduled(),
      taskSucceeded({
        Action: "assemble",
        FileSize: 1_000,
        OutputS3Uri: "s3://b/k.mp4",
        DurationMs: 5_000,
      }),
    ],
  ];
  fakeSfn.describe.status = "SUCCEEDED";

  const { maxChunkDurationMs } = await getRenderProgress({
    executionArn: "arn",
    sfn: fakeSfn as unknown as SFNClient,
  });

  expect(maxChunkDurationMs).toBe(820_000);
});

// Plan and Assemble both report longer durations than the only chunk; the
// snapshot must still reflect the chunk's duration alone.
it("ignores Plan/Assemble billed durations in maxChunkDurationMs", async () => {
  const fakeSfn = new FakeSFN();
  // 60s plan — larger than the chunk, so it would win if Plan were counted.
  const planEvents = [
    stateEntered("Plan"),
    taskScheduled(),
    taskSucceeded({ Action: "plan", TotalFrames: 30, DurationMs: 60_000 }),
  ];
  const chunkEvents = [
    stateEntered("RenderChunk"),
    taskScheduled(),
    taskSucceeded({ Action: "renderChunk", FramesEncoded: 10, DurationMs: 12_000 }),
  ];
  // 90s assemble — likewise larger than the chunk.
  const assembleEvents = [
    stateEntered("Assemble"),
    taskScheduled(),
    taskSucceeded({
      Action: "assemble",
      FileSize: 1_000,
      OutputS3Uri: "s3://b/k.mp4",
      DurationMs: 90_000,
    }),
  ];
  fakeSfn.historyPages = [[...planEvents, ...chunkEvents, ...assembleEvents]];
  fakeSfn.describe.status = "SUCCEEDED";

  const { maxChunkDurationMs } = await getRenderProgress({
    executionArn: "arn",
    sfn: fakeSfn as unknown as SFNClient,
  });

  expect(maxChunkDurationMs).toBe(12_000);
});

// Only the Plan step has completed, so no chunk duration has been observed.
it("maxChunkDurationMs is null before any RenderChunk completes", async () => {
  const fakeSfn = new FakeSFN();
  const planOnlyPage = [
    stateEntered("Plan"),
    taskScheduled(),
    taskSucceeded({ Action: "plan", TotalFrames: 30, DurationMs: 1_000 }),
  ];
  fakeSfn.historyPages = [planOnlyPage];

  const { maxChunkDurationMs } = await getRenderProgress({
    executionArn: "arn",
    sfn: fakeSfn as unknown as SFNClient,
  });

  expect(maxChunkDurationMs).toBeNull();
});

it("billed-seconds sum matches what the cost-analysis script computes", async () => {
// Real-shape regression: cost script summed `DurationMs` across plan
// + renderChunk + assemble TaskSucceeded events and pinned the
Expand Down
45 changes: 45 additions & 0 deletions packages/aws-lambda/src/sdk/getRenderProgress.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,17 @@ export interface RenderProgress {
*/
lambdasInvoked: number;
costs: RenderCost;
/**
* Per-chunk runtime headroom snapshot. `maxChunkDurationMs` is the
* billed duration of the slowest `RenderChunk` Lambda invocation so
* far; null until a chunk reports a non-zero billed duration. The CLI
* warns once this reaches the {@link CHUNK_RUNTIME_WARN_RATIO} share of
* Lambda's 900-second hard cap so the user knows to bump
* `--max-parallel-chunks` before
* the next render hits a `Sandbox.Timedout` retry storm. Defaults
* tuned from the cost-analysis sweep where inspector-launch at
* 1080p/60fps with mpc=16 blew past the cap.
*/
maxChunkDurationMs: number | null;
/** Final output object if Assemble succeeded; `null` otherwise. */
outputFile: { s3Uri: string; bytes: number | null } | null;
errors: RenderError[];
Expand Down Expand Up @@ -125,6 +136,7 @@ export async function getRenderProgress(opts: GetRenderProgressOptions): Promise
totalFrames: summary.totalFrames,
lambdasInvoked: summary.lambdasInvoked,
costs,
maxChunkDurationMs: summary.maxChunkDurationMs,
outputFile: summary.outputFile,
errors: summary.errors,
fatalErrorEncountered: isTerminalFailure(status),
Expand Down Expand Up @@ -159,6 +171,8 @@ interface HistorySummary {
totalFrames: number | null;
lambdasInvoked: number;
assembleComplete: boolean;
/** Slowest billed-duration across `RenderChunk` invocations; null if none observed. */
maxChunkDurationMs: number | null;
outputFile: { s3Uri: string; bytes: number | null } | null;
errors: RenderError[];
}
Expand All @@ -177,6 +191,7 @@ function summarizeHistory(events: HistoryEvent[], memoryMb: number): HistorySumm
let assembleComplete = false;
let outputFile: HistorySummary["outputFile"] = null;
let stateTransitions = 0;
let maxChunkDurationMs: number | null = null;
const errors: RenderError[] = [];
const lambdaInvocations: BilledLambdaInvocation[] = [];

Expand Down Expand Up @@ -236,6 +251,11 @@ function summarizeHistory(events: HistoryEvent[], memoryMb: number): HistorySumm
applyPayloadFrameCounts(payload, currentLambdaState, (delta) => {
framesRendered += delta;
});
maxChunkDurationMs = bumpMaxChunkDuration(
maxChunkDurationMs,
currentLambdaState,
billedDurationMs,
);
if (payload && typeof payload === "object") {
const obj = payload as Record<string, unknown>;
if (typeof obj.TotalFrames === "number") totalFrames = obj.TotalFrames;
Expand All @@ -250,6 +270,11 @@ function summarizeHistory(events: HistoryEvent[], memoryMb: number): HistorySumm
memorySizeMb: memoryMb,
estimated: billedDurationMs === 0,
});
maxChunkDurationMs = bumpMaxChunkDuration(
maxChunkDurationMs,
currentLambdaState,
billedDurationMs,
);
if (payload && typeof payload === "object") {
const obj = payload as Record<string, unknown>;
if (typeof obj.TotalFrames === "number") totalFrames = obj.TotalFrames;
Expand Down Expand Up @@ -334,6 +359,7 @@ function summarizeHistory(events: HistoryEvent[], memoryMb: number): HistorySumm
totalFrames,
lambdasInvoked,
assembleComplete,
maxChunkDurationMs,
outputFile,
errors,
};
Expand Down Expand Up @@ -369,6 +395,25 @@ function unwrapLambdaPayload(payload: unknown): unknown {
return payload;
}

/**
 * Fold one Lambda invocation into the running slowest-chunk duration.
 *
 * The invocation only counts when the enclosing state is `RenderChunk`
 * and its billed duration is not zero or negative; otherwise the running
 * value is handed back untouched. Both the `TaskSucceeded` and the
 * `LambdaFunctionSucceeded` branches call this so they feed the same
 * counter.
 *
 * @param current - Running maximum so far, or `null` if no chunk has counted yet.
 * @param currentLambdaState - Name of the state the invocation belongs to.
 * @param billedDurationMs - Billed duration of this invocation in milliseconds.
 * @returns The larger of the running maximum and this invocation's
 *   duration when it counts, else `current` unchanged.
 */
function bumpMaxChunkDuration(
  current: number | null,
  currentLambdaState: string | null,
  billedDurationMs: number,
): number | null {
  const isChunkInvocation = currentLambdaState === "RenderChunk";
  const hasBilledTime = !(billedDurationMs <= 0);
  if (!(isChunkInvocation && hasBilledTime)) {
    return current;
  }
  // A missing running maximum is treated as zero for the comparison.
  const baseline = current == null ? 0 : current;
  return Math.max(baseline, billedDurationMs);
}

/**
* Apply `FramesEncoded` from a Lambda success payload to the running
* counter, but only when the enclosing state is the chunk-render step.
Expand Down
5 changes: 5 additions & 0 deletions packages/aws-lambda/src/sdk/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ export {
computeRenderCost,
type RenderCost,
} from "./costAccounting.js";
export {
CHUNK_RUNTIME_WARN_MS,
CHUNK_RUNTIME_WARN_RATIO,
LAMBDA_TIMEOUT_MS,
} from "./chunkRuntime.js";
export {
InvalidConfigError,
MAX_STEP_FUNCTIONS_INPUT_BYTES,
Expand Down
35 changes: 35 additions & 0 deletions packages/cli/src/commands/lambda/render.ts
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ async function waitForCompletion(
console.log(` ${c.dim("Output:")} ${progress.outputFile.s3Uri}`);
console.log(` ${c.dim("Size:")} ${progress.outputFile.bytes ?? "?"} bytes`);
console.log(` ${c.dim("Total cost:")} ${progress.costs.displayCost}`);
await warnIfChunkRuntimeIsCloseToCap(progress);
} else {
console.log();
console.log(c.error(`Render ended with status ${progress.status}.`));
Expand All @@ -237,6 +238,40 @@ async function waitForCompletion(
}
}

/**
 * Print a runtime-headroom warning when the slowest `RenderChunk`
 * invocation used at least `CHUNK_RUNTIME_WARN_MS` of the
 * `LAMBDA_TIMEOUT_MS` per-invocation cap (both constants are read from
 * the SDK via `loadSDK()`).
 *
 * Nothing is printed when no chunk duration has been recorded
 * (`maxChunkDurationMs === null`) or when the slowest chunk stayed below
 * the threshold. Otherwise the message reports the slowest chunk in whole
 * seconds next to the cap and proposes a `--max-parallel-chunks` value so
 * the user can shrink per-chunk work before a heavier render (more fps,
 * duration, or composition complexity) trips `Sandbox.Timedout`.
 *
 * @param progress - Progress snapshot; only `maxChunkDurationMs` is read.
 * @returns Resolves once the warning has been printed or skipped.
 */
// fallow-ignore-next-line complexity
async function warnIfChunkRuntimeIsCloseToCap(progress: {
  maxChunkDurationMs: number | null;
  lambdasInvoked: number;
}): Promise<void> {
  const { CHUNK_RUNTIME_WARN_MS, LAMBDA_TIMEOUT_MS } = await loadSDK();
  const max = progress.maxChunkDurationMs;
  if (max === null || max < CHUNK_RUNTIME_WARN_MS) return;
  const slowestSec = Math.round(max / 1000);
  const capSec = LAMBDA_TIMEOUT_MS / 1000;
  // Doubling the fan-out roughly halves the per-chunk runtime. A chunk that
  // completed cannot have run longer than the Lambda cap, so at the SDK's
  // current 0.8 warn ratio `max / CHUNK_RUNTIME_WARN_MS` stays below 1.25.
  // Only two tiers are therefore reachable: 64 once the chunk is past 1.2×
  // the warn threshold, 32 otherwise. A `> 1.5` tier could never fire.
  const headroomRatio = max / CHUNK_RUNTIME_WARN_MS;
  const suggestedFanOut = headroomRatio > 1.2 ? 64 : 32;
  console.log();
  console.log(
    c.warn(
      `Heads up: slowest chunk ran ${slowestSec}s of the ${capSec}s Lambda cap. ` +
        `Adding fps, duration, or complexity to this composition will likely trip ` +
        `Sandbox.Timedout on the next render.\n` +
        `  Mitigate with: --max-parallel-chunks ${suggestedFanOut} (shrinks per-chunk work).`,
    ),
  );
}

/**
 * Return a promise that resolves after a `setTimeout` delay.
 *
 * @param ms - Delay in milliseconds, passed straight to `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
Loading