From f1e22a6d82d7ec3244a29a5d0865545f31611309 Mon Sep 17 00:00:00 2001
From: James
Date: Tue, 19 May 2026 19:28:53 +0000
Subject: [PATCH] feat(aws-lambda): validate variables + 256 KiB Step Functions input cap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add client-side validation for the new config.variables field
(introduced in PR 9.1) and a 256 KiB cap on the full Step Functions
Standard execution input. Both checks throw a typed InvalidConfigError
BEFORE the SDK calls StartExecution — catching the obvious mistakes
locally instead of as a States.DataLimitExceeded 50 ms into the
execution.

validateVariablesPayload walks the variables tree and rejects:
  - functions, Symbols, BigInts, non-finite numbers
  - undefined leaves (silently dropped by JSON.stringify — would
    surprise the caller when their value doesn't show up in the render)
  - non-plain objects (Date, Map, class instances) — Date's toJSON does
    round-trip as a string, but the composition gets a string, not a
    Date, so explicit reject is clearer

validateStepFunctionsInputSize measures the actual UTF-8 byte length of
JSON.stringify(input) against the 256 KiB cap. We use Standard workflows
(per the plan §6.2 / §15.2) for execution-history visibility, so the cap
is 256 KiB (Express would be 32 KiB). The error message names the actual
byte count, the cap, and points at the
templates-on-lambda#working-with-large-variables section so users know
to URL-reference media assets instead of inlining them.

Both helpers are exported from @hyperframes/aws-lambda/sdk so adapters
that build custom Step Functions inputs (batch verbs, future Temporal
ports) can reuse the same gates.

Phase 9 PR 9.2 of the distributed rendering plan.
--- packages/aws-lambda/src/sdk/index.ts | 8 +- .../aws-lambda/src/sdk/renderToLambda.test.ts | 65 ++++++ packages/aws-lambda/src/sdk/renderToLambda.ts | 15 +- .../aws-lambda/src/sdk/validateConfig.test.ts | 201 +++++++++++++++++- packages/aws-lambda/src/sdk/validateConfig.ts | 165 ++++++++++++++ 5 files changed, 451 insertions(+), 3 deletions(-) diff --git a/packages/aws-lambda/src/sdk/index.ts b/packages/aws-lambda/src/sdk/index.ts index a388bf587..f0e600afa 100644 --- a/packages/aws-lambda/src/sdk/index.ts +++ b/packages/aws-lambda/src/sdk/index.ts @@ -22,6 +22,12 @@ export { computeRenderCost, type RenderCost, } from "./costAccounting.js"; -export { InvalidConfigError, validateDistributedRenderConfig } from "./validateConfig.js"; +export { + InvalidConfigError, + MAX_STEP_FUNCTIONS_INPUT_BYTES, + validateDistributedRenderConfig, + validateStepFunctionsInputSize, + validateVariablesPayload, +} from "./validateConfig.js"; export type { SerializableDistributedRenderConfig } from "../events.js"; export type { DistributedFormat } from "../formatExtension.js"; diff --git a/packages/aws-lambda/src/sdk/renderToLambda.test.ts b/packages/aws-lambda/src/sdk/renderToLambda.test.ts index 9dda1a8a2..8beb409ec 100644 --- a/packages/aws-lambda/src/sdk/renderToLambda.test.ts +++ b/packages/aws-lambda/src/sdk/renderToLambda.test.ts @@ -179,6 +179,71 @@ describe("renderToLambda", () => { expect(handle.renderId).toMatch(/^hf-render-[0-9a-f-]{36}$/); }); + it("threads variables through the Step Functions execution input", async () => { + const sfn = new FakeSFN(); + const s3 = new FakeS3(); + const variables = { title: "Hello Alice", accent: "#ff0000" }; + await renderToLambda({ + projectDir, + bucketName: "test-bucket", + stateMachineArn: "arn:aws:states:us-east-1:1234:stateMachine:hf", + config: { ...baseConfig, variables }, + executionName: "smoke-variables", + sfn: asSFNClient(sfn), + s3: asS3Client(s3), + }); + expect(sfn.starts).toHaveLength(1); + const start = 
sfn.starts[0]!; + // The execution input carries the variables under Config.variables — + // the Step Functions state machine forwards `Config` verbatim into the + // PlanEvent's `Config` field, where the handler spreads it into the + // producer's DistributedRenderConfig. + const input = start.input as { Config: { variables?: Record } }; + expect(input.Config.variables).toEqual(variables); + }); + + it("rejects a config whose variables blob would push the execution input over 256 KiB", async () => { + const sfn = new FakeSFN(); + const s3 = new FakeS3(); + const huge = "x".repeat(260 * 1024); + await expect( + renderToLambda({ + projectDir, + bucketName: "test-bucket", + stateMachineArn: "arn:aws:states:us-east-1:1234:stateMachine:hf", + config: { ...baseConfig, variables: { blob: huge } }, + executionName: "smoke-too-big", + sfn: asSFNClient(sfn), + s3: asS3Client(s3), + }), + ).rejects.toThrow(/256.*KiB|templates-on-lambda/); + // The reject must happen BEFORE StartExecution — uncaught oversize input + // surfaces as States.DataLimitExceeded 50ms in, far from this call site. + expect(sfn.starts).toHaveLength(0); + }); + + it("rejects a config whose variables contain non-JSON-safe values", async () => { + const sfn = new FakeSFN(); + const s3 = new FakeS3(); + await expect( + renderToLambda({ + projectDir, + bucketName: "test-bucket", + stateMachineArn: "arn:aws:states:us-east-1:1234:stateMachine:hf", + config: { + ...baseConfig, + // BigInt would throw at JSON.stringify time; catch it at the validator + // boundary with a typed error instead. 
+ variables: { count: 9_007_199_254_740_993n } as unknown as Record, + }, + executionName: "smoke-bigint", + sfn: asSFNClient(sfn), + s3: asS3Client(s3), + }), + ).rejects.toThrow(InvalidConfigError); + expect(sfn.starts).toHaveLength(0); + }); + it("propagates a missing executionArn as an error", async () => { const sfn = { async send(_cmd: unknown): Promise { diff --git a/packages/aws-lambda/src/sdk/renderToLambda.ts b/packages/aws-lambda/src/sdk/renderToLambda.ts index 94ee93101..7b96f2e07 100644 --- a/packages/aws-lambda/src/sdk/renderToLambda.ts +++ b/packages/aws-lambda/src/sdk/renderToLambda.ts @@ -24,7 +24,10 @@ import type { SerializableDistributedRenderConfig } from "../events.js"; import { formatExtension } from "../formatExtension.js"; import { formatS3Uri } from "../s3Transport.js"; import { deploySite, type SiteHandle } from "./deploySite.js"; -import { validateDistributedRenderConfig } from "./validateConfig.js"; +import { + validateDistributedRenderConfig, + validateStepFunctionsInputSize, +} from "./validateConfig.js"; /** Options for {@link renderToLambda}. 
*/ export interface RenderToLambdaOptions { @@ -70,6 +73,7 @@ export interface RenderHandle { startedAt: string; } +// fallow-ignore-next-line complexity export async function renderToLambda(opts: RenderToLambdaOptions): Promise { validateDistributedRenderConfig(opts.config); @@ -108,6 +112,15 @@ export async function renderToLambda(opts: RenderToLambdaOptions): Promise { expect((err as InvalidConfigError).name).toBe("InvalidConfigError"); } }); + + describe("variables", () => { + it("accepts a plain JSON object", () => { + const cfg: SerializableDistributedRenderConfig = { + ...VALID, + variables: { + title: "Hello", + accent: "#ff0000", + nested: { items: [1, 2, 3], visible: true, note: null }, + }, + }; + expect(validateDistributedRenderConfig(cfg)).toBe(cfg); + }); + + it("rejects variables that's an array, not a plain object", () => { + try { + validateDistributedRenderConfig({ + ...VALID, + variables: [1, 2, 3] as unknown as Record, + }); + throw new Error("expected throw"); + } catch (err) { + expect(err).toBeInstanceOf(InvalidConfigError); + expect((err as InvalidConfigError).field).toBe("config.variables"); + } + }); + + it("rejects functions inside variables", () => { + try { + validateVariablesPayload({ greet: () => "hi" }); + throw new Error("expected throw"); + } catch (err) { + expect(err).toBeInstanceOf(InvalidConfigError); + expect((err as InvalidConfigError).field).toBe("config.variables.greet"); + expect((err as Error).message).toMatch(/function/i); + } + }); + + it("rejects undefined leaves (silently dropped by JSON.stringify)", () => { + try { + validateVariablesPayload({ title: "x", maybe: undefined }); + throw new Error("expected throw"); + } catch (err) { + expect(err).toBeInstanceOf(InvalidConfigError); + expect((err as InvalidConfigError).field).toBe("config.variables.maybe"); + } + }); + + it("rejects BigInt values", () => { + try { + validateVariablesPayload({ count: 9_007_199_254_740_993n }); + throw new Error("expected throw"); + } 
catch (err) { + expect(err).toBeInstanceOf(InvalidConfigError); + expect((err as InvalidConfigError).field).toBe("config.variables.count"); + } + }); + + it("rejects NaN / Infinity numbers", () => { + try { + validateVariablesPayload({ ratio: Number.NaN }); + throw new Error("expected throw"); + } catch (err) { + expect(err).toBeInstanceOf(InvalidConfigError); + expect((err as InvalidConfigError).field).toBe("config.variables.ratio"); + } + try { + validateVariablesPayload({ ratio: Number.POSITIVE_INFINITY }); + throw new Error("expected throw"); + } catch (err) { + expect(err).toBeInstanceOf(InvalidConfigError); + expect((err as InvalidConfigError).field).toBe("config.variables.ratio"); + } + }); + + it("rejects Symbols", () => { + try { + validateVariablesPayload({ id: Symbol("hi") }); + throw new Error("expected throw"); + } catch (err) { + expect(err).toBeInstanceOf(InvalidConfigError); + expect((err as InvalidConfigError).field).toBe("config.variables.id"); + } + }); + + it("rejects Date instances (non-plain objects)", () => { + try { + validateVariablesPayload({ when: new Date("2026-01-01") }); + throw new Error("expected throw"); + } catch (err) { + expect(err).toBeInstanceOf(InvalidConfigError); + expect((err as InvalidConfigError).field).toBe("config.variables.when"); + expect((err as Error).message).toMatch(/Date|non-plain/); + } + }); + + it("walks into arrays and reports nested paths", () => { + try { + validateVariablesPayload({ items: ["a", { broken: () => 1 }] }); + throw new Error("expected throw"); + } catch (err) { + expect(err).toBeInstanceOf(InvalidConfigError); + expect((err as InvalidConfigError).field).toBe("config.variables.items[1].broken"); + } + }); + + it("rejects circular references with a typed error instead of stack-overflowing", () => { + const cyclic: Record = { title: "x" }; + cyclic.self = cyclic; + try { + validateVariablesPayload(cyclic); + throw new Error("expected throw"); + } catch (err) { + 
expect(err).toBeInstanceOf(InvalidConfigError); + expect((err as Error).message).toMatch(/circular/i); + } + }); + + it("rejects cycles via arrays too", () => { + const arr: unknown[] = ["a"]; + arr.push(arr); + try { + validateVariablesPayload({ items: arr }); + throw new Error("expected throw"); + } catch (err) { + expect(err).toBeInstanceOf(InvalidConfigError); + expect((err as Error).message).toMatch(/circular/i); + } + }); + + it("round-trips through JSON for the validated set", () => { + const variables = { + title: "Personalised render", + scene: { intro: { lines: ["one", "two"], delay: 0.5 } }, + tags: ["alpha", "beta"], + active: true, + nothing: null, + }; + validateVariablesPayload(variables); + const round = JSON.parse(JSON.stringify(variables)); + expect(round).toEqual(variables); + }); + }); +}); + +describe("validateStepFunctionsInputSize", () => { + it("accepts inputs under the 256 KiB cap", () => { + const input = { + ProjectS3Uri: "s3://bucket/sites/abc/project.tar.gz", + Config: { fps: 30, width: 1280, height: 720, format: "mp4" }, + }; + expect(() => validateStepFunctionsInputSize(input)).not.toThrow(); + }); + + it("rejects inputs over the 256 KiB cap with a message that names the byte count", () => { + // Build a variables blob that pushes the serialised input over the cap. + // 256 KiB ÷ 2 bytes per char × 1 char per byte for ASCII; pad to 260 KiB + // worth of payload so the serialiser overhead is dwarfed. + const huge = "x".repeat(260 * 1024); + const input = { + ProjectS3Uri: "s3://bucket/sites/abc/project.tar.gz", + Config: { + fps: 30, + width: 1280, + height: 720, + format: "mp4", + variables: { blob: huge }, + }, + }; + try { + validateStepFunctionsInputSize(input); + throw new Error("expected throw"); + } catch (err) { + expect(err).toBeInstanceOf(InvalidConfigError); + const msg = (err as Error).message; + expect(msg).toMatch(/256/); + // Names the actual byte count so users see how far over the cap they are. 
+ const serialized = JSON.stringify(input); + const expectedBytes = Buffer.byteLength(serialized, "utf8"); + expect(msg).toContain(String(expectedBytes)); + // Pointer to the docs section on URL'ing assets. + expect(msg).toMatch(/templates-on-lambda/); + } + }); + + it("MAX_STEP_FUNCTIONS_INPUT_BYTES is 256 KiB", () => { + expect(MAX_STEP_FUNCTIONS_INPUT_BYTES).toBe(256 * 1024); + }); + + it("rejects non-JSON-serializable roots with a clear error", () => { + // A top-level function reference makes JSON.stringify return undefined. + expect(() => validateStepFunctionsInputSize(() => "boom")).toThrow(/not JSON-serializable/); + }); }); diff --git a/packages/aws-lambda/src/sdk/validateConfig.ts b/packages/aws-lambda/src/sdk/validateConfig.ts index 87d15ef6d..59dd0d9a9 100644 --- a/packages/aws-lambda/src/sdk/validateConfig.ts +++ b/packages/aws-lambda/src/sdk/validateConfig.ts @@ -23,6 +23,8 @@ import type { SerializableDistributedRenderConfig } from "../events.js"; /** Thrown for any client-side `SerializableDistributedRenderConfig` violation. */ export class InvalidConfigError extends Error { + // Read via Error.prototype.toString; fallow can't see it. + // fallow-ignore-next-line unused-class-member override readonly name = "InvalidConfigError"; /** Dotted JSON-pointer-ish path to the offending field, e.g. `config.fps`. */ readonly field: string; @@ -169,9 +171,172 @@ export function validateDistributedRenderConfig( ); } + if (config.variables !== undefined) { + validateVariablesPayload(config.variables); + } + return config; } +/** + * Hard cap on Step Functions Standard workflow execution input — 256 KiB + * per the AWS limits page. Express workflows cap at 32 KiB; the render + * stack runs Standard for execution-history visibility, so the larger + * limit applies. The cap is on the entire serialized input, not just the + * variables, because users hit it at the wire boundary regardless of + * which field caused the bloat. 
/**
 * Byte limit (256 KiB) applied to the serialized Step Functions execution
 * input.
 *
 * Specific to Step Functions Standard. Other workflow runtimes (Temporal,
 * Express SFN, raw Lambda invoke) have different caps; this constant
 * shouldn't be reused for those without confirming the limit.
 */
export const MAX_STEP_FUNCTIONS_INPUT_BYTES = 256 * 1024;

/** Pointer to the docs section that explains the URL-your-assets convention. */
const LARGE_VARIABLES_DOCS_URL =
  "https://hyperframes.heygen.com/deploy/templates-on-lambda#working-with-large-variables";

/**
 * Validate that the serialized Step Functions execution input fits inside
 * the 256 KiB Standard-workflow cap. The size is measured as the UTF-8 byte
 * length of `JSON.stringify(input)` — a JS string's `.length` counts UTF-16
 * code units, which under-reports for any multi-byte character.
 *
 * @param input - The full execution input object about to be sent.
 * @throws {InvalidConfigError} (field `"config"`) when the input cannot be
 *   serialized (`JSON.stringify` throws or returns `undefined`) or when the
 *   serialized form exceeds {@link MAX_STEP_FUNCTIONS_INPUT_BYTES}. The
 *   oversize message names the actual byte count, the cap, and the
 *   "working with large variables" docs URL.
 */
// fallow-ignore-next-line complexity
export function validateStepFunctionsInputSize(input: unknown): void {
  let serialized: string | undefined;
  try {
    serialized = JSON.stringify(input);
  } catch (err) {
    // JSON.stringify throws on circular refs and BigInt. The variables
    // walker catches both inside `config.variables`, but a non-variables
    // field could hit the same case in a future field addition.
    throw new InvalidConfigError(
      "config",
      `Step Functions execution input is not JSON-serializable: ${err instanceof Error ? err.message : String(err)}`,
    );
  }
  if (serialized === undefined) {
    // JSON.stringify returns undefined for non-serializable roots
    // (functions, Symbols at the top level).
    throw new InvalidConfigError(
      "config",
      "Step Functions execution input is not JSON-serializable (JSON.stringify returned undefined). " +
        "Check that all fields, including config.variables, are plain JSON values.",
    );
  }
  const byteLength = Buffer.byteLength(serialized, "utf8");
  if (byteLength > MAX_STEP_FUNCTIONS_INPUT_BYTES) {
    throw new InvalidConfigError(
      "config",
      `Step Functions execution input is ${byteLength} bytes, which exceeds the ` +
        `${MAX_STEP_FUNCTIONS_INPUT_BYTES}-byte (256 KiB) limit for Standard workflows. ` +
        `Variables are for typed data (strings, numbers, structured records); media assets ` +
        `(images, audio, video) should be passed as URL references the composition resolves ` +
        `at render time, not inlined as base64. See ${LARGE_VARIABLES_DOCS_URL} for the ` +
        `URL-your-assets convention.`,
    );
  }
}

/**
 * Validate that `variables` is a plain JSON-safe object — no functions,
 * Symbols, `undefined` leaves, BigInts, non-finite numbers, circular
 * references, or non-plain objects (Dates, Maps, Sets, class instances).
 * Rejected values would either round-trip incorrectly (`undefined` is
 * silently dropped by `JSON.stringify`) or throw at serialization time
 * (`bigint`, cycles), so the offending path is surfaced synchronously.
 *
 * The check is purely structural; it does not look at which variable names
 * are present.
 *
 * @param value - Candidate `config.variables` payload.
 * @throws {InvalidConfigError} whose `field` is the path of the first
 *   offending value, e.g. `config.variables.items[1].broken`.
 */
export function validateVariablesPayload(value: unknown): void {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    throw new InvalidConfigError(
      "config.variables",
      `must be a plain JSON object (got ${describeValue(value)})`,
    );
  }
  walkVariables(value, "config.variables", new WeakSet<object>());
}

/** Per-typeof rejection messages for JSON-unsafe leaves. */
const LEAF_REJECTIONS: Partial<Record<string, string>> = {
  // `JSON.stringify` silently drops `undefined` leaves — caller would never
  // notice their value isn't actually being sent.
  undefined:
    "undefined leaves are silently dropped by JSON.stringify — use null if you mean an absent value",
  function: "functions are not JSON-serializable",
  symbol: "Symbols are not JSON-serializable",
  bigint: "BigInt values throw at JSON.stringify — encode as a string if you need 64-bit integers",
};

/**
 * Depth-first walk over one node of the variables tree, throwing on the
 * first JSON-unsafe value.
 *
 * @param value - Node to check.
 * @param path - Path of `value`, used as the error's `field`.
 * @param seen - Objects/arrays that are ANCESTORS of `value` on the current
 *   descent. A node is added before its children are visited and removed
 *   afterwards, so only a genuine back-edge (a cycle) is reported. The same
 *   object referenced from two sibling positions is not a cycle —
 *   `JSON.stringify` serializes it twice — and is accepted.
 * @throws {InvalidConfigError} for non-finite numbers, the `typeof` kinds
 *   listed in `LEAF_REJECTIONS`, cycles, and non-plain objects.
 */
// fallow-ignore-next-line complexity
function walkVariables(value: unknown, path: string, seen: WeakSet<object>): void {
  const t = typeof value;
  if (value === null || t === "string" || t === "boolean") return;
  if (t === "number") {
    if (!Number.isFinite(value as number)) {
      throw new InvalidConfigError(
        path,
        `non-finite numbers (NaN / Infinity) are not JSON-serializable; got ${String(value)}`,
      );
    }
    return;
  }
  const leafReject = LEAF_REJECTIONS[t];
  if (leafReject !== undefined) {
    throw new InvalidConfigError(path, leafReject);
  }
  // t === "object" from here on. Reject circular refs up front — recursing
  // through a back-edge would stack-overflow with no actionable error.
  const node = value as object;
  if (seen.has(node)) {
    throw new InvalidConfigError(
      path,
      "circular reference detected — JSON.stringify cannot serialize cycles",
    );
  }
  seen.add(node);
  if (Array.isArray(value)) {
    // Index loop (not forEach) so holes in sparse arrays are visited and
    // rejected as `undefined` leaves.
    for (let i = 0; i < value.length; i++) {
      walkVariables(value[i], `${path}[${i}]`, seen);
    }
  } else {
    // Reject non-plain objects (Date, Map, Set, class instances) up front.
    // Date's `toJSON` does round-trip as a string, but the composition gets a
    // string, not a Date — explicit reject is clearer than silent type-loss.
    const proto: unknown = Object.getPrototypeOf(value);
    if (proto !== Object.prototype && proto !== null) {
      throw new InvalidConfigError(
        path,
        `non-plain objects are not supported (got ${describeValue(value)}); use a plain {…} object`,
      );
    }
    const record = value as Record<string, unknown>;
    for (const key of Object.keys(record)) {
      walkVariables(record[key], `${path}.${key}`, seen);
    }
  }
  // Leaving this subtree: the node is no longer an ancestor, so a later
  // sibling that references the same object must not be flagged as a cycle.
  seen.delete(node);
}

/**
 * Short human-readable label for a value, used in error messages:
 * `"null"`, `"array"`, the `typeof` string for primitives and functions,
 * the constructor name for class instances, and `"object"` for plain
 * objects and objects without a constructor name.
 */
// fallow-ignore-next-line complexity
function describeValue(value: unknown): string {
  if (value === null) return "null";
  if (Array.isArray(value)) return "array";
  if (typeof value !== "object") return typeof value;
  // Class instances expose their constructor name; plain objects fall through
  // to the generic "object" label. `Object.create(null)` has no constructor —
  // treat its absent name the same as "Object" for reporting.
  const ctorName = (value as { constructor?: { name?: string } }).constructor?.name ?? "Object";
  return ctorName === "Object" ? "object" : ctorName;
}

// (diff context) validateIntDimension(field, value) follows here; its body
// continues past the end of the visible hunk and is not part of this change.