From 250ff57b3f4b9f9aa2039d833433678b960d71bd Mon Sep 17 00:00:00 2001 From: Nathan Rajlich Date: Fri, 22 May 2026 11:02:42 -0700 Subject: [PATCH 1/3] [ci] Attribute backport changelog entries to original PR author (#2091) * [ci] Attribute backport changelog entries to original PR author `@changesets/changelog-github` resolves a changeset commit to its associated PR via the GitHub GraphQL `associatedPullRequests` field. For commits landed on `stable` via our backport workflow, that resolves to the backport PR authored by `github-actions[bot]` (since the backport workflow uses `createCommitOnBranch` to produce signed commits), so the generated changelog ends up with "Thanks @github-actions!" instead of the original contributor. This adds a small `.changeset/changelog.mjs` wrapper around `@changesets/changelog-github` that detects backport PRs by matching the title (`Backport #N: ...`) or body (`Automated backport of #N to ` + '`stable`' + `...`) produced by `.github/workflows/backport.yml`, resolves the original PR number, and injects `pr:`/`commit:` directives into the changeset summary before delegating to the upstream generator. The result is that the rendered changelog entry attributes the change to the original PR and author, while the commit link still points at the backport commit on the release branch. * Address review feedback - Use `Bearer` instead of `Token` for the GitHub GraphQL auth header for consistency with the rest of the repo (Copilot review on PR #2091). - Add a defensive `formatError` helper so `console.warn` in the catch blocks doesn't itself throw when a non-Error value is thrown (Copilot review on PR #2091). --- .changeset/changelog.mjs | 271 +++++++++++++++++++++++++++++++++++++++ .changeset/config.json | 2 +- 2 files changed, 272 insertions(+), 1 deletion(-) create mode 100644 .changeset/changelog.mjs diff --git a/.changeset/changelog.mjs b/.changeset/changelog.mjs new file mode 100644 index 0000000000..de884a8724 --- /dev/null +++ b/.changeset/changelog.mjs @@ -0,0 +1,271 @@ +// Custom changelog generator that wraps `@changesets/changelog-github`. +// +// Why this exists +// --------------- +// Our backport workflow (.github/workflows/backport.yml) replays commits +// from `main` onto `stable` using GitHub's GraphQL `createCommitOnBranch` +// mutation so the resulting commits are signed by GitHub's internal key. +// A consequence is that the backport commit, and the PR that wraps it, +// are both attributed to `github-actions[bot]` — not the original author. +// +// `@changesets/changelog-github` resolves the author/PR of a changeset +// commit via the GraphQL `associatedPullRequests` field. For commits that +// landed on `stable` via the backport flow, that resolves to the backport +// PR (e.g. "Backport #2046: ...") authored by `github-actions[bot]`, and +// the rendered changelog ends up with "Thanks @github-actions!". +// +// What this wrapper does +// ---------------------- +// 1. For each changeset commit, look up its associated PR title and body. +// 2. Detect backport PRs by matching the title (`Backport #N: ...`) or +// body (`Automated backport of #N to \`stable\``) — both formats are +// produced by .github/workflows/backport.yml. +// 3. When a backport PR is detected, inject `pr: ` and +// `commit: ` lines into the changeset summary before +// handing off to the upstream changelog generator. Those lines are a +// documented `@changesets/changelog-github` feature +// (https://github.com/changesets/changesets/blob/main/packages/changelog-github/README.md#usage) +// and trigger a PR-number-based lookup, which attributes the entry to +// the original PR's author while keeping the commit link pointing at +// the backport commit on the release branch. +// 4. Falls back to plain delegation when no backport is detected, when +// the commit has no associated PR, or when any lookup fails — we never +// want a flaky network call to break `pnpm changeset version`. +// +// Note: this file is loaded by `@changesets/apply-release-plan` via a +// synchronous `require()`. Node 22.12+ supports `require()`'ing ESM +// modules with static imports, which we rely on (the release workflow +// uses Node 24). If you add a dynamic import or a top-level await here, +// the loader will throw. + +import changelogGithub from '@changesets/changelog-github'; + +const upstream = changelogGithub.default ?? changelogGithub; + +// Match the PR titles and bodies produced by .github/workflows/backport.yml. +// Title examples: +// "Backport #2046: Report corrupted event logs distinctly" +// "Backport a1b2c3d: " (when no source PR was associated) +// Body examples: +// "Automated backport of #2046 to `stable` ..." +// "Automated backport of a1b2c3def456 to `stable` ..." +const BACKPORT_TITLE_PR_RE = /^Backport\s+#(\d+):/i; +const BACKPORT_BODY_PR_RE = /Automated\s+backport\s+of\s+#(\d+)\s+to\s+`stable`/i; + +function readEnv() { + return { + GITHUB_GRAPHQL_URL: + process.env.GITHUB_GRAPHQL_URL || 'https://api.github.com/graphql', + GITHUB_TOKEN: process.env.GITHUB_TOKEN, + }; +} + +// Lightweight in-process cache so multiple changesets pointing at the +// same commit don't trigger duplicate GraphQL lookups. The values are +// promises so concurrent callers share a single in-flight request. +const commitLookupCache = new Map(); +const prLookupCache = new Map(); + +// Defensive error formatter — we don't want a non-Error throw (e.g. +// `throw null`, or a non-conforming Error from a transport polyfill) to +// turn a benign GitHub lookup failure into a hard crash inside the catch +// block itself. +function formatError(err) { + return err instanceof Error ? err.message : String(err); +} + +async function githubGraphql(query) { + const { GITHUB_GRAPHQL_URL, GITHUB_TOKEN } = readEnv(); + if (!GITHUB_TOKEN) { + // No token means the upstream `getInfo` call would have failed too — + // let it surface the error so behavior is consistent with the + // unwrapped generator. + return null; + } + const res = await fetch(GITHUB_GRAPHQL_URL, { + method: 'POST', + headers: { + Authorization: `Bearer ${GITHUB_TOKEN}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ query }), + }); + if (!res.ok) { + throw new Error( + `GitHub GraphQL request failed: ${res.status} ${res.statusText}`, + ); + } + const json = await res.json(); + if (json.errors) { + throw new Error( + `GitHub GraphQL request returned errors: ${JSON.stringify(json.errors)}`, + ); + } + return json.data; +} + +// Returns the associated PR (number, title, body) for a commit, or null +// if no PR is associated / the lookup failed. +function lookupCommitPR(repo, commit) { + const cacheKey = `${repo}@${commit}`; + if (!commitLookupCache.has(cacheKey)) { + commitLookupCache.set( + cacheKey, + (async () => { + const [owner, name] = repo.split('/'); + const query = ` + query { + repository(owner: ${JSON.stringify(owner)}, name: ${JSON.stringify(name)}) { + object(expression: ${JSON.stringify(commit)}) { + ... on Commit { + associatedPullRequests(first: 10) { + nodes { + number + title + body + mergedAt + } + } + } + } + } + } + `; + try { + const data = await githubGraphql(query); + const nodes = + data?.repository?.object?.associatedPullRequests?.nodes ?? []; + if (nodes.length === 0) return null; + // Only consider *merged* PRs — open PRs that happen to share + // a commit (e.g. a draft backport branched off the same SHA) + // shouldn't influence attribution. Then sort ascending by + // `mergedAt` (oldest first) to match `@changesets/get-github-info`'s + // selection logic: the oldest merged PR is the one that *first* + // introduced the commit to the repository. + const merged = nodes + .filter((n) => n.mergedAt !== null) + .sort((a, b) => + new Date(a.mergedAt) > new Date(b.mergedAt) ? 1 : -1, + ); + if (merged.length === 0) return null; + return merged[0]; + } catch (err) { + console.warn( + `[changelog] failed to look up associated PR for ${commit}: ${formatError(err)}`, + ); + return null; + } + })(), + ); + } + return commitLookupCache.get(cacheKey); +} + +// Returns { number, title, body } for a PR number, or null if the lookup +// failed. +function lookupPR(repo, prNumber) { + const cacheKey = `${repo}#${prNumber}`; + if (!prLookupCache.has(cacheKey)) { + prLookupCache.set( + cacheKey, + (async () => { + const [owner, name] = repo.split('/'); + const query = ` + query { + repository(owner: ${JSON.stringify(owner)}, name: ${JSON.stringify(name)}) { + pullRequest(number: ${prNumber}) { + number + title + body + } + } + } + `; + try { + const data = await githubGraphql(query); + return data?.repository?.pullRequest ?? null; + } catch (err) { + console.warn( + `[changelog] failed to look up PR #${prNumber}: ${formatError(err)}`, + ); + return null; + } + })(), + ); + } + return prLookupCache.get(cacheKey); +} + +// Returns the original PR number if `pr` is a backport PR, else null. +// Recurses one level in case of unusual chains (a backport of a backport), +// bounded so we can never loop on cycles in the PR graph. +async function resolveOriginalPR(repo, pr, depth = 0) { + if (!pr || depth > 3) return null; + const titleMatch = pr.title?.match(BACKPORT_TITLE_PR_RE); + const bodyMatch = pr.body?.match(BACKPORT_BODY_PR_RE); + const originalPRNumber = titleMatch + ? Number(titleMatch[1]) + : bodyMatch + ? Number(bodyMatch[1]) + : null; + if (!originalPRNumber || originalPRNumber === pr.number) return null; + const originalPR = await lookupPR(repo, originalPRNumber); + if (!originalPR) return originalPRNumber; // best-effort: still attribute + // If the "original" is itself a backport (unusual but defensible), + // peel one more layer. + const deeper = await resolveOriginalPR(repo, originalPR, depth + 1); + return deeper ?? originalPR.number; +} + +// Mutate a changeset summary by injecting `pr:`, `commit:` lines if and +// only if the commit's associated PR is a backport. Returns a new +// changeset object (does not modify the caller's input). +async function maybeRewriteChangesetForBackport(changeset, options) { + if (!changeset.commit) return changeset; + if (!options || !options.repo) return changeset; + + // Don't rewrite if the user already supplied explicit `pr:` / `commit:` + // / `author:` directives in the summary — they win. + const summary = changeset.summary ?? ''; + if ( + /^\s*(?:pr|pull|pull\s+request|commit|author|user):/im.test(summary) + ) { + return changeset; + } + + const pr = await lookupCommitPR(options.repo, changeset.commit); + if (!pr) return changeset; + + const originalPRNumber = await resolveOriginalPR(options.repo, pr); + if (!originalPRNumber) return changeset; + + // Inject directives at the top of the summary. The upstream parser + // strips them out before rendering the body, so they won't appear in + // the final changelog. + const rewrittenSummary = + `pr: #${originalPRNumber}\n` + + `commit: ${changeset.commit}\n` + + summary; + + return { ...changeset, summary: rewrittenSummary }; +} + +export async function getDependencyReleaseLine( + changesets, + dependenciesUpdated, + options, +) { + // For the dependency-updates roll-up line, the upstream generator + // only renders commit links (no "Thanks" attribution), so we don't + // need any rewriting here. + return upstream.getDependencyReleaseLine( + changesets, + dependenciesUpdated, + options, + ); +} + +export async function getReleaseLine(changeset, type, options) { + const rewritten = await maybeRewriteChangesetForBackport(changeset, options); + return upstream.getReleaseLine(rewritten, type, options); +} diff --git a/.changeset/config.json b/.changeset/config.json index a485b05cd9..f821bf7030 100644 --- a/.changeset/config.json +++ b/.changeset/config.json @@ -1,7 +1,7 @@ { "$schema": "https://unpkg.com/@changesets/config@3.1.1/schema.json", "changelog": [ - "@changesets/changelog-github", + "./changelog.mjs", { "repo": "vercel/workflow" } From c597ad999d6d05844cd2ba27f8f3608adeed29b3 Mon Sep 17 00:00:00 2001 From: Nathan Rajlich Date: Fri, 22 May 2026 12:53:10 -0700 Subject: [PATCH 2/3] fix(workbench): cache .vercel/output for Next workbench builds (#2095) Turborepo replays nextjs-turbopack:build from cache without restoring the Vercel diagnostics manifest (.vercel/output/diagnostics/workflows-manifest.json), which causes the Vercel deployment to fail post-build. Add .vercel/output/** to the workbench's Turbo outputs so it is persisted and replayed. Applies to both nextjs-turbopack and nextjs-webpack (whose turbo.json is a symlink). --- .changeset/turbo-next-workbench-vercel-output.md | 4 ++++ workbench/nextjs-turbopack/turbo.json | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 .changeset/turbo-next-workbench-vercel-output.md diff --git a/.changeset/turbo-next-workbench-vercel-output.md b/.changeset/turbo-next-workbench-vercel-output.md new file mode 100644 index 0000000000..9c9e519edf --- /dev/null +++ b/.changeset/turbo-next-workbench-vercel-output.md @@ -0,0 +1,4 @@ +--- +--- + +Include `.vercel/output/**` in the Next workbench Turbo outputs so cached builds restore the Vercel diagnostics manifest (`.vercel/output/diagnostics/workflows-manifest.json`), which the Vercel platform requires to finalize the deployment. diff --git a/workbench/nextjs-turbopack/turbo.json b/workbench/nextjs-turbopack/turbo.json index 8e5ade6937..cb5aca5ff1 100644 --- a/workbench/nextjs-turbopack/turbo.json +++ b/workbench/nextjs-turbopack/turbo.json @@ -8,7 +8,8 @@ "!.next/cache/**", "_workflows.ts", "app/.well-known/workflow/**", - "public/.well-known/workflow/**" + "public/.well-known/workflow/**", + ".vercel/output/**" ] } } From 657e8bb9629e7002c7658b98c32761e01e714474 Mon Sep 17 00:00:00 2001 From: Nathan Rajlich Date: Fri, 22 May 2026 13:49:41 -0700 Subject: [PATCH 3/3] Forward-port #1920: tighten world-local ID validation (#2097) * fix(world-local): tighten ID validation Forward-port of code review fixes from #1920 (backport of #1829): - Reject dots inside entity IDs in `assertSafeEntityId`. Internal IDs (ULIDs, step_N, etc.) never contain dots, but `stripTag()` / `getObjectCreatedAt()` strip a trailing `.[tag]` suffix from filenames, so a request-supplied runId like `wrun_123.foo` would be silently mangled during listing/pagination. - Reject empty `correlationId` on events that include one. The event schemas only require `z.string()`, so without this check a step_created / hook_created / wait_created request with `correlationId: ''` would silently be written under a malformed composite key like `${runId}-`. The streamer regression tests from #1920 are not forward-ported because main's streamer surface differs (renamed methods, separate test coverage) from stable's v4 shape. * simplify changeset --- .../world-local-tighten-id-validation.md | 5 ++ packages/world-local/src/fs.test.ts | 12 ++++- packages/world-local/src/fs.ts | 8 +++- packages/world-local/src/storage.test.ts | 48 +++++++++++++++++++ .../world-local/src/storage/events-storage.ts | 12 +++-- 5 files changed, 76 insertions(+), 9 deletions(-) create mode 100644 .changeset/world-local-tighten-id-validation.md diff --git a/.changeset/world-local-tighten-id-validation.md b/.changeset/world-local-tighten-id-validation.md new file mode 100644 index 0000000000..f83a553506 --- /dev/null +++ b/.changeset/world-local-tighten-id-validation.md @@ -0,0 +1,5 @@ +--- +'@workflow/world-local': patch +--- + +Reject dots and empty correlationId values in entity ID validation. diff --git a/packages/world-local/src/fs.test.ts b/packages/world-local/src/fs.test.ts index 1acd79ecc0..f02f0bcccc 100644 --- a/packages/world-local/src/fs.test.ts +++ b/packages/world-local/src/fs.test.ts @@ -861,11 +861,11 @@ describe('fs utilities', () => { 'vitest-0', // tag 'strm_01ARZ3_user', // stream id with underscores 'strm_01ARZ3_user_bmFtZXNwYWNl', // stream id with base64url namespace - 'wrun_ABC.vitest-0', // tagged file id 'a', // minimal valid value ]; - // Values that should be rejected: real-world path traversal attempts. + // Values that should be rejected: real-world path traversal attempts + // plus dotted inputs that would confuse stripTag()/getObjectCreatedAt(). const unsafeIds = [ '', '.', @@ -883,6 +883,14 @@ describe('fs utilities', () => { 'foo\0bar', // null byte 'a/../b', 'a\\..\\b', + // Dots in entity IDs would be misparsed by stripTag(), which strips + // a trailing `.[tag]` suffix from filenames. A runId like + // `wrun_123.foo` would be silently mangled to `wrun_123` during + // listing/pagination, breaking lookups for tagged file handling. + 'wrun_ABC.vitest-0', + 'wrun_123.foo', + 'foo.bar', + 'wrun_ABC.', ]; for (const id of safeIds) { diff --git a/packages/world-local/src/fs.ts b/packages/world-local/src/fs.ts index 5cccf744a0..acfcc17e78 100644 --- a/packages/world-local/src/fs.ts +++ b/packages/world-local/src/fs.ts @@ -28,7 +28,7 @@ function truncateForError(value: unknown): string { export class UnsafeEntityIdError extends WorkflowWorldError { constructor(kind: string, value: string) { super( - `Unsafe ${kind} "${truncateForError(value)}": must not be empty, start with ".", or contain path separators or null bytes` + `Unsafe ${kind} "${truncateForError(value)}": must not be empty, contain ".", "/", "\\", or null bytes` ); this.name = 'UnsafeEntityIdError'; } @@ -44,6 +44,9 @@ export class UnsafeEntityIdError extends WorkflowWorldError { * - empty * - starting with `.` (blocks `.`, `..`, `.locks`, `.tmp`, and other * hidden or reserved filenames) + * - containing `.` (would collide with the `.tag` / `.json` suffix the + * tagging logic strips from filenames; see {@link stripTag} / + * {@link getObjectCreatedAt}) * - containing `/`, `\`, or a NUL byte * * This is the primary defense against path-traversal attacks where a @@ -62,7 +65,8 @@ export function assertSafeEntityId(kind: string, value: string): void { value.startsWith('.') || value.includes('/') || value.includes('\\') || - value.includes('\0') + value.includes('\0') || + value.includes('.') ) { throw new UnsafeEntityIdError(kind, value); } diff --git a/packages/world-local/src/storage.test.ts b/packages/world-local/src/storage.test.ts index d4dffa4975..c885f16132 100644 --- a/packages/world-local/src/storage.test.ts +++ b/packages/world-local/src/storage.test.ts @@ -3165,6 +3165,54 @@ describe('Storage', () => { ).rejects.toThrow(/Unsafe correlationId/); }); + // Empty correlationId would be accepted by the event schema (which only + // requires `z.string()`) and produce composite keys like `${runId}-`, + // leaving malformed entities/events in storage. Reject it up front. + it('rejects empty correlationId on step_created', async () => { + const run = await createRun(storage, { + deploymentId: 'deployment-123', + workflowName: 'test-workflow', + input: new Uint8Array(), + }); + await expect( + storage.events.create(run.runId, { + eventType: 'step_created', + correlationId: '', + eventData: { stepName: 'step', input: new Uint8Array() }, + }) + ).rejects.toThrow(/Unsafe correlationId/); + }); + + it('rejects empty correlationId on hook_created', async () => { + const run = await createRun(storage, { + deploymentId: 'deployment-123', + workflowName: 'test-workflow', + input: new Uint8Array(), + }); + await expect( + storage.events.create(run.runId, { + eventType: 'hook_created', + correlationId: '', + eventData: { token: 'tok' }, + }) + ).rejects.toThrow(/Unsafe correlationId/); + }); + + it('rejects empty correlationId on wait_created', async () => { + const run = await createRun(storage, { + deploymentId: 'deployment-123', + workflowName: 'test-workflow', + input: new Uint8Array(), + }); + await expect( + storage.events.create(run.runId, { + eventType: 'wait_created', + correlationId: '', + eventData: { resumeAt: new Date(Date.now() + 1000) }, + }) + ).rejects.toThrow(/Unsafe correlationId/); + }); + it('rejects traversal hookId on hooks.get', async () => { await expect(storage.hooks.get('../escape')).rejects.toThrow( /Unsafe hookId/ diff --git a/packages/world-local/src/storage/events-storage.ts b/packages/world-local/src/storage/events-storage.ts index 6bdd9259b5..91ebc82942 100644 --- a/packages/world-local/src/storage/events-storage.ts +++ b/packages/world-local/src/storage/events-storage.ts @@ -138,14 +138,16 @@ export function createEventsStorage( // attacks where a client supplies runId / correlationId values like // "../../../package" to read or write files outside the storage root. // Run before taking the per-step mutex so malformed inputs fail fast. + // + // Empty `correlationId` values are also rejected here: the event + // schemas only require `z.string()`, so without this check a + // step_created / hook_created / wait_created request with + // `correlationId: ''` would silently be written under a malformed + // composite key like `${runId}-`. if (runId != null && runId !== '') { assertSafeEntityId('runId', runId); } - if ( - 'correlationId' in data && - typeof data.correlationId === 'string' && - data.correlationId.length > 0 - ) { + if ('correlationId' in data && typeof data.correlationId === 'string') { assertSafeEntityId('correlationId', data.correlationId); }