diff --git a/README.md b/README.md index 15778fd1..1346da2e 100644 --- a/README.md +++ b/README.md @@ -198,6 +198,8 @@ The script will: A clean result confirms that the compiled JS was built from the declared source. Any differences will be flagged for manual inspection. +Non-minified compiled JS (e.g. Deno `deno task bundle` output, Dart `dart compile js` readable output) is handled differently: a clean rebuild for these tends to produce toolchain-version noise (esbuild/ncc/webpack boilerplate differences) rather than actionable diffs. The script keeps these files in place during the pre-rebuild deletion step and instead diffs them against the previously approved version of the action, so reviewers see real source changes rather than rebuild artifacts. The detection threshold mirrors the comparison heuristic — fewer than 10 lines or an average line length above 500 chars is treated as minified. + #### Security Review Checklist When reviewing an action (new or updated), watch for these potential issues in the source diff between the approved and new versions: diff --git a/utils/tests/verify_action_build/test_diff_js.py b/utils/tests/verify_action_build/test_diff_js.py index a73b1048..813eb2e5 100644 --- a/utils/tests/verify_action_build/test_diff_js.py +++ b/utils/tests/verify_action_build/test_diff_js.py @@ -16,7 +16,9 @@ # specific language governing permissions and limitations # under the License. # -from verify_action_build.diff_js import _collect_compiled_js, beautify_js +from pathlib import Path + +from verify_action_build.diff_js import _collect_compiled_js, beautify_js, diff_js_files class TestBeautifyJs: @@ -76,3 +78,48 @@ def test_nested_dirs(self, tmp_path): from pathlib import Path assert _collect_compiled_js(tmp_path) == {Path("dist/sub/main/index.cjs")} + + +class TestDiffJsKeptFiles: + """Files passed in via kept_files are non-minified bundles that the + Dockerfile intentionally did not delete. The original-vs-rebuilt + comparison for them is replaced by a pointer to the approved-version + diff section, so it must not fail the JS check even when content + differs and must not consult the on-disk files.""" + + def _setup(self, tmp_path: Path) -> tuple[Path, Path]: + original = tmp_path / "original-dist" + rebuilt = tmp_path / "rebuilt-dist" + original.mkdir() + rebuilt.mkdir() + # Original committed to the repo. + (original / "post.js").write_text("// original committed bundle\n" + "x\n" * 50) + # Rebuild produced different content (toolchain noise) — should + # be ignored when the file is in kept_files. + (rebuilt / "post.js").write_text("// rebuilt with newer toolchain\n" + "y\n" * 50) + return original, rebuilt + + def test_kept_file_with_differing_rebuild_does_not_fail(self, tmp_path): + original, rebuilt = self._setup(tmp_path) + + result = diff_js_files( + original, rebuilt, "Org", "Repo", "deadbeef" * 5, + out_dir_name="dist", + kept_files={Path("post.js")}, + ) + + assert result is True + + def test_default_kept_files_none_treats_diff_as_normal(self, tmp_path): + original, rebuilt = self._setup(tmp_path) + + # Without kept_files, the same setup hits the non-minified branch + # which prints a soft warning but still returns True (it isn't a + # hard failure either way). This guards against the kept-files + # plumbing accidentally swallowing differences for *every* file. + result = diff_js_files( + original, rebuilt, "Org", "Repo", "deadbeef" * 5, + out_dir_name="dist", + ) + + assert result is True diff --git a/utils/tests/verify_action_build/test_docker_build.py b/utils/tests/verify_action_build/test_docker_build.py index bea07c3f..46554829 100644 --- a/utils/tests/verify_action_build/test_docker_build.py +++ b/utils/tests/verify_action_build/test_docker_build.py @@ -116,6 +116,19 @@ def test_deno_support_present(self): # Build step invokes the conventional bundle task. assert "deno task bundle" in content + def test_keeps_non_minified_compiled_js(self): + """Pre-rebuild deletion must skip non-minified JS and record kept + paths in /kept-js.log so they're diffed against the previously + approved version instead of the noisy rebuild.""" + content = _read_dockerfile_template() + assert "/kept-js.log" in content + # Minified-detection heuristic must be present and mirror the + # Python is_minified() in diff_js.py: <10 lines OR avg line >500. + assert "wc -l" in content + assert "wc -c" in content + assert '"$lines" -lt 10' in content + assert '"$((chars / lines))" -gt 500' in content + class TestPrintDockerBuildSteps: def test_parses_build_output(self): diff --git a/utils/tests/verify_action_build/test_verification.py b/utils/tests/verify_action_build/test_verification.py index 6e011ef2..0bc53d9b 100644 --- a/utils/tests/verify_action_build/test_verification.py +++ b/utils/tests/verify_action_build/test_verification.py @@ -102,6 +102,7 @@ def _build_in_docker_result(action_type: str = "node20") -> tuple: False, Path("/tmp/fake/original-node-modules"), Path("/tmp/fake/rebuilt-node-modules"), + [], ) diff --git a/utils/verify_action_build/diff_js.py b/utils/verify_action_build/diff_js.py index bb5a8d18..666b605b 100644 --- a/utils/verify_action_build/diff_js.py +++ b/utils/verify_action_build/diff_js.py @@ -55,9 +55,17 @@ def beautify_js(content: str) -> str: def diff_js_files( original_dir: Path, rebuilt_dir: Path, org: str, repo: str, commit_hash: str, out_dir_name: str = "dist", + kept_files: set[Path] | None = None, ) -> bool: - """Diff JS files between original and rebuilt, return True if identical.""" + """Diff JS files between original and rebuilt, return True if identical. + + Files in *kept_files* (paths relative to *out_dir_name*) are skipped here: + they're non-minified bundles where a clean rebuild produces toolchain- + version noise rather than actionable diffs, and are verified by the + approved-vs-new source diff section instead. + """ blob_url = f"https://github.com/{org}/{repo}/blob/{commit_hash}" + kept_files = kept_files or set() # Files vendored by @vercel/ncc that are not built from the action's source. ignored_files = {"sourcemap-register.js"} @@ -120,6 +128,13 @@ def is_minified(content: str) -> bool: file_link = link(f"{blob_url}/{out_dir_name}/{rel_path}", str(rel_path)) + if rel_path in kept_files: + console.print( + f" [yellow]~[/yellow] {file_link} [yellow](non-minified — " + f"kept; diffed against previously-approved version below)[/yellow]" + ) + continue + if rel_path not in original_files: console.print(f" [green]+[/green] {file_link} [dim](only in rebuilt)[/dim]") with console.status(f"[dim]Beautifying {rel_path}...[/dim]"): diff --git a/utils/verify_action_build/diff_source.py b/utils/verify_action_build/diff_source.py index bd3cd588..bd1c1952 100644 --- a/utils/verify_action_build/diff_source.py +++ b/utils/verify_action_build/diff_source.py @@ -25,13 +25,23 @@ from .console import console, run, ask_confirm, UserQuit from .diff_display import show_colored_diff +from .diff_js import beautify_js def diff_approved_vs_new( org: str, repo: str, approved_hash: str, new_hash: str, work_dir: Path, ci_mode: bool = False, + include_dist_files: set[Path] | None = None, ) -> None: - """Diff source files between an approved version and the new version.""" + """Diff source files between an approved version and the new version. + + Files in *include_dist_files* (repo-root-relative paths under dist/, e.g. + ``dist/post.js``) bypass the dist/ exclusion filter and are diffed against + the approved version. Used for non-minified compiled JS that the rebuild + step intentionally skips. JS/CJS/MJS files in this set are beautified + before diffing so the output is readable. + """ + include_dist_files = include_dist_files or set() console.print() console.rule("[bold]Diff: Approved vs New (source changes)[/bold]") @@ -72,7 +82,7 @@ def diff_approved_vs_new( continue rel = f.relative_to(clone_dir) matched = [part for part in rel.parts if part in excluded_dirs] - if matched: + if matched and rel not in include_dist_files: skipped_dirs.update(matched) continue if rel.suffix in source_extensions: @@ -128,6 +138,14 @@ def diff_approved_vs_new( skipped_by_user: list[tuple[Path, str]] = [] quit_all = False + js_extensions = {".js", ".cjs", ".mjs"} + + def _read(path: Path, rel: Path) -> str: + text = path.read_text(errors="replace") + if rel in include_dist_files and rel.suffix in js_extensions: + return beautify_js(text) + return text + for rel_path in all_files: if rel_path.name in lock_files: continue @@ -141,7 +159,7 @@ def diff_approved_vs_new( if rel_path not in approved_files: console.print(f" [cyan]+[/cyan] {rel_path} [dim](new file)[/dim]") - new_content = new_file.read_text(errors="replace") + new_content = _read(new_file, rel_path) result = show_colored_diff(rel_path, "", new_content, from_label="approved", to_label="new", border="cyan", ci_mode=ci_mode) if result == "skip_file": skipped_by_user.append((rel_path, "new file")) @@ -151,7 +169,7 @@ def diff_approved_vs_new( if rel_path not in new_files: console.print(f" [cyan]-[/cyan] {rel_path} [dim](removed)[/dim]") - approved_content = approved_file.read_text(errors="replace") + approved_content = _read(approved_file, rel_path) result = show_colored_diff(rel_path, approved_content, "", from_label="approved", to_label="new", border="cyan", ci_mode=ci_mode) if result == "skip_file": skipped_by_user.append((rel_path, "removed")) @@ -159,8 +177,8 @@ def diff_approved_vs_new( quit_all = True continue - approved_content = approved_file.read_text(errors="replace") - new_content = new_file.read_text(errors="replace") + approved_content = _read(approved_file, rel_path) + new_content = _read(new_file, rel_path) if approved_content == new_content: console.print(f" [green]✓[/green] {rel_path} [green](identical)[/green]") diff --git a/utils/verify_action_build/docker_build.py b/utils/verify_action_build/docker_build.py index ec6f625d..bd4eb8c5 100644 --- a/utils/verify_action_build/docker_build.py +++ b/utils/verify_action_build/docker_build.py @@ -105,7 +105,7 @@ def build_in_docker( show_build_steps: bool = False, approved_hash: str = "", source_commit_hash: str = "", -) -> tuple[Path, Path, str, str, bool, Path, Path]: +) -> tuple[Path, Path, str, str, bool, Path, Path, list[str]]: """Build the action in a Docker container and extract original + rebuilt dist. When *approved_hash* is supplied the Docker build restores package lock files @@ -118,7 +118,12 @@ def build_in_docker( tagged commit is an orphan tree without buildable source. Returns (original_dir, rebuilt_dir, action_type, out_dir_name, - has_node_modules, original_node_modules, rebuilt_node_modules). + has_node_modules, original_node_modules, rebuilt_node_modules, + kept_js_files). + *kept_js_files* contains repo-root-relative paths (e.g. ``dist/post.js``) + of non-minified compiled JS files that were preserved during the + pre-rebuild deletion step — these are diffed against the previously + approved version instead of the rebuild. """ repo_url = f"https://github.com/{org}/{repo}.git" container_name = f"verify-action-{org}-{repo}-{commit_hash[:12]}" @@ -246,8 +251,27 @@ def build_in_docker( console.print(f" [yellow]![/yellow] No {out_dir_name}/ directory found before rebuild") else: deleted_files = [l for l in log_content.splitlines() if l.strip()] - console.print(f" [green]✓[/green] Deleted {len(deleted_files)} compiled JS file(s) before rebuild:") - for f in deleted_files: + if deleted_files: + console.print(f" [green]✓[/green] Deleted {len(deleted_files)} compiled JS file(s) before rebuild:") + for f in deleted_files: + console.print(f" [dim]- {f}[/dim]") + + kept_js_files: list[str] = [] + kept_log = subprocess.run( + ["docker", "cp", f"{container_name}:/kept-js.log", str(work_dir / "kept-js.log")], + capture_output=True, + ) + if kept_log.returncode == 0: + kept_js_files = [ + l for l in (work_dir / "kept-js.log").read_text().strip().splitlines() + if l.strip() + ] + if kept_js_files: + console.print( + f" [green]✓[/green] Kept {len(kept_js_files)} non-minified JS file(s) " + f"(diffed against previously-approved version, not rebuild):" + ) + for f in kept_js_files: console.print(f" [dim]- {f}[/dim]") action_type_result = subprocess.run( @@ -299,4 +323,5 @@ def build_in_docker( console.print(" [green]✓[/green] Cleanup complete") return (original_dir, rebuilt_dir, action_type, out_dir_name, - has_node_modules, original_node_modules, rebuilt_node_modules) + has_node_modules, original_node_modules, rebuilt_node_modules, + kept_js_files) diff --git a/utils/verify_action_build/dockerfiles/build_action.Dockerfile b/utils/verify_action_build/dockerfiles/build_action.Dockerfile index 34e94e34..fefd77fd 100644 --- a/utils/verify_action_build/dockerfiles/build_action.Dockerfile +++ b/utils/verify_action_build/dockerfiles/build_action.Dockerfile @@ -139,9 +139,30 @@ RUN if [ -d "node_modules" ]; then \ # Delete compiled JS from output dir before rebuild to ensure a clean build. # Covers .js, .cjs and .mjs — actions bundled with esbuild/rollup may emit # dist/index.cjs (e.g. JustinBeckwith/linkinator-action) or dist/index.mjs. +# +# Non-minified bundles (e.g. Deno's deno task bundle output, dart compile +# js's readable output) are kept in place: a clean rebuild for them tends +# to produce toolchain-version noise (esbuild/ncc/webpack boilerplate +# differences) that isn't actionable for review. They're verified by +# diffing the committed file against the previously-approved version +# instead — see diff_source.py / verification.py. +# +# Mirrors the Python is_minified() heuristic in diff_js.py: <10 lines OR +# average line length >500 chars. RUN OUT_DIR=$(cat /out-dir.txt); \ if [ -d "$OUT_DIR" ]; then \ - find "$OUT_DIR" \( -name '*.js' -o -name '*.cjs' -o -name '*.mjs' \) -print -delete > /deleted-js.log 2>&1; \ + : > /deleted-js.log; \ + : > /kept-js.log; \ + find "$OUT_DIR" \( -name '*.js' -o -name '*.cjs' -o -name '*.mjs' \) -type f | while IFS= read -r f; do \ + lines=$(wc -l < "$f"); \ + chars=$(wc -c < "$f"); \ + if [ "$lines" -lt 10 ] || { [ "$lines" -gt 0 ] && [ "$((chars / lines))" -gt 500 ]; }; then \ + echo "$f" >> /deleted-js.log; \ + rm -f "$f"; \ + else \ + echo "$f" >> /kept-js.log; \ + fi; \ + done; \ else \ echo "no $OUT_DIR/ directory" > /deleted-js.log; \ fi diff --git a/utils/verify_action_build/verification.py b/utils/verify_action_build/verification.py index 563bd74a..763c92db 100644 --- a/utils/verify_action_build/verification.py +++ b/utils/verify_action_build/verification.py @@ -227,12 +227,24 @@ def verify_single_action( with tempfile.TemporaryDirectory(prefix="verify-action-") as tmp: work_dir = Path(tmp) (original_dir, rebuilt_dir, action_type, out_dir_name, - has_node_modules, original_node_modules, rebuilt_node_modules) = build_in_docker( + has_node_modules, original_node_modules, rebuilt_node_modules, + kept_js_files) = build_in_docker( org, repo, commit_hash, work_dir, sub_path=sub_path, gh=gh, cache=cache, show_build_steps=show_build_steps, source_commit_hash=source_commit_hash, ) + # Paths from /kept-js.log are repo-root-relative (e.g. "dist/post.js"). + # diff_js indexes by paths relative to out_dir_name; diff_source indexes + # by paths relative to repo root. Keep both forms. + kept_repo_paths: set[Path] = {Path(p) for p in kept_js_files} + kept_out_dir_paths: set[Path] = set() + for p in kept_repo_paths: + try: + kept_out_dir_paths.add(p.relative_to(out_dir_name)) + except ValueError: + kept_out_dir_paths.add(p) + checks_performed.append(("Action type detection", "info", action_type)) if source_detached_detail: checks_performed.append(( @@ -372,6 +384,7 @@ def verify_single_action( else: all_match = diff_js_files( original_dir, rebuilt_dir, org, repo, commit_hash, out_dir_name, + kept_files=kept_out_dir_paths, ) # If no compiled JS was found in dist/ but node_modules is vendored, @@ -409,7 +422,7 @@ def verify_single_action( retry_dir = work_dir / "retry" retry_dir.mkdir(exist_ok=True) (retry_orig, retry_rebuilt, _, _, retry_has_nm, - retry_orig_nm, retry_rebuilt_nm) = build_in_docker( + retry_orig_nm, retry_rebuilt_nm, _) = build_in_docker( org, repo, commit_hash, retry_dir, sub_path=sub_path, gh=gh, cache=cache, show_build_steps=show_build_steps, approved_hash=prev_hash, @@ -417,6 +430,7 @@ def verify_single_action( retry_match = diff_js_files( retry_orig, retry_rebuilt, org, repo, commit_hash, out_dir_name, + kept_files=kept_out_dir_paths, ) if retry_has_nm: retry_nm = diff_node_modules( @@ -462,7 +476,10 @@ def verify_single_action( selected_hash = show_approved_versions(org, repo, commit_hash, approved, gh=gh, ci_mode=ci_mode) if selected_hash: show_commits_between(org, repo, selected_hash, commit_hash, gh=gh) - diff_approved_vs_new(org, repo, selected_hash, commit_hash, work_dir, ci_mode=ci_mode) + diff_approved_vs_new( + org, repo, selected_hash, commit_hash, work_dir, + ci_mode=ci_mode, include_dist_files=kept_repo_paths, + ) checks_performed.append(("Source diff vs approved", "info", f"compared against {selected_hash[:12]}")) else: checks_performed.append(("Approved versions", "info", "new action (none on file)"))