diff --git a/.github/workflows/review-pr.yml b/.github/workflows/review-pr.yml index 1fb8b6f..21f100b 100644 --- a/.github/workflows/review-pr.yml +++ b/.github/workflows/review-pr.yml @@ -28,6 +28,11 @@ on: required: false type: string default: "" + exclude-paths: + description: 'Newline-separated list of path prefixes to strip from the diff before chunking. Files matching these prefixes are excluded from review entirely.' + required: false + type: string + default: '' trigger-run-id: description: "Workflow run ID from pr-review-trigger.yml — used to download the event context artifact. When set, resolve-context job downloads the artifact and routes to the appropriate job." required: false @@ -353,6 +358,7 @@ jobs: comment-id: ${{ inputs.comment-id || github.event.comment.id }} additional-prompt: ${{ inputs.additional-prompt }} add-prompt-files: ${{ inputs.add-prompt-files }} + exclude-paths: ${{ inputs.exclude-paths }} model: ${{ inputs.model }} github-token: ${{ env.GITHUB_APP_TOKEN || github.token }} anthropic-api-key: ${{ env.ANTHROPIC_API_KEY_FROM_SSM || secrets.ANTHROPIC_API_KEY }} diff --git a/AGENTS.md b/AGENTS.md index a9f1786..20b11f6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -39,6 +39,14 @@ Anything else here (workflows under `.github/workflows/`, scripts, tests) exists │ │ ├── aws-credentials.ts │ │ ├── github-app.ts # Reads docker-agent-action/github-app from Secrets Manager; exports GITHUB_APP_TOKEN (a PAT) + ORG_MEMBERSHIP_TOKEN. │ │ └── __tests__/ +│ ├── filter-diff/ # Strips excluded-path sections from a unified diff. +│ │ ├── index.ts # CLI entry → bundled to dist/filter-diff.js +│ │ ├── filter-diff.ts # Core filterDiff() pure function + applyFilter() I/O wrapper. +│ │ └── __tests__/ +│ ├── score-risk/ # Per-file risk scoring for the PR review pipeline. +│ │ ├── index.ts # CLI entry → bundled to dist/score-risk.js +│ │ ├── score-risk.ts # Core scoreFiles() pure function. +│ │ └── __tests__/ │ ├── get-pr-meta/ # Fetches PR metadata (title, body, author, base branch) used by review-pr. │ │ ├── index.ts # Entry → bundled to dist/get-pr-meta.js │ │ └── __tests__/ @@ -109,6 +117,7 @@ Anything else here (workflows under `.github/workflows/`, scripts, tests) exists ### TypeScript / `src` rules - Only `src//index.ts` files listed in the explicit `entry` map in `tsup.config.ts` are bundled to `dist/.js`. To add a new action entrypoint, create `src//index.ts` **and** add it to the `entry` map in `tsup.config.ts`. Pure library modules that are only imported by other actions (e.g. `add-reaction`, `check-org-membership`, `get-pr-meta`, `post-comment`) should **not** be added to the entry map — they get bundled into their consumer automatically. +- **New logic in composite actions must be implemented as TypeScript in `src/` with Vitest unit tests — not as inline bash, awk, or other scripting languages embedded in YAML files.** Shell steps in action YAML files should only orchestrate calls to `dist/*.js` tools (e.g. `node "$ACTION_PATH/dist/filter-diff.js" pr.diff "$EXCLUDE_PATHS"`). This keeps business logic testable, type-safe, and auditable outside the YAML layer. - `tsup` runs with `noExternal: [/.*/]` — **all npm dependencies are bundled in**. Do not assume `node_modules` exists at runtime. - Target is `node24`, ESM only, Node platform (so AWS SDK uses the Node export, not browser). - Sourcemaps are intentionally disabled (consumers clone `dist/`; sourcemaps would bloat every checkout). diff --git a/review-pr/action.yml b/review-pr/action.yml index ca04d0e..da68c1d 100644 --- a/review-pr/action.yml +++ b/review-pr/action.yml @@ -59,6 +59,10 @@ inputs: description: "Skip the built-in authorization check (caller already verified auth)" required: false default: "false" + exclude-paths: + description: 'Newline-separated list of path prefixes to strip from the diff before chunking. Files matching these prefixes are excluded from review entirely.' + required: false + default: '' outputs: exit-code: @@ -233,6 +237,14 @@ runs: fi fi + - name: Filter excluded paths from diff + if: hashFiles('pr.diff') != '' && inputs.exclude-paths != '' + shell: bash + env: + EXCLUDE_PATHS: ${{ inputs.exclude-paths }} + run: | + node "$ACTION_PATH/dist/filter-diff.js" pr.diff "$EXCLUDE_PATHS" + - name: Split diff into chunks if: hashFiles('pr.diff') != '' id: chunk-diff @@ -307,63 +319,11 @@ runs: - name: Score file risk if: hashFiles('pr.diff') != '' shell: bash + env: + EXCLUDE_PATHS: ${{ inputs.exclude-paths }} run: | - # Extract per-file diff stats (added lines, hunk count) - awk ' - /^diff --git/ { - if (file != "") print file, added, hunks - file = $0; sub(/.*b\//, "", file) - added = 0; hunks = 0 - } - /^@@/ { hunks++ } - /^\+[^+]/ { added++ } - END { if (file != "") print file, added, hunks } - ' pr.diff > /tmp/file_diff_stats.txt - - # Build final scores - jq -n '{}' > /tmp/file_risk_scores.json - while read -r file added hunks; do - SCORE=0 - - # Security-sensitive paths: +2 - if echo "$file" | grep -qiE 'auth|security|crypto|session|secret|token|password|credential'; then - SCORE=$((SCORE + 2)) - fi - - # Large change (>100 added lines): +2 - if [ "$added" -gt 100 ]; then - SCORE=$((SCORE + 2)) - fi - - # Many hunks (>3): +1 - if [ "$hunks" -gt 3 ]; then - SCORE=$((SCORE + 1)) - fi - - # Test/doc/config files: score = 0 - if echo "$file" | grep -qiE '_test\.go$|\.test\.[tj]sx?$|\.spec\.[tj]sx?$|test_.*\.py$|\.md$|\.ya?ml$|\.json$|\.toml$'; then - SCORE=0 - fi - - # Error handling patterns in diff for this file: +1 - ERROR_LINES=$(awk -v f="$file" ' - /^diff --git/ { cur=$0; sub(/.*b\//, "", cur) } - cur == f && /^\+[^+]/ && /catch|rescue|except|recover|error|panic/ { count++ } - END { print count+0 } - ' pr.diff) - if [ "$ERROR_LINES" -gt 0 ]; then - SCORE=$((SCORE + 1)) - fi - - if jq --arg f "$file" --argjson s "$SCORE" '.[$f] = $s' /tmp/file_risk_scores.json > /tmp/file_risk_scores.tmp; then - mv /tmp/file_risk_scores.tmp /tmp/file_risk_scores.json - else - echo "⚠️ Failed to score $file, skipping" - rm -f /tmp/file_risk_scores.tmp - fi - done < /tmp/file_diff_stats.txt - - rm -f /tmp/file_diff_stats.txt + set -euo pipefail + node "$ACTION_PATH/dist/score-risk.js" pr.diff "$EXCLUDE_PATHS" echo "✅ File risk scores: $(jq -c . /tmp/file_risk_scores.json)" - name: Generate file history diff --git a/src/filter-diff/__tests__/filter-diff.test.ts b/src/filter-diff/__tests__/filter-diff.test.ts new file mode 100644 index 0000000..39937c6 --- /dev/null +++ b/src/filter-diff/__tests__/filter-diff.test.ts @@ -0,0 +1,289 @@ +/** + * Unit tests for src/filter-diff. + * + * The test suite is split into two layers: + * + * 1. filterDiff() — pure function tests covering every diff section type. + * These run entirely in-memory and have no filesystem dependencies. + * + * 2. applyFilter() — I/O integration tests that write real temp files and + * verify the in-place rewrite / deletion behaviour. + */ +import { existsSync, readFileSync } from 'node:fs'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { applyFilter, filterDiff, parseExcludePrefixes } from '../filter-diff.js'; + +// ── Fixtures ───────────────────────────────────────────────────────────────── + +/** Standard modification diff for a file in the excluded prefix. */ +const MOD_EXCLUDED = `${[ + 'diff --git a/backend/gen/foo.pb.go b/backend/gen/foo.pb.go', + 'index abc..def 100644', + '--- a/backend/gen/foo.pb.go', + '+++ b/backend/gen/foo.pb.go', + '@@ -1,2 +1,3 @@', + ' existing', + '+generated', +].join('\n')}\n`; + +/** Deletion diff: `+++ /dev/null` — the path is only available via `--- a/`. */ +const DEL_EXCLUDED = `${[ + 'diff --git a/backend/gen/old.pb.go b/backend/gen/old.pb.go', + 'deleted file mode 100644', + 'index abc..0000000', + '--- a/backend/gen/old.pb.go', + '+++ /dev/null', + '@@ -1,2 +0,0 @@', + '-line1', + '-line2', +].join('\n')}\n`; + +/** Pure rename at 100% similarity: no `---` or `+++` lines at all. */ +const RENAME_EXCLUDED = `${[ + 'diff --git a/backend/gen/old.pb.go b/backend/gen/new.pb.go', + 'similarity index 100%', + 'rename from backend/gen/old.pb.go', + 'rename to backend/gen/new.pb.go', +].join('\n')}\n`; + +/** Modification diff for a file NOT in the excluded prefix. */ +const MOD_KEPT = `${[ + 'diff --git a/src/real.go b/src/real.go', + 'index abc..def 100644', + '--- a/src/real.go', + '+++ b/src/real.go', + '@@ -1,1 +1,2 @@', + ' existing', + '+new line', +].join('\n')}\n`; + +/** New-file diff (--- /dev/null): path only via `+++ b/`. */ +const NEW_FILE_EXCLUDED = `${[ + 'diff --git a/backend/gen/brand_new.pb.go b/backend/gen/brand_new.pb.go', + 'new file mode 100644', + 'index 0000000..abc', + '--- /dev/null', + '+++ b/backend/gen/brand_new.pb.go', + '@@ -0,0 +1,3 @@', + '+// Code generated — do not edit.', + '+package gen', + '+', +].join('\n')}\n`; + +const PREFIXES = ['backend/gen/']; + +// ── Helpers ────────────────────────────────────────────────────────────────── + +let tmpDir: string; + +beforeEach(async () => { + tmpDir = await mkdtemp(join(tmpdir(), 'filter-diff-test-')); +}); + +afterEach(async () => { + await rm(tmpDir, { recursive: true, force: true }); +}); + +async function writeDiff(name: string, content: string): Promise { + const p = join(tmpDir, name); + await writeFile(p, content, 'utf-8'); + return p; +} + +// ═════════════════════════════════════════════════════════════════════════════ +// parseExcludePrefixes +// ═════════════════════════════════════════════════════════════════════════════ + +describe('parseExcludePrefixes', () => { + it('splits on newlines and trims whitespace', () => { + expect(parseExcludePrefixes(' backend/gen/\n frontend/src/gen/ \n')).toEqual([ + 'backend/gen/', + 'frontend/src/gen/', + ]); + }); + + it('strips carriage-return characters (Windows line-endings)', () => { + expect(parseExcludePrefixes('backend/gen/\r\nfrontend/src/gen/\r\n')).toEqual([ + 'backend/gen/', + 'frontend/src/gen/', + ]); + }); + + it('removes blank lines', () => { + expect(parseExcludePrefixes('\n\nbackend/gen/\n\n')).toEqual(['backend/gen/']); + }); + + it('returns empty array for empty string', () => { + expect(parseExcludePrefixes('')).toEqual([]); + }); +}); + +// ═════════════════════════════════════════════════════════════════════════════ +// filterDiff — pure function +// ═════════════════════════════════════════════════════════════════════════════ + +describe('filterDiff — modified file in excluded prefix', () => { + it('strips the section from the output', () => { + const result = filterDiff(MOD_EXCLUDED, PREFIXES); + expect(result.excludedFiles).toEqual(['backend/gen/foo.pb.go']); + expect(result.remainingCount).toBe(0); + expect(result.filtered).toBe(''); + }); + + it('does not double-log the path (--- and +++ share the same path)', () => { + const result = filterDiff(MOD_EXCLUDED, PREFIXES); + expect(result.excludedFiles).toHaveLength(1); + }); +}); + +describe('filterDiff — deleted file in excluded prefix', () => { + it('strips the section even though +++ is /dev/null', () => { + const result = filterDiff(DEL_EXCLUDED, PREFIXES); + expect(result.excludedFiles).toEqual(['backend/gen/old.pb.go']); + expect(result.remainingCount).toBe(0); + }); + + it('logs the path from the --- a/ line', () => { + const result = filterDiff(DEL_EXCLUDED, PREFIXES); + expect(result.excludedFiles[0]).toBe('backend/gen/old.pb.go'); + }); +}); + +describe('filterDiff — pure rename in excluded prefix', () => { + it('strips the section (no --- or +++ lines present)', () => { + const result = filterDiff(RENAME_EXCLUDED, PREFIXES); + expect(result.excludedFiles).toEqual(['backend/gen/new.pb.go']); + expect(result.remainingCount).toBe(0); + }); +}); + +describe('filterDiff — new file in excluded prefix', () => { + it('strips the section detected via +++ b/ (--- is /dev/null)', () => { + const result = filterDiff(NEW_FILE_EXCLUDED, PREFIXES); + expect(result.excludedFiles).toEqual(['backend/gen/brand_new.pb.go']); + expect(result.remainingCount).toBe(0); + }); +}); + +describe('filterDiff — file NOT in excluded prefix', () => { + it('preserves the section unchanged', () => { + const result = filterDiff(MOD_KEPT, PREFIXES); + expect(result.excludedFiles).toHaveLength(0); + expect(result.remainingCount).toBe(1); + expect(result.filtered).toBe(MOD_KEPT); + }); +}); + +describe('filterDiff — mixed diff (excluded + kept)', () => { + const mixed = MOD_EXCLUDED + MOD_KEPT; + + it('strips the excluded section and keeps the other', () => { + const result = filterDiff(mixed, PREFIXES); + expect(result.excludedFiles).toEqual(['backend/gen/foo.pb.go']); + expect(result.remainingCount).toBe(1); + }); + + it('the kept section content is present in the output', () => { + const result = filterDiff(mixed, PREFIXES); + expect(result.filtered).toContain('diff --git a/src/real.go'); + expect(result.filtered).not.toContain('diff --git a/backend/gen/foo.pb.go'); + }); + + it('all four section types in one diff', () => { + const all = MOD_EXCLUDED + DEL_EXCLUDED + RENAME_EXCLUDED + NEW_FILE_EXCLUDED + MOD_KEPT; + const result = filterDiff(all, PREFIXES); + expect(result.excludedFiles).toHaveLength(4); + expect(result.remainingCount).toBe(1); + expect(result.filtered).toContain('diff --git a/src/real.go'); + expect(result.filtered).not.toContain('backend/gen/'); + }); +}); + +describe('filterDiff — all sections excluded', () => { + it('returns empty filtered string and remainingCount 0', () => { + const all = MOD_EXCLUDED + DEL_EXCLUDED + RENAME_EXCLUDED; + const result = filterDiff(all, PREFIXES); + expect(result.filtered).toBe(''); + expect(result.remainingCount).toBe(0); + expect(result.excludedFiles).toHaveLength(3); + }); +}); + +describe('filterDiff — empty exclude-paths', () => { + it('returns the diff unchanged when no prefixes are given', () => { + const result = filterDiff(MOD_EXCLUDED, []); + expect(result.filtered).toBe(MOD_EXCLUDED); + expect(result.excludedFiles).toHaveLength(0); + expect(result.remainingCount).toBe(1); + }); +}); + +describe('filterDiff — empty diff content', () => { + it('returns empty result without errors', () => { + const result = filterDiff('', PREFIXES); + expect(result.filtered).toBe(''); + expect(result.excludedFiles).toHaveLength(0); + expect(result.remainingCount).toBe(0); + }); +}); + +describe('filterDiff — multiple exclude prefixes', () => { + it('excludes sections matching any of the configured prefixes', () => { + const frontendExcluded = `${[ + 'diff --git a/frontend/src/gen/api.ts b/frontend/src/gen/api.ts', + '--- a/frontend/src/gen/api.ts', + '+++ b/frontend/src/gen/api.ts', + '+// generated', + ].join('\n')}\n`; + + const result = filterDiff(MOD_EXCLUDED + frontendExcluded + MOD_KEPT, [ + 'backend/gen/', + 'frontend/src/gen/', + ]); + expect(result.excludedFiles).toHaveLength(2); + expect(result.remainingCount).toBe(1); + }); +}); + +// ═════════════════════════════════════════════════════════════════════════════ +// applyFilter — I/O behaviour +// ═════════════════════════════════════════════════════════════════════════════ + +describe('applyFilter — rewrites file in-place when some sections remain', () => { + it('file contains only the kept section after filtering', async () => { + const p = await writeDiff('pr.diff', MOD_EXCLUDED + MOD_KEPT); + applyFilter(p, PREFIXES.join('\n')); + const after = readFileSync(p, 'utf-8'); + expect(after).toContain('diff --git a/src/real.go'); + expect(after).not.toContain('backend/gen/'); + }); +}); + +describe('applyFilter — deletes the file when all sections are excluded', () => { + it('file no longer exists after all sections are filtered out', async () => { + const p = await writeDiff('pr.diff', MOD_EXCLUDED + DEL_EXCLUDED); + applyFilter(p, PREFIXES.join('\n')); + expect(existsSync(p)).toBe(false); + }); +}); + +describe('applyFilter — no-op when exclude-paths is empty', () => { + it('leaves the file unchanged', async () => { + const original = MOD_EXCLUDED + MOD_KEPT; + const p = await writeDiff('pr.diff', original); + applyFilter(p, ''); + expect(readFileSync(p, 'utf-8')).toBe(original); + }); +}); + +describe('applyFilter — no-op when exclude-paths has only blank lines', () => { + it('leaves the file unchanged', async () => { + const original = MOD_EXCLUDED; + const p = await writeDiff('pr.diff', original); + applyFilter(p, '\n \n\r\n'); + expect(readFileSync(p, 'utf-8')).toBe(original); + }); +}); diff --git a/src/filter-diff/filter-diff.ts b/src/filter-diff/filter-diff.ts new file mode 100644 index 0000000..60690ce --- /dev/null +++ b/src/filter-diff/filter-diff.ts @@ -0,0 +1,177 @@ +/** + * filter-diff — core logic for stripping excluded-path sections from a unified diff. + * + * A section is excluded when any path-bearing line within it starts with an + * excluded prefix. The three path-bearing line types handled are: + * + * `--- a/` present for modifications and deletions + * (deletions have `+++ /dev/null` so this is the only real path) + * `+++ b/` present for modifications and new-file additions + * `rename to ` present for pure renames (100% similarity — no --- or +++ lines) + * + * The `!skip` guard ensures only the first matching line per section is logged; + * modifications have both `--- a/` and `+++ b/` pointing to the same path, so + * the second occurrence is a no-op. + */ +import { existsSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface FilterResult { + /** Filtered diff content. Empty string when all sections were excluded. */ + filtered: string; + /** File paths that were stripped (one entry per excluded section). */ + excludedFiles: string[]; + /** Number of diff sections remaining after filtering. */ + remainingCount: number; +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Parse a newline-separated exclude-paths string into trimmed, non-empty prefixes. + * Strips carriage-return characters so Windows line-endings are handled correctly. + */ +export function parseExcludePrefixes(excludePathsStr: string): string[] { + return excludePathsStr + .split('\n') + .map((p) => p.replace(/\r/g, '').trim()) + .filter((p) => p.length > 0); +} + +// --------------------------------------------------------------------------- +// Core filter (pure function) +// --------------------------------------------------------------------------- + +/** + * Filter sections from a unified diff string. Pure function — no filesystem access. + * + * @param diffContent Raw unified diff text (as produced by `gh pr diff`). + * @param excludePrefixes Trimmed, non-empty path prefix strings. + * @returns Filtered diff text plus metadata about what was removed. + */ +export function filterDiff(diffContent: string, excludePrefixes: string[]): FilterResult { + const prefixes = excludePrefixes.filter((p) => p.length > 0); + + // Short-circuit: nothing to filter. + if (prefixes.length === 0 || diffContent === '') { + const remainingCount = (diffContent.match(/^diff --git /gm) ?? []).length; + return { filtered: diffContent, excludedFiles: [], remainingCount }; + } + + const isExcluded = (filePath: string): boolean => + prefixes.some((prefix) => filePath.startsWith(prefix)); + + const lines = diffContent.split('\n'); + const outputLines: string[] = []; + let sectionLines: string[] = []; + let skip = false; + const excludedFiles: string[] = []; + let remainingCount = 0; + + const flushSection = (): void => { + if (sectionLines.length === 0) return; + if (!skip) { + for (const l of sectionLines) outputLines.push(l); + remainingCount++; + } + sectionLines = []; + skip = false; + }; + + for (const line of lines) { + if (line.startsWith('diff --git ')) { + // Start of a new section — flush the previous one first. + flushSection(); + sectionLines.push(line); + } else if (sectionLines.length > 0) { + // We are inside a section. Check path-bearing lines until one matches. + // The !skip guard avoids double-logging on modifications (--- a/ then +++ b/). + if (!skip) { + let filePath: string | null = null; + + if (line.startsWith('--- a/')) { + filePath = line.slice(6); // "--- a/" is 6 chars + } else if (line.startsWith('+++ b/')) { + filePath = line.slice(6); // "+++ b/" is 6 chars + } else if (line.startsWith('rename to ')) { + filePath = line.slice(10); // "rename to " is 10 chars + } + + if (filePath !== null && isExcluded(filePath)) { + skip = true; + excludedFiles.push(filePath); + } + } + sectionLines.push(line); + } else { + // Content before the first diff section (e.g. commit preamble) — preserve. + outputLines.push(line); + } + } + + // Flush the final section. + flushSection(); + + return { + filtered: outputLines.join('\n'), + excludedFiles, + remainingCount, + }; +} + +// --------------------------------------------------------------------------- +// I/O wrapper (used by the CLI entry point) +// --------------------------------------------------------------------------- + +/** + * Read a diff from `diffPath`, filter it using the given exclude-paths string, + * and write the result back in-place. + * + * When all sections are excluded the file is **deleted** (not left empty) so + * that `hashFiles('pr.diff')` in GitHub Actions returns `''` and downstream + * steps guarded by `if: hashFiles('pr.diff') != ''` are skipped automatically. + * + * All progress messages are written to stderr so they appear in the Actions log. + * + * @param diffPath Absolute or relative path to the diff file. + * @param excludePathsStr Newline-separated exclude-path prefixes. + */ +export function applyFilter(diffPath: string, excludePathsStr: string): void { + const prefixes = parseExcludePrefixes(excludePathsStr); + + if (prefixes.length === 0) { + process.stderr.write('ℹ️ No valid prefixes in exclude-paths — skipping filter\n'); + return; + } + + process.stderr.write(`🔍 Filtering diff against ${prefixes.length} excluded prefix(es):\n`); + for (const p of prefixes) { + process.stderr.write(` - ${p}\n`); + } + + const diffContent = readFileSync(diffPath, 'utf-8'); + const result = filterDiff(diffContent, prefixes); + + for (const file of result.excludedFiles) { + process.stderr.write(`⏭️ Excluded from review: ${file}\n`); + } + + process.stderr.write( + `✅ Filtered diff: ${result.excludedFiles.length} files excluded,` + + ` ${result.remainingCount} files remaining\n`, + ); + + if (result.remainingCount === 0) { + if (existsSync(diffPath)) rmSync(diffPath); + process.stderr.write( + 'ℹ️ All files excluded — removed pr.diff so downstream diff steps are skipped\n', + ); + } else { + writeFileSync(diffPath, result.filtered, 'utf-8'); + } +} diff --git a/src/filter-diff/index.ts b/src/filter-diff/index.ts new file mode 100644 index 0000000..5715ba5 --- /dev/null +++ b/src/filter-diff/index.ts @@ -0,0 +1,35 @@ +/** + * filter-diff CLI entrypoint. + * + * Usage: + * node dist/filter-diff.js + * + * diffPath Path to the diff file (read and overwritten in-place). + * excludePathsList Newline-separated list of path prefixes to exclude. + * + * All diff section types are handled correctly: + * - Modifications detected via `+++ b/` + * - Deletions detected via `--- a/` (+++ is /dev/null) + * - Pure renames detected via `rename to ` (no --- or +++ present) + * + * When all sections are excluded the file is deleted so `hashFiles()` in + * GitHub Actions returns `''` and downstream `if: hashFiles('pr.diff') != ''` + * guards fire correctly. + * + * See filter-diff.ts for the pure filtering logic and I/O wrapper. + */ +import { applyFilter } from './filter-diff.js'; + +const [, , diffPath, excludePathsArg] = process.argv; + +if (!diffPath) { + process.stderr.write('Usage: filter-diff \n'); + process.exit(1); +} + +try { + applyFilter(diffPath, excludePathsArg ?? ''); +} catch (err) { + process.stderr.write(`Error: ${err instanceof Error ? err.message : String(err)}\n`); + process.exit(1); +} diff --git a/src/score-risk/__tests__/score-risk.test.ts b/src/score-risk/__tests__/score-risk.test.ts new file mode 100644 index 0000000..7467bd8 --- /dev/null +++ b/src/score-risk/__tests__/score-risk.test.ts @@ -0,0 +1,427 @@ +/** + * Unit tests for src/score-risk. + * + * Each scoring rule is tested in isolation first (single-variable control), + * then in combination. Edge cases and both zero-score exit paths (exclude-paths + * prefix match and generated-file marker) are verified independently. + */ +import { describe, expect, it } from 'vitest'; +import { parseExcludePrefixes, scoreFiles } from '../score-risk.js'; + +// ── Fixture helpers ─────────────────────────────────────────────────────────── + +/** + * Build a minimal but structurally valid diff section for `filePath`. + * `addedLines` is a list of raw lines to append; they should already include + * the leading `+` where needed (e.g. `'+new code'`). + */ +function makeDiff(filePath: string, addedLines: string[] = ['+new line']): string { + return [ + `diff --git a/${filePath} b/${filePath}`, + 'index abc..def 100644', + `--- a/${filePath}`, + `+++ b/${filePath}`, + '@@ -1,1 +1,2 @@', + ' existing', + ...addedLines, + '', + ].join('\n'); +} + +/** Build a diff with exactly `hunkCount` `@@` headers. */ +function makeHunksDiff(filePath: string, hunkCount: number): string { + const lines = [`diff --git a/${filePath} b/${filePath}`, 'index abc..def 100644']; + for (let i = 0; i < hunkCount; i++) { + lines.push(`@@ -${i * 10 + 1},1 +${i * 10 + 1},2 @@`); + lines.push(`+hunk${i}`); + } + lines.push(''); + return lines.join('\n'); +} + +/** Build a diff with `n` added (non-hunk) lines for `filePath`. */ +function makeLargeDiff(filePath: string, addedCount: number): string { + const added = Array.from({ length: addedCount }, (_, i) => `+line${i}`); + return makeDiff(filePath, added); +} + +// ── parseExcludePrefixes ────────────────────────────────────────────────────── + +describe('parseExcludePrefixes', () => { + it('splits, trims, and drops blank lines', () => { + expect(parseExcludePrefixes(' backend/gen/\n frontend/src/gen/ \n\n')).toEqual([ + 'backend/gen/', + 'frontend/src/gen/', + ]); + }); + + it('strips CR characters (Windows line-endings)', () => { + expect(parseExcludePrefixes('backend/gen/\r\nfrontend/src/gen/\r\n')).toEqual([ + 'backend/gen/', + 'frontend/src/gen/', + ]); + }); + + it('returns empty array for empty string', () => { + expect(parseExcludePrefixes('')).toEqual([]); + }); +}); + +// ── Rule 1: exclude-paths prefix → score 0 ─────────────────────────────────── + +describe('scoreFiles — rule 1: exclude-paths prefix → score 0', () => { + it('file matching the prefix scores 0', () => { + const diff = makeDiff('backend/gen/foo.pb.go'); + const scores = scoreFiles(diff, ['backend/gen/']); + expect(scores['backend/gen/foo.pb.go']).toBe(0); + }); + + it('security-path file in excluded prefix still scores 0 (prefix wins)', () => { + // "session_service.pb.go" would score 2 via the security-path rule, + // but the exclude-paths rule fires first and short-circuits to 0. + const diff = makeLargeDiff('backend/gen/session_service.pb.go', 200); + const scores = scoreFiles(diff, ['backend/gen/']); + expect(scores['backend/gen/session_service.pb.go']).toBe(0); + }); + + it('non-matching file is scored normally', () => { + const diff = makeDiff('backend/gen/foo.pb.go') + makeDiff('src/auth/handler.go'); + const scores = scoreFiles(diff, ['backend/gen/']); + expect(scores['backend/gen/foo.pb.go']).toBe(0); + expect(scores['src/auth/handler.go']).toBe(2); // security path + }); + + it('multiple prefixes — file matching any is scored 0', () => { + const diff = + makeDiff('backend/gen/foo.pb.go') + + makeDiff('frontend/src/gen/api.ts') + + makeDiff('src/real.go'); + const scores = scoreFiles(diff, ['backend/gen/', 'frontend/src/gen/']); + expect(scores['backend/gen/foo.pb.go']).toBe(0); + expect(scores['frontend/src/gen/api.ts']).toBe(0); + expect(scores['src/real.go']).toBe(0); // no rules fire for a plain file + }); +}); + +// ── Rule 2: generated-file marker → score 0 ────────────────────────────────── + +describe('scoreFiles — rule 2: generated-file marker → score 0', () => { + it('"Code generated" in first added line → score 0', () => { + const diff = makeDiff('src/gen.pb.go', [ + '+// Code generated by protoc-gen-go. DO NOT EDIT.', + '+package gen', + ]); + const scores = scoreFiles(diff, []); + expect(scores['src/gen.pb.go']).toBe(0); + }); + + it('"DO NOT EDIT" anywhere in first 5 added lines → score 0', () => { + const diff = makeDiff('src/gen.pb.go', [ + '+// generated file', + '+// DO NOT EDIT manually', + '+package gen', + ]); + const scores = scoreFiles(diff, []); + expect(scores['src/gen.pb.go']).toBe(0); + }); + + it('marker on line 5 (boundary, inclusive of count < 5) → score 0', () => { + // count reaches 5 on line 5; the loop checks GENERATED_MARKER_RE before + // the count >= 5 exit, so line 5 (addedCount == 5 after increment) is tested. + const diff = makeDiff('src/gen.pb.go', [ + '+line1', + '+line2', + '+line3', + '+line4', + '+// Code generated', + ]); + const scores = scoreFiles(diff, []); + expect(scores['src/gen.pb.go']).toBe(0); + }); + + it('marker beyond line 5 is ignored — file scored normally', () => { + const diff = makeDiff('src/auth/handler.go', [ + '+line1', + '+line2', + '+line3', + '+line4', + '+line5', + '+// Code generated (too late to trigger marker check)', + ]); + const scores = scoreFiles(diff, []); + // Security path +2; no other rules fired. + expect(scores['src/auth/handler.go']).toBe(2); + }); + + it('marker on context line (not a "+" line) does NOT trigger → scored normally', () => { + // A context line (space-prefixed) represents a pre-existing generated marker. + const diff = makeDiff('src/auth/gen.go', [ + ' // Code generated — context, not added', + '+real change', + ]); + const scores = scoreFiles(diff, []); + // Security path matches "auth" → +2; no marker on added line. + expect(scores['src/auth/gen.go']).toBe(2); + }); +}); + +// ── Rule 3: security-sensitive path → +2 ───────────────────────────────────── + +describe('scoreFiles — rule 3: security-sensitive path → +2', () => { + const KEYWORDS = [ + 'auth', + 'security', + 'crypto', + 'session', + 'secret', + 'token', + 'password', + 'credential', + ] as const; + + for (const kw of KEYWORDS) { + it(`keyword "${kw}" in path → score 2`, () => { + const diff = makeDiff(`src/${kw}/handler.go`); + const scores = scoreFiles(diff, []); + expect(scores[`src/${kw}/handler.go`]).toBe(2); + }); + } + + it('keyword is case-insensitive (AUTH → 2)', () => { + const diff = makeDiff('src/AUTH/handler.go'); + const scores = scoreFiles(diff, []); + expect(scores['src/AUTH/handler.go']).toBe(2); + }); + + it('no keyword in path → 0', () => { + const diff = makeDiff('src/utils/helper.go'); + const scores = scoreFiles(diff, []); + expect(scores['src/utils/helper.go']).toBe(0); + }); +}); + +// ── Rule 4: large change (>100 added lines) → +2 ───────────────────────────── + +describe('scoreFiles — rule 4: large change (>100 added lines) → +2', () => { + it('101 added lines → score 2', () => { + const diff = makeLargeDiff('src/big.go', 101); + const scores = scoreFiles(diff, []); + expect(scores['src/big.go']).toBe(2); + }); + + it('100 added lines (boundary) → score 0 (not triggered)', () => { + const diff = makeLargeDiff('src/big.go', 100); + const scores = scoreFiles(diff, []); + expect(scores['src/big.go']).toBe(0); + }); + + it('1 added line → score 0', () => { + const diff = makeDiff('src/small.go', ['+one line']); + const scores = scoreFiles(diff, []); + expect(scores['src/small.go']).toBe(0); + }); +}); + +// ── Rule 5: many hunks (>3) → +1 ───────────────────────────────────────────── + +describe('scoreFiles — rule 5: many hunks (>3 hunk headers) → +1', () => { + it('4 hunks → score 1', () => { + const diff = makeHunksDiff('src/hunky.go', 4); + const scores = scoreFiles(diff, []); + expect(scores['src/hunky.go']).toBe(1); + }); + + it('3 hunks (boundary) → score 0 (not triggered)', () => { + const diff = makeHunksDiff('src/hunky.go', 3); + const scores = scoreFiles(diff, []); + expect(scores['src/hunky.go']).toBe(0); + }); + + it('1 hunk → score 0', () => { + const diff = makeDiff('src/simple.go', ['+change']); + const scores = scoreFiles(diff, []); + expect(scores['src/simple.go']).toBe(0); + }); +}); + +// ── Rule 6: test/doc/config file → reset score to 0 ────────────────────────── + +describe('scoreFiles — rule 6: test/doc/config file → reset score to 0', () => { + const TEST_PATHS = [ + 'src/auth_test.go', + 'src/auth.test.ts', + 'src/auth.test.tsx', + 'src/auth.test.js', + 'src/auth.test.jsx', + 'src/auth.spec.ts', + 'src/auth.spec.tsx', + 'test_auth.py', + 'docs/README.md', + '.github/workflows/ci.yml', + '.github/workflows/ci.yaml', + 'config/app.json', + 'config/pyproject.toml', + ] as const; + + for (const p of TEST_PATHS) { + it(`"${p}" resets score to 0 even with security-path match`, () => { + // Use a large diff to trigger rules 3, 4, 5 — all should be overridden by rule 6. + const lines = Array.from({ length: 110 }, (_, i) => `+line${i}`); + const hunkHeaders = Array.from( + { length: 4 }, + (_, i) => `@@ -${i * 5 + 1},1 +${i * 5 + 1},2 @@`, + ); + const diff = [ + `diff --git a/${p} b/${p}`, + 'index abc..def 100644', + `--- a/${p}`, + `+++ b/${p}`, + ...hunkHeaders.flatMap((h) => [h, '+hunk']), + ...lines, + '', + ].join('\n'); + const scores = scoreFiles(diff, []); + // Rule 6 resets to 0; rule 7 (error handling) doesn't fire here. + expect(scores[p]).toBe(0); + }); + } + + it('test file WITH error-handling patterns scores 1 (rule 7 still applies after reset)', () => { + // Rule 6 resets the running total to 0, then rule 7 adds 1. + const diff = makeDiff('src/auth_test.go', ['+if err != nil { panic(err) }']); + const scores = scoreFiles(diff, []); + expect(scores['src/auth_test.go']).toBe(1); + }); +}); + +// ── Rule 7: error-handling patterns → +1 ───────────────────────────────────── + +describe('scoreFiles — rule 7: error-handling patterns → +1', () => { + const KEYWORDS = ['catch', 'rescue', 'except', 'recover', 'error', 'panic'] as const; + + for (const kw of KEYWORDS) { + it(`keyword "${kw}" in an added line → +1`, () => { + const diff = makeDiff('src/service.go', [`+handle ${kw} here`]); + const scores = scoreFiles(diff, []); + expect(scores['src/service.go']).toBe(1); + }); + } + + it('keyword in a context line (not added) → no +1', () => { + // Context line (space-prefixed) doesn't count. + const diff = makeDiff('src/service.go', [' existing catch block', '+new line only']); + const scores = scoreFiles(diff, []); + expect(scores['src/service.go']).toBe(0); + }); + + it('multiple error-handling lines still only add +1 total', () => { + const diff = makeDiff('src/service.go', [ + '+catch(err) {', + '+ recover();', + '+ panic(err)', + '}', + ]); + const scores = scoreFiles(diff, []); + expect(scores['src/service.go']).toBe(1); + }); +}); + +// ── Combined scoring ────────────────────────────────────────────────────────── + +describe('scoreFiles — combined rules', () => { + it('security path + large change = 4', () => { + const diff = makeLargeDiff('src/auth/handler.go', 150); + const scores = scoreFiles(diff, []); + expect(scores['src/auth/handler.go']).toBe(4); // +2 security + +2 large + }); + + it('security path + large change + many hunks = 5', () => { + const lines = Array.from({ length: 110 }, (_, i) => `+line${i}`); + const hunkHeaders = Array.from( + { length: 4 }, + (_, i) => `@@ -${i * 30 + 1},1 +${i * 30 + 1},2 @@`, + ); + const diff = [ + 'diff --git a/src/session/service.go b/src/session/service.go', + 'index abc..def 100644', + '--- a/src/session/service.go', + '+++ b/src/session/service.go', + ...hunkHeaders.flatMap((h) => [h, '+hunk']), + ...lines, + '', + ].join('\n'); + const scores = scoreFiles(diff, []); + expect(scores['src/session/service.go']).toBe(5); // +2 security +2 large +1 hunks + }); + + it('security path + large change + many hunks + error handling = 6 (max realistic)', () => { + const lines = Array.from({ length: 110 }, (_, i) => `+line${i}`); + const hunkHeaders = Array.from( + { length: 4 }, + (_, i) => `@@ -${i * 30 + 1},1 +${i * 30 + 1},2 @@`, + ); + const diff = [ + 'diff --git a/src/session/service.go b/src/session/service.go', + 'index abc..def 100644', + '--- a/src/session/service.go', + '+++ b/src/session/service.go', + ...hunkHeaders.flatMap((h) => [h, '+hunk']), + '+catch(err) {', + ...lines, + '', + ].join('\n'); + const scores = scoreFiles(diff, []); + expect(scores['src/session/service.go']).toBe(6); + }); + + it('multiple files in one diff are scored independently', () => { + const diff = + makeDiff('src/auth/handler.go') + // security → 2 + makeLargeDiff('src/utils/big.go', 200) + // large → 2 + makeDiff('backend/gen/foo.pb.go'); // excluded → 0 + + const scores = scoreFiles(diff, ['backend/gen/']); + expect(scores['src/auth/handler.go']).toBe(2); + expect(scores['src/utils/big.go']).toBe(2); + expect(scores['backend/gen/foo.pb.go']).toBe(0); + }); +}); + +// ── extractFilePath regression: greedy-regex vs indexOf(' b/') ───────────────── + +describe('scoreFiles — extractFilePath: indexOf regression for paths containing b/', () => { + it('path with security keyword before a b/ directory component scores 2, not 0', () => { + // Old greedy regex: replace(/.*b\//, '') on + // "diff --git a/src/auth/b/helper.ts b/src/auth/b/helper.ts" + // matches everything up to the last 'b/' giving 'helper.ts'. + // 'helper.ts' has no security keyword → score 0. + // + // New indexOf(' b/'): strips "diff --git a/" prefix and splits at the + // first ' b/' separator giving 'src/auth/b/helper.ts'. + // 'src/auth/b/helper.ts' matches SECURITY_PATH_RE ('auth') → score 2. + const diff = makeDiff('src/auth/b/helper.ts', ['+changed']); + const scores = scoreFiles(diff, []); + expect(scores['src/auth/b/helper.ts']).toBe(2); + }); +}); + +// ── Edge cases ──────────────────────────────────────────────────────────────── + +describe('scoreFiles — edge cases', () => { + it('empty diff → empty scores object', () => { + expect(scoreFiles('', [])).toEqual({}); + }); + + it('empty exclude prefixes → all files scored normally', () => { + const diff = makeDiff('backend/gen/foo.pb.go'); + const scores = scoreFiles(diff, []); + // No security keyword, no large change, no hunks → 0 + expect(scores['backend/gen/foo.pb.go']).toBe(0); + }); + + it('single plain file with no matching rules → score 0', () => { + const diff = makeDiff('src/utils/helper.go', ['+minor tweak']); + const scores = scoreFiles(diff, []); + expect(scores['src/utils/helper.go']).toBe(0); + }); +}); diff --git a/src/score-risk/index.ts b/src/score-risk/index.ts new file mode 100644 index 0000000..9b7ce0d --- /dev/null +++ b/src/score-risk/index.ts @@ -0,0 +1,36 @@ +/** + * score-risk CLI entrypoint. + * + * Usage: + * node dist/score-risk.js + * + * diffPath Path to the diff file (read-only). + * excludePathsList Newline-separated list of path prefixes to score as 0. + * + * Writes a JSON object { "": , … } to /tmp/file_risk_scores.json. + * The consuming step reads it with: + * echo "✅ File risk scores: $(jq -c . /tmp/file_risk_scores.json)" + * + * See score-risk.ts for the scoring rules and pure-function logic. + */ +import { readFileSync, writeFileSync } from 'node:fs'; +import { parseExcludePrefixes, scoreFiles } from './score-risk.js'; + +const SCORES_OUTPUT_PATH = '/tmp/file_risk_scores.json'; + +const [, , diffPath, excludePathsArg] = process.argv; + +if (!diffPath) { + process.stderr.write('Usage: score-risk \n'); + process.exit(1); +} + +try { + const diffContent = readFileSync(diffPath, 'utf-8'); + const prefixes = parseExcludePrefixes(excludePathsArg ?? ''); + const scores = scoreFiles(diffContent, prefixes); + writeFileSync(SCORES_OUTPUT_PATH, JSON.stringify(scores), 'utf-8'); +} catch (err) { + process.stderr.write(`Error: ${err instanceof Error ? err.message : String(err)}\n`); + process.exit(1); +} diff --git a/src/score-risk/score-risk.ts b/src/score-risk/score-risk.ts new file mode 100644 index 0000000..4740b17 --- /dev/null +++ b/src/score-risk/score-risk.ts @@ -0,0 +1,219 @@ +/** + * score-risk — per-file risk scoring for the PR review pipeline. + * + * Replicates the "Score file risk" bash/awk block from review-pr/action.yml + * as a testable, type-safe TypeScript module. + * + * Scoring rules (applied in order, all additive unless noted): + * 1. Excluded prefix match → score 0, skip remaining rules + * 2. Generated-file marker → score 0, skip remaining rules + * (first 5 added lines contain "Code generated" or "DO NOT EDIT") + * 3. Security-sensitive path → +2 + * (auth|security|crypto|session|secret|token|password|credential, case-insensitive) + * 4. Large change (>100 added lines)→ +2 + * 5. Many hunks (>3 hunk headers) → +1 + * 6. Test/doc/config file → score = 0 (resets 3-5 to zero) + * 7. Error-handling patterns → +1 + * (catch|rescue|except|recover|error|panic in any added line, case-sensitive) + * + * Rule 6 resets the running total to 0, then rule 7 can still add 1. + * This faithfully reproduces the existing bash behaviour. + */ + +// --------------------------------------------------------------------------- +// Regexes — kept as module-level constants so they are compiled once +// --------------------------------------------------------------------------- + +/** Matches security-sensitive keywords in a file path (case-insensitive). */ +const SECURITY_PATH_RE = /auth|security|crypto|session|secret|token|password|credential/i; + +/** Matches test, documentation, and config file extensions (case-insensitive). */ +const TEST_FILE_RE = + /_test\.go$|\.test\.[tj]sx?$|\.spec\.[tj]sx?$|test_.*\.py$|\.md$|\.ya?ml$|\.json$|\.toml$/i; + +/** Matches error-handling keywords in diff hunk lines (case-sensitive, matching bash awk). */ +const ERROR_PATTERN_RE = /catch|rescue|except|recover|error|panic/; + +/** Matches generated-file header strings. */ +const GENERATED_MARKER_RE = /Code generated|DO NOT EDIT/; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +/** Map from file path to integer risk score (0 = lowest risk). */ +export type RiskScores = Record; + +/** Per-file diff statistics gathered in a single forward pass. */ +interface FileStat { + path: string; + addedLines: number; + hunks: number; +} + +// --------------------------------------------------------------------------- +// Internal helpers +// --------------------------------------------------------------------------- + +/** + * Extract the destination file path from a `diff --git a/… b/…` header line. + * + * The standard git diff format is: + * diff --git a/ b/ + * where `` is identical on both sides for modifications, deletions, and + * new files. We strip the `diff --git a/` prefix (13 chars) and then take + * everything before the first ` b/` separator. + * + * Using `indexOf(' b/')` (space-b-slash) rather than a greedy `.*b\/` regex + * avoids a mis-extraction for paths whose directory components contain `b/` + * (e.g. `.github/workflows/ci.yml` where `github/` contains `b/`). + */ +function extractFilePath(diffGitLine: string): string { + const after = diffGitLine.slice('diff --git a/'.length); + const sepIdx = after.indexOf(' b/'); + return sepIdx >= 0 ? after.slice(0, sepIdx) : after; +} + +/** + * Return true for a unified-diff added line (starts with `+` but not `+++`). + * Matches the awk pattern `/^\+[^+]/`. + */ +function isAddedLine(line: string): boolean { + return line.length >= 2 && line[0] === '+' && line[1] !== '+'; +} + +/** + * Single forward pass over the diff to collect per-file stats. + * Mirrors the awk block that builds /tmp/file_diff_stats.txt. + */ +function parseDiffStats(diffContent: string): FileStat[] { + const stats: FileStat[] = []; + let current: FileStat | null = null; + + for (const line of diffContent.split('\n')) { + if (line.startsWith('diff --git ')) { + if (current) stats.push(current); + current = { path: extractFilePath(line), addedLines: 0, hunks: 0 }; + } else if (current !== null) { + if (line.startsWith('@@')) { + current.hunks++; + } else if (isAddedLine(line)) { + current.addedLines++; + } + } + } + + if (current) stats.push(current); + return stats; +} + +/** + * Scan the first 5 added lines of `filePath`'s section for a generated-file + * marker ("Code generated" or "DO NOT EDIT"). + * + * Only fires for brand-new files whose header appears on a `+` line. + * For pre-existing generated files being modified the marker is a context line + * (space-prefixed) and won't match — the exclude-paths check is the primary + * mechanism for those. + */ +function hasGeneratedMarker(diffContent: string, filePath: string): boolean { + let inFile = false; + let addedCount = 0; + + for (const line of diffContent.split('\n')) { + if (line.startsWith('diff --git ')) { + inFile = extractFilePath(line) === filePath; + if (inFile) addedCount = 0; + } else if (inFile && isAddedLine(line)) { + addedCount++; + if (GENERATED_MARKER_RE.test(line)) return true; + if (addedCount >= 5) return false; + } + } + + return false; +} + +/** + * Count added lines in `filePath`'s section that contain an error-handling + * keyword. Matches the awk error-pattern scan (case-sensitive). + */ +function countErrorHandlingLines(diffContent: string, filePath: string): number { + let inFile = false; + let count = 0; + + for (const line of diffContent.split('\n')) { + if (line.startsWith('diff --git ')) { + inFile = extractFilePath(line) === filePath; + } else if (inFile && isAddedLine(line) && ERROR_PATTERN_RE.test(line)) { + count++; + } + } + + return count; +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/** + * Parse a newline-separated exclude-paths string into trimmed, non-empty prefixes. + * Strips carriage-return characters to handle Windows line-endings correctly. + */ +export function parseExcludePrefixes(excludePathsStr: string): string[] { + return excludePathsStr + .split('\n') + .map((p) => p.replace(/\r/g, '').trim()) + .filter((p) => p.length > 0); +} + +/** + * Score every file in `diffContent` using the PR review risk heuristics. + * Pure function — no filesystem access. + * + * @param diffContent Raw unified diff text (as produced by `gh pr diff`). + * @param excludePrefixes Trimmed, non-empty path prefix strings (files whose + * path starts with any prefix receive score 0). + * @returns Map of { filePath → riskScore }. + */ +export function scoreFiles(diffContent: string, excludePrefixes: string[]): RiskScores { + const stats = parseDiffStats(diffContent); + const scores: RiskScores = {}; + + for (const { path, addedLines, hunks } of stats) { + // Rule 1: exclude-paths prefix → score 0, skip all other rules. + if (excludePrefixes.some((prefix) => path.startsWith(prefix))) { + scores[path] = 0; + continue; + } + + // Rule 2: generated-file markers in first 5 added lines → score 0, skip. + // (Safety net for brand-new generated files not covered by exclude-paths.) + if (hasGeneratedMarker(diffContent, path)) { + scores[path] = 0; + continue; + } + + let score = 0; + + // Rule 3: security-sensitive path → +2. + if (SECURITY_PATH_RE.test(path)) score += 2; + + // Rule 4: large change (>100 added lines) → +2. + if (addedLines > 100) score += 2; + + // Rule 5: many hunks (>3 hunk headers) → +1. + if (hunks > 3) score += 1; + + // Rule 6: test/doc/config file → reset score to 0. + if (TEST_FILE_RE.test(path)) score = 0; + + // Rule 7: error-handling patterns in added lines → +1. + if (countErrorHandlingLines(diffContent, path) > 0) score += 1; + + scores[path] = score; + } + + return scores; +} diff --git a/tsup.config.ts b/tsup.config.ts index b711189..1cd7ec9 100644 --- a/tsup.config.ts +++ b/tsup.config.ts @@ -13,8 +13,10 @@ const src = (name: string) => { }; const entry = { credentials: src('credentials'), + 'filter-diff': src('filter-diff'), main: src('main'), 'mention-reply': src('mention-reply'), + 'score-risk': src('score-risk'), security: src('security'), 'signed-commit': src('signed-commit'), };