elfrost · elfrost · May 15, 2026 · May 15, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -54,6 +54,7 @@ AI PatchLab is an AI-assisted security remediation toolkit. The MVP focuses on a
 - `scanner/run_scan.py` - CLI entry point (`python scanner/run_scan.py --repo <path>` or `--from-git-url <url>`)
 - `scanner/git_source.py` - Shallow-clone a public git URL into a temp directory via the `cloned_repo` context manager; cleanup-on-exit, `shell=False`, no remote API calls
 - `scanner/paths.py` - `rebase_finding_paths(findings, repo_root)` rewrites each finding's `file` (and `id` when it embeds the same path) to a repo-relative POSIX path so reports survive temp-dir cleanup
+- `scanner/ignore.py` - `apply_ignore(findings, patterns)` + `load_ignore_patterns(path)` provide `.gitignore`-style path suppression of findings (used by the `--ignore-file` CLI flag). Findings with an empty `file` field are never suppressed
 - `scanner/models.py` - Normalized `Finding` dataclass + severity/confidence enums + `FINDING_FIELDS`
 - `scanner/recommendations.py` - Deterministic keyword-based recommendation enrichment
 - `scanner/confidence.py` - Centralized `Finding.confidence` rules (one function per scanner + `confidence_for_meta_finding` for shared `not-installed` / `scan-error` / etc.)

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -50,6 +50,7 @@ This project can optionally include a parallel Codex/OpenAI runtime via `AGENTS.
 - `scanner/run_scan.py` — CLI entry point (`python scanner/run_scan.py --repo <path>` or `--from-git-url <url>`)
 - `scanner/git_source.py` — Shallow-clone a public git URL into a temp directory via the `cloned_repo` context manager; cleanup-on-exit, `shell=False`, no remote API calls
 - `scanner/paths.py` — `rebase_finding_paths(findings, repo_root)` rewrites each finding's `file` (and `id` when it embeds the same path) to a repo-relative POSIX path so reports survive temp-dir cleanup
+- `scanner/ignore.py` — `apply_ignore(findings, patterns)` + `load_ignore_patterns(path)` provide `.gitignore`-style path suppression of findings (used by the `--ignore-file` CLI flag). Findings with an empty `file` field are never suppressed
 - `scanner/models.py` — Normalized `Finding` dataclass + severity/confidence enums + `FINDING_FIELDS`
 - `scanner/recommendations.py` — Deterministic keyword-based recommendation enrichment
 - `scanner/confidence.py` — Centralized `Finding.confidence` rules (one function per scanner + `confidence_for_meta_finding` for shared `not-installed` / `scan-error` / etc.)

diff --git a/README.md b/README.md
@@ -27,6 +27,9 @@ python scanner/run_scan.py --from-git-url "https://github.com/owner/repo" --repo
 # Filter low-noise findings out of public reports (default keeps everything)
 python scanner/run_scan.py --from-git-url "https://github.com/owner/repo" --reports-dir "reports\owner-repo" --min-severity medium
 
+# Suppress known false-positive paths with a .gitignore-style ignore file
+python scanner/run_scan.py --from-git-url "https://github.com/owner/repo" --reports-dir "reports\owner-repo" --ignore-file "reports\owner-repo\.aipatchlabignore"
+
 # Tests
 python -m pytest tests/ -v
 
@@ -389,6 +392,31 @@ ai-patchlab/
 `-- pyproject.toml       # Dependencies and tool config
 ```
 
+## Ignore File
+
+`--ignore-file` accepts a `.gitignore`-style file whose patterns suppress matching
+findings *after* path rebasing. Patterns match the repo-relative POSIX path of
+each finding (e.g. `tests/cassettes/foo.yaml`). Blank lines and lines whose first
+non-whitespace character is `#` are skipped; `!`-prefixed lines re-include
+previously excluded paths.
+
+Example for a project whose own safety-engine tests embed crafted fake secrets
+that look real to Gitleaks:
+
+```
+# Crafted fixtures in the safety policy engine tests.
+tests/unit_tests/safety_engine/**
+
+# Smoke tests that ship fake API tokens to exercise integrations.
+tests/smoke_tests/integrations/**
+
+# Re-include one specific file that's actually worth scanning.
+!tests/unit_tests/safety_engine/test_real_findings.py
+```
+
+Findings with an empty `file` field (e.g. info-level "tool not installed"
+placeholders) are never suppressed — they describe infrastructure state, not
+file content, and a `**` pattern should not silently drop them.
+
 ## Notes
 
 - No web app is included in v0.1.

diff --git a/pyproject.toml b/pyproject.toml
@@ -7,6 +7,7 @@ dependencies = [
     "aiomysql>=0.2.0",
     "httpx>=0.27.0",
     "loguru>=0.7.0",
+    "pathspec>=0.12.0",
     "pydantic>=2.0",
     "pydantic-settings>=2.0",
     "python-dotenv>=1.0",

diff --git a/scanner/ignore.py b/scanner/ignore.py
@@ -0,0 +1,66 @@
+"""Path-based suppression of findings via .gitignore-style patterns.
+
+A scan can be paired with an "ignore file" (passed via `--ignore-file`)
+that lists path patterns to exclude from the report. Patterns use the
+same syntax as `.gitignore`: glob with `**` for any-depth matches, `!`
+prefix for negation. This is invaluable for scanning targets that have
+recurring false-positive shapes (test cassettes, security-tool detector
+fixtures, vendored libraries) without having to teach our scanner-level
+rules about every project's conventions.
+
+Suppression happens AFTER `rebase_finding_paths`, so patterns are
+matched against POSIX repo-relative paths (e.g. `tests/**` matches
+`tests/foo/bar.py`).
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from pathspec import PathSpec
+from pathspec.patterns.gitwildmatch import GitWildMatchPattern
+
+from scanner.models import Finding
+
+
+def parse_ignore_patterns(raw: str) -> list[str]:
+    """Split raw text into ignore patterns; drop blank lines and comments.
+
+    Comments are lines whose first non-whitespace character is `#`. Both
+    leading and trailing whitespace is stripped from each kept pattern.
+    """
+    patterns: list[str] = []
+    for line in raw.splitlines():
+        stripped = line.strip()
+        if not stripped or stripped.startswith("#"):
+            continue
+        patterns.append(stripped)
+    return patterns
+
+
+def load_ignore_patterns(path: Path | None) -> list[str]:
+    """Read an ignore file from disk and return its patterns.
+
+    Returns an empty list when `path` is None (no suppression configured).
+    Raises `FileNotFoundError` if `path` is given but does not exist.
+    """
+    if path is None:
+        return []
+    return parse_ignore_patterns(path.read_text(encoding="utf-8"))
+
+
+def apply_ignore(findings: list[Finding], patterns: list[str]) -> list[Finding]:
+    """Drop findings whose `file` matches one of the gitignore-style patterns.
+
+    Findings with an empty `file` field (e.g. info-level "tool not
+    installed" placeholders that point at the repo root) are never
+    suppressed - they don't represent a real path and the user
+    presumably wants to keep seeing infrastructure signals.
+    """
+    if not patterns:
+        return list(findings)
+
+    spec = PathSpec.from_lines(GitWildMatchPattern, patterns)
+    return [
+        finding for finding in findings if not finding.file or not spec.match_file(finding.file)
+    ]
diff --git a/scanner/run_scan.py b/scanner/run_scan.py
@@ -10,6 +10,7 @@
     sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
 
 from scanner.git_source import GitCloneError, cloned_repo
+from scanner.ignore import apply_ignore, load_ignore_patterns
 from scanner.models import SEVERITIES, Finding
 from scanner.paths import rebase_finding_paths
 from scanner.recommendations import enrich_findings
@@ -30,14 +31,18 @@ def run_scan(
     repo_path: Path,
     reports_dir: Path = Path("reports"),
     min_severity: str = "info",
+    ignore_file: Path | None = None,
 ) -> dict[str, Path]:
     """Validate input, run configured scanners, and write reports."""
     resolved_repo = repo_path.expanduser().resolve()
     if not resolved_repo.exists() or not resolved_repo.is_dir():
         raise ValueError(f"Repository path does not exist or is not a directory: {repo_path}")
 
+    ignore_patterns = load_ignore_patterns(ignore_file)
+
     findings = collect_findings(resolved_repo, reports_dir)
     findings = rebase_finding_paths(findings, resolved_repo)
+    findings = apply_ignore(findings, ignore_patterns)
     findings = filter_by_min_severity(findings, min_severity)
     return write_reports(repo_path=resolved_repo, findings=findings, reports_dir=reports_dir)
 
@@ -46,6 +51,7 @@ def run_scan_from_url(
     url: str,
     reports_dir: Path = Path("reports"),
     min_severity: str = "info",
+    ignore_file: Path | None = None,
 ) -> dict[str, Path]:
     """Clone a public git URL into a temporary directory, then scan it.
 
@@ -54,7 +60,12 @@ def run_scan_from_url(
     outside the clone).
     """
     with cloned_repo(url) as clone:
-        return run_scan(clone.repo_path, reports_dir, min_severity=min_severity)
+        return run_scan(
+            clone.repo_path,
+            reports_dir,
+            min_severity=min_severity,
+            ignore_file=ignore_file,
+        )
 
 
 def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
@@ -79,21 +90,36 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
         choices=list(SEVERITIES),
         help="Drop findings strictly less severe than this threshold (default: info, keeps everything).",
     )
+    parser.add_argument(
+        "--ignore-file",
+        dest="ignore_file",
+        default=None,
+        help="Path to a .gitignore-style file whose patterns suppress matching findings.",
+    )
     return parser.parse_args(argv)
 
 
 def main(argv: list[str] | None = None) -> int:
     """CLI wrapper."""
     args = parse_args(argv)
     reports_dir = Path(args.reports_dir)
+    ignore_file = Path(args.ignore_file) if args.ignore_file else None
     try:
         if args.from_git_url:
             report_paths = run_scan_from_url(
-                args.from_git_url, reports_dir, min_severity=args.min_severity
+                args.from_git_url,
+                reports_dir,
+                min_severity=args.min_severity,
+                ignore_file=ignore_file,
             )
         else:
-            report_paths = run_scan(Path(args.repo), reports_dir, min_severity=args.min_severity)
-    except (ValueError, GitCloneError) as exc:
+            report_paths = run_scan(
+                Path(args.repo),
+                reports_dir,
+                min_severity=args.min_severity,
+                ignore_file=ignore_file,
+            )
+    except (ValueError, GitCloneError, FileNotFoundError) as exc:
         print(f"Error: {exc}", file=sys.stderr)
         return 2
 

diff --git a/tests/test_ignore.py b/tests/test_ignore.py
@@ -0,0 +1,101 @@
+"""Tests for path-based finding suppression via .gitignore-style patterns."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from scanner.ignore import (
+    apply_ignore,
+    load_ignore_patterns,
+    parse_ignore_patterns,
+)
+from scanner.models import Finding
+
+
+def _finding(file: str, finding_id: str = "x") -> Finding:
+    return Finding(
+        id=finding_id,
+        tool="semgrep",
+        severity="high",
+        title="t",
+        description="d",
+        file=file,
+        line=1,
+        recommendation="r",
+        confidence="medium",
+    )
+
+
+class TestParseIgnorePatterns:
+    def test_strips_comments_and_blank_lines(self) -> None:
+        raw = "\n# top comment\ntests/**\n\n  # indented comment\n**/cassettes/**\n"
+        patterns = parse_ignore_patterns(raw)
+        assert patterns == ["tests/**", "**/cassettes/**"]
+
+    def test_keeps_negation_lines(self) -> None:
+        raw = "tests/**\n!tests/test_critical.py\n"
+        patterns = parse_ignore_patterns(raw)
+        assert patterns == ["tests/**", "!tests/test_critical.py"]
+
+
+class TestLoadIgnorePatterns:
+    def test_returns_empty_for_none_path(self) -> None:
+        assert load_ignore_patterns(None) == []
+
+    def test_reads_existing_file(self, tmp_path: Path) -> None:
+        path = tmp_path / "ignore.txt"
+        path.write_text("tests/**\n# comment\n**/cassettes/**\n", encoding="utf-8")
+        assert load_ignore_patterns(path) == ["tests/**", "**/cassettes/**"]
+
+    def test_missing_file_raises(self, tmp_path: Path) -> None:
+        with pytest.raises(FileNotFoundError):
+            load_ignore_patterns(tmp_path / "nope.txt")
+
+
+class TestApplyIgnore:
+    def test_empty_patterns_returns_input_unchanged(self) -> None:
+        findings = [_finding("src/a.py", "1"), _finding("tests/b.py", "2")]
+        assert apply_ignore(findings, []) == findings
+
+    def test_drops_findings_matching_pattern(self) -> None:
+        findings = [
+            _finding("src/a.py", "src"),
+            _finding("tests/b.py", "tst"),
+        ]
+        result = apply_ignore(findings, ["tests/**"])
+        assert [f.id for f in result] == ["src"]
+
+    def test_double_star_matches_any_depth(self) -> None:
+        findings = [
+            _finding("packages/x/tests/cassettes/foo.yaml", "deep"),
+            _finding("tests/cassettes/bar.yaml", "shallow"),
+            _finding("src/foo.py", "keep"),
+        ]
+        result = apply_ignore(findings, ["**/cassettes/**"])
+        assert [f.id for f in result] == ["keep"]
+
+    def test_negation_re_includes_specific_file(self) -> None:
+        findings = [
+            _finding("tests/test_a.py", "a"),
+            _finding("tests/test_critical.py", "critical"),
+        ]
+        result = apply_ignore(findings, ["tests/**", "!tests/test_critical.py"])
+        assert [f.id for f in result] == ["critical"]
+
+    def test_preserves_order(self) -> None:
+        findings = [
+            _finding("src/a.py", "a"),
+            _finding("tests/b.py", "b"),
+            _finding("src/c.py", "c"),
+        ]
+        result = apply_ignore(findings, ["tests/**"])
+        assert [f.id for f in result] == ["a", "c"]
+
+    def test_empty_file_field_is_never_suppressed(self) -> None:
+        # A finding with empty file (e.g. dependency-scan info) should
+        # not be silently dropped by a `**` pattern.
+        findings = [_finding("", "empty"), _finding("tests/b.py", "tst")]
+        result = apply_ignore(findings, ["tests/**", "**"])
+        assert "empty" in [f.id for f in result]