From 9b235664f567cdab38dbac170df105d5c4f010e4 Mon Sep 17 00:00:00 2001 From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com> Date: Tue, 26 May 2026 11:06:22 +0100 Subject: [PATCH] fix(bridge): distinguish direct vs transitive phantom deps in triage action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #47. `bridge triage` was telling users to "Remove unused dependency `` from Cargo.toml" for any phantom-classified CVE, but the 2026-05-26 estate sweep showed every sampled phantom (28/28 in a 6-repo sample, 157 phantoms total) was a *transitive* dep pulled in by some upstream crate — never declared in any Cargo.toml. The action string was unactionable. Fix: parse the project's Cargo.toml dependency tables to distinguish direct from transitive deps, then emit the right action for each. Changes: - `src/bridge/lockfile.rs`: new `collect_direct_cargo_dependencies()` walks the root Cargo.toml + each `workspace.members` manifest, indexing `[dependencies]`, `[dev-dependencies]`, `[build-dependencies]`, `[workspace.dependencies]`, and target-prefixed variants (`[target.cfg(...).dependencies]`). Crate names normalised to hyphen+lowercase for CVE-feed matching (serde_json ↔ serde-json). - `src/bridge/classify.rs`: `classify()` takes a new `is_direct: bool`. - Phantom + direct → unchanged "Remove unused dependency" action. - Phantom + transitive → new action: "Transitive — run `cargo update -p ` ... Otherwise informational: code unreachable from this project." - Reachable arms unchanged. - `src/bridge/mod.rs`: `triage()` builds the direct-deps set once (outside the per-CVE loop) and passes the lookup result into classify. Regression coverage: - `direct_deps_skips_transitive_only_crates` — repro of the `lru` / ratatui case from the issue. - `direct_deps_collects_dev_and_build_sections`, `direct_deps_handles_target_sections`, `direct_deps_handles_workspace_members`, `direct_deps_normalises_underscore_to_hyphen`, `direct_deps_ignores_commented_lines_and_strings_with_hash`, `direct_deps_empty_when_no_manifest` — parser edge cases. - `test_phantom_transitive_recommends_cargo_update` — asserts no "Remove unused dependency" string for the transitive phantom case. - `test_phantom_direct_recommends_removal` — direct case unchanged. - `test_reachable_classification_unaffected_by_is_direct` — sanity check that the new flag only affects the Phantom arm. All 236 binary tests pass. cargo clippy clean. cargo fmt clean. Out of scope (per the issue): - "For unmitigable + reachable: actionable warning" — the current rationale already describes the import sites + lack of fix. Whether the action wording needs further sharpening is a separate concern from the wrong-action-on-phantom-transitive bug. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/bridge/classify.rs | 77 +++++++++- src/bridge/lockfile.rs | 315 +++++++++++++++++++++++++++++++++++++++++ src/bridge/mod.rs | 12 +- 3 files changed, 396 insertions(+), 8 deletions(-) diff --git a/src/bridge/classify.rs b/src/bridge/classify.rs index c086d84..fc66be3 100644 --- a/src/bridge/classify.rs +++ b/src/bridge/classify.rs @@ -16,14 +16,22 @@ use super::{Classification, ReachabilityEvidence, ReachabilityStatus, Vulnerabil /// Classify a vulnerability given its reachability evidence. /// +/// `is_direct` indicates whether the vulnerable package appears as a key in +/// the project's `Cargo.toml` dependency tables. Phantom + transitive is +/// common (a vulnerable crate pulled in through the dep graph but never +/// imported in this project's source) and warrants a different remediation +/// path than phantom + direct (where the manifest entry is genuinely +/// unused). See #47. +/// /// Returns (classification, rationale, suggested_action). pub fn classify( vuln: &Vulnerability, evidence: &ReachabilityEvidence, + is_direct: bool, ) -> (Classification, String, String) { match evidence.status { // ─── Phantom dependency: declared but never imported ─── - ReachabilityStatus::Phantom => ( + ReachabilityStatus::Phantom if is_direct => ( Classification::Informational, format!( "{} {} is declared in Cargo.toml but never imported in any .rs file. \ @@ -37,6 +45,24 @@ pub fn classify( ), ), + // ─── Phantom + transitive: pulled in by an upstream, not declared here ─── + ReachabilityStatus::Phantom => ( + Classification::Informational, + format!( + "{} {} is a transitive dependency (not declared in this project's Cargo.toml) \ + and never imported in any .rs file. The vulnerable code is compiled but \ + unreachable from this project. The CVE rides along through the dependency \ + graph; remediation is upstream, not local.", + vuln.package, vuln.version + ), + format!( + "Transitive — run `cargo update -p {}` to pull a non-vulnerable version if \ + one is published, or upgrade the upstream crate that pulls it in. \ + Otherwise informational: code unreachable from this project.", + vuln.package + ), + ), + // ─── Unreachable: imported but no taint flow (Phase 2) ─── ReachabilityStatus::Unreachable => ( Classification::Informational, @@ -178,29 +204,66 @@ mod tests { } #[test] - fn test_phantom_is_informational() { - let (cls, _, action) = classify(&mock_vuln(false, false), &phantom_evidence()); + fn test_phantom_direct_recommends_removal() { + let (cls, _, action) = classify(&mock_vuln(false, false), &phantom_evidence(), true); assert_eq!(cls, Classification::Informational); - assert!(action.contains("Remove")); + assert!( + action.contains("Remove unused dependency"), + "direct phantom should recommend removal, got: {action}" + ); + } + + #[test] + fn test_phantom_transitive_recommends_cargo_update() { + // Regression for #47: phantom-classified transitive deps were + // incorrectly told to "Remove unused dependency from Cargo.toml". + let (cls, rationale, action) = + classify(&mock_vuln(false, false), &phantom_evidence(), false); + assert_eq!(cls, Classification::Informational); + assert!( + !action.contains("Remove unused dependency"), + "transitive phantom must NOT recommend manifest removal, got: {action}" + ); + assert!( + action.contains("Transitive"), + "transitive phantom action should label itself transitive, got: {action}" + ); + assert!( + action.contains("cargo update"), + "transitive phantom action should suggest cargo update, got: {action}" + ); + assert!( + rationale.contains("transitive dependency"), + "rationale should explain transitive status, got: {rationale}" + ); } #[test] fn test_reachable_no_fix_is_unmitigable() { - let (cls, _, _) = classify(&mock_vuln(false, false), &reachable_evidence()); + let (cls, _, _) = classify(&mock_vuln(false, false), &reachable_evidence(), true); assert_eq!(cls, Classification::Unmitigable); } #[test] fn test_reachable_semver_fix_is_mitigable() { - let (cls, _, action) = classify(&mock_vuln(true, true), &reachable_evidence()); + let (cls, _, action) = classify(&mock_vuln(true, true), &reachable_evidence(), true); assert_eq!(cls, Classification::Mitigable); assert!(action.contains("cargo update")); } #[test] fn test_reachable_breaking_fix_is_mitigable() { - let (cls, _, action) = classify(&mock_vuln(true, false), &reachable_evidence()); + let (cls, _, action) = classify(&mock_vuln(true, false), &reachable_evidence(), true); assert_eq!(cls, Classification::Mitigable); assert!(action.contains("breaking change")); } + + #[test] + fn test_reachable_classification_unaffected_by_is_direct() { + // is_direct is only used for the Phantom arm — reachable findings + // should classify identically regardless of manifest declaration. + let (cls_a, _, _) = classify(&mock_vuln(true, true), &reachable_evidence(), true); + let (cls_b, _, _) = classify(&mock_vuln(true, true), &reachable_evidence(), false); + assert_eq!(cls_a, cls_b); + } } diff --git a/src/bridge/lockfile.rs b/src/bridge/lockfile.rs index 7f237f6..677bcf4 100644 --- a/src/bridge/lockfile.rs +++ b/src/bridge/lockfile.rs @@ -317,6 +317,161 @@ fn unquote(s: &str) -> String { s.trim().trim_matches('"').to_string() } +// ============================================================================ +// Direct-dependency detection (Cargo.toml) +// ============================================================================ + +/// Collect the set of crate names declared as **direct** dependencies in the +/// project's `Cargo.toml`. Used to distinguish direct phantoms (genuine +/// "remove the unused dep" candidates) from transitive phantoms (pulled in +/// by an upstream crate — different remediation path). +/// +/// Sections inspected (root manifest + each workspace.member manifest): +/// +/// - `[dependencies]`, `[dev-dependencies]`, `[build-dependencies]` +/// - `[workspace.dependencies]` +/// - `[target.X.dependencies]`, `[target.X.dev-dependencies]`, `[target.X.build-dependencies]` +/// +/// Names are normalised to lowercase with `_` → `-` so a CVE feed reporting +/// `serde_json` matches a manifest line `serde-json = ...` and vice versa. +/// +/// Best-effort parser: handles plain `crate = "version"` and `crate = { ... }` +/// table forms. Quoted keys (`"crate-name" = ...`) are supported. Lines +/// inside comments (`# ...`) are skipped. +/// +/// Returns an empty set on parse failure — callers should treat that as +/// "unknown direct-deps", falling back to the conservative (transitive) +/// classification rather than asserting "direct". +pub fn collect_direct_cargo_dependencies(project_dir: &Path) -> std::collections::HashSet { + let mut acc = std::collections::HashSet::new(); + collect_from_manifest(&project_dir.join("Cargo.toml"), &mut acc, project_dir); + acc +} + +fn collect_from_manifest( + manifest: &Path, + acc: &mut std::collections::HashSet, + project_dir: &Path, +) { + let Ok(content) = std::fs::read_to_string(manifest) else { + return; + }; + + let mut current_section: Option = None; + let mut workspace_members: Vec = Vec::new(); + + for raw_line in content.lines() { + // Strip inline comments (`key = "val" # note`). Be careful: a `#` + // inside a quoted string is data, not a comment. Single quotes don't + // matter in TOML. For our use, the strict version below is enough. + let line = strip_toml_inline_comment(raw_line); + let trimmed = line.trim(); + if trimmed.is_empty() || trimmed.starts_with('#') { + continue; + } + + // Section header? + if let Some(name) = trimmed.strip_prefix('[').and_then(|s| s.strip_suffix(']')) { + current_section = Some(name.trim().to_string()); + continue; + } + + let Some(section) = current_section.as_deref() else { + continue; + }; + + if is_dependency_section(section) { + if let Some(crate_name) = extract_dependency_key(trimmed) { + acc.insert(normalise_crate_name(&crate_name)); + } + } else if section == "workspace" { + // members = ["a", "b/c"] + if let Some(members) = parse_workspace_members(trimmed) { + workspace_members.extend(members); + } + } + } + + // Recurse into workspace members (one level — we don't support nested + // workspaces, which are not a real Cargo pattern). + for member in workspace_members { + let member_manifest = project_dir.join(&member).join("Cargo.toml"); + if member_manifest.exists() && member_manifest != manifest { + collect_from_manifest(&member_manifest, acc, project_dir); + } + } +} + +fn is_dependency_section(section: &str) -> bool { + // Direct matches + matches!( + section, + "dependencies" | "dev-dependencies" | "build-dependencies" | "workspace.dependencies" + ) || section.ends_with(".dependencies") + || section.ends_with(".dev-dependencies") + || section.ends_with(".build-dependencies") +} + +fn extract_dependency_key(line: &str) -> Option { + // Lines look like one of: + // serde = "1.0" + // serde = { version = "1.0", features = ["derive"] } + // "serde-json" = "1.0" + // We only care about the LHS of the first `=`. + let eq = line.find('=')?; + let lhs = line[..eq].trim(); + let key = lhs.trim_matches('"').trim_matches('\'').trim(); + if key.is_empty() { + return None; + } + // Reject obviously-not-a-crate-name tokens that can appear in nested + // tables (e.g. `version`, `features`, `default-features`). These would + // only appear here if a section line is malformed; the section-header + // check normally keeps us out of nested-table bodies, but a paranoid + // filter is cheap. + if key.contains(char::is_whitespace) || key.contains('.') { + return None; + } + Some(key.to_string()) +} + +fn parse_workspace_members(line: &str) -> Option> { + let lhs = line.split('=').next()?.trim(); + if lhs != "members" { + return None; + } + let rhs = line.split_once('=')?.1.trim(); + let inner = rhs + .trim() + .strip_prefix('[') + .and_then(|s| s.strip_suffix(']'))?; + Some( + inner + .split(',') + .map(|s| s.trim().trim_matches('"').to_string()) + .filter(|s| !s.is_empty()) + .collect(), + ) +} + +fn strip_toml_inline_comment(line: &str) -> &str { + // Conservative: only strip when the `#` is NOT inside a "..." string. + let bytes = line.as_bytes(); + let mut in_string = false; + for (i, &b) in bytes.iter().enumerate() { + if b == b'"' { + in_string = !in_string; + } else if b == b'#' && !in_string { + return &line[..i]; + } + } + line +} + +fn normalise_crate_name(s: &str) -> String { + s.to_ascii_lowercase().replace('_', "-") +} + // ============================================================================ // Tests // ============================================================================ @@ -468,4 +623,164 @@ sqlalchemy[asyncio]==2.0.0; python_version >= "3.8" assert_eq!(deps.len(), 1); assert_eq!(deps[0].name, "serde"); } + + // ------------------------------------------------------------------ + // Direct-dependency parser (regression for #47) + // ------------------------------------------------------------------ + + fn write_cargo_toml(dir: &Path, body: &str) { + let mut f = std::fs::File::create(dir.join("Cargo.toml")).unwrap(); + write!(f, "{body}").unwrap(); + } + + #[test] + fn direct_deps_finds_dependencies_section_entries() { + let dir = tempfile::TempDir::new().unwrap(); + write_cargo_toml( + dir.path(), + r#" +[package] +name = "demo" + +[dependencies] +serde = "1.0" +anyhow = { version = "1.0" } +"serde-json" = "1.0" +"#, + ); + let direct = collect_direct_cargo_dependencies(dir.path()); + assert!(direct.contains("serde")); + assert!(direct.contains("anyhow")); + assert!(direct.contains("serde-json")); + } + + #[test] + fn direct_deps_skips_transitive_only_crates() { + // The crate `lru` only appears as a transitive dep through `ratatui` + // in the real-world repro (#47). Cargo.toml has no `lru =` line, so + // the parser must NOT consider it direct. + let dir = tempfile::TempDir::new().unwrap(); + write_cargo_toml( + dir.path(), + r#" +[package] +name = "demo" + +[dependencies] +ratatui = "0.29" +"#, + ); + let direct = collect_direct_cargo_dependencies(dir.path()); + assert!(direct.contains("ratatui")); + assert!( + !direct.contains("lru"), + "transitive deps must not be reported as direct" + ); + } + + #[test] + fn direct_deps_collects_dev_and_build_sections() { + let dir = tempfile::TempDir::new().unwrap(); + write_cargo_toml( + dir.path(), + r#" +[dependencies] +serde = "1.0" + +[dev-dependencies] +tempfile = "3" + +[build-dependencies] +cc = "1" +"#, + ); + let direct = collect_direct_cargo_dependencies(dir.path()); + for name in ["serde", "tempfile", "cc"] { + assert!(direct.contains(name), "missing {name}"); + } + } + + #[test] + fn direct_deps_handles_target_sections() { + let dir = tempfile::TempDir::new().unwrap(); + write_cargo_toml( + dir.path(), + r#" +[target.'cfg(unix)'.dependencies] +nix = "0.27" + +[target.x86_64-pc-windows-msvc.build-dependencies] +winapi = "0.3" +"#, + ); + let direct = collect_direct_cargo_dependencies(dir.path()); + assert!(direct.contains("nix")); + assert!(direct.contains("winapi")); + } + + #[test] + fn direct_deps_handles_workspace_members() { + let dir = tempfile::TempDir::new().unwrap(); + write_cargo_toml( + dir.path(), + r#" +[workspace] +members = ["crates/a", "crates/b"] +"#, + ); + std::fs::create_dir_all(dir.path().join("crates/a")).unwrap(); + std::fs::create_dir_all(dir.path().join("crates/b")).unwrap(); + write_cargo_toml( + &dir.path().join("crates/a"), + "[dependencies]\nrand = \"0.8\"\n", + ); + write_cargo_toml( + &dir.path().join("crates/b"), + "[dev-dependencies]\nproptest = \"1\"\n", + ); + + let direct = collect_direct_cargo_dependencies(dir.path()); + assert!(direct.contains("rand")); + assert!(direct.contains("proptest")); + } + + #[test] + fn direct_deps_normalises_underscore_to_hyphen() { + let dir = tempfile::TempDir::new().unwrap(); + write_cargo_toml(dir.path(), "[dependencies]\nserde_json = \"1.0\"\n"); + let direct = collect_direct_cargo_dependencies(dir.path()); + assert!( + direct.contains("serde-json"), + "underscore should normalise to hyphen for CVE feed matching" + ); + // Either spelling of the same crate must match the same normalised entry. + for spelling in ["serde_json", "serde-json"] { + let normalised = normalise_crate_name(spelling); + assert!(direct.contains(&normalised)); + } + } + + #[test] + fn direct_deps_ignores_commented_lines_and_strings_with_hash() { + let dir = tempfile::TempDir::new().unwrap(); + write_cargo_toml( + dir.path(), + r#" +[dependencies] +# commented = "1.0" +serde = "1.0" # inline comment is fine +"#, + ); + let direct = collect_direct_cargo_dependencies(dir.path()); + assert!(direct.contains("serde")); + assert!(!direct.contains("commented")); + } + + #[test] + fn direct_deps_empty_when_no_manifest() { + let dir = tempfile::TempDir::new().unwrap(); + // No Cargo.toml written + let direct = collect_direct_cargo_dependencies(dir.path()); + assert!(direct.is_empty()); + } } diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs index 9b7cf3b..fdf69bf 100644 --- a/src/bridge/mod.rs +++ b/src/bridge/mod.rs @@ -264,11 +264,21 @@ pub fn triage(project_dir: &Path, offline: bool) -> anyhow::Result return Ok(BridgeReport::empty(project_dir, total_deps)); } + // Direct-vs-transitive lookup table for the project's Cargo.toml. + // Collected once (not per-CVE) to avoid re-parsing the manifest for + // every vulnerable dep — see #47. + let direct_deps = lockfile::collect_direct_cargo_dependencies(project_dir); + let is_direct = |pkg: &str| { + let normalised = pkg.to_ascii_lowercase().replace('_', "-"); + direct_deps.contains(&normalised) + }; + // Step 3 & 4: For each vuln, check reachability and classify let mut assessed = Vec::new(); for vuln in vulns { let evidence = reachability::check_reachability(project_dir, &vuln.package)?; - let (classification, rationale, action) = classify::classify(&vuln, &evidence); + let (classification, rationale, action) = + classify::classify(&vuln, &evidence, is_direct(&vuln.package)); assessed.push(AssessedCve { vulnerability: vuln,