diff --git a/src/bridge/classify.rs b/src/bridge/classify.rs index fc66be3..21181b2 100644 --- a/src/bridge/classify.rs +++ b/src/bridge/classify.rs @@ -16,52 +16,63 @@ use super::{Classification, ReachabilityEvidence, ReachabilityStatus, Vulnerabil /// Classify a vulnerability given its reachability evidence. /// -/// `is_direct` indicates whether the vulnerable package appears as a key in -/// the project's `Cargo.toml` dependency tables. Phantom + transitive is -/// common (a vulnerable crate pulled in through the dep graph but never -/// imported in this project's source) and warrants a different remediation -/// path than phantom + direct (where the manifest entry is genuinely -/// unused). See #47. +/// The three-way output (Mitigable / Unmitigable / Informational) is +/// unchanged from #47 — both phantom variants classify as `Informational` — +/// but the rationale and suggested fix differ: +/// +/// - [`ReachabilityStatus::PhantomDeclared`] — declared in Cargo.toml but +/// no `use` site. Strip the dep via `cargo machete --fix`. Canonical +/// case: file-soup#50. +/// - [`ReachabilityStatus::PhantomTransitive`] — not declared anywhere in +/// the workspace, pulled in by a parent dep. Local strip is impossible; +/// the fix requires bumping the parent (named in `evidence.parent_dep` +/// when identifiable). Track E found ~26 of 29 issues here. /// /// Returns (classification, rationale, suggested_action). pub fn classify( vuln: &Vulnerability, evidence: &ReachabilityEvidence, - is_direct: bool, ) -> (Classification, String, String) { match evidence.status { - // ─── Phantom dependency: declared but never imported ─── - ReachabilityStatus::Phantom if is_direct => ( + // ─── Phantom + declared: manifest entry is genuinely unused ─── + ReachabilityStatus::PhantomDeclared => ( Classification::Informational, format!( "{} {} is declared in Cargo.toml but never imported in any .rs file. \ The vulnerable code is compiled but unreachable. \ - Removing the dependency from Cargo.toml eliminates this CVE entirely.", + Stripping the dependency eliminates this CVE entirely.", vuln.package, vuln.version ), format!( - "Remove unused dependency `{}` from Cargo.toml", + "Strip from Cargo.toml — run `cargo machete --fix` (or remove the \ + dependency line manually) for `{}`.", vuln.package ), ), - // ─── Phantom + transitive: pulled in by an upstream, not declared here ─── - ReachabilityStatus::Phantom => ( - Classification::Informational, - format!( - "{} {} is a transitive dependency (not declared in this project's Cargo.toml) \ - and never imported in any .rs file. The vulnerable code is compiled but \ - unreachable from this project. The CVE rides along through the dependency \ - graph; remediation is upstream, not local.", - vuln.package, vuln.version - ), - format!( - "Transitive — run `cargo update -p {}` to pull a non-vulnerable version if \ - one is published, or upgrade the upstream crate that pulls it in. \ - Otherwise informational: code unreachable from this project.", - vuln.package - ), - ), + // ─── Phantom + transitive: pulled in by an upstream parent ─── + ReachabilityStatus::PhantomTransitive => { + let parent_clause = match evidence.parent_dep.as_deref() { + Some(p) => format!("`{p}`"), + None => "an upstream parent dependency".to_string(), + }; + ( + Classification::Informational, + format!( + "{} {} is a transitive dependency (not declared in this project's \ + Cargo.toml) and never imported in any .rs file. Pulled in by \ + {parent_clause}. The vulnerable code is compiled but unreachable \ + from this project; remediation lives upstream.", + vuln.package, vuln.version + ), + format!( + "Pulled in transitively by {parent_clause} — fix requires bumping \ + the parent dependency past the affected version of `{}`. No local \ + strip is possible.", + vuln.package + ), + ) + } // ─── Unreachable: imported but no taint flow (Phase 2) ─── ReachabilityStatus::Unreachable => ( @@ -183,11 +194,21 @@ mod tests { } } - fn phantom_evidence() -> ReachabilityEvidence { + fn phantom_declared_evidence() -> ReachabilityEvidence { + ReachabilityEvidence { + is_imported: false, + import_sites: vec![], + status: ReachabilityStatus::PhantomDeclared, + parent_dep: None, + } + } + + fn phantom_transitive_evidence(parent: Option<&str>) -> ReachabilityEvidence { ReachabilityEvidence { is_imported: false, import_sites: vec![], - status: ReachabilityStatus::Phantom, + status: ReachabilityStatus::PhantomTransitive, + parent_dep: parent.map(str::to_string), } } @@ -200,37 +221,51 @@ mod tests { statement: "use test_crate::Thing;".to_string(), }], status: ReachabilityStatus::Reachable, + parent_dep: None, } } #[test] - fn test_phantom_direct_recommends_removal() { - let (cls, _, action) = classify(&mock_vuln(false, false), &phantom_evidence(), true); + fn test_phantom_declared_recommends_machete_strip() { + // file-soup#50 shape: crate declared in Cargo.toml, no `use` site — + // strip the manifest entry. + let (cls, rationale, action) = classify(&mock_vuln(false, false), &phantom_declared_evidence()); assert_eq!(cls, Classification::Informational); assert!( - action.contains("Remove unused dependency"), - "direct phantom should recommend removal, got: {action}" + action.contains("cargo machete --fix") || action.contains("Strip from Cargo.toml"), + "declared phantom should recommend strip, got: {action}" + ); + assert!( + rationale.contains("declared in Cargo.toml"), + "rationale should explain declared status, got: {rationale}" ); } #[test] - fn test_phantom_transitive_recommends_cargo_update() { - // Regression for #47: phantom-classified transitive deps were - // incorrectly told to "Remove unused dependency from Cargo.toml". - let (cls, rationale, action) = - classify(&mock_vuln(false, false), &phantom_evidence(), false); + fn test_phantom_transitive_recommends_parent_bump() { + // Track E shape: ~26 of 29 issues were misclassified as phantom-declared + // when they're actually transitive. The fix is to bump the parent, NOT + // to strip the local manifest. + let (cls, rationale, action) = classify( + &mock_vuln(false, false), + &phantom_transitive_evidence(Some("reqwest")), + ); assert_eq!(cls, Classification::Informational); assert!( - !action.contains("Remove unused dependency"), - "transitive phantom must NOT recommend manifest removal, got: {action}" + !action.contains("cargo machete --fix"), + "transitive phantom must NOT recommend manifest strip, got: {action}" ); assert!( - action.contains("Transitive"), + action.contains("Pulled in transitively"), "transitive phantom action should label itself transitive, got: {action}" ); assert!( - action.contains("cargo update"), - "transitive phantom action should suggest cargo update, got: {action}" + action.contains("`reqwest`"), + "transitive phantom action should name the parent dep, got: {action}" + ); + assert!( + action.contains("bumping the parent"), + "transitive phantom action should suggest parent bump, got: {action}" ); assert!( rationale.contains("transitive dependency"), @@ -238,32 +273,48 @@ mod tests { ); } + #[test] + fn test_phantom_transitive_unknown_parent_falls_back_gracefully() { + // Best-effort parent identification: if Cargo.lock didn't reveal one, + // we still produce useful output. + let (cls, rationale, action) = classify( + &mock_vuln(false, false), + &phantom_transitive_evidence(None), + ); + assert_eq!(cls, Classification::Informational); + assert!( + action.contains("an upstream parent dependency"), + "unknown-parent transitive should fall back to generic phrasing, got: {action}" + ); + assert!(rationale.contains("transitive dependency")); + } + #[test] fn test_reachable_no_fix_is_unmitigable() { - let (cls, _, _) = classify(&mock_vuln(false, false), &reachable_evidence(), true); + let (cls, _, _) = classify(&mock_vuln(false, false), &reachable_evidence()); assert_eq!(cls, Classification::Unmitigable); } #[test] fn test_reachable_semver_fix_is_mitigable() { - let (cls, _, action) = classify(&mock_vuln(true, true), &reachable_evidence(), true); + let (cls, _, action) = classify(&mock_vuln(true, true), &reachable_evidence()); assert_eq!(cls, Classification::Mitigable); assert!(action.contains("cargo update")); } #[test] fn test_reachable_breaking_fix_is_mitigable() { - let (cls, _, action) = classify(&mock_vuln(true, false), &reachable_evidence(), true); + let (cls, _, action) = classify(&mock_vuln(true, false), &reachable_evidence()); assert_eq!(cls, Classification::Mitigable); assert!(action.contains("breaking change")); } #[test] - fn test_reachable_classification_unaffected_by_is_direct() { - // is_direct is only used for the Phantom arm — reachable findings - // should classify identically regardless of manifest declaration. - let (cls_a, _, _) = classify(&mock_vuln(true, true), &reachable_evidence(), true); - let (cls_b, _, _) = classify(&mock_vuln(true, true), &reachable_evidence(), false); - assert_eq!(cls_a, cls_b); + fn test_phantom_variants_both_classify_informational() { + // Three-way classifier output is unchanged from #47. + let (cls_decl, _, _) = classify(&mock_vuln(false, false), &phantom_declared_evidence()); + let (cls_trans, _, _) = classify(&mock_vuln(false, false), &phantom_transitive_evidence(None)); + assert_eq!(cls_decl, Classification::Informational); + assert_eq!(cls_trans, Classification::Informational); } } diff --git a/src/bridge/lockfile.rs b/src/bridge/lockfile.rs index 677bcf4..a01b86a 100644 --- a/src/bridge/lockfile.rs +++ b/src/bridge/lockfile.rs @@ -472,6 +472,134 @@ fn normalise_crate_name(s: &str) -> String { s.to_ascii_lowercase().replace('_', "-") } +// ============================================================================ +// Parent-dependency identification (Cargo.lock) +// ============================================================================ + +/// For each transitive crate in `Cargo.lock`, identify the direct dependency +/// (from `direct_deps`) whose closure pulls it in. Best-effort. +/// +/// Walks the resolution tree starting from each direct dep, marking every +/// reachable crate with that direct dep as its parent. First match wins +/// (BFS order), so a crate appearing under multiple direct deps gets +/// attributed to whichever one's BFS reaches it first — a heuristic, but +/// fine for "what to bump" recommendations on the common case (~26 of 29 +/// Track E issues). +/// +/// Returns a map keyed by normalised crate name (lowercase, hyphens) → name +/// of the direct dep (also normalised). Direct deps themselves are NOT +/// included in the map; they're the start of the walk, not transitives. +/// +/// If `Cargo.lock` is missing or unparseable, returns an empty map. Callers +/// that need parent info should fall back to the generic "an upstream +/// parent dependency" phrasing in [`crate::bridge::classify`]. +pub fn collect_cargo_parents( + project_dir: &Path, + direct_deps: &std::collections::HashSet, +) -> std::collections::HashMap { + let lock_path = project_dir.join("Cargo.lock"); + let Ok(content) = std::fs::read_to_string(&lock_path) else { + return std::collections::HashMap::new(); + }; + + // First pass: build child-list per package from `[[package]]` blocks. + // Each block has `name = "..."` and optionally `dependencies = [ ... ]`. + // We collect names in their CARGO form (lockfile spelling), then + // normalise when storing in the map so lookups match the rest of the + // codebase. Dependency lines inside the array look like: + // "serde", + // "serde 1.0.200", + // "serde 1.0.200 (registry+https://...)", + let mut children: std::collections::HashMap> = + std::collections::HashMap::new(); + + let mut current_name: Option = None; + let mut in_dep_block = false; + let mut current_deps: Vec = Vec::new(); + + for raw_line in content.lines() { + let line = raw_line.trim_end(); + let trimmed = line.trim(); + + if trimmed == "[[package]]" { + // Flush previous package's deps before starting a new one. + if let Some(name) = current_name.take() { + children + .entry(normalise_crate_name(&name)) + .or_default() + .extend(current_deps.drain(..).map(|d| normalise_crate_name(&d))); + } + in_dep_block = false; + current_deps.clear(); + continue; + } + + if in_dep_block { + if trimmed == "]" { + in_dep_block = false; + continue; + } + // Lines look like `"serde",` or `"serde 1.0.200",` — strip + // the quote, then take the first whitespace-separated token + // as the crate name. Version + source suffix are discarded. + let stripped = trimmed.trim_end_matches(',').trim().trim_matches('"'); + if stripped.is_empty() { + continue; + } + if let Some(name_token) = stripped.split_whitespace().next() { + current_deps.push(name_token.to_string()); + } + continue; + } + + if let Some(rest) = trimmed.strip_prefix("name = ") { + current_name = Some(unquote(rest)); + } else if trimmed == "dependencies = [" { + in_dep_block = true; + } + } + // Final package flush. + if let Some(name) = current_name { + children + .entry(normalise_crate_name(&name)) + .or_default() + .extend(current_deps.into_iter().map(|d| normalise_crate_name(&d))); + } + + // Second pass: BFS from each direct dep, attributing every reachable + // transitive crate to that direct dep. First-write-wins keeps the BFS + // order stable and avoids re-attribution flapping. + let mut parent: std::collections::HashMap = std::collections::HashMap::new(); + let mut visited: std::collections::HashSet = std::collections::HashSet::new(); + + for direct in direct_deps { + let direct_norm = normalise_crate_name(direct); + let mut queue: std::collections::VecDeque = + std::collections::VecDeque::from([direct_norm.clone()]); + // Each direct dep gets its own visited set so two direct deps + // sharing a transitive both have a chance to attribute it (the + // first BFS to reach the crate wins, which is what we want). + while let Some(node) = queue.pop_front() { + if !visited.insert(node.clone()) { + continue; + } + if let Some(deps) = children.get(&node) { + for child in deps { + // Don't overwrite a direct dep with its own attribution, + // and don't claim another direct dep as a transitive. + if direct_deps.contains(child) { + continue; + } + parent.entry(child.clone()).or_insert_with(|| direct_norm.clone()); + queue.push_back(child.clone()); + } + } + } + } + + parent +} + // ============================================================================ // Tests // ============================================================================ @@ -783,4 +911,113 @@ serde = "1.0" # inline comment is fine let direct = collect_direct_cargo_dependencies(dir.path()); assert!(direct.is_empty()); } + + // ------------------------------------------------------------------ + // Parent-dependency identification (Track E) + // ------------------------------------------------------------------ + + fn write_cargo_lock(dir: &Path, body: &str) { + let mut f = std::fs::File::create(dir.join("Cargo.lock")).unwrap(); + write!(f, "{body}").unwrap(); + } + + #[test] + fn parent_map_identifies_direct_to_transitive_path() { + let dir = tempfile::TempDir::new().unwrap(); + write_cargo_lock( + dir.path(), + r#"version = 4 + +[[package]] +name = "my-app" +version = "0.1.0" +dependencies = [ + "reqwest", +] + +[[package]] +name = "reqwest" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "rustls" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ring", +] + +[[package]] +name = "ring" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "tokio" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +"#, + ); + + let mut direct = std::collections::HashSet::new(); + direct.insert("reqwest".to_string()); + + let parents = collect_cargo_parents(dir.path(), &direct); + assert_eq!(parents.get("rustls").map(String::as_str), Some("reqwest")); + assert_eq!(parents.get("ring").map(String::as_str), Some("reqwest")); + assert_eq!(parents.get("tokio").map(String::as_str), Some("reqwest")); + // Direct deps themselves must NOT appear in the parent map. + assert!(!parents.contains_key("reqwest")); + } + + #[test] + fn parent_map_returns_empty_when_no_lockfile() { + let dir = tempfile::TempDir::new().unwrap(); + let mut direct = std::collections::HashSet::new(); + direct.insert("reqwest".to_string()); + let parents = collect_cargo_parents(dir.path(), &direct); + assert!(parents.is_empty()); + } + + #[test] + fn parent_map_normalises_underscore_to_hyphen() { + let dir = tempfile::TempDir::new().unwrap(); + write_cargo_lock( + dir.path(), + r#"version = 4 + +[[package]] +name = "my-app" +version = "0.1.0" +dependencies = [ + "serde", +] + +[[package]] +name = "serde" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +"#, + ); + + let mut direct = std::collections::HashSet::new(); + direct.insert("serde".to_string()); + + let parents = collect_cargo_parents(dir.path(), &direct); + // Both spellings must hit the same normalised entry. + assert_eq!(parents.get("serde-derive").map(String::as_str), Some("serde")); + } } diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs index fdf69bf..74c9013 100644 --- a/src/bridge/mod.rs +++ b/src/bridge/mod.rs @@ -112,6 +112,11 @@ pub struct ReachabilityEvidence { pub import_sites: Vec, /// Classification based on import analysis pub status: ReachabilityStatus, + /// For `PhantomTransitive`: the direct dep (declared in Cargo.toml) whose + /// dependency closure pulls in this crate, best-effort. `None` if the + /// parent could not be identified or if status is not `PhantomTransitive`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub parent_dep: Option, } /// Where a dependency is imported in source code. @@ -126,11 +131,28 @@ pub struct ImportSite { } /// Whether a dependency is reachable from application code. +/// +/// `PhantomDeclared` and `PhantomTransitive` are both informational, but +/// distinguish the remediation path: +/// +/// - **PhantomDeclared** — crate IS listed in the project's `Cargo.toml` +/// (root or workspace member) but no `use ` site exists. Fix: +/// strip the manifest line (`cargo machete --fix` or manual removal). +/// - **PhantomTransitive** — crate is NOT in any Cargo.toml; it's pulled +/// in by a parent dep. Local stripping is impossible; the fix requires +/// bumping the parent dependency past the affected version. +/// +/// JSON wire format uses `phantom-declared` / `phantom-transitive` / +/// `unreachable` / `reachable`. See `schema_version` 0.2.0. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] +#[serde(rename_all = "kebab-case")] pub enum ReachabilityStatus { - /// Declared in manifest but never imported — phantom dependency - Phantom, + /// Declared in Cargo.toml but never imported — the manifest entry is + /// genuinely unused. Strip the dep to eliminate the CVE entirely. + PhantomDeclared, + /// NOT declared in any Cargo.toml; pulled in transitively by a parent + /// crate. Local strip is impossible; fix the parent. + PhantomTransitive, /// Imported but no data flow to vulnerable code path (Phase 2: kanren taint) Unreachable, /// Imported and potentially reachable @@ -191,7 +213,7 @@ impl BridgeReport { /// Create an empty report for a project with no vulnerabilities. pub fn empty(project: &Path, total_deps: usize) -> Self { Self { - schema_version: "0.1.0".to_string(), + schema_version: "0.2.0".to_string(), project: project.to_path_buf(), total_dependencies: total_deps, vulnerable_dependencies: 0, @@ -268,17 +290,20 @@ pub fn triage(project_dir: &Path, offline: bool) -> anyhow::Result // Collected once (not per-CVE) to avoid re-parsing the manifest for // every vulnerable dep — see #47. let direct_deps = lockfile::collect_direct_cargo_dependencies(project_dir); - let is_direct = |pkg: &str| { - let normalised = pkg.to_ascii_lowercase().replace('_', "-"); - direct_deps.contains(&normalised) - }; + // Parent-dep map from Cargo.lock: for any transitive crate, identify + // which direct dep's closure pulls it in (best-effort, first match wins). + let parent_map = lockfile::collect_cargo_parents(project_dir, &direct_deps); // Step 3 & 4: For each vuln, check reachability and classify let mut assessed = Vec::new(); for vuln in vulns { - let evidence = reachability::check_reachability(project_dir, &vuln.package)?; - let (classification, rationale, action) = - classify::classify(&vuln, &evidence, is_direct(&vuln.package)); + let evidence = reachability::check_reachability_with_manifest( + project_dir, + &vuln.package, + &direct_deps, + &parent_map, + )?; + let (classification, rationale, action) = classify::classify(&vuln, &evidence); assessed.push(AssessedCve { vulnerability: vuln, @@ -290,7 +315,7 @@ pub fn triage(project_dir: &Path, offline: bool) -> anyhow::Result } let mut report = BridgeReport { - schema_version: "0.1.0".to_string(), + schema_version: "0.2.0".to_string(), project: project_dir.to_path_buf(), total_dependencies: total_deps, vulnerable_dependencies: 0, diff --git a/src/bridge/reachability.rs b/src/bridge/reachability.rs index 1d50a6d..4396db0 100644 --- a/src/bridge/reachability.rs +++ b/src/bridge/reachability.rs @@ -11,6 +11,7 @@ use super::{ImportSite, ReachabilityEvidence, ReachabilityStatus}; use anyhow::Result; +use std::collections::{HashMap, HashSet}; use std::fs::File; use std::io::Read; use std::path::Path; @@ -24,14 +25,48 @@ const SOURCE_FILE_READ_LIMIT: u64 = 64 * 1024 * 1024; /// Check whether a crate is actually imported in the project's Rust source files. /// +/// Convenience wrapper around [`check_reachability_with_manifest`] that does +/// NOT distinguish `PhantomDeclared` from `PhantomTransitive` — kept for the +/// internal test suite. Returns `PhantomTransitive` for the no-`use`-site +/// case as the conservative default (the parent identification will be +/// `None`). +/// +/// Production callers should use [`check_reachability_with_manifest`] so +/// the phantom-declared vs phantom-transitive distinction is preserved. +#[cfg(test)] +pub fn check_reachability(project_dir: &Path, crate_name: &str) -> Result { + check_reachability_with_manifest(project_dir, crate_name, &HashSet::new(), &HashMap::new()) +} + +/// Check whether a crate is actually imported in the project's Rust source files, +/// distinguishing **declared phantoms** from **transitive phantoms**. +/// /// Scans all .rs files under `project_dir` for patterns that indicate the /// crate is used: /// - `use ::` (standard import) /// - `::` (fully qualified path in code) /// - `extern crate ` (legacy import) /// -/// Crate names with hyphens are normalised to underscores (Rust convention). -pub fn check_reachability(project_dir: &Path, crate_name: &str) -> Result { +/// Crate names with hyphens are normalised to underscores for source +/// scanning (Rust convention), and to lowercase-with-hyphens for manifest +/// lookups (Cargo convention). +/// +/// When no `use` site is found, classification splits: +/// +/// - `declared_deps` contains the crate (lookup against root + workspace +/// Cargo.toml [dependencies] sections) → [`ReachabilityStatus::PhantomDeclared`] +/// - Not in `declared_deps` → [`ReachabilityStatus::PhantomTransitive`] and +/// `parent_dep` is populated from `parent_map` if known. +/// +/// `parent_map` maps every transitive crate name to the direct dep whose +/// dependency closure pulls it in (best-effort from Cargo.lock). Names in +/// both maps use the Cargo convention (lowercase, hyphens). +pub fn check_reachability_with_manifest( + project_dir: &Path, + crate_name: &str, + declared_deps: &HashSet, + parent_map: &HashMap, +) -> Result { // Rust converts hyphens to underscores in crate names let normalised = crate_name.replace('-', "_"); @@ -97,17 +132,22 @@ pub fn check_reachability(project_dir: &Path, crate_name: &str) -> Result