From 0ef4d36df8c946e96e1b33609634956fdbad5631 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Wed, 25 Mar 2026 20:23:43 +0100 Subject: [PATCH] fix: close two-hop symlink chain bypass in SafeSymlink/SafeHardlink (#116) String-based path normalization in SafeSymlink::validate and HardlinkTracker::validate_hardlink did not account for on-disk symlinks written by earlier archive entries. A second symlink whose target traversed through a previously extracted symlink could redirect subsequent `..` steps outside the extraction root, even though normalizing the string representation kept the path within the destination (GHSA-83g3-92jg-28cx variant). Replace normalize_symlink_target with resolve_through_symlinks: a component-by-component walk that calls fs::canonicalize whenever an on-disk symlink is encountered during traversal, verifying containment within the destination directory after every step. Non-existent path components degrade gracefully to string-based normalization. Circular symlink chains (ELOOP) are mapped to SymlinkEscape. Add unit tests in both modules and a CVE regression test (tests/cve/ghsa_83g3_92jg_28cx.rs) that builds the four-entry attack archive in memory and asserts extraction is rejected with SymlinkEscape or HardlinkEscape. Requires --allow-symlinks AND --allow-hardlinks (both non-default) to trigger. --- CHANGELOG.md | 14 ++ crates/exarch-core/src/security/hardlink.rs | 109 ++++++------ crates/exarch-core/src/types/safe_symlink.rs | 168 +++++++++++++------ tests/cve/ghsa_83g3_92jg_28cx.rs | 129 ++++++++++++++ tests/cve/mod.rs | 1 + 5 files changed, 317 insertions(+), 104 deletions(-) create mode 100644 tests/cve/ghsa_83g3_92jg_28cx.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index fd42240..180b083 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Security + +- Fix two-hop symlink chain bypass in `SafeSymlink` and `SafeHardlink` validation + (GHSA-83g3-92jg-28cx variant — #116). String-based `..` normalization did not + account for on-disk symlinks written by earlier archive entries; a second symlink + whose target traversed through a previously extracted symlink could redirect + subsequent `..` steps outside the extraction root. The fix replaces string + normalization with a component-by-component on-disk walk that calls + `fs::canonicalize` whenever an on-disk symlink is encountered, verifying + containment within the destination directory after every step. + Requires `--allow-symlinks` AND `--allow-hardlinks` (both non-default) to + trigger; hardlink escape is additionally blocked by OS restrictions on + macOS for root-owned files. + ### Added - `list_archive` and `verify_archive` now support 7z archives, consistent with diff --git a/crates/exarch-core/src/security/hardlink.rs b/crates/exarch-core/src/security/hardlink.rs index 84f6570..39ce288 100644 --- a/crates/exarch-core/src/security/hardlink.rs +++ b/crates/exarch-core/src/security/hardlink.rs @@ -9,6 +9,7 @@ use crate::Result; use crate::SecurityConfig; use crate::types::DestDir; use crate::types::SafePath; +use crate::types::safe_symlink::resolve_through_symlinks; /// Tracks hardlink targets during extraction. /// @@ -141,64 +142,22 @@ impl HardlinkTracker { }); } - // Resolve target against destination - let resolved = dest.as_path().join(target); - - let needs_normalization = resolved - .components() - .any(|c| matches!(c, Component::ParentDir | Component::CurDir)); - - if !needs_normalization { - // Path is already normalized, just verify it's within destination - if !resolved.starts_with(dest.as_path()) { - return Err(ExtractionError::HardlinkEscape { - path: link_path.as_path().to_path_buf(), - }); - } - - // Track this hardlink target (H-PERF-6: use entry API) - self.seen_targets - .entry(resolved) - .or_insert_with(|| link_path.as_path().to_path_buf()); - - return Ok(()); - } - - // Normalize path: resolve .. and . components, detect escape attempts - let mut normalized = PathBuf::new(); - for component in resolved.components() { - match component { - Component::ParentDir => { - if !normalized.pop() { - // Tried to go above root - escape attempt - return Err(ExtractionError::HardlinkEscape { - path: link_path.as_path().to_path_buf(), - }); - } - } - Component::CurDir => { - // Skip current directory markers - } - // Keep Prefix/RootDir from resolved path (they come from dest, which is trusted) - _ => { - normalized.push(component); - } - } - } - - // Verify the normalized path is within destination - // Note: We need to canonicalize the destination for proper comparison - // since dest might have symlinks - let dest_canonical = dest.as_path(); - if !normalized.starts_with(dest_canonical) { - return Err(ExtractionError::HardlinkEscape { + // Resolve target against destination, following any on-disk symlinks + // encountered during traversal. + // + // String-based normalization is insufficient: if a previously extracted + // symlink lies on the target path, a hardlink target can escape the + // extraction root via on-disk resolution even though string normalization + // would pass (two-hop chain bypass, GHSA-83g3-92jg-28cx variant — #116). + let resolved = + resolve_through_symlinks(dest.as_path(), target, dest.as_path(), link_path.as_path()) + .map_err(|_| ExtractionError::HardlinkEscape { path: link_path.as_path().to_path_buf(), - }); - } + })?; - // Track this hardlink target using normalized path (H-PERF-6: use entry API) + // Track this hardlink target (H-PERF-6: use entry API) self.seen_targets - .entry(normalized) + .entry(resolved) .or_insert_with(|| link_path.as_path().to_path_buf()); Ok(()) @@ -414,4 +373,44 @@ mod tests { // Three different targets assert_eq!(tracker.count(), 3, "should track each unique target"); } + + /// Hardlink target that traverses through an already-extracted symlink must + /// be rejected (two-hop chain, GHSA-83g3-92jg-28cx variant — issue #116). + /// + /// Entry 2 (symlink a/b/c/up -> ../..) is on disk when hardlink is + /// validated. Target a/b/escape/../../etc/passwd: string normalization + /// → dest/a/etc/passwd (looks safe), but if a/b/escape is an on-disk + /// symlink resolving outside dest, the hardlink must be rejected. + #[test] + #[cfg(unix)] + #[allow(clippy::unwrap_used)] + fn test_hardlink_two_hop_chain_rejected() { + use std::fs; + use std::os::unix; + + let (temp, dest) = create_test_dest(); + let mut config = SecurityConfig::default(); + config.allowed.hardlinks = true; + + // Simulate on-disk state after extracting the two symlink entries. + let a = temp.path().join("a"); + let b = a.join("b"); + let c = b.join("c"); + fs::create_dir_all(&c).unwrap(); + // a/b/c/up -> ../.. (first hop: resolves to a/) + unix::fs::symlink("../..", c.join("up")).unwrap(); + // a/b/escape -> c/up/../.. (second hop: resolves outside dest on disk) + unix::fs::symlink("c/up/../..", b.join("escape")).unwrap(); + + let mut tracker = HardlinkTracker::new(); + let link = SafePath::validate(&PathBuf::from("exfil"), &dest, &config).unwrap(); + // Target traverses through the escape symlink + let target = PathBuf::from("a/b/escape/../../etc/passwd"); + + let result = tracker.validate_hardlink(&link, &target, &dest, &config); + assert!( + matches!(result, Err(ExtractionError::HardlinkEscape { .. })), + "hardlink through two-hop symlink chain must be rejected" + ); + } } diff --git a/crates/exarch-core/src/types/safe_symlink.rs b/crates/exarch-core/src/types/safe_symlink.rs index 29ec721..aea969e 100644 --- a/crates/exarch-core/src/types/safe_symlink.rs +++ b/crates/exarch-core/src/types/safe_symlink.rs @@ -141,24 +141,16 @@ impl SafeSymlink { // with a symlink between validation and extraction time. verify_parent_not_symlink(link.as_path(), dest)?; - // 4. Resolve target against link's parent directory + // 4. Resolve target against link's parent directory, following any + // on-disk symlinks encountered during traversal. + // + // String-based normalization is insufficient: a target component that + // is already a symlink written to disk by a previous entry can redirect + // subsequent `..` traversal outside the extraction root (two-hop chain, + // GHSA-83g3-92jg-28cx variant — issue #116). let link_parent = link.as_path().parent().unwrap_or_else(|| Path::new("")); let link_parent_full = dest.as_path().join(link_parent); - - // Join target to link's parent directory - let mut resolved = link_parent_full.join(target); - - // Normalize the path by resolving .. and . components manually - resolved = normalize_symlink_target(&resolved); - - // 5. Verify resolved target is within dest - // We can't use canonicalize here because the target might not exist yet - // Instead, we check if the normalized path starts with dest - if !resolved.starts_with(dest.as_path()) { - return Err(ExtractionError::SymlinkEscape { - path: link.as_path().to_path_buf(), - }); - } + resolve_through_symlinks(&link_parent_full, target, dest.as_path(), link.as_path())?; Ok(Self { link_path: link.as_path().to_path_buf(), @@ -226,31 +218,86 @@ fn verify_parent_not_symlink(path: &Path, dest: &DestDir) -> Result<()> { Ok(()) } -/// Normalizes a symlink target by manually resolving .. and . components. +/// Resolves `target` step by step from `start`, following any on-disk symlinks +/// encountered during traversal and verifying containment within `dest` after +/// every component. /// -/// This function is used instead of `canonicalize` because the target might -/// not exist yet during extraction planning. -fn normalize_symlink_target(path: &Path) -> PathBuf { - // Pre-allocate with expected capacity to avoid reallocations - let component_count = path.components().count(); - let mut components = Vec::with_capacity(component_count); - - for component in path.components() { +/// Unlike pure string normalization, this function calls `fs::canonicalize` +/// whenever it steps into a path that is a symlink on disk. This closes the +/// two-hop symlink chain bypass: a target component that is already a symlink +/// (written by a previous archive entry) is resolved to its real on-disk +/// location before any subsequent `..` traversal is applied. +/// +/// Non-existent path components (targets that have not been extracted yet) are +/// handled transparently — `symlink_metadata` failure is treated as "not a +/// symlink", so the function degrades gracefully to string-based normalization +/// for paths that do not yet exist on disk. +/// +/// # Errors +/// +/// Returns `SymlinkEscape` if: +/// - The resolved path escapes `dest` at any step. +/// - `fs::canonicalize` fails for a symlink component (e.g., `ELOOP` for +/// circular chains). +pub(crate) fn resolve_through_symlinks( + start: &Path, + target: &Path, + dest: &Path, + link_path: &Path, +) -> Result { + let mut current = start.to_path_buf(); + + for component in target.components() { match component { std::path::Component::ParentDir => { - // Pop the last component if possible - components.pop(); + // If the current accumulated path is an on-disk symlink, resolve + // it before applying `..` so the pop operates on the real + // filesystem topology rather than the string representation. + if std::fs::symlink_metadata(¤t) + .map(|m| m.file_type().is_symlink()) + .unwrap_or(false) + { + current = std::fs::canonicalize(¤t).map_err(|_| { + ExtractionError::SymlinkEscape { + path: link_path.to_path_buf(), + } + })?; + } + if !current.pop() { + return Err(ExtractionError::SymlinkEscape { + path: link_path.to_path_buf(), + }); + } } - std::path::Component::CurDir => { - // Skip . components + std::path::Component::CurDir => {} + std::path::Component::Normal(name) => { + current.push(name); + // Resolve the component immediately if it is a symlink so that + // any subsequent `..` steps use the real resolved path. + if std::fs::symlink_metadata(¤t) + .map(|m| m.file_type().is_symlink()) + .unwrap_or(false) + { + current = std::fs::canonicalize(¤t).map_err(|_| { + ExtractionError::SymlinkEscape { + path: link_path.to_path_buf(), + } + })?; + } } _ => { - components.push(component); + current.push(component); } } + + if !current.starts_with(dest) { + return Err(ExtractionError::SymlinkEscape { + path: link_path.to_path_buf(), + }); + } } - components.iter().collect() + Ok(current) } #[cfg(test)] @@ -382,24 +429,6 @@ mod tests { assert!(result.is_ok()); } - #[test] - fn test_normalize_symlink_target() { - // Test basic normalization - let path = PathBuf::from("/tmp/a/b/../c/./d"); - let normalized = normalize_symlink_target(&path); - assert_eq!(normalized, PathBuf::from("/tmp/a/c/d")); - - // Test multiple parent dirs - let path = PathBuf::from("/tmp/a/b/c/../../d"); - let normalized = normalize_symlink_target(&path); - assert_eq!(normalized, PathBuf::from("/tmp/a/d")); - - // Test current dir removal - let path = PathBuf::from("/tmp/./a/./b"); - let normalized = normalize_symlink_target(&path); - assert_eq!(normalized, PathBuf::from("/tmp/a/b")); - } - #[test] fn test_safe_symlink_equality() { let (_temp, dest) = create_test_dest(); @@ -616,4 +645,45 @@ mod tests { assert_eq!(symlink.link_path(), Path::new("mylink")); assert_eq!(symlink.target_path(), Path::new("target.txt")); } + + /// Two-hop symlink chain: the second symlink's target traverses through a + /// symlink already written to disk, escaping the extraction root via + /// on-disk resolution even though string-based normalization would + /// pass. + /// + /// Attack chain (GHSA-83g3-92jg-28cx variant, issue #116): + /// Entry 1: dir a/b/c/ + /// Entry 2: link a/b/c/up -> ../.. (resolves to a/ — safe, written) + /// Entry 3: link a/b/escape -> c/up/../.. (string: a/b/ — PASS; disk: + /// escapes dest) + #[test] + #[cfg(unix)] + #[allow(clippy::unwrap_used)] + fn test_safe_symlink_two_hop_chain_rejected() { + use std::os::unix::fs; + + let (temp, dest) = create_test_dest(); + let config = create_config_with_symlinks(); + + // Set up the on-disk state that entry 2 would produce. + let a = temp.path().join("a"); + let b = a.join("b"); + let c = b.join("c"); + std::fs::create_dir_all(&c).unwrap(); + // a/b/c/up -> ../.. (resolves to a/) + fs::symlink("../..", c.join("up")).unwrap(); + + // Now validate entry 3: a/b/escape -> c/up/../.. + // String normalization gives a/b/ (within dest) — but on disk, c/up + // resolves outside, so the chain escapes. + let link = SafePath::validate(&PathBuf::from("a/b/escape"), &dest, &config) + .expect("link path should be valid"); + let target = PathBuf::from("c/up/../.."); + + let result = SafeSymlink::validate(&link, &target, &dest, &config); + assert!( + matches!(result, Err(ExtractionError::SymlinkEscape { .. })), + "two-hop symlink chain must be rejected" + ); + } } diff --git a/tests/cve/ghsa_83g3_92jg_28cx.rs b/tests/cve/ghsa_83g3_92jg_28cx.rs new file mode 100644 index 0000000..b479d5c --- /dev/null +++ b/tests/cve/ghsa_83g3_92jg_28cx.rs @@ -0,0 +1,129 @@ +//! Regression test for GHSA-83g3-92jg-28cx (exarch variant — issue #116). +//! +//! When `--allow-symlinks` is enabled, a two-hop symlink chain could bypass +//! `SafeSymlink::validate`. String-based path normalization treated the second +//! symlink's target as safe because normalizing the string representation kept +//! it within the extraction root. On disk, the first symlink redirects the +//! traversal outside the root. +//! +//! Attack chain: +//! Entry 1: dir a/b/c/ +//! Entry 2: link a/b/c/up -> ../.. (resolves to a/ — written to disk) +//! Entry 3: link a/b/escape -> c/up/../.. (string: a/b/ — PASS; disk: escapes dest) +//! Entry 4: hard exfil -> a/b/escape/../../etc/passwd +//! +//! The fix resolves each target component through the real filesystem, calling +//! `fs::canonicalize` whenever an on-disk symlink is encountered, so the escape +//! is detected at the containment check. +//! +//! Requires: `--allow-symlinks` AND `--allow-hardlinks` (both non-default). + +use exarch_core::ExtractionError; +use exarch_core::SecurityConfig; +use exarch_core::formats::TarArchive; +use exarch_core::formats::traits::ArchiveFormat; +use std::io::Cursor; +use tempfile::TempDir; + +/// Build the two-hop symlink escape TAR in memory. +fn build_two_hop_chain_tar() -> Vec { + let mut builder = tar::Builder::new(Vec::new()); + + // Entry 1: directory a/b/c/ + let mut header = tar::Header::new_gnu(); + header.set_entry_type(tar::EntryType::Directory); + header.set_size(0); + header.set_mode(0o755); + header.set_cksum(); + builder + .append_data(&mut header, "a/b/c/", &[] as &[u8]) + .expect("append dir"); + + // Entry 2: symlink a/b/c/up -> ../.. + let mut header = tar::Header::new_gnu(); + header.set_entry_type(tar::EntryType::Symlink); + header.set_size(0); + header.set_mode(0o777); + header.set_cksum(); + builder + .append_link(&mut header, "a/b/c/up", "../..") + .expect("append first hop symlink"); + + // Entry 3: symlink a/b/escape -> c/up/../.. + // String normalization: dest/a/b/c/up/../.. → dest/a/b (within dest — PASS without fix) + // On disk: c/up resolves to ../../.. from dest/a/b = outside dest + let mut header = tar::Header::new_gnu(); + header.set_entry_type(tar::EntryType::Symlink); + header.set_size(0); + header.set_mode(0o777); + header.set_cksum(); + builder + .append_link(&mut header, "a/b/escape", "c/up/../..") + .expect("append second hop symlink"); + + // Entry 4: hardlink exfil -> a/b/escape/../../etc/passwd + let mut header = tar::Header::new_gnu(); + header.set_entry_type(tar::EntryType::Link); + header.set_size(0); + header.set_mode(0o644); + header.set_cksum(); + builder + .append_link(&mut header, "exfil", "a/b/escape/../../etc/passwd") + .expect("append hardlink"); + + builder.into_inner().expect("finish tar builder") +} + +/// The second symlink in the chain must be rejected when `allow-symlinks` is +/// enabled. The archive should never extract the escape symlink to disk. +#[test] +#[cfg(unix)] +fn two_hop_symlink_chain_is_rejected() { + let dest = TempDir::new().expect("temp dir"); + let mut config = SecurityConfig::default(); + config.allowed.symlinks = true; + config.allowed.hardlinks = true; + + let data = build_two_hop_chain_tar(); + let cursor = Cursor::new(data); + let mut archive = TarArchive::new(cursor); + + let result = archive.extract(dest.path(), &config); + + // Extraction must fail — the escape symlink or hardlink must be rejected. + assert!( + result.is_err(), + "two-hop symlink chain must be rejected, but extraction succeeded" + ); + + // The error must be a symlink or hardlink escape, not an unrelated I/O error. + let err = result.unwrap_err(); + assert!( + matches!( + err, + ExtractionError::SymlinkEscape { .. } | ExtractionError::HardlinkEscape { .. } + ), + "expected SymlinkEscape or HardlinkEscape, got: {err:?}" + ); + + // The escape symlink must NOT have been written to disk. + assert!( + !dest.path().join("a/b/escape").exists(), + "escape symlink must not be written to disk" + ); +} + +/// With symlinks disabled (default), the archive is rejected at the first +/// symlink entry — the two-hop chain is never attempted. +#[test] +fn two_hop_chain_rejected_when_symlinks_disabled() { + let dest = TempDir::new().expect("temp dir"); + let config = SecurityConfig::default(); // symlinks = false + + let data = build_two_hop_chain_tar(); + let cursor = Cursor::new(data); + let mut archive = TarArchive::new(cursor); + + let result = archive.extract(dest.path(), &config); + assert!(result.is_err(), "should be rejected with symlinks disabled"); +} diff --git a/tests/cve/mod.rs b/tests/cve/mod.rs index 4021712..08baf0a 100644 --- a/tests/cve/mod.rs +++ b/tests/cve/mod.rs @@ -1,4 +1,5 @@ //! CVE and security advisory regression tests. +mod ghsa_83g3_92jg_28cx; mod rustsec_2026_0067; mod rustsec_2026_0068;