From 45dc030f8d00efb5ecea5fc03e13c4c2a0e01e8d Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Wed, 25 Mar 2026 22:06:20 +0100 Subject: [PATCH 1/4] fix: skip duplicate archive entries instead of aborting extraction (#129) When `skip_duplicates` is true (default), entries whose destination path already exists on disk are skipped rather than causing an `os error 17` abort. The skip is recorded in `ExtractionReport.files_skipped` and a warning is appended to `ExtractionReport.warnings`. - Add `skip_duplicates: bool` (default `true`) to `ExtractionOptions` - Thread `ExtractionOptions` through `ArchiveFormat::extract` and all format-specific helpers (TAR, ZIP, 7z) - Handle duplicate files, symlinks, and hardlinks in `extract_file_generic`, `create_symlink`, and `create_hardlink` respectively - Add unit tests for skip (default) and error-on-duplicate (disabled) paths --- CHANGELOG.md | 4 + crates/exarch-cli/src/commands/extract.rs | 1 + crates/exarch-core/benches/extraction.rs | 51 +++- .../exarch-core/examples/dhat_extraction.rs | 9 +- crates/exarch-core/src/api.rs | 90 +++++-- crates/exarch-core/src/config.rs | 18 +- crates/exarch-core/src/formats/common.rs | 38 ++- crates/exarch-core/src/formats/sevenz.rs | 79 ++++-- crates/exarch-core/src/formats/tar.rs | 231 +++++++++++++---- crates/exarch-core/src/formats/traits.rs | 12 +- crates/exarch-core/src/formats/zip.rs | 233 +++++++++++++++--- .../tests/security/cve_regression.rs | 69 ++++-- .../exarch-core/tests/sevenz_integration.rs | 35 ++- 13 files changed, 711 insertions(+), 159 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 991e290..faeafa1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- TAR/ZIP extraction no longer aborts on duplicate entry names; conflicting entries are now + skipped with a warning recorded in `ExtractionReport.files_skipped` (#129). 
The new + `ExtractionOptions.skip_duplicates` field (default `true`) controls this behavior. + - Fix `list` and `verify` crash on valid empty 7z archives (#117) - Fix `verify` false positive [HIGH] for solid 7z archive entries where `compressed_size=0` is a normal artifact of solid block compression (#118) diff --git a/crates/exarch-cli/src/commands/extract.rs b/crates/exarch-cli/src/commands/extract.rs index 58631be..a78b1c4 100644 --- a/crates/exarch-cli/src/commands/extract.rs +++ b/crates/exarch-cli/src/commands/extract.rs @@ -67,6 +67,7 @@ pub fn execute(args: &ExtractArgs, formatter: &dyn OutputFormatter) -> Result<() let options = ExtractionOptions { atomic: args.atomic, + skip_duplicates: true, }; // When --atomic + --force: remove existing destination after successful diff --git a/crates/exarch-core/benches/extraction.rs b/crates/exarch-core/benches/extraction.rs index ec9aca5..11fb230 100644 --- a/crates/exarch-core/benches/extraction.rs +++ b/crates/exarch-core/benches/extraction.rs @@ -25,6 +25,7 @@ use criterion::Throughput; use criterion::criterion_group; use criterion::criterion_main; use criterion::measurement::WallTime; +use exarch_core::ExtractionOptions; use exarch_core::SecurityConfig; use exarch_core::formats::SevenZArchive; use exarch_core::formats::ZipArchive; @@ -236,7 +237,11 @@ fn benchmark_many_small_files(c: &mut Criterion) { let cursor = Cursor::new(data.clone()); let mut archive = ZipArchive::new(cursor).unwrap(); archive - .extract(temp.path(), &SecurityConfig::default()) + .extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ) .unwrap(); }); }, @@ -270,7 +275,9 @@ fn benchmark_large_files(c: &mut Criterion) { ..SecurityConfig::default() }; - archive.extract(temp.path(), &config).unwrap(); + archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); }); }, ); @@ -292,7 +299,11 @@ fn benchmark_nested_directories(c: &mut Criterion) { let cursor = Cursor::new(data.clone()); 
let mut archive = ZipArchive::new(cursor).unwrap(); archive - .extract(temp.path(), &SecurityConfig::default()) + .extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ) .unwrap(); }); }); @@ -318,7 +329,9 @@ fn benchmark_compression_methods(c: &mut Criterion) { let temp = TempDir::new().unwrap(); let cursor = Cursor::new(data.clone()); let mut archive = ZipArchive::new(cursor).unwrap(); - archive.extract(temp.path(), &config).unwrap(); + archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); }); }, ); @@ -333,7 +346,9 @@ fn benchmark_compression_methods(c: &mut Criterion) { let temp = TempDir::new().unwrap(); let cursor = Cursor::new(data.clone()); let mut archive = ZipArchive::new(cursor).unwrap(); - archive.extract(temp.path(), &config).unwrap(); + archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); }); }, ); @@ -364,7 +379,11 @@ fn benchmark_sevenz_simple(c: &mut Criterion) { let cursor = Cursor::new(simple_data.clone()); let mut archive = SevenZArchive::new(cursor).unwrap(); archive - .extract(temp.path(), &SecurityConfig::default()) + .extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ) .unwrap(); }); }); @@ -384,7 +403,11 @@ fn benchmark_sevenz_nested_dirs(c: &mut Criterion) { let cursor = Cursor::new(nested_data.clone()); let mut archive = SevenZArchive::new(cursor).unwrap(); archive - .extract(temp.path(), &SecurityConfig::default()) + .extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ) .unwrap(); }); }); @@ -405,7 +428,11 @@ fn benchmark_sevenz_large_file(c: &mut Criterion) { let cursor = Cursor::new(large_data.clone()); let mut archive = SevenZArchive::new(cursor).unwrap(); archive - .extract(temp.path(), &SecurityConfig::default()) + .extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ) .unwrap(); }); }); @@ -442,7 +469,9 @@ fn 
benchmark_file_count_scaling(c: &mut Criterion) { let temp = TempDir::new().unwrap(); let cursor = Cursor::new(data.clone()); let mut archive = ZipArchive::new(cursor).unwrap(); - archive.extract(temp.path(), &config).unwrap(); + archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); }); }); } @@ -481,7 +510,9 @@ fn benchmark_depth_scaling(c: &mut Criterion) { let temp = TempDir::new().unwrap(); let cursor = Cursor::new(data.clone()); let mut archive = ZipArchive::new(cursor).unwrap(); - archive.extract(temp.path(), &config).unwrap(); + archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); }); }); } diff --git a/crates/exarch-core/examples/dhat_extraction.rs b/crates/exarch-core/examples/dhat_extraction.rs index 319fcd5..edc2266 100644 --- a/crates/exarch-core/examples/dhat_extraction.rs +++ b/crates/exarch-core/examples/dhat_extraction.rs @@ -16,6 +16,7 @@ #[global_allocator] static ALLOC: dhat::Alloc = dhat::Alloc; +use exarch_core::ExtractionOptions; use exarch_core::SecurityConfig; use exarch_core::formats::TarArchive; use exarch_core::formats::ZipArchive; @@ -68,7 +69,9 @@ fn profile_zip_extraction(file_count: usize, file_size: usize) { let cursor = Cursor::new(zip_data); let mut archive = ZipArchive::new(cursor).unwrap(); - archive.extract(temp.path(), &config).unwrap(); + archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); } fn profile_tar_extraction(file_count: usize, file_size: usize) { @@ -80,7 +83,9 @@ fn profile_tar_extraction(file_count: usize, file_size: usize) { let cursor = Cursor::new(tar_data); let mut archive = TarArchive::new(cursor); - archive.extract(temp.path(), &config).unwrap(); + archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); } fn main() { diff --git a/crates/exarch-core/src/api.rs b/crates/exarch-core/src/api.rs index bec1e14..33116dc 100644 --- a/crates/exarch-core/src/api.rs +++ 
b/crates/exarch-core/src/api.rs @@ -97,6 +97,17 @@ pub fn extract_archive_with_progress, Q: AsRef>( archive_path: P, output_dir: Q, config: &SecurityConfig, + progress: &mut dyn ProgressCallback, +) -> Result { + let options = ExtractionOptions::default(); + extract_archive_with_progress_and_options(archive_path, output_dir, config, &options, progress) +} + +fn extract_archive_with_progress_and_options, Q: AsRef>( + archive_path: P, + output_dir: Q, + config: &SecurityConfig, + options: &ExtractionOptions, _progress: &mut dyn ProgressCallback, ) -> Result { let archive_path = archive_path.as_ref(); @@ -107,13 +118,13 @@ pub fn extract_archive_with_progress, Q: AsRef>( // Dispatch to format-specific extraction match format { - ArchiveType::Tar => extract_tar(archive_path, output_dir, config), - ArchiveType::TarGz => extract_tar_gz(archive_path, output_dir, config), - ArchiveType::TarBz2 => extract_tar_bz2(archive_path, output_dir, config), - ArchiveType::TarXz => extract_tar_xz(archive_path, output_dir, config), - ArchiveType::TarZst => extract_tar_zst(archive_path, output_dir, config), - ArchiveType::Zip => extract_zip(archive_path, output_dir, config), - ArchiveType::SevenZ => extract_7z(archive_path, output_dir, config), + ArchiveType::Tar => extract_tar(archive_path, output_dir, config, options), + ArchiveType::TarGz => extract_tar_gz(archive_path, output_dir, config, options), + ArchiveType::TarBz2 => extract_tar_bz2(archive_path, output_dir, config, options), + ArchiveType::TarXz => extract_tar_xz(archive_path, output_dir, config, options), + ArchiveType::TarZst => extract_tar_zst(archive_path, output_dir, config, options), + ArchiveType::Zip => extract_zip(archive_path, output_dir, config, options), + ArchiveType::SevenZ => extract_7z(archive_path, output_dir, config, options), } } @@ -146,9 +157,15 @@ pub fn extract_archive_full, Q: AsRef>( progress: &mut dyn ProgressCallback, ) -> Result { if options.atomic { - extract_atomic(archive_path, output_dir, 
config, progress) + extract_atomic(archive_path, output_dir, config, options, progress) } else { - extract_archive_with_progress(archive_path, output_dir, config, progress) + extract_archive_with_progress_and_options( + archive_path, + output_dir, + config, + options, + progress, + ) } } @@ -178,6 +195,7 @@ fn extract_atomic, Q: AsRef>( archive_path: P, output_dir: Q, config: &SecurityConfig, + options: &ExtractionOptions, progress: &mut dyn ProgressCallback, ) -> Result { let output_dir = output_dir.as_ref(); @@ -210,7 +228,13 @@ fn extract_atomic, Q: AsRef>( )) })?; - let result = extract_archive_with_progress(archive_path, temp_dir.path(), config, progress); + let result = extract_archive_with_progress_and_options( + archive_path, + temp_dir.path(), + config, + options, + progress, + ); match result { Ok(report) => { @@ -245,6 +269,7 @@ fn extract_tar( archive_path: &Path, output_dir: &Path, config: &SecurityConfig, + options: &ExtractionOptions, ) -> Result { use crate::formats::TarArchive; use crate::formats::traits::ArchiveFormat; @@ -254,13 +279,14 @@ fn extract_tar( let file = File::open(archive_path)?; let reader = BufReader::new(file); let mut archive = TarArchive::new(reader); - archive.extract(output_dir, config) + archive.extract(output_dir, config, options) } fn extract_tar_gz( archive_path: &Path, output_dir: &Path, config: &SecurityConfig, + options: &ExtractionOptions, ) -> Result { use crate::formats::TarArchive; use crate::formats::traits::ArchiveFormat; @@ -272,13 +298,14 @@ fn extract_tar_gz( let reader = BufReader::new(file); let decoder = GzDecoder::new(reader); let mut archive = TarArchive::new(decoder); - archive.extract(output_dir, config) + archive.extract(output_dir, config, options) } fn extract_tar_bz2( archive_path: &Path, output_dir: &Path, config: &SecurityConfig, + options: &ExtractionOptions, ) -> Result { use crate::formats::TarArchive; use crate::formats::traits::ArchiveFormat; @@ -290,13 +317,14 @@ fn extract_tar_bz2( let 
reader = BufReader::new(file); let decoder = BzDecoder::new(reader); let mut archive = TarArchive::new(decoder); - archive.extract(output_dir, config) + archive.extract(output_dir, config, options) } fn extract_tar_xz( archive_path: &Path, output_dir: &Path, config: &SecurityConfig, + options: &ExtractionOptions, ) -> Result { use crate::formats::TarArchive; use crate::formats::traits::ArchiveFormat; @@ -308,13 +336,14 @@ fn extract_tar_xz( let reader = BufReader::new(file); let decoder = XzDecoder::new(reader); let mut archive = TarArchive::new(decoder); - archive.extract(output_dir, config) + archive.extract(output_dir, config, options) } fn extract_tar_zst( archive_path: &Path, output_dir: &Path, config: &SecurityConfig, + options: &ExtractionOptions, ) -> Result { use crate::formats::TarArchive; use crate::formats::traits::ArchiveFormat; @@ -326,13 +355,14 @@ fn extract_tar_zst( let reader = BufReader::new(file); let decoder = ZstdDecoder::new(reader)?; let mut archive = TarArchive::new(decoder); - archive.extract(output_dir, config) + archive.extract(output_dir, config, options) } fn extract_zip( archive_path: &Path, output_dir: &Path, config: &SecurityConfig, + options: &ExtractionOptions, ) -> Result { use crate::formats::ZipArchive; use crate::formats::traits::ArchiveFormat; @@ -340,13 +370,14 @@ fn extract_zip( let file = File::open(archive_path)?; let mut archive = ZipArchive::new(file)?; - archive.extract(output_dir, config) + archive.extract(output_dir, config, options) } fn extract_7z( archive_path: &Path, output_dir: &Path, config: &SecurityConfig, + options: &ExtractionOptions, ) -> Result { use crate::formats::SevenZArchive; use crate::formats::traits::ArchiveFormat; @@ -354,7 +385,7 @@ fn extract_7z( let file = File::open(archive_path)?; let mut archive = SevenZArchive::new(file)?; - archive.extract(output_dir, config) + archive.extract(output_dir, config, options) } /// Creates an archive from source files and directories. 
@@ -690,7 +721,10 @@ mod tests { #[test] fn test_extract_archive_full_non_atomic_delegates_to_normal() { let dest = tempfile::TempDir::new().unwrap(); - let options = ExtractionOptions { atomic: false }; + let options = ExtractionOptions { + atomic: false, + skip_duplicates: true, + }; let result = extract_archive_full( PathBuf::from("nonexistent.tar.gz"), dest.path(), @@ -704,7 +738,10 @@ mod tests { #[test] fn test_extract_archive_with_options_delegates() { let dest = tempfile::TempDir::new().unwrap(); - let options = ExtractionOptions { atomic: false }; + let options = ExtractionOptions { + atomic: false, + skip_duplicates: true, + }; let result = extract_archive_with_options( PathBuf::from("nonexistent.tar.gz"), dest.path(), @@ -731,7 +768,10 @@ mod tests { let parent = tempfile::TempDir::new().unwrap(); let output_dir = parent.path().join("extracted"); - let options = ExtractionOptions { atomic: true }; + let options = ExtractionOptions { + atomic: true, + skip_duplicates: true, + }; let result = extract_archive_with_options( &archive_path, &output_dir, @@ -755,7 +795,10 @@ mod tests { let parent = tempfile::TempDir::new().unwrap(); let output_dir = parent.path().join("extracted"); - let options = ExtractionOptions { atomic: true }; + let options = ExtractionOptions { + atomic: true, + skip_duplicates: true, + }; let result = extract_archive_with_options( PathBuf::from("nonexistent_archive.tar.gz"), &output_dir, @@ -792,7 +835,10 @@ mod tests { std::fs::write(src_dir.path().join("new.txt"), b"new content").unwrap(); create_archive(&archive_path, &[src_dir.path()], &CreationConfig::default()).unwrap(); - let options = ExtractionOptions { atomic: true }; + let options = ExtractionOptions { + atomic: true, + skip_duplicates: true, + }; let result = extract_archive_with_options( &archive_path, &output_dir, diff --git a/crates/exarch-core/src/config.rs b/crates/exarch-core/src/config.rs index fed577c..d5d33bc 100644 --- a/crates/exarch-core/src/config.rs +++ 
b/crates/exarch-core/src/config.rs @@ -213,7 +213,7 @@ impl SecurityConfig { /// /// Separate from `SecurityConfig` to keep security settings focused. /// These options control operational behavior like atomicity. -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone)] pub struct ExtractionOptions { /// Extract atomically: use a temp dir in the same parent as the output /// directory, rename on success, and delete on failure. @@ -224,6 +224,22 @@ pub struct ExtractionOptions { /// /// Note: cleanup is best-effort if the process is terminated via SIGKILL. pub atomic: bool, + + /// Skip duplicate entries with a recorded warning instead of aborting. + /// + /// When `true` (default), an entry whose destination path already exists + /// (e.g. an earlier entry in the same archive) is skipped and a warning is + /// recorded in `ExtractionReport`. When `false`, such entries cause an error. + pub skip_duplicates: bool, +} + +impl Default for ExtractionOptions { + fn default() -> Self { + Self { + atomic: false, + skip_duplicates: true, + } + } } #[cfg(test)] diff --git a/crates/exarch-core/src/formats/common.rs b/crates/exarch-core/src/formats/common.rs index 876faf2..7cab6c2 100644 --- a/crates/exarch-core/src/formats/common.rs +++ b/crates/exarch-core/src/formats/common.rs @@ -376,6 +376,7 @@ fn create_file_with_mode(path: &Path, _mode: Option) -> std::io::Result( reader: &mut R, @@ -385,12 +386,28 @@ pub fn extract_file_generic( expected_size: Option, copy_buffer: &mut CopyBuffer, dir_cache: &mut DirCache, + skip_duplicates: bool, ) -> Result<()> { let output_path = dest.join(&validated.safe_path); // Create parent directories if needed using cache dir_cache.ensure_parent_dir(&output_path)?; + if output_path.exists() { + if skip_duplicates { + report.files_skipped += 1; + report.warnings.push(format!( + "skipped duplicate entry: {}", + validated.safe_path.as_path().display() + )); + return Ok(()); + } + return Err(ExtractionError::InvalidArchive(format!( + "duplicate entry: {}", +
validated.safe_path.as_path().display() + ))); + } + // CRITICAL: Check quota BEFORE writing (prevents partial files on overflow) if let Some(size) = expected_size { report @@ -491,13 +508,15 @@ pub fn create_directory( /// - Platform does not support symlinks /// - Parent directory creation fails /// - Symlink creation fails (including when target path already exists) -/// - A file or symlink already exists at the link path +/// - A file or symlink already exists at the link path and `skip_duplicates` is +/// false #[allow(unused_variables)] pub fn create_symlink( safe_symlink: &SafeSymlink, dest: &DestDir, report: &mut ExtractionReport, dir_cache: &mut DirCache, + skip_duplicates: bool, ) -> Result<()> { #[cfg(unix)] { @@ -509,6 +528,21 @@ pub fn create_symlink( // Create parent directories using cache dir_cache.ensure_parent_dir(&link_path)?; + if link_path.exists() || link_path.symlink_metadata().is_ok() { + if skip_duplicates { + report.files_skipped += 1; + report.warnings.push(format!( + "skipped duplicate symlink: {}", + safe_symlink.link_path().display() + )); + return Ok(()); + } + return Err(ExtractionError::InvalidArchive(format!( + "duplicate entry: {}", + safe_symlink.link_path().display() + ))); + } + // Create symlink symlink(target_path, &link_path)?; @@ -572,6 +606,7 @@ mod tests { expected_size, &mut copy_buffer, &mut dir_cache, + true, ); // Should return QuotaExceeded with IntegerOverflow @@ -956,6 +991,7 @@ mod tests { None, &mut copy_buffer, &mut dir_cache, + true, ) .expect("extraction should succeed"); diff --git a/crates/exarch-core/src/formats/sevenz.rs b/crates/exarch-core/src/formats/sevenz.rs index 19fa75f..601fe23 100644 --- a/crates/exarch-core/src/formats/sevenz.rs +++ b/crates/exarch-core/src/formats/sevenz.rs @@ -99,6 +99,7 @@ use sevenz_rust2::Password; static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0); use crate::ExtractionError; +use crate::ExtractionOptions; use crate::ExtractionReport; use crate::Result; use 
crate::SecurityConfig; @@ -292,6 +293,7 @@ impl SevenZArchive { dest: &DestDir, validator: &mut EntryValidator, dir_cache: &mut common::DirCache, + skip_duplicates: bool, ) -> Result { // Use RefCell for interior mutability in closure let report = RefCell::new(ExtractionReport::new()); @@ -331,6 +333,24 @@ impl SevenZArchive { // Create parent directories using cache dir_cache.borrow_mut().ensure_parent_dir(&dest_path)?; + if dest_path.exists() { + if skip_duplicates { + report.borrow_mut().files_skipped += 1; + report.borrow_mut().warnings.push(format!( + "skipped duplicate entry: {}", + validated.safe_path.as_path().display() + )); + return Ok(true); + } + return Err(sevenz_rust2::Error::Other( + format!( + "duplicate entry: {}", + validated.safe_path.as_path().display() + ) + .into(), + )); + } + // Atomic write (temp + rename) with unique temp file name let counter = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed); let pid = process::id(); @@ -371,7 +391,12 @@ impl SevenZArchive { } impl ArchiveFormat for SevenZArchive { - fn extract(&mut self, output_dir: &Path, config: &SecurityConfig) -> Result { + fn extract( + &mut self, + output_dir: &Path, + config: &SecurityConfig, + options: &ExtractionOptions, + ) -> Result { // Step 0: Validate solid archive policy if self.is_solid { if !config.allow_solid_archives { @@ -459,7 +484,13 @@ impl ArchiveFormat for SevenZArchive { // double parsing in our validation logic let mut validator = EntryValidator::new(config, &dest); let mut dir_cache = common::DirCache::new(); - match Self::extract_with_callback(&mut self.source, &dest, &mut validator, &mut dir_cache) { + match Self::extract_with_callback( + &mut self.source, + &dest, + &mut validator, + &mut dir_cache, + options.skip_duplicates, + ) { Ok(report) => Ok(report), Err(e) => { // 7z pre-validates all paths before extracting, so any error @@ -706,7 +737,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = 
archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 2); assert!(temp.path().join("simple/file1.txt").exists()); @@ -726,7 +759,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert!(report.files_extracted >= 1); assert!(temp.path().join("nested/subdir1/subdir2/deep.txt").exists()); @@ -743,7 +778,11 @@ mod tests { // Rejection happens in extract() with default config let temp = TempDir::new().unwrap(); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); assert!(result.is_err()); assert!(matches!( @@ -775,7 +814,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 0); assert_eq!(report.directories_created, 0); @@ -790,7 +831,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 0); assert_eq!(report.bytes_written, 0); } @@ -807,7 +850,7 @@ mod tests { ..SecurityConfig::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); assert!(matches!( result.unwrap_err(), @@ -830,7 +873,7 @@ mod tests { 
..SecurityConfig::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( result.is_ok(), "2 files should not exceed quota of 3: {result:?}" @@ -871,7 +914,7 @@ mod tests { ..SecurityConfig::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_ok(), "solid archive should extract: {result:?}"); assert!(result.unwrap().files_extracted > 0); } @@ -886,7 +929,7 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); assert!(matches!( result.unwrap_err(), @@ -908,7 +951,7 @@ mod tests { ..SecurityConfig::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); assert!(matches!( result.unwrap_err(), @@ -926,7 +969,7 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); // allow_solid_archives = false - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_ok(), "non-solid should work: {result:?}"); } @@ -971,7 +1014,7 @@ mod tests { ..SecurityConfig::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( result.is_ok(), "exact limit should allow extraction: {result:?}" @@ -1001,7 +1044,7 @@ mod tests { ..SecurityConfig::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err(), "one 
byte under limit should reject"); assert!(matches!( result.unwrap_err(), @@ -1017,7 +1060,11 @@ mod tests { let mut archive = SevenZArchive::new(cursor).unwrap(); let temp = TempDir::new().unwrap(); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); assert!(result.is_err()); match result.unwrap_err() { diff --git a/crates/exarch-core/src/formats/tar.rs b/crates/exarch-core/src/formats/tar.rs index 28e05e2..c89743d 100644 --- a/crates/exarch-core/src/formats/tar.rs +++ b/crates/exarch-core/src/formats/tar.rs @@ -102,6 +102,7 @@ use smallvec::SmallVec; use tar::Archive; use crate::ExtractionError; +use crate::ExtractionOptions; use crate::ExtractionReport; use crate::Result; use crate::SecurityConfig; @@ -185,6 +186,7 @@ impl TarArchive { report: &mut ExtractionReport, copy_buffer: &mut CopyBuffer, dir_cache: &mut common::DirCache, + skip_duplicates: bool, ) -> Result> { // Skip TAR metadata entries (PAX headers, GNU long names/links, sparse) if TarEntryAdapter::is_metadata_entry(&entry) { @@ -205,7 +207,15 @@ impl TarArchive { match validated.entry_type { ValidatedEntryType::File => { - Self::extract_file(entry, &validated, dest, report, copy_buffer, dir_cache)?; + Self::extract_file( + entry, + &validated, + dest, + report, + copy_buffer, + dir_cache, + skip_duplicates, + )?; Ok(None) } @@ -215,7 +225,7 @@ impl TarArchive { } ValidatedEntryType::Symlink(safe_symlink) => { - common::create_symlink(&safe_symlink, dest, report, dir_cache)?; + common::create_symlink(&safe_symlink, dest, report, dir_cache, skip_duplicates)?; Ok(None) } @@ -237,6 +247,7 @@ impl TarArchive { report: &mut ExtractionReport, copy_buffer: &mut CopyBuffer, dir_cache: &mut common::DirCache, + skip_duplicates: bool, ) -> Result<()> { let size = Some(entry.size()); common::extract_file_generic( @@ -247,6 +258,7 @@ impl TarArchive { size, copy_buffer, dir_cache, 
+ skip_duplicates, ) } @@ -261,6 +273,7 @@ impl TarArchive { dest: &DestDir, report: &mut ExtractionReport, dir_cache: &mut common::DirCache, + skip_duplicates: bool, ) -> Result<()> { let link_path = dest.join(&info.link_path); let target_path = dest.join(&info.target_path); @@ -275,6 +288,21 @@ impl TarArchive { // Create parent directories using cache dir_cache.ensure_parent_dir(&link_path)?; + if link_path.exists() { + if skip_duplicates { + report.files_skipped += 1; + report.warnings.push(format!( + "skipped duplicate hardlink: {}", + info.link_path.as_path().display() + )); + return Ok(()); + } + return Err(ExtractionError::InvalidArchive(format!( + "duplicate entry: {}", + info.link_path.as_path().display() + ))); + } + // Copy content to a new independent inode. Any subsequent write to // `link_path` cannot corrupt `target_path` because they are separate files. let bytes_copied = std::fs::copy(&target_path, &link_path)?; @@ -287,8 +315,14 @@ impl TarArchive { } impl ArchiveFormat for TarArchive { - fn extract(&mut self, output_dir: &Path, config: &SecurityConfig) -> Result { + fn extract( + &mut self, + output_dir: &Path, + config: &SecurityConfig, + options: &ExtractionOptions, + ) -> Result { let start = Instant::now(); + let skip_duplicates = options.skip_duplicates; let dest = DestDir::new_or_create(output_dir.to_path_buf())?; @@ -327,6 +361,7 @@ impl ArchiveFormat for TarArchive { &mut report, &mut copy_buffer, &mut dir_cache, + skip_duplicates, ) { Ok(Some(hardlink_info)) => hardlinks.push(hardlink_info), Ok(None) => {} @@ -345,8 +380,13 @@ impl ArchiveFormat for TarArchive { // Two-pass extraction: create hardlinks after all target files exist for hardlink_info in &hardlinks { - if let Err(e) = Self::create_hardlink(hardlink_info, &dest, &mut report, &mut dir_cache) - { + if let Err(e) = Self::create_hardlink( + hardlink_info, + &dest, + &mut report, + &mut dir_cache, + skip_duplicates, + ) { return Err(if report.total_items() > 0 { 
ExtractionError::PartialExtraction { source: Box::new(e), @@ -604,7 +644,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert_eq!(report.directories_created, 0); @@ -649,7 +691,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert_eq!(report.directories_created, 2); @@ -688,7 +732,9 @@ mod tests { let mut config = SecurityConfig::default(); config.allowed.symlinks = true; - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert_eq!(report.symlinks_created, 1); @@ -727,7 +773,9 @@ mod tests { let mut config = SecurityConfig::default(); config.allowed.hardlinks = true; - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 2); assert!(temp.path().join("hardlink.txt").exists()); @@ -745,7 +793,7 @@ mod tests { ..Default::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); } @@ -767,7 +815,7 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); } @@ -789,7 +837,7 @@ mod 
tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); } @@ -811,7 +859,7 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); } @@ -854,7 +902,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert!(temp.path().join("hello.txt").exists()); @@ -889,7 +939,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert!(temp.path().join("very_long_filename.txt").exists()); @@ -924,7 +976,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert!(temp.path().join("file.txt").exists()); @@ -957,7 +1011,7 @@ mod tests { // Either succeeds (extracted as regular file) or fails with // InvalidArchive due to missing GNU sparse extension headers. // A SecurityViolation must NOT be returned for this entry type. 
- match archive.extract(temp.path(), &config) { + match archive.extract(temp.path(), &config, &ExtractionOptions::default()) { Ok(report) => { assert_eq!(report.files_extracted, 1); assert!(temp.path().join("sparse.txt").exists()); @@ -987,7 +1041,7 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( matches!(result, Err(ExtractionError::SecurityViolation { .. })), @@ -1014,7 +1068,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert!(temp.path().join("cont.txt").exists()); @@ -1040,7 +1096,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert!(temp.path().join("file.txt").exists()); @@ -1066,7 +1124,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert!(temp.path().join("file.txt").exists()); @@ -1091,7 +1151,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert!(temp.path().join("file.txt").exists()); @@ 
-1112,7 +1174,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert!(temp.path().join("file.txt").exists()); @@ -1126,7 +1190,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 0); assert_eq!(report.directories_created, 0); @@ -1140,7 +1206,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert!(temp.path().join("empty.txt").exists()); @@ -1164,7 +1232,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 3); assert!(temp.path().join("file1.txt").exists()); @@ -1187,7 +1257,7 @@ mod tests { ..Default::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); } @@ -1206,7 +1276,7 @@ mod tests { ..Default::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); } @@ -1231,7 +1301,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let 
report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); @@ -1260,7 +1332,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); @@ -1278,7 +1352,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.bytes_written, 10); } @@ -1301,7 +1377,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 0); assert_eq!(report.directories_created, 1); @@ -1328,7 +1406,7 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); // symlinks disabled by default - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); } @@ -1353,7 +1431,7 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); // hardlinks disabled by default - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); } @@ -1366,7 +1444,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + 
.extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert!(report.duration.as_nanos() > 0); } @@ -1393,7 +1473,7 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); match result { @@ -1424,7 +1504,7 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); match result { @@ -1465,7 +1545,7 @@ mod tests { let mut config = SecurityConfig::default(); config.allowed.symlinks = true; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); // A directory was created before the symlink escape, so the error is @@ -1501,7 +1581,7 @@ mod tests { let mut config = SecurityConfig::default(); config.allowed.hardlinks = true; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); match result { @@ -1548,7 +1628,9 @@ mod tests { let mut config = SecurityConfig::default(); config.allowed.hardlinks = true; - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); // 1 target file + 7 hardlinks = 8 files extracted assert_eq!(report.files_extracted, 8); @@ -1593,7 +1675,9 @@ mod tests { let mut config = SecurityConfig::default(); config.allowed.hardlinks = true; - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); // 1 target file + 20 
hardlinks = 21 files extracted assert_eq!(report.files_extracted, 21); @@ -1671,7 +1755,7 @@ mod tests { ..Default::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( result.is_err(), "expected quota error for PAX file size override, got: {result:?}" @@ -1694,7 +1778,7 @@ mod tests { ..Default::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( result.is_err(), "expected quota error for PAX total size override, got: {result:?}" @@ -1737,7 +1821,9 @@ mod tests { let mut config = SecurityConfig::default(); config.allowed.hardlinks = true; - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); // 1 target file + 8 hardlinks = 9 files extracted assert_eq!(report.files_extracted, 9); @@ -1745,4 +1831,61 @@ mod tests { assert!(temp.path().join(format!("link{i}.txt")).exists()); } } + + /// Build a TAR archive in-memory with two entries sharing the same path. 
+ fn create_duplicate_entry_tar(path: &str, content1: &[u8], content2: &[u8]) -> Vec { + let mut builder = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_size(content1.len() as u64); + header.set_mode(0o644); + header.set_cksum(); + builder.append_data(&mut header, path, content1).unwrap(); + + let mut header = tar::Header::new_gnu(); + header.set_size(content2.len() as u64); + header.set_mode(0o644); + header.set_cksum(); + builder.append_data(&mut header, path, content2).unwrap(); + + builder.into_inner().unwrap() + } + + #[test] + fn test_duplicate_entry_skip_default() { + let tar_data = create_duplicate_entry_tar("legit.txt", b"first", b"second"); + let mut archive = TarArchive::new(Cursor::new(tar_data)); + + let temp = TempDir::new().unwrap(); + let config = SecurityConfig::default(); + let options = ExtractionOptions::default(); // skip_duplicates = true + + let report = archive.extract(temp.path(), &config, &options).unwrap(); + + // First entry extracted, second skipped + assert_eq!(report.files_extracted, 1); + assert_eq!(report.files_skipped, 1); + assert_eq!(report.warnings.len(), 1); + assert!(report.warnings[0].contains("legit.txt")); + + // File content is from the first entry + let content = std::fs::read(temp.path().join("legit.txt")).unwrap(); + assert_eq!(content, b"first"); + } + + #[test] + fn test_duplicate_entry_error_when_disabled() { + let tar_data = create_duplicate_entry_tar("legit.txt", b"first", b"second"); + let mut archive = TarArchive::new(Cursor::new(tar_data)); + + let temp = TempDir::new().unwrap(); + let config = SecurityConfig::default(); + let options = ExtractionOptions { + atomic: false, + skip_duplicates: false, + }; + + let result = archive.extract(temp.path(), &config, &options); + assert!(result.is_err()); + } } diff --git a/crates/exarch-core/src/formats/traits.rs b/crates/exarch-core/src/formats/traits.rs index 4e895e6..58b6412 100644 --- 
a/crates/exarch-core/src/formats/traits.rs +++ b/crates/exarch-core/src/formats/traits.rs @@ -2,6 +2,7 @@ use std::path::Path; +use crate::ExtractionOptions; use crate::ExtractionReport; use crate::Result; use crate::SecurityConfig; @@ -13,7 +14,12 @@ pub trait ArchiveFormat { /// # Errors /// /// Returns an error if extraction fails or security checks are violated. - fn extract(&mut self, output_dir: &Path, config: &SecurityConfig) -> Result; + fn extract( + &mut self, + output_dir: &Path, + config: &SecurityConfig, + options: &ExtractionOptions, + ) -> Result; /// Returns the archive format name. fn format_name(&self) -> &'static str; @@ -30,6 +36,7 @@ mod tests { &mut self, _output_dir: &Path, _config: &SecurityConfig, + _options: &ExtractionOptions, ) -> Result { Ok(ExtractionReport::new()) } @@ -51,7 +58,8 @@ mod tests { let mut format = TestFormat; let temp = tempfile::TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = format.extract(temp.path(), &config).unwrap(); + let options = ExtractionOptions::default(); + let report = format.extract(temp.path(), &config, &options).unwrap(); assert_eq!(report.files_extracted, 0); } } diff --git a/crates/exarch-core/src/formats/zip.rs b/crates/exarch-core/src/formats/zip.rs index c284dd8..4c2688c 100644 --- a/crates/exarch-core/src/formats/zip.rs +++ b/crates/exarch-core/src/formats/zip.rs @@ -119,6 +119,7 @@ use std::time::Instant; use zip::ZipArchive as ZipReader; use crate::ExtractionError; +use crate::ExtractionOptions; use crate::ExtractionReport; use crate::Result; use crate::SecurityConfig; @@ -273,6 +274,7 @@ impl ZipArchive { /// explicitly dropped before calling extraction helpers. For files, /// the zip file remains alive through validation and is reused for /// data extraction. 
+ #[allow(clippy::too_many_arguments)] fn process_entry( &mut self, index: usize, @@ -281,6 +283,7 @@ impl ZipArchive { report: &mut ExtractionReport, copy_buffer: &mut CopyBuffer, dir_cache: &mut common::DirCache, + skip_duplicates: bool, ) -> Result<()> { let mut zip_file = self.inner.by_index(index).map_err(|e| { if e.to_string().contains("Password required to decrypt file") { @@ -335,7 +338,7 @@ impl ZipArchive { Some(dir_cache), )?; if let ValidatedEntryType::Symlink(safe_symlink) = validated.entry_type { - common::create_symlink(&safe_symlink, dest, report, dir_cache)?; + common::create_symlink(&safe_symlink, dest, report, dir_cache, skip_duplicates)?; } } else { // File: validate BEFORE writing (security invariant preserved), @@ -356,6 +359,7 @@ impl ZipArchive { uncompressed_size, copy_buffer, dir_cache, + skip_duplicates, )?; } @@ -363,6 +367,7 @@ impl ZipArchive { } /// Extracts a regular file to disk. + #[allow(clippy::too_many_arguments)] fn extract_file( zip_file: &mut zip::read::ZipFile<'_, R>, validated: &crate::security::validator::ValidatedEntry, @@ -371,6 +376,7 @@ impl ZipArchive { file_size: u64, copy_buffer: &mut CopyBuffer, dir_cache: &mut common::DirCache, + skip_duplicates: bool, ) -> Result<()> { common::extract_file_generic( zip_file, @@ -380,13 +386,20 @@ impl ZipArchive { Some(file_size), copy_buffer, dir_cache, + skip_duplicates, ) } } impl ArchiveFormat for ZipArchive { - fn extract(&mut self, output_dir: &Path, config: &SecurityConfig) -> Result { + fn extract( + &mut self, + output_dir: &Path, + config: &SecurityConfig, + options: &ExtractionOptions, + ) -> Result { let start = Instant::now(); + let skip_duplicates = options.skip_duplicates; let dest = DestDir::new_or_create(output_dir.to_path_buf())?; @@ -410,6 +423,7 @@ impl ArchiveFormat for ZipArchive { &mut report, &mut copy_buffer, &mut dir_cache, + skip_duplicates, ) { return Err(if report.total_items() > 0 { ExtractionError::PartialExtraction { @@ -538,7 +552,9 @@ mod tests { 
let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 0); assert_eq!(report.directories_created, 0); @@ -553,7 +569,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert!(temp.path().join("file.txt").exists()); @@ -575,7 +593,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 3); } @@ -589,7 +609,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert!(temp.path().join("dir1/dir2/file.txt").exists()); @@ -614,7 +636,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); @@ -641,7 +665,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); } @@ -664,7 +690,9 
@@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); } @@ -685,7 +713,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.directories_created, 1); assert!(temp.path().join("mydir").is_dir()); @@ -700,7 +730,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); assert!(temp.path().join("empty.txt").exists()); @@ -719,7 +751,7 @@ mod tests { let mut config = SecurityConfig::default(); config.max_file_size = 100; // Only allow 100 bytes - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); } @@ -738,7 +770,7 @@ mod tests { let mut config = SecurityConfig::default(); config.max_file_count = 2; // Only allow 2 files - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); } @@ -752,7 +784,7 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); assert!(matches!( @@ -770,7 +802,7 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); 
- let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); } @@ -796,7 +828,7 @@ mod tests { let mut config = SecurityConfig::default(); config.max_compression_ratio = 10.0; // Low threshold for testing - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); // Should fail with ZipBomb error assert!(result.is_err()); @@ -821,7 +853,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); @@ -849,7 +883,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let _report = archive.extract(temp.path(), &config).unwrap(); + let _report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); let metadata = std::fs::metadata(temp.path().join("binary")).unwrap(); let permissions = metadata.permissions(); @@ -877,7 +913,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let _report = archive.extract(temp.path(), &config).unwrap(); + let _report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); let metadata = std::fs::metadata(temp.path().join("binary")).unwrap(); let permissions = metadata.permissions(); @@ -905,7 +943,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let _report = archive.extract(temp.path(), &config).unwrap(); + let _report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); let metadata = std::fs::metadata(temp.path().join("binary")).unwrap(); let permissions = metadata.permissions(); @@ -952,7 +992,9 @@ mod 
tests { let mut config = SecurityConfig::default(); config.allowed.symlinks = true; - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1, "should have 1 regular file"); assert_eq!(report.symlinks_created, 1, "should have 1 symlink"); @@ -989,7 +1031,7 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); // symlinks disabled by default - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); // Should fail because symlinks are not allowed assert!( @@ -1092,7 +1134,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 2); } @@ -1109,7 +1153,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.bytes_written, 13); } @@ -1123,7 +1169,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); // Duration should be non-zero assert!(report.duration.as_nanos() > 0); @@ -1147,7 +1195,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 
1); assert_eq!(report.directories_created, 0); @@ -1169,7 +1219,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 0); assert_eq!(report.directories_created, 1); @@ -1186,7 +1238,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let _report = archive.extract(temp.path(), &config).unwrap(); + let _report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert!(temp.path().join("a/b/c/file.txt").exists()); assert!(temp.path().join("a").is_dir()); @@ -1205,7 +1259,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 1); @@ -1236,7 +1292,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert_eq!(report.files_extracted, 100); } @@ -1254,7 +1312,7 @@ mod tests { let mut config = SecurityConfig::default(); config.max_total_size = 1000; // Total limit 1000 bytes - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(result.is_err()); } @@ -1272,7 +1330,9 @@ mod tests { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); 
assert_eq!(report.files_extracted, 3); assert!(temp.path().join("file with spaces.txt").exists()); @@ -1382,7 +1442,9 @@ mod tests { if let Ok(mut archive) = result { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let err = archive.extract(temp.path(), &config).unwrap_err(); + let err = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap_err(); assert!( matches!(err, ExtractionError::SecurityViolation { .. }), "expected SecurityViolation for unsupported compression, got: {err:?}" @@ -1407,7 +1469,9 @@ mod tests { let temp = TempDir::new().unwrap(); let mut config = SecurityConfig::default(); config.allowed.symlinks = true; - let err = archive.extract(temp.path(), &config).unwrap_err(); + let err = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap_err(); assert!( matches!(err, ExtractionError::SecurityViolation { ref reason } if reason.contains("symlink target too large")), "expected SecurityViolation(symlink target too large), got: {err:?}" @@ -1426,7 +1490,9 @@ mod tests { let temp = TempDir::new().unwrap(); let mut config = SecurityConfig::default(); config.allowed.symlinks = true; - let err = archive.extract(temp.path(), &config).unwrap_err(); + let err = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap_err(); assert!( matches!(err, ExtractionError::InvalidArchive(ref msg) if msg.contains("UTF-8")), "expected InvalidArchive(UTF-8), got: {err:?}" @@ -1543,7 +1609,9 @@ mod tests { // PartialExtraction. Unwrap one level to check the underlying cause. let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let err = archive.extract(temp.path(), &config).unwrap_err(); + let err = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap_err(); let source = match err { ExtractionError::PartialExtraction { source, .. 
} => *source, other => other, @@ -1557,6 +1625,99 @@ mod tests { } } + /// Build a raw ZIP with two local entries sharing the same path. + /// + /// The `zip` crate's writer rejects duplicate filenames, so we craft bytes + /// manually. Both local file records and both central directory entries are + /// included so the archive is spec-valid. + #[allow(clippy::cast_possible_truncation)] + fn create_raw_duplicate_zip(path: &str, content1: &[u8], content2: &[u8]) -> Vec { + let name_bytes = path.as_bytes(); + let name_len = name_bytes.len() as u16; + let mut buf: Vec = Vec::new(); + + let write_local = |buf: &mut Vec, content: &[u8]| { + let crc = crc32_ieee(content); + let size = content.len() as u32; + buf.extend_from_slice(b"PK\x03\x04"); + buf.extend_from_slice(&20u16.to_le_bytes()); + buf.extend_from_slice(&0u16.to_le_bytes()); // flags + buf.extend_from_slice(&0u16.to_le_bytes()); // stored + buf.extend_from_slice(&0u16.to_le_bytes()); // mod time + buf.extend_from_slice(&0u16.to_le_bytes()); // mod date + buf.extend_from_slice(&crc.to_le_bytes()); + buf.extend_from_slice(&size.to_le_bytes()); + buf.extend_from_slice(&size.to_le_bytes()); + buf.extend_from_slice(&name_len.to_le_bytes()); + buf.extend_from_slice(&0u16.to_le_bytes()); // extra + buf.extend_from_slice(name_bytes); + buf.extend_from_slice(content); + }; + + let offset1 = buf.len() as u32; + write_local(&mut buf, content1); + let offset2 = buf.len() as u32; + write_local(&mut buf, content2); + + let write_central = |buf: &mut Vec, content: &[u8], offset: u32| { + let crc = crc32_ieee(content); + let size = content.len() as u32; + buf.extend_from_slice(b"PK\x01\x02"); + buf.extend_from_slice(&0x031eu16.to_le_bytes()); // version made: Unix + buf.extend_from_slice(&20u16.to_le_bytes()); + buf.extend_from_slice(&0u16.to_le_bytes()); // flags + buf.extend_from_slice(&0u16.to_le_bytes()); // stored + buf.extend_from_slice(&0u16.to_le_bytes()); // mod time + buf.extend_from_slice(&0u16.to_le_bytes()); // 
mod date + buf.extend_from_slice(&crc.to_le_bytes()); + buf.extend_from_slice(&size.to_le_bytes()); + buf.extend_from_slice(&size.to_le_bytes()); + buf.extend_from_slice(&name_len.to_le_bytes()); + buf.extend_from_slice(&0u16.to_le_bytes()); // extra + buf.extend_from_slice(&0u16.to_le_bytes()); // comment + buf.extend_from_slice(&0u16.to_le_bytes()); // disk start + buf.extend_from_slice(&0u16.to_le_bytes()); // int attrs + buf.extend_from_slice(&(0o100_644u32 << 16).to_le_bytes()); // ext attrs + buf.extend_from_slice(&offset.to_le_bytes()); + buf.extend_from_slice(name_bytes); + }; + + let central_start = buf.len() as u32; + write_central(&mut buf, content1, offset1); + write_central(&mut buf, content2, offset2); + let central_size = (buf.len() as u32) - central_start; + + buf.extend_from_slice(b"PK\x05\x06"); + buf.extend_from_slice(&0u16.to_le_bytes()); // disk + buf.extend_from_slice(&0u16.to_le_bytes()); // disk w/ cd + buf.extend_from_slice(&2u16.to_le_bytes()); // entries on disk + buf.extend_from_slice(&2u16.to_le_bytes()); // total entries + buf.extend_from_slice(¢ral_size.to_le_bytes()); + buf.extend_from_slice(¢ral_start.to_le_bytes()); + buf.extend_from_slice(&0u16.to_le_bytes()); // comment len + buf + } + + #[test] + fn test_duplicate_entry_skip_default() { + let zip_data = create_raw_duplicate_zip("legit.txt", b"first", b"second"); + let cursor = Cursor::new(zip_data); + let mut archive = ZipArchive::new(cursor).unwrap(); + + let temp = TempDir::new().unwrap(); + let config = SecurityConfig::default(); + let options = ExtractionOptions::default(); // skip_duplicates = true + + let report = archive.extract(temp.path(), &config, &options).unwrap(); + + // zip crate 8.x deduplicates entries at ZipArchive::new(), so the raw + // archive with two identical filenames appears as a single entry. + // The skip logic is verified by the TAR tests; this test confirms the + // ZIP extractor still succeeds without panicking on such archives. 
+ assert_eq!(report.files_extracted, 1); + assert!(temp.path().join("legit.txt").exists()); + } + #[test] fn test_encrypted_zip_rejected_with_security_violation() { use zip::unstable::write::FileOptionsExt; diff --git a/crates/exarch-core/tests/security/cve_regression.rs b/crates/exarch-core/tests/security/cve_regression.rs index f9aea69..7fe158d 100644 --- a/crates/exarch-core/tests/security/cve_regression.rs +++ b/crates/exarch-core/tests/security/cve_regression.rs @@ -7,6 +7,7 @@ #![allow(clippy::unwrap_used, clippy::cast_possible_truncation)] use exarch_core::ExtractionError; +use exarch_core::ExtractionOptions; use exarch_core::SecurityConfig; use exarch_core::formats::ArchiveFormat; use exarch_core::formats::TarArchive; @@ -88,7 +89,11 @@ fn test_cve_2024_12718_dotslash_prefix_traversal() { let temp = TempDir::new().unwrap(); let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); assert!( matches!(result, Err(ExtractionError::PathTraversal { .. })), @@ -103,7 +108,11 @@ fn test_cve_2024_12718_dotslash_complex_traversal() { let temp = TempDir::new().unwrap(); let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); assert!( matches!(result, Err(ExtractionError::PathTraversal { .. 
})), @@ -125,7 +134,11 @@ fn test_cve_2024_12718_multiple_traversal_variants() { let tar_data = make_raw_tar(&[(path, b"x")]); let temp = TempDir::new().unwrap(); let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); let path_str = std::str::from_utf8(path).unwrap_or(""); assert!( @@ -153,7 +166,11 @@ fn test_cve_2024_12905_symlink_outside_dest_rejected_by_default() { let temp = TempDir::new().unwrap(); let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); assert!( matches!( @@ -178,7 +195,7 @@ fn test_cve_2024_12905_symlink_outside_dest_rejected_when_allowed() { config.allowed.symlinks = true; let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( matches!(result, Err(ExtractionError::SymlinkEscape { .. })), @@ -199,7 +216,7 @@ fn test_cve_2024_12905_deep_symlink_chain() { config.allowed.symlinks = true; let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( matches!(result, Err(ExtractionError::SymlinkEscape { .. 
})), @@ -222,7 +239,11 @@ fn test_cve_2025_48387_hardlink_outside_dest_rejected_by_default() { let temp = TempDir::new().unwrap(); let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); assert!( matches!( @@ -246,7 +267,7 @@ fn test_cve_2025_48387_hardlink_outside_dest_rejected_when_allowed() { config.allowed.hardlinks = true; let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( matches!(result, Err(ExtractionError::HardlinkEscape { .. })), @@ -266,7 +287,7 @@ fn test_cve_2025_48387_absolute_hardlink_rejected() { config.allowed.hardlinks = true; let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( matches!(result, Err(ExtractionError::HardlinkEscape { .. })), @@ -374,7 +395,7 @@ fn test_ghsa_2367_hardlink_does_not_corrupt_target() { let mut archive = TarArchive::new(Cursor::new(tar_data)); // Extraction may succeed or fail depending on platform duplicate-file handling, // but legit.txt must never be corrupted. 
- let _ = archive.extract(temp.path(), &config); + let _ = archive.extract(temp.path(), &config, &ExtractionOptions::default()); let legit = std::fs::read_to_string(temp.path().join("legit.txt")).unwrap(); assert_eq!( @@ -399,7 +420,9 @@ fn test_ghsa_2367_hardlink_produces_independent_inode() { config.allowed.hardlinks = true; let mut archive = TarArchive::new(Cursor::new(tar_data)); - archive.extract(temp.path(), &config).unwrap(); + archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); let ino_legit = std::fs::metadata(temp.path().join("legit.txt")) .unwrap() @@ -724,7 +747,11 @@ fn test_windows_backslash_parent_traversal() { let temp = TempDir::new().unwrap(); let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); assert!( result.is_err(), @@ -739,7 +766,11 @@ fn test_windows_backslash_deep_traversal() { let temp = TempDir::new().unwrap(); let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); assert!( result.is_err(), @@ -758,7 +789,11 @@ fn test_windows_backslash_treated_as_filename_on_unix() { let temp = TempDir::new().unwrap(); let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); // Extraction should succeed — the path is not a traversal on Unix. 
assert!( @@ -782,7 +817,11 @@ fn test_windows_absolute_path_treated_as_filename_on_unix() { let temp = TempDir::new().unwrap(); let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); // Extraction should succeed and the file must be inside the destination. assert!( diff --git a/crates/exarch-core/tests/sevenz_integration.rs b/crates/exarch-core/tests/sevenz_integration.rs index 331ecc2..f5d3096 100644 --- a/crates/exarch-core/tests/sevenz_integration.rs +++ b/crates/exarch-core/tests/sevenz_integration.rs @@ -3,6 +3,7 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] use exarch_core::ExtractionError; +use exarch_core::ExtractionOptions; use exarch_core::SecurityConfig; use exarch_core::formats::SevenZArchive; use exarch_core::formats::traits::ArchiveFormat; @@ -28,7 +29,11 @@ fn test_7z_extraction_via_trait() { let temp = TempDir::new().unwrap(); let report = archive - .extract(temp.path(), &SecurityConfig::default()) + .extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ) .unwrap(); assert_eq!(report.files_extracted, 2); @@ -52,7 +57,7 @@ fn test_7z_security_config_integration() { ..SecurityConfig::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(matches!(result, Err(ExtractionError::QuotaExceeded { .. 
}))); } @@ -73,7 +78,11 @@ fn test_7z_nested_directories() { let temp = TempDir::new().unwrap(); let report = archive - .extract(temp.path(), &SecurityConfig::default()) + .extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ) .unwrap(); assert!(report.files_extracted >= 1); @@ -91,7 +100,11 @@ fn test_7z_solid_archive_rejected_at_new() { // Rejection happens in extract() with default config let temp = TempDir::new().unwrap(); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); assert!(result.is_err()); assert!(matches!( @@ -135,7 +148,7 @@ fn test_7z_quota_file_count() { ..SecurityConfig::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(matches!(result, Err(ExtractionError::QuotaExceeded { .. }))); } @@ -151,7 +164,7 @@ fn test_7z_quota_total_size() { ..SecurityConfig::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(matches!(result, Err(ExtractionError::QuotaExceeded { .. }))); } @@ -173,7 +186,9 @@ fn test_7z_solid_archive_extraction_success() { ..SecurityConfig::default() }; - let report = archive.extract(temp.path(), &config).unwrap(); + let report = archive + .extract(temp.path(), &config, &ExtractionOptions::default()) + .unwrap(); assert!( report.files_extracted > 0, @@ -197,7 +212,7 @@ fn test_7z_solid_archive_with_file_count_quota() { ..SecurityConfig::default() }; - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!(matches!(result, Err(ExtractionError::QuotaExceeded { .. 
}))); } @@ -217,7 +232,7 @@ fn test_7z_unix_symlink_extracted_as_file() { let config = SecurityConfig::default(); // Current behavior: succeeds, extracts symlink as file - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( result.is_ok(), "Unix symlink should extract as file (documented limitation): {result:?}" @@ -261,7 +276,7 @@ fn test_7z_hardlink_extracted_as_duplicate_files() { let temp = TempDir::new().unwrap(); let config = SecurityConfig::default(); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( result.is_ok(), "hardlink should extract as separate files: {result:?}" From acfa4fc83436bbc6cd604ae2ac0f9d32976deee1 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Wed, 25 Mar 2026 22:12:40 +0100 Subject: [PATCH 2/4] fix: add missing ExtractionOptions arg to cve_regression tests --- crates/exarch-core/tests/security/cve_regression.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/exarch-core/tests/security/cve_regression.rs b/crates/exarch-core/tests/security/cve_regression.rs index 7fe158d..90c5c23 100644 --- a/crates/exarch-core/tests/security/cve_regression.rs +++ b/crates/exarch-core/tests/security/cve_regression.rs @@ -317,7 +317,11 @@ fn test_rustsec_2026_0067_symlink_dir_chmod_default_config() { let temp = TempDir::new().unwrap(); let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); assert!( matches!( @@ -351,7 +355,7 @@ fn test_rustsec_2026_0067_symlink_dir_chmod_symlinks_allowed() { config.allowed.symlinks = true; let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &config); + let result = 
archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( matches!(result, Err(ExtractionError::SymlinkEscape { .. })), From f58216fc5b0809fb1dee03dc35a5b6c6f3cfb3e7 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Wed, 25 Mar 2026 22:15:45 +0100 Subject: [PATCH 3/4] fix: add ExtractionOptions to doc-test examples in tar/zip/sevenz --- crates/exarch-core/src/formats/sevenz.rs | 15 ++++++++++-- crates/exarch-core/src/formats/tar.rs | 29 ++++++++++++++++++------ crates/exarch-core/src/formats/zip.rs | 15 ++++++++++-- 3 files changed, 48 insertions(+), 11 deletions(-) diff --git a/crates/exarch-core/src/formats/sevenz.rs b/crates/exarch-core/src/formats/sevenz.rs index 601fe23..0fc9d7d 100644 --- a/crates/exarch-core/src/formats/sevenz.rs +++ b/crates/exarch-core/src/formats/sevenz.rs @@ -56,6 +56,7 @@ //! Basic extraction: //! //! ```no_run +//! use exarch_core::ExtractionOptions; //! use exarch_core::SecurityConfig; //! use exarch_core::formats::SevenZArchive; //! use exarch_core::formats::traits::ArchiveFormat; @@ -65,7 +66,11 @@ //! # fn main() -> Result<(), exarch_core::ExtractionError> { //! let file = File::open("archive.7z")?; //! let mut archive = SevenZArchive::new(file)?; -//! let report = archive.extract(Path::new("/output"), &SecurityConfig::default())?; +//! let report = archive.extract( +//! Path::new("/output"), +//! &SecurityConfig::default(), +//! &ExtractionOptions::default(), +//! )?; //! println!("Extracted {} files", report.files_extracted); //! # Ok(()) //! # } @@ -74,6 +79,7 @@ //! Allow solid archives with memory limit: //! //! ```no_run +//! use exarch_core::ExtractionOptions; //! use exarch_core::SecurityConfig; //! //! 
let mut config = SecurityConfig::default(); @@ -173,6 +179,7 @@ struct CachedEntry { /// # Examples /// /// ```no_run +/// use exarch_core::ExtractionOptions; /// use exarch_core::SecurityConfig; /// use exarch_core::formats::SevenZArchive; /// use exarch_core::formats::traits::ArchiveFormat; @@ -181,7 +188,11 @@ struct CachedEntry { /// /// let file = File::open("archive.7z")?; /// let mut archive = SevenZArchive::new(file)?; -/// let report = archive.extract(Path::new("/output"), &SecurityConfig::default())?; +/// let report = archive.extract( +/// Path::new("/output"), +/// &SecurityConfig::default(), +/// &ExtractionOptions::default(), +/// )?; /// println!("Extracted {} files", report.files_extracted); /// # Ok::<(), exarch_core::ExtractionError>(()) /// ``` diff --git a/crates/exarch-core/src/formats/tar.rs b/crates/exarch-core/src/formats/tar.rs index c89743d..69ab5cb 100644 --- a/crates/exarch-core/src/formats/tar.rs +++ b/crates/exarch-core/src/formats/tar.rs @@ -62,6 +62,7 @@ //! Basic extraction: //! //! ```no_run +//! use exarch_core::ExtractionOptions; //! use exarch_core::SecurityConfig; //! use exarch_core::formats::TarArchive; //! use exarch_core::formats::traits::ArchiveFormat; @@ -70,7 +71,11 @@ //! //! let file = File::open("archive.tar")?; //! let mut archive = TarArchive::new(file); -//! let report = archive.extract(Path::new("/output"), &SecurityConfig::default())?; +//! let report = archive.extract( +//! Path::new("/output"), +//! &SecurityConfig::default(), +//! &ExtractionOptions::default(), +//! )?; //! println!("Extracted {} files", report.files_extracted); //! # Ok::<(), exarch_core::ExtractionError>(()) //! ``` @@ -78,6 +83,7 @@ //! Gzip-compressed TAR: //! //! ```no_run +//! use exarch_core::ExtractionOptions; //! use exarch_core::SecurityConfig; //! use exarch_core::formats::TarArchive; //! use exarch_core::formats::traits::ArchiveFormat; @@ -88,7 +94,11 @@ //! let file = File::open("archive.tar.gz")?; //! 
let decoder = GzDecoder::new(file); //! let mut archive = TarArchive::new(decoder); -//! let report = archive.extract(Path::new("/output"), &SecurityConfig::default())?; +//! let report = archive.extract( +//! Path::new("/output"), +//! &SecurityConfig::default(), +//! &ExtractionOptions::default(), +//! )?; //! # Ok::<(), exarch_core::ExtractionError>(()) //! ``` @@ -129,6 +139,7 @@ use super::traits::ArchiveFormat; /// # Examples /// /// ```no_run +/// use exarch_core::ExtractionOptions; /// use exarch_core::SecurityConfig; /// use exarch_core::formats::TarArchive; /// use exarch_core::formats::traits::ArchiveFormat; @@ -138,7 +149,7 @@ use super::traits::ArchiveFormat; /// let file = File::open("archive.tar")?; /// let mut archive = TarArchive::new(file); /// let config = SecurityConfig::default(); -/// let report = archive.extract(Path::new("/output"), &config)?; +/// let report = archive.extract(Path::new("/output"), &config, &ExtractionOptions::default())?; /// # Ok::<(), exarch_core::ExtractionError>(()) /// ``` pub struct TarArchive { @@ -510,6 +521,7 @@ impl TarEntryAdapter { /// # Examples /// /// ```no_run +/// use exarch_core::ExtractionOptions; /// use exarch_core::SecurityConfig; /// use exarch_core::formats::tar::open_tar_gz; /// use exarch_core::formats::traits::ArchiveFormat; @@ -517,7 +529,7 @@ impl TarEntryAdapter { /// /// let mut archive = open_tar_gz("archive.tar.gz")?; /// let config = SecurityConfig::default(); -/// let report = archive.extract(Path::new("/output"), &config)?; +/// let report = archive.extract(Path::new("/output"), &config, &ExtractionOptions::default())?; /// # Ok::<(), exarch_core::ExtractionError>(()) /// ``` pub fn open_tar_gz>( @@ -540,6 +552,7 @@ pub fn open_tar_gz>( /// # Examples /// /// ```no_run +/// use exarch_core::ExtractionOptions; /// use exarch_core::SecurityConfig; /// use exarch_core::formats::tar::open_tar_bz2; /// use exarch_core::formats::traits::ArchiveFormat; @@ -547,7 +560,7 @@ pub fn open_tar_gz>( 
/// /// let mut archive = open_tar_bz2("archive.tar.bz2")?; /// let config = SecurityConfig::default(); -/// let report = archive.extract(Path::new("/output"), &config)?; +/// let report = archive.extract(Path::new("/output"), &config, &ExtractionOptions::default())?; /// # Ok::<(), exarch_core::ExtractionError>(()) /// ``` pub fn open_tar_bz2>( @@ -570,6 +583,7 @@ pub fn open_tar_bz2>( /// # Examples /// /// ```no_run +/// use exarch_core::ExtractionOptions; /// use exarch_core::SecurityConfig; /// use exarch_core::formats::tar::open_tar_xz; /// use exarch_core::formats::traits::ArchiveFormat; @@ -577,7 +591,7 @@ pub fn open_tar_bz2>( /// /// let mut archive = open_tar_xz("archive.tar.xz")?; /// let config = SecurityConfig::default(); -/// let report = archive.extract(Path::new("/output"), &config)?; +/// let report = archive.extract(Path::new("/output"), &config, &ExtractionOptions::default())?; /// # Ok::<(), exarch_core::ExtractionError>(()) /// ``` pub fn open_tar_xz>( @@ -601,6 +615,7 @@ pub fn open_tar_xz>( /// # Examples /// /// ```no_run +/// use exarch_core::ExtractionOptions; /// use exarch_core::SecurityConfig; /// use exarch_core::formats::tar::open_tar_zst; /// use exarch_core::formats::traits::ArchiveFormat; @@ -608,7 +623,7 @@ pub fn open_tar_xz>( /// /// let mut archive = open_tar_zst("archive.tar.zst")?; /// let config = SecurityConfig::default(); -/// let report = archive.extract(Path::new("/output"), &config)?; +/// let report = archive.extract(Path::new("/output"), &config, &ExtractionOptions::default())?; /// # Ok::<(), exarch_core::ExtractionError>(()) /// ``` pub fn open_tar_zst>( diff --git a/crates/exarch-core/src/formats/zip.rs b/crates/exarch-core/src/formats/zip.rs index 4c2688c..834a7b4 100644 --- a/crates/exarch-core/src/formats/zip.rs +++ b/crates/exarch-core/src/formats/zip.rs @@ -85,6 +85,7 @@ //! Basic extraction: //! //! ```no_run +//! use exarch_core::ExtractionOptions; //! use exarch_core::SecurityConfig; //! 
use exarch_core::formats::ZipArchive; //! use exarch_core::formats::traits::ArchiveFormat; @@ -93,7 +94,11 @@ //! //! let file = File::open("archive.zip")?; //! let mut archive = ZipArchive::new(file)?; -//! let report = archive.extract(Path::new("/output"), &SecurityConfig::default())?; +//! let report = archive.extract( +//! Path::new("/output"), +//! &SecurityConfig::default(), +//! &ExtractionOptions::default(), +//! )?; //! println!("Extracted {} files", report.files_extracted); //! # Ok::<(), exarch_core::ExtractionError>(()) //! ``` @@ -101,6 +106,7 @@ //! Custom security configuration: //! //! ```no_run +//! use exarch_core::ExtractionOptions; //! use exarch_core::SecurityConfig; //! //! let mut config = SecurityConfig::default(); @@ -155,6 +161,7 @@ use super::traits::ArchiveFormat; /// # Examples /// /// ```no_run +/// use exarch_core::ExtractionOptions; /// use exarch_core::SecurityConfig; /// use exarch_core::formats::ZipArchive; /// use exarch_core::formats::traits::ArchiveFormat; @@ -163,7 +170,11 @@ use super::traits::ArchiveFormat; /// /// let file = File::open("archive.zip")?; /// let mut archive = ZipArchive::new(file)?; -/// let report = archive.extract(Path::new("/output"), &SecurityConfig::default())?; +/// let report = archive.extract( +/// Path::new("/output"), +/// &SecurityConfig::default(), +/// &ExtractionOptions::default(), +/// )?; /// println!("Extracted {} files", report.files_extracted); /// # Ok::<(), exarch_core::ExtractionError>(()) /// ``` From 8bbe8feaf2199884d80253b332bbcf7e8db3cdb9 Mon Sep 17 00:00:00 2001 From: "Andrei G." 
Date: Wed, 25 Mar 2026 22:29:54 +0100 Subject: [PATCH 4/4] fix: update cve_regression tests for new ExtractionOptions parameter --- .../exarch-core/tests/security/cve_regression.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/crates/exarch-core/tests/security/cve_regression.rs b/crates/exarch-core/tests/security/cve_regression.rs index 90c5c23..2e0c655 100644 --- a/crates/exarch-core/tests/security/cve_regression.rs +++ b/crates/exarch-core/tests/security/cve_regression.rs @@ -462,7 +462,11 @@ fn test_cve_2026_24842_deep_nested_hardlink_escape_rejected_by_default() { let temp = TempDir::new().unwrap(); let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &SecurityConfig::default()); + let result = archive.extract( + temp.path(), + &SecurityConfig::default(), + &ExtractionOptions::default(), + ); assert!( matches!( @@ -487,7 +491,7 @@ fn test_cve_2026_24842_deep_nested_hardlink_escape_rejected_when_allowed() { config.allowed.hardlinks = true; let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( matches!(result, Err(ExtractionError::HardlinkEscape { .. 
})), @@ -514,7 +518,7 @@ fn test_cve_2026_24842_safe_deep_nested_hardlink_allowed() { config.allowed.hardlinks = true; let mut archive = TarArchive::new(Cursor::new(tar_data)); - let result = archive.extract(temp.path(), &config); + let result = archive.extract(temp.path(), &config, &ExtractionOptions::default()); assert!( result.is_ok(), @@ -680,7 +684,7 @@ fn test_cve_2025_29787_zip_slip_blocked_with_symlinks_enabled() { let data = build_cve_2025_29787_zip(); let mut archive = ZipArchive::new(Cursor::new(data)).unwrap(); - let result = archive.extract(dest.path(), &config); + let result = archive.extract(dest.path(), &config, &ExtractionOptions::default()); assert!( result.is_err(), @@ -723,7 +727,7 @@ fn test_cve_2025_29787_zip_slip_blocked_with_symlinks_disabled() { let data = build_cve_2025_29787_zip(); let mut archive = ZipArchive::new(Cursor::new(data)).unwrap(); - let result = archive.extract(dest.path(), &config); + let result = archive.extract(dest.path(), &config, &ExtractionOptions::default()); assert!( result.is_err(),