45 changes: 38 additions & 7 deletions .github/workflows/tests.yml
@@ -6,14 +6,45 @@ on:
   workflow_dispatch:
 
 jobs:
-  test-accelerated:
-    name: Test accelerated (aarch64, x86_64)
+  test-aarch64:
+    name: Test aarch64
     strategy:
       matrix:
-        os: [ubuntu-latest, ubuntu-22.04-arm, ubuntu-24.04-arm, macos-latest]
+        os: [ubuntu-22.04-arm, ubuntu-24.04-arm, macos-14, macos-15, macos-26, macos-latest, windows-11-arm]
         rust-toolchain:
           - "1.81" # minimum for this crate
-          - "1.89" # when VPCLMULQDQ was stabilized
+          - "1.89" # when AVX-512 VPCLMULQDQ was stabilized
           - "stable"
           - "nightly"
     runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4 # not pinning to commit hash since this is a GitHub action, which we trust
+      - uses: actions-rust-lang/setup-rust-toolchain@9d7e65c320fdb52dcd45ffaa68deb6c02c8754d9 # v1.12.0
+        with:
+          toolchain: ${{ matrix.rust-toolchain }}
+          components: rustfmt, clippy
+          cache-key: ${{ matrix.os }}-${{ matrix.rust-toolchain }}
+      - name: Check
+        run: cargo check
+      - name: Architecture check
+        run: cargo run --bin arch-check
+      - if: ${{ matrix.rust-toolchain != 'nightly' }}
+        name: Format
+        run: cargo fmt -- --check
+      - if: ${{ matrix.rust-toolchain != 'nightly' }}
+        name: Clippy
+        run: cargo clippy
+      - name: Test
+        run: cargo test
+
+  test-x86_64:
+    name: Test x86_64
+    strategy:
+      matrix:
+        os: [ ubuntu-latest, ubuntu-22.04, ubuntu-24.04, macos-13, macos-15-intel, windows-2022, windows-2025, windows-latest ]
+        rust-toolchain:
+          - "1.81" # minimum for this crate
+          - "1.89" # when AVX-512 VPCLMULQDQ was stabilized
+          - "stable"
+          - "nightly"
+    runs-on: ${{ matrix.os }}
@@ -38,14 +69,14 @@ jobs:
         run: cargo test
 
   test-x86:
-    name: Test accelerated (x86)
+    name: Test x86
     runs-on: ubuntu-latest
     strategy:
       matrix:
         target: [i586-unknown-linux-gnu, i686-unknown-linux-gnu]
         rust-toolchain:
           - "1.81" # minimum for this crate
-          - "1.89" # when VPCLMULQDQ was stabilized
+          - "1.89" # when AVX-512 VPCLMULQDQ was stabilized
           - "stable"
           - "nightly"
     steps:
@@ -71,7 +102,7 @@ jobs:
         target: [powerpc-unknown-linux-gnu, powerpc64-unknown-linux-gnu]
         rust-toolchain:
           - "1.81" # minimum for this crate
-          - "1.89" # when VPCLMULQDQ was stabilized
+          - "1.89" # when AVX-512 VPCLMULQDQ was stabilized
           - "stable"
           - "nightly"
     steps:
3 changes: 2 additions & 1 deletion .gitignore
@@ -3,4 +3,5 @@
 /test/test_*.bin
 .idea
 .DS_Store
-.git
+.git
+.vscode
202 changes: 202 additions & 0 deletions .kiro/specs/checksum-benchmark-option/design.md
@@ -0,0 +1,202 @@
# Design Document

## Overview

This design extends the existing `bin/checksum.rs` tool with benchmark functionality through a new `-b` flag. The benchmark mode will measure CRC performance using either user-provided data (files/strings) or randomly generated data, reporting throughput in GiB/s along with the acceleration target used.

The design maintains backward compatibility while adding a clean benchmark interface that leverages existing patterns from the `benches/benchmark.rs` implementation.

## Architecture

### Command Line Interface

The tool will extend the existing argument parsing to support:
- `-b`: Enable benchmark mode
- `--size <bytes>`: Specify data size for random generation (when no file/string provided)
- `--duration <seconds>`: Benchmark duration as floating-point seconds (default: 10.0)
- Existing `-a <algorithm>`: CRC algorithm (required in benchmark mode)
- Existing `-f <file>` or `-s <string>`: Optional data source for benchmarking
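For illustration, invocations under this flag set might look as follows (the binary name and the `CRC-32/ISCSI` algorithm string are examples here; accepted algorithm names come from the tool's existing `-a` validation):

```
# Random 1 MiB buffer, default 10-second run
checksum -b -a CRC-32/ISCSI

# 64 MiB of random data for 5 seconds
checksum -b -a CRC-32/ISCSI --size 67108864 --duration 5.0

# Benchmark a file, exercising the file I/O path as well
checksum -b -a CRC-32/ISCSI -f ./large.bin
```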

### Data Flow

```
User Input → Argument Parsing → Mode Detection ──→ Benchmark Execution → Results Display
                                      │
                                      └──→ Normal Checksum Mode (existing functionality)
```

In benchmark mode:
1. Parse and validate benchmark parameters
2. Determine data source (file, string, or generated)
3. For string or generated data, load or generate the test data once and reuse it; for file data, use the file path directly
4. Run benchmark loop for specified duration using appropriate checksum function
5. Calculate and display results

## Components and Interfaces

### Enhanced Config Structure

```rust
#[derive(Debug)]
struct Config {
    algorithm: String,
    file: Option<String>,
    string: Option<String>,
    format: OutputFormat,
    benchmark: Option<BenchmarkConfig>,
}

#[derive(Debug)]
struct BenchmarkConfig {
    size: Option<usize>,
    duration: f64,
}
```
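A minimal sketch of how mode detection could hang off this structure; `run_benchmark` and `run_checksum` are illustrative stand-ins for the tool's entry points, not its actual function names:

```rust
// Hypothetical dispatch on the parsed Config: Some(benchmark) means the
// user passed -b; None preserves the existing checksum behavior.
fn dispatch(config: Config) {
    match config.benchmark {
        Some(ref bench) => run_benchmark(&config, bench),
        None => run_checksum(&config),
    }
}
```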

### Benchmark Execution Module

```rust
enum BenchmarkData {
    InMemory(Vec<u8>),
    File(String),
}

struct BenchmarkRunner {
    algorithm: CrcAlgorithm,
    data: BenchmarkData,
    duration: f64,
}

impl BenchmarkRunner {
    // signatures only; bodies are described under Implementation Notes
    fn new(algorithm: CrcAlgorithm, data: BenchmarkData, duration: f64) -> Self;
    fn run(&self) -> BenchmarkResult;
}

struct BenchmarkResult {
    iterations: u64,
    elapsed_seconds: f64,
    throughput_gibs: f64,
    time_per_iteration_nanos: f64,
    acceleration_target: String,
    data_size: u64,
}
```
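Pieced together, a run might look like the fragment below — a sketch assuming the interfaces above and the `generate_random_data` helper from the next section; `CrcAlgorithm` is the crate's existing algorithm enum:

```rust
// Illustrative only: build a 1 MiB in-memory workload, run it for the
// default 10 seconds, and print the headline numbers.
fn run_in_memory_benchmark(algorithm: CrcAlgorithm) {
    let data = BenchmarkData::InMemory(generate_random_data(1 << 20));
    let runner = BenchmarkRunner::new(algorithm, data, 10.0);
    let result = runner.run();
    println!(
        "{:.2} GiB/s over {} iterations on {}",
        result.throughput_gibs, result.iterations, result.acceleration_target
    );
}
```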

### Data Generation

The benchmark will reuse the random data generation pattern from `benches/benchmark.rs`:

```rust
use rand::RngCore; // brings fill_bytes() into scope

fn generate_random_data(size: usize) -> Vec<u8> {
    let mut rng = rand::rng();
    let mut buf = vec![0u8; size];
    rng.fill_bytes(&mut buf);
    buf
}
```

## Data Models

### Input Data Sources

1. **File Input**: Use `checksum_file()` function to benchmark the entire file I/O and checksum stack
2. **String Input**: Use string bytes directly with in-memory `checksum()` function
3. **Generated Data**: Create random data of specified size using `rand::RngCore::fill_bytes()` and use in-memory `checksum()` function
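A sketch of the selection logic implied by these three cases, using the `Config`, `BenchmarkConfig`, and `BenchmarkData` types defined above (the helper name is illustrative):

```rust
// Precedence: explicit file, then explicit string, then generated data of
// the requested (or default 1 MiB) size.
fn select_data(config: &Config, bench: &BenchmarkConfig) -> BenchmarkData {
    if let Some(path) = &config.file {
        BenchmarkData::File(path.clone())
    } else if let Some(s) = &config.string {
        BenchmarkData::InMemory(s.as_bytes().to_vec())
    } else {
        BenchmarkData::InMemory(generate_random_data(bench.size.unwrap_or(1 << 20)))
    }
}
```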

### Benchmark Metrics

- **Iterations**: Number of checksum calculations performed
- **Elapsed Time**: Actual benchmark duration in seconds
- **Throughput**: Calculated as `(data_size * iterations) / elapsed_time / (1024^3)` GiB/s
- **Acceleration Target**: Result from `crc_fast::get_calculator_target(algorithm)`
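As a concrete reading of the throughput formula, a self-contained sketch (the function name and tuple return are illustrative):

```rust
/// Derive throughput (GiB/s) and per-iteration latency (ns) from raw counts.
fn compute_metrics(data_size: u64, iterations: u64, elapsed_seconds: f64) -> (f64, f64) {
    let bytes = data_size as f64 * iterations as f64;
    let throughput_gibs = bytes / elapsed_seconds / (1024.0 * 1024.0 * 1024.0);
    let time_per_iteration_nanos = elapsed_seconds * 1e9 / iterations as f64;
    (throughput_gibs, time_per_iteration_nanos)
}

fn main() {
    // 1 MiB hashed 467,661 times in 10 s ≈ 45.67 GiB/s, ≈ 21.4 µs per iteration
    let (gibs, nanos) = compute_metrics(1_048_576, 467_661, 10.0);
    println!("{gibs:.2} GiB/s, {nanos:.0} ns per iteration");
}
```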

## Error Handling

### Validation Errors

- Invalid algorithm names (reuse existing validation)
- Invalid size parameters (non-positive values)
- Invalid duration parameters (non-positive values)
- File read errors (reuse existing error handling)
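A minimal validation sketch for the new parameters (the struct is repeated here for self-containment; error strings are illustrative):

```rust
struct BenchmarkConfig {
    size: Option<usize>,
    duration: f64,
}

// Reject non-positive sizes and non-positive (or NaN) durations up front,
// before any data is allocated or the timing loop starts.
fn validate(config: &BenchmarkConfig) -> Result<(), String> {
    if config.size == Some(0) {
        return Err("--size must be a positive number of bytes".into());
    }
    if !(config.duration > 0.0) {
        return Err("--duration must be a positive number of seconds".into());
    }
    Ok(())
}
```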

### Runtime Errors

- Memory allocation failures for large data sizes
- Timer precision issues (fallback to alternative timing methods)

### Error Messages

All errors will follow the existing pattern of displaying the error message followed by usage information.

## Testing Strategy

### Unit Tests

- Argument parsing validation for benchmark flags
- BenchmarkConfig creation and validation
- Data generation with various sizes
- Throughput calculation accuracy
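For instance, the throughput-accuracy check could anchor on a case with a known answer (a sketch; the calculation is inlined rather than tied to any particular helper):

```rust
#[test]
fn throughput_of_one_gib_in_one_second_is_one_gib_per_second() {
    let bytes = 1024u64 * 1024 * 1024; // exactly 1 GiB
    let elapsed = 1.0_f64;
    let gibs = bytes as f64 / elapsed / (1024.0 * 1024.0 * 1024.0);
    assert!((gibs - 1.0).abs() < 1e-9);
}
```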

### Integration Tests

- End-to-end benchmark execution with different algorithms
- File and string input handling in benchmark mode
- Error handling for invalid parameters
- Backward compatibility verification

### Performance Validation

- Verify benchmark results are reasonable (within expected ranges)
- Compare with existing `benches/benchmark.rs` results for consistency
- Test with various data sizes to ensure linear scaling

## Implementation Notes

### Timing Mechanism

Use `std::time::Instant` for high-precision timing, with different approaches for different data sources:

```rust
let start = std::time::Instant::now();
let mut iterations = 0u64;

// Run until the requested wall-clock budget is spent; black_box keeps the
// checksum result from being optimized away.
while start.elapsed().as_secs_f64() < self.duration {
    match &self.data {
        BenchmarkData::InMemory(data) => {
            std::hint::black_box(checksum(self.algorithm, data));
        }
        BenchmarkData::File(filename) => {
            std::hint::black_box(checksum_file(self.algorithm, filename, None).unwrap());
        }
    }
    iterations += 1;
}

let elapsed = start.elapsed().as_secs_f64();
```

### Memory Considerations

- Pre-allocate test data once before benchmark loop
- Use `std::hint::black_box()` to prevent compiler optimizations
- Consider memory alignment for optimal performance (optional enhancement)

### Output Format

```
Algorithm: CRC-32/ISCSI
Acceleration Target: aarch64-neon-sha3
Data Size: 1,048,576 bytes (1.0 MiB)
Duration: 10.00 seconds
Iterations: 467,661
Throughput: 45.67 GiB/s
Time per iteration: 21.4 μs
```

### Default Values

- **Size**: 1,048,576 bytes (1 MiB)
- **Duration**: 10.0 seconds
- **Algorithm**: Must be specified via `-a` flag (no default)
52 changes: 52 additions & 0 deletions .kiro/specs/checksum-benchmark-option/requirements.md
@@ -0,0 +1,52 @@
# Requirements Document

## Introduction

This feature adds a simple benchmark option to the existing `bin/checksum.rs` tool via a command-line flag. The benchmark will allow users to test performance across different platforms using a single binary, reporting throughput in GiB/s and the acceleration target used. This enables cross-platform performance comparison without requiring a full development environment checkout.

## Glossary

- **Checksum_Tool**: The existing `bin/checksum.rs` binary application
- **Benchmark_Mode**: A new operational mode that measures and reports performance metrics
- **Acceleration_Target**: The hardware-specific optimization path returned by `get_calculator_target()`
- **Throughput_Metric**: Performance measurement expressed in GiB/s (gibibytes per second)
- **Test_Data**: Randomly generated byte array used for benchmark measurements
- **Black_Box**: Rust's `std::hint::black_box()` function that prevents compiler optimizations during benchmarking

## Requirements

### Requirement 1

**User Story:** As a developer, I want to run performance benchmarks from the checksum tool, so that I can compare CRC performance across different hardware platforms without setting up a full development environment.

#### Acceptance Criteria

1. WHEN the user provides a `-b` flag, THE Checksum_Tool SHALL enter Benchmark_Mode
2. WHILE in Benchmark_Mode, THE Checksum_Tool SHALL generate Test_Data of the specified size once and reuse it for all iterations
3. THE Checksum_Tool SHALL report Throughput_Metric in GiB/s format
4. THE Checksum_Tool SHALL display the Acceleration_Target used for the benchmark
5. THE Checksum_Tool SHALL use Black_Box to prevent compiler optimizations during measurement

### Requirement 2

**User Story:** As a developer, I want to specify benchmark parameters, so that I can control the test conditions for consistent cross-platform comparisons.

#### Acceptance Criteria

1. WHEN the user provides `-a` parameter with `-b` flag, THE Checksum_Tool SHALL use the specified CRC algorithm for benchmarking
2. WHEN the user provides `--size` parameter, THE Checksum_Tool SHALL generate Test_Data of the specified byte size
3. WHEN the user provides `--duration` parameter, THE Checksum_Tool SHALL run the benchmark for the specified number of seconds
4. WHERE no benchmark parameters are provided, THE Checksum_Tool SHALL use default values of 1 MiB for size and 10 seconds for duration
5. THE Checksum_Tool SHALL validate all benchmark parameter values before starting the benchmark

### Requirement 3

**User Story:** As a developer, I want the benchmark to support both file/string input and generated data, so that I can benchmark with specific data or use random data for consistent testing.

#### Acceptance Criteria

1. WHEN the user provides `-b` with `-f` or `-s` flags, THE Checksum_Tool SHALL use the file or string content as Test_Data for benchmarking
2. WHEN the user provides `-b` with `--size` parameter but no `-f` or `-s` flags, THE Checksum_Tool SHALL generate random Test_Data of the specified size
3. IF the user provides `-b` without any data source or size specification, THEN THE Checksum_Tool SHALL generate random Test_Data using the default size
4. THE Checksum_Tool SHALL display appropriate usage information when benchmark parameters are invalid
5. THE Checksum_Tool SHALL maintain backward compatibility with existing checksum functionality
49 changes: 49 additions & 0 deletions .kiro/specs/checksum-benchmark-option/tasks.md
@@ -0,0 +1,49 @@
# Implementation Plan

- [x] 1. Extend command line argument parsing for benchmark options
- Add `-b` flag to enable benchmark mode in the argument parser
- Add `--size` parameter for specifying random data size
- Add `--duration` parameter for benchmark duration (floating-point seconds)
- Update the `Config` struct to include optional `BenchmarkConfig`
- Update usage/help text to include new benchmark options
- _Requirements: 1.1, 2.1, 2.2, 2.3, 2.4_

- [x] 2. Implement benchmark data structures and validation
- Create `BenchmarkConfig` struct with size and duration fields
- Create `BenchmarkData` enum to handle in-memory vs file data sources
- Create `BenchmarkRunner` struct with algorithm, data, and duration
- Create `BenchmarkResult` struct with all metrics including time per iteration
- Add validation logic for benchmark parameters (positive values)
- _Requirements: 2.5, 3.4_

- [x] 3. Implement benchmark execution logic
- Create benchmark runner with timing loop using `std::time::Instant`
- Implement separate execution paths for in-memory data vs file data
- Use `std::hint::black_box()` to prevent compiler optimizations
- Calculate throughput in GiB/s and time per iteration with appropriate units
- Integrate `get_calculator_target()` for acceleration target reporting
- _Requirements: 1.2, 1.3, 1.4, 1.5_

- [x] 4. Implement data source handling
- Add random data generation function using `rand::RngCore::fill_bytes()`
- Implement logic to determine data source (file, string, or generated)
- Handle file size detection for throughput calculations
- Create `BenchmarkData` instances based on user input
- _Requirements: 3.1, 3.2, 3.3_

- [x] 5. Integrate benchmark mode into main application flow
- Modify main function to detect benchmark mode and route accordingly
- Ensure mutual exclusivity validation between benchmark and normal modes
- Add benchmark result formatting and display
- Update error handling to include benchmark-specific errors
- Maintain backward compatibility with existing functionality
- _Requirements: 3.4, 3.5_

- [x] 6. Add comprehensive testing for benchmark functionality
- Write unit tests for argument parsing with benchmark flags
- Test benchmark parameter validation (invalid sizes, durations)
- Test data source selection logic (file vs string vs generated)
- Test benchmark execution with different algorithms
- Verify throughput calculation accuracy
- Test error handling for invalid benchmark configurations
- _Requirements: All requirements_