diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a63c9a2..d0a782a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,6 +65,40 @@ jobs: RUSTDOCFLAGS: '-D warnings' run: cargo hack doc --feature-powerset + sanitizers: + name: AddressSanitizer + LeakSanitizer + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + # AddressSanitizer (which bundles LeakSanitizer) requires nightly. We use + # clang for the bundled C++ so it shares the LLVM sanitizer runtime that + # rustc links into the test binaries. Note: `cargo +nightly` is used + # explicitly because rust-toolchain.toml pins a stable channel that would + # otherwise override the default toolchain in this directory. + - name: Install nightly toolchain + run: rustup toolchain install nightly --profile minimal + + - uses: Swatinem/rust-cache@v2 + with: + shared-key: sanitizers + save-if: ${{ github.ref_name == 'main' }} + + - run: rustup +nightly show + + # `--target` is required so build scripts / proc-macros (host) are not + # instrumented. LeakSanitizer is enabled by default under ASan on Linux; + # this catches regressions like https://github.com/ada-url/rust/issues/101 + # where the failed-parse path leaked the allocation from `ada_parse`. + - name: Test under AddressSanitizer + LeakSanitizer + env: + CC: clang + CXX: clang++ + ADA_SANITIZE: address + RUSTFLAGS: '-Zsanitizer=address' + ASAN_OPTIONS: 'detect_leaks=1' + run: cargo +nightly test --tests --target x86_64-unknown-linux-gnu + format: name: Format runs-on: ubuntu-latest diff --git a/Cargo.lock b/Cargo.lock index d6289fb..f751b52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,7 +4,7 @@ version = 4 [[package]] name = "ada-url" -version = "3.4.4" +version = "3.4.5" dependencies = [ "cc", "codspeed-criterion-compat", diff --git a/Cargo.toml b/Cargo.toml index deb68b1..78d85d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ authors = [ "LongYinan ", "Boshen " ] -version = "3.4.4" +version = "3.4.5" edition = "2024" description = "Fast WHATWG Compliant URL parser" documentation = "https://docs.rs/ada-url" diff --git a/build.rs b/build.rs index 93769be..dc641c1 100644 --- a/build.rs +++ b/build.rs @@ -194,5 +194,19 @@ fn main() { } } + // Optionally compile the bundled C++ (ada) with a sanitizer. This is used + // by the AddressSanitizer CI job so that FFI allocations (e.g. the result + // `ada_parse` always heap-allocates) are instrumented and leaks/errors + // inside ada itself are caught. Enable with `ADA_SANITIZE=address` and a + // matching `RUSTFLAGS=-Zsanitizer=address`; use a clang toolchain (CC/CXX) + // so the C++ shares the LLVM sanitizer runtime that rustc links in. + println!("cargo:rerun-if-env-changed=ADA_SANITIZE"); + if let Ok(sanitizer) = env::var("ADA_SANITIZE") + && !sanitizer.is_empty() + { + build.flag(format!("-fsanitize={sanitizer}")); + build.flag("-fno-omit-frame-pointer"); + } + build.compile("ada"); } diff --git a/src/lib.rs b/src/lib.rs index 6d767de..3afee2d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -230,6 +230,11 @@ impl Url { if unsafe { ffi::ada_is_valid(url_aggregator) } { Ok(url_aggregator.into()) } else { + // `ada_parse`/`ada_parse_with_base` always allocate a result on the + // heap, even when parsing fails. On the success path the pointer is + // owned by `Url` and freed via its `Drop` impl, but on the error path + // we must free it here to avoid leaking the allocation. + unsafe { ffi::ada_free(url_aggregator) }; Err(ParseUrlError { input }) } } diff --git a/tests/basic_tests.rs b/tests/basic_tests.rs index 6603033..fa50cee 100644 --- a/tests/basic_tests.rs +++ b/tests/basic_tests.rs @@ -242,6 +242,22 @@ fn confusing_mess() { assert_eq!(url.origin(), "http://d:2"); } +/// Regression test for . +/// +/// `ada_parse`/`ada_parse_with_base` always allocate a result on the heap, +/// even when the input fails to parse. The failed-parse path used to drop the +/// returned pointer without calling `ada_free`, leaking one allocation per +/// call. This test repeatedly parses invalid input on both the base-less and +/// with-base code paths; under LeakSanitizer/Valgrind a regression here shows +/// up as leaked allocations originating in `ada_parse`. +#[test] +fn parse_invalid_url_does_not_leak() { + for _ in 0..1_000 { + assert!(Url::parse("http://[invalid", None).is_err()); + assert!(Url::parse("http://[invalid", Some("http://example.com")).is_err()); + } +} + #[test] fn standard_file() { let url = parse("file:///tmp/mock/path");