diff --git a/Cargo.toml b/Cargo.toml index bb932de..e8d6c2e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,3 +26,8 @@ serde_json = "1.0" rer-version = { path = "crates/rer-version", version = "0.1.0-rc.6" } rer-resolver = { path = "crates/rer-resolver", version = "0.1.0-rc.6" } pyo3 = { version = "0.23.5", features = ["extension-module"] } +# `mimalloc` is wired into the bench binary as a `#[global_allocator]`. +# Callgrind shows ~33% of cycles in libc malloc/free; mimalloc has measurably +# lower per-call cost on the small-object churn rer creates (`SmallVec` in +# `Ranges`, per-call `FxHashMap`s in `reduce_by`, etc.). +mimalloc = "0.1" diff --git a/crates/examples/Cargo.toml b/crates/examples/Cargo.toml index 166e6e3..645642d 100644 --- a/crates/examples/Cargo.toml +++ b/crates/examples/Cargo.toml @@ -12,3 +12,6 @@ path = "rez_benchmark_dataset.rs" rer-resolver = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +# `mimalloc` is set as the global allocator in `rez_benchmark_dataset` — see +# the comment in that file. Workspace-pinned for a consistent version. +mimalloc = { workspace = true } diff --git a/crates/examples/rez_benchmark_dataset.rs b/crates/examples/rez_benchmark_dataset.rs index d29a456..437896d 100644 --- a/crates/examples/rez_benchmark_dataset.rs +++ b/crates/examples/rez_benchmark_dataset.rs @@ -11,6 +11,13 @@ use rer_resolver::rez_solver::{ make_shared_cache, PackageRepo, Requirement, Solver, SolverStatus, }; + +// Callgrind on this binary shows ~33% of cycles in libc malloc/free — +// `SmallVec` extends inside `Ranges`, per-call `FxHashMap`s in `reduce_by`, +// hashbrown rehashes, and `String::clone`s. mimalloc's small-object path +// outperforms glibc's `_int_malloc`/`_int_free` on exactly this workload. +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; use serde::Deserialize; use std::path::PathBuf; use std::rc::Rc;