diff --git a/Cargo.toml b/Cargo.toml
index bb932de..e8d6c2e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -26,3 +26,8 @@ serde_json = "1.0"
 rer-version = { path = "crates/rer-version", version = "0.1.0-rc.6" }
 rer-resolver = { path = "crates/rer-resolver", version = "0.1.0-rc.6" }
 pyo3 = { version = "0.23.5", features = ["extension-module"] }
+# `mimalloc` is the `#[global_allocator]` of the `rez_benchmark_dataset` bench
+# binary. Callgrind shows ~33% of cycles in libc malloc/free; mimalloc has
+# measurably lower per-call cost on the small-object churn rer creates
+# (`SmallVec` in `Ranges`, per-call `FxHashMap`s in `reduce_by`, etc.).
+mimalloc = "0.1"
diff --git a/crates/examples/Cargo.toml b/crates/examples/Cargo.toml
index 166e6e3..645642d 100644
--- a/crates/examples/Cargo.toml
+++ b/crates/examples/Cargo.toml
@@ -12,3 +12,6 @@ path = "rez_benchmark_dataset.rs"
 rer-resolver = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
+# `mimalloc` is the global allocator in `rez_benchmark_dataset.rs` — see the
+# comment there. Version requirement is inherited from the workspace manifest.
+mimalloc = { workspace = true }
diff --git a/crates/examples/rez_benchmark_dataset.rs b/crates/examples/rez_benchmark_dataset.rs
index d29a456..437896d 100644
--- a/crates/examples/rez_benchmark_dataset.rs
+++ b/crates/examples/rez_benchmark_dataset.rs
@@ -11,6 +11,13 @@
 use rer_resolver::rez_solver::{
     make_shared_cache, PackageRepo, Requirement, Solver, SolverStatus,
 };
+
+// Install mimalloc as this binary's global allocator. Callgrind shows ~33% of
+// cycles in libc malloc/free: `SmallVec` extends inside `Ranges`, per-call
+// `FxHashMap`s in `reduce_by`, hashbrown rehashes, and `String::clone`s —
+// small-object churn where mimalloc beats glibc's `_int_malloc`/`_int_free`.
+#[global_allocator]
+static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
 use serde::Deserialize;
 use std::path::PathBuf;
 use std::rc::Rc;