From 8fa7dd7c77da2bfedeb4e0fe0e00180f35c79146 Mon Sep 17 00:00:00 2001 From: Philippe Llerena Date: Sun, 17 May 2026 10:05:02 +0200 Subject: [PATCH 1/2] feat(python): add PackageData.from_strings raw-string fast path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #88. ## Why `pyrer.PackageData.from_rez(pkg)` is hot in rez integrations — every package the shim materialises pays the full cost. The current path walks rez's `AttributeForwardMeta` per attribute, lets rez parse each requirement string into a `Requirement` object, and then immediately calls `str(req)` on every one to round-trip back to the raw string pyrer wants in the first place. Per the issue, on the 188-case rez benchmark via the downstream shim that's ~50 packages materialised per resolve × ~10–20 round-trips per package — a few percent of total end-to-end wall time, completely avoidable when the caller has access to `pkg.resource.data` (which already holds these as raw `list[str]` in the common non-late-bound case). ## What A new classmethod symmetric with `from_rez`: ```python pyrer.PackageData.from_strings( name: str, version: str, requires: Sequence[str] | None = None, variants: Sequence[Sequence[str]] | None = None, ) -> PackageData ``` Skips: - `AttributeForwardMeta` lookup (no `pkg.requires` walk). - The `Requirement` parse (no `Version` / `VersionRange` AST built). - The `str(Requirement)` round-trip per requirement. Accepts `None` for `requires` / `variants` to play well with `data.get("requires")` — no `or ()` boilerplate. ## Honest framing on the perf claim `from_strings` is **functionally equivalent** to the four-arg constructor `PackageData(name, version, requires, variants)`. Both take the same fast PyO3 extraction path (PyO3 extracts `Vec<String>` directly from any Python sequence of `str` — list, tuple — with no `.str()` round-trip; non-sequence iterables such as generators and sets, and a bare `str`, raise `TypeError`). Any rez-shim caller using the constructor with raw strings from `pkg.resource.data` already gets this perf today.
The classmethod form's value isn't a new fast path, it's: - A named, documented contract ("raw strings only — no wrapper objects, no late-bound source code"). Mirrors `from_rez`'s naming. - Discoverability — a place to land in autocomplete and docs when the caller is wondering "what's the fast path?". - One canonical site to update if we ever add real fast-path specialisations (e.g. interning the family name, pre-allocating the `Vec`s sized to the iterable's `__len__`). The docs (`docs/content/docs/getting-started/rez-integration.md`) get a worked example showing the recommended shim pattern: try `from_strings` against `pkg.resource.data`, fall back to `from_rez` for `@early` / `@late`-bound attributes where the raw data is a `SourceCode` instance instead of a `list[str]`. ## Tests 7 new tests in `tests/test_rich_api.py`: - `test_from_strings_basic` — happy path - `test_from_strings_defaults_to_empty` — `requires` / `variants` default - `test_from_strings_accepts_none_for_collections` — `dict.get` ergonomics - `test_from_strings_accepts_tuples_and_iterables` — non-list iterables - `test_from_strings_matches_constructor` — same `PackageData` as `__new__` - `test_from_strings_drives_solve_like_from_rez` — end-to-end: a solve fed via `from_strings` resolves identically to one fed via `from_rez` against an equivalent fake-rez Package - `test_from_strings_rejects_non_string_requires` — contract-strict: passing an object with `__str__` raises `TypeError` rather than silently stringifying. The contract is "raw strings only" — use `from_rez` (or pre-stringify) for object inputs. ## Verification - `cargo build`: clean. - `cargo test --lib -p rer-resolver`: **44/44**. - `pytest tests/`: **94/94** (was 87 + 7 new). - `cargo test --release -p rer-resolver --test test_rez_benchmark -- --ignored` (strict 188-case rez differential): **188/188 in 16.52 s** — unchanged. 
No new Rust code on the solver hot path, no shape change to `PackageData` itself; this is a one-method addition to the PyO3 bridge. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 11 ++ crates/rer-python/src/lib.rs | 62 ++++++++++ .../docs/getting-started/rez-integration.md | 44 +++++++ tests/test_rich_api.py | 113 ++++++++++++++++++ 4 files changed, 230 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2aa6ea4..5d7055f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,17 @@ page. ### Added +- **`PackageData.from_strings(name, version, requires=None, variants=None)`** — + classmethod constructor for raw-string callers, symmetric with + `from_rez(pkg)`. Skips rez's `AttributeForwardMeta` chain, the + `Requirement` parse, and the `str(Requirement)` round-trip — the + latter being a measurable fraction of integration overhead on + rez-shim hot paths (per-package, every package, every resolve). + Functionally equivalent to the four-arg constructor; the + classmethod form exists so callers wiring `pkg.resource.data` + through pyrer have a named, documented contract to reach for. + Raises for `@early` / `@late`-bound attributes — callers must fall + back to `from_rez` for those. Closes #88. - **`load_family` callback** on `pyrer.solve()` — opt-in lazy package discovery: pass `load_family: Callable[[str], list[PackageData]]` and the solver calls it on demand the first time it needs a family it hasn't seen. diff --git a/crates/rer-python/src/lib.rs b/crates/rer-python/src/lib.rs index 75952d8..c6792be 100644 --- a/crates/rer-python/src/lib.rs +++ b/crates/rer-python/src/lib.rs @@ -85,6 +85,16 @@ impl PackageData { /// `pyrer` so every integration site doesn't have to write the same /// extraction loop. `pyrer` itself does not import rez; this method is /// duck-typed and works against any object with the four attributes.
+ /// + /// **Faster alternative for raw-data callers:** if you already have the + /// raw strings (typically from `pkg.resource.data` on a rez `Package`, + /// which stores `requires` / `variants` as raw `list[str]` / + /// `list[list[str]]` in the common non-late-bound case), prefer + /// [`Self::from_strings`]. It skips the per-attribute + /// `AttributeForwardMeta` lookup, the late-bound wrapping, the + /// `Requirement` parse, and the `str(Requirement)` round-trip — none + /// of which produce a different `PackageData` for the common case, + /// but all of which take real time per package. #[classmethod] fn from_rez(_cls: &Bound<'_, PyType>, pkg: &Bound<'_, PyAny>) -> PyResult<Self> { let name: String = pkg.getattr("name")?.extract()?; @@ -98,6 +108,58 @@ impl PackageData { variants, }) } + + /// Build a [`PackageData`] from raw strings, skipping any rez + /// wrapper-object resolution. Use this when you already have raw + /// `(name, version, requires, variants)` data — typically pulled from + /// `pkg.resource.data` on a rez `Package`: + /// + /// ```python + /// data = pkg.resource.data + /// pd = pyrer.PackageData.from_strings( + /// data["name"], + /// data["version"], + /// data.get("requires"), # may be None / list[str] + /// data.get("variants"), # may be None / list[list[str]] + /// ) + /// ``` + /// + /// Faster than [`Self::from_rez`] on rez-integration hot paths because + /// it does not trigger rez's `AttributeForwardMeta` per attribute, does + /// not parse each requirement string into a `Requirement` object, and + /// does not round-trip each `Requirement` back through `__str__`. + /// + /// `requires` and `variants` accept `None` (interpreted as empty), + /// matching `dict.get(...)` ergonomics. + /// + /// Functionally equivalent to the four-arg constructor + /// `PackageData(name, version, requires, variants)` — both take the + /// same fast PyO3 extraction path.
The classmethod form exists to make + /// the fast path discoverable alongside [`Self::from_rez`] and to give + /// the contract a name in callers' code. Closes #88. + /// + /// **Caveat — late-bound requirements:** for packages where rez stores + /// `requires` or `variants` as a `SourceCode` instance (`@early` / + /// `@late` binding), `pkg.resource.data["requires"]` is *not* a + /// `list[str]` and this method will raise. Fall back to + /// [`Self::from_rez`] for those packages — it walks rez's lazy + /// attribute path which evaluates the source code. + #[classmethod] + #[pyo3(signature = (name, version, requires=None, variants=None))] + fn from_strings( + _cls: &Bound<'_, PyType>, + name: String, + version: String, + requires: Option<Vec<String>>, + variants: Option<Vec<Vec<String>>>, + ) -> Self { + PackageData { + name, + version, + requires: requires.unwrap_or_default(), + variants: variants.unwrap_or_default(), + } + } } /// Pull a flat list of requirement strings from a Python object that is diff --git a/docs/content/docs/getting-started/rez-integration.md b/docs/content/docs/getting-started/rez-integration.md index d633c33..89f79e7 100644 --- a/docs/content/docs/getting-started/rez-integration.md +++ b/docs/content/docs/getting-started/rez-integration.md @@ -79,6 +79,50 @@ is duck-typed — `pyrer` itself does not import rez — so you can also pass any object exposing the same four attributes (e.g. a test fixture). +### Faster construction with `from_strings` + +`from_rez(pkg)` triggers rez's `AttributeForwardMeta` chain on every +attribute and parses each requirement string into a `Requirement` +object only to immediately turn it back into a string.
When you +already have the raw strings, prefer +`PackageData.from_strings(name, version, requires, variants)` — +it skips the wrapper round-trip entirely: + +```python +def build_pyrer_packages_fast(package_paths): + for family in iter_package_families(paths=package_paths): + for pkg in family.iter_packages(): + data = pkg.resource.data + # `data["requires"]` is a raw list[str] in the common + # (non-late-bound) case; fall back to from_rez otherwise. + if isinstance(data.get("requires", []), list) and \ + isinstance(data.get("variants", []), list): + yield pyrer.PackageData.from_strings( + data["name"], + data["version"], + data.get("requires"), + data.get("variants"), + ) + else: + # @early / @late bindings — let rez evaluate them. + yield pyrer.PackageData.from_rez(pkg) +``` + +The `from_strings` method: + +- Skips the per-attribute `AttributeForwardMeta` lookup. +- Skips the `Requirement` parse (no `Version` / `VersionRange` AST + is built then discarded). +- Skips the `str(Requirement)` round-trip per requirement. +- Accepts `None` for `requires` / `variants` (matches + `dict.get(...)` ergonomics — no `or ()` boilerplate needed). + +Functionally equivalent to the four-arg constructor; the +classmethod form exists so the contract has a name. **Always fall +back to `from_rez` for packages with `@early` or `@late` binding** — +in those cases `resource.data["requires"]` is a `SourceCode` +instance, not a `list[str]`, and `from_strings` will raise. 
+ Two notes on this step: - It is **eager** — every package on every path is loaded before the diff --git a/tests/test_rich_api.py b/tests/test_rich_api.py index 3370e8b..c20d2ba 100644 --- a/tests/test_rich_api.py +++ b/tests/test_rich_api.py @@ -265,6 +265,119 @@ class NotAPackage: pyrer.PackageData.from_rez(NotAPackage()) +# --------------------------------------------------------------------------- +# PackageData.from_strings — raw-string fast path (issue #88) +# --------------------------------------------------------------------------- + + +def test_from_strings_basic(): + """All four args supplied as raw strings — no wrapper objects involved.""" + pd = pyrer.PackageData.from_strings( + "maya", + "2024.0", + ["python-3"], + [["python-3.10"], ["python-3.11"]], + ) + assert pd.name == "maya" + assert pd.version == "2024.0" + assert pd.requires == ["python-3"] + assert pd.variants == [["python-3.10"], ["python-3.11"]] + + +def test_from_strings_defaults_to_empty(): + """requires=None and variants=None default to empty lists.""" + pd = pyrer.PackageData.from_strings("foo", "1.0") + assert pd.requires == [] + assert pd.variants == [] + + +def test_from_strings_accepts_none_for_collections(): + """`dict.get("requires")` returns None for a missing key — must accept it.""" + pd = pyrer.PackageData.from_strings("foo", "1.0", None, None) + assert pd.requires == [] + assert pd.variants == [] + + +def test_from_strings_accepts_tuples_and_iterables(): + """PyO3 extracts Vec from any iterable, not just list.""" + pd = pyrer.PackageData.from_strings( + "tool", + "1.0", + ("python-3", "qt-5"), + (("linux", "python-3.10"),), + ) + assert pd.requires == ["python-3", "qt-5"] + assert pd.variants == [["linux", "python-3.10"]] + + +def test_from_strings_matches_constructor(): + """`from_strings` must produce the same PackageData as the four-arg + constructor — same fast PyO3 extraction path, classmethod is just a + named alias for callers wiring rez's resource.data through 
pyrer.""" + args = ("maya", "2024.0", ["python-3"], [["python-3.10"], ["python-3.11"]]) + via_classmethod = pyrer.PackageData.from_strings(*args) + via_constructor = pyrer.PackageData(*args) + assert via_classmethod.name == via_constructor.name + assert via_classmethod.version == via_constructor.version + assert via_classmethod.requires == via_constructor.requires + assert via_classmethod.variants == via_constructor.variants + + +def test_from_strings_drives_solve_like_from_rez(): + """End-to-end: a solve fed via from_strings produces the same result as + one fed via from_rez against an equivalent fake-rez Package.""" + + class FakeReq: + def __init__(self, s): + self._s = s + + def __str__(self): + return self._s + + class FakePkg: + def __init__(self, name, version, requires=None, variants=None): + self.name = name + self.version = version + self.requires = ( + [FakeReq(r) for r in requires] if requires else None + ) + self.variants = ( + [[FakeReq(r) for r in v] for v in variants] if variants else None + ) + + fakes = [ + FakePkg("app", "1.0.0", requires=["lib-2"]), + FakePkg("lib", "1.0.0"), + FakePkg("lib", "2.0.0"), + ] + via_from_rez = [pyrer.PackageData.from_rez(p) for p in fakes] + + via_from_strings = [ + pyrer.PackageData.from_strings("app", "1.0.0", ["lib-2"]), + pyrer.PackageData.from_strings("lib", "1.0.0"), + pyrer.PackageData.from_strings("lib", "2.0.0"), + ] + + result_a = pyrer.solve(["app"], via_from_rez) + result_b = pyrer.solve(["app"], via_from_strings) + assert result_a.resolved == result_b.resolved + assert result_a.status == result_b.status == "solved" + + +def test_from_strings_rejects_non_string_requires(): + """from_strings is the contract-strict fast path — pass it a non-string + in `requires` and it raises rather than silently stringifying. 
Use + `from_rez` (or pre-stringify) for object inputs.""" + import pytest + + class NotAString: + def __str__(self): + return "python-3" + + with pytest.raises(TypeError): + pyrer.PackageData.from_strings("foo", "1.0", [NotAString()]) + + # --------------------------------------------------------------------------- # variant_select_mode — rez's intersection_priority vs version_priority # --------------------------------------------------------------------------- From fa80585b078011c02e4f72f27787f7ea9852855a Mon Sep 17 00:00:00 2001 From: Philippe Llerena Date: Sun, 17 May 2026 10:23:53 +0200 Subject: [PATCH 2/2] chore(release): bump workspace version to 0.1.0-rc.9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Picks up the `PackageData.from_strings` classmethod (issue #88) — a raw-string fast-path constructor symmetric with `from_rez`, intended for rez-integration callers that already have the strings pulled from `pkg.resource.data`. Functionally equivalent to the four-arg constructor; the classmethod exists to give the fast path a name and a stable contract. 
Co-Authored-By: Claude Opus 4.7 --- Cargo.toml | 6 +++--- docs/config.toml | 2 +- docs/content/_index.md | 2 +- docs/content/docs/getting-started/quick-start.md | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4e3b845..e1c771b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["crates/*"] resolver = "2" [workspace.package] -version = "0.1.0-rc.8" +version = "0.1.0-rc.9" authors = [ "Lorenzo Montant ", "Maxim Doucet ", @@ -23,8 +23,8 @@ lazy_static = "1.5.0" rand = "0.8.5" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" -rer-version = { path = "crates/rer-version", version = "0.1.0-rc.8" } -rer-resolver = { path = "crates/rer-resolver", version = "0.1.0-rc.8" } +rer-version = { path = "crates/rer-version", version = "0.1.0-rc.9" } +rer-resolver = { path = "crates/rer-resolver", version = "0.1.0-rc.9" } pyo3 = { version = "0.23.5", features = ["extension-module"] } # `mimalloc` is wired into the bench binary as a `#[global_allocator]`. # Callgrind shows ~33 % of cycles in libc malloc/free; mimalloc has measurably diff --git a/docs/config.toml b/docs/config.toml index f1cd0b6..e3e16c9 100644 --- a/docs/config.toml +++ b/docs/config.toml @@ -125,7 +125,7 @@ weight = 10 name = "GitHub" pre = '' url = "https://github.com/doubleailes/rer" -post = "v0.1.0-rc.8" +post = "v0.1.0-rc.9" weight = 20 # Footer contents diff --git a/docs/content/_index.md b/docs/content/_index.md index f16bd1e..46882bd 100644 --- a/docs/content/_index.md +++ b/docs/content/_index.md @@ -7,7 +7,7 @@ title = "rer — Rez En Rust" lead = "A faithful Rust port of rez's package solver — callable from Python via PyO3, resolves match rez 1:1." url = "/docs/getting-started/introduction/" url_button = "Get started" -repo_version = "GitHub v0.1.0-rc.8" +repo_version = "GitHub v0.1.0-rc.9" repo_license = "MIT-licensed." 
repo_url = "https://github.com/doubleailes/rer" diff --git a/docs/content/docs/getting-started/quick-start.md b/docs/content/docs/getting-started/quick-start.md index c04fdcc..941ca27 100644 --- a/docs/content/docs/getting-started/quick-start.md +++ b/docs/content/docs/getting-started/quick-start.md @@ -95,7 +95,7 @@ Add the resolver crate to your `Cargo.toml`: ```toml [dependencies] -rer-resolver = "0.1.0-rc.8" +rer-resolver = "0.1.0-rc.9" ``` Then call the solver against an in-memory repository: