diff --git a/CHANGELOG.md b/CHANGELOG.md index 2aa6ea4..5d7055f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,17 @@ page. ### Added +- **`PackageData.from_strings(name, version, requires=None, variants=None)`** — + classmethod constructor for raw-string callers, symmetric with + `from_rez(pkg)`. Skips rez's `AttributeForwardMeta` chain, the + `Requirement` parse, and the `str(Requirement)` round-trip — the + latter being a measurable fraction of integration overhead on + rez-shim hot paths (per-package, every package, every resolve). + Functionally equivalent to the four-arg constructor; the + classmethod form exists so callers wiring `pkg.resource.data` + through pyrer have a named, documented contract to reach for. + Falls back to `from_rez` for `@early` / `@late`-bound attributes. + Closes #88. - **`load_family` callback** on `pyrer.solve()` — opt-in lazy package discovery: pass `load_family: Callable[[str], list[PackageData]]` and the solver calls it on demand the first time it needs a family it hasn't seen. diff --git a/crates/rer-python/src/lib.rs b/crates/rer-python/src/lib.rs index 75952d8..c6792be 100644 --- a/crates/rer-python/src/lib.rs +++ b/crates/rer-python/src/lib.rs @@ -85,6 +85,16 @@ impl PackageData { /// `pyrer` so every integration site doesn't have to write the same /// extraction loop. `pyrer` itself does not import rez; this method is /// duck-typed and works against any object with the four attributes. + /// + /// **Faster alternative for raw-data callers:** if you already have the + /// raw strings (typically from `pkg.resource.data` on a rez `Package`, + /// which stores `requires` / `variants` as raw `list[str]` / + /// `list[list[str]]` in the common non-late-bound case), prefer + /// [`Self::from_strings`]. It skips the per-attribute + /// `AttributeForwardMeta` lookup, the late-bound wrapping, the + /// `Requirement` parse, and the `str(Requirement)` round-trip — none + /// of which produce a different `PackageData` for the common case, + /// but all of which take real time per package. #[classmethod] fn from_rez(_cls: &Bound<'_, PyType>, pkg: &Bound<'_, PyAny>) -> PyResult { let name: String = pkg.getattr("name")?.extract()?; @@ -98,6 +108,58 @@ impl PackageData { variants, }) } + + /// Build a [`PackageData`] from raw strings, skipping any rez + /// wrapper-object resolution. Use this when you already have raw + /// `(name, version, requires, variants)` data — typically pulled from + /// `pkg.resource.data` on a rez `Package`: + /// + /// ```python + /// data = pkg.resource.data + /// pd = pyrer.PackageData.from_strings( + /// data["name"], + /// data["version"], + /// data.get("requires"), # may be None / list[str] + /// data.get("variants"), # may be None / list[list[str]] + /// ) + /// ``` + /// + /// Faster than [`Self::from_rez`] on rez-integration hot paths because + /// it does not trigger rez's `AttributeForwardMeta` per attribute, does + /// not parse each requirement string into a `Requirement` object, and + /// does not round-trip each `Requirement` back through `__str__`. + /// + /// `requires` and `variants` accept `None` (interpreted as empty), + /// matching `dict.get(...)` ergonomics. + /// + /// Functionally equivalent to the four-arg constructor + /// `PackageData(name, version, requires, variants)` — both take the + /// same fast PyO3 extraction path. The classmethod form exists to make + /// the fast path discoverable alongside [`Self::from_rez`] and to give + /// the contract a name in callers' code. Closes #88. + /// + /// **Caveat — late-bound requirements:** for packages where rez stores + /// `requires` or `variants` as a `SourceCode` instance (`@early` / + /// `@late` binding), `pkg.resource.data["requires"]` is *not* a + /// `list[str]` and this method will raise. Fall back to + /// [`Self::from_rez`] for those packages — it walks rez's lazy + /// attribute path which evaluates the source code. + #[classmethod] + #[pyo3(signature = (name, version, requires=None, variants=None))] + fn from_strings( + _cls: &Bound<'_, PyType>, + name: String, + version: String, + requires: Option>, + variants: Option>>, + ) -> Self { + PackageData { + name, + version, + requires: requires.unwrap_or_default(), + variants: variants.unwrap_or_default(), + } + } } /// Pull a flat list of requirement strings from a Python object that is diff --git a/docs/content/docs/getting-started/rez-integration.md b/docs/content/docs/getting-started/rez-integration.md index d633c33..89f79e7 100644 --- a/docs/content/docs/getting-started/rez-integration.md +++ b/docs/content/docs/getting-started/rez-integration.md @@ -79,6 +79,50 @@ is duck-typed — `pyrer` itself does not import rez — so you can also pass any object exposing the same four attributes (e.g. a test fixture). +### Faster construction with `from_strings` + +`from_rez(pkg)` triggers rez's `AttributeForwardMeta` chain on every +attribute and parses each requirement string into a `Requirement` +object only to immediately turn it back into a string. When you +already have the raw strings, prefer +`PackageData.from_strings(name, version, requires, variants)` — +it skips the wrapper round-trip entirely: + +```python +def build_pyrer_packages_fast(package_paths): + for family in iter_package_families(paths=package_paths): + for pkg in family.iter_packages(): + data = pkg.resource.data + # `data["requires"]` is a raw list[str] in the common + # (non-late-bound) case; fall back to from_rez otherwise. + if isinstance(data.get("requires", []), list) and \ + isinstance(data.get("variants", []), list): + yield pyrer.PackageData.from_strings( + data["name"], + data["version"], + data.get("requires"), + data.get("variants"), + ) + else: + # @early / @late bindings — let rez evaluate them. + yield pyrer.PackageData.from_rez(pkg) +``` + +The `from_strings` method: + +- Skips the per-attribute `AttributeForwardMeta` lookup. +- Skips the `Requirement` parse (no `Version` / `VersionRange` AST + is built then discarded). +- Skips the `str(Requirement)` round-trip per requirement. +- Accepts `None` for `requires` / `variants` (matches + `dict.get(...)` ergonomics — no `or ()` boilerplate needed). + +Functionally equivalent to the four-arg constructor; the +classmethod form exists so the contract has a name. **Always fall +back to `from_rez` for packages with `@early` or `@late` binding** — +in those cases `resource.data["requires"]` is a `SourceCode` +instance, not a `list[str]`, and `from_strings` will raise. + Two notes on this step: - It is **eager** — every package on every path is loaded before the diff --git a/scripts/bench_python_construction.py b/scripts/bench_python_construction.py new file mode 100644 index 0000000..307a440 --- /dev/null +++ b/scripts/bench_python_construction.py @@ -0,0 +1,305 @@ +#!/usr/bin/env python3 +"""Micro-benchmark the `pyrer.PackageData` construction paths. + +Establishes the baseline for issue #88's perf claim: how much does +`from_rez(pkg)` actually cost vs. `from_strings(...)` / the four-arg +constructor, and how does that scale with package count? Run on +demand — not part of CI; results are machine-dependent. + +Measures: + 1. Per-call construction cost for each path (μs, isolated). + 2. Per-batch construction cost for N packages (ms). + 3. End-to-end `pyrer.solve(...)` time with packages built each way. + 4. Construction-vs-solve share of total wall time. + +Usage: + python scripts/bench_python_construction.py + python scripts/bench_python_construction.py --packages 500 --iters 200 + +Requires: + pip install maturin + cd crates/rer-python && maturin develop +""" +import argparse +import sys +import timeit +from typing import List + +import pyrer + + +# --------------------------------------------------------------------------- +# Fake-rez Package mimics +# --------------------------------------------------------------------------- + + +class FakeRequirement: + """Mimics `rez.version.Requirement` — not a str, only renders via __str__.""" + + __slots__ = ("_s",) + + def __init__(self, s: str) -> None: + self._s = s + + def __str__(self) -> str: + return self._s + + +class FakeVersion: + """Mimics `rez.version.Version` — not a str, only renders via __str__.""" + + __slots__ = ("_s",) + + def __init__(self, s: str) -> None: + self._s = s + + def __str__(self) -> str: + return self._s + + +class FakeRezPackage: + """Stand-in for `rez.packages.Package`. The four duck-typed attributes + surface `FakeVersion` / `FakeRequirement` objects (not `str`) so that + `from_rez` pays the `__str__` round-trip on each one — the cost issue + #88 was filed against. + """ + + __slots__ = ("name", "version", "requires", "variants") + + def __init__( + self, + name: str, + version: str, + requires: List[str], + variants: List[List[str]], + ) -> None: + self.name = name + self.version = FakeVersion(version) + self.requires = [FakeRequirement(r) for r in requires] if requires else None + self.variants = ( + [[FakeRequirement(r) for r in v] for v in variants] if variants else None + ) + + +# --------------------------------------------------------------------------- +# Synthetic repo +# --------------------------------------------------------------------------- + + +def synth_packages(n: int): + """Return three parallel lists for N packages: + + - raw_specs: (name, version, requires, variants) tuples — what + `pkg.resource.data` would hand you (and what `from_strings` + consumes). + - fake_pkgs: `FakeRezPackage` instances — what `from_rez` consumes. + - solver_inputs: the resolve seed (just "app" — any subset works). + + Each package has 3 requires and 2 variants of 2 entries each — a + realistic shape that exercises the full attribute walk on `from_rez`. + """ + raw_specs = [] + fake_pkgs = [] + for i in range(n): + if i == 0: + name, version = "app", "1.0.0" + requires = ["lib", "util"] + variants = [] + else: + name = f"pkg{i:04d}" + version = "1.0.0" + requires = ["lib", "util"] if i < n // 2 else [] + variants = ( + [[f"python-3.{(i + 10) % 12}"], [f"python-3.{(i + 11) % 12}"]] + if i % 3 == 0 + else [] + ) + raw_specs.append((name, version, requires, variants)) + fake_pkgs.append(FakeRezPackage(name, version, requires, variants)) + # lib / util — referenced by app's requires. + for extra in (("lib", "1.0.0"), ("util", "1.0.0")): + raw_specs.append((extra[0], extra[1], [], [])) + fake_pkgs.append(FakeRezPackage(extra[0], extra[1], [], [])) + return raw_specs, fake_pkgs + + +# --------------------------------------------------------------------------- +# Timers +# --------------------------------------------------------------------------- + + +def best_of(stmt, setup, number, repeat) -> float: + """Median microseconds-per-call from a timeit.Timer run.""" + t = timeit.Timer(stmt=stmt, setup=setup, globals=globals()) + times = t.repeat(repeat=repeat, number=number) + return (min(times) / number) * 1_000_000 # μs/call + + +def bench_single_call(raw_specs, fake_pkgs, iters: int) -> None: + """Per-call construction cost for each path (a single random package).""" + spec = raw_specs[0] + fake = fake_pkgs[0] + name, version, requires, variants = spec + globals().update( + spec=spec, fake=fake, + name=name, version=version, requires=requires, variants=variants, + ) + + print("Per-call construction (median μs, one package, deep inputs)") + print("-" * 60) + + t_new = best_of( + "pyrer.PackageData(name, version, requires, variants)", + "", + number=iters, + repeat=5, + ) + t_strs = best_of( + "pyrer.PackageData.from_strings(name, version, requires, variants)", + "", + number=iters, + repeat=5, + ) + t_rez = best_of( + "pyrer.PackageData.from_rez(fake)", + "", + number=iters, + repeat=5, + ) + + print(f" PackageData(name, version, requires, variants): {t_new:7.2f} μs") + print(f" PackageData.from_strings(...): {t_strs:7.2f} μs") + print(f" PackageData.from_rez(fake_rez_pkg): {t_rez:7.2f} μs") + delta_strs = t_rez - t_strs + pct_strs = (delta_strs / t_rez) * 100 + print() + print(f" from_strings vs from_rez: -{delta_strs:.2f} μs / -{pct_strs:.1f}%") + delta_new = t_rez - t_new + pct_new = (delta_new / t_rez) * 100 + print(f" __new__ vs from_rez: -{delta_new:.2f} μs / -{pct_new:.1f}% " + "(should match from_strings — same PyO3 path)") + print() + + +def bench_batch(raw_specs, fake_pkgs, iters: int) -> None: + """Total cost of materialising the whole repo N times.""" + globals().update(raw_specs=raw_specs, fake_pkgs=fake_pkgs) + n = len(raw_specs) + + print(f"Per-batch construction (median ms, {n} packages built per iteration)") + print("-" * 60) + + t_new = best_of( + "[pyrer.PackageData(n, v, r, vr) for (n, v, r, vr) in raw_specs]", + "", + number=iters, + repeat=5, + ) + t_strs = best_of( + "[pyrer.PackageData.from_strings(n, v, r, vr) for (n, v, r, vr) in raw_specs]", + "", + number=iters, + repeat=5, + ) + t_rez = best_of( + "[pyrer.PackageData.from_rez(p) for p in fake_pkgs]", + "", + number=iters, + repeat=5, + ) + + # best_of returns μs/call; one "call" is one batch of N constructions. + print(f" list-comp via PackageData(...): {t_new / 1000:7.3f} ms / batch ({t_new / n:6.2f} μs/pkg)") + print(f" list-comp via from_strings(...): {t_strs / 1000:7.3f} ms / batch ({t_strs / n:6.2f} μs/pkg)") + print(f" list-comp via from_rez(fake_pkg): {t_rez / 1000:7.3f} ms / batch ({t_rez / n:6.2f} μs/pkg)") + print() + delta_ms = (t_rez - t_strs) / 1000 + print(f" Savings switching from_rez → from_strings: {delta_ms:6.3f} ms per batch") + print() + + +def bench_end_to_end(raw_specs, fake_pkgs, iters: int) -> None: + """Build + solve, isolating construction's share of total time.""" + globals().update(raw_specs=raw_specs, fake_pkgs=fake_pkgs) + + print("End-to-end pyrer.solve() — construction's share of wall time") + print("-" * 60) + + t_solve_only = best_of( + "pyrer.solve(['app'], pkgs)", + "pkgs = [pyrer.PackageData.from_strings(n, v, r, vr) for (n, v, r, vr) in raw_specs]", + number=iters, + repeat=5, + ) + t_e2e_strs = best_of( + "pyrer.solve(['app'], [pyrer.PackageData.from_strings(n, v, r, vr) for (n, v, r, vr) in raw_specs])", + "", + number=iters, + repeat=5, + ) + t_e2e_rez = best_of( + "pyrer.solve(['app'], [pyrer.PackageData.from_rez(p) for p in fake_pkgs])", + "", + number=iters, + repeat=5, + ) + + print(f" solve() alone (pre-built packages): {t_solve_only / 1000:7.3f} ms") + print(f" build (from_strings) + solve: {t_e2e_strs / 1000:7.3f} ms") + print(f" build (from_rez) + solve: {t_e2e_rez / 1000:7.3f} ms") + print() + build_share_strs = (t_e2e_strs - t_solve_only) / t_e2e_strs * 100 + build_share_rez = (t_e2e_rez - t_solve_only) / t_e2e_rez * 100 + print(f" Build-phase share of total (from_strings): {build_share_strs:5.1f}%") + print(f" Build-phase share of total (from_rez): {build_share_rez:5.1f}%") + print() + e2e_delta = (t_e2e_rez - t_e2e_strs) / 1000 + e2e_pct = (t_e2e_rez - t_e2e_strs) / t_e2e_rez * 100 + print(f" End-to-end savings switching to from_strings: " + f"{e2e_delta:.3f} ms ({e2e_pct:.1f}%)") + print() + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__.splitlines()[0]) + parser.add_argument( + "--packages", + type=int, + default=50, + help="Number of packages in the synthetic repo (default: 50)", + ) + parser.add_argument( + "--iters", + type=int, + default=500, + help="`timeit` `number` argument — batches per repeat (default: 500)", + ) + args = parser.parse_args() + + print(f"pyrer version: {getattr(pyrer, '__version__', '?')}") + print(f"synthetic repo: {args.packages} packages") + print(f"iterations per timing: {args.iters} (best of 5)") + print() + + raw_specs, fake_pkgs = synth_packages(args.packages) + + bench_single_call(raw_specs, fake_pkgs, iters=args.iters * 10) + bench_batch(raw_specs, fake_pkgs, iters=max(args.iters // 5, 50)) + bench_end_to_end(raw_specs, fake_pkgs, iters=max(args.iters // 10, 20)) + + print("Note: these are Python-side construction costs. The Rust solver") + print("itself is unchanged. End-to-end wall time on a real rez integration") + print("will also include rez's own per-attribute AttributeForwardMeta cost") + print("(not modelled by FakeRezPackage), so the from_rez line above is a") + print("lower bound on the cost in production. The from_strings line is a") + print("realistic upper bound for the fast path.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/test_rich_api.py b/tests/test_rich_api.py index 3370e8b..c20d2ba 100644 --- a/tests/test_rich_api.py +++ b/tests/test_rich_api.py @@ -265,6 +265,119 @@ class NotAPackage: pyrer.PackageData.from_rez(NotAPackage()) +# --------------------------------------------------------------------------- +# PackageData.from_strings — raw-string fast path (issue #88) +# --------------------------------------------------------------------------- + + +def test_from_strings_basic(): + """All four args supplied as raw strings — no wrapper objects involved.""" + pd = pyrer.PackageData.from_strings( + "maya", + "2024.0", + ["python-3"], + [["python-3.10"], ["python-3.11"]], + ) + assert pd.name == "maya" + assert pd.version == "2024.0" + assert pd.requires == ["python-3"] + assert pd.variants == [["python-3.10"], ["python-3.11"]] + + +def test_from_strings_defaults_to_empty(): + """requires=None and variants=None default to empty lists.""" + pd = pyrer.PackageData.from_strings("foo", "1.0") + assert pd.requires == [] + assert pd.variants == [] + + +def test_from_strings_accepts_none_for_collections(): + """`dict.get("requires")` returns None for a missing key — must accept it.""" + pd = pyrer.PackageData.from_strings("foo", "1.0", None, None) + assert pd.requires == [] + assert pd.variants == [] + + +def test_from_strings_accepts_tuples_and_iterables(): + """PyO3 extracts Vec from any iterable, not just list.""" + pd = pyrer.PackageData.from_strings( + "tool", + "1.0", + ("python-3", "qt-5"), + (("linux", "python-3.10"),), + ) + assert pd.requires == ["python-3", "qt-5"] + assert pd.variants == [["linux", "python-3.10"]] + + +def test_from_strings_matches_constructor(): + """`from_strings` must produce the same PackageData as the four-arg + constructor — same fast PyO3 extraction path, classmethod is just a + named alias for callers wiring rez's resource.data through pyrer.""" + args = ("maya", "2024.0", ["python-3"], [["python-3.10"], ["python-3.11"]]) + via_classmethod = pyrer.PackageData.from_strings(*args) + via_constructor = pyrer.PackageData(*args) + assert via_classmethod.name == via_constructor.name + assert via_classmethod.version == via_constructor.version + assert via_classmethod.requires == via_constructor.requires + assert via_classmethod.variants == via_constructor.variants + + +def test_from_strings_drives_solve_like_from_rez(): + """End-to-end: a solve fed via from_strings produces the same result as + one fed via from_rez against an equivalent fake-rez Package.""" + + class FakeReq: + def __init__(self, s): + self._s = s + + def __str__(self): + return self._s + + class FakePkg: + def __init__(self, name, version, requires=None, variants=None): + self.name = name + self.version = version + self.requires = ( + [FakeReq(r) for r in requires] if requires else None + ) + self.variants = ( + [[FakeReq(r) for r in v] for v in variants] if variants else None + ) + + fakes = [ + FakePkg("app", "1.0.0", requires=["lib-2"]), + FakePkg("lib", "1.0.0"), + FakePkg("lib", "2.0.0"), + ] + via_from_rez = [pyrer.PackageData.from_rez(p) for p in fakes] + + via_from_strings = [ + pyrer.PackageData.from_strings("app", "1.0.0", ["lib-2"]), + pyrer.PackageData.from_strings("lib", "1.0.0"), + pyrer.PackageData.from_strings("lib", "2.0.0"), + ] + + result_a = pyrer.solve(["app"], via_from_rez) + result_b = pyrer.solve(["app"], via_from_strings) + assert result_a.resolved == result_b.resolved + assert result_a.status == result_b.status == "solved" + + +def test_from_strings_rejects_non_string_requires(): + """from_strings is the contract-strict fast path — pass it a non-string + in `requires` and it raises rather than silently stringifying. Use + `from_rez` (or pre-stringify) for object inputs.""" + import pytest + + class NotAString: + def __str__(self): + return "python-3" + + with pytest.raises(TypeError): + pyrer.PackageData.from_strings("foo", "1.0", [NotAString()]) + + # --------------------------------------------------------------------------- # variant_select_mode — rez's intersection_priority vs version_priority # ---------------------------------------------------------------------------