From 248cb53693474d859138946344f3d27e18e4377b Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 27 May 2026 12:34:58 +0000 Subject: [PATCH] =?UTF-8?q?feat(main):=20Phase=204.6=20writer=20wiring=20?= =?UTF-8?q?=E2=80=94=20populate=20universe-provenance=20Metadata=20in=20fo?= =?UTF-8?q?rward=20cron?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the last leg of the Phase 4.6 chain. PR #274 landed the `historical_universe.members_at()` module + 2 nullable Metadata fields. PR #275 wired `universe_provider` into `pbo_dsr.factor_passes_gates()` so validation gates carry honest provenance. This PR makes the forward-cron `metadata.json` output ACTUALLY populate those fields instead of leaving them None. ## What changed - `compute/main.py` — `Metadata(...)` construction now passes: - `universe_membership_as_of=now.date().isoformat()` (today's date — forward cron scores as-of today) - `survivorship_bias_corrected=True` (today's S&P 500 IS the honest universe for an as-of-today query, per the PR #274 schema docstring semantic — True means "this output's universe assumption is honest for its as_of_date") - `tests/test_output/test_writer.py` — 2 new round-trip tests: - Phase 4.6 happy path: both fields survive Pydantic → JSON - Legacy snapshot back-compat: when neither field is passed (pre-0.10.7 caller pattern), Pydantic defaults to None and JSON writes nulls ## Hard rules preserved - ✅ Rule 9 (schema triple) — no schema change in this PR (fields already in schemas.py + types.ts + snapshot from PR #274) - ✅ Rule 16 — N/A (no scoring change) - ✅ Rule 18 — observability surface from PR #274 is now actually populated; consumers can branch on it - ✅ No new deps - ✅ No new env-vars ## Verification - `ruff check compute/main.py tests/test_output/test_writer.py` — clean - `python -m compute.output.schema_check` — Schema snapshot in sync - `python -m pytest tests/test_output/test_writer.py -k metadata` — 4 passed (2 existing + 2 new) 
## What goes live on next cron Next weekday cron (Thu 2026-05-28 22:00 UTC) writes: metadata.json: ... universe_membership_as_of: "2026-05-28" survivorship_bias_corrected: true Backward compat: legacy snapshots (pre-0.10.7) still have these fields as null per the Pydantic optional default. ## Closes the Phase 4.6 chain | Layer | PR | Status | |---|---|---| | Module | #274 | members_at() + CSV + tests | | Schema | #274 | Metadata fields + types.ts + snapshot | | Validation gate | #275 | universe_provider kwarg in pbo_dsr | | **Writer** | **this PR** | **forward cron populates Metadata** | ## NOT in this PR (next follow-ups) - Honest re-validation of existing pillars + manipulation_index with historical universe (likely shifts PBO/DSR baselines DOWN 5-15%) - Verify-helper Section M for universe-provenance accounting equation - Backtest harness that consumes the new universe_provider end-to-end --- compute/main.py | 10 ++++++++ tests/test_output/test_writer.py | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/compute/main.py b/compute/main.py index 6a3638800..613fb7508 100644 --- a/compute/main.py +++ b/compute/main.py @@ -1952,6 +1952,16 @@ def _fetch_one_form4(ticker: str) -> tuple[dict, float, bool]: next_update_utc=_iso(now + timedelta(days=_next_business_day_offset(now))), universe=config.UNIVERSE, universe_size=len(summaries), + # Phase 4.6 (0.10.7-phase4.6) — survivorship-bias provenance per + # Research Report v1.0 §7.4. Forward cron's as-of is today, and + # the universe we just scored IS today's current S&P 500 — so + # the lookup is honest by definition. survivorship_bias_corrected + # = True signals "this output's universe assumption is honest for + # its as_of_date" (vs False = historical query that fell back to + # current). Backtest / validation callers populate these from + # ``compute.ingest.historical_universe.members_at()`` directly. 
+ universe_membership_as_of=now.date().isoformat(), + survivorship_bias_corrected=True, compute_run_id=os.environ.get("GITHUB_RUN_ID", "local"), git_commit=(os.environ.get("GITHUB_SHA") or "unknown")[:40], mos_trailing_ic_smoke=mos_ic, diff --git a/tests/test_output/test_writer.py b/tests/test_output/test_writer.py index b147c2ddf..a77ee3cf9 100644 --- a/tests/test_output/test_writer.py +++ b/tests/test_output/test_writer.py @@ -101,6 +101,47 @@ def test_write_metadata_json_tier2_disabled_round_trip(tmp_path): assert payload["tier2_enabled"] is False +def test_write_metadata_json_universe_provenance_round_trip(tmp_path): + """Phase 4.6 — `universe_membership_as_of` + `survivorship_bias_corrected` + survive the Pydantic → JSON round trip and stay accessible to the + verify-helper + downstream backtest consumers.""" + meta = Metadata( + version="0.10.7-phase4.6", + last_update_utc="2026-05-27T22:00:00Z", + next_update_utc="2026-05-28T22:00:00Z", + universe="SP500", + universe_size=502, + compute_run_id="run-789", + git_commit="abc789", + universe_membership_as_of="2026-05-27", + survivorship_bias_corrected=True, + ) + out = write_metadata_json(meta, tmp_path) + payload = json.loads(out.read_text()) + assert payload["universe_membership_as_of"] == "2026-05-27" + assert payload["survivorship_bias_corrected"] is True + + +def test_write_metadata_json_universe_provenance_legacy_snapshot(tmp_path): + """Pre-0.10.7 metadata.json shapes (no universe-provenance fields) + still serialize cleanly — the two new fields default to None and the + payload contains them as null.""" + meta = Metadata( + version="0.10.6-phase4.5e", + last_update_utc="2026-05-26T22:00:00Z", + next_update_utc="2026-05-27T22:00:00Z", + universe="SP500", + universe_size=502, + compute_run_id="run-legacy", + git_commit="legacy0", + # universe_membership_as_of / survivorship_bias_corrected omitted + ) + out = write_metadata_json(meta, tmp_path) + payload = json.loads(out.read_text()) + assert 
payload["universe_membership_as_of"] is None + assert payload["survivorship_bias_corrected"] is None + + def test_write_stock_detail_round_trip(tmp_path): detail = StockDetail( ticker="AAPL",