dackclup · dackclup · May 27, 2026 · May 27, 2026
diff --git a/compute/validation/universe_drift.py b/compute/validation/universe_drift.py
@@ -0,0 +1,185 @@
+"""Universe drift diagnostics — Phase 4.6 honest re-validation scaffolding.
+
+Phase 4.6 task #2 first unit. Closes the gap between PR #276 (writer
+wiring populates ``Metadata.universe_membership_as_of`` going forward)
+and the eventual full historical re-validation of pillars +
+``manipulation_index`` per Research Report v1.0 §7.4.
+
+This module answers the foundational question every honest re-validation
+must answer FIRST: **what's the universe drift between today and any
+historical as-of date?** Without this answer, IC / PBO / DSR re-runs are
+just numbers — you can't tell if the delta vs the published baseline is
+from (a) survivorship bias correction, (b) scoring drift, or (c) real
+factor decay.
+
+The module is functional in style: it takes a date and a current-universe
+set and returns a ``UniverseDriftReport``. Its only side effects are reading
+the historical CSV (handled by ``compute.ingest.historical_universe``) and
+logging a warning when the result is degraded (``is_complete=False``).
+
+Future PRs (out of this scope) will layer on:
+- IC time-series per pillar at historical as-of dates
+- PBO / DSR re-runs using the universe_provider kwarg landed in PR #275
+- Honest baseline comparison vs published Phase 4.5f numbers
+
+API:
+    >>> from datetime import date
+    >>> from compute.validation.universe_drift import compute_universe_drift
+    >>> current = frozenset({"AAPL", "MSFT", "TSLA", "SMCI"})
+    >>> report = compute_universe_drift(date(2023, 1, 1), current)
+    >>> report.added_since   # tickers added between 2023-01-01 and today
+    frozenset({'SMCI'})
+    >>> report.removed_since  # tickers removed since 2023-01-01 (depends on CSV coverage)
+    frozenset()
+    >>> report.universe_size_at_as_of
+    3
+    >>> report.is_complete
+    True
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from datetime import date as _date
+
+from compute.ingest.historical_universe import MembershipResult, members_at
+
+logger = logging.getLogger(__name__)  # module-level logger, named after this module
+
+
+@dataclass(frozen=True)
+class UniverseDriftReport:
+    """Immutable outcome of ``compute_universe_drift(as_of_date, current_universe)``.
+
+    Compares the constituent set reconstructed for ``as_of_date`` (the
+    "historical" universe) with the snapshot supplied by the caller (the
+    "current" universe). Each field is described next to its declaration.
+    """
+
+    # Historical date that was queried.
+    as_of_date: _date
+    # Date the current_universe snapshot was taken (typically today).
+    anchor_date: _date
+    # len(current_universe), kept for downstream sanity checks.
+    current_size: int
+    # len(historical_universe) per the CSV reversal walk.
+    universe_size_at_as_of: int
+    # Tickers in CURRENT that were NOT in HISTORICAL, i.e. added to the
+    # S&P 500 between as_of_date and the anchor date.
+    added_since: frozenset[str]
+    # Tickers in HISTORICAL that are NOT in CURRENT, i.e. removed from the
+    # S&P 500 over the window: the survivorship-bias-corrected cohort that
+    # current-only views silently exclude.
+    removed_since: frozenset[str]
+    # Tickers in BOTH: the always-in cohort over the window.
+    unchanged: frozenset[str]
+    # Passed through from MembershipResult.is_complete of the underlying
+    # members_at() call.
+    is_complete: bool
+    # Number of reverse events the underlying walk applied.
+    events_applied: int
+    # Passed through from MembershipResult.note.
+    note: str
+
+
+def compute_universe_drift(
+    as_of_date: _date,
+    current_universe: set[str] | frozenset[str],
+    anchor_date: _date | None = None,
+) -> UniverseDriftReport:
+    """Return the add/remove drift between ``as_of_date`` and the anchor date.
+
+    Delegates the historical reconstruction to
+    ``compute.ingest.historical_universe.members_at()`` and splits its
+    result against ``current_universe`` into three disjoint sets:
+    added (current only), removed (historical only) and unchanged (both).
+
+    Args:
+        as_of_date: the historical date to query.
+        current_universe: the S&P 500 constituent set at the anchor date.
+        anchor_date: forwarded unchanged to ``members_at``; ``None`` lets
+            ``members_at`` choose its default (today UTC, according to
+            that function's contract).
+
+    Returns:
+        UniverseDriftReport with the added / removed / unchanged sets,
+        both universe sizes, and the completeness flag, event count and
+        note passed through from the ``MembershipResult``. The report's
+        ``anchor_date`` is the one carried by the ``MembershipResult``.
+
+    Raises:
+        ValueError: if ``as_of_date`` is in the future (propagates
+            from members_at).
+    """
+    membership: MembershipResult = members_at(
+        as_of_date=as_of_date,
+        current_universe=current_universe,
+        anchor_date=anchor_date,
+    )
+
+    current_set = frozenset(current_universe)
+    # Normalise to frozenset: set operators return the type of their LEFT
+    # operand, so a plain ``set`` here would make ``removed`` a mutable set
+    # inside a frozen report whose fields are typed ``frozenset[str]``.
+    historical_set = frozenset(membership.tickers)
+
+    added = current_set - historical_set
+    removed = historical_set - current_set
+    unchanged = current_set & historical_set
+
+    if not membership.is_complete:
+        # The sets are still returned; the warning flags that they must not
+        # be read as an honest historical reconstruction.
+        logger.warning(
+            "compute_universe_drift: degraded report for as_of=%s — "
+            "is_complete=False, note=%r. Added/removed sets reflect "
+            "current-only fallback, NOT honest historical reversal.",
+            as_of_date,
+            membership.note,
+        )
+
+    return UniverseDriftReport(
+        as_of_date=as_of_date,
+        anchor_date=membership.anchor_date,
+        current_size=len(current_set),
+        universe_size_at_as_of=len(historical_set),
+        added_since=added,
+        removed_since=removed,
+        unchanged=unchanged,
+        is_complete=membership.is_complete,
+        events_applied=membership.events_applied,
+        note=membership.note,
+    )
+
+
+def _render_ticker_sample(tickers: frozenset[str], max_listed: int) -> str:
+    """Return one indented line of sorted tickers, with "(+N more)" when capped."""
+    # Clamp the cap: a negative slice bound would list all-but-the-last-N
+    # tickers instead of capping the list.
+    sample = sorted(tickers)[: max(max_listed, 0)]
+    hidden = len(tickers) - len(sample)
+    extra = f" (+{hidden} more)" if hidden else ""
+    return f"    {', '.join(sample)}{extra}"
+
+
+def format_drift_report(report: UniverseDriftReport, *, max_listed: int = 20) -> str:
+    """Render a UniverseDriftReport as a human-readable text block.
+
+    Used by the CLI in ``scripts/historical_pillar_revalidate.py``.
+
+    Args:
+        report: the drift report to render.
+        max_listed: maximum number of tickers listed per category, to keep
+            the output scannable in a terminal; the remainder is summarised
+            as "(+N more)". Negative values are treated as 0.
+
+    Returns:
+        The report as newline-joined text without a trailing newline. The
+        ``note`` line is omitted when ``report.note`` is empty, and a
+        category's ticker line is omitted when that category is empty.
+    """
+    lines = [
+        f"Universe drift report — as_of={report.as_of_date}",
+        f"  anchor date         : {report.anchor_date}",
+        f"  current universe    : {report.current_size} tickers",
+        f"  historical universe : {report.universe_size_at_as_of} tickers",
+        f"  events applied      : {report.events_applied}",
+        f"  is_complete         : {report.is_complete}",
+    ]
+    if report.note:
+        lines.append(f"  note                : {report.note}")
+    lines.append("")
+    lines.append(f"  ADDED since as_of   : {len(report.added_since)} tickers")
+    if report.added_since:
+        lines.append(_render_ticker_sample(report.added_since, max_listed))
+    lines.append(f"  REMOVED since as_of : {len(report.removed_since)} tickers")
+    if report.removed_since:
+        lines.append(_render_ticker_sample(report.removed_since, max_listed))
+        lines.append("    ↑ this is the SURVIVORSHIP-BIAS-CORRECTED cohort —")
+        lines.append("      current-universe-only views silently EXCLUDE these")
+    lines.append(f"  UNCHANGED           : {len(report.unchanged)} tickers (always-in cohort)")
+    return "\n".join(lines)
+
+
+__all__ = [  # public API of this module; limits what ``from ... import *`` exports
+    "UniverseDriftReport",
+    "compute_universe_drift",
+    "format_drift_report",
+]
diff --git a/docs/research/historical-revalidation-harness.md b/docs/research/historical-revalidation-harness.md
@@ -0,0 +1,92 @@
+# Historical Re-validation Harness
+
+**Status**: Phase 4.6 first unit (scaffolding) — shipped 2026-05-27. Future PRs layer IC / PBO / DSR baselines on top.
+
+## What this harness IS
+
+A pure-function module + CLI that answers the first foundational question of honest re-validation: **what's the universe drift between today and any historical as-of date?**
+
+- `compute/validation/universe_drift.py` — `compute_universe_drift(as_of_date, current_universe) -> UniverseDriftReport` returning the 3-way partition (`added_since`, `removed_since`, `unchanged`)
+- `scripts/historical_pillar_revalidate.py` — CLI wrapper that prints the report (text or JSON)
+- 11 tests in `tests/test_validation/test_universe_drift.py`
+
+## What this harness is NOT (yet)
+
+This first unit ships the **universe-drift** half of honest re-validation. The other half — per-pillar IC / PBO / DSR baselines at historical dates — depends on:
+
+1. **Git-archived `rankings.json` snapshots** — daily cron commits exist (`chore: update rankings YYYY-MM-DD`), but a separate fetcher needs to check out historical SHAs and load the `composite_score` time series per ticker
+2. **Forward realized returns** — requires the gitignored `compute/cache/prices/` cache (5Y daily OHLCV per stock)
+3. **Pillar computation at historical dates** — requires `compute/scoring/pillars.py` pure functions to run on historical fundamentals (already pure; needs a wrapper that loads the right historical snapshot)
+
+Each of those is a separate PR, sized at 1-2 days. This PR ships the first leg cleanly.
+
+## How to use the CLI
+
+```bash
+# Real universe (fetches Wikipedia)
+python -m scripts.historical_pillar_revalidate --as-of 2024-06-01
+
+# Smoke mode (offline; uses 7-ticker hardcoded universe)
+python -m scripts.historical_pillar_revalidate --as-of 2023-06-01 --no-fetch-universe
+
+# JSON output for downstream tooling
+python -m scripts.historical_pillar_revalidate --as-of 2023-06-01 --json
+
+# Pre-coverage degraded mode (exit code 1, loud warning)
+python -m scripts.historical_pillar_revalidate --as-of 2010-01-01
+```
+
+### Sample output (smoke mode, 2023-06-01)
+
+```
+Universe drift report — as_of=2023-06-01
+  anchor date         : 2026-05-27
+  current universe    : 7 tickers
+  historical universe : 15 tickers
+  events applied      : 19
+  is_complete         : True
+  note                : reversed 19 post-as_of events
+
+  ADDED since as_of   : 1 tickers
+    SMCI
+  REMOVED since as_of : 9 tickers
+    AAP, ATVI, BIO, BLL, DISH, ETSY, LNC, WHR, ZION
+    ↑ this is the SURVIVORSHIP-BIAS-CORRECTED cohort —
+      current-universe-only views silently EXCLUDE these
+  UNCHANGED           : 6 tickers (always-in cohort)
+```
+
+The 9 REMOVED tickers (AAP/ATVI/BIO/BLL/DISH/ETSY/LNC/WHR/ZION) are exactly the cohort an honest backtest at as-of 2023-06-01 must include. A current-universe-only view silently excludes them → systematically biased Sharpe / IC estimates per Hou-Xue-Zhang (2020) RFS.
+
+## Methodology anchors
+
+- **Hou, Xue, Zhang (2020)**. "Replicating Anomalies." *Review of Financial Studies* 33(5):2019-2133.
+- **McLean, Pontiff (2016)**. "Does Academic Research Destroy Stock Return Predictability?" *Journal of Finance* 71(1):5-32.
+- License posture: `data/sp500_membership_historical.csv` is uncopyrightable factual data per Feist v. Rural Tel. Service Co. (1991).
+
+## Acceptance criteria for next PRs in the chain
+
+- [ ] Per-pillar IC re-baseline (load `rankings.json` history via git, compute IC against realized 6/12-month returns from `compute/cache/prices/`)
+- [ ] Per-pillar PBO/DSR re-run using PR #275's `universe_provider` kwarg
+- [ ] `manipulation_index` distribution comparison: forward (current universe) vs honest (historical-universe-corrected at quarterly anchor dates)
+- [ ] Honest-correction report `docs/research/honest-baseline-2026-05-27.md` documenting the delta vs Phase 4.5f published numbers (expected DOWN 5-15% per Research Report v1.0 §1.1 decay budget)
+
+## Caveats
+
+- **CSV coverage starts 2020-01-01** — pre-coverage queries return `is_complete=False` and degrade to current-universe fallback. Subsequent PRs may extend coverage backward.
+- **Only ADD/REMOVE events** — RENAME events (e.g., FB→META) don't affect membership and are excluded from drift analysis.
+- **Drift report is symmetric-difference only** — corporate actions like stock splits, dividends, and sector reclassifications are out of scope.
+- **No price/return data here** — `removed_since` is the cohort of tickers that were in the historical universe but are no longer index members; some were delisted or acquired, others still trade outside the index. Computing realized returns through a delisting requires CRSP or an equivalent (paid) source — this harness identifies the cohort; it doesn't price it.
+
+## Future-work TODO list
+
+| # | Item | Effort | Blocker |
+|---|---|---|---|
+| 1 | Git-archived `rankings.json` time-series loader | 1d | — |
+| 2 | Forward-return computation per ticker from `compute/cache/prices/` | 0.5d | gitignored cache; needs warm CI run |
+| 3 | Per-pillar IC at historical dates | 1d | needs #1 + #2 |
+| 4 | PBO/DSR re-baseline via `factor_passes_gates(universe_provider=members_at, ...)` | 1d | needs #3 |
+| 5 | `manipulation_index` distribution shift report | 0.5d | needs #1 |
+| 6 | `docs/research/honest-baseline-2026-05-27.md` with revised PBO/DSR numbers | 0.5d | needs #4 |
+
+**Total to honest-baseline report**: ~4-5 days focused dev across a sequence of PRs.
diff --git a/scripts/historical_pillar_revalidate.py b/scripts/historical_pillar_revalidate.py
@@ -0,0 +1,120 @@
+"""Historical pillar revalidation CLI — Phase 4.6 honest baseline scaffolding.
+
+Phase 4.6 task #2 first unit. Quick CLI that reports the universe drift
+between today and a historical as-of date. Future revisions layer on
+per-pillar IC / PBO / DSR baselines pulled from git-archived
+``rankings.json`` snapshots.
+
+Usage:
+    python -m scripts.historical_pillar_revalidate --as-of 2024-06-01
+    python -m scripts.historical_pillar_revalidate --as-of 2023-01-01 --json
+    python -m scripts.historical_pillar_revalidate --as-of 2020-01-01
+
+Exit codes:
+    0  — drift report produced cleanly
+    1  — degraded (is_complete=False) — operator must investigate
+    2  — usage / argument error, or the current-universe fetch failed
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import logging
+import sys
+from datetime import date as _date
+
+from compute.ingest.universe import get_sp500_constituents
+from compute.validation.universe_drift import (
+    compute_universe_drift,
+    format_drift_report,
+)
+
+logger = logging.getLogger(__name__)  # module-level logger, named after this module
+
+
+def _parse_iso_date(s: str) -> _date:
+    """Convert the ``--as-of`` argument text into a ``date``.
+
+    Serves as the argparse ``type=`` converter for ``--as-of``. Text that
+    ``date.fromisoformat`` rejects is reported as an
+    ``argparse.ArgumentTypeError`` chained to the original ``ValueError``.
+    """
+    try:
+        parsed = _date.fromisoformat(s)
+    except ValueError as exc:
+        message = f"--as-of must be ISO YYYY-MM-DD, got {s!r}"
+        raise argparse.ArgumentTypeError(message) from exc
+    return parsed
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Run the universe-drift CLI and return the process exit code.
+
+    Parses the arguments, obtains the current universe (fetched via
+    ``get_sp500_constituents`` or the hardcoded smoke set), computes the
+    drift report and prints it to stdout as text or JSON.
+
+    Args:
+        argv: argument list to parse; ``None`` lets argparse read
+            ``sys.argv[1:]``.
+
+    Returns:
+        0 when the report is complete, 1 when it is degraded
+        (``is_complete=False``), 2 when the current universe cannot be
+        fetched or ``compute_universe_drift`` rejects ``--as-of`` with a
+        ``ValueError`` (e.g. a future date).
+
+    Raises:
+        SystemExit: raised by argparse (status 2) on a malformed command line.
+    """
+    parser = argparse.ArgumentParser(
+        prog="historical_pillar_revalidate",
+        description=(
+            "Phase 4.6 universe-drift report for honest re-validation. "
+            "Compares today's S&P 500 to the constituent set at any "
+            "historical as-of date (per the CSV in data/sp500_membership_historical.csv)."
+        ),
+    )
+    parser.add_argument(
+        "--as-of",
+        required=True,
+        type=_parse_iso_date,
+        help="ISO date (YYYY-MM-DD) to query historical membership at.",
+    )
+    parser.add_argument(
+        "--json",
+        action="store_true",
+        help="Emit report as JSON to stdout (default: human-readable text).",
+    )
+    parser.add_argument(
+        "--no-fetch-universe",
+        action="store_true",
+        help=(
+            "Skip the Wikipedia fetch and use a tiny hardcoded current-"
+            "universe (CI-friendly + smoke-testable without network). "
+            "Drift counts will be artificially small."
+        ),
+    )
+    args = parser.parse_args(argv)
+
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
+    )
+
+    if args.no_fetch_universe:
+        current_universe = frozenset({"AAPL", "MSFT", "NVDA", "TSLA", "SMCI", "META", "GOOGL"})
+        logger.info(
+            "Using hardcoded 7-ticker current-universe (smoke mode); pass --as-of older "
+            "than 2020-01-01 to see degraded-mode handling."
+        )
+    else:
+        try:
+            df = get_sp500_constituents()
+        except Exception as exc:  # noqa: BLE001 — graceful degradation
+            logger.error("Universe fetch failed (%s). Re-run with --no-fetch-universe for offline.", type(exc).__name__)
+            return 2
+        current_universe = frozenset(df["ticker"].tolist())
+
+    try:
+        report = compute_universe_drift(args.as_of, current_universe)
+    except ValueError as exc:
+        # compute_universe_drift documents ValueError for a future as-of date.
+        # Left uncaught, the traceback would exit with status 1, which this
+        # CLI reserves for "degraded report"; report it as an argument error.
+        logger.error("compute_universe_drift rejected --as-of %s: %s", args.as_of.isoformat(), exc)
+        return 2
+
+    if args.json:
+        # Sets are emitted as sorted lists so the JSON is serialisable and stable.
+        out = {
+            "as_of_date": report.as_of_date.isoformat(),
+            "anchor_date": report.anchor_date.isoformat(),
+            "current_size": report.current_size,
+            "universe_size_at_as_of": report.universe_size_at_as_of,
+            "added_since_count": len(report.added_since),
+            "removed_since_count": len(report.removed_since),
+            "unchanged_count": len(report.unchanged),
+            "added_since": sorted(report.added_since),
+            "removed_since": sorted(report.removed_since),
+            "is_complete": report.is_complete,
+            "events_applied": report.events_applied,
+            "note": report.note,
+        }
+        print(json.dumps(out, indent=2))
+    else:
+        print(format_drift_report(report))
+
+    return 0 if report.is_complete else 1
+
+
+if __name__ == "__main__":  # true only when executed as a script / via ``python -m``
+    sys.exit(main())  # main()'s return value becomes the process exit status