Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 185 additions & 0 deletions compute/validation/universe_drift.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
"""Universe drift diagnostics — Phase 4.6 honest re-validation scaffolding.

Phase 4.6 task #2 first unit. Closes the gap between PR #276 (writer
wiring populates ``Metadata.universe_membership_as_of`` going forward)
and the eventual full historical re-validation of pillars +
``manipulation_index`` per Research Report v1.0 §7.4.

This module answers the foundational question every honest re-validation
must answer FIRST: **what's the universe drift between today and any
historical as-of date?** Without this answer, IC / PBO / DSR re-runs are
just numbers — you can't tell if the delta vs the published baseline is
from (a) survivorship bias correction, (b) scoring drift, or (c) real
factor decay.

The module is pure-functional: takes a date and a current-universe set,
returns a ``UniverseDriftReport``. No I/O beyond reading the historical
CSV (handled by ``compute.ingest.historical_universe``).

Future PRs (out of this scope) will layer on:
- IC time-series per pillar at historical as-of dates
- PBO / DSR re-runs using the universe_provider kwarg landed in PR #275
- Honest baseline comparison vs published Phase 4.5f numbers

API:
>>> from datetime import date
>>> from compute.validation.universe_drift import compute_universe_drift
>>> current = frozenset({"AAPL", "MSFT", "TSLA", "SMCI"})
>>> report = compute_universe_drift(date(2023, 1, 1), current)
>>> report.added_since # tickers added between 2023-01-01 and today
frozenset({"SMCI"})
>>> report.removed_since # tickers removed between 2023-01-01 and today
frozenset() # (depends on CSV coverage)
>>> report.universe_size_at_as_of
3
>>> report.is_complete
True
"""

from __future__ import annotations

import logging
from dataclasses import dataclass
from datetime import date as _date

from compute.ingest.historical_universe import MembershipResult, members_at

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class UniverseDriftReport:
    """Immutable outcome of ``compute_universe_drift(as_of_date, current_universe)``.

    The report pairs two universe sizes with the three ticker cohorts
    obtained by comparing the current universe against the historical
    one, plus the diagnostic fields copied from the underlying
    ``MembershipResult``. Field order is part of the constructor
    contract and must not be rearranged.
    """

    # Historical date that was queried.
    as_of_date: _date
    # Date the current_universe snapshot was taken (typically today).
    anchor_date: _date
    # len(current_universe), kept for downstream sanity checks.
    current_size: int
    # len(historical_universe) per the CSV reversal walk.
    universe_size_at_as_of: int
    # In CURRENT but not in historical: ADDED to the S&P 500 between
    # as_of_date and today.
    added_since: frozenset[str]
    # In HISTORICAL but not in current: REMOVED from the S&P 500 between
    # as_of_date and today. This is the survivorship-bias-corrected
    # cohort that current-only views silently exclude.
    removed_since: frozenset[str]
    # In BOTH: the always-in cohort over the window.
    unchanged: frozenset[str]
    # Pass-through of MembershipResult.is_complete from members_at().
    is_complete: bool
    # Number of reverse events the underlying walk applied.
    events_applied: int
    # Pass-through of MembershipResult.note.
    note: str


def compute_universe_drift(
    as_of_date: _date,
    current_universe: set[str] | frozenset[str],
    anchor_date: _date | None = None,
) -> UniverseDriftReport:
    """Return the add/remove drift between the current universe and ``as_of_date``.

    Wraps ``compute.ingest.historical_universe.members_at()`` and splits
    the two universes into the added / removed / unchanged cohorts that
    downstream honest-re-validation work consumes.

    Args:
        as_of_date: the historical date to query.
        current_universe: today's S&P 500 anchor set.
        anchor_date: forwarded unchanged to ``members_at``; ``None``
            lets ``members_at`` pick its default (today UTC).

    Returns:
        UniverseDriftReport with added / removed / unchanged sets,
        universe sizes, and the completeness flag, event count and note
        copied from the ``MembershipResult``. All three ticker sets are
        ``frozenset`` instances regardless of the input set types.

    Raises:
        ValueError: if ``as_of_date`` is in the future (propagates
            from members_at).
    """
    membership: MembershipResult = members_at(
        as_of_date=as_of_date,
        current_universe=current_universe,
        anchor_date=anchor_date,
    )

    # Set operators return the type of their LEFT operand, so a mutable
    # ``set`` coming back from members_at would make ``removed`` mutable
    # and the frozen report unhashable. Coerce both sides up front;
    # frozenset() of a frozenset is a cheap no-op.
    current_set = frozenset(current_universe)
    historical_set = frozenset(membership.tickers)

    added = current_set - historical_set
    removed = historical_set - current_set
    unchanged = current_set & historical_set

    if not membership.is_complete:
        # A degraded membership result still yields a report; the
        # warning tells operators the cohorts are not trustworthy.
        logger.warning(
            "compute_universe_drift: degraded report for as_of=%s — "
            "is_complete=False, note=%r. Added/removed sets reflect "
            "current-only fallback, NOT honest historical reversal.",
            as_of_date,
            membership.note,
        )

    return UniverseDriftReport(
        as_of_date=as_of_date,
        # Report the anchor members_at actually used, not the raw argument.
        anchor_date=membership.anchor_date,
        current_size=len(current_set),
        universe_size_at_as_of=len(historical_set),
        added_since=added,
        removed_since=removed,
        unchanged=unchanged,
        is_complete=membership.is_complete,
        events_applied=membership.events_applied,
        note=membership.note,
    )


def format_drift_report(report: UniverseDriftReport, *, max_listed: int = 20) -> str:
    """Turn a UniverseDriftReport into a newline-joined text block.

    The CLI in ``scripts/historical_pillar_revalidate.py`` prints this.
    Each ticker cohort is listed in sorted order and truncated to
    ``max_listed`` names, with a ``(+N more)`` suffix when names were
    dropped, so the output stays scannable in a terminal.
    """

    def _ticker_line(tickers: frozenset[str]) -> str:
        # Sorted, capped listing plus an overflow marker when truncated.
        shown = sorted(tickers)[:max_listed]
        hidden = len(tickers) - len(shown)
        suffix = f" (+{hidden} more)" if hidden else ""
        return f" {', '.join(shown)}{suffix}"

    out = [
        f"Universe drift report — as_of={report.as_of_date}",
        f" anchor date : {report.anchor_date}",
        f" current universe : {report.current_size} tickers",
        f" historical universe : {report.universe_size_at_as_of} tickers",
        f" events applied : {report.events_applied}",
        f" is_complete : {report.is_complete}",
    ]
    if report.note:
        out.append(f" note : {report.note}")
    # Blank separator between the header and the cohort sections.
    out.append("")

    out.append(f" ADDED since as_of : {len(report.added_since)} tickers")
    if report.added_since:
        out.append(_ticker_line(report.added_since))

    out.append(f" REMOVED since as_of : {len(report.removed_since)} tickers")
    if report.removed_since:
        out.extend(
            [
                _ticker_line(report.removed_since),
                " ↑ this is the SURVIVORSHIP-BIAS-CORRECTED cohort —",
                " current-universe-only views silently EXCLUDE these",
            ]
        )

    out.append(f" UNCHANGED : {len(report.unchanged)} tickers (always-in cohort)")
    return "\n".join(out)


# Names exported by ``from compute.validation.universe_drift import *``.
__all__ = [
    "UniverseDriftReport",
    "compute_universe_drift",
    "format_drift_report",
]
92 changes: 92 additions & 0 deletions docs/research/historical-revalidation-harness.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Historical Re-validation Harness

**Status**: Phase 4.6 first unit (scaffolding) — shipped 2026-05-27. Future PRs layer IC / PBO / DSR baselines on top.

## What this harness IS

A pure-function module + CLI that answers the first foundational question of honest re-validation: **what's the universe drift between today and any historical as-of date?**

- `compute/validation/universe_drift.py` — `compute_universe_drift(as_of_date, current_universe) -> UniverseDriftReport` returning the 3-way partition (`added_since`, `removed_since`, `unchanged`)
- `scripts/historical_pillar_revalidate.py` — CLI wrapper that prints the report (text or JSON)
- 11 tests in `tests/test_validation/test_universe_drift.py`

## What this harness is NOT (yet)

This first unit ships the **universe-drift** half of honest re-validation. The other half — per-pillar IC / PBO / DSR baselines at historical dates — depends on:

1. **Git-archived `rankings.json` snapshots** — daily cron commits exist (`chore: update rankings YYYY-MM-DD`), but a separate fetcher needs to check out historical SHAs and load the `composite_score` time series per ticker
2. **Forward realized returns** — requires the gitignored `compute/cache/prices/` cache (5Y daily OHLCV per stock)
3. **Pillar computation at historical dates** — requires `compute/scoring/pillars.py` pure functions to run on historical fundamentals (already pure; needs a wrapper that loads the right historical snapshot)

Each of those is a separate PR sized at 1-2 days. This PR ships the first leg cleanly.

## How to use the CLI

```bash
# Real universe (fetches Wikipedia)
python -m scripts.historical_pillar_revalidate --as-of 2024-06-01

# Smoke mode (offline; uses 7-ticker hardcoded universe)
python -m scripts.historical_pillar_revalidate --as-of 2023-06-01 --no-fetch-universe

# JSON output for downstream tooling
python -m scripts.historical_pillar_revalidate --as-of 2023-06-01 --json

# Pre-coverage degraded mode (exit code 1, loud warning)
python -m scripts.historical_pillar_revalidate --as-of 2010-01-01
```

### Sample output (smoke mode, 2023-06-01)

```
Universe drift report — as_of=2023-06-01
anchor date : 2026-05-27
current universe : 7 tickers
historical universe : 15 tickers
events applied : 19
is_complete : True
note : reversed 19 post-as_of events

ADDED since as_of : 1 tickers
SMCI
REMOVED since as_of : 9 tickers
AAP, ATVI, BIO, BLL, DISH, ETSY, LNC, WHR, ZION
↑ this is the SURVIVORSHIP-BIAS-CORRECTED cohort —
current-universe-only views silently EXCLUDE these
UNCHANGED : 6 tickers (always-in cohort)
```

The 9 REMOVED tickers (AAP/ATVI/BIO/BLL/DISH/ETSY/LNC/WHR/ZION) are exactly the cohort an honest backtest at as-of 2023-06-01 must include. A current-universe-only view silently excludes them → systematically biased Sharpe / IC estimates per Hou-Xue-Zhang (2020) RFS.

## Methodology anchors

- **Hou, Xue, Zhang (2020)**. "Replicating Anomalies." *Review of Financial Studies* 33(5):2019-2133.
- **McLean, Pontiff (2016)**. "Does Academic Research Destroy Stock Return Predictability?" *Journal of Finance* 71(1):5-32.
- License posture: `data/sp500_membership_historical.csv` is uncopyrightable factual data per Feist v. Rural Tel. Service Co. (1991).

## Acceptance criteria for next PRs in the chain

- [ ] Per-pillar IC re-baseline (load `rankings.json` history via git, compute IC against realized 6/12-month returns from `compute/cache/prices/`)
- [ ] Per-pillar PBO/DSR re-run using PR #275's `universe_provider` kwarg
- [ ] `manipulation_index` distribution comparison: forward (current universe) vs honest (historical-universe-corrected at quarterly anchor dates)
- [ ] Honest-correction report `docs/research/honest-baseline-2026-05-27.md` documenting the delta vs Phase 4.5f published numbers (expected DOWN 5-15% per Research Report v1.0 §1.1 decay budget)

## Caveats

- **CSV coverage starts 2020-01-01** — pre-coverage queries return `is_complete=False` and degrade to current-universe fallback. Subsequent PRs may extend coverage backward.
- **Only ADD/REMOVE events** — RENAME events (e.g., FB→META) don't affect membership and are excluded from drift analysis.
- **Drift report is symmetric-difference only** — corporate actions like stock splits, dividends, and sector reclassifications are out of scope.
- **No price/return data here** — `removed_since` is the cohort of tickers that were in the historical universe but are no longer S&P 500 members; some were delisted or acquired, while others may still trade outside the index. Computing realized returns through delisting for the former requires CRSP or equivalent (paid) — this harness identifies the cohort, doesn't price it.

## Future-work TODO list

| # | Item | Effort | Blocker |
|---|---|---|---|
| 1 | Git-archived `rankings.json` time-series loader | 1d | — |
| 2 | Forward-return computation per ticker from `compute/cache/prices/` | 0.5d | gitignored cache; needs warm CI run |
| 3 | Per-pillar IC at historical dates | 1d | needs #1 + #2 |
| 4 | PBO/DSR re-baseline via `factor_passes_gates(universe_provider=members_at, ...)` | 1d | needs #3 |
| 5 | `manipulation_index` distribution shift report | 0.5d | needs #1 |
| 6 | `docs/research/honest-baseline-2026-05-27.md` with revised PBO/DSR numbers | 0.5d | needs #4 |

**Total to honest-baseline report**: ~4-5 days focused dev across a sequence of PRs.
120 changes: 120 additions & 0 deletions scripts/historical_pillar_revalidate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""Historical pillar revalidation CLI — Phase 4.6 honest baseline scaffolding.

Phase 4.6 task #2 first unit. Quick CLI that reports the universe drift
between today and a historical as-of date. Future revisions layer on
per-pillar IC / PBO / DSR baselines pulled from git-archived
``rankings.json`` snapshots.

Usage:
python -m scripts.historical_pillar_revalidate --as-of 2024-06-01
python -m scripts.historical_pillar_revalidate --as-of 2023-01-01 --json
python -m scripts.historical_pillar_revalidate --as-of 2020-01-01

Exit codes:
0 — drift report produced cleanly
1 — degraded (is_complete=False) — operator must investigate
2 — usage / argument error, or the current-universe fetch failed
"""

from __future__ import annotations

import argparse
import json
import logging
import sys
from datetime import date as _date

from compute.ingest.universe import get_sp500_constituents
from compute.validation.universe_drift import (
compute_universe_drift,
format_drift_report,
)

logger = logging.getLogger(__name__)


def _parse_iso_date(s: str) -> _date:
    """argparse ``type=`` converter for ``--as-of``.

    Returns the date parsed by ``date.fromisoformat``. A ``ValueError``
    from the parser is re-raised as ``argparse.ArgumentTypeError``
    (chained to the original) so argparse reports it as a usage error.
    """
    try:
        parsed = _date.fromisoformat(s)
    except ValueError as exc:
        message = f"--as-of must be ISO YYYY-MM-DD, got {s!r}"
        raise argparse.ArgumentTypeError(message) from exc
    return parsed


def main(argv: list[str] | None = None) -> int:
    """Run the universe-drift CLI and return the process exit code.

    Args:
        argv: arguments to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        0 when a complete report was produced, 1 when the report is
        degraded (``is_complete`` is False), 2 when the current universe
        could not be fetched or ``compute_universe_drift`` rejected the
        request with ``ValueError`` (e.g. a future ``--as-of``).
        Malformed command lines exit with status 2 via argparse itself.
    """
    parser = argparse.ArgumentParser(
        prog="historical_pillar_revalidate",
        description=(
            "Phase 4.6 universe-drift report for honest re-validation. "
            "Compares today's S&P 500 to the constituent set at any "
            "historical as-of date (per the CSV in data/sp500_membership_historical.csv)."
        ),
    )
    parser.add_argument(
        "--as-of",
        required=True,
        type=_parse_iso_date,
        help="ISO date (YYYY-MM-DD) to query historical membership at.",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Emit report as JSON to stdout (default: human-readable text).",
    )
    parser.add_argument(
        "--no-fetch-universe",
        action="store_true",
        help=(
            "Skip the Wikipedia fetch and use a tiny hardcoded current-"
            "universe (CI-friendly + smoke-testable without network). "
            "Drift counts will be artificially small."
        ),
    )
    args = parser.parse_args(argv)

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )

    if args.no_fetch_universe:
        current_universe = frozenset({"AAPL", "MSFT", "NVDA", "TSLA", "SMCI", "META", "GOOGL"})
        logger.info(
            "Using hardcoded 7-ticker current-universe (smoke mode); pass --as-of older "
            "than 2020-01-01 to see degraded-mode handling."
        )
    else:
        try:
            df = get_sp500_constituents()
        except Exception as exc:  # noqa: BLE001 — graceful degradation
            logger.error("Universe fetch failed (%s). Re-run with --no-fetch-universe for offline.", type(exc).__name__)
            return 2
        current_universe = frozenset(df["ticker"].tolist())

    # compute_universe_drift documents ValueError for a future as-of date.
    # Report that as an argument error (exit 2) instead of a traceback.
    try:
        report = compute_universe_drift(args.as_of, current_universe)
    except ValueError as exc:
        logger.error("Cannot compute universe drift for as_of=%s: %s", args.as_of, exc)
        return 2

    if args.json:
        out = {
            "as_of_date": report.as_of_date.isoformat(),
            "anchor_date": report.anchor_date.isoformat(),
            "current_size": report.current_size,
            "universe_size_at_as_of": report.universe_size_at_as_of,
            "added_since_count": len(report.added_since),
            "removed_since_count": len(report.removed_since),
            "unchanged_count": len(report.unchanged),
            # Sets are not JSON-serializable; emit sorted lists for stable output.
            "added_since": sorted(report.added_since),
            "removed_since": sorted(report.removed_since),
            "is_complete": report.is_complete,
            "events_applied": report.events_applied,
            "note": report.note,
        }
        print(json.dumps(out, indent=2))
    else:
        print(format_drift_report(report))

    # A degraded report is still printed, but flagged through the exit code.
    return 0 if report.is_complete else 1


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
Loading