Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion polygon_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ def __init__(self, api_key: str | None = None, calls_per_min: int = 5):
self._call_times: deque[float] = deque()
self._session = requests.Session()
self._session.params = {"apiKey": self._api_key} # type: ignore[assignment]
# Per-process cache for grouped-daily responses. Historical grouped-daily
# data is immutable, and callers (universe_returns) fetch the same
# calendar dates repeatedly across overlapping eval_date windows
# (t0, +5d, +10d, +30d). Dedup'ing here cuts the free-tier 5 calls/min
# rate-limit tax by ~3.5× on backfill runs.
self._grouped_daily_cache: dict[str, dict[str, dict]] = {}

# -- Rate limiter --------------------------------------------------------

Expand Down Expand Up @@ -99,13 +105,18 @@ def get_grouped_daily(self, date_str: str) -> dict[str, dict]:
Returns {ticker: {"open": float, "high": float, "low": float,
"close": float, "volume": float,
"vwap": float | None}}

Responses are cached per-instance (see __init__). Empty results
(non-trading days) are cached too — same URL returns the same answer.
"""
if date_str in self._grouped_daily_cache:
return self._grouped_daily_cache[date_str]
data = self._get(
f"/v2/aggs/grouped/locale/us/market/stocks/{date_str}",
params={"adjusted": "true"},
)
results = data.get("results", [])
return {
parsed = {
r["T"]: {
"open": r["o"],
"high": r["h"],
Expand All @@ -117,6 +128,8 @@ def get_grouped_daily(self, date_str: str) -> dict[str, dict]:
for r in results
if "T" in r
}
self._grouped_daily_cache[date_str] = parsed
return parsed

def get_daily_bars(
self,
Expand Down
74 changes: 74 additions & 0 deletions tests/test_polygon_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""Tests for polygon_client.PolygonClient.

Focus: response caching in get_grouped_daily, which deduplicates repeated
calendar-date requests across overlapping eval_date windows in
universe_returns and cuts the rate-limit cost of the free tier
(5 calls/min) by ~3.5x on backfill runs.
"""

from __future__ import annotations

from unittest.mock import patch

import pytest

from polygon_client import PolygonClient


def _make_client() -> PolygonClient:
    """Build a PolygonClient for tests, using a placeholder API key."""
    client = PolygonClient(api_key="test-key", calls_per_min=5)
    return client


def _fake_response(tickers: list[tuple[str, float]]) -> dict:
return {
"results": [
{"T": t, "o": 1.0, "h": 2.0, "l": 0.5, "c": close, "v": 1000, "vw": 1.5}
for t, close in tickers
],
"resultsCount": len(tickers),
}


def test_grouped_daily_caches_identical_dates():
    """Requesting the same date twice results in a single _get call."""
    client = _make_client()
    payload = _fake_response([("AAPL", 200.0)])
    with patch.object(client, "_get", return_value=payload) as mock_get:
        results = [client.get_grouped_daily("2026-01-05") for _ in range(2)]
    # One underlying fetch, and both calls see the same parsed data.
    assert mock_get.call_count == 1
    assert results[0] == results[1]
    assert results[0]["AAPL"]["close"] == 200.0


def test_grouped_daily_distinct_dates_hit_api():
    """Two different dates each trigger their own _get call and keep their own data."""
    client = _make_client()
    expected_close = {"2026-01-05": 200.0, "2026-01-06": 201.0}
    # side_effect hands out one payload per _get call, in request order.
    payloads = [_fake_response([("AAPL", close)]) for close in expected_close.values()]
    with patch.object(client, "_get", side_effect=payloads) as mock_get:
        fetched = {day: client.get_grouped_daily(day) for day in expected_close}
    assert mock_get.call_count == 2
    for day, close in expected_close.items():
        assert fetched[day]["AAPL"]["close"] == close


def test_grouped_daily_caches_empty_response():
    """An empty result (e.g. a non-trading day) is cached like any other response."""
    saturday = "2026-01-03"
    empty_payload = {"results": [], "resultsCount": 0}
    client = _make_client()
    with patch.object(client, "_get", return_value=empty_payload) as mock_get:
        first = client.get_grouped_daily(saturday)
        second = client.get_grouped_daily(saturday)
    # The second lookup must not reach _get even though the cached value is falsy.
    assert mock_get.call_count == 1
    assert first == {}
    assert second == {}


def test_cache_is_per_instance():
    """A date cached on one client still triggers a fetch on a separate client."""
    same_day = "2026-01-05"
    first_client = _make_client()
    second_client = _make_client()

    first_payload = _fake_response([("AAPL", 200.0)])
    with patch.object(first_client, "_get", return_value=first_payload) as first_mock:
        first_client.get_grouped_daily(same_day)

    second_payload = _fake_response([("AAPL", 201.0)])
    with patch.object(second_client, "_get", return_value=second_payload) as second_mock:
        second_client.get_grouped_daily(same_day)

    # Each instance had to call _get itself; nothing was shared between them.
    assert first_mock.call_count == 1
    assert second_mock.call_count == 1
Loading