Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 48 additions & 3 deletions builders/daily_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ def daily_append(
n_ok = 0
n_skip = 0
n_err = 0
n_partial = 0 # short-history tickers: OHLCV-only written, features NaN

for ticker in stock_tickers:
try:
Expand All @@ -197,7 +198,50 @@ def daily_append(
continue

if len(hist) < MIN_ROWS_FOR_FEATURES:
n_skip += 1
# Short-history tickers (new listings, IPOs, spinoffs — e.g.
# SNDK after the 2026 WDC flash-memory spinoff) are a
# first-class supported state, not a skip. Below the feature
# warmup threshold we write an OHLCV-only row with NaN for
# every feature column. Downstream consumers that need
# features (training, inference) see NaN and handle
# accordingly; consumers that only read prices (EOD
# reconcile, attribution) get the authoritative close.
#
# Prior behavior silently skipped the row entirely — no
# OHLCV written, no warning — which made EOD reconcile
# hard-fail on every held short-history ticker. See
# 2026-04-21 SNDK incident.
bar = closes[ticker]
if np.isnan(bar["Close"]):
n_skip += 1
continue

new_row = pd.DataFrame(
[{col: bar.get(col, np.nan) for col in OHLCV_COLS}],
index=pd.DatetimeIndex([today_ts], name="date"),
)
# Align to the stored schema: NaN for every non-OHLCV column
# the library already has for this ticker.
for col in hist.columns:
if col not in new_row.columns:
new_row[col] = np.nan
new_row = new_row[hist.columns]
for col in new_row.columns:
if col in OHLCV_COLS:
if col == "Volume":
new_row[col] = new_row[col].astype("int64")
else:
new_row[col] = new_row[col].astype("float64")
else:
new_row[col] = new_row[col].astype("float32")

log.warning(
"short-history ticker=%s rows=%d min_required=%d "
"— writing OHLCV-only row with NaN features",
ticker, len(hist), MIN_ROWS_FOR_FEATURES,
)
universe_lib.update(ticker, new_row)
n_partial += 1
continue

# Re-running daily_append for the same date MUST overwrite the
Expand Down Expand Up @@ -391,6 +435,7 @@ def daily_append(
"status": "ok",
"date": date_str,
"tickers_appended": n_ok,
"tickers_partial": n_partial,
"tickers_skipped": n_skip,
"tickers_errored": n_err,
"load_seconds": round(t_load, 1),
Expand All @@ -399,9 +444,9 @@ def daily_append(
}

log.info(
"ArcticDB daily_append: stocks n_ok=%d n_skip=%d n_err=%d (of %d) | "
"ArcticDB daily_append: stocks n_ok=%d n_partial=%d n_skip=%d n_err=%d (of %d) | "
"macro_updated=%d sector_updated=%d | %.1fs total",
n_ok, n_skip, n_err, len(stock_tickers),
n_ok, n_partial, n_skip, n_err, len(stock_tickers),
len(macro_updated) if not dry_run else 0,
len(sector_updated) if not dry_run else 0,
t_total,
Expand Down
51 changes: 51 additions & 0 deletions tests/test_daily_append_semantics.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,57 @@ def test_sector_etfs_iterate_explicit_list():
assert 'sector_etfs = ["XLB"' in src or 'sector_etfs = [\n' in src


def test_short_history_writes_ohlcv_not_skipped():
    """Guard the short-history write path in ``daily_append``.

    Tickers with fewer than ``MIN_ROWS_FOR_FEATURES`` rows of history (new
    listings, spinoffs) must have an OHLCV-only row written; they must never
    be dropped silently.

    Regression for the 2026-04-21 SNDK incident: after the 2026 WDC
    flash-memory spinoff, SNDK was re-listed with ~44 rows of history. The
    ``len(hist) < MIN_ROWS_FOR_FEATURES`` branch bumped ``n_skip`` and moved
    on without writing anything, so EOD reconcile hard-failed on every held
    short-history ticker because the authoritative close was absent from
    ArcticDB. New listings are routine (20-40 S&P constituent changes per
    year; every spinoff creates one) and are a first-class supported state.

    This is a source-text check. It verifies three things about
    ``daily_append.py``:

    * a structured ``short-history ticker=X rows=N`` warning is emitted;
    * a dedicated ``n_partial`` counter exists (short history is neither a
      legitimate skip nor a read error, so not ``n_skip`` / ``n_err``);
    * within 60 lines of the first short-history guard, the code reaches
      ``universe_lib.update(ticker`` and touches ``n_partial``.
    """
    src = _source()

    # The branch has to announce itself with key=val tags so that coverage
    # gaps are visible in the logs.
    assert "short-history ticker=" in src, (
        "short-history branch must log `short-history ticker=X rows=N` — "
        "silent fallback is forbidden (feedback_no_silent_fails)."
    )

    # A separate counter proves the ticker is accounted for as "partial".
    assert "n_partial" in src, (
        "short-history path must track a dedicated n_partial counter, "
        "distinct from n_skip (legitimate skips) and n_err (read errors)."
    )

    # The old bug was `if len(hist) < MIN_ROWS_FOR_FEATURES: n_skip += 1;
    # continue` with no write. Locate the first occurrence of the guard and
    # inspect the lines that follow it.
    guard = "len(hist) < MIN_ROWS_FOR_FEATURES"
    source_lines = src.splitlines()
    guard_at = next(
        (idx for idx, text in enumerate(source_lines) if guard in text),
        None,
    )
    if guard_at is None:
        raise AssertionError("short-history branch not found in daily_append.py")

    branch_text = "\n".join(source_lines[guard_at:guard_at + 60])
    assert "universe_lib.update(ticker" in branch_text, (
        "short-history branch must reach universe_lib.update() — "
        "writing OHLCV-only is the whole point of the fix."
    )
    assert "n_partial" in branch_text, (
        "short-history branch must increment n_partial."
    )


def test_no_skip_guard_on_existing_today_row():
"""daily_append must NOT skip tickers whose history already contains today_ts.

Expand Down
Loading