diff --git a/builders/daily_append.py b/builders/daily_append.py index 9bede87..dd88c72 100644 --- a/builders/daily_append.py +++ b/builders/daily_append.py @@ -181,6 +181,7 @@ def daily_append( n_ok = 0 n_skip = 0 n_err = 0 + n_partial = 0 # short-history tickers: OHLCV-only written, features NaN for ticker in stock_tickers: try: @@ -197,7 +198,50 @@ def daily_append( continue if len(hist) < MIN_ROWS_FOR_FEATURES: - n_skip += 1 + # Short-history tickers (new listings, IPOs, spinoffs — e.g. + # SNDK after the 2026 WDC flash-memory spinoff) are a + # first-class supported state, not a skip. Below the feature + # warmup threshold we write an OHLCV-only row with NaN for + # every feature column. Downstream consumers that need + # features (training, inference) see NaN and handle + # accordingly; consumers that only read prices (EOD + # reconcile, attribution) get the authoritative close. + # + # Prior behavior silently skipped the row entirely — no + # OHLCV written, no warning — which made EOD reconcile + # hard-fail on every held short-history ticker. See + # 2026-04-21 SNDK incident. + bar = closes[ticker] + if np.isnan(bar["Close"]): + n_skip += 1 + continue + + new_row = pd.DataFrame( + [{col: bar.get(col, np.nan) for col in OHLCV_COLS}], + index=pd.DatetimeIndex([today_ts], name="date"), + ) + # Align to the stored schema: NaN for every non-OHLCV column + # the library already has for this ticker. + for col in hist.columns: + if col not in new_row.columns: + new_row[col] = np.nan + new_row = new_row[hist.columns] + for col in new_row.columns: + if col in OHLCV_COLS: + if col == "Volume": + new_row[col] = new_row[col].astype("int64") + else: + new_row[col] = new_row[col].astype("float64") + else: + new_row[col] = new_row[col].astype("float32") + + log.warning( + "short-history ticker=%s rows=%d min_required=%d " + "— writing OHLCV-only row with NaN features", + ticker, len(hist), MIN_ROWS_FOR_FEATURES, + ) + universe_lib.update(ticker, new_row) + n_partial += 1 continue # Re-running daily_append for the same date MUST overwrite the @@ -391,6 +435,7 @@ def daily_append( "status": "ok", "date": date_str, "tickers_appended": n_ok, + "tickers_partial": n_partial, "tickers_skipped": n_skip, "tickers_errored": n_err, "load_seconds": round(t_load, 1), @@ -399,9 +444,9 @@ def daily_append( } log.info( - "ArcticDB daily_append: stocks n_ok=%d n_skip=%d n_err=%d (of %d) | " + "ArcticDB daily_append: stocks n_ok=%d n_partial=%d n_skip=%d n_err=%d (of %d) | " "macro_updated=%d sector_updated=%d | %.1fs total", - n_ok, n_skip, n_err, len(stock_tickers), + n_ok, n_partial, n_skip, n_err, len(stock_tickers), len(macro_updated) if not dry_run else 0, len(sector_updated) if not dry_run else 0, t_total, diff --git a/tests/test_daily_append_semantics.py b/tests/test_daily_append_semantics.py index a9ab0dd..9e3893d 100644 --- a/tests/test_daily_append_semantics.py +++ b/tests/test_daily_append_semantics.py @@ -72,6 +72,57 @@ def test_sector_etfs_iterate_explicit_list(): assert 'sector_etfs = ["XLB"' in src or 'sector_etfs = [\n' in src +def test_short_history_writes_ohlcv_not_skipped(): + """Short-history tickers (new listings, spinoffs) must get an OHLCV-only + row written, never silently skipped. + + Regression for 2026-04-21 SNDK incident: the 2026 WDC flash-memory + spinoff re-listed SNDK with ~44 rows of history. daily_append's + `len(hist) < MIN_ROWS_FOR_FEATURES` branch silently n_skip++'d without + writing any row. EOD reconcile then hard-failed on every held + short-history ticker because authoritative close was missing from + ArcticDB. New listings are a normal market event (20-40 S&P + constituent changes/year; every spinoff creates one). They are a + first-class supported state. + + The fix writes OHLCV + NaN-for-every-feature-column when below the + warmup threshold, logs loudly with a structured `short-history + ticker=X rows=N` message, and increments a dedicated ``n_partial`` + counter (not ``n_skip``, not ``n_err`` — short history is neither). + """ + src = _source() + + # Loud warning with structured key=val tags so coverage gaps surface. + assert "short-history ticker=" in src, ( + "short-history branch must log `short-history ticker=X rows=N` — " + "silent fallback is forbidden (feedback_no_silent_fails)." + ) + + # Write path must exist — ticker gets OHLCV, not a skip. + assert "n_partial" in src, ( + "short-history path must track a dedicated n_partial counter, " + "distinct from n_skip (legitimate skips) and n_err (read errors)." + ) + + # Skip-only pattern (the bug) must be gone: the old `if len(hist) < + # MIN_ROWS_FOR_FEATURES: n_skip += 1; continue` with no write. + # Check the short-history branch reaches universe_lib.update(). + lines = src.splitlines() + for i, line in enumerate(lines): + if "len(hist) < MIN_ROWS_FOR_FEATURES" in line: + window = "\n".join(lines[i:i + 60]) + assert "universe_lib.update(ticker" in window, ( + "short-history branch must reach universe_lib.update() — " + "writing OHLCV-only is the whole point of the fix." + ) + assert "n_partial" in window, ( + "short-history branch must increment n_partial." + ) + break + else: + raise AssertionError("short-history branch not found in daily_append.py") + + def test_no_skip_guard_on_existing_today_row(): """daily_append must NOT skip tickers whose history already contains today_ts.