From 69a42617c82f6e663022666ee23edb2fabdd0827 Mon Sep 17 00:00:00 2001 From: Brian McMahon Date: Tue, 21 Apr 2026 14:08:43 -0700 Subject: [PATCH] fix(data): short-history tickers are first-class, no silent skip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the len(hist) < MIN_ROWS_FOR_FEATURES silent skip in daily_append with an OHLCV-only write. When a ticker has insufficient history for feature warmup (new listings, IPOs, spinoffs — e.g. SNDK post the 2026 WDC flash-memory spinoff) we still write the authoritative close, with NaN for every feature column, and log "short-history ticker=X rows=N" so coverage gaps surface. A dedicated n_partial counter separates this state from n_skip (dry run / NaN close) and n_err (ArcticDB read failures). The 5% err_rate guard is unchanged — partial writes don't count as errors. Root cause traced 2026-04-21: EOD reconcile hard-failed on every held short-history ticker because authoritative close was missing from ArcticDB. New listings are a normal, recurring market event; silently dropping them violates the no-silent-fails, no-unscoreable-labels, and hard-fail-until-stable preferences. Regression test locks the write path + structured log. Co-Authored-By: Claude Opus 4.7 (1M context) --- builders/daily_append.py | 51 ++++++++++++++++++++++++++-- tests/test_daily_append_semantics.py | 51 ++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 3 deletions(-) diff --git a/builders/daily_append.py b/builders/daily_append.py index 9bede87..dd88c72 100644 --- a/builders/daily_append.py +++ b/builders/daily_append.py @@ -181,6 +181,7 @@ def daily_append( n_ok = 0 n_skip = 0 n_err = 0 + n_partial = 0 # short-history tickers: OHLCV-only written, features NaN for ticker in stock_tickers: try: @@ -197,7 +198,50 @@ def daily_append( continue if len(hist) < MIN_ROWS_FOR_FEATURES: - n_skip += 1 + # Short-history tickers (new listings, IPOs, spinoffs — e.g. 
+ # SNDK after the 2026 WDC flash-memory spinoff) are a + # first-class supported state, not a skip. Below the feature + # warmup threshold we write an OHLCV-only row with NaN for + # every feature column. Downstream consumers that need + # features (training, inference) see NaN and handle + # accordingly; consumers that only read prices (EOD + # reconcile, attribution) get the authoritative close. + # + # Prior behavior silently skipped the row entirely — no + # OHLCV written, no warning — which made EOD reconcile + # hard-fail on every held short-history ticker. See + # 2026-04-21 SNDK incident. + bar = closes[ticker] + if np.isnan(bar["Close"]): + n_skip += 1 + continue + + new_row = pd.DataFrame( + [{col: bar.get(col, np.nan) for col in OHLCV_COLS}], + index=pd.DatetimeIndex([today_ts], name="date"), + ) + # Align to the stored schema: NaN for every non-OHLCV column + # the library already has for this ticker. + for col in hist.columns: + if col not in new_row.columns: + new_row[col] = np.nan + new_row = new_row[hist.columns] + for col in new_row.columns: + if col in OHLCV_COLS: + if col == "Volume": + new_row[col] = new_row[col].astype("int64") + else: + new_row[col] = new_row[col].astype("float64") + else: + new_row[col] = new_row[col].astype("float32") + + log.warning( + "short-history ticker=%s rows=%d min_required=%d " + "— writing OHLCV-only row with NaN features", + ticker, len(hist), MIN_ROWS_FOR_FEATURES, + ) + universe_lib.update(ticker, new_row) + n_partial += 1 continue # Re-running daily_append for the same date MUST overwrite the @@ -391,6 +435,7 @@ def daily_append( "status": "ok", "date": date_str, "tickers_appended": n_ok, + "tickers_partial": n_partial, "tickers_skipped": n_skip, "tickers_errored": n_err, "load_seconds": round(t_load, 1), @@ -399,9 +444,9 @@ def daily_append( } log.info( - "ArcticDB daily_append: stocks n_ok=%d n_skip=%d n_err=%d (of %d) | " + "ArcticDB daily_append: stocks n_ok=%d n_partial=%d n_skip=%d n_err=%d (of %d) | " 
"macro_updated=%d sector_updated=%d | %.1fs total", - n_ok, n_skip, n_err, len(stock_tickers), + n_ok, n_partial, n_skip, n_err, len(stock_tickers), len(macro_updated) if not dry_run else 0, len(sector_updated) if not dry_run else 0, t_total, diff --git a/tests/test_daily_append_semantics.py b/tests/test_daily_append_semantics.py index a9ab0dd..9e3893d 100644 --- a/tests/test_daily_append_semantics.py +++ b/tests/test_daily_append_semantics.py @@ -72,6 +72,57 @@ def test_sector_etfs_iterate_explicit_list(): assert 'sector_etfs = ["XLB"' in src or 'sector_etfs = [\n' in src +def test_short_history_writes_ohlcv_not_skipped(): + """Short-history tickers (new listings, spinoffs) must get an OHLCV-only + row written, never silently skipped. + + Regression for 2026-04-21 SNDK incident: the 2026 WDC flash-memory + spinoff re-listed SNDK with ~44 rows of history. daily_append's + `len(hist) < MIN_ROWS_FOR_FEATURES` branch silently n_skip++'d without + writing any row. EOD reconcile then hard-failed on every held + short-history ticker because authoritative close was missing from + ArcticDB. New listings are a normal market event (20-40 S&P + constituent changes/year; every spinoff creates one). They are a + first-class supported state. + + The fix writes OHLCV + NaN-for-every-feature-column when below the + warmup threshold, logs loudly with a structured `short-history + ticker=X rows=N` message, and increments a dedicated ``n_partial`` + counter (not ``n_skip``, not ``n_err`` — short history is neither). + """ + src = _source() + + # Loud warning with structured key=val tags so coverage gaps surface. + assert "short-history ticker=" in src, ( + "short-history branch must log `short-history ticker=X rows=N` — " + "silent fallback is forbidden (feedback_no_silent_fails)." + ) + + # Dedicated counter must exist — short history is a partial write, not a skip.
+ assert "n_partial" in src, ( + "short-history path must track a dedicated n_partial counter, " + "distinct from n_skip (legitimate skips) and n_err (read errors)." + ) + + # Skip-only pattern (the bug) must be gone: the old `if len(hist) < + # MIN_ROWS_FOR_FEATURES: n_skip += 1; continue` with no write. + # Check the short-history branch reaches universe_lib.update(). + lines = src.splitlines() + for i, line in enumerate(lines): + if "len(hist) < MIN_ROWS_FOR_FEATURES" in line: + window = "\n".join(lines[i:i + 60]) + assert "universe_lib.update(ticker" in window, ( + "short-history branch must reach universe_lib.update() — " + "writing OHLCV-only is the whole point of the fix." + ) + assert "n_partial" in window, ( + "short-history branch must increment n_partial." + ) + break + else: + raise AssertionError("short-history branch not found in daily_append.py") + + def test_no_skip_guard_on_existing_today_row(): """daily_append must NOT skip tickers whose history already contains today_ts.