Merged
1 change: 1 addition & 0 deletions TODO.md
@@ -82,6 +82,7 @@ Deferred items from PR reviews that were not addressed before merge.
| HC2 / HC2 + Bell-McCaffrey on absorbed-FE fits currently raises `NotImplementedError` in three places: `TwoWayFixedEffects` unconditionally; `DifferenceInDifferences(absorb=..., vcov_type in {"hc2","hc2_bm"})`; `MultiPeriodDiD(absorb=..., vcov_type in {"hc2","hc2_bm"})`. Within-transformation preserves coefficients and residuals under FWL but not the hat matrix, so the reduced-design `h_ii` is not the diagonal of the full FE projection and CR2's block adjustment `A_g = (I - H_gg)^{-1/2}` is likewise wrong on absorbed cluster blocks. Lifting the guard needs HC2/CR2-BM computed from the full absorbed projection (unit/time FE dummies reconstructed internally, or a FE-aware hat-matrix formulation) and a parity harness against a full-dummy OLS run or R `fixest`/`clubSandwich`. HC1/CR1 are unaffected by this because they have no leverage term. | `twfe.py::fit`, `estimators.py::DifferenceInDifferences.fit`, `estimators.py::MultiPeriodDiD.fit` | Phase 1a | Medium |
| Weighted CR2 Bell-McCaffrey cluster-robust (`vcov_type="hc2_bm"` + `cluster_ids` + `weights`) currently raises `NotImplementedError`. Weighted hat matrix and residual rebalancing need threading per clubSandwich WLS handling. | `linalg.py::_compute_cr2_bm` | Phase 1a | Medium |
| Regenerate `benchmarks/data/clubsandwich_cr2_golden.json` from R (`Rscript benchmarks/R/generate_clubsandwich_golden.R`). Current JSON has `source: python_self_reference` as a stability anchor until an authoritative R run. | `benchmarks/R/generate_clubsandwich_golden.R` | Phase 1a | Medium |
| `honest_did.py:1907` `np.linalg.solve(A_sys, b_sys) / except LinAlgError: continue` is a silent basis-rejection in the vertex-enumeration loop that is algorithmically intentional (try the next basis). Consider surfacing a count of rejected bases as a diagnostic when ARP enumeration exhausts, so users see when the vertex search was heavily constrained. Not a silent failure in the sense of the Phase 2 audit (the algorithm is supposed to skip), but the diagnostic would help debug borderline cases. | `honest_did.py` | #334 | Low |
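The hat-matrix point in the first row can be checked numerically. Below is a minimal sketch on hypothetical toy data (not library code): under FWL, two-way demeaning reproduces the full-dummy coefficient and residuals exactly, but the reduced-design leverages `h_ii` are not the diagonal of the full fixed-effects projection, which is exactly what HC2/CR2 need.

```python
import numpy as np

rng = np.random.default_rng(0)
n_units, n_periods = 4, 3
unit = np.repeat(np.arange(n_units), n_periods)
time = np.tile(np.arange(n_periods), n_units)
x = rng.normal(size=unit.size)
y = 0.5 * x + rng.normal(size=unit.size)

def two_way_demean(v):
    # Balanced-panel within transform: v_it - v_bar_i - v_bar_t + v_bar
    return (v
            - np.bincount(unit, weights=v)[unit] / n_periods
            - np.bincount(time, weights=v)[time] / n_units
            + v.mean())

# Full design: x plus unit and time dummies (one time dummy dropped).
X_full = np.column_stack([x,
                          np.eye(n_units)[unit],
                          np.eye(n_periods)[time][:, 1:]])
coef = np.linalg.lstsq(X_full, y, rcond=None)[0]
resid_full = y - X_full @ coef

# Reduced (FWL) design: demeaned x only.
x_dm, y_dm = two_way_demean(x), two_way_demean(y)
beta = (x_dm @ y_dm) / (x_dm @ x_dm)
resid_red = y_dm - beta * x_dm

# Coefficients and residuals agree under FWL ...
assert np.isclose(coef[0], beta)
assert np.allclose(resid_full, resid_red)

# ... but the hat-matrix diagonals do not: the reduced h_ii misses the
# fixed-effect projection, so any 1/(1 - h_ii)-style HC2 adjustment
# computed on the reduced design is wrong.
h_full = (np.linalg.qr(X_full)[0] ** 2).sum(axis=1)
h_red = x_dm ** 2 / (x_dm @ x_dm)
assert not np.allclose(h_full, h_red)
```

This is why HC1/CR1 are unaffected (no leverage term) while HC2/CR2-BM need the full absorbed projection.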

#### Performance

65 changes: 64 additions & 1 deletion diff_diff/efficient_did_covariates.py
@@ -172,7 +172,15 @@ def estimate_propensity_ratio_sieve(

Selects K via AIC/BIC: ``IC(K) = 2*loss(K) + C_n*K/n``.

On singular basis: tries lower K. Short-circuits r_{g,g}(X) = 1.
Precondition check per K: if ``cond(Psi_{g'}' W Psi_{g'}) > 1/sqrt(eps)``
(≈ 6.7e7), that K is skipped. A ``LinAlgError`` from the
``np.linalg.solve`` call or a non-finite beta likewise skips that K.
If at least one K succeeds but others were skipped, a ``UserWarning``
listing the skipped K values is emitted (silent-failure audit PR,
axis-A finding #18). If every K is skipped, the caller falls back to a
constant ratio of 1 with a separate "estimation failed for all K
values" warning.

Short-circuits ``r_{g,g}(X) = 1`` for same-cohort comparisons (PT-All).

Parameters
----------
Expand Down Expand Up @@ -227,6 +235,10 @@ def estimate_propensity_ratio_sieve(

best_ic = np.inf
best_ratio = np.ones(n_units) # fallback: constant ratio 1
singular_K: List[int] = [] # K values skipped due to rank deficiency (#18)
# Near-singular matrices solve without raising LinAlgError but return
# numerically meaningless beta. Rule-of-thumb threshold: 1/sqrt(eps).
cond_threshold = 1.0 / np.sqrt(np.finfo(float).eps)

for K in range(1, k_max + 1):
n_basis = comb(K + d, d)
@@ -249,13 +261,23 @@
A = Psi_gp.T @ Psi_gp
b = Psi_g.sum(axis=0)

# Precondition check (#18, axis A): reject near-singular A explicitly
# so np.linalg.solve can't silently return garbage coefficients.
with np.errstate(invalid="ignore", over="ignore"):
A_cond = float(np.linalg.cond(A))
if not np.isfinite(A_cond) or A_cond > cond_threshold:
singular_K.append(K)
continue

try:
beta = np.linalg.solve(A, b)
except np.linalg.LinAlgError:
singular_K.append(K)
continue # singular — try next K

# Check for NaN/Inf in solution
if not np.all(np.isfinite(beta)):
singular_K.append(K)
continue

# Predicted ratio for all units
@@ -282,6 +304,18 @@
UserWarning,
stacklevel=2,
)
elif singular_K:
# Finding #18 (axis A): partial K-failure was previously silent.
# Surface it so users see that the selected basis order was
# forced by rank deficiency at higher K rather than by the IC.
warnings.warn(
f"Propensity ratio sieve: skipped K={singular_K} due to "
f"rank-deficient or non-finite normal equations. "
f"Selected basis used the remaining K values; "
f"this may indicate limited variation in the covariates.",
UserWarning,
stacklevel=2,
)

# Overlap diagnostics: warn if ratios require significant clipping
n_extreme = int(np.sum((best_ratio < 1.0 / ratio_clip) | (best_ratio > ratio_clip)))
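The per-K skip logic above can be isolated into a standalone sketch. The helper name `solve_if_well_conditioned` is hypothetical (the library inlines this in the loop); it shows the three rejection paths the docstring describes: condition number above `1/sqrt(eps)`, `LinAlgError`, and non-finite solution.

```python
import numpy as np

def solve_if_well_conditioned(A, b):
    # Hypothetical helper mirroring the per-K precondition check:
    # reject near-singular normal equations before np.linalg.solve
    # can return numerically meaningless coefficients.
    threshold = 1.0 / np.sqrt(np.finfo(float).eps)  # ~ 6.7e7
    with np.errstate(invalid="ignore", over="ignore", divide="ignore"):
        cond = float(np.linalg.cond(A))
    if not np.isfinite(cond) or cond > threshold:
        return None  # caller records this K as skipped
    try:
        beta = np.linalg.solve(A, b)
    except np.linalg.LinAlgError:
        return None
    return beta if np.all(np.isfinite(beta)) else None

# A well-conditioned system solves; an exactly singular one is rejected
# by the condition-number gate rather than silently "solving".
ok = solve_if_well_conditioned(np.eye(2), np.array([1.0, 2.0]))
bad = solve_if_well_conditioned(np.ones((2, 2)), np.array([1.0, 2.0]))
```

A caller would append the current `K` to a `singular_K` list whenever `None` comes back, then warn once after the loop.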
@@ -329,6 +363,14 @@ def estimate_inverse_propensity_sieve(
units on the RHS (not just group g), following the paper's
algorithm step 4.

Precondition check per K: if ``cond(Psi_{g'}' W Psi_{g'}) > 1/sqrt(eps)``
(≈ 6.7e7), that K is skipped. A ``LinAlgError`` from the
``np.linalg.solve`` call or a non-finite beta likewise skips that K.
If at least one K succeeds but others were skipped, a ``UserWarning``
listing the skipped K values is emitted (silent-failure audit PR,
axis-A finding #18). If every K is skipped, the caller falls back to
unconditional ``n/n_group`` scaling with a separate "estimation failed
for all K values" warning.

Parameters
----------
covariate_matrix : ndarray, shape (n_units, n_covariates)
Expand Down Expand Up @@ -377,6 +419,8 @@ def estimate_inverse_propensity_sieve(

best_ic = np.inf
best_s = np.full(n_units, fallback_ratio) # fallback: unconditional
singular_K: List[int] = [] # K values skipped due to rank deficiency (#18)
cond_threshold = 1.0 / np.sqrt(np.finfo(float).eps)

for K in range(1, k_max + 1):
n_basis = comb(K + d, d)
@@ -397,11 +441,20 @@
# RHS: sum of basis over ALL units (not just one group)
b = basis_all.sum(axis=0)

# Precondition check (#18, axis A): see ratio-sieve comment above.
with np.errstate(invalid="ignore", over="ignore"):
A_cond = float(np.linalg.cond(A))
if not np.isfinite(A_cond) or A_cond > cond_threshold:
singular_K.append(K)
continue

try:
beta = np.linalg.solve(A, b)
except np.linalg.LinAlgError:
singular_K.append(K)
continue
if not np.all(np.isfinite(beta)):
singular_K.append(K)
continue

s_hat = basis_all @ beta
@@ -423,6 +476,16 @@
UserWarning,
stacklevel=2,
)
elif singular_K:
# Finding #18 (axis A): partial K-failure was previously silent.
warnings.warn(
f"Inverse propensity sieve: skipped K={singular_K} due to "
f"rank-deficient or non-finite normal equations. "
f"Selected basis used the remaining K values; "
f"this may indicate limited variation in the covariates.",
UserWarning,
stacklevel=2,
)

# Overlap diagnostics: warn if s_hat values require clipping
n_clipped = int(np.sum((best_s < 1.0) | (best_s > float(n_units))))
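Both sieve functions select K by the criterion `IC(K) = 2*loss(K) + C_n*K/n` over the surviving K values. A small sketch of that selection rule, with `select_K` as a hypothetical name and `None` standing in for a skipped K:

```python
import numpy as np

def select_K(losses, n, c_n=2.0):
    # Hypothetical sketch of IC(K) = 2*loss(K) + C_n*K/n.
    # losses[K-1] is the fit loss at basis order K; None marks a K
    # rejected by the precondition check (rank-deficient / non-finite).
    best_K, best_ic = None, np.inf
    skipped = []
    for K, loss in enumerate(losses, start=1):
        if loss is None:
            skipped.append(K)
            continue
        ic = 2.0 * loss + c_n * K / n
        if ic < best_ic:
            best_K, best_ic = K, ic
    return best_K, skipped

# K=4 has the lower raw loss but loses on the complexity penalty;
# K=3 was skipped and would be reported in the partial-failure warning.
best_K, skipped = select_K([0.9, 0.5, None, 0.49], n=100)
# -> (2, [3])
```

When `skipped` is non-empty but `best_K` is not `None`, the code above corresponds to the new partial-failure `UserWarning`; when every entry is `None`, it corresponds to the all-K fallback path.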
92 changes: 77 additions & 15 deletions diff_diff/staggered.py
@@ -92,11 +92,29 @@ def _linear_regression(
return beta, residuals


def _safe_inv(A: np.ndarray) -> np.ndarray:
"""Invert a square matrix with lstsq fallback for near-singular cases."""
def _safe_inv(
A: np.ndarray,
tracker: Optional[list] = None,
) -> np.ndarray:
"""Invert a square matrix with lstsq fallback for near-singular cases.

Parameters
----------
A : np.ndarray
Square matrix to invert.
tracker : list, optional
When provided, one condition-number sample of ``A`` is appended on
every LinAlgError fallback. ``CallawaySantAnna.fit()`` initializes
a list and emits a single aggregate `UserWarning` after the fit
finishes, rather than surfacing a separate warning per fallback.
Sibling of finding #17 in the Phase 2 silent-failures audit.
"""
try:
return np.linalg.solve(A, np.eye(A.shape[0]))
except np.linalg.LinAlgError:
if tracker is not None:
with np.errstate(invalid="ignore", over="ignore"):
tracker.append(float(np.linalg.cond(A)))
return np.linalg.lstsq(A, np.eye(A.shape[0]), rcond=None)[0]
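The tracker protocol can be exercised standalone. This sketch reuses the `_safe_inv` definition from the diff and mimics the one-shot aggregate warning that `CallawaySantAnna.fit()` emits after the loop, rather than warning per fallback:

```python
import warnings
import numpy as np

def _safe_inv(A, tracker=None):
    # As in the diff: solve-based inverse with an lstsq pseudo-inverse
    # fallback, recording one condition-number sample per fallback.
    try:
        return np.linalg.solve(A, np.eye(A.shape[0]))
    except np.linalg.LinAlgError:
        if tracker is not None:
            with np.errstate(invalid="ignore", over="ignore",
                             divide="ignore"):
                tracker.append(float(np.linalg.cond(A)))
        return np.linalg.lstsq(A, np.eye(A.shape[0]), rcond=None)[0]

tracker = []
_safe_inv(np.eye(3), tracker=tracker)        # well-conditioned: no sample
_safe_inv(np.ones((3, 3)), tracker=tracker)  # singular: one sample
if tracker:
    # One aggregate warning for the whole fit, as in fit() below.
    warnings.warn(f"{len(tracker)} lstsq fallback(s) in analytical SE "
                  f"paths; max cond {max(tracker):.2e}", UserWarning)
```

The design choice here is that a single fit can hit many rank-deficient (g, t) cells, so per-cell warnings would flood the console without adding information.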


@@ -1436,6 +1454,12 @@ def fit(
# Reset stale state from prior fit (prevents leaking event-study VCV)
self._event_study_vcov = None

# Tracker for _safe_inv lstsq fallbacks across all analytical SE
# paths (PS Hessian, OR bread, event-study bread, etc.). Emit ONE
# aggregate warning at the end of fit rather than fanning out per
# cell. Sibling of PR #9 finding #17.
self._safe_inv_tracker: List[float] = []

if not self.panel:
warnings.warn(
"panel=False uses repeated cross-section DRDID estimators "
@@ -1976,6 +2000,26 @@ def fit(
eff_data["effect"] + cband_crit_value * se_val,
)

# Consolidated _safe_inv lstsq-fallback warning (sibling of PR #9
# finding #17). Rank-deficient PS Hessian / OR bread matrices in the
# analytical SE paths previously fell back to np.linalg.lstsq
# silently per cell. Now aggregated here into ONE UserWarning so
# a bad design surface doesn't quietly degrade analytical SEs.
if self._safe_inv_tracker:
n_fallbacks = len(self._safe_inv_tracker)
finite_conds = [c for c in self._safe_inv_tracker if np.isfinite(c)]
max_cond = max(finite_conds) if finite_conds else float("inf")
warnings.warn(
f"Rank-deficient matrix encountered {n_fallbacks} time(s) "
f"in analytical SE paths (propensity-score Hessian or "
f"outcome-regression bread); fell back to np.linalg.lstsq. "
f"Max condition number among affected matrices: {max_cond:.2e}. "
f"Analytical SEs may be numerically unstable; consider "
f"dropping collinear covariates or using n_bootstrap > 0.",
UserWarning,
stacklevel=2,
)

# Store results
# Retrieve event-study VCV from aggregation mixin (Phase 7d).
# Clear it when bootstrap overwrites event-study SEs to prevent
@@ -2276,7 +2320,7 @@ def _ipw_estimation(
W_ps = W_ps * sw_all
# R: Hessian.ps = crossprod(X * sqrt(W)) / n
H_psi = X_all_int.T @ (W_ps[:, None] * X_all_int) / n_all_panel
H_psi_inv = _safe_inv(H_psi)
H_psi_inv = _safe_inv(H_psi, tracker=self._safe_inv_tracker)

D_all = np.concatenate([np.ones(n_t), np.zeros(n_c)])
score_ps = (D_all - pscore_all)[:, None] * X_all_int
@@ -2562,7 +2606,7 @@ def _doubly_robust(
if sw_all is not None:
W_ps = W_ps * sw_all
H_psi = X_all_int.T @ (W_ps[:, None] * X_all_int) / n_all_panel
H_psi_inv = _safe_inv(H_psi)
H_psi_inv = _safe_inv(H_psi, tracker=self._safe_inv_tracker)

D_all = np.concatenate([np.ones(n_t), np.zeros(n_c)])
score_ps = (D_all - pscore_all)[:, None] * X_all_int
@@ -2584,7 +2628,7 @@
X_c_int = X_control_with_intercept
W_diag = sw_control if sw_control is not None else np.ones(n_c)
XtWX = X_c_int.T @ (W_diag[:, None] * X_c_int)
bread = _safe_inv(XtWX)
bread = _safe_inv(XtWX, tracker=self._safe_inv_tracker)

# M1: dATT/dbeta — gradient of DR ATT w.r.t. OR parameters
X_t_int = X_treated_with_intercept
@@ -2628,7 +2672,7 @@ def _doubly_robust(

W_ps = pscore_all * (1 - pscore_all)
H_psi = X_all_int.T @ (W_ps[:, None] * X_all_int) / n_all_panel
H_psi_inv = _safe_inv(H_psi)
H_psi_inv = _safe_inv(H_psi, tracker=self._safe_inv_tracker)

D_all = np.concatenate([np.ones(n_t), np.zeros(n_c)])
score_ps = (D_all - pscore_all)[:, None] * X_all_int
@@ -2645,7 +2689,7 @@
# --- OR IF correction ---
X_c_int = X_control_with_intercept
XtX = X_c_int.T @ X_c_int
bread = _safe_inv(XtX)
bread = _safe_inv(XtX, tracker=self._safe_inv_tracker)

X_t_int = X_treated_with_intercept
M1 = (
@@ -3204,8 +3248,14 @@ def _outcome_regression_rc(
# R's colMeans (= sum/n_all) for M1, matching the product exactly.
W_ct = sw_ct if sw_ct is not None else np.ones(n_ct)
W_cs = sw_cs if sw_cs is not None else np.ones(n_cs)
bread_t = _safe_inv(X_ct_int.T @ (W_ct[:, None] * X_ct_int))
bread_s = _safe_inv(X_cs_int.T @ (W_cs[:, None] * X_cs_int))
bread_t = _safe_inv(
X_ct_int.T @ (W_ct[:, None] * X_ct_int),
tracker=self._safe_inv_tracker,
)
bread_s = _safe_inv(
X_cs_int.T @ (W_cs[:, None] * X_cs_int),
tracker=self._safe_inv_tracker,
)

# R: M1 = colMeans(w.cont * out.x) = sum(w_D * X) / n_all
M1 = (
@@ -3407,7 +3457,7 @@ def _ipw_estimation_rc(
W_ps = W_ps * sw_all
# R: Hessian.ps = crossprod(X * sqrt(W)) / n
H_psi = X_all_int.T @ (W_ps[:, None] * X_all_int) / n_all
H_psi_inv = _safe_inv(H_psi)
H_psi_inv = _safe_inv(H_psi, tracker=self._safe_inv_tracker)

score_ps = (D_all - pscore)[:, None] * X_all_int
if sw_all is not None:
@@ -3744,7 +3794,7 @@ def _doubly_robust_rc(
W_ps = W_ps * sw_all
# R: Hessian.ps = crossprod(X * sqrt(W)) / n
H_psi = X_all_int.T @ (W_ps[:, None] * X_all_int) / n_all
H_psi_inv = _safe_inv(H_psi)
H_psi_inv = _safe_inv(H_psi, tracker=self._safe_inv_tracker)

score_ps = (D_all - pscore)[:, None] * X_all_int
if sw_all is not None:
Expand Down Expand Up @@ -3779,8 +3829,14 @@ def _doubly_robust_rc(
# =====================================================================
W_ct_vals = sw_ct if sw_ct is not None else np.ones(n_ct)
W_cs_vals = sw_cs if sw_cs is not None else np.ones(n_cs)
bread_ct = _safe_inv(X_ct_int.T @ (W_ct_vals[:, None] * X_ct_int))
bread_cs = _safe_inv(X_cs_int.T @ (W_cs_vals[:, None] * X_cs_int))
bread_ct = _safe_inv(
X_ct_int.T @ (W_ct_vals[:, None] * X_ct_int),
tracker=self._safe_inv_tracker,
)
bread_cs = _safe_inv(
X_cs_int.T @ (W_cs_vals[:, None] * X_cs_int),
tracker=self._safe_inv_tracker,
)

# R: asy.lin.rep.ols (per-obs OLS score * bread)
asy_lin_rep_ct = (W_ct_vals * resid_ct)[:, None] * X_ct_int @ bread_ct
Expand Down Expand Up @@ -3818,8 +3874,14 @@ def _doubly_robust_rc(
# =====================================================================
W_gt_vals = sw_gt if sw_gt is not None else np.ones(n_gt)
W_gs_vals = sw_gs if sw_gs is not None else np.ones(n_gs)
bread_gt = _safe_inv(X_gt_int.T @ (W_gt_vals[:, None] * X_gt_int))
bread_gs = _safe_inv(X_gs_int.T @ (W_gs_vals[:, None] * X_gs_int))
bread_gt = _safe_inv(
X_gt_int.T @ (W_gt_vals[:, None] * X_gt_int),
tracker=self._safe_inv_tracker,
)
bread_gs = _safe_inv(
X_gs_int.T @ (W_gs_vals[:, None] * X_gs_int),
tracker=self._safe_inv_tracker,
)

asy_lin_rep_gt = (W_gt_vals * resid_gt)[:, None] * X_gt_int @ bread_gt
asy_lin_rep_gs = (W_gs_vals * resid_gs)[:, None] * X_gs_int @ bread_gs