igerber · igerber · Apr 19, 2026 · Apr 19, 2026 · Apr 19, 2026 · Apr 19, 2026
diff --git a/diff_diff/continuous_did.py b/diff_diff/continuous_did.py
@@ -227,7 +227,50 @@ def fit(
                 f"Dose must be time-invariant. Units with varying dose: {bad_units[:5]}"
             )
 
-        # Normalize first_treat: inf → 0
+        # Normalize first_treat: +inf → 0 (R-style never-treated encoding).
+        # Count rows recategorized so users can see how many units just
+        # crossed from "treated at some point" to "never treated" — silent
+        # recategorization here would shift the control composition (axis-E
+        # silent coercion). Only positive infinity is recoded (to match the
+        # existing `.replace([np.inf, float("inf")], 0)` call that runs after
+        # the NaN and negative-value checks below).
+        first_treat_vals = df[first_treat].values
+        # Reject NaN first_treat explicitly. NaN survives preprocessing but
+        # satisfies neither the treated (g > 0) nor never-treated (g == 0)
+        # mask, so affected units would be silently excluded from the
+        # estimator (same silent-failure shape as `first_treat < 0`).
+        nan_mask = pd.isna(df[first_treat])
+        n_nan_first_treat = int(nan_mask.sum())
+        if n_nan_first_treat > 0:
+            raise ValueError(
+                f"{n_nan_first_treat} row(s) have NaN '{first_treat}' "
+                f"values. Valid values are 0 (never-treated) or a positive "
+                f"treatment period; such units would otherwise be silently "
+                f"excluded from both treated and control pools."
+            )
+        inf_mask = np.isposinf(first_treat_vals)
+        n_inf_first_treat = int(inf_mask.sum())
+        if n_inf_first_treat > 0:
+            warnings.warn(
+                f"{n_inf_first_treat} row(s) have inf in '{first_treat}'; "
+                f"treating the corresponding units as never-treated. Pass an "
+                f"explicit never-treated marker (0) if this is not intended.",
+                UserWarning,
+                stacklevel=2,
+            )
+        # Reject negative first_treat values (including -inf) explicitly.
+        # Without this guard they would survive preprocessing but fall out of
+        # both the treated (g > 0) and never-treated (g == 0) masks, silently
+        # excluding the affected units.
+        negative_mask = first_treat_vals < 0
+        n_negative_first_treat = int(negative_mask.sum())
+        if n_negative_first_treat > 0:
+            raise ValueError(
+                f"{n_negative_first_treat} row(s) have negative '{first_treat}' "
+                f"values (including -inf). Valid values are 0 (never-treated) "
+                f"or a positive treatment period; such units would otherwise "
+                f"be silently excluded from both treated and control pools."
+            )
         df[first_treat] = df[first_treat].replace([np.inf, float("inf")], 0)
 
         # Drop units with positive first_treat but zero dose (R convention)
@@ -265,9 +308,22 @@ def fit(
                 stacklevel=2,
             )
 
-        # Force dose=0 for never-treated units with nonzero dose
+        # Force dose=0 for never-treated units with nonzero dose. Report the
+        # affected row count via UserWarning so users can see whether their
+        # never-treated rows had unintended nonzero doses — silent zeroing
+        # here would quietly shift part of the control trajectory (axis-E
+        # silent coercion, paired with the `first_treat=inf -> 0` fix above).
         never_treated_mask = df[first_treat] == 0
-        if (df.loc[never_treated_mask, dose] != 0).any():
+        nonzero_dose_rows = never_treated_mask & (df[dose] != 0)
+        n_nonzero_dose_never_treated = int(nonzero_dose_rows.sum())
+        if n_nonzero_dose_never_treated > 0:
+            warnings.warn(
+                f"{n_nonzero_dose_never_treated} row(s) have '{first_treat}'=0 "
+                f"(never-treated) but nonzero '{dose}'; zeroing the dose. Pass "
+                f"dose=0 for never-treated rows to avoid this coercion.",
+                UserWarning,
+                stacklevel=2,
+            )
             df.loc[never_treated_mask, dose] = 0.0
 
         # Verify balanced panel

diff --git a/diff_diff/staggered_triple_diff.py b/diff_diff/staggered_triple_diff.py
@@ -284,6 +284,19 @@ def fit(
 
         if first_treat != "first_treat":
             df["first_treat"] = df[first_treat]
+        # Surface the inf → 0 recategorization the same way StaggeredDiD does
+        # (see `staggered.py:1508-1519`). Silently recoding inf would shift
+        # units between treated and never-treated pools with no signal
+        # (axis-E silent coercion under the Phase 2 audit).
+        _inf_mask = np.isposinf(df["first_treat"].values)
+        if _inf_mask.any():
+            n_inf_rows = int(_inf_mask.sum())
+            warnings.warn(
+                f"{n_inf_rows} row(s) have first_treat=inf; recoding to 0 "
+                f"(never-treated). Use first_treat=0 to suppress this warning.",
+                UserWarning,
+                stacklevel=2,
+            )
         df["first_treat"] = df["first_treat"].replace([np.inf, float("inf")], 0)
 
         precomputed = self._precompute_structures(

diff --git a/diff_diff/utils.py b/diff_diff/utils.py
@@ -821,7 +821,8 @@ def check_parallel_trends_robust(
 
     # Compute outcome changes
     treated_changes, control_changes = _compute_outcome_changes(
-        pre_data, outcome, time, treatment_group, unit
+        pre_data, outcome, time, treatment_group, unit,
+        caller_label="check_parallel_trends_robust",
     )
 
     if len(treated_changes) < 2 or len(control_changes) < 2:
@@ -897,7 +898,12 @@ def check_parallel_trends_robust(
 
 
 def _compute_outcome_changes(
-    data: pd.DataFrame, outcome: str, time: str, treatment_group: str, unit: Optional[str] = None
+    data: pd.DataFrame,
+    outcome: str,
+    time: str,
+    treatment_group: str,
+    unit: Optional[str] = None,
+    caller_label: str = "parallel-trend diagnostic",
 ) -> Tuple[np.ndarray, np.ndarray]:
     """
     Compute period-to-period outcome changes for treated and control groups.
@@ -925,7 +931,24 @@ def _compute_outcome_changes(
         data_sorted = data.sort_values([unit, time])
         data_sorted["_outcome_change"] = data_sorted.groupby(unit)[outcome].diff()
 
-        # Remove NaN from first period of each unit
+        # Remove NaN from first period of each unit. The first period per unit
+        # has no prior observation to diff against, so n_units drops are
+        # expected. Any excess comes from NaN outcomes (including gaps that were
+        # NaN-filled upstream) — surface it via warning (axis-E drop counter).
+        n_units_observed = int(data_sorted[unit].nunique())
+        n_dropped = int(data_sorted["_outcome_change"].isna().sum())
+        n_unexpected_drops = max(0, n_dropped - n_units_observed)
+        if n_unexpected_drops > 0:
+            warnings.warn(
+                f"{caller_label}: dropped {n_dropped} row(s) with NaN "
+                f"first-differences; {n_units_observed} are the expected "
+                f"first-period-per-unit drops, and {n_unexpected_drops} are "
+                f"additional NaN first-differences (e.g. NaN outcomes or "
+                f"unit-period gaps upstream). Parallel-trend statistics are "
+                f"computed on the remaining rows.",
+                UserWarning,
+                stacklevel=3,
+            )
         changes_data = data_sorted.dropna(subset=["_outcome_change"])
 
         treated_changes = changes_data[changes_data[treatment_group] == 1]["_outcome_change"].values
@@ -1001,7 +1024,8 @@ def equivalence_test_trends(
 
     # Compute outcome changes
     treated_changes, control_changes = _compute_outcome_changes(
-        pre_data, outcome, time, treatment_group, unit
+        pre_data, outcome, time, treatment_group, unit,
+        caller_label="equivalence_test_trends",
     )
 
     # Need at least 2 observations per group to compute variance

diff --git a/diff_diff/wooldridge.py b/diff_diff/wooldridge.py
@@ -13,6 +13,7 @@
 
 from __future__ import annotations
 
+import warnings
 from typing import Any, Dict, List, Optional, Tuple
 
 import numpy as np
@@ -112,6 +113,26 @@ def _resolve_survey_for_wooldridge(survey_design, sample, cluster_ids, cluster_n
     return resolved, survey_weights, survey_weight_type, survey_metadata, df_inf
 
 
+def _warn_and_fill_nan_cohort(df: pd.DataFrame, cohort: str, stacklevel: int) -> pd.DataFrame:
+    """Fill NaN cohort with 0 (never-treated) and warn with the row count.
+
+    Used by both `_filter_sample` (pre-fit) and `WooldridgeDiD.fit()` so the
+    silent recategorization is surfaced on whichever entry path the caller
+    hits first. See REGISTRY.md §WooldridgeDiD (axis-E silent coercion).
+    """
+    n_nan_cohort = int(df[cohort].isna().sum())
+    if n_nan_cohort > 0:
+        warnings.warn(
+            f"{n_nan_cohort} row(s) have NaN cohort values; filling with 0 "
+            f"and treating the corresponding units as never-treated. Pass "
+            f"an explicit never-treated marker (0) if this is not intended.",
+            UserWarning,
+            stacklevel=stacklevel,
+        )
+    df[cohort] = df[cohort].fillna(0)
+    return df
+
+
 def _filter_sample(
     data: pd.DataFrame,
     unit: str,
@@ -128,8 +149,7 @@ def _filter_sample(
     (see _build_interaction_matrix).
     """
     df = data.copy()
-    # Normalise never-treated: fill NaN cohort with 0
-    df[cohort] = df[cohort].fillna(0)
+    df = _warn_and_fill_nan_cohort(df, cohort, stacklevel=3)
 
     treated_mask = df[cohort] > 0
 
@@ -396,7 +416,7 @@ def fit(
             ``NotImplementedError``.
         """
         df = data.copy()
-        df[cohort] = df[cohort].fillna(0)
+        df = _warn_and_fill_nan_cohort(df, cohort, stacklevel=2)
 
         # 0a. Validate cohort is time-invariant within unit
         cohort_per_unit = df.groupby(unit)[cohort].nunique()

diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -720,6 +720,8 @@ See `docs/methodology/continuous-did.md` Section 4 for full details.
 - [ ] Lowest-dose-as-control (Remark 3.1)
 - [x] Survey design support (Phase 3): weighted B-spline OLS, TSL on influence functions; bootstrap+survey supported (Phase 6)
 - **Note:** ContinuousDiD bootstrap with survey weights supported (Phase 6) via PSU-level multiplier weights
+- **Note:** The R-style convention of coding never-treated units as `first_treat=inf` is still accepted and normalized to `first_treat=0` internally, but the estimator now emits a `UserWarning` reporting the row count so the silent recategorization is surfaced (axis-E silent coercion under the Phase 2 audit). Only `+inf` is recoded (matching the R convention). Any **negative** `first_treat` value (including `-inf`) and any **NaN** `first_treat` value raises `ValueError` with the row count, since such units would otherwise silently fall out of both the treated (`g > 0`) and never-treated (`g == 0`) masks. Pass `0` directly for never-treated units to avoid the warning.
+- **Note:** Rows with `first_treat=0` (never-treated) that carry a nonzero `dose` have their dose coerced to zero for internal consistency (never-treated cells must have `D=0` in the dose response). The estimator now emits a `UserWarning` with the affected row count before the zeroing, so unintended nonzero doses on never-treated rows are no longer absorbed without a signal (axis-E silent coercion).
 
 ---
 
@@ -1303,6 +1305,7 @@ The saturated ETWFE regression includes:
 The interaction coefficient `δ_{g,t}` identifies `ATT(g, t)` under parallel trends.
 - **Note:** OLS path uses iterative alternating-projection within-transformation (uniform weights) for exact FE absorption on both balanced and unbalanced panels. One-pass demeaning (`y - ȳ_i - ȳ_t + ȳ`) is only exact for balanced panels.
 - **Note:** The weighted within-transformation (`utils.within_transform` with `weights`) is invoked on every WooldridgeDiD fit (survey weights when provided, `np.ones` otherwise) and emits a `UserWarning` on non-convergence per the shared convention documented under *Absorbed Fixed Effects with Survey Weights*.
+- **Note:** NaN values in the `cohort` column are filled with 0 (treated as never-treated), both in `_filter_sample` and in `fit()`. This recategorization now emits a `UserWarning` reporting the affected row count so it is no longer silent (axis-E silent coercion under the Phase 2 audit). Pass `0` directly for never-treated units to avoid the warning.
 
 *Nonlinear extensions (Wooldridge 2023):*
 
@@ -1689,6 +1692,7 @@ Balanced panel. Key variables:
 - `Q_i` (`eligibility`): binary, time-invariant eligibility indicator
 - Treatment: `D_{i,t} = 1{t >= S_i AND Q_i = 1}` (absorbing)
 - Covariates `X_i`: time-invariant (first observation per unit used)
+- **Note:** `first_treat=inf` (R-style never-enabled marker) is accepted and normalized to `0` internally. The recoding now emits a `UserWarning` reporting the affected row count so the reclassification is not silent (axis-E silent coercion under the Phase 2 audit, mirroring the StaggeredDiD behavior). Pass `first_treat=0` directly to avoid the warning.
 
 *Estimator equation (Equation 4.1 in paper, as implemented):*