From 2ccedb9923f15bd79db3555d2907507e90e46456 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Mon, 1 Jun 2026 09:11:04 -0400
Subject: [PATCH 1/4] Reject covariate names that collide with reserved
 structural terms (DiD family)

DifferenceInDifferences, MultiPeriodDiD, and TwoWayFixedEffects build their
coefficients dict by zipping a var_names list (structural term names + user
covariate column names appended verbatim) with the fitted coefficient vector.
A covariate whose name equaled a reserved structural name (const; the
treatment/time column names; the {treatment}:{time} interaction; MPD period_{p}
dummies and {treatment}:period_{p} interactions; TWFE ATT; fixed-effect / unit /
time dummy names; or an internal _-prefixed working column such as _treat_time /
_did_treatment / _treatment_post) silently overwrote that structural coefficient
via dict last-write-wins -- e.g. a covariate named const dropped the intercept --
with no error.

Add a shared validate_covariate_names helper (utils.py) called in each fit()
before the design matrix is built: it raises ValueError on a collision
(case-sensitive, so Const is allowed) and on duplicate covariate names.
Fixed-effect/unit/time dummy reserved names are taken from the same
pd.get_dummies(..., drop_first=True) call used to build them, so they match
exactly (including for Categorical columns with a non-default category order).
The TWFE guard fires on both variance paths: the within-transform path returns
only {"ATT": att}, but a covariate named _treatment_post would still clobber the
internal interaction column.

Potentially breaking: a fit that previously succeeded with a colliding (or
duplicated) covariate name -- silently returning a corrupted coefficient dict --
now raises. The staggered / influence-function estimators key results by (g, t)
tuples / relative-time indices and are unaffected; SyntheticDiD overrides fit()
and never reaches the base-class guard.

Tests: tests/test_utils.py::TestValidateCovariateNames,
tests/test_estimators.py::TestCovariateNameCollision,
tests/test_estimators_vcov_type.py::TestTWFECovariateNameCollision.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                       |   3 +
 diff_diff/estimators.py            |  57 ++++++++++-
 diff_diff/twfe.py                  |  30 +++++-
 diff_diff/utils.py                 |  69 ++++++++++++-
 docs/methodology/REGISTRY.md       |   3 +
 tests/test_estimators.py           | 153 +++++++++++++++++++++++++++++
 tests/test_estimators_vcov_type.py |  54 ++++++++++
 tests/test_utils.py                |  40 ++++++++
 8 files changed, 405 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 93406cbe..f5ca3c8e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Fixed
+- **Covariate names that collide with reserved structural terms now raise `ValueError` instead of silently corrupting the coefficient dict (`DifferenceInDifferences`, `MultiPeriodDiD`, `TwoWayFixedEffects`).** These estimators build their `coefficients` dict by zipping a variable-name list -- structural term names PLUS the user covariate column names appended verbatim -- with the fitted coefficient vector. A covariate whose name equaled a reserved structural name (`const`; the treatment/time column names; the `{treatment}:{time}` interaction; MultiPeriodDiD `period_{p}` dummies and `{treatment}:period_{p}` interactions; `TwoWayFixedEffects` `ATT`; fixed-effect / unit / time dummy names; or an internal `_`-prefixed working column such as `_treat_time` / `_did_treatment` / `_treatment_post`) silently **overwrote** that structural coefficient via Python dict last-write-wins -- e.g. a covariate named `const` dropped the intercept -- with no error or warning. A new shared `validate_covariate_names` helper (`diff_diff/utils.py`) is now called in each of the three `fit()` methods before the design matrix is built; it raises `ValueError` on a collision (the comparison is case-sensitive, so e.g. `Const` is still allowed) **and** on duplicate names within `covariates` (which collapse to a single dict entry the same way). Fixed-effect/unit/time dummy reserved names are taken from the same `pd.get_dummies(..., drop_first=True)` call used to build them, so they match exactly (including for pandas `Categorical` columns with a non-default category order). For `TwoWayFixedEffects` the guard fires on **all** variance paths: the default within-transform path returns only `{"ATT": att}` (no covariate is a dict key there), but a covariate named `_treatment_post` would still clobber the internal interaction column, so guarding both paths is uniform and forward-compatible. **Potentially breaking:** a fit that previously *succeeded* with a colliding (or duplicated) covariate name -- silently returning a corrupted coefficient dict -- now raises; rename the covariate column(s). The staggered / influence-function estimators (CallawaySantAnna, SunAbraham, StaggeredTripleDifference, EfficientDiD, TwoStageDiD, ImputationDiD, WooldridgeDiD, dCDH, StackedDiD) key results by `(g, t)` tuples / relative-time indices, never covariate names, and `TripleDifference` / `SyntheticControl` / `SyntheticDiD` do not expose covariates by name, so none are affected. New tests in `tests/test_utils.py`, `tests/test_estimators.py`, and `tests/test_estimators_vcov_type.py`.
+
 ## [3.5.0] - 2026-06-01
 
 ### Added
diff --git a/diff_diff/estimators.py b/diff_diff/estimators.py
index 8c71dd76..bb9aa180 100644
--- a/diff_diff/estimators.py
+++ b/diff_diff/estimators.py
@@ -32,6 +32,7 @@
     demean_by_group,
     safe_inference,
     validate_binary,
+    validate_covariate_names,
     wild_bootstrap_se,
 )
 
@@ -255,6 +256,11 @@ def fit(
             If provided, overrides outcome, treatment, and time parameters.
         covariates : list, optional
             List of covariate column names to include as linear controls.
+            Names must not collide with reserved structural terms (``const``,
+            the treatment/time column names, the ``{treatment}:{time}``
+            interaction, fixed-effect dummy names, or internal working columns)
+            and must be unique; a collision or duplicate raises ``ValueError``
+            (it would otherwise silently overwrite a structural coefficient).
         fixed_effects : list, optional
             List of categorical column names to include as fixed effects.
             Creates dummy variables for each category (drops first level).
@@ -286,7 +292,9 @@ def fit(
         Raises
         ------
         ValueError
-            If required parameters are missing or data validation fails.
+            If required parameters are missing or data validation fails, or if
+            a covariate name collides with a reserved structural term name or
+            duplicates another covariate.
 
         Examples
         --------
@@ -465,6 +473,22 @@ def fit(
         else:
             dt = d * t
 
+        # Reject covariate names that collide with reserved structural terms.
+        # Covariate names are appended verbatim to var_names below and zipped
+        # into coef_dict, so a covariate named like a structural term would
+        # silently overwrite that coefficient (dict last-write-wins). The
+        # reserved set covers the intercept, treatment/time indicators, the
+        # interaction, the internal _treat_time working column, and any
+        # fixed-effect dummy names (derived from the SAME get_dummies call used
+        # to build them below, so the names match exactly).
+        _reserved = {"const", treatment, time, f"{treatment}:{time}", "_treat_time"}
+        if fixed_effects:
+            for fe in fixed_effects:
+                _reserved.update(
+                    pd.get_dummies(working_data[fe], prefix=fe, drop_first=True).columns
+                )
+        validate_covariate_names(covariates, _reserved, estimator="DifferenceInDifferences")
+
         # Build design matrix
         X = np.column_stack([np.ones(len(y)), d, t, dt])
         var_names = ["const", treatment, time, f"{treatment}:{time}"]
@@ -1244,6 +1268,12 @@ def fit(  # type: ignore[override]
             All other periods are treated as pre-treatment.
         covariates : list, optional
             List of covariate column names to include as linear controls.
+            Names must not collide with reserved structural terms (``const``,
+            the treatment column name, ``period_{p}`` dummies, the
+            ``{treatment}:period_{p}`` interactions, fixed-effect dummy names, or
+            internal working columns) and must be unique; a collision or
+            duplicate raises ``ValueError`` (it would otherwise silently
+            overwrite a structural coefficient).
         fixed_effects : list, optional
             List of categorical column names to include as fixed effects.
         absorb : list, optional
@@ -1271,7 +1301,9 @@ def fit(  # type: ignore[override]
         Raises
         ------
         ValueError
-            If required parameters are missing or data validation fails.
+            If required parameters are missing or data validation fails, or if
+            a covariate name collides with a reserved structural term name or
+            duplicates another covariate.
         """
         # Fall back to analytical inference if wild bootstrap requested
         # (must happen before _resolve_survey_for_fit which rejects bootstrap+survey).
@@ -1567,6 +1599,27 @@ def fit(  # type: ignore[override]
             d = working_data[treatment].values.astype(float)
         t = working_data[time].values
 
+        # Reject covariate names that collide with reserved structural terms.
+        # Covariates are appended verbatim to var_names below and zipped into
+        # coef_dict, so a covariate named like a structural term (intercept,
+        # treatment, a period dummy, a treatment-period interaction, an internal
+        # _did_* working column, or a fixed-effect dummy) would silently
+        # overwrite that coefficient (dict last-write-wins). FE dummy names use
+        # the SAME get_dummies call (and fe==time skip) as the construction below.
+        _reserved = {"const", treatment, "_did_treatment"}
+        _reserved.update(f"period_{p}" for p in non_ref_periods)
+        _reserved.update(f"{treatment}:period_{p}" for p in non_ref_periods)
+        _reserved.update(f"_did_period_{p}" for p in non_ref_periods)
+        _reserved.update(f"_did_interact_{p}" for p in non_ref_periods)
+        if fixed_effects:
+            for fe in fixed_effects:
+                if fe == time:
+                    continue
+                _reserved.update(
+                    pd.get_dummies(working_data[fe], prefix=fe, drop_first=True).columns
+                )
+        validate_covariate_names(covariates, _reserved, estimator="MultiPeriodDiD")
+
         # Build design matrix
         # Start with intercept and treatment main effect
         X = np.column_stack([np.ones(len(y)), d])
diff --git a/diff_diff/twfe.py b/diff_diff/twfe.py
index 72948cb9..e090ce85 100644
--- a/diff_diff/twfe.py
+++ b/diff_diff/twfe.py
@@ -14,6 +14,9 @@
 from diff_diff.estimators import DifferenceInDifferences
 from diff_diff.linalg import LinearRegression
 from diff_diff.results import DiDResults
+from diff_diff.utils import (
+    validate_covariate_names,
+)
 from diff_diff.utils import (
     within_transform as _within_transform_util,
 )
@@ -135,7 +138,12 @@ def fit(  # type: ignore[override]
         unit : str
             Name of unit identifier column.
         covariates : list, optional
-            List of covariate column names.
+            List of covariate column names. Names must not collide with reserved
+            structural terms (``const``, ``ATT``, unit/time fixed-effect dummy
+            names, or the internal ``_treatment_post`` column) and must be
+            unique; a collision or duplicate raises ``ValueError`` (it would
+            otherwise silently overwrite a structural coefficient on the
+            full-dummy HC2/HC2-BM path).
         survey_design : SurveyDesign, optional
             Survey design specification for design-based inference. When provided,
             uses Taylor Series Linearization for variance estimation and
@@ -145,6 +153,12 @@ def fit(  # type: ignore[override]
         -------
         DiDResults
             Estimation results.
+
+        Raises
+        ------
+        ValueError
+            If a covariate name collides with a reserved structural term name
+            or duplicates another covariate.
         """
         # Validate unit column exists
         if unit not in data.columns:
@@ -282,6 +296,20 @@ def fit(  # type: ignore[override]
         n_units = data[unit].nunique()
         n_times = data[time].nunique()
 
+        # Reject covariate names that collide with reserved structural terms.
+        # On the full-dummy (HC2/HC2-BM) path covariates are zipped into the
+        # coefficient dict alongside "const"/"ATT" and the unit/time dummies, so
+        # a colliding covariate would silently overwrite that coefficient (dict
+        # last-write-wins). The within-transform path does not expose covariates
+        # in the dict, but the covariate is still in X and a covariate named
+        # "_treatment_post" would clobber the internal interaction column; we
+        # therefore guard ALL paths. Unit/time dummy names use the SAME
+        # get_dummies call as the full-dummy construction below.
+        _reserved = {"const", "ATT", "_treatment_post"}
+        _reserved.update(pd.get_dummies(data[unit], prefix=f"_fe_{unit}", drop_first=True).columns)
+        _reserved.update(pd.get_dummies(data[time], prefix=f"_fe_{time}", drop_first=True).columns)
+        validate_covariate_names(covariates, _reserved, estimator="TwoWayFixedEffects")
+
         if use_full_dummy:
             # HC2 / HC2-BM full-dummy build: bypass the within-transform
             # and stack [intercept, treated×post, covariates, unit_dummies,
diff --git a/diff_diff/utils.py b/diff_diff/utils.py
index 41f2f574..817c8e5d 100644
--- a/diff_diff/utils.py
+++ b/diff_diff/utils.py
@@ -4,7 +4,7 @@
 
 import warnings
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, Iterable, List, Optional, Tuple
 
 import numpy as np
 import pandas as pd
@@ -67,6 +67,73 @@ def validate_binary(arr: np.ndarray, name: str) -> None:
         raise ValueError(f"{name} must be binary (0 or 1). " f"Found values: {unique_values}")
 
 
+def validate_covariate_names(
+    covariates: Optional[List[str]],
+    reserved_names: Iterable[str],
+    *,
+    estimator: str = "estimator",
+) -> None:
+    """
+    Validate that covariate column names do not collide with reserved
+    structural term names (and are not duplicated within ``covariates``).
+
+    Fitted coefficients are stored in a ``name -> value`` dict built by zipping
+    a variable-name list -- structural term names PLUS the user covariate column
+    names appended verbatim -- with the coefficient vector. A covariate whose
+    name equals a reserved structural name (the intercept ``const``, the
+    treatment/time indicators, the interaction term, period dummies,
+    fixed-effect dummies, or an internal working column) would silently
+    overwrite the structural coefficient (Python dict last-write-wins),
+    corrupting the result with no error. Duplicate names within ``covariates``
+    collapse to a single dict entry the same way.
+
+    The comparison is case-sensitive: column names and dict keys are
+    case-sensitive, so e.g. ``Const`` does not actually collide with ``const``
+    and is allowed.
+
+    Parameters
+    ----------
+    covariates : list of str or None
+        User-supplied covariate column names. ``None`` or empty is a no-op.
+    reserved_names : iterable of str
+        Reserved structural term names this estimator builds (estimator-specific).
+    estimator : str
+        Estimator name, used in the error message.
+
+    Raises
+    ------
+    ValueError
+        If a covariate name collides with a reserved structural name, or if
+        ``covariates`` contains duplicate names.
+    """
+    if not covariates:
+        return
+    reserved = set(reserved_names)
+    collisions = sorted({c for c in covariates if c in reserved})
+    if collisions:
+        raise ValueError(
+            f"{estimator}: covariate name(s) {collisions} collide with reserved "
+            f"structural term name(s). These names are used internally for the "
+            f"intercept, the treatment/time indicators, the interaction term, "
+            f"period dummies, fixed-effect dummies, or internal working columns, "
+            f"and a colliding covariate would silently overwrite the structural "
+            f"coefficient. Rename the covariate column(s). Reserved names for "
+            f"this fit: {sorted(reserved)}."
+        )
+    seen: set = set()
+    duplicates = []
+    for c in covariates:
+        if c in seen:
+            duplicates.append(c)
+        seen.add(c)
+    if duplicates:
+        raise ValueError(
+            f"{estimator}: duplicate covariate name(s) {sorted(set(duplicates))} "
+            f"in `covariates`. Each covariate maps to one coefficient; duplicates "
+            f"collapse to a single entry. Remove the duplicate(s)."
+        )
+
+
 def warn_if_not_converged(
     converged: bool,
     method_name: str,
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index cb838ec6..5bf5cc1e 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -78,6 +78,7 @@ where τ is the ATT.
   - Tolerance: `1e-07` (matches R's `qr()` default), relative to largest diagonal element of R in QR decomposition
   - Controllable via `rank_deficient_action` parameter: "warn" (default), "error", or "silent"
   - **Note (scale invariance):** the shared `diff_diff/linalg.py` rank check re-detects on column-equilibrated (unit 2-norm) columns when the raw pass reports a deficiency (adopting the higher equilibrated rank and its scale-corrected pivot selection only when a large-scale column inflated the threshold; genuine collinearity keeps the raw selection), and the least-squares solve equilibrates columns then unscales the coefficients — so rank detection and the fit are invariant to per-column scaling; for a well-scaled collinear design the dropped column is unchanged, while a scale-induced under-count adopts the scale-corrected equilibrated selection (which may differ from the raw choice but is guaranteed to retain an identified subset). This covers every covariate fit routed through `solve_ols` — DiD, TwoWayFixedEffects, MultiPeriodDiD, ImputationDiD, TwoStageDiD, and TripleDifference. CallawaySantAnna and StaggeredTripleDifference fit their covariate outcome-regression nuisance via estimator-local `cho_solve`(X'X) / `lstsq` that are not yet equilibrated — see the CallawaySantAnna rank-deficiency Note's scope caveat.
+- **Note (covariate-name collision guard):** `DifferenceInDifferences`, `MultiPeriodDiD`, and `TwoWayFixedEffects` build their `coefficients` dict by zipping a variable-name list (structural terms + user covariate column names, appended verbatim) with the coefficient vector. A covariate whose name equals a reserved structural name — `const`, the treatment/time column names, the `{treatment}:{time}` interaction (DiD), the `period_{p}` dummies / `{treatment}:period_{p}` interactions (MultiPeriodDiD), `ATT` (TwoWayFixedEffects), any fixed-effect / unit / time dummy name, or an internal `_`-prefixed working column (`_treat_time`, `_did_treatment`, `_treatment_post`) — would silently overwrite that structural coefficient (dict last-write-wins) with no error. `fit()` now calls `utils.validate_covariate_names()` before building the design, raising `ValueError` on such a collision (case-sensitive, so `Const` is allowed) and on duplicate covariate names. Reserved fixed-effect/unit/time dummy names are taken from the same `pd.get_dummies(..., drop_first=True)` call used to build them (exact match, including for `Categorical` columns). The TwoWayFixedEffects guard fires on both variance paths — the within-transform path exposes only `{"ATT": att}`, but a `_treatment_post`-named covariate would still clobber the internal interaction column. The influence-function estimators (CallawaySantAnna, etc.) key results by `(g, t)` and are unaffected.
 
 **Reference implementation(s):**
 - R: `fixest::feols()` with interaction term
@@ -207,6 +208,7 @@ where V is the VCV sub-matrix for post-treatment δ_e coefficients.
 - Rank-deficient design matrix (collinearity): warns and sets NA for dropped
   coefficients (R-style, matches `lm()`)
   - **Note (scale invariance):** shared `diff_diff/linalg.py` behavior — rank detection re-checks on column-equilibrated columns and the solve equilibrates/unscales, so detection and fit are invariant to per-column scaling. For a well-scaled collinear design the dropped column is unchanged; a scale-induced under-count adopts the scale-corrected equilibrated selection (which may differ from the raw choice but retains an identified subset). See the CallawaySantAnna rank-deficiency Note.
+  - **Note (covariate-name collision guard):** a covariate named like a reserved structural term (`const`, the treatment column, a `period_{p}` dummy, a `{treatment}:period_{p}` interaction, a fixed-effect dummy, or an internal `_did_*` column) — or a duplicate covariate name — raises `ValueError` (would otherwise silently overwrite a structural coefficient in the coef dict). See the DifferenceInDifferences "covariate-name collision guard" Note.
 - Average ATT (`avg_att`) is NA if any post-period effect is unidentified
   (R-style NA propagation)
 - NaN inference for undefined statistics:
@@ -279,6 +281,7 @@ This matches the behavior of R's `fixest::feols()` with absorbed FE.
 - Covariate collinearity emits warning but estimation continues (ATT still identified)
 - Rank-deficient design matrix: warns and sets NA for dropped coefficients (R-style, matches `lm()`)
   - **Note (scale invariance):** shared `diff_diff/linalg.py` behavior — rank detection re-checks on column-equilibrated columns and the solve equilibrates/unscales, so detection and fit are invariant to per-column scaling. For a well-scaled collinear design the dropped column is unchanged; a scale-induced under-count adopts the scale-corrected equilibrated selection (which may differ from the raw choice but retains an identified subset). See the CallawaySantAnna rank-deficiency Note.
+  - **Note (covariate-name collision guard):** a covariate named `const`, `ATT`, `_treatment_post`, or a unit/time fixed-effect dummy name — or a duplicate covariate name — raises `ValueError` on both variance paths (would otherwise silently overwrite a structural coefficient on the full-dummy HC2/HC2-BM path). See the DifferenceInDifferences "covariate-name collision guard" Note.
 - Unbalanced panels handled via proper demeaning
 - Multi-period `time` parameter: only binary (0/1) post indicator is recommended; multi-period values
   produce `treated × period_number` rather than `treated × post_indicator`. A `UserWarning` is
diff --git a/tests/test_estimators.py b/tests/test_estimators.py
index 77382274..e4880b25 100644
--- a/tests/test_estimators.py
+++ b/tests/test_estimators.py
@@ -3555,3 +3555,156 @@ def test_twfe_with_absorbed_covariate(self):
 
         assert np.isfinite(results.att)
         assert results.se > 0
+
+
+class TestCovariateNameCollision:
+    """PR3: a covariate whose name equals a reserved structural term silently
+    overwrote that coefficient in the zip()-built coef_dict (dict last-write-wins).
+    The DiD-family estimators now raise ValueError; non-colliding covariates leave
+    the structural coefficients intact. TWFE collision tests live in
+    tests/test_estimators_vcov_type.py (next to the full-dummy invariant)."""
+
+    @staticmethod
+    def _did_data():
+        rng = np.random.default_rng(7)
+        rows = []
+        for unit in range(60):
+            treated = int(unit < 30)
+            for post in (0, 1):
+                y = 1.0 + 2.0 * treated * post + rng.normal()
+                rows.append({"unit": unit, "treated": treated, "post": post, "outcome": y})
+        df = pd.DataFrame(rows)
+        df["x1"] = np.random.default_rng(11).normal(size=len(df))
+        return df
+
+    @staticmethod
+    def _mpd_data():
+        rng = np.random.default_rng(7)
+        rows = []
+        for unit in range(60):
+            treated = int(unit < 30)
+            ueff = rng.normal()
+            for period in range(6):
+                y = (
+                    10.0 + ueff + 0.5 * period
+                    + (3.0 if (treated and period >= 3) else 0.0)
+                    + rng.normal(0, 0.5)
+                )
+                rows.append(
+                    {"unit": unit, "treated": treated, "time": period, "outcome": y}
+                )
+        df = pd.DataFrame(rows)
+        df["x1"] = np.random.default_rng(11).normal(size=len(df))
+        return df
+
+    def test_did_collision_for_each_structural_name(self):
+        # Drive the reserved set from a clean fit's actual coefficient keys so
+        # the test cannot drift from the real var_names (const/treated/post/
+        # treated:post), plus the internal _treat_time working column.
+        data = self._did_data()
+        clean = DifferenceInDifferences().fit(
+            data, "outcome", "treated", "post", covariates=["x1"]
+        )
+        reserved = [k for k in clean.coefficients if k != "x1"] + ["_treat_time"]
+        assert "const" in reserved and "treated:post" in reserved  # sanity
+        for name in reserved:
+            d2 = data.copy()
+            if name not in d2.columns:
+                d2[name] = np.random.default_rng(5).normal(size=len(d2))
+            with pytest.raises(ValueError, match="collide"):
+                DifferenceInDifferences().fit(
+                    d2, "outcome", "treated", "post", covariates=[name]
+                )
+
+    def test_did_fixed_effects_dummy_collision(self):
+        # get_dummies(region, prefix="region", drop_first=True) drops "region_A"
+        # and keeps "region_B" -> a covariate named "region_B" collides.
+        data = self._did_data()
+        data["region"] = np.where(data["unit"] % 2 == 0, "A", "B")
+        data["region_B"] = np.random.default_rng(2).normal(size=len(data))
+        with pytest.raises(ValueError, match="collide"):
+            DifferenceInDifferences().fit(
+                data, "outcome", "treated", "post",
+                covariates=["region_B"], fixed_effects=["region"],
+            )
+
+    def test_did_noncolliding_preserves_structural_coefs(self):
+        data = self._did_data()
+        r = DifferenceInDifferences().fit(
+            data, "outcome", "treated", "post", covariates=["x1"]
+        )
+        ck = r.coefficients
+        assert np.isfinite(ck["const"]) and np.isfinite(ck["treated:post"])
+        assert "x1" in ck and ck["x1"] != ck["treated:post"]
+        # No key was overwritten: dict size matches the vcov rank.
+        assert len(ck) == r.vcov.shape[0]
+
+    def test_did_formula_colliding_raises(self):
+        data = self._did_data()
+        data["const"] = np.random.default_rng(3).normal(size=len(data))
+        with pytest.raises(ValueError, match="collide"):
+            DifferenceInDifferences().fit(data, formula="outcome ~ treated*post + const")
+
+    def test_did_formula_noncolliding_works(self):
+        data = self._did_data()
+        r = DifferenceInDifferences().fit(data, formula="outcome ~ treated*post + x1")
+        assert "x1" in r.coefficients
+
+    def test_did_duplicate_covariates_raise(self):
+        data = self._did_data()
+        with pytest.raises(ValueError, match="duplicate"):
+            DifferenceInDifferences().fit(
+                data, "outcome", "treated", "post", covariates=["x1", "x1"]
+            )
+
+    def test_mpd_collision_for_each_structural_name(self):
+        data = self._mpd_data()
+        clean = MultiPeriodDiD().fit(
+            data, "outcome", "treated", "time", covariates=["x1"]
+        )
+        # const/treated/period_*/treated:period_* (actual keys) + internal column.
+        reserved = [k for k in clean.coefficients if k != "x1"] + ["_did_treatment"]
+        assert any(k.startswith("period_") for k in reserved)  # sanity
+        for name in reserved:
+            d2 = data.copy()
+            if name not in d2.columns:
+                d2[name] = np.random.default_rng(5).normal(size=len(d2))
+            with pytest.raises(ValueError, match="collide"):
+                MultiPeriodDiD().fit(
+                    d2, "outcome", "treated", "time", covariates=[name]
+                )
+
+    def test_mpd_noncolliding_preserves_structural_coefs(self):
+        data = self._mpd_data()
+        r = MultiPeriodDiD().fit(
+            data, "outcome", "treated", "time", covariates=["x1"]
+        )
+        ck = r.coefficients
+        assert "x1" in ck and np.isfinite(ck["const"])
+        assert any(k.startswith("period_") for k in ck)
+        assert len(ck) == r.vcov.shape[0]
+
+    def test_synthetic_did_unaffected_by_guard(self):
+        # SyntheticDiD overrides fit() and never reaches the base-class guard;
+        # a covariate must still fit (regression lock that the guard didn't leak).
+        rng = np.random.default_rng(42)
+        rows = []
+        for unit in range(30):
+            treated = int(unit < 5)
+            ueff = rng.normal(0, 3)
+            for period in range(8):
+                y = (
+                    10.0 + ueff + 0.5 * period
+                    + (5.0 if (treated and period >= 4) else 0.0)
+                    + rng.normal(0, 0.5)
+                )
+                rows.append(
+                    {"unit": unit, "period": period, "treated": treated, "outcome": y}
+                )
+        d = pd.DataFrame(rows)
+        d["x1"] = rng.normal(size=len(d))
+        r = SyntheticDiD().fit(
+            d, "outcome", "treated", unit="unit", time="period",
+            post_periods=[4, 5, 6, 7], covariates=["x1"],
+        )
+        assert np.isfinite(r.att)
diff --git a/tests/test_estimators_vcov_type.py b/tests/test_estimators_vcov_type.py
index b25a577d..ea5926bf 100644
--- a/tests/test_estimators_vcov_type.py
+++ b/tests/test_estimators_vcov_type.py
@@ -2151,3 +2151,57 @@ def test_absorb_hc2_bm_replicate_weights_auto_routes(self):
         np.testing.assert_allclose(pe_a.se, pe_f.se, atol=1e-12)
         np.testing.assert_allclose(res_absorb.avg_att, res_fe.avg_att, atol=1e-12)
         np.testing.assert_allclose(res_absorb.avg_se, res_fe.avg_se, atol=1e-12)
+
+
+class TestTWFECovariateNameCollision:
+    """PR3: TwoWayFixedEffects covariate-name collision guard.
+
+    On the full-dummy HC2/HC2-BM path covariates are zipped into the coefficient
+    dict alongside "const"/"ATT" and the unit/time dummies, so a colliding
+    covariate would silently overwrite that coefficient. The within-transform
+    (default HC1) path exposes only {"ATT": att}, but the covariate is still in
+    X and a covariate named "_treatment_post" would clobber the internal
+    interaction column, so the guard fires on ALL paths. Lives here next to the
+    full-dummy ``len(coefficients) == vcov.shape[0]`` invariant.
+    """
+
+    @staticmethod
+    def _panel_with(covname: str) -> pd.DataFrame:
+        df = _make_did_panel(n_units=30, seed=20260420)
+        # unit 0..29 / time {0,1}: get_dummies(drop_first=True) keeps
+        # "_fe_unit_1".."_fe_unit_29" and "_fe_time_1".
+        df[covname] = np.random.default_rng(99).normal(size=len(df))
+        return df
+
+    @pytest.mark.parametrize("vcov_type", ["hc1", "hc2"])
+    @pytest.mark.parametrize(
+        "name", ["const", "ATT", "_treatment_post", "_fe_unit_1", "_fe_time_1"]
+    )
+    def test_collision_raises_on_all_paths(self, vcov_type, name):
+        df = self._panel_with(name)
+        with pytest.raises(ValueError, match="collide"):
+            TwoWayFixedEffects(vcov_type=vcov_type).fit(
+                df, outcome="y", treatment="treated", time="time", unit="unit",
+                covariates=[name],
+            )
+
+    def test_hc2_full_dummy_noncolliding_preserves_coefs(self):
+        df = self._panel_with("x1")
+        r = TwoWayFixedEffects(vcov_type="hc2").fit(
+            df, outcome="y", treatment="treated", time="time", unit="unit",
+            covariates=["x1"],
+        )
+        ck = r.coefficients
+        assert "ATT" in ck and "x1" in ck
+        # No key overwritten: dict size matches the full-dummy vcov rank.
+        assert len(ck) == r.vcov.shape[0]
+
+    def test_within_transform_noncolliding_returns_att_only(self):
+        df = self._panel_with("x1")
+        r = TwoWayFixedEffects().fit(  # default hc1 -> within-transform
+            df, outcome="y", treatment="treated", time="time", unit="unit",
+            covariates=["x1"],
+        )
+        # The within-transform path exposes only the ATT coefficient by design;
+        # the covariate is NOT a dict key there (so there is no overwrite surface).
+        assert set(r.coefficients.keys()) == {"ATT"}
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 9d605955..c30fb3f0 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -29,6 +29,7 @@
     equivalence_test_trends,
     safe_inference,
     validate_binary,
+    validate_covariate_names,
 )
 
 # =============================================================================
@@ -1302,3 +1303,42 @@ def test_multiple_post_periods(self):
         # Control DiD: 14 - 10 = 4
         # tau = 9 - 4 = 5
         assert abs(tau - 5.0) < 1e-6
+
+
+class TestValidateCovariateNames:
+    """Tests for validate_covariate_names (covariate/reserved-term collision guard)."""
+
+    def test_none_is_noop(self):
+        validate_covariate_names(None, {"const", "treated"})
+
+    def test_empty_is_noop(self):
+        validate_covariate_names([], {"const", "treated"})
+
+    def test_non_colliding_passes(self):
+        # Should not raise.
+        validate_covariate_names(["x1", "x2"], {"const", "treated", "treated:post"})
+
+    def test_collision_raises(self):
+        with pytest.raises(ValueError, match="collide"):
+            validate_covariate_names(["const"], {"const", "treated"})
+
+    def test_collision_lists_offender_and_estimator(self):
+        with pytest.raises(ValueError, match="MyEstimator") as exc:
+            validate_covariate_names(
+                ["age", "const"], {"const", "treated"}, estimator="MyEstimator"
+            )
+        # The offending name is reported (not the innocent one as a collision).
+        assert "const" in str(exc.value)
+
+    def test_case_sensitive_does_not_collide(self):
+        # Column names / dict keys are case-sensitive; "Const" != "const".
+        validate_covariate_names(["Const"], {"const", "treated"})
+
+    def test_duplicate_covariates_raise(self):
+        with pytest.raises(ValueError, match="duplicate"):
+            validate_covariate_names(["x1", "x1"], {"const"})
+
+    def test_duplicate_takes_precedence_only_when_no_collision(self):
+        # A name that both collides AND is duplicated reports the collision first.
+        with pytest.raises(ValueError, match="collide"):
+            validate_covariate_names(["const", "const"], {"const"})

From f6344d448722ed37ae34ec60ee3004eb4af8019f Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Mon, 1 Jun 2026 09:31:48 -0400
Subject: [PATCH 2/4] Add validate_design_term_names backstop for fixed-effect
 dummy collisions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The upfront validate_covariate_names guard only validates the user `covariates`
list. Fixed-effect dummy names (`{fe}_{value}`) are appended to var_names AFTER
that guard and were not themselves checked against structural names — so
MultiPeriodDiD(fixed_effects=["period"]) (a non-time FE whose dummies are named
period_1, period_2, ...) could still collide with the structural period_{p}
event-study keys and silently overwrite them in the coefficients dict, the exact
bug class this PR addresses (caught in local review).

Add a shared validate_design_term_names(var_names) backstop in utils.py that
enforces uniqueness on the FINAL assembled term list (structural + covariates +
fixed-effect dummies) right before the coefficients dict is built, in all three
estimators. This catches data-dependent collisions (FE-dummy vs structural,
FE-dummy vs FE-dummy) that cannot be known up front. The upfront covariate guard
is kept for its precise, fail-fast messaging on the common case.

Tests: tests/test_utils.py::TestValidateDesignTermNames + an MPD fixed-effect-
dummy-vs-period_{p} regression in TestCovariateNameCollision. Full estimator +
TWFE-methodology suites pass with the backstop (no false positives on existing
fixed-effect fits).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 diff_diff/estimators.py  |  6 +++++
 diff_diff/twfe.py        |  5 +++++
 diff_diff/utils.py       | 48 ++++++++++++++++++++++++++++++++++++++++
 tests/test_estimators.py | 13 +++++++++++
 tests/test_utils.py      | 23 +++++++++++++++++++
 5 files changed, 95 insertions(+)

diff --git a/diff_diff/estimators.py b/diff_diff/estimators.py
index bb9aa180..3c0c35dd 100644
--- a/diff_diff/estimators.py
+++ b/diff_diff/estimators.py
@@ -33,6 +33,7 @@
     safe_inference,
     validate_binary,
     validate_covariate_names,
+    validate_design_term_names,
     wild_bootstrap_se,
 )
 
@@ -685,6 +686,7 @@ def _refit_did_absorb(w_r):
         n_control = n_control_raw
 
         # Create coefficient dictionary
+        validate_design_term_names(var_names, estimator="DifferenceInDifferences")
         coef_dict = {name: coef for name, coef in zip(var_names, coefficients)}
 
         # Determine inference method and bootstrap info
@@ -2036,6 +2038,10 @@ def _refit_mp_absorb(w_r):
         n_treated = n_treated_raw
         n_control = n_control_raw
 
+        # Backstop: reject any duplicate in the FINAL term list (e.g. a
+        # fixed-effect dummy colliding with a structural `period_{p}` key)
+        # before it silently overwrites a coefficient in the dict below.
+        validate_design_term_names(var_names, estimator="MultiPeriodDiD")
         # Create coefficient dictionary
         coef_dict = {name: coef for name, coef in zip(var_names, coefficients)}
 
diff --git a/diff_diff/twfe.py b/diff_diff/twfe.py
index e090ce85..9a6597a4 100644
--- a/diff_diff/twfe.py
+++ b/diff_diff/twfe.py
@@ -16,6 +16,7 @@
 from diff_diff.results import DiDResults
 from diff_diff.utils import (
     validate_covariate_names,
+    validate_design_term_names,
 )
 from diff_diff.utils import (
     within_transform as _within_transform_util,
@@ -367,6 +368,10 @@ def fit(  # type: ignore[override]
                 + list(unit_dummies_df.columns)
                 + list(time_dummies_df.columns)
             )
+            # Backstop: reject any duplicate in the FINAL term list (e.g. a
+            # unit/time dummy colliding with a structural term or another dummy)
+            # before it silently overwrites a coefficient in the dict below.
+            validate_design_term_names(_twfe_var_names, estimator="TwoWayFixedEffects")
         else:
             # Default within-transform path (HC1 / classical / Conley):
             # demean outcome, covariates, AND interaction in a single pass
diff --git a/diff_diff/utils.py b/diff_diff/utils.py
index 817c8e5d..90e5785e 100644
--- a/diff_diff/utils.py
+++ b/diff_diff/utils.py
@@ -134,6 +134,54 @@ def validate_covariate_names(
         )
 
 
+def validate_design_term_names(
+    var_names: Iterable[str],
+    *,
+    estimator: str = "estimator",
+) -> None:
+    """
+    Raise if the assembled design term-name list contains duplicates.
+
+    Backstop for :func:`validate_covariate_names`: even after the user
+    covariates are cleared, a fixed-effect dummy name (``{fe}_{value}``) can
+    still collide with a structural term — most notably a ``MultiPeriodDiD``
+    ``period_{p}`` event-study key when a non-time fixed effect produces matching
+    dummy names — or with another dummy. Such a duplicate would silently
+    overwrite a coefficient when ``var_names`` is zipped into the result's
+    ``coefficients`` dict (Python dict last-write-wins). This checks the FINAL
+    name list (structural terms + covariates + fixed-effect dummies) right
+    before the dict is built, catching collisions that depend on the data and so
+    cannot be known up front.
+
+    Parameters
+    ----------
+    var_names : iterable of str
+        The fully assembled design-matrix column-name list.
+    estimator : str
+        Estimator name, used in the error message.
+
+    Raises
+    ------
+    ValueError
+        If any name appears more than once.
+    """
+    seen: set = set()
+    duplicates = []
+    for name in var_names:
+        if name in seen:
+            duplicates.append(name)
+        seen.add(name)
+    if duplicates:
+        raise ValueError(
+            f"{estimator}: the fitted design has duplicate term name(s) "
+            f"{sorted(set(duplicates))} — a covariate or fixed-effect dummy name "
+            f"collides with a structural term (intercept, treatment/time "
+            f"indicators, the interaction, or period dummies) or with another "
+            f"column. This would silently overwrite a coefficient in the result. "
+            f"Rename the offending fixed-effect category or covariate column."
+        )
+
+
 def warn_if_not_converged(
     converged: bool,
     method_name: str,
diff --git a/tests/test_estimators.py b/tests/test_estimators.py
index e4880b25..7de586d4 100644
--- a/tests/test_estimators.py
+++ b/tests/test_estimators.py
@@ -3684,6 +3684,19 @@ def test_mpd_noncolliding_preserves_structural_coefs(self):
         assert any(k.startswith("period_") for k in ck)
         assert len(ck) == r.vcov.shape[0]
 
+    def test_mpd_fixed_effect_dummy_collides_with_period_keys(self):
+        # Backstop: a NON-time fixed effect whose get_dummies names match the
+        # structural period_{p} event-study keys must raise (would otherwise
+        # silently overwrite those coefficients) — the FE dummy is appended to
+        # var_names directly, so the upfront covariate guard cannot catch it.
+        data = self._mpd_data()
+        data["period"] = data["time"]  # FE column 'period' -> 'period_1'... dummies
+        with pytest.raises(ValueError, match="collide"):
+            MultiPeriodDiD().fit(
+                data, "outcome", "treated", "time",
+                covariates=["x1"], fixed_effects=["period"],
+            )
+
     def test_synthetic_did_unaffected_by_guard(self):
         # SyntheticDiD overrides fit() and never reaches the base-class guard;
         # a covariate must still fit (regression lock that the guard didn't leak).
diff --git a/tests/test_utils.py b/tests/test_utils.py
index c30fb3f0..f1288674 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -30,6 +30,7 @@
     safe_inference,
     validate_binary,
     validate_covariate_names,
+    validate_design_term_names,
 )
 
 # =============================================================================
@@ -1342,3 +1343,25 @@ def test_duplicate_takes_precedence_only_when_no_collision(self):
         # A name that both collides AND is duplicated reports the collision first.
         with pytest.raises(ValueError, match="collide"):
             validate_covariate_names(["const", "const"], {"const"})
+
+
+class TestValidateDesignTermNames:
+    """Tests for validate_design_term_names (final var_names uniqueness backstop)."""
+
+    def test_unique_passes(self):
+        validate_design_term_names(["const", "treated", "post", "treated:post", "x1"])
+
+    def test_duplicate_raises(self):
+        # e.g. a fixed-effect dummy "period_2" colliding with a structural key.
+        with pytest.raises(ValueError, match="collide"):
+            validate_design_term_names(
+                ["const", "treated", "period_2", "period_2"], estimator="MultiPeriodDiD"
+            )
+
+    def test_message_names_duplicate_and_estimator(self):
+        with pytest.raises(ValueError, match="MultiPeriodDiD") as exc:
+            validate_design_term_names(["a", "a"], estimator="MultiPeriodDiD")
+        assert "a" in str(exc.value)
+
+    def test_empty_passes(self):
+        validate_design_term_names([])

From ef4ea276781a295277026b9c3af6c05499db35e7 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Mon, 1 Jun 2026 09:44:52 -0400
Subject: [PATCH 3/4] Derive reserved FE-dummy names without materializing the
 dummy matrix (perf)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The collision guard reserved fixed-effect/unit/time dummy names via
pd.get_dummies(col, ...).columns, which materializes the full (n x G) dummy
matrix. In TwoWayFixedEffects this ran UNCONDITIONALLY — including the default
within-transform (hc1/classical/conley) path whose entire purpose is to avoid
expanding high-cardinality FE dummies — so a large-G panel paid an avoidable
O(n x G) allocation (latency / potential OOM) before estimation (caught in
local review).

Add utils.fe_dummy_names(col, prefix): derives the same names get_dummies would
produce, from the category levels only (O(G)), via pd.Categorical(col).categories
(sorted unique for a plain column, declared order for a Categorical) + drop_first.
Verified to match pd.get_dummies(drop_first=True).columns exactly across int /
str / float / NaN / non-default-order Categorical dtypes. Wire it into the DiD,
MPD, and TWFE reserved-name construction in place of get_dummies. The within-
transform TWFE path now performs no FE-dummy materialization at all.

Tests: tests/test_utils.py::TestFeDummyNames (equivalence to get_dummies,
incl. Categorical order) + a TwoWayFixedEffects within-path regression that
monkeypatches pd.get_dummies to raise and asserts the hc1 fit still succeeds.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 diff_diff/estimators.py            | 20 ++++++++---------
 diff_diff/twfe.py                  | 13 +++++++----
 diff_diff/utils.py                 | 35 ++++++++++++++++++++++++++++++
 tests/test_estimators_vcov_type.py | 20 +++++++++++++++++
 tests/test_utils.py                | 21 ++++++++++++++++++
 5 files changed, 95 insertions(+), 14 deletions(-)

diff --git a/diff_diff/estimators.py b/diff_diff/estimators.py
index 3c0c35dd..9545f316 100644
--- a/diff_diff/estimators.py
+++ b/diff_diff/estimators.py
@@ -30,6 +30,7 @@
 from diff_diff.utils import (
     WildBootstrapResults,
     demean_by_group,
+    fe_dummy_names,
     safe_inference,
     validate_binary,
     validate_covariate_names,
@@ -480,14 +481,13 @@ def fit(
         # silently overwrite that coefficient (dict last-write-wins). The
         # reserved set covers the intercept, treatment/time indicators, the
         # interaction, the internal _treat_time working column, and any
-        # fixed-effect dummy names (derived from the SAME get_dummies call used
-        # to build them below, so the names match exactly).
+        # fixed-effect dummy names (derived via fe_dummy_names WITHOUT
+        # materializing the dummy matrix; names match the get_dummies build
+        # below exactly). validate_design_term_names re-checks the FINAL list.
         _reserved = {"const", treatment, time, f"{treatment}:{time}", "_treat_time"}
         if fixed_effects:
             for fe in fixed_effects:
-                _reserved.update(
-                    pd.get_dummies(working_data[fe], prefix=fe, drop_first=True).columns
-                )
+                _reserved.update(fe_dummy_names(working_data[fe], fe))
         validate_covariate_names(covariates, _reserved, estimator="DifferenceInDifferences")
 
         # Build design matrix
@@ -1606,8 +1606,10 @@ def fit(  # type: ignore[override]
         # coef_dict, so a covariate named like a structural term (intercept,
         # treatment, a period dummy, a treatment-period interaction, an internal
         # _did_* working column, or a fixed-effect dummy) would silently
-        # overwrite that coefficient (dict last-write-wins). FE dummy names use
-        # the SAME get_dummies call (and fe==time skip) as the construction below.
+        # overwrite that coefficient (dict last-write-wins). FE dummy names are
+        # derived via fe_dummy_names (no dummy-matrix materialization), matching
+        # the construction below (and applying the same fe==time skip).
+        # validate_design_term_names re-checks the FINAL list before coef_dict.
         _reserved = {"const", treatment, "_did_treatment"}
         _reserved.update(f"period_{p}" for p in non_ref_periods)
         _reserved.update(f"{treatment}:period_{p}" for p in non_ref_periods)
@@ -1617,9 +1619,7 @@ def fit(  # type: ignore[override]
             for fe in fixed_effects:
                 if fe == time:
                     continue
-                _reserved.update(
-                    pd.get_dummies(working_data[fe], prefix=fe, drop_first=True).columns
-                )
+                _reserved.update(fe_dummy_names(working_data[fe], fe))
         validate_covariate_names(covariates, _reserved, estimator="MultiPeriodDiD")
 
         # Build design matrix
diff --git a/diff_diff/twfe.py b/diff_diff/twfe.py
index 9a6597a4..ce7ff445 100644
--- a/diff_diff/twfe.py
+++ b/diff_diff/twfe.py
@@ -15,6 +15,7 @@
 from diff_diff.linalg import LinearRegression
 from diff_diff.results import DiDResults
 from diff_diff.utils import (
+    fe_dummy_names,
     validate_covariate_names,
     validate_design_term_names,
 )
@@ -304,11 +305,15 @@ def fit(  # type: ignore[override]
         # last-write-wins). The within-transform path does not expose covariates
         # in the dict, but the covariate is still in X and a covariate named
         # "_treatment_post" would clobber the internal interaction column; we
-        # therefore guard ALL paths. Unit/time dummy names use the SAME
-        # get_dummies call as the full-dummy construction below.
+        # therefore guard ALL paths. Unit/time dummy names are derived via
+        # fe_dummy_names WITHOUT materializing the dummy matrix — critical here
+        # because the within-transform path (the default hc1/classical/conley
+        # branch) deliberately never expands the full FE dummies (its scaling
+        # contract for high-cardinality panels); a get_dummies build would
+        # defeat that. validate_design_term_names re-checks the full-dummy list.
         _reserved = {"const", "ATT", "_treatment_post"}
-        _reserved.update(pd.get_dummies(data[unit], prefix=f"_fe_{unit}", drop_first=True).columns)
-        _reserved.update(pd.get_dummies(data[time], prefix=f"_fe_{time}", drop_first=True).columns)
+        _reserved.update(fe_dummy_names(data[unit], f"_fe_{unit}"))
+        _reserved.update(fe_dummy_names(data[time], f"_fe_{time}"))
         validate_covariate_names(covariates, _reserved, estimator="TwoWayFixedEffects")
 
         if use_full_dummy:
diff --git a/diff_diff/utils.py b/diff_diff/utils.py
index 90e5785e..7723dd79 100644
--- a/diff_diff/utils.py
+++ b/diff_diff/utils.py
@@ -182,6 +182,41 @@ def validate_design_term_names(
         )
 
 
+def fe_dummy_names(col: pd.Series, prefix: str) -> List[str]:
+    """
+    Reserved fixed-effect dummy column names for the collision guard, matching
+    ``pd.get_dummies(col, prefix=prefix, drop_first=True).columns`` WITHOUT
+    materializing the dense ``(n x G)`` dummy matrix.
+
+    The within-transform ``TwoWayFixedEffects`` path is specifically designed to
+    avoid expanding high-cardinality fixed-effect dummies (that is its scaling
+    contract), so the collision guard must reserve those names without building
+    the dummy block. ``pd.get_dummies`` orders categories via
+    ``pd.Categorical(col).categories`` — sorted unique values for a plain column,
+    the declared category order for a ``Categorical`` — then ``drop_first=True``
+    drops the first. This derivation reproduces that exactly (including
+    ``Categorical`` columns with a non-default category order) at ``O(G)`` memory.
+
+    Parameters
+    ----------
+    col : pandas.Series
+        The fixed-effect / unit / time column.
+    prefix : str
+        Dummy-name prefix (the project uses ``fe`` for ``fixed_effects`` and
+        ``_fe_{unit}`` / ``_fe_{time}`` for TWFE unit/time dummies).
+
+    Returns
+    -------
+    list of str
+        The kept (post ``drop_first``) dummy column names.
+    """
+    if isinstance(col.dtype, pd.CategoricalDtype):
+        cats = list(col.cat.categories)
+    else:
+        cats = list(pd.Categorical(col).categories)
+    return [f"{prefix}_{c}" for c in cats[1:]]
+
+
 def warn_if_not_converged(
     converged: bool,
     method_name: str,
diff --git a/tests/test_estimators_vcov_type.py b/tests/test_estimators_vcov_type.py
index ea5926bf..a41f6a23 100644
--- a/tests/test_estimators_vcov_type.py
+++ b/tests/test_estimators_vcov_type.py
@@ -2205,3 +2205,23 @@ def test_within_transform_noncolliding_returns_att_only(self):
         # The within-transform path exposes only the ATT coefficient by design;
         # the covariate is NOT a dict key there (so there is no overwrite surface).
         assert set(r.coefficients.keys()) == {"ATT"}
+
+    def test_within_path_does_not_materialize_fe_dummies(self, monkeypatch):
+        # Regression: the within-transform (default hc1) path must NOT build full
+        # unit/time dummy matrices merely to reserve collision names — that would
+        # defeat its high-cardinality scaling contract. Reserved names come from
+        # fe_dummy_names (category levels only), so pd.get_dummies must never be
+        # called on this path.
+        df = self._panel_with("x1")
+
+        def _boom(*args, **kwargs):
+            raise AssertionError(
+                "pd.get_dummies must not be called on the within-transform path"
+            )
+
+        monkeypatch.setattr(pd, "get_dummies", _boom)
+        r = TwoWayFixedEffects().fit(
+            df, outcome="y", treatment="treated", time="time", unit="unit",
+            covariates=["x1"],
+        )
+        assert set(r.coefficients.keys()) == {"ATT"}
diff --git a/tests/test_utils.py b/tests/test_utils.py
index f1288674..97f36d32 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -27,6 +27,7 @@
     compute_sdid_estimator,
     compute_time_weights,
     equivalence_test_trends,
+    fe_dummy_names,
     safe_inference,
     validate_binary,
     validate_covariate_names,
@@ -1365,3 +1366,23 @@ def test_message_names_duplicate_and_estimator(self):
 
     def test_empty_passes(self):
         validate_design_term_names([])
+
+
+class TestFeDummyNames:
+    """fe_dummy_names must match pd.get_dummies(drop_first=True).columns exactly
+    (including Categorical non-default order) without materializing the matrix."""
+
+    @pytest.mark.parametrize(
+        "col",
+        [
+            pd.Series([3, 1, 2, 1]),
+            pd.Series(["b", "a", "c", "a"]),
+            pd.Series(pd.Categorical(["m", "a", "z", "a"], categories=["m", "a", "z"])),
+            pd.Series([2.0, 1.0, 3.0]),
+            pd.Series(["b", "a", np.nan, "a"]),
+        ],
+    )
+    def test_matches_get_dummies(self, col):
+        assert fe_dummy_names(col, "fe") == list(
+            pd.get_dummies(col, prefix="fe", drop_first=True).columns
+        )

From 7ecffd21460cd64040dfa6e3934ff1a5772c19c4 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Mon, 1 Jun 2026 09:52:26 -0400
Subject: [PATCH 4/4] Move design-term uniqueness check before the regression
 (fail-fast)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

validate_design_term_names ran just before coef_dict construction — i.e. AFTER
the OLS fit — in DifferenceInDifferences and MultiPeriodDiD, so a duplicate
assembled term name (e.g. an MPD fixed-effect dummy colliding with a structural
period_{p} key) drove a wasted rank-deficient fit and emitted a misleading
multicollinearity warning before the intended ValueError (local review P3).
Move the check to immediately after var_names is fully assembled and before the
regression call in both estimators (TwoWayFixedEffects already checked pre-fit).
var_names is not mutated between assembly and coef_dict, so this is behavior-
preserving except that the ValueError now fires fast and warning-free.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 diff_diff/estimators.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/diff_diff/estimators.py b/diff_diff/estimators.py
index 9545f316..fcaa0520 100644
--- a/diff_diff/estimators.py
+++ b/diff_diff/estimators.py
@@ -510,6 +510,12 @@ def fit(
                     X = np.column_stack([X, dummies[col].values.astype(float)])
                     var_names.append(col)
 
+        # Reject any duplicate in the FINAL term list (e.g. a fixed-effect dummy
+        # colliding with a structural term) BEFORE the regression — so the fit is
+        # not wasted and no misleading multicollinearity warning is emitted ahead
+        # of the intended ValueError.
+        validate_design_term_names(var_names, estimator="DifferenceInDifferences")
+
         # Extract ATT index (coefficient on interaction term)
         att_idx = 3  # Index of interaction term
         att_var_name = f"{treatment}:{time}"
@@ -686,7 +692,6 @@ def _refit_did_absorb(w_r):
         n_control = n_control_raw
 
         # Create coefficient dictionary
-        validate_design_term_names(var_names, estimator="DifferenceInDifferences")
         coef_dict = {name: coef for name, coef in zip(var_names, coefficients)}
 
         # Determine inference method and bootstrap info
@@ -1680,6 +1685,12 @@ def fit(  # type: ignore[override]
                     X = np.column_stack([X, dummies[col].values.astype(float)])
                     var_names.append(col)
 
+        # Reject any duplicate in the FINAL term list (e.g. a fixed-effect dummy
+        # colliding with a structural period_{p} key) BEFORE the regression — so
+        # the fit is not wasted and no misleading multicollinearity warning is
+        # emitted ahead of the intended ValueError.
+        validate_design_term_names(var_names, estimator="MultiPeriodDiD")
+
         # Fit OLS using unified backend
         # Pass cluster_ids to solve_ols for proper vcov computation
         # This handles rank-deficient matrices by returning NaN for dropped columns
@@ -2038,11 +2049,8 @@ def _refit_mp_absorb(w_r):
         n_treated = n_treated_raw
         n_control = n_control_raw
 
-        # Backstop: reject any duplicate in the FINAL term list (e.g. a
-        # fixed-effect dummy colliding with a structural `period_{p}` key)
-        # before it silently overwrites a coefficient in the dict below.
-        validate_design_term_names(var_names, estimator="MultiPeriodDiD")
-        # Create coefficient dictionary
+        # Create coefficient dictionary (var_names uniqueness already enforced
+        # before the fit above).
         coef_dict = {name: coef for name, coef in zip(var_names, coefficients)}
 
         # Store results