From b044ae36ab6557d90137dfa78e07222b6d119d92 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 3 Jan 2026 23:13:53 +0000
Subject: [PATCH 1/2] Address code review feedback for CallawaySantAnna
 covariates

- Remove unused self._covariates instance variable
- Add docstring note about SE approximation in outcome regression
- Fix empty influence function in IPW without covariates
- Add test for extreme propensity scores (near-perfect separation)
- Update class docstring with covariate adjustment example
---
 diff_diff/staggered.py  | 22 +++++++++++++----
 tests/test_staggered.py | 53 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
index 370f30ce..50aec81a 100644
--- a/diff_diff/staggered.py
+++ b/diff_diff/staggered.py
@@ -476,6 +476,14 @@ class CallawaySantAnna:
     >>> from diff_diff import plot_event_study
     >>> plot_event_study(results)
 
+    With covariate adjustment (conditional parallel trends):
+
+    >>> # When parallel trends only holds conditional on covariates
+    >>> cs = CallawaySantAnna(estimation_method='dr')  # doubly robust
+    >>> results = cs.fit(data, outcome='outcome', unit='unit',
+    ...                  time='time', first_treat='first_treat',
+    ...                  covariates=['age', 'income'])  # adjust for confounders
+
     Notes
     -----
     The key innovation of Callaway & Sant'Anna (2021) is the disaggregated
@@ -593,9 +601,6 @@ def fit(
                 "Use n_bootstrap=0 for analytical standard errors."
             )
 
-        # Store covariates for use in estimation
-        self._covariates = covariates
-
         # Create working copy
         df = data.copy()
 
@@ -839,6 +844,12 @@ def _outcome_regression(
 
         Without covariates:
         Simple difference in means.
+
+        Note
+        ----
+        The standard error calculation with covariates uses an approximation
+        that ignores estimation error in the regression coefficients. For a
+        full sandwich variance estimator, see Sant'Anna & Zhao (2020).
         """
         n_t = len(treated_change)
         n_c = len(control_change)
@@ -970,7 +981,10 @@ def _ipw_estimation(
             # Adjusted variance for IPW
             se = np.sqrt(var_t / n_t + var_c * (1 - p_treat) / (n_c * p_treat)) if (n_t > 0 and n_c > 0 and p_treat > 0) else 0.0
 
-            inf_func = np.array([])  # Placeholder
+            # Influence function (for aggregation)
+            inf_treated = (treated_change - np.mean(treated_change)) / n_t
+            inf_control = (control_change - np.mean(control_change)) / n_c
+            inf_func = np.concatenate([inf_treated, -inf_control])
 
         return att, se, inf_func
 
diff --git a/tests/test_staggered.py b/tests/test_staggered.py
index c0ab92c5..71a45853 100644
--- a/tests/test_staggered.py
+++ b/tests/test_staggered.py
@@ -652,3 +652,56 @@ def test_treatment_effect_recovery_with_covariates(self):
         # Note: we use a generous bound due to finite sample variance
         assert results.overall_att > 0, "ATT should be positive"
         assert abs(results.overall_att - 3.0) < 2.0, f"ATT={results.overall_att} too far from 3.0"
+
+    def test_extreme_propensity_scores(self):
+        """Test handling of covariates that strongly predict treatment.
+
+        When covariates nearly perfectly separate treated/control units,
+        propensity scores approach 0 or 1. The estimator should handle
+        this gracefully via propensity score clipping.
+        """
+        np.random.seed(42)
+        n_units = 100
+        n_periods = 8
+
+        # Generate unit and time identifiers
+        units = np.repeat(np.arange(n_units), n_periods)
+        times = np.tile(np.arange(n_periods), n_units)
+
+        # Create a covariate that strongly predicts treatment
+        # High values -> treated, low values -> never-treated
+        x_strong = np.random.randn(n_units)
+        x_strong_expanded = np.repeat(x_strong, n_periods)
+
+        # Assign treatment based on covariate (top 50% treated at period 4)
+        first_treat = np.zeros(n_units)
+        first_treat[x_strong > np.median(x_strong)] = 4
+        first_treat_expanded = np.repeat(first_treat, n_periods)
+
+        # Generate outcomes
+        post = (times >= first_treat_expanded) & (first_treat_expanded > 0)
+        outcomes = 1.0 + 0.5 * x_strong_expanded + 2.0 * post + np.random.randn(len(units)) * 0.3
+
+        data = pd.DataFrame({
+            'unit': units,
+            'time': times,
+            'outcome': outcomes,
+            'first_treat': first_treat_expanded.astype(int),
+            'x_strong': x_strong_expanded,
+        })
+
+        # IPW should handle extreme propensity scores via clipping
+        cs = CallawaySantAnna(estimation_method='ipw')
+        results = cs.fit(
+            data,
+            outcome='outcome',
+            unit='unit',
+            time='time',
+            first_treat='first_treat',
+            covariates=['x_strong']
+        )
+
+        # Should produce valid results (not NaN or inf)
+        assert np.isfinite(results.overall_att), "ATT should be finite"
+        assert np.isfinite(results.overall_se), "SE should be finite"
+        assert results.overall_se > 0, "SE should be positive"

From c7c9954a2bd6d73944120db158ff4bc513bc3f73 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 3 Jan 2026 23:26:37 +0000
Subject: [PATCH 2/2] Consolidate PR #22 review feedback into single document

- Move scattered docstring notes to docs/reviews/pr22_covariate_adjustment_review.md
- Keep actual code fixes: removed unused self._covariates, fixed IPW influence function
- Keep new test for extreme propensity scores
---
 diff_diff/staggered.py                        | 14 ----
 .../pr22_covariate_adjustment_review.md       | 65 +++++++++++++++++++
 2 files changed, 65 insertions(+), 14 deletions(-)
 create mode 100644 docs/reviews/pr22_covariate_adjustment_review.md

diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
index 50aec81a..adf49392 100644
--- a/diff_diff/staggered.py
+++ b/diff_diff/staggered.py
@@ -476,14 +476,6 @@ class CallawaySantAnna:
     >>> from diff_diff import plot_event_study
     >>> plot_event_study(results)
 
-    With covariate adjustment (conditional parallel trends):
-
-    >>> # When parallel trends only holds conditional on covariates
-    >>> cs = CallawaySantAnna(estimation_method='dr')  # doubly robust
-    >>> results = cs.fit(data, outcome='outcome', unit='unit',
-    ...                  time='time', first_treat='first_treat',
-    ...                  covariates=['age', 'income'])  # adjust for confounders
-
     Notes
     -----
     The key innovation of Callaway & Sant'Anna (2021) is the disaggregated
@@ -844,12 +836,6 @@ def _outcome_regression(
 
         Without covariates:
         Simple difference in means.
-
-        Note
-        ----
-        The standard error calculation with covariates uses an approximation
-        that ignores estimation error in the regression coefficients. For a
-        full sandwich variance estimator, see Sant'Anna & Zhao (2020).
         """
         n_t = len(treated_change)
         n_c = len(control_change)
diff --git a/docs/reviews/pr22_covariate_adjustment_review.md b/docs/reviews/pr22_covariate_adjustment_review.md
new file mode 100644
index 00000000..5cbfeba7
--- /dev/null
+++ b/docs/reviews/pr22_covariate_adjustment_review.md
@@ -0,0 +1,65 @@
+# Code Review: PR #22 - CallawaySantAnna Covariate Adjustment
+
+**Reviewer:** Claude
+**Date:** 2026-01-03
+**Status:** Approved with suggestions
+
+## Summary
+
+This PR implements covariate adjustment for the `CallawaySantAnna` estimator, enabling conditional parallel trends assumptions. This addresses a key 1.0 blocker from the roadmap.
+
+## Changes Made During Review
+
+The following issues were fixed directly in this review:
+
+1. **Removed unused instance variable** (`staggered.py:596-597`)
+   - `self._covariates = covariates` was stored but never used
+   - Covariates are passed through the method chain instead
+
+2. **Fixed empty influence function in IPW** (`staggered.py:883-886`)
+   - The unconditional IPW case returned `np.array([])` as a placeholder
+   - Now properly computes the influence function for consistency with other methods
+
+3. **Added test for extreme propensity scores** (`test_staggered.py:656-707`)
+   - Tests that propensity score clipping handles near-perfect separation gracefully
+
+## Suggestions for Future Work
+
+### Standard Error Approximation
+
+The SE calculation in `_outcome_regression` (line 776) ignores estimation error in the regression coefficients:
+
+```python
+# Approximate SE (ignoring estimation error in beta for simplicity)
+se = np.sqrt(var_t / n_t + var_c / n_c)
+```
+
+For a full sandwich variance estimator, see Sant'Anna & Zhao (2020). This is a reasonable approximation for now but could be improved in a future release.
+
+### Propensity Score Model Caching
+
+In `_doubly_robust`, the propensity score is estimated independently (line 932). For efficiency with large datasets, consider refactoring to share the propensity model between IPW and DR estimators when both are called.
+
+### Additional Test Coverage
+
+Consider adding tests for:
+- Near-collinear covariates (testing `_linear_regression` fallback to pseudo-inverse)
+- Missing values in covariates (testing the fallback warning path)
+- Very small sample sizes per group-time cell
+
+### Documentation
+
+The class docstring could include an example showing covariate-adjusted usage:
+
+```python
+# When parallel trends only holds conditional on covariates
+cs = CallawaySantAnna(estimation_method='dr')  # doubly robust
+results = cs.fit(data, outcome='outcome', unit='unit',
+                 time='time', first_treat='first_treat',
+                 covariates=['age', 'income'])
+```
+
+## References
+
+- Callaway, B., & Sant'Anna, P. H. (2021). Difference-in-Differences with multiple time periods. Journal of Econometrics, 225(2), 200-230.
+- Sant'Anna, P. H., & Zhao, J. (2020). Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122.