From 59fa76159f816d4c52bf78c3ccc795f851f5a588 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 12 Apr 2026 14:21:45 -0400 Subject: [PATCH 1/2] docs: add JOSS paper (paper.md) and bibliography (paper.bib) JOSS submission for diff-diff: 16 estimators, survey-weighted inference, R validation to machine precision, and practitioner tooling under a unified scikit-learn-style API. 19 BibTeX entries sourced from docs/methodology/REGISTRY.md. Co-Authored-By: Claude Opus 4.6 (1M context) --- paper.bib | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ paper.md | 97 ++++++++++++++++++++++++++++ 2 files changed, 287 insertions(+) create mode 100644 paper.bib create mode 100644 paper.md diff --git a/paper.bib b/paper.bib new file mode 100644 index 00000000..3599941c --- /dev/null +++ b/paper.bib @@ -0,0 +1,190 @@ +@article{Callaway2021, + author = {Callaway, Brantly and Sant'Anna, Pedro H. C.}, + title = {Difference-in-Differences with Multiple Time Periods}, + journal = {Journal of Econometrics}, + volume = {225}, + number = {2}, + pages = {200--230}, + year = {2021}, + doi = {10.1016/j.jeconom.2020.12.001} +} + +@article{Sun2021, + author = {Sun, Liyang and Abraham, Sarah}, + title = {Estimating Dynamic Treatment Effects in Event Studies with Heterogeneous Treatment Effects}, + journal = {Journal of Econometrics}, + volume = {225}, + number = {2}, + pages = {175--199}, + year = {2021}, + doi = {10.1016/j.jeconom.2020.09.006} +} + +@article{Borusyak2024, + author = {Borusyak, Kirill and Jaravel, Xavier and Spiess, Jann}, + title = {Revisiting Event-Study Designs: Robust and Efficient Estimation}, + journal = {Review of Economic Studies}, + volume = {91}, + number = {6}, + pages = {3253--3285}, + year = {2024}, + doi = {10.1093/restud/rdae007} +} + +@misc{Gardner2022, + author = {Gardner, John}, + title = {Two-Stage Differences in Differences}, + year = {2022}, + eprint = {2207.05943}, + archiveprefix = {arXiv}, + primaryclass = {econ.EM} +} + 
+@article{Arkhangelsky2021, + author = {Arkhangelsky, Dmitry and Athey, Susan and Hirshberg, David A. and Imbens, Guido W. and Wager, Stefan}, + title = {Synthetic Difference-in-Differences}, + journal = {American Economic Review}, + volume = {111}, + number = {12}, + pages = {4088--4118}, + year = {2021}, + doi = {10.1257/aer.20190159} +} + +@article{Rambachan2023, + author = {Rambachan, Ashesh and Roth, Jonathan}, + title = {A More Credible Approach to Parallel Trends}, + journal = {Review of Economic Studies}, + volume = {90}, + number = {5}, + pages = {2555--2591}, + year = {2023}, + doi = {10.1093/restud/rdad018} +} + +@article{Roth2022, + author = {Roth, Jonathan}, + title = {Pretest with Caution: Event-Study Estimates after Testing for Parallel Trends}, + journal = {American Economic Review: Insights}, + volume = {4}, + number = {3}, + pages = {305--322}, + year = {2022}, + doi = {10.1257/aeri.20210236} +} + +@article{GoodmanBacon2021, + author = {Goodman-Bacon, Andrew}, + title = {Difference-in-Differences with Variation in Treatment Timing}, + journal = {Journal of Econometrics}, + volume = {225}, + number = {2}, + pages = {254--277}, + year = {2021}, + doi = {10.1016/j.jeconom.2021.03.014} +} + +@article{deChaisemartin2020, + author = {{de Chaisemartin}, Cl\'{e}ment and D'Haultf{\oe}uille, Xavier}, + title = {Two-Way Fixed Effects Estimators with Heterogeneous Treatment Effects}, + journal = {American Economic Review}, + volume = {110}, + number = {9}, + pages = {2964--2996}, + year = {2020}, + doi = {10.1257/aer.20181169} +} + +@article{Wooldridge2023, + author = {Wooldridge, Jeffrey M.}, + title = {Simple Approaches to Nonlinear Difference-in-Differences with Panel Data}, + journal = {The Econometrics Journal}, + volume = {26}, + number = {3}, + pages = {C31--C66}, + year = {2023}, + doi = {10.1093/ectj/utad016} +} + +@techreport{Wing2024, + author = {Wing, Coady and Freedman, Seth M. 
and Hollingsworth, Alex}, + title = {Stacked Difference-in-Differences}, + institution = {National Bureau of Economic Research}, + type = {Working Paper}, + number = {32054}, + year = {2024}, + url = {https://www.nber.org/papers/w32054} +} + +@techreport{Callaway2024, + author = {Callaway, Brantly and Goodman-Bacon, Andrew and Sant'Anna, Pedro H. C.}, + title = {Difference-in-Differences with a Continuous Treatment}, + institution = {National Bureau of Economic Research}, + type = {Working Paper}, + number = {32117}, + year = {2024} +} + +@misc{Chen2025, + author = {Chen, Xiaohong and Sant'Anna, Pedro H. C. and Xie, Haitian}, + title = {Efficient Difference-in-Differences and Event Study Estimators}, + year = {2025}, + note = {Working paper} +} + +@misc{Athey2025, + author = {Athey, Susan and Imbens, Guido W. and Qu, Zhaonan and Viviano, Davide}, + title = {Triply Robust Panel Estimators}, + year = {2025}, + eprint = {2508.21536}, + archiveprefix = {arXiv}, + primaryclass = {econ.EM} +} + +@misc{OrtizVillavicencio2025, + author = {Ortiz-Villavicencio, Marco and Sant'Anna, Pedro H. C.}, + title = {Better Understanding Triple Differences Estimators}, + year = {2025}, + eprint = {2505.09942}, + archiveprefix = {arXiv}, + primaryclass = {econ.EM} +} + +@article{Roth2023, + author = {Roth, Jonathan and Sant'Anna, Pedro H. C. and Bilinski, Alyssa and Poe, John}, + title = {What's Trending in Difference-in-Differences? {A} Synthesis of the Recent Econometrics Literature}, + journal = {Journal of Econometrics}, + volume = {235}, + number = {2}, + pages = {2218--2244}, + year = {2023}, + doi = {10.1016/j.jeconom.2023.03.008} +} + +@misc{Baker2025, + author = {Baker, Andrew C. and Larcker, David F. and McClure, Charles G. 
and Saraph, Durgesh and Watts, Edward M.}, + title = {Difference-in-Differences Designs: A Practitioner's Guide}, + year = {2025}, + eprint = {2503.13323}, + archiveprefix = {arXiv}, + primaryclass = {econ.EM} +} + +@article{Lumley2004, + author = {Lumley, Thomas}, + title = {Analysis of Complex Survey Samples}, + journal = {Journal of Statistical Software}, + volume = {9}, + number = {8}, + pages = {1--19}, + year = {2004}, + doi = {10.18637/jss.v009.i08} +} + +@manual{Berge2018, + author = {Berg\'{e}, Laurent}, + title = {fixest: Fast Fixed-Effects Estimations}, + year = {2018}, + note = {R package}, + url = {https://CRAN.R-project.org/package=fixest} +} diff --git a/paper.md b/paper.md new file mode 100644 index 00000000..8b82cf3b --- /dev/null +++ b/paper.md @@ -0,0 +1,97 @@ +--- +title: "diff-diff: Comprehensive Difference-in-Differences Causal Inference for Python" +tags: + - difference-in-differences + - causal-inference + - econometrics + - Python + - treatment-effects + - survey-data +authors: + - name: Isaac Gerber + orcid: 0009-0009-3275-5591 + affiliation: 1 +affiliations: + - name: Independent Researcher + index: 1 +date: 12 April 2026 +bibliography: paper.bib +--- + +# Summary + +`diff-diff` is a Python library for Difference-in-Differences (DiD) causal inference +analysis. It provides 16 estimators covering the full modern DiD toolkit - from classic +two-group/two-period designs through heterogeneity-robust staggered adoption methods, +synthetic control hybrids, and sensitivity analysis - under a consistent scikit-learn-style +API. All estimators accept an optional `SurveyDesign` object for design-based variance +estimation with complex survey data, a capability absent from existing DiD software in any +language. Point estimates and standard errors are validated against established R packages +to machine precision. 
+ +# Statement of Need + +Difference-in-differences is the most widely used quasi-experimental research design in +applied economics and the social sciences. Since 2018, a wave of methodological advances +has addressed fundamental limitations of the conventional two-way fixed effects (TWFE) +estimator under staggered treatment adoption and heterogeneous effects [@Roth2023]. These +modern methods - including Callaway and Sant'Anna [-@Callaway2021], Sun and Abraham +[-@Sun2021], Borusyak, Jaravel, and Spiess [-@Borusyak2024], and others - are now standard +practice in applied work. + +The R ecosystem provides mature implementations across several packages: `did` +[@Callaway2021], `fixest` [@Berge2018], `synthdid` [@Arkhangelsky2021], and `HonestDiD` +[@Rambachan2023]. Stata offers `csdid` and `didregress`. Python, however, lacks a unified +DiD library. Practitioners working in Python-based data science workflows - increasingly +common in industry settings for marketing measurement, product experimentation, and policy +evaluation - must either context-switch to R, reimplement methods from scratch, or rely on +partial implementations scattered across unrelated packages. + +`diff-diff` fills this gap by providing a single-import library that covers 16 estimators +with a consistent API, survey-weighted inference, and numerical validation against R to +machine precision. It targets both applied researchers who need rigorous econometric methods +and data science practitioners who need accessible causal inference tools integrated into +Python workflows. 
+ +# Key Features + +**Breadth of methods.** `diff-diff` implements 16 estimators organized across the modern +DiD taxonomy: classic DiD and TWFE; heterogeneity-robust staggered estimators including +Callaway-Sant'Anna [@Callaway2021], Sun-Abraham [@Sun2021], imputation +[@Borusyak2024], two-stage [@Gardner2022], stacked [@Wing2024], and efficient +[@Chen2025] approaches; extended designs including synthetic DiD [@Arkhangelsky2021], +triple difference [@OrtizVillavicencio2025], continuous treatment [@Callaway2024], +nonlinear ETWFE [@Wooldridge2023], and triply robust panel estimation [@Athey2025]; +reversible-treatment DiD for non-absorbing interventions [@deChaisemartin2020]; and +diagnostics including Goodman-Bacon decomposition [@GoodmanBacon2021], Honest DiD +sensitivity analysis [@Rambachan2023], and pre-trends power analysis [@Roth2022]. All +estimators share a consistent `fit()` interface with `get_params()`/`set_params()` for +configuration, R-style formula support, and rich results objects with `summary()` output. +An optional Rust backend via PyO3 accelerates compute-intensive operations. + +**Survey-weighted inference.** A `SurveyDesign` class supports stratification, primary +sampling units, finite population corrections, and probability weights. Variance estimation +includes Taylor series linearization, five replicate weight methods (BRR, Fay's BRR, JK1, +JKn, SDR), and survey-aware bootstrap. Survey variance is validated against R's `survey` +package [@Lumley2004] on three federal datasets (NHANES, RECS, API) to machine precision +(differences < 1e-10). No other DiD package in any language provides integrated survey +support. + +**Validation against R.** Point estimates match the R `did`, `synthdid`, and `fixest` +packages to machine precision (differences < 1e-10). Standard errors match exactly for +core estimators including Callaway-Sant'Anna and basic DiD. 
Validation includes the +canonical MPDTA minimum-wage dataset from Callaway and Sant'Anna [-@Callaway2021]. + +**Practitioner tooling.** Beyond estimation, `diff-diff` includes a practitioner decision +tree for estimator selection, an 8-step diagnostic workflow based on Baker et al. +[-@Baker2025], AI agent integration with structured next-steps guidance, and microdata +aggregation utilities for converting individual-level survey responses into +geographic-period panels suitable for DiD analysis. + +# Acknowledgments + +Wenli Xu (Faculty of Finance, City University of Macau) implemented the WooldridgeDiD +(ETWFE) estimator, including saturated OLS, logit, and Poisson QMLE paths with ASF-based +ATT and delta-method standard errors. Development was assisted by Claude Code (Anthropic). + +# References From 788a51df10f4ab16083149b88379c08f4b425536 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 12 Apr 2026 14:32:07 -0400 Subject: [PATCH 2/2] docs: address AI review findings on JOSS paper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix estimator count: 16 → 17 (includes StaggeredTripleDifference) - Soften survey claim: "All estimators" → "Most estimators" (dCDH raises NotImplementedError for survey_design) - Narrow validation claim: machine-precision point estimates, exact or sub-percent SE parity (SyntheticDiD has 0.3% SE gap) - Add Wooldridge (2025) as primary ETWFE citation alongside (2023) - Add ORCID to CITATION.cff Co-Authored-By: Claude Opus 4.6 (1M context) --- CITATION.cff | 1 + paper.bib | 10 ++++++++++ paper.md | 17 ++++++++--------- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index d2a0297e..bcde731f 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -5,6 +5,7 @@ type: software authors: - given-names: Isaac family-names: Gerber + orcid: "https://orcid.org/0009-0009-3275-5591" license: MIT version: "3.0.1" date-released: "2026-04-07" diff --git a/paper.bib 
b/paper.bib index 3599941c..898da4a8 100644 --- a/paper.bib +++ b/paper.bib @@ -95,6 +95,16 @@ @article{deChaisemartin2020 doi = {10.1257/aer.20181169} } +@article{Wooldridge2025, + author = {Wooldridge, Jeffrey M.}, + title = {Two-Way Fixed Effects, the Two-Way {Mundlak} Regression, and Difference-in-Differences Estimators}, + journal = {Empirical Economics}, + volume = {69}, + number = {5}, + pages = {2545--2587}, + year = {2025} +} + @article{Wooldridge2023, author = {Wooldridge, Jeffrey M.}, title = {Simple Approaches to Nonlinear Difference-in-Differences with Panel Data}, diff --git a/paper.md b/paper.md index 8b82cf3b..8ef25549 100644 --- a/paper.md +++ b/paper.md @@ -21,13 +21,13 @@ bibliography: paper.bib # Summary `diff-diff` is a Python library for Difference-in-Differences (DiD) causal inference -analysis. It provides 16 estimators covering the full modern DiD toolkit - from classic +analysis. It provides 17 estimators covering the full modern DiD toolkit - from classic two-group/two-period designs through heterogeneity-robust staggered adoption methods, synthetic control hybrids, and sensitivity analysis - under a consistent scikit-learn-style -API. All estimators accept an optional `SurveyDesign` object for design-based variance +API. Most estimators accept an optional `SurveyDesign` object for design-based variance estimation with complex survey data, a capability absent from existing DiD software in any -language. Point estimates and standard errors are validated against established R packages -to machine precision. +language. Point estimates are validated against established R packages to machine precision, +with standard errors matching exactly or to sub-percent relative differences. 
# Statement of Need @@ -47,21 +47,20 @@ common in industry settings for marketing measurement, product experimentation, evaluation - must either context-switch to R, reimplement methods from scratch, or rely on partial implementations scattered across unrelated packages. -`diff-diff` fills this gap by providing a single-import library that covers 16 estimators -with a consistent API, survey-weighted inference, and numerical validation against R to -machine precision. It targets both applied researchers who need rigorous econometric methods +`diff-diff` fills this gap by providing a single-import library that covers 17 estimators +with a consistent API, survey-weighted inference, and numerical validation against R. It targets both applied researchers who need rigorous econometric methods and data science practitioners who need accessible causal inference tools integrated into Python workflows. # Key Features -**Breadth of methods.** `diff-diff` implements 16 estimators organized across the modern +**Breadth of methods.** `diff-diff` implements 17 estimators organized across the modern DiD taxonomy: classic DiD and TWFE; heterogeneity-robust staggered estimators including Callaway-Sant'Anna [@Callaway2021], Sun-Abraham [@Sun2021], imputation [@Borusyak2024], two-stage [@Gardner2022], stacked [@Wing2024], and efficient [@Chen2025] approaches; extended designs including synthetic DiD [@Arkhangelsky2021], triple difference [@OrtizVillavicencio2025], continuous treatment [@Callaway2024], -nonlinear ETWFE [@Wooldridge2023], and triply robust panel estimation [@Athey2025]; +nonlinear ETWFE [@Wooldridge2025; @Wooldridge2023], and triply robust panel estimation [@Athey2025]; reversible-treatment DiD for non-absorbing interventions [@deChaisemartin2020]; and diagnostics including Goodman-Bacon decomposition [@GoodmanBacon2021], Honest DiD sensitivity analysis [@Rambachan2023], and pre-trends power analysis [@Roth2022]. All