From d7657d17fcd619743b47532a3ecbdff6a47e29b0 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 1 Jun 2026 14:53:39 -0400 Subject: [PATCH] SyntheticControl: cv (out-of-sample) + inverse-variance V-selection (ADH 2015 / Abadie 2021) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the ADH-2015 / Abadie-2021 predictor-importance V-selection menu with two new `v_method` values, each threaded through the in-space / leave-one-out / in-time placebo refits so a diagnostic uses the same estimator as the headline fit. `v_method="cv"` — out-of-sample cross-validation (ADH 2015 §; Abadie 2021 Eq. 9). The pre-period is split at `v_cv_t0` (new constructor param; default `len(pre)//2`) into a training and a validation window. Each predictor spec is RE-AGGREGATED over each window (its op recomputed over only that window's periods — a separate `dataprep` per window, standardized per window), so the V-search is genuinely out-of-sample for every predictor type: V is selected to minimize the validation-window outcome MSPE of the training-window fit, then the final weights are re-estimated on the validation-window predictors (step 4). The same V* drives both fits with no zeroed coordinate, so `v_weights` reproduce `donor_weights` and `predictor_balance` is reported on the validation-window basis. `mspe_v` reports the held-out validation MSPE. Abadie 2021 fn.7 non-uniqueness is handled by a deterministic, convergence-aware flat-MSPE tie-break (prefer the densest V; never let a non-converged candidate displace a converged incumbent). `v_method="inverse_variance"` — closed-form `v_h = 1/Var(X_h)` (Abadie 2021 §3.2(a)), variance over donors+treated on the unstandardized predictors, applied to the RAW predictors (the `standardize` pre-scaling is intentionally bypassed — inverse-variance weighting IS the unit-variance rescaling). Exact for every positive variance (no flooring); zero-variance rows get 0 weight; an all-zero / overflow panel falls back to uniform + warn. Fail-closed identification gates (cv): every predictor must SPAN both windows (re-aggregation needs it measurable on each — default single-period lags are rejected), and each window must have cross-DONOR variation (donor-indistinguishable windows leave X0·W constant in W → the weight solve is unidentified). Violations raise on the headline fit; in-space placebos drop the affected refit; in-time-truncated dates are marked `infeasible`. Single-donor fits force w=[1] so V is unidentified → uniform `v_weights` + `mspe_v=None` for all methods (documented). Validation: R `Synth` has no built-in CV, so cv is anchored by deterministic equivalence to the R-anchored `custom_v` path on the per-window re-aggregated predictors (both the step-3 criterion and step-4 weights) + cv self-consistency (in_time_placebo == fresh backdated fit); inverse-variance is bit-for-bit vs `custom_v=1/Var(X)`. New tests cover config validation, exact inverse-variance, the spanning / donor-variation / convergence fail-closed gates, single-donor degeneracy, and placebo/LOO/in-time propagation for both methods. Docs: REGISTRY §SyntheticControl Notes (per-window re-aggregation, fully-spanning + donor gates, tie-break, inverse-variance, single-donor), checklist tick, `docs/api`, the LLM guides, README, CHANGELOG. The remaining ADH-2015 tail (`W^reg` extrapolation, sparse-SC) and an in-space/LOO machine-readable cv-infeasible reason-code stay tracked in TODO.md. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 3 + README.md | 2 +- TODO.md | 3 +- diff_diff/guides/llms-full.txt | 12 +- diff_diff/guides/llms.txt | 2 +- diff_diff/synthetic_control.py | 648 +++++++++++++++- diff_diff/synthetic_control_results.py | 89 ++- docs/api/synthetic_control.rst | 24 +- docs/methodology/REGISTRY.md | 13 +- tests/test_methodology_synthetic_control.py | 813 +++++++++++++++++++- 10 files changed, 1549 insertions(+), 60 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5ca3c8e..97e5a054 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- **`SyntheticControl` cross-validation + inverse-variance `V`-selection (ADH 2015 §; Abadie 2021 §3.2(a), Eq. 9).** Two new `v_method` values complete the ADH-2015/Abadie-2021 `V`-selection menu (joining `"nested"` / `"custom"`), each threaded through the in-space / leave-one-out / in-time placebo refits so a diagnostic uses the **same** estimator as the headline fit. **`v_method="cv"`** selects the diagonal predictor-importance `V` by out-of-sample cross-validation: the pre-period is split positionally at `v_cv_t0` (new constructor param; default `len(pre)//2`, Abadie 2021's `t0 = T0/2`) into a training and a validation window, `V` is chosen to minimize the validation-window outcome MSPE of the training-fit weights (`mspe_v` now reports this validation MSPE under cv), and the final reported weights are re-estimated on the validation-window predictors (ADH 2015 step 4). Each predictor spec is **re-aggregated** over each window (its mean/sum/identity recomputed over only the periods that fall in that window — a separate `dataprep` per window, exactly as ADH 2015's CV does, since R `Synth` has no built-in CV function), so the V-search is genuinely out-of-sample for every predictor type and the same `V*` drives both fits with no zeroed coordinate (`v_weights` reproduce `donor_weights` on the validation-window predictors, and `predictor_balance` is reported on that validation-window basis). **Fully-spanning precondition (fail-closed):** re-aggregating a predictor on each window requires it to be observed in **both** windows, so `cv` **requires every predictor to span both the training and validation windows** and raises `ValueError` otherwise — satisfied by ADH 2015's shared covariate / multi-period `special_predictors` (which span the windows) but NOT by the default per-period outcome lags (each is single-period and lives in one window only), so `cv` with the bare default predictors is rejected with guidance to pass spanning predictors. In-time-placebo truncation that breaks the fully-spanning precondition (a kept spec stops spanning both windows at the truncated split) marks that date `infeasible`. A second fail-closed gate covers windows that span but carry **no cross-donor variation** (every re-aggregated predictor constant across the donors, so `X0·W` is constant in `W` → a flat, unidentified weight solve that would otherwise return arbitrary "converged" weights — even when the treated unit differs, since donor distinguishability, not treated-vs-donor variation, identifies `W`): the headline fit raises `ValueError`, in-space placebo refits whose donor pool is indistinguishable in a window are dropped from the reference set, and such in-time-truncated dates are marked `infeasible`. Abadie 2021 footnote 7's CV non-uniqueness is handled by a **deterministic tie-break** (prefer the `V` closest to uniform among ties), making the selected `V*` among equally-good optima independent of the multistart evaluation order. The cv fit is reproducible for a fixed `seed` (like `nested`) but is not seed-independent — the multistart fills any slots beyond the distinct heuristic starts with seed-dependent random Dirichlet draws, so the tie-break removes start-order dependence among ties, not seed dependence. The tie-break is convergence-aware (a non-converged optimizer candidate cannot displace a converged incumbent on an objective tie). If the training-window solve that defines `mspe_v` truncates (e.g. `inner_max_iter` too small), the fit fails closed — `mspe_v=NaN` and the fit is marked non-converged — rather than reporting an invalid Eq. 9 criterion. **`v_method="inverse_variance"`** uses the closed form `v_h = 1/Var(X_h)` (variance over donors+treated on the unstandardized predictors), applied to the **raw** predictors so the effective objective is the unit-variance-rescaled `Σ_h diff_h²/Var_h` (Abadie 2021 §3.2(a)); the `standardize` pre-scaling is intentionally bypassed on this branch (inverse-variance weighting *is* the unit-variance rescaling — applying it on already-standardized rows would double-rescale to `Σ_h diff_h²/Var_h²`), so it is equivalent to uniform `V` on standardized predictors. No search (`mspe_v=None`); a zero-variance row gets 0 weight and an all-zero-variance panel falls back to uniform `V` with a warning. `custom_v` is rejected (fail-closed) for both methods and `v_cv_t0` is rejected unless `v_method="cv"`. On the degenerate **single-donor** path (`J=1` forces `w=[1]`) `V` is unidentified — every `V` yields the same synthetic — so `v_weights` is **uniform** and `mspe_v=None` for ALL `v_method`s (cv / inverse_variance included; their selected / closed-form `V` would be inert), with a `UserWarning`; the donor weights / gap / ATT are unaffected. An explicitly pinned `v_cv_t0` that no longer fits the truncated pre-fake window is nulled to the `//2` default for the placebo refit (a pinned value that still fits the truncated window is kept). **Validation:** R `Synth` has no built-in CV function (ADH 2015's CV is a manual `dataprep`+`synth` re-run), so cv is anchored by deterministic equivalence to the R-anchored `custom_v` path (the step-3 validation MSPE of the training-window fit and the step-4 validation-window weights each match a `custom_v=V*` fit on the correspondingly re-aggregated predictors) plus cv self-consistency (`in_time_placebo` under cv == a fresh cv fit on the backdated panel to 1e-7); inverse-variance is anchored bit-for-bit to a `custom_v=1/Var(X)` fit. Documented in `docs/methodology/REGISTRY.md` §SyntheticControl (new `**Note:**` labels for the per-window re-aggregation convention, the flat-MSPE tie-break, and inverse-variance), `docs/api/synthetic_control.rst`, the LLM guides, and `README.md`. The remaining ADH-2015 items (`W^reg` extrapolation diagnostic, sparse-SC subset search) stay tracked in `TODO.md`. + ### Fixed - **Covariate names that collide with reserved structural terms now raise `ValueError` instead of silently corrupting the coefficient dict (`DifferenceInDifferences`, `MultiPeriodDiD`, `TwoWayFixedEffects`).** These estimators build their `coefficients` dict by zipping a variable-name list -- structural term names PLUS the user covariate column names appended verbatim -- with the fitted coefficient vector. A covariate whose name equaled a reserved structural name (`const`; the treatment/time column names; the `{treatment}:{time}` interaction; MultiPeriodDiD `period_{p}` dummies and `{treatment}:period_{p}` interactions; `TwoWayFixedEffects` `ATT`; fixed-effect / unit / time dummy names; or an internal `_`-prefixed working column such as `_treat_time` / `_did_treatment` / `_treatment_post`) silently **overwrote** that structural coefficient via Python dict last-write-wins -- e.g. a covariate named `const` dropped the intercept -- with no error or warning. A new shared `validate_covariate_names` helper (`diff_diff/utils.py`) is now called in each of the three `fit()` methods before the design matrix is built; it raises `ValueError` on a collision (the comparison is case-sensitive, so e.g. `Const` is still allowed) **and** on duplicate names within `covariates` (which collapse to a single dict entry the same way). Fixed-effect/unit/time dummy reserved names are taken from the same `pd.get_dummies(..., drop_first=True)` call used to build them, so they match exactly (including for pandas `Categorical` columns with a non-default category order). For `TwoWayFixedEffects` the guard fires on **all** variance paths: the default within-transform path returns only `{"ATT": att}` (no covariate is a dict key there), but a covariate named `_treatment_post` would still clobber the internal interaction column, so guarding both paths is uniform and forward-compatible. **Potentially breaking:** a fit that previously *succeeded* with a colliding (or duplicated) covariate name -- silently returning a corrupted coefficient dict -- now raises; rename the covariate column(s). The staggered / influence-function estimators (CallawaySantAnna, SunAbraham, StaggeredTripleDifference, EfficientDiD, TwoStageDiD, ImputationDiD, WooldridgeDiD, dCDH, StackedDiD) key results by `(g, t)` tuples / relative-time indices, never covariate names, and `TripleDifference` / `SyntheticControl` / `SyntheticDiD` do not expose covariates by name, so none are affected. New tests in `tests/test_utils.py`, `tests/test_estimators.py`, and `tests/test_estimators_vcov_type.py`. diff --git a/README.md b/README.md index 8330f441..b9444c68 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,7 @@ Full guide: `diff_diff.get_llm_guide("practitioner")`. - [TwoStageDiD](https://diff-diff.readthedocs.io/en/stable/api/two_stage.html) - Gardner (2022) two-stage estimator with GMM sandwich variance - [SpilloverDiD](https://diff-diff.readthedocs.io/en/stable/api/spillover.html) - Butts (2021) ring-indicator spillover-aware DiD identifying direct effect on treated + per-ring spillover on near-control units; handles non-staggered and staggered timing; supports survey-design variance under `survey_design=` for HC1 / CR1 (Wave E.1 Binder TSL) and Conley (Wave E.2 panel-aware stratified-Conley sandwich on per-period PSU totals; extended in Wave E.2 follow-up to `conley_lag_cutoff > 0` via panel-block composition with within-PSU serial Bartlett HAC — `lag>0` requires an effective PSU via explicit `survey_design.psu` or injected `cluster=`); `SurveyDesign.subpopulation()` preserves full-design `n_psu` / `df_survey` via zero-padded scores (Wave E.3, R `svyrecvar(subset())` form) - [SyntheticDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - Synthetic DiD combining standard DiD and synthetic control for few treated units -- [SyntheticControl](https://diff-diff.readthedocs.io/en/stable/api/synthetic_control.html) - Abadie, Diamond & Hainmueller (2010) classic synthetic control for a single treated unit (donor-weight counterfactual, nested/custom V; in-space placebo permutation inference via `in_space_placebo()`, plus ADH-2015 `leave_one_out()` + `in_time_placebo()` robustness) +- [SyntheticControl](https://diff-diff.readthedocs.io/en/stable/api/synthetic_control.html) - Abadie, Diamond & Hainmueller (2010) classic synthetic control for a single treated unit (donor-weight counterfactual, nested/cv/inverse-variance/custom V; in-space placebo permutation inference via `in_space_placebo()`, plus ADH-2015 `leave_one_out()` + `in_time_placebo()` robustness) - [TripleDifference](https://diff-diff.readthedocs.io/en/stable/api/triple_diff.html) - triple difference (DDD) estimator for designs requiring two criteria for treatment eligibility - [ContinuousDiD](https://diff-diff.readthedocs.io/en/stable/api/continuous_did.html) - Callaway, Goodman-Bacon & Sant'Anna (2024) continuous treatment DiD with dose-response curves - [HeterogeneousAdoptionDiD](https://diff-diff.readthedocs.io/en/stable/api/had.html) - de Chaisemartin, Ciccia, D'Haultfœuille & Knau (2026) for designs where **no unit remains untreated**; local-linear estimator at the dose support boundary returning Weighted Average Slope (WAS) on Design 1' (`d̲ = 0` / QUG) or `WAS_{d̲}` on Design 1 (`d̲ > 0`, continuous-near-d̲ or mass-point), with a multi-period event-study extension (last-treatment cohort, pointwise CIs). **Panel-only** in this release - repeated cross-sections rejected by the validator. Alias `HAD`. diff --git a/TODO.md b/TODO.md index fe7971cd..d8aa1f0d 100644 --- a/TODO.md +++ b/TODO.md @@ -74,6 +74,7 @@ Deferred items from PR reviews that were not addressed before merge. | Issue | Location | PR | Priority | |-------|----------|----|----------| +| `SyntheticControl` cv: `in_space_placebo()` / `leave_one_out()` report a cv refit excluded for STRUCTURAL infeasibility (donor-indistinguishable re-aggregated window) with the generic `status="failed"` — same machine-readable status as a genuine inner-solver non-convergence. The failure warnings now distinguish the two causes (and the correct remediation) under cv, and `in_time_placebo()` already splits structural→`"infeasible"` vs `"failed"`, but in-space/LOO do not yet emit a separate machine-readable status/reason-code. Thread a reason code from `_outer_solve_V_cv()`/`_placebo_fit_unit()` and add an `"infeasible"` status + count to the in-space/LOO outputs (mirror the in-time split). | `synthetic_control.py`, `synthetic_control_results.py` | follow-up | Low | | dCDH: Phase 1 per-period placebo DID_M^pl has NaN SE (no IF derivation for the per-period aggregation path). Multi-horizon placebos (L_max >= 1) have valid SE. | `chaisemartin_dhaultfoeuille.py` | #294 | Low | | dCDH: Survey cell-period allocator's post-period attribution is a library convention, not derived from the observation-level survey linearization. MC coverage is empirically close to nominal on the test DGP; a formal derivation (or a covariance-aware two-cell alternative) is deferred. Documented in REGISTRY.md survey IF expansion Note. | `chaisemartin_dhaultfoeuille.py`, `docs/methodology/REGISTRY.md` | #408 | Medium | | dCDH: Parity test SE/CI assertions only cover pure-direction scenarios; mixed-direction SE comparison is structurally apples-to-oranges (cell-count vs obs-count weighting). | `test_chaisemartin_dhaultfoeuille_parity.py` | #294 | Low | @@ -84,7 +85,7 @@ Deferred items from PR reviews that were not addressed before merge. | ImputationDiD dense `(A0'A0).toarray()` scales O((U+T+K)^2), OOM risk on large panels | `imputation.py` | #141 | Medium (deferred — only triggers when sparse solver fails) | | Multi-absorb weighted demeaning needs iterative alternating projections for N > 1 absorbed FE with survey weights; unweighted multi-absorb also uses single-pass (pre-existing, exact only for balanced panels) | `estimators.py` | #218 | Medium | | Survey design resolution/collapse patterns are inconsistent across panel estimators — ContinuousDiD rebuilds unit-level design in SE code, EfficientDiD builds once in fit(), StackedDiD re-resolves on stacked data; extract shared helpers for panel-to-unit collapse, post-filter re-resolution, and metadata recomputation | `continuous_did.py`, `efficient_did.py`, `stacked_did.py` | #226 | Low | -| SyntheticControl: the remaining ADH-2015 §4 items are out of scope of the leave-one-out + in-time-placebo PR — out-of-sample cross-validation `V`-selection (training/validation split), the regression-weight `W^reg = X_0'(X_0 X_0')^{-1} X_1` extrapolation diagnostic (flag implied OLS weights outside `[0,1]`), and sparse-SC subset search (`l < J`, holding `V` fixed). Leave-one-out (`leave_one_out()`) and the in-time placebo (`in_time_placebo()`) landed and are surfaced under `estimator_native_diagnostics`; these three are the deferred tail. | `synthetic_control.py`, `synthetic_control_results.py` | ADH-2015 follow-up | Low | +| SyntheticControl: the remaining ADH-2015 §4 items — the regression-weight `W^reg = X_0'(X_0 X_0')^{-1} X_1` extrapolation diagnostic (flag implied OLS weights outside `[0,1]`) and sparse-SC subset search (`l < J`, holding `V` fixed). Leave-one-out (`leave_one_out()`), the in-time placebo (`in_time_placebo()`), out-of-sample CV `V`-selection (`v_method="cv"`), and inverse-variance `V` (`v_method="inverse_variance"`) have landed; these two are the deferred tail. | `synthetic_control.py`, `synthetic_control_results.py` | ADH-2015 follow-up | Low | | ContinuousDiD deferred CGBS 2024 extensions: (a) `covariates=` kwarg not implemented (matches R `contdid` v0.1.0); (b) discrete-treatment saturated regression deferred (integer-valued dose currently warned, not routed to per-level coefficients); (c) lowest-dose-as-control per CGBS 2024 Remark 3.1 (when `P(D=0) = 0`) not implemented — estimator requires never-treated controls. REGISTRY `## ContinuousDiD` → Implementation Checklist marks these as deferred `[ ]` items. | `diff_diff/continuous_did.py` | — | Low | | Survey-weighted Silverman bandwidth in EfficientDiD conditional Omega* — `_silverman_bandwidth()` uses unweighted mean/std for bandwidth selection; survey-weighted statistics would better reflect the population distribution but is a second-order refinement | `efficient_did_covariates.py` | — | Low | | TROP: extend Wave 4's `_setup_trop_data` helper to also cover the duplicated bootstrap resampling loop in `_bootstrap_variance` / `_bootstrap_variance_global` (~40 LoC dedup; mirrors the data-setup helper pattern with a `fit_callable` parameter for the per-draw refit step). | `trop_local.py`, `trop_global.py` | follow-up | Low | diff --git a/diff_diff/guides/llms-full.txt b/diff_diff/guides/llms-full.txt index 5298bd0f..d7f22261 100644 --- a/diff_diff/guides/llms-full.txt +++ b/diff_diff/guides/llms-full.txt @@ -583,15 +583,16 @@ Classic Synthetic Control Method (Abadie, Diamond & Hainmueller 2010; Abadie & G ```python SyntheticControl( - v_method: str = "nested", # "nested" (data-driven V) or "custom" - custom_v=None, # diagonal V (len = #predictors); required iff v_method="custom" - optimizer_options: dict | None = None, # merged into scipy.optimize.minimize (outer V search) - n_starts: int = 4, # multistart count for the outer V search + v_method: str = "nested", # "nested" (pre-MSPE) | "cv" (out-of-sample, ADH 2015; needs predictors spanning BOTH train/val windows — default single-period lags are REJECTED) | "inverse_variance" (1/Var(X) on RAW predictors, bypasses standardize) | "custom" + custom_v=None, # diagonal V (len = #predictors); required iff v_method="custom"; forbidden otherwise + optimizer_options: dict | None = None, # merged into scipy.optimize.minimize (outer V search; nested/cv) + n_starts: int = 4, # multistart count for the outer V search (nested/cv) inner_max_iter: int = 10000, # Frank-Wolfe inner-solve cap inner_min_decrease: float = 1e-5, # inner-solve convergence scale (scale-aware) standardize: str = "std", # "std" (per-row SD, ddof=1) or "none" (deviation from R) alpha: float = 0.05, seed: int | None = None, # seeds the multistart Dirichlet draws + v_cv_t0: int | None = None, # cv train/val split index; default len(pre)//2 (Abadie 2021 t0=T0/2); None unless v_method="cv" ) ``` @@ -1293,10 +1294,11 @@ Returned by `SyntheticControl.fit()`. | `predictor_balance` | `pd.DataFrame` | treated vs synthetic vs donor-mean per predictor | | `gap_path` | `dict` | `{period: gap}` for all periods | | `pre_rmspe` | `float` | Pre-treatment fit diagnostic | -| `mspe_v` | `float \| None` | Outer-objective MSPE (nested) | +| `mspe_v` | `float \| None` | Selected-V objective: pre-period MSPE (nested) or held-out validation MSPE (cv); None for custom / inverse_variance | | `treated_unit` | `Any` | Treated unit identifier | | `pre_periods`, `post_periods` | `list` | Calendar-sorted periods | | `v_method`, `standardize` | `str` | Echoed configuration | +| `v_cv_t0` | `int \| None` | Resolved cv train/validation split index (None unless v_method="cv") | **Methods:** `in_space_placebo()` (opt-in permutation inference; refits one synthetic control per donor), `get_placebo_df()` (per-unit RMSPE-ratio table incl. the treated row), `leave_one_out()` (ADH-2015 §4 donor robustness; drops each reportably-weighted donor (weight > 1e-6) → per-drop ATT/`delta_att` table) + `get_leave_one_out_df()`/`get_leave_one_out_gaps()`, `in_time_placebo()` (ADH-2015 §4 backdating placebo; reassigns the intervention earlier, TRUNCATE windowing, placebo ATT ~0 if no real pre-effect) + `get_in_time_placebo_df()`/`get_in_time_placebo_gaps()`, `summary()`, `print_summary()`, `to_dict()`, `to_dataframe()`, `get_gap_df()`, `get_weights_df()` diff --git a/diff_diff/guides/llms.txt b/diff_diff/guides/llms.txt index 7fb2922b..250068de 100644 --- a/diff_diff/guides/llms.txt +++ b/diff_diff/guides/llms.txt @@ -60,7 +60,7 @@ Full practitioner guide: call `diff_diff.get_llm_guide("practitioner")` - [TwoStageDiD](https://diff-diff.readthedocs.io/en/stable/api/two_stage.html): Gardner (2022) two-stage estimator with GMM sandwich variance - [SpilloverDiD](https://diff-diff.readthedocs.io/en/stable/api/spillover.html): Butts (2021) ring-indicator spillover-aware DiD identifying direct effect on treated + per-ring spillover-on-control; reuses `conley_coords` for ring construction; handles non-staggered and staggered timing; supports `SurveyDesign(weights, strata, psu, fpc)` under `vcov_type="hc1"` with optional `cluster=` for CR1 via Gerber (2026) Binder TSL (Wave E.1) and under `vcov_type="conley"` via a panel-aware stratified-Conley sandwich on per-period PSU totals (Wave E.2 cross-sectional `conley_lag_cutoff=0`) extended in Wave E.2 follow-up to `conley_lag_cutoff > 0` via panel-block composition with within-PSU serial Bartlett HAC (Newey-West 1987 separable form; `lag>0` requires an effective PSU via explicit `survey_design.psu` or injected `cluster=`), both composed with the Wave D Gardner GMM correction; `SurveyDesign.subpopulation()` preserves full-design `n_psu` / `df_survey` via zero-padded scores at the meat-helper boundary (Wave E.3, R `svyrecvar(subset())` form) (replicate weights queued as follow-up) - [SyntheticDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html): Synthetic DiD combining standard DiD and synthetic control methods for few treated units -- [SyntheticControl](https://diff-diff.readthedocs.io/en/stable/api/synthetic_control.html): Abadie, Diamond & Hainmueller (2010) classic synthetic control for ONE treated unit — donor-weight counterfactual, nested or custom predictor-importance V, gap path + pre-RMSPE; no analytical SE (inference fields NaN), significance via in-space placebo permutation inference (`in_space_placebo()`, post/pre RMSPE-ratio, p = rank/(n_placebos+1)); ADH-2015 §4 robustness: `leave_one_out()` donor-robustness + `in_time_placebo()` backdating placebo +- [SyntheticControl](https://diff-diff.readthedocs.io/en/stable/api/synthetic_control.html): Abadie, Diamond & Hainmueller (2010) classic synthetic control for ONE treated unit — donor-weight counterfactual, predictor-importance V via nested / cv (out-of-sample, ADH 2015; needs predictors spanning both train/val windows, so default single-period lags are rejected) / inverse-variance (1/Var on raw predictors, bypasses standardize) / custom, gap path + pre-RMSPE; no analytical SE (inference fields NaN), significance via in-space placebo permutation inference (`in_space_placebo()`, post/pre RMSPE-ratio, p = rank/(n_placebos+1)); ADH-2015 §4 robustness: `leave_one_out()` donor-robustness + `in_time_placebo()` backdating placebo - [TripleDifference](https://diff-diff.readthedocs.io/en/stable/api/triple_diff.html): Triple difference (DDD) estimator for designs requiring two criteria for treatment eligibility - [ContinuousDiD](https://diff-diff.readthedocs.io/en/stable/api/continuous_did.html): Callaway, Goodman-Bacon & Sant'Anna (2024) continuous treatment DiD with dose-response curves - [HeterogeneousAdoptionDiD](https://diff-diff.readthedocs.io/en/stable/api/had.html): de Chaisemartin, Ciccia, D'Haultfœuille & Knau (2026) for designs where **no unit remains untreated**; local-linear estimator at the dose support boundary returning Weighted Average Slope (WAS) on Design 1' (`d̲=0` / QUG) or `WAS_{d̲}` on Design 1 (`d̲>0`, continuous-near-d̲ or mass-point), with multi-period event-study extension (last-treatment cohort, pointwise CIs). **Panel-only** in this release (repeated cross-sections rejected by the validator). Alias `HAD`. diff --git a/diff_diff/synthetic_control.py b/diff_diff/synthetic_control.py index 4004446c..5a04fa75 100644 --- a/diff_diff/synthetic_control.py +++ b/diff_diff/synthetic_control.py @@ -9,9 +9,11 @@ A single treated unit's counterfactual is built as a convex combination of "donor" (never-treated) units. Donor weights ``W*(V)`` solve a simplex-constrained weighted least-squares fit of the treated unit's predictors; the predictor-importance -matrix ``V`` (diagonal, PSD) is either chosen data-driven by minimizing pre-period -outcome MSPE ("nested") or supplied by the user ("custom"). The treatment-effect -path is the gap ``α̂_1t = Y_1t − Σ_j w_j · Y_jt`` over the post periods. +matrix ``V`` (diagonal, PSD) is chosen data-driven by minimizing pre-period outcome MSPE +("nested"), by out-of-sample cross-validation ("cv", split at ``v_cv_t0``), or by the +closed-form inverse-variance heuristic ("inverse_variance"), or supplied by the user +("custom"). The treatment-effect path is the gap ``α̂_1t = Y_1t − Σ_j w_j · Y_jt`` over +the post periods. Distinct from :class:`~diff_diff.SyntheticDiD` (Arkhangelsky et al. 2021), which adds time weights and ridge regularization: classic SCM uses **donor weights only** and a @@ -80,15 +82,28 @@ class SyntheticControl: Parameters ---------- - v_method : {"nested", "custom"}, default "nested" + v_method : {"nested", "custom", "cv", "inverse_variance"}, default "nested" How the predictor-importance matrix V is chosen. ``"nested"`` selects V data-driven by minimizing the pre-period outcome MSPE of ``W*(V)`` (ADH 2010 §2.3). ``"custom"`` uses the user-supplied ``custom_v`` and - skips the outer search. + skips the outer search. ``"cv"`` selects V by out-of-sample + cross-validation (ADH 2015 §; Abadie 2021 Eq. 9): the pre-period is split + at ``v_cv_t0`` into a training and a validation window; V is chosen to + minimize the validation-window outcome MSPE of the training-fit weights, + then the final weights are re-estimated on the validation-window + predictors. ``"inverse_variance"`` uses the closed-form + ``v_h = 1/Var(X_{h·})`` (Abadie 2021 §3.2(a); variance over donors+treated), + applied to the RAW predictors so the effective objective is the + unit-variance-rescaled ``Σ_h diff_h²/Var_h`` — no search, deterministic. Note + this rescaling *is* what ``standardize="std"`` does, so the ``standardize`` + setting does not compose with it (equivalent to uniform V on standardized + predictors); applying ``1/Var`` on already-standardized rows would + double-rescale to ``Σ_h diff_h²/Var_h²``. custom_v : array-like, optional Diagonal of V (length = number of predictors). Required iff - ``v_method="custom"``; must be None when ``v_method="nested"``. Must be - finite and non-negative; trace-normalized internally. + ``v_method="custom"``; must be None for every other ``v_method`` + (``nested`` / ``cv`` / ``inverse_variance``). Must be finite and + non-negative; trace-normalized internally. optimizer_options : dict, optional Extra options merged into every ``scipy.optimize.minimize`` call in the outer V search (e.g. ``maxiter``, ``xatol``, ``fatol``). @@ -112,6 +127,12 @@ class SyntheticControl: Significance level recorded for downstream (placebo) inference. seed : int, optional Seed for the multistart random (Dirichlet) starting points. + v_cv_t0 : int, optional + Training/validation split index for ``v_method="cv"`` only (positional + into the pre-periods: training = ``pre[:v_cv_t0]``, validation = + ``pre[v_cv_t0:]``). Must leave at least 1 training and 1 validation + pre-period. Default None → ``len(pre_periods) // 2`` (Abadie 2021's + ``t0 = T0/2``). Must be None unless ``v_method="cv"``. """ def __init__( @@ -125,6 +146,7 @@ def __init__( standardize: str = "std", alpha: float = 0.05, seed: Optional[int] = None, + v_cv_t0: Optional[int] = None, ): self.v_method = v_method self.custom_v = custom_v @@ -135,6 +157,7 @@ def __init__( self.standardize = standardize self.alpha = alpha self.seed = seed + self.v_cv_t0 = v_cv_t0 self._validate_config() @@ -148,8 +171,11 @@ def __init__( def _validate_config(self) -> None: """Validate hyperparameters; shared by ``__init__`` and ``set_params``.""" - if self.v_method not in ("nested", "custom"): - raise ValueError(f"v_method must be one of ('nested', 'custom'), got {self.v_method!r}") + if self.v_method not in ("nested", "custom", "cv", "inverse_variance"): + raise ValueError( + "v_method must be one of ('nested', 'custom', 'cv', 'inverse_variance'), " + f"got {self.v_method!r}" + ) if self.standardize not in ("std", "none"): raise ValueError( f"standardize must be one of ('std', 'none'), got {self.standardize!r}" @@ -157,9 +183,9 @@ def _validate_config(self) -> None: # custom_v cross-field rules — fail-closed, never silently ignore. if self.v_method == "custom" and self.custom_v is None: raise ValueError("custom_v is required when v_method='custom'.") - if self.v_method == "nested" and self.custom_v is not None: + if self.v_method != "custom" and self.custom_v is not None: raise ValueError( - "custom_v must be None when v_method='nested' " + f"custom_v must be None when v_method={self.v_method!r} " "(it would be silently ignored otherwise)." ) if self.custom_v is not None: @@ -184,6 +210,23 @@ def _validate_config(self) -> None: ) if not (0 < self.alpha < 1): raise ValueError(f"alpha must be in (0, 1), got {self.alpha!r}") + # v_cv_t0 is meaningful ONLY for v_method="cv" — fail closed (never silently + # ignore). The full range check (1 <= t0 <= n_pre-1) needs the fitted pre-period + # count and is enforced in fit(); here we validate type/positivity + the + # cross-field rule. (bool is an int subclass but is rejected by the < 1 path only + # for False; treat True/False as invalid via the explicit bool guard.) + if self.v_cv_t0 is not None: + if self.v_method != "cv": + raise ValueError( + "v_cv_t0 is only valid when v_method='cv' " + f"(got v_method={self.v_method!r}); leave v_cv_t0=None otherwise." + ) + if isinstance(self.v_cv_t0, bool) or not isinstance(self.v_cv_t0, (int, np.integer)): + raise ValueError( + f"v_cv_t0 must be a positive integer or None, got {self.v_cv_t0!r}" + ) + if self.v_cv_t0 < 1: + raise ValueError(f"v_cv_t0 must be >= 1, got {self.v_cv_t0!r}") def get_params(self) -> Dict[str, Any]: """Get estimator parameters.""" @@ -197,6 +240,7 @@ def get_params(self) -> Dict[str, Any]: "standardize": self.standardize, "alpha": self.alpha, "seed": self.seed, + "v_cv_t0": self.v_cv_t0, } def set_params(self, **params) -> "SyntheticControl": @@ -385,10 +429,18 @@ def fit( "unit or a donor over their selected periods. Restrict predictor_window / " "special_predictors periods to where the variable is observed." ) - X1s, X0s, _ = _standardize(X1, X0, self.standardize) - - k = X1s.shape[0] + # Standardized predictors feed the nested / custom paths. inverse_variance applies + # 1/Var to the RAW predictors (standardization would double-rescale to 1/Var²); cv + # re-standardizes each window separately inside _outer_solve_V_cv (ADH runs a + # separate dataprep per window). Both bind X1s/X0s to the raw matrices here so they + # never trigger _standardize's (irrelevant) full-pre zero-variance-row warning on a + # path that does not use the full-pre standardized matrices. + k = X1.shape[0] J = len(donor_ids) + if self.v_method in ("inverse_variance", "cv"): + X1s, X0s = X1, X0 + else: + X1s, X0s, _ = _standardize(X1, X0, self.standardize) # --- solve for V and donor weights --- # mspe_v is the OUTER-objective value; it is populated only when a nested V @@ -397,21 +449,112 @@ def fit( # ``outer_converged`` tracks whether the nested V search reached an optimum # (trivially True when there is no outer search: custom V or a single donor). outer_converged = True + # The RESOLVED cv train/validation split actually used (None unless v_method="cv"); + # surfaced on the Results object as public, pickle-surviving metadata. + resolved_v_cv_t0: Optional[int] = None + # Resolve + validate the cv split and the cv predictor precondition UP FRONT, before + # the single-donor / solve dispatch, so an invalid v_cv_t0 or predictor configuration + # fails closed and v_cv_t0 is surfaced consistently even on the degenerate J==1 path + # (where the V search is skipped — the single donor forces w=[1] regardless). + if self.v_method == "cv": + n_pre = len(pre_periods) + if n_pre < 2: + raise ValueError( + f"v_method='cv' requires at least 2 pre-treatment periods (got {n_pre}) " + "to form a training and a validation window." + ) + t0 = self.v_cv_t0 if self.v_cv_t0 is not None else n_pre // 2 + if not (1 <= t0 <= n_pre - 1): + raise ValueError( + f"v_cv_t0={t0} is out of range; it must leave >=1 training and >=1 " + f"validation pre-period (1 <= v_cv_t0 <= {n_pre - 1})." + ) + _, _, all_spanning = _cv_window_status(specs, pre_periods, t0) + if not all_spanning: + # PRECONDITION (identification): faithful per-window re-aggregation needs + # every predictor measurable on BOTH windows — the training-window fit + # (steps 2-3) AND the validation-window step-4 refit. A predictor present + # in only one window cannot be re-aggregated on the other, so its V weight + # would be unidentified. The default per-period outcome lags are + # single-period and live in one window only, so they violate this. + raise ValueError( + "v_method='cv' requires every predictor to span BOTH the training " + "window (pre[:v_cv_t0]) and the validation window (pre[v_cv_t0:]) so it " + "can be re-aggregated on each (ADH 2015 cross-validation re-measures " + "each predictor per window). At least one predictor is observed in only " + "one window. The default per-period outcome lags are single-period and " + "cannot span; pass covariate `predictors` or multi-period " + "`special_predictors` that cover both windows (or adjust `v_cv_t0`)." + ) + if not _cv_windows_have_variation( + pivots, specs, treated_id, donor_ids, pre_periods, t0 + ): + # IDENTIFICATION (fail-closed): if a window's re-aggregated predictors are + # constant ACROSS DONORS (all donor columns identical), X0·W is constant in + # W, so the inner solve's objective is flat and returns arbitrary weights + # reported as converged. Raise rather than return an arbitrary ATT. (Donor + # distinguishability identifies W; treated-vs-donor variation does not.) + raise ValueError( + "v_method='cv': a cross-validation window (pre[:v_cv_t0] or " + "pre[v_cv_t0:]) has no variation ACROSS DONORS in ANY predictor after " + "re-aggregation — every donor has identical predictors in that window, " + "so X0·W is constant in W and the weight solve is unidentified (any " + "donor weights fit it equally, even when the treated unit differs). " + "Adjust `v_cv_t0`, the `predictors`/`special_predictors`, or the donor pool." + ) + resolved_v_cv_t0 = t0 if self.v_method == "custom": v = self._prepare_custom_v(k) w, converged = _inner_solve_W(X1s, X0s, v, self.inner_max_iter, self.inner_min_decrease) elif J == 1: - # Degenerate: a single donor forces w = [1.0]; V is irrelevant. + # Degenerate: a single donor forces w = [1.0], so the predictor-importance V is + # UNIDENTIFIED — every V yields the same synthetic — for EVERY v_method, cv and + # inverse_variance included (their selected / closed-form V would be inert). We + # report a uniform v_weights and leave mspe_v None, and warn. The donor weights / + # gap path / ATT do not depend on V here, so they are unaffected. This is the + # documented single-donor contract (REGISTRY §SyntheticControl). warnings.warn( "Only one donor unit is available; the synthetic control is that " - "single donor (w = 1) and the V search is skipped. SCM is degenerate " - "with a single donor.", + "single donor (w = 1) and the V search is skipped (V is unidentified with " + "one donor), so v_weights is uniform regardless of v_method. SCM is " + "degenerate with a single donor.", UserWarning, stacklevel=2, ) v = np.ones(k) / k w = np.array([1.0]) converged = True + elif self.v_method == "inverse_variance": + # Closed-form V = 1/Var(X) (Abadie 2021 §3.2(a)); no outer search, so + # mspe_v stays None and outer_converged stays True. Apply V to the RAW + # (UNstandardized) predictors: inverse-variance weighting IS the unit-variance + # rescaling, so the effective objective is Σ_h diff_h²/Var_h (the paper's + # selector). Using the standardized X1s/X0s here would double-rescale to + # Σ_h diff_h²/Var_h² — hence the standardize pre-scaling is intentionally + # bypassed on this branch (equivalent to uniform V on standardized predictors). + v = _inverse_variance_v(X1, X0) + w, converged = _inner_solve_W(X1, X0, v, self.inner_max_iter, self.inner_min_decrease) + elif self.v_method == "cv": + # Out-of-sample CV V-selection (ADH 2015; Abadie 2021 t0=T0/2). The split and + # the predictor precondition were resolved/validated up front (above), so + # resolved_v_cv_t0 is a valid int here. + assert resolved_v_cv_t0 is not None + v, w, converged, mspe_v, outer_converged = _outer_solve_V_cv( + pivots, + specs, + treated_id, + donor_ids, + Z1, + Z0, + pre_periods, + resolved_v_cv_t0, + self.n_starts, + self.seed, + self.optimizer_options, + self.inner_max_iter, + self.inner_min_decrease, + self.standardize, + ) else: v, w, converged, mspe_v, outer_converged = _outer_solve_V( X1, @@ -471,12 +614,26 @@ def fit( # --- reporting structures --- donor_weights = {donor_ids[j]: float(w[j]) for j in range(J) if w[j] > _MIN_REPORT_WEIGHT} v_weights = {labels[i]: float(v[i]) for i in range(k)} - synthetic_X = X0 @ w - donor_mean = X0.mean(axis=1) + # Predictor-balance basis: under cv the reported donor_weights come from the + # ADH-2015 step-4 refit on the VALIDATION-window re-aggregated predictors, so the + # balance table is reported on that same basis — making the public surface + # internally consistent (v_weights + this balance reproduce donor_weights). Every + # other V method fits on the full-pre predictors and reports balance there. + if self.v_method == "cv": + assert resolved_v_cv_t0 is not None + bal_specs = cast( + List[_PredictorSpec], + _truncate_specs_to_window(specs, set(pre_periods[resolved_v_cv_t0:])), + ) + Xb1, Xb0, _ = _build_predictor_matrix(pivots, bal_specs, treated_id, donor_ids) + else: + Xb1, Xb0 = X1, X0 + synthetic_X = Xb0 @ w + donor_mean = Xb0.mean(axis=1) predictor_balance = pd.DataFrame( { "predictor": labels, - "treated": X1, + "treated": Xb1, "synthetic": synthetic_X, "donor_mean": donor_mean, } @@ -504,6 +661,7 @@ def fit( standardize=self.standardize, alpha=self.alpha, mspe_v=mspe_v, + v_cv_t0=resolved_v_cv_t0, rmspe_ratio=rmspe_ratio, ) # Retain the panel state needed to refit each donor as a pseudo-treated @@ -539,6 +697,7 @@ def fit( ), inner_max_iter=self.inner_max_iter, inner_min_decrease=self.inner_min_decrease, + v_cv_t0=self.v_cv_t0, ) # Persist whether the treated unit's own fit reached a valid optimum — both # the inner Frank-Wolfe weight solve AND (on the nested path) the outer V @@ -574,7 +733,8 @@ def synthetic_control( """ Convenience function for classic synthetic control estimation. - Constructor-only keyword arguments (``v_method``, ``custom_v``, ``n_starts``, + Constructor-only keyword arguments (``v_method`` — ``"nested"`` / ``"custom"`` / + ``"cv"`` / ``"inverse_variance"`` — ``custom_v``, ``v_cv_t0``, ``n_starts``, ``standardize``, ``alpha``, ``seed``, ``optimizer_options``, ``inner_max_iter``, ``inner_min_decrease``) and ``fit`` keyword arguments (``post_periods``, ``treated_unit``, ``predictors``, ``special_predictors``, @@ -929,6 +1089,47 @@ def _standardize( return X1s, X0s, divisor +def _inverse_variance_v(X1: np.ndarray, X0: np.ndarray) -> np.ndarray: + """Closed-form inverse-variance predictor importance ``v_h = 1/Var(X_{h·})``. + + Abadie (2021) §3.2(a). Variance is taken over donors+treated on the + UNSTANDARDIZED predictors ``X1``/``X0``. The caller applies this V to the RAW + predictors (NOT the standardized ``X1s``/``X0s``): inverse-variance weighting IS + the unit-variance rescaling, so the effective objective is ``Σ_h diff_h²/Var_h``; + applying it to already-standardized rows would double-rescale to + ``Σ_h diff_h²/Var_h²``. The inverse is **exact** for every strictly-positive + variance (``1/Var_h``, no flooring of small-but-positive variances — flooring would + distort the relative weights of rows with tiny unequal variances and break the + documented closed form). Special handling is reserved for **non-positive** variance: + a zero-variance predictor row (no cross-unit information) contributes 0 weight, + matching the inverse-variance multistart seed in ``_v_starts``. If EVERY row is + zero-variance — or, in the pathological case of a positive variance so tiny that + ``1/Var_h`` overflows to a non-finite total — the result falls back to uniform V + (with a ``UserWarning``). Trace-normalized to sum to 1 (``W*`` is invariant to V's + scale; this just matches ``_prepare_custom_v``). + """ + k = X1.shape[0] + combined = np.column_stack([X0, X1.reshape(-1, 1)]) + row_var = np.var(combined, axis=1, ddof=1) + # Exact 1/Var on the strictly-positive rows only (mask the division so a zero-variance + # row neither divides by zero nor gets clipped); non-positive rows get 0 weight. + inv = np.zeros(k) + pos = row_var > 0 + inv[pos] = 1.0 / row_var[pos] + total = float(np.sum(inv)) + if total <= 0 or not np.isfinite(total): + warnings.warn( + "inverse_variance V: no usable predictor variance across donors+treated " + "(every row is zero-variance, or a positive variance is so tiny that 1/Var " + "overflows), so there is no information to weight predictors; falling back to " + "uniform V.", + UserWarning, + stacklevel=2, + ) + return np.ones(k) / k + return inv / total + + def _inner_solve_W( X1s: np.ndarray, X0s: np.ndarray, @@ -1196,6 +1397,341 @@ def objective(theta: np.ndarray) -> float: return v_star, w_star, converged, mspe, outer_converged +def _truncate_specs_to_window( + specs: List["_PredictorSpec"], window: set +) -> List[Optional["_PredictorSpec"]]: + """Re-aggregate each predictor spec over a sub-window of the pre-period. + + Returns a list parallel to ``specs`` whose entry ``i`` is ``specs[i]`` with its + ``periods`` intersected with ``window`` (so the spec's op — mean/sum/identity — then + recomputes over only those periods), or ``None`` where the spec has NO period in + ``window``. RE-aggregating (recomputing the aggregate over the window's periods) is + the faithful ADH-2015 cross-validation operation — a separate ``dataprep`` per + window — as opposed to masking a fixed full-pre aggregate in/out (which cannot make + a spanning predictor out-of-sample, since the full-pre value already bakes in the + validation-window observations). + """ + out: List[Optional["_PredictorSpec"]] = [] + for spec in specs: + kept = [p for p in spec.periods if p in window] + out.append(replace(spec, periods=kept) if kept else None) + return out + + +def _cv_window_status( + specs: List["_PredictorSpec"], pre_periods: List[Any], t0: int +) -> Tuple[bool, bool, bool]: + """Classify cv predictor coverage at the positional split ``t0``. + + Returns ``(has_training, has_validation, all_spanning)`` over the spec set, where a + spec is *training*-active iff any of its periods lie in ``pre[:t0]`` and + *validation*-active iff any lie in ``pre[t0:]``: + + - ``has_training`` / ``has_validation``: at least one spec is active in that window. + - ``all_spanning``: EVERY spec is active in BOTH windows — the cv identification + precondition for faithful per-window re-aggregation. Each predictor can then be + re-measured on the training window for the V-search fit AND on the validation + window for the step-4 refit, so the cross-validated ``V*`` is fully identified and + drives both fits with no zeroed coordinate. This holds for ADH-2015's shared + covariate / multi-period special predictors (which span the windows) but NOT for + the window-specific default per-period outcome lags (each lives in one window). + """ + tr_set = set(pre_periods[:t0]) + va_set = set(pre_periods[t0:]) + has_tr = has_va = False + all_spanning = True + for s in specs: + in_tr = any(p in tr_set for p in s.periods) + in_va = any(p in va_set for p in s.periods) + has_tr = has_tr or in_tr + has_va = has_va or in_va + if not (in_tr and in_va): + all_spanning = False + return has_tr, has_va, all_spanning + + +def _window_has_donor_variation(X0w: np.ndarray) -> bool: + """True iff at least one predictor row varies ACROSS DONORS in this window. + + SCM weights are identified by the DONORS being distinguishable: ``X0·W`` is a convex + combination of the donor columns, so if every predictor row is constant across donors + (all donor columns identical) then ``X0·W`` is the same for every simplex ``W`` and the + weight solve is unidentified — ``_inner_solve_W`` returns arbitrary (tie-broken) weights + while reporting convergence, REGARDLESS of whether the treated unit differs (the treated + unit is the matching target, not part of W's identification). So this checks donor-side + variance only, NOT treated-vs-donor variance. ``_standardize`` merely *warns* on + zero-variance rows, so cv needs this as an explicit fail-closed gate. A single donor + (``J < 2``) forces ``W=[1]`` and is handled by the J==1 short-circuit, so it is treated + as identified here (and avoids a ddof=1 variance over one column). + """ + if X0w.shape[1] < 2: + return True + return bool(np.any(np.var(X0w, axis=1, ddof=1) > 0)) + + +def _cv_windows_have_variation( + pivots: Dict[str, pd.DataFrame], + specs: List["_PredictorSpec"], + treated_id: Any, + donor_ids: List[Any], + pre_periods: List[Any], + t0: int, +) -> bool: + """True iff BOTH cv windows carry identifying cross-DONOR predictor variation. + + For each window, re-aggregate the specs over it and check that at least one row varies + across donors (see ``_window_has_donor_variation``). A window with no such variation + leaves the weights unidentified, so the CV fit must fail closed (headline ``ValueError``; + placebo / backdated refit dropped or marked infeasible) rather than feed arbitrary + weights into the ATT or the placebo reference set. + """ + for window in (set(pre_periods[:t0]), set(pre_periods[t0:])): + wspecs = [s for s in _truncate_specs_to_window(specs, window) if s is not None] + if not wspecs: + return False + _, X0w, _ = _build_predictor_matrix(pivots, wspecs, treated_id, donor_ids) + if not _window_has_donor_variation(X0w): + return False + return True + + +def _outer_solve_V_cv( + pivots: Dict[str, pd.DataFrame], + specs: List["_PredictorSpec"], + treated_id: Any, + donor_ids: List[Any], + Z1: np.ndarray, + Z0: np.ndarray, + pre_periods: List[Any], + t0: int, + n_starts: int, + seed: Optional[int], + optimizer_options: Optional[Dict[str, Any]], + inner_max_iter: int, + inner_min_decrease: float, + standardize: str, +) -> Tuple[np.ndarray, np.ndarray, bool, float, bool]: + """Out-of-sample cross-validation V selection (ADH 2015 §; Abadie 2021 Eq. 9). + + Faithful per-window re-aggregation. The pre-period is split positionally at ``t0`` + into a training window ``pre[:t0]`` and a validation window ``pre[t0:]``. Each + predictor spec is RE-AGGREGATED over each window — its op (mean/sum/identity) is + recomputed over only the periods that fall in that window — exactly as ADH-2015's + cross-validation re-runs ``dataprep`` separately on each window. The predictor + dimension ``k`` is preserved: re-aggregation changes each row's VALUES per window, + not the number of rows, so V stays ``k``-dimensional throughout. + + - Steps 2-3 (V-search): for each candidate V, fit the training weights on the + TRAINING-window re-aggregated predictors and rank V by the held-out + validation-window outcome MSPE ``mean((Z1[t0:] - Z0[t0:]@w)**2)`` -> ``V*``. + Because each predictor is re-measured on the training window only, the V-search is + genuinely out-of-sample for ALL predictor types (a spanning predictor's + validation-window observations never enter the training fit) — the property that + masking a fixed full-pre aggregate could not deliver. + - Step 4 (final weights): re-estimate ``W* = W(V*)`` on the VALIDATION-window + re-aggregated predictors (ADH-2015 "predictors from the last part of the + pre-period"). The same ``V*`` drives both fits with no zeroed coordinate, so the + reported ``v_weights`` (= V*) reproduce the reported ``donor_weights`` on the + validation-window predictors. + + Each window's predictor matrix is standardized SEPARATELY (ADH runs a separate + ``dataprep`` per window; ``V*`` is predictor-importance applied in each window's own + scaled space). The caller (``fit`` / ``_truncate_snapshot_in_time``) enforces the + fully-spanning precondition (every spec observed in BOTH windows) before calling, so + every re-aggregated spec is non-empty; this guards defensively and fails closed + (non-converged sentinel) otherwise. + + Returns ``(v_star, w_star, inner_converged, mspe_v, outer_converged)`` where + ``mspe_v`` is the step-3 validation-MSPE selection criterion at V* (the validation + MSPE of the TRAINING-window fit). + """ + k = len(specs) + train_specs = _truncate_specs_to_window(specs, set(pre_periods[:t0])) + val_specs = _truncate_specs_to_window(specs, set(pre_periods[t0:])) + Z1_va = Z1[t0:] + Z0_va = Z0[t0:, :] + + # Fail closed if any predictor is absent from a window (a non-fully-spanning spec set + # — e.g. an in-time-truncated placebo snapshot). The caller enforces the + # fully-spanning precondition up front for the headline fit and each placebo date; + # this guards the path defensively, returning a non-converged sentinel so the caller + # drops/raises rather than crashing on an empty predictor build. + if any(s is None for s in train_specs) or any(s is None for s in val_specs): + return np.ones(k) / k, np.zeros(len(donor_ids)), False, float("nan"), False + train_specs_c = cast(List["_PredictorSpec"], train_specs) + val_specs_c = cast(List["_PredictorSpec"], val_specs) + + # Re-aggregate the predictors over each window and standardize each window SEPARATELY. + X1_tr, X0_tr, _ = _build_predictor_matrix(pivots, train_specs_c, treated_id, donor_ids) + X1_va, X0_va, _ = _build_predictor_matrix(pivots, val_specs_c, treated_id, donor_ids) + + # Fail closed if a window has NO cross-DONOR predictor variation: the donor columns are + # then identical, so X0·W is constant in W, the inner solve's objective is flat, and it + # returns arbitrary weights while reporting convergence — silently corrupting the ATT + # (headline) or the placebo reference set. fit() raises up front for the headline; here + # we return the non-converged sentinel so a placebo refit is dropped (_placebo_fit_unit + # returns None) instead of entering the permutation rank. (Unit-specific: in-space + # placebos reassign the treated role and shrink the donor pool, so donor-distinguishability + # must be re-checked for each pseudo-treated unit, not just the headline.) + if not _window_has_donor_variation(X0_tr) or not _window_has_donor_variation(X0_va): + return np.ones(k) / k, np.zeros(len(donor_ids)), False, float("nan"), False + + X1s_tr, X0s_tr, _ = _standardize(X1_tr, X0_tr, standardize) + X1s_va, X0s_va, _ = _standardize(X1_va, X0_va, standardize) + + if k == 1: + # Single predictor: V is fixed; re-aggregated in each window (guarded non-empty). + v = np.array([1.0]) + w_tr, conv_tr = _inner_solve_W(X1s_tr, X0s_tr, v, inner_max_iter, inner_min_decrease) + w_star, converged = _inner_solve_W(X1s_va, X0s_va, v, inner_max_iter, inner_min_decrease) + # mspe_v is Eq. 9's validation MSPE of the TRAINING-window fit, so it is only valid + # when that solve converged; if it truncates, fail closed (nan criterion + outer + # non-converged) so downstream placebo/LOO diagnostics do not run off an invalid CV + # criterion (there is no outer search here, so conv_tr IS the "search" convergence). + mspe_v = float(np.mean((Z1_va - Z0_va @ w_tr) ** 2)) if conv_tr else float("nan") + return v, w_star, converged, mspe_v, conv_tr + + _st = {"total": 0, "nonconv": 0} + # Penalty bound on the VALIDATION window (the objective's window), so a truncated + # training fit can never win the argmin. Finite (np.inf floods scipy). + _vertex_mspe = [float(np.mean((Z1_va - Z0_va[:, j]) ** 2)) for j in range(Z0_va.shape[1])] + _penalty = 10.0 * (max(_vertex_mspe) + 1.0) if _vertex_mspe else 1.0 + + def objective(theta: np.ndarray) -> float: + v = _softmax(theta) + # Step 2: fit the training weights on the TRAINING-window re-aggregated predictors. + w, conv = _inner_solve_W(X1s_tr, X0s_tr, v, inner_max_iter, inner_min_decrease) + _st["total"] += 1 + if not conv: + _st["nonconv"] += 1 + return _penalty + # Step 3: rank V by the held-out validation-window OUTCOME MSPE. + return float(np.mean((Z1_va - Z0_va @ w) ** 2)) + + def _uniformity(theta: np.ndarray) -> float: + # Squared distance of V=softmax(theta) from uniform; smaller = more uniform. + v = _softmax(theta) + return float(np.sum((v - 1.0 / k) ** 2)) + + # Deterministic tie-break (Abadie 2021 fn. 7: the CV V* "need not be unique" and an + # implementation must pick a deterministic tie-break). Among candidates whose + # validation MSPE ties to tolerance, prefer the V closest to uniform — a principled, + # start-order-independent choice (the densest V among equally-good optima). + _tie_rtol, _tie_atol = 1e-9, 1e-12 + + def _is_better( + new_fun: float, + new_x: np.ndarray, + new_success: bool, + cur_fun: float, + cur_x: Optional[np.ndarray], + cur_success: bool, + ) -> bool: + if cur_x is None: + return True + tol = _tie_atol + _tie_rtol * abs(cur_fun) + if new_fun < cur_fun - tol: + return True + if abs(new_fun - cur_fun) <= tol: + # Tie on the validation MSPE. Prefer a CONVERGED candidate over a non-converged + # one first — a success=False candidate must not displace a converged incumbent + # (which would spuriously flip outer_converged to False and drop the fit / + # placebos). Among EQUALLY-converged ties, prefer the V closest to uniform (the + # deterministic densest-optimum tie-break, Abadie 2021 fn. 7). + if new_success != cur_success: + return new_success + return _uniformity(new_x) < _uniformity(cur_x) - 1e-15 + return False + + nm_options = {"maxiter": 1000, "xatol": 1e-8, "fatol": 1e-8} + if optimizer_options: + nm_options.update(optimizer_options) + powell_options = dict(nm_options) + if "xatol" in powell_options: + powell_options["xtol"] = powell_options.pop("xatol") + if "fatol" in powell_options: + powell_options["ftol"] = powell_options.pop("fatol") + + rng = np.random.default_rng(seed) + # Heuristic starts use the TRAINING-window matrices (the V-search fits there) scored + # on the validation outcomes (the CV criterion), so the univariate seed is aligned + # with the objective the search actually minimizes. + starts, start_total, start_nonconv = _v_starts( + k, + X1_tr, + X0_tr, + X1s_tr, + X0s_tr, + Z1_va, + Z0_va, + n_starts, + rng, + inner_max_iter, + inner_min_decrease, + ) + _st["total"] += start_total + _st["nonconv"] += start_nonconv + + best_x: Optional[np.ndarray] = None + best_fun = np.inf + best_success = False + for theta0 in starts: + res = minimize(objective, theta0, method="Nelder-Mead", options=nm_options) + if _is_better(float(res.fun), res.x, bool(res.success), best_fun, best_x, best_success): + best_fun = float(res.fun) + best_x = res.x + best_success = bool(res.success) + + # ``starts`` is non-empty (always includes the uniform start) and the first + # iteration sets ``best_x`` via the ``cur_x is None`` guard, so it is non-None here. + assert best_x is not None + res_p = minimize(objective, best_x, method="Powell", options=powell_options) + if _is_better(float(res_p.fun), res_p.x, bool(res_p.success), best_fun, best_x, best_success): + best_fun = float(res_p.fun) + best_x = res_p.x + best_success = bool(res_p.success) + elif bool(res_p.success) and np.isclose(res_p.fun, best_fun, rtol=1e-5, atol=1e-8): + # Powell converged back AT the incumbent objective → validates it as an optimum + # (mirrors _outer_solve_V; a "success" at a strictly worse point must NOT flip + # best_success). + best_success = True + outer_converged = best_success + + if not outer_converged: + warnings.warn( + "Outer CV V-search (Nelder-Mead / Powell) did not converge; the selected " + "predictor-importance matrix V (and the resulting donor weights / ATT) may " + "be sub-optimal. Increase optimizer_options['maxiter'] or n_starts.", + UserWarning, + stacklevel=3, + ) + if _st["nonconv"] > 0: + warnings.warn( + f"Inner Frank-Wolfe did not converge on {_st['nonconv']} of {_st['total']} " + f"weight solves during CV V selection (inner_max_iter={inner_max_iter}); those " + "evaluations were excluded from V ranking, so the selected V / donor weights / " + "ATT may be sub-optimal. Increase inner_max_iter or relax inner_min_decrease.", + UserWarning, + stacklevel=3, + ) + + assert best_x is not None # set in the NM loop above; narrows past the Powell branch + v_star = _softmax(best_x) + # mspe_v = step-3 selection criterion at V* (validation MSPE of the TRAINING-window fit). + # It is Eq. 9's criterion only if the training solve converged; if it truncates (e.g. + # inner_max_iter too small), fail closed — nan criterion + outer non-converged — so the + # fit and downstream placebo/LOO diagnostics do not run off an invalid CV criterion. + w_tr, conv_tr = _inner_solve_W(X1s_tr, X0s_tr, v_star, inner_max_iter, inner_min_decrease) + if conv_tr: + mspe_v = float(np.mean((Z1_va - Z0_va @ w_tr) ** 2)) + else: + mspe_v = float("nan") + outer_converged = False + # Step 4 reported weights: refit V* on the VALIDATION-window re-aggregated predictors. + w_star, converged = _inner_solve_W(X1s_va, X0s_va, v_star, inner_max_iter, inner_min_decrease) + return v_star, w_star, converged, mspe_v, outer_converged + + def _compute_gap_path( Y: pd.DataFrame, w: np.ndarray, @@ -1269,7 +1805,13 @@ def _placebo_fit_unit( # treated+donor panel, so a donor reassigned as pseudo-treated has finite cells. if not (np.all(np.isfinite(X1)) and np.all(np.isfinite(X0))): return None - X1s, X0s, _ = _standardize(X1, X0, snap.standardize) + # inverse_variance applies 1/Var to the RAW predictors; cv re-standardizes each window + # separately inside _outer_solve_V_cv (mirrors fit()). Both skip the full-pre + # _standardize pass (unused on those paths) and its irrelevant zero-variance warning. + if snap.v_method in ("inverse_variance", "cv"): + X1s, X0s = X1, X0 + else: + X1s, X0s, _ = _standardize(X1, X0, snap.standardize) Y = snap.pivots[snap.outcome] Z1 = Y.loc[snap.pre_periods, unit].to_numpy(dtype=float) Z0 = Y.loc[snap.pre_periods, donor_pool].to_numpy(dtype=float) @@ -1288,6 +1830,40 @@ def _placebo_fit_unit( elif len(donor_pool) == 1: # Degenerate: a single donor forces w = [1.0]; V is irrelevant. w, converged = np.array([1.0]), True + elif snap.v_method == "inverse_variance": + # Recompute the closed-form V for THIS pseudo-treated unit's predictors, + # applied to the RAW predictors (deterministic; no outer search; the + # standardize pre-scaling is bypassed to avoid the 1/Var² double-rescale — + # see fit()). outer_converged stays True. + v = _inverse_variance_v(X1, X0) + w, converged = _inner_solve_W(X1, X0, v, snap.inner_max_iter, snap.inner_min_decrease) + elif snap.v_method == "cv": + # Reproduce the FULL per-window re-aggregation CV procedure for this + # pseudo-treated unit so placebos use the SAME estimator as the treated fit. + # The positional t0 split and fully-spanning precondition depend only on + # snap.specs / snap.pre_periods (NOT on which unit is treated), so a headline + # fit that satisfied fully-spanning satisfies it for every pseudo-treated unit; + # n_pre>=2 and t0 in range are likewise guaranteed (the headline snapshot was + # fit()-validated, and an in-time-truncated snapshot has >=2 pre-fake periods + # with an out-of-range pinned t0 nulled to the default). + n_pre = len(snap.pre_periods) + t0 = snap.v_cv_t0 if snap.v_cv_t0 is not None else n_pre // 2 + _, w, converged, _, outer_converged = _outer_solve_V_cv( + snap.pivots, + snap.specs, + unit, + donor_pool, + Z1, + Z0, + snap.pre_periods, + t0, + n_starts, + snap.seed, + snap.optimizer_options, + snap.inner_max_iter, + snap.inner_min_decrease, + snap.standardize, + ) else: _, w, converged, _, outer_converged = _outer_solve_V( X1, @@ -1389,6 +1965,33 @@ def _truncate_snapshot_in_time( # so return None (→ status="infeasible") rather than letting the refit "fail". if new_custom_v is not None and float(np.sum(new_custom_v)) <= 0.0: return None, dropped + # An explicitly pinned v_cv_t0 (v_method="cv") may exceed the truncated pre-fake + # window's valid range; null it to the //2 default for the placebo refit (the + # placebo is a robustness re-run, not the headline — an explicit v_cv_t0 is not + # preserved across in-time truncation). None already means "use the default". + new_v_cv_t0 = snap.v_cv_t0 + if new_v_cv_t0 is not None and not (1 <= new_v_cv_t0 <= len(new_pre) - 1): + new_v_cv_t0 = None + # Under v_method="cv", in-time truncation can break the fully-spanning precondition + # even when the headline fit satisfied it: a spec may lose its training-window (or + # validation-window) periods to the truncation and stop spanning both windows. That is + # a STRUCTURAL infeasibility of the date (not a solver-convergence failure) — return + # None (→ status="infeasible") so it is not mislabeled "failed" with + # optimizer-remediation messaging. Uses the same fully-spanning precondition the + # headline fit enforces (over the kept, re-truncated specs at the truncated split). + if snap.v_method == "cv": + eff_t0 = new_v_cv_t0 if new_v_cv_t0 is not None else len(new_pre) // 2 + _, _, all_spanning = _cv_window_status(kept_specs, new_pre, eff_t0) + if not all_spanning: + return None, dropped + # Truncation can also leave a window with NO cross-unit variation (its predictors + # become constant across treated+donors) even when fully-spanning still holds — + # another STRUCTURAL infeasibility of the date (unidentified weight solve), so mark + # it infeasible rather than letting the refit return arbitrary "converged" weights. + if not _cv_windows_have_variation( + snap.pivots, kept_specs, snap.treated_id, snap.donor_ids, new_pre, eff_t0 + ): + return None, dropped snap_mod = replace( snap, specs=kept_specs, @@ -1396,5 +1999,6 @@ def _truncate_snapshot_in_time( pre_periods=new_pre, post_periods=new_post, custom_v=new_custom_v, + v_cv_t0=new_v_cv_t0, ) return snap_mod, dropped diff --git a/diff_diff/synthetic_control_results.py b/diff_diff/synthetic_control_results.py index 6dfa10b5..60ef44ad 100644 --- a/diff_diff/synthetic_control_results.py +++ b/diff_diff/synthetic_control_results.py @@ -63,6 +63,10 @@ class _SyntheticControlFitSnapshot: optimizer_options: Optional[Dict[str, Any]] inner_max_iter: int inner_min_decrease: float + # Training/validation split index for v_method="cv" (positional into pre_periods); + # None → len(pre_periods)//2 default. Carried so in-space/LOO/in-time placebo refits + # reproduce the same CV split as the treated fit. + v_cv_t0: Optional[int] @dataclass @@ -101,10 +105,19 @@ class SyntheticControlResults: the interpretability floor (1e-6) are dropped. v_weights : dict Mapping ``{predictor_label: v}`` — the diagonal predictor-importance - matrix V, trace-normalized to sum to 1. + matrix V, trace-normalized to sum to 1. On the degenerate **single-donor** + path (one donor forces ``w=[1]``) V is unidentified — every V yields the same + synthetic — so ``v_weights`` is **uniform** for every ``v_method`` (including + ``cv`` / ``inverse_variance``), with a ``UserWarning`` emitted at fit time. predictor_balance : pandas.DataFrame Predictor-balance table: for each predictor, the treated value, the - synthetic value (donor-weighted), and the donor-pool mean. + synthetic value (donor-weighted), and the donor-pool mean. Under + ``v_method="cv"`` the reported ``donor_weights`` come from the ADH-2015 step-4 + refit on the **validation-window** re-aggregated predictors, so the ``treated`` / + ``synthetic`` / ``donor_mean`` values are reported on that same validation-window + basis (each spec re-aggregated over ``pre[v_cv_t0:]``) — the row's ``predictor`` + label remains the full spec identity, so it stays aligned with ``v_weights``. For + every other ``v_method`` the values are the full-pre-period predictor aggregates. gap_path : dict Mapping ``{period: gap}`` for ALL periods (pre periods carry the fit residual used for ``pre_rmspe``; post periods carry the effect path). @@ -112,15 +125,24 @@ class SyntheticControlResults: Root mean squared prediction error over the pre-treatment periods (the primary fit diagnostic). mspe_v : float, optional - The outer-objective value (pre-period outcome MSPE of ``W*(V*)``), - populated only when the nested outer search actually runs; None on the - ``v_method="custom"`` path and on the degenerate single-donor path. + The outer-objective value of the selected ``V``: the **pre-period** outcome + MSPE of ``W*(V*)`` under ``v_method="nested"``, or the held-out + **validation-window** outcome MSPE under ``v_method="cv"`` (the CV selection + criterion). None when there is no outer search — the ``v_method="custom"`` + and ``"inverse_variance"`` paths and the degenerate single-donor path. Not + comparable across ``v_method`` values (different objective windows). treated_unit : Any The treated unit's identifier. pre_periods, post_periods : list Calendar-sorted pre / post period values. v_method : str - ``"nested"`` (data-driven V) or ``"custom"`` (user-supplied V). + ``"nested"`` (data-driven V), ``"custom"`` (user-supplied V), ``"cv"`` + (out-of-sample cross-validation V), or ``"inverse_variance"`` (closed-form + ``1/Var(X)`` V). + v_cv_t0 : int, optional + The training/validation split index actually used under ``v_method="cv"`` + (the resolved value — equals ``n_pre_periods // 2`` when the constructor's + ``v_cv_t0`` was None). None for every other ``v_method``. Survives pickling. standardize : str ``"std"`` (per-row SD scaling) or ``"none"``. alpha : float @@ -164,6 +186,7 @@ class SyntheticControlResults: standardize: str alpha: float = 0.05 mspe_v: Optional[float] = None + v_cv_t0: Optional[int] = None survey_metadata: Optional[Any] = field(default=None) # In-space placebo permutation inference (Abadie-Diamond-Hainmueller 2010 # Section 2.4), populated by ``in_space_placebo()``. ``rmspe_ratio`` (the @@ -323,7 +346,12 @@ def summary(self, alpha: Optional[float] = None) -> str: f"{'Standardization:':<28} {self.standardize:>10}", ] if self.mspe_v is not None and np.isfinite(self.mspe_v): - lines.append(f"{'Outer-objective MSPE:':<28} {self.mspe_v:>10.6f}") + # Under cv, mspe_v is the held-out VALIDATION-window MSPE (the CV selection + # criterion), not the pre-period objective minimized on the nested path. + _mspe_label = "Validation MSPE:" if self.v_method == "cv" else "Outer-objective MSPE:" + lines.append(f"{_mspe_label:<28} {self.mspe_v:>10.6f}") + if self.v_method == "cv" and self.v_cv_t0 is not None: + lines.append(f"{'CV train/val split (t0):':<28} {self.v_cv_t0:>10d}") if self.survey_metadata is not None: lines.extend(_format_survey_block(self.survey_metadata, 75)) @@ -445,6 +473,7 @@ def to_dict(self) -> Dict[str, Any]: "mspe_v": self.mspe_v, "treated_unit": self.treated_unit, "v_method": self.v_method, + "v_cv_t0": self.v_cv_t0, "standardize": self.standardize, # In-space placebo permutation inference. rmspe_ratio is set at fit; # placebo_p_value / n_placebos / n_failed stay at their no-inference @@ -594,7 +623,7 @@ def in_space_placebo( Parameters ---------- n_starts : int, optional - Override the multistart count for each placebo's nested V search. + Override the multistart count for each placebo's outer V search (nested/cv). Default None inherits the original fit's ``n_starts``. The placebo loop is the cost driver (one outer V search per donor); lower it for a faster, coarser scan. @@ -757,10 +786,19 @@ def in_space_placebo( p_value = rank / (n_placebos + 1) if n_failed > 0: + cv_note = ( + " Under v_method='cv' an excluded refit may instead be STRUCTURALLY " + "infeasible (the pseudo-treated unit's donor pool is indistinguishable in a " + "re-aggregated CV window) — remedied by adjusting the predictors, v_cv_t0, " + "or the donor pool, NOT inner_max_iter / n_starts." + if snap.v_method == "cv" + else "" + ) warnings.warn( - f"{n_failed} of {n_donors} in-space placebos failed to converge " - "and were excluded from the permutation distribution; " - f"placebo_p_value uses the remaining {n_placebos}.", + f"{n_failed} of {n_donors} in-space placebos were excluded from the " + "permutation distribution (the refit did not reach a valid optimum — a " + "non-converged inner weight solve or outer V search); " + f"placebo_p_value uses the remaining {n_placebos}.{cv_note}", UserWarning, stacklevel=2, ) @@ -810,8 +848,8 @@ def leave_one_out(self, n_starts: Optional[int] = None) -> pd.DataFrame: Parameters ---------- n_starts : int, optional - Override the multistart count for each leave-one-out refit's nested V - search. Default None inherits the original fit's ``n_starts``. + Override the multistart count for each leave-one-out refit's outer V + search (nested/cv). Default None inherits the original fit's ``n_starts``. Returns ------- @@ -950,10 +988,19 @@ def leave_one_out(self, n_starts: Optional[int] = None) -> pd.DataFrame: ordered = [baseline_row] + finite_rows + failed_rows if n_failed > 0: + cv_note = ( + " Under v_method='cv' a 'failed' drop may instead be STRUCTURALLY " + "infeasible (the reduced donor pool is indistinguishable in a re-aggregated " + "CV window) — remedied by adjusting the predictors, v_cv_t0, or the donor " + "pool, NOT inner_max_iter / n_starts." + if snap.v_method == "cv" + else "" + ) warnings.warn( - f"{n_failed} of {len(pos_donors)} leave-one-out refits failed to " - "converge and are reported with NaN metrics (status='failed'); the " - "ATT range uses the remaining refits.", + f"{n_failed} of {len(pos_donors)} leave-one-out refits were excluded with " + "NaN metrics (status='failed'; the refit did not reach a valid optimum — a " + "non-converged inner weight solve or outer V search); the ATT range uses " + f"the remaining refits.{cv_note}", UserWarning, stacklevel=2, ) @@ -1075,7 +1122,7 @@ def in_time_placebo( valid pre-date that is dimensionally infeasible (too few pre-fake periods, or all predictors dropped) yields a ``status="infeasible"`` row (no raise). n_starts : int, optional - Override the multistart count for each placebo refit's nested V search. + Override the multistart count for each placebo refit's outer V search (nested/cv). Default None inherits the original fit's ``n_starts``. Returns @@ -1261,9 +1308,11 @@ def in_time_placebo( ) if n_infeasible > 0: warnings.warn( - f"{n_infeasible} in-time placebo date(s) were dimensionally infeasible " - "(too few pre-fake periods or all predictors dropped) and are reported " - "with status='infeasible' (NaN metrics).", + f"{n_infeasible} in-time placebo date(s) were structurally infeasible " + "(too few pre-fake periods, all predictors dropped, or — under " + "v_method='cv' — a kept predictor no longer spans both windows, or a " + "re-aggregated window loses cross-donor variation, after truncation) and " + "are reported with status='infeasible' (NaN metrics).", UserWarning, stacklevel=2, ) diff --git a/docs/api/synthetic_control.rst b/docs/api/synthetic_control.rst index b727315d..b50e840a 100644 --- a/docs/api/synthetic_control.rst +++ b/docs/api/synthetic_control.rst @@ -7,8 +7,10 @@ Hainmueller 2010; originating in Abadie & Gardeazabal 2003). The treated unit's counterfactual is a convex combination of "donor" (never-treated) units. Donor weights ``W*(V)`` solve a simplex-constrained, predictor-importance-weighted least-squares fit of the treated unit's pre-period predictors; the diagonal -predictor-importance matrix ``V`` is chosen either data-driven (minimizing pre-period -outcome MSPE, ``v_method="nested"``) or supplied by the user (``v_method="custom"``). The +predictor-importance matrix ``V`` is chosen data-driven (minimizing pre-period outcome +MSPE, ``v_method="nested"``; out-of-sample cross-validation, ``v_method="cv"``; or +closed-form inverse-variance, ``v_method="inverse_variance"``) or supplied by the user +(``v_method="custom"``). The treatment-effect path is the gap :math:`\hat{\alpha}_{1t} = Y_{1t} - \sum_j w_j Y_{jt}` over the post periods. @@ -120,8 +122,20 @@ order (the ordering matches R ``Synth::dataprep``), from: ``v_method="nested"`` selects the diagonal predictor-importance matrix ``V`` by minimizing the pre-period **outcome** MSPE of ``W*(V)`` over a multistart Nelder-Mead search with a -derivative-free Powell polish. ``v_method="custom"`` takes a user-supplied ``custom_v`` -(one entry per predictor row, trace-normalized) and skips the outer search. +derivative-free Powell polish. ``v_method="cv"`` selects ``V`` by **out-of-sample +cross-validation** (Abadie-Diamond-Hainmueller 2015; Abadie 2021): the pre-period is split +at ``v_cv_t0`` (default ``len(pre)//2``, i.e. ``t0 = T0/2``) into a training and a validation +window; ``V`` is chosen to minimize the validation-window outcome MSPE of the training-fit +weights, then the final weights are re-estimated on the validation-window predictors. Each +predictor is **re-aggregated** over each window (a separate ``dataprep`` per window, as +ADH 2015's CV does), so it must **span both windows** — the default per-period outcome lags +(single-period) are rejected; pass spanning covariate / multi-period ``special_predictors`` +(see ``docs/methodology/REGISTRY.md`` §SyntheticControl). +``v_method="inverse_variance"`` uses the closed-form ``v_h = 1/Var(X_h)`` (variance over +donors+treated; no search), applied to the **raw** predictors — it intentionally bypasses +``standardize`` (inverse-variance weighting *is* the unit-variance rescaling). ``v_method="custom"`` takes a user-supplied ``custom_v`` +(one entry per predictor row, trace-normalized) and skips the outer search. ``v_cv_t0`` +must be ``None`` unless ``v_method="cv"``. .. note:: @@ -206,7 +220,7 @@ Comparison with Synthetic DiD - None (level matching) - Simplex (double difference) * - Predictor-importance ``V`` - - Nested / custom diagonal ``V`` + - Nested / cv / inverse-variance / custom diagonal ``V`` - No analog * - Inference - Placebo permutation (no analytical SE) diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 5bf5cc1e..86ab67de 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -1986,7 +1986,7 @@ Classic synthetic control (donor/unit weights only) for a single treated unit, d **Estimator (Section 2.3):** - Predictor matrices `X1` (k×1 treated) / `X0` (k×J donors) = covariates `Z` + linear combinations of pre-period outcomes (`predictors` averaged over `predictor_window`, `special_predictors`, and/or per-period outcome lags `pre_period_outcomes`). Canonical row order: predictor averages → special predictors → outcome lags (the row *order* matches R `Synth::dataprep`; aggregation semantics differ — see the `na.rm` deviation below). - **Inner solve:** `W*(V) = argmin_W (X1 − X0 W)' diag(V) (X1 − X0 W)` s.t. `w_j ≥ 0, Σ w_j = 1`. Implemented by folding `V^½` into the predictors (`packed = [V^½·X0 | V^½·X1]`) and calling the Frank-Wolfe simplex solver `utils._sc_weight_fw(intercept=False, zeta=0)`. -- **Outer solve (`v_method="nested"`):** choose diagonal PSD `V` minimizing the pre-period **outcome** MSPE `mean((Z1 − Z0·W*(V))²)`. `v_method="custom"` skips the outer search and uses a user-supplied `custom_v` (trace-normalized). +- **`V` selection (`v_method`):** `"nested"` chooses diagonal PSD `V` minimizing the pre-period **outcome** MSPE `mean((Z1 − Z0·W*(V))²)` over all pre periods; `"cv"` chooses `V` by **out-of-sample cross-validation** (ADH 2015 §; Abadie 2021 Eq. 9 — see the per-window re-aggregation Note); `"inverse_variance"` uses the closed-form `v_h = 1/Var(X_{h·})` (Abadie 2021 §3.2(a); no search); `"custom"` skips the search and uses a user-supplied `custom_v` (trace-normalized). `mspe_v` reports the selected `V`'s objective value — the pre-period MSPE under `"nested"`, the held-out validation-window MSPE under `"cv"`, and `None` for the search-free `"custom"`/`"inverse_variance"` paths (not comparable across methods). - **Effect:** gap path `α̂_1t = Y_1t − Σ_j w_j·Y_jt`; `att` = mean post-period gap; `pre_rmspe` = pre-period fit diagnostic. **Inference:** **No analytical standard error** (Section 2.4) — `se`/`t_stat`/`p_value`/`conf_int` are always NaN. Significance comes from **in-space placebo permutation inference** via `SyntheticControlResults.in_space_placebo()`: reassign treatment to each donor, refit a synthetic control for it, and rank the treated unit's post/pre RMSPE ratio (`rmspe_ratio` = `RMSPE_post / RMSPE_pre` = `sqrt(MSPE_post / MSPE_pre)`) among all units; `placebo_p_value = rank / (n_placebos + 1)`, where `rank = 1 + #{placebos with ratio ≥ treated ratio}` — an **upper-tail rank test on the (unsigned) RMSPE-ratio statistic**, ties counted conservatively via `≥`. Because the ratio squares the gaps it is direction-agnostic: a large ratio signals an effect of *either* sign, so this is NOT a signed/one-directional ("one-sided") hypothesis test on the treatment-effect direction. ADH 2010 §3.4 reports the *MSPE* ratio (the square of `rmspe_ratio`); the two are monotone-equivalent, so the rank and p-value are identical — only the reported statistic's scale differs. `rmspe_ratio` (the treated statistic) is computed at fit time; `placebo_p_value` / `n_placebos` / `n_failed` are populated by the opt-in `in_space_placebo()` call. `get_placebo_df()` returns the per-unit ratio table used for the rank (the per-period placebo gap paths for a "spaghetti" plot are retained internally, not in this summary). @@ -1999,6 +1999,11 @@ Classic synthetic control (donor/unit weights only) for a single treated unit, d - **Note:** The standardization divisor `divisor = sqrt(apply(cbind(X0,X1), 1, var))` (per-predictor SD over donors+treated, ddof=1) and the inner/outer optimizer are **not specified in ADH 2010** (which defers these numerics to Abadie & Gardeazabal 2003 App. B / the `Synth` software). The divisor is pinned from the R `Synth::synth` source; `solution.v` lives in this scaled predictor space, so the deterministic R-parity test feeds `custom_v` in the same scaled space. - **Note:** The outer objective minimizes the pre-period outcome MSPE over **all** pre periods, whereas R `Synth` uses a `time.optimize.ssr` window (1960–1969 in the Basque example). The nested `V` therefore differs from R by an efficiency-only choice (the paper notes inferential validity holds for *any* `V`), so end-to-end nested parity is a tolerance band, not equality. - **Note:** `V` is parametrized on the unit simplex via a softmax of an unconstrained vector (trace-normalization is identification-fixing, not a constraint loss); the multistart Nelder-Mead + derivative-free Powell polish approximates R's best-of-`optimx` behavior over the non-smooth outer objective. +- **Note (out-of-sample CV `V`-selection — per-window re-aggregation + fully-spanning precondition):** `v_method="cv"` implements ADH 2015's four-step out-of-sample procedure (Abadie 2021 Eq. 9): (1) split the pre-period positionally at `t0` (`v_cv_t0`; default `len(pre)//2`, Abadie 2021's `t0 = T0/2`) into a training window `pre[:t0]` and a validation window `pre[t0:]`; (2) for each candidate `V`, fit the training weights `W̃(V)` on the **training-window** predictors; (3) pick `V*` minimizing the **validation-window** outcome MSPE of `W̃(V)`; (4) re-estimate the final `W* = W(V*)` on the **validation-window** predictors. Each predictor spec is **re-aggregated** over each window — its op (mean/sum/identity) is recomputed over only the periods that fall in that window — exactly as ADH 2015's CV re-runs `dataprep` separately on each window (`_truncate_specs_to_window` + a per-window `_build_predictor_matrix` + a per-window `_standardize`, since `V*` is predictor-importance applied in each window's own scaled space). The predictor dimension `k` is preserved: re-aggregation changes each row's **values** per window, not the row count, so `V` stays `k`-dimensional throughout. Because each predictor is re-measured on the **training window only** for the V-search, the selection is genuinely out-of-sample for **all** predictor types — a spanning predictor's validation-window observations never enter the training fit (the property a masked full-pre aggregate could not provide, since the full-pre value already bakes in the validation-window observations). The same `V*` drives both fits with no zeroed coordinate, so `v_weights` (= `V*`) reproduce `donor_weights` on the validation-window predictors; `predictor_balance` is reported on that same validation-window basis (each row's values are the spec re-aggregated over `pre[v_cv_t0:]`; the row label remains the full spec identity so it stays aligned with `v_weights`). `mspe_v` reports the step-3 validation MSPE at `V*`. **Deviation from R:** R `Synth` has no built-in CV function — ADH 2015's CV is a *manual* two-`dataprep` re-run — so this re-aggregation reproduces that manual procedure for our absolute-period spec aggregates. + - **Fully-spanning precondition (fail-closed):** steps 2-3 re-aggregate each predictor on the training window and step 4 re-aggregates it on the validation window, so every predictor must be observed in **both** windows (have ≥1 period in each) to be re-measurable on each. `cv` therefore **requires every predictor to span both windows**; a violation raises `ValueError`. This holds for ADH 2015's shared covariate / multi-period special predictors (which span the windows) but NOT for the **default per-period outcome lags** (each is single-period and lives in one window only) — `cv` with the bare default predictors is rejected with guidance to pass spanning `predictors` / multi-period `special_predictors`. **Window-information gate (fail-closed):** spanning is necessary but not sufficient — `W` is identified by the DONORS being distinguishable (`X0·W` is a convex combination of the donor columns), so if a window's re-aggregated predictors are constant **across donors** (all donor columns identical) then `X0·W` is the same for every simplex `W`, the inner solve's objective is flat, and `_inner_solve_W` returns arbitrary weights while reporting convergence — even when the treated unit differs (the treated unit is the matching target, not part of `W`'s identification), and `_standardize` only *warns* on the zero-variance rows. `cv` therefore also **requires each window to have cross-DONOR variation in at least one predictor**: the headline fit raises `ValueError`; an in-space placebo refit whose (pseudo-treated) donor pool is indistinguishable in a window is dropped from the permutation reference set; an in-time-truncated date whose window goes donor-flat is marked `status="infeasible"`. Validated by deterministic equivalence to the R-anchored `custom_v` path (the step-3 validation MSPE of the training-window fit and the step-4 validation-window weights each match a `custom_v=V*` fit on the correspondingly re-aggregated predictors) + cv self-consistency (`in_time_placebo` under cv == a fresh cv fit on the backdated panel) + a rejection test for non-spanning predictors. An explicitly pinned `v_cv_t0` that **no longer fits the truncated pre-fake window** is nulled to the `//2` default for the placebo refit (a pinned value that still fits the truncated window is kept); in-time truncation that breaks the fully-spanning precondition for an otherwise-valid headline fit (a kept spec stops spanning both windows at the truncated split) marks that date `status="infeasible"`. +- **Note (CV non-uniqueness — deterministic tie-break):** Abadie 2021 footnote 7 warns the CV `V*` "need not be unique" (the validation MSPE can be flat in `V`) and that "an implementation must pick a deterministic tie-break." Among candidate `V` whose validation MSPE ties to tolerance, this implementation selects the one **closest to uniform** (the densest `V`, mirroring footnote 7's ridge-toward-dense remedy in spirit) — a principled choice that makes the selected `V*` among equally-good optima independent of the **multistart evaluation order**. The cv fit is reproducible for a **fixed `seed`** (like `nested`); it is NOT seed-independent — the multistart fills any slots beyond the distinct heuristic starts (uniform, inverse-variance, univariate-fit — at most three, fewer if any coincide) with seed-dependent random Dirichlet draws, so different seeds can explore different optima. The tie-break removes only the start-order dependence among ties, not the seed dependence. A user-supplied `inner` ridge penalty `γ·Σv_h²` is a deferred extension (out of scope this release). +- **Note (inverse-variance `V`):** `v_method="inverse_variance"` uses the closed form `v_h = 1/Var(X_{h·})` (Abadie 2021 §3.2(a)), variance taken over donors+treated on the **unstandardized** predictors, and applies that `V` to the **raw** predictors so the effective objective is the unit-variance-rescaled `Σ_h (X_{1h} − X_{0h}W)²/Var_h`. Abadie 2021 describes inverse-variance `V` precisely as "rescal[ing] each predictor row to unit variance" — which is exactly what `standardize="std"` already does — so the `standardize` pre-scaling is intentionally **bypassed** on this branch (it is equivalent to uniform `V` on the standardized predictors). Applying `1/Var` on top of the standardized rows would double-rescale to `Σ_h diff_h²/Var_h²` (inverse-variance *squared*), which is NOT the paper's selector. Deterministic (no search; `mspe_v` is `None`). A zero-variance predictor row (no cross-unit information) gets 0 `V` weight; if **every** row is zero-variance the result falls back to uniform `V` with a `UserWarning`. `custom_v` is rejected for this method (and for `"cv"`) — fail-closed, never silently ignored. +- **Note (single-donor degeneracy — uniform `V` for all methods):** with a single donor (`J=1`) the synthetic control is forced to `w=[1]`, so the predictor-importance `V` is **unidentified** — every `V` yields the same synthetic. `fit()` short-circuits to `w=[1]` with a **uniform** `v_weights` and `mspe_v=None` for ALL `v_method`s, INCLUDING `"cv"` and `"inverse_variance"` (their selected / closed-form `V` would be inert, so it is not computed or reported), emitting a `UserWarning`. The donor weights / gap path / ATT do not depend on `V` when `J=1`, so they are unaffected; for `"cv"` the `v_cv_t0` resolution + spanning/variation preconditions still run (and can still raise) before this short-circuit. This is a deliberate, consistent degenerate contract — not a per-method `V` mismatch. - **Note:** The 1×SD poor-fit threshold is a defensive implementation choice in the spirit of the `SyntheticDiD` convention; ADH 2010 gives only the qualitative guidance "do not use SCM when the fit is poor" (no numeric cutoff). The warning fires whenever pre-period RMSPE exceeds the SD of the treated unit's pre-period outcomes — **including a flat treated pre-path** (`SD = 0`) with non-trivial RMSPE (a scale-aware roundoff floor suppresses the warning on a near-perfect flat fit). This is slightly broader than `SyntheticDiD`'s `SD > 0`-gated form, matching the literal RMSPE-exceeds-SD contract above. - **Deviation from R:** `standardize="none"` disables predictor standardization entirely; R `Synth` always scales by the predictor SD. Provided for diagnostics; changes the geometry of the `V` objective. - **Note:** predictor rows support only **equal-weight** linear combinations of pre-period values — `mean` (`k_s = 1/T0`), `sum` (`k_s = 1`), and per-period outcome lags (identity, a single `k_s = 1`). ADH (2010) §2.3 defines the general form `Ȳ_i^{K_m} = Σ_s k_s Y_is` with *arbitrary* weights `k_s`; this release does NOT accept user-supplied non-uniform `K_m` weight vectors (and `median` and other non-linear aggregations are intentionally excluded). The supported set still spans the standard `Synth::dataprep` `predictors.op` + `special.predictors` usage; arbitrary-weight `K_m` is a deferred extension. @@ -2010,7 +2015,7 @@ Classic synthetic control (donor/unit weights only) for a single treated unit, d - **Note (in-time placebo windowing — TRUNCATE):** ADH 2015 §4 says to re-estimate the in-time placebo "with the same predictors lagged accordingly." Because `diff_diff`'s predictor specs reference **absolute** periods, the in-time placebo re-cuts them by TRUNCATION: pre-period-outcome predictors become the pre-`t_f` outcomes, and covariate / special-predictor windows are intersected with the pre-`t_f` window; a window lying ENTIRELY in the held-out region `[t_f, T0)` is **dropped** (surfaced in the `n_dropped_specs` column + an aggregated warning), and `custom_v` is subset in lockstep with the surviving specs. For an outcome-predictor fit (the R-anchorable case) TRUNCATE is identical to ADH's "lag" — both equal a manual `Synth::synth` re-run with `time.optimize.ssr` cut at `t_f`. The held-out window never enters the fit (the placebo's `all_periods` is the pre-fake + post-fake span; the true post-treatment periods are excluded entirely), so there is no "peeking." This concrete convention is NOT spelled out in ADH 2015 (which gives only the qualitative "lag accordingly"). - **Note (in-time placebo requires ≥2 pre-fake periods):** the in-time placebo treats a date with fewer than 2 pre-fake periods as `status="infeasible"` (the default sweep starts at the 3rd pre-period). This is DELIBERATELY stricter than the base estimator's `T0 ≥ 1` allowance (which permits a single-pre-period fit but warns that nested-`V` selection is unreliable): an auto-swept placebo date with a single pre-fake period is a trivially-matchable, non-credible pre-fit, so it is dropped rather than surfaced as a `ran` placebo (mirrors `SyntheticDiD.in_time_placebo`'s `i ≥ 2` rule). A date whose surviving `custom_v` has zero mass after truncation is likewise infeasible (not a convergence failure). - **Note (leave-one-out weight floor):** ADH 2015 §4 leave-one-out omits "each donor that received positive weight." This implementation drops each donor with **reportable** weight — above the `1e-6` interpretability floor (`synthetic_control._MIN_REPORT_WEIGHT`), i.e. exactly the donors in `donor_weights` — rather than every strictly-positive weight. A donor with `0 < w ≤ 1e-6` is numerical dust whose removal moves the ATT by ~its weight (its `delta_att` would be ~0, an uninformative row), and the floor keeps the LOO table aligned with the reported donor support. The drop-set is **frozen at fit time** on the fit snapshot (`weighted_donor_ids`), so `leave_one_out()` is immune to post-fit mutation of the presentation-level `donor_weights` dict. -- **Note (ADH-2015 diagnostics validation):** R `Synth` has **no** in-time-placebo or leave-one-out function (verified against its full CRAN function index; `SCtools` adds only the *in-space* placebo battery, `scpi` only prediction-interval uncertainty), so there is no canonical R *output* to match for these diagnostics — in R they are hand-rolled by re-running `dataprep()`+`synth()`. They are validated instead by (a) the solver's existing Basque R parity (above), and (b) deterministic **self-consistency** tests proving each diagnostic equals a from-scratch `synthetic_control()` fit on the equivalent sub-problem — `leave_one_out()` drop-`d` == a fit on the donor pool minus `d`; `in_time_placebo([t_f])` == a fit on the backdated/truncated panel — both via a fixed `custom_v` (match to 1e-7). The deferred ADH-2015 items (out-of-sample CV `V`-selection, regression-weight `W^reg` extrapolation diagnostic, sparse-SC subset search) are tracked in `TODO.md`. +- **Note (ADH-2015 diagnostics validation):** R `Synth` has **no** in-time-placebo or leave-one-out function (verified against its full CRAN function index; `SCtools` adds only the *in-space* placebo battery, `scpi` only prediction-interval uncertainty), so there is no canonical R *output* to match for these diagnostics — in R they are hand-rolled by re-running `dataprep()`+`synth()`. They are validated instead by (a) the solver's existing Basque R parity (above), and (b) deterministic **self-consistency** tests proving each diagnostic equals a from-scratch `synthetic_control()` fit on the equivalent sub-problem — `leave_one_out()` drop-`d` == a fit on the donor pool minus `d`; `in_time_placebo([t_f])` == a fit on the backdated/truncated panel — both via a fixed `custom_v` (match to 1e-7). The remaining deferred ADH-2015 items (regression-weight `W^reg` extrapolation diagnostic, sparse-SC subset search) are tracked in `TODO.md`. **Reference implementation:** authors' `Synth` package for R/MATLAB/Stata (`Synth::synth`); in-space placebo construction follows `SCtools::generate.placebos`. **R-parity anchor:** the Basque Country study (Abadie-Gardeazabal 2003, `data("basque")`) — published synthetic = region 10 (Cataluña) 0.851 + region 14 (Madrid) 0.149, `loss.v` 0.0089. Two-tier test (`tests/test_methodology_synthetic_control.py`): Tier-1 feeds R's `solution.v` via `custom_v` → donor weights match to atol 1e-3 (deterministic); Tier-2 checks the nested fit in a band. @@ -2019,11 +2024,13 @@ Classic synthetic control (donor/unit weights only) for a single treated unit, d - [x] Predictors = covariates + linear combinations of pre-period outcomes (incl. "all pre-period outcomes" default). - [x] Inner simplex-constrained weighted LS via `_sc_weight_fw` with diagonal PSD `V`. - [x] Outer nested `V` (pre-period outcome MSPE) + user-supplied `custom_v`. +- [x] Out-of-sample cross-validation `V`-selection (`v_method="cv"`, ADH 2015 §; Abadie 2021 Eq. 9 `t0=T0/2`): per-window re-aggregation train/validation split (fully-spanning precondition) + deterministic flat-MSPE tie-break (fn. 7); threaded through the placebo refits. +- [x] Inverse-variance `V` (`v_method="inverse_variance"`, Abadie 2021 §3.2(a)): closed-form `1/Var(X)`, zero-variance-row + all-zero-uniform-fallback handling. - [x] Gap path + pre-period RMSPE + predictor-balance table. - [x] No analytical SE (NaN inference); in-space placebo permutation inference (`in_space_placebo()`, `rank/(n_placebos+1)`) with the real treated unit excluded from every placebo pool, effective-count denominator, and a scale-aware RMSPE-ratio floor. - [x] Leave-one-out donor robustness (`leave_one_out()`, ADH 2015 §4): per-drop ATT / `delta_att` table + overlay gaps; fail-closed. - [x] In-time (backdating) placebo (`in_time_placebo()`, ADH 2015 §4): TRUNCATE windowing (drop held-out-window predictors + lockstep `custom_v` subset), feasible-date sweep, fail-closed. -- [ ] *Deferred (ADH 2015):* out-of-sample CV `V`-selection, regression-weight `W^reg` extrapolation diagnostic, sparse-SC subset search (see `TODO.md`). +- [ ] *Deferred (ADH 2015):* regression-weight `W^reg` extrapolation diagnostic, sparse-SC subset search (see `TODO.md`). - [x] Predictor-leakage, absorbing-suffix/no-anticipation, empty-window, duplicate-label, and inner-non-convergence validation gates. --- diff --git a/tests/test_methodology_synthetic_control.py b/tests/test_methodology_synthetic_control.py index ee85ea78..02eeb494 100644 --- a/tests/test_methodology_synthetic_control.py +++ b/tests/test_methodology_synthetic_control.py @@ -712,6 +712,7 @@ def test_get_set_params_roundtrip(): "standardize", "alpha", "seed", + "v_cv_t0", } est2 = SyntheticControl().set_params(**params) assert est2.get_params() == params @@ -1526,7 +1527,7 @@ def flaky_fit_unit(snap, unit, donor_pool, n_starts): return real_fit_unit(snap, unit, donor_pool, n_starts) monkeypatch.setattr(sc, "_placebo_fit_unit", flaky_fit_unit) - with pytest.warns(UserWarning, match="failed to converge"): + with pytest.warns(UserWarning, match="did not reach a valid optimum"): loo = res.leave_one_out() assert res._loo_n_failed == 1 failed = loo[loo["status"] == "failed"] @@ -2152,7 +2153,7 @@ def test_leave_one_out_all_refits_failed_status(monkeypatch): sc = importlib.import_module("diff_diff.synthetic_control") res = _fit_for_placebo(n_donors=4) monkeypatch.setattr(sc, "_placebo_fit_unit", lambda *a, **k: None) # every drop fails - with pytest.warns(UserWarning, match="failed to converge"): + with pytest.warns(UserWarning, match="did not reach a valid optimum"): loo = res.leave_one_out() # Distinct status (NOT "ran"); att_range is None; baseline + only failed rows. assert res._loo_status == "all_refits_failed" @@ -2204,3 +2205,811 @@ def test_in_time_placebo_mixed_failed_and_infeasible_status(monkeypatch): block = DiagnosticReport(res).to_dict()["estimator_native_diagnostics"]["in_time_placebo"] assert block["reason_code"] == "all_dates_unusable" assert block["n_failed"] == 1 and block["n_infeasible"] == 1 + + +# =========================================================================== +# V-selection menu: v_method="inverse_variance" and v_method="cv" +# (ADH 2015 § / Abadie 2021 Eq. 9; §3.2(a) inverse-variance). The CV per-window +# re-aggregation reproduces ADH 2015's manual two-dataprep CV re-run for our +# absolute-period spec aggregates (see REGISTRY §SyntheticControl). +# =========================================================================== + + +# --- config / validation (cheap; no fit) ---------------------------------- + + +def test_inverse_variance_and_cv_methods_accepted(): + # Both new v_method values construct without error and round-trip v_cv_t0. + SyntheticControl(v_method="inverse_variance") + est = SyntheticControl(v_method="cv", v_cv_t0=3) + assert est.get_params()["v_cv_t0"] == 3 + + +def test_v_cv_t0_requires_cv_method(): + # Fail closed: v_cv_t0 is meaningless unless v_method="cv" (it would be silently + # ignored otherwise), mirroring the custom_v cross-field rule. + for method in ("nested", "custom", "inverse_variance"): + kw = {"custom_v": [1.0, 1.0]} if method == "custom" else {} + with pytest.raises(ValueError, match="v_cv_t0 is only valid when v_method='cv'"): + SyntheticControl(v_method=method, v_cv_t0=2, **kw) + + +@pytest.mark.parametrize("bad", [0, -1, 1.5, "2", True]) +def test_v_cv_t0_type_and_positivity_rejected(bad): + with pytest.raises(ValueError, match=r"v_cv_t0 must be"): + SyntheticControl(v_method="cv", v_cv_t0=bad) + + +def test_custom_v_forbidden_for_cv_and_inverse_variance(): + for method in ("cv", "inverse_variance"): + with pytest.raises(ValueError, match="custom_v must be None when v_method="): + SyntheticControl(v_method=method, custom_v=[1.0, 1.0]) + + +def test_set_params_cv_rollback(): + # A valid cv update sticks; an invalid combo (v_cv_t0 without cv) rolls back fully. + est = SyntheticControl(v_method="cv", v_cv_t0=2) + est.set_params(v_method="cv", v_cv_t0=3) + assert est.v_cv_t0 == 3 and est.v_method == "cv" + with pytest.raises(ValueError): + est.set_params(v_method="nested") # v_cv_t0=3 now invalid -> rollback + assert est.v_method == "cv" and est.v_cv_t0 == 3 # unchanged + + +# --- inverse_variance behavior + parity ------------------------------------ + + +def test_inverse_variance_fit_is_deterministic_and_searchless(): + df, _, _ = _make_panel(n_donors=4) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + r1 = synthetic_control(df, "y", "treated", "unit", "year", v_method="inverse_variance") + r2 = synthetic_control(df, "y", "treated", "unit", "year", v_method="inverse_variance") + assert r1.mspe_v is None # no outer search ran + assert r1.att == r2.att # fully deterministic (no rng) + assert r1.donor_weights == r2.donor_weights + + +def test_inverse_variance_weights_equal_inverse_row_variance(): + # Closed-form anchor: the selected V equals trace-normalized 1/Var(X_row) computed + # on the UNSTANDARDIZED predictors over donors+treated. + import importlib + + sc = importlib.import_module("diff_diff.synthetic_control") + df, _, _ = _make_panel(n_donors=4) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + res = synthetic_control(df, "y", "treated", "unit", "year", v_method="inverse_variance") + snap = res._fit_snapshot + X1, X0, labels = sc._build_predictor_matrix( + snap.pivots, snap.specs, snap.treated_id, snap.donor_ids + ) + expected = sc._inverse_variance_v(X1, X0) + got = np.array([res.v_weights[lab] for lab in labels]) + assert np.allclose(got, expected, atol=1e-12) + + +def test_inverse_variance_exact_for_tiny_positive_variances(): + # Regression (local codex R6 P1): the inverse-variance V must be the EXACT 1/Var + # selector for EVERY strictly-positive variance — no flooring of tiny-but-positive + # variances. With two predictor rows of tiny-but-UNEQUAL variance (ratio 1:4 here), a + # 1e-12 floor would clip both to the same value and equalize their V weights; the exact + # selector preserves their 1/Var ratio. The oracle is built INLINE from raw row + # variances (NOT via the production helper) so it genuinely cross-checks the code. + import importlib + + sc = importlib.import_module("diff_diff.synthetic_control") + # 3 donors + treated (4 cols). Rows 0,1 have tiny positive variances; row 2 is normal. + d0, d1 = 1e-8, 2e-8 + X0 = np.array( + [ + [5.0 + d0, 5.0 - d0, 5.0 + d0], + [2.0 + d1, 2.0 - d1, 2.0 + d1], + [1.0, 3.0, 2.5], + ] + ) + X1 = np.array([5.0 - d0, 2.0 - d1, 2.0]) + row_var = np.var(np.column_stack([X0, X1.reshape(-1, 1)]), axis=1, ddof=1) + assert np.all((row_var[:2] > 0) & (row_var[:2] < 1e-12)) # tiny-but-positive + inv_oracle = 1.0 / row_var # exact, all rows positive here + v_oracle = inv_oracle / inv_oracle.sum() + v = sc._inverse_variance_v(X1, X0) + assert np.allclose(v, v_oracle, rtol=1e-12, atol=0.0) + # Discrimination: the two tiny rows keep their exact 1/Var ratio (~4:1), which a + # clipping implementation would collapse to 1:1. + assert v[0] / v[1] == pytest.approx(row_var[1] / row_var[0], rel=1e-9) + assert not np.isclose(v[0], v[1]) # NOT equalized by a floor + + +def test_inverse_variance_matches_paper_objective(): + # SOURCE-anchored: inverse_variance must realize Abadie 2021 §3.2(a)'s unit-variance + # rescaled objective Σ_h diff_h²/Var_h, NOT the double-rescaled Σ_h diff_h²/Var_h² + # that applying 1/Var on already-standardized predictors would produce. Two + # independent encodings of the SAME paper objective (both R-anchored via the custom_v + # path) must agree with the inverse_variance fit: + # (a) standardize="std" + custom_v = uniform -> Σ_h (diff_h/SD_h)²·1 = Σ diff²/Var + # (b) standardize="none" + custom_v = 1/Var(X) -> Σ_h diff_h²·(1/Var_h) = Σ diff²/Var + # The OLD self-equivalence (custom_v=1/Var at the default standardize="std") would + # encode the BUGGY Σ diff²/Var² objective and is deliberately NOT used here. + import importlib + + sc = importlib.import_module("diff_diff.synthetic_control") + df, _, _ = _make_panel(n_donors=4) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + res = synthetic_control(df, "y", "treated", "unit", "year", v_method="inverse_variance") + snap = res._fit_snapshot + X1, X0, labels = sc._build_predictor_matrix( + snap.pivots, snap.specs, snap.treated_id, snap.donor_ids + ) + k = X1.shape[0] + v_iv = sc._inverse_variance_v(X1, X0) + res_uniform_std = synthetic_control( + df, + "y", + "treated", + "unit", + "year", + v_method="custom", + custom_v=np.ones(k), + standardize="std", + ) + res_invvar_none = synthetic_control( + df, + "y", + "treated", + "unit", + "year", + v_method="custom", + custom_v=v_iv, + standardize="none", + ) + for ref in (res_uniform_std, res_invvar_none): + assert res.att == pytest.approx(ref.att, abs=1e-9) + assert res.donor_weights.keys() == ref.donor_weights.keys() + for d in res.donor_weights: + assert res.donor_weights[d] == pytest.approx(ref.donor_weights[d], abs=1e-9) + # Guard: confirm the BUGGY double-rescale (custom_v=1/Var at standardize="std") gives + # a DIFFERENT result, so this test actually discriminates the fix. + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + res_double = synthetic_control( + df, + "y", + "treated", + "unit", + "year", + v_method="custom", + custom_v=v_iv, + standardize="std", + ) + assert not np.allclose( + [res.donor_weights.get(d, 0.0) for d in snap.donor_ids], + [res_double.donor_weights.get(d, 0.0) for d in snap.donor_ids], + atol=1e-6, + ) + + +def _panel_with_constant_lag(constant_years, n_donors=4, T=8, T0=6): + """Panel where the outcome in ``constant_years`` is identical across ALL units + (treated + donors), so those pre-period lag predictors have zero cross-unit + variance. A post effect is added to the treated unit.""" + rng = np.random.default_rng(0) + years = list(range(2000, 2000 + T)) + rows = [] + for j in range(n_donors): + series = rng.normal(10, 2, T) + for t in range(T): + y = 7.0 if years[t] in constant_years else float(series[t]) + rows.append({"unit": f"d{j}", "year": years[t], "y": y, "treated": 0}) + for t in range(T): + y = 7.0 if years[t] in constant_years else 10.0 + rng.normal(0, 1) + rows.append( + { + "unit": "treated", + "year": years[t], + "y": y + (5.0 if t >= T0 else 0.0), + "treated": int(t >= T0), + } + ) + return pd.DataFrame(rows), years + + +def test_inverse_variance_zero_variance_row_gets_zero_weight(): + # A single zero-variance predictor row (one pre-period constant across units) gets + # 0 V weight; the others keep positive, trace-normalized weight. + df, years = _panel_with_constant_lag(constant_years={2001}) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + res = synthetic_control(df, "y", "treated", "unit", "year", v_method="inverse_variance") + assert res.v_weights["y_2001"] == pytest.approx(0.0, abs=1e-12) + assert sum(res.v_weights.values()) == pytest.approx(1.0, abs=1e-9) + assert any(v > 0 for k, v in res.v_weights.items() if k != "y_2001") + + +def test_inverse_variance_all_zero_variance_falls_back_to_uniform(): + # EVERY pre-period constant across units -> no information to weight predictors -> + # uniform V + ONE warning. + df, years = _panel_with_constant_lag(constant_years={2000, 2001, 2002, 2003, 2004, 2005}) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + warnings.simplefilter("always", UserWarning) + with pytest.warns(UserWarning, match="no usable predictor variance"): + res = synthetic_control(df, "y", "treated", "unit", "year", v_method="inverse_variance") + vals = list(res.v_weights.values()) + assert np.allclose(vals, 1.0 / len(vals)) + + +def test_inverse_variance_single_donor_returns_uniform_v(): + # Documented single-donor contract (NOT a skip-bug): with J==1, w=[1] is forced and V is + # UNIDENTIFIED (every V yields the same synthetic), so v_weights is uniform and mspe_v is + # None for EVERY v_method — inverse_variance included (its closed-form 1/Var would be + # inert here). The fit warns rather than silently relabeling. + df, _, _ = _make_panel(n_donors=1) + with pytest.warns(UserWarning, match="uniform regardless of v_method"): + res = synthetic_control(df, "y", "treated", "unit", "year", v_method="inverse_variance") + assert res.n_donors == 1 + assert abs(sum(res.donor_weights.values()) - 1.0) < 1e-9 + vw = list(res.v_weights.values()) + assert np.allclose(vw, 1.0 / len(vw)) # uniform V (unidentified), NOT 1/Var + assert res.mspe_v is None + + +def test_inverse_variance_leave_one_out_recomputes_per_unit(): + # LOO under inverse_variance recomputes the closed-form V on each reduced pool and + # runs deterministically. + res = _fit_for_placebo(n_donors=4, v_method="inverse_variance") + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + loo = res.leave_one_out() + assert res._loo_status == "ran" + assert (loo[loo["status"] == "loo"].shape[0]) >= 1 + + +def test_inverse_variance_in_space_placebo_recomputes_v_and_enters_reference_set(monkeypatch): + # The in-space placebo refits must take the inverse_variance branch (recompute the + # closed-form V per pseudo-treated unit, NOT fall through to nested) AND enter the + # permutation reference set. + import importlib + + sc = importlib.import_module("diff_diff.synthetic_control") + res = _fit_for_placebo(n_donors=4, v_method="inverse_variance") + real_iv = sc._inverse_variance_v + state = {"calls": 0} + + def spy(*a, **k): + state["calls"] += 1 + return real_iv(*a, **k) + + monkeypatch.setattr(sc, "_inverse_variance_v", spy) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + res.in_space_placebo() + assert state["calls"] >= res.n_donors # the iv branch recomputed V for each placebo + assert res.n_placebos >= 1 and res._placebo_status == "ran" + + +def test_inverse_variance_in_time_placebo_matches_fresh_backdated_fit(): + # Self-consistency: the in-time placebo under inverse_variance equals a fresh + # inverse_variance fit on the backdated panel. inverse_variance is deterministic (no + # search), so the match is exact; this anchors the in-time placebo's inverse_variance + # branch to a direct fit on the equivalent sub-problem. + df, _, _ = _make_panel(n_donors=4) # pre = 2000..2005 (default per-period lags) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + res = synthetic_control( + df, "y", "treated", "unit", "year", v_method="inverse_variance", **_FAST + ) + itp = res.in_time_placebo([2004]) # pre-fake = {2000..2003} + placebo_att = itp.loc[itp["placebo_period"] == 2004, "placebo_att"].iloc[0] + back = df[df["year"] <= 2005].copy() + back["treated"] = ((back["unit"] == "treated") & (back["year"] >= 2004)).astype(int) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + fresh = synthetic_control( + back, "y", "treated", "unit", "year", v_method="inverse_variance", **_FAST + ) + assert placebo_att == pytest.approx(fresh.att, abs=1e-7) + + +# --- cv behavior + parity + determinism ------------------------------------ + + +# CV tests use SPANNING predictors — multi-period special predictors observed in BOTH the +# training and validation halves of the 6-period pre-window 2000-2005 (split at t0=3, and +# also at t0=2) — so cv's fully-spanning precondition is satisfied and each predictor can be +# re-aggregated on each window. The default per-period outcome lags are single-period (each +# lives in one window only) and rejected (see test_cv_rejects_non_spanning_predictors). +_CV_SPANNING = [("y", [2000, 2002, 2004], "mean"), ("y", [2001, 2003, 2005], "mean")] + + +def _fit_cv(df, *, specs=_CV_SPANNING, **kw): + opts = dict(_FAST) + opts.update(kw) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return synthetic_control( + df, "y", "treated", "unit", "year", v_method="cv", special_predictors=specs, **opts + ) + + +def test_cv_rejects_non_spanning_predictors(): + # Fully-spanning precondition (fail-closed): faithful per-window re-aggregation needs + # every predictor measurable on BOTH windows. The default per-period outcome lags are + # single-period (each lives in one window only) so they cannot span, and cv rejects them + # with guidance to pass spanning predictors. + df, _, _ = _make_panel(n_donors=4) + with pytest.raises(ValueError, match="span BOTH the training"): + synthetic_control(df, "y", "treated", "unit", "year", v_method="cv", seed=0, **_FAST) + + +def test_cv_runs_and_reports_validation_mspe(): + df, _, _ = _make_panel(n_donors=4) + res = _fit_cv(df, seed=0) + assert np.isfinite(res.att) + assert res.mspe_v is not None and np.isfinite(res.mspe_v) # validation MSPE + assert abs(sum(res.donor_weights.values()) - 1.0) < 1e-6 + + +def test_cv_default_t0_is_half_and_explicit_t0_changes_result(): + df, _, _ = _make_panel(n_donors=4) # 6 pre periods -> default t0 = 3 + res_default = _fit_cv(df, seed=0) + res_t0_3 = _fit_cv(df, v_cv_t0=3, seed=0) + res_t0_2 = _fit_cv(df, v_cv_t0=2, seed=0) + # Default == explicit t0=3 (len(pre)//2 == 3). + assert res_default.mspe_v == pytest.approx(res_t0_3.mspe_v, abs=1e-12) + # A different split (different validation window) yields a different criterion value. + assert res_t0_2.mspe_v != pytest.approx(res_t0_3.mspe_v, abs=1e-9) + + +def test_cv_t0_out_of_range_raises(): + # The t0-range check fires before the predictor-precondition check. + df, _, _ = _make_panel(n_donors=4) # 6 pre periods -> valid 1..5 + with pytest.raises(ValueError, match="out of range"): + synthetic_control( + df, + "y", + "treated", + "unit", + "year", + v_method="cv", + special_predictors=_CV_SPANNING, + v_cv_t0=6, + **_FAST, + ) + + +def test_cv_requires_two_pre_periods(): + # A single pre period cannot form both a training and validation window (this n_pre<2 + # check fires before the predictor-precondition check). + rows = [] + years = [2000, 2001, 2002] + rng = np.random.default_rng(0) + for j in range(3): + for yr in years: + rows.append({"unit": f"d{j}", "year": yr, "y": 10.0 + rng.normal(), "treated": 0}) + for i, yr in enumerate(years): # pre = {2000}; post = {2001, 2002} + rows.append({"unit": "treated", "year": yr, "y": 11.0 + i, "treated": int(i >= 1)}) + df = pd.DataFrame(rows) + with pytest.raises(ValueError, match="requires at least 2 pre-treatment periods"): + synthetic_control(df, "y", "treated", "unit", "year", v_method="cv", **_FAST) + + +def test_cv_single_donor_validates_and_surfaces_v_cv_t0(): + # The single-donor (J==1) fast path must NOT bypass cv's v_cv_t0 resolution/validation: + # an out-of-range split still raises, and a valid split is resolved + surfaced on the + # result, even though the single-donor synthetic is degenerate (w = 1). Spanning + # predictors keep the cv precondition satisfied so we exercise the J==1 path itself. + df, _, _ = _make_panel(n_donors=1) # 6 pre periods 2000-2005 + with pytest.raises(ValueError, match="out of range"): + synthetic_control( + df, + "y", + "treated", + "unit", + "year", + v_method="cv", + special_predictors=_CV_SPANNING, + v_cv_t0=99, + **_FAST, + ) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") # degenerate single-donor warning + res = synthetic_control( + df, + "y", + "treated", + "unit", + "year", + v_method="cv", + special_predictors=_CV_SPANNING, + v_cv_t0=2, + **_FAST, + ) + assert res.n_donors == 1 + assert res.v_cv_t0 == 2 # surfaced despite the degenerate single-donor fast path + assert abs(sum(res.donor_weights.values()) - 1.0) < 1e-9 + # Documented degenerate contract: V unidentified with one donor -> uniform v_weights + + # mspe_v None, same as every other v_method (the cv selection would be inert here). + vw = list(res.v_weights.values()) + assert np.allclose(vw, 1.0 / len(vw)) + assert res.mspe_v is None + + +def test_cv_same_seed_reproducible_under_multistart(): + # Footnote-7 non-uniqueness: with a FIXED SEED the cv fit is reproducible under + # multistart — the deterministic tie-break selects the same V* (closest-to-uniform + # among ties) regardless of start-evaluation order. cv is seeded like nested (the + # n_starts>=4 Dirichlet starts are seed-dependent), so this asserts same-seed + # reproducibility at n_starts=3 (3 deterministic heuristic starts), NOT + # seed-independence. + df, _, _ = _make_panel(n_donors=4) + r1 = _fit_cv(df, n_starts=3, seed=0) + r2 = _fit_cv(df, n_starts=3, seed=0) + assert r1.att == r2.att + assert r1.donor_weights == r2.donor_weights + + +def test_cv_reaggregation_matches_custom_v_per_window_steps(): + # R-parity anchor: the cv fit's REPORTED weights come from the step-4 refit of V* on the + # VALIDATION-window re-aggregated predictors, and mspe_v is the validation MSPE of the + # step-2/3 fit of V* on the TRAINING-window re-aggregated predictors. Both per-window + # fits are reproduced via the R-anchored custom_v path: re-aggregate each spec over its + # window (intersect its periods) and fit custom_v=V* on those re-aggregated predictors. + # R Synth has no built-in CV function (ADH 2015's CV is a manual two-dataprep re-run); + # this self-consistency anchors both CV steps to the custom_v path (transitively + # R-anchored by the PR-1 Basque custom_v parity). + df, _, _ = _make_panel(n_donors=4) + res = _fit_cv(df, seed=0) + snap = res._fit_snapshot + pre = snap.pre_periods + t0 = len(pre) // 2 # default = 3 + tr_set, va_set = set(pre[:t0]), set(pre[t0:]) + v_star = np.array(list(res.v_weights.values())) # selected V, in spec order + # Re-aggregate each spec over each window (the faithful ADH-2015 per-window dataprep). + train_reagg = [(s.var, [p for p in s.periods if p in tr_set], s.op) for s in snap.specs] + val_reagg = [(s.var, [p for p in s.periods if p in va_set], s.op) for s in snap.specs] + # Each spec genuinely re-aggregates to DIFFERENT periods per window (the two-window + # distinction is real, not a no-op). + assert all(tp != vp for (_, tp, _), (_, vp, _) in zip(train_reagg, val_reagg)) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + fin = synthetic_control( + df, + "y", + "treated", + "unit", + "year", + v_method="custom", + special_predictors=val_reagg, + custom_v=v_star, + inner_min_decrease=1e-3, + ) + tr = synthetic_control( + df, + "y", + "treated", + "unit", + "year", + v_method="custom", + special_predictors=train_reagg, + custom_v=v_star, + inner_min_decrease=1e-3, + ) + # Step 4: reported weights == V* refit on the VALIDATION-window re-aggregated predictors. + for d in snap.donor_ids: + assert res.donor_weights.get(d, 0.0) == pytest.approx( + fin.donor_weights.get(d, 0.0), abs=1e-6 + ) + # Step 3: mspe_v == validation MSPE of V* fit on the TRAINING-window re-aggregated preds. + Y = snap.pivots[snap.outcome] + Z1 = Y.loc[pre, snap.treated_id].to_numpy(float) + Z0 = Y.loc[pre, snap.donor_ids].to_numpy(float) + w_tr = np.array([tr.donor_weights.get(d, 0.0) for d in snap.donor_ids]) + val_mspe = float(np.mean((Z1[t0:] - Z0[t0:] @ w_tr) ** 2)) + assert res.mspe_v == pytest.approx(val_mspe, abs=1e-7) + + +# --- cv placebo threading + in-time ---------------------------------------- + + +def test_cv_placebo_refit_uses_cv_and_enters_reference_set(monkeypatch): + # The in-space placebo refits must take the cv per-window re-aggregation branch (NOT + # fall through to nested) AND actually enter the permutation reference set. + import importlib + + sc = importlib.import_module("diff_diff.synthetic_control") + res = _fit_for_placebo(n_donors=4, v_method="cv", special_predictors=_CV_SPANNING) + real_cv = sc._outer_solve_V_cv + state = {"calls": 0} + + def spy(*a, **k): + state["calls"] += 1 + return real_cv(*a, **k) + + monkeypatch.setattr(sc, "_outer_solve_V_cv", spy) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + res.in_space_placebo() + assert state["calls"] >= res.n_donors # the cv branch ran for each placebo unit + assert res.n_placebos >= 1 and res._placebo_status == "ran" # placebos entered the set + + +def test_cv_in_time_placebo_pinned_t0_nulled_after_truncation(): + # An explicit v_cv_t0 that exceeds the truncated pre-fake window is nulled to the + # //2 default for the placebo refit (not preserved across in-time truncation), so the + # backdated date still runs. Backdate to 2004 keeps both spanning specs spanning. + df, _, _ = _make_panel(n_donors=4) # pre = 2000..2005 + res = _fit_cv(df, v_cv_t0=4, seed=0) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + itp = res.in_time_placebo( + [2004] + ) # truncated pre-fake = {2000..2003}; t0=4 invalid -> nulled + assert itp.loc[itp["placebo_period"] == 2004, "status"].iloc[0] == "ran" + + +def test_cv_in_time_placebo_matches_fresh_backdated_fit(): + # Self-consistency: the in-time placebo under cv equals a fresh cv fit on the + # backdated panel (fixed seed + n_starts=1 -> deterministic). Backdate to 2004: + # pre-fake = {2000..2003}, the spanning specs truncate to {2000,2002}/{2001,2003} + # (still spanning the t0=2 split), so the date is feasible. + df, _, _ = _make_panel(n_donors=4) # pre = 2000..2005 + res = _fit_cv(df, seed=0) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + itp = res.in_time_placebo([2004]) + placebo_att = itp.loc[itp["placebo_period"] == 2004, "placebo_att"].iloc[0] + back = df[df["year"] <= 2005].copy() + back["treated"] = ((back["unit"] == "treated") & (back["year"] >= 2004)).astype(int) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # backdated pre-fake = {2000..2003}; the spanning specs truncated to that window. + fresh = synthetic_control( + back, + "y", + "treated", + "unit", + "year", + v_method="cv", + special_predictors=[("y", [2000, 2002], "mean"), ("y", [2001, 2003], "mean")], + seed=0, + n_starts=1, + optimizer_options={"maxiter": 50}, + inner_min_decrease=1e-3, + ) + assert placebo_att == pytest.approx(fresh.att, abs=1e-7) + + +def test_cv_v_cv_t0_surfaced_on_results_and_serialized(): + # The new v_cv_t0 constructor param must appear on the public results surface + # (downstream-propagation rule): a public field (the RESOLVED split), in to_dict() + # /to_dataframe(), and surviving a pickle round-trip; None for non-cv methods. + df, _, _ = _make_panel(n_donors=4) # 6 pre periods -> default split 3 + res_default = _fit_cv(df, seed=0) + res_explicit = _fit_cv(df, v_cv_t0=2, seed=0) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + res_nested = synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST) + # Resolved value: None constructor -> len(pre)//2 = 3; explicit -> 2; non-cv -> None. + assert res_default.v_cv_t0 == 3 + assert res_explicit.v_cv_t0 == 2 + assert res_nested.v_cv_t0 is None + # to_dict() / to_dataframe() carry it. + assert res_explicit.to_dict()["v_cv_t0"] == 2 + assert res_explicit.to_dataframe()["v_cv_t0"].iloc[0] == 2 + # Survives pickling (it is a public scalar, not snapshot-only). + restored = pickle.loads(pickle.dumps(res_explicit)) + assert restored.v_cv_t0 == 2 + + +def test_cv_in_time_placebo_empty_window_is_infeasible_not_failed(): + # A truncated cv date can keep a predictor overall yet leave it on only ONE side of + # the split (the other window then has NO predictor) -> the fully-spanning precondition + # is broken -> STRUCTURAL infeasibility, must report status="infeasible" (not "failed"). + # Use a single special predictor spanning both windows at full pre but only the training + # side after backdating. + # Panel: 4 donors, years 2000..2010 (10 pre 2000..2009, post 2010). One special + # predictor on {2000, 2008}: at full pre (split t0=5) both windows hold it (feasible + # headline fit); backdated to 2005 the pre-fake is {2000..2004}, the spec truncates + # to {2000}, split t0=2 -> validation window {2002,2003,2004} has no predictor. + rng = np.random.default_rng(0) + years = list(range(2000, 2011)) + rows = [] + for j in range(4): + series = rng.normal(10, 2) + rng.normal(0, 0.3) * np.arange(11) + rng.normal(0, 0.15, 11) + for t in range(11): + rows.append({"unit": f"d{j}", "year": years[t], "y": float(series[t]), "treated": 0}) + treated = rng.normal(10, 2) + rng.normal(0, 0.3) * np.arange(11) + rng.normal(0, 0.1, 11) + treated = treated.copy() + treated[10] += 3.0 + for t in range(11): + rows.append( + {"unit": "treated", "year": years[t], "y": float(treated[t]), "treated": int(t >= 10)} + ) + df = pd.DataFrame(rows) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + res = synthetic_control( + df, + "y", + "treated", + "unit", + "year", + v_method="cv", + special_predictors=[("y", [2000, 2008], "mean")], + seed=0, + **_FAST, + ) + itp = res.in_time_placebo([2005]) + assert itp.loc[itp["placebo_period"] == 2005, "status"].iloc[0] == "infeasible" + + +# --- cv flat-window identification gate (local codex R9 P0) ----------------- + + +def _cv_panel_flat_years(flat_years, treated_flat=True): + # 4 donors + treated, years 2000-2007 (pre 2000-2005, post 2006-2007). In `flat_years` + # every DONOR has the same outcome (=10) -> zero cross-DONOR variance for any predictor + # re-aggregated onto those years (X0·W is constant in W -> unidentified); other pre + # years vary across donors. treated_flat just sets whether the treated unit also equals + # 10 there (True) or differs at 12 (False) — either way the DONORS are identical, so the + # cv donor-identification gate fires regardless of the treated unit's value. + rng = np.random.default_rng(0) + years = list(range(2000, 2008)) + rows = [] + for j in range(4): + for yr in years: + if yr in flat_years: + y = 10.0 + elif yr <= 2005: + y = 5.0 + j + rng.normal(0, 0.2) + else: + y = 10.0 + rng.normal(0, 0.2) + rows.append({"unit": f"d{j}", "year": yr, "y": y, "treated": 0}) + for yr in years: + if yr in flat_years: + y = 10.0 if treated_flat else 12.0 + elif yr <= 2005: + y = 5.5 + rng.normal(0, 0.2) + else: + y = 13.0 + rows.append({"unit": "treated", "year": yr, "y": y, "treated": int(yr >= 2006)}) + return pd.DataFrame(rows) + + +@pytest.mark.parametrize("treated_flat", [True, False]) +def test_cv_rejects_window_with_no_donor_variation(treated_flat): + # Fail-closed identification gate: W is identified by the DONORS being distinguishable + # (X0·W is a convex combination of donor columns). If every donor has identical + # predictors in a window, X0·W is constant in W -> flat objective -> arbitrary weights + # reported as converged. The headline fit must RAISE. The validation window + # {2003,2004,2005} is constant across all donors, so _CV_SPANNING re-aggregates to + # donor-indistinguishable validation predictors (t0=3). This must fail closed EVEN when + # the treated unit differs (treated_flat=False) — treated-vs-donor variation does NOT + # identify W (the gate that an earlier revision missed). + df = _cv_panel_flat_years({2003, 2004, 2005}, treated_flat=treated_flat) + with pytest.raises(ValueError, match="every donor has identical predictors"): + _fit_cv(df, seed=0) + + +def test_cv_in_time_placebo_flat_window_is_infeasible_not_failed(): + # A backdated date can leave a cv window with no cross-DONOR variation even when the + # headline fit is well-posed -> STRUCTURAL infeasibility (status="infeasible", not a + # convergence "failed"). Donors are identical in 2002,2003 only, so the headline windows + # still carry donor variation (via 2000/2001/2004/2005) but backdating to 2004 (pre-fake + # {2000..2003}, t0=2) makes the validation window {2002,2003} donor-indistinguishable. + df = _cv_panel_flat_years({2002, 2003}, treated_flat=True) + res = _fit_cv(df, seed=0) + assert np.isfinite(res.att) # headline well-posed (donors vary in the other years) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + itp = res.in_time_placebo([2004]) + assert itp.loc[itp["placebo_period"] == 2004, "status"].iloc[0] == "infeasible" + + +def test_cv_in_space_placebo_excludes_donor_flat_refits(): + # In-space placebo path (solve-level sentinel): the FULL donor set is distinguishable so + # the headline fit is well-posed, but dropping donor d0 (pseudo-treating it) leaves the + # remaining donors {d1,d2,d3} identical in the validation window -> that placebo's pool + # is donor-indistinguishable and must be EXCLUDED (not enter with arbitrary "converged" + # weights). Placebos for d1/d2/d3 keep d0 in the pool, so they remain identified. + rng = np.random.default_rng(3) + years = list(range(2000, 2008)) + rows = [] + for j in range(4): + for yr in years: + if yr in (2003, 2004, 2005): + y = 8.0 if j == 0 else 10.0 # d1=d2=d3 identical; d0 distinguishes the full set + elif yr <= 2005: + y = 5.0 + j + rng.normal(0, 0.2) # training varies across all donors + else: + y = 10.0 + rng.normal(0, 0.2) + rows.append({"unit": f"d{j}", "year": yr, "y": y, "treated": 0}) + for yr in years: + y = 12.0 if yr in (2003, 2004, 2005) else (5.5 + rng.normal(0, 0.2) if yr <= 2005 else 13.0) + rows.append({"unit": "treated", "year": yr, "y": y, "treated": int(yr >= 2006)}) + df = pd.DataFrame(rows) + res = _fit_cv(df, seed=0) + assert np.isfinite(res.att) # headline well-posed: the full donor set is distinguishable + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + res.in_space_placebo() + # The d0 placebo (pool {d1,d2,d3} identical in val) is dropped; the others enter. + assert res._placebo_status == "ran" + assert 1 <= res.n_placebos < res.n_donors + assert res.n_failed >= 1 + + +@pytest.mark.parametrize("specs", [_CV_SPANNING, [("y", [2000, 2002, 2004], "mean")]]) +def test_cv_fails_closed_when_training_solve_truncates(specs): + # The training-window solve defines mspe_v (Eq. 9's held-out criterion); if it truncates + # (inner_max_iter too small) the fit must FAIL CLOSED — mspe_v=NaN and _fit_converged + # False — so downstream placebo/LOO diagnostics never run off an invalid CV criterion. + # Covers BOTH the general multi-predictor path and the single-predictor k==1 fast path. + df, _, _ = _make_panel(n_donors=4) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + res = synthetic_control( + df, + "y", + "treated", + "unit", + "year", + v_method="cv", + special_predictors=specs, + seed=0, + n_starts=1, + inner_max_iter=1, # truncate the inner Frank-Wolfe solve + inner_min_decrease=1e-3, + ) + assert np.isnan(res.mspe_v) + assert res._fit_converged is False + + +def test_leave_one_out_cv_branch_and_fresh_reduced_pool_parity(monkeypatch): + # leave_one_out() under v_method="cv" must (a) actually exercise the cv re-aggregation + # branch (_outer_solve_V_cv), and (b) each drop's ATT must match a FRESH cv fit on the + # reduced donor pool (self-consistency; deterministic at n_starts=1) — the LOO wrapper + # was threaded for cv, so it needs direct coverage, not just in-space/in-time. + import importlib + + sc = importlib.import_module("diff_diff.synthetic_control") + df, _, _ = _make_panel(n_donors=4) + res = _fit_cv(df, seed=0) + real_cv = sc._outer_solve_V_cv + state = {"calls": 0} + + def spy(*a, **k): + state["calls"] += 1 + return real_cv(*a, **k) + + monkeypatch.setattr(sc, "_outer_solve_V_cv", spy) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + loo = res.leave_one_out() + assert res._loo_status == "ran" + loo_rows = loo[loo["status"] == "loo"] + assert state["calls"] >= len(loo_rows) >= 1 # the cv branch ran for each drop + + # Fresh-reduced-pool parity for the most influential drop (the spy forwards to the real + # solver, so the fresh fit below is unaffected by the patch). + dropped = loo_rows.iloc[0]["dropped_unit"] + loo_att = loo_rows.iloc[0]["att"] + reduced = df[df["unit"] != dropped].copy() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + fresh = synthetic_control( + reduced, + "y", + "treated", + "unit", + "year", + v_method="cv", + special_predictors=_CV_SPANNING, + seed=0, + n_starts=1, + optimizer_options={"maxiter": 50}, + inner_min_decrease=1e-3, + ) + assert loo_att == pytest.approx(fresh.att, abs=1e-7)