diff --git a/benchmarks/R/benchmark_synthdid.R b/benchmarks/R/benchmark_synthdid.R index 1c36f7d4..496be3a7 100644 --- a/benchmarks/R/benchmark_synthdid.R +++ b/benchmarks/R/benchmark_synthdid.R @@ -73,14 +73,21 @@ weights <- attr(tau_hat, "weights") unit_weights <- weights$omega time_weights <- weights$lambda -# Compute SE via placebo (jackknife) +# Compute SE via placebo message("Computing standard errors...") se_start <- Sys.time() se_matrix <- vcov(tau_hat, method = "placebo") se <- as.numeric(sqrt(se_matrix[1, 1])) # Extract scalar SE se_time <- as.numeric(difftime(Sys.time(), se_start, units = "secs")) -total_time <- estimation_time + se_time +# Compute SE via jackknife (Algorithm 3) +message("Computing jackknife standard errors...") +se_jk_start <- Sys.time() +se_jk_matrix <- vcov(tau_hat, method = "jackknife") +se_jackknife <- as.numeric(sqrt(se_jk_matrix[1, 1])) +se_jk_time <- as.numeric(difftime(Sys.time(), se_jk_start, units = "secs")) + +total_time <- estimation_time + se_time # placebo only, matches `se` field # Compute noise level and regularization (to match Python's auto-computed values) N0 <- setup$N0 @@ -98,6 +105,7 @@ results <- list( # Point estimate and SE att = as.numeric(tau_hat), se = se, + se_jackknife = se_jackknife, # Weights (full precision) unit_weights = as.numeric(unit_weights), @@ -111,7 +119,8 @@ results <- list( # Timing timing = list( estimation_seconds = estimation_time, - se_seconds = se_time, + se_placebo_seconds = se_time, + se_jackknife_seconds = se_jk_time, total_seconds = total_time ), diff --git a/benchmarks/python/benchmark_synthdid.py b/benchmarks/python/benchmark_synthdid.py index 7986d35d..4392bf45 100644 --- a/benchmarks/python/benchmark_synthdid.py +++ b/benchmarks/python/benchmark_synthdid.py @@ -45,7 +45,7 @@ def parse_args(): ) parser.add_argument( "--variance-method", type=str, default="placebo", - choices=["bootstrap", "placebo"], + choices=["bootstrap", "jackknife", "placebo"], help="Variance estimation method (default: placebo to match R)" ) parser.add_argument( diff --git a/diff_diff/results.py b/diff_diff/results.py index c92a3083..7129173d 100644 --- a/diff_diff/results.py +++ b/diff_diff/results.py @@ -662,7 +662,7 @@ class SyntheticDiDResults: att : float Average Treatment effect on the Treated (ATT). se : float - Standard error of the ATT estimate (bootstrap or placebo-based). + Standard error of the ATT estimate (bootstrap, jackknife, or placebo-based). t_stat : float T-statistic for the ATT estimate. p_value : float @@ -684,7 +684,12 @@ class SyntheticDiDResults: post_periods : list List of post-treatment period identifiers. variance_method : str - Method used for variance estimation: "bootstrap" or "placebo". + Method used for variance estimation: "bootstrap", "jackknife", or "placebo". + placebo_effects : np.ndarray, optional + Method-specific per-iteration estimates: placebo treatment effects + (for "placebo"), bootstrap ATT estimates (for "bootstrap"), or + leave-one-out estimates (for "jackknife"). The ``variance_method`` + field disambiguates the contents. """ att: float diff --git a/diff_diff/synthetic_did.py b/diff_diff/synthetic_did.py index 56d5f029..e11bfeeb 100644 --- a/diff_diff/synthetic_did.py +++ b/diff_diff/synthetic_did.py @@ -53,10 +53,15 @@ class SyntheticDiD(DifferenceInDifferences): Implements Algorithm 4 from Arkhangelsky et al. (2021). This is R's default. - "bootstrap": Bootstrap at unit level with fixed weights matching R's synthdid::vcov(method="bootstrap"). + - "jackknife": Jackknife variance matching R's synthdid::vcov(method="jackknife"). + Implements Algorithm 3 from Arkhangelsky et al. (2021). Deterministic + (N_control + N_treated iterations), uses fixed weights (no re-estimation). + The ``n_bootstrap`` parameter is ignored for this method. n_bootstrap : int, default=200 Number of replications for variance estimation. Used for both: - Bootstrap: Number of bootstrap samples - Placebo: Number of random permutations (matches R's `replications` argument) + Ignored when ``variance_method="jackknife"``. seed : int, optional Random seed for reproducibility. If None (default), results will vary between runs. @@ -163,15 +168,15 @@ def __init__( self.n_bootstrap = n_bootstrap self.seed = seed - # Validate n_bootstrap - if n_bootstrap < 2: + # Validate n_bootstrap (irrelevant for jackknife, which is deterministic) + if n_bootstrap < 2 and variance_method != "jackknife": raise ValueError( f"n_bootstrap must be >= 2 (got {n_bootstrap}). At least 2 " f"iterations are needed to estimate standard errors." ) # Validate variance_method - valid_methods = ("bootstrap", "placebo") + valid_methods = ("bootstrap", "jackknife", "placebo") if variance_method not in valid_methods: raise ValueError( f"variance_method must be one of {valid_methods}, " f"got '{variance_method}'" @@ -218,8 +223,9 @@ def fit( # type: ignore[override] survey_design : SurveyDesign, optional Survey design specification. Only pweight weight_type is supported. Strata/PSU/FPC are supported via Rao-Wu rescaled bootstrap when - variance_method='bootstrap'. Placebo variance does not support - strata/PSU/FPC; use variance_method='bootstrap' for full designs. + variance_method='bootstrap'. Non-bootstrap variance methods + (placebo, jackknife) do not support strata/PSU/FPC; use + variance_method='bootstrap' for full designs. Returns ------- @@ -269,7 +275,7 @@ def fit( # type: ignore[override] f"Got '{resolved_survey.weight_type}'." ) - # Reject placebo + full survey design (strata/PSU/FPC are silently ignored) + # Reject non-bootstrap + full survey design (strata/PSU/FPC need Rao-Wu) if ( resolved_survey is not None and ( @@ -277,10 +283,11 @@ def fit( # type: ignore[override] or resolved_survey.psu is not None or resolved_survey.fpc is not None ) - and self.variance_method == "placebo" + and self.variance_method != "bootstrap" ): raise NotImplementedError( - "SyntheticDiD with variance_method='placebo' does not support strata/PSU/FPC. " + f"SyntheticDiD with variance_method='{self.variance_method}' does not " + "support strata/PSU/FPC. " "Use variance_method='bootstrap' for full survey design support." ) @@ -510,6 +517,20 @@ def fit( # type: ignore[override] ) placebo_effects = bootstrap_estimates inference_method = "bootstrap" + elif self.variance_method == "jackknife": + # Fixed-weight jackknife (R's synthdid Algorithm 3) + se, jackknife_estimates = self._jackknife_se( + Y_pre_control, + Y_post_control, + Y_pre_treated, + Y_post_treated, + unit_weights, + time_weights, + w_treated=w_treated, + w_control=w_control, + ) + placebo_effects = jackknife_estimates + inference_method = "jackknife" else: # Use placebo-based variance (R's synthdid Algorithm 4) se, placebo_effects = self._placebo_variance_se( @@ -528,7 +549,14 @@ def fit( # type: ignore[override] # Compute test statistics t_stat, p_value_analytical, conf_int = safe_inference(att, se, alpha=self.alpha) - if len(placebo_effects) > 0 and np.isfinite(t_stat): + # Empirical p-value for placebo/bootstrap (null-distribution draws). + # Jackknife pseudo-values are NOT null-distribution draws, so use + # analytical (normal) p-value instead. + if ( + inference_method != "jackknife" + and len(placebo_effects) > 0 + and np.isfinite(t_stat) + ): p_value = max( np.mean(np.abs(placebo_effects) >= np.abs(att)), 1.0 / (len(placebo_effects) + 1), @@ -1106,6 +1134,202 @@ def _placebo_variance_se( return se, placebo_estimates + def _jackknife_se( + self, + Y_pre_control: np.ndarray, + Y_post_control: np.ndarray, + Y_pre_treated: np.ndarray, + Y_post_treated: np.ndarray, + unit_weights: np.ndarray, + time_weights: np.ndarray, + w_treated=None, + w_control=None, + ) -> Tuple[float, np.ndarray]: + """Compute jackknife standard error matching R's synthdid Algorithm 3. + + Delete-1 jackknife over all units (control + treated) with **fixed** + weights. For each leave-one-out sample the original omega is subsetted + and renormalized; lambda stays unchanged. No Frank-Wolfe + re-estimation, making this the fastest variance method. + + This matches R's ``synthdid::vcov(method="jackknife")`` which sets + ``update.omega=FALSE, update.lambda=FALSE``. + + Parameters + ---------- + Y_pre_control : np.ndarray + Control outcomes in pre-treatment periods, shape (n_pre, n_control). + Y_post_control : np.ndarray + Control outcomes in post-treatment periods, shape (n_post, n_control). + Y_pre_treated : np.ndarray + Treated outcomes in pre-treatment periods, shape (n_pre, n_treated). + Y_post_treated : np.ndarray + Treated outcomes in post-treatment periods, shape (n_post, n_treated). + unit_weights : np.ndarray + Unit weights from Frank-Wolfe optimization, shape (n_control,). + time_weights : np.ndarray + Time weights from Frank-Wolfe optimization, shape (n_pre,). + w_treated : np.ndarray, optional + Survey probability weights for treated units. + w_control : np.ndarray, optional + Survey probability weights for control units. + + Returns + ------- + tuple + (se, jackknife_estimates) where se is the standard error and + jackknife_estimates is a length-N array of leave-one-out estimates + (first n_control entries are control-LOO, last n_treated are + treated-LOO). + + References + ---------- + Arkhangelsky, D., Athey, S., Hirshberg, D. A., Imbens, G. W., & Wager, S. + (2021). Synthetic Difference-in-Differences. American Economic Review, + 111(12), 4088-4118. Algorithm 3. + """ + n_control = Y_pre_control.shape[1] + n_treated = Y_pre_treated.shape[1] + n = n_control + n_treated + + # --- Early-return NaN: matches R's NA conditions --- + if n_treated <= 1: + warnings.warn( + "Jackknife variance requires more than 1 treated unit. " + "Use variance_method='placebo' for single treated unit.", + UserWarning, + stacklevel=3, + ) + return np.nan, np.array([]) + + if np.sum(unit_weights > 0) <= 1: + warnings.warn( + "Jackknife variance requires more than 1 control unit with " + "nonzero weight. Consider variance_method='placebo'.", + UserWarning, + stacklevel=3, + ) + return np.nan, np.array([]) + + # --- Effective-support guards for survey-weighted path --- + if w_control is not None: + effective_control = unit_weights * w_control + if np.sum(effective_control > 0) <= 1: + warnings.warn( + "Jackknife variance requires more than 1 control unit with " + "positive effective weight (omega * survey_weight). " + "Consider variance_method='placebo'.", + UserWarning, + stacklevel=3, + ) + return np.nan, np.array([]) + + if w_treated is not None and np.sum(w_treated > 0) <= 1: + warnings.warn( + "Jackknife variance requires more than 1 treated unit with " + "positive survey weight. " + "Consider variance_method='placebo'.", + UserWarning, + stacklevel=3, + ) + return np.nan, np.array([]) + + jackknife_estimates = np.empty(n) + + # --- Precompute treated means (constant across control-LOO) --- + if w_treated is not None: + treated_pre_mean = np.average(Y_pre_treated, axis=1, weights=w_treated) + treated_post_mean = np.average(Y_post_treated, axis=1, weights=w_treated) + else: + treated_pre_mean = np.mean(Y_pre_treated, axis=1) + treated_post_mean = np.mean(Y_post_treated, axis=1) + + # --- Precompute omega composed with survey weights (for treated-LOO) --- + if w_control is not None: + omega_eff_full = unit_weights * w_control + omega_eff_full = omega_eff_full / omega_eff_full.sum() + else: + omega_eff_full = unit_weights + + # --- Leave-one-out over control units --- + mask = np.ones(n_control, dtype=bool) + for j in range(n_control): + mask[j] = False + + # Subset and renormalize omega + omega_jk = _sum_normalize(unit_weights[mask]) + + # Compose with survey weights if present + if w_control is not None: + omega_jk = omega_jk * w_control[mask] + if omega_jk.sum() == 0: + jackknife_estimates[j] = np.nan + mask[j] = True + continue + omega_jk = omega_jk / omega_jk.sum() + + jackknife_estimates[j] = compute_sdid_estimator( + Y_pre_control[:, mask], + Y_post_control[:, mask], + treated_pre_mean, + treated_post_mean, + omega_jk, + time_weights, + ) + + mask[j] = True # restore for next iteration + + # --- Leave-one-out over treated units --- + mask = np.ones(n_treated, dtype=bool) + for k in range(n_treated): + mask[k] = False + + # Recompute treated means from remaining units + if w_treated is not None: + w_t_jk = w_treated[mask] + if w_t_jk.sum() == 0: + jackknife_estimates[n_control + k] = np.nan + mask[k] = True + continue + t_pre_mean = np.average( + Y_pre_treated[:, mask], axis=1, weights=w_t_jk + ) + t_post_mean = np.average( + Y_post_treated[:, mask], axis=1, weights=w_t_jk + ) + else: + t_pre_mean = np.mean(Y_pre_treated[:, mask], axis=1) + t_post_mean = np.mean(Y_post_treated[:, mask], axis=1) + + jackknife_estimates[n_control + k] = compute_sdid_estimator( + Y_pre_control, + Y_post_control, + t_pre_mean, + t_post_mean, + omega_eff_full, + time_weights, + ) + + mask[k] = True # restore for next iteration + + # --- Check for non-finite estimates (propagate NaN like R's var()) --- + if not np.all(np.isfinite(jackknife_estimates)): + warnings.warn( + "Some jackknife leave-one-out estimates are non-finite. " + "Standard error cannot be computed.", + UserWarning, + stacklevel=3, + ) + return np.nan, jackknife_estimates + + # --- Jackknife SE formula: sqrt((n-1)/n * sum((u - ubar)^2)) --- + # Matches R's: sqrt(((n-1)/n) * (n-1) * var(u)) + u_bar = np.mean(jackknife_estimates) + ss = np.sum((jackknife_estimates - u_bar) ** 2) + se = np.sqrt((n - 1) / n * ss) + + return se, jackknife_estimates + def get_params(self) -> Dict[str, Any]: """Get estimator parameters.""" return { diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 7601a8e9..278fd1ce 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -1472,6 +1472,23 @@ Convergence criterion: stop when objective decrease < min_decrease² (default mi 5. Compute SDID estimate with renormalized ω and original λ 6. `SE = sd(bootstrap_estimates, ddof=1)` +- Alternative: Jackknife variance (matching R's `synthdid::vcov(method="jackknife")`) + Implements Algorithm 3 from Arkhangelsky et al. (2021): + 1. For each control unit j=1,...,N_co: + - Remove unit j, renormalize omega: `ω_jk = _sum_normalize(ω[remaining])` + - Keep λ unchanged, keep treated means unchanged + - Compute SDID estimate τ_{(-j)} + 2. For each treated unit k=1,...,N_tr: + - Keep ω and λ unchanged + - Recompute treated mean from remaining N_tr-1 treated units + - Compute SDID estimate τ_{(-k)} + 3. `SE = sqrt( ((n-1)/n) × Σ (τ_{(-i)} - τ̄)² )` where n = N_co + N_tr + + Fixed weights: No Frank-Wolfe re-estimation (`update.omega=FALSE, update.lambda=FALSE`). + Returns NaN SE for single treated unit or single nonzero-weight control. + Deterministic: exactly N_co + N_tr iterations, no replications parameter. + P-value: analytical (normal distribution), not empirical. + *Edge cases:* - **Frank-Wolfe non-convergence**: Returns current weights after max_iter iterations. No warning emitted; the convergence check `vals[t-1] - vals[t] < min_decrease²` simply does not trigger early exit, and the final iterate is returned. - **`_sparsify` all-zero input**: If `max(v) <= 0`, returns uniform weights `ones(len(v)) / len(v)`. @@ -1490,7 +1507,11 @@ Convergence criterion: stop when objective decrease < min_decrease² (default mi - **Varying treatment within unit**: Raises `ValueError`. SDID requires block treatment (constant within each unit). Suggests CallawaySantAnna or ImputationDiD for staggered adoption. - **Unbalanced panel**: Raises `ValueError`. SDID requires all units observed in all periods. Suggests `balance_panel()`. - **Poor pre-treatment fit**: Warns (`UserWarning`) when `pre_fit_rmse > std(treated_pre_outcomes, ddof=1)`. Diagnostic only; estimation proceeds. -- **Note:** Survey support: weights, strata, PSU, and FPC are all supported. Full-design surveys use Rao-Wu rescaled bootstrap (Phase 6); `variance_method="placebo"` requires weights-only (strata/PSU/FPC require bootstrap). Both sides weighted per WLS regression interpretation: treated-side means are survey-weighted (Frank-Wolfe target and ATT formula); control-side synthetic weights are composed with survey weights post-optimization (ω_eff = ω * w_co, renormalized). Frank-Wolfe optimization itself is unweighted — survey importance enters after trajectory-matching. Covariate residualization uses WLS with survey weights. Placebo and bootstrap SE preserve survey weights on both sides. +- **Jackknife with single treated unit**: Returns NaN SE. Cannot leave-one-out with N_tr=1; R returns NA for the same condition. +- **Jackknife with single nonzero-weight control**: Returns NaN SE. Leaving out the only effective control is not meaningful. +- **Jackknife with non-finite LOO estimate**: Returns NaN SE. Unlike bootstrap/placebo, jackknife is deterministic and cannot skip failed iterations; NaN propagates through `var()` (matches R behavior). +- **Jackknife with survey weights**: Guards on effective positive support (omega * w_control > 0 and w_treated > 0) after composition, not raw FW counts. Returns NaN SE if fewer than 2 effective controls or 2 positive-weight treated units. Per-iteration zero-sum guards return NaN for individual LOO iterations when remaining composed weights sum to zero. +- **Note:** Survey support: weights, strata, PSU, and FPC are all supported. Full-design surveys use Rao-Wu rescaled bootstrap (Phase 6); non-bootstrap variance methods (`variance_method="placebo"` or `"jackknife"`) require weights-only (strata/PSU/FPC require bootstrap). Both sides weighted per WLS regression interpretation: treated-side means are survey-weighted (Frank-Wolfe target and ATT formula); control-side synthetic weights are composed with survey weights post-optimization (ω_eff = ω * w_co, renormalized). Frank-Wolfe optimization itself is unweighted — survey importance enters after trajectory-matching. Covariate residualization uses WLS with survey weights. Placebo, jackknife, and bootstrap SE preserve survey weights on both sides. **Reference implementation(s):** - R: `synthdid::synthdid_estimate()` (Arkhangelsky et al.'s official package) @@ -1505,6 +1526,9 @@ Convergence criterion: stop when objective decrease < min_decrease² (default mi - [x] Placebo SE formula: sqrt((r-1)/r) * sd(placebo_estimates) - [x] Placebo SE: re-estimates omega and lambda per replication (matching R's update.omega=TRUE, update.lambda=TRUE) - [x] Bootstrap: fixed weights (original lambda unchanged, omega renormalized for resampled controls) +- [x] Jackknife SE: fixed weights, LOO all units, formula `sqrt((n-1)/n * sum((u-ubar)^2))` +- [x] Jackknife: NaN SE for single treated or single nonzero-weight control +- [x] Jackknife: analytical p-value (not empirical) - [x] Returns both unit and time weights for interpretation - [x] Column centering (intercept=True) in Frank-Wolfe optimization diff --git a/tests/test_estimators.py b/tests/test_estimators.py index 6bcc9f89..62440f7d 100644 --- a/tests/test_estimators.py +++ b/tests/test_estimators.py @@ -2575,6 +2575,28 @@ def test_bootstrap_inference(self, sdid_panel_data, ci_params): assert results.se > 0 assert results.conf_int[0] < results.att < results.conf_int[1] + def test_jackknife_inference(self, sdid_panel_data): + """Test jackknife-based variance estimation.""" + sdid = SyntheticDiD(variance_method="jackknife", seed=42) + results = sdid.fit( + sdid_panel_data, + outcome="outcome", + treatment="treated", + unit="unit", + time="period", + post_periods=[4, 5, 6, 7], + ) + assert results.variance_method == "jackknife" + assert results.se > 0 + assert results.conf_int[0] < results.att < results.conf_int[1] + # n_bootstrap should be None for jackknife + assert results.n_bootstrap is None + + def test_jackknife_valid_method(self): + """Test that 'jackknife' is accepted as a variance_method.""" + sdid = SyntheticDiD(variance_method="jackknife") + assert sdid.variance_method == "jackknife" + def test_invalid_variance_method(self): """Test that invalid variance_method raises ValueError.""" with pytest.raises(ValueError, match="variance_method must be one of"): diff --git a/tests/test_methodology_sdid.py b/tests/test_methodology_sdid.py index dd3cb40d..e4b3fd38 100644 --- a/tests/test_methodology_sdid.py +++ b/tests/test_methodology_sdid.py @@ -482,6 +482,343 @@ def test_bootstrap_with_zeta_overrides(self, ci_params): assert results.se > 0 +# ============================================================================= +# Jackknife SE +# ============================================================================= + + +class TestJackknifeSE: + """Verify jackknife SE with fixed weights (Algorithm 3).""" + + def test_jackknife_se_positive(self): + """Jackknife SE should be positive for well-specified data.""" + df = _make_panel(n_control=20, n_treated=3, seed=42) + sdid = SyntheticDiD(variance_method="jackknife", seed=42) + results = sdid.fit( + df, outcome="outcome", treatment="treated", + unit="unit", time="period", + post_periods=list(range(5, 8)), + ) + assert results.se > 0 + assert results.variance_method == "jackknife" + + def test_jackknife_deterministic(self): + """Jackknife should produce identical results regardless of seed.""" + df = _make_panel(n_control=15, n_treated=3, seed=42) + results1 = SyntheticDiD(variance_method="jackknife", seed=1).fit( + df, outcome="outcome", treatment="treated", + unit="unit", time="period", + post_periods=list(range(5, 8)), + ) + results2 = SyntheticDiD(variance_method="jackknife", seed=999).fit( + df, outcome="outcome", treatment="treated", + unit="unit", time="period", + post_periods=list(range(5, 8)), + ) + # ATT should be identical (same weights, no randomness in point est) + assert results1.att == results2.att + # SE should be identical (jackknife is deterministic) + assert results1.se == results2.se + + def test_jackknife_se_formula(self): + """Verify SE matches sqrt((n-1)/n * sum((u - ubar)^2)).""" + df = _make_panel(n_control=15, n_treated=3, seed=42) + sdid = SyntheticDiD(variance_method="jackknife", seed=42) + results = sdid.fit( + df, outcome="outcome", treatment="treated", + unit="unit", time="period", + post_periods=list(range(5, 8)), + ) + assert results.placebo_effects is not None + u = results.placebo_effects + n = len(u) + u_bar = np.mean(u) + expected_se = np.sqrt((n - 1) / n * np.sum((u - u_bar) ** 2)) + assert abs(results.se - expected_se) < 1e-10 + + def test_jackknife_n_iterations(self): + """Number of jackknife estimates = n_control + n_treated.""" + n_co, n_tr = 15, 3 + df = _make_panel(n_control=n_co, n_treated=n_tr, seed=42) + sdid = SyntheticDiD(variance_method="jackknife", seed=42) + results = sdid.fit( + df, outcome="outcome", treatment="treated", + unit="unit", time="period", + post_periods=list(range(5, 8)), + ) + assert results.placebo_effects is not None + assert len(results.placebo_effects) == n_co + n_tr + + def test_jackknife_single_treated_nan(self): + """Single treated unit -> NaN SE (matches R's NA).""" + df = _make_panel(n_control=15, n_treated=1, seed=42) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + sdid = SyntheticDiD(variance_method="jackknife", seed=42) + results = sdid.fit( + df, outcome="outcome", treatment="treated", + unit="unit", time="period", + post_periods=list(range(5, 7)), + ) + assert np.isnan(results.se) + assert np.isnan(results.t_stat) + assert np.isnan(results.p_value) + + def test_jackknife_analytical_pvalue(self): + """Jackknife should use analytical p-value, not empirical.""" + from scipy.stats import norm + + df = _make_panel(n_control=20, n_treated=3, seed=42) + sdid = SyntheticDiD(variance_method="jackknife", seed=42) + results = sdid.fit( + df, outcome="outcome", treatment="treated", + unit="unit", time="period", + post_periods=list(range(5, 8)), + ) + if np.isfinite(results.t_stat): + expected_p = 2 * (1 - norm.cdf(abs(results.t_stat))) + assert abs(results.p_value - expected_p) < 1e-10 + + def test_jackknife_same_att_as_placebo(self): + """Jackknife should produce the same point estimate as placebo.""" + df = _make_panel(n_control=15, n_treated=3, seed=42) + res_jk = SyntheticDiD(variance_method="jackknife", seed=42).fit( + df, outcome="outcome", treatment="treated", + unit="unit", time="period", + post_periods=list(range(5, 8)), + ) + res_pl = SyntheticDiD(variance_method="placebo", seed=42, n_bootstrap=50).fit( + df, outcome="outcome", treatment="treated", + unit="unit", time="period", + post_periods=list(range(5, 8)), + ) + assert abs(res_jk.att - res_pl.att) < 1e-10 + + def test_jackknife_n_bootstrap_ignored(self): + """n_bootstrap=1 should not raise for jackknife (it's ignored).""" + sdid = SyntheticDiD(variance_method="jackknife", n_bootstrap=1) + assert sdid.n_bootstrap == 1 + assert sdid.variance_method == "jackknife" + + def test_jackknife_n_bootstrap_none_in_results(self): + """Results should have n_bootstrap=None for jackknife.""" + df = _make_panel(n_control=15, n_treated=3, seed=42) + sdid = SyntheticDiD(variance_method="jackknife", seed=42) + results = sdid.fit( + df, outcome="outcome", treatment="treated", + unit="unit", time="period", + post_periods=list(range(5, 8)), + ) + assert results.n_bootstrap is None + + def test_jackknife_with_pweights(self): + """Jackknife should produce finite SE with survey pweights.""" + from diff_diff.survey import SurveyDesign + + df = _make_panel(n_control=15, n_treated=3, seed=42) + # Add unit-constant survey weights + unit_weights = {u: 1.0 + u * 0.1 for u in df["unit"].unique()} + df["weight"] = df["unit"].map(unit_weights) + + sdid = SyntheticDiD(variance_method="jackknife", seed=42) + results = sdid.fit( + df, outcome="outcome", treatment="treated", + unit="unit", time="period", + post_periods=list(range(5, 8)), + survey_design=SurveyDesign(weights="weight"), + ) + assert results.se > 0 + assert np.isfinite(results.se) + assert results.variance_method == "jackknife" + + def test_jackknife_zero_effective_control_nan(self): + """Zero-weight controls after composition -> NaN SE.""" + from diff_diff.survey import SurveyDesign + + # 3 controls, 2 treated. Set all but 1 control survey weight to 0 + # so effective support <= 1. + df = _make_panel(n_control=3, n_treated=2, seed=42) + weights = {} + control_units = sorted(df.loc[df["treated"] == 0, "unit"].unique()) + treated_units = sorted(df.loc[df["treated"] == 1, "unit"].unique()) + # Only first control gets positive weight + for i, u in enumerate(control_units): + weights[u] = 1.0 if i == 0 else 0.0 + for u in treated_units: + weights[u] = 1.0 + df["weight"] = df["unit"].map(weights) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + sdid = SyntheticDiD(variance_method="jackknife", seed=42) + results = sdid.fit( + df, outcome="outcome", treatment="treated", + unit="unit", time="period", + post_periods=list(range(5, 7)), + survey_design=SurveyDesign(weights="weight"), + ) + assert np.isnan(results.se) + + def test_jackknife_zero_treated_weight_nan(self): + """Single positive-weight treated unit with survey -> NaN SE.""" + from diff_diff.survey import SurveyDesign + + df = _make_panel(n_control=10, n_treated=2, seed=42) + weights = {} + treated_units = sorted(df.loc[df["treated"] == 1, "unit"].unique()) + control_units = sorted(df.loc[df["treated"] == 0, "unit"].unique()) + for u in control_units: + weights[u] = 1.0 + # Only first treated unit gets positive weight + weights[treated_units[0]] = 1.0 + weights[treated_units[1]] = 0.0 + df["weight"] = df["unit"].map(weights) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + sdid = SyntheticDiD(variance_method="jackknife", seed=42) + results = sdid.fit( + df, outcome="outcome", treatment="treated", + unit="unit", time="period", + post_periods=list(range(5, 7)), + survey_design=SurveyDesign(weights="weight"), + ) + assert np.isnan(results.se) + + +# ============================================================================= +# Jackknife SE - R Golden Value Parity +# ============================================================================= + + +class TestJackknifeSERParity: + """Verify jackknife SE matches R's synthdid::vcov(method='jackknife'). + + Golden values generated with R 4.5.2, synthdid package: + + library(synthdid) + set.seed(42) + N0 <- 20; N1 <- 3; T0 <- 5; T1 <- 3 + N <- N0 + N1; T <- T0 + T1 + Y <- matrix(0, nrow=N, ncol=T) + for (i in 1:N) { + unit_fe <- rnorm(1, sd=2) + for (t in 1:T) { + Y[i,t] <- 10 + unit_fe + (t-1)*0.3 + rnorm(1, sd=0.5) + if (i > N0 && t > T0) Y[i,t] <- Y[i,t] + 5.0 + } + } + tau_hat <- synthdid_estimate(Y, N0, T0) + se_jk <- sqrt(vcov(tau_hat, method="jackknife")[1,1]) + """ + + # R's Y matrix (23 units x 8 periods), row-major + Y_FLAT = [ + 12.459567808595292, 13.223481099962006, 13.658348196773856, + 13.844051055863837, 13.888854636247594, 14.997677893012806, + 14.494587375086788, 15.851128751231856, 10.527006629006900, + 11.317894498245712, 9.780141451338988, 10.635177418486473, + 11.007911133698329, 11.692547000930196, 11.532445341187122, + 10.646344091442769, 5.779122815714058, 5.265746845809725, + 4.828411925858962, 5.933107464969151, 6.926403492435262, + 7.566662873481445, 6.703831577045862, 7.090431451464497, + 6.703722507026075, 6.453676391630379, 7.301398891231049, + 7.726092498224848, 8.191225590595401, 7.669210641906834, + 8.526151391259425, 7.715169490073769, 8.005628186152748, + 7.523978158267692, 9.049143286687135, 9.434081283341134, + 9.450553333966674, 10.310163601090766, 9.867729569702721, + 9.846941461031360, 10.459939463684098, 11.887686682638062, + 11.249912950470762, 12.093459993478538, 12.226598684379407, + 11.973716581337246, 13.453499811673423, 13.287085704636093, + 10.317796666844943, 10.819165701226847, 10.824437736488752, + 9.582976251622744, 11.521962769964540, 11.495903971828724, + 12.072136575632017, 12.570433156881965, 12.435827624848123, + 13.750744970607428, 13.567397714461393, 14.218726703934166, + 14.459837938730677, 14.659912736018788, 14.077914185301429, + 14.854380461280002, 10.770274645112915, 11.275621916712160, + 12.137534572839927, 12.531125692916383, 12.678920118269170, + 12.304148175294246, 12.497145874675160, 14.103389828901550, + 10.560062989643855, 10.755394606294518, 10.518678427483797, + 11.721841324084256, 11.607272952190801, 11.924464521898100, + 12.782516039349641, 13.026729430318186, 12.546145790341205, + 13.409407032231695, 14.079787980063543, 13.128838312144593, + 13.553836458429620, 13.718363411441658, 13.854625752117343, + 14.924224028489123, 11.906891367097627, 12.128784222882244, + 11.404804355878456, 13.130649630134753, 12.173021974919472, + 12.859165585526416, 12.895280738363951, 13.345233593320895, + 10.435966548001499, 10.663839793569295, 11.030422432974012, + 11.033668451079661, 11.324277503659044, 11.045836529045589, + 11.985219205566086, 12.220060940064094, 14.722723885094736, + 15.772410109968900, 15.256969467031452, 15.568564129971197, + 16.666133193788099, 16.405462433247578, 17.202870693537243, + 17.289652559976691, 7.760317864391456, 8.460282811921017, + 9.462415007659978, 9.956467084312777, 9.726218110324272, + 10.272688229133685, 11.134101608790994, 11.592584658589104, + 7.747112683063268, 8.706521663648207, 8.170907672905205, + 8.679537720718859, 8.962718814069811, 8.861932954235140, + 9.383430460745986, 9.891050023644237, 9.728955313568255, + 9.231765881057163, 9.555677785583788, 10.420693590160205, + 9.844078095298698, 10.651913064308546, 10.196489890710358, + 11.855847076501993, 9.218785934915712, 9.133582433258733, + 10.048827580363175, 9.952567508276010, 10.385962432276619, + 11.596546220044132, 11.164945662130776, 11.016817405176500, + 10.145044557120791, 10.921420538928436, 11.642624728800259, + 10.730067509380019, 11.753738913724906, 11.868862794274008, + 12.574196556067037, 12.311524695461632, 10.800710206252880, + 12.817967597577915, 12.705627126180516, 12.497850142478354, + 12.148734571851643, 13.494742486942219, 13.714835068828613, + 13.770060323710533, 10.010857300549947, 10.787315152039971, + 11.050238955584605, 11.063282099053561, 10.834793458278272, + 17.153286194944865, 17.380010096861866, 16.984758489324143, + 6.913302966281331, 6.938279687001069, 7.537129527669741, + 7.063822443245238, 7.531238453797332, 13.853711102827464, + 13.812711128345372, 14.204067444347162, 13.694867606609098, + 12.929992273442151, 14.397345491024691, 15.116119455987304, + 15.860226513457558, 19.442026093187646, 19.855029109494353, + 20.377546194927845, + ] + N0, N1, T0, T1 = 20, 3, 5, 3 + R_ATT = 4.980848860060929 + R_JACKKNIFE_SE = 0.613846670319004 + + @pytest.fixture + def r_panel_df(self): + """Reconstruct the R panel as a pandas DataFrame.""" + N = self.N0 + self.N1 + T = self.T0 + self.T1 + Y = np.array(self.Y_FLAT).reshape(N, T) + rows = [] + for i in range(N): + for t in range(T): + rows.append({ + "unit": i, + "time": t, + "outcome": Y[i, t], + "treated": int(i >= self.N0), + }) + return pd.DataFrame(rows) + + def test_att_matches_r(self, r_panel_df): + """ATT should match R's synthdid_estimate to machine precision.""" + sdid = SyntheticDiD(variance_method="jackknife", seed=42) + results = sdid.fit( + r_panel_df, outcome="outcome", treatment="treated", + unit="unit", time="time", + post_periods=[5, 6, 7], + ) + assert abs(results.att - self.R_ATT) < 1e-10 + + def test_jackknife_se_matches_r(self, r_panel_df): + """Jackknife SE should match R's vcov(method='jackknife').""" + sdid = SyntheticDiD(variance_method="jackknife", seed=42) + results = sdid.fit( + r_panel_df, outcome="outcome", treatment="treated", + unit="unit", time="time", + post_periods=[5, 6, 7], + ) + assert abs(results.se - self.R_JACKKNIFE_SE) < 1e-10 + + # ============================================================================= # Edge Cases # ============================================================================= diff --git a/tests/test_survey_phase5.py b/tests/test_survey_phase5.py index 3cf80968..6250261c 100644 --- a/tests/test_survey_phase5.py +++ b/tests/test_survey_phase5.py @@ -198,7 +198,21 @@ def test_full_design_bootstrap_smoke(self, sdid_survey_data, survey_design_full) def test_full_design_placebo_raises(self, sdid_survey_data, survey_design_full): """Placebo variance with full design raises NotImplementedError.""" est = SyntheticDiD(variance_method="placebo", n_bootstrap=50, seed=42) - with pytest.raises(NotImplementedError, match="placebo.*does not support strata/PSU/FPC"): + with pytest.raises(NotImplementedError, match="does not support strata/PSU/FPC"): + est.fit( + sdid_survey_data, + outcome="outcome", + treatment="treated", + unit="unit", + time="time", + post_periods=[6, 7, 8, 9], + survey_design=survey_design_full, + ) + + def test_full_design_jackknife_raises(self, sdid_survey_data, survey_design_full): + """Jackknife variance with full design raises NotImplementedError.""" + est = SyntheticDiD(variance_method="jackknife", seed=42) + with pytest.raises(NotImplementedError, match="does not support strata/PSU/FPC"): est.fit( sdid_survey_data, outcome="outcome",