diff --git a/diff_diff/bacon.py b/diff_diff/bacon.py index 7ffbdf88..a2314086 100644 --- a/diff_diff/bacon.py +++ b/diff_diff/bacon.py @@ -324,6 +324,7 @@ class BaconDecomposition: ---------- weights : str, default="approximate" Weight calculation method: + - "approximate": Fast simplified formula using group shares and treatment variance. Good for diagnostic purposes where relative weights are sufficient to identify problematic comparisons. diff --git a/docs/api/index.rst b/docs/api/index.rst index 88324723..4cb44278 100644 --- a/docs/api/index.rst +++ b/docs/api/index.rst @@ -101,6 +101,19 @@ Placebo tests and model diagnostics: diff_diff.run_all_placebo_tests diff_diff.PlaceboTestResults +Panel Profiling +--------------- + +Pre-fit description of panel structure for estimator selection. The +:class:`~diff_diff.PanelProfile` return type and its supporting dataclasses +are documented in :doc:`profile`. + +.. autosummary:: + :toctree: _autosummary + :nosignatures: + + diff_diff.profile_panel + Sensitivity Analysis -------------------- @@ -246,6 +259,22 @@ Estimators wooldridge_etwfe bacon +Infrastructure +~~~~~~~~~~~~~~ + +.. toctree:: + :maxdepth: 2 + + local_linear + +Pre-Fit Profiling +~~~~~~~~~~~~~~~~~ + +.. toctree:: + :maxdepth: 2 + + profile + Diagnostics & Inference ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/api/local_linear.rst b/docs/api/local_linear.rst new file mode 100644 index 00000000..db4149b1 --- /dev/null +++ b/docs/api/local_linear.rst @@ -0,0 +1,100 @@ +Local-Linear Infrastructure +=========================== + +Kernels, kernel-moment constants, univariate local-linear regression at a +boundary, the MSE-optimal bandwidth selector, and the bias-corrected +local-linear fit. + +This module ships the nonparametric building blocks that +:class:`~diff_diff.HeterogeneousAdoptionDiD` composes for its continuous-dose +fit paths (``continuous_at_zero`` and ``continuous_near_d_lower``); see +:doc:`had` for the estimator that consumes them. 
+ +Two scope tiers are exposed: + +- **Generic helpers** (``local_linear_fit``, the three kernels, + ``kernel_moments``, ``KERNELS``) - usable on their own for any one-sided + boundary local-linear regression problem with a strictly nonnegative + running variable. +- **HAD-scoped public wrappers** (``mse_optimal_bandwidth``, + ``bias_corrected_local_linear``) - the configuration is hard-coded to + HAD Phase 1b/1c (``p=1``, ``deriv=0``, ``interior=False``, ``vce="nn"``) + and only those settings are parity-tested against R ``nprobust``. Other + settings (``hc0``/``hc1``/``hc2``/``hc3`` variance, interior evaluation, + higher polynomial orders) require dropping to the private + ``diff_diff._nprobust_port`` module and accepting that parity has not + been verified outside the HAD configuration. See ``docs/methodology/REGISTRY.md`` + ``HeterogeneousAdoptionDiD`` section for the full Phase 1b/1c contract. + +The selector and bias correction are ports of the Calonico-Cattaneo-Farrell +(2018) plug-in bandwidth and Calonico-Cattaneo-Titiunik (2014) robust-bias +correction from the R ``nprobust`` package; methodology context lives in +:doc:`had` and ``docs/methodology/REGISTRY.md``. + +Kernels +------- + +Bounded one-sided kernels on ``[0, 1]`` for boundary-point nonparametric +regression. The library normalizes the Epanechnikov and triangular kernels +to ``int_0^1 k(u) du = 1/2``; the uniform kernel uses +``int_0^1 k(u) du = 1``. + +.. autofunction:: diff_diff.epanechnikov_kernel + +.. autofunction:: diff_diff.triangular_kernel + +.. autofunction:: diff_diff.uniform_kernel + +.. autodata:: diff_diff.KERNELS + :annotation: : dict[str, Callable[[np.ndarray], np.ndarray]] + :no-value: + + Mapping from kernel name (``"epanechnikov"`` / ``"triangular"`` / + ``"uniform"``) to its callable evaluator on ``[0, 1]``. Pass the name + string (not the callable) to ``local_linear_fit`` and + ``mse_optimal_bandwidth`` via their ``kernel=`` argument. + +.. 
autofunction:: diff_diff.kernel_moments + +Boundary local-linear fit +------------------------- + +Kernel-weighted OLS estimator of the conditional mean +``m(d0) := E[Y | D = d0]`` at the boundary of ``D``'s support. + +.. autofunction:: diff_diff.local_linear_fit + +.. autoclass:: diff_diff.LocalLinearFit + :members: + :undoc-members: + :show-inheritance: + +MSE-optimal bandwidth selector +------------------------------ + +Plug-in MSE-optimal bandwidth (Calonico-Cattaneo-Farrell 2018) for the +boundary local-linear fit. Returns ``BandwidthResult`` carrying the final +bandwidth ``h_mse`` plus the per-stage diagnostics needed to audit the +selector against the R ``nprobust`` reference. + +.. autofunction:: diff_diff.mse_optimal_bandwidth + +.. autoclass:: diff_diff.BandwidthResult + :members: + :undoc-members: + :show-inheritance: + +Bias-corrected local-linear fit +------------------------------- + +Calonico-Cattaneo-Titiunik (2014) robust-bias-corrected boundary +estimator. Composes the bandwidth selector and the local-linear fit and +returns a ``BiasCorrectedFit`` with point estimate, robust standard error, +and 95% confidence interval. + +.. autofunction:: diff_diff.bias_corrected_local_linear + +.. autoclass:: diff_diff.BiasCorrectedFit + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/profile.rst b/docs/api/profile.rst new file mode 100644 index 00000000..201038ca --- /dev/null +++ b/docs/api/profile.rst @@ -0,0 +1,60 @@ +Panel Profiling +=============== + +Pre-fit description of a DiD panel's structural facts. ``profile_panel()`` +inspects a long-format panel and returns a :class:`PanelProfile` dataclass +covering balance, treatment-type classification, outcome characteristics, and +a list of factual :class:`Alert` observations. + +The profile is descriptive, not opinionated: alerts report what is (e.g. +"smallest cohort has 7 units"), never what to do about it. Estimator +selection is the caller's responsibility. 
For autonomous-agent consumption, +pair the profile output with the +`autonomous-agent reference guide <../llms-autonomous.txt>`_ (also accessible +at runtime via ``diff_diff.get_llm_guide("autonomous")``), which walks +through the estimator-support matrix and the per-design-feature reasoning +keyed off ``PanelProfile`` field values. + +.. note:: + + ``PanelProfile`` and its three supporting dataclasses + (:class:`OutcomeShape`, :class:`TreatmentDoseShape`, :class:`Alert`) are + re-exported at the top level of ``diff_diff`` so callers can construct + or pattern-match against them without dotted-module access. + +profile_panel +------------- + +.. autofunction:: diff_diff.profile_panel + +PanelProfile +------------ + +.. autoclass:: diff_diff.PanelProfile + :members: + :undoc-members: + :show-inheritance: + +OutcomeShape +------------ + +.. autoclass:: diff_diff.OutcomeShape + :members: + :undoc-members: + :show-inheritance: + +TreatmentDoseShape +------------------ + +.. autoclass:: diff_diff.TreatmentDoseShape + :members: + :undoc-members: + :show-inheritance: + +Alert +----- + +.. 
autoclass:: diff_diff.Alert + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py index cd68e2a7..4baa9ccb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -75,6 +75,7 @@ "../diff_diff/guides/llms.txt", "../diff_diff/guides/llms-full.txt", "../diff_diff/guides/llms-practitioner.txt", + "../diff_diff/guides/llms-autonomous.txt", ] sitemap_url_scheme = "{link}" diff --git a/docs/doc-deps.yaml b/docs/doc-deps.yaml index 7c7aefca..1bf4d282 100644 --- a/docs/doc-deps.yaml +++ b/docs/doc-deps.yaml @@ -410,6 +410,17 @@ sources: type: methodology - path: docs/api/had.rst type: api_reference + - path: docs/api/local_linear.rst + type: api_reference + + diff_diff/profile.py: + drift_risk: low + docs: + - path: docs/api/profile.rst + type: api_reference + - path: diff_diff/guides/llms-autonomous.txt + section: "PanelProfile field reference" + type: user_guide # ── SyntheticDiD ───────────────────────────────────────────────────