Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion tests/test_methodology_honest_did.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
equations, known analytical cases, and expected mathematical properties.
"""

import os

import numpy as np
import pytest

Expand Down Expand Up @@ -243,8 +245,23 @@ def test_optimal_flci_is_finite_and_valid(self):
assert ci_lb_opt <= lb, "CI lower should be <= identified set lower"
assert ci_ub_opt >= ub, "CI upper should be >= identified set upper"

@pytest.mark.skipif(
os.environ.get("CI") == "true",
reason="wall-clock timing is flaky on shared CI runners; short-circuit "
"correctness signal will be replaced with a mock/spy per TODO.md "
"(see PR #330 follow-up note)",
)
def test_m0_short_circuit(self):
"""M=0 should use standard CI without optimization."""
"""M=0 should use standard CI without optimization.

Uses wall-clock elapsed time as a proxy for "short-circuit path
taken" — fast path is ``<0.5s``, slow optimization would be ``>>
0.5s``. Skipped on CI because neighbor-VM contention on shared
runners can push even the short-circuit path past the threshold.
Run locally to validate the fast-path invariant; the TODO.md entry
added by PR #330 tracks replacing this with a mock/spy so the
correctness signal becomes CI-safe.
"""
beta_pre = np.array([0.3, 0.2, 0.1])
beta_post = np.array([2.0])
sigma = np.eye(4) * 0.01
Expand Down
35 changes: 27 additions & 8 deletions tests/test_se_accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- BasicDiD/TWFE: Should be 0% difference (exact match)
"""

import os
import time
from typing import Dict, Tuple

Expand All @@ -19,6 +20,16 @@

from diff_diff import CallawaySantAnna

# Shared skip marker for tests that assert on wall-clock elapsed time.
#
# Why: timing on shared CI runners is unreliable (neighbor-VM contention,
# BLAS path variation, cold caches), so such assertions yield false failures.
#
# Why `@pytest.mark.slow` alone is not enough: the default Python CI already
# deselects the slow set, but the Rust-backend CI invokes pytest with `-m ''`,
# which overrides that filter and pulls the slow tests back in. GitHub Actions
# exports ``CI=true`` on every runner, so keying on that variable covers both
# CI flavours while leaving local runs untouched.
_SKIP_WALLCLOCK_ON_CI = pytest.mark.skipif(
    os.getenv("CI") == "true",
    reason="wall-clock timing is flaky on shared CI runners; run locally via `pytest -m slow`",
)


def generate_staggered_data_for_benchmark(
n_units: int = 200,
Expand Down Expand Up @@ -253,17 +264,21 @@ def test_se_vs_r_benchmark(self):
f"SE differs from R by {se_diff_pct:.4f}%, expected <0.01%"

@pytest.mark.slow
@_SKIP_WALLCLOCK_ON_CI
def test_timing_performance(self, cs_results):
"""
Ensure estimation timing doesn't regress.

Baseline: ~0.005s for 200 units x 8 periods (small scale)
Threshold: <0.1s.

Excluded from default CI via ``@pytest.mark.slow`` — wall-clock time
on shared runners is noisy (BLAS path variation, neighbor VM
contention, cold caches) and produces false positives. Run locally
with ``pytest -m slow`` for ad-hoc performance sanity checks.
Excluded from default CI via ``@pytest.mark.slow`` AND from all CI
via ``skipif(CI=="true")`` — wall-clock time on shared runners is
noisy (BLAS path variation, neighbor VM contention, cold caches)
and produces false positives. The ``skipif`` layer is needed
because the Rust-backend CI jobs override ``-m 'not slow'`` with
``-m ''`` to include the full slow suite. Run locally with
``pytest -m slow`` for ad-hoc performance sanity checks.
"""
_, elapsed = cs_results

Expand Down Expand Up @@ -405,13 +420,17 @@ def test_influence_function_normalization(self):


@pytest.mark.slow
@_SKIP_WALLCLOCK_ON_CI
class TestPerformanceRegression:
"""Tests to prevent performance regression.

Excluded from default CI via ``@pytest.mark.slow`` — wall-clock time on
shared runners is noisy (BLAS path variation, neighbor VM contention,
cold caches) and produces false positives. Run locally with
``pytest -m slow`` for ad-hoc performance sanity checks.
Excluded from default CI via ``@pytest.mark.slow`` AND from all CI via
``skipif(CI=="true")`` — wall-clock time on shared runners is noisy
(BLAS path variation, neighbor VM contention, cold caches) and
produces false positives. The ``skipif`` layer is needed because the
Rust-backend CI jobs override ``-m 'not slow'`` with ``-m ''`` to
include the full slow suite. Run locally with ``pytest -m slow`` for
ad-hoc performance sanity checks.
"""

@pytest.mark.parametrize("n_units,max_time", [
Expand Down
Loading