diff --git a/tests/test_methodology_honest_did.py b/tests/test_methodology_honest_did.py index efaf8fe7..f2a63017 100644 --- a/tests/test_methodology_honest_did.py +++ b/tests/test_methodology_honest_did.py @@ -5,6 +5,8 @@ equations, known analytical cases, and expected mathematical properties. """ +import os + import numpy as np import pytest @@ -243,8 +245,23 @@ def test_optimal_flci_is_finite_and_valid(self): assert ci_lb_opt <= lb, "CI lower should be <= identified set lower" assert ci_ub_opt >= ub, "CI upper should be >= identified set upper" + @pytest.mark.skipif( + os.environ.get("CI") == "true", + reason="wall-clock timing is flaky on shared CI runners; short-circuit " + "correctness signal will be replaced with a mock/spy per TODO.md " + "(see PR #330 follow-up note)", + ) def test_m0_short_circuit(self): - """M=0 should use standard CI without optimization.""" + """M=0 should use standard CI without optimization. + + Uses wall-clock elapsed time as a proxy for "short-circuit path + taken" — fast path is ``<0.5s``, slow optimization would be ``>> + 0.5s``. Skipped on CI because neighbor-VM contention on shared + runners can push even the short-circuit path past the threshold. + Run locally to validate the fast-path invariant; the TODO.md entry + added by PR #330 tracks replacing this with a mock/spy so the + correctness signal becomes CI-safe. + """ beta_pre = np.array([0.3, 0.2, 0.1]) beta_post = np.array([2.0]) sigma = np.eye(4) * 0.01 diff --git a/tests/test_se_accuracy.py b/tests/test_se_accuracy.py index 55de69f3..c56ca150 100644 --- a/tests/test_se_accuracy.py +++ b/tests/test_se_accuracy.py @@ -10,6 +10,7 @@ - BasicDiD/TWFE: Should be 0% difference (exact match) """ +import os import time from typing import Dict, Tuple @@ -19,6 +20,16 @@ from diff_diff import CallawaySantAnna +# Wall-clock timing assertions on shared CI runners are flaky (neighbor-VM +# contention, BLAS path variation, cold caches). Default Python CI already +# excludes `@pytest.mark.slow`; Rust-backend CI invokes pytest with `-m ''` +# which overrides that filter and re-includes the slow set. GitHub Actions +# sets ``CI=true`` on every runner, so this predicate catches both. +_SKIP_WALLCLOCK_ON_CI = pytest.mark.skipif( + os.environ.get("CI") == "true", + reason="wall-clock timing is flaky on shared CI runners; run locally via `pytest -m slow`", +) + def generate_staggered_data_for_benchmark( n_units: int = 200, @@ -253,6 +264,7 @@ def test_se_vs_r_benchmark(self): f"SE differs from R by {se_diff_pct:.4f}%, expected <0.01%" @pytest.mark.slow + @_SKIP_WALLCLOCK_ON_CI def test_timing_performance(self, cs_results): """ Ensure estimation timing doesn't regress. @@ -260,10 +272,13 @@ def test_timing_performance(self, cs_results): Baseline: ~0.005s for 200 units x 8 periods (small scale) Threshold: <0.1s. - Excluded from default CI via ``@pytest.mark.slow`` — wall-clock time - on shared runners is noisy (BLAS path variation, neighbor VM - contention, cold caches) and produces false positives. Run locally - with ``pytest -m slow`` for ad-hoc performance sanity checks. + Excluded from default CI via ``@pytest.mark.slow`` AND from all CI + via ``skipif(CI=="true")`` — wall-clock time on shared runners is + noisy (BLAS path variation, neighbor VM contention, cold caches) + and produces false positives. The ``skipif`` layer is needed + because the Rust-backend CI jobs override ``-m 'not slow'`` with + ``-m ''`` to include the full slow suite. Run locally with + ``pytest -m slow`` for ad-hoc performance sanity checks. """ _, elapsed = cs_results @@ -405,13 +420,17 @@ def test_influence_function_normalization(self): @pytest.mark.slow +@_SKIP_WALLCLOCK_ON_CI class TestPerformanceRegression: """Tests to prevent performance regression. - Excluded from default CI via ``@pytest.mark.slow`` — wall-clock time on - shared runners is noisy (BLAS path variation, neighbor VM contention, - cold caches) and produces false positives. Run locally with - ``pytest -m slow`` for ad-hoc performance sanity checks. + Excluded from default CI via ``@pytest.mark.slow`` AND from all CI via + ``skipif(CI=="true")`` — wall-clock time on shared runners is noisy + (BLAS path variation, neighbor VM contention, cold caches) and + produces false positives. The ``skipif`` layer is needed because the + Rust-backend CI jobs override ``-m 'not slow'`` with ``-m ''`` to + include the full slow suite. Run locally with ``pytest -m slow`` for + ad-hoc performance sanity checks. """ @pytest.mark.parametrize("n_units,max_time", [