igerber · igerber · Apr 19, 2026 · Apr 19, 2026
diff --git a/tests/test_methodology_honest_did.py b/tests/test_methodology_honest_did.py
@@ -5,6 +5,8 @@
 equations, known analytical cases, and expected mathematical properties.
 """
 
+import os
+
 import numpy as np
 import pytest
 
@@ -243,8 +245,23 @@ def test_optimal_flci_is_finite_and_valid(self):
         assert ci_lb_opt <= lb, "CI lower should be <= identified set lower"
         assert ci_ub_opt >= ub, "CI upper should be >= identified set upper"
 
+    @pytest.mark.skipif(
+        os.environ.get("CI") == "true",
+        reason="wall-clock timing is flaky on shared CI runners; short-circuit "
+        "correctness signal will be replaced with a mock/spy per TODO.md "
+        "(see PR #330 follow-up note)",
+    )
     def test_m0_short_circuit(self):
-        """M=0 should use standard CI without optimization."""
+        """M=0 should use standard CI without optimization.
+
+        Uses wall-clock elapsed time as a proxy for "short-circuit path
+        taken" — the fast path takes ``<0.5s``; slow optimization would be
+        far above that. Skipped on CI because neighbor-VM contention on shared
+        runners can push even the short-circuit path past the threshold.
+        Run locally to validate the fast-path invariant; the TODO.md entry
+        added by PR #330 tracks replacing this with a mock/spy so the
+        correctness signal becomes CI-safe.
+        """
         beta_pre = np.array([0.3, 0.2, 0.1])
         beta_post = np.array([2.0])
         sigma = np.eye(4) * 0.01

diff --git a/tests/test_se_accuracy.py b/tests/test_se_accuracy.py
@@ -10,6 +10,7 @@
 - BasicDiD/TWFE: Should be 0% difference (exact match)
 """
 
+import os
 import time
 from typing import Dict, Tuple
 
@@ -19,6 +20,16 @@
 
 from diff_diff import CallawaySantAnna
 
+# Wall-clock timing assertions on shared CI runners are flaky (neighbor-VM
+# contention, BLAS path variation, cold caches). Default Python CI already
+# excludes ``@pytest.mark.slow``; Rust-backend CI invokes pytest with
+# ``-m ''``, which overrides that filter and re-includes the slow set. GitHub
+# Actions sets ``CI=true`` on every runner, so this predicate catches both.
+_SKIP_WALLCLOCK_ON_CI = pytest.mark.skipif(
+    os.environ.get("CI") == "true",
+    reason="wall-clock timing is flaky on shared CI runners; run locally via `pytest -m slow`",
+)
+
 
 def generate_staggered_data_for_benchmark(
     n_units: int = 200,
@@ -253,17 +264,21 @@ def test_se_vs_r_benchmark(self):
             f"SE differs from R by {se_diff_pct:.4f}%, expected <0.01%"
 
     @pytest.mark.slow
+    @_SKIP_WALLCLOCK_ON_CI
     def test_timing_performance(self, cs_results):
         """
         Ensure estimation timing doesn't regress.
 
         Baseline: ~0.005s for 200 units x 8 periods (small scale)
         Threshold: <0.1s.
 
-        Excluded from default CI via ``@pytest.mark.slow`` — wall-clock time
-        on shared runners is noisy (BLAS path variation, neighbor VM
-        contention, cold caches) and produces false positives. Run locally
-        with ``pytest -m slow`` for ad-hoc performance sanity checks.
+        Excluded from default CI via ``@pytest.mark.slow`` AND from all CI
+        via ``skipif(CI=="true")`` — wall-clock time on shared runners is
+        noisy (BLAS path variation, neighbor VM contention, cold caches)
+        and produces false positives. The ``skipif`` layer is needed
+        because the Rust-backend CI jobs override ``-m 'not slow'`` with
+        ``-m ''`` to include the full slow suite. Run locally with
+        ``pytest -m slow`` for ad-hoc performance sanity checks.
         """
         _, elapsed = cs_results
 
@@ -405,13 +420,17 @@ def test_influence_function_normalization(self):
 
 
 @pytest.mark.slow
+@_SKIP_WALLCLOCK_ON_CI
 class TestPerformanceRegression:
     """Tests to prevent performance regression.
 
-    Excluded from default CI via ``@pytest.mark.slow`` — wall-clock time on
-    shared runners is noisy (BLAS path variation, neighbor VM contention,
-    cold caches) and produces false positives. Run locally with
-    ``pytest -m slow`` for ad-hoc performance sanity checks.
+    Excluded from default CI via ``@pytest.mark.slow`` AND from all CI via
+    ``skipif(CI=="true")`` — wall-clock time on shared runners is noisy
+    (BLAS path variation, neighbor VM contention, cold caches) and
+    produces false positives. The ``skipif`` layer is needed because the
+    Rust-backend CI jobs override ``-m 'not slow'`` with ``-m ''`` to
+    include the full slow suite. Run locally with ``pytest -m slow`` for
+    ad-hoc performance sanity checks.
     """
 
     @pytest.mark.parametrize("n_units,max_time", [