diff --git a/src/sentry/testutils/pytest/sentry.py b/src/sentry/testutils/pytest/sentry.py index ce5a965fc41986..ca1ff164adac8d 100644 --- a/src/sentry/testutils/pytest/sentry.py +++ b/src/sentry/testutils/pytest/sentry.py @@ -1,6 +1,7 @@ from __future__ import annotations import collections +import collections.abc import os import random import shutil @@ -218,15 +219,23 @@ def pytest_configure(config: pytest.Config) -> None: if snuba_url := xdist.get_snuba_url(): settings.SENTRY_SNUBA = snuba_url + # Also set the env var that _SnubaPool._get() prefers; this is immune + # to Django override_settings resets that can happen mid-session. + os.environ["_SENTRY_SNUBA_POOL_URL"] = snuba_url settings.SENTRY_ISSUE_PLATFORM_FUTURES_MAX_LIMIT = 1 if not hasattr(settings, "SENTRY_OPTIONS"): settings.SENTRY_OPTIONS = {} + redis_db = xdist.get_redis_db() settings.SENTRY_OPTIONS.update( { - "redis.clusters": {"default": {"hosts": {0: {"db": xdist.get_redis_db()}}}}, + # Each xdist worker gets its own Redis DB so that teardown + # flushdb() only affects that worker's data. Isolation is + # intra-shard (within a single pytest process); each CI shard + # job runs on its own runner with its own Redis service. + "redis.clusters": {"default": {"hosts": {0: {"db": redis_db}}}}, "mail.backend": "django.core.mail.backends.locmem.EmailBackend", "system.url-prefix": "http://testserver", "system.base-hostname": "testserver", @@ -267,6 +276,21 @@ def pytest_configure(config: pytest.Config) -> None: settings.SENTRY_OPTIONS_COMPLAIN_ON_ERRORS = True settings.VALIDATE_SUPERUSER_ACCESS_CATEGORY_AND_REASON = False + # Flush the options store's local cache so that any redis.clusters value + # that may have been cached before this configure hook (e.g. during early + # Django import) is evicted. The next access will re-read from settings + # and pick up the worker-specific DB number. + from sentry import options as sentry_options + + sentry_options.default_manager.store.flush_local_cache() + + # Also bust the redis_clusters manager's cluster cache so it re-creates + # the connection with the correct DB on first use. + from sentry.utils.redis import redis_clusters + + redis_clusters._clusters_bytes.clear() + redis_clusters._clusters_str.clear() + _configure_test_env_cells() # ID controls @@ -378,8 +402,41 @@ def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None: TaskNamespace.send_task = TaskNamespace._original_send_task # type: ignore[method-assign] del TaskNamespace._original_send_task + # When running shuffled tests with --exitfirst, record the first failing + # test ID so the CI workflow can pass it to detect-test-pollution. + failing_testid = getattr(session, "_shuffle_failing_testid", None) + if failing_testid and exitstatus != 0: + outdir = os.environ.get("GITHUB_WORKSPACE", "/tmp") + with open(f"{outdir}/failing-testid", "w") as f: + f.write(failing_testid + "\n") + longrepr = getattr(session, "_shuffle_failing_longrepr", None) + if longrepr: + with open(f"{outdir}/failing-testid-longrepr", "w") as f: + f.write(longrepr) + def pytest_runtest_setup(item: pytest.Item) -> None: + # Ensure the _SnubaPool proxy uses the correct per-worker Snuba URL before + # each test. The proxy caches settings.SENTRY_SNUBA on first use; if that + # first use happened before configure_for_worker updated the setting (or if + # the setting was never updated because xdist.get_snuba_url() returned None + # due to a stale module-level env read), the pool may point at 1218. + # + # Re-read the target URL from env here (call-time, not import-time) and + # force the proxy to rebuild the pool if the cached URL doesn't match. + snuba_url = xdist.get_snuba_url() + if snuba_url: + # Set an env var that _SnubaPool._get() prefers over settings.SENTRY_SNUBA. + # Environment variables are immune to Django's override_settings and + # TestCase isolation mechanisms that can reset the Django setting mid-session. + os.environ["_SENTRY_SNUBA_POOL_URL"] = snuba_url + settings.SENTRY_SNUBA = snuba_url + if "sentry.utils.snuba" in sys.modules: + snuba_mod = sys.modules["sentry.utils.snuba"] + pool_proxy = getattr(snuba_mod, "_snuba_pool", None) + if pool_proxy is not None and pool_proxy._url != snuba_url: + pool_proxy._url = None # force _get() to rebuild pool at correct URL + if item.config.getvalue("nomigrations") and any( mark for mark in item.iter_markers(name="migrations") ): @@ -522,11 +579,29 @@ def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item if shuffle_enabled: _shuffle(items, random.Random(seed)) + # Write the final ordered test IDs to a file for detect-test-pollution + outdir = os.environ.get("GITHUB_WORKSPACE", "/tmp") + with open(f"{outdir}/testids-full", "w") as f: + f.write("\n".join(item.nodeid for item in items) + "\n") + # This only needs to be done if there are items to be de-selected if len(discard) > 0: config.hook.pytest_deselected(items=discard) +@pytest.hookimpl(tryfirst=True, hookwrapper=True) +def pytest_runtest_makereport( + item: pytest.Item, call: pytest.CallInfo[None] +) -> collections.abc.Generator[None]: + outcome = yield + report = outcome.get_result() + # Track the last failing test for detect-test-pollution (written to disk + # in pytest_sessionfinish only when the session actually fails). + if os.environ.get("SENTRY_SHUFFLE_TESTS") and report.failed and report.when == "call": + item.session._shuffle_failing_testid = item.nodeid # type: ignore[attr-defined] + item.session._shuffle_failing_longrepr = str(report.longrepr) # type: ignore[attr-defined] + + def pytest_xdist_setupnodes() -> None: # prevent out-of-order django initialization os.environ.pop("DJANGO_SETTINGS_MODULE", None) diff --git a/src/sentry/testutils/pytest/xdist.py b/src/sentry/testutils/pytest/xdist.py index 3ad38b62dd6522..fca0ef3279c7f1 100644 --- a/src/sentry/testutils/pytest/xdist.py +++ b/src/sentry/testutils/pytest/xdist.py @@ -2,25 +2,31 @@ import os -_TEST_REDIS_DB = 9 +# Redis defaults to 16 databases (0-15). We reserve DBs 0-8 for non-test +# use and assign test workers to DBs 9-15. With a base of 9 that gives 7 +# usable slots (gw0-gw6). When there are more workers than slots we wrap +# around using modulo so we never crash; adjacent workers may share a DB, +# but flushdb() still provides per-test isolation within each worker. +_TEST_REDIS_DB_BASE = 9 +_REDIS_DB_COUNT = 16 # Redis default; configure "databases N" to increase +_REDIS_TEST_SLOTS = _REDIS_DB_COUNT - _TEST_REDIS_DB_BASE # 7 slots (9-15) + _SNUBA_BASE_PORT = 1230 -# Redis defaults to 16 DBs (0-15). With base DB 9, max 7 workers (gw0-gw6). -_MAX_WORKERS = 7 _worker_id: str | None = os.environ.get("PYTEST_XDIST_WORKER") _worker_num: int | None = int(_worker_id.replace("gw", "")) if _worker_id else None -if _worker_num is not None and _worker_num >= _MAX_WORKERS: - raise RuntimeError( - f"xdist worker {_worker_id} exceeds max supported workers ({_MAX_WORKERS}). " - f"Redis only has DBs 0-15 and base DB is {_TEST_REDIS_DB}." - ) - def get_redis_db() -> int: + """Return the Redis DB number for this xdist worker. + + Each worker gets its own DB so that ``flushdb()`` in teardown only + affects that worker's data. Workers are mapped round-robin into the + available test DB slots when there are more workers than slots. + """ if _worker_num is not None: - return _TEST_REDIS_DB + _worker_num - return _TEST_REDIS_DB + return _TEST_REDIS_DB_BASE + (_worker_num % _REDIS_TEST_SLOTS) + return _TEST_REDIS_DB_BASE def get_kafka_topic(base_name: str) -> str: @@ -30,6 +36,19 @@ def get_kafka_topic(base_name: str) -> str: def get_snuba_url() -> str | None: - if _worker_num is not None and os.environ.get("XDIST_PER_WORKER_SNUBA"): - return f"http://127.0.0.1:{_SNUBA_BASE_PORT + _worker_num}" - return None + """Return the per-worker Snuba URL, or None if not in per-worker Snuba mode. + + Reads both env vars at call time (not just at module import time) to handle + any edge case where xdist.py was imported before the subprocess environment + was fully initialised. + """ + if not os.environ.get("XDIST_PER_WORKER_SNUBA"): + return None + worker_id = os.environ.get("PYTEST_XDIST_WORKER") or "" + if not worker_id.startswith("gw"): + return None + try: + worker_num = int(worker_id[2:]) + except ValueError: + return None + return f"http://127.0.0.1:{_SNUBA_BASE_PORT + worker_num}" diff --git a/src/sentry/utils/snuba.py b/src/sentry/utils/snuba.py index 8021ed233921fd..da682407353919 100644 --- a/src/sentry/utils/snuba.py +++ b/src/sentry/utils/snuba.py @@ -562,19 +562,49 @@ def increment( ) -_snuba_pool = connection_from_url( - settings.SENTRY_SNUBA, - retries=RetrySkipTimeout( - total=5, - # Our calls to snuba frequently fail due to network issues. We want to - # automatically retry most requests. Some of our POSTs and all of our DELETEs - # do cause mutations, but we have other things in place to handle duplicate - # mutations. - allowed_methods={"GET", "POST", "DELETE"}, - ), - timeout=settings.SENTRY_SNUBA_TIMEOUT, - maxsize=10, -) +class _SnubaPool: + """Proxy for the Snuba HTTP connection pool. + + Reads ``settings.SENTRY_SNUBA`` on first use instead of at module-import + time. This ensures xdist workers that override the setting in + ``pytest_configure`` (to point at a per-worker snuba-gw container) get the + correct pool, even when the module was imported before the override ran. + + The proxy is a regular object so existing ``patch.object(_snuba_pool, ...)`` + calls in tests continue to work without modification. + """ + + def __init__(self) -> None: + self._pool: urllib3.HTTPConnectionPool | None = None + self._url: str | None = None + + def _get(self) -> urllib3.HTTPConnectionPool: + # Prefer the env-var override set by pytest_runtest_setup, which is + # immune to Django override_settings / TestCase isolation that can + # reset settings.SENTRY_SNUBA back to the default 1218 mid-session. + url = os.environ.get("_SENTRY_SNUBA_POOL_URL") or settings.SENTRY_SNUBA + if self._pool is None or self._url != url: + self._pool = connection_from_url( + url, + retries=RetrySkipTimeout( + total=5, + # Our calls to snuba frequently fail due to network issues. We want to + # automatically retry most requests. Some of our POSTs and all of our DELETEs + # do cause mutations, but we have other things in place to handle duplicate + # mutations. + allowed_methods={"GET", "POST", "DELETE"}, + ), + timeout=settings.SENTRY_SNUBA_TIMEOUT, + maxsize=10, + ) + self._url = url + return self._pool + + def urlopen(self, *args: Any, **kwargs: Any) -> urllib3.response.HTTPResponse: + return self._get().urlopen(*args, **kwargs) + + +_snuba_pool = _SnubaPool() epoch_naive = datetime(1970, 1, 1, tzinfo=None)