diff --git a/src/sentry/seer/explorer/tools.py b/src/sentry/seer/explorer/tools.py index 829b27324a3d8a..9e264b22173a8a 100644 --- a/src/sentry/seer/explorer/tools.py +++ b/src/sentry/seer/explorer/tools.py @@ -34,17 +34,35 @@ logger = logging.getLogger(__name__) +def _validate_date_params( + *, stats_period: str | None = None, start: str | None = None, end: str | None = None +) -> None: + """ + Validate that either stats_period or both start and end are provided, but not both. + """ + if not any([bool(stats_period), bool(start), bool(end)]): + raise ValueError("either stats_period or start and end must be provided") + + if stats_period and (start or end): + raise ValueError("stats_period and start/end cannot be provided together") + + if not stats_period and not all([bool(start), bool(end)]): + raise ValueError("start and end must be provided together") + + def execute_table_query( *, org_id: int, dataset: str, fields: list[str], per_page: int, - stats_period: str, query: str | None = None, sort: str | None = None, project_ids: list[int] | None = None, project_slugs: list[str] | None = None, + stats_period: str | None = None, + start: str | None = None, + end: str | None = None, sampling_mode: SAMPLING_MODES = "NORMAL", case_insensitive: bool | None = None, ) -> dict[str, Any] | None: @@ -55,7 +73,12 @@ def execute_table_query( project_ids: The IDs of the projects to query. Cannot be provided with project_slugs. project_slugs: The slugs of the projects to query. Cannot be provided with project_ids. If neither project_ids nor project_slugs are provided, all active projects will be queried. + + To prevent excessive queries and timeouts, either stats_period or *both* start and end should be provided. + Providing start or end with stats_period will result in a ValueError. """ + _validate_date_params(stats_period=stats_period, start=start, end=end) + try: organization = Organization.objects.get(id=org_id) except Organization.DoesNotExist: @@ -79,6 +102,8 @@ def execute_table_query( "sort": sort if sort else ("-timestamp" if "timestamp" in fields else None), "per_page": per_page, "statsPeriod": stats_period, + "start": start, + "end": end, "project": project_ids, "projectSlug": project_slugs, "sampling": sampling_mode, @@ -102,6 +127,7 @@ def execute_table_query( return {"data": resp.data["data"]} except client.ApiError as e: if e.status_code == 400: + logger.exception("execute_table_query: bad request", extra={"org_id": org_id}) error_detail = e.body.get("detail") if isinstance(e.body, dict) else None return {"error": str(error_detail) if error_detail is not None else str(e.body)} raise @@ -114,10 +140,12 @@ def execute_timeseries_query( y_axes: list[str], group_by: list[str] | None = None, query: str, - stats_period: str, - interval: str | None = None, project_ids: list[int] | None = None, project_slugs: list[str] | None = None, + stats_period: str | None = None, + start: str | None = None, + end: str | None = None, + interval: str | None = None, sampling_mode: SAMPLING_MODES = "NORMAL", partial: bool | None = None, case_insensitive: bool | None = None, @@ -131,7 +159,12 @@ def execute_timeseries_query( project_ids: The IDs of the projects to query. Cannot be provided with project_slugs. project_slugs: The slugs of the projects to query. Cannot be provided with project_ids. If neither project_ids nor project_slugs are provided, all active projects will be queried. + + To prevent excessive queries and timeouts, either stats_period or *both* start and end should be provided. + Providing start or end with stats_period will result in a 400 ApiError. """ + _validate_date_params(stats_period=stats_period, start=start, end=end) + try: organization = Organization.objects.get(id=org_id) except Organization.DoesNotExist: @@ -149,6 +182,8 @@ def execute_timeseries_query( "field": y_axes + group_by, "query": query, "statsPeriod": stats_period, + "start": start, + "end": end, "interval": interval, "project": project_ids, "projectSlug": project_slugs, diff --git a/tests/sentry/seer/explorer/test_tools.py b/tests/sentry/seer/explorer/test_tools.py index a6bdf017816230..67fad12b997976 100644 --- a/tests/sentry/seer/explorer/test_tools.py +++ b/tests/sentry/seer/explorer/test_tools.py @@ -38,9 +38,12 @@ from tests.sentry.issues.test_utils import OccurrenceTestMixin -@pytest.mark.django_db(databases=["default", "control"]) +def _get_utc_iso_without_timezone(dt: datetime) -> str: + """Seer and Sentry UI pass iso timestamps in this format.""" + return dt.astimezone(UTC).isoformat().replace("+00:00", "") + + class TestSpansQuery(APITransactionTestCase, SnubaTestCase, SpanTestCase): - databases = {"default", "control"} default_span_fields = [ "id", "span.op", @@ -55,6 +58,8 @@ class TestSpansQuery(APITransactionTestCase, SnubaTestCase, SpanTestCase): def setUp(self): super().setUp() self.ten_mins_ago = before_now(minutes=10) + self.two_mins_ago = before_now(minutes=2) + self.one_min_ago = before_now(minutes=1) # Create spans using the exact pattern from working tests spans = [ @@ -87,7 +92,7 @@ def setUp(self): "description": "Redis GET user:123", "sentry_tags": {"op": "cache.get", "transaction": "api/user/profile"}, }, - start_ts=self.ten_mins_ago, + start_ts=self.one_min_ago, duration=25, ), ] @@ -116,6 +121,46 @@ def test_spans_timeseries_count_metric(self): total_count = sum(point[1][0]["count"] for point in data_points if point[1]) assert total_count == 4 + def test_spans_timeseries_count_metric_start_end_filter(self): + """Test timeseries query with count() metric using real data, filtered by start and end""" + # Since this is a small range, we need to specify the interval. The event endpoint's + # get_rollup fx doesn't go below 15m when calculating from date range. + result = execute_timeseries_query( + org_id=self.organization.id, + dataset="spans", + query="", + start=_get_utc_iso_without_timezone(self.ten_mins_ago), + end=_get_utc_iso_without_timezone(self.two_mins_ago), + interval="1m", + y_axes=["count()"], + ) + + assert result is not None + # Result is now dict from events-stats endpoint + assert "count()" in result + assert "data" in result["count()"] + + data_points = result["count()"]["data"] + assert len(data_points) > 0 + + # Each data point is [timestamp, [{"count": value}]] + total_count = sum(point[1][0]["count"] for point in data_points if point[1]) + assert total_count == 3 # Should exclude the span created at 1 minute ago + + def test_spans_timeseries_count_metric_start_end_errors_with_stats_period(self): + with pytest.raises( + ValueError, match="stats_period and start/end cannot be provided together" + ): + execute_timeseries_query( + org_id=self.organization.id, + dataset="spans", + query="", + stats_period="1h", + start=_get_utc_iso_without_timezone(self.ten_mins_ago), + end=_get_utc_iso_without_timezone(self.two_mins_ago), + y_axes=["count()"], + ) + def test_spans_timeseries_multiple_metrics(self): """Test timeseries query with multiple metrics""" result = execute_timeseries_query( @@ -176,6 +221,47 @@ def test_spans_table_basic_query(self): cache_rows = [row for row in rows if row.get("span.op") == "cache.get"] assert len(cache_rows) == 1 # One cache span + def test_spans_table_query_start_end_filter(self): + result = execute_table_query( + org_id=self.organization.id, + dataset="spans", + fields=self.default_span_fields, + query="", + start=_get_utc_iso_without_timezone(self.ten_mins_ago), + end=_get_utc_iso_without_timezone(self.two_mins_ago), + sort="-timestamp", + per_page=10, + ) + + assert result is not None + assert "data" in result + + rows = result["data"] + assert len(rows) == 3 # Should exclude the span created at 1 minute ago + + # Verify span data + db_rows = [row for row in rows if row.get("span.op") == "db"] + assert len(db_rows) == 2 # Two database spans + + http_rows = [row for row in rows if row.get("span.op") == "http.client"] + assert len(http_rows) == 1 # One HTTP span + + def test_spans_table_query_start_end_errors_with_stats_period(self): + with pytest.raises( + ValueError, match="stats_period and start/end cannot be provided together" + ): + execute_table_query( + org_id=self.organization.id, + dataset="spans", + fields=self.default_span_fields, + query="", + stats_period="1h", + start=_get_utc_iso_without_timezone(self.ten_mins_ago), + end=_get_utc_iso_without_timezone(self.two_mins_ago), + sort="-timestamp", + per_page=10, + ) + def test_spans_table_specific_operation(self): """Test table query filtering by specific operation""" result = execute_table_query( @@ -1047,7 +1133,6 @@ def test_get_issue_details_timeseries_resolution( group.delete() -@pytest.mark.django_db(databases=["default", "control"]) class TestGetRepositoryDefinition(APITransactionTestCase): def test_get_repository_definition_success(self): """Test successful repository lookup"""