From b196d6df9b9b80aab869e30b192890cff8c89f92 Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Mon, 17 Nov 2025 13:55:01 -0800 Subject: [PATCH 1/3] cleanup and basic logs test --- src/sentry/seer/endpoints/seer_rpc.py | 4 - src/sentry/seer/explorer/tools.py | 152 +------------------ tests/sentry/seer/explorer/test_tools.py | 185 +++++++++++++++++------ 3 files changed, 143 insertions(+), 198 deletions(-) diff --git a/src/sentry/seer/endpoints/seer_rpc.py b/src/sentry/seer/endpoints/seer_rpc.py index c36832afdc6539..96310ab73271a4 100644 --- a/src/sentry/seer/endpoints/seer_rpc.py +++ b/src/sentry/seer/endpoints/seer_rpc.py @@ -88,8 +88,6 @@ from sentry.seer.explorer.tools import ( execute_table_query, execute_timeseries_query, - execute_trace_query_chart, - execute_trace_query_table, get_issue_details, get_replay_metadata, get_repository_definition, @@ -1206,8 +1204,6 @@ def check_repository_integrations_status(*, repository_integrations: list[dict[s "get_trace_waterfall": rpc_get_trace_waterfall, "get_issue_details": get_issue_details, "get_profile_flamegraph": rpc_get_profile_flamegraph, - "execute_trace_query_chart": execute_trace_query_chart, - "execute_trace_query_table": execute_trace_query_table, "execute_table_query": execute_table_query, "execute_timeseries_query": execute_timeseries_query, "get_trace_item_attributes": get_trace_item_attributes, diff --git a/src/sentry/seer/explorer/tools.py b/src/sentry/seer/explorer/tools.py index 8b57892a364dd6..920a7769c0c540 100644 --- a/src/sentry/seer/explorer/tools.py +++ b/src/sentry/seer/explorer/tools.py @@ -34,159 +34,15 @@ logger = logging.getLogger(__name__) -def execute_trace_query_chart( - *, - org_id: int, - query: str, - stats_period: str, - y_axes: list[str], - group_by: list[str] | None = None, - project_ids: list[int] | None = None, -) -> dict[str, Any] | None: - """ - Execute a trace query to get chart/timeseries data by calling the events-stats endpoint. 
- """ - try: - organization = Organization.objects.get(id=org_id) - except Organization.DoesNotExist: - logger.warning("Organization not found", extra={"org_id": org_id}) - return None - - # Use provided project_ids or get all project IDs for the organization - if project_ids is None: - project_ids = list(organization.project_set.values_list("id", flat=True)) - if not project_ids: - logger.warning("No projects found for organization", extra={"org_id": org_id}) - return None - - params: dict[str, Any] = { - "query": query, - "statsPeriod": stats_period, - "yAxis": y_axes, - "project": project_ids, - "dataset": "spans", - "referrer": Referrer.SEER_RPC, - "transformAliasToInputFormat": "1", # Required for RPC datasets - } - - # Add group_by if provided (for top events) - if group_by and len(group_by) > 0: - params["topEvents"] = 5 - params["field"] = group_by - params["excludeOther"] = "0" # Include "Other" series - - resp = client.get( - auth=ApiKey(organization_id=organization.id, scope_list=["org:read", "project:read"]), - user=None, - path=f"/organizations/{organization.slug}/events-stats/", - params=params, - ) - data = resp.data - - # Always normalize to the nested {"metric": {"data": [...]}} format for consistency - metric_is_single = len(y_axes) == 1 - metric_name = y_axes[0] if metric_is_single else None - if metric_name and metric_is_single: - # Handle grouped data with single metric: wrap each group's data in the metric name - if group_by: - return { - group_value: ( - {metric_name: group_data} - if isinstance(group_data, dict) and "data" in group_data - else group_data - ) - for group_value, group_data in data.items() - } - - # Handle non-grouped data with single metric: wrap data in the metric name - if isinstance(data, dict) and "data" in data: - return {metric_name: data} - - return data - - -def execute_trace_query_table( - *, - org_id: int, - query: str, - stats_period: str, - sort: str, - group_by: list[str] | None = None, - y_axes: list[str] | None = None, - per_page: int = 50, - mode: Literal["spans", "aggregates"] = "spans", - project_ids: list[int] | None = None, -) -> dict[str, Any] | None: - """ - Execute a trace query to get table data by calling the events endpoint. 
- """ - try: - organization = Organization.objects.get(id=org_id) - except Organization.DoesNotExist: - logger.warning("Organization not found", extra={"org_id": org_id}) - return None - - # Use provided project_ids or get all project IDs for the organization - if project_ids is None: - project_ids = list(organization.project_set.values_list("id", flat=True)) - if not project_ids: - logger.warning("No projects found for organization", extra={"org_id": org_id}) - return None - - # Determine fields based on mode - if mode == "aggregates": - # Aggregates mode: group_by fields + aggregate functions - fields = [] - if group_by: - fields.extend(group_by) - if y_axes: - fields.extend(y_axes) - else: - # Samples mode: default span fields - fields = [ - "id", - "span.op", - "span.description", - "span.duration", - "transaction", - "timestamp", - "project", - "trace", - ] - - params: dict[str, Any] = { - "query": query, - "statsPeriod": stats_period, - "field": fields, - "sort": sort if sort else ("-timestamp" if not group_by else None), - "per_page": per_page, - "project": project_ids, - "dataset": "spans", - "referrer": Referrer.SEER_RPC, - "transformAliasToInputFormat": "1", # Required for RPC datasets - } - - # Remove None values - params = {k: v for k, v in params.items() if v is not None} - - resp = client.get( - auth=ApiKey(organization_id=organization.id, scope_list=["org:read", "project:read"]), - user=None, - path=f"/organizations/{organization.slug}/events/", - params=params, - ) - return resp.data - - def execute_table_query( *, org_id: int, dataset: str, fields: list[str], - query: str, - sort: str, per_page: int, stats_period: str, + query: str | None = None, + sort: str | None = None, project_ids: list[int] | None = None, project_slugs: list[str] | None = None, sampling_mode: SAMPLING_MODES = "NORMAL", @@ -212,7 +68,7 @@ def execute_table_query( params: dict[str, Any] = { "dataset": dataset, "field": fields, - "query": query, + "query": query or None, "sort": sort if sort else ("-timestamp" if "timestamp" in fields else None), "per_page": per_page, "statsPeriod": stats_period, @@ -232,7 +88,7 @@ def execute_table_query( path=f"/organizations/{organization.slug}/events/", params=params, ) - return resp.data + return {"data": resp.data["data"]} def execute_timeseries_query( diff --git a/tests/sentry/seer/explorer/test_tools.py b/tests/sentry/seer/explorer/test_tools.py index 7f0c27a841a451..d6c178fe8d7482 100644 --- a/tests/sentry/seer/explorer/test_tools.py +++ b/tests/sentry/seer/explorer/test_tools.py @@ -15,8 +15,8 @@ from sentry.seer.endpoints.seer_rpc import get_organization_project_ids from sentry.seer.explorer.tools import ( EVENT_TIMESERIES_RESOLUTIONS, - execute_trace_query_chart, - execute_trace_query_table, + execute_table_query, + execute_timeseries_query, get_issue_details, get_replay_metadata, get_repository_definition, @@ -27,6 +27,7 @@ from sentry.testutils.cases import ( APITestCase, APITransactionTestCase, + OurLogTestCase, ReplaysSnubaTestCase, SnubaTestCase, SpanTestCase, @@ -38,8 +39,18 @@ @pytest.mark.django_db(databases=["default", "control"]) -class TestTraceQueryChartTable(APITransactionTestCase, SnubaTestCase, SpanTestCase): +class TestSpansQuery(APITransactionTestCase, SnubaTestCase, SpanTestCase): databases = {"default", "control"} + default_span_fields = [ + "id", + "span.op", + "span.description", + "span.duration", + "transaction", + "timestamp", + "project", + "trace", + ] def setUp(self): super().setUp() @@ -83,10 +94,11 @@ def setUp(self): 
self.store_spans(spans, is_eap=True) - def test_execute_trace_query_chart_count_metric(self): - """Test chart query with count() metric using real data""" - result = execute_trace_query_chart( + def test_spans_timeseries_count_metric(self): + """Test timeseries query with count() metric using real data""" + result = execute_timeseries_query( org_id=self.organization.id, + dataset="spans", query="", stats_period="1h", y_axes=["count()"], @@ -104,10 +116,11 @@ def test_execute_trace_query_chart_count_metric(self): total_count = sum(point[1][0]["count"] for point in data_points if point[1]) assert total_count == 4 - def test_execute_trace_query_chart_multiple_metrics(self): - """Test chart query with multiple metrics""" - result = execute_trace_query_chart( + def test_spans_timeseries_multiple_metrics(self): + """Test timeseries query with multiple metrics""" + result = execute_timeseries_query( org_id=self.organization.id, + dataset="spans", query="", stats_period="1h", y_axes=["count()", "avg(span.duration)"], @@ -135,10 +148,12 @@ def test_execute_trace_query_chart_multiple_metrics(self): ] assert len(duration_values) > 0 - def test_execute_trace_query_table_basic_query(self): + def test_spans_table_basic_query(self): """Test table query returns actual span data""" - result = execute_trace_query_table( + result = execute_table_query( org_id=self.organization.id, + dataset="spans", + fields=self.default_span_fields, query="", stats_period="1h", sort="-timestamp", @@ -147,7 +162,6 @@ def test_execute_trace_query_table_basic_query(self): assert result is not None assert "data" in result - assert "meta" in result rows = result["data"] assert len(rows) == 4 # Should find all 4 spans we created @@ -162,13 +176,16 @@ def test_execute_trace_query_table_basic_query(self): cache_rows = [row for row in rows if row.get("span.op") == "cache.get"] assert len(cache_rows) == 1 # One cache span - def test_execute_trace_query_table_specific_operation(self): + def test_spans_table_specific_operation(self): """Test table query filtering by specific operation""" - result = execute_trace_query_table( + result = execute_table_query( org_id=self.organization.id, + dataset="spans", + fields=self.default_span_fields, query="span.op:http.client", stats_period="1h", sort="-timestamp", + per_page=10, ) assert result is not None @@ -182,10 +199,11 @@ def test_execute_trace_query_table_specific_operation(self): descriptions = [row.get("span.description", "") for row in http_rows] assert any("api.external.com" in desc for desc in descriptions) - def test_execute_trace_query_chart_empty_results(self): - """Test chart query with query that returns no results""" - result = execute_trace_query_chart( + def test_spans_timeseries_empty_results(self): + """Test timeseries query with query that returns no results""" + result = execute_timeseries_query( org_id=self.organization.id, + dataset="spans", query="span.op:nonexistent", stats_period="1h", y_axes=["count()"], @@ -201,23 +219,27 @@ def test_execute_trace_query_chart_empty_results(self): total_count = sum(point[1][0]["count"] for point in data_points if point[1]) assert total_count == 0 - def test_execute_trace_query_table_empty_results(self): + def test_spans_table_empty_results(self): """Test table query with query that returns no results""" - result = execute_trace_query_table( + result = execute_table_query( org_id=self.organization.id, + dataset="spans", + fields=self.default_span_fields, query="span.op:nonexistent", stats_period="1h", sort="-timestamp", + per_page=10, 
) assert result is not None assert "data" in result assert len(result["data"]) == 0 - def test_execute_trace_query_chart_duration_filtering(self): - """Test chart query with duration filter""" - result = execute_trace_query_chart( + def test_spans_timeseries_duration_filtering(self): + """Test timeseries query with duration filter""" + result = execute_timeseries_query( org_id=self.organization.id, + dataset="spans", query="span.duration:>100ms", # Should match spans > 100ms stats_period="1h", y_axes=["count()"], @@ -233,10 +255,12 @@ def test_execute_trace_query_chart_duration_filtering(self): total_count = sum(point[1][0]["count"] for point in data_points if point[1]) assert total_count == 3 - def test_execute_trace_query_table_duration_stats(self): + def test_spans_table_duration_stats(self): """Test table query with duration statistics""" - result = execute_trace_query_table( + result = execute_table_query( org_id=self.organization.id, + dataset="spans", + fields=self.default_span_fields, query="", stats_period="1h", sort="-span.duration", @@ -259,26 +283,31 @@ def test_execute_trace_query_table_duration_stats(self): def test_execute_trace_query_nonexistent_organization(self): """Test queries handle nonexistent organization gracefully""" - chart_result = execute_trace_query_chart( + timeseries_result = execute_timeseries_query( org_id=99999, + dataset="spans", query="", stats_period="1h", y_axes=["count()"], ) - assert chart_result is None + assert timeseries_result is None - table_result = execute_trace_query_table( + table_result = execute_table_query( org_id=99999, + dataset="spans", + fields=self.default_span_fields, query="", stats_period="1h", sort="-count", + per_page=10, ) assert table_result is None - def test_execute_trace_query_chart_with_groupby(self): - """Test chart query with group_by parameter for aggregates""" - result = execute_trace_query_chart( + def test_spans_timeseries_with_groupby(self): + """Test timeseries query with group_by parameter for aggregates""" + result = execute_timeseries_query( org_id=self.organization.id, + dataset="spans", query="", stats_period="1h", y_axes=["count()"], @@ -305,22 +334,20 @@ def test_execute_trace_query_chart_with_groupby(self): data_points = metrics["count()"]["data"] assert isinstance(data_points, list) - def test_execute_trace_query_table_with_groupby(self): + def test_spans_table_aggregates_groupby(self): """Test table query with group_by for aggregates mode""" - result = execute_trace_query_table( + result = execute_table_query( org_id=self.organization.id, + dataset="spans", + fields=["span.op", "count()"], query="", stats_period="1h", sort="-count()", - group_by=["span.op"], - y_axes=["count()"], per_page=10, - mode="aggregates", ) assert result is not None assert "data" in result - assert "meta" in result rows = result["data"] # Should have one row per unique span.op value @@ -331,21 +358,20 @@ def test_execute_trace_query_table_with_groupby(self): assert "span.op" in row assert "count()" in row - def test_execute_trace_query_table_aggregates_mode_basic(self): + def test_spans_table_aggregates_basic(self): """Test table query in aggregates mode without group_by""" - result = execute_trace_query_table( + result = execute_table_query( org_id=self.organization.id, + dataset="spans", query="", stats_period="1h", sort="-count()", - y_axes=["count()", "avg(span.duration)"], + fields=["count()", "avg(span.duration)"], per_page=10, - mode="aggregates", ) assert result is not None assert "data" in result - assert "meta" in result 
rows = result["data"] # Should have aggregate results @@ -356,21 +382,20 @@ def test_execute_trace_query_table_aggregates_mode_basic(self): assert "count()" in row assert "avg(span.duration)" in row - def test_execute_trace_query_table_aggregates_mode_multiple_functions(self): + def test_spans_table_aggregates_multiple_functions(self): """Test table query in aggregates mode with multiple aggregate functions""" - result = execute_trace_query_table( + result = execute_table_query( org_id=self.organization.id, + dataset="spans", query="span.op:db", # Filter to only database operations stats_period="1h", sort="-sum(span.duration)", - y_axes=["count()", "sum(span.duration)", "avg(span.duration)"], + fields=["count()", "sum(span.duration)", "avg(span.duration)"], per_page=10, - mode="aggregates", ) assert result is not None assert "data" in result - assert "meta" in result rows = result["data"] # Should have aggregate results for database spans @@ -1536,3 +1561,71 @@ def test_get_replay_metadata_short_id(self) -> None: ) is None ) + + +class TestLogsQuery(APITransactionTestCase, SnubaTestCase, OurLogTestCase): + def setUp(self) -> None: + super().setUp() + self.login_as(user=self.user) + self.ten_mins_ago = before_now(minutes=10) + self.nine_mins_ago = before_now(minutes=9) + + def test_logs_table_basic(self) -> None: + # Create logs with various attributes + logs = [ + self.create_ourlog( + { + "body": "User authentication failed", + "severity_text": "ERROR", + "severity_number": 17, + }, + timestamp=self.ten_mins_ago, + ), + self.create_ourlog( + { + "body": "Request processed successfully", + "severity_text": "INFO", + "severity_number": 9, + }, + timestamp=self.nine_mins_ago, + ), + self.create_ourlog( + { + "body": "Database connection timeout", + "severity_text": "WARN", + "severity_number": 13, + }, + timestamp=self.nine_mins_ago, + ), + ] + self.store_ourlogs(logs) + + fields = [ + "id", + "message", + "message.template", + "project.id", + "trace", + "severity_number", + "severity", + "timestamp", + "timestamp_precise", + "observed_timestamp", + ] + + result = execute_table_query( + org_id=self.organization.id, + dataset="ourlogs", + fields=fields, + per_page=10, + stats_period="1h", + project_slugs=[self.project.slug], + ) + assert result is not None + assert "data" in result + data = result["data"] + assert len(data) == len(logs) + + for log in data: + for field in fields: + assert field in log, field From 8b02016f041f2cb6562094f9138b0f366c6a001b Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Mon, 17 Nov 2025 14:04:37 -0800 Subject: [PATCH 2/3] append sort --- src/sentry/seer/explorer/tools.py | 6 +++ tests/sentry/seer/explorer/test_tools.py | 47 ++++++++++++++++-------- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/src/sentry/seer/explorer/tools.py b/src/sentry/seer/explorer/tools.py index 920a7769c0c540..70b6294df8cbdf 100644 --- a/src/sentry/seer/explorer/tools.py +++ b/src/sentry/seer/explorer/tools.py @@ -65,6 +65,12 @@ def execute_table_query( project_ids = [ALL_ACCESS_PROJECT_ID] # Note if both project_ids and project_slugs are provided, the API request will 400. + if sort: + # Auto-select sort field to avoid snuba errors. 
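+        # (The events endpoint rejects a sort key that is not among the selected
+        # fields, so append it to the selection when it is missing.)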
+        sort_field = sort.lstrip("-")
+        if sort_field not in fields:
+            fields.append(sort_field)
+
     params: dict[str, Any] = {
         "dataset": dataset,
         "field": fields,
diff --git a/tests/sentry/seer/explorer/test_tools.py b/tests/sentry/seer/explorer/test_tools.py
index d6c178fe8d7482..b4690bceb8749d 100644
--- a/tests/sentry/seer/explorer/test_tools.py
+++ b/tests/sentry/seer/explorer/test_tools.py
@@ -281,7 +281,23 @@ def test_spans_table_duration_stats(self):
         # Allow for some tolerance in duration matching
         assert any(abs(d - expected) < 10 for d in durations)
 
-    def test_execute_trace_query_nonexistent_organization(self):
+    def test_spans_table_appends_sort(self):
+        """Test that the sort field is appended to the selected fields when missing."""
+        result = execute_table_query(
+            org_id=self.organization.id,
+            dataset="spans",
+            fields=["id"],
+            stats_period="1h",
+            sort="-timestamp",
+            per_page=1,
+        )
+
+        assert result is not None
+        rows = result["data"]
+        assert "id" in rows[0]
+        assert "timestamp" in rows[0]
+
+    def test_spans_query_nonexistent_organization(self):
         """Test queries handle nonexistent organization gracefully"""
         timeseries_result = execute_timeseries_query(
             org_id=99999,
@@ -1569,6 +1585,18 @@ def setUp(self) -> None:
         self.login_as(user=self.user)
         self.ten_mins_ago = before_now(minutes=10)
         self.nine_mins_ago = before_now(minutes=9)
+        self.default_fields = [
+            "id",
+            "message",
+            "message.template",
+            "project.id",
+            "trace",
+            "severity_number",
+            "severity",
+            "timestamp",
+            "timestamp_precise",
+            "observed_timestamp",
+        ]
 
     def test_logs_table_basic(self) -> None:
         # Create logs with various attributes
@@ -1600,23 +1628,10 @@ def test_logs_table_basic(self) -> None:
         self.store_ourlogs(logs)
 
-        fields = [
-            "id",
-            "message",
-            "message.template",
-            "project.id",
-            "trace",
-            "severity_number",
-            "severity",
-            "timestamp",
-            "timestamp_precise",
-            "observed_timestamp",
-        ]
-
         result = execute_table_query(
             org_id=self.organization.id,
             dataset="ourlogs",
-            fields=fields,
+            fields=self.default_fields,
             per_page=10,
             stats_period="1h",
             project_slugs=[self.project.slug],
@@ -1637,5 +1642,5 @@ def test_logs_table_basic(self) -> None:
         assert result is not None
         assert "data" in result
         data = result["data"]
         assert len(data) == len(logs)
 
         for log in data:
-            for field in fields:
+            for field in self.default_fields:
                 assert field in log, field

From 6ed896a7243aae8e4f7ca50b804d9b332a41a7a1 Mon Sep 17 00:00:00 2001
From: Andrew Liu <159852527+aliu39@users.noreply.github.com>
Date: Mon, 17 Nov 2025 14:11:45 -0800
Subject: [PATCH 3/3] test negative

---
 tests/sentry/seer/explorer/test_tools.py | 25 ++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/tests/sentry/seer/explorer/test_tools.py b/tests/sentry/seer/explorer/test_tools.py
index b4690bceb8749d..71d4876dfe55e8 100644
--- a/tests/sentry/seer/explorer/test_tools.py
+++ b/tests/sentry/seer/explorer/test_tools.py
@@ -283,19 +283,20 @@ def test_spans_table_duration_stats(self):
 
     def test_spans_table_appends_sort(self):
         """Test that the sort field is appended to the selected fields when missing."""
-        result = execute_table_query(
-            org_id=self.organization.id,
-            dataset="spans",
-            fields=["id"],
-            stats_period="1h",
-            sort="-timestamp",
-            per_page=1,
-        )
+        for sort in ["timestamp", "-timestamp"]:
+            result = execute_table_query(
+                org_id=self.organization.id,
+                dataset="spans",
+                fields=["id"],
+                stats_period="1h",
+                sort=sort,
+                per_page=1,
+            )
 
-        assert result is not None
-        rows = result["data"]
-        assert "id" in rows[0]
-        assert "timestamp" in rows[0]
+            assert result is not None
+            rows = result["data"]
+            assert "id" in rows[0]
+            assert "timestamp" in rows[0]
 
     def test_spans_query_nonexistent_organization(self):
         """Test queries handle nonexistent organization gracefully"""
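
Reviewer note: a minimal usage sketch of the two consolidated helpers after this
series, based only on the signatures visible in the diffs above. The `org` object
and the field lists are illustrative assumptions, not part of the patch.

    from sentry.seer.explorer.tools import execute_table_query, execute_timeseries_query

    # Table rows (replaces the removed execute_trace_query_table). `query` and
    # `sort` are now optional; the sort field is auto-appended to `fields` when
    # missing, and the result is normalized to {"data": [...]} (None on bad org_id).
    table = execute_table_query(
        org_id=org.id,  # `org` is assumed to be an Organization instance
        dataset="spans",
        fields=["id", "span.op", "span.duration"],
        per_page=10,
        stats_period="24h",
        sort="-span.duration",
    )
    rows = table["data"] if table is not None else []

    # Timeseries data (replaces the removed execute_trace_query_chart).
    series = execute_timeseries_query(
        org_id=org.id,
        dataset="spans",
        query="span.op:db",
        stats_period="24h",
        y_axes=["count()", "avg(span.duration)"],
    )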