Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions src/sentry/api/endpoints/organization_ai_conversations.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,8 @@ def _build_aggregations_query(
"failure_count()",
"count_if(gen_ai.operation.type,equals,ai_client)",
"count_if(gen_ai.operation.type,equals,tool)",
"sum(gen_ai.usage.total_tokens)",
"sum(gen_ai.cost.total_tokens)",
"sum_if(gen_ai.usage.total_tokens,gen_ai.operation.type,equals,ai_client)",
"sum_if(gen_ai.cost.total_tokens,gen_ai.operation.type,equals,ai_client)",
"min(precise.start_ts)",
"max(precise.finish_ts)",
],
Expand Down Expand Up @@ -425,8 +425,18 @@ def _build_conversations_from_aggregations(
errors=int(row.get("failure_count()") or 0),
llm_calls=int(row.get("count_if(gen_ai.operation.type,equals,ai_client)") or 0),
tool_calls=int(row.get("count_if(gen_ai.operation.type,equals,tool)") or 0),
total_tokens=int(row.get("sum(gen_ai.usage.total_tokens)") or 0),
total_cost=float(row.get("sum(gen_ai.cost.total_tokens)") or 0),
total_tokens=int(
row.get(
"sum_if(gen_ai.usage.total_tokens,gen_ai.operation.type,equals,ai_client)"
)
or 0
),
total_cost=float(
row.get(
"sum_if(gen_ai.cost.total_tokens,gen_ai.operation.type,equals,ai_client)"
)
or 0
),
trace_ids=[],
flow=[],
first_input=None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -415,3 +415,63 @@ def test_stats_period_is_ignored(self) -> None:
assert response.status_code == 200, response.data
assert len(response.data) == 1
assert response.data[0]["gen_ai.conversation.id"] == conversation_id

def test_tokens_on_multiple_span_types(self) -> None:
    """Verify each raw span carries its own individual token/cost values.

    The endpoint returns unaggregated span rows: an agent span and its
    ai_client child are both present, each with its own
    gen_ai.usage.total_tokens / gen_ai.cost.total_tokens. Consumers that
    sum these fields must restrict to gen_ai.operation.type:ai_client to
    avoid double counting token data that also appears on agent spans.
    """
    base_time = before_now(days=91).replace(microsecond=0)
    shared_trace = uuid4().hex
    convo_id = uuid4().hex

    # Agent span carrying token/cost data.
    self.store_ai_span(
        conversation_id=convo_id,
        timestamp=base_time - timedelta(seconds=2),
        op="gen_ai.invoke_agent",
        operation_type="invoke_agent",
        description="Test Agent",
        agent_name="Test Agent",
        trace_id=shared_trace,
        tokens=500,
        cost=0.05,
    )
    # ai_client span carrying its own token/cost data.
    self.store_ai_span(
        conversation_id=convo_id,
        timestamp=base_time - timedelta(seconds=1),
        op="gen_ai.chat",
        operation_type="ai_client",
        trace_id=shared_trace,
        tokens=100,
        cost=0.01,
    )

    response = self.do_request(
        convo_id,
        {
            "project": [self.project.id],
            "start": (base_time - timedelta(hours=1)).isoformat(),
            "end": (base_time + timedelta(hours=1)).isoformat(),
        },
    )
    assert response.status_code == 200, response.data
    assert len(response.data) == 2

    # Oldest-first ordering puts the agent span ahead of the client span.
    agent_span, ai_client_span = sorted(
        response.data, key=lambda span: span["precise.start_ts"]
    )

    assert agent_span["gen_ai.operation.type"] == "invoke_agent"
    assert agent_span["gen_ai.usage.total_tokens"] == 500
    assert agent_span["gen_ai.cost.total_tokens"] == 0.05

    assert ai_client_span["gen_ai.operation.type"] == "ai_client"
    assert ai_client_span["gen_ai.usage.total_tokens"] == 100
    assert ai_client_span["gen_ai.cost.total_tokens"] == 0.01
Original file line number Diff line number Diff line change
Expand Up @@ -1193,3 +1193,56 @@ def test_empty_tool_names_when_no_tool_calls(self) -> None:
assert conversation["toolNames"] == []
assert conversation["toolCalls"] == 0
assert conversation["toolErrors"] == 0

def test_tokens_only_counted_from_ai_client_spans(self) -> None:
    """Verify token/cost aggregation excludes non-ai_client spans.

    Both an invoke_agent span and its ai_client child carry token data;
    only the ai_client span may contribute to totalTokens/totalCost,
    otherwise the same usage would be double counted.
    """
    base_time = before_now(days=109).replace(microsecond=0)
    convo_id = uuid4().hex
    shared_trace = uuid4().hex

    # Agent span with tokens/cost — must be excluded from the totals.
    self.store_ai_span(
        conversation_id=convo_id,
        timestamp=base_time - timedelta(seconds=2),
        op="gen_ai.invoke_agent",
        operation_type="invoke_agent",
        description="Test Agent",
        agent_name="Test Agent",
        trace_id=shared_trace,
        tokens=500,
        cost=0.05,
    )
    # ai_client span with tokens/cost — the only one that should count.
    self.store_ai_span(
        conversation_id=convo_id,
        timestamp=base_time - timedelta(seconds=1),
        op="gen_ai.chat",
        operation_type="ai_client",
        trace_id=shared_trace,
        tokens=100,
        cost=0.01,
    )

    response = self.do_request(
        {
            "project": [self.project.id],
            "start": (base_time - timedelta(hours=1)).isoformat(),
            "end": (base_time + timedelta(hours=1)).isoformat(),
        }
    )
    assert response.status_code == 200
    assert len(response.data) == 1

    (conversation,) = response.data
    # 100 / 0.01 come solely from the ai_client span; a naive sum over
    # both spans would have reported 600 / 0.06 (double counting).
    assert conversation["totalTokens"] == 100
    assert conversation["totalCost"] == 0.01
    assert conversation["llmCalls"] == 1
    assert conversation["flow"] == ["Test Agent"]
Loading