diff --git a/src/askui/agent_base.py b/src/askui/agent_base.py index 8bdacb0c..e3554f28 100644 --- a/src/askui/agent_base.py +++ b/src/askui/agent_base.py @@ -10,7 +10,7 @@ from typing_extensions import Self from askui.agent_settings import AgentSettings -from askui.callbacks import ConversationCallback, UsageTrackingCallback +from askui.callbacks import ConversationCallback, ConversationStatisticsCallback from askui.container import telemetry from askui.locators.locators import Locator from askui.models.shared.agent_message_param import MessageParam @@ -78,7 +78,7 @@ def __init__( speakers = Speakers() _callbacks = list(callbacks or []) _callbacks.append( - UsageTrackingCallback( + ConversationStatisticsCallback( reporter=self._reporter, pricing=self._vlm_provider.pricing, ) diff --git a/src/askui/callbacks/__init__.py b/src/askui/callbacks/__init__.py index 29eb7029..c39dd663 100644 --- a/src/askui/callbacks/__init__.py +++ b/src/askui/callbacks/__init__.py @@ -1,7 +1,7 @@ from .conversation_callback import ConversationCallback -from .usage_tracking_callback import UsageTrackingCallback +from .conversation_statistics_callback import ConversationStatisticsCallback __all__ = [ "ConversationCallback", - "UsageTrackingCallback", + "ConversationStatisticsCallback", ] diff --git a/src/askui/callbacks/usage_tracking_callback.py b/src/askui/callbacks/conversation_statistics_callback.py similarity index 86% rename from src/askui/callbacks/usage_tracking_callback.py rename to src/askui/callbacks/conversation_statistics_callback.py index d4c536f7..38637306 100644 --- a/src/askui/callbacks/usage_tracking_callback.py +++ b/src/askui/callbacks/conversation_statistics_callback.py @@ -1,7 +1,13 @@ -"""Callback for tracking token usage and reporting usage summaries.""" +"""Callback for tracking per-conversation statistics (token usage, timing). + +Emits a `UsageSummary` (with per-conversation and per-step breakdowns, +including start/end timestamps for each conversation) to a reporter when the +conversation ends. +""" from __future__ import annotations +from datetime import datetime, timezone from typing import TYPE_CHECKING from opentelemetry import trace @@ -172,15 +178,35 @@ class StepUsageSummary(UsageSummary): class ConversationUsageSummary(UsageSummary): - """Usage summary for one conversation including per-step breakdown.""" + """Usage summary for one conversation including per-step breakdown. + + Args: + conversation_index (int): 1-based index of the conversation within the + current agent lifecycle. + conversation_id (str): Unique identifier of the conversation. + step_summaries (list[StepUsageSummary]): Per-step usage summaries. + started_at (datetime | None): UTC timestamp captured at + `on_conversation_start`. `None` if timing was not tracked. + ended_at (datetime | None): UTC timestamp captured at + `on_conversation_end`. `None` if timing was not tracked. + """ conversation_index: int conversation_id: str step_summaries: list[StepUsageSummary] = Field(default_factory=list) + started_at: datetime | None = None + ended_at: datetime | None = None + +class ConversationStatisticsCallback(ConversationCallback): + """Tracks per-conversation statistics (token usage per step and wall-clock + timing) and reports a summary at conversation end. -class UsageTrackingCallback(ConversationCallback): - """Tracks token usage per step and reports a summary at conversation end. + The reported `UsageSummary` contains, for each conversation, the raw + ``started_at`` and ``ended_at`` UTC timestamps alongside token usage. + Downstream consumers (e.g. `SimpleHtmlReporter`) are responsible for + deriving human-readable durations from those timestamps so the raw values + remain available for other uses. Args: reporter: Reporter to write the final usage summary to. @@ -199,12 +225,14 @@ def __init__( self._per_conversation_summaries: list[ConversationUsageSummary] = [] self._per_step_summaries: list[StepUsageSummary] = [] self._conversation_index: int = 0 + self._conversation_started_at: datetime | None = None @override def on_conversation_start(self, conversation: Conversation) -> None: self._per_conversation_usage = UsageSummary.create_from(self._summary) self._per_step_summaries = [] self._conversation_index += 1 + self._conversation_started_at = datetime.now(tz=timezone.utc) @override def on_step_end( @@ -237,9 +265,12 @@ def on_conversation_end(self, conversation: Conversation) -> None: generated_steps: list[StepUsageSummary] = [ step_summary.generate() for step_summary in self._per_step_summaries ] + ended_at = datetime.now(tz=timezone.utc) conversation_summary = self._create_conversation_summary( conversation=conversation, generated_step_summaries=generated_steps, + started_at=self._conversation_started_at, + ended_at=ended_at, ) self._per_conversation_summaries.append(conversation_summary) self._summary.per_conversation_summaries = list( @@ -275,11 +306,15 @@ def _create_conversation_summary( self, conversation: Conversation, generated_step_summaries: list[StepUsageSummary], + started_at: datetime | None = None, + ended_at: datetime | None = None, ) -> ConversationUsageSummary: conversation_summary = ConversationUsageSummary( conversation_index=self._conversation_index, conversation_id=conversation.conversation_id, step_summaries=generated_step_summaries, + started_at=started_at, + ended_at=ended_at, input_tokens=self._per_conversation_usage.input_tokens, output_tokens=self._per_conversation_usage.output_tokens, cache_creation_input_tokens=( diff --git a/src/askui/reporting.py b/src/askui/reporting.py index 50e640f6..cc8be97a 100644 --- a/src/askui/reporting.py +++ b/src/askui/reporting.py @@ -21,7 +21,10 @@ if TYPE_CHECKING: from PIL import Image - from askui.callbacks.usage_tracking_callback import UsageSummary + from askui.callbacks.conversation_statistics_callback import ( + ConversationUsageSummary, + UsageSummary, + ) def normalize_to_pil_images( @@ -37,6 +40,27 @@ def normalize_to_pil_images( return [image] +def _format_duration(seconds: float) -> str: + """Format a duration given in seconds as ``HH:MM:SS`` or + ``HH:MM:SS.mmm`` for sub-second precision. + + Used by `SimpleHtmlReporter` to render both the overall execution time and + per-conversation durations consistently. + """ + total_seconds = max(float(seconds), 0.0) + whole_seconds = int(total_seconds) + millis = int(round((total_seconds - whole_seconds) * 1000)) + if millis == 1000: + whole_seconds += 1 + millis = 0 + hours, remainder = divmod(whole_seconds, 3600) + minutes, secs = divmod(remainder, 60) + base = f"{hours:02d}:{minutes:02d}:{secs:02d}" + if whole_seconds == 0 and millis > 0: + return f"{base}.{millis:03d}" + return base + + def truncate_base64_images(content: Any) -> Any: """Replace base64 image data with a placeholder to keep reports readable. @@ -1003,6 +1027,7 @@ def generate(self) -> None: