10 changes: 10 additions & 0 deletions dapr/clients/grpc/client.py
@@ -25,8 +25,10 @@

import grpc # type: ignore
from google.protobuf.any_pb2 import Any as GrpcAny
from google.protobuf.duration_pb2 import Duration as GrpcDuration
from google.protobuf.empty_pb2 import Empty as GrpcEmpty
from google.protobuf.message import Message as GrpcMessage
from google.protobuf.struct_pb2 import Struct as GrpcStruct
from grpc import ( # type: ignore
RpcError,
StreamStreamClientInterceptor,
@@ -1787,6 +1789,8 @@ def converse_alpha2(
temperature: Optional[float] = None,
tools: Optional[List[conversation.ConversationTools]] = None,
tool_choice: Optional[str] = None,
response_format: Optional[GrpcStruct] = None,
prompt_cache_retention: Optional[GrpcDuration] = None,
) -> conversation.ConversationResponseAlpha2:
"""Invoke an LLM using the conversation API (Alpha2) with tool calling support.

@@ -1800,6 +1804,8 @@
temperature: Optional temperature setting for the LLM to optimize for creativity or predictability
tools: Optional list of tools available for the LLM to call
tool_choice: Optional control over which tools can be called ('none', 'auto', 'required', or specific tool name)
response_format: Optional response format as a google.protobuf.struct_pb2.Struct (e.g. a json_schema definition for structured output)
prompt_cache_retention: Optional prompt cache retention period as a google.protobuf.duration_pb2.Duration

Returns:
ConversationResponseAlpha2 containing the conversation results with choices and tool calls
@@ -1856,6 +1862,10 @@ def converse_alpha2(
request.temperature = temperature
if tool_choice is not None:
request.tool_choice = tool_choice
if response_format is not None and hasattr(request, 'response_format'):
request.response_format.CopyFrom(response_format)
if prompt_cache_retention is not None and hasattr(request, 'prompt_cache_retention'):
request.prompt_cache_retention.CopyFrom(prompt_cache_retention)

try:
response, call = self.retry_policy.run_rpc(self._stub.ConverseAlpha2.with_call, request)
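For reference, a minimal call-site sketch for the two new parameters (not part of this diff). It assumes a conversation component named 'openai' and the existing ConversationInputAlpha2 / create_user_message helpers from dapr.clients.grpc.conversation; the json_schema payload shown is an assumption, since the exact shape depends on the underlying LLM component.

from google.protobuf.duration_pb2 import Duration
from google.protobuf.struct_pb2 import Struct

from dapr.clients import DaprClient
from dapr.clients.grpc import conversation

# Structured-output schema passed through as a protobuf Struct; the exact
# layout expected here depends on the LLM component (assumed layout).
response_format = Struct()
response_format.update(
    {
        'type': 'json_schema',
        'json_schema': {
            'name': 'weather',
            'schema': {
                'type': 'object',
                'properties': {'temperature_c': {'type': 'number'}},
                'required': ['temperature_c'],
            },
        },
    }
)

# Ask the provider to keep the prompt cache around for five minutes.
prompt_cache_retention = Duration(seconds=300)

with DaprClient() as client:
    response = client.converse_alpha2(
        name='openai',  # hypothetical conversation component name
        inputs=[
            conversation.ConversationInputAlpha2(
                messages=[conversation.create_user_message('What is the weather in Berlin?')],
            )
        ],
        response_format=response_format,
        prompt_cache_retention=prompt_cache_retention,
    )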
70 changes: 69 additions & 1 deletion dapr/clients/grpc/conversation.py
@@ -338,11 +338,46 @@ class ConversationResultAlpha2Choices:
message: ConversationResultAlpha2Message


@dataclass
class ConversationResultAlpha2CompletionUsageCompletionTokensDetails:
"""Breakdown of tokens used in the completion."""

accepted_prediction_tokens: int = 0
audio_tokens: int = 0
reasoning_tokens: int = 0
rejected_prediction_tokens: int = 0


@dataclass
class ConversationResultAlpha2CompletionUsagePromptTokensDetails:
"""Breakdown of tokens used in the prompt."""

audio_tokens: int = 0
cached_tokens: int = 0


@dataclass
class ConversationResultAlpha2CompletionUsage:
"""Token usage for one Alpha2 conversation result."""

completion_tokens: int = 0
prompt_tokens: int = 0
total_tokens: int = 0
completion_tokens_details: Optional[
ConversationResultAlpha2CompletionUsageCompletionTokensDetails
] = None
prompt_tokens_details: Optional[ConversationResultAlpha2CompletionUsagePromptTokensDetails] = (
None
)


@dataclass
class ConversationResultAlpha2:
"""One of the outputs in Alpha2 response from conversation input."""

choices: List[ConversationResultAlpha2Choices] = field(default_factory=list)
model: Optional[str] = None
usage: Optional[ConversationResultAlpha2CompletionUsage] = None


@dataclass
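As a quick illustration of how the new usage dataclasses nest (a sketch, e.g. for unit tests of code that consumes usage; the values and model string are made up):

usage = ConversationResultAlpha2CompletionUsage(
    completion_tokens=42,
    prompt_tokens=100,
    total_tokens=142,
    prompt_tokens_details=ConversationResultAlpha2CompletionUsagePromptTokensDetails(
        cached_tokens=64,  # tokens served from the provider's prompt cache
    ),
)
result = ConversationResultAlpha2(choices=[], model='gpt-4o-mini', usage=usage)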
@@ -657,5 +692,38 @@ def _get_outputs_from_grpc_response(
)
)

outputs.append(ConversationResultAlpha2(choices=choices))
model: Optional[str] = None
usage: Optional[ConversationResultAlpha2CompletionUsage] = None
if hasattr(output, 'model') and getattr(output, 'model', None):
model = output.model
if hasattr(output, 'usage') and output.usage:
u = output.usage
completion_details: Optional[
ConversationResultAlpha2CompletionUsageCompletionTokensDetails
] = None
prompt_details: Optional[ConversationResultAlpha2CompletionUsagePromptTokensDetails] = (
None
)
if hasattr(u, 'completion_tokens_details') and u.completion_tokens_details:
cd = u.completion_tokens_details
completion_details = ConversationResultAlpha2CompletionUsageCompletionTokensDetails(
accepted_prediction_tokens=getattr(cd, 'accepted_prediction_tokens', 0) or 0,
audio_tokens=getattr(cd, 'audio_tokens', 0) or 0,
reasoning_tokens=getattr(cd, 'reasoning_tokens', 0) or 0,
rejected_prediction_tokens=getattr(cd, 'rejected_prediction_tokens', 0) or 0,
)
if hasattr(u, 'prompt_tokens_details') and u.prompt_tokens_details:
pd = u.prompt_tokens_details
prompt_details = ConversationResultAlpha2CompletionUsagePromptTokensDetails(
audio_tokens=getattr(pd, 'audio_tokens', 0) or 0,
cached_tokens=getattr(pd, 'cached_tokens', 0) or 0,
)
usage = ConversationResultAlpha2CompletionUsage(
completion_tokens=getattr(u, 'completion_tokens', 0) or 0,
prompt_tokens=getattr(u, 'prompt_tokens', 0) or 0,
total_tokens=getattr(u, 'total_tokens', 0) or 0,
completion_tokens_details=completion_details,
prompt_tokens_details=prompt_details,
)
outputs.append(ConversationResultAlpha2(choices=choices, model=model, usage=usage))
return outputs
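And a sketch of reading the newly mapped fields on the consumer side, assuming the Alpha2 response exposes the mapped results as outputs and that response comes from a converse_alpha2 call like the one above; every field may be unset when the component does not report it.

for output in response.outputs:
    if output.model:
        print(f'model: {output.model}')
    if output.usage:
        usage = output.usage
        print(
            f'tokens: prompt={usage.prompt_tokens} '
            f'completion={usage.completion_tokens} total={usage.total_tokens}'
        )
        if usage.prompt_tokens_details:
            print(f'cached prompt tokens: {usage.prompt_tokens_details.cached_tokens}')
        if usage.completion_tokens_details:
            print(f'reasoning tokens: {usage.completion_tokens_details.reasoning_tokens}')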